diff --git a/.gitignore b/.gitignore
index 856505c..36920cd 100644
--- a/.gitignore
+++ b/.gitignore
@@ -50,3 +50,8 @@ _doc/
 _bin/
 .settings/
 ImagePlay.VC.db
+
+# build-output (release/debug) and library dirs
+IPL/release/
+IPL/debug/
+IPL/lib/
diff --git a/IPL/.qmake.stash b/IPL/.qmake.stash
deleted file mode 100644
index 1b8387c..0000000
--- a/IPL/.qmake.stash
+++ /dev/null
@@ -1,67 +0,0 @@
-QMAKE_XCODE_DEVELOPER_PATH = /Applications/Xcode.app/Contents/Developer
-QMAKE_XCODE_VERSION = 6.1.1
-QMAKE_MAC_SDK.macosx.path = /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX10.10.sdk
-QMAKE_MAC_SDK.macx-clang.macosx.QMAKE_CC = /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang
-QMAKE_MAC_SDK.macx-clang.macosx.QMAKE_CXX = /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang++
-QMAKE_MAC_SDK.macx-clang.macosx.QMAKE_FIX_RPATH = \
-    /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/install_name_tool \
-    -id
-QMAKE_MAC_SDK.macx-clang.macosx.QMAKE_AR = \
-    /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/ar \
-    cq
-QMAKE_MAC_SDK.macx-clang.macosx.QMAKE_RANLIB = \
-    /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/ranlib \
-    -s
-QMAKE_MAC_SDK.macx-clang.macosx.QMAKE_LINK = /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang++
-QMAKE_MAC_SDK.macx-clang.macosx.QMAKE_LINK_SHLIB = /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang++
-QMAKE_MAC_SDK.macosx.platform_name = macosx
-QMAKE_MAC_SDK.macosx.version = 10.10
-QMAKE_MAC_SDK.macosx.platform_path = /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform
-QMAKE_MAC_SDK.macosx10.11.path = /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX10.11.sdk
-QMAKE_MAC_SDK.macosx10.11.platform_path = /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform
-QMAKE_MAC_SDK.macosx10.11.version = 10.11
-QMAKE_MAC_SDK.macx-clang.macosx10.11.QMAKE_CC = /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang
-QMAKE_MAC_SDK.macx-clang.macosx10.11.QMAKE_CXX = /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang++
-QMAKE_MAC_SDK.macx-clang.macosx10.11.QMAKE_FIX_RPATH = \
-    /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/install_name_tool \
-    -id
-QMAKE_MAC_SDK.macx-clang.macosx10.11.QMAKE_AR = \
-    /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/ar \
-    cq
-QMAKE_MAC_SDK.macx-clang.macosx10.11.QMAKE_RANLIB = \
-    /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/ranlib \
-    -s
-QMAKE_MAC_SDK.macx-clang.macosx10.11.QMAKE_LINK = /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang++
-QMAKE_MAC_SDK.macx-clang.macosx10.11.QMAKE_LINK_SHLIB = /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang++
-QMAKE_MAC_SDK.macosx10.11.platform_name = macosx
-QMAKE_MAC_SDK.macosx10.12.path = /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX10.12.sdk
-QMAKE_MAC_SDK.macosx10.12.platform_path = /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform
-QMAKE_MAC_SDK.macosx10.12.version = 10.12
-QMAKE_MAC_SDK.macx-clang.macosx10.12.QMAKE_CC = /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang
-QMAKE_MAC_SDK.macx-clang.macosx10.12.QMAKE_CXX = /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang++
-QMAKE_MAC_SDK.macx-clang.macosx10.12.QMAKE_FIX_RPATH = \
-    /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/install_name_tool \
-    -id
-QMAKE_MAC_SDK.macx-clang.macosx10.12.QMAKE_AR = \
-    /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/ar \
-    cq
-QMAKE_MAC_SDK.macx-clang.macosx10.12.QMAKE_RANLIB = \
-    /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/ranlib \
-    -s
-QMAKE_MAC_SDK.macx-clang.macosx10.12.QMAKE_LINK = /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang++
-QMAKE_MAC_SDK.macx-clang.macosx10.12.QMAKE_LINK_SHLIB = /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang++
-QMAKE_MAC_SDK.macosx10.12.platform_name = macosx
-QMAKE_DEFAULT_INCDIRS = \
-    /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/include/c++/v1 \
-    /usr/local/include \
-    /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/lib/clang/8.0.0/include \
-    /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/include \
-    /usr/include \
-    "/System/Library/Frameworks (framework directory)" \
-    "/Library/Frameworks (framework directory)"
-QMAKE_DEFAULT_LIBDIRS = \
-    /lib \
-    /usr/lib
-QMAKE_MAC_SDK.macosx10.12.Path = /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX10.12.sdk
-QMAKE_MAC_SDK.macosx10.12.PlatformPath = /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform
-QMAKE_MAC_SDK.macosx10.12.SDKVersion = 10.12
diff --git a/IPL/IPL.pro b/IPL/IPL.pro
index 0a656ba..e38c58b 100644
--- a/IPL/IPL.pro
+++ b/IPL/IPL.pro
@@ -1,166 +1,166 @@
-#############################################################################
-#
-#  This file is part of ImagePlay.
-#
-#  ImagePlay is free software: you can redistribute it and/or modify
-#  it under the terms of the GNU General Public License as published by
-#  the Free Software Foundation, either version 3 of the License, or
-#  (at your option) any later version.
-#
-#  ImagePlay is distributed in the hope that it will be useful,
-#  but WITHOUT ANY WARRANTY; without even the implied warranty of
-#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-#  GNU General Public License for more details.
-#
-#  You should have received a copy of the GNU General Public License
-#  along with ImagePlay.  If not, see <http://www.gnu.org/licenses/>.
-#
-##############################################################################
-
-CONFIG -= qt
-
-TARGET = IPL
-CONFIG(debug, debug|release): DESTDIR  = ../ImagePlay/debug
-else: DESTDIR  = ../ImagePlay/release
-
-#define platform variable for folder name
-win32 {contains(QMAKE_TARGET.arch, x86_64) {PLATFORM = x64} else {PLATFORM = Win32}}
-macx {PLATFORM = macx}
-unix:!macx:!android {PLATFORM = linux}
-
-#define configuration variable for folder name
-CONFIG(debug, debug|release) {CONFIGURATION = Debug} else {CONFIGURATION = Release}
-
-DESTDIR = ../_bin/$$CONFIGURATION/$$PLATFORM
-OBJECTS_DIR = ../intermediate/$$TARGET/$$CONFIGURATION/$$PLATFORM
-MOC_DIR = ../intermediate/$$TARGET/$$CONFIGURATION/$$PLATFORM
-RCC_DIR = ../intermediate/$$TARGET/$$CONFIGURATION/$$PLATFORM
-UI_DIR = ../intermediate/$$TARGET/$$CONFIGURATION/$$PLATFORM
-
-#TEMPLATE = vclib
-TEMPLATE = lib
-#CONFIG += lib_bundle
-
-
-DEFINES += IPL_LIBRARY
-
-HEADERS += $$files(*.h,true)
-SOURCES += $$files(*.cpp,true)
-OTHER_FILES += $$files(*,true)
-
-#win32: LIBS += -L$$PWD/lib/FreeImage/ -lFreeImage
-
-#INCLUDEPATH += $$PWD/lib/FreeImage/
-#DEPENDPATH += $$PWD/lib/FreeImage/
-
-#win32: PRE_TARGETDEPS += $$PWD/lib/FreeImage/FreeImage.lib
-
-win32 {
-    # dirent
-    INCLUDEPATH += $$PWD/lib/
-    SOURCES += include/dirent/dirent.c
-    HEADERS += include/dirent/dirent.h
-
-    # freeimage
-    LIBS += -L$$PWD/../_lib/freeimage/ -lFreeImage
-    INCLUDEPATH += $$PWD/../_lib/freeimage
-    DEPENDPATH += $$PWD/../_lib/freeimage
-
-    # opencv
-    CONFIG(release, debug|release) {
-        LIBS += -L$$PWD/../_lib/opencv/x64/vc14/lib/ -lopencv_core310
-        LIBS += -L$$PWD/../_lib/opencv/x64/vc14/lib/ -lopencv_imgproc310
-        LIBS += -L$$PWD/../_lib/opencv/x64/vc14/lib/ -lopencv_highgui310
-        LIBS += -L$$PWD/../_lib/opencv/x64/vc14/lib/ -lopencv_videoio310
-        LIBS += -L$$PWD/../_lib/opencv/x64/vc14/lib/ -lopencv_calib3d310
-        LIBS += -L$$PWD/../_lib/opencv/x64/vc14/lib/ -lopencv_optflow310
-        LIBS += -L$$PWD/../_lib/opencv/x64/vc14/lib/ -lopencv_features2d310
-        LIBS += -L$$PWD/../_lib/opencv/x64/vc14/lib/ -lopencv_xfeatures2d310
-        LIBS += -L$$PWD/../_lib/opencv/x64/vc14/lib/ -lopencv_photo310
-        LIBS += -L$$PWD/../_lib/opencv/x64/vc14/lib/ -lopencv_xphoto310
-    } else {
-        LIBS += -L$$PWD/../_lib/opencv/x64/vc14/lib/ -lopencv_core310d
-        LIBS += -L$$PWD/../_lib/opencv/x64/vc14/lib/ -lopencv_imgproc310d
-        LIBS += -L$$PWD/../_lib/opencv/x64/vc14/lib/ -lopencv_highgui310d
-        LIBS += -L$$PWD/../_lib/opencv/x64/vc14/lib/ -lopencv_videoio310d
-        LIBS += -L$$PWD/../_lib/opencv/x64/vc14/lib/ -lopencv_calib3d310d
-        LIBS += -L$$PWD/../_lib/opencv/x64/vc14/lib/ -lopencv_optflow310d
-        LIBS += -L$$PWD/../_lib/opencv/x64/vc14/lib/ -lopencv_features2d310d
-        LIBS += -L$$PWD/../_lib/opencv/x64/vc14/lib/ -lopencv_xfeatures2d310d
-        LIBS += -L$$PWD/../_lib/opencv/x64/vc14/lib/ -lopencv_photo310d
-        LIBS += -L$$PWD/../_lib/opencv/x64/vc14/lib/ -lopencv_xphoto310d
-    }
-}
-
-
-
-macx {
-    QMAKE_MAC_SDK = macosx10.12
-    LIBS += -L$$PWD/../_lib/freeimage/ -lfreeimage-3.16.0
-
-    INCLUDEPATH += $$PWD/../_lib/freeimage
-    DEPENDPATH += $$PWD/../_lib/freeimage
-
-    #DESTDIR = ../_bin/$$CONFIGURATION/$$PLATFORM/ImagePlay.app/Contents/Frameworks/
-    DESTDIR = ../_lib/
-
-    LIBS += -L$$PWD/../_lib/opencv/x64/clang/lib/ -lopencv_core.3.1.0
-    LIBS += -L$$PWD/../_lib/opencv/x64/clang/lib/ -lopencv_imgproc.3.1.0
-    LIBS += -L$$PWD/../_lib/opencv/x64/clang/lib/ -lopencv_highgui.3.1.0
-    LIBS += -L$$PWD/../_lib/opencv/x64/clang/lib/ -lopencv_videoio.3.1.0
-    LIBS += -L$$PWD/../_lib/opencv/x64/clang/lib/ -lopencv_calib3d.3.1.0
-    LIBS += -L$$PWD/../_lib/opencv/x64/clang/lib/ -lopencv_optflow.3.1.0
-    LIBS += -L$$PWD/../_lib/opencv/x64/clang/lib/ -lopencv_features2d.3.1.0
-    LIBS += -L$$PWD/../_lib/opencv/x64/clang/lib/ -lopencv_xfeatures2d.3.1.0
-    LIBS += -L$$PWD/../_lib/opencv/x64/clang/lib/ -lopencv_photo.3.1.0
-    LIBS += -L$$PWD/../_lib/opencv/x64/clang/lib/ -lopencv_xphoto.3.1.0
-
-}
-
-linux {
-    CONFIG += staticlib
-
-    LIBS += -lfreeimage
-    LIBS += -lopencv_core
-    LIBS += -lopencv_imgproc
-    LIBS += -lopencv_highgui
-    LIBS += -lopencv_videoio
-    LIBS += -lopencv_calib3d
-    LIBS += -lopencv_optflow
-    LIBS += -lopencv_features2d
-    LIBS += -lopencv_xfeatures2d
-    LIBS += -lopencv_photo
-    LIBS += -lopencv_xphoto
-
-    DESTDIR = ../_bin/$$CONFIGURATION/$$PLATFORM/
-}
-
-msvc {
-    QMAKE_CXXFLAGS += -openmp
-    QMAKE_LFLAGS   += -openmp
-
-    #QMAKE_CXXFLAGS_RELEASE -= -O1
-    #QMAKE_CXXFLAGS_RELEASE -= -O2
-    #QMAKE_CXXFLAGS_RELEASE *= -O3
-}
-
-clang {
-    CONFIG +=c++11
-    QMAKE_CXXFLAGS += -openmp
-    QMAKE_LFLAGS   += -openmp
-}
-
-gcc:!clang {
-    CONFIG +=c++11
-    QMAKE_CXXFLAGS += -fopenmp
-    QMAKE_LFLAGS   += -fopenmp
-    LIBS += -lgomp
-}
-
-
-# IPL
-INCLUDEPATH += $$PWD/include/
-INCLUDEPATH += $$PWD/include/processes/
-
-# OpenCV
-INCLUDEPATH += $$PWD/include/opencv/
+#############################################################################
+#
+#  This file is part of ImagePlay.
+#
+#  ImagePlay is free software: you can redistribute it and/or modify
+#  it under the terms of the GNU General Public License as published by
+#  the Free Software Foundation, either version 3 of the License, or
+#  (at your option) any later version.
+#
+#  ImagePlay is distributed in the hope that it will be useful,
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#  GNU General Public License for more details.
+#
+#  You should have received a copy of the GNU General Public License
+#  along with ImagePlay.  If not, see <http://www.gnu.org/licenses/>.
+#
+##############################################################################
+
+CONFIG -= qt  # remove the qt feature from this project's CONFIG
+
+TARGET = IPL
+CONFIG(debug, debug|release): DESTDIR  = ../ImagePlay/debug  # superseded: DESTDIR is reassigned unconditionally below
+else: DESTDIR  = ../ImagePlay/release  # superseded: DESTDIR is reassigned unconditionally below
+
+# Define PLATFORM (x64 / Win32 / macx / linux), used as a folder name in the output paths
+win32 {contains(QMAKE_TARGET.arch, x86_64) {PLATFORM = x64} else {PLATFORM = Win32}}
+macx {PLATFORM = macx}
+unix:!macx:!android {PLATFORM = linux}
+
+# Define CONFIGURATION (Debug / Release), used as a folder name in the output paths
+CONFIG(debug, debug|release) {CONFIGURATION = Debug} else {CONFIGURATION = Release}
+
+DESTDIR = ../_bin/$$CONFIGURATION/$$PLATFORM  # default output dir; the macx and linux scopes assign DESTDIR again
+OBJECTS_DIR = ../intermediate/$$TARGET/$$CONFIGURATION/$$PLATFORM  # all intermediate files share one per-config/per-platform dir
+MOC_DIR = ../intermediate/$$TARGET/$$CONFIGURATION/$$PLATFORM
+RCC_DIR = ../intermediate/$$TARGET/$$CONFIGURATION/$$PLATFORM
+UI_DIR = ../intermediate/$$TARGET/$$CONFIGURATION/$$PLATFORM
+
+#TEMPLATE = vclib
+TEMPLATE = lib
+#CONFIG += lib_bundle
+
+
+DEFINES += IPL_LIBRARY  # presumably selects the export side of the IPL API macros - TODO confirm in the headers
+
+HEADERS += $$files(*.h,true)  # second argument true: search subdirectories recursively
+SOURCES += $$files(*.cpp,true)
+OTHER_FILES += $$files(*,true)
+
+# Optional local FreeImage copy in IPL/lib/FreeImage (IPL/lib/ is git-ignored): used only when present
+win32:exists($$PWD/lib/FreeImage/FreeImage.lib): LIBS += -L$$PWD/lib/FreeImage/ -lFreeImage
+exists($$PWD/lib/FreeImage): INCLUDEPATH += $$PWD/lib/FreeImage/
+exists($$PWD/lib/FreeImage): DEPENDPATH += $$PWD/lib/FreeImage/
+# A PRE_TARGETDEPS entry that does not exist stops make, so it is guarded as well
+win32:exists($$PWD/lib/FreeImage/FreeImage.lib): PRE_TARGETDEPS += $$PWD/lib/FreeImage/FreeImage.lib
+
+win32 {  # Windows-only sources and third-party link settings
+    # dirent: dirent.c / dirent.h are added to the build on Windows only
+    INCLUDEPATH += $$PWD/lib/  # NOTE(review): the dirent sources live under include/dirent/, not lib/ - confirm this path
+    SOURCES += include/dirent/dirent.c
+    HEADERS += include/dirent/dirent.h
+
+    # freeimage: library and headers are taken from ../_lib/freeimage
+    LIBS += -L$$PWD/../_lib/freeimage/ -lFreeImage
+    INCLUDEPATH += $$PWD/../_lib/freeimage
+    DEPENDPATH += $$PWD/../_lib/freeimage
+
+    # opencv: "430"-suffixed libraries from the x64/vc16 directory; NOTE(review): x64 is used even when PLATFORM is Win32 - confirm
+    CONFIG(release, debug|release) {
+        LIBS += -L$$PWD/../_lib/opencv/x64/vc16/lib/ -lopencv_core430
+        LIBS += -L$$PWD/../_lib/opencv/x64/vc16/lib/ -lopencv_imgproc430
+        LIBS += -L$$PWD/../_lib/opencv/x64/vc16/lib/ -lopencv_highgui430
+        LIBS += -L$$PWD/../_lib/opencv/x64/vc16/lib/ -lopencv_videoio430
+        LIBS += -L$$PWD/../_lib/opencv/x64/vc16/lib/ -lopencv_calib3d430
+        LIBS += -L$$PWD/../_lib/opencv/x64/vc16/lib/ -lopencv_optflow430
+        LIBS += -L$$PWD/../_lib/opencv/x64/vc16/lib/ -lopencv_features2d430
+        LIBS += -L$$PWD/../_lib/opencv/x64/vc16/lib/ -lopencv_xfeatures2d430
+        LIBS += -L$$PWD/../_lib/opencv/x64/vc16/lib/ -lopencv_photo430
+        LIBS += -L$$PWD/../_lib/opencv/x64/vc16/lib/ -lopencv_xphoto430
+    } else {  # debug build: same modules with the "d" suffix
+        LIBS += -L$$PWD/../_lib/opencv/x64/vc16/lib/ -lopencv_core430d
+        LIBS += -L$$PWD/../_lib/opencv/x64/vc16/lib/ -lopencv_imgproc430d
+        LIBS += -L$$PWD/../_lib/opencv/x64/vc16/lib/ -lopencv_highgui430d
+        LIBS += -L$$PWD/../_lib/opencv/x64/vc16/lib/ -lopencv_videoio430d
+        LIBS += -L$$PWD/../_lib/opencv/x64/vc16/lib/ -lopencv_calib3d430d
+        LIBS += -L$$PWD/../_lib/opencv/x64/vc16/lib/ -lopencv_optflow430d
+        LIBS += -L$$PWD/../_lib/opencv/x64/vc16/lib/ -lopencv_features2d430d
+        LIBS += -L$$PWD/../_lib/opencv/x64/vc16/lib/ -lopencv_xfeatures2d430d
+        LIBS += -L$$PWD/../_lib/opencv/x64/vc16/lib/ -lopencv_photo430d
+        LIBS += -L$$PWD/../_lib/opencv/x64/vc16/lib/ -lopencv_xphoto430d
+    }
+}
+
+
+
+macx {  # macOS-only SDK selection, output dir and third-party link settings
+    QMAKE_MAC_SDK = macosx10.12  # pins the SDK name; NOTE(review): confirm this SDK is still installed on build machines
+    LIBS += -L$$PWD/../_lib/freeimage/ -lfreeimage-3.16.0
+
+    INCLUDEPATH += $$PWD/../_lib/freeimage
+    DEPENDPATH += $$PWD/../_lib/freeimage
+
+    #DESTDIR = ../_bin/$$CONFIGURATION/$$PLATFORM/ImagePlay.app/Contents/Frameworks/
+    DESTDIR = ../_lib/  # replaces the default ../_bin/... output dir on macOS
+
+    LIBS += -L$$PWD/../_lib/opencv/x64/clang/lib/ -lopencv_core.3.1.0  # NOTE(review): still 3.1.0 while win32 links the 430 libs - confirm against the bundled headers
+    LIBS += -L$$PWD/../_lib/opencv/x64/clang/lib/ -lopencv_imgproc.3.1.0
+    LIBS += -L$$PWD/../_lib/opencv/x64/clang/lib/ -lopencv_highgui.3.1.0
+    LIBS += -L$$PWD/../_lib/opencv/x64/clang/lib/ -lopencv_videoio.3.1.0
+    LIBS += -L$$PWD/../_lib/opencv/x64/clang/lib/ -lopencv_calib3d.3.1.0
+    LIBS += -L$$PWD/../_lib/opencv/x64/clang/lib/ -lopencv_optflow.3.1.0
+    LIBS += -L$$PWD/../_lib/opencv/x64/clang/lib/ -lopencv_features2d.3.1.0
+    LIBS += -L$$PWD/../_lib/opencv/x64/clang/lib/ -lopencv_xfeatures2d.3.1.0
+    LIBS += -L$$PWD/../_lib/opencv/x64/clang/lib/ -lopencv_photo.3.1.0
+    LIBS += -L$$PWD/../_lib/opencv/x64/clang/lib/ -lopencv_xphoto.3.1.0
+
+}
+
+linux {  # Linux-only build type and link settings
+    CONFIG += staticlib  # build IPL as a static library on Linux
+
+    LIBS += -lfreeimage  # no -L paths are given here, so the linker's default search paths apply
+    LIBS += -lopencv_core
+    LIBS += -lopencv_imgproc
+    LIBS += -lopencv_highgui
+    LIBS += -lopencv_videoio
+    LIBS += -lopencv_calib3d
+    LIBS += -lopencv_optflow
+    LIBS += -lopencv_features2d
+    LIBS += -lopencv_xfeatures2d
+    LIBS += -lopencv_photo
+    LIBS += -lopencv_xphoto
+
+    DESTDIR = ../_bin/$$CONFIGURATION/$$PLATFORM/  # same location as the default DESTDIR, with a trailing slash
+}
+
+msvc {  # MSVC toolchain: OpenMP flags
+    QMAKE_CXXFLAGS += -openmp
+    QMAKE_LFLAGS   += -openmp  # NOTE(review): openmp is normally a compiler switch - confirm the linker needs or accepts it
+
+    # Disabled experiment: swap the release optimisation level for -O3
+    #QMAKE_CXXFLAGS_RELEASE -= -O1
+    #QMAKE_CXXFLAGS_RELEASE -= -O2
+    #QMAKE_CXXFLAGS_RELEASE *= -O3
+}
+
+clang {  # clang toolchain: C++11 and OpenMP flags
+    CONFIG +=c++11
+    QMAKE_CXXFLAGS += -openmp  # NOTE(review): clang spells this -fopenmp (as in the gcc scope); -openmp may be read as "-o penmp" - confirm
+    QMAKE_LFLAGS   += -openmp  # NOTE(review): same concern as the compiler flag above - confirm
+}
+
+gcc:!clang {  # gcc only (clang is excluded and handled in its own scope): C++11 and OpenMP flags
+    CONFIG +=c++11
+    QMAKE_CXXFLAGS += -fopenmp
+    QMAKE_LFLAGS   += -fopenmp
+    LIBS += -lgomp  # links the GNU OpenMP runtime explicitly in addition to -fopenmp
+}
+
+
+# IPL headers (include/ and include/processes/), added for every platform
+INCLUDEPATH += $$PWD/include/
+INCLUDEPATH += $$PWD/include/processes/
+
+# OpenCV headers bundled under IPL/include/opencv/
+INCLUDEPATH += $$PWD/include/opencv/
diff --git a/IPL/include/opencv/opencv/cv.hpp b/IPL/include/opencv/opencv/cv.hpp
deleted file mode 100644
index e498d7a..0000000
--- a/IPL/include/opencv/opencv/cv.hpp
+++ /dev/null
@@ -1,60 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifndef __OPENCV_OLD_CV_HPP__
-#define __OPENCV_OLD_CV_HPP__
-
-//#if defined(__GNUC__)
-//#warning "This is a deprecated opencv header provided for compatibility. Please include a header from a corresponding opencv module"
-//#endif
-
-#include "cv.h"
-#include "opencv2/core.hpp"
-#include "opencv2/imgproc.hpp"
-#include "opencv2/photo.hpp"
-#include "opencv2/video.hpp"
-#include "opencv2/highgui.hpp"
-#include "opencv2/features2d.hpp"
-#include "opencv2/calib3d.hpp"
-#include "opencv2/objdetect.hpp"
-
-#endif
diff --git a/IPL/include/opencv/opencv/cvaux.h b/IPL/include/opencv/opencv/cvaux.h
deleted file mode 100644
index fe86c5d..0000000
--- a/IPL/include/opencv/opencv/cvaux.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                        Intel License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000, Intel Corporation, all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of Intel Corporation may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifndef __OPENCV_OLD_AUX_H__
-#define __OPENCV_OLD_AUX_H__
-
-//#if defined(__GNUC__)
-//#warning "This is a deprecated opencv header provided for compatibility. Please include a header from a corresponding opencv module"
-//#endif
-
-#include "opencv2/core/core_c.h"
-#include "opencv2/imgproc/imgproc_c.h"
-#include "opencv2/photo/photo_c.h"
-#include "opencv2/video/tracking_c.h"
-#include "opencv2/objdetect/objdetect_c.h"
-
-#endif
-
-/* End of file. */
diff --git a/IPL/include/opencv/opencv/cvwimage.h b/IPL/include/opencv/opencv/cvwimage.h
deleted file mode 100644
index de89c92..0000000
--- a/IPL/include/opencv/opencv/cvwimage.h
+++ /dev/null
@@ -1,46 +0,0 @@
-///////////////////////////////////////////////////////////////////////////////
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to
-//  this license.  If you do not agree to this license, do not download,
-//  install, copy or use the software.
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2008, Google, all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-//  * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//  * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//  * The name of Intel Corporation or contributors may not be used to endorse
-//     or promote products derived from this software without specific
-//     prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is"
-// and any express or implied warranties, including, but not limited to, the
-// implied warranties of merchantability and fitness for a particular purpose
-// are disclaimed. In no event shall the Intel Corporation or contributors be
-// liable for any direct, indirect, incidental, special, exemplary, or
-// consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-
-
-#ifndef __OPENCV_OLD_WIMAGE_HPP__
-#define __OPENCV_OLD_WIMAGE_HPP__
-
-#include "opencv2/core/wimage.hpp"
-
-#endif
diff --git a/IPL/include/opencv/opencv/cxmisc.h b/IPL/include/opencv/opencv/cxmisc.h
deleted file mode 100644
index 6c93a0c..0000000
--- a/IPL/include/opencv/opencv/cxmisc.h
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef __OPENCV_OLD_CXMISC_H__
-#define __OPENCV_OLD_CXMISC_H__
-
-#ifdef __cplusplus
-#  include "opencv2/core/utility.hpp"
-#endif
-
-#endif
diff --git a/IPL/include/opencv/opencv/ml.h b/IPL/include/opencv/opencv/ml.h
deleted file mode 100644
index d8e967f..0000000
--- a/IPL/include/opencv/opencv/ml.h
+++ /dev/null
@@ -1,47 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                        Intel License Agreement
-//
-// Copyright (C) 2000, Intel Corporation, all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of Intel Corporation may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifndef __OPENCV_OLD_ML_H__
-#define __OPENCV_OLD_ML_H__
-
-#include "opencv2/core/core_c.h"
-#include "opencv2/ml.hpp"
-
-#endif
diff --git a/IPL/include/opencv/opencv2/aruco.hpp b/IPL/include/opencv/opencv2/aruco.hpp
index 3f45dc1..3cf62d2 100644
--- a/IPL/include/opencv/opencv2/aruco.hpp
+++ b/IPL/include/opencv/opencv2/aruco.hpp
@@ -49,14 +49,16 @@ the use of this software, even if advised of the possibility of such damage.
  * These markers are useful for easy, fast and robust camera pose estimation.ç
  *
  * The main functionalities are:
- * - Detection of markers in a image
+ * - Detection of markers in an image
  * - Pose estimation from a single marker or from a board/set of markers
  * - Detection of ChArUco board for high subpixel accuracy
  * - Camera calibration from both, ArUco boards and ChArUco boards.
  * - Detection of ChArUco diamond markers
  * The samples directory includes easy examples of how to use the module.
  *
- * The implementation is based on the ArUco Library by R. Muñoz-Salinas and S. Garrido-Jurado.
+ * The implementation is based on the ArUco Library by R. Muñoz-Salinas and S. Garrido-Jurado @cite Aruco2014.
+ *
+ * Markers can also be detected based on the AprilTag 2 @cite wang2016iros fiducial detection method.
  *
  * @sa S. Garrido-Jurado, R. Muñoz-Salinas, F. J. Madrid-Cuevas, and M. J. Marín-Jiménez. 2014.
  * "Automatic generation and detection of highly reliable fiducial markers under occlusion".
@@ -76,7 +78,12 @@ namespace aruco {
 //! @addtogroup aruco
 //! @{
 
-
+enum CornerRefineMethod{ // values accepted by the cornerRefinementMethod detector parameter
+    CORNER_REFINE_NONE,     ///< No corner refinement: tag and corners detection based on the ArUco approach
+    CORNER_REFINE_SUBPIX,   ///< ArUco approach, then refine the corner locations to subpixel accuracy
+    CORNER_REFINE_CONTOUR,  ///< ArUco approach, then refine the corner locations using contour-point line fitting
+    CORNER_REFINE_APRILTAG, ///< Tag and corners detection based on the AprilTag 2 approach @cite wang2016iros
+};
 
 /**
  * @brief Parameters for the detectMarker process:
@@ -100,18 +107,20 @@ namespace aruco {
  * - minMarkerDistanceRate: minimum mean distance beetween two marker corners to be considered
  *   similar, so that the smaller one is removed. The rate is relative to the smaller perimeter
  *   of the two markers (default 0.05).
- * - doCornerRefinement: do subpixel refinement or not
+ * - cornerRefinementMethod: corner refinement method (CORNER_REFINE_NONE: no refinement;
+ *   CORNER_REFINE_SUBPIX: subpixel refinement; CORNER_REFINE_CONTOUR: contour-point line fitting;
+ *   CORNER_REFINE_APRILTAG: the AprilTag 2 approach).
  * - cornerRefinementWinSize: window size for the corner refinement process (in pixels) (default 5).
  * - cornerRefinementMaxIterations: maximum number of iterations for stop criteria of the corner
  *   refinement process (default 30).
  * - cornerRefinementMinAccuracy: minimum error for the stop cristeria of the corner refinement
  *   process (default: 0.1)
  * - markerBorderBits: number of bits of the marker border, i.e. marker border width (default 1).
- * - perpectiveRemovePixelPerCell: number of bits (per dimension) for each cell of the marker
+ * - perspectiveRemovePixelPerCell: number of bits (per dimension) for each cell of the marker
  *   when removing the perspective (default 8).
  * - perspectiveRemoveIgnoredMarginPerCell: width of the margin of pixels on each cell not
  *   considered for the determination of the cell bit. Represents the rate respect to the total
- *   size of the cell, i.e. perpectiveRemovePixelPerCell (default 0.13)
+ *   size of the cell, i.e. perspectiveRemovePixelPerCell (default 0.13)
  * - maxErroneousBitsInBorderRate: maximum number of accepted erroneous bits in the border (i.e.
  *   number of allowed white bits in the border). Represented as a rate respect to the total
  *   number of bits per marker (default 0.35).
@@ -120,6 +129,23 @@ namespace aruco {
  *   than 128 or not) (default 5.0)
  * - errorCorrectionRate error correction rate respect to the maximun error correction capability
  *   for each dictionary. (default 0.6).
+ * - aprilTagMinClusterPixels: reject quads containing too few pixels.
+ * - aprilTagMaxNmaxima: how many corner candidates to consider when segmenting a group of pixels into a quad.
+ * - aprilTagCriticalRad: Reject quads where pairs of edges have angles that are close to straight or close to
+ *   180 degrees. Zero means that no quads are rejected. (In radians).
+ * - aprilTagMaxLineFitMse:  When fitting lines to the contours, what is the maximum mean squared error
+ *   allowed?  This is useful in rejecting contours that are far from being quad shaped; rejecting
+ *   these quads "early" saves expensive decoding processing.
+ * - aprilTagMinWhiteBlackDiff: When we build our model of black & white pixels, we add an extra check that
+ *   the white model must be (overall) brighter than the black model.  How much brighter? (in pixel values, [0,255]).
+ * - aprilTagDeglitch:  should the thresholded image be deglitched? Only useful for very noisy images
+ * - aprilTagQuadDecimate: Detection of quads can be done on a lower-resolution image, improving speed at a
+ *   cost of pose accuracy and a slight decrease in detection rate. Decoding the binary payload is still
+ *   done at full resolution.
+ * - aprilTagQuadSigma: What Gaussian blur should be applied to the segmented image (used for quad detection)?
+ *   Parameter is the standard deviation in pixels.  Very noisy images benefit from non-zero values (e.g. 0.8).
+ * - detectInvertedMarker: to check if there is a white marker. In order to generate a "white" marker just
+ *   invert a normal marker by using a tilde, ~markerImage. (default false)
  */
 struct CV_EXPORTS_W DetectorParameters {
 
@@ -137,7 +163,7 @@ struct CV_EXPORTS_W DetectorParameters {
     CV_PROP_RW double minCornerDistanceRate;
     CV_PROP_RW int minDistanceToBorder;
     CV_PROP_RW double minMarkerDistanceRate;
-    CV_PROP_RW bool doCornerRefinement;
+    CV_PROP_RW int cornerRefinementMethod;
     CV_PROP_RW int cornerRefinementWinSize;
     CV_PROP_RW int cornerRefinementMaxIterations;
     CV_PROP_RW double cornerRefinementMinAccuracy;
@@ -147,6 +173,21 @@ struct CV_EXPORTS_W DetectorParameters {
     CV_PROP_RW double maxErroneousBitsInBorderRate;
     CV_PROP_RW double minOtsuStdDev;
     CV_PROP_RW double errorCorrectionRate;
+
+    // April :: User-configurable parameters.
+    CV_PROP_RW float aprilTagQuadDecimate;
+    CV_PROP_RW float aprilTagQuadSigma;
+
+    // April :: Internal variables
+    CV_PROP_RW int aprilTagMinClusterPixels;
+    CV_PROP_RW int aprilTagMaxNmaxima;
+    CV_PROP_RW float aprilTagCriticalRad;
+    CV_PROP_RW float aprilTagMaxLineFitMse;
+    CV_PROP_RW int aprilTagMinWhiteBlackDiff;
+    CV_PROP_RW int aprilTagDeglitch;
+
+    // to detect white (inverted) markers
+    CV_PROP_RW bool detectInvertedMarker;
 };
 
 
@@ -165,6 +206,10 @@ struct CV_EXPORTS_W DetectorParameters {
  * @param parameters marker detection parameters
  * @param rejectedImgPoints contains the imgPoints of those squares whose inner code has not a
  * correct codification. Useful for debugging purposes.
+ * @param cameraMatrix optional input 3x3 floating-point camera matrix
+ * \f$A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$
+ * @param distCoeff optional vector of distortion coefficients
+ * \f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6],[s_1, s_2, s_3, s_4]])\f$ of 4, 5, 8 or 12 elements
  *
  * Performs marker detection in the input image. Only markers included in the specific dictionary
  * are searched. For each detected marker, it returns the 2D position of its corner in the image
@@ -173,9 +218,9 @@ struct CV_EXPORTS_W DetectorParameters {
  * @sa estimatePoseSingleMarkers,  estimatePoseBoard
  *
  */
-CV_EXPORTS_W void detectMarkers(InputArray image, Ptr<Dictionary> &dictionary, OutputArrayOfArrays corners,
+CV_EXPORTS_W void detectMarkers(InputArray image, const Ptr<Dictionary> &dictionary, OutputArrayOfArrays corners,
                                 OutputArray ids, const Ptr<DetectorParameters> &parameters = DetectorParameters::create(),
-                                OutputArrayOfArrays rejectedImgPoints = noArray());
+                                OutputArrayOfArrays rejectedImgPoints = noArray(), InputArray cameraMatrix= noArray(), InputArray distCoeff= noArray());
 
 
 
@@ -192,10 +237,11 @@ CV_EXPORTS_W void detectMarkers(InputArray image, Ptr<Dictionary> &dictionary, O
  * \f$A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$
  * @param distCoeffs vector of distortion coefficients
  * \f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6],[s_1, s_2, s_3, s_4]])\f$ of 4, 5, 8 or 12 elements
- * @param rvecs array of output rotation vectors (@sa Rodrigues) (e.g. std::vector<cv::Vec3d>>).
+ * @param rvecs array of output rotation vectors (@sa Rodrigues) (e.g. std::vector<cv::Vec3d>).
  * Each element in rvecs corresponds to the specific marker in imgPoints.
- * @param tvecs array of output translation vectors (e.g. std::vector<cv::Vec3d>>).
+ * @param tvecs array of output translation vectors (e.g. std::vector<cv::Vec3d>).
  * Each element in tvecs corresponds to the specific marker in imgPoints.
+ * @param _objPoints array of object points of all the marker corners
  *
  * This function receives the detected markers and returns their pose estimation respect to
  * the camera individually. So for each marker, one rotation and translation vector is returned.
@@ -209,14 +255,14 @@ CV_EXPORTS_W void detectMarkers(InputArray image, Ptr<Dictionary> &dictionary, O
  */
 CV_EXPORTS_W void estimatePoseSingleMarkers(InputArrayOfArrays corners, float markerLength,
                                             InputArray cameraMatrix, InputArray distCoeffs,
-                                            OutputArrayOfArrays rvecs, OutputArrayOfArrays tvecs);
+                                            OutputArray rvecs, OutputArray tvecs, OutputArray _objPoints = noArray());
 
 
 
 /**
  * @brief Board of markers
  *
- * A board is a set of markers in the 3D space with a common cordinate system.
+ * A board is a set of markers in the 3D space with a common coordinate system.
  * The common form of a board of marker is a planar (2D) board, however any 3D layout can be used.
  * A Board object is composed by:
  * - The object points of the marker corners, i.e. their coordinates respect to the board system.
@@ -226,23 +272,32 @@ CV_EXPORTS_W void estimatePoseSingleMarkers(InputArrayOfArrays corners, float ma
 class CV_EXPORTS_W Board {
 
     public:
-    // array of object points of all the marker corners in the board
-    // each marker include its 4 corners, i.e. for M markers, the size is Mx4
-    std::vector< std::vector< Point3f > > objPoints;
-
-    // the dictionary of markers employed for this board
-    Ptr<Dictionary> dictionary;
-
-    // vector of the identifiers of the markers in the board (same size than objPoints)
-    // The identifiers refers to the board dictionary
-    std::vector< int > ids;
+    /**
+    * @brief Provides a way to create a Board by passing the necessary data. Especially needed in Python.
+    *
+    * @param objPoints array of object points of all the marker corners in the board
+    * @param dictionary the dictionary of markers employed for this board
+    * @param ids vector of the identifiers of the markers in the board
+    *
+    */
+    CV_WRAP static Ptr<Board> create(InputArrayOfArrays objPoints, const Ptr<Dictionary> &dictionary, InputArray ids);
+    /// array of object points of all the marker corners in the board
+    /// each marker includes its 4 corners in CCW order. For M markers, the size is Mx4.
+    CV_PROP std::vector< std::vector< Point3f > > objPoints;
+
+    /// the dictionary of markers employed for this board
+    CV_PROP Ptr<Dictionary> dictionary;
+
+    /// vector of the identifiers of the markers in the board (same size as objPoints)
+    /// The identifiers refer to the board dictionary
+    CV_PROP std::vector< int > ids;
 };
 
 
 
 /**
  * @brief Planar board with grid arrangement of markers
- * More common type of board. All markers are placed in the same plane in a grid arrangment.
+ * More common type of board. All markers are placed in the same plane in a grid arrangement.
  * The board can be drawn using drawPlanarBoard() function (@sa drawPlanarBoard)
  */
 class CV_EXPORTS_W GridBoard : public Board {
@@ -259,7 +314,7 @@ class CV_EXPORTS_W GridBoard : public Board {
      *
      * This function return the image of the GridBoard, ready to be printed.
      */
-    void draw(Size outSize, OutputArray img, int marginSize = 0, int borderBits = 1);
+    CV_WRAP void draw(Size outSize, OutputArray img, int marginSize = 0, int borderBits = 1);
 
 
     /**
@@ -277,29 +332,29 @@ class CV_EXPORTS_W GridBoard : public Board {
      * the marker size and marker separation.
      */
     CV_WRAP static Ptr<GridBoard> create(int markersX, int markersY, float markerLength,
-                                         float markerSeparation, Ptr<Dictionary> &dictionary, int firstMarker = 0);
+                                         float markerSeparation, const Ptr<Dictionary> &dictionary, int firstMarker = 0);
 
     /**
       *
       */
-    Size getGridSize() const { return Size(_markersX, _markersY); }
+    CV_WRAP Size getGridSize() const { return Size(_markersX, _markersY); }
 
     /**
       *
       */
-    float getMarkerLength() const { return _markerLength; }
+    CV_WRAP float getMarkerLength() const { return _markerLength; }
 
     /**
       *
       */
-    float getMarkerSeparation() const { return _markerSeparation; }
+    CV_WRAP float getMarkerSeparation() const { return _markerSeparation; }
 
 
     private:
     // number of markers in X and Y directions
     int _markersX, _markersY;
 
-    // marker side lenght (normally in meters)
+    // marker side length (normally in meters)
     float _markerLength;
 
     // separation between markers in the grid
@@ -322,8 +377,10 @@ class CV_EXPORTS_W GridBoard : public Board {
  * @param distCoeffs vector of distortion coefficients
  * \f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6],[s_1, s_2, s_3, s_4]])\f$ of 4, 5, 8 or 12 elements
  * @param rvec Output vector (e.g. cv::Mat) corresponding to the rotation vector of the board
- * (@sa Rodrigues).
+ * (see cv::Rodrigues). Used as initial guess if not empty.
  * @param tvec Output vector (e.g. cv::Mat) corresponding to the translation vector of the board.
+ * @param useExtrinsicGuess defines whether the initial guess for \b rvec and \b tvec will be used or not.
+ * When true, the values passed in \b rvec and \b tvec serve as the initial guess.
  *
  * This function receives the detected markers and returns the pose of a marker board composed
  * by those markers.
@@ -334,9 +391,9 @@ class CV_EXPORTS_W GridBoard : public Board {
  * The function returns the number of markers from the input employed for the board pose estimation.
  * Note that returning a 0 means the pose has not been estimated.
  */
-CV_EXPORTS_W int estimatePoseBoard(InputArrayOfArrays corners, InputArray ids, Ptr<Board> &board,
-                                   InputArray cameraMatrix, InputArray distCoeffs, OutputArray rvec,
-                                   OutputArray tvec);
+CV_EXPORTS_W int estimatePoseBoard(InputArrayOfArrays corners, InputArray ids, const Ptr<Board> &board,
+                                   InputArray cameraMatrix, InputArray distCoeffs, InputOutputArray rvec,
+                                   InputOutputArray tvec, bool useExtrinsicGuess = false);
 
 
 
@@ -373,8 +430,8 @@ CV_EXPORTS_W int estimatePoseBoard(InputArrayOfArrays corners, InputArray ids, P
  * homography, and all the marker corners in the board must have the same Z coordinate.
  */
 CV_EXPORTS_W void refineDetectedMarkers(
-    InputArray image, Ptr<Board> &board, InputOutputArrayOfArrays detectedCorners,
-    InputOutputArray detectedIds, InputOutputArray rejectedCorners,
+    InputArray image,const  Ptr<Board> &board, InputOutputArrayOfArrays detectedCorners,
+    InputOutputArray detectedIds, InputOutputArrayOfArrays rejectedCorners,
     InputArray cameraMatrix = noArray(), InputArray distCoeffs = noArray(),
     float minRepDistance = 10.f, float errorCorrectionRate = 3.f, bool checkAllOrders = true,
     OutputArray recoveredIdxs = noArray(), const Ptr<DetectorParameters> &parameters = DetectorParameters::create());
@@ -419,6 +476,8 @@ CV_EXPORTS_W void drawDetectedMarkers(InputOutputArray image, InputArrayOfArrays
  *
  * Given the pose estimation of a marker or board, this function draws the axis of the world
  * coordinate system, i.e. the system centered on the marker/board. Useful for debugging purposes.
+ *
+ * @deprecated use cv::drawFrameAxes
  */
 CV_EXPORTS_W void drawAxis(InputOutputArray image, InputArray cameraMatrix, InputArray distCoeffs,
                            InputArray rvec, InputArray tvec, float length);
@@ -437,7 +496,7 @@ CV_EXPORTS_W void drawAxis(InputOutputArray image, InputArray cameraMatrix, Inpu
  *
  * This function returns a marker image in its canonical form (i.e. ready to be printed)
  */
-CV_EXPORTS_W void drawMarker(Ptr<Dictionary> &dictionary, int id, int sidePixels, OutputArray img,
+CV_EXPORTS_W void drawMarker(const Ptr<Dictionary> &dictionary, int id, int sidePixels, OutputArray img,
                              int borderBits = 1);
 
 
@@ -457,7 +516,7 @@ CV_EXPORTS_W void drawMarker(Ptr<Dictionary> &dictionary, int id, int sidePixels
  * This function return the image of a planar board, ready to be printed. It assumes
  * the Board layout specified is planar by ignoring the z coordinates of the object points.
  */
-CV_EXPORTS_W void drawPlanarBoard(Ptr<Board> &board, Size outSize, OutputArray img,
+CV_EXPORTS_W void drawPlanarBoard(const Ptr<Board> &board, Size outSize, OutputArray img,
                                   int marginSize = 0, int borderBits = 1);
 
 
@@ -474,7 +533,7 @@ void _drawPlanarBoardImpl(Board *board, Size outSize, OutputArray img,
  * @brief Calibrate a camera using aruco markers
  *
  * @param corners vector of detected marker corners in all frames.
- * The corners should have the same format returned by detectMarkers (@sa detectMarkers).
+ * The corners should have the same format returned by detectMarkers (see #detectMarkers).
  * @param ids list of identifiers for each marker in corners
  * @param counter number of markers in each frame so that corners and ids can be split
  * @param board Marker Board layout
@@ -491,20 +550,52 @@ void _drawPlanarBoardImpl(Board *board, Size outSize, OutputArray img,
  * from the model coordinate space (in which object points are specified) to the world coordinate
  * space, that is, a real position of the board pattern in the k-th pattern view (k=0.. *M* -1).
  * @param tvecs Output vector of translation vectors estimated for each pattern view.
- * @param flags flags Different flags  for the calibration process (@sa calibrateCamera)
+ * @param stdDeviationsIntrinsics Output vector of standard deviations estimated for intrinsic parameters.
+ * Order of deviations values:
+ * \f$(f_x, f_y, c_x, c_y, k_1, k_2, p_1, p_2, k_3, k_4, k_5, k_6 , s_1, s_2, s_3,
+ * s_4, \tau_x, \tau_y)\f$ If one of the parameters is not estimated, its deviation is equal to zero.
+ * @param stdDeviationsExtrinsics Output vector of standard deviations estimated for extrinsic parameters.
+ * Order of deviations values: \f$(R_1, T_1, \dotsc , R_M, T_M)\f$ where M is number of pattern views,
+ * \f$R_i, T_i\f$ are concatenated 1x3 vectors.
+ * @param perViewErrors Output vector of average re-projection errors estimated for each pattern view.
+ * @param flags Different flags for the calibration process (see #calibrateCamera for details).
  * @param criteria Termination criteria for the iterative optimization algorithm.
  *
  * This function calibrates a camera using an Aruco Board. The function receives a list of
  * detected markers from several views of the Board. The process is similar to the chessboard
  * calibration in calibrateCamera(). The function returns the final re-projection error.
  */
-CV_EXPORTS_W double calibrateCameraAruco(
-    InputArrayOfArrays corners, InputArray ids, InputArray counter, Ptr<Board> &board,
+CV_EXPORTS_AS(calibrateCameraArucoExtended) double calibrateCameraAruco(
+    InputArrayOfArrays corners, InputArray ids, InputArray counter, const Ptr<Board> &board,
     Size imageSize, InputOutputArray cameraMatrix, InputOutputArray distCoeffs,
-    OutputArrayOfArrays rvecs = noArray(), OutputArrayOfArrays tvecs = noArray(), int flags = 0,
+    OutputArrayOfArrays rvecs, OutputArrayOfArrays tvecs,
+    OutputArray stdDeviationsIntrinsics, OutputArray stdDeviationsExtrinsics,
+    OutputArray perViewErrors, int flags = 0,
     TermCriteria criteria = TermCriteria(TermCriteria::COUNT + TermCriteria::EPS, 30, DBL_EPSILON));
 
 
+/** @brief It's the same function as #calibrateCameraAruco but without calibration error estimation.
+ */
+CV_EXPORTS_W double calibrateCameraAruco(
+  InputArrayOfArrays corners, InputArray ids, InputArray counter, const Ptr<Board> &board,
+  Size imageSize, InputOutputArray cameraMatrix, InputOutputArray distCoeffs,
+  OutputArrayOfArrays rvecs = noArray(), OutputArrayOfArrays tvecs = noArray(), int flags = 0,
+  TermCriteria criteria = TermCriteria(TermCriteria::COUNT + TermCriteria::EPS, 30, DBL_EPSILON));
+
+
+/**
+ * @brief Given a board configuration and a set of detected markers, returns the corresponding
+ * image points and object points to call solvePnP
+ *
+ * @param board Marker board layout.
+ * @param detectedCorners List of detected marker corners of the board.
+ * @param detectedIds List of identifiers for each marker.
+ * @param objPoints Vector of vectors of board marker points in the board coordinate space.
+ * @param imgPoints Vector of vectors of the projections of board marker corner points.
+*/
+CV_EXPORTS_W void getBoardObjectAndImagePoints(const Ptr<Board> &board, InputArrayOfArrays detectedCorners,
+  InputArray detectedIds, OutputArray objPoints, OutputArray imgPoints);
+
 
 //! @}
 }
diff --git a/IPL/include/opencv/opencv2/aruco/charuco.hpp b/IPL/include/opencv/opencv2/aruco/charuco.hpp
index ff448ce..2e6ae62 100644
--- a/IPL/include/opencv/opencv2/aruco/charuco.hpp
+++ b/IPL/include/opencv/opencv2/aruco/charuco.hpp
@@ -63,11 +63,11 @@ class CV_EXPORTS_W CharucoBoard : public Board {
 
     public:
     // vector of chessboard 3D corners precalculated
-    std::vector< Point3f > chessboardCorners;
+    CV_PROP std::vector< Point3f > chessboardCorners;
 
     // for each charuco corner, nearest marker id and nearest marker corner id of each marker
-    std::vector< std::vector< int > > nearestMarkerIdx;
-    std::vector< std::vector< int > > nearestMarkerCorners;
+    CV_PROP std::vector< std::vector< int > > nearestMarkerIdx;
+    CV_PROP std::vector< std::vector< int > > nearestMarkerCorners;
 
     /**
      * @brief Draw a ChArUco board
@@ -80,7 +80,7 @@ class CV_EXPORTS_W CharucoBoard : public Board {
      *
      * This function return the image of the ChArUco board, ready to be printed.
      */
-    void draw(Size outSize, OutputArray img, int marginSize = 0, int borderBits = 1);
+    CV_WRAP void draw(Size outSize, OutputArray img, int marginSize = 0, int borderBits = 1);
 
 
     /**
@@ -98,22 +98,22 @@ class CV_EXPORTS_W CharucoBoard : public Board {
      * and the size of the markers and chessboard squares.
      */
     CV_WRAP static Ptr<CharucoBoard> create(int squaresX, int squaresY, float squareLength,
-                                            float markerLength, Ptr<Dictionary> &dictionary);
+                                            float markerLength, const Ptr<Dictionary> &dictionary);
 
     /**
       *
       */
-    Size getChessboardSize() const { return Size(_squaresX, _squaresY); }
+    CV_WRAP Size getChessboardSize() const { return Size(_squaresX, _squaresY); }
 
     /**
       *
       */
-    float getSquareLength() const { return _squareLength; }
+    CV_WRAP float getSquareLength() const { return _squareLength; }
 
     /**
       *
       */
-    float getMarkerLength() const { return _markerLength; }
+    CV_WRAP float getMarkerLength() const { return _markerLength; }
 
     private:
     void _getNearestMarkerCorners();
@@ -124,7 +124,7 @@ class CV_EXPORTS_W CharucoBoard : public Board {
     // size of chessboard squares side (normally in meters)
     float _squareLength;
 
-    // marker side lenght (normally in meters)
+    // marker side length (normally in meters)
     float _markerLength;
 };
 
@@ -146,6 +146,7 @@ class CV_EXPORTS_W CharucoBoard : public Board {
  * \f$A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$
  * @param distCoeffs optional vector of distortion coefficients
  * \f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6],[s_1, s_2, s_3, s_4]])\f$ of 4, 5, 8 or 12 elements
+ * @param minMarkers number of adjacent markers that must be detected to return a charuco corner
  *
  * This function receives the detected markers and returns the 2D position of the chessboard corners
  * from a ChArUco board using the detected Aruco markers. If camera parameters are provided,
@@ -155,10 +156,10 @@ class CV_EXPORTS_W CharucoBoard : public Board {
  * The function returns the number of interpolated corners.
  */
 CV_EXPORTS_W int interpolateCornersCharuco(InputArrayOfArrays markerCorners, InputArray markerIds,
-                                           InputArray image, Ptr<CharucoBoard> &board,
+                                           InputArray image, const Ptr<CharucoBoard> &board,
                                            OutputArray charucoCorners, OutputArray charucoIds,
                                            InputArray cameraMatrix = noArray(),
-                                           InputArray distCoeffs = noArray());
+                                           InputArray distCoeffs = noArray(), int minMarkers = 2);
 
 
 
@@ -173,16 +174,18 @@ CV_EXPORTS_W int interpolateCornersCharuco(InputArrayOfArrays markerCorners, Inp
  * @param distCoeffs vector of distortion coefficients
  * \f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6],[s_1, s_2, s_3, s_4]])\f$ of 4, 5, 8 or 12 elements
  * @param rvec Output vector (e.g. cv::Mat) corresponding to the rotation vector of the board
- * (@sa Rodrigues).
+ * (see cv::Rodrigues).
  * @param tvec Output vector (e.g. cv::Mat) corresponding to the translation vector of the board.
+ * @param useExtrinsicGuess defines whether initial guess for \b rvec and \b tvec will be used or not.
  *
  * This function estimates a Charuco board pose from some detected corners.
  * The function checks if the input corners are enough and valid to perform pose estimation.
  * If pose estimation is valid, returns true, else returns false.
  */
 CV_EXPORTS_W bool estimatePoseCharucoBoard(InputArray charucoCorners, InputArray charucoIds,
-                                           Ptr<CharucoBoard> &board, InputArray cameraMatrix,
-                                           InputArray distCoeffs, OutputArray rvec, OutputArray tvec);
+                                           const Ptr<CharucoBoard> &board, InputArray cameraMatrix,
+                                           InputArray distCoeffs, InputOutputArray rvec,
+                                           InputOutputArray tvec, bool useExtrinsicGuess = false);
 
 
 
@@ -223,19 +226,36 @@ CV_EXPORTS_W void drawDetectedCornersCharuco(InputOutputArray image, InputArray
  * from the model coordinate space (in which object points are specified) to the world coordinate
  * space, that is, a real position of the board pattern in the k-th pattern view (k=0.. *M* -1).
  * @param tvecs Output vector of translation vectors estimated for each pattern view.
- * @param flags flags Different flags  for the calibration process (@sa calibrateCamera)
+ * @param stdDeviationsIntrinsics Output vector of standard deviations estimated for intrinsic parameters.
+ * Order of deviations values:
+ * \f$(f_x, f_y, c_x, c_y, k_1, k_2, p_1, p_2, k_3, k_4, k_5, k_6 , s_1, s_2, s_3,
+ * s_4, \tau_x, \tau_y)\f$ If one of the parameters is not estimated, its deviation is equal to zero.
+ * @param stdDeviationsExtrinsics Output vector of standard deviations estimated for extrinsic parameters.
+ * Order of deviations values: \f$(R_1, T_1, \dotsc , R_M, T_M)\f$ where M is number of pattern views,
+ * \f$R_i, T_i\f$ are concatenated 1x3 vectors.
+ * @param perViewErrors Output vector of average re-projection errors estimated for each pattern view.
+ * @param flags Different flags for the calibration process (see #calibrateCamera for details).
  * @param criteria Termination criteria for the iterative optimization algorithm.
  *
  * This function calibrates a camera using a set of corners of a  Charuco Board. The function
  * receives a list of detected corners and its identifiers from several views of the Board.
  * The function returns the final re-projection error.
  */
-CV_EXPORTS_W double calibrateCameraCharuco(
-    InputArrayOfArrays charucoCorners, InputArrayOfArrays charucoIds, Ptr<CharucoBoard> &board,
+CV_EXPORTS_AS(calibrateCameraCharucoExtended) double calibrateCameraCharuco(
+    InputArrayOfArrays charucoCorners, InputArrayOfArrays charucoIds, const Ptr<CharucoBoard> &board,
     Size imageSize, InputOutputArray cameraMatrix, InputOutputArray distCoeffs,
-    OutputArrayOfArrays rvecs = noArray(), OutputArrayOfArrays tvecs = noArray(), int flags = 0,
+    OutputArrayOfArrays rvecs, OutputArrayOfArrays tvecs,
+    OutputArray stdDeviationsIntrinsics, OutputArray stdDeviationsExtrinsics,
+    OutputArray perViewErrors, int flags = 0,
     TermCriteria criteria = TermCriteria(TermCriteria::COUNT + TermCriteria::EPS, 30, DBL_EPSILON));
 
+/** @brief It's the same function as #calibrateCameraCharuco but without calibration error estimation.
+*/
+CV_EXPORTS_W double calibrateCameraCharuco(
+  InputArrayOfArrays charucoCorners, InputArrayOfArrays charucoIds, const Ptr<CharucoBoard> &board,
+  Size imageSize, InputOutputArray cameraMatrix, InputOutputArray distCoeffs,
+  OutputArrayOfArrays rvecs = noArray(), OutputArrayOfArrays tvecs = noArray(), int flags = 0,
+  TermCriteria criteria = TermCriteria(TermCriteria::COUNT + TermCriteria::EPS, 30, DBL_EPSILON));
 
 
 
@@ -309,7 +329,7 @@ CV_EXPORTS_W void drawDetectedDiamonds(InputOutputArray image, InputArrayOfArray
  * This function return the image of a ChArUco marker, ready to be printed.
  */
 // TODO cannot be exported yet; conversion from/to Vec4i is not wrapped in core
-CV_EXPORTS void drawCharucoDiamond(Ptr<Dictionary> &dictionary, Vec4i ids, int squareLength,
+CV_EXPORTS void drawCharucoDiamond(const Ptr<Dictionary> &dictionary, Vec4i ids, int squareLength,
                                    int markerLength, OutputArray img, int marginSize = 0,
                                    int borderBits = 1);
 
diff --git a/IPL/include/opencv/opencv2/aruco/dictionary.hpp b/IPL/include/opencv/opencv2/aruco/dictionary.hpp
index 3ef1a82..27c7e5d 100644
--- a/IPL/include/opencv/opencv2/aruco/dictionary.hpp
+++ b/IPL/include/opencv/opencv2/aruco/dictionary.hpp
@@ -84,14 +84,14 @@ class CV_EXPORTS_W Dictionary {
     /**
      * @see generateCustomDictionary
      */
-    CV_WRAP_AS(create) static Ptr<Dictionary> create(int nMarkers, int markerSize);
+    CV_WRAP_AS(create) static Ptr<Dictionary> create(int nMarkers, int markerSize, int randomSeed=0);
 
 
     /**
      * @see generateCustomDictionary
      */
     CV_WRAP_AS(create_from) static Ptr<Dictionary> create(int nMarkers, int markerSize,
-            Ptr<Dictionary> &baseDictionary);
+            const Ptr<Dictionary> &baseDictionary, int randomSeed=0);
 
     /**
      * @see getPredefinedDictionary
@@ -114,19 +114,19 @@ class CV_EXPORTS_W Dictionary {
     /**
      * @brief Draw a canonical marker image
      */
-    void drawMarker(int id, int sidePixels, OutputArray _img, int borderBits = 1) const;
+    CV_WRAP void drawMarker(int id, int sidePixels, OutputArray _img, int borderBits = 1) const;
 
 
     /**
       * @brief Transform matrix of bits to list of bytes in the 4 rotations
       */
-    static Mat getByteListFromBits(const Mat &bits);
+    CV_WRAP static Mat getByteListFromBits(const Mat &bits);
 
 
     /**
       * @brief Transform list of bytes to matrix of bits
       */
-    static Mat getBitsFromByteList(const Mat &byteList, int markerSize);
+    CV_WRAP static Mat getBitsFromByteList(const Mat &byteList, int markerSize);
 };
 
 
@@ -138,7 +138,7 @@ class CV_EXPORTS_W Dictionary {
  * - DICT_ARUCO_ORIGINAL: standard ArUco Library Markers. 1024 markers, 5x5 bits, 0 minimum
                           distance
  */
-enum CV_EXPORTS_W_SIMPLE PREDEFINED_DICTIONARY_NAME {
+enum PREDEFINED_DICTIONARY_NAME {
     DICT_4X4_50 = 0,
     DICT_4X4_100,
     DICT_4X4_250,
@@ -155,7 +155,11 @@ enum CV_EXPORTS_W_SIMPLE PREDEFINED_DICTIONARY_NAME {
     DICT_7X7_100,
     DICT_7X7_250,
     DICT_7X7_1000,
-    DICT_ARUCO_ORIGINAL
+    DICT_ARUCO_ORIGINAL,
+    DICT_APRILTAG_16h5,     ///< 4x4 bits, minimum hamming distance between any two codes = 5, 30 codes
+    DICT_APRILTAG_25h9,     ///< 5x5 bits, minimum hamming distance between any two codes = 9, 35 codes
+    DICT_APRILTAG_36h10,    ///< 6x6 bits, minimum hamming distance between any two codes = 10, 2320 codes
+    DICT_APRILTAG_36h11     ///< 6x6 bits, minimum hamming distance between any two codes = 11, 587 codes
 };
 
 
@@ -176,7 +180,8 @@ CV_EXPORTS_W Ptr<Dictionary> getPredefinedDictionary(int dict);
   */
 CV_EXPORTS_AS(custom_dictionary) Ptr<Dictionary> generateCustomDictionary(
         int nMarkers,
-        int markerSize);
+        int markerSize,
+        int randomSeed=0);
 
 
 /**
@@ -185,6 +190,7 @@ CV_EXPORTS_AS(custom_dictionary) Ptr<Dictionary> generateCustomDictionary(
   * @param nMarkers number of markers in the dictionary
   * @param markerSize number of bits per dimension of each markers
   * @param baseDictionary Include the markers in this dictionary at the beginning (optional)
+  * @param randomSeed a user supplied seed for theRNG()
   *
   * This function creates a new dictionary composed by nMarkers markers and each markers composed
   * by markerSize x markerSize bits. If baseDictionary is provided, its markers are directly
@@ -194,7 +200,8 @@ CV_EXPORTS_AS(custom_dictionary) Ptr<Dictionary> generateCustomDictionary(
 CV_EXPORTS_AS(custom_dictionary_from) Ptr<Dictionary> generateCustomDictionary(
         int nMarkers,
         int markerSize,
-        Ptr<Dictionary> &baseDictionary);
+        const Ptr<Dictionary> &baseDictionary,
+        int randomSeed=0);
 
 
 
diff --git a/IPL/include/opencv/opencv2/bgsegm.hpp b/IPL/include/opencv/opencv2/bgsegm.hpp
index 5a4ae3f..8ace5d9 100644
--- a/IPL/include/opencv/opencv2/bgsegm.hpp
+++ b/IPL/include/opencv/opencv2/bgsegm.hpp
@@ -183,7 +183,193 @@ class CV_EXPORTS_W BackgroundSubtractorGMG : public BackgroundSubtractor
 @param decisionThreshold Threshold value, above which it is marked foreground, else background.
  */
 CV_EXPORTS_W Ptr<BackgroundSubtractorGMG> createBackgroundSubtractorGMG(int initializationFrames=120,
-                                                                        double decisionThreshold=0.8);                                  
+                                                                        double decisionThreshold=0.8);
+
+/** @brief Background subtraction based on counting.
+
+  About as fast as MOG2 on a high end system.
+  More than twice as fast as MOG2 on cheap hardware (benchmarked on Raspberry Pi3).
+
+  %Algorithm by Sagi Zeevi ( https://github.com/sagi-z/BackgroundSubtractorCNT )
+*/
+class CV_EXPORTS_W BackgroundSubtractorCNT  : public BackgroundSubtractor
+{
+public:
+    // BackgroundSubtractor interface
+    CV_WRAP virtual void apply(InputArray image, OutputArray fgmask, double learningRate=-1) CV_OVERRIDE = 0;
+    CV_WRAP virtual void getBackgroundImage(OutputArray backgroundImage) const CV_OVERRIDE = 0;
+
+    /** @brief Returns number of frames with same pixel color to consider stable.
+    */
+    CV_WRAP virtual int getMinPixelStability() const = 0;
+    /** @brief Sets the number of frames with same pixel color to consider stable.
+    */
+    CV_WRAP virtual void setMinPixelStability(int value) = 0;
+
+    /** @brief Returns maximum allowed credit for a pixel in history.
+    */
+    CV_WRAP virtual int getMaxPixelStability() const = 0;
+    /** @brief Sets the maximum allowed credit for a pixel in history.
+    */
+    CV_WRAP virtual void setMaxPixelStability(int value) = 0;
+
+    /** @brief Returns if we're giving a pixel credit for being stable for a long time.
+    */
+    CV_WRAP virtual bool getUseHistory() const = 0;
+    /** @brief Sets if we're giving a pixel credit for being stable for a long time.
+    */
+    CV_WRAP virtual void setUseHistory(bool value) = 0;
+
+    /** @brief Returns if we're parallelizing the algorithm.
+    */
+    CV_WRAP virtual bool getIsParallel() const = 0;
+    /** @brief Sets if we're parallelizing the algorithm.
+    */
+    CV_WRAP virtual void setIsParallel(bool value) = 0;
+};
+
+/** @brief Creates a CNT Background Subtractor
+
+@param minPixelStability number of frames with same pixel color to consider stable
+@param useHistory determines if we're giving a pixel credit for being stable for a long time
+@param maxPixelStability maximum allowed credit for a pixel in history
+@param isParallel determines if we're parallelizing the algorithm
+ */
+
+CV_EXPORTS_W Ptr<BackgroundSubtractorCNT>
+createBackgroundSubtractorCNT(int minPixelStability = 15,
+                              bool useHistory = true,
+                              int maxPixelStability = 15*60,
+                              bool isParallel = true);
+
+enum LSBPCameraMotionCompensation {
+    LSBP_CAMERA_MOTION_COMPENSATION_NONE = 0,
+    LSBP_CAMERA_MOTION_COMPENSATION_LK
+};
+
+/** @brief Implementation of a different yet better algorithm called GSOC, so named because it was implemented during GSOC and did not originate from any paper.
+
+This algorithm demonstrates better performance on CDNET 2014 dataset compared to other algorithms in OpenCV.
+ */
+class CV_EXPORTS_W BackgroundSubtractorGSOC : public BackgroundSubtractor
+{
+public:
+    // BackgroundSubtractor interface
+    CV_WRAP virtual void apply(InputArray image, OutputArray fgmask, double learningRate=-1) CV_OVERRIDE = 0;
+
+    CV_WRAP virtual void getBackgroundImage(OutputArray backgroundImage) const CV_OVERRIDE = 0;
+};
+
+/** @brief Background Subtraction using Local SVD Binary Pattern. More details about the algorithm can be found at @cite LGuo2016
+ */
+class CV_EXPORTS_W BackgroundSubtractorLSBP : public BackgroundSubtractor
+{
+public:
+    // BackgroundSubtractor interface
+    CV_WRAP virtual void apply(InputArray image, OutputArray fgmask, double learningRate=-1) CV_OVERRIDE = 0;
+
+    CV_WRAP virtual void getBackgroundImage(OutputArray backgroundImage) const CV_OVERRIDE = 0;
+};
+
+/** @brief This is for calculation of the LSBP descriptors.
+ */
+class CV_EXPORTS_W BackgroundSubtractorLSBPDesc
+{
+public:
+    static void calcLocalSVDValues(OutputArray localSVDValues, const Mat& frame);
+
+    static void computeFromLocalSVDValues(OutputArray desc, const Mat& localSVDValues, const Point2i* LSBPSamplePoints);
+
+    static void compute(OutputArray desc, const Mat& frame, const Point2i* LSBPSamplePoints);
+};
+
+/** @brief Creates an instance of BackgroundSubtractorGSOC algorithm.
+
+Implementation of a different yet better algorithm called GSOC, so named because it was implemented during GSOC and did not originate from any paper.
+
+@param mc Whether to use camera motion compensation.
+@param nSamples Number of samples to maintain at each point of the frame.
+@param replaceRate Probability of replacing the old sample - how fast the model will update itself.
+@param propagationRate Probability of propagating to neighbors.
+@param hitsThreshold How many positives the sample must get before it will be considered as a possible replacement.
+@param alpha Scale coefficient for threshold.
+@param beta Bias coefficient for threshold.
+@param blinkingSupressionDecay Blinking suppression decay factor.
+@param blinkingSupressionMultiplier Blinking suppression multiplier.
+@param noiseRemovalThresholdFacBG Strength of the noise removal for background points.
+@param noiseRemovalThresholdFacFG Strength of the noise removal for foreground points.
+ */
+CV_EXPORTS_W Ptr<BackgroundSubtractorGSOC> createBackgroundSubtractorGSOC(int mc = LSBP_CAMERA_MOTION_COMPENSATION_NONE, int nSamples = 20, float replaceRate = 0.003f, float propagationRate = 0.01f, int hitsThreshold = 32, float alpha = 0.01f, float beta = 0.0022f, float blinkingSupressionDecay = 0.1f, float blinkingSupressionMultiplier = 0.1f, float noiseRemovalThresholdFacBG = 0.0004f, float noiseRemovalThresholdFacFG = 0.0008f);
+
+/** @brief Creates an instance of BackgroundSubtractorLSBP algorithm.
+
+Background Subtraction using Local SVD Binary Pattern. More details about the algorithm can be found at @cite LGuo2016
+
+@param mc Whether to use camera motion compensation.
+@param nSamples Number of samples to maintain at each point of the frame.
+@param LSBPRadius LSBP descriptor radius.
+@param Tlower Lower bound for T-values. See @cite LGuo2016 for details.
+@param Tupper Upper bound for T-values. See @cite LGuo2016 for details.
+@param Tinc Increase step for T-values. See @cite LGuo2016 for details.
+@param Tdec Decrease step for T-values. See @cite LGuo2016 for details.
+@param Rscale Scale coefficient for threshold values.
+@param Rincdec Increase/Decrease step for threshold values.
+@param noiseRemovalThresholdFacBG Strength of the noise removal for background points.
+@param noiseRemovalThresholdFacFG Strength of the noise removal for foreground points.
+@param LSBPthreshold Threshold for LSBP binary string.
+@param minCount Minimal number of matches for sample to be considered as foreground.
+ */
+CV_EXPORTS_W Ptr<BackgroundSubtractorLSBP> createBackgroundSubtractorLSBP(int mc = LSBP_CAMERA_MOTION_COMPENSATION_NONE, int nSamples = 20, int LSBPRadius = 16, float Tlower = 2.0f, float Tupper = 32.0f, float Tinc = 1.0f, float Tdec = 0.05f, float Rscale = 10.0f, float Rincdec = 0.005f, float noiseRemovalThresholdFacBG = 0.0004f, float noiseRemovalThresholdFacFG = 0.0008f, int LSBPthreshold = 8, int minCount = 2);
+
+/** @brief Synthetic frame sequence generator for testing background subtraction algorithms.
+
+ It will generate the moving object on top of the background.
+ It will apply some distortion to the background to make the test more complex.
+ */
+class CV_EXPORTS_W SyntheticSequenceGenerator : public Algorithm
+{
+private:
+    const double amplitude;
+    const double wavelength;
+    const double wavespeed;
+    const double objspeed;
+    unsigned timeStep;
+    Point2d pos;
+    Point2d dir;
+    Mat background;
+    Mat object;
+    RNG rng;
+
+public:
+    /** @brief Creates an instance of SyntheticSequenceGenerator.
+
+    @param background Background image for object.
+    @param object Object image which will move slowly over the background.
+    @param amplitude Amplitude of wave distortion applied to background.
+    @param wavelength Length of waves in distortion applied to background.
+    @param wavespeed How fast waves will move.
+    @param objspeed How fast object will fly over background.
+     */
+    CV_WRAP SyntheticSequenceGenerator(InputArray background, InputArray object, double amplitude, double wavelength, double wavespeed, double objspeed);
+
+    /** @brief Obtain the next frame in the sequence.
+
+    @param frame Output frame.
+    @param gtMask Output ground-truth (reference) segmentation mask object/background.
+     */
+    CV_WRAP void getNextFrame(OutputArray frame, OutputArray gtMask);
+};
+
+/** @brief Creates an instance of SyntheticSequenceGenerator.
+
+@param background Background image for object.
+@param object Object image which will move slowly over the background.
+@param amplitude Amplitude of wave distortion applied to background.
+@param wavelength Length of waves in distortion applied to background.
+@param wavespeed How fast waves will move.
+@param objspeed How fast object will fly over background.
+ */
+CV_EXPORTS_W Ptr<SyntheticSequenceGenerator> createSyntheticSequenceGenerator(InputArray background, InputArray object, double amplitude = 2.0, double wavelength = 20.0, double wavespeed = 0.2, double objspeed = 6.0);
 
 //! @}
 
diff --git a/IPL/include/opencv/opencv2/bioinspired/retina.hpp b/IPL/include/opencv/opencv2/bioinspired/retina.hpp
index 4ed6f3a..91c8148 100644
--- a/IPL/include/opencv/opencv2/bioinspired/retina.hpp
+++ b/IPL/include/opencv/opencv2/bioinspired/retina.hpp
@@ -146,7 +146,7 @@ enum {
     </opencv_storage>
     @endcode
       */
-    struct RetinaParameters{ 
+    struct RetinaParameters{
         //! Outer Plexiform Layer (OPL) and Inner Plexiform Layer Parvocellular (IplParvo) parameters
         struct OPLandIplParvoParameters{
                OPLandIplParvoParameters():colorMode(true),
@@ -208,7 +208,7 @@ class CV_EXPORTS_W Retina : public Algorithm {
 
 public:
 
-    
+
     /** @brief Retreive retina input buffer size
     @return the retina input buffer size
      */
@@ -226,8 +226,9 @@ class CV_EXPORTS_W Retina : public Algorithm {
     - warning, Exceptions are thrown if read XML file is not valid
     @param retinaParameterFile the parameters filename
     @param applyDefaultSetupOnFailure set to true if an error must be thrown on error
-    You can retreive the current parameers structure using method Retina::getParameters and update
-    it before running method Retina::setup
+
+    You can retrieve the current parameters structure using the method Retina::getParameters and update
+    it before running method Retina::setup.
      */
     CV_WRAP virtual void setup(String retinaParameterFile="", const bool applyDefaultSetupOnFailure=true)=0;
 
@@ -259,7 +260,7 @@ class CV_EXPORTS_W Retina : public Algorithm {
     CV_WRAP virtual void write( String fs ) const=0;
 
     /** @overload */
-    virtual void write( FileStorage& fs ) const=0;
+    virtual void write( FileStorage& fs ) const CV_OVERRIDE = 0;
 
     /** @brief Setup the OPL and IPL parvo channels (see biologocal model)
 
@@ -421,37 +422,30 @@ class CV_EXPORTS_W Retina : public Algorithm {
     Retina::getParvo methods
      */
     CV_WRAP virtual void activateContoursProcessing(const bool activate)=0;
-};
-
-//! @relates bioinspired::Retina
-//! @{
-
-/** @overload */
-CV_EXPORTS_W Ptr<Retina> createRetina(Size inputSize);
-/** @brief Constructors from standardized interfaces : retreive a smart pointer to a Retina instance
-
-@param inputSize the input frame size
-@param colorMode the chosen processing mode : with or without color processing
-@param colorSamplingMethod specifies which kind of color sampling will be used :
--   cv::bioinspired::RETINA_COLOR_RANDOM: each pixel position is either R, G or B in a random choice
--   cv::bioinspired::RETINA_COLOR_DIAGONAL: color sampling is RGBRGBRGB..., line 2 BRGBRGBRG..., line 3, GBRGBRGBR...
--   cv::bioinspired::RETINA_COLOR_BAYER: standard bayer sampling
-@param useRetinaLogSampling activate retina log sampling, if true, the 2 following parameters can
-be used
-@param reductionFactor only usefull if param useRetinaLogSampling=true, specifies the reduction
-factor of the output frame (as the center (fovea) is high resolution and corners can be
-underscaled, then a reduction of the output is allowed without precision leak
-@param samplingStrenght only usefull if param useRetinaLogSampling=true, specifies the strenght of
-the log scale that is applied
- */
-CV_EXPORTS_W Ptr<Retina> createRetina(Size inputSize, const bool colorMode, int colorSamplingMethod=RETINA_COLOR_BAYER, const bool useRetinaLogSampling=false, const float reductionFactor=1.0f, const float samplingStrenght=10.0f);
-
-#ifdef HAVE_OPENCV_OCL
-Ptr<Retina> createRetina_OCL(Size inputSize);
-Ptr<Retina> createRetina_OCL(Size inputSize, const bool colorMode, int colorSamplingMethod=RETINA_COLOR_BAYER, const bool useRetinaLogSampling=false, const float reductionFactor=1.0f, const float samplingStrenght=10.0f);
-#endif
 
-//! @}
+    /** @overload */
+    CV_WRAP static Ptr<Retina> create(Size inputSize);
+    /** @brief Constructors from standardized interfaces : retrieve a smart pointer to a Retina instance
+
+    @param inputSize the input frame size
+    @param colorMode the chosen processing mode : with or without color processing
+    @param colorSamplingMethod specifies which kind of color sampling will be used :
+    -   cv::bioinspired::RETINA_COLOR_RANDOM: each pixel position is either R, G or B in a random choice
+    -   cv::bioinspired::RETINA_COLOR_DIAGONAL: color sampling is RGBRGBRGB..., line 2 BRGBRGBRG..., line 3, GBRGBRGBR...
+    -   cv::bioinspired::RETINA_COLOR_BAYER: standard bayer sampling
+    @param useRetinaLogSampling activate retina log sampling, if true, the 2 following parameters can
+    be used
+    @param reductionFactor only useful if param useRetinaLogSampling=true, specifies the reduction
+    factor of the output frame (as the center (fovea) is high resolution and corners can be
+    underscaled, then a reduction of the output is allowed without precision leak)
+    @param samplingStrength only useful if param useRetinaLogSampling=true, specifies the strength of
+    the log scale that is applied
+     */
+    CV_WRAP static Ptr<Retina> create(Size inputSize, const bool colorMode,
+                                           int colorSamplingMethod=RETINA_COLOR_BAYER,
+                                           const bool useRetinaLogSampling=false,
+                                           const float reductionFactor=1.0f, const float samplingStrength=10.0f);
+};
 
 //! @}
 
diff --git a/IPL/include/opencv/opencv2/bioinspired/retinafasttonemapping.hpp b/IPL/include/opencv/opencv2/bioinspired/retinafasttonemapping.hpp
index c65709d..ba1a872 100644
--- a/IPL/include/opencv/opencv2/bioinspired/retinafasttonemapping.hpp
+++ b/IPL/include/opencv/opencv2/bioinspired/retinafasttonemapping.hpp
@@ -126,10 +126,10 @@ class CV_EXPORTS_W RetinaFastToneMapping : public Algorithm
     (default is 1, see reference paper)
      */
     CV_WRAP virtual void setup(const float photoreceptorsNeighborhoodRadius=3.f, const float ganglioncellsNeighborhoodRadius=1.f, const float meanLuminanceModulatorK=1.f)=0;
+
+    CV_WRAP static Ptr<RetinaFastToneMapping> create(Size inputSize);
 };
 
-//! @relates bioinspired::RetinaFastToneMapping
-CV_EXPORTS_W Ptr<RetinaFastToneMapping> createRetinaFastToneMapping(Size inputSize);
 
 //! @}
 
diff --git a/IPL/include/opencv/opencv2/bioinspired/transientareassegmentationmodule.hpp b/IPL/include/opencv/opencv2/bioinspired/transientareassegmentationmodule.hpp
index b11b61d..d5f5b2f 100644
--- a/IPL/include/opencv/opencv2/bioinspired/transientareassegmentationmodule.hpp
+++ b/IPL/include/opencv/opencv2/bioinspired/transientareassegmentationmodule.hpp
@@ -80,7 +80,7 @@ namespace bioinspired
 /** @brief parameter structure that stores the transient events detector setup parameters
 */
 struct SegmentationParameters{ // CV_EXPORTS_W_MAP to export to python native dictionnaries
-	// default structure instance construction with default values	
+	// default structure instance construction with default values
 	SegmentationParameters():
 	    thresholdON(100),
 	    thresholdOFF(100),
@@ -171,7 +171,7 @@ class CV_EXPORTS_W TransientAreasSegmentationModule: public Algorithm
     /** @brief write xml/yml formated parameters information
     @param fs : a cv::Filestorage object ready to be filled
     */
-    virtual void write( cv::FileStorage& fs ) const=0;
+    virtual void write( cv::FileStorage& fs ) const CV_OVERRIDE = 0;
 
     /** @brief main processing method, get result using methods getSegmentationPicture()
     @param inputToSegment : the image to process, it must match the instance buffer size !
@@ -180,20 +180,19 @@ class CV_EXPORTS_W TransientAreasSegmentationModule: public Algorithm
     CV_WRAP virtual void run(InputArray inputToSegment, const int channelIndex=0)=0;
 
     /** @brief access function
-    @return the last segmentation result: a boolean picture which is resampled between 0 and 255 for a display purpose
-   */
+    return the last segmentation result: a boolean picture which is resampled between 0 and 255 for a display purpose
+    */
     CV_WRAP virtual void getSegmentationPicture(OutputArray transientAreas)=0;
 
     /** @brief cleans all the buffers of the instance
     */
     CV_WRAP virtual void clearAllBuffers()=0;
-};
 
-/** @brief allocator
-@param inputSize : size of the images input to segment (output will be the same size)
-@relates bioinspired::TransientAreasSegmentationModule
- */
-CV_EXPORTS_W Ptr<TransientAreasSegmentationModule> createTransientAreasSegmentationModule(Size inputSize);
+    /** @brief allocator
+    @param inputSize : size of the images input to segment (output will be the same size)
+     */
+    CV_WRAP static Ptr<TransientAreasSegmentationModule> create(Size inputSize);
+};
 
 //! @}
 
diff --git a/IPL/include/opencv/opencv2/calib3d.hpp b/IPL/include/opencv/opencv2/calib3d.hpp
index e26e8c1..517c4cd 100644
--- a/IPL/include/opencv/opencv2/calib3d.hpp
+++ b/IPL/include/opencv/opencv2/calib3d.hpp
@@ -41,8 +41,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_CALIB3D_HPP__
-#define __OPENCV_CALIB3D_HPP__
+#ifndef OPENCV_CALIB3D_HPP
+#define OPENCV_CALIB3D_HPP
 
 #include "opencv2/core.hpp"
 #include "opencv2/features2d.hpp"
@@ -51,85 +51,254 @@
 /**
   @defgroup calib3d Camera Calibration and 3D Reconstruction
 
-The functions in this section use a so-called pinhole camera model. In this model, a scene view is
-formed by projecting 3D points into the image plane using a perspective transformation.
+The functions in this section use a so-called pinhole camera model. The view of a scene
+is obtained by projecting a scene's 3D point \f$P_w\f$ into the image plane using a perspective
+transformation which forms the corresponding pixel \f$p\f$. Both \f$P_w\f$ and \f$p\f$ are
+represented in homogeneous coordinates, i.e. as 3D and 2D homogeneous vectors respectively. You will
+find a brief introduction to projective geometry, homogeneous vectors and homogeneous
+transformations at the end of this section's introduction. For more succinct notation, we often drop
+the 'homogeneous' and say vector instead of homogeneous vector.
 
-\f[s  \; m' = A [R|t] M'\f]
+The distortion-free projective transformation given by a pinhole camera model is shown below.
 
-or
+\f[s \; p = A \begin{bmatrix} R|t \end{bmatrix} P_w,\f]
 
-\f[s  \vecthree{u}{v}{1} = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}
+where \f$P_w\f$ is a 3D point expressed with respect to the world coordinate system,
+\f$p\f$ is a 2D pixel in the image plane, \f$A\f$ is the intrinsic camera matrix,
+\f$R\f$ and \f$t\f$ are the rotation and translation that describe the change of coordinates from
+world to camera coordinate systems (or camera frame) and \f$s\f$ is the projective transformation's
+arbitrary scaling and not part of the camera model.
+
+The intrinsic camera matrix \f$A\f$ (notation used as in @cite Zhang2000 and also generally notated
+as \f$K\f$) projects 3D points given in the camera coordinate system to 2D pixel coordinates, i.e.
+
+\f[p = A P_c.\f]
+
+The camera matrix \f$A\f$ is composed of the focal lengths \f$f_x\f$ and \f$f_y\f$, which are
+expressed in pixel units, and the principal point \f$(c_x, c_y)\f$, that is usually close to the
+image center:
+
+\f[A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1},\f]
+
+and thus
+
+\f[s \vecthree{u}{v}{1} = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1} \vecthree{X_c}{Y_c}{Z_c}.\f]
+
+The matrix of intrinsic parameters does not depend on the scene viewed. So, once estimated, it can
+be re-used as long as the focal length is fixed (in case of a zoom lens). Thus, if an image from the
+camera is scaled by a factor, all of these parameters need to be scaled (multiplied/divided,
+respectively) by the same factor.
+
+The joint rotation-translation matrix \f$[R|t]\f$ is the matrix product of a projective
+transformation and a homogeneous transformation. The 3-by-4 projective transformation maps 3D points
+represented in camera coordinates to 2D points in the image plane and represented in normalized
+camera coordinates \f$x' = X_c / Z_c\f$ and \f$y' = Y_c / Z_c\f$:
+
+\f[Z_c \begin{bmatrix}
+x' \\
+y' \\
+1
+\end{bmatrix} = \begin{bmatrix}
+1 & 0 & 0 & 0 \\
+0 & 1 & 0 & 0 \\
+0 & 0 & 1 & 0
+\end{bmatrix}
 \begin{bmatrix}
-r_{11} & r_{12} & r_{13} & t_1  \\
-r_{21} & r_{22} & r_{23} & t_2  \\
-r_{31} & r_{32} & r_{33} & t_3
+X_c \\
+Y_c \\
+Z_c \\
+1
+\end{bmatrix}.\f]
+
+The homogeneous transformation is encoded by the extrinsic parameters \f$R\f$ and \f$t\f$ and
+represents the change of basis from world coordinate system \f$w\f$ to the camera coordinate system
+\f$c\f$. Thus, given the representation of the point \f$P\f$ in world coordinates, \f$P_w\f$, we
+obtain \f$P\f$'s representation in the camera coordinate system, \f$P_c\f$, by
+
+\f[P_c = \begin{bmatrix}
+R & t \\
+0 & 1
+\end{bmatrix} P_w.\f]
+
+This homogeneous transformation is composed out of \f$R\f$, a 3-by-3 rotation matrix, and \f$t\f$, a
+3-by-1 translation vector:
+
+\f[\begin{bmatrix}
+R & t \\
+0 & 1
+\end{bmatrix} = \begin{bmatrix}
+r_{11} & r_{12} & r_{13} & t_x \\
+r_{21} & r_{22} & r_{23} & t_y \\
+r_{31} & r_{32} & r_{33} & t_z \\
+0 & 0 & 0 & 1
+\end{bmatrix},
+\f]
+
+and therefore
+
+\f[\begin{bmatrix}
+X_c \\
+Y_c \\
+Z_c \\
+1
+\end{bmatrix} = \begin{bmatrix}
+r_{11} & r_{12} & r_{13} & t_x \\
+r_{21} & r_{22} & r_{23} & t_y \\
+r_{31} & r_{32} & r_{33} & t_z \\
+0 & 0 & 0 & 1
 \end{bmatrix}
 \begin{bmatrix}
-X \\
-Y \\
-Z \\
+X_w \\
+Y_w \\
+Z_w \\
+1
+\end{bmatrix}.\f]
+
+Combining the projective transformation and the homogeneous transformation, we obtain the projective
+transformation that maps 3D points in world coordinates into 2D points in the image plane and in
+normalized camera coordinates:
+
+\f[Z_c \begin{bmatrix}
+x' \\
+y' \\
+1
+\end{bmatrix} = \begin{bmatrix} R|t \end{bmatrix} \begin{bmatrix}
+X_w \\
+Y_w \\
+Z_w \\
+1
+\end{bmatrix} = \begin{bmatrix}
+r_{11} & r_{12} & r_{13} & t_x \\
+r_{21} & r_{22} & r_{23} & t_y \\
+r_{31} & r_{32} & r_{33} & t_z
+\end{bmatrix}
+\begin{bmatrix}
+X_w \\
+Y_w \\
+Z_w \\
+1
+\end{bmatrix},\f]
+
+with \f$x' = X_c / Z_c\f$ and \f$y' = Y_c / Z_c\f$. Putting the equations for intrinsics and extrinsics together, we can write out
+\f$s \; p = A \begin{bmatrix} R|t \end{bmatrix} P_w\f$ as
+
+\f[s \vecthree{u}{v}{1} = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}
+\begin{bmatrix}
+r_{11} & r_{12} & r_{13} & t_x \\
+r_{21} & r_{22} & r_{23} & t_y \\
+r_{31} & r_{32} & r_{33} & t_z
+\end{bmatrix}
+\begin{bmatrix}
+X_w \\
+Y_w \\
+Z_w \\
 1
+\end{bmatrix}.\f]
+
+If \f$Z_c \ne 0\f$, the transformation above is equivalent to the following,
+
+\f[\begin{bmatrix}
+u \\
+v
+\end{bmatrix} = \begin{bmatrix}
+f_x X_c/Z_c + c_x \\
+f_y Y_c/Z_c + c_y
 \end{bmatrix}\f]
 
-where:
-
--   \f$(X, Y, Z)\f$ are the coordinates of a 3D point in the world coordinate space
--   \f$(u, v)\f$ are the coordinates of the projection point in pixels
--   \f$A\f$ is a camera matrix, or a matrix of intrinsic parameters
--   \f$(cx, cy)\f$ is a principal point that is usually at the image center
--   \f$fx, fy\f$ are the focal lengths expressed in pixel units.
-
-Thus, if an image from the camera is scaled by a factor, all of these parameters should be scaled
-(multiplied/divided, respectively) by the same factor. The matrix of intrinsic parameters does not
-depend on the scene viewed. So, once estimated, it can be re-used as long as the focal length is
-fixed (in case of zoom lens). The joint rotation-translation matrix \f$[R|t]\f$ is called a matrix of
-extrinsic parameters. It is used to describe the camera motion around a static scene, or vice versa,
-rigid motion of an object in front of a still camera. That is, \f$[R|t]\f$ translates coordinates of a
-point \f$(X, Y, Z)\f$ to a coordinate system, fixed with respect to the camera. The transformation above
-is equivalent to the following (when \f$z \ne 0\f$ ):
-
-\f[\begin{array}{l}
-\vecthree{x}{y}{z} = R  \vecthree{X}{Y}{Z} + t \\
-x' = x/z \\
-y' = y/z \\
-u = f_x*x' + c_x \\
-v = f_y*y' + c_y
-\end{array}\f]
-
-Real lenses usually have some distortion, mostly radial distortion and slight tangential distortion.
+with
+
+\f[\vecthree{X_c}{Y_c}{Z_c} = \begin{bmatrix}
+R|t
+\end{bmatrix} \begin{bmatrix}
+X_w \\
+Y_w \\
+Z_w \\
+1
+\end{bmatrix}.\f]
+
+The following figure illustrates the pinhole camera model.
+
+![Pinhole camera model](pics/pinhole_camera_model.png)
+
+Real lenses usually have some distortion, mostly radial distortion, and slight tangential distortion.
 So, the above model is extended as:
 
-\f[\begin{array}{l}
-\vecthree{x}{y}{z} = R  \vecthree{X}{Y}{Z} + t \\
-x' = x/z \\
-y' = y/z \\
-x'' = x'  \frac{1 + k_1 r^2 + k_2 r^4 + k_3 r^6}{1 + k_4 r^2 + k_5 r^4 + k_6 r^6} + 2 p_1 x' y' + p_2(r^2 + 2 x'^2) + s_1 r^2 + s_2 r^4 \\
-y'' = y'  \frac{1 + k_1 r^2 + k_2 r^4 + k_3 r^6}{1 + k_4 r^2 + k_5 r^4 + k_6 r^6} + p_1 (r^2 + 2 y'^2) + 2 p_2 x' y' + s_3 r^2 + s_4 r^4 \\
-\text{where} \quad r^2 = x'^2 + y'^2  \\
-u = f_x*x'' + c_x \\
-v = f_y*y'' + c_y
-\end{array}\f]
-
-\f$k_1\f$, \f$k_2\f$, \f$k_3\f$, \f$k_4\f$, \f$k_5\f$, and \f$k_6\f$ are radial distortion coefficients. \f$p_1\f$ and \f$p_2\f$ are
-tangential distortion coefficients. \f$s_1\f$, \f$s_2\f$, \f$s_3\f$, and \f$s_4\f$, are the thin prism distortion
-coefficients. Higher-order coefficients are not considered in OpenCV.
-
-In some cases the image sensor may be tilted in order to focus an oblique plane in front of the
-camera (Scheimpfug condition). This can be useful for particle image velocimetry (PIV) or
+\f[\begin{bmatrix}
+u \\
+v
+\end{bmatrix} = \begin{bmatrix}
+f_x x'' + c_x \\
+f_y y'' + c_y
+\end{bmatrix}\f]
+
+where
+
+\f[\begin{bmatrix}
+x'' \\
+y''
+\end{bmatrix} = \begin{bmatrix}
+x' \frac{1 + k_1 r^2 + k_2 r^4 + k_3 r^6}{1 + k_4 r^2 + k_5 r^4 + k_6 r^6} + 2 p_1 x' y' + p_2(r^2 + 2 x'^2) + s_1 r^2 + s_2 r^4 \\
+y' \frac{1 + k_1 r^2 + k_2 r^4 + k_3 r^6}{1 + k_4 r^2 + k_5 r^4 + k_6 r^6} + p_1 (r^2 + 2 y'^2) + 2 p_2 x' y' + s_3 r^2 + s_4 r^4 \\
+\end{bmatrix}\f]
+
+with
+
+\f[r^2 = x'^2 + y'^2\f]
+
+and
+
+\f[\begin{bmatrix}
+x'\\
+y'
+\end{bmatrix} = \begin{bmatrix}
+X_c/Z_c \\
+Y_c/Z_c
+\end{bmatrix},\f]
+
+if \f$Z_c \ne 0\f$.
+
+The distortion parameters are the radial coefficients \f$k_1\f$, \f$k_2\f$, \f$k_3\f$, \f$k_4\f$, \f$k_5\f$, and \f$k_6\f$,
+the tangential distortion coefficients \f$p_1\f$ and \f$p_2\f$, and the thin prism distortion coefficients
+\f$s_1\f$, \f$s_2\f$, \f$s_3\f$, and \f$s_4\f$. Higher-order coefficients are not considered in OpenCV.
+
+The next figures show two common types of radial distortion: barrel distortion
+(\f$ 1 + k_1 r^2 + k_2 r^4 + k_3 r^6 \f$ monotonically decreasing)
+and pincushion distortion (\f$ 1 + k_1 r^2 + k_2 r^4 + k_3 r^6 \f$ monotonically increasing).
+Radial distortion is always monotonic for real lenses,
+and if the estimator produces a non-monotonic result,
+this should be considered a calibration failure.
+More generally, radial distortion must be monotonic and the distortion function must be bijective.
+A failed estimation result may look deceptively good near the image center
+but will work poorly in e.g. AR/SFM applications.
+The optimization method used in OpenCV camera calibration does not include these constraints as
+the framework does not support the required integer programming and polynomial inequalities.
+See [issue #15992](https://github.com/opencv/opencv/issues/15992) for additional information.
+
+![](pics/distortion_examples.png)
+![](pics/distortion_examples2.png)
+
+In some cases, the image sensor may be tilted in order to focus an oblique plane in front of the
+camera (Scheimpflug principle). This can be useful for particle image velocimetry (PIV) or
 triangulation with a laser fan. The tilt causes a perspective distortion of \f$x''\f$ and
-\f$y''\f$. This distortion can be modelled in the following way, see e.g. @cite Louhichi07.
+\f$y''\f$. This distortion can be modeled in the following way, see e.g. @cite Louhichi07.
 
-\f[\begin{array}{l}
-s\vecthree{x'''}{y'''}{1} =
+\f[\begin{bmatrix}
+u \\
+v
+\end{bmatrix} = \begin{bmatrix}
+f_x x''' + c_x \\
+f_y y''' + c_y
+\end{bmatrix},\f]
+
+where
+
+\f[s\vecthree{x'''}{y'''}{1} =
 \vecthreethree{R_{33}(\tau_x, \tau_y)}{0}{-R_{13}(\tau_x, \tau_y)}
 {0}{R_{33}(\tau_x, \tau_y)}{-R_{23}(\tau_x, \tau_y)}
-{0}{0}{1} R(\tau_x, \tau_y) \vecthree{x''}{y''}{1}\\
-u = f_x*x''' + c_x \\
-v = f_y*y''' + c_y
-\end{array}\f]
+{0}{0}{1} R(\tau_x, \tau_y) \vecthree{x''}{y''}{1}\f]
 
-where the matrix \f$R(\tau_x, \tau_y)\f$ is defined by two rotations with angular parameter \f$\tau_x\f$
-and \f$\tau_y\f$, respectively,
+and the matrix \f$R(\tau_x, \tau_y)\f$ is defined by two rotations with angular parameter
+\f$\tau_x\f$ and \f$\tau_y\f$, respectively,
 
 \f[
 R(\tau_x, \tau_y) =
@@ -148,8 +317,8 @@ vector. That is, if the vector contains four elements, it means that \f$k_3=0\f$
 coefficients do not depend on the scene viewed. Thus, they also belong to the intrinsic camera
 parameters. And they remain the same regardless of the captured image resolution. If, for example, a
 camera has been calibrated on images of 320 x 240 resolution, absolutely the same distortion
-coefficients can be used for 640 x 480 images from the same camera while \f$f_x\f$, \f$f_y\f$, \f$c_x\f$, and
-\f$c_y\f$ need to be scaled appropriately.
+coefficients can be used for 640 x 480 images from the same camera while \f$f_x\f$, \f$f_y\f$,
+\f$c_x\f$, and \f$c_y\f$ need to be scaled appropriately.
 
 The functions below use the above model to do the following:
 
@@ -161,15 +330,68 @@ pattern (every view is described by several 3D-2D point correspondences).
 -   Estimate the relative position and orientation of the stereo camera "heads" and compute the
 *rectification* transformation that makes the camera optical axes parallel.
 
+<B> Homogeneous Coordinates </B><br>
+Homogeneous coordinates are a system of coordinates that is used in projective geometry. Their use
+allows points at infinity to be represented by finite coordinates and simplifies formulas compared
+to their Cartesian counterparts, e.g. they have the advantage that affine transformations can be
+expressed as linear homogeneous transformations.
+
+One obtains the homogeneous vector \f$P_h\f$ by appending a 1 to an n-dimensional Cartesian
+vector \f$P\f$, e.g. for a 3D Cartesian vector the mapping \f$P \rightarrow P_h\f$ is:
+
+\f[\begin{bmatrix}
+X \\
+Y \\
+Z
+\end{bmatrix} \rightarrow \begin{bmatrix}
+X \\
+Y \\
+Z \\
+1
+\end{bmatrix}.\f]
+
+For the inverse mapping \f$P_h \rightarrow P\f$, one divides all elements of the homogeneous vector
+by its last element, e.g. for a 3D homogeneous vector one gets its 2D Cartesian counterpart by:
+
+\f[\begin{bmatrix}
+X \\
+Y \\
+W
+\end{bmatrix} \rightarrow \begin{bmatrix}
+X / W \\
+Y / W
+\end{bmatrix},\f]
+
+if \f$W \ne 0\f$.
+
+Due to this mapping, all multiples \f$k P_h\f$, for \f$k \ne 0\f$, of a homogeneous point represent
+the same point \f$P_h\f$. An intuitive understanding of this property is that under a projective
+transformation, all multiples of \f$P_h\f$ are mapped to the same point. This is the physical
+observation one does for pinhole cameras, as all points along a ray through the camera's pinhole are
+projected to the same image point, e.g. all points along the red ray in the image of the pinhole
+camera model above would be mapped to the same image coordinate. This property is also the source
+for the scale ambiguity s in the equation of the pinhole camera model.
+
+As mentioned, by using homogeneous coordinates we can express any change of basis parameterized by
+\f$R\f$ and \f$t\f$ as a linear transformation, e.g. the change of basis from coordinate system
+0 to coordinate system 1 becomes:
+
+\f[P_1 = R P_0 + t \rightarrow P_{h_1} = \begin{bmatrix}
+R & t \\
+0 & 1
+\end{bmatrix} P_{h_0}.\f]
+
 @note
-   -   A calibration sample for 3 cameras in horizontal position can be found at
+    -   Many functions in this module take a camera matrix as an input parameter. Although all
+        functions assume the same structure of this parameter, they may name it differently. The
+        parameter's description, however, will be clear in that a camera matrix with the structure
+        shown above is required.
+    -   A calibration sample for 3 cameras in a horizontal position can be found at
         opencv_source_code/samples/cpp/3calibration.cpp
     -   A calibration sample based on a sequence of images can be found at
         opencv_source_code/samples/cpp/calibration.cpp
     -   A calibration sample in order to do 3D reconstruction can be found at
         opencv_source_code/samples/cpp/build3dmodel.cpp
-    -   A calibration sample of an artificially generated camera and chessboard patterns can be
-        found at opencv_source_code/samples/cpp/calibration_artificial.cpp
     -   A calibration example on stereo calibration can be found at
         opencv_source_code/samples/cpp/stereo_calib.cpp
     -   A calibration example on stereo matching can be found at
@@ -190,7 +412,7 @@ pattern (every view is described by several 3D-2D point correspondences).
 
     \f[x = Xc_1 \\ y = Xc_2 \\ z = Xc_3\f]
 
-    The pinehole projection coordinates of P is [a; b] where
+    The pinhole projection coordinates of P is [a; b] where
 
     \f[a = x / z \ and \ b = y / z \\ r^2 = a^2 + b^2 \\ \theta = atan(r)\f]
 
@@ -219,23 +441,40 @@ namespace cv
 //! @{
 
 //! type of the robust estimation algorithm
-enum { LMEDS  = 4, //!< least-median algorithm
+enum { LMEDS  = 4, //!< least-median of squares algorithm
        RANSAC = 8, //!< RANSAC algorithm
        RHO    = 16 //!< RHO algorithm
      };
 
-enum { SOLVEPNP_ITERATIVE = 0,
-       SOLVEPNP_EPNP      = 1, //!< EPnP: Efficient Perspective-n-Point Camera Pose Estimation @cite lepetit2009epnp
-       SOLVEPNP_P3P       = 2, //!< Complete Solution Classification for the Perspective-Three-Point Problem @cite gao2003complete
-       SOLVEPNP_DLS       = 3, //!< A Direct Least-Squares (DLS) Method for PnP  @cite hesch2011direct
-       SOLVEPNP_UPNP      = 4  //!< Exhaustive Linearization for Robust Camera Pose and Focal Length Estimation @cite penate2013exhaustive
-
+enum SolvePnPMethod {
+    SOLVEPNP_ITERATIVE   = 0,
+    SOLVEPNP_EPNP        = 1, //!< EPnP: Efficient Perspective-n-Point Camera Pose Estimation @cite lepetit2009epnp
+    SOLVEPNP_P3P         = 2, //!< Complete Solution Classification for the Perspective-Three-Point Problem @cite gao2003complete
+    SOLVEPNP_DLS         = 3, //!< A Direct Least-Squares (DLS) Method for PnP  @cite hesch2011direct
+    SOLVEPNP_UPNP        = 4, //!< Exhaustive Linearization for Robust Camera Pose and Focal Length Estimation @cite penate2013exhaustive
+    SOLVEPNP_AP3P        = 5, //!< An Efficient Algebraic Solution to the Perspective-Three-Point Problem @cite Ke17
+    SOLVEPNP_IPPE        = 6, //!< Infinitesimal Plane-Based Pose Estimation @cite Collins14 \n
+                              //!< Object points must be coplanar.
+    SOLVEPNP_IPPE_SQUARE = 7, //!< Infinitesimal Plane-Based Pose Estimation @cite Collins14 \n
+                              //!< This is a special case suitable for marker pose estimation.\n
+                              //!< 4 coplanar object points must be defined in the following order:
+                              //!<   - point 0: [-squareLength / 2,  squareLength / 2, 0]
+                              //!<   - point 1: [ squareLength / 2,  squareLength / 2, 0]
+                              //!<   - point 2: [ squareLength / 2, -squareLength / 2, 0]
+                              //!<   - point 3: [-squareLength / 2, -squareLength / 2, 0]
+#ifndef CV_DOXYGEN
+    SOLVEPNP_MAX_COUNT        //!< Used for count
+#endif
 };
 
 enum { CALIB_CB_ADAPTIVE_THRESH = 1,
        CALIB_CB_NORMALIZE_IMAGE = 2,
        CALIB_CB_FILTER_QUADS    = 4,
-       CALIB_CB_FAST_CHECK      = 8
+       CALIB_CB_FAST_CHECK      = 8,
+       CALIB_CB_EXHAUSTIVE      = 16,
+       CALIB_CB_ACCURACY        = 32,
+       CALIB_CB_LARGER          = 64,
+       CALIB_CB_MARKER          = 128
      };
 
 enum { CALIB_CB_SYMMETRIC_GRID  = 1,
@@ -243,7 +482,8 @@ enum { CALIB_CB_SYMMETRIC_GRID  = 1,
        CALIB_CB_CLUSTERING      = 4
      };
 
-enum { CALIB_USE_INTRINSIC_GUESS = 0x00001,
+enum { CALIB_NINTRINSIC          = 18,
+       CALIB_USE_INTRINSIC_GUESS = 0x00001,
        CALIB_FIX_ASPECT_RATIO    = 0x00002,
        CALIB_FIX_PRINCIPAL_POINT = 0x00004,
        CALIB_ZERO_TANGENT_DIST   = 0x00008,
@@ -259,21 +499,32 @@ enum { CALIB_USE_INTRINSIC_GUESS = 0x00001,
        CALIB_FIX_S1_S2_S3_S4     = 0x10000,
        CALIB_TILTED_MODEL        = 0x40000,
        CALIB_FIX_TAUX_TAUY       = 0x80000,
+       CALIB_USE_QR              = 0x100000, //!< use QR instead of SVD decomposition for solving. Faster but potentially less precise
+       CALIB_FIX_TANGENT_DIST    = 0x200000,
        // only for stereo
        CALIB_FIX_INTRINSIC       = 0x00100,
        CALIB_SAME_FOCAL_LENGTH   = 0x00200,
        // for stereo rectification
        CALIB_ZERO_DISPARITY      = 0x00400,
        CALIB_USE_LU              = (1 << 17), //!< use LU instead of SVD decomposition for solving. much faster but potentially less precise
+       CALIB_USE_EXTRINSIC_GUESS = (1 << 22)  //!< for stereoCalibrate
      };
 
 //! the algorithm for finding fundamental matrix
 enum { FM_7POINT = 1, //!< 7-point algorithm
        FM_8POINT = 2, //!< 8-point algorithm
-       FM_LMEDS  = 4, //!< least-median algorithm
-       FM_RANSAC = 8  //!< RANSAC algorithm
+       FM_LMEDS  = 4, //!< least-median algorithm. 7-point algorithm is used.
+       FM_RANSAC = 8  //!< RANSAC algorithm. It needs at least 15 points. 7-point algorithm is used.
      };
 
+enum HandEyeCalibrationMethod
+{
+    CALIB_HAND_EYE_TSAI         = 0, //!< A New Technique for Fully Autonomous and Efficient 3D Robotics Hand/Eye Calibration @cite Tsai89
+    CALIB_HAND_EYE_PARK         = 1, //!< Robot Sensor Calibration: Solving AX = XB on the Euclidean Group @cite Park94
+    CALIB_HAND_EYE_HORAUD       = 2, //!< Hand-eye Calibration @cite Horaud95
+    CALIB_HAND_EYE_ANDREFF      = 3, //!< On-line Hand-Eye Calibration @cite Andreff99
+    CALIB_HAND_EYE_DANIILIDIS   = 4  //!< Hand-Eye Calibration Using Dual Quaternions @cite Daniilidis98
+};
 
 
 /** @brief Converts a rotation matrix to a rotation vector or vice versa.
@@ -283,7 +534,7 @@ enum { FM_7POINT = 1, //!< 7-point algorithm
 @param jacobian Optional output Jacobian matrix, 3x9 or 9x3, which is a matrix of partial
 derivatives of the output array components with respect to the input array components.
 
-\f[\begin{array}{l} \theta \leftarrow norm(r) \\ r  \leftarrow r/ \theta \\ R =  \cos{\theta} I + (1- \cos{\theta} ) r r^T +  \sin{\theta} \vecthreethree{0}{-r_z}{r_y}{r_z}{0}{-r_x}{-r_y}{r_x}{0} \end{array}\f]
+\f[\begin{array}{l} \theta \leftarrow norm(r) \\ r  \leftarrow r/ \theta \\ R =  \cos(\theta) I + (1- \cos{\theta} ) r r^T +  \sin(\theta) \vecthreethree{0}{-r_z}{r_y}{r_z}{0}{-r_x}{-r_y}{r_x}{0} \end{array}\f]
 
 Inverse transformation can be also done easily, since
 
@@ -291,32 +542,114 @@ Inverse transformation can be also done easily, since
 
 A rotation vector is a convenient and most compact representation of a rotation matrix (since any
 rotation matrix has just 3 degrees of freedom). The representation is used in the global 3D geometry
-optimization procedures like calibrateCamera, stereoCalibrate, or solvePnP .
+optimization procedures like @ref calibrateCamera, @ref stereoCalibrate, or @ref solvePnP .
+
+@note More information about the computation of the derivative of a 3D rotation matrix with respect to its exponential coordinate
+can be found in:
+    - A Compact Formula for the Derivative of a 3-D Rotation in Exponential Coordinates, Guillermo Gallego, Anthony J. Yezzi @cite Gallego2014ACF
+
+@note Useful information on SE(3) and Lie Groups can be found in:
+    - A tutorial on SE(3) transformation parameterizations and on-manifold optimization, Jose-Luis Blanco @cite blanco2010tutorial
+    - Lie Groups for 2D and 3D Transformation, Ethan Eade @cite Eade17
+    - A micro Lie theory for state estimation in robotics, Joan Solà, Jérémie Deray, Dinesh Atchuthan @cite Sol2018AML
  */
 CV_EXPORTS_W void Rodrigues( InputArray src, OutputArray dst, OutputArray jacobian = noArray() );
 
+
+
+/** Levenberg-Marquardt solver. Starting with the specified vector of parameters it
+    optimizes the target vector criteria "err"
+    (finds local minima of each target vector component absolute value).
+
+    When needed, it calls user-provided callback.
+*/
+class CV_EXPORTS LMSolver : public Algorithm
+{
+public:
+    class CV_EXPORTS Callback
+    {
+    public:
+        virtual ~Callback() {}
+        /**
+         computes error and Jacobian for the specified vector of parameters
+
+         @param param the current vector of parameters
+         @param err output vector of errors: err_i = actual_f_i - ideal_f_i
+         @param J output Jacobian: J_ij = d(err_i)/d(param_j)
+
+         when J=noArray(), it means that it does not need to be computed.
+         Dimensionality of error vector and param vector can be different.
+         The callback should explicitly allocate (with "create" method) each output array
+         (unless it's noArray()).
+        */
+        virtual bool compute(InputArray param, OutputArray err, OutputArray J) const = 0;
+    };
+
+    /**
+       Runs Levenberg-Marquardt algorithm using the passed vector of parameters as the start point.
+       The final vector of parameters (whether the algorithm converged or not) is stored in the same
+       vector. The method returns the number of iterations used. If it's equal to the previously specified
+       maxIters, there is a big chance the algorithm did not converge.
+
+       @param param initial/final vector of parameters.
+
+       Note that the dimensionality of parameter space is defined by the size of param vector,
+       and the dimensionality of optimized criteria is defined by the size of err vector
+       computed by the callback.
+    */
+    virtual int run(InputOutputArray param) const = 0;
+
+    /**
+       Sets the maximum number of iterations
+       @param maxIters the number of iterations
+    */
+    virtual void setMaxIters(int maxIters) = 0;
+    /**
+       Retrieves the current maximum number of iterations
+    */
+    virtual int getMaxIters() const = 0;
+
+    /**
+       Creates Levenberg-Marquardt solver
+
+       @param cb callback
+       @param maxIters maximum number of iterations that can be further
+         modified using setMaxIters() method.
+    */
+    static Ptr<LMSolver> create(const Ptr<LMSolver::Callback>& cb, int maxIters);
+    static Ptr<LMSolver> create(const Ptr<LMSolver::Callback>& cb, int maxIters, double eps);
+};
+
+
+
+/** @example samples/cpp/tutorial_code/features2D/Homography/pose_from_homography.cpp
+An example program about pose estimation from coplanar points
+
+Check @ref tutorial_homography "the corresponding tutorial" for more details
+*/
+
 /** @brief Finds a perspective transformation between two planes.
 
 @param srcPoints Coordinates of the points in the original plane, a matrix of the type CV_32FC2
 or vector\<Point2f\> .
 @param dstPoints Coordinates of the points in the target plane, a matrix of the type CV_32FC2 or
 a vector\<Point2f\> .
-@param method Method used to computed a homography matrix. The following methods are possible:
--   **0** - a regular method using all the points
+@param method Method used to compute a homography matrix. The following methods are possible:
+-   **0** - a regular method using all the points, i.e., the least squares method
 -   **RANSAC** - RANSAC-based robust method
 -   **LMEDS** - Least-Median robust method
--   **RHO**    - PROSAC-based robust method
+-   **RHO** - PROSAC-based robust method
 @param ransacReprojThreshold Maximum allowed reprojection error to treat a point pair as an inlier
 (used in the RANSAC and RHO methods only). That is, if
-\f[\| \texttt{dstPoints} _i -  \texttt{convertPointsHomogeneous} ( \texttt{H} * \texttt{srcPoints} _i) \|  >  \texttt{ransacReprojThreshold}\f]
-then the point \f$i\f$ is considered an outlier. If srcPoints and dstPoints are measured in pixels,
+\f[\| \texttt{dstPoints} _i -  \texttt{convertPointsHomogeneous} ( \texttt{H} * \texttt{srcPoints} _i) \|_2  >  \texttt{ransacReprojThreshold}\f]
+then the point \f$i\f$ is considered as an outlier. If srcPoints and dstPoints are measured in pixels,
 it usually makes sense to set this parameter somewhere in the range of 1 to 10.
 @param mask Optional output mask set by a robust method ( RANSAC or LMEDS ). Note that the input
 mask values are ignored.
-@param maxIters The maximum number of RANSAC iterations, 2000 is the maximum it can be.
+@param maxIters The maximum number of RANSAC iterations.
 @param confidence Confidence level, between 0 and 1.
 
-The functions find and return the perspective transformation \f$H\f$ between the source and the
+The function finds and returns the perspective transformation \f$H\f$ between the source and the
 destination planes:
 
 \f[s_i  \vecthree{x'_i}{y'_i}{1} \sim H  \vecthree{x_i}{y_i}{1}\f]
@@ -331,10 +664,10 @@ pairs to compute an initial homography estimate with a simple least-squares sche
 However, if not all of the point pairs ( \f$srcPoints_i\f$, \f$dstPoints_i\f$ ) fit the rigid perspective
 transformation (that is, there are some outliers), this initial estimate will be poor. In this case,
 you can use one of the three robust methods. The methods RANSAC, LMeDS and RHO try many different
-random subsets of the corresponding point pairs (of four pairs each), estimate the homography matrix
-using this subset and a simple least-square algorithm, and then compute the quality/goodness of the
-computed homography (which is the number of inliers for RANSAC or the median re-projection error for
-LMeDs). The best subset is then used to produce the initial estimate of the homography matrix and
+random subsets of the corresponding point pairs (of four pairs each, collinear pairs are discarded), estimate the homography matrix
+using this subset and a simple least-squares algorithm, and then compute the quality/goodness of the
+computed homography (which is the number of inliers for RANSAC or the least median re-projection error for
+LMeDS). The best subset is then used to produce the initial estimate of the homography matrix and
 the mask of inliers/outliers.
 
 Regardless of the method, robust or not, the computed homography matrix is refined further (using
@@ -347,17 +680,12 @@ correctly only when there are more than 50% of inliers. Finally, if there are no
 noise is rather small, use the default method (method=0).
 
 The function is used to find initial intrinsic and extrinsic matrices. Homography matrix is
-determined up to a scale. Thus, it is normalized so that \f$h_{33}=1\f$. Note that whenever an H matrix
+determined up to a scale. Thus, it is normalized so that \f$h_{33}=1\f$. Note that whenever an \f$H\f$ matrix
 cannot be estimated, an empty one will be returned.
 
 @sa
-   getAffineTransform, getPerspectiveTransform, estimateRigidTransform, warpPerspective,
-    perspectiveTransform
-
-@note
-   -   A example on calculating a homography for image matching can be found at
-        opencv_source_code/samples/cpp/video_homography.cpp
-
+getAffineTransform, estimateAffine2D, estimateAffinePartial2D, getPerspectiveTransform, warpPerspective,
+perspectiveTransform
  */
 CV_EXPORTS_W Mat findHomography( InputArray srcPoints, InputArray dstPoints,
                                  int method = 0, double ransacReprojThreshold = 3,
@@ -383,8 +711,8 @@ and a rotation matrix.
 
 It optionally returns three rotation matrices, one for each axis, and the three Euler angles in
 degrees (as the return value) that could be used in OpenGL. Note, there is always more than one
-sequence of rotations about the three principle axes that results in the same orientation of an
-object, eg. see @cite Slabaugh . Returned tree rotation matrices and corresponding three Euler angules
+sequence of rotations about the three principal axes that results in the same orientation of an
+object, e.g. see @cite Slabaugh . Returned three rotation matrices and corresponding three Euler angles
 are only one of the possible solutions.
  */
 CV_EXPORTS_W Vec3d RQDecomp3x3( InputArray src, OutputArray mtxR, OutputArray mtxQ,
@@ -409,8 +737,8 @@ matrix and the position of a camera.
 
 It optionally returns three rotation matrices, one for each axis, and three Euler angles that could
 be used in OpenGL. Note, there is always more than one sequence of rotations about the three
-principle axes that results in the same orientation of an object, eg. see @cite Slabaugh . Returned
-tree rotation matrices and corresponding three Euler angules are only one of the possible solutions.
+principal axes that results in the same orientation of an object, e.g. see @cite Slabaugh . Returned
+three rotation matrices and corresponding three Euler angles are only one of the possible solutions.
 
 The function is based on RQDecomp3x3 .
  */
@@ -444,15 +772,14 @@ CV_EXPORTS_W void matMulDeriv( InputArray A, InputArray B, OutputArray dABdA, Ou
 @param tvec2 Second translation vector.
 @param rvec3 Output rotation vector of the superposition.
 @param tvec3 Output translation vector of the superposition.
-@param dr3dr1
-@param dr3dt1
-@param dr3dr2
-@param dr3dt2
-@param dt3dr1
-@param dt3dt1
-@param dt3dr2
-@param dt3dt2 Optional output derivatives of rvec3 or tvec3 with regard to rvec1, rvec2, tvec1 and
-tvec2, respectively.
+@param dr3dr1 Optional output derivative of rvec3 with regard to rvec1
+@param dr3dt1 Optional output derivative of rvec3 with regard to tvec1
+@param dr3dr2 Optional output derivative of rvec3 with regard to rvec2
+@param dr3dt2 Optional output derivative of rvec3 with regard to tvec2
+@param dt3dr1 Optional output derivative of tvec3 with regard to rvec1
+@param dt3dt1 Optional output derivative of tvec3 with regard to tvec1
+@param dt3dr2 Optional output derivative of tvec3 with regard to rvec2
+@param dt3dt2 Optional output derivative of tvec3 with regard to tvec2
 
 The functions compute:
 
@@ -476,35 +803,37 @@ CV_EXPORTS_W void composeRT( InputArray rvec1, InputArray tvec1,
 
 /** @brief Projects 3D points to an image plane.
 
-@param objectPoints Array of object points, 3xN/Nx3 1-channel or 1xN/Nx1 3-channel (or
-vector\<Point3f\> ), where N is the number of points in the view.
-@param rvec Rotation vector. See Rodrigues for details.
-@param tvec Translation vector.
+@param objectPoints Array of object points expressed wrt. the world coordinate frame. A 3xN/Nx3
+1-channel or 1xN/Nx1 3-channel (or vector\<Point3f\> ), where N is the number of points in the view.
+@param rvec The rotation vector (@ref Rodrigues) that, together with tvec, performs a change of
+basis from world to camera coordinate system, see @ref calibrateCamera for details.
+@param tvec The translation vector, see parameter description above.
 @param cameraMatrix Camera matrix \f$A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{_1}\f$ .
 @param distCoeffs Input vector of distortion coefficients
 \f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6 [, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$ of
 4, 5, 8, 12 or 14 elements. If the vector is empty, the zero distortion coefficients are assumed.
-@param imagePoints Output array of image points, 2xN/Nx2 1-channel or 1xN/Nx1 2-channel, or
+@param imagePoints Output array of image points, 1xN/Nx1 2-channel, or
 vector\<Point2f\> .
 @param jacobian Optional output 2Nx(10+\<numDistCoeffs\>) jacobian matrix of derivatives of image
 points with respect to components of the rotation vector, translation vector, focal lengths,
 coordinates of the principal point and the distortion coefficients. In the old interface different
 components of the jacobian are returned via different output parameters.
 @param aspectRatio Optional "fixed aspect ratio" parameter. If the parameter is not 0, the
-function assumes that the aspect ratio (*fx/fy*) is fixed and correspondingly adjusts the jacobian
-matrix.
-
-The function computes projections of 3D points to the image plane given intrinsic and extrinsic
-camera parameters. Optionally, the function computes Jacobians - matrices of partial derivatives of
-image points coordinates (as functions of all the input parameters) with respect to the particular
-parameters, intrinsic and/or extrinsic. The Jacobians are used during the global optimization in
-calibrateCamera, solvePnP, and stereoCalibrate . The function itself can also be used to compute a
-re-projection error given the current intrinsic and extrinsic parameters.
-
-@note By setting rvec=tvec=(0,0,0) or by setting cameraMatrix to a 3x3 identity matrix, or by
-passing zero distortion coefficients, you can get various useful partial cases of the function. This
-means that you can compute the distorted coordinates for a sparse set of points or apply a
-perspective transformation (and also compute the derivatives) in the ideal zero-distortion setup.
+function assumes that the aspect ratio (\f$f_x / f_y\f$) is fixed and correspondingly adjusts the
+jacobian matrix.
+
+The function computes the 2D projections of 3D points to the image plane, given intrinsic and
+extrinsic camera parameters. Optionally, the function computes Jacobians - matrices of partial
+derivatives of image points coordinates (as functions of all the input parameters) with respect to
+the particular parameters, intrinsic and/or extrinsic. The Jacobians are used during the global
+optimization in @ref calibrateCamera, @ref solvePnP, and @ref stereoCalibrate. The function itself
+can also be used to compute a re-projection error, given the current intrinsic and extrinsic
+parameters.
+
+@note By setting rvec = tvec = \f$[0, 0, 0]\f$, or by setting cameraMatrix to a 3x3 identity matrix,
+or by passing zero distortion coefficients, one can get various useful partial cases of the
+function. This means, one can compute the distorted coordinates for a sparse set of points or apply
+a perspective transformation (and also compute the derivatives) in the ideal zero-distortion setup.
  */
 CV_EXPORTS_W void projectPoints( InputArray objectPoints,
                                  InputArray rvec, InputArray tvec,
@@ -513,43 +842,162 @@ CV_EXPORTS_W void projectPoints( InputArray objectPoints,
                                  OutputArray jacobian = noArray(),
                                  double aspectRatio = 0 );
 
+/** @example samples/cpp/tutorial_code/features2D/Homography/homography_from_camera_displacement.cpp
+An example program about homography from the camera displacement
+
+Check @ref tutorial_homography "the corresponding tutorial" for more details
+*/
+
 /** @brief Finds an object pose from 3D-2D point correspondences.
+This function returns the rotation and the translation vectors that transform a 3D point expressed in the object
+coordinate frame to the camera coordinate frame, using different methods:
+- P3P methods (@ref SOLVEPNP_P3P, @ref SOLVEPNP_AP3P): need 4 input points to return a unique solution.
+- @ref SOLVEPNP_IPPE Number of input points must be >= 4 and object points must be coplanar.
+- @ref SOLVEPNP_IPPE_SQUARE Special case suitable for marker pose estimation.
+Number of input points must be 4. Object points must be defined in the following order:
+  - point 0: [-squareLength / 2,  squareLength / 2, 0]
+  - point 1: [ squareLength / 2,  squareLength / 2, 0]
+  - point 2: [ squareLength / 2, -squareLength / 2, 0]
+  - point 3: [-squareLength / 2, -squareLength / 2, 0]
+- for all the other flags, number of input points must be >= 4 and object points can be in any configuration.
 
 @param objectPoints Array of object points in the object coordinate space, Nx3 1-channel or
-1xN/Nx1 3-channel, where N is the number of points. vector\<Point3f\> can be also passed here.
+1xN/Nx1 3-channel, where N is the number of points. vector\<Point3d\> can be also passed here.
 @param imagePoints Array of corresponding image points, Nx2 1-channel or 1xN/Nx1 2-channel,
-where N is the number of points. vector\<Point2f\> can be also passed here.
-@param cameraMatrix Input camera matrix \f$A = \vecthreethree{fx}{0}{cx}{0}{fy}{cy}{0}{0}{1}\f$ .
+where N is the number of points. vector\<Point2d\> can be also passed here.
+@param cameraMatrix Input camera matrix \f$A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ .
 @param distCoeffs Input vector of distortion coefficients
 \f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6 [, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$ of
 4, 5, 8, 12 or 14 elements. If the vector is NULL/empty, the zero distortion coefficients are
 assumed.
-@param rvec Output rotation vector (see Rodrigues ) that, together with tvec , brings points from
+@param rvec Output rotation vector (see @ref Rodrigues ) that, together with tvec, brings points from
 the model coordinate system to the camera coordinate system.
 @param tvec Output translation vector.
-@param useExtrinsicGuess Parameter used for SOLVEPNP_ITERATIVE. If true (1), the function uses
+@param useExtrinsicGuess Parameter used for #SOLVEPNP_ITERATIVE. If true (1), the function uses
 the provided rvec and tvec values as initial approximations of the rotation and translation
 vectors, respectively, and further optimizes them.
 @param flags Method for solving a PnP problem:
--   **SOLVEPNP_ITERATIVE** Iterative method is based on Levenberg-Marquardt optimization. In
+-   **SOLVEPNP_ITERATIVE** Iterative method is based on a Levenberg-Marquardt optimization. In
 this case the function finds such a pose that minimizes reprojection error, that is the sum
 of squared distances between the observed projections imagePoints and the projected (using
 projectPoints ) objectPoints .
 -   **SOLVEPNP_P3P** Method is based on the paper of X.S. Gao, X.-R. Hou, J. Tang, H.-F. Chang
-"Complete Solution Classification for the Perspective-Three-Point Problem". In this case the
-function requires exactly four object and image points.
--   **SOLVEPNP_EPNP** Method has been introduced by F.Moreno-Noguer, V.Lepetit and P.Fua in the
-paper "EPnP: Efficient Perspective-n-Point Camera Pose Estimation".
--   **SOLVEPNP_DLS** Method is based on the paper of Joel A. Hesch and Stergios I. Roumeliotis.
-"A Direct Least-Squares (DLS) Method for PnP".
--   **SOLVEPNP_UPNP** Method is based on the paper of A.Penate-Sanchez, J.Andrade-Cetto,
-F.Moreno-Noguer. "Exhaustive Linearization for Robust Camera Pose and Focal Length
-Estimation". In this case the function also estimates the parameters \f$f_x\f$ and \f$f_y\f$
+"Complete Solution Classification for the Perspective-Three-Point Problem" (@cite gao2003complete).
+In this case the function requires exactly four object and image points.
+-   **SOLVEPNP_AP3P** Method is based on the paper of T. Ke, S. Roumeliotis
+"An Efficient Algebraic Solution to the Perspective-Three-Point Problem" (@cite Ke17).
+In this case the function requires exactly four object and image points.
+-   **SOLVEPNP_EPNP** Method has been introduced by F. Moreno-Noguer, V. Lepetit and P. Fua in the
+paper "EPnP: Efficient Perspective-n-Point Camera Pose Estimation" (@cite lepetit2009epnp).
+-   **SOLVEPNP_DLS** Method is based on the paper of J. Hesch and S. Roumeliotis.
+"A Direct Least-Squares (DLS) Method for PnP" (@cite hesch2011direct).
+-   **SOLVEPNP_UPNP** Method is based on the paper of A. Penate-Sanchez, J. Andrade-Cetto,
+F. Moreno-Noguer. "Exhaustive Linearization for Robust Camera Pose and Focal Length
+Estimation" (@cite penate2013exhaustive). In this case the function also estimates the parameters \f$f_x\f$ and \f$f_y\f$
 assuming that both have the same value. Then the cameraMatrix is updated with the estimated
 focal length.
+-   **SOLVEPNP_IPPE** Method is based on the paper of T. Collins and A. Bartoli.
+"Infinitesimal Plane-Based Pose Estimation" (@cite Collins14). This method requires coplanar object points.
+-   **SOLVEPNP_IPPE_SQUARE** Method is based on the paper of Toby Collins and Adrien Bartoli.
+"Infinitesimal Plane-Based Pose Estimation" (@cite Collins14). This method is suitable for marker pose estimation.
+It requires 4 coplanar object points defined in the following order:
+  - point 0: [-squareLength / 2,  squareLength / 2, 0]
+  - point 1: [ squareLength / 2,  squareLength / 2, 0]
+  - point 2: [ squareLength / 2, -squareLength / 2, 0]
+  - point 3: [-squareLength / 2, -squareLength / 2, 0]
 
 The function estimates the object pose given a set of object points, their corresponding image
-projections, as well as the camera matrix and the distortion coefficients.
+projections, as well as the camera matrix and the distortion coefficients, see the figure below
+(more precisely, the X-axis of the camera frame is pointing to the right, the Y-axis downward
+and the Z-axis forward).
+
+![](pnp.jpg)
+
+Points expressed in the world frame \f$ \bf{X}_w \f$ are projected into the image plane \f$ \left[ u, v \right] \f$
+using the perspective projection model \f$ \Pi \f$ and the camera intrinsic parameters matrix \f$ \bf{A} \f$:
+
+\f[
+  \begin{align*}
+  \begin{bmatrix}
+  u \\
+  v \\
+  1
+  \end{bmatrix} &=
+  \bf{A} \hspace{0.1em} \Pi \hspace{0.2em} ^{c}\bf{T}_w
+  \begin{bmatrix}
+  X_{w} \\
+  Y_{w} \\
+  Z_{w} \\
+  1
+  \end{bmatrix} \\
+  \begin{bmatrix}
+  u \\
+  v \\
+  1
+  \end{bmatrix} &=
+  \begin{bmatrix}
+  f_x & 0 & c_x \\
+  0 & f_y & c_y \\
+  0 & 0 & 1
+  \end{bmatrix}
+  \begin{bmatrix}
+  1 & 0 & 0 & 0 \\
+  0 & 1 & 0 & 0 \\
+  0 & 0 & 1 & 0
+  \end{bmatrix}
+  \begin{bmatrix}
+  r_{11} & r_{12} & r_{13} & t_x \\
+  r_{21} & r_{22} & r_{23} & t_y \\
+  r_{31} & r_{32} & r_{33} & t_z \\
+  0 & 0 & 0 & 1
+  \end{bmatrix}
+  \begin{bmatrix}
+  X_{w} \\
+  Y_{w} \\
+  Z_{w} \\
+  1
+  \end{bmatrix}
+  \end{align*}
+\f]
+
+The estimated pose is thus the rotation (`rvec`) and the translation (`tvec`) vectors that allow transforming
+a 3D point expressed in the world frame into the camera frame:
+
+\f[
+  \begin{align*}
+  \begin{bmatrix}
+  X_c \\
+  Y_c \\
+  Z_c \\
+  1
+  \end{bmatrix} &=
+  \hspace{0.2em} ^{c}\bf{T}_w
+  \begin{bmatrix}
+  X_{w} \\
+  Y_{w} \\
+  Z_{w} \\
+  1
+  \end{bmatrix} \\
+  \begin{bmatrix}
+  X_c \\
+  Y_c \\
+  Z_c \\
+  1
+  \end{bmatrix} &=
+  \begin{bmatrix}
+  r_{11} & r_{12} & r_{13} & t_x \\
+  r_{21} & r_{22} & r_{23} & t_y \\
+  r_{31} & r_{32} & r_{33} & t_z \\
+  0 & 0 & 0 & 1
+  \end{bmatrix}
+  \begin{bmatrix}
+  X_{w} \\
+  Y_{w} \\
+  Z_{w} \\
+  1
+  \end{bmatrix}
+  \end{align*}
+\f]
 
 @note
    -   An example of how to use solvePnP for planar augmented reality can be found at
@@ -564,6 +1012,22 @@ projections, as well as the camera matrix and the distortion coefficients.
         - Thus, given some data D = np.array(...) where D.shape = (N,M), in order to use a subset of
         it as, e.g., imagePoints, one must effectively copy it into a new array: imagePoints =
         np.ascontiguousarray(D[:,:2]).reshape((N,1,2))
+   -   The methods **SOLVEPNP_DLS** and **SOLVEPNP_UPNP** cannot be used as the current implementations are
+       unstable and sometimes give completely wrong results. If you pass one of these two
+       flags, **SOLVEPNP_EPNP** method will be used instead.
+   -   The minimum number of points is 4 in the general case. In the case of **SOLVEPNP_P3P** and **SOLVEPNP_AP3P**
+       methods, it is required to use exactly 4 points (the first 3 points are used to estimate all the solutions
+       of the P3P problem, the last one is used to retain the best solution that minimizes the reprojection error).
+   -   With **SOLVEPNP_ITERATIVE** method and `useExtrinsicGuess=true`, the minimum number of points is 3 (3 points
+       are sufficient to compute a pose but there are up to 4 solutions). The initial solution should be close to the
+       global solution to converge.
+   -   With **SOLVEPNP_IPPE** input points must be >= 4 and object points must be coplanar.
+   -   With **SOLVEPNP_IPPE_SQUARE** this is a special case suitable for marker pose estimation.
+       Number of input points must be 4. Object points must be defined in the following order:
+         - point 0: [-squareLength / 2,  squareLength / 2, 0]
+         - point 1: [ squareLength / 2,  squareLength / 2, 0]
+         - point 2: [ squareLength / 2, -squareLength / 2, 0]
+         - point 3: [-squareLength / 2, -squareLength / 2, 0]
  */
 CV_EXPORTS_W bool solvePnP( InputArray objectPoints, InputArray imagePoints,
                             InputArray cameraMatrix, InputArray distCoeffs,
@@ -573,18 +1037,18 @@ CV_EXPORTS_W bool solvePnP( InputArray objectPoints, InputArray imagePoints,
 /** @brief Finds an object pose from 3D-2D point correspondences using the RANSAC scheme.
 
 @param objectPoints Array of object points in the object coordinate space, Nx3 1-channel or
-1xN/Nx1 3-channel, where N is the number of points. vector\<Point3f\> can be also passed here.
+1xN/Nx1 3-channel, where N is the number of points. vector\<Point3d\> can be also passed here.
 @param imagePoints Array of corresponding image points, Nx2 1-channel or 1xN/Nx1 2-channel,
-where N is the number of points. vector\<Point2f\> can be also passed here.
+where N is the number of points. vector\<Point2d\> can be also passed here.
 @param cameraMatrix Input camera matrix \f$A = \vecthreethree{fx}{0}{cx}{0}{fy}{cy}{0}{0}{1}\f$ .
 @param distCoeffs Input vector of distortion coefficients
 \f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6 [, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$ of
 4, 5, 8, 12 or 14 elements. If the vector is NULL/empty, the zero distortion coefficients are
 assumed.
-@param rvec Output rotation vector (see Rodrigues ) that, together with tvec , brings points from
+@param rvec Output rotation vector (see @ref Rodrigues ) that, together with tvec, brings points from
 the model coordinate system to the camera coordinate system.
 @param tvec Output translation vector.
-@param useExtrinsicGuess Parameter used for SOLVEPNP_ITERATIVE. If true (1), the function uses
+@param useExtrinsicGuess Parameter used for @ref SOLVEPNP_ITERATIVE. If true (1), the function uses
 the provided rvec and tvec values as initial approximations of the rotation and translation
 vectors, respectively, and further optimizes them.
 @param iterationsCount Number of iterations.
@@ -593,17 +1057,24 @@ is the maximum allowed distance between the observed and computed point projecti
 an inlier.
 @param confidence The probability that the algorithm produces a useful result.
 @param inliers Output vector that contains indices of inliers in objectPoints and imagePoints .
-@param flags Method for solving a PnP problem (see solvePnP ).
+@param flags Method for solving a PnP problem (see @ref solvePnP ).
 
 The function estimates an object pose given a set of object points, their corresponding image
 projections, as well as the camera matrix and the distortion coefficients. This function finds such
 a pose that minimizes reprojection error, that is, the sum of squared distances between the observed
-projections imagePoints and the projected (using projectPoints ) objectPoints. The use of RANSAC
+projections imagePoints and the projected (using @ref projectPoints ) objectPoints. The use of RANSAC
 makes the function resistant to outliers.
 
 @note
    -   An example of how to use solvePNPRansac for object detection can be found at
         opencv_source_code/samples/cpp/tutorial_code/calib3d/real_time_pose_estimation/
+   -   The default method used to estimate the camera pose for the Minimal Sample Sets step
+       is #SOLVEPNP_EPNP. Exceptions are:
+         - if you choose #SOLVEPNP_P3P or #SOLVEPNP_AP3P, these methods will be used.
+         - if the number of input points is equal to 4, #SOLVEPNP_P3P is used.
+   -   The method used to estimate the camera pose using all the inliers is defined by the
+       flags parameter unless it is equal to #SOLVEPNP_P3P or #SOLVEPNP_AP3P. In this case,
+       the method #SOLVEPNP_EPNP will be used instead.
  */
 CV_EXPORTS_W bool solvePnPRansac( InputArray objectPoints, InputArray imagePoints,
                                   InputArray cameraMatrix, InputArray distCoeffs,
@@ -612,6 +1083,292 @@ CV_EXPORTS_W bool solvePnPRansac( InputArray objectPoints, InputArray imagePoint
                                   float reprojectionError = 8.0, double confidence = 0.99,
                                   OutputArray inliers = noArray(), int flags = SOLVEPNP_ITERATIVE );
 
+/** @brief Finds an object pose from 3 3D-2D point correspondences.
+
+@param objectPoints Array of object points in the object coordinate space, 3x3 1-channel or
+1x3/3x1 3-channel. vector\<Point3f\> can be also passed here.
+@param imagePoints Array of corresponding image points, 3x2 1-channel or 1x3/3x1 2-channel.
+ vector\<Point2f\> can be also passed here.
+@param cameraMatrix Input camera matrix \f$A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ .
+@param distCoeffs Input vector of distortion coefficients
+\f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6 [, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$ of
+4, 5, 8, 12 or 14 elements. If the vector is NULL/empty, the zero distortion coefficients are
+assumed.
+@param rvecs Output rotation vectors (see @ref Rodrigues ) that, together with tvecs, bring points from
+the model coordinate system to the camera coordinate system. A P3P problem has up to 4 solutions.
+@param tvecs Output translation vectors.
+@param flags Method for solving a P3P problem:
+-   **SOLVEPNP_P3P** Method is based on the paper of X.S. Gao, X.-R. Hou, J. Tang, H.-F. Chang
+"Complete Solution Classification for the Perspective-Three-Point Problem" (@cite gao2003complete).
+-   **SOLVEPNP_AP3P** Method is based on the paper of T. Ke and S. Roumeliotis.
+"An Efficient Algebraic Solution to the Perspective-Three-Point Problem" (@cite Ke17).
+
+The function estimates the object pose given 3 object points, their corresponding image
+projections, as well as the camera matrix and the distortion coefficients.
+
+@note
+The solutions are sorted by reprojection errors (lowest to highest).
+ */
+CV_EXPORTS_W int solveP3P( InputArray objectPoints, InputArray imagePoints,
+                           InputArray cameraMatrix, InputArray distCoeffs,
+                           OutputArrayOfArrays rvecs, OutputArrayOfArrays tvecs,
+                           int flags );
+
+/** @brief Refine a pose (the translation and the rotation that transform a 3D point expressed in the object coordinate frame
+to the camera coordinate frame) from 3D-2D point correspondences and starting from an initial solution.
+
+@param objectPoints Array of object points in the object coordinate space, Nx3 1-channel or 1xN/Nx1 3-channel,
+where N is the number of points. vector\<Point3d\> can also be passed here.
+@param imagePoints Array of corresponding image points, Nx2 1-channel or 1xN/Nx1 2-channel,
+where N is the number of points. vector\<Point2d\> can also be passed here.
+@param cameraMatrix Input camera matrix \f$A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ .
+@param distCoeffs Input vector of distortion coefficients
+\f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6 [, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$ of
+4, 5, 8, 12 or 14 elements. If the vector is NULL/empty, the zero distortion coefficients are
+assumed.
+@param rvec Input/Output rotation vector (see @ref Rodrigues ) that, together with tvec, brings points from
+the model coordinate system to the camera coordinate system. Input values are used as an initial solution.
+@param tvec Input/Output translation vector. Input values are used as an initial solution.
+@param criteria Criteria when to stop the Levenberg-Marquardt iterative algorithm.
+
+The function refines the object pose given at least 3 object points, their corresponding image
+projections, an initial solution for the rotation and translation vector,
+as well as the camera matrix and the distortion coefficients.
+The function minimizes the projection error with respect to the rotation and the translation vectors, according
+to a Levenberg-Marquardt iterative minimization @cite Madsen04 @cite Eade13 process.
+ */
+CV_EXPORTS_W void solvePnPRefineLM( InputArray objectPoints, InputArray imagePoints,
+                                    InputArray cameraMatrix, InputArray distCoeffs,
+                                    InputOutputArray rvec, InputOutputArray tvec,
+                                    TermCriteria criteria = TermCriteria(TermCriteria::EPS + TermCriteria::COUNT, 20, FLT_EPSILON));
+
+/** @brief Refine a pose (the translation and the rotation that transform a 3D point expressed in the object coordinate frame
+to the camera coordinate frame) from 3D-2D point correspondences and starting from an initial solution.
+
+@param objectPoints Array of object points in the object coordinate space, Nx3 1-channel or 1xN/Nx1 3-channel,
+where N is the number of points. vector\<Point3d\> can also be passed here.
+@param imagePoints Array of corresponding image points, Nx2 1-channel or 1xN/Nx1 2-channel,
+where N is the number of points. vector\<Point2d\> can also be passed here.
+@param cameraMatrix Input camera matrix \f$A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ .
+@param distCoeffs Input vector of distortion coefficients
+\f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6 [, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$ of
+4, 5, 8, 12 or 14 elements. If the vector is NULL/empty, the zero distortion coefficients are
+assumed.
+@param rvec Input/Output rotation vector (see @ref Rodrigues ) that, together with tvec, brings points from
+the model coordinate system to the camera coordinate system. Input values are used as an initial solution.
+@param tvec Input/Output translation vector. Input values are used as an initial solution.
+@param criteria Criteria when to stop the Levenberg-Marquardt iterative algorithm.
+@param VVSlambda Gain for the virtual visual servoing control law, equivalent to the \f$\alpha\f$
+gain in the Damped Gauss-Newton formulation.
+
+The function refines the object pose given at least 3 object points, their corresponding image
+projections, an initial solution for the rotation and translation vector,
+as well as the camera matrix and the distortion coefficients.
+The function minimizes the projection error with respect to the rotation and the translation vectors, using a
+virtual visual servoing (VVS) @cite Chaumette06 @cite Marchand16 scheme.
+ */
+CV_EXPORTS_W void solvePnPRefineVVS( InputArray objectPoints, InputArray imagePoints,
+                                     InputArray cameraMatrix, InputArray distCoeffs,
+                                     InputOutputArray rvec, InputOutputArray tvec,
+                                     TermCriteria criteria = TermCriteria(TermCriteria::EPS + TermCriteria::COUNT, 20, FLT_EPSILON),
+                                     double VVSlambda = 1);
+
+/** @brief Finds an object pose from 3D-2D point correspondences.
+This function returns a list of all the possible solutions (a solution is a <rotation vector, translation vector>
+couple), depending on the number of input points and the chosen method:
+- P3P methods (@ref SOLVEPNP_P3P, @ref SOLVEPNP_AP3P): 3 or 4 input points. Number of returned solutions can be between 0 and 4 with 3 input points.
+- @ref SOLVEPNP_IPPE Input points must be >= 4 and object points must be coplanar. Returns 2 solutions.
+- @ref SOLVEPNP_IPPE_SQUARE Special case suitable for marker pose estimation.
+Number of input points must be 4 and 2 solutions are returned. Object points must be defined in the following order:
+  - point 0: [-squareLength / 2,  squareLength / 2, 0]
+  - point 1: [ squareLength / 2,  squareLength / 2, 0]
+  - point 2: [ squareLength / 2, -squareLength / 2, 0]
+  - point 3: [-squareLength / 2, -squareLength / 2, 0]
+- for all the other flags, number of input points must be >= 4 and object points can be in any configuration.
+Only 1 solution is returned.
+
+@param objectPoints Array of object points in the object coordinate space, Nx3 1-channel or
+1xN/Nx1 3-channel, where N is the number of points. vector\<Point3d\> can be also passed here.
+@param imagePoints Array of corresponding image points, Nx2 1-channel or 1xN/Nx1 2-channel,
+where N is the number of points. vector\<Point2d\> can be also passed here.
+@param cameraMatrix Input camera matrix \f$A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ .
+@param distCoeffs Input vector of distortion coefficients
+\f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6 [, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$ of
+4, 5, 8, 12 or 14 elements. If the vector is NULL/empty, the zero distortion coefficients are
+assumed.
+@param rvecs Vector of output rotation vectors (see @ref Rodrigues ) that, together with tvecs, bring points from
+the model coordinate system to the camera coordinate system.
+@param tvecs Vector of output translation vectors.
+@param useExtrinsicGuess Parameter used for #SOLVEPNP_ITERATIVE. If true (1), the function uses
+the provided rvec and tvec values as initial approximations of the rotation and translation
+vectors, respectively, and further optimizes them.
+@param flags Method for solving a PnP problem:
+-   **SOLVEPNP_ITERATIVE** Iterative method is based on a Levenberg-Marquardt optimization. In
+this case the function finds such a pose that minimizes reprojection error, that is the sum
+of squared distances between the observed projections imagePoints and the projected (using
+projectPoints ) objectPoints .
+-   **SOLVEPNP_P3P** Method is based on the paper of X.S. Gao, X.-R. Hou, J. Tang, H.-F. Chang
+"Complete Solution Classification for the Perspective-Three-Point Problem" (@cite gao2003complete).
+In this case the function requires exactly four object and image points.
+-   **SOLVEPNP_AP3P** Method is based on the paper of T. Ke, S. Roumeliotis
+"An Efficient Algebraic Solution to the Perspective-Three-Point Problem" (@cite Ke17).
+In this case the function requires exactly four object and image points.
+-   **SOLVEPNP_EPNP** Method has been introduced by F.Moreno-Noguer, V.Lepetit and P.Fua in the
+paper "EPnP: Efficient Perspective-n-Point Camera Pose Estimation" (@cite lepetit2009epnp).
+-   **SOLVEPNP_DLS** Method is based on the paper of Joel A. Hesch and Stergios I. Roumeliotis.
+"A Direct Least-Squares (DLS) Method for PnP" (@cite hesch2011direct).
+-   **SOLVEPNP_UPNP** Method is based on the paper of A.Penate-Sanchez, J.Andrade-Cetto,
+F.Moreno-Noguer. "Exhaustive Linearization for Robust Camera Pose and Focal Length
+Estimation" (@cite penate2013exhaustive). In this case the function also estimates the parameters \f$f_x\f$ and \f$f_y\f$
+assuming that both have the same value. Then the cameraMatrix is updated with the estimated
+focal length.
+-   **SOLVEPNP_IPPE** Method is based on the paper of T. Collins and A. Bartoli.
+"Infinitesimal Plane-Based Pose Estimation" (@cite Collins14). This method requires coplanar object points.
+-   **SOLVEPNP_IPPE_SQUARE** Method is based on the paper of Toby Collins and Adrien Bartoli.
+"Infinitesimal Plane-Based Pose Estimation" (@cite Collins14). This method is suitable for marker pose estimation.
+It requires 4 coplanar object points defined in the following order:
+  - point 0: [-squareLength / 2,  squareLength / 2, 0]
+  - point 1: [ squareLength / 2,  squareLength / 2, 0]
+  - point 2: [ squareLength / 2, -squareLength / 2, 0]
+  - point 3: [-squareLength / 2, -squareLength / 2, 0]
+@param rvec Rotation vector used to initialize an iterative PnP refinement algorithm, when flag is SOLVEPNP_ITERATIVE
+and useExtrinsicGuess is set to true.
+@param tvec Translation vector used to initialize an iterative PnP refinement algorithm, when flag is SOLVEPNP_ITERATIVE
+and useExtrinsicGuess is set to true.
+@param reprojectionError Optional vector of reprojection error, that is the RMS error
+(\f$ \text{RMSE} = \sqrt{\frac{\sum_{i}^{N} \left ( \hat{y_i} - y_i \right )^2}{N}} \f$) between the input image points
+and the 3D object points projected with the estimated pose.
+
+The function estimates the object pose given a set of object points, their corresponding image
+projections, as well as the camera matrix and the distortion coefficients, see the figure below
+(more precisely, the X-axis of the camera frame is pointing to the right, the Y-axis downward
+and the Z-axis forward).
+
+![](pnp.jpg)
+
+Points expressed in the world frame \f$ \bf{X}_w \f$ are projected into the image plane \f$ \left[ u, v \right] \f$
+using the perspective projection model \f$ \Pi \f$ and the camera intrinsic parameters matrix \f$ \bf{A} \f$:
+
+\f[
+  \begin{align*}
+  \begin{bmatrix}
+  u \\
+  v \\
+  1
+  \end{bmatrix} &=
+  \bf{A} \hspace{0.1em} \Pi \hspace{0.2em} ^{c}\bf{T}_w
+  \begin{bmatrix}
+  X_{w} \\
+  Y_{w} \\
+  Z_{w} \\
+  1
+  \end{bmatrix} \\
+  \begin{bmatrix}
+  u \\
+  v \\
+  1
+  \end{bmatrix} &=
+  \begin{bmatrix}
+  f_x & 0 & c_x \\
+  0 & f_y & c_y \\
+  0 & 0 & 1
+  \end{bmatrix}
+  \begin{bmatrix}
+  1 & 0 & 0 & 0 \\
+  0 & 1 & 0 & 0 \\
+  0 & 0 & 1 & 0
+  \end{bmatrix}
+  \begin{bmatrix}
+  r_{11} & r_{12} & r_{13} & t_x \\
+  r_{21} & r_{22} & r_{23} & t_y \\
+  r_{31} & r_{32} & r_{33} & t_z \\
+  0 & 0 & 0 & 1
+  \end{bmatrix}
+  \begin{bmatrix}
+  X_{w} \\
+  Y_{w} \\
+  Z_{w} \\
+  1
+  \end{bmatrix}
+  \end{align*}
+\f]
+
+The estimated pose is thus the rotation (`rvec`) and the translation (`tvec`) vectors that allow transforming
+a 3D point expressed in the world frame into the camera frame:
+
+\f[
+  \begin{align*}
+  \begin{bmatrix}
+  X_c \\
+  Y_c \\
+  Z_c \\
+  1
+  \end{bmatrix} &=
+  \hspace{0.2em} ^{c}\bf{T}_w
+  \begin{bmatrix}
+  X_{w} \\
+  Y_{w} \\
+  Z_{w} \\
+  1
+  \end{bmatrix} \\
+  \begin{bmatrix}
+  X_c \\
+  Y_c \\
+  Z_c \\
+  1
+  \end{bmatrix} &=
+  \begin{bmatrix}
+  r_{11} & r_{12} & r_{13} & t_x \\
+  r_{21} & r_{22} & r_{23} & t_y \\
+  r_{31} & r_{32} & r_{33} & t_z \\
+  0 & 0 & 0 & 1
+  \end{bmatrix}
+  \begin{bmatrix}
+  X_{w} \\
+  Y_{w} \\
+  Z_{w} \\
+  1
+  \end{bmatrix}
+  \end{align*}
+\f]
+
+@note
+   -   An example of how to use solvePnP for planar augmented reality can be found at
+        opencv_source_code/samples/python/plane_ar.py
+   -   If you are using Python:
+        - Numpy array slices won't work as input because solvePnP requires contiguous
+        arrays (enforced by the assertion using cv::Mat::checkVector() around line 55 of
+        modules/calib3d/src/solvepnp.cpp version 2.4.9)
+        - The P3P algorithm requires image points to be in an array of shape (N,1,2) due
+        to its calling of cv::undistortPoints (around line 75 of modules/calib3d/src/solvepnp.cpp version 2.4.9)
+        which requires 2-channel information.
+        - Thus, given some data D = np.array(...) where D.shape = (N,M), in order to use a subset of
+        it as, e.g., imagePoints, one must effectively copy it into a new array: imagePoints =
+        np.ascontiguousarray(D[:,:2]).reshape((N,1,2))
+   -   The methods **SOLVEPNP_DLS** and **SOLVEPNP_UPNP** cannot be used as the current implementations are
+       unstable and sometimes give completely wrong results. If you pass one of these two
+       flags, **SOLVEPNP_EPNP** method will be used instead.
+   -   The minimum number of points is 4 in the general case. In the case of **SOLVEPNP_P3P** and **SOLVEPNP_AP3P**
+       methods, it is required to use exactly 4 points (the first 3 points are used to estimate all the solutions
+       of the P3P problem, the last one is used to retain the best solution that minimizes the reprojection error).
+   -   With **SOLVEPNP_ITERATIVE** method and `useExtrinsicGuess=true`, the minimum number of points is 3 (3 points
+       are sufficient to compute a pose but there are up to 4 solutions). The initial solution should be close to the
+       global solution to converge.
+   -   With **SOLVEPNP_IPPE** input points must be >= 4 and object points must be coplanar.
+   -   With **SOLVEPNP_IPPE_SQUARE** this is a special case suitable for marker pose estimation.
+       Number of input points must be 4. Object points must be defined in the following order:
+         - point 0: [-squareLength / 2,  squareLength / 2, 0]
+         - point 1: [ squareLength / 2,  squareLength / 2, 0]
+         - point 2: [ squareLength / 2, -squareLength / 2, 0]
+         - point 3: [-squareLength / 2, -squareLength / 2, 0]
+ */
+CV_EXPORTS_W int solvePnPGeneric( InputArray objectPoints, InputArray imagePoints,
+                                  InputArray cameraMatrix, InputArray distCoeffs,
+                                  OutputArrayOfArrays rvecs, OutputArrayOfArrays tvecs,
+                                  bool useExtrinsicGuess = false, SolvePnPMethod flags = SOLVEPNP_ITERATIVE,
+                                  InputArray rvec = noArray(), InputArray tvec = noArray(),
+                                  OutputArray reprojectionError = noArray() );
+
 /** @brief Finds an initial camera matrix from 3D-2D point correspondences.
 
 @param objectPoints Vector of vectors of the calibration pattern points in the calibration pattern
@@ -635,14 +1392,14 @@ CV_EXPORTS_W Mat initCameraMatrix2D( InputArrayOfArrays objectPoints,
 
 @param image Source chessboard view. It must be an 8-bit grayscale or color image.
 @param patternSize Number of inner corners per a chessboard row and column
-( patternSize = cvSize(points_per_row,points_per_colum) = cvSize(columns,rows) ).
+( patternSize = cv::Size(points_per_row,points_per_column) = cv::Size(columns,rows) ).
 @param corners Output array of detected corners.
 @param flags Various operation flags that can be zero or a combination of the following values:
--   **CV_CALIB_CB_ADAPTIVE_THRESH** Use adaptive thresholding to convert the image to black
+-   **CALIB_CB_ADAPTIVE_THRESH** Use adaptive thresholding to convert the image to black
 and white, rather than a fixed threshold level (computed from the average image brightness).
--   **CV_CALIB_CB_NORMALIZE_IMAGE** Normalize the image gamma with equalizeHist before
+-   **CALIB_CB_NORMALIZE_IMAGE** Normalize the image gamma with equalizeHist before
 applying fixed or adaptive thresholding.
--   **CV_CALIB_CB_FILTER_QUADS** Use additional criteria (like contour area, perimeter,
+-   **CALIB_CB_FILTER_QUADS** Use additional criteria (like contour area, perimeter,
 square-like shape) to filter out false quads extracted at the contour retrieval stage.
 -   **CALIB_CB_FAST_CHECK** Run a fast check on the image that looks for chessboard corners,
 and shortcut the call if none is found. This can drastically speed up the call in the
@@ -671,7 +1428,7 @@ Sample usage of detecting and drawing chessboard corners: :
 
     if(patternfound)
       cornerSubPix(gray, corners, Size(11, 11), Size(-1, -1),
-        TermCriteria(CV_TERMCRIT_EPS + CV_TERMCRIT_ITER, 30, 0.1));
+        TermCriteria(cv::TermCriteria::EPS + cv::TermCriteria::MAX_ITER, 30, 0.1));
 
     drawChessboardCorners(img, patternsize, Mat(corners), patternfound);
 @endcode
@@ -683,8 +1440,105 @@ square grouping and ordering algorithm fails.
 CV_EXPORTS_W bool findChessboardCorners( InputArray image, Size patternSize, OutputArray corners,
                                          int flags = CALIB_CB_ADAPTIVE_THRESH + CALIB_CB_NORMALIZE_IMAGE );
 
+/*
+   Checks whether the image contains a chessboard of the specified size.
+   If yes, true is returned.
+*/
+CV_EXPORTS_W bool checkChessboard(InputArray img, Size size);
+
+/** @brief Finds the positions of internal corners of the chessboard using a sector based approach.
+
+@param image Source chessboard view. It must be an 8-bit grayscale or color image.
+@param patternSize Number of inner corners per a chessboard row and column
+( patternSize = cv::Size(points_per_row,points_per_column) = cv::Size(columns,rows) ).
+@param corners Output array of detected corners.
+@param flags Various operation flags that can be zero or a combination of the following values:
+-   **CALIB_CB_NORMALIZE_IMAGE** Normalize the image gamma with equalizeHist before detection.
+-   **CALIB_CB_EXHAUSTIVE** Run an exhaustive search to improve detection rate.
+-   **CALIB_CB_ACCURACY** Up sample input image to improve sub-pixel accuracy due to aliasing effects.
+-   **CALIB_CB_LARGER** The detected pattern is allowed to be larger than patternSize (see description).
+-   **CALIB_CB_MARKER** The detected pattern must have a marker (see description).
+This should be used if an accurate camera calibration is required.
+@param meta Optional output array of detected corners (CV_8UC1 and size = cv::Size(columns,rows)).
+Each entry stands for one corner of the pattern and can have one of the following values:
+-   0 = no meta data attached
+-   1 = left-top corner of a black cell
+-   2 = left-top corner of a white cell
+-   3 = left-top corner of a black cell with a white marker dot
+-   4 = left-top corner of a white cell with a black marker dot (pattern origin in case of markers otherwise first corner)
+
+The function is analogous to findChessboardCorners but uses a localized radon
+transformation approximated by box filters being more robust to all sort of
+noise, faster on larger images and is able to directly return the sub-pixel
+position of the internal chessboard corners. The Method is based on the paper
+@cite duda2018 "Accurate Detection and Localization of Checkerboard Corners for
+Calibration" demonstrating that the returned sub-pixel positions are more
+accurate than the one returned by cornerSubPix allowing a precise camera
+calibration for demanding applications.
+
+In case the flags **CALIB_CB_LARGER** or **CALIB_CB_MARKER** are given,
+the result can be recovered from the optional meta array. Both flags are
+helpful to use calibration patterns exceeding the field of view of the camera.
+These oversized patterns allow more accurate calibrations as corners can be
+utilized, which are as close as possible to the image borders.  For a
+consistent coordinate system across all images, the optional marker (see image
+below) can be used to move the origin of the board to the location where the
+black circle is located.
+
+@note The function requires a white border with roughly the same width as one
+of the checkerboard fields around the whole board to improve the detection in
+various environments. In addition, because of the localized radon
+transformation it is beneficial to use round corners for the field corners
+which are located on the outside of the board. The following figure illustrates
+a sample checkerboard optimized for the detection. However, any other checkerboard
+can be used as well.
+![Checkerboard](pics/checkerboard_radon.png)
+ */
+CV_EXPORTS_AS(findChessboardCornersSBWithMeta)
+bool findChessboardCornersSB(InputArray image,Size patternSize, OutputArray corners,
+                             int flags,OutputArray meta);
+/** @overload */
+CV_EXPORTS_W inline
+bool findChessboardCornersSB(InputArray image, Size patternSize, OutputArray corners,
+                             int flags = 0)
+{
+    return findChessboardCornersSB(image, patternSize, corners, flags, noArray()); // forwards to the 5-argument overload; meta output not requested
+}
+
+/** @brief Estimates the sharpness of a detected chessboard.
+
+Image sharpness, as well as brightness, are critical parameters for accurate
+camera calibration. For accessing these parameters for filtering out
+problematic calibration images, this method calculates edge profiles by traveling from
+black to white chessboard cell centers. Based on this, the number of pixels required
+to transition from black to white is calculated. This width of the
+transition area is a good indication of how sharp the chessboard is imaged
+and should be below ~3.0 pixels.
+
+@param image Gray image used to find chessboard corners
+@param patternSize Size of a found chessboard pattern
+@param corners Corners found by findChessboardCorners(SB)
+@param rise_distance Rise distance 0.8 means 10% ... 90% of the final signal strength
+@param vertical By default edge responses for horizontal lines are calculated
+@param sharpness Optional output array with a sharpness value for calculated edge responses (see description)
+
+The optional sharpness array is of type CV_32FC1 and has for each calculated
+profile one row with the following five entries:
+* 0 = x coordinate of the underlying edge in the image
+* 1 = y coordinate of the underlying edge in the image
+* 2 = width of the transition area (sharpness)
+* 3 = signal strength in the black cell (min brightness)
+* 4 = signal strength in the white cell (max brightness)
+
+@return Scalar(average sharpness, average min brightness, average max brightness,0)
+*/
+CV_EXPORTS_W Scalar estimateChessboardSharpness(InputArray image, Size patternSize, InputArray corners,
+                                                float rise_distance=0.8F,bool vertical=false,
+                                                OutputArray sharpness=noArray());
+
+
 //! finds subpixel-accurate positions of the chessboard corners
-CV_EXPORTS bool find4QuadCornerSubpix( InputArray img, InputOutputArray corners, Size region_size );
+CV_EXPORTS_W bool find4QuadCornerSubpix( InputArray img, InputOutputArray corners, Size region_size );
 
 /** @brief Renders the detected chessboard corners.
 
@@ -701,6 +1555,57 @@ found, or as colored corners connected with lines if the board was found.
 CV_EXPORTS_W void drawChessboardCorners( InputOutputArray image, Size patternSize,
                                          InputArray corners, bool patternWasFound );
 
+/** @brief Draw axes of the world/object coordinate system from pose estimation. @sa solvePnP
+
+@param image Input/output image. It must have 1 or 3 channels. The number of channels is not altered.
+@param cameraMatrix Input 3x3 floating-point matrix of camera intrinsic parameters.
+\f$A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$
+@param distCoeffs Input vector of distortion coefficients
+\f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6 [, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$ of
+4, 5, 8, 12 or 14 elements. If the vector is empty, the zero distortion coefficients are assumed.
+@param rvec Rotation vector (see @ref Rodrigues ) that, together with tvec, brings points from
+the model coordinate system to the camera coordinate system.
+@param tvec Translation vector.
+@param length Length of the painted axes in the same unit as tvec (usually in meters).
+@param thickness Line thickness of the painted axes.
+
+This function draws the axes of the world/object coordinate system w.r.t. the camera frame.
+OX is drawn in red, OY in green and OZ in blue.
+ */
+CV_EXPORTS_W void drawFrameAxes(InputOutputArray image, InputArray cameraMatrix, InputArray distCoeffs,
+                                InputArray rvec, InputArray tvec, float length, int thickness=3);
+
+struct CV_EXPORTS_W_SIMPLE CirclesGridFinderParameters
+{
+    CV_WRAP CirclesGridFinderParameters();
+    CV_PROP_RW cv::Size2f densityNeighborhoodSize;
+    CV_PROP_RW float minDensity;
+    CV_PROP_RW int kmeansAttempts;
+    CV_PROP_RW int minDistanceToAddKeypoint;
+    CV_PROP_RW int keypointScale;
+    CV_PROP_RW float minGraphConfidence;
+    CV_PROP_RW float vertexGain;
+    CV_PROP_RW float vertexPenalty;
+    CV_PROP_RW float existingVertexGain;
+    CV_PROP_RW float edgeGain;
+    CV_PROP_RW float edgePenalty;
+    CV_PROP_RW float convexHullFactor;
+    CV_PROP_RW float minRNGEdgeSwitchDist;
+
+    enum GridType
+    {
+      SYMMETRIC_GRID, ASYMMETRIC_GRID
+    };
+    GridType gridType;
+
+    CV_PROP_RW float squareSize; //!< Distance between two adjacent points. Used by CALIB_CB_CLUSTERING.
+    CV_PROP_RW float maxRectifiedDistance; //!< Max deviation from prediction. Used by CALIB_CB_CLUSTERING.
+};
+
+#ifndef DISABLE_OPENCV_3_COMPATIBILITY
+typedef CirclesGridFinderParameters CirclesGridFinderParameters2;
+#endif
+
 /** @brief Finds centers in the grid of circles.
 
 @param image grid view of input circles; it must be an 8-bit grayscale or color image.
@@ -713,6 +1618,7 @@ CV_EXPORTS_W void drawChessboardCorners( InputOutputArray image, Size patternSiz
 -   **CALIB_CB_CLUSTERING** uses a special algorithm for grid detection. It is more robust to
 perspective distortions but much more sensitive to background clutter.
 @param blobDetector feature detector that finds blobs like dark circles on light background.
+@param parameters struct for finding circles in a grid pattern.
 
 The function attempts to determine whether the input image contains a grid of circles. If it is, the
 function locates centers of the circles. The function returns a non-zero value if all of the centers
@@ -732,60 +1638,78 @@ Sample usage of detecting and drawing the centers of circles: :
 @note The function requires white space (like a square-thick border, the wider the better) around
 the board to make the detection more robust in various environments.
  */
+CV_EXPORTS_W bool findCirclesGrid( InputArray image, Size patternSize,
+                                   OutputArray centers, int flags,
+                                   const Ptr<FeatureDetector> &blobDetector,
+                                   const CirclesGridFinderParameters& parameters);
+
+/** @overload */
 CV_EXPORTS_W bool findCirclesGrid( InputArray image, Size patternSize,
                                    OutputArray centers, int flags = CALIB_CB_SYMMETRIC_GRID,
                                    const Ptr<FeatureDetector> &blobDetector = SimpleBlobDetector::create());
 
-/** @brief Finds the camera intrinsic and extrinsic parameters from several views of a calibration pattern.
+/** @brief Finds the camera intrinsic and extrinsic parameters from several views of a calibration
+pattern.
 
 @param objectPoints In the new interface it is a vector of vectors of calibration pattern points in
 the calibration pattern coordinate space (e.g. std::vector<std::vector<cv::Vec3f>>). The outer
-vector contains as many elements as the number of the pattern views. If the same calibration pattern
+vector contains as many elements as the number of pattern views. If the same calibration pattern
 is shown in each view and it is fully visible, all the vectors will be the same. Although, it is
-possible to use partially occluded patterns, or even different patterns in different views. Then,
-the vectors will be different. The points are 3D, but since they are in a pattern coordinate system,
-then, if the rig is planar, it may make sense to put the model to a XY coordinate plane so that
-Z-coordinate of each input object point is 0.
+possible to use partially occluded patterns or even different patterns in different views. Then,
+the vectors will be different. Although the points are 3D, they all lie in the calibration pattern's
+XY coordinate plane (thus 0 in the Z-coordinate), if the used calibration pattern is a planar rig.
 In the old interface all the vectors of object points from different views are concatenated
 together.
 @param imagePoints In the new interface it is a vector of vectors of the projections of calibration
 pattern points (e.g. std::vector<std::vector<cv::Vec2f>>). imagePoints.size() and
-objectPoints.size() and imagePoints[i].size() must be equal to objectPoints[i].size() for each i.
-In the old interface all the vectors of object points from different views are concatenated
-together.
+objectPoints.size(), and imagePoints[i].size() and objectPoints[i].size() for each i, must be equal,
+respectively. In the old interface all the vectors of object points from different views are
+concatenated together.
 @param imageSize Size of the image used only to initialize the intrinsic camera matrix.
-@param cameraMatrix Output 3x3 floating-point camera matrix
+@param cameraMatrix Input/output 3x3 floating-point camera matrix
 \f$A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ . If CV\_CALIB\_USE\_INTRINSIC\_GUESS
-and/or CV_CALIB_FIX_ASPECT_RATIO are specified, some or all of fx, fy, cx, cy must be
+and/or CALIB_FIX_ASPECT_RATIO are specified, some or all of fx, fy, cx, cy must be
 initialized before calling the function.
-@param distCoeffs Output vector of distortion coefficients
+@param distCoeffs Input/output vector of distortion coefficients
 \f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6 [, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$ of
 4, 5, 8, 12 or 14 elements.
-@param rvecs Output vector of rotation vectors (see Rodrigues ) estimated for each pattern view
-(e.g. std::vector<cv::Mat>>). That is, each k-th rotation vector together with the corresponding
-k-th translation vector (see the next output parameter description) brings the calibration pattern
-from the model coordinate space (in which object points are specified) to the world coordinate
-space, that is, a real position of the calibration pattern in the k-th pattern view (k=0.. *M* -1).
-@param tvecs Output vector of translation vectors estimated for each pattern view.
+@param rvecs Output vector of rotation vectors (@ref Rodrigues ) estimated for each pattern view
+(e.g. std::vector<cv::Mat>). That is, each i-th rotation vector together with the corresponding
+i-th translation vector (see the next output parameter description) brings the calibration pattern
+from the object coordinate space (in which object points are specified) to the camera coordinate
+space. In more technical terms, the tuple of the i-th rotation and translation vector performs
+a change of basis from object coordinate space to camera coordinate space. Due to its duality, this
+tuple is equivalent to the position of the calibration pattern with respect to the camera coordinate
+space.
+@param tvecs Output vector of translation vectors estimated for each pattern view, see parameter
+description above.
+@param stdDeviationsIntrinsics Output vector of standard deviations estimated for intrinsic
+parameters. Order of deviations values:
+\f$(f_x, f_y, c_x, c_y, k_1, k_2, p_1, p_2, k_3, k_4, k_5, k_6 , s_1, s_2, s_3,
+ s_4, \tau_x, \tau_y)\f$ If one of the parameters is not estimated, its deviation is equal to zero.
+@param stdDeviationsExtrinsics Output vector of standard deviations estimated for extrinsic
+parameters. Order of deviations values: \f$(R_0, T_0, \dotsc , R_{M - 1}, T_{M - 1})\f$ where M is
+the number of pattern views. \f$R_i, T_i\f$ are concatenated 1x3 vectors.
+ @param perViewErrors Output vector of the RMS re-projection error estimated for each pattern view.
 @param flags Different flags that may be zero or a combination of the following values:
--   **CV_CALIB_USE_INTRINSIC_GUESS** cameraMatrix contains valid initial values of
+-   **CALIB_USE_INTRINSIC_GUESS** cameraMatrix contains valid initial values of
 fx, fy, cx, cy that are optimized further. Otherwise, (cx, cy) is initially set to the image
 center ( imageSize is used), and focal distances are computed in a least-squares fashion.
 Note, that if intrinsic parameters are known, there is no need to use this function just to
 estimate extrinsic parameters. Use solvePnP instead.
--   **CV_CALIB_FIX_PRINCIPAL_POINT** The principal point is not changed during the global
+-   **CALIB_FIX_PRINCIPAL_POINT** The principal point is not changed during the global
 optimization. It stays at the center or at a different location specified when
-CV_CALIB_USE_INTRINSIC_GUESS is set too.
--   **CV_CALIB_FIX_ASPECT_RATIO** The functions considers only fy as a free parameter. The
+CALIB_USE_INTRINSIC_GUESS is set too.
+-   **CALIB_FIX_ASPECT_RATIO** The functions consider only fy as a free parameter. The
 ratio fx/fy stays the same as in the input cameraMatrix . When
-CV_CALIB_USE_INTRINSIC_GUESS is not set, the actual input values of fx and fy are
+CALIB_USE_INTRINSIC_GUESS is not set, the actual input values of fx and fy are
 ignored, only their ratio is computed and used further.
--   **CV_CALIB_ZERO_TANGENT_DIST** Tangential distortion coefficients \f$(p_1, p_2)\f$ are set
+-   **CALIB_ZERO_TANGENT_DIST** Tangential distortion coefficients \f$(p_1, p_2)\f$ are set
 to zeros and stay zero.
--   **CV_CALIB_FIX_K1,...,CV_CALIB_FIX_K6** The corresponding radial distortion
-coefficient is not changed during the optimization. If CV_CALIB_USE_INTRINSIC_GUESS is
+-   **CALIB_FIX_K1,...,CALIB_FIX_K6** The corresponding radial distortion
+coefficient is not changed during the optimization. If CALIB_USE_INTRINSIC_GUESS is
 set, the coefficient from the supplied distCoeffs matrix is used. Otherwise, it is set to 0.
--   **CV_CALIB_RATIONAL_MODEL** Coefficients k4, k5, and k6 are enabled. To provide the
+-   **CALIB_RATIONAL_MODEL** Coefficients k4, k5, and k6 are enabled. To provide the
 backward compatibility, this extra flag should be explicitly specified to make the
 calibration function use the rational model and return 8 coefficients. If the flag is not
 set, the function computes and returns only 5 distortion coefficients.
@@ -794,24 +1718,26 @@ backward compatibility, this extra flag should be explicitly specified to make t
 calibration function use the thin prism model and return 12 coefficients. If the flag is not
 set, the function computes and returns only 5 distortion coefficients.
 -   **CALIB_FIX_S1_S2_S3_S4** The thin prism distortion coefficients are not changed during
-the optimization. If CV_CALIB_USE_INTRINSIC_GUESS is set, the coefficient from the
+the optimization. If CALIB_USE_INTRINSIC_GUESS is set, the coefficient from the
 supplied distCoeffs matrix is used. Otherwise, it is set to 0.
 -   **CALIB_TILTED_MODEL** Coefficients tauX and tauY are enabled. To provide the
 backward compatibility, this extra flag should be explicitly specified to make the
 calibration function use the tilted sensor model and return 14 coefficients. If the flag is not
 set, the function computes and returns only 5 distortion coefficients.
 -   **CALIB_FIX_TAUX_TAUY** The coefficients of the tilted sensor model are not changed during
-the optimization. If CV_CALIB_USE_INTRINSIC_GUESS is set, the coefficient from the
+the optimization. If CALIB_USE_INTRINSIC_GUESS is set, the coefficient from the
 supplied distCoeffs matrix is used. Otherwise, it is set to 0.
 @param criteria Termination criteria for the iterative optimization algorithm.
 
+@return the overall RMS re-projection error.
+
 The function estimates the intrinsic camera parameters and extrinsic parameters for each of the
 views. The algorithm is based on @cite Zhang2000 and @cite BouguetMCT . The coordinates of 3D object
 points and their corresponding 2D projections in each view must be specified. That may be achieved
-by using an object with a known geometry and easily detectable feature points. Such an object is
+by using an object with known geometry and easily detectable feature points. Such an object is
 called a calibration rig or calibration pattern, and OpenCV has built-in support for a chessboard as
-a calibration rig (see findChessboardCorners ). Currently, initialization of intrinsic parameters
-(when CV_CALIB_USE_INTRINSIC_GUESS is not set) is only implemented for planar calibration
+a calibration rig (see @ref findChessboardCorners). Currently, initialization of intrinsic
+parameters (when CALIB_USE_INTRINSIC_GUESS is not set) is only implemented for planar calibration
 patterns (where Z-coordinates of the object points must be all zeros). 3D calibration rigs can also
 be used as long as initial cameraMatrix is provided.
 
@@ -819,7 +1745,7 @@ The algorithm performs the following steps:
 
 -   Compute the initial intrinsic parameters (the option only available for planar calibration
     patterns) or read them from the input parameters. The distortion coefficients are all set to
-    zeros initially unless some of CV_CALIB_FIX_K? are specified.
+    zeros initially unless some of CALIB_FIX_K? are specified.
 
 -   Estimate the initial camera pose as if the intrinsic parameters have been already known. This is
     done using solvePnP .
@@ -829,18 +1755,28 @@ The algorithm performs the following steps:
     the projected (using the current estimates for camera parameters and the poses) object points
     objectPoints. See projectPoints for details.
 
-The function returns the final re-projection error.
-
 @note
-   If you use a non-square (=non-NxN) grid and findChessboardCorners for calibration, and
-    calibrateCamera returns bad values (zero distortion coefficients, an image center very far from
-    (w/2-0.5,h/2-0.5), and/or large differences between \f$f_x\f$ and \f$f_y\f$ (ratios of 10:1 or more)),
-    then you have probably used patternSize=cvSize(rows,cols) instead of using
-    patternSize=cvSize(cols,rows) in findChessboardCorners .
+    If you use a non-square (i.e. non-N-by-N) grid and @ref findChessboardCorners for calibration,
+    and @ref calibrateCamera returns bad values (zero distortion coefficients, \f$c_x\f$ and
+    \f$c_y\f$ very far from the image center, and/or large differences between \f$f_x\f$ and
+    \f$f_y\f$ (ratios of 10:1 or more)), then you are probably using patternSize=cvSize(rows,cols)
+    instead of using patternSize=cvSize(cols,rows) in @ref findChessboardCorners.
 
 @sa
-   findChessboardCorners, solvePnP, initCameraMatrix2D, stereoCalibrate, undistort
+   calibrateCameraRO, findChessboardCorners, solvePnP, initCameraMatrix2D, stereoCalibrate,
+   undistort
  */
+CV_EXPORTS_AS(calibrateCameraExtended) double calibrateCamera( InputArrayOfArrays objectPoints,
+                                     InputArrayOfArrays imagePoints, Size imageSize,
+                                     InputOutputArray cameraMatrix, InputOutputArray distCoeffs,
+                                     OutputArrayOfArrays rvecs, OutputArrayOfArrays tvecs,
+                                     OutputArray stdDeviationsIntrinsics,
+                                     OutputArray stdDeviationsExtrinsics,
+                                     OutputArray perViewErrors,
+                                     int flags = 0, TermCriteria criteria = TermCriteria(
+                                        TermCriteria::COUNT + TermCriteria::EPS, 30, DBL_EPSILON) );
+
+/** @overload */
 CV_EXPORTS_W double calibrateCamera( InputArrayOfArrays objectPoints,
                                      InputArrayOfArrays imagePoints, Size imageSize,
                                      InputOutputArray cameraMatrix, InputOutputArray distCoeffs,
@@ -848,6 +1784,84 @@ CV_EXPORTS_W double calibrateCamera( InputArrayOfArrays objectPoints,
                                      int flags = 0, TermCriteria criteria = TermCriteria(
                                         TermCriteria::COUNT + TermCriteria::EPS, 30, DBL_EPSILON) );
 
+/** @brief Finds the camera intrinsic and extrinsic parameters from several views of a calibration pattern.
+
+This function is an extension of calibrateCamera() with the method of releasing object which was
+proposed in @cite strobl2011iccv. In many common cases with inaccurate, unmeasured, roughly planar
+targets (calibration plates), this method can dramatically improve the precision of the estimated
+camera parameters. Both the object-releasing method and standard method are supported by this
+function. Use the parameter **iFixedPoint** for method selection. In the internal implementation,
+calibrateCamera() is a wrapper for this function.
+
+@param objectPoints Vector of vectors of calibration pattern points in the calibration pattern
+coordinate space. See calibrateCamera() for details. If the method of releasing object is to be used,
+the identical calibration board must be used in each view and it must be fully visible, and all
+objectPoints[i] must be the same and all points should be roughly close to a plane. **The calibration
+target has to be rigid, or at least static if the camera (rather than the calibration target) is
+shifted for grabbing images.**
+@param imagePoints Vector of vectors of the projections of calibration pattern points. See
+calibrateCamera() for details.
+@param imageSize Size of the image used only to initialize the intrinsic camera matrix.
+@param iFixedPoint The index of the 3D object point in objectPoints[0] to be fixed. It also acts as
+a switch for calibration method selection. If the object-releasing method is to be used, pass in the
+parameter in the range of [1, objectPoints[0].size()-2]; otherwise, a value outside this range
+selects the standard calibration method. Usually the top-right corner point of the calibration
+board grid is recommended to be fixed when the object-releasing method is utilized. According to
+\cite strobl2011iccv, two other points are also fixed. In this implementation, objectPoints[0].front
+and objectPoints[0].back.z are used. With object-releasing method, accurate rvecs, tvecs and
+newObjPoints are only possible if coordinates of these three fixed points are accurate enough.
+@param cameraMatrix Input/output 3x3 floating-point camera matrix. See calibrateCamera() for details.
+@param distCoeffs Input/output vector of distortion coefficients. See calibrateCamera() for details.
+@param rvecs Output vector of rotation vectors estimated for each pattern view. See calibrateCamera()
+for details.
+@param tvecs Output vector of translation vectors estimated for each pattern view.
+@param newObjPoints The updated output vector of calibration pattern points. The coordinates might
+be scaled based on three fixed points. The returned coordinates are accurate only if the above
+mentioned three fixed points are accurate. If not needed, noArray() can be passed in. This parameter
+is ignored with standard calibration method.
+@param stdDeviationsIntrinsics Output vector of standard deviations estimated for intrinsic parameters.
+See calibrateCamera() for details.
+@param stdDeviationsExtrinsics Output vector of standard deviations estimated for extrinsic parameters.
+See calibrateCamera() for details.
+@param stdDeviationsObjPoints Output vector of standard deviations estimated for refined coordinates
+of calibration pattern points. It has the same size and order as objectPoints[0] vector. This
+parameter is ignored with standard calibration method.
+ @param perViewErrors Output vector of the RMS re-projection error estimated for each pattern view.
+@param flags Different flags that may be zero or a combination of some predefined values. See
+calibrateCamera() for details. If the method of releasing object is used, the calibration time may
+be much longer. CALIB_USE_QR or CALIB_USE_LU could be used for faster calibration, with potentially
+less precise and less stable results in some rare cases.
+@param criteria Termination criteria for the iterative optimization algorithm.
+
+@return the overall RMS re-projection error.
+
+The function estimates the intrinsic camera parameters and extrinsic parameters for each of the
+views. The algorithm is based on @cite Zhang2000, @cite BouguetMCT and @cite strobl2011iccv. See
+calibrateCamera() for other detailed explanations.
+@sa
+   calibrateCamera, findChessboardCorners, solvePnP, initCameraMatrix2D, stereoCalibrate, undistort
+ */
+CV_EXPORTS_AS(calibrateCameraROExtended) double calibrateCameraRO( InputArrayOfArrays objectPoints,
+                                     InputArrayOfArrays imagePoints, Size imageSize, int iFixedPoint,
+                                     InputOutputArray cameraMatrix, InputOutputArray distCoeffs,
+                                     OutputArrayOfArrays rvecs, OutputArrayOfArrays tvecs,
+                                     OutputArray newObjPoints,
+                                     OutputArray stdDeviationsIntrinsics,
+                                     OutputArray stdDeviationsExtrinsics,
+                                     OutputArray stdDeviationsObjPoints,
+                                     OutputArray perViewErrors,
+                                     int flags = 0, TermCriteria criteria = TermCriteria(
+                                        TermCriteria::COUNT + TermCriteria::EPS, 30, DBL_EPSILON) );
+
+/** @overload */
+CV_EXPORTS_W double calibrateCameraRO( InputArrayOfArrays objectPoints,
+                                     InputArrayOfArrays imagePoints, Size imageSize, int iFixedPoint,
+                                     InputOutputArray cameraMatrix, InputOutputArray distCoeffs,
+                                     OutputArrayOfArrays rvecs, OutputArrayOfArrays tvecs,
+                                     OutputArray newObjPoints,
+                                     int flags = 0, TermCriteria criteria = TermCriteria(
+                                        TermCriteria::COUNT + TermCriteria::EPS, 30, DBL_EPSILON) );
+
 /** @brief Computes useful camera characteristics from the camera matrix.
 
 @param cameraMatrix Input camera matrix that can be estimated by calibrateCamera or
@@ -874,45 +1888,55 @@ CV_EXPORTS_W void calibrationMatrixValues( InputArray cameraMatrix, Size imageSi
                                            CV_OUT double& focalLength, CV_OUT Point2d& principalPoint,
                                            CV_OUT double& aspectRatio );
 
-/** @brief Calibrates the stereo camera.
+/** @brief Calibrates a stereo camera setup. This function finds the intrinsic parameters
+for each of the two cameras and the extrinsic parameters between the two cameras.
 
-@param objectPoints Vector of vectors of the calibration pattern points.
+@param objectPoints Vector of vectors of the calibration pattern points. The same structure as
+in @ref calibrateCamera. For each pattern view, both cameras need to see the same object
+points. Therefore, objectPoints.size(), imagePoints1.size(), and imagePoints2.size() need to be
+equal as well as objectPoints[i].size(), imagePoints1[i].size(), and imagePoints2[i].size() need to
+be equal for each i.
 @param imagePoints1 Vector of vectors of the projections of the calibration pattern points,
-observed by the first camera.
+observed by the first camera. The same structure as in @ref calibrateCamera.
 @param imagePoints2 Vector of vectors of the projections of the calibration pattern points,
-observed by the second camera.
-@param cameraMatrix1 Input/output first camera matrix:
-\f$\vecthreethree{f_x^{(j)}}{0}{c_x^{(j)}}{0}{f_y^{(j)}}{c_y^{(j)}}{0}{0}{1}\f$ , \f$j = 0,\, 1\f$ . If
-any of CV_CALIB_USE_INTRINSIC_GUESS , CV_CALIB_FIX_ASPECT_RATIO ,
-CV_CALIB_FIX_INTRINSIC , or CV_CALIB_FIX_FOCAL_LENGTH are specified, some or all of the
-matrix components must be initialized. See the flags description for details.
-@param distCoeffs1 Input/output vector of distortion coefficients
-\f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6 [, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$ of
-4, 5, 8, 12 or 14 elements. The output vector length depends on the flags.
-@param cameraMatrix2 Input/output second camera matrix. The parameter is similar to cameraMatrix1
-@param distCoeffs2 Input/output lens distortion coefficients for the second camera. The parameter
-is similar to distCoeffs1 .
-@param imageSize Size of the image used only to initialize intrinsic camera matrix.
-@param R Output rotation matrix between the 1st and the 2nd camera coordinate systems.
-@param T Output translation vector between the coordinate systems of the cameras.
+observed by the second camera. The same structure as in @ref calibrateCamera.
+@param cameraMatrix1 Input/output camera matrix for the first camera, the same as in
+@ref calibrateCamera. Furthermore, for the stereo case, additional flags may be used, see below.
+@param distCoeffs1 Input/output vector of distortion coefficients, the same as in
+@ref calibrateCamera.
+@param cameraMatrix2 Input/output camera matrix for the second camera. See description for
+cameraMatrix1.
+@param distCoeffs2 Input/output lens distortion coefficients for the second camera. See
+description for distCoeffs1.
+@param imageSize Size of the image used only to initialize the intrinsic camera matrices.
+@param R Output rotation matrix. Together with the translation vector T, this matrix brings
+points given in the first camera's coordinate system to points in the second camera's
+coordinate system. In more technical terms, the tuple of R and T performs a change of basis
+from the first camera's coordinate system to the second camera's coordinate system. Due to its
+duality, this tuple is equivalent to the position of the first camera with respect to the
+second camera coordinate system.
+@param T Output translation vector, see description above.
 @param E Output essential matrix.
 @param F Output fundamental matrix.
+@param perViewErrors Output vector of the RMS re-projection error estimated for each pattern view.
 @param flags Different flags that may be zero or a combination of the following values:
--   **CV_CALIB_FIX_INTRINSIC** Fix cameraMatrix? and distCoeffs? so that only R, T, E , and F
+-   **CALIB_FIX_INTRINSIC** Fix cameraMatrix? and distCoeffs? so that only R, T, E, and F
 matrices are estimated.
--   **CV_CALIB_USE_INTRINSIC_GUESS** Optimize some or all of the intrinsic parameters
+-   **CALIB_USE_INTRINSIC_GUESS** Optimize some or all of the intrinsic parameters
 according to the specified flags. Initial values are provided by the user.
--   **CV_CALIB_FIX_PRINCIPAL_POINT** Fix the principal points during the optimization.
--   **CV_CALIB_FIX_FOCAL_LENGTH** Fix \f$f^{(j)}_x\f$ and \f$f^{(j)}_y\f$ .
--   **CV_CALIB_FIX_ASPECT_RATIO** Optimize \f$f^{(j)}_y\f$ . Fix the ratio \f$f^{(j)}_x/f^{(j)}_y\f$
+-   **CALIB_USE_EXTRINSIC_GUESS** R and T contain valid initial values that are optimized further.
+Otherwise R and T are initialized to the median value of the pattern views (each dimension separately).
+-   **CALIB_FIX_PRINCIPAL_POINT** Fix the principal points during the optimization.
+-   **CALIB_FIX_FOCAL_LENGTH** Fix \f$f^{(j)}_x\f$ and \f$f^{(j)}_y\f$ .
+-   **CALIB_FIX_ASPECT_RATIO** Optimize \f$f^{(j)}_y\f$ . Fix the ratio \f$f^{(j)}_x/f^{(j)}_y\f$
 .
--   **CV_CALIB_SAME_FOCAL_LENGTH** Enforce \f$f^{(0)}_x=f^{(1)}_x\f$ and \f$f^{(0)}_y=f^{(1)}_y\f$ .
--   **CV_CALIB_ZERO_TANGENT_DIST** Set tangential distortion coefficients for each camera to
+-   **CALIB_SAME_FOCAL_LENGTH** Enforce \f$f^{(0)}_x=f^{(1)}_x\f$ and \f$f^{(0)}_y=f^{(1)}_y\f$ .
+-   **CALIB_ZERO_TANGENT_DIST** Set tangential distortion coefficients for each camera to
 zeros and fix there.
--   **CV_CALIB_FIX_K1,...,CV_CALIB_FIX_K6** Do not change the corresponding radial
-distortion coefficient during the optimization. If CV_CALIB_USE_INTRINSIC_GUESS is set,
+-   **CALIB_FIX_K1,...,CALIB_FIX_K6** Do not change the corresponding radial
+distortion coefficient during the optimization. If CALIB_USE_INTRINSIC_GUESS is set,
 the coefficient from the supplied distCoeffs matrix is used. Otherwise, it is set to 0.
--   **CV_CALIB_RATIONAL_MODEL** Enable coefficients k4, k5, and k6. To provide the backward
+-   **CALIB_RATIONAL_MODEL** Enable coefficients k4, k5, and k6. To provide the backward
 compatibility, this extra flag should be explicitly specified to make the calibration
 function use the rational model and return 8 coefficients. If the flag is not set, the
 function computes and returns only 5 distortion coefficients.
@@ -921,51 +1945,80 @@ backward compatibility, this extra flag should be explicitly specified to make t
 calibration function use the thin prism model and return 12 coefficients. If the flag is not
 set, the function computes and returns only 5 distortion coefficients.
 -   **CALIB_FIX_S1_S2_S3_S4** The thin prism distortion coefficients are not changed during
-the optimization. If CV_CALIB_USE_INTRINSIC_GUESS is set, the coefficient from the
+the optimization. If CALIB_USE_INTRINSIC_GUESS is set, the coefficient from the
 supplied distCoeffs matrix is used. Otherwise, it is set to 0.
 -   **CALIB_TILTED_MODEL** Coefficients tauX and tauY are enabled. To provide the
 backward compatibility, this extra flag should be explicitly specified to make the
 calibration function use the tilted sensor model and return 14 coefficients. If the flag is not
 set, the function computes and returns only 5 distortion coefficients.
 -   **CALIB_FIX_TAUX_TAUY** The coefficients of the tilted sensor model are not changed during
-the optimization. If CV_CALIB_USE_INTRINSIC_GUESS is set, the coefficient from the
+the optimization. If CALIB_USE_INTRINSIC_GUESS is set, the coefficient from the
 supplied distCoeffs matrix is used. Otherwise, it is set to 0.
 @param criteria Termination criteria for the iterative optimization algorithm.
 
-The function estimates transformation between two cameras making a stereo pair. If you have a stereo
-camera where the relative position and orientation of two cameras is fixed, and if you computed
-poses of an object relative to the first camera and to the second camera, (R1, T1) and (R2, T2),
-respectively (this can be done with solvePnP ), then those poses definitely relate to each other.
-This means that, given ( \f$R_1\f$,\f$T_1\f$ ), it should be possible to compute ( \f$R_2\f$,\f$T_2\f$ ). You only
-need to know the position and orientation of the second camera relative to the first camera. This is
-what the described function does. It computes ( \f$R\f$,\f$T\f$ ) so that:
+The function estimates the transformation between two cameras making a stereo pair. If one computes
+the poses of an object relative to the first camera and to the second camera,
+( \f$R_1\f$,\f$T_1\f$ ) and (\f$R_2\f$,\f$T_2\f$), respectively, for a stereo camera where the
+relative position and orientation between the two cameras are fixed, then those poses definitely
+relate to each other. This means, if the relative position and orientation (\f$R\f$,\f$T\f$) of the
+two cameras are known, it is possible to compute (\f$R_2\f$,\f$T_2\f$) when (\f$R_1\f$,\f$T_1\f$) is
+given. This is what the described function does. It computes (\f$R\f$,\f$T\f$) such that:
+
+\f[R_2=R R_1\f]
+\f[T_2=R T_1 + T.\f]
+
+Therefore, one can compute the coordinate representation of a 3D point for the second camera's
+coordinate system when given the point's coordinate representation in the first camera's coordinate
+system:
+
+\f[\begin{bmatrix}
+X_2 \\
+Y_2 \\
+Z_2 \\
+1
+\end{bmatrix} = \begin{bmatrix}
+R & T \\
+0 & 1
+\end{bmatrix} \begin{bmatrix}
+X_1 \\
+Y_1 \\
+Z_1 \\
+1
+\end{bmatrix}.\f]
 
-\f[R_2=R*R_1
-T_2=R*T_1 + T,\f]
 
 Optionally, it computes the essential matrix E:
 
-\f[E= \vecthreethree{0}{-T_2}{T_1}{T_2}{0}{-T_0}{-T_1}{T_0}{0} *R\f]
+\f[E= \vecthreethree{0}{-T_2}{T_1}{T_2}{0}{-T_0}{-T_1}{T_0}{0} R\f]
 
-where \f$T_i\f$ are components of the translation vector \f$T\f$ : \f$T=[T_0, T_1, T_2]^T\f$ . And the function
-can also compute the fundamental matrix F:
+where \f$T_i\f$ are components of the translation vector \f$T\f$ : \f$T=[T_0, T_1, T_2]^T\f$ .
+And the function can also compute the fundamental matrix F:
 
 \f[F = cameraMatrix2^{-T} E cameraMatrix1^{-1}\f]
 
 Besides the stereo-related information, the function can also perform a full calibration of each of
-two cameras. However, due to the high dimensionality of the parameter space and noise in the input
-data, the function can diverge from the correct solution. If the intrinsic parameters can be
+the two cameras. However, due to the high dimensionality of the parameter space and noise in the
+input data, the function can diverge from the correct solution. If the intrinsic parameters can be
 estimated with high accuracy for each of the cameras individually (for example, using
-calibrateCamera ), you are recommended to do so and then pass CV_CALIB_FIX_INTRINSIC flag to the
+calibrateCamera ), you are recommended to do so and then pass CALIB_FIX_INTRINSIC flag to the
 function along with the computed intrinsic parameters. Otherwise, if all the parameters are
 estimated at once, it makes sense to restrict some parameters, for example, pass
-CV_CALIB_SAME_FOCAL_LENGTH and CV_CALIB_ZERO_TANGENT_DIST flags, which is usually a
+CALIB_SAME_FOCAL_LENGTH and CALIB_ZERO_TANGENT_DIST flags, which is usually a
 reasonable assumption.
 
-Similarly to calibrateCamera , the function minimizes the total re-projection error for all the
+Similarly to calibrateCamera, the function minimizes the total re-projection error for all the
 points in all the available views from both cameras. The function returns the final value of the
 re-projection error.
  */
+CV_EXPORTS_AS(stereoCalibrateExtended) double stereoCalibrate( InputArrayOfArrays objectPoints,
+                                     InputArrayOfArrays imagePoints1, InputArrayOfArrays imagePoints2,
+                                     InputOutputArray cameraMatrix1, InputOutputArray distCoeffs1,
+                                     InputOutputArray cameraMatrix2, InputOutputArray distCoeffs2,
+                                     Size imageSize, InputOutputArray R,InputOutputArray T, OutputArray E, OutputArray F,
+                                     OutputArray perViewErrors, int flags = CALIB_FIX_INTRINSIC,
+                                     TermCriteria criteria = TermCriteria(TermCriteria::COUNT+TermCriteria::EPS, 30, 1e-6) );
+
+/// @overload
 CV_EXPORTS_W double stereoCalibrate( InputArrayOfArrays objectPoints,
                                      InputArrayOfArrays imagePoints1, InputArrayOfArrays imagePoints2,
                                      InputOutputArray cameraMatrix1, InputOutputArray distCoeffs1,
@@ -974,7 +2027,6 @@ CV_EXPORTS_W double stereoCalibrate( InputArrayOfArrays objectPoints,
                                      int flags = CALIB_FIX_INTRINSIC,
                                      TermCriteria criteria = TermCriteria(TermCriteria::COUNT+TermCriteria::EPS, 30, 1e-6) );
 
-
 /** @brief Computes rectification transforms for each head of a calibrated stereo camera.
 
 @param cameraMatrix1 First camera matrix.
@@ -982,16 +2034,26 @@ CV_EXPORTS_W double stereoCalibrate( InputArrayOfArrays objectPoints,
 @param cameraMatrix2 Second camera matrix.
 @param distCoeffs2 Second camera distortion parameters.
 @param imageSize Size of the image used for stereo calibration.
-@param R Rotation matrix between the coordinate systems of the first and the second cameras.
-@param T Translation vector between coordinate systems of the cameras.
-@param R1 Output 3x3 rectification transform (rotation matrix) for the first camera.
-@param R2 Output 3x3 rectification transform (rotation matrix) for the second camera.
+@param R Rotation matrix from the coordinate system of the first camera to the second camera,
+see @ref stereoCalibrate.
+@param T Translation vector from the coordinate system of the first camera to the second camera,
+see @ref stereoCalibrate.
+@param R1 Output 3x3 rectification transform (rotation matrix) for the first camera. This matrix
+brings points given in the unrectified first camera's coordinate system to points in the rectified
+first camera's coordinate system. In more technical terms, it performs a change of basis from the
+unrectified first camera's coordinate system to the rectified first camera's coordinate system.
+@param R2 Output 3x3 rectification transform (rotation matrix) for the second camera. This matrix
+brings points given in the unrectified second camera's coordinate system to points in the rectified
+second camera's coordinate system. In more technical terms, it performs a change of basis from the
+unrectified second camera's coordinate system to the rectified second camera's coordinate system.
 @param P1 Output 3x4 projection matrix in the new (rectified) coordinate systems for the first
-camera.
+camera, i.e. it projects points given in the rectified first camera coordinate system into the
+rectified first camera's image.
 @param P2 Output 3x4 projection matrix in the new (rectified) coordinate systems for the second
-camera.
-@param Q Output \f$4 \times 4\f$ disparity-to-depth mapping matrix (see reprojectImageTo3D ).
-@param flags Operation flags that may be zero or CV_CALIB_ZERO_DISPARITY . If the flag is set,
+camera, i.e. it projects points given in the rectified first camera coordinate system into the
+rectified second camera's image.
+@param Q Output \f$4 \times 4\f$ disparity-to-depth mapping matrix (see @ref reprojectImageTo3D).
+@param flags Operation flags that may be zero or CALIB_ZERO_DISPARITY . If the flag is set,
 the function makes the principal points of each camera have the same pixel coordinates in the
 rectified views. And if the flag is not set, the function may still shift the images in the
 horizontal or vertical direction (depending on the orientation of epipolar lines) to maximize the
@@ -1001,11 +2063,11 @@ scaling. Otherwise, the parameter should be between 0 and 1. alpha=0 means that
 images are zoomed and shifted so that only valid pixels are visible (no black areas after
 rectification). alpha=1 means that the rectified image is decimated and shifted so that all the
 pixels from the original images from the cameras are retained in the rectified images (no source
-image pixels are lost). Obviously, any intermediate value yields an intermediate result between
+image pixels are lost). Any intermediate value yields an intermediate result between
 those two extreme cases.
 @param newImageSize New image resolution after rectification. The same size should be passed to
 initUndistortRectifyMap (see the stereo_calib.cpp sample in OpenCV samples directory). When (0,0)
-is passed (default), it is set to the original imageSize . Setting it to larger value can help you
+is passed (default), it is set to the original imageSize . Setting it to a larger value can help you
 preserve details in the original image, especially when there is a big radial distortion.
 @param validPixROI1 Optional output rectangles inside the rectified images where all the pixels
 are valid. If alpha=0 , the ROIs cover the whole images. Otherwise, they are likely to be smaller
@@ -1021,27 +2083,43 @@ as input. As output, it provides two rotation matrices and also two projection m
 coordinates. The function distinguishes the following two cases:
 
 -   **Horizontal stereo**: the first and the second camera views are shifted relative to each other
-    mainly along the x axis (with possible small vertical shift). In the rectified images, the
+    mainly along the x-axis (with possible small vertical shift). In the rectified images, the
     corresponding epipolar lines in the left and right cameras are horizontal and have the same
     y-coordinate. P1 and P2 look like:
 
-    \f[\texttt{P1} = \begin{bmatrix} f & 0 & cx_1 & 0 \\ 0 & f & cy & 0 \\ 0 & 0 & 1 & 0 \end{bmatrix}\f]
+    \f[\texttt{P1} = \begin{bmatrix}
+                        f & 0 & cx_1 & 0 \\
+                        0 & f & cy & 0 \\
+                        0 & 0 & 1 & 0
+                     \end{bmatrix}\f]
 
-    \f[\texttt{P2} = \begin{bmatrix} f & 0 & cx_2 & T_x*f \\ 0 & f & cy & 0 \\ 0 & 0 & 1 & 0 \end{bmatrix} ,\f]
+    \f[\texttt{P2} = \begin{bmatrix}
+                        f & 0 & cx_2 & T_x*f \\
+                        0 & f & cy & 0 \\
+                        0 & 0 & 1 & 0
+                     \end{bmatrix} ,\f]
 
     where \f$T_x\f$ is a horizontal shift between the cameras and \f$cx_1=cx_2\f$ if
-    CV_CALIB_ZERO_DISPARITY is set.
+    CALIB_ZERO_DISPARITY is set.
 
 -   **Vertical stereo**: the first and the second camera views are shifted relative to each other
-    mainly in vertical direction (and probably a bit in the horizontal direction too). The epipolar
+    mainly in the vertical direction (and probably a bit in the horizontal direction too). The epipolar
     lines in the rectified images are vertical and have the same x-coordinate. P1 and P2 look like:
 
-    \f[\texttt{P1} = \begin{bmatrix} f & 0 & cx & 0 \\ 0 & f & cy_1 & 0 \\ 0 & 0 & 1 & 0 \end{bmatrix}\f]
+    \f[\texttt{P1} = \begin{bmatrix}
+                        f & 0 & cx & 0 \\
+                        0 & f & cy_1 & 0 \\
+                        0 & 0 & 1 & 0
+                     \end{bmatrix}\f]
 
-    \f[\texttt{P2} = \begin{bmatrix} f & 0 & cx & 0 \\ 0 & f & cy_2 & T_y*f \\ 0 & 0 & 1 & 0 \end{bmatrix} ,\f]
+    \f[\texttt{P2} = \begin{bmatrix}
+                        f & 0 & cx & 0 \\
+                        0 & f & cy_2 & T_y*f \\
+                        0 & 0 & 1 & 0
+                     \end{bmatrix},\f]
 
-    where \f$T_y\f$ is a vertical shift between the cameras and \f$cy_1=cy_2\f$ if CALIB_ZERO_DISPARITY is
-    set.
+    where \f$T_y\f$ is a vertical shift between the cameras and \f$cy_1=cy_2\f$ if
+    CALIB_ZERO_DISPARITY is set.
 
 As you can see, the first three columns of P1 and P2 will effectively be the new "rectified" camera
 matrices. The matrices, together with R1 and R2 , can then be passed to initUndistortRectifyMap to
@@ -1076,7 +2154,7 @@ findFundamentalMat .
 @param threshold Optional threshold used to filter out the outliers. If the parameter is greater
 than zero, all the point pairs that do not comply with the epipolar geometry (that is, the points
 for which \f$|\texttt{points2[i]}^T*\texttt{F}*\texttt{points1[i]}|>\texttt{threshold}\f$ ) are
-rejected prior to computing the homographies. Otherwise,all the points are considered inliers.
+rejected prior to computing the homographies. Otherwise, all the points are considered inliers.
 
 The function computes the rectification transformations without knowing intrinsic parameters of the
 cameras and their relative position in the space, which explains the suffix "uncalibrated". Another
@@ -1120,7 +2198,7 @@ assumed.
 @param alpha Free scaling parameter between 0 (when all the pixels in the undistorted image are
 valid) and 1 (when all the source image pixels are retained in the undistorted image). See
 stereoRectify for details.
-@param newImgSize Image size after rectification. By default,it is set to imageSize .
+@param newImgSize Image size after rectification. By default, it is set to imageSize .
 @param validPixROI Optional output rectangle that outlines all-good-pixels region in the
 undistorted image. See roi1, roi2 description in stereoRectify .
 @param centerPrincipalPoint Optional flag that indicates whether in the new camera matrix the
@@ -1131,7 +2209,7 @@ best fit a subset of the source image (determined by alpha) to the corrected ima
 The function computes and returns the optimal new camera matrix based on the free scaling parameter.
 By varying this parameter, you may retrieve only sensible pixels alpha=0 , keep all the original
 image pixels if there is valuable information in the corners alpha=1 , or get something in between.
-When alpha\>0 , the undistortion result is likely to have some black pixels corresponding to
+When alpha\>0 , the undistorted result is likely to have some black pixels corresponding to
 "virtual" pixels outside of the captured distorted image. The original camera matrix, distortion
 coefficients, the computed new camera matrix, and newImageSize should be passed to
 initUndistortRectifyMap to produce the maps for remap .
@@ -1141,6 +2219,139 @@ CV_EXPORTS_W Mat getOptimalNewCameraMatrix( InputArray cameraMatrix, InputArray
                                             CV_OUT Rect* validPixROI = 0,
                                             bool centerPrincipalPoint = false);
 
+/** @brief Computes Hand-Eye calibration: \f$_{}^{g}\textrm{T}_c\f$
+
+@param[in] R_gripper2base Rotation part extracted from the homogeneous matrix that transforms a point
+expressed in the gripper frame to the robot base frame (\f$_{}^{b}\textrm{T}_g\f$).
+This is a vector (`vector<Mat>`) that contains the rotation matrices for all the transformations
+from gripper frame to robot base frame.
+@param[in] t_gripper2base Translation part extracted from the homogeneous matrix that transforms a point
+expressed in the gripper frame to the robot base frame (\f$_{}^{b}\textrm{T}_g\f$).
+This is a vector (`vector<Mat>`) that contains the translation vectors for all the transformations
+from gripper frame to robot base frame.
+@param[in] R_target2cam Rotation part extracted from the homogeneous matrix that transforms a point
+expressed in the target frame to the camera frame (\f$_{}^{c}\textrm{T}_t\f$).
+This is a vector (`vector<Mat>`) that contains the rotation matrices for all the transformations
+from calibration target frame to camera frame.
+@param[in] t_target2cam Translation part extracted from the homogeneous matrix that transforms a point
+expressed in the target frame to the camera frame (\f$_{}^{c}\textrm{T}_t\f$).
+This is a vector (`vector<Mat>`) that contains the translation vectors for all the transformations
+from calibration target frame to camera frame.
+@param[out] R_cam2gripper Estimated rotation part extracted from the homogeneous matrix that transforms a point
+expressed in the camera frame to the gripper frame (\f$_{}^{g}\textrm{T}_c\f$).
+@param[out] t_cam2gripper Estimated translation part extracted from the homogeneous matrix that transforms a point
+expressed in the camera frame to the gripper frame (\f$_{}^{g}\textrm{T}_c\f$).
+@param[in] method One of the implemented Hand-Eye calibration methods, see cv::HandEyeCalibrationMethod
+
+The function performs the Hand-Eye calibration using various methods. One approach consists in estimating the
+rotation then the translation (separable solutions) and the following methods are implemented:
+  - R. Tsai, R. Lenz A New Technique for Fully Autonomous and Efficient 3D Robotics Hand/Eye Calibration \cite Tsai89
+  - F. Park, B. Martin Robot Sensor Calibration: Solving AX = XB on the Euclidean Group \cite Park94
+  - R. Horaud, F. Dornaika Hand-Eye Calibration \cite Horaud95
+
+Another approach consists in estimating simultaneously the rotation and the translation (simultaneous solutions),
+with the following implemented methods:
+  - N. Andreff, R. Horaud, B. Espiau On-line Hand-Eye Calibration \cite Andreff99
+  - K. Daniilidis Hand-Eye Calibration Using Dual Quaternions \cite Daniilidis98
+
+The following picture describes the Hand-Eye calibration problem where the transformation between a camera ("eye")
+mounted on a robot gripper ("hand") has to be estimated.
+
+![](pics/hand-eye_figure.png)
+
+The calibration procedure is the following:
+  - a static calibration pattern is used to estimate the transformation between the target frame
+  and the camera frame
+  - the robot gripper is moved in order to acquire several poses
+  - for each pose, the homogeneous transformation between the gripper frame and the robot base frame is recorded using for
+  instance the robot kinematics
+\f[
+    \begin{bmatrix}
+    X_b\\
+    Y_b\\
+    Z_b\\
+    1
+    \end{bmatrix}
+    =
+    \begin{bmatrix}
+    _{}^{b}\textrm{R}_g & _{}^{b}\textrm{t}_g \\
+    0_{1 \times 3} & 1
+    \end{bmatrix}
+    \begin{bmatrix}
+    X_g\\
+    Y_g\\
+    Z_g\\
+    1
+    \end{bmatrix}
+\f]
+  - for each pose, the homogeneous transformation between the calibration target frame and the camera frame is recorded using
+  for instance a pose estimation method (PnP) from 2D-3D point correspondences
+\f[
+    \begin{bmatrix}
+    X_c\\
+    Y_c\\
+    Z_c\\
+    1
+    \end{bmatrix}
+    =
+    \begin{bmatrix}
+    _{}^{c}\textrm{R}_t & _{}^{c}\textrm{t}_t \\
+    0_{1 \times 3} & 1
+    \end{bmatrix}
+    \begin{bmatrix}
+    X_t\\
+    Y_t\\
+    Z_t\\
+    1
+    \end{bmatrix}
+\f]
+
+The Hand-Eye calibration procedure returns the following homogeneous transformation
+\f[
+    \begin{bmatrix}
+    X_g\\
+    Y_g\\
+    Z_g\\
+    1
+    \end{bmatrix}
+    =
+    \begin{bmatrix}
+    _{}^{g}\textrm{R}_c & _{}^{g}\textrm{t}_c \\
+    0_{1 \times 3} & 1
+    \end{bmatrix}
+    \begin{bmatrix}
+    X_c\\
+    Y_c\\
+    Z_c\\
+    1
+    \end{bmatrix}
+\f]
+
+This problem is also known as solving the \f$\mathbf{A}\mathbf{X}=\mathbf{X}\mathbf{B}\f$ equation:
+\f[
+    \begin{align*}
+    ^{b}{\textrm{T}_g}^{(1)} \hspace{0.2em} ^{g}\textrm{T}_c \hspace{0.2em} ^{c}{\textrm{T}_t}^{(1)} &=
+    \hspace{0.1em} ^{b}{\textrm{T}_g}^{(2)} \hspace{0.2em} ^{g}\textrm{T}_c \hspace{0.2em} ^{c}{\textrm{T}_t}^{(2)} \\
+
+    (^{b}{\textrm{T}_g}^{(2)})^{-1} \hspace{0.2em} ^{b}{\textrm{T}_g}^{(1)} \hspace{0.2em} ^{g}\textrm{T}_c &=
+    \hspace{0.1em} ^{g}\textrm{T}_c \hspace{0.2em} ^{c}{\textrm{T}_t}^{(2)} (^{c}{\textrm{T}_t}^{(1)})^{-1} \\
+
+    \textrm{A}_i \textrm{X} &= \textrm{X} \textrm{B}_i \\
+    \end{align*}
+\f]
+
+\note
+Additional information can be found on this [website](http://campar.in.tum.de/Chair/HandEyeCalibration).
+\note
+A minimum of 2 motions with non-parallel rotation axes are necessary to determine the hand-eye transformation.
+So at least 3 different poses are required, but it is strongly recommended to use many more poses.
+
+ */
+CV_EXPORTS_W void calibrateHandEye( InputArrayOfArrays R_gripper2base, InputArrayOfArrays t_gripper2base,
+                                    InputArrayOfArrays R_target2cam, InputArrayOfArrays t_target2cam,
+                                    OutputArray R_cam2gripper, OutputArray t_cam2gripper,
+                                    HandEyeCalibrationMethod method=CALIB_HAND_EYE_TSAI );
+
 /** @brief Converts points from Euclidean to homogeneous space.
 
 @param src Input vector of N-dimensional points.
@@ -1184,13 +2395,14 @@ floating-point (single or double precision).
 -   **CV_FM_8POINT** for an 8-point algorithm. \f$N \ge 8\f$
 -   **CV_FM_RANSAC** for the RANSAC algorithm. \f$N \ge 8\f$
 -   **CV_FM_LMEDS** for the LMedS algorithm. \f$N \ge 8\f$
-@param param1 Parameter used for RANSAC. It is the maximum distance from a point to an epipolar
+@param ransacReprojThreshold Parameter used only for RANSAC. It is the maximum distance from a point to an epipolar
 line in pixels, beyond which the point is considered an outlier and is not used for computing the
 final fundamental matrix. It can be set to something like 1-3, depending on the accuracy of the
 point localization, image resolution, and the image noise.
-@param param2 Parameter used for the RANSAC or LMedS methods only. It specifies a desirable level
+@param confidence Parameter used for the RANSAC and LMedS methods only. It specifies a desirable level
 of confidence (probability) that the estimated matrix is correct.
 @param mask
+@param maxIters The maximum number of robust method iterations.
 
 The epipolar geometry is described by the following equation:
 
@@ -1224,15 +2436,20 @@ stereoRectifyUncalibrated to compute the rectification transformation. :
      findFundamentalMat(points1, points2, FM_RANSAC, 3, 0.99);
 @endcode
  */
+CV_EXPORTS_W Mat findFundamentalMat( InputArray points1, InputArray points2,
+                                     int method, double ransacReprojThreshold, double confidence,
+                                     int maxIters, OutputArray mask = noArray() );
+
+/** @overload */
 CV_EXPORTS_W Mat findFundamentalMat( InputArray points1, InputArray points2,
                                      int method = FM_RANSAC,
-                                     double param1 = 3., double param2 = 0.99,
+                                     double ransacReprojThreshold = 3., double confidence = 0.99,
                                      OutputArray mask = noArray() );
 
 /** @overload */
 CV_EXPORTS Mat findFundamentalMat( InputArray points1, InputArray points2,
                                    OutputArray mask, int method = FM_RANSAC,
-                                   double param1 = 3., double param2 = 0.99 );
+                                   double ransacReprojThreshold = 3., double confidence = 0.99 );
 
 /** @brief Calculates an essential matrix from the corresponding points in two images.
 
@@ -1242,9 +2459,9 @@ be floating-point (single or double precision).
 @param cameraMatrix Camera matrix \f$K = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ .
 Note that this function assumes that points1 and points2 are feature points from cameras with the
 same camera matrix.
-@param method Method for computing a fundamental matrix.
+@param method Method for computing an essential matrix.
 -   **RANSAC** for the RANSAC algorithm.
--   **MEDS** for the LMedS algorithm.
+-   **LMEDS** for the LMedS algorithm.
 @param prob Parameter used for the RANSAC or LMedS methods only. It specifies a desirable level of
 confidence (probability) that the estimated matrix is correct.
 @param threshold Parameter used for RANSAC. It is the maximum distance from a point to an epipolar
@@ -1273,8 +2490,8 @@ CV_EXPORTS_W Mat findEssentialMat( InputArray points1, InputArray points2,
 be floating-point (single or double precision).
 @param points2 Array of the second image points of the same size and format as points1 .
 @param focal focal length of the camera. Note that this function assumes that points1 and points2
-are feature points from cameras with same focal length and principle point.
-@param pp principle point of the camera.
+are feature points from cameras with same focal length and principal point.
+@param pp principal point of the camera.
 @param method Method for computing a fundamental matrix.
 -   **RANSAC** for the RANSAC algorithm.
 -   **LMEDS** for the LMedS algorithm.
@@ -1309,35 +2526,47 @@ CV_EXPORTS_W Mat findEssentialMat( InputArray points1, InputArray points2,
 @param R2 Another possible rotation matrix.
 @param t One possible translation.
 
-This function decompose an essential matrix E using svd decomposition @cite HartleyZ00 . Generally 4
-possible poses exists for a given E. They are \f$[R_1, t]\f$, \f$[R_1, -t]\f$, \f$[R_2, t]\f$, \f$[R_2, -t]\f$. By
-decomposing E, you can only get the direction of the translation, so the function returns unit t.
+This function decomposes the essential matrix E using svd decomposition @cite HartleyZ00. In
+general, four possible poses exist for the decomposition of E. They are \f$[R_1, t]\f$,
+\f$[R_1, -t]\f$, \f$[R_2, t]\f$, \f$[R_2, -t]\f$.
+
+If E gives the epipolar constraint \f$[p_2; 1]^T A^{-T} E A^{-1} [p_1; 1] = 0\f$ between the image
+points \f$p_1\f$ in the first image and \f$p_2\f$ in the second image, then any of the tuples
+\f$[R_1, t]\f$, \f$[R_1, -t]\f$, \f$[R_2, t]\f$, \f$[R_2, -t]\f$ is a change of basis from the first
+camera's coordinate system to the second camera's coordinate system. However, by decomposing E, one
+can only get the direction of the translation. For this reason, the translation t is returned with
+unit length.
  */
 CV_EXPORTS_W void decomposeEssentialMat( InputArray E, OutputArray R1, OutputArray R2, OutputArray t );
 
-/** @brief Recover relative camera rotation and translation from an estimated essential matrix and the
-corresponding points in two images, using cheirality check. Returns the number of inliers which pass
-the check.
+/** @brief Recovers the relative camera rotation and the translation from an estimated essential
+matrix and the corresponding points in two images, using cheirality check. Returns the number of
+inliers that pass the check.
 
 @param E The input essential matrix.
 @param points1 Array of N 2D points from the first image. The point coordinates should be
 floating-point (single or double precision).
 @param points2 Array of the second image points of the same size and format as points1 .
-@param cameraMatrix Camera matrix \f$K = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ .
+@param cameraMatrix Camera matrix \f$A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ .
 Note that this function assumes that points1 and points2 are feature points from cameras with the
 same camera matrix.
-@param R Recovered relative rotation.
-@param t Recoverd relative translation.
-@param mask Input/output mask for inliers in points1 and points2.
-:   If it is not empty, then it marks inliers in points1 and points2 for then given essential
-matrix E. Only these inliers will be used to recover pose. In the output mask only inliers
-which pass the cheirality check.
-This function decomposes an essential matrix using decomposeEssentialMat and then verifies possible
-pose hypotheses by doing cheirality check. The cheirality check basically means that the
-triangulated 3D points should have positive depth. Some details can be found in @cite Nister03 .
-
-This function can be used to process output E and mask from findEssentialMat. In this scenario,
-points1 and points2 are the same input for findEssentialMat. :
+@param R Output rotation matrix. Together with the translation vector, this matrix makes up a tuple
+that performs a change of basis from the first camera's coordinate system to the second camera's
+coordinate system. Note that, in general, t can not be used for this tuple, see the parameter
+described below.
+@param t Output translation vector. This vector is obtained by @ref decomposeEssentialMat and
+therefore is only known up to scale, i.e. t is the direction of the translation vector and has unit
+length.
+@param mask Input/output mask for inliers in points1 and points2. If it is not empty, then it marks
+inliers in points1 and points2 for the given essential matrix E. Only these inliers will be used to
+recover pose. The output mask marks only the inliers which pass the cheirality check.
+
+This function decomposes an essential matrix using @ref decomposeEssentialMat and then verifies
+possible pose hypotheses by doing cheirality check. The cheirality check means that the
+triangulated 3D points should have positive depth. Some details can be found in @cite Nister03.
+
+This function can be used to process the output E and mask from @ref findEssentialMat. In this
+scenario, points1 and points2 are the same input for findEssentialMat:
 @code
     // Example. Estimation of fundamental matrix using the RANSAC algorithm
     int point_count = 100;
@@ -1369,20 +2598,24 @@ CV_EXPORTS_W int recoverPose( InputArray E, InputArray points1, InputArray point
 @param points1 Array of N 2D points from the first image. The point coordinates should be
 floating-point (single or double precision).
 @param points2 Array of the second image points of the same size and format as points1 .
-@param R Recovered relative rotation.
-@param t Recoverd relative translation.
+@param R Output rotation matrix. Together with the translation vector, this matrix makes up a tuple
+that performs a change of basis from the first camera's coordinate system to the second camera's
+coordinate system. Note that, in general, t can not be used for this tuple, see the parameter
+description below.
+@param t Output translation vector. This vector is obtained by @ref decomposeEssentialMat and
+therefore is only known up to scale, i.e. t is the direction of the translation vector and has unit
+length.
 @param focal Focal length of the camera. Note that this function assumes that points1 and points2
-are feature points from cameras with same focal length and principle point.
-@param pp Principle point of the camera.
-@param mask Input/output mask for inliers in points1 and points2.
-:   If it is not empty, then it marks inliers in points1 and points2 for then given essential
-matrix E. Only these inliers will be used to recover pose. In the output mask only inliers
-which pass the cheirality check.
+are feature points from cameras with same focal length and principal point.
+@param pp principal point of the camera.
+@param mask Input/output mask for inliers in points1 and points2. If it is not empty, then it marks
+inliers in points1 and points2 for the given essential matrix E. Only these inliers will be used to
+recover pose. The output mask marks only the inliers which pass the cheirality check.
 
 This function differs from the one above that it computes camera matrix from focal length and
 principal point:
 
-\f[K =
+\f[A =
 \begin{bmatrix}
 f & 0 & x_{pp}  \\
 0 & f & y_{pp}  \\
@@ -1394,6 +2627,35 @@ CV_EXPORTS_W int recoverPose( InputArray E, InputArray points1, InputArray point
                             double focal = 1.0, Point2d pp = Point2d(0, 0),
                             InputOutputArray mask = noArray() );
 
+/** @overload
+@param E The input essential matrix.
+@param points1 Array of N 2D points from the first image. The point coordinates should be
+floating-point (single or double precision).
+@param points2 Array of the second image points of the same size and format as points1.
+@param cameraMatrix Camera matrix \f$A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ .
+Note that this function assumes that points1 and points2 are feature points from cameras with the
+same camera matrix.
+@param R Output rotation matrix. Together with the translation vector, this matrix makes up a tuple
+that performs a change of basis from the first camera's coordinate system to the second camera's
+coordinate system. Note that, in general, t can not be used for this tuple, see the parameter
+description below.
+@param t Output translation vector. This vector is obtained by @ref decomposeEssentialMat and
+therefore is only known up to scale, i.e. t is the direction of the translation vector and has unit
+length.
+@param distanceThresh threshold distance which is used to filter out far away points (i.e. infinite
+points).
+@param mask Input/output mask for inliers in points1 and points2. If it is not empty, then it marks
+inliers in points1 and points2 for the given essential matrix E. Only these inliers will be used to
+recover pose. The output mask marks only the inliers which pass the cheirality check.
+@param triangulatedPoints 3D points which were reconstructed by triangulation.
+
+This function differs from the one above in that it outputs the triangulated 3D points that are
+used for the cheirality check.
+ */
+CV_EXPORTS_W int recoverPose( InputArray E, InputArray points1, InputArray points2,
+                            InputArray cameraMatrix, OutputArray R, OutputArray t, double distanceThresh, InputOutputArray mask = noArray(),
+                            OutputArray triangulatedPoints = noArray());
+
 /** @brief For points in an image of a stereo pair, computes the corresponding epilines in the other image.
 
 @param points Input points. \f$N \times 1\f$ or \f$1 \times N\f$ matrix of type CV_32FC2 or
@@ -1420,22 +2682,27 @@ Line coefficients are defined up to a scale. They are normalized so that \f$a_i^
 CV_EXPORTS_W void computeCorrespondEpilines( InputArray points, int whichImage,
                                              InputArray F, OutputArray lines );
 
-/** @brief Reconstructs points by triangulation.
+/** @brief This function reconstructs 3-dimensional points (in homogeneous coordinates) by using
+their observations with a stereo camera.
 
-@param projMatr1 3x4 projection matrix of the first camera.
-@param projMatr2 3x4 projection matrix of the second camera.
-@param projPoints1 2xN array of feature points in the first image. In case of c++ version it can
-be also a vector of feature points or two-channel matrix of size 1xN or Nx1.
-@param projPoints2 2xN array of corresponding points in the second image. In case of c++ version
+@param projMatr1 3x4 projection matrix of the first camera, i.e. this matrix projects 3D points
+given in the world's coordinate system into the first image.
+@param projMatr2 3x4 projection matrix of the second camera, i.e. this matrix projects 3D points
+given in the world's coordinate system into the second image.
+@param projPoints1 2xN array of feature points in the first image. In the case of the c++ version,
 it can be also a vector of feature points or two-channel matrix of size 1xN or Nx1.
-@param points4D 4xN array of reconstructed points in homogeneous coordinates.
-
-The function reconstructs 3-dimensional points (in homogeneous coordinates) by using their
-observations with a stereo camera. Projections matrices can be obtained from stereoRectify.
+@param projPoints2 2xN array of corresponding points in the second image. In the case of the c++
+version, it can be also a vector of feature points or two-channel matrix of size 1xN or Nx1.
+@param points4D 4xN array of reconstructed points in homogeneous coordinates. These points are
+returned in the world's coordinate system.
 
 @note
    Keep in mind that all input data should be of float type in order for this function to work.
 
+@note
+   If the projection matrices from @ref stereoRectify are used, then the returned points are
+   represented in the first camera's rectified coordinate system.
+
 @sa
    reprojectImageTo3D
  */
@@ -1480,7 +2747,7 @@ CV_EXPORTS_W void filterSpeckles( InputOutputArray img, double newVal,
 //! computes valid disparity ROI from the valid ROIs of the rectified images (that are returned by cv::stereoRectify())
 CV_EXPORTS_W Rect getValidDisparityROI( Rect roi1, Rect roi2,
                                         int minDisparity, int numberOfDisparities,
-                                        int SADWindowSize );
+                                        int blockSize );
 
 //! validates disparity using the left-right check. The matrix "cost" should be computed by the stereo correspondence algorithm
 CV_EXPORTS_W void validateDisparity( InputOutputArray disparity, InputArray cost,
@@ -1490,12 +2757,16 @@ CV_EXPORTS_W void validateDisparity( InputOutputArray disparity, InputArray cost
 /** @brief Reprojects a disparity image to 3D space.
 
 @param disparity Input single-channel 8-bit unsigned, 16-bit signed, 32-bit signed or 32-bit
-floating-point disparity image. If 16-bit signed format is used, the values are assumed to have no
-fractional bits.
-@param _3dImage Output 3-channel floating-point image of the same size as disparity . Each
-element of _3dImage(x,y) contains 3D coordinates of the point (x,y) computed from the disparity
-map.
-@param Q \f$4 \times 4\f$ perspective transformation matrix that can be obtained with stereoRectify.
+floating-point disparity image. The values of 8-bit / 16-bit signed formats are assumed to have no
+fractional bits. If the disparity is 16-bit signed format, as computed by @ref StereoBM or
+@ref StereoSGBM and maybe other algorithms, it should be divided by 16 (and scaled to float) before
+being used here.
+@param _3dImage Output 3-channel floating-point image of the same size as disparity. Each element of
+_3dImage(x,y) contains 3D coordinates of the point (x,y) computed from the disparity map. If one
+uses Q obtained by @ref stereoRectify, then the returned points are represented in the first
+camera's rectified coordinate system.
+@param Q \f$4 \times 4\f$ perspective transformation matrix that can be obtained with
+@ref stereoRectify.
 @param handleMissingValues Indicates, whether the function should handle missing values (i.e.
 points where the disparity was not computed). If handleMissingValues=true, then pixels with the
 minimal disparity that corresponds to the outliers (see StereoMatcher::compute ) are transformed
@@ -1504,14 +2775,23 @@ to 3D points with a very large Z value (currently set to 10000).
 depth. ddepth can also be set to CV_16S, CV_32S or CV_32F.
 
 The function transforms a single-channel disparity map to a 3-channel image representing a 3D
-surface. That is, for each pixel (x,y) andthe corresponding disparity d=disparity(x,y) , it
+surface. That is, for each pixel (x,y) and the corresponding disparity d=disparity(x,y) , it
 computes:
 
-\f[\begin{array}{l} [X \; Y \; Z \; W]^T =  \texttt{Q} *[x \; y \; \texttt{disparity} (x,y) \; 1]^T  \\ \texttt{\_3dImage} (x,y) = (X/W, \; Y/W, \; Z/W) \end{array}\f]
+\f[\begin{bmatrix}
+X \\
+Y \\
+Z \\
+W
+\end{bmatrix} = Q \begin{bmatrix}
+x \\
+y \\
+\texttt{disparity} (x,y) \\
+1
+\end{bmatrix}.\f]
 
-The matrix Q can be an arbitrary \f$4 \times 4\f$ matrix (for example, the one computed by
-stereoRectify). To reproject a sparse set of points {(x,y,d),...} to 3D space, use
-perspectiveTransform .
+@sa
+   To reproject a sparse set of points {(x,y,d),...} to 3D space, use perspectiveTransform.
  */
 CV_EXPORTS_W void reprojectImageTo3D( InputArray disparity,
                                       OutputArray _3dImage, InputArray Q,
@@ -1520,21 +2800,62 @@ CV_EXPORTS_W void reprojectImageTo3D( InputArray disparity,
 
 /** @brief Calculates the Sampson Distance between two points.
 
-The function sampsonDistance calculates and returns the first order approximation of the geometric error as:
-\f[sd( \texttt{pt1} , \texttt{pt2} )= \frac{(\texttt{pt2}^t \cdot \texttt{F} \cdot \texttt{pt1})^2}{(\texttt{F} \cdot \texttt{pt1})(0) + (\texttt{F} \cdot \texttt{pt1})(1) + (\texttt{F}^t \cdot \texttt{pt2})(0) + (\texttt{F}^t \cdot \texttt{pt2})(1)}\f]
-The fundamental matrix may be calculated using the cv::findFundamentalMat function. See HZ 11.4.3 for details.
+The function cv::sampsonDistance calculates and returns the first order approximation of the geometric error as:
+\f[
+sd( \texttt{pt1} , \texttt{pt2} )=
+\frac{(\texttt{pt2}^t \cdot \texttt{F} \cdot \texttt{pt1})^2}
+{((\texttt{F} \cdot \texttt{pt1})(0))^2 +
+((\texttt{F} \cdot \texttt{pt1})(1))^2 +
+((\texttt{F}^t \cdot \texttt{pt2})(0))^2 +
+((\texttt{F}^t \cdot \texttt{pt2})(1))^2}
+\f]
+The fundamental matrix may be calculated using the cv::findFundamentalMat function. See @cite HartleyZ00 11.4.3 for details.
 @param pt1 first homogeneous 2d point
 @param pt2 second homogeneous 2d point
 @param F fundamental matrix
+@return The computed Sampson distance.
 */
 CV_EXPORTS_W double sampsonDistance(InputArray pt1, InputArray pt2, InputArray F);
 
 /** @brief Computes an optimal affine transformation between two 3D point sets.
 
-@param src First input 3D point set.
-@param dst Second input 3D point set.
-@param out Output 3D affine transformation matrix \f$3 \times 4\f$ .
-@param inliers Output vector indicating which points are inliers.
+It computes
+\f[
+\begin{bmatrix}
+x\\
+y\\
+z\\
+\end{bmatrix}
+=
+\begin{bmatrix}
+a_{11} & a_{12} & a_{13}\\
+a_{21} & a_{22} & a_{23}\\
+a_{31} & a_{32} & a_{33}\\
+\end{bmatrix}
+\begin{bmatrix}
+X\\
+Y\\
+Z\\
+\end{bmatrix}
++
+\begin{bmatrix}
+b_1\\
+b_2\\
+b_3\\
+\end{bmatrix}
+\f]
+
+@param src First input 3D point set containing \f$(X,Y,Z)\f$.
+@param dst Second input 3D point set containing \f$(x,y,z)\f$.
+@param out Output 3D affine transformation matrix \f$3 \times 4\f$ of the form
+\f[
+\begin{bmatrix}
+a_{11} & a_{12} & a_{13} & b_1\\
+a_{21} & a_{22} & a_{23} & b_2\\
+a_{31} & a_{32} & a_{33} & b_3\\
+\end{bmatrix}
+\f]
+@param inliers Output vector indicating which points are inliers (1-inlier, 0-outlier).
 @param ransacThreshold Maximum reprojection error in the RANSAC algorithm to consider a point as
 an inlier.
 @param confidence Confidence level, between 0 and 1, for the estimated transformation. Anything
@@ -1548,6 +2869,174 @@ CV_EXPORTS_W  int estimateAffine3D(InputArray src, InputArray dst,
                                    OutputArray out, OutputArray inliers,
                                    double ransacThreshold = 3, double confidence = 0.99);
 
+/** @brief Computes an optimal translation between two 3D point sets.
+ *
+ * It computes
+ * \f[
+ * \begin{bmatrix}
+ * x\\
+ * y\\
+ * z\\
+ * \end{bmatrix}
+ * =
+ * \begin{bmatrix}
+ * X\\
+ * Y\\
+ * Z\\
+ * \end{bmatrix}
+ * +
+ * \begin{bmatrix}
+ * b_1\\
+ * b_2\\
+ * b_3\\
+ * \end{bmatrix}
+ * \f]
+ *
+ * @param src First input 3D point set containing \f$(X,Y,Z)\f$.
+ * @param dst Second input 3D point set containing \f$(x,y,z)\f$.
+ * @param out Output 3D translation vector \f$3 \times 1\f$ of the form
+ * \f[
+ * \begin{bmatrix}
+ * b_1 \\
+ * b_2 \\
+ * b_3 \\
+ * \end{bmatrix}
+ * \f]
+ * @param inliers Output vector indicating which points are inliers (1-inlier, 0-outlier).
+ * @param ransacThreshold Maximum reprojection error in the RANSAC algorithm to consider a point as
+ * an inlier.
+ * @param confidence Confidence level, between 0 and 1, for the estimated transformation. Anything
+ * between 0.95 and 0.99 is usually good enough. Values too close to 1 can slow down the estimation
+ * significantly. Values lower than 0.8-0.9 can result in an incorrectly estimated transformation.
+ *
+ * The function estimates an optimal 3D translation between two 3D point sets using the
+ * RANSAC algorithm.
+ *  */
+CV_EXPORTS_W  int estimateTranslation3D(InputArray src, InputArray dst,
+                                        OutputArray out, OutputArray inliers,
+                                        double ransacThreshold = 3, double confidence = 0.99);
+
+/** @brief Computes an optimal affine transformation between two 2D point sets.
+
+It computes
+\f[
+\begin{bmatrix}
+x\\
+y\\
+\end{bmatrix}
+=
+\begin{bmatrix}
+a_{11} & a_{12}\\
+a_{21} & a_{22}\\
+\end{bmatrix}
+\begin{bmatrix}
+X\\
+Y\\
+\end{bmatrix}
++
+\begin{bmatrix}
+b_1\\
+b_2\\
+\end{bmatrix}
+\f]
+
+@param from First input 2D point set containing \f$(X,Y)\f$.
+@param to Second input 2D point set containing \f$(x,y)\f$.
+@param inliers Output vector indicating which points are inliers (1-inlier, 0-outlier).
+@param method Robust method used to compute transformation. The following methods are possible:
+-   cv::RANSAC - RANSAC-based robust method
+-   cv::LMEDS - Least-Median robust method
+RANSAC is the default method.
+@param ransacReprojThreshold Maximum reprojection error in the RANSAC algorithm to consider
+a point as an inlier. Applies only to RANSAC.
+@param maxIters The maximum number of robust method iterations.
+@param confidence Confidence level, between 0 and 1, for the estimated transformation. Anything
+between 0.95 and 0.99 is usually good enough. Values too close to 1 can slow down the estimation
+significantly. Values lower than 0.8-0.9 can result in an incorrectly estimated transformation.
+@param refineIters Maximum number of iterations of refining algorithm (Levenberg-Marquardt).
+Passing 0 will disable refining, so the output matrix will be output of robust method.
+
+@return Output 2D affine transformation matrix \f$2 \times 3\f$ or empty matrix if transformation
+could not be estimated. The returned matrix has the following form:
+\f[
+\begin{bmatrix}
+a_{11} & a_{12} & b_1\\
+a_{21} & a_{22} & b_2\\
+\end{bmatrix}
+\f]
+
+The function estimates an optimal 2D affine transformation between two 2D point sets using the
+selected robust algorithm.
+
+The computed transformation is then refined further (using only inliers) with the
+Levenberg-Marquardt method to reduce the re-projection error even more.
+
+@note
+The RANSAC method can handle practically any ratio of outliers but needs a threshold to
+distinguish inliers from outliers. The method LMeDS does not need any threshold but it works
+correctly only when there are more than 50% of inliers.
+
+@sa estimateAffinePartial2D, getAffineTransform
+*/
+CV_EXPORTS_W cv::Mat estimateAffine2D(InputArray from, InputArray to, OutputArray inliers = noArray(),
+                                  int method = RANSAC, double ransacReprojThreshold = 3,
+                                  size_t maxIters = 2000, double confidence = 0.99,
+                                  size_t refineIters = 10);
+
+/** @brief Computes an optimal limited affine transformation with 4 degrees of freedom between
+two 2D point sets.
+
+@param from First input 2D point set.
+@param to Second input 2D point set.
+@param inliers Output vector indicating which points are inliers.
+@param method Robust method used to compute transformation. The following methods are possible:
+-   cv::RANSAC - RANSAC-based robust method
+-   cv::LMEDS - Least-Median robust method
+RANSAC is the default method.
+@param ransacReprojThreshold Maximum reprojection error in the RANSAC algorithm to consider
+a point as an inlier. Applies only to RANSAC.
+@param maxIters The maximum number of robust method iterations.
+@param confidence Confidence level, between 0 and 1, for the estimated transformation. Anything
+between 0.95 and 0.99 is usually good enough. Values too close to 1 can slow down the estimation
+significantly. Values lower than 0.8-0.9 can result in an incorrectly estimated transformation.
+@param refineIters Maximum number of iterations of refining algorithm (Levenberg-Marquardt).
+Passing 0 will disable refining, so the output matrix will be output of robust method.
+
+@return Output 2D affine transformation (4 degrees of freedom) matrix \f$2 \times 3\f$ or
+empty matrix if transformation could not be estimated.
+
+The function estimates an optimal 2D affine transformation with 4 degrees of freedom limited to
+combinations of translation, rotation, and uniform scaling. Uses the selected algorithm for robust
+estimation.
+
+The computed transformation is then refined further (using only inliers) with the
+Levenberg-Marquardt method to reduce the re-projection error even more.
+
+Estimated transformation matrix is:
+\f[ \begin{bmatrix} \cos(\theta) \cdot s & -\sin(\theta) \cdot s & t_x \\
+                \sin(\theta) \cdot s & \cos(\theta) \cdot s & t_y
+\end{bmatrix} \f]
+Where \f$ \theta \f$ is the rotation angle, \f$ s \f$ the scaling factor and \f$ t_x, t_y \f$ are
+translations in \f$ x, y \f$ axes respectively.
+
+@note
+The RANSAC method can handle practically any ratio of outliers but needs a threshold to
+distinguish inliers from outliers. The method LMeDS does not need any threshold but it works
+correctly only when there are more than 50% of inliers.
+
+@sa estimateAffine2D, getAffineTransform
+*/
+CV_EXPORTS_W cv::Mat estimateAffinePartial2D(InputArray from, InputArray to, OutputArray inliers = noArray(),
+                                  int method = RANSAC, double ransacReprojThreshold = 3,
+                                  size_t maxIters = 2000, double confidence = 0.99,
+                                  size_t refineIters = 10);
+
+/** @example samples/cpp/tutorial_code/features2D/Homography/decompose_homography.cpp
+An example program with homography decomposition.
+
+Check @ref tutorial_homography "the corresponding tutorial" for more details.
+*/
+
 /** @brief Decompose a homography matrix to rotation(s), translation(s) and plane normal(s).
 
 @param H The input homography matrix between two images.
@@ -1556,11 +3045,19 @@ CV_EXPORTS_W  int estimateAffine3D(InputArray src, InputArray dst,
 @param translations Array of translation matrices.
 @param normals Array of plane normal matrices.
 
-This function extracts relative camera motion between two views observing a planar object from the
-homography H induced by the plane. The intrinsic camera matrix K must also be provided. The function
-may return up to four mathematical solution sets. At least two of the solutions may further be
-invalidated if point correspondences are available by applying positive depth constraint (all points
-must be in front of the camera). The decomposition method is described in detail in @cite Malis .
+This function extracts relative camera motion between two views of a planar object and returns up to
+four mathematical solution tuples of rotation, translation, and plane normal. The decomposition of
+the homography matrix H is described in detail in @cite Malis.
+
+If the homography H, induced by the plane, gives the constraint
+\f[s_i \vecthree{x'_i}{y'_i}{1} \sim H \vecthree{x_i}{y_i}{1}\f] on the source image points
+\f$p_i\f$ and the destination image points \f$p'_i\f$, then the tuple of rotations[k] and
+translations[k] is a change of basis from the source camera's coordinate system to the destination
+camera's coordinate system. However, by decomposing H, one can only get the translation normalized
+by the (typically unknown) depth of the scene, i.e. its direction but with normalized length.
+
+If point correspondences are available, at least two solutions may further be invalidated, by
+applying positive depth constraint, i.e. all points must be in front of the camera.
  */
 CV_EXPORTS_W int decomposeHomographyMat(InputArray H,
                                         InputArray K,
@@ -1568,6 +3065,31 @@ CV_EXPORTS_W int decomposeHomographyMat(InputArray H,
                                         OutputArrayOfArrays translations,
                                         OutputArrayOfArrays normals);
 
+/** @brief Filters homography decompositions based on additional information.
+
+@param rotations Vector of rotation matrices.
+@param normals Vector of plane normal matrices.
+@param beforePoints Vector of (rectified) visible reference points before the homography is applied
+@param afterPoints Vector of (rectified) visible reference points after the homography is applied
+@param possibleSolutions Vector of int indices representing the viable solution set after filtering
+@param pointsMask optional Mat/Vector of 8u type representing the mask for the inliers as given by the findHomography function
+
+This function is intended to filter the output of the decomposeHomographyMat based on additional
+information as described in @cite Malis . The summary of the method: the decomposeHomographyMat function
+returns 2 unique solutions and their "opposites" for a total of 4 solutions. If we have access to the
+sets of points visible in the camera frame before and after the homography transformation is applied,
+we can determine which are the true potential solutions and which are the opposites by verifying which
+homographies are consistent with all visible reference points being in front of the camera. The inputs
+are left unchanged; the filtered solution set is returned as indices into the existing one.
+
+*/
+CV_EXPORTS_W void filterHomographyDecompByVisibleRefpoints(InputArrayOfArrays rotations,
+                                                           InputArrayOfArrays normals,
+                                                           InputArray beforePoints,
+                                                           InputArray afterPoints,
+                                                           OutputArray possibleSolutions,
+                                                           InputArray pointsMask = noArray());
+
 /** @brief The base class for stereo correspondence algorithms.
  */
 class CV_EXPORTS_W StereoMatcher : public Algorithm
@@ -1683,7 +3205,8 @@ class CV_EXPORTS_W StereoSGBM : public StereoMatcher
     {
         MODE_SGBM = 0,
         MODE_HH   = 1,
-        MODE_SGBM_3WAY = 2
+        MODE_SGBM_3WAY = 2,
+        MODE_HH4  = 3
     };
 
     CV_WRAP virtual int getPreFilterCap() const = 0;
@@ -1714,8 +3237,8 @@ class CV_EXPORTS_W StereoSGBM : public StereoMatcher
     the smoother the disparity is. P1 is the penalty on the disparity change by plus or minus 1
     between neighbor pixels. P2 is the penalty on the disparity change by more than 1 between neighbor
     pixels. The algorithm requires P2 \> P1 . See stereo_match.cpp sample where some reasonably good
-    P1 and P2 values are shown (like 8\*number_of_image_channels\*SADWindowSize\*SADWindowSize and
-    32\*number_of_image_channels\*SADWindowSize\*SADWindowSize , respectively).
+    P1 and P2 values are shown (like 8\*number_of_image_channels\*blockSize\*blockSize and
+    32\*number_of_image_channels\*blockSize\*blockSize , respectively).
     @param disp12MaxDiff Maximum allowed difference (in integer pixel units) in the left-right
     disparity check. Set it to a non-positive value to disable the check.
     @param preFilterCap Truncation value for the prefiltered image pixels. The algorithm first
@@ -1738,13 +3261,216 @@ class CV_EXPORTS_W StereoSGBM : public StereoMatcher
     set StereoSGBM::numDisparities at minimum. The second constructor enables you to set each parameter
     to a custom value.
      */
-    CV_WRAP static Ptr<StereoSGBM> create(int minDisparity, int numDisparities, int blockSize,
+    CV_WRAP static Ptr<StereoSGBM> create(int minDisparity = 0, int numDisparities = 16, int blockSize = 3,
                                           int P1 = 0, int P2 = 0, int disp12MaxDiff = 0,
                                           int preFilterCap = 0, int uniquenessRatio = 0,
                                           int speckleWindowSize = 0, int speckleRange = 0,
                                           int mode = StereoSGBM::MODE_SGBM);
 };
 
+
+//! cv::undistort mode
+enum UndistortTypes
+{
+    PROJ_SPHERICAL_ORTHO  = 0,
+    PROJ_SPHERICAL_EQRECT = 1
+};
+
+/** @brief Transforms an image to compensate for lens distortion.
+
+The function transforms an image to compensate radial and tangential lens distortion.
+
+The function is simply a combination of #initUndistortRectifyMap (with unity R ) and #remap
+(with bilinear interpolation). See the former function for details of the transformation being
+performed.
+
+Those pixels in the destination image, for which there are no corresponding pixels in the source
+image, are filled with zeros (black color).
+
+A particular subset of the source image that will be visible in the corrected image can be regulated
+by newCameraMatrix. You can use #getOptimalNewCameraMatrix to compute the appropriate
+newCameraMatrix depending on your requirements.
+
+The camera matrix and the distortion parameters can be determined using #calibrateCamera. If
+the resolution of images is different from the resolution used at the calibration stage, \f$f_x,
+f_y, c_x\f$ and \f$c_y\f$ need to be scaled accordingly, while the distortion coefficients remain
+the same.
+
+@param src Input (distorted) image.
+@param dst Output (corrected) image that has the same size and type as src .
+@param cameraMatrix Input camera matrix \f$A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ .
+@param distCoeffs Input vector of distortion coefficients
+\f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6[, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$
+of 4, 5, 8, 12 or 14 elements. If the vector is NULL/empty, the zero distortion coefficients are assumed.
+@param newCameraMatrix Camera matrix of the distorted image. By default, it is the same as
+cameraMatrix but you may additionally scale and shift the result by using a different matrix.
+ */
+CV_EXPORTS_W void undistort( InputArray src, OutputArray dst,
+                             InputArray cameraMatrix,
+                             InputArray distCoeffs,
+                             InputArray newCameraMatrix = noArray() );
+
+/** @brief Computes the undistortion and rectification transformation map.
+
+The function computes the joint undistortion and rectification transformation and represents the
+result in the form of maps for remap. The undistorted image looks like original, as if it is
+captured with a camera using the camera matrix =newCameraMatrix and zero distortion. In case of a
+monocular camera, newCameraMatrix is usually equal to cameraMatrix, or it can be computed by
+#getOptimalNewCameraMatrix for a better control over scaling. In case of a stereo camera,
+newCameraMatrix is normally set to P1 or P2 computed by #stereoRectify .
+
+Also, this new camera is oriented differently in the coordinate space, according to R. That, for
+example, helps to align two heads of a stereo camera so that the epipolar lines on both images
+become horizontal and have the same y- coordinate (in case of a horizontally aligned stereo camera).
+
+The function actually builds the maps for the inverse mapping algorithm that is used by remap. That
+is, for each pixel \f$(u, v)\f$ in the destination (corrected and rectified) image, the function
+computes the corresponding coordinates in the source image (that is, in the original image from
+camera). The following process is applied:
+\f[
+\begin{array}{l}
+x  \leftarrow (u - {c'}_x)/{f'}_x  \\
+y  \leftarrow (v - {c'}_y)/{f'}_y  \\
+{[X\,Y\,W]} ^T  \leftarrow R^{-1}*[x \, y \, 1]^T  \\
+x'  \leftarrow X/W  \\
+y'  \leftarrow Y/W  \\
+r^2  \leftarrow x'^2 + y'^2 \\
+x''  \leftarrow x' \frac{1 + k_1 r^2 + k_2 r^4 + k_3 r^6}{1 + k_4 r^2 + k_5 r^4 + k_6 r^6}
++ 2p_1 x' y' + p_2(r^2 + 2 x'^2)  + s_1 r^2 + s_2 r^4\\
+y''  \leftarrow y' \frac{1 + k_1 r^2 + k_2 r^4 + k_3 r^6}{1 + k_4 r^2 + k_5 r^4 + k_6 r^6}
++ p_1 (r^2 + 2 y'^2) + 2 p_2 x' y' + s_3 r^2 + s_4 r^4 \\
+s\vecthree{x'''}{y'''}{1} =
+\vecthreethree{R_{33}(\tau_x, \tau_y)}{0}{-R_{13}(\tau_x, \tau_y)}
+{0}{R_{33}(\tau_x, \tau_y)}{-R_{23}(\tau_x, \tau_y)}
+{0}{0}{1} R(\tau_x, \tau_y) \vecthree{x''}{y''}{1}\\
+map_x(u,v)  \leftarrow x''' f_x + c_x  \\
+map_y(u,v)  \leftarrow y''' f_y + c_y
+\end{array}
+\f]
+where \f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6[, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$
+are the distortion coefficients.
+
+In case of a stereo camera, this function is called twice: once for each camera head, after
+stereoRectify, which in its turn is called after #stereoCalibrate. But if the stereo camera
+was not calibrated, it is still possible to compute the rectification transformations directly from
+the fundamental matrix using #stereoRectifyUncalibrated. For each camera, the function computes
+homography H as the rectification transformation in a pixel domain, not a rotation matrix R in 3D
+space. R can be computed from H as
+\f[\texttt{R} = \texttt{cameraMatrix} ^{-1} \cdot \texttt{H} \cdot \texttt{cameraMatrix}\f]
+where cameraMatrix can be chosen arbitrarily.
+
+@param cameraMatrix Input camera matrix \f$A=\vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ .
+@param distCoeffs Input vector of distortion coefficients
+\f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6[, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$
+of 4, 5, 8, 12 or 14 elements. If the vector is NULL/empty, the zero distortion coefficients are assumed.
+@param R Optional rectification transformation in the object space (3x3 matrix). R1 or R2 ,
+computed by #stereoRectify can be passed here. If the matrix is empty, the identity transformation
+is assumed. In cvInitUndistortMap R is assumed to be an identity matrix.
+@param newCameraMatrix New camera matrix \f$A'=\vecthreethree{f_x'}{0}{c_x'}{0}{f_y'}{c_y'}{0}{0}{1}\f$.
+@param size Undistorted image size.
+@param m1type Type of the first output map that can be CV_32FC1, CV_32FC2 or CV_16SC2, see #convertMaps
+@param map1 The first output map.
+@param map2 The second output map.
+ */
+CV_EXPORTS_W
+void initUndistortRectifyMap(InputArray cameraMatrix, InputArray distCoeffs,
+                             InputArray R, InputArray newCameraMatrix,
+                             Size size, int m1type, OutputArray map1, OutputArray map2);
+
+//! initializes maps for #remap for wide-angle
+CV_EXPORTS
+float initWideAngleProjMap(InputArray cameraMatrix, InputArray distCoeffs,
+                           Size imageSize, int destImageWidth,
+                           int m1type, OutputArray map1, OutputArray map2,
+                           enum UndistortTypes projType = PROJ_SPHERICAL_EQRECT, double alpha = 0);
+static inline
+float initWideAngleProjMap(InputArray cameraMatrix, InputArray distCoeffs,
+                           Size imageSize, int destImageWidth,
+                           int m1type, OutputArray map1, OutputArray map2,
+                           int projType, double alpha = 0)
+{
+    return initWideAngleProjMap(cameraMatrix, distCoeffs, imageSize, destImageWidth,
+                                m1type, map1, map2, (UndistortTypes)projType, alpha);
+}
+
+/** @brief Returns the default new camera matrix.
+
+The function returns the camera matrix that is either an exact copy of the input cameraMatrix (when
+centerPrincipalPoint=false ), or the modified one (when centerPrincipalPoint=true).
+
+In the latter case, the new camera matrix will be:
+
+\f[\begin{bmatrix} f_x && 0 && ( \texttt{imgSize.width} -1)*0.5  \\ 0 && f_y && ( \texttt{imgSize.height} -1)*0.5  \\ 0 && 0 && 1 \end{bmatrix} ,\f]
+
+where \f$f_x\f$ and \f$f_y\f$ are \f$(0,0)\f$ and \f$(1,1)\f$ elements of cameraMatrix, respectively.
+
+By default, the undistortion functions in OpenCV (see #initUndistortRectifyMap, #undistort) do not
+move the principal point. However, when you work with stereo, it is important to move the principal
+points in both views to the same y-coordinate (which is required by most of stereo correspondence
+algorithms), and maybe to the same x-coordinate too. So, you can form the new camera matrix for
+each view where the principal points are located at the center.
+
+@param cameraMatrix Input camera matrix.
+@param imgsize Camera view image size in pixels.
+@param centerPrincipalPoint Location of the principal point in the new camera matrix. The
+parameter indicates whether this location should be at the image center or not.
+ */
+CV_EXPORTS_W
+Mat getDefaultNewCameraMatrix(InputArray cameraMatrix, Size imgsize = Size(),
+                              bool centerPrincipalPoint = false);
+
+/** @brief Computes the ideal point coordinates from the observed point coordinates.
+
+The function is similar to #undistort and #initUndistortRectifyMap but it operates on a
+sparse set of points instead of a raster image. Also the function performs a reverse transformation
+to projectPoints. In case of a 3D object, it does not reconstruct its 3D coordinates, but for a
+planar object, it does, up to a translation vector, if the proper R is specified.
+
+For each observed point coordinate \f$(u, v)\f$ the function computes:
+\f[
+\begin{array}{l}
+x^{"}  \leftarrow (u - c_x)/f_x  \\
+y^{"}  \leftarrow (v - c_y)/f_y  \\
+(x',y') = undistort(x^{"},y^{"}, \texttt{distCoeffs}) \\
+{[X\,Y\,W]} ^T  \leftarrow R*[x' \, y' \, 1]^T  \\
+x  \leftarrow X/W  \\
+y  \leftarrow Y/W  \\
+\text{only performed if P is specified:} \\
+u'  \leftarrow x {f'}_x + {c'}_x  \\
+v'  \leftarrow y {f'}_y + {c'}_y
+\end{array}
+\f]
+
+where *undistort* is an approximate iterative algorithm that estimates the normalized original
+point coordinates out of the normalized distorted point coordinates ("normalized" means that the
+coordinates do not depend on the camera matrix).
+
+The function can be used for either a stereo camera head or a monocular camera (when R is empty).
+@param src Observed point coordinates, 2xN/Nx2 1-channel or 1xN/Nx1 2-channel (CV_32FC2 or CV_64FC2) (or
+vector\<Point2f\> ).
+@param dst Output ideal point coordinates (1xN/Nx1 2-channel or vector\<Point2f\> ) after undistortion and reverse perspective
+transformation. If matrix P is identity or omitted, dst will contain normalized point coordinates.
+@param cameraMatrix Camera matrix \f$\vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ .
+@param distCoeffs Input vector of distortion coefficients
+\f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6[, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$
+of 4, 5, 8, 12 or 14 elements. If the vector is NULL/empty, the zero distortion coefficients are assumed.
+@param R Rectification transformation in the object space (3x3 matrix). R1 or R2 computed by
+#stereoRectify can be passed here. If the matrix is empty, the identity transformation is used.
+@param P New camera matrix (3x3) or new projection matrix (3x4) \f$\begin{bmatrix} {f'}_x & 0 & {c'}_x & t_x \\ 0 & {f'}_y & {c'}_y & t_y \\ 0 & 0 & 1 & t_z \end{bmatrix}\f$. P1 or P2 computed by
+#stereoRectify can be passed here. If the matrix is empty, the identity new camera matrix is used.
+ */
+CV_EXPORTS_W
+void undistortPoints(InputArray src, OutputArray dst,
+                     InputArray cameraMatrix, InputArray distCoeffs,
+                     InputArray R = noArray(), InputArray P = noArray());
+/** @overload
+    @note Default version of #undistortPoints does 5 iterations to compute undistorted points.
+ */
+CV_EXPORTS_AS(undistortPointsIter)
+void undistortPoints(InputArray src, OutputArray dst,
+                     InputArray cameraMatrix, InputArray distCoeffs,
+                     InputArray R, InputArray P, TermCriteria criteria);
+
 //! @} calib3d
 
 /** @brief The methods in this namespace use a so-called fisheye camera model.
@@ -1756,15 +3482,16 @@ namespace fisheye
 //! @{
 
     enum{
-        CALIB_USE_INTRINSIC_GUESS   = 1,
-        CALIB_RECOMPUTE_EXTRINSIC   = 2,
-        CALIB_CHECK_COND            = 4,
-        CALIB_FIX_SKEW              = 8,
-        CALIB_FIX_K1                = 16,
-        CALIB_FIX_K2                = 32,
-        CALIB_FIX_K3                = 64,
-        CALIB_FIX_K4                = 128,
-        CALIB_FIX_INTRINSIC         = 256
+        CALIB_USE_INTRINSIC_GUESS   = 1 << 0,
+        CALIB_RECOMPUTE_EXTRINSIC   = 1 << 1,
+        CALIB_CHECK_COND            = 1 << 2,
+        CALIB_FIX_SKEW              = 1 << 3,
+        CALIB_FIX_K1                = 1 << 4,
+        CALIB_FIX_K2                = 1 << 5,
+        CALIB_FIX_K3                = 1 << 6,
+        CALIB_FIX_K4                = 1 << 7,
+        CALIB_FIX_INTRINSIC         = 1 << 8,
+        CALIB_FIX_PRINCIPAL_POINT   = 1 << 9
     };
 
     /** @brief Projects points using fisheye model
@@ -1803,7 +3530,7 @@ namespace fisheye
     @param alpha The skew coefficient.
     @param distorted Output array of image points, 1xN/Nx1 2-channel, or vector\<Point2f\> .
 
-    Note that the function assumes the camera matrix of the undistorted points to be indentity.
+    Note that the function assumes the camera matrix of the undistorted points to be identity.
     This means if you want to transform back points undistorted with undistortPoints() you have to
     multiply them with \f$P^{-1}\f$.
      */
@@ -1848,7 +3575,7 @@ namespace fisheye
     @param D Input vector of distortion coefficients \f$(k_1, k_2, k_3, k_4)\f$.
     @param Knew Camera matrix of the distorted image. By default, it is the identity matrix but you
     may additionally scale and shift the result by using a different matrix.
-    @param new_size
+    @param new_size the new size
 
     The function transforms an image to compensate radial and tangential lens distortion.
 
@@ -1874,14 +3601,14 @@ namespace fisheye
     /** @brief Estimates new camera matrix for undistortion or rectification.
 
     @param K Camera matrix \f$K = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{_1}\f$.
-    @param image_size
+    @param image_size Size of the image
     @param D Input vector of distortion coefficients \f$(k_1, k_2, k_3, k_4)\f$.
     @param R Rectification transformation in the object space: 3x3 1-channel, or vector: 3x1/1x3
     1-channel or 1x1 3-channel
     @param P New camera matrix (3x3) or new projection matrix (3x4)
     @param balance Sets the new focal length in range between the min focal length and the max focal
     length. Balance is in range of [0, 1].
-    @param new_size
+    @param new_size the new size
     @param fov_scale Divisor for new focal length.
      */
     CV_EXPORTS_W void estimateNewCameraMatrixForUndistortRectify(InputArray K, InputArray D, const Size &image_size, InputArray R,
@@ -1914,8 +3641,10 @@ namespace fisheye
     of intrinsic optimization.
     -   **fisheye::CALIB_CHECK_COND** The functions will check validity of condition number.
     -   **fisheye::CALIB_FIX_SKEW** Skew coefficient (alpha) is set to zero and stay zero.
-    -   **fisheye::CALIB_FIX_K1..4** Selected distortion coefficients are set to zeros and stay
-    zero.
+    -   **fisheye::CALIB_FIX_K1..fisheye::CALIB_FIX_K4** Selected distortion coefficients
+    are set to zeros and stay zero.
+    -   **fisheye::CALIB_FIX_PRINCIPAL_POINT** The principal point is not changed during the global
+    optimization. It stays at the center, or at the specified location when fisheye::CALIB_USE_INTRINSIC_GUESS is also set.
     @param criteria Termination criteria for the iterative optimization algorithm.
      */
     CV_EXPORTS_W double calibrate(InputArrayOfArrays objectPoints, InputArrayOfArrays imagePoints, const Size& image_size,
@@ -1939,7 +3668,7 @@ namespace fisheye
     @param P2 Output 3x4 projection matrix in the new (rectified) coordinate systems for the second
     camera.
     @param Q Output \f$4 \times 4\f$ disparity-to-depth mapping matrix (see reprojectImageTo3D ).
-    @param flags Operation flags that may be zero or CV_CALIB_ZERO_DISPARITY . If the flag is set,
+    @param flags Operation flags that may be zero or CALIB_ZERO_DISPARITY . If the flag is set,
     the function makes the principal points of each camera have the same pixel coordinates in the
     rectified views. And if the flag is not set, the function may still shift the images in the
     horizontal or vertical direction (depending on the orientation of epipolar lines) to maximize the
@@ -1965,7 +3694,7 @@ namespace fisheye
     observed by the second camera.
     @param K1 Input/output first camera matrix:
     \f$\vecthreethree{f_x^{(j)}}{0}{c_x^{(j)}}{0}{f_y^{(j)}}{c_y^{(j)}}{0}{0}{1}\f$ , \f$j = 0,\, 1\f$ . If
-    any of fisheye::CALIB_USE_INTRINSIC_GUESS , fisheye::CV_CALIB_FIX_INTRINSIC are specified,
+    any of fisheye::CALIB_USE_INTRINSIC_GUESS , fisheye::CALIB_FIX_INTRINSIC are specified,
     some or all of the matrix components must be initialized.
     @param D1 Input/output vector of distortion coefficients \f$(k_1, k_2, k_3, k_4)\f$ of 4 elements.
     @param K2 Input/output second camera matrix. The parameter is similar to K1 .
@@ -1975,7 +3704,7 @@ namespace fisheye
     @param R Output rotation matrix between the 1st and the 2nd camera coordinate systems.
     @param T Output translation vector between the coordinate systems of the cameras.
     @param flags Different flags that may be zero or a combination of the following values:
-    -   **fisheye::CV_CALIB_FIX_INTRINSIC** Fix K1, K2? and D1, D2? so that only R, T matrices
+    -   **fisheye::CALIB_FIX_INTRINSIC** Fix K1, K2 and D1, D2 so that only R, T matrices
     are estimated.
     -   **fisheye::CALIB_USE_INTRINSIC_GUESS** K1, K2 contains valid initial values of
     fx, fy, cx, cy that are optimized further. Otherwise, (cx, cy) is initially set to the image
@@ -1994,12 +3723,48 @@ namespace fisheye
                                   TermCriteria criteria = TermCriteria(TermCriteria::COUNT + TermCriteria::EPS, 100, DBL_EPSILON));
 
 //! @} calib3d_fisheye
-}
+} // end namespace fisheye
 
-} // cv
+} //end namespace cv
 
-#ifndef DISABLE_OPENCV_24_COMPATIBILITY
-#include "opencv2/calib3d/calib3d_c.h"
+#if 0 //def __cplusplus
+//////////////////////////////////////////////////////////////////////////////////////////
+class CV_EXPORTS CvLevMarq
+{
+public:
+    CvLevMarq();
+    CvLevMarq( int nparams, int nerrs, CvTermCriteria criteria=
+              cvTermCriteria(cv::TermCriteria::EPS+cv::TermCriteria::MAX_ITER,30,DBL_EPSILON),
+              bool completeSymmFlag=false );
+    ~CvLevMarq();
+    void init( int nparams, int nerrs, CvTermCriteria criteria=
+              cvTermCriteria(cv::TermCriteria::EPS+cv::TermCriteria::MAX_ITER,30,DBL_EPSILON),
+              bool completeSymmFlag=false );
+    bool update( const CvMat*& param, CvMat*& J, CvMat*& err );
+    bool updateAlt( const CvMat*& param, CvMat*& JtJ, CvMat*& JtErr, double*& errNorm );
+
+    void clear();
+    void step();
+    enum { DONE=0, STARTED=1, CALC_J=2, CHECK_ERR=3 };
+
+    cv::Ptr<CvMat> mask;
+    cv::Ptr<CvMat> prevParam;
+    cv::Ptr<CvMat> param;
+    cv::Ptr<CvMat> J;
+    cv::Ptr<CvMat> err;
+    cv::Ptr<CvMat> JtJ;
+    cv::Ptr<CvMat> JtJN;
+    cv::Ptr<CvMat> JtErr;
+    cv::Ptr<CvMat> JtJV;
+    cv::Ptr<CvMat> JtJW;
+    double prevErrNorm, errNorm;
+    int lambdaLg10;
+    CvTermCriteria criteria;
+    int state;
+    int iters;
+    bool completeSymmFlag;
+    int solveMethod;
+};
 #endif
 
 #endif
diff --git a/IPL/include/opencv/opencv2/calib3d/calib3d_c.h b/IPL/include/opencv/opencv2/calib3d/calib3d_c.h
index 0e77aa8..959579c 100644
--- a/IPL/include/opencv/opencv2/calib3d/calib3d_c.h
+++ b/IPL/include/opencv/opencv2/calib3d/calib3d_c.h
@@ -41,44 +41,15 @@
 //
 //M*/
 
-#ifndef __OPENCV_CALIB3D_C_H__
-#define __OPENCV_CALIB3D_C_H__
+#ifndef OPENCV_CALIB3D_C_H
+#define OPENCV_CALIB3D_C_H
 
-#include "opencv2/core/core_c.h"
+#include "opencv2/core/types_c.h"
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-/** @addtogroup calib3d_c
-  @{
-  */
-
-/****************************************************************************************\
-*                      Camera Calibration, Pose Estimation and Stereo                    *
-\****************************************************************************************/
-
-typedef struct CvPOSITObject CvPOSITObject;
-
-/* Allocates and initializes CvPOSITObject structure before doing cvPOSIT */
-CVAPI(CvPOSITObject*)  cvCreatePOSITObject( CvPoint3D32f* points, int point_count );
-
-
-/* Runs POSIT (POSe from ITeration) algorithm for determining 3d position of
-   an object given its model and projection in a weak-perspective case */
-CVAPI(void)  cvPOSIT(  CvPOSITObject* posit_object, CvPoint2D32f* image_points,
-                       double focal_length, CvTermCriteria criteria,
-                       float* rotation_matrix, float* translation_vector);
-
-/* Releases CvPOSITObject structure */
-CVAPI(void)  cvReleasePOSITObject( CvPOSITObject**  posit_object );
-
-/* updates the number of RANSAC iterations */
-CVAPI(int) cvRANSACUpdateNumIters( double p, double err_prob,
-                                   int model_points, int max_iters );
-
-CVAPI(void) cvConvertPointsHomogeneous( const CvMat* src, CvMat* dst );
-
 /* Calculates fundamental matrix given a set of corresponding points */
 #define CV_FM_7POINT 1
 #define CV_FM_8POINT 2
@@ -99,136 +70,11 @@ enum
     CV_DLS = 3 // Joel A. Hesch and Stergios I. Roumeliotis. "A Direct Least-Squares (DLS) Method for PnP"
 };
 
-CVAPI(int) cvFindFundamentalMat( const CvMat* points1, const CvMat* points2,
-                                 CvMat* fundamental_matrix,
-                                 int method CV_DEFAULT(CV_FM_RANSAC),
-                                 double param1 CV_DEFAULT(3.), double param2 CV_DEFAULT(0.99),
-                                 CvMat* status CV_DEFAULT(NULL) );
-
-/* For each input point on one of images
-   computes parameters of the corresponding
-   epipolar line on the other image */
-CVAPI(void) cvComputeCorrespondEpilines( const CvMat* points,
-                                         int which_image,
-                                         const CvMat* fundamental_matrix,
-                                         CvMat* correspondent_lines );
-
-/* Triangulation functions */
-
-CVAPI(void) cvTriangulatePoints(CvMat* projMatr1, CvMat* projMatr2,
-                                CvMat* projPoints1, CvMat* projPoints2,
-                                CvMat* points4D);
-
-CVAPI(void) cvCorrectMatches(CvMat* F, CvMat* points1, CvMat* points2,
-                             CvMat* new_points1, CvMat* new_points2);
-
-
-/* Computes the optimal new camera matrix according to the free scaling parameter alpha:
-   alpha=0 - only valid pixels will be retained in the undistorted image
-   alpha=1 - all the source image pixels will be retained in the undistorted image
-*/
-CVAPI(void) cvGetOptimalNewCameraMatrix( const CvMat* camera_matrix,
-                                         const CvMat* dist_coeffs,
-                                         CvSize image_size, double alpha,
-                                         CvMat* new_camera_matrix,
-                                         CvSize new_imag_size CV_DEFAULT(cvSize(0,0)),
-                                         CvRect* valid_pixel_ROI CV_DEFAULT(0),
-                                         int center_principal_point CV_DEFAULT(0));
-
-/* Converts rotation vector to rotation matrix or vice versa */
-CVAPI(int) cvRodrigues2( const CvMat* src, CvMat* dst,
-                         CvMat* jacobian CV_DEFAULT(0) );
-
-/* Finds perspective transformation between the object plane and image (view) plane */
-CVAPI(int) cvFindHomography( const CvMat* src_points,
-                             const CvMat* dst_points,
-                             CvMat* homography,
-                             int method CV_DEFAULT(0),
-                             double ransacReprojThreshold CV_DEFAULT(3),
-                             CvMat* mask CV_DEFAULT(0),
-                             int maxIters CV_DEFAULT(2000),
-                             double confidence CV_DEFAULT(0.995));
-
-/* Computes RQ decomposition for 3x3 matrices */
-CVAPI(void) cvRQDecomp3x3( const CvMat *matrixM, CvMat *matrixR, CvMat *matrixQ,
-                           CvMat *matrixQx CV_DEFAULT(NULL),
-                           CvMat *matrixQy CV_DEFAULT(NULL),
-                           CvMat *matrixQz CV_DEFAULT(NULL),
-                           CvPoint3D64f *eulerAngles CV_DEFAULT(NULL));
-
-/* Computes projection matrix decomposition */
-CVAPI(void) cvDecomposeProjectionMatrix( const CvMat *projMatr, CvMat *calibMatr,
-                                         CvMat *rotMatr, CvMat *posVect,
-                                         CvMat *rotMatrX CV_DEFAULT(NULL),
-                                         CvMat *rotMatrY CV_DEFAULT(NULL),
-                                         CvMat *rotMatrZ CV_DEFAULT(NULL),
-                                         CvPoint3D64f *eulerAngles CV_DEFAULT(NULL));
-
-/* Computes d(AB)/dA and d(AB)/dB */
-CVAPI(void) cvCalcMatMulDeriv( const CvMat* A, const CvMat* B, CvMat* dABdA, CvMat* dABdB );
-
-/* Computes r3 = rodrigues(rodrigues(r2)*rodrigues(r1)),
-   t3 = rodrigues(r2)*t1 + t2 and the respective derivatives */
-CVAPI(void) cvComposeRT( const CvMat* _rvec1, const CvMat* _tvec1,
-                         const CvMat* _rvec2, const CvMat* _tvec2,
-                         CvMat* _rvec3, CvMat* _tvec3,
-                         CvMat* dr3dr1 CV_DEFAULT(0), CvMat* dr3dt1 CV_DEFAULT(0),
-                         CvMat* dr3dr2 CV_DEFAULT(0), CvMat* dr3dt2 CV_DEFAULT(0),
-                         CvMat* dt3dr1 CV_DEFAULT(0), CvMat* dt3dt1 CV_DEFAULT(0),
-                         CvMat* dt3dr2 CV_DEFAULT(0), CvMat* dt3dt2 CV_DEFAULT(0) );
-
-/* Projects object points to the view plane using
-   the specified extrinsic and intrinsic camera parameters */
-CVAPI(void) cvProjectPoints2( const CvMat* object_points, const CvMat* rotation_vector,
-                              const CvMat* translation_vector, const CvMat* camera_matrix,
-                              const CvMat* distortion_coeffs, CvMat* image_points,
-                              CvMat* dpdrot CV_DEFAULT(NULL), CvMat* dpdt CV_DEFAULT(NULL),
-                              CvMat* dpdf CV_DEFAULT(NULL), CvMat* dpdc CV_DEFAULT(NULL),
-                              CvMat* dpddist CV_DEFAULT(NULL),
-                              double aspect_ratio CV_DEFAULT(0));
-
-/* Finds extrinsic camera parameters from
-   a few known corresponding point pairs and intrinsic parameters */
-CVAPI(void) cvFindExtrinsicCameraParams2( const CvMat* object_points,
-                                          const CvMat* image_points,
-                                          const CvMat* camera_matrix,
-                                          const CvMat* distortion_coeffs,
-                                          CvMat* rotation_vector,
-                                          CvMat* translation_vector,
-                                          int use_extrinsic_guess CV_DEFAULT(0) );
-
-/* Computes initial estimate of the intrinsic camera parameters
-   in case of planar calibration target (e.g. chessboard) */
-CVAPI(void) cvInitIntrinsicParams2D( const CvMat* object_points,
-                                     const CvMat* image_points,
-                                     const CvMat* npoints, CvSize image_size,
-                                     CvMat* camera_matrix,
-                                     double aspect_ratio CV_DEFAULT(1.) );
-
 #define CV_CALIB_CB_ADAPTIVE_THRESH  1
 #define CV_CALIB_CB_NORMALIZE_IMAGE  2
 #define CV_CALIB_CB_FILTER_QUADS     4
 #define CV_CALIB_CB_FAST_CHECK       8
 
-// Performs a fast check if a chessboard is in the input image. This is a workaround to
-// a problem of cvFindChessboardCorners being slow on images with no chessboard
-// - src: input image
-// - size: chessboard size
-// Returns 1 if a chessboard can be in this image and findChessboardCorners should be called,
-// 0 if there is no chessboard, -1 in case of error
-CVAPI(int) cvCheckChessboard(IplImage* src, CvSize size);
-
-    /* Detects corners on a chessboard calibration pattern */
-CVAPI(int) cvFindChessboardCorners( const void* image, CvSize pattern_size,
-                                    CvPoint2D32f* corners,
-                                    int* corner_count CV_DEFAULT(NULL),
-                                    int flags CV_DEFAULT(CV_CALIB_CB_ADAPTIVE_THRESH+CV_CALIB_CB_NORMALIZE_IMAGE) );
-
-/* Draws individual chessboard corners or the whole chessboard detected */
-CVAPI(void) cvDrawChessboardCorners( CvArr* image, CvSize pattern_size,
-                                     CvPoint2D32f* corners,
-                                     int count, int pattern_was_found );
-
 #define CV_CALIB_USE_INTRINSIC_GUESS  1
 #define CV_CALIB_FIX_ASPECT_RATIO     2
 #define CV_CALIB_FIX_PRINCIPAL_POINT  4
@@ -245,140 +91,19 @@ CVAPI(void) cvDrawChessboardCorners( CvArr* image, CvSize pattern_size,
 #define CV_CALIB_FIX_S1_S2_S3_S4  65536
 #define CV_CALIB_TILTED_MODEL  262144
 #define CV_CALIB_FIX_TAUX_TAUY  524288
+#define CV_CALIB_FIX_TANGENT_DIST 2097152
 
-
-/* Finds intrinsic and extrinsic camera parameters
-   from a few views of known calibration pattern */
-CVAPI(double) cvCalibrateCamera2( const CvMat* object_points,
-                                const CvMat* image_points,
-                                const CvMat* point_counts,
-                                CvSize image_size,
-                                CvMat* camera_matrix,
-                                CvMat* distortion_coeffs,
-                                CvMat* rotation_vectors CV_DEFAULT(NULL),
-                                CvMat* translation_vectors CV_DEFAULT(NULL),
-                                int flags CV_DEFAULT(0),
-                                CvTermCriteria term_crit CV_DEFAULT(cvTermCriteria(
-                                    CV_TERMCRIT_ITER+CV_TERMCRIT_EPS,30,DBL_EPSILON)) );
-
-/* Computes various useful characteristics of the camera from the data computed by
-   cvCalibrateCamera2 */
-CVAPI(void) cvCalibrationMatrixValues( const CvMat *camera_matrix,
-                                CvSize image_size,
-                                double aperture_width CV_DEFAULT(0),
-                                double aperture_height CV_DEFAULT(0),
-                                double *fovx CV_DEFAULT(NULL),
-                                double *fovy CV_DEFAULT(NULL),
-                                double *focal_length CV_DEFAULT(NULL),
-                                CvPoint2D64f *principal_point CV_DEFAULT(NULL),
-                                double *pixel_aspect_ratio CV_DEFAULT(NULL));
+#define CV_CALIB_NINTRINSIC 18
 
 #define CV_CALIB_FIX_INTRINSIC  256
 #define CV_CALIB_SAME_FOCAL_LENGTH 512
 
-/* Computes the transformation from one camera coordinate system to another one
-   from a few correspondent views of the same calibration target. Optionally, calibrates
-   both cameras */
-CVAPI(double) cvStereoCalibrate( const CvMat* object_points, const CvMat* image_points1,
-                               const CvMat* image_points2, const CvMat* npoints,
-                               CvMat* camera_matrix1, CvMat* dist_coeffs1,
-                               CvMat* camera_matrix2, CvMat* dist_coeffs2,
-                               CvSize image_size, CvMat* R, CvMat* T,
-                               CvMat* E CV_DEFAULT(0), CvMat* F CV_DEFAULT(0),
-                               int flags CV_DEFAULT(CV_CALIB_FIX_INTRINSIC),
-                               CvTermCriteria term_crit CV_DEFAULT(cvTermCriteria(
-                                   CV_TERMCRIT_ITER+CV_TERMCRIT_EPS,30,1e-6)) );
-
 #define CV_CALIB_ZERO_DISPARITY 1024
 
-/* Computes 3D rotations (+ optional shift) for each camera coordinate system to make both
-   views parallel (=> to make all the epipolar lines horizontal or vertical) */
-CVAPI(void) cvStereoRectify( const CvMat* camera_matrix1, const CvMat* camera_matrix2,
-                             const CvMat* dist_coeffs1, const CvMat* dist_coeffs2,
-                             CvSize image_size, const CvMat* R, const CvMat* T,
-                             CvMat* R1, CvMat* R2, CvMat* P1, CvMat* P2,
-                             CvMat* Q CV_DEFAULT(0),
-                             int flags CV_DEFAULT(CV_CALIB_ZERO_DISPARITY),
-                             double alpha CV_DEFAULT(-1),
-                             CvSize new_image_size CV_DEFAULT(cvSize(0,0)),
-                             CvRect* valid_pix_ROI1 CV_DEFAULT(0),
-                             CvRect* valid_pix_ROI2 CV_DEFAULT(0));
-
-/* Computes rectification transformations for uncalibrated pair of images using a set
-   of point correspondences */
-CVAPI(int) cvStereoRectifyUncalibrated( const CvMat* points1, const CvMat* points2,
-                                        const CvMat* F, CvSize img_size,
-                                        CvMat* H1, CvMat* H2,
-                                        double threshold CV_DEFAULT(5));
-
-
-
 /* stereo correspondence parameters and functions */
-
 #define CV_STEREO_BM_NORMALIZED_RESPONSE  0
 #define CV_STEREO_BM_XSOBEL               1
 
-/* Block matching algorithm structure */
-typedef struct CvStereoBMState
-{
-    // pre-filtering (normalization of input images)
-    int preFilterType; // =CV_STEREO_BM_NORMALIZED_RESPONSE now
-    int preFilterSize; // averaging window size: ~5x5..21x21
-    int preFilterCap; // the output of pre-filtering is clipped by [-preFilterCap,preFilterCap]
-
-    // correspondence using Sum of Absolute Difference (SAD)
-    int SADWindowSize; // ~5x5..21x21
-    int minDisparity;  // minimum disparity (can be negative)
-    int numberOfDisparities; // maximum disparity - minimum disparity (> 0)
-
-    // post-filtering
-    int textureThreshold;  // the disparity is only computed for pixels
-                           // with textured enough neighborhood
-    int uniquenessRatio;   // accept the computed disparity d* only if
-                           // SAD(d) >= SAD(d*)*(1 + uniquenessRatio/100.)
-                           // for any d != d*+/-1 within the search range.
-    int speckleWindowSize; // disparity variation window
-    int speckleRange; // acceptable range of variation in window
-
-    int trySmallerWindows; // if 1, the results may be more accurate,
-                           // at the expense of slower processing
-    CvRect roi1, roi2;
-    int disp12MaxDiff;
-
-    // temporary buffers
-    CvMat* preFilteredImg0;
-    CvMat* preFilteredImg1;
-    CvMat* slidingSumBuf;
-    CvMat* cost;
-    CvMat* disp;
-} CvStereoBMState;
-
-#define CV_STEREO_BM_BASIC 0
-#define CV_STEREO_BM_FISH_EYE 1
-#define CV_STEREO_BM_NARROW 2
-
-CVAPI(CvStereoBMState*) cvCreateStereoBMState(int preset CV_DEFAULT(CV_STEREO_BM_BASIC),
-                                              int numberOfDisparities CV_DEFAULT(0));
-
-CVAPI(void) cvReleaseStereoBMState( CvStereoBMState** state );
-
-CVAPI(void) cvFindStereoCorrespondenceBM( const CvArr* left, const CvArr* right,
-                                          CvArr* disparity, CvStereoBMState* state );
-
-CVAPI(CvRect) cvGetValidDisparityROI( CvRect roi1, CvRect roi2, int minDisparity,
-                                      int numberOfDisparities, int SADWindowSize );
-
-CVAPI(void) cvValidateDisparity( CvArr* disparity, const CvArr* cost,
-                                 int minDisparity, int numberOfDisparities,
-                                 int disp12MaxDiff CV_DEFAULT(1) );
-
-/* Reprojects the computed disparity image to the 3D space using the specified 4x4 matrix */
-CVAPI(void)  cvReprojectImageTo3D( const CvArr* disparityImage,
-                                   CvArr* _3dImage, const CvMat* Q,
-                                   int handleMissingValues CV_DEFAULT(0) );
-
-/** @} calib3d_c */
-
 #ifdef __cplusplus
 } // extern "C"
 
@@ -388,11 +113,11 @@ class CV_EXPORTS CvLevMarq
 public:
     CvLevMarq();
     CvLevMarq( int nparams, int nerrs, CvTermCriteria criteria=
-              cvTermCriteria(CV_TERMCRIT_EPS+CV_TERMCRIT_ITER,30,DBL_EPSILON),
+              cvTermCriteria(cv::TermCriteria::EPS+cv::TermCriteria::MAX_ITER,30,DBL_EPSILON),
               bool completeSymmFlag=false );
     ~CvLevMarq();
     void init( int nparams, int nerrs, CvTermCriteria criteria=
-              cvTermCriteria(CV_TERMCRIT_EPS+CV_TERMCRIT_ITER,30,DBL_EPSILON),
+              cvTermCriteria(cv::TermCriteria::EPS+cv::TermCriteria::MAX_ITER,30,DBL_EPSILON),
               bool completeSymmFlag=false );
     bool update( const CvMat*& param, CvMat*& J, CvMat*& err );
     bool updateAlt( const CvMat*& param, CvMat*& JtJ, CvMat*& JtErr, double*& errNorm );
@@ -422,4 +147,4 @@ class CV_EXPORTS CvLevMarq
 
 #endif
 
-#endif /* __OPENCV_CALIB3D_C_H__ */
+#endif /* OPENCV_CALIB3D_C_H */
diff --git a/IPL/include/opencv/opencv2/ccalib.hpp b/IPL/include/opencv/opencv2/ccalib.hpp
index 79df598..538ec0f 100644
--- a/IPL/include/opencv/opencv2/ccalib.hpp
+++ b/IPL/include/opencv/opencv2/ccalib.hpp
@@ -71,7 +71,7 @@ class CV_EXPORTS CustomPattern : public Algorithm
 
 	bool isInitialized();
 
-	void getPatternPoints(OutputArray original_points);
+	void getPatternPoints(std::vector<KeyPoint>& original_points);
     /**<
 		Returns a vector<Point> of the original points.
 	*/
@@ -96,21 +96,21 @@ class CV_EXPORTS CustomPattern : public Algorithm
 		Calls the calirateCamera function with the same inputs.
 	*/
 
-	bool findRt(InputArray objectPoints, InputArray imagePoints, InputArray cameraMatrix, InputArray distCoeffs,
-                OutputArray rvec, OutputArray tvec, bool useExtrinsicGuess = false, int flags = SOLVEPNP_ITERATIVE);
-	bool findRt(InputArray image, InputArray cameraMatrix, InputArray distCoeffs,
-                OutputArray rvec, OutputArray tvec, bool useExtrinsicGuess = false, int flags = SOLVEPNP_ITERATIVE);
+    bool findRt(InputArray objectPoints, InputArray imagePoints, InputArray cameraMatrix, InputArray distCoeffs,
+                InputOutputArray rvec, InputOutputArray tvec, bool useExtrinsicGuess = false, int flags = SOLVEPNP_ITERATIVE);
+    bool findRt(InputArray image, InputArray cameraMatrix, InputArray distCoeffs,
+                InputOutputArray rvec, InputOutputArray tvec, bool useExtrinsicGuess = false, int flags = SOLVEPNP_ITERATIVE);
     /**<
 		Uses solvePnP to find the rotation and translation of the pattern
 		with respect to the camera frame.
 	*/
 
-	bool findRtRANSAC(InputArray objectPoints, InputArray imagePoints, InputArray cameraMatrix, InputArray distCoeffs,
-				OutputArray rvec, OutputArray tvec, bool useExtrinsicGuess = false, int iterationsCount = 100,
-				float reprojectionError = 8.0, int minInliersCount = 100, OutputArray inliers = noArray(), int flags = SOLVEPNP_ITERATIVE);
-	bool findRtRANSAC(InputArray image, InputArray cameraMatrix, InputArray distCoeffs,
-				OutputArray rvec, OutputArray tvec, bool useExtrinsicGuess = false, int iterationsCount = 100,
-				float reprojectionError = 8.0, int minInliersCount = 100, OutputArray inliers = noArray(), int flags = SOLVEPNP_ITERATIVE);
+    bool findRtRANSAC(InputArray objectPoints, InputArray imagePoints, InputArray cameraMatrix, InputArray distCoeffs,
+                      InputOutputArray rvec, InputOutputArray tvec, bool useExtrinsicGuess = false, int iterationsCount = 100,
+                      float reprojectionError = 8.0, int minInliersCount = 100, OutputArray inliers = noArray(), int flags = SOLVEPNP_ITERATIVE);
+    bool findRtRANSAC(InputArray image, InputArray cameraMatrix, InputArray distCoeffs,
+                      InputOutputArray rvec, InputOutputArray tvec, bool useExtrinsicGuess = false, int iterationsCount = 100,
+                      float reprojectionError = 8.0, int minInliersCount = 100, OutputArray inliers = noArray(), int flags = SOLVEPNP_ITERATIVE);
         /**<
 		Uses solvePnPRansac()
 	*/
diff --git a/IPL/include/opencv/opencv2/ccalib/omnidir.hpp b/IPL/include/opencv/opencv2/ccalib/omnidir.hpp
index 25c41bf..d3132b3 100644
--- a/IPL/include/opencv/opencv2/ccalib/omnidir.hpp
+++ b/IPL/include/opencv/opencv2/ccalib/omnidir.hpp
@@ -39,12 +39,13 @@
 //
 //M*/
 
-#include <opencv2/core.hpp>
-#include <vector>
-
 #ifndef __OPENCV_OMNIDIR_HPP__
 #define __OPENCV_OMNIDIR_HPP__
 
+#include "opencv2/core.hpp"
+#include "opencv2/core/affine.hpp"
+#include <vector>
+
 namespace cv
 {
 namespace omnidir
@@ -102,6 +103,10 @@ namespace omnidir
     CV_EXPORTS_W void projectPoints(InputArray objectPoints, OutputArray imagePoints, InputArray rvec, InputArray tvec,
                        InputArray K, double xi, InputArray D, OutputArray jacobian = noArray());
 
+    /** @overload */
+    CV_EXPORTS void projectPoints(InputArray objectPoints, OutputArray imagePoints, const Affine3d& affine,
+                        InputArray K, double xi, InputArray D, OutputArray jacobian = noArray());
+
     /** @brief Undistort 2D image points for omnidirectional camera using CMei's model
 
     @param distorted Array of distorted image points, vector of Vec2f
@@ -126,7 +131,7 @@ namespace omnidir
     @param R Rotation transform between the original and object space : 3x3 1-channel, or vector: 3x1/1x3, with depth CV_32F or CV_64F
     @param P New camera matrix (3x3) or new projection matrix (3x4)
     @param size Undistorted image size.
-    @param mltype Type of the first output map that can be CV_32FC1 or CV_16SC2 . See convertMaps()
+    @param m1type Type of the first output map that can be CV_32FC1 or CV_16SC2 . See convertMaps()
     for details.
     @param map1 The first output map.
     @param map2 The second output map.
@@ -134,7 +139,7 @@ namespace omnidir
     are supported.
      */
     CV_EXPORTS_W void initUndistortRectifyMap(InputArray K, InputArray D, InputArray xi, InputArray R, InputArray P, const cv::Size& size,
-        int mltype, OutputArray map1, OutputArray map2, int flags);
+        int m1type, OutputArray map1, OutputArray map2, int flags);
 
     /** @brief Undistort omnidirectional images to perspective images
 
@@ -168,7 +173,7 @@ namespace omnidir
     @param idx Indices of images that pass initialization, which are really used in calibration. So the size of rvecs is the
     same as idx.total().
     */
-    CV_EXPORTS_W double calibrate(InputArray objectPoints, InputArray imagePoints, Size size,
+    CV_EXPORTS_W double calibrate(InputArrayOfArrays objectPoints, InputArrayOfArrays imagePoints, Size size,
         InputOutputArray K, InputOutputArray xi, InputOutputArray D, OutputArrayOfArrays rvecs, OutputArrayOfArrays tvecs,
         int flags, TermCriteria criteria, OutputArray idx=noArray());
 
@@ -278,8 +283,6 @@ namespace internal
     double computeMeanReproErrStereo(InputArrayOfArrays objectPoints, InputArrayOfArrays imagePoints1, InputArrayOfArrays imagePoints2, InputArray K1, InputArray K2,
         InputArray D1, InputArray D2, double xi1, double xi2, InputArray om, InputArray T, InputArrayOfArrays omL, InputArrayOfArrays TL);
 
-    void checkFixed(Mat &G, int flags, int n);
-
     void subMatrix(const Mat& src, Mat& dst, const std::vector<int>& cols, const std::vector<int>& rows);
 
     void flags2idx(int flags, std::vector<int>& idx, int n);
@@ -309,4 +312,4 @@ namespace internal
 } // omnidir
 
 } //cv
-#endif
\ No newline at end of file
+#endif
diff --git a/IPL/include/opencv/opencv2/ccalib/randpattern.hpp b/IPL/include/opencv/opencv2/ccalib/randpattern.hpp
index 9fc08f8..fb362bd 100644
--- a/IPL/include/opencv/opencv2/ccalib/randpattern.hpp
+++ b/IPL/include/opencv/opencv2/ccalib/randpattern.hpp
@@ -86,7 +86,14 @@ class CV_EXPORTS RandomPatternCornerFinder
     /* @brief Load pattern image and compute features for pattern
     @param patternImage image for "random" pattern generated by RandomPatternGenerator, run it first.
     */
-    void loadPattern(cv::Mat patternImage);
+    void loadPattern(const cv::Mat &patternImage);
+
+    /* @brief Load pattern and features
+	@param patternImage image for "random" pattern generated by RandomPatternGenerator, run it first.
+	@param patternKeyPoints keyPoints created from a FeatureDetector.
+	@param patternDescriptors descriptors created from a DescriptorExtractor.
+	*/
+    void loadPattern(const cv::Mat &patternImage, const std::vector<cv::KeyPoint> &patternKeyPoints, const cv::Mat &patternDescriptors);
 
     /* @brief Compute matched object points and image points which are used for calibration
     The objectPoints (3D) and imagePoints (2D) are stored inside the class. Run getObjectPoints()
@@ -108,11 +115,11 @@ class CV_EXPORTS RandomPatternCornerFinder
 
     /* @brief Get object(3D) points
     */
-    std::vector<cv::Mat> getObjectPoints();
+    const std::vector<cv::Mat> &getObjectPoints();
 
     /* @brief and image(2D) points
     */
-    std::vector<cv::Mat> getImagePoints();
+    const std::vector<cv::Mat> &getImagePoints();
 
 private:
 
diff --git a/IPL/include/opencv/opencv2/core.hpp b/IPL/include/opencv/opencv2/core.hpp
index 2e47658..ff9fa36 100644
--- a/IPL/include/opencv/opencv2/core.hpp
+++ b/IPL/include/opencv/opencv2/core.hpp
@@ -42,8 +42,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_CORE_HPP__
-#define __OPENCV_CORE_HPP__
+#ifndef OPENCV_CORE_HPP
+#define OPENCV_CORE_HPP
 
 #ifndef __cplusplus
 #  error core.hpp header must be compiled as C++
@@ -68,12 +68,17 @@
         @defgroup core_c_glue Connections with C++
     @}
     @defgroup core_array Operations on arrays
+    @defgroup core_async Asynchronous API
     @defgroup core_xml XML/YAML Persistence
     @defgroup core_cluster Clustering
     @defgroup core_utils Utility and system functions and macros
     @{
+        @defgroup core_logging Logging facilities
         @defgroup core_utils_sse SSE utilities
         @defgroup core_utils_neon NEON utilities
+        @defgroup core_utils_vsx VSX utilities
+        @defgroup core_utils_softfloat Softfloat support
+        @defgroup core_utils_samples Utility functions for OpenCV samples
     @}
     @defgroup core_opengl OpenGL interoperability
     @defgroup core_ipp Intel IPP Asynchronous C/C++ Converters
@@ -90,6 +95,7 @@
         @{
             @defgroup core_hal_intrin_impl Private implementation helpers
         @}
+        @defgroup core_lowlevel_api Low-level API for external libraries / plugins
     @}
 @}
  */
@@ -114,7 +120,7 @@ class CV_EXPORTS Exception : public std::exception
      */
     Exception();
     /*!
-     Full constructor. Normally the constuctor is not called explicitly.
+     Full constructor. Normally the constructor is not called explicitly.
      Instead, the macros CV_Error(), CV_Error_() and CV_Assert() are used.
     */
     Exception(int _code, const String& _err, const String& _func, const String& _file, int _line);
@@ -123,7 +129,7 @@ class CV_EXPORTS Exception : public std::exception
     /*!
      \return the error description and the context as a text string.
     */
-    virtual const char *what() const throw();
+    virtual const char *what() const throw() CV_OVERRIDE;
     void formatMessage();
 
     String msg; ///< the formatted error message
@@ -131,19 +137,19 @@ class CV_EXPORTS Exception : public std::exception
     int code; ///< error code @see CVStatus
     String err; ///< error description
     String func; ///< function name. Available only when the compiler supports getting it
-    String file; ///< source file name where the error has occured
-    int line; ///< line number in the source file where the error has occured
+    String file; ///< source file name where the error has occurred
+    int line; ///< line number in the source file where the error has occurred
 };
 
 /*! @brief Signals an error and raises the exception.
 
 By default the function prints information about the error to stderr,
 then it either stops if cv::setBreakOnError() had been called before or raises the exception.
-It is possible to alternate error processing by using cv::redirectError().
+It is possible to alternate error processing by using #redirectError().
 @param exc the exception raisen.
 @deprecated drop this version
  */
-CV_EXPORTS void error( const Exception& exc );
+CV_EXPORTS CV_NORETURN void error(const Exception& exc);
 
 enum SortFlags { SORT_EVERY_ROW    = 0, //!< each matrix row is sorted independently
                  SORT_EVERY_COLUMN = 1, //!< each matrix column is sorted
@@ -174,7 +180,7 @@ enum CovarFlags {
     /**The output covariance matrix is calculated as:
         \f[\texttt{scale}   \cdot  [  \texttt{vects}  [0]-  \texttt{mean}  , \texttt{vects}  [1]-  \texttt{mean}  ,...]  \cdot  [ \texttt{vects}  [0]- \texttt{mean}  , \texttt{vects}  [1]- \texttt{mean}  ,...]^T,\f]
         covar will be a square matrix of the same size as the total number of elements in each input
-        vector. One and only one of COVAR_SCRAMBLED and COVAR_NORMAL must be specified.*/
+        vector. One and only one of #COVAR_SCRAMBLED and #COVAR_NORMAL must be specified.*/
     COVAR_NORMAL    = 1,
     /** If the flag is specified, the function does not calculate mean from
         the input vectors but, instead, uses the passed mean vector. This is useful if mean has been
@@ -210,28 +216,6 @@ enum KmeansFlags {
     KMEANS_USE_INITIAL_LABELS = 1
 };
 
-//! type of line
-enum LineTypes {
-    FILLED  = -1,
-    LINE_4  = 4, //!< 4-connected line
-    LINE_8  = 8, //!< 8-connected line
-    LINE_AA = 16 //!< antialiased line
-};
-
-//! Only a subset of Hershey fonts
-//! <http://sources.isc.org/utils/misc/hershey-font.txt> are supported
-enum HersheyFonts {
-    FONT_HERSHEY_SIMPLEX        = 0, //!< normal size sans-serif font
-    FONT_HERSHEY_PLAIN          = 1, //!< small size sans-serif font
-    FONT_HERSHEY_DUPLEX         = 2, //!< normal size sans-serif font (more complex than FONT_HERSHEY_SIMPLEX)
-    FONT_HERSHEY_COMPLEX        = 3, //!< normal size serif font
-    FONT_HERSHEY_TRIPLEX        = 4, //!< normal size serif font (more complex than FONT_HERSHEY_COMPLEX)
-    FONT_HERSHEY_COMPLEX_SMALL  = 5, //!< smaller version of FONT_HERSHEY_COMPLEX
-    FONT_HERSHEY_SCRIPT_SIMPLEX = 6, //!< hand-writing style font
-    FONT_HERSHEY_SCRIPT_COMPLEX = 7, //!< more complex variant of FONT_HERSHEY_SCRIPT_SIMPLEX
-    FONT_ITALIC                 = 16 //!< flag for italic font
-};
-
 enum ReduceTypes { REDUCE_SUM = 0, //!< the output is the sum of all rows/columns of the matrix.
                    REDUCE_AVG = 1, //!< the output is the mean vector of all rows/columns of the matrix.
                    REDUCE_MAX = 2, //!< the output is the maximum (column/row-wise) of all rows/columns of the matrix.
@@ -265,14 +249,19 @@ Normally, the function is not called directly. It is used inside filtering funct
 copyMakeBorder.
 @param p 0-based coordinate of the extrapolated pixel along one of the axes, likely \<0 or \>= len
 @param len Length of the array along the corresponding axis.
-@param borderType Border type, one of the cv::BorderTypes, except for cv::BORDER_TRANSPARENT and
-cv::BORDER_ISOLATED . When borderType==cv::BORDER_CONSTANT , the function always returns -1, regardless
+@param borderType Border type, one of the #BorderTypes, except for #BORDER_TRANSPARENT and
+#BORDER_ISOLATED . When borderType==#BORDER_CONSTANT , the function always returns -1, regardless
 of p and len.
 
 @sa copyMakeBorder
 */
 CV_EXPORTS_W int borderInterpolate(int p, int len, int borderType);
 
+/** @example samples/cpp/tutorial_code/ImgTrans/copyMakeBorder_demo.cpp
+An example using copyMakeBorder function.
+Check @ref tutorial_copyMakeBorder "the corresponding tutorial" for more details
+*/
+
 /** @brief Forms a border around an image.
 
 The function copies the source image into the middle of the destination image. The areas to the
@@ -300,14 +289,14 @@ function does not copy src itself but simply constructs the border, for example:
 @endcode
 @note When the source image is a part (ROI) of a bigger image, the function will try to use the
 pixels outside of the ROI to form a border. To disable this feature and always do extrapolation, as
-if src was not a ROI, use borderType | BORDER_ISOLATED.
+if src was not a ROI, use borderType | #BORDER_ISOLATED.
 
 @param src Source image.
 @param dst Destination image of the same type as src and the size Size(src.cols+left+right,
 src.rows+top+bottom) .
-@param top
-@param bottom
-@param left
+@param top the top pixels
+@param bottom the bottom pixels
+@param left the left pixels
 @param right Parameter specifying how many pixels in each direction from the source image rectangle
 to extrapolate. For example, top=1, bottom=1, left=1, right=1 mean that 1 pixel-wide border needs
 to be built.
@@ -426,13 +415,18 @@ CV_EXPORTS_W void multiply(InputArray src1, InputArray src2,
 
 /** @brief Performs per-element division of two arrays or a scalar by an array.
 
-The functions divide divide one array by another:
+The function cv::divide divides one array by another:
 \f[\texttt{dst(I) = saturate(src1(I)*scale/src2(I))}\f]
 or a scalar by an array when there is no src1 :
 \f[\texttt{dst(I) = saturate(scale/src2(I))}\f]
 
-When src2(I) is zero, dst(I) will also be zero. Different channels of
-multi-channel arrays are processed independently.
+Different channels of multi-channel arrays are processed independently.
+
+For integer types when src2(I) is zero, dst(I) will also be zero.
+
+@note In case of floating point data there is no special defined behavior for zero src2(I) values.
+Regular floating-point division is used.
+Expect correct IEEE-754 behaviour for floating-point data (with NaN, Inf result values).
 
 @note Saturation is not applied when the output array has the depth CV_32S. You may even get
 result of an incorrect sign in the case of overflow.
@@ -471,6 +465,10 @@ The function can also be emulated with a matrix expression, for example:
 */
 CV_EXPORTS_W void scaleAdd(InputArray src1, double alpha, InputArray src2, OutputArray dst);
 
+/** @example samples/cpp/tutorial_code/HighGUI/AddingImagesTrackbar.cpp
+Check @ref tutorial_trackbar "the corresponding tutorial" for more details
+*/
+
 /** @brief Calculates the weighted sum of two arrays.
 
 The function addWeighted calculates the weighted sum of two arrays as follows:
@@ -524,6 +522,18 @@ For example:
 CV_EXPORTS_W void convertScaleAbs(InputArray src, OutputArray dst,
                                   double alpha = 1, double beta = 0);
 
+/** @brief Converts an array to half precision floating number.
+
+This function converts FP32 (single precision floating point) from/to FP16 (half precision floating point). CV_16S format is used to represent FP16 data.
+There are two use modes (src -> dst): CV_32F -> CV_16S and CV_16S -> CV_32F. The input array has to have type of CV_32F or
+CV_16S to represent the bit depth. If the input array is neither of them, the function will raise an error.
+The format of half precision floating point is defined in IEEE 754-2008.
+
+@param src input array.
+@param dst output array.
+*/
+CV_EXPORTS_W void convertFp16(InputArray src, OutputArray dst);
+
 /** @brief Performs a look-up table transform of an array.
 
 The function LUT fills the output array with values from the look-up table. Indices of the entries
@@ -542,7 +552,7 @@ CV_EXPORTS_W void LUT(InputArray src, InputArray lut, OutputArray dst);
 
 /** @brief Calculates the sum of array elements.
 
-The functions sum calculate and return the sum of array elements,
+The function cv::sum calculates and returns the sum of array elements,
 independently for each channel.
 @param src input array that must have from 1 to 4 channels.
 @sa  countNonZero, mean, meanStdDev, norm, minMaxLoc, reduce
@@ -581,17 +591,17 @@ or
     // access pixel coordinates
     Point pnt = locations[i];
 @endcode
-@param src single-channel array (type CV_8UC1)
+@param src single-channel array
 @param idx the output array, type of cv::Mat or std::vector<Point>, corresponding to non-zero indices in the input
 */
 CV_EXPORTS_W void findNonZero( InputArray src, OutputArray idx );
 
 /** @brief Calculates an average (mean) of array elements.
 
-The function mean calculates the mean value M of array elements,
+The function cv::mean calculates the mean value M of array elements,
 independently for each channel, and return it:
 \f[\begin{array}{l} N =  \sum _{I: \; \texttt{mask} (I) \ne 0} 1 \\ M_c =  \left ( \sum _{I: \; \texttt{mask} (I) \ne 0}{ \texttt{mtx} (I)_c} \right )/N \end{array}\f]
-When all the mask elements are 0's, the functions return Scalar::all(0)
+When all the mask elements are 0's, the function returns Scalar::all(0)
 @param src input array that should have from 1 to 4 channels so that the result can be stored in
 Scalar_ .
 @param mask optional operation mask.
@@ -601,11 +611,11 @@ CV_EXPORTS_W Scalar mean(InputArray src, InputArray mask = noArray());
 
 /** Calculates a mean and standard deviation of array elements.
 
-The function meanStdDev calculates the mean and the standard deviation M
+The function cv::meanStdDev calculates the mean and the standard deviation M
 of array elements independently for each channel and returns it via the
 output parameters:
 \f[\begin{array}{l} N =  \sum _{I, \texttt{mask} (I)  \ne 0} 1 \\ \texttt{mean} _c =  \frac{\sum_{ I: \; \texttt{mask}(I) \ne 0} \texttt{src} (I)_c}{N} \\ \texttt{stddev} _c =  \sqrt{\frac{\sum_{ I: \; \texttt{mask}(I) \ne 0} \left ( \texttt{src} (I)_c -  \texttt{mean} _c \right )^2}{N}} \end{array}\f]
-When all the mask elements are 0's, the functions return
+When all the mask elements are 0's, the function returns
 mean=stddev=Scalar::all(0).
 @note The calculated standard deviation is only the diagonal of the
 complete normalized covariance matrix. If the full matrix is needed, you
@@ -615,69 +625,90 @@ then pass the matrix to calcCovarMatrix .
 @param src input array that should have from 1 to 4 channels so that the results can be stored in
 Scalar_ 's.
 @param mean output parameter: calculated mean value.
-@param stddev output parameter: calculateded standard deviation.
+@param stddev output parameter: calculated standard deviation.
 @param mask optional operation mask.
 @sa  countNonZero, mean, norm, minMaxLoc, calcCovarMatrix
 */
 CV_EXPORTS_W void meanStdDev(InputArray src, OutputArray mean, OutputArray stddev,
                              InputArray mask=noArray());
 
-/** @brief Calculates an absolute array norm, an absolute difference norm, or a
-relative difference norm.
-
-The functions norm calculate an absolute norm of src1 (when there is no
-src2 ):
-
-\f[norm =  \forkthree{\|\texttt{src1}\|_{L_{\infty}} =  \max _I | \texttt{src1} (I)|}{if  \(\texttt{normType} = \texttt{NORM_INF}\) }
-{ \| \texttt{src1} \| _{L_1} =  \sum _I | \texttt{src1} (I)|}{if  \(\texttt{normType} = \texttt{NORM_L1}\) }
-{ \| \texttt{src1} \| _{L_2} =  \sqrt{\sum_I \texttt{src1}(I)^2} }{if  \(\texttt{normType} = \texttt{NORM_L2}\) }\f]
-
-or an absolute or relative difference norm if src2 is there:
-
-\f[norm =  \forkthree{\|\texttt{src1}-\texttt{src2}\|_{L_{\infty}} =  \max _I | \texttt{src1} (I) -  \texttt{src2} (I)|}{if  \(\texttt{normType} = \texttt{NORM_INF}\) }
-{ \| \texttt{src1} - \texttt{src2} \| _{L_1} =  \sum _I | \texttt{src1} (I) -  \texttt{src2} (I)|}{if  \(\texttt{normType} = \texttt{NORM_L1}\) }
-{ \| \texttt{src1} - \texttt{src2} \| _{L_2} =  \sqrt{\sum_I (\texttt{src1}(I) - \texttt{src2}(I))^2} }{if  \(\texttt{normType} = \texttt{NORM_L2}\) }\f]
-
-or
-
-\f[norm =  \forkthree{\frac{\|\texttt{src1}-\texttt{src2}\|_{L_{\infty}}    }{\|\texttt{src2}\|_{L_{\infty}} }}{if  \(\texttt{normType} = \texttt{NORM_RELATIVE_INF}\) }
-{ \frac{\|\texttt{src1}-\texttt{src2}\|_{L_1} }{\|\texttt{src2}\|_{L_1}} }{if  \(\texttt{normType} = \texttt{NORM_RELATIVE_L1}\) }
-{ \frac{\|\texttt{src1}-\texttt{src2}\|_{L_2} }{\|\texttt{src2}\|_{L_2}} }{if  \(\texttt{normType} = \texttt{NORM_RELATIVE_L2}\) }\f]
-
-The functions norm return the calculated norm.
+/** @brief Calculates the absolute norm of an array.
+
+This version of #norm calculates the absolute norm of src1. The type of norm to calculate is specified using #NormTypes.
+
+As an example for one array, consider the function \f$r(x)= \begin{pmatrix} x \\ 1-x \end{pmatrix}, x \in [-1;1]\f$.
+The \f$ L_{1}, L_{2} \f$ and \f$ L_{\infty} \f$ norm for the sample value \f$r(-1) = \begin{pmatrix} -1 \\ 2 \end{pmatrix}\f$
+is calculated as follows
+\f{align*}
+    \| r(-1) \|_{L_1} &= |-1| + |2| = 3 \\
+    \| r(-1) \|_{L_2} &= \sqrt{(-1)^{2} + (2)^{2}} = \sqrt{5} \\
+    \| r(-1) \|_{L_\infty} &= \max(|-1|,|2|) = 2
+\f}
+and for \f$r(0.5) = \begin{pmatrix} 0.5 \\ 0.5 \end{pmatrix}\f$ the calculation is
+\f{align*}
+    \| r(0.5) \|_{L_1} &= |0.5| + |0.5| = 1 \\
+    \| r(0.5) \|_{L_2} &= \sqrt{(0.5)^{2} + (0.5)^{2}} = \sqrt{0.5} \\
+    \| r(0.5) \|_{L_\infty} &= \max(|0.5|,|0.5|) = 0.5.
+\f}
+The following graphic shows all values for the three norm functions \f$\| r(x) \|_{L_1}, \| r(x) \|_{L_2}\f$ and \f$\| r(x) \|_{L_\infty}\f$.
+It is notable that the \f$ L_{1} \f$ norm forms the upper and the \f$ L_{\infty} \f$ norm forms the lower border for the example function \f$ r(x) \f$.
+![Graphs for the different norm functions from the above example](pics/NormTypes_OneArray_1-2-INF.png)
 
 When the mask parameter is specified and it is not empty, the norm is
 calculated only over the region specified by the mask.
+
+If normType is not specified, #NORM_L2 is used.
 
-A multi-channel input arrays are treated as a single-channel, that is,
+Multi-channel input arrays are treated as single-channel arrays, that is,
 the results for all channels are combined.
 
+Hamming norms can only be calculated with CV_8U depth arrays.
+
 @param src1 first input array.
-@param normType type of the norm (see cv::NormTypes).
+@param normType type of the norm (see #NormTypes).
 @param mask optional operation mask; it must have the same size as src1 and CV_8UC1 type.
 */
 CV_EXPORTS_W double norm(InputArray src1, int normType = NORM_L2, InputArray mask = noArray());
 
-/** @overload
+/** @brief Calculates an absolute difference norm or a relative difference norm.
+
+This version of cv::norm calculates the absolute difference norm
+or the relative difference norm of arrays src1 and src2.
+The type of norm to calculate is specified using #NormTypes.
+
 @param src1 first input array.
 @param src2 second input array of the same size and the same type as src1.
-@param normType type of the norm (cv::NormTypes).
+@param normType type of the norm (see #NormTypes).
 @param mask optional operation mask; it must have the same size as src1 and CV_8UC1 type.
 */
 CV_EXPORTS_W double norm(InputArray src1, InputArray src2,
                          int normType = NORM_L2, InputArray mask = noArray());
 /** @overload
 @param src first input array.
-@param normType type of the norm (see cv::NormTypes).
+@param normType type of the norm (see #NormTypes).
 */
 CV_EXPORTS double norm( const SparseMat& src, int normType );
 
-/** @brief computes PSNR image/video quality metric
+/** @brief Computes the Peak Signal-to-Noise Ratio (PSNR) image quality metric.
+
+This function calculates the Peak Signal-to-Noise Ratio (PSNR) image quality metric in decibels (dB),
+between two input arrays src1 and src2. The arrays must have the same type.
+
+The PSNR is calculated as follows:
+
+\f[
+\texttt{PSNR} = 10 \cdot \log_{10}{\left( \frac{R^2}{MSE} \right) }
+\f]
+
+where R is the maximum integer value of depth (e.g. 255 in the case of CV_8U data)
+and MSE is the mean squared error between the two arrays.
+
+@param src1 first input array.
+@param src2 second input array of the same size as src1.
+@param R the maximum pixel value (255 by default)
 
-see http://en.wikipedia.org/wiki/Peak_signal-to-noise_ratio for details
-@todo document
   */
-CV_EXPORTS_W double PSNR(InputArray src1, InputArray src2);
+CV_EXPORTS_W double PSNR(InputArray src1, InputArray src2, double R=255.);
 
 /** @brief naive nearest neighbor finder
 
@@ -692,7 +723,7 @@ CV_EXPORTS_W void batchDistance(InputArray src1, InputArray src2,
 
 /** @brief Normalizes the norm or value range of an array.
 
-The functions normalize scale and shift the input array elements so that
+The function cv::normalize scales and shifts the input array elements so that
 \f[\| \texttt{dst} \| _{L_p}= \texttt{alpha}\f]
 (where p=Inf, 1 or 2) when normType=NORM_INF, NORM_L1, or NORM_L2, respectively; or so that
 \f[\min _I  \texttt{dst} (I)= \texttt{alpha} , \, \, \max _I  \texttt{dst} (I)= \texttt{beta}\f]
@@ -762,11 +793,11 @@ CV_EXPORTS void normalize( const SparseMat& src, SparseMat& dst, double alpha, i
 
 /** @brief Finds the global minimum and maximum in an array.
 
-The functions minMaxLoc find the minimum and maximum element values and their positions. The
+The function cv::minMaxLoc finds the minimum and maximum element values and their positions. The
 extremums are searched across the whole array or, if mask is not an empty array, in the specified
 array region.
 
-The functions do not work with multi-channel arrays. If you need to find minimum or maximum
+The function does not work with multi-channel arrays. If you need to find minimum or maximum
 elements across all the channels, use Mat::reshape first to reinterpret the array as
 single-channel. Or you may extract the particular channel using either extractImageCOI , or
 mixChannels , or split .
@@ -785,7 +816,7 @@ CV_EXPORTS_W void minMaxLoc(InputArray src, CV_OUT double* minVal,
 
 /** @brief Finds the global minimum and maximum in an array
 
-The function minMaxIdx finds the minimum and maximum element values and their positions. The
+The function cv::minMaxIdx finds the minimum and maximum element values and their positions. The
 extremums are searched across the whole array or, if mask is not an empty array, in the specified
 array region. The function does not work with multi-channel arrays. If you need to find minimum or
 maximum elements across all the channels, use Mat::reshape first to reinterpret the array as
@@ -823,17 +854,24 @@ CV_EXPORTS void minMaxLoc(const SparseMat& a, double* minVal,
 
 /** @brief Reduces a matrix to a vector.
 
-The function reduce reduces the matrix to a vector by treating the matrix rows/columns as a set of
+The function #reduce reduces the matrix to a vector by treating the matrix rows/columns as a set of
 1D vectors and performing the specified operation on the vectors until a single row/column is
 obtained. For example, the function can be used to compute horizontal and vertical projections of a
-raster image. In case of REDUCE_SUM and REDUCE_AVG , the output may have a larger element
-bit-depth to preserve accuracy. And multi-channel arrays are also supported in these two reduction
-modes.
+raster image. In case of #REDUCE_MAX and #REDUCE_MIN , the output image should have the same type as the source one.
+In case of #REDUCE_SUM and #REDUCE_AVG , the output may have a larger element bit-depth to preserve accuracy.
+And multi-channel arrays are also supported in these two reduction modes.
+
+The following code demonstrates its usage for a single channel matrix.
+@snippet snippets/core_reduce.cpp example
+
+And the following code demonstrates its usage for a two-channel matrix.
+@snippet snippets/core_reduce.cpp example2
+
 @param src input 2D matrix.
 @param dst output vector. Its size and type is defined by dim and dtype parameters.
 @param dim dimension index along which the matrix is reduced. 0 means that the matrix is reduced to
 a single row. 1 means that the matrix is reduced to a single column.
-@param rtype reduction operation that could be one of cv::ReduceTypes
+@param rtype reduction operation that could be one of #ReduceTypes
 @param dtype when negative, the output vector will have the same type as the input matrix,
 otherwise, its type will be CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), src.channels()).
 @sa repeat
@@ -842,12 +880,16 @@ CV_EXPORTS_W void reduce(InputArray src, OutputArray dst, int dim, int rtype, in
 
 /** @brief Creates one multi-channel array out of several single-channel ones.
 
-The function merge merges several arrays to make a single multi-channel array. That is, each
+The function cv::merge merges several arrays to make a single multi-channel array. That is, each
 element of the output array will be a concatenation of the elements of the input arrays, where
 elements of i-th input array are treated as mv[i].channels()-element vectors.
 
 The function cv::split does the reverse operation. If you need to shuffle channels in some other
 advanced way, use cv::mixChannels.
+
+The following example shows how to merge 3 single channel matrices into a single 3-channel matrix.
+@snippet snippets/core_merge.cpp example
+
 @param mv input array of matrices to be merged; all the matrices in mv must have the same
 size and the same depth.
 @param count number of input matrices when mv is a plain C array; it must be greater than zero.
@@ -867,10 +909,14 @@ CV_EXPORTS_W void merge(InputArrayOfArrays mv, OutputArray dst);
 
 /** @brief Divides a multi-channel array into several single-channel arrays.
 
-The functions split split a multi-channel array into separate single-channel arrays:
+The function cv::split splits a multi-channel array into separate single-channel arrays:
 \f[\texttt{mv} [c](I) =  \texttt{src} (I)_c\f]
 If you need to extract a single channel or do some other sophisticated channel permutation, use
 mixChannels .
+
+The following example demonstrates how to split a 3-channel matrix into 3 single channel matrices.
+@snippet snippets/core_split.cpp example
+
 @param src input multi-channel array.
 @param mvbegin output array; the number of arrays must match src.channels(); the arrays themselves are
 reallocated, if needed.
@@ -889,7 +935,7 @@ output arrays.
 
 The function cv::mixChannels provides an advanced mechanism for shuffling image channels.
 
-cv::split and cv::merge and some forms of cv::cvtColor are partial cases of cv::mixChannels .
+cv::split, cv::merge, cv::extractChannel, cv::insertChannel and some forms of cv::cvtColor are partial cases of cv::mixChannels.
 
 In the example below, the code splits a 4-channel BGRA image into a 3-channel BGR (with B and R
 channels swapped) and a separate alpha-channel image:
@@ -923,7 +969,7 @@ src[0].channels() + src[1].channels()-1, and so on, the same scheme is used for
 channels; as a special case, when fromTo[k\*2] is negative, the corresponding output channel is
 filled with zero .
 @param npairs number of index pairs in `fromTo`.
-@sa cv::split, cv::merge, cv::cvtColor
+@sa split, merge, extractChannel, insertChannel, cvtColor
 */
 CV_EXPORTS void mixChannels(const Mat* src, size_t nsrcs, Mat* dst, size_t ndsts,
                             const int* fromTo, size_t npairs);
@@ -961,19 +1007,25 @@ filled with zero .
 CV_EXPORTS_W void mixChannels(InputArrayOfArrays src, InputOutputArrayOfArrays dst,
                               const std::vector<int>& fromTo);
 
-/** @brief extracts a single channel from src (coi is 0-based index)
-@todo document
+/** @brief Extracts a single channel from src (coi is 0-based index)
+@param src input array
+@param dst output array
+@param coi index of channel to extract
+@sa mixChannels, split
 */
 CV_EXPORTS_W void extractChannel(InputArray src, OutputArray dst, int coi);
 
-/** @brief inserts a single channel to dst (coi is 0-based index)
-@todo document
+/** @brief Inserts a single channel to dst (coi is 0-based index)
+@param src input array
+@param dst output array
+@param coi index of channel for insertion
+@sa mixChannels, merge
 */
 CV_EXPORTS_W void insertChannel(InputArray src, InputOutputArray dst, int coi);
 
 /** @brief Flips a 2D array around vertical, horizontal, or both axes.
 
-The function flip flips the array in one of three different ways (row
+The function cv::flip flips the array in one of three different ways (row
 and column indices are 0-based):
 \f[\texttt{dst} _{ij} =
 \left\{
@@ -1005,6 +1057,24 @@ around both axes.
 */
 CV_EXPORTS_W void flip(InputArray src, OutputArray dst, int flipCode);
 
+enum RotateFlags {
+    ROTATE_90_CLOCKWISE = 0, //!<Rotate 90 degrees clockwise
+    ROTATE_180 = 1, //!<Rotate 180 degrees clockwise
+    ROTATE_90_COUNTERCLOCKWISE = 2, //!<Rotate 270 degrees clockwise
+};
+/** @brief Rotates a 2D array in multiples of 90 degrees.
+The function cv::rotate rotates the array in one of three different ways:
+*   Rotate by 90 degrees clockwise (rotateCode = ROTATE_90_CLOCKWISE).
+*   Rotate by 180 degrees clockwise (rotateCode = ROTATE_180).
+*   Rotate by 270 degrees clockwise (rotateCode = ROTATE_90_COUNTERCLOCKWISE).
+@param src input array.
+@param dst output array of the same type as src.  The size is the same with ROTATE_180,
+and the rows and cols are switched for ROTATE_90_CLOCKWISE and ROTATE_90_COUNTERCLOCKWISE.
+@param rotateCode an enum to specify how to rotate the array; see the enum #RotateFlags
+@sa transpose , repeat , completeSymm, flip, RotateFlags
+*/
+CV_EXPORTS_W void rotate(InputArray src, OutputArray dst, int rotateCode);
+
 /** @brief Fills the output array with repeated copies of the input array.
 
 The function cv::repeat duplicates the input array one or more times along each of the two axes:
@@ -1160,7 +1230,7 @@ CV_EXPORTS_W void vconcat(InputArrayOfArrays src, OutputArray dst);
 Calculates the per-element bit-wise conjunction of two arrays or an
 array and a scalar.
 
-The function calculates the per-element bit-wise logical conjunction for:
+The function cv::bitwise_and calculates the per-element bit-wise logical conjunction for:
 *   Two arrays when src1 and src2 have the same size:
     \f[\texttt{dst} (I) =  \texttt{src1} (I)  \wedge \texttt{src2} (I) \quad \texttt{if mask} (I) \ne0\f]
 *   An array and a scalar when src2 is constructed from Scalar or has
@@ -1187,7 +1257,7 @@ CV_EXPORTS_W void bitwise_and(InputArray src1, InputArray src2,
 /** @brief Calculates the per-element bit-wise disjunction of two arrays or an
 array and a scalar.
 
-The function calculates the per-element bit-wise logical disjunction for:
+The function cv::bitwise_or calculates the per-element bit-wise logical disjunction for:
 *   Two arrays when src1 and src2 have the same size:
     \f[\texttt{dst} (I) =  \texttt{src1} (I)  \vee \texttt{src2} (I) \quad \texttt{if mask} (I) \ne0\f]
 *   An array and a scalar when src2 is constructed from Scalar or has
@@ -1214,7 +1284,7 @@ CV_EXPORTS_W void bitwise_or(InputArray src1, InputArray src2,
 /** @brief Calculates the per-element bit-wise "exclusive or" operation on two
 arrays or an array and a scalar.
 
-The function calculates the per-element bit-wise logical "exclusive-or"
+The function cv::bitwise_xor calculates the per-element bit-wise logical "exclusive-or"
 operation for:
 *   Two arrays when src1 and src2 have the same size:
     \f[\texttt{dst} (I) =  \texttt{src1} (I)  \oplus \texttt{src2} (I) \quad \texttt{if mask} (I) \ne0\f]
@@ -1241,7 +1311,7 @@ CV_EXPORTS_W void bitwise_xor(InputArray src1, InputArray src2,
 
 /** @brief  Inverts every bit of an array.
 
-The function calculates per-element bit-wise inversion of the input
+The function cv::bitwise_not calculates per-element bit-wise inversion of the input
 array:
 \f[\texttt{dst} (I) =  \neg \texttt{src} (I)\f]
 In case of a floating-point input array, its machine-specific bit
@@ -1258,7 +1328,7 @@ CV_EXPORTS_W void bitwise_not(InputArray src, OutputArray dst,
 
 /** @brief Calculates the per-element absolute difference between two arrays or between an array and a scalar.
 
-The function absdiff calculates:
+The function cv::absdiff calculates:
 *   Absolute difference between two arrays when they have the same
     size and type:
     \f[\texttt{dst}(I) =  \texttt{saturate} (| \texttt{src1}(I) -  \texttt{src2}(I)|)\f]
@@ -1281,6 +1351,17 @@ You may even get a negative value in the case of overflow.
 */
 CV_EXPORTS_W void absdiff(InputArray src1, InputArray src2, OutputArray dst);
 
+/** @brief  This is an overloaded member function, provided for convenience (python)
+Copies the matrix to another one.
+When the operation mask is specified, if dst has to be reallocated (via Mat::create), the newly allocated matrix is initialized with all zeros before copying the data.
+@param src source matrix.
+@param dst Destination matrix. If it does not have a proper size or type before the operation, it is
+reallocated.
+@param mask Operation mask of the same size as src. Its non-zero elements indicate which matrix
+elements need to be copied. The mask has to be of type CV_8U and can have 1 or multiple channels.
+*/
+
+void CV_EXPORTS_W copyTo(InputArray src, OutputArray dst, InputArray mask);
 /** @brief  Checks if array elements lie between the elements of two other arrays.
 
 The function checks the range as follows:
@@ -1333,7 +1414,7 @@ CV_EXPORTS_W void compare(InputArray src1, InputArray src2, OutputArray dst, int
 
 /** @brief Calculates per-element minimum of two arrays or an array and a scalar.
 
-The functions min calculate the per-element minimum of two arrays:
+The function cv::min calculates the per-element minimum of two arrays:
 \f[\texttt{dst} (I)= \min ( \texttt{src1} (I), \texttt{src2} (I))\f]
 or array and a scalar:
 \f[\texttt{dst} (I)= \min ( \texttt{src1} (I), \texttt{value} )\f]
@@ -1354,7 +1435,7 @@ CV_EXPORTS void min(const UMat& src1, const UMat& src2, UMat& dst);
 
 /** @brief Calculates per-element maximum of two arrays or an array and a scalar.
 
-The functions max calculate the per-element maximum of two arrays:
+The function cv::max calculates the per-element maximum of two arrays:
 \f[\texttt{dst} (I)= \max ( \texttt{src1} (I), \texttt{src2} (I))\f]
 or array and a scalar:
 \f[\texttt{dst} (I)= \max ( \texttt{src1} (I), \texttt{value} )\f]
@@ -1375,7 +1456,7 @@ CV_EXPORTS void max(const UMat& src1, const UMat& src2, UMat& dst);
 
 /** @brief Calculates a square root of array elements.
 
-The functions sqrt calculate a square root of each input array element.
+The function cv::sqrt calculates a square root of each input array element.
 In case of multi-channel arrays, each channel is processed
 independently. The accuracy is approximately the same as of the built-in
 std::sqrt .
@@ -1386,7 +1467,7 @@ CV_EXPORTS_W void sqrt(InputArray src, OutputArray dst);
 
 /** @brief Raises every array element to a power.
 
-The function pow raises every element of the input array to power :
+The function cv::pow raises every element of the input array to power :
 \f[\texttt{dst} (I) =  \fork{\texttt{src}(I)^{power}}{if \(\texttt{power}\) is integer}{|\texttt{src}(I)|^{power}}{otherwise}\f]
 
 So, for a non-integer power exponent, the absolute values of input array
@@ -1411,7 +1492,7 @@ CV_EXPORTS_W void pow(InputArray src, double power, OutputArray dst);
 
 /** @brief Calculates the exponent of every array element.
 
-The function exp calculates the exponent of every element of the input
+The function cv::exp calculates the exponent of every element of the input
 array:
 \f[\texttt{dst} [I] = e^{ src(I) }\f]
 
@@ -1427,14 +1508,11 @@ CV_EXPORTS_W void exp(InputArray src, OutputArray dst);
 
 /** @brief Calculates the natural logarithm of every array element.
 
-The function log calculates the natural logarithm of the absolute value
-of every element of the input array:
-\f[\texttt{dst} (I) =  \fork{\log |\texttt{src}(I)|}{if \(\texttt{src}(I) \ne 0\) }{\texttt{C}}{otherwise}\f]
+The function cv::log calculates the natural logarithm of every element of the input array:
+\f[\texttt{dst} (I) =  \log (\texttt{src}(I)) \f]
+
+Output on zero, negative and special (NaN, Inf) values is undefined.
 
-where C is a large negative number (about -700 in the current
-implementation). The maximum relative error is about 7e-6 for
-single-precision input and less than 1e-10 for double-precision input.
-Special values (NaN, Inf) are not handled.
 @param src input array.
 @param dst output array of the same size and type as src .
 @sa exp, cartToPolar, polarToCart, phase, pow, sqrt, magnitude
@@ -1443,7 +1521,7 @@ CV_EXPORTS_W void log(InputArray src, OutputArray dst);
 
 /** @brief Calculates x and y coordinates of 2D vectors from their magnitude and angle.
 
-The function polarToCart calculates the Cartesian coordinates of each 2D
+The function cv::polarToCart calculates the Cartesian coordinates of each 2D
 vector represented by the corresponding elements of magnitude and angle:
 \f[\begin{array}{l} \texttt{x} (I) =  \texttt{magnitude} (I) \cos ( \texttt{angle} (I)) \\ \texttt{y} (I) =  \texttt{magnitude} (I) \sin ( \texttt{angle} (I)) \\ \end{array}\f]
 
@@ -1466,7 +1544,7 @@ CV_EXPORTS_W void polarToCart(InputArray magnitude, InputArray angle,
 
 /** @brief Calculates the magnitude and angle of 2D vectors.
 
-The function cartToPolar calculates either the magnitude, angle, or both
+The function cv::cartToPolar calculates either the magnitude, angle, or both
 for every 2D vector (x(I),y(I)):
 \f[\begin{array}{l} \texttt{magnitude} (I)= \sqrt{\texttt{x}(I)^2+\texttt{y}(I)^2} , \\ \texttt{angle} (I)= \texttt{atan2} ( \texttt{y} (I), \texttt{x} (I))[ \cdot180 / \pi ] \end{array}\f]
 
@@ -1488,7 +1566,7 @@ CV_EXPORTS_W void cartToPolar(InputArray x, InputArray y,
 
 /** @brief Calculates the rotation angle of 2D vectors.
 
-The function phase calculates the rotation angle of each 2D vector that
+The function cv::phase calculates the rotation angle of each 2D vector that
 is formed from the corresponding elements of x and y :
 \f[\texttt{angle} (I) =  \texttt{atan2} ( \texttt{y} (I), \texttt{x} (I))\f]
 
@@ -1507,7 +1585,7 @@ CV_EXPORTS_W void phase(InputArray x, InputArray y, OutputArray angle,
 
 /** @brief Calculates the magnitude of 2D vectors.
 
-The function magnitude calculates the magnitude of 2D vectors formed
+The function cv::magnitude calculates the magnitude of 2D vectors formed
 from the corresponding elements of x and y arrays:
 \f[\texttt{dst} (I) =  \sqrt{\texttt{x}(I)^2 + \texttt{y}(I)^2}\f]
 @param x floating-point array of x-coordinates of the vectors.
@@ -1520,11 +1598,11 @@ CV_EXPORTS_W void magnitude(InputArray x, InputArray y, OutputArray magnitude);
 
 /** @brief Checks every element of an input array for invalid values.
 
-The functions checkRange check that every array element is neither NaN nor infinite. When minVal \>
--DBL_MAX and maxVal \< DBL_MAX, the functions also check that each value is between minVal and
+The function cv::checkRange checks that every array element is neither NaN nor infinite. When minVal \>
+-DBL_MAX and maxVal \< DBL_MAX, the function also checks that each value is between minVal and
 maxVal. In case of multi-channel arrays, each channel is processed independently. If some values
 are out of range, position of the first outlier is stored in pos (when pos != NULL). Then, the
-functions either return false (when quiet=true) or throw an exception.
+function either returns false (when quiet=true) or throws an exception.
 @param a input array.
 @param quiet a flag, indicating whether the functions quietly return false when the array elements
 are out of range or they throw an exception.
@@ -1542,7 +1620,7 @@ CV_EXPORTS_W void patchNaNs(InputOutputArray a, double val = 0);
 
 /** @brief Performs generalized matrix multiplication.
 
-The function performs generalized matrix multiplication similar to the
+The function cv::gemm performs generalized matrix multiplication similar to the
 gemm functions in BLAS level 3. For example,
 `gemm(src1, src2, alpha, src3, beta, dst, GEMM_1_T + GEMM_3_T)`
 corresponds to
@@ -1573,7 +1651,7 @@ CV_EXPORTS_W void gemm(InputArray src1, InputArray src2, double alpha,
 
 /** @brief Calculates the product of a matrix and its transposition.
 
-The function mulTransposed calculates the product of src and its
+The function cv::mulTransposed calculates the product of src and its
 transposition:
 \f[\texttt{dst} = \texttt{scale} ( \texttt{src} - \texttt{delta} )^T ( \texttt{src} - \texttt{delta} )\f]
 if aTa=true , and
@@ -1605,9 +1683,9 @@ CV_EXPORTS_W void mulTransposed( InputArray src, OutputArray dst, bool aTa,
 
 /** @brief Transposes a matrix.
 
-The function transpose transposes the matrix src :
+The function cv::transpose transposes the matrix src :
 \f[\texttt{dst} (i,j) =  \texttt{src} (j,i)\f]
-@note No complex conjugation is done in case of a complex matrix. It it
+@note No complex conjugation is done in case of a complex matrix. It
 should be done separately if needed.
 @param src input array.
 @param dst output array of the same type as src.
@@ -1616,7 +1694,7 @@ CV_EXPORTS_W void transpose(InputArray src, OutputArray dst);
 
 /** @brief Performs the matrix transformation of every array element.
 
-The function transform performs the matrix transformation of every
+The function cv::transform performs the matrix transformation of every
 element of the array src and stores the results in dst :
 \f[\texttt{dst} (I) =  \texttt{m} \cdot \texttt{src} (I)\f]
 (when m.cols=src.channels() ), or
@@ -1636,13 +1714,13 @@ m.cols or m.cols-1.
 @param dst output array of the same size and depth as src; it has as
 many channels as m.rows.
 @param m transformation 2x2 or 2x3 floating-point matrix.
-@sa perspectiveTransform, getAffineTransform, estimateRigidTransform, warpAffine, warpPerspective
+@sa perspectiveTransform, getAffineTransform, estimateAffine2D, warpAffine, warpPerspective
 */
 CV_EXPORTS_W void transform(InputArray src, OutputArray dst, InputArray m );
 
 /** @brief Performs the perspective matrix transformation of vectors.
 
-The function perspectiveTransform transforms every element of src by
+The function cv::perspectiveTransform transforms every element of src by
 treating it as a 2D or 3D vector, in the following way:
 \f[(x, y, z)  \rightarrow (x'/w, y'/w, z'/w)\f]
 where
@@ -1667,24 +1745,25 @@ element is a 2D/3D vector to be transformed.
 */
 CV_EXPORTS_W void perspectiveTransform(InputArray src, OutputArray dst, InputArray m );
 
-/** @brief Copies the lower or the upper half of a square matrix to another half.
+/** @brief Copies the lower or the upper half of a square matrix to its other half.
 
-The function completeSymm copies the lower half of a square matrix to
+The function cv::completeSymm copies the lower or the upper half of a square matrix to
 its another half. The matrix diagonal remains unchanged:
-*   \f$\texttt{mtx}_{ij}=\texttt{mtx}_{ji}\f$ for \f$i > j\f$ if
+ - \f$\texttt{m}_{ij}=\texttt{m}_{ji}\f$ for \f$i > j\f$ if
     lowerToUpper=false
-*   \f$\texttt{mtx}_{ij}=\texttt{mtx}_{ji}\f$ for \f$i < j\f$ if
+ - \f$\texttt{m}_{ij}=\texttt{m}_{ji}\f$ for \f$i < j\f$ if
     lowerToUpper=true
-@param mtx input-output floating-point square matrix.
+
+@param m input-output floating-point square matrix.
 @param lowerToUpper operation flag; if true, the lower half is copied to
 the upper half. Otherwise, the upper half is copied to the lower half.
 @sa flip, transpose
 */
-CV_EXPORTS_W void completeSymm(InputOutputArray mtx, bool lowerToUpper = false);
+CV_EXPORTS_W void completeSymm(InputOutputArray m, bool lowerToUpper = false);
 
 /** @brief Initializes a scaled identity matrix.
 
-The function setIdentity initializes a scaled identity matrix:
+The function cv::setIdentity initializes a scaled identity matrix:
 \f[\texttt{mtx} (i,j)= \fork{\texttt{value}}{ if \(i=j\)}{0}{otherwise}\f]
 
 The function can also be emulated using the matrix initializers and the
@@ -1701,7 +1780,7 @@ CV_EXPORTS_W void setIdentity(InputOutputArray mtx, const Scalar& s = Scalar(1))
 
 /** @brief Returns the determinant of a square floating-point matrix.
 
-The function determinant calculates and returns the determinant of the
+The function cv::determinant calculates and returns the determinant of the
 specified matrix. For small matrices ( mtx.cols=mtx.rows\<=3 ), the
 direct method is used. For larger matrices, the function uses LU
 factorization with partial pivoting.
@@ -1716,7 +1795,7 @@ CV_EXPORTS_W double determinant(InputArray mtx);
 
 /** @brief Returns the trace of a matrix.
 
-The function trace returns the sum of the diagonal elements of the
+The function cv::trace returns the sum of the diagonal elements of the
 matrix mtx .
 \f[\mathrm{tr} ( \texttt{mtx} ) =  \sum _i  \texttt{mtx} (i,i)\f]
 @param mtx input matrix.
@@ -1725,20 +1804,20 @@ CV_EXPORTS_W Scalar trace(InputArray mtx);
 
 /** @brief Finds the inverse or pseudo-inverse of a matrix.
 
-The function invert inverts the matrix src and stores the result in dst
+The function cv::invert inverts the matrix src and stores the result in dst
 . When the matrix src is singular or non-square, the function calculates
 the pseudo-inverse matrix (the dst matrix) so that norm(src\*dst - I) is
 minimal, where I is an identity matrix.
 
-In case of the DECOMP_LU method, the function returns non-zero value if
+In case of the #DECOMP_LU method, the function returns non-zero value if
 the inverse has been successfully calculated and 0 if src is singular.
 
-In case of the DECOMP_SVD method, the function returns the inverse
+In case of the #DECOMP_SVD method, the function returns the inverse
 condition number of src (the ratio of the smallest singular value to the
 largest singular value) and 0 if src is singular. The SVD method
 calculates a pseudo-inverse matrix if src is singular.
 
-Similarly to DECOMP_LU, the method DECOMP_CHOLESKY works only with
+Similarly to #DECOMP_LU, the method #DECOMP_CHOLESKY works only with
 non-singular square matrices that should also be symmetrical and
 positively defined. In this case, the function stores the inverted
 matrix in dst and returns non-zero. Otherwise, it returns 0.
@@ -1752,12 +1831,12 @@ CV_EXPORTS_W double invert(InputArray src, OutputArray dst, int flags = DECOMP_L
 
 /** @brief Solves one or more linear systems or least-squares problems.
 
-The function solve solves a linear system or least-squares problem (the
+The function cv::solve solves a linear system or least-squares problem (the
 latter is possible with SVD or QR methods, or by specifying the flag
-DECOMP_NORMAL ):
+#DECOMP_NORMAL ):
 \f[\texttt{dst} =  \arg \min _X \| \texttt{src1} \cdot \texttt{X} -  \texttt{src2} \|\f]
 
-If DECOMP_LU or DECOMP_CHOLESKY method is used, the function returns 1
+If #DECOMP_LU or #DECOMP_CHOLESKY method is used, the function returns 1
 if src1 (or \f$\texttt{src1}^T\texttt{src1}\f$ ) is non-singular. Otherwise,
 it returns 0. In the latter case, dst is not valid. Other methods find a
 pseudo-solution in case of a singular left-hand side part.
@@ -1769,7 +1848,7 @@ will not do the work. Use SVD::solveZ instead.
 @param src1 input matrix on the left-hand side of the system.
 @param src2 input matrix on the right-hand side of the system.
 @param dst output solution.
-@param flags solution (matrix inversion) method (cv::DecompTypes)
+@param flags solution (matrix inversion) method (#DecompTypes)
 @sa invert, SVD, eigen
 */
 CV_EXPORTS_W bool solve(InputArray src1, InputArray src2,
@@ -1777,7 +1856,7 @@ CV_EXPORTS_W bool solve(InputArray src1, InputArray src2,
 
 /** @brief Sorts each row or each column of a matrix.
 
-The function sort sorts each matrix row or each matrix column in
+The function cv::sort sorts each matrix row or each matrix column in
 ascending or descending order. So you should pass two operation flags to
 get desired behaviour. If you want to sort matrix rows or columns
 lexicographically, you can use STL std::sort generic function with the
@@ -1785,14 +1864,14 @@ proper comparison predicate.
 
 @param src input single-channel array.
 @param dst output array of the same size and type as src.
-@param flags operation flags, a combination of cv::SortFlags
+@param flags operation flags, a combination of #SortFlags
 @sa sortIdx, randShuffle
 */
 CV_EXPORTS_W void sort(InputArray src, OutputArray dst, int flags);
 
 /** @brief Sorts each row or each column of a matrix.
 
-The function sortIdx sorts each matrix row or each matrix column in the
+The function cv::sortIdx sorts each matrix row or each matrix column in the
 ascending or descending order. So you should pass two operation flags to
 get desired behaviour. Instead of reordering the elements themselves, it
 stores the indices of sorted elements in the output array. For example:
@@ -1821,12 +1900,13 @@ The function solveCubic finds the real roots of a cubic equation:
 The roots are stored in the roots array.
 @param coeffs equation coefficients, an array of 3 or 4 elements.
 @param roots output array of real roots that has 1 or 3 elements.
+@return number of real roots found (up to 3 for a cubic equation).
 */
 CV_EXPORTS_W int solveCubic(InputArray coeffs, OutputArray roots);
 
 /** @brief Finds the real or complex roots of a polynomial equation.
 
-The function solvePoly finds real and complex roots of a polynomial equation:
+The function cv::solvePoly finds real and complex roots of a polynomial equation:
 \f[\texttt{coeffs} [n] x^{n} +  \texttt{coeffs} [n-1] x^{n-1} + ... +  \texttt{coeffs} [1] x +  \texttt{coeffs} [0] = 0\f]
 @param coeffs array of polynomial coefficients.
 @param roots output (complex) array of roots.
@@ -1836,13 +1916,14 @@ CV_EXPORTS_W double solvePoly(InputArray coeffs, OutputArray roots, int maxIters
 
 /** @brief Calculates eigenvalues and eigenvectors of a symmetric matrix.
 
-The functions eigen calculate just eigenvalues, or eigenvalues and eigenvectors of the symmetric
+The function cv::eigen calculates just eigenvalues, or eigenvalues and eigenvectors of the symmetric
 matrix src:
 @code
     src*eigenvectors.row(i).t() = eigenvalues.at<srcType>(i)*eigenvectors.row(i).t()
 @endcode
-@note in the new and the old interfaces different ordering of eigenvalues and eigenvectors
-parameters is used.
+
+@note Use cv::eigenNonSymmetric for calculation of real eigenvalues and eigenvectors of a non-symmetric matrix.
+
 @param src input matrix that must have CV_32FC1 or CV_64FC1 type, square size and be symmetrical
 (src ^T^ == src).
 @param eigenvalues output vector of eigenvalues of the same type as src; the eigenvalues are stored
@@ -1850,20 +1931,37 @@ in the descending order.
 @param eigenvectors output matrix of eigenvectors; it has the same size and type as src; the
 eigenvectors are stored as subsequent matrix rows, in the same order as the corresponding
 eigenvalues.
-@sa completeSymm , PCA
+@sa eigenNonSymmetric, completeSymm , PCA
 */
 CV_EXPORTS_W bool eigen(InputArray src, OutputArray eigenvalues,
                         OutputArray eigenvectors = noArray());
 
+/** @brief Calculates eigenvalues and eigenvectors of a non-symmetric matrix (real eigenvalues only).
+
+@note Assumes real eigenvalues.
+
+The function calculates eigenvalues and eigenvectors (optional) of the square matrix src:
+@code
+    src*eigenvectors.row(i).t() = eigenvalues.at<srcType>(i)*eigenvectors.row(i).t()
+@endcode
+
+@param src input matrix (CV_32FC1 or CV_64FC1 type).
+@param eigenvalues output vector of eigenvalues (of the same type as src).
+@param eigenvectors output matrix of eigenvectors (of the same type as src). The eigenvectors are stored as subsequent matrix rows, in the same order as the corresponding eigenvalues.
+@sa eigen
+*/
+CV_EXPORTS_W void eigenNonSymmetric(InputArray src, OutputArray eigenvalues,
+                                    OutputArray eigenvectors);
+
 /** @brief Calculates the covariance matrix of a set of vectors.
 
-The functions calcCovarMatrix calculate the covariance matrix and, optionally, the mean vector of
+The function cv::calcCovarMatrix calculates the covariance matrix and, optionally, the mean vector of
 the set of input vectors.
 @param samples samples stored as separate matrices
 @param nsamples number of samples
 @param covar output covariance matrix of the type ctype and square size.
 @param mean input or output (depending on the flags) array as the average value of the input vectors.
-@param flags operation flags as a combination of cv::CovarFlags
+@param flags operation flags as a combination of #CovarFlags
 @param ctype type of the matrixl; it equals 'CV_64F' by default.
 @sa PCA, mulTransposed, Mahalanobis
 @todo InputArrayOfArrays
@@ -1872,11 +1970,11 @@ CV_EXPORTS void calcCovarMatrix( const Mat* samples, int nsamples, Mat& covar, M
                                  int flags, int ctype = CV_64F);
 
 /** @overload
-@note use cv::COVAR_ROWS or cv::COVAR_COLS flag
+@note use #COVAR_ROWS or #COVAR_COLS flag
 @param samples samples stored as rows/columns of a single matrix.
 @param covar output covariance matrix of the type ctype and square size.
 @param mean input or output (depending on the flags) array as the average value of the input vectors.
-@param flags operation flags as a combination of cv::CovarFlags
+@param flags operation flags as a combination of #CovarFlags
 @param ctype type of the matrixl; it equals 'CV_64F' by default.
 */
 CV_EXPORTS_W void calcCovarMatrix( InputArray samples, OutputArray covar,
@@ -1886,10 +1984,20 @@ CV_EXPORTS_W void calcCovarMatrix( InputArray samples, OutputArray covar,
 CV_EXPORTS_W void PCACompute(InputArray data, InputOutputArray mean,
                              OutputArray eigenvectors, int maxComponents = 0);
 
+/** wrap PCA::operator() and add eigenvalues output parameter */
+CV_EXPORTS_AS(PCACompute2) void PCACompute(InputArray data, InputOutputArray mean,
+                                           OutputArray eigenvectors, OutputArray eigenvalues,
+                                           int maxComponents = 0);
+
 /** wrap PCA::operator() */
 CV_EXPORTS_W void PCACompute(InputArray data, InputOutputArray mean,
                              OutputArray eigenvectors, double retainedVariance);
 
+/** wrap PCA::operator() and add eigenvalues output parameter */
+CV_EXPORTS_AS(PCACompute2) void PCACompute(InputArray data, InputOutputArray mean,
+                                           OutputArray eigenvectors, OutputArray eigenvalues,
+                                           double retainedVariance);
+
 /** wrap PCA::project */
 CV_EXPORTS_W void PCAProject(InputArray data, InputArray mean,
                              InputArray eigenvectors, OutputArray result);
@@ -1907,10 +2015,10 @@ CV_EXPORTS_W void SVBackSubst( InputArray w, InputArray u, InputArray vt,
 
 /** @brief Calculates the Mahalanobis distance between two vectors.
 
-The function Mahalanobis calculates and returns the weighted distance between two vectors:
+The function cv::Mahalanobis calculates and returns the weighted distance between two vectors:
 \f[d( \texttt{vec1} , \texttt{vec2} )= \sqrt{\sum_{i,j}{\texttt{icovar(i,j)}\cdot(\texttt{vec1}(I)-\texttt{vec2}(I))\cdot(\texttt{vec1(j)}-\texttt{vec2(j)})} }\f]
-The covariance matrix may be calculated using the cv::calcCovarMatrix function and then inverted using
-the invert function (preferably using the cv::DECOMP_SVD method, as the most accurate).
+The covariance matrix may be calculated using the #calcCovarMatrix function and then inverted using
+the invert function (preferably using the #DECOMP_SVD method, as the most accurate).
 @param v1 first 1D input vector.
 @param v2 second 1D input vector.
 @param icovar inverse covariance matrix.
@@ -1919,7 +2027,7 @@ CV_EXPORTS_W double Mahalanobis(InputArray v1, InputArray v2, InputArray icovar)
 
 /** @brief Performs a forward or inverse Discrete Fourier transform of a 1D or 2D floating-point array.
 
-The function performs one of the following:
+The function cv::dft performs one of the following:
 -   Forward the Fourier transform of a 1D vector of N elements:
     \f[Y = F^{(N)}  \cdot X,\f]
     where \f$F^{(N)}_{jk}=\exp(-2\pi i j k/N)\f$ and \f$i=\sqrt{-1}\f$
@@ -1940,28 +2048,28 @@ is how 2D *CCS* spectrum looks:
 In case of 1D transform of a real vector, the output looks like the first row of the matrix above.
 
 So, the function chooses an operation mode depending on the flags and size of the input array:
--   If DFT_ROWS is set or the input array has a single row or single column, the function
-    performs a 1D forward or inverse transform of each row of a matrix when DFT_ROWS is set.
+-   If #DFT_ROWS is set or the input array has a single row or single column, the function
+    performs a 1D forward or inverse transform of each row of a matrix when #DFT_ROWS is set.
     Otherwise, it performs a 2D transform.
--   If the input array is real and DFT_INVERSE is not set, the function performs a forward 1D or
+-   If the input array is real and #DFT_INVERSE is not set, the function performs a forward 1D or
     2D transform:
-    -   When DFT_COMPLEX_OUTPUT is set, the output is a complex matrix of the same size as
+    -   When #DFT_COMPLEX_OUTPUT is set, the output is a complex matrix of the same size as
         input.
-    -   When DFT_COMPLEX_OUTPUT is not set, the output is a real matrix of the same size as
+    -   When #DFT_COMPLEX_OUTPUT is not set, the output is a real matrix of the same size as
         input. In case of 2D transform, it uses the packed format as shown above. In case of a
         single 1D transform, it looks like the first row of the matrix above. In case of
-        multiple 1D transforms (when using the DFT_ROWS flag), each row of the output matrix
+        multiple 1D transforms (when using the #DFT_ROWS flag), each row of the output matrix
         looks like the first row of the matrix above.
--   If the input array is complex and either DFT_INVERSE or DFT_REAL_OUTPUT are not set, the
+-   If the input array is complex and either #DFT_INVERSE or #DFT_REAL_OUTPUT are not set, the
     output is a complex array of the same size as input. The function performs a forward or
     inverse 1D or 2D transform of the whole input array or each row of the input array
     independently, depending on the flags DFT_INVERSE and DFT_ROWS.
--   When DFT_INVERSE is set and the input array is real, or it is complex but DFT_REAL_OUTPUT
+-   When #DFT_INVERSE is set and the input array is real, or it is complex but #DFT_REAL_OUTPUT
     is set, the output is a real array of the same size as input. The function performs a 1D or 2D
     inverse transformation of the whole input array or each individual row, depending on the flags
-    DFT_INVERSE and DFT_ROWS.
+    #DFT_INVERSE and #DFT_ROWS.
 
-If DFT_SCALE is set, the scaling is done after the transformation.
+If #DFT_SCALE is set, the scaling is done after the transformation.
 
 Unlike dct , the function supports arrays of arbitrary size. But only those arrays are processed
 efficiently, whose sizes can be factorized in a product of small prime numbers (2, 3, and 5 in the
@@ -2027,7 +2135,7 @@ To optimize this sample, consider the following approaches:
 -   If different tiles in C can be calculated in parallel and, thus, the convolution is done by
     parts, the loop can be threaded.
 
-All of the above improvements have been implemented in matchTemplate and filter2D . Therefore, by
+All of the above improvements have been implemented in #matchTemplate and #filter2D . Therefore, by
 using them, you can get the performance even better than with the above theoretically optimal
 implementation. Though, those two functions actually calculate cross-correlation, not convolution,
 so you need to "flip" the second convolution operand B vertically and horizontally using flip .
@@ -2040,10 +2148,10 @@ so you need to "flip" the second convolution operand B vertically and horizontal
     opencv_source/samples/python/dft.py
 @param src input array that could be real or complex.
 @param dst output array whose size and type depends on the flags .
-@param flags transformation flags, representing a combination of the cv::DftFlags
+@param flags transformation flags, representing a combination of the #DftFlags
 @param nonzeroRows when the parameter is not zero, the function assumes that only the first
-nonzeroRows rows of the input array (DFT_INVERSE is not set) or only the first nonzeroRows of the
-output array (DFT_INVERSE is set) contain non-zeros, thus, the function can handle the rest of the
+nonzeroRows rows of the input array (#DFT_INVERSE is not set) or only the first nonzeroRows of the
+output array (#DFT_INVERSE is set) contain non-zeros, thus, the function can handle the rest of the
 rows more efficiently and save some time; this technique is very useful for calculating array
 cross-correlation or convolution using DFT.
 @sa dct , getOptimalDFTSize , mulSpectrums, filter2D , matchTemplate , flip , cartToPolar ,
@@ -2053,13 +2161,13 @@ CV_EXPORTS_W void dft(InputArray src, OutputArray dst, int flags = 0, int nonzer
 
 /** @brief Calculates the inverse Discrete Fourier Transform of a 1D or 2D array.
 
-idft(src, dst, flags) is equivalent to dft(src, dst, flags | DFT_INVERSE) .
-@note None of dft and idft scales the result by default. So, you should pass DFT_SCALE to one of
+idft(src, dst, flags) is equivalent to dft(src, dst, flags | #DFT_INVERSE) .
+@note None of dft and idft scales the result by default. So, you should pass #DFT_SCALE to one of
 dft or idft explicitly to make these transforms mutually inverse.
 @sa dft, dct, idct, mulSpectrums, getOptimalDFTSize
 @param src input floating-point real or complex array.
 @param dst output array whose size and type depend on the flags.
-@param flags operation flags (see dft and cv::DftFlags).
+@param flags operation flags (see dft and #DftFlags).
 @param nonzeroRows number of dst rows to process; the rest of the rows have undefined content (see
 the convolution sample in dft description.
 */
@@ -2067,7 +2175,7 @@ CV_EXPORTS_W void idft(InputArray src, OutputArray dst, int flags = 0, int nonze
 
 /** @brief Performs a forward or inverse discrete Cosine transform of 1D or 2D array.
 
-The function dct performs a forward or inverse discrete Cosine transform (DCT) of a 1D or 2D
+The function cv::dct performs a forward or inverse discrete Cosine transform (DCT) of a 1D or 2D
 floating-point array:
 -   Forward Cosine transform of a 1D vector of N elements:
     \f[Y = C^{(N)}  \cdot X\f]
@@ -2084,9 +2192,9 @@ floating-point array:
     \f[X =  \left (C^{(N)} \right )^T  \cdot X  \cdot C^{(N)}\f]
 
 The function chooses the mode of operation by looking at the flags and size of the input array:
--   If (flags & DCT_INVERSE) == 0 , the function does a forward 1D or 2D transform. Otherwise, it
+-   If (flags & #DCT_INVERSE) == 0 , the function does a forward 1D or 2D transform. Otherwise, it
     is an inverse 1D or 2D transform.
--   If (flags & DCT_ROWS) != 0 , the function performs a 1D transform of each row.
+-   If (flags & #DCT_ROWS) != 0 , the function performs a 1D transform of each row.
 -   If the array is a single column or a single row, the function performs a 1D transform.
 -   If none of the above is true, the function performs a 2D transform.
 
@@ -2118,7 +2226,7 @@ CV_EXPORTS_W void idct(InputArray src, OutputArray dst, int flags = 0);
 
 /** @brief Performs the per-element multiplication of two Fourier spectrums.
 
-The function mulSpectrums performs the per-element multiplication of the two CCS-packed or complex
+The function cv::mulSpectrums performs the per-element multiplication of the two CCS-packed or complex
 matrices that are results of a real or complex Fourier transform.
 
 The function, together with dft and idft , may be used to calculate convolution (pass conjB=false )
@@ -2145,7 +2253,7 @@ original one. Arrays whose size is a power-of-two (2, 4, 8, 16, 32, ...) are the
 Though, the arrays whose size is a product of 2's, 3's, and 5's (for example, 300 = 5\*5\*3\*2\*2)
 are also processed quite efficiently.
 
-The function getOptimalDFTSize returns the minimum number N that is greater than or equal to vecsize
+The function cv::getOptimalDFTSize returns the minimum number N that is greater than or equal to vecsize
 so that the DFT of a vector of size N can be processed efficiently. In the current implementation N
 = 2 ^p^ \* 3 ^q^ \* 5 ^r^ for some integer p, q, r.
 
@@ -2161,7 +2269,7 @@ CV_EXPORTS_W int getOptimalDFTSize(int vecsize);
 
 /** @brief Returns the default random number generator.
 
-The function theRNG returns the default random number generator. For each thread, there is a
+The function cv::theRNG returns the default random number generator. For each thread, there is a
 separate random number generator, so you can use the function safely in multi-thread environments.
 If you just need to get a single random number using this generator or initialize an array, you can
 use randu or randn instead. But if you are going to generate many random numbers inside a loop, it
@@ -2170,6 +2278,14 @@ is much faster to use this function to retrieve the generator and then use RNG::
 */
 CV_EXPORTS RNG& theRNG();
 
+/** @brief Sets state of default random number generator.
+
+The function cv::setRNGSeed sets the state of the default random number generator to a custom value.
+@param seed new state for default random number generator
+@sa RNG, randu, randn
+*/
+CV_EXPORTS_W void setRNGSeed(int seed);
+
 /** @brief Generates a single uniformly-distributed random number or an array of random numbers.
 
 Non-template variant of the function fills the matrix dst with uniformly-distributed
@@ -2184,7 +2300,7 @@ CV_EXPORTS_W void randu(InputOutputArray dst, InputArray low, InputArray high);
 
 /** @brief Fills the array with normally distributed random numbers.
 
-The function randn fills the matrix dst with normally distributed random numbers with the specified
+The function cv::randn fills the matrix dst with normally distributed random numbers with the specified
 mean vector and the standard deviation matrix. The generated random numbers are clipped to fit the
 value range of the output array data type.
 @param dst output array of random numbers; the array must be pre-allocated and have 1 to 4 channels.
@@ -2197,7 +2313,7 @@ CV_EXPORTS_W void randn(InputOutputArray dst, InputArray mean, InputArray stddev
 
 /** @brief Shuffles the array elements randomly.
 
-The function randShuffle shuffles the specified 1D array by randomly choosing pairs of elements and
+The function cv::randShuffle shuffles the specified 1D array by randomly choosing pairs of elements and
 swapping them. The number of such swap operations will be dst.rows\*dst.cols\*iterFactor .
 @param dst input/output numerical 1D array.
 @param iterFactor scale factor that determines the number of random swap operations (see the details
@@ -2316,11 +2432,11 @@ class CV_EXPORTS PCA
     The operator performs %PCA of the supplied dataset. It is safe to reuse
     the same PCA structure for multiple datasets. That is, if the structure
     has been previously used with another dataset, the existing internal
-    data is reclaimed and the new eigenvalues, @ref eigenvectors , and @ref
+    data is reclaimed and the new @ref eigenvalues, @ref eigenvectors and @ref
     mean are allocated and computed.
 
-    The computed eigenvalues are sorted from the largest to the smallest and
-    the corresponding eigenvectors are stored as eigenvectors rows.
+    The computed @ref eigenvalues are sorted from the largest to the smallest and
+    the corresponding @ref eigenvectors are stored as eigenvectors rows.
 
     @param data input samples stored as the matrix rows or as the matrix
     columns.
@@ -2400,25 +2516,35 @@ class CV_EXPORTS PCA
      */
     void backProject(InputArray vec, OutputArray result) const;
 
-    /** @brief write and load PCA matrix
+    /** @brief write PCA objects
 
-*/
-    void write(FileStorage& fs ) const;
-    void read(const FileNode& fs);
+    Writes @ref eigenvalues, @ref eigenvectors and @ref mean to the specified FileStorage
+     */
+    void write(FileStorage& fs) const;
+
+    /** @brief load PCA objects
+
+    Loads @ref eigenvalues, @ref eigenvectors and @ref mean from the specified FileNode
+     */
+    void read(const FileNode& fn);
 
     Mat eigenvectors; //!< eigenvectors of the covariation matrix
     Mat eigenvalues; //!< eigenvalues of the covariation matrix
     Mat mean; //!< mean value subtracted before the projection and added after the back projection
 };
 
-/** @example pca.cpp
-  An example using %PCA for dimensionality reduction while maintaining an amount of variance
- */
+/** @example samples/cpp/pca.cpp
+An example using %PCA for dimensionality reduction while maintaining an amount of variance
+*/
+
+/** @example samples/cpp/tutorial_code/ml/introduction_to_pca/introduction_to_pca.cpp
+Check @ref tutorial_introduction_to_pca "the corresponding tutorial" for more details
+*/
 
 /**
-   @brief Linear Discriminant Analysis
-   @todo document this class
- */
+@brief Linear Discriminant Analysis
+@todo document this class
+*/
 class CV_EXPORTS LDA
 {
 public:
@@ -2480,7 +2606,6 @@ class CV_EXPORTS LDA
     static Mat subspaceReconstruct(InputArray W, InputArray mean, InputArray src);
 
 protected:
-    bool _dataAsRow; // unused, but needed for 3.0 ABI compatibility.
     int _num_components;
     Mat _eigenvectors;
     Mat _eigenvalues;
@@ -2525,7 +2650,7 @@ class CV_EXPORTS SVD
 
     /** @overload
     initializes an empty SVD structure and then calls SVD::operator()
-    @param src decomposed matrix.
+    @param src decomposed matrix. The depth has to be CV_32F or CV_64F.
     @param flags operation flags (SVD::Flags)
       */
     SVD( InputArray src, int flags = 0 );
@@ -2538,7 +2663,7 @@ class CV_EXPORTS SVD
     different matrices. Each time, if needed, the previous u,`vt` , and w
     are reclaimed and the new matrices are created, which is all handled by
     Mat::create.
-    @param src decomposed matrix.
+    @param src decomposed matrix. The depth has to be CV_32F or CV_64F.
     @param flags operation flags (SVD::Flags)
       */
     SVD& operator ()( InputArray src, int flags = 0 );
@@ -2554,18 +2679,18 @@ class CV_EXPORTS SVD
     SVD::compute(A, w, u, vt);
     @endcode
 
-    @param src decomposed matrix
+    @param src decomposed matrix. The depth has to be CV_32F or CV_64F.
     @param w calculated singular values
     @param u calculated left singular vectors
-    @param vt transposed matrix of right singular values
-    @param flags operation flags - see SVD::SVD.
+    @param vt transposed matrix of right singular vectors
+    @param flags operation flags - see SVD::Flags.
       */
     static void compute( InputArray src, OutputArray w,
                          OutputArray u, OutputArray vt, int flags = 0 );
 
     /** @overload
     computes singular values of a matrix
-    @param src decomposed matrix
+    @param src decomposed matrix. The depth has to be CV_32F or CV_64F.
     @param w calculated singular values
     @param flags operation flags - see SVD::Flags.
       */
@@ -2609,7 +2734,7 @@ class CV_EXPORTS SVD
     if you need to solve many linear systems with the same left-hand side
     (for example, src ). If all you need is to solve a single system
     (possibly with multiple rhs immediately available), simply call solve
-    add pass DECOMP_SVD there. It does absolutely the same thing.
+    and pass #DECOMP_SVD there. It does absolutely the same thing.
       */
     void backSubst( InputArray rhs, OutputArray dst ) const;
 
@@ -2716,7 +2841,7 @@ class CV_EXPORTS RNG
     double a1 = rng.uniform((double)0, (double)1);
 
     // produces float from [0, 1)
-    double b = rng.uniform(0.f, 1.f);
+    float b = rng.uniform(0.f, 1.f);
 
     // produces double from [0, 1)
     double c = rng.uniform(0., 1.);
@@ -2732,9 +2857,9 @@ class CV_EXPORTS RNG
     want a floating-point random number, but the range boundaries are
     integer numbers, either put dots in the end, if they are constants, or
     use explicit type cast operators, as in the a1 initialization above.
-    @param a lower inclusive boundary of the returned random numbers.
-    @param b upper non-inclusive boundary of the returned random numbers.
-      */
+    @param a lower inclusive boundary of the returned random number.
+    @param b upper non-inclusive boundary of the returned random number.
+    */
     int uniform(int a, int b);
     /** @overload */
     float uniform(float a, float b);
@@ -2788,13 +2913,15 @@ class CV_EXPORTS RNG
     double gaussian(double sigma);
 
     uint64 state;
+
+    bool operator ==(const RNG& other) const;
 };
 
 /** @brief Mersenne Twister random number generator
 
 Inspired by http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/MT2002/CODES/mt19937ar.c
 @todo document
- */
+*/
 class CV_EXPORTS RNG_MT19937
 {
 public:
@@ -2812,17 +2939,11 @@ class CV_EXPORTS RNG_MT19937
     unsigned operator ()(unsigned N);
     unsigned operator ()();
 
-    /** @brief returns uniformly distributed integer random number from [a,b) range
-
-*/
+    /** @brief returns uniformly distributed integer random number from [a,b) range*/
     int uniform(int a, int b);
-    /** @brief returns uniformly distributed floating-point random number from [a,b) range
-
-*/
+    /** @brief returns uniformly distributed floating-point random number from [a,b) range*/
     float uniform(float a, float b);
-    /** @brief returns uniformly distributed double-precision floating-point random number from [a,b) range
-
-*/
+    /** @brief returns uniformly distributed double-precision floating-point random number from [a,b) range*/
     double uniform(double a, double b);
 
 private:
@@ -2836,14 +2957,14 @@ class CV_EXPORTS RNG_MT19937
 //! @addtogroup core_cluster
 //!  @{
 
-/** @example kmeans.cpp
-  An example on K-means clustering
+/** @example samples/cpp/kmeans.cpp
+An example on K-means clustering
 */
 
 /** @brief Finds centers of clusters and groups input samples around the clusters.
 
 The function kmeans implements a k-means algorithm that finds the centers of cluster_count clusters
-and groups the input samples around the clusters. As an output, \f$\texttt{labels}_i\f$ contains a
+and groups the input samples around the clusters. As an output, \f$\texttt{bestLabels}_i\f$ contains a
 0-based cluster index for the sample stored in the \f$i^{th}\f$ row of the samples matrix.
 
 @note
@@ -2870,7 +2991,7 @@ function parameter).
 after every attempt. The best (minimum) value is chosen and the corresponding labels and the
 compactness value are returned by the function. Basically, you can use only the core of the
 function, set the number of attempts to 1, initialize labels each time using a custom algorithm,
-pass them with the ( flags = KMEANS_USE_INITIAL_LABELS ) flag, and then choose the best
+pass them with the ( flags = #KMEANS_USE_INITIAL_LABELS ) flag, and then choose the best
 (most-compact) clustering.
 */
 CV_EXPORTS_W double kmeans( InputArray data, int K, InputOutputArray bestLabels,
@@ -2897,7 +3018,8 @@ class CV_EXPORTS Formatted
 class CV_EXPORTS Formatter
 {
 public:
-    enum { FMT_DEFAULT = 0,
+    enum FormatType {
+           FMT_DEFAULT = 0,
            FMT_MATLAB  = 1,
            FMT_CSV     = 2,
            FMT_PYTHON  = 3,
@@ -2909,11 +3031,12 @@ class CV_EXPORTS Formatter
 
     virtual Ptr<Formatted> format(const Mat& mtx) const = 0;
 
+    virtual void set16fPrecision(int p = 4) = 0;
     virtual void set32fPrecision(int p = 8) = 0;
     virtual void set64fPrecision(int p = 16) = 0;
     virtual void setMultiline(bool ml = true) = 0;
 
-    static Ptr<Formatter> get(int fmt = FMT_DEFAULT);
+    static Ptr<Formatter> get(Formatter::FormatType fmt = FMT_DEFAULT);
 
 };
 
@@ -2936,7 +3059,7 @@ String& operator << (String& out, const Mat& mtx)
 
 class CV_EXPORTS Algorithm;
 
-template<typename _Tp> struct ParamType {};
+template<typename _Tp, typename _EnumTp = void> struct ParamType {};
 
 
 /** @brief This is a base class for all more or less complex algorithms in OpenCV
@@ -2947,32 +3070,9 @@ matching, graph-cut etc.), background subtraction (which can be done using mixtu
 models, codebook-based algorithm etc.), optical flow (block matching, Lucas-Kanade, Horn-Schunck
 etc.).
 
-Here is example of SIFT use in your application via Algorithm interface:
-@code
-    #include "opencv2/opencv.hpp"
-    #include "opencv2/xfeatures2d.hpp"
-    using namespace cv::xfeatures2d;
-
-    Ptr<Feature2D> sift = SIFT::create();
-    FileStorage fs("sift_params.xml", FileStorage::READ);
-    if( fs.isOpened() ) // if we have file with parameters, read them
-    {
-        sift->read(fs["sift_params"]);
-        fs.release();
-    }
-    else // else modify the parameters and store them; user can later edit the file to use different parameters
-    {
-        sift->setContrastThreshold(0.01f); // lower the contrast threshold, compared to the default value
-        {
-            WriteStructContext ws(fs, "sift_params", CV_NODE_MAP);
-            sift->write(fs);
-        }
-    }
-    Mat image = imread("myimage.png", 0), descriptors;
-    vector<KeyPoint> keypoints;
-    sift->detectAndCompute(image, noArray(), keypoints, descriptors);
-@endcode
- */
+Here is example of SimpleBlobDetector use in your application via Algorithm interface:
+@snippet snippets/core_various.cpp Algorithm
+*/
 class CV_EXPORTS_W Algorithm
 {
 public:
@@ -2985,26 +3085,32 @@ class CV_EXPORTS_W Algorithm
 
     /** @brief Stores algorithm parameters in a file storage
     */
-    virtual void write(FileStorage& fs) const { (void)fs; }
+    virtual void write(FileStorage& fs) const { CV_UNUSED(fs); }
+
+    /** @brief simplified API for language bindings
+    * @overload
+    */
+    CV_WRAP void write(const Ptr<FileStorage>& fs, const String& name = String()) const;
 
     /** @brief Reads algorithm parameters from a file storage
     */
-    virtual void read(const FileNode& fn) { (void)fn; }
+    CV_WRAP virtual void read(const FileNode& fn) { CV_UNUSED(fn); }
 
     /** @brief Returns true if the Algorithm is empty (e.g. in the very beginning or after unsuccessful read
-     */
-    virtual bool empty() const { return false; }
+    */
+    CV_WRAP virtual bool empty() const { return false; }
 
     /** @brief Reads algorithm from the file node
 
-     This is static template method of Algorithm. It's usage is following (in the case of SVM):
-     @code
-     Ptr<SVM> svm = Algorithm::read<SVM>(fn);
-     @endcode
-     In order to make this method work, the derived class must overwrite Algorithm::read(const
-     FileNode& fn) and also have static create() method without parameters
-     (or with all the optional parameters)
-     */
+    This is a static template method of Algorithm. Its usage is as follows (in the case of SVM):
+    @code
+    cv::FileStorage fsRead("example.xml", FileStorage::READ);
+    Ptr<SVM> svm = Algorithm::read<SVM>(fsRead.root());
+    @endcode
+    In order to make this method work, the derived class must override Algorithm::read(const
+    FileNode& fn) and also have static create() method without parameters
+    (or with all the optional parameters)
+    */
     template<typename _Tp> static Ptr<_Tp> read(const FileNode& fn)
     {
         Ptr<_Tp> obj = _Tp::create();
@@ -3014,20 +3120,22 @@ class CV_EXPORTS_W Algorithm
 
     /** @brief Loads algorithm from the file
 
-     @param filename Name of the file to read.
-     @param objname The optional name of the node to read (if empty, the first top-level node will be used)
+    @param filename Name of the file to read.
+    @param objname The optional name of the node to read (if empty, the first top-level node will be used)
 
-     This is static template method of Algorithm. It's usage is following (in the case of SVM):
-     @code
-     Ptr<SVM> svm = Algorithm::load<SVM>("my_svm_model.xml");
-     @endcode
-     In order to make this method work, the derived class must overwrite Algorithm::read(const
-     FileNode& fn).
-     */
+    This is a static template method of Algorithm. Its usage is as follows (in the case of SVM):
+    @code
+    Ptr<SVM> svm = Algorithm::load<SVM>("my_svm_model.xml");
+    @endcode
+    In order to make this method work, the derived class must override Algorithm::read(const
+    FileNode& fn).
+    */
     template<typename _Tp> static Ptr<_Tp> load(const String& filename, const String& objname=String())
     {
         FileStorage fs(filename, FileStorage::READ);
+        CV_Assert(fs.isOpened());
         FileNode fn = objname.empty() ? fs.getFirstTopLevelNode() : fs[objname];
+        if (fn.empty()) return Ptr<_Tp>();
         Ptr<_Tp> obj = _Tp::create();
         obj->read(fn);
         return !obj->empty() ? obj : Ptr<_Tp>();
@@ -3035,14 +3143,14 @@ class CV_EXPORTS_W Algorithm
 
     /** @brief Loads algorithm from a String
 
-     @param strModel The string variable containing the model you want to load.
-     @param objname The optional name of the node to read (if empty, the first top-level node will be used)
+    @param strModel The string variable containing the model you want to load.
+    @param objname The optional name of the node to read (if empty, the first top-level node will be used)
 
-     This is static template method of Algorithm. It's usage is following (in the case of SVM):
-     @code
-     Ptr<SVM> svm = Algorithm::loadFromString<SVM>(myStringModel);
-     @endcode
-     */
+    This is a static template method of Algorithm. Its usage is as follows (in the case of SVM):
+    @code
+    Ptr<SVM> svm = Algorithm::loadFromString<SVM>(myStringModel);
+    @endcode
+    */
     template<typename _Tp> static Ptr<_Tp> loadFromString(const String& strModel, const String& objname=String())
     {
         FileStorage fs(strModel, FileStorage::READ + FileStorage::MEMORY);
@@ -3053,17 +3161,20 @@ class CV_EXPORTS_W Algorithm
     }
 
     /** Saves the algorithm to a file.
-     In order to make this method work, the derived class must implement Algorithm::write(FileStorage& fs). */
+    In order to make this method work, the derived class must implement Algorithm::write(FileStorage& fs). */
     CV_WRAP virtual void save(const String& filename) const;
 
     /** Returns the algorithm string identifier.
-     This string is used as top level xml/yml node tag when the object is saved to a file or string. */
+    This string is used as top level xml/yml node tag when the object is saved to a file or string. */
     CV_WRAP virtual String getDefaultName() const;
+
+protected:
+    void writeFormat(FileStorage& fs) const;
 };
 
-struct Param {
-    enum { INT=0, BOOLEAN=1, REAL=2, STRING=3, MAT=4, MAT_VECTOR=5, ALGORITHM=6, FLOAT=7,
-           UNSIGNED_INT=8, UINT64=9, UCHAR=11 };
+enum struct Param {
+    INT=0, BOOLEAN=1, REAL=2, STRING=3, MAT=4, MAT_VECTOR=5, ALGORITHM=6, FLOAT=7,
+    UNSIGNED_INT=8, UINT64=9, UCHAR=11, SCALAR=12
 };
 
 
@@ -3073,7 +3184,7 @@ template<> struct ParamType<bool>
     typedef bool const_param_type;
     typedef bool member_type;
 
-    enum { type = Param::BOOLEAN };
+    static const Param type = Param::BOOLEAN;
 };
 
 template<> struct ParamType<int>
@@ -3081,7 +3192,7 @@ template<> struct ParamType<int>
     typedef int const_param_type;
     typedef int member_type;
 
-    enum { type = Param::INT };
+    static const Param type = Param::INT;
 };
 
 template<> struct ParamType<double>
@@ -3089,7 +3200,7 @@ template<> struct ParamType<double>
     typedef double const_param_type;
     typedef double member_type;
 
-    enum { type = Param::REAL };
+    static const Param type = Param::REAL;
 };
 
 template<> struct ParamType<String>
@@ -3097,7 +3208,7 @@ template<> struct ParamType<String>
     typedef const String& const_param_type;
     typedef String member_type;
 
-    enum { type = Param::STRING };
+    static const Param type = Param::STRING;
 };
 
 template<> struct ParamType<Mat>
@@ -3105,7 +3216,7 @@ template<> struct ParamType<Mat>
     typedef const Mat& const_param_type;
     typedef Mat member_type;
 
-    enum { type = Param::MAT };
+    static const Param type = Param::MAT;
 };
 
 template<> struct ParamType<std::vector<Mat> >
@@ -3113,7 +3224,7 @@ template<> struct ParamType<std::vector<Mat> >
     typedef const std::vector<Mat>& const_param_type;
     typedef std::vector<Mat> member_type;
 
-    enum { type = Param::MAT_VECTOR };
+    static const Param type = Param::MAT_VECTOR;
 };
 
 template<> struct ParamType<Algorithm>
@@ -3121,7 +3232,7 @@ template<> struct ParamType<Algorithm>
     typedef const Ptr<Algorithm>& const_param_type;
     typedef Ptr<Algorithm> member_type;
 
-    enum { type = Param::ALGORITHM };
+    static const Param type = Param::ALGORITHM;
 };
 
 template<> struct ParamType<float>
@@ -3129,7 +3240,7 @@ template<> struct ParamType<float>
     typedef float const_param_type;
     typedef float member_type;
 
-    enum { type = Param::FLOAT };
+    static const Param type = Param::FLOAT;
 };
 
 template<> struct ParamType<unsigned>
@@ -3137,7 +3248,7 @@ template<> struct ParamType<unsigned>
     typedef unsigned const_param_type;
     typedef unsigned member_type;
 
-    enum { type = Param::UNSIGNED_INT };
+    static const Param type = Param::UNSIGNED_INT;
 };
 
 template<> struct ParamType<uint64>
@@ -3145,7 +3256,7 @@ template<> struct ParamType<uint64>
     typedef uint64 const_param_type;
     typedef uint64 member_type;
 
-    enum { type = Param::UINT64 };
+    static const Param type = Param::UINT64;
 };
 
 template<> struct ParamType<uchar>
@@ -3153,7 +3264,24 @@ template<> struct ParamType<uchar>
     typedef uchar const_param_type;
     typedef uchar member_type;
 
-    enum { type = Param::UCHAR };
+    static const Param type = Param::UCHAR;
+};
+
+template<> struct ParamType<Scalar>
+{
+    typedef const Scalar& const_param_type;
+    typedef Scalar member_type;
+
+    static const Param type = Param::SCALAR;
+};
+
+template<typename _Tp>
+struct ParamType<_Tp, typename std::enable_if< std::is_enum<_Tp>::value >::type>
+{
+    typedef typename std::underlying_type<_Tp>::type const_param_type;
+    typedef typename std::underlying_type<_Tp>::type member_type;
+
+    static const Param type = Param::INT;
 };
 
 //! @} core_basic
@@ -3164,5 +3292,6 @@ template<> struct ParamType<uchar>
 #include "opencv2/core/cvstd.inl.hpp"
 #include "opencv2/core/utility.hpp"
 #include "opencv2/core/optim.hpp"
+#include "opencv2/core/ovx.hpp"
 
-#endif /*__OPENCV_CORE_HPP__*/
+#endif /*OPENCV_CORE_HPP*/
diff --git a/IPL/include/opencv/opencv2/core/affine.hpp b/IPL/include/opencv/opencv2/core/affine.hpp
index 7f8deb5..7e2ed30 100644
--- a/IPL/include/opencv/opencv2/core/affine.hpp
+++ b/IPL/include/opencv/opencv2/core/affine.hpp
@@ -41,8 +41,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_CORE_AFFINE3_HPP__
-#define __OPENCV_CORE_AFFINE3_HPP__
+#ifndef OPENCV_CORE_AFFINE3_HPP
+#define OPENCV_CORE_AFFINE3_HPP
 
 #ifdef __cplusplus
 
@@ -55,7 +55,72 @@ namespace cv
 //! @{
 
     /** @brief Affine transform
-      @todo document
+     *
+     * It represents a 4x4 homogeneous transformation matrix \f$T\f$
+     *
+     *  \f[T =
+     *  \begin{bmatrix}
+     *  R & t\\
+     *  0 & 1\\
+     *  \end{bmatrix}
+     *  \f]
+     *
+     *  where \f$R\f$ is a 3x3 rotation matrix and \f$t\f$ is a 3x1 translation vector.
+     *
+     *  You can specify \f$R\f$ either by a 3x3 rotation matrix or by a 3x1 rotation vector,
+     *  which is converted to a 3x3 rotation matrix by the Rodrigues formula.
+     *
+     *  To construct a matrix \f$T\f$ representing first rotation around the axis \f$r\f$ with rotation
+     *  angle \f$|r|\f$ in radians (right-hand rule) and then translation by the vector \f$t\f$, you can use
+     *
+     *  @code
+     *  cv::Vec3f r, t;
+     *  cv::Affine3f T(r, t);
+     *  @endcode
+     *
+     *  If you already have the rotation matrix \f$R\f$, then you can use
+     *
+     *  @code
+     *  cv::Matx33f R;
+     *  cv::Affine3f T(R, t);
+     *  @endcode
+     *
+     *  To extract the rotation matrix \f$R\f$ from \f$T\f$, use
+     *
+     *  @code
+     *  cv::Matx33f R = T.rotation();
+     *  @endcode
+     *
+     *  To extract the translation vector \f$t\f$ from \f$T\f$, use
+     *
+     *  @code
+     *  cv::Vec3f t = T.translation();
+     *  @endcode
+     *
+     *  To extract the rotation vector \f$r\f$ from \f$T\f$, use
+     *
+     *  @code
+     *  cv::Vec3f r = T.rvec();
+     *  @endcode
+     *
+     *  Note that since the mapping from rotation vectors to rotation matrices
+     *  is many to one, the returned rotation vector is not necessarily the one
+     *  you used before to set the matrix.
+     *
+     *  To compose two transformations as \f$T = T_1 * T_2\f$, use
+     *
+     *  @code
+     *  cv::Affine3f T, T1, T2;
+     *  T = T2.concatenate(T1);
+     *  @endcode
+     *
+     *  To get the inverse transform of \f$T\f$, use
+     *
+     *  @code
+     *  cv::Affine3f T, T_inv;
+     *  T_inv = T.inv();
+     *  @endcode
+     *
      */
     template<typename T>
     class Affine3
@@ -66,45 +131,127 @@ namespace cv
         typedef Matx<float_type, 4, 4> Mat4;
         typedef Vec<float_type, 3> Vec3;
 
+       //! Default constructor. It represents a 4x4 identity matrix.
         Affine3();
 
         //! Augmented affine matrix
         Affine3(const Mat4& affine);
 
-        //! Rotation matrix
+        /**
+         *  The resulting 4x4 matrix is
+         *
+         *  \f[
+         *  \begin{bmatrix}
+         *  R & t\\
+         *  0 & 1\\
+         *  \end{bmatrix}
+         *  \f]
+         *
+         * @param R 3x3 rotation matrix.
+         * @param t 3x1 translation vector.
+         */
         Affine3(const Mat3& R, const Vec3& t = Vec3::all(0));
 
-        //! Rodrigues vector
+        /**
+         * Rodrigues vector.
+         *
+         * The last row of the current matrix is set to [0,0,0,1].
+         *
+         * @param rvec 3x1 rotation vector. Its direction indicates the rotation axis and its length
+         *             indicates the rotation angle in radians (using the right-hand rule).
+         * @param t 3x1 translation vector.
+         */
         Affine3(const Vec3& rvec, const Vec3& t = Vec3::all(0));
 
-        //! Combines all contructors above. Supports 4x4, 4x3, 3x3, 1x3, 3x1 sizes of data matrix
+        /**
+         * Combines all constructors above. Supports 4x4, 3x4, 3x3, 1x3, 3x1 sizes of data matrix.
+         *
+         * The last row of the current matrix is set to [0,0,0,1] when data is not 4x4.
+         *
+         * @param data 1-channel matrix.
+         *             When it is 4x4, it is copied to the current matrix and t is not used.
+         *             When it is 3x4, it is copied to the upper part 3x4 of the current matrix and t is not used.
+         *             When it is 3x3, it is copied to the upper left 3x3 part of the current matrix.
+         *             When it is 3x1 or 1x3, it is treated as a rotation vector and the Rodrigues formula is used
+         *                             to compute a 3x3 rotation matrix.
+         * @param t 3x1 translation vector. It is used only when data is neither 4x4 nor 3x4.
+         */
         explicit Affine3(const Mat& data, const Vec3& t = Vec3::all(0));
 
-        //! From 16th element array
+        //! From 16-element array
         explicit Affine3(const float_type* vals);
 
-        //! Create identity transform
+        //! Create a 4x4 identity transform
         static Affine3 Identity();
 
-        //! Rotation matrix
+        /**
+         * Rotation matrix.
+         *
+         * Copy the rotation matrix to the upper left 3x3 part of the current matrix.
+         * The remaining elements of the current matrix are not changed.
+         *
+         * @param R 3x3 rotation matrix.
+         *
+         */
         void rotation(const Mat3& R);
 
-        //! Rodrigues vector
+        /**
+         * Rodrigues vector.
+         *
+         * It sets the upper left 3x3 part of the matrix. The remaining part is unaffected.
+         *
+         * @param rvec 3x1 rotation vector. The direction indicates the rotation axis and
+         *             its length indicates the rotation angle in radians (using the right-hand rule).
+         */
         void rotation(const Vec3& rvec);
 
-        //! Combines rotation methods above. Suports 3x3, 1x3, 3x1 sizes of data matrix;
+        /**
+         * Combines rotation methods above. Supports 3x3, 1x3, 3x1 sizes of data matrix.
+         *
+         * It sets the upper left 3x3 part of the matrix. The remaining part is unaffected.
+         *
+         * @param data 1-channel matrix.
+         *             When it is a 3x3 matrix, it sets the upper left 3x3 part of the current matrix.
+         *             When it is a 1x3 or 3x1 matrix, it is used as a rotation vector. The Rodrigues formula
+         *             is used to compute the rotation matrix and sets the upper left 3x3 part of the current matrix.
+         */
         void rotation(const Mat& data);
 
+        /**
+         * Copy the 3x3 matrix L to the upper left part of the current matrix
+         *
+         * It sets the upper left 3x3 part of the matrix. The remaining part is unaffected.
+         *
+         * @param L 3x3 matrix.
+         */
         void linear(const Mat3& L);
+
+        /**
+         * Copy t to the first three elements of the last column of the current matrix
+         *
+         * It sets the upper right 3x1 part of the matrix. The remaining part is unaffected.
+         *
+         * @param t 3x1 translation vector.
+         */
         void translation(const Vec3& t);
 
+        //! @return the upper left 3x3 part
         Mat3 rotation() const;
+
+        //! @return the upper left 3x3 part
         Mat3 linear() const;
+
+        //! @return the upper right 3x1 part
         Vec3 translation() const;
 
-        //! Rodrigues vector
+        //! Rodrigues vector.
+        //! @return a vector representing the upper left 3x3 rotation matrix of the current matrix.
+        //! @warning  Since the mapping between rotation vectors and rotation matrices is many to one,
+        //!           this function returns only one rotation vector that represents the current rotation matrix,
+        //!           which is not necessarily the same one set by `rotation(const Vec3& rvec)`.
         Vec3 rvec() const;
 
+        //! @return the inverse of the current matrix.
         Affine3 inv(int method = cv::DECOMP_SVD) const;
 
         //! a.rotate(R) is equivalent to Affine(R, 0) * a;
@@ -113,7 +260,7 @@ namespace cv
         //! a.rotate(rvec) is equivalent to Affine(rvec, 0) * a;
         Affine3 rotate(const Vec3& rvec) const;
 
-        //! a.translate(t) is equivalent to Affine(E, t) * a;
+        //! a.translate(t) is equivalent to Affine(E, t) * a, where E is an identity matrix
         Affine3 translate(const Vec3& t) const;
 
         //! a.concatenate(affine) is equivalent to affine * a;
@@ -136,6 +283,7 @@ namespace cv
     template<typename T> static
     Affine3<T> operator*(const Affine3<T>& affine1, const Affine3<T>& affine2);
 
+    //! V is a 3-element vector with member fields x, y and z
     template<typename T, typename V> static
     V operator*(const Affine3<T>& affine, const V& vector);
 
@@ -153,15 +301,24 @@ namespace cv
         typedef _Tp                                        channel_type;
 
         enum { generic_type = 0,
-               depth        = DataType<channel_type>::depth,
                channels     = 16,
-               fmt          = DataType<channel_type>::fmt + ((channels - 1) << 8),
-               type         = CV_MAKETYPE(depth, channels)
+               fmt          = traits::SafeFmt<channel_type>::fmt + ((channels - 1) << 8)
+#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED
+               ,depth        = DataType<channel_type>::depth
+               ,type         = CV_MAKETYPE(depth, channels)
+#endif
              };
 
         typedef Vec<channel_type, channels> vec_type;
     };
 
+    namespace traits {
+    template<typename _Tp>
+    struct Depth< Affine3<_Tp> > { enum { value = Depth<_Tp>::value }; };
+    template<typename _Tp>
+    struct Type< Affine3<_Tp> > { enum { value = CV_MAKETYPE(Depth<_Tp>::value, 16) }; };
+    } // namespace
+
 //! @} core
 
 }
@@ -169,7 +326,7 @@ namespace cv
 //! @cond IGNORED
 
 ///////////////////////////////////////////////////////////////////////////////////
-// Implementaiton
+// Implementation
 
 template<typename T> inline
 cv::Affine3<T>::Affine3()
@@ -202,7 +359,8 @@ cv::Affine3<T>::Affine3(const Vec3& _rvec, const Vec3& t)
 template<typename T> inline
 cv::Affine3<T>::Affine3(const cv::Mat& data, const Vec3& t)
 {
-    CV_Assert(data.type() == cv::DataType<T>::type);
+    CV_Assert(data.type() == cv::traits::Type<T>::value);
+    CV_Assert(data.channels() == 1);
 
     if (data.cols == 4 && data.rows == 4)
     {
@@ -213,11 +371,13 @@ cv::Affine3<T>::Affine3(const cv::Mat& data, const Vec3& t)
     {
         rotation(data(Rect(0, 0, 3, 3)));
         translation(data(Rect(3, 0, 1, 3)));
-        return;
+    }
+    else
+    {
+        rotation(data);
+        translation(t);
     }
 
-    rotation(data);
-    translation(t);
     matrix.val[12] = matrix.val[13] = matrix.val[14] = 0;
     matrix.val[15] = 1;
 }
@@ -265,11 +425,12 @@ void cv::Affine3<T>::rotation(const Vec3& _rvec)
     }
 }
 
-//Combines rotation methods above. Suports 3x3, 1x3, 3x1 sizes of data matrix;
+//Combines rotation methods above. Supports 3x3, 1x3, 3x1 sizes of data matrix;
 template<typename T> inline
 void cv::Affine3<T>::rotation(const cv::Mat& data)
 {
-    CV_Assert(data.type() == cv::DataType<T>::type);
+    CV_Assert(data.type() == cv::traits::Type<T>::value);
+    CV_Assert(data.channels() == 1);
 
     if (data.cols == 3 && data.rows == 3)
     {
@@ -284,7 +445,7 @@ void cv::Affine3<T>::rotation(const cv::Mat& data)
         rotation(_rvec);
     }
     else
-        CV_Assert(!"Input marix can be 3x3, 1x3 or 3x1");
+        CV_Error(Error::StsError, "Input matrix can only be 3x3, 1x3 or 3x1");
 }
 
 template<typename T> inline
@@ -483,21 +644,21 @@ cv::Vec3d cv::operator*(const cv::Affine3d& affine, const cv::Vec3d& v)
 template<typename T> inline
 cv::Affine3<T>::Affine3(const Eigen::Transform<T, 3, Eigen::Affine, (Eigen::RowMajor)>& affine)
 {
-    cv::Mat(4, 4, cv::DataType<T>::type, affine.matrix().data()).copyTo(matrix);
+    cv::Mat(4, 4, cv::traits::Type<T>::value, affine.matrix().data()).copyTo(matrix);
 }
 
 template<typename T> inline
 cv::Affine3<T>::Affine3(const Eigen::Transform<T, 3, Eigen::Affine>& affine)
 {
     Eigen::Transform<T, 3, Eigen::Affine, (Eigen::RowMajor)> a = affine;
-    cv::Mat(4, 4, cv::DataType<T>::type, a.matrix().data()).copyTo(matrix);
+    cv::Mat(4, 4, cv::traits::Type<T>::value, a.matrix().data()).copyTo(matrix);
 }
 
 template<typename T> inline
 cv::Affine3<T>::operator Eigen::Transform<T, 3, Eigen::Affine, (Eigen::RowMajor)>() const
 {
     Eigen::Transform<T, 3, Eigen::Affine, (Eigen::RowMajor)> r;
-    cv::Mat hdr(4, 4, cv::DataType<T>::type, r.matrix().data());
+    cv::Mat hdr(4, 4, cv::traits::Type<T>::value, r.matrix().data());
     cv::Mat(matrix, false).copyTo(hdr);
     return r;
 }
@@ -514,4 +675,4 @@ cv::Affine3<T>::operator Eigen::Transform<T, 3, Eigen::Affine>() const
 
 #endif /* __cplusplus */
 
-#endif /* __OPENCV_CORE_AFFINE3_HPP__ */
+#endif /* OPENCV_CORE_AFFINE3_HPP */
diff --git a/IPL/include/opencv/opencv2/core/async.hpp b/IPL/include/opencv/opencv2/core/async.hpp
new file mode 100644
index 0000000..54560c7
--- /dev/null
+++ b/IPL/include/opencv/opencv2/core/async.hpp
@@ -0,0 +1,105 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_CORE_ASYNC_HPP
+#define OPENCV_CORE_ASYNC_HPP
+
+#include <opencv2/core/mat.hpp>
+
+#ifdef CV_CXX11
+//#include <future>
+#include <chrono>
+#endif
+
+namespace cv {
+
+/** @addtogroup core_async
+
+@{
+*/
+
+
+/** @brief Returns result of asynchronous operations
+
+Object has attached asynchronous state.
+Assignment operator doesn't clone asynchronous state (it is shared between all instances).
+
+Result can be fetched via get() method only once.
+
+*/
+class CV_EXPORTS_W AsyncArray
+{
+public:
+    ~AsyncArray() CV_NOEXCEPT;
+    CV_WRAP AsyncArray() CV_NOEXCEPT;
+    AsyncArray(const AsyncArray& o) CV_NOEXCEPT;
+    AsyncArray& operator=(const AsyncArray& o) CV_NOEXCEPT;
+    CV_WRAP void release() CV_NOEXCEPT;
+
+    /** Fetch the result.
+    @param[out] dst destination array
+
+    Waits for result until container has valid result.
+    Throws exception if exception was stored as a result.
+
+    Throws exception on invalid container state.
+
+    @note Result or stored exception can be fetched only once.
+    */
+    CV_WRAP void get(OutputArray dst) const;
+
+    /** Retrieving the result with timeout
+    @param[out] dst destination array
+    @param[in] timeoutNs timeout in nanoseconds, -1 for infinite wait
+
+    @returns true if result is ready, false if the timeout has expired
+
+    @note Result or stored exception can be fetched only once.
+    */
+    bool get(OutputArray dst, int64 timeoutNs) const;
+
+    CV_WRAP inline
+    bool get(OutputArray dst, double timeoutNs) const { return get(dst, (int64)timeoutNs); }
+
+    bool wait_for(int64 timeoutNs) const;
+
+    CV_WRAP inline
+    bool wait_for(double timeoutNs) const { return wait_for((int64)timeoutNs); }
+
+    CV_WRAP bool valid() const CV_NOEXCEPT;
+
+#ifdef CV_CXX11
+    inline AsyncArray(AsyncArray&& o) CV_NOEXCEPT : p(o.p) { o.p = NULL; } // takes over o's Impl pointer; noexcept like the move assignment below
+    inline AsyncArray& operator=(AsyncArray&& o) CV_NOEXCEPT { std::swap(p, o.p); return *this; }
+
+    template<typename _Rep, typename _Period>
+    inline bool get(OutputArray dst, const std::chrono::duration<_Rep, _Period>& timeout) const // const, like the int64/double overloads it forwards to
+    {
+        return get(dst, (int64)(std::chrono::duration_cast<std::chrono::nanoseconds>(timeout).count())); // duration_cast also accepts floating-point durations
+    }
+
+    template<typename _Rep, typename _Period>
+    inline bool wait_for(const std::chrono::duration<_Rep, _Period>& timeout) const // const, like the int64/double overloads it forwards to
+    {
+        return wait_for((int64)(std::chrono::duration_cast<std::chrono::nanoseconds>(timeout).count())); // duration_cast also accepts floating-point durations
+    }
+
+#if 0
+    std::future<Mat> getFutureMat() const;
+    std::future<UMat> getFutureUMat() const;
+#endif
+#endif
+
+
+    // PImpl
+    struct Impl; friend struct Impl;
+    inline void* _getImpl() const CV_NOEXCEPT { return p; }
+protected:
+    Impl* p;
+};
+
+
+//! @}
+} // namespace
+#endif // OPENCV_CORE_ASYNC_HPP
diff --git a/IPL/include/opencv/opencv2/core/base.hpp b/IPL/include/opencv/opencv2/core/base.hpp
index ed633f5..a3a3e51 100644
--- a/IPL/include/opencv/opencv2/core/base.hpp
+++ b/IPL/include/opencv/opencv2/core/base.hpp
@@ -42,13 +42,15 @@
 //
 //M*/
 
-#ifndef __OPENCV_CORE_BASE_HPP__
-#define __OPENCV_CORE_BASE_HPP__
+#ifndef OPENCV_CORE_BASE_HPP
+#define OPENCV_CORE_BASE_HPP
 
 #ifndef __cplusplus
 #  error base.hpp header must be compiled as C++
 #endif
 
+#include "opencv2/opencv_modules.hpp"
+
 #include <climits>
 #include <algorithm>
 
@@ -64,38 +66,38 @@ namespace cv
 namespace Error {
 //! error codes
 enum Code {
-    StsOk=                       0,  //!< everithing is ok
+    StsOk=                       0,  //!< everything is ok
     StsBackTrace=               -1,  //!< pseudo error for back trace
     StsError=                   -2,  //!< unknown /unspecified error
     StsInternal=                -3,  //!< internal error (bad state)
     StsNoMem=                   -4,  //!< insufficient memory
     StsBadArg=                  -5,  //!< function arg/param is bad
     StsBadFunc=                 -6,  //!< unsupported function
-    StsNoConv=                  -7,  //!< iter. didn't converge
+    StsNoConv=                  -7,  //!< iteration didn't converge
     StsAutoTrace=               -8,  //!< tracing
     HeaderIsNull=               -9,  //!< image header is NULL
     BadImageSize=              -10,  //!< image size is invalid
     BadOffset=                 -11,  //!< offset is invalid
     BadDataPtr=                -12,  //!<
-    BadStep=                   -13,  //!<
+    BadStep=                   -13,  //!< image step is wrong, this may happen for a non-continuous matrix.
     BadModelOrChSeq=           -14,  //!<
-    BadNumChannels=            -15,  //!<
+    BadNumChannels=            -15,  //!< bad number of channels, for example, some functions accept only single channel matrices.
     BadNumChannel1U=           -16,  //!<
-    BadDepth=                  -17,  //!<
+    BadDepth=                  -17,  //!< input image depth is not supported by the function
     BadAlphaChannel=           -18,  //!<
-    BadOrder=                  -19,  //!<
-    BadOrigin=                 -20,  //!<
-    BadAlign=                  -21,  //!<
+    BadOrder=                  -19,  //!< number of dimensions is out of range
+    BadOrigin=                 -20,  //!< incorrect input origin
+    BadAlign=                  -21,  //!< incorrect input align
     BadCallBack=               -22,  //!<
     BadTileSize=               -23,  //!<
-    BadCOI=                    -24,  //!<
-    BadROISize=                -25,  //!<
+    BadCOI=                    -24,  //!< input COI is not supported
+    BadROISize=                -25,  //!< incorrect input roi
     MaskIsTiled=               -26,  //!<
     StsNullPtr=                -27,  //!< null pointer
     StsVecLengthErr=           -28,  //!< incorrect vector length
-    StsFilterStructContentErr= -29,  //!< incorr. filter structure content
-    StsKernelStructContentErr= -30,  //!< incorr. transform kernel content
-    StsFilterOffsetErr=        -31,  //!< incorrect filter ofset value
+    StsFilterStructContentErr= -29,  //!< incorrect filter structure content
+    StsKernelStructContentErr= -30,  //!< incorrect transform kernel content
+    StsFilterOffsetErr=        -31,  //!< incorrect filter offset value
     StsBadSize=                -201, //!< the input/output structure size is incorrect
     StsDivByZero=              -202, //!< division by zero
     StsInplaceNotSupported=    -203, //!< in-place operation is not supported
@@ -111,13 +113,13 @@ enum Code {
     StsNotImplemented=         -213, //!< the requested function/feature is not implemented
     StsBadMemBlock=            -214, //!< an allocated block has been corrupted
     StsAssert=                 -215, //!< assertion failed
-    GpuNotSupported=           -216,
-    GpuApiCallError=           -217,
-    OpenGlNotSupported=        -218,
-    OpenGlApiCallError=        -219,
-    OpenCLApiCallError=        -220,
+    GpuNotSupported=           -216, //!< no CUDA support
+    GpuApiCallError=           -217, //!< GPU API call error
+    OpenGlNotSupported=        -218, //!< no OpenGL support
+    OpenGlApiCallError=        -219, //!< OpenGL API call error
+    OpenCLApiCallError=        -220, //!< OpenCL API call error
     OpenCLDoubleNotSupported=  -221,
-    OpenCLInitError=           -222,
+    OpenCLInitError=           -222, //!< OpenCL initialization error
     OpenCLNoAMDBlasFft=        -223
 };
 } //Error
@@ -150,46 +152,57 @@ enum DecompTypes {
 };
 
 /** norm types
-- For one array:
-\f[norm =  \forkthree{\|\texttt{src1}\|_{L_{\infty}} =  \max _I | \texttt{src1} (I)|}{if  \(\texttt{normType} = \texttt{NORM_INF}\) }
-{ \| \texttt{src1} \| _{L_1} =  \sum _I | \texttt{src1} (I)|}{if  \(\texttt{normType} = \texttt{NORM_L1}\) }
-{ \| \texttt{src1} \| _{L_2} =  \sqrt{\sum_I \texttt{src1}(I)^2} }{if  \(\texttt{normType} = \texttt{NORM_L2}\) }\f]
-
-- Absolute norm for two arrays
-\f[norm =  \forkthree{\|\texttt{src1}-\texttt{src2}\|_{L_{\infty}} =  \max _I | \texttt{src1} (I) -  \texttt{src2} (I)|}{if  \(\texttt{normType} = \texttt{NORM_INF}\) }
-{ \| \texttt{src1} - \texttt{src2} \| _{L_1} =  \sum _I | \texttt{src1} (I) -  \texttt{src2} (I)|}{if  \(\texttt{normType} = \texttt{NORM_L1}\) }
-{ \| \texttt{src1} - \texttt{src2} \| _{L_2} =  \sqrt{\sum_I (\texttt{src1}(I) - \texttt{src2}(I))^2} }{if  \(\texttt{normType} = \texttt{NORM_L2}\) }\f]
-
-- Relative norm for two arrays
-\f[norm =  \forkthree{\frac{\|\texttt{src1}-\texttt{src2}\|_{L_{\infty}}    }{\|\texttt{src2}\|_{L_{\infty}} }}{if  \(\texttt{normType} = \texttt{NORM_RELATIVE_INF}\) }
-{ \frac{\|\texttt{src1}-\texttt{src2}\|_{L_1} }{\|\texttt{src2}\|_{L_1}} }{if  \(\texttt{normType} = \texttt{NORM_RELATIVE_L1}\) }
-{ \frac{\|\texttt{src1}-\texttt{src2}\|_{L_2} }{\|\texttt{src2}\|_{L_2}} }{if  \(\texttt{normType} = \texttt{NORM_RELATIVE_L2}\) }\f]
-
-As example for one array consider the function \f$r(x)= \begin{pmatrix} x \\ 1-x \end{pmatrix}, x \in [-1;1]\f$.
-The \f$ L_{1}, L_{2} \f$ and \f$ L_{\infty} \f$ norm for the sample value \f$r(-1) = \begin{pmatrix} -1 \\ 2 \end{pmatrix}\f$
-is calculated as follows
-\f{align*}
-    \| r(-1) \|_{L_1} &= |-1| + |2| = 3 \\
-    \| r(-1) \|_{L_2} &= \sqrt{(-1)^{2} + (2)^{2}} = \sqrt{5} \\
-    \| r(-1) \|_{L_\infty} &= \max(|-1|,|2|) = 2
-\f}
-and for \f$r(0.5) = \begin{pmatrix} 0.5 \\ 0.5 \end{pmatrix}\f$ the calculation is
-\f{align*}
-    \| r(0.5) \|_{L_1} &= |0.5| + |0.5| = 1 \\
-    \| r(0.5) \|_{L_2} &= \sqrt{(0.5)^{2} + (0.5)^{2}} = \sqrt{0.5} \\
-    \| r(0.5) \|_{L_\infty} &= \max(|0.5|,|0.5|) = 0.5.
-\f}
-The following graphic shows all values for the three norm functions \f$\| r(x) \|_{L_1}, \| r(x) \|_{L_2}\f$ and \f$\| r(x) \|_{L_\infty}\f$.
-It is notable that the \f$ L_{1} \f$ norm forms the upper and the \f$ L_{\infty} \f$ norm forms the lower border for the example function \f$ r(x) \f$.
-![Graphs for the different norm functions from the above example](pics/NormTypes_OneArray_1-2-INF.png)
- */
-enum NormTypes { NORM_INF       = 1,
+
+src1 and src2 denote input arrays.
+*/
+
+enum NormTypes {
+                /**
+                \f[
+                norm =  \forkthree
+                {\|\texttt{src1}\|_{L_{\infty}} =  \max _I | \texttt{src1} (I)|}{if  \(\texttt{normType} = \texttt{NORM_INF}\) }
+                {\|\texttt{src1}-\texttt{src2}\|_{L_{\infty}} =  \max _I | \texttt{src1} (I) -  \texttt{src2} (I)|}{if  \(\texttt{normType} = \texttt{NORM_INF}\) }
+                {\frac{\|\texttt{src1}-\texttt{src2}\|_{L_{\infty}}    }{\|\texttt{src2}\|_{L_{\infty}} }}{if  \(\texttt{normType} = \texttt{NORM_RELATIVE | NORM_INF}\) }
+                \f]
+                */
+                NORM_INF       = 1,
+                /**
+                \f[
+                norm =  \forkthree
+                {\| \texttt{src1} \| _{L_1} =  \sum _I | \texttt{src1} (I)|}{if  \(\texttt{normType} = \texttt{NORM_L1}\)}
+                { \| \texttt{src1} - \texttt{src2} \| _{L_1} =  \sum _I | \texttt{src1} (I) -  \texttt{src2} (I)|}{if  \(\texttt{normType} = \texttt{NORM_L1}\) }
+                { \frac{\|\texttt{src1}-\texttt{src2}\|_{L_1} }{\|\texttt{src2}\|_{L_1}} }{if  \(\texttt{normType} = \texttt{NORM_RELATIVE | NORM_L1}\) }
+                \f]*/
                  NORM_L1        = 2,
+                 /**
+                 \f[
+                 norm =  \forkthree
+                 { \| \texttt{src1} \| _{L_2} =  \sqrt{\sum_I \texttt{src1}(I)^2} }{if  \(\texttt{normType} = \texttt{NORM_L2}\) }
+                 { \| \texttt{src1} - \texttt{src2} \| _{L_2} =  \sqrt{\sum_I (\texttt{src1}(I) - \texttt{src2}(I))^2} }{if  \(\texttt{normType} = \texttt{NORM_L2}\) }
+                 { \frac{\|\texttt{src1}-\texttt{src2}\|_{L_2} }{\|\texttt{src2}\|_{L_2}} }{if  \(\texttt{normType} = \texttt{NORM_RELATIVE | NORM_L2}\) }
+                 \f]
+                 */
                  NORM_L2        = 4,
+                 /**
+                 \f[
+                 norm =  \forkthree
+                 { \| \texttt{src1} \| _{L_2} ^{2} = \sum_I \texttt{src1}(I)^2} {if  \(\texttt{normType} = \texttt{NORM_L2SQR}\)}
+                 { \| \texttt{src1} - \texttt{src2} \| _{L_2} ^{2} =  \sum_I (\texttt{src1}(I) - \texttt{src2}(I))^2 }{if  \(\texttt{normType} = \texttt{NORM_L2SQR}\) }
+                 { \left(\frac{\|\texttt{src1}-\texttt{src2}\|_{L_2} }{\|\texttt{src2}\|_{L_2}}\right)^2 }{if  \(\texttt{normType} = \texttt{NORM_RELATIVE | NORM_L2SQR}\) }
+                 \f]
+                 */
                  NORM_L2SQR     = 5,
+                 /**
+                 In the case of one input array, calculates the Hamming distance of the array from zero.
+                 In the case of two input arrays, calculates the Hamming distance between the arrays.
+                 */
                  NORM_HAMMING   = 6,
+                 /**
+                 Similar to NORM_HAMMING, but in the calculation, each two bits of the input sequence will
+                 be added and treated as a single bit to be used in the same calculation as NORM_HAMMING.
+                 */
                  NORM_HAMMING2  = 7,
-                 NORM_TYPE_MASK = 7,
+                 NORM_TYPE_MASK = 7, //!< bit-mask which can be used to separate norm type from norm flags
                  NORM_RELATIVE  = 8, //!< flag
                  NORM_MINMAX    = 32 //!< flag
                };
@@ -237,6 +250,10 @@ enum DftFlags {
         into a real array and inverse transformation is executed, the function treats the input as a
         packed complex-conjugate symmetrical array, and the output will also be a real array). */
     DFT_REAL_OUTPUT    = 32,
+    /** specifies that the input is complex. If this flag is set, the input must have 2 channels.
+        On the other hand, for backward compatibility reasons, if the input has 2 channels, it is
+        already considered complex. */
+    DFT_COMPLEX_INPUT  = 64,
     /** performs an inverse 1D or 2D transform instead of the default forward transform. */
     DCT_INVERSE        = DFT_INVERSE,
     /** performs a forward or inverse transform of every individual row of the input
@@ -254,7 +271,7 @@ enum BorderTypes {
     BORDER_REFLECT     = 2, //!< `fedcba|abcdefgh|hgfedcb`
     BORDER_WRAP        = 3, //!< `cdefgh|abcdefgh|abcdefg`
     BORDER_REFLECT_101 = 4, //!< `gfedcb|abcdefgh|gfedcba`
-    BORDER_TRANSPARENT = 5, //!< `uvwxyz|absdefgh|ijklmno`
+    BORDER_TRANSPARENT = 5, //!< `uvwxyz|abcdefgh|ijklmno`
 
     BORDER_REFLECT101  = BORDER_REFLECT_101, //!< same as BORDER_REFLECT_101
     BORDER_DEFAULT     = BORDER_REFLECT_101, //!< same as BORDER_REFLECT_101
@@ -266,68 +283,6 @@ enum BorderTypes {
 //! @addtogroup core_utils
 //! @{
 
-//! @cond IGNORED
-
-//////////////// static assert /////////////////
-#define CVAUX_CONCAT_EXP(a, b) a##b
-#define CVAUX_CONCAT(a, b) CVAUX_CONCAT_EXP(a,b)
-
-#if defined(__clang__)
-#  ifndef __has_extension
-#    define __has_extension __has_feature /* compatibility, for older versions of clang */
-#  endif
-#  if __has_extension(cxx_static_assert)
-#    define CV_StaticAssert(condition, reason)    static_assert((condition), reason " " #condition)
-#  elif __has_extension(c_static_assert)
-#    define CV_StaticAssert(condition, reason)    _Static_assert((condition), reason " " #condition)
-#  endif
-#elif defined(__GNUC__)
-#  if (defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103L)
-#    define CV_StaticAssert(condition, reason)    static_assert((condition), reason " " #condition)
-#  endif
-#elif defined(_MSC_VER)
-#  if _MSC_VER >= 1600 /* MSVC 10 */
-#    define CV_StaticAssert(condition, reason)    static_assert((condition), reason " " #condition)
-#  endif
-#endif
-#ifndef CV_StaticAssert
-#  if !defined(__clang__) && defined(__GNUC__) && (__GNUC__*100 + __GNUC_MINOR__ > 302)
-#    define CV_StaticAssert(condition, reason) ({ extern int __attribute__((error("CV_StaticAssert: " reason " " #condition))) CV_StaticAssert(); ((condition) ? 0 : CV_StaticAssert()); })
-#  else
-     template <bool x> struct CV_StaticAssert_failed;
-     template <> struct CV_StaticAssert_failed<true> { enum { val = 1 }; };
-     template<int x> struct CV_StaticAssert_test {};
-#    define CV_StaticAssert(condition, reason)\
-       typedef cv::CV_StaticAssert_test< sizeof(cv::CV_StaticAssert_failed< static_cast<bool>(condition) >) > CVAUX_CONCAT(CV_StaticAssert_failed_at_, __LINE__)
-#  endif
-#endif
-
-// Suppress warning "-Wdeprecated-declarations" / C4996
-#if defined(_MSC_VER)
-    #define CV_DO_PRAGMA(x) __pragma(x)
-#elif defined(__GNUC__)
-    #define CV_DO_PRAGMA(x) _Pragma (#x)
-#else
-    #define CV_DO_PRAGMA(x)
-#endif
-
-#ifdef _MSC_VER
-#define CV_SUPPRESS_DEPRECATED_START \
-    CV_DO_PRAGMA(warning(push)) \
-    CV_DO_PRAGMA(warning(disable: 4996))
-#define CV_SUPPRESS_DEPRECATED_END CV_DO_PRAGMA(warning(pop))
-#elif defined (__clang__) || ((__GNUC__)  && (__GNUC__*100 + __GNUC_MINOR__ > 405))
-#define CV_SUPPRESS_DEPRECATED_START \
-    CV_DO_PRAGMA(GCC diagnostic push) \
-    CV_DO_PRAGMA(GCC diagnostic ignored "-Wdeprecated-declarations")
-#define CV_SUPPRESS_DEPRECATED_END CV_DO_PRAGMA(GCC diagnostic pop)
-#else
-#define CV_SUPPRESS_DEPRECATED_START
-#define CV_SUPPRESS_DEPRECATED_END
-#endif
-#define CV_UNUSED(name) (void)name
-//! @endcond
-
 /*! @brief Signals an error and raises the exception.
 
 By default the function prints information about the error to stderr,
@@ -336,44 +291,21 @@ It is possible to alternate error processing by using redirectError().
 @param _code - error code (Error::Code)
 @param _err - error description
 @param _func - function name. Available only when the compiler supports getting it
-@param _file - source file name where the error has occured
-@param _line - line number in the source file where the error has occured
-@see CV_Error, CV_Error_, CV_ErrorNoReturn, CV_ErrorNoReturn_, CV_Assert, CV_DbgAssert
+@param _file - source file name where the error has occurred
+@param _line - line number in the source file where the error has occurred
+@see CV_Error, CV_Error_, CV_Assert, CV_DbgAssert
  */
-CV_EXPORTS void error(int _code, const String& _err, const char* _func, const char* _file, int _line);
+CV_EXPORTS CV_NORETURN void error(int _code, const String& _err, const char* _func, const char* _file, int _line);
 
-#ifdef __GNUC__
-# if defined __clang__ || defined __APPLE__
-#   pragma GCC diagnostic push
-#   pragma GCC diagnostic ignored "-Winvalid-noreturn"
-# endif
-#endif
+#ifdef CV_STATIC_ANALYSIS
 
-/** same as cv::error, but does not return */
-CV_INLINE CV_NORETURN void errorNoReturn(int _code, const String& _err, const char* _func, const char* _file, int _line)
-{
-    error(_code, _err, _func, _file, _line);
-#ifdef __GNUC__
-# if !defined __clang__ && !defined __APPLE__
-    // this suppresses this warning: "noreturn" function does return [enabled by default]
-    __builtin_trap();
-    // or use infinite loop: for (;;) {}
-# endif
-#endif
-}
-#ifdef __GNUC__
-# if defined __clang__ || defined __APPLE__
-#   pragma GCC diagnostic pop
-# endif
-#endif
+// In practice, some macros are not processed correctly (noreturn is not detected).
+// We need to use simplified definitions for them.
+#define CV_Error(code, msg) do { (void)(code); (void)(msg); abort(); } while (0)
+#define CV_Error_(code, args) do { (void)(code); (void)(cv::format args); abort(); } while (0)
+#define CV_Assert( expr ) do { if (!(expr)) abort(); } while (0)
 
-#if defined __GNUC__
-#define CV_Func __func__
-#elif defined _MSC_VER
-#define CV_Func __FUNCTION__
-#else
-#define CV_Func ""
-#endif
+#else // CV_STATIC_ANALYSIS
 
 /** @brief Call the error handler.
 
@@ -393,7 +325,7 @@ This macro can be used to construct an error message on-fly to include some dyna
 for example:
 @code
     // note the extra parentheses around the formatted text message
-    CV_Error_( CV_StsOutOfRange,
+    CV_Error_(Error::StsOutOfRange,
     ("the value at (%d, %d)=%g is out of range", badPt.x, badPt.y, badValue));
 @endcode
 @param code one of Error::Code
@@ -407,18 +339,39 @@ The macros CV_Assert (and CV_DbgAssert(expr)) evaluate the specified expression.
 raise an error (see cv::error). The macro CV_Assert checks the condition in both Debug and Release
 configurations while CV_DbgAssert is only retained in the Debug configuration.
 */
-#define CV_Assert( expr ) if(!!(expr)) ; else cv::error( cv::Error::StsAssert, #expr, CV_Func, __FILE__, __LINE__ )
+#define CV_Assert( expr ) do { if(!!(expr)) ; else cv::error( cv::Error::StsAssert, #expr, CV_Func, __FILE__, __LINE__ ); } while(0)
 
-/** same as CV_Error(code,msg), but does not return */
-#define CV_ErrorNoReturn( code, msg ) cv::errorNoReturn( code, msg, CV_Func, __FILE__, __LINE__ )
+#endif // CV_STATIC_ANALYSIS
 
-/** same as CV_Error_(code,args), but does not return */
-#define CV_ErrorNoReturn_( code, args ) cv::errorNoReturn( code, cv::format args, CV_Func, __FILE__, __LINE__ )
+//! @cond IGNORED
+#if !defined(__OPENCV_BUILD)  // TODO: backward compatibility only
+#ifndef CV_ErrorNoReturn
+#define CV_ErrorNoReturn CV_Error
+#endif
+#ifndef CV_ErrorNoReturn_
+#define CV_ErrorNoReturn_ CV_Error_
+#endif
+#endif
 
-/** replaced with CV_Assert(expr) in Debug configuration */
-#ifdef _DEBUG
+#define CV_Assert_1 CV_Assert
+#define CV_Assert_2( expr, ... ) CV_Assert_1(expr); __CV_EXPAND(CV_Assert_1( __VA_ARGS__ ))
+#define CV_Assert_3( expr, ... ) CV_Assert_1(expr); __CV_EXPAND(CV_Assert_2( __VA_ARGS__ ))
+#define CV_Assert_4( expr, ... ) CV_Assert_1(expr); __CV_EXPAND(CV_Assert_3( __VA_ARGS__ ))
+#define CV_Assert_5( expr, ... ) CV_Assert_1(expr); __CV_EXPAND(CV_Assert_4( __VA_ARGS__ ))
+#define CV_Assert_6( expr, ... ) CV_Assert_1(expr); __CV_EXPAND(CV_Assert_5( __VA_ARGS__ ))
+#define CV_Assert_7( expr, ... ) CV_Assert_1(expr); __CV_EXPAND(CV_Assert_6( __VA_ARGS__ ))
+#define CV_Assert_8( expr, ... ) CV_Assert_1(expr); __CV_EXPAND(CV_Assert_7( __VA_ARGS__ ))
+#define CV_Assert_9( expr, ... ) CV_Assert_1(expr); __CV_EXPAND(CV_Assert_8( __VA_ARGS__ ))
+#define CV_Assert_10( expr, ... ) CV_Assert_1(expr); __CV_EXPAND(CV_Assert_9( __VA_ARGS__ ))
+
+#define CV_Assert_N(...) do { __CV_EXPAND(__CV_CAT(CV_Assert_, __CV_VA_NUM_ARGS(__VA_ARGS__)) (__VA_ARGS__)); } while(0)
+
+//! @endcond
+
+#if defined _DEBUG || defined CV_STATIC_ANALYSIS
 #  define CV_DbgAssert(expr) CV_Assert(expr)
 #else
+/** replaced with CV_Assert(expr) in Debug configuration */
 #  define CV_DbgAssert(expr)
 #endif
 
@@ -428,7 +381,7 @@ configurations while CV_DbgAssert is only retained in the Debug configuration.
  */
 struct CV_EXPORTS Hamming
 {
-    enum { normType = NORM_HAMMING };
+    static const NormTypes normType = NORM_HAMMING;
     typedef unsigned char ValueType;
     typedef int ResultType;
 
@@ -665,13 +618,23 @@ namespace cudev
 
 namespace ipp
 {
-CV_EXPORTS int getIppFeatures();
-CV_EXPORTS void setIppStatus(int status, const char * const funcname = NULL, const char * const filename = NULL,
+CV_EXPORTS   unsigned long long getIppFeatures();
+CV_EXPORTS   void setIppStatus(int status, const char * const funcname = NULL, const char * const filename = NULL,
                              int line = 0);
-CV_EXPORTS int getIppStatus();
-CV_EXPORTS String getIppErrorLocation();
-CV_EXPORTS bool useIPP();
-CV_EXPORTS void setUseIPP(bool flag);
+CV_EXPORTS   int getIppStatus();
+CV_EXPORTS   String getIppErrorLocation();
+CV_EXPORTS_W bool   useIPP();
+CV_EXPORTS_W void   setUseIPP(bool flag);
+CV_EXPORTS_W String getIppVersion();
+
+// IPP Not-Exact mode. This function may force use of IPP when both IPP and OpenCV provide proper results
+// but have internal accuracy differences which have too much direct or indirect impact on accuracy tests.
+CV_EXPORTS_W bool useIPP_NotExact();
+CV_EXPORTS_W void setUseIPP_NotExact(bool flag);
+#ifndef DISABLE_OPENCV_3_COMPATIBILITY
+static inline bool useIPP_NE() { return useIPP_NotExact(); }
+static inline void setUseIPP_NE(bool flag) { setUseIPP_NotExact(flag); }
+#endif
 
 } // ipp
 
@@ -685,5 +648,7 @@ CV_EXPORTS void setUseIPP(bool flag);
 } // cv
 
 #include "opencv2/core/neon_utils.hpp"
+#include "opencv2/core/vsx_utils.hpp"
+#include "opencv2/core/check.hpp"
 
-#endif //__OPENCV_CORE_BASE_HPP__
+#endif //OPENCV_CORE_BASE_HPP
diff --git a/IPL/include/opencv/opencv2/core/bindings_utils.hpp b/IPL/include/opencv/opencv2/core/bindings_utils.hpp
new file mode 100644
index 0000000..f693dc8
--- /dev/null
+++ b/IPL/include/opencv/opencv2/core/bindings_utils.hpp
@@ -0,0 +1,87 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_CORE_BINDINGS_UTILS_HPP
+#define OPENCV_CORE_BINDINGS_UTILS_HPP
+
+#include <opencv2/core/async.hpp>
+#include <opencv2/core/detail/async_promise.hpp>
+
+namespace cv { namespace utils {
+//! @addtogroup core_utils
+//! @{
+
+CV_EXPORTS_W String dumpInputArray(InputArray argument);
+
+CV_EXPORTS_W String dumpInputArrayOfArrays(InputArrayOfArrays argument);
+
+CV_EXPORTS_W String dumpInputOutputArray(InputOutputArray argument);
+
+CV_EXPORTS_W String dumpInputOutputArrayOfArrays(InputOutputArrayOfArrays argument);
+
+CV_WRAP static inline
+String dumpBool(bool argument)
+{
+    return (argument) ? String("Bool: True") : String("Bool: False");
+}
+
+CV_WRAP static inline
+String dumpInt(int argument)
+{
+    return cv::format("Int: %d", argument);
+}
+
+CV_WRAP static inline
+String dumpSizeT(size_t argument)
+{
+    std::ostringstream oss("size_t: ", std::ios::ate);
+    oss << argument;
+    return oss.str();
+}
+
+CV_WRAP static inline
+String dumpFloat(float argument)
+{
+    return cv::format("Float: %.2f", argument);
+}
+
+CV_WRAP static inline
+String dumpDouble(double argument)
+{
+    return cv::format("Double: %.2f", argument);
+}
+
+CV_WRAP static inline
+String dumpCString(const char* argument)
+{
+    return cv::format("String: %s", argument);
+}
+
+CV_WRAP static inline
+AsyncArray testAsyncArray(InputArray argument)
+{
+    AsyncPromise p;
+    p.setValue(argument);
+    return p.getArrayResult();
+}
+
+CV_WRAP static inline
+AsyncArray testAsyncException()
+{
+    AsyncPromise p;
+    try
+    {
+        CV_Error(Error::StsOk, "Test: Generated async error");
+    }
+    catch (const cv::Exception& e)
+    {
+        p.setException(e);
+    }
+    return p.getArrayResult();
+}
+
+//! @}
+}} // namespace
+
+#endif // OPENCV_CORE_BINDINGS_UTILS_HPP
diff --git a/IPL/include/opencv/opencv2/core/bufferpool.hpp b/IPL/include/opencv/opencv2/core/bufferpool.hpp
index 76df2d2..4698e5d 100644
--- a/IPL/include/opencv/opencv2/core/bufferpool.hpp
+++ b/IPL/include/opencv/opencv2/core/bufferpool.hpp
@@ -4,8 +4,13 @@
 //
 // Copyright (C) 2014, Advanced Micro Devices, Inc., all rights reserved.
 
-#ifndef __OPENCV_CORE_BUFFER_POOL_HPP__
-#define __OPENCV_CORE_BUFFER_POOL_HPP__
+#ifndef OPENCV_CORE_BUFFER_POOL_HPP
+#define OPENCV_CORE_BUFFER_POOL_HPP
+
+#ifdef _MSC_VER
+#pragma warning(push)
+#pragma warning(disable: 4265)
+#endif
 
 namespace cv
 {
@@ -28,4 +33,8 @@ class BufferPoolController
 
 }
 
-#endif // __OPENCV_CORE_BUFFER_POOL_HPP__
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
+
+#endif // OPENCV_CORE_BUFFER_POOL_HPP
diff --git a/IPL/include/opencv/opencv2/core/check.hpp b/IPL/include/opencv/opencv2/core/check.hpp
new file mode 100644
index 0000000..0e0c7cb
--- /dev/null
+++ b/IPL/include/opencv/opencv2/core/check.hpp
@@ -0,0 +1,160 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_CORE_CHECK_HPP
+#define OPENCV_CORE_CHECK_HPP
+
+#include <opencv2/core/base.hpp>
+
+namespace cv {
+
+/** Returns string of cv::Mat depth value: CV_8U -> "CV_8U" or "<invalid depth>" */
+CV_EXPORTS const char* depthToString(int depth);
+
+/** Returns string of cv::Mat type value: CV_8UC3 -> "CV_8UC3" or "<invalid type>" */
+CV_EXPORTS const String typeToString(int type);
+
+
+//! @cond IGNORED
+namespace detail {
+
+/** Returns string of cv::Mat depth value: CV_8U -> "CV_8U" or NULL */
+CV_EXPORTS const char* depthToString_(int depth);
+
+/** Returns string of cv::Mat type value: CV_8UC3 -> "CV_8UC3" or cv::String() */
+CV_EXPORTS const cv::String typeToString_(int type);
+
+enum TestOp {
+  TEST_CUSTOM = 0,
+  TEST_EQ = 1,
+  TEST_NE = 2,
+  TEST_LE = 3,
+  TEST_LT = 4,
+  TEST_GE = 5,
+  TEST_GT = 6,
+  CV__LAST_TEST_OP
+};
+
+struct CheckContext {
+    const char* func;
+    const char* file;
+    int line;
+    enum TestOp testOp;
+    const char* message;
+    const char* p1_str;
+    const char* p2_str;
+};
+
+#ifndef CV__CHECK_FILENAME
+# define CV__CHECK_FILENAME __FILE__
+#endif
+
+#ifndef CV__CHECK_FUNCTION
+# if defined _MSC_VER
+#   define CV__CHECK_FUNCTION __FUNCSIG__
+# elif defined __GNUC__
+#   define CV__CHECK_FUNCTION __PRETTY_FUNCTION__
+# else
+#   define CV__CHECK_FUNCTION "<unknown>"
+# endif
+#endif
+
+#define CV__CHECK_LOCATION_VARNAME(id) CVAUX_CONCAT(CVAUX_CONCAT(__cv_check_, id), __LINE__)
+#define CV__DEFINE_CHECK_CONTEXT(id, message, testOp, p1_str, p2_str) \
+    static const cv::detail::CheckContext CV__CHECK_LOCATION_VARNAME(id) = \
+            { CV__CHECK_FUNCTION, CV__CHECK_FILENAME, __LINE__, testOp, message, p1_str, p2_str }
+
+CV_EXPORTS void CV_NORETURN check_failed_auto(const int v1, const int v2, const CheckContext& ctx);
+CV_EXPORTS void CV_NORETURN check_failed_auto(const size_t v1, const size_t v2, const CheckContext& ctx);
+CV_EXPORTS void CV_NORETURN check_failed_auto(const float v1, const float v2, const CheckContext& ctx);
+CV_EXPORTS void CV_NORETURN check_failed_auto(const double v1, const double v2, const CheckContext& ctx);
+CV_EXPORTS void CV_NORETURN check_failed_auto(const Size_<int> v1, const Size_<int> v2, const CheckContext& ctx);
+CV_EXPORTS void CV_NORETURN check_failed_MatDepth(const int v1, const int v2, const CheckContext& ctx);
+CV_EXPORTS void CV_NORETURN check_failed_MatType(const int v1, const int v2, const CheckContext& ctx);
+CV_EXPORTS void CV_NORETURN check_failed_MatChannels(const int v1, const int v2, const CheckContext& ctx);
+
+CV_EXPORTS void CV_NORETURN check_failed_auto(const int v, const CheckContext& ctx);
+CV_EXPORTS void CV_NORETURN check_failed_auto(const size_t v, const CheckContext& ctx);
+CV_EXPORTS void CV_NORETURN check_failed_auto(const float v, const CheckContext& ctx);
+CV_EXPORTS void CV_NORETURN check_failed_auto(const double v, const CheckContext& ctx);
+CV_EXPORTS void CV_NORETURN check_failed_auto(const Size_<int> v, const CheckContext& ctx);
+CV_EXPORTS void CV_NORETURN check_failed_auto(const std::string& v1, const CheckContext& ctx);
+CV_EXPORTS void CV_NORETURN check_failed_MatDepth(const int v, const CheckContext& ctx);
+CV_EXPORTS void CV_NORETURN check_failed_MatType(const int v, const CheckContext& ctx);
+CV_EXPORTS void CV_NORETURN check_failed_MatChannels(const int v, const CheckContext& ctx);
+
+
+#define CV__TEST_EQ(v1, v2) ((v1) == (v2))
+#define CV__TEST_NE(v1, v2) ((v1) != (v2))
+#define CV__TEST_LE(v1, v2) ((v1) <= (v2))
+#define CV__TEST_LT(v1, v2) ((v1) < (v2))
+#define CV__TEST_GE(v1, v2) ((v1) >= (v2))
+#define CV__TEST_GT(v1, v2) ((v1) > (v2))
+
+#define CV__CHECK(id, op, type, v1, v2, v1_str, v2_str, msg_str) do { \
+    if(CV__TEST_##op((v1), (v2))) ; else { \
+        CV__DEFINE_CHECK_CONTEXT(id, msg_str, cv::detail::TEST_ ## op, v1_str, v2_str); \
+        cv::detail::check_failed_ ## type((v1), (v2), CV__CHECK_LOCATION_VARNAME(id)); \
+    } \
+} while (0)
+
+#define CV__CHECK_CUSTOM_TEST(id, type, v, test_expr, v_str, test_expr_str, msg_str) do { \
+    if(!!(test_expr)) ; else { \
+        CV__DEFINE_CHECK_CONTEXT(id, msg_str, cv::detail::TEST_CUSTOM, v_str, test_expr_str); \
+        cv::detail::check_failed_ ## type((v), CV__CHECK_LOCATION_VARNAME(id)); \
+    } \
+} while (0)
+
+} // namespace
+//! @endcond
+
+
+/// Supported value types: int, size_t, float, double, Size_<int>
+#define CV_CheckEQ(v1, v2, msg)  CV__CHECK(_, EQ, auto, v1, v2, #v1, #v2, msg)
+#define CV_CheckNE(v1, v2, msg)  CV__CHECK(_, NE, auto, v1, v2, #v1, #v2, msg)
+#define CV_CheckLE(v1, v2, msg)  CV__CHECK(_, LE, auto, v1, v2, #v1, #v2, msg)
+#define CV_CheckLT(v1, v2, msg)  CV__CHECK(_, LT, auto, v1, v2, #v1, #v2, msg)
+#define CV_CheckGE(v1, v2, msg)  CV__CHECK(_, GE, auto, v1, v2, #v1, #v2, msg)
+#define CV_CheckGT(v1, v2, msg)  CV__CHECK(_, GT, auto, v1, v2, #v1, #v2, msg)
+
+/// Check with additional "decoding" of type values in error message
+#define CV_CheckTypeEQ(t1, t2, msg)  CV__CHECK(_, EQ, MatType, t1, t2, #t1, #t2, msg)
+/// Check with additional "decoding" of depth values in error message
+#define CV_CheckDepthEQ(d1, d2, msg)  CV__CHECK(_, EQ, MatDepth, d1, d2, #d1, #d2, msg)
+
+#define CV_CheckChannelsEQ(c1, c2, msg)  CV__CHECK(_, EQ, MatChannels, c1, c2, #c1, #c2, msg)
+
+/// Example: type == CV_8UC1 || type == CV_8UC3
+#define CV_CheckType(t, test_expr, msg)  CV__CHECK_CUSTOM_TEST(_, MatType, t, (test_expr), #t, #test_expr, msg)
+
+/// Example: depth == CV_32F || depth == CV_64F
+#define CV_CheckDepth(t, test_expr, msg)  CV__CHECK_CUSTOM_TEST(_, MatDepth, t, (test_expr), #t, #test_expr, msg)
+
+/// Example: v == A || v == B
+#define CV_Check(v, test_expr, msg)  CV__CHECK_CUSTOM_TEST(_, auto, v, (test_expr), #v, #test_expr, msg)
+
+/// Some complex conditions: CV_Check(src2, src2.empty() || (src2.type() == src1.type() && src2.size() == src1.size()), "src2 should have same size/type as src1")
+// TODO define pretty-printers
+
+#ifndef NDEBUG
+#define CV_DbgCheck(v, test_expr, msg)  CV__CHECK_CUSTOM_TEST(_, auto, v, (test_expr), #v, #test_expr, msg)
+#define CV_DbgCheckEQ(v1, v2, msg)  CV__CHECK(_, EQ, auto, v1, v2, #v1, #v2, msg)
+#define CV_DbgCheckNE(v1, v2, msg)  CV__CHECK(_, NE, auto, v1, v2, #v1, #v2, msg)
+#define CV_DbgCheckLE(v1, v2, msg)  CV__CHECK(_, LE, auto, v1, v2, #v1, #v2, msg)
+#define CV_DbgCheckLT(v1, v2, msg)  CV__CHECK(_, LT, auto, v1, v2, #v1, #v2, msg)
+#define CV_DbgCheckGE(v1, v2, msg)  CV__CHECK(_, GE, auto, v1, v2, #v1, #v2, msg)
+#define CV_DbgCheckGT(v1, v2, msg)  CV__CHECK(_, GT, auto, v1, v2, #v1, #v2, msg)
+#else
+#define CV_DbgCheck(v, test_expr, msg)  do { } while (0)
+#define CV_DbgCheckEQ(v1, v2, msg)  do { } while (0)
+#define CV_DbgCheckNE(v1, v2, msg)  do { } while (0)
+#define CV_DbgCheckLE(v1, v2, msg)  do { } while (0)
+#define CV_DbgCheckLT(v1, v2, msg)  do { } while (0)
+#define CV_DbgCheckGE(v1, v2, msg)  do { } while (0)
+#define CV_DbgCheckGT(v1, v2, msg)  do { } while (0)
+#endif
+
+} // namespace
+
+#endif // OPENCV_CORE_CHECK_HPP
diff --git a/IPL/include/opencv/opencv2/core/core_c.h b/IPL/include/opencv/opencv2/core/core_c.h
index a0ed632..5dd1a8f 100644
--- a/IPL/include/opencv/opencv2/core/core_c.h
+++ b/IPL/include/opencv/opencv2/core/core_c.h
@@ -42,8 +42,8 @@
 //M*/
 
 
-#ifndef __OPENCV_CORE_C_H__
-#define __OPENCV_CORE_C_H__
+#ifndef OPENCV_CORE_C_H
+#define OPENCV_CORE_C_H
 
 #include "opencv2/core/types_c.h"
 
@@ -53,7 +53,7 @@
                           which is incompatible with C
 
    It is OK to disable it because we only extend few plain structures with
-   C++ construrtors for simpler interoperability with C++ API of the library
+   C++ constructors for simpler interoperability with C++ API of the library
 */
 #    pragma warning(disable:4190)
 #  elif defined __clang__ && __clang_major__ >= 3
@@ -359,7 +359,7 @@ CVAPI(CvMat*) cvGetSubRect( const CvArr* arr, CvMat* submat, CvRect rect );
 
 /** @brief Returns array row or row span.
 
-The functions return the header, corresponding to a specified row/row span of the input array.
+The function returns the header, corresponding to a specified row/row span of the input array.
 cvGetRow(arr, submat, row) is a shortcut for cvGetRows(arr, submat, row, row+1).
 @param arr Input array
 @param submat Pointer to the resulting sub-array header
@@ -385,7 +385,7 @@ CV_INLINE  CvMat*  cvGetRow( const CvArr* arr, CvMat* submat, int row )
 
 /** @brief Returns one of more array columns.
 
-The functions return the header, corresponding to a specified column span of the input array. That
+The function returns the header, corresponding to a specified column span of the input array. That
 
 is, no data is copied. Therefore, any modifications of the submatrix will affect the original array.
 If you need to copy the columns, use cvCloneMat. cvGetCol(arr, submat, col) is a shortcut for
@@ -579,7 +579,7 @@ CvNArrayIterator;
 #define CV_NO_CN_CHECK        2
 #define CV_NO_SIZE_CHECK      4
 
-/** initializes iterator that traverses through several arrays simulteneously
+/** initializes iterator that traverses through several arrays simultaneously
    (the function together with cvNextArraySlice is used for
     N-ari element-wise operations) */
 CVAPI(int) cvInitNArrayIterator( int count, CvArr** arrs,
@@ -1309,7 +1309,7 @@ CVAPI(void) cvMulTransposed( const CvArr* src, CvArr* dst, int order,
                              const CvArr* delta CV_DEFAULT(NULL),
                              double scale CV_DEFAULT(1.) );
 
-/** Tranposes matrix. Square matrices can be transposed in-place */
+/** Transposes matrix. Square matrices can be transposed in-place */
 CVAPI(void)  cvTranspose( const CvArr* src, CvArr* dst );
 #define cvT cvTranspose
 
@@ -1576,8 +1576,8 @@ CVAPI(void)  cvRestoreMemStoragePos( CvMemStorage* storage, CvMemStoragePos* pos
 CVAPI(void*) cvMemStorageAlloc( CvMemStorage* storage, size_t size );
 
 /** Allocates string in memory storage */
-CVAPI(CvString) cvMemStorageAllocString( CvMemStorage* storage, const char* ptr,
-                                         int len CV_DEFAULT(-1) );
+//CVAPI(CvString) cvMemStorageAllocString( CvMemStorage* storage, const char* ptr,
+//                                         int len CV_DEFAULT(-1) );
 
 /** Creates new empty sequence that will reside in the specified storage */
 CVAPI(CvSeq*)  cvCreateSeq( int seq_flags, size_t header_size,
@@ -1788,7 +1788,7 @@ CVAPI(int)  cvGraphRemoveVtx( CvGraph* graph, int index );
 CVAPI(int)  cvGraphRemoveVtxByPtr( CvGraph* graph, CvGraphVtx* vtx );
 
 
-/** Link two vertices specifed by indices or pointers if they
+/** Link two vertices specified by indices or pointers if they
    are not connected or return pointer to already existing edge
    connecting the vertices.
    Functions return 1 if a new edge was created, 0 otherwise */
@@ -1970,14 +1970,19 @@ CVAPI(void) cvSetIPLAllocators( Cv_iplCreateImageHeader create_header,
 *                                    Data Persistence                                    *
 \****************************************************************************************/
 
+#if 0
 /********************************** High-level functions ********************************/
 
 /** @brief Opens file storage for reading or writing data.
 
 The function opens file storage for reading or writing data. In the latter case, a new file is
 created or an existing file is rewritten. The type of the read or written file is determined by the
-filename extension: .xml for XML and .yml or .yaml for YAML. The function returns a pointer to the
-CvFileStorage structure. If the file cannot be opened then the function returns NULL.
+filename extension: .xml for XML, .yml or .yaml for YAML and .json for JSON.
+
+At the same time, it also supports adding parameters like "example.xml?base64".
+
+The function returns a pointer to the CvFileStorage structure.
+If the file cannot be opened then the function returns NULL.
 @param filename Name of the file associated with the storage
 @param memstorage Memory storage used for temporary data and for
 :   storing dynamic structures, such as CvSeq or CvGraph . If it is NULL, a temporary memory
@@ -1985,6 +1990,7 @@ CvFileStorage structure. If the file cannot be opened then the function returns
 @param flags Can be one of the following:
 > -   **CV_STORAGE_READ** the storage is open for reading
 > -   **CV_STORAGE_WRITE** the storage is open for writing
+      (use **CV_STORAGE_WRITE | CV_STORAGE_WRITE_BASE64** to write rawdata in Base64)
 @param encoding
  */
 CVAPI(CvFileStorage*)  cvOpenFileStorage( const char* filename, CvMemStorage* memstorage,
@@ -2022,7 +2028,8 @@ One and only one of the two above flags must be specified
 @param type_name Optional parameter - the object type name. In
     case of XML it is written as a type_id attribute of the structure opening tag. In the case of
     YAML it is written after a colon following the structure name (see the example in
-    CvFileStorage description). Mainly it is used with user objects. When the storage is read, the
+    CvFileStorage description). In case of JSON it is written as a name/value pair.
+    Mainly it is used with user objects. When the storage is read, the
     encoded type name is used to determine the object type (see CvTypeInfo and cvFindType ).
 @param attributes This parameter is not used in the current implementation
  */
@@ -2059,9 +2066,9 @@ such as termination criteria, without registering a new type. :
     {
         cvStartWriteStruct( fs, struct_name, CV_NODE_MAP, NULL, cvAttrList(0,0));
         cvWriteComment( fs, "termination criteria", 1 ); // just a description
-        if( termcrit->type & CV_TERMCRIT_ITER )
+        if( termcrit->type & cv::TermCriteria::MAX_ITER )
             cvWriteInteger( fs, "max_iterations", termcrit->max_iter );
-        if( termcrit->type & CV_TERMCRIT_EPS )
+        if( termcrit->type & cv::TermCriteria::EPS )
             cvWriteReal( fs, "accuracy", termcrit->epsilon );
         cvEndWriteStruct( fs );
     }
@@ -2162,7 +2169,7 @@ the file with multiple streams looks like this:
 @endcode
 The YAML file will look like this:
 @code{.yaml}
-    %YAML:1.0
+    %YAML 1.0
     # stream #1 data
     ...
     ---
@@ -2187,6 +2194,23 @@ to a sequence rather than a map.
 CVAPI(void) cvWriteRawData( CvFileStorage* fs, const void* src,
                                 int len, const char* dt );
 
+/** @brief Writes multiple numbers in Base64.
+
+If either CV_STORAGE_WRITE_BASE64 or cv::FileStorage::WRITE_BASE64 is used,
+this function will be the same as cvWriteRawData. If neither, the main
+difference is that it outputs a sequence in Base64 encoding rather than
+in plain text.
+
+This function can only be used to write a sequence with a type "binary".
+
+@param fs File storage
+@param src Pointer to the written array
+@param len Number of the array elements to write
+@param dt Specification of each array element, see @ref format_spec "format specification"
+*/
+CVAPI(void) cvWriteRawDataBase64( CvFileStorage* fs, const void* src,
+                                 int len, const char* dt );
+
 /** @brief Returns a unique pointer for a given name.
 
 The function returns a unique pointer for each particular file node name. This pointer can be then
@@ -2468,7 +2492,7 @@ CVAPI(void) cvReadRawData( const CvFileStorage* fs, const CvFileNode* src,
 /** @brief Writes a file node to another file storage.
 
 The function writes a copy of a file node to file storage. Possible applications of the function are
-merging several file storages into one and conversion between XML and YAML formats.
+merging several file storages into one and conversion between XML, YAML and JSON formats.
 @param fs Destination file storage
 @param new_node_name New name of the file node in the destination file storage. To keep the
 existing name, use cvcvGetFileNodeName
@@ -2533,10 +2557,12 @@ returns NULL.
  */
 CVAPI(CvTypeInfo*) cvTypeOf( const void* struct_ptr );
 
+#endif
+
 /** @brief Releases an object.
 
-The function finds the type of a given object and calls release with the double pointer.
-@param struct_ptr Double pointer to the object
+ The function finds the type of a given object and calls release with the double pointer.
+ @param struct_ptr Double pointer to the object
  */
 CVAPI(void) cvRelease( void** struct_ptr );
 
@@ -2549,41 +2575,6 @@ function, like cvCloneMat.
  */
 CVAPI(void*) cvClone( const void* struct_ptr );
 
-/** @brief Saves an object to a file.
-
-The function saves an object to a file. It provides a simple interface to cvWrite .
-@param filename File name
-@param struct_ptr Object to save
-@param name Optional object name. If it is NULL, the name will be formed from filename .
-@param comment Optional comment to put in the beginning of the file
-@param attributes Optional attributes passed to cvWrite
- */
-CVAPI(void) cvSave( const char* filename, const void* struct_ptr,
-                    const char* name CV_DEFAULT(NULL),
-                    const char* comment CV_DEFAULT(NULL),
-                    CvAttrList attributes CV_DEFAULT(cvAttrList()));
-
-/** @brief Loads an object from a file.
-
-The function loads an object from a file. It basically reads the specified file, find the first
-top-level node and calls cvRead for that node. If the file node does not have type information or
-the type information can not be found by the type name, the function returns NULL. After the object
-is loaded, the file storage is closed and all the temporary buffers are deleted. Thus, to load a
-dynamic structure, such as a sequence, contour, or graph, one should pass a valid memory storage
-destination to the function.
-@param filename File name
-@param memstorage Memory storage for dynamic structures, such as CvSeq or CvGraph . It is not used
-for matrices or images.
-@param name Optional object name. If it is NULL, the first top-level object in the storage will be
-loaded.
-@param real_name Optional output parameter that will contain the name of the loaded object
-(useful if name=NULL )
- */
-CVAPI(void*) cvLoad( const char* filename,
-                     CvMemStorage* memstorage CV_DEFAULT(NULL),
-                     const char* name CV_DEFAULT(NULL),
-                     const char** real_name CV_DEFAULT(NULL) );
-
 /*********************************** Measuring Execution Time ***************************/
 
 /** helper functions for RNG initialization and accurate time measurement:
@@ -2616,13 +2607,13 @@ CVAPI(void) cvSetErrStatus( int status );
 #define CV_ErrModeParent   1   /* Print error and continue */
 #define CV_ErrModeSilent   2   /* Don't print and continue */
 
-/** Retrives current error processing mode */
+/** Retrieves current error processing mode */
 CVAPI(int)  cvGetErrMode( void );
 
 /** Sets error processing mode, returns previously used mode */
 CVAPI(int) cvSetErrMode( int mode );
 
-/** Sets error status and performs some additonal actions (displaying message box,
+/** Sets error status and performs some additional actions (displaying message box,
  writing message to stderr, terminating application etc.)
  depending on the current error mode */
 CVAPI(void) cvError( int status, const char* func_name,
@@ -2631,7 +2622,7 @@ CVAPI(void) cvError( int status, const char* func_name,
 /** Retrieves textual description of the error given its code */
 CVAPI(const char*) cvErrorStr( int status );
 
-/** Retrieves detailed information about the last error occured */
+/** Retrieves detailed information about the last error occurred */
 CVAPI(int) cvGetErrInfo( const char** errcode_desc, const char** description,
                         const char** filename, int* line );
 
@@ -2706,7 +2697,7 @@ static char cvFuncName[] = Name
 /**
  CV_CALL macro calls CV (or IPL) function, checks error status and
  signals a error if the function failed. Useful in "parent node"
- error procesing mode
+ error processing mode
  */
 #define CV_CALL( Func )                                             \
 {                                                                   \
@@ -2734,24 +2725,6 @@ static char cvFuncName[] = Name
 
 #ifdef __cplusplus
 
-//! @addtogroup core_c_glue
-//! @{
-
-//! class for automatic module/RTTI data registration/unregistration
-struct CV_EXPORTS CvType
-{
-    CvType( const char* type_name,
-            CvIsInstanceFunc is_instance, CvReleaseFunc release=0,
-            CvReadFunc read=0, CvWriteFunc write=0, CvCloneFunc clone=0 );
-    ~CvType();
-    CvTypeInfo* info;
-
-    static CvTypeInfo* first;
-    static CvTypeInfo* last;
-};
-
-//! @}
-
 #include "opencv2/core/utility.hpp"
 
 namespace cv
@@ -2782,11 +2755,11 @@ CV_EXPORTS void insertImageCOI(InputArray coiimg, CvArr* arr, int coi=-1);
 
 ////// specialized implementations of DefaultDeleter::operator() for classic OpenCV types //////
 
-template<> CV_EXPORTS void DefaultDeleter<CvMat>::operator ()(CvMat* obj) const;
-template<> CV_EXPORTS void DefaultDeleter<IplImage>::operator ()(IplImage* obj) const;
-template<> CV_EXPORTS void DefaultDeleter<CvMatND>::operator ()(CvMatND* obj) const;
-template<> CV_EXPORTS void DefaultDeleter<CvSparseMat>::operator ()(CvSparseMat* obj) const;
-template<> CV_EXPORTS void DefaultDeleter<CvMemStorage>::operator ()(CvMemStorage* obj) const;
+template<> struct DefaultDeleter<CvMat>{ CV_EXPORTS void operator ()(CvMat* obj) const; };
+template<> struct DefaultDeleter<IplImage>{ CV_EXPORTS void operator ()(IplImage* obj) const; };
+template<> struct DefaultDeleter<CvMatND>{ CV_EXPORTS void operator ()(CvMatND* obj) const; };
+template<> struct DefaultDeleter<CvSparseMat>{ CV_EXPORTS void operator ()(CvSparseMat* obj) const; };
+template<> struct DefaultDeleter<CvMemStorage>{ CV_EXPORTS void operator ()(CvMemStorage* obj) const; };
 
 ////////////// convenient wrappers for operating old-style dynamic structures //////////////
 
@@ -3041,7 +3014,7 @@ template<typename _Tp> inline void Seq<_Tp>::copyTo(std::vector<_Tp>& vec, const
     size_t len = !seq ? 0 : range == Range::all() ? seq->total : range.end - range.start;
     vec.resize(len);
     if( seq && len )
-        cvCvtSeqToArray(seq, &vec[0], range);
+        cvCvtSeqToArray(seq, &vec[0], cvSlice(range));
 }
 
 template<typename _Tp> inline Seq<_Tp>::operator std::vector<_Tp>() const
diff --git a/IPL/include/opencv/opencv2/core/cuda.hpp b/IPL/include/opencv/opencv2/core/cuda.hpp
index 64bc53e..5d94b72 100644
--- a/IPL/include/opencv/opencv2/core/cuda.hpp
+++ b/IPL/include/opencv/opencv2/core/cuda.hpp
@@ -41,8 +41,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_CORE_CUDA_HPP__
-#define __OPENCV_CORE_CUDA_HPP__
+#ifndef OPENCV_CORE_CUDA_HPP
+#define OPENCV_CORE_CUDA_HPP
 
 #ifndef __cplusplus
 #  error cuda.hpp header must be compiled as C++
@@ -56,7 +56,7 @@
   @{
     @defgroup cudacore Core part
     @{
-      @defgroup cudacore_init Initalization and Information
+      @defgroup cudacore_init Initialization and Information
       @defgroup cudacore_struct Data Structures
     @}
   @}
@@ -91,12 +91,21 @@ aligned to a size depending on the hardware. Single-row GpuMat is always a conti
 on its destructor. The destruction order of such variables and CUDA context is undefined. GPU memory
 release function returns error if the CUDA context has been destroyed before.
 
+Some member functions are described as a "Blocking Call" while some are described as a
+"Non-Blocking Call". Blocking functions are synchronous to host. It is guaranteed that the GPU
+operation is finished when the function returns. However, non-blocking functions are asynchronous to
+host. Those functions may return even if the GPU operation is not finished.
+
+Compared to their blocking counterpart, non-blocking functions accept Stream as an additional
+argument. If a non-default stream is passed, the GPU operation may overlap with operations in other
+streams.
+
 @sa Mat
  */
-class CV_EXPORTS GpuMat
+class CV_EXPORTS_W GpuMat
 {
 public:
-    class CV_EXPORTS Allocator
+    class CV_EXPORTS_W Allocator
     {
     public:
         virtual ~Allocator() {}
@@ -107,33 +116,33 @@ class CV_EXPORTS GpuMat
     };
 
     //! default allocator
-    static Allocator* defaultAllocator();
-    static void setDefaultAllocator(Allocator* allocator);
+    CV_WRAP static GpuMat::Allocator* defaultAllocator();
+    CV_WRAP static void setDefaultAllocator(GpuMat::Allocator* allocator);
 
     //! default constructor
-    explicit GpuMat(Allocator* allocator = defaultAllocator());
+    CV_WRAP explicit GpuMat(GpuMat::Allocator* allocator = GpuMat::defaultAllocator());
 
     //! constructs GpuMat of the specified size and type
-    GpuMat(int rows, int cols, int type, Allocator* allocator = defaultAllocator());
-    GpuMat(Size size, int type, Allocator* allocator = defaultAllocator());
+    CV_WRAP GpuMat(int rows, int cols, int type, GpuMat::Allocator* allocator = GpuMat::defaultAllocator());
+    CV_WRAP GpuMat(Size size, int type, GpuMat::Allocator* allocator = GpuMat::defaultAllocator());
 
-    //! constucts GpuMat and fills it with the specified value _s
-    GpuMat(int rows, int cols, int type, Scalar s, Allocator* allocator = defaultAllocator());
-    GpuMat(Size size, int type, Scalar s, Allocator* allocator = defaultAllocator());
+    //! constructs GpuMat and fills it with the specified value _s
+    CV_WRAP GpuMat(int rows, int cols, int type, Scalar s, GpuMat::Allocator* allocator = GpuMat::defaultAllocator());
+    CV_WRAP GpuMat(Size size, int type, Scalar s, GpuMat::Allocator* allocator = GpuMat::defaultAllocator());
 
     //! copy constructor
-    GpuMat(const GpuMat& m);
+    CV_WRAP GpuMat(const GpuMat& m);
 
     //! constructor for GpuMat headers pointing to user-allocated data
     GpuMat(int rows, int cols, int type, void* data, size_t step = Mat::AUTO_STEP);
     GpuMat(Size size, int type, void* data, size_t step = Mat::AUTO_STEP);
 
     //! creates a GpuMat header for a part of the bigger matrix
-    GpuMat(const GpuMat& m, Range rowRange, Range colRange);
-    GpuMat(const GpuMat& m, Rect roi);
+    CV_WRAP GpuMat(const GpuMat& m, Range rowRange, Range colRange);
+    CV_WRAP GpuMat(const GpuMat& m, Rect roi);
 
     //! builds GpuMat from host memory (Blocking call)
-    explicit GpuMat(InputArray arr, Allocator* allocator = defaultAllocator());
+    CV_WRAP explicit GpuMat(InputArray arr, GpuMat::Allocator* allocator = GpuMat::defaultAllocator());
 
     //! destructor - calls release()
     ~GpuMat();
@@ -142,70 +151,92 @@ class CV_EXPORTS GpuMat
     GpuMat& operator =(const GpuMat& m);
 
     //! allocates new GpuMat data unless the GpuMat already has specified size and type
-    void create(int rows, int cols, int type);
-    void create(Size size, int type);
+    CV_WRAP void create(int rows, int cols, int type);
+    CV_WRAP void create(Size size, int type);
 
     //! decreases reference counter, deallocate the data when reference counter reaches 0
     void release();
 
     //! swaps with other smart pointer
-    void swap(GpuMat& mat);
+    CV_WRAP void swap(GpuMat& mat);
+
+    /** @brief Performs data upload to GpuMat (Blocking call)
+
+    This function copies data from host memory to device memory. Because it is a blocking call, the
+    copy operation is guaranteed to be finished when this function returns.
+    */
+    CV_WRAP void upload(InputArray arr);
+
+    /** @brief Performs data upload to GpuMat (Non-Blocking call)
+
+    This function copies data from host memory to device memory. Because it is a non-blocking call,
+    this function may return even if the copy operation is not finished.
 
-    //! pefroms upload data to GpuMat (Blocking call)
-    void upload(InputArray arr);
+    The copy operation may be overlapped with operations in other non-default streams if \p stream is
+    not the default stream and \p arr is HostMem allocated with HostMem::PAGE_LOCKED option.
+    */
+    CV_WRAP void upload(InputArray arr, Stream& stream);
+
+    /** @brief Performs data download from GpuMat (Blocking call)
+
+    This function copies data from device memory to host memory. Because it is a blocking call, the
+    copy operation is guaranteed to be finished when this function returns.
+    */
+    CV_WRAP void download(OutputArray dst) const;
 
-    //! pefroms upload data to GpuMat (Non-Blocking call)
-    void upload(InputArray arr, Stream& stream);
+    /** @brief Performs data download from GpuMat (Non-Blocking call)
 
-    //! pefroms download data from device to host memory (Blocking call)
-    void download(OutputArray dst) const;
+    This function copies data from device memory to host memory. Because it is a non-blocking call,
+    this function may return even if the copy operation is not finished.
 
-    //! pefroms download data from device to host memory (Non-Blocking call)
-    void download(OutputArray dst, Stream& stream) const;
+    The copy operation may be overlapped with operations in other non-default streams if \p stream is
+    not the default stream and \p dst is HostMem allocated with HostMem::PAGE_LOCKED option.
+    */
+    CV_WRAP void download(OutputArray dst, Stream& stream) const;
 
     //! returns deep copy of the GpuMat, i.e. the data is copied
-    GpuMat clone() const;
+    CV_WRAP GpuMat clone() const;
 
     //! copies the GpuMat content to device memory (Blocking call)
-    void copyTo(OutputArray dst) const;
+    CV_WRAP void copyTo(OutputArray dst) const;
 
     //! copies the GpuMat content to device memory (Non-Blocking call)
-    void copyTo(OutputArray dst, Stream& stream) const;
+    CV_WRAP void copyTo(OutputArray dst, Stream& stream) const;
 
     //! copies those GpuMat elements to "m" that are marked with non-zero mask elements (Blocking call)
-    void copyTo(OutputArray dst, InputArray mask) const;
+    CV_WRAP void copyTo(OutputArray dst, InputArray mask) const;
 
     //! copies those GpuMat elements to "m" that are marked with non-zero mask elements (Non-Blocking call)
-    void copyTo(OutputArray dst, InputArray mask, Stream& stream) const;
+    CV_WRAP void copyTo(OutputArray dst, InputArray mask, Stream& stream) const;
 
     //! sets some of the GpuMat elements to s (Blocking call)
-    GpuMat& setTo(Scalar s);
+    CV_WRAP GpuMat& setTo(Scalar s);
 
     //! sets some of the GpuMat elements to s (Non-Blocking call)
-    GpuMat& setTo(Scalar s, Stream& stream);
+    CV_WRAP GpuMat& setTo(Scalar s, Stream& stream);
 
     //! sets some of the GpuMat elements to s, according to the mask (Blocking call)
-    GpuMat& setTo(Scalar s, InputArray mask);
+    CV_WRAP GpuMat& setTo(Scalar s, InputArray mask);
 
     //! sets some of the GpuMat elements to s, according to the mask (Non-Blocking call)
-    GpuMat& setTo(Scalar s, InputArray mask, Stream& stream);
+    CV_WRAP GpuMat& setTo(Scalar s, InputArray mask, Stream& stream);
 
     //! converts GpuMat to another datatype (Blocking call)
-    void convertTo(OutputArray dst, int rtype) const;
+    CV_WRAP void convertTo(OutputArray dst, int rtype) const;
 
     //! converts GpuMat to another datatype (Non-Blocking call)
-    void convertTo(OutputArray dst, int rtype, Stream& stream) const;
+    CV_WRAP void convertTo(OutputArray dst, int rtype, Stream& stream) const;
 
     //! converts GpuMat to another datatype with scaling (Blocking call)
-    void convertTo(OutputArray dst, int rtype, double alpha, double beta = 0.0) const;
+    CV_WRAP void convertTo(OutputArray dst, int rtype, double alpha, double beta = 0.0) const;
 
     //! converts GpuMat to another datatype with scaling (Non-Blocking call)
-    void convertTo(OutputArray dst, int rtype, double alpha, Stream& stream) const;
+    CV_WRAP void convertTo(OutputArray dst, int rtype, double alpha, Stream& stream) const;
 
     //! converts GpuMat to another datatype with scaling (Non-Blocking call)
-    void convertTo(OutputArray dst, int rtype, double alpha, double beta, Stream& stream) const;
+    CV_WRAP void convertTo(OutputArray dst, int rtype, double alpha, double beta, Stream& stream) const;
 
-    void assignTo(GpuMat& m, int type=-1) const;
+    CV_WRAP void assignTo(GpuMat& m, int type = -1) const;
 
     //! returns pointer to y-th row
     uchar* ptr(int y = 0);
@@ -219,18 +250,18 @@ class CV_EXPORTS GpuMat
     template <typename _Tp> operator PtrStep<_Tp>() const;
 
     //! returns a new GpuMat header for the specified row
-    GpuMat row(int y) const;
+    CV_WRAP GpuMat row(int y) const;
 
     //! returns a new GpuMat header for the specified column
-    GpuMat col(int x) const;
+    CV_WRAP GpuMat col(int x) const;
 
     //! ... for the specified row span
-    GpuMat rowRange(int startrow, int endrow) const;
-    GpuMat rowRange(Range r) const;
+    CV_WRAP GpuMat rowRange(int startrow, int endrow) const;
+    CV_WRAP GpuMat rowRange(Range r) const;
 
     //! ... for the specified column span
-    GpuMat colRange(int startcol, int endcol) const;
-    GpuMat colRange(Range r) const;
+    CV_WRAP GpuMat colRange(int startcol, int endcol) const;
+    CV_WRAP GpuMat colRange(Range r) const;
 
     //! extracts a rectangular sub-GpuMat (this is a generalized form of row, rowRange etc.)
     GpuMat operator ()(Range rowRange, Range colRange) const;
@@ -238,41 +269,47 @@ class CV_EXPORTS GpuMat
 
     //! creates alternative GpuMat header for the same data, with different
     //! number of channels and/or different number of rows
-    GpuMat reshape(int cn, int rows = 0) const;
+    CV_WRAP GpuMat reshape(int cn, int rows = 0) const;
 
     //! locates GpuMat header within a parent GpuMat
-    void locateROI(Size& wholeSize, Point& ofs) const;
+    CV_WRAP void locateROI(Size& wholeSize, Point& ofs) const;
 
     //! moves/resizes the current GpuMat ROI inside the parent GpuMat
-    GpuMat& adjustROI(int dtop, int dbottom, int dleft, int dright);
+    CV_WRAP GpuMat& adjustROI(int dtop, int dbottom, int dleft, int dright);
 
     //! returns true iff the GpuMat data is continuous
     //! (i.e. when there are no gaps between successive rows)
-    bool isContinuous() const;
+    CV_WRAP bool isContinuous() const;
 
     //! returns element size in bytes
-    size_t elemSize() const;
+    CV_WRAP size_t elemSize() const;
 
     //! returns the size of element channel in bytes
-    size_t elemSize1() const;
+    CV_WRAP size_t elemSize1() const;
 
     //! returns element type
-    int type() const;
+    CV_WRAP int type() const;
 
     //! returns element type
-    int depth() const;
+    CV_WRAP int depth() const;
 
     //! returns number of channels
-    int channels() const;
+    CV_WRAP int channels() const;
 
     //! returns step/elemSize1()
-    size_t step1() const;
+    CV_WRAP size_t step1() const;
 
     //! returns GpuMat size : width == number of columns, height == number of rows
-    Size size() const;
+    CV_WRAP Size size() const;
 
     //! returns true if GpuMat data is NULL
-    bool empty() const;
+    CV_WRAP bool empty() const;
+
+    //! returns pointer to CUDA memory
+    CV_WRAP void* cudaPtr() const;
+
+    //! internal use method: updates the continuity flag
+    CV_WRAP void updateContinuityFlag();
 
     /*! includes several bit-fields:
     - the magic signature
@@ -286,7 +323,7 @@ class CV_EXPORTS GpuMat
     int rows, cols;
 
     //! a distance between successive rows in bytes; includes the gap if any
-    size_t step;
+    CV_PROP size_t step;
 
     //! pointer to the data
     uchar* data;
@@ -314,7 +351,7 @@ class CV_EXPORTS GpuMat
 Matrix is called continuous if its elements are stored continuously, that is, without gaps at the
 end of each row.
  */
-CV_EXPORTS void createContinuous(int rows, int cols, int type, OutputArray arr);
+CV_EXPORTS_W void createContinuous(int rows, int cols, int type, OutputArray arr);
 
 /** @brief Ensures that the size of a matrix is big enough and the matrix has a proper type.
 
@@ -325,11 +362,148 @@ CV_EXPORTS void createContinuous(int rows, int cols, int type, OutputArray arr);
 
 The function does not reallocate memory if the matrix has proper attributes already.
  */
-CV_EXPORTS void ensureSizeIsEnough(int rows, int cols, int type, OutputArray arr);
+CV_EXPORTS_W void ensureSizeIsEnough(int rows, int cols, int type, OutputArray arr);
+
+/** @brief BufferPool for use with CUDA streams
+
+BufferPool utilizes Stream's allocator to create new buffers for GpuMat's. It is
+only useful when enabled with #setBufferPoolUsage.
+
+@code
+    setBufferPoolUsage(true);
+@endcode
+
+@note #setBufferPoolUsage must be called \em before any Stream declaration.
+
+Users may specify custom allocator for Stream and may implement their own stream based
+functions utilizing the same underlying GPU memory management.
+
+If custom allocator is not specified, BufferPool utilizes StackAllocator by
+default. StackAllocator allocates a chunk of GPU device memory beforehand,
+and when GpuMat is declared later on, it is given the pre-allocated memory.
+This kind of strategy reduces the number of calls for memory allocating APIs
+such as cudaMalloc or cudaMallocPitch.
+
+Below is an example that utilizes BufferPool with StackAllocator:
+
+@code
+    #include <opencv2/opencv.hpp>
+
+    using namespace cv;
+    using namespace cv::cuda;
+
+    int main()
+    {
+        setBufferPoolUsage(true);                               // Tell OpenCV that we are going to utilize BufferPool
+        setBufferPoolConfig(getDevice(), 1024 * 1024 * 64, 2);  // Allocate 64 MB, 2 stacks (default is 10 MB, 5 stacks)
+
+        Stream stream1, stream2;                                // Each stream uses 1 stack
+        BufferPool pool1(stream1), pool2(stream2);
+
+        GpuMat d_src1 = pool1.getBuffer(4096, 4096, CV_8UC1);   // 16MB
+        GpuMat d_dst1 = pool1.getBuffer(4096, 4096, CV_8UC3);   // 48MB, pool1 is now full
+
+        GpuMat d_src2 = pool2.getBuffer(1024, 1024, CV_8UC1);   // 1MB
+        GpuMat d_dst2 = pool2.getBuffer(1024, 1024, CV_8UC3);   // 3MB
+
+        cvtColor(d_src1, d_dst1, cv::COLOR_GRAY2BGR, 0, stream1);
+        cvtColor(d_src2, d_dst2, cv::COLOR_GRAY2BGR, 0, stream2);
+    }
+@endcode
+
+If we allocate another GpuMat on pool1 in the above example, it will be carried out by
+the DefaultAllocator since the stack for pool1 is full.
+
+@code
+    GpuMat d_add1 = pool1.getBuffer(1024, 1024, CV_8UC1);   // Stack for pool1 is full, memory is allocated with DefaultAllocator
+@endcode
+
+If a third stream is declared in the above example, allocating with #getBuffer
+within that stream will also be carried out by the DefaultAllocator because we've run out of
+stacks.
+
+@code
+    Stream stream3;                                         // Only 2 stacks were allocated, we've run out of stacks
+    BufferPool pool3(stream3);
+    GpuMat d_src3 = pool3.getBuffer(1024, 1024, CV_8UC1);   // Memory is allocated with DefaultAllocator
+@endcode
+
+@warning When utilizing StackAllocator, deallocation order is important.
+
+Just like a stack, deallocation must be done in LIFO order. Below is an example of
+erroneous usage that violates LIFO rule. If OpenCV is compiled in Debug mode, this
+sample code will emit CV_Assert error.
+
+@code
+    int main()
+    {
+        setBufferPoolUsage(true);                               // Tell OpenCV that we are going to utilize BufferPool
+        Stream stream;                                          // A default size (10 MB) stack is allocated to this stream
+        BufferPool pool(stream);
+
+        GpuMat mat1 = pool.getBuffer(1024, 1024, CV_8UC1);      // Allocate mat1 (1MB)
+        GpuMat mat2 = pool.getBuffer(1024, 1024, CV_8UC1);      // Allocate mat2 (1MB)
+
+        mat1.release();                                         // erroneous usage : mat2 must be deallocated before mat1
+    }
+@endcode
+
+Since C++ local variables are destroyed in the reverse order of construction,
+the code sample below satisfies the LIFO rule. Local GpuMat's are deallocated
+and the corresponding memory is automatically returned to the pool for later usage.
+
+@code
+    int main()
+    {
+        setBufferPoolUsage(true);                               // Tell OpenCV that we are going to utilize BufferPool
+        setBufferPoolConfig(getDevice(), 1024 * 1024 * 64, 2);  // Allocate 64 MB, 2 stacks (default is 10 MB, 5 stacks)
+
+        Stream stream1, stream2;                                // Each stream uses 1 stack
+        BufferPool pool1(stream1), pool2(stream2);
+
+        for (int i = 0; i < 10; i++)
+        {
+            GpuMat d_src1 = pool1.getBuffer(4096, 4096, CV_8UC1);   // 16MB
+            GpuMat d_dst1 = pool1.getBuffer(4096, 4096, CV_8UC3);   // 48MB, pool1 is now full
+
+            GpuMat d_src2 = pool2.getBuffer(1024, 1024, CV_8UC1);   // 1MB
+            GpuMat d_dst2 = pool2.getBuffer(1024, 1024, CV_8UC3);   // 3MB
+
+            d_src1.setTo(Scalar(i), stream1);
+            d_src2.setTo(Scalar(i), stream2);
+
+            cvtColor(d_src1, d_dst1, cv::COLOR_GRAY2BGR, 0, stream1);
+            cvtColor(d_src2, d_dst2, cv::COLOR_GRAY2BGR, 0, stream2);
+                                                                    // The order of destruction of the local variables is:
+                                                                    //   d_dst2 => d_src2 => d_dst1 => d_src1
+                                                                    // LIFO rule is satisfied, this code runs without error
+        }
+    }
+@endcode
+ */
+class CV_EXPORTS_W BufferPool
+{
+public:
+
+    //! Gets the BufferPool for the given stream.
+    explicit BufferPool(Stream& stream);
+
+    //! Allocates a new GpuMat of given size and type.
+    CV_WRAP GpuMat getBuffer(int rows, int cols, int type);
+
+    //! Allocates a new GpuMat of given size and type.
+    CV_WRAP GpuMat getBuffer(Size size, int type) { return getBuffer(size.height, size.width, type); }
+
+    //! Returns the allocator associated with the stream.
+    CV_WRAP Ptr<GpuMat::Allocator> getAllocator() const { return allocator_; }
+
+private:
+    Ptr<GpuMat::Allocator> allocator_;
+};
 
 //! BufferPool management (must be called before Stream creation)
-CV_EXPORTS void setBufferPoolUsage(bool on);
-CV_EXPORTS void setBufferPoolConfig(int deviceId, size_t stackSize, int stackCount);
+CV_EXPORTS_W void setBufferPoolUsage(bool on);
+CV_EXPORTS_W void setBufferPoolConfig(int deviceId, size_t stackSize, int stackCount);
 
 //===================================================================================
 // HostMem
@@ -350,46 +524,46 @@ Its interface is also Mat-like but with additional memory type parameters.
 @note Allocation size of such memory types is usually limited. For more details, see *CUDA 2.2
 Pinned Memory APIs* document or *CUDA C Programming Guide*.
  */
-class CV_EXPORTS HostMem
+class CV_EXPORTS_W HostMem
 {
 public:
     enum AllocType { PAGE_LOCKED = 1, SHARED = 2, WRITE_COMBINED = 4 };
 
-    static MatAllocator* getAllocator(AllocType alloc_type = PAGE_LOCKED);
+    static MatAllocator* getAllocator(HostMem::AllocType alloc_type = HostMem::AllocType::PAGE_LOCKED);
 
-    explicit HostMem(AllocType alloc_type = PAGE_LOCKED);
+    CV_WRAP explicit HostMem(HostMem::AllocType alloc_type = HostMem::AllocType::PAGE_LOCKED);
 
     HostMem(const HostMem& m);
 
-    HostMem(int rows, int cols, int type, AllocType alloc_type = PAGE_LOCKED);
-    HostMem(Size size, int type, AllocType alloc_type = PAGE_LOCKED);
+    CV_WRAP HostMem(int rows, int cols, int type, HostMem::AllocType alloc_type = HostMem::AllocType::PAGE_LOCKED);
+    CV_WRAP HostMem(Size size, int type, HostMem::AllocType alloc_type = HostMem::AllocType::PAGE_LOCKED);
 
     //! creates from host memory with coping data
-    explicit HostMem(InputArray arr, AllocType alloc_type = PAGE_LOCKED);
+    CV_WRAP explicit HostMem(InputArray arr, HostMem::AllocType alloc_type = HostMem::AllocType::PAGE_LOCKED);
 
     ~HostMem();
 
     HostMem& operator =(const HostMem& m);
 
     //! swaps with other smart pointer
-    void swap(HostMem& b);
+    CV_WRAP void swap(HostMem& b);
 
     //! returns deep copy of the matrix, i.e. the data is copied
-    HostMem clone() const;
+    CV_WRAP HostMem clone() const;
 
     //! allocates new matrix data unless the matrix already has specified size and type.
-    void create(int rows, int cols, int type);
+    CV_WRAP void create(int rows, int cols, int type);
     void create(Size size, int type);
 
     //! creates alternative HostMem header for the same data, with different
     //! number of channels and/or different number of rows
-    HostMem reshape(int cn, int rows = 0) const;
+    CV_WRAP HostMem reshape(int cn, int rows = 0) const;
 
     //! decrements reference counter and released memory if needed.
     void release();
 
     //! returns matrix header with disabled reference counting for HostMem data.
-    Mat createMatHeader() const;
+    CV_WRAP Mat createMatHeader() const;
 
     /** @brief Maps CPU memory to GPU address space and creates the cuda::GpuMat header without reference counting
     for it.
@@ -401,20 +575,20 @@ class CV_EXPORTS HostMem
     GpuMat createGpuMatHeader() const;
 
     // Please see cv::Mat for descriptions
-    bool isContinuous() const;
-    size_t elemSize() const;
-    size_t elemSize1() const;
-    int type() const;
-    int depth() const;
-    int channels() const;
-    size_t step1() const;
-    Size size() const;
-    bool empty() const;
+    CV_WRAP bool isContinuous() const;
+    CV_WRAP size_t elemSize() const;
+    CV_WRAP size_t elemSize1() const;
+    CV_WRAP int type() const;
+    CV_WRAP int depth() const;
+    CV_WRAP int channels() const;
+    CV_WRAP size_t step1() const;
+    CV_WRAP Size size() const;
+    CV_WRAP bool empty() const;
 
     // Please see cv::Mat for descriptions
     int flags;
     int rows, cols;
-    size_t step;
+    CV_PROP size_t step;
 
     uchar* data;
     int* refcount;
@@ -429,13 +603,13 @@ class CV_EXPORTS HostMem
 
 @param m Input matrix.
  */
-CV_EXPORTS void registerPageLocked(Mat& m);
+CV_EXPORTS_W void registerPageLocked(Mat& m);
 
 /** @brief Unmaps the memory of matrix and makes it pageable again.
 
 @param m Input matrix.
  */
-CV_EXPORTS void unregisterPageLocked(Mat& m);
+CV_EXPORTS_W void unregisterPageLocked(Mat& m);
 
 //===================================================================================
 // Stream
@@ -447,9 +621,28 @@ CV_EXPORTS void unregisterPageLocked(Mat& m);
 functions use the constant GPU memory, and next call may update the memory before the previous one
 has been finished. But calling different operations asynchronously is safe because each operation
 has its own constant buffer. Memory copy/upload/download/set operations to the buffers you hold are
-also safe. :
+also safe.
+
+@note The Stream class is not thread-safe. Please use different Stream objects for different CPU threads.
+
+@code
+void thread1()
+{
+    cv::cuda::Stream stream1;
+    cv::cuda::func1(..., stream1);
+}
+
+void thread2()
+{
+    cv::cuda::Stream stream2;
+    cv::cuda::func2(..., stream2);
+}
+@endcode
+
+@note By default, all CUDA routines are launched in the Stream::Null() object if the stream is not specified by the user.
+In a multi-threaded environment, the stream objects must be passed explicitly (see previous note).
  */
-class CV_EXPORTS Stream
+class CV_EXPORTS_W Stream
 {
     typedef void (Stream::*bool_type)() const;
     void this_type_does_not_support_comparisons() const {}
@@ -458,19 +651,22 @@ class CV_EXPORTS Stream
     typedef void (*StreamCallback)(int status, void* userData);
 
     //! creates a new asynchronous stream
-    Stream();
+    CV_WRAP Stream();
+
+    //! creates a new asynchronous stream with custom allocator
+    CV_WRAP Stream(const Ptr<GpuMat::Allocator>& allocator);
 
     /** @brief Returns true if the current stream queue is finished. Otherwise, it returns false.
     */
-    bool queryIfComplete() const;
+    CV_WRAP bool queryIfComplete() const;
 
     /** @brief Blocks the current CPU thread until all operations in the stream are complete.
     */
-    void waitForCompletion();
+    CV_WRAP void waitForCompletion();
 
     /** @brief Makes a compute stream wait on an event.
     */
-    void waitEvent(const Event& event);
+    CV_WRAP void waitEvent(const Event& event);
 
     /** @brief Adds a callback to be called on the host after all currently enqueued items in the stream have
     completed.
@@ -483,11 +679,14 @@ class CV_EXPORTS Stream
     void enqueueHostCallback(StreamCallback callback, void* userData);
 
     //! return Stream object for default CUDA stream
-    static Stream& Null();
+    CV_WRAP static Stream& Null();
 
     //! returns true if stream object is not default (!= 0)
     operator bool_type() const;
 
+    //! returns pointer to the CUDA stream
+    CV_WRAP void* cudaPtr() const;
+
     class Impl;
 
 private:
@@ -499,7 +698,7 @@ class CV_EXPORTS Stream
     friend class DefaultDeviceInitializer;
 };
 
-class CV_EXPORTS Event
+class CV_EXPORTS_W Event
 {
 public:
     enum CreateFlags
@@ -510,19 +709,19 @@ class CV_EXPORTS Event
         INTERPROCESS   = 0x04   /**< Event is suitable for interprocess use. DisableTiming must be set */
     };
 
-    explicit Event(CreateFlags flags = DEFAULT);
+    CV_WRAP explicit Event(Event::CreateFlags flags = Event::CreateFlags::DEFAULT);
 
     //! records an event
-    void record(Stream& stream = Stream::Null());
+    CV_WRAP void record(Stream& stream = Stream::Null());
 
     //! queries an event's status
-    bool queryIfComplete() const;
+    CV_WRAP bool queryIfComplete() const;
 
     //! waits for an event to complete
-    void waitForCompletion();
+    CV_WRAP void waitForCompletion();
 
     //! computes the elapsed time between events
-    static float elapsedTime(const Event& start, const Event& end);
+    CV_WRAP static float elapsedTime(const Event& start, const Event& end);
 
     class Impl;
 
@@ -545,9 +744,10 @@ class CV_EXPORTS Event
 /** @brief Returns the number of installed CUDA-enabled devices.
 
 Use this function before any other CUDA functions calls. If OpenCV is compiled without CUDA support,
-this function returns 0.
+this function returns 0. If the CUDA driver is not installed, or is incompatible, this function
+returns -1.
  */
-CV_EXPORTS int getCudaEnabledDeviceCount();
+CV_EXPORTS_W int getCudaEnabledDeviceCount();
 
 /** @brief Sets a device and initializes it for the current thread.
 
@@ -555,18 +755,18 @@ CV_EXPORTS int getCudaEnabledDeviceCount();
 
 If the call of this function is omitted, a default device is initialized at the fist CUDA usage.
  */
-CV_EXPORTS void setDevice(int device);
+CV_EXPORTS_W void setDevice(int device);
 
 /** @brief Returns the current device index set by cuda::setDevice or initialized by default.
  */
-CV_EXPORTS int getDevice();
+CV_EXPORTS_W int getDevice();
 
 /** @brief Explicitly destroys and cleans up all resources associated with the current device in the current
 process.
 
 Any subsequent API call to this device will reinitialize the device.
  */
-CV_EXPORTS void resetDevice();
+CV_EXPORTS_W void resetDevice();
 
 /** @brief Enumeration providing CUDA computing features.
  */
@@ -599,7 +799,7 @@ built for.
 According to the CUDA C Programming Guide Version 3.2: "PTX code produced for some specific compute
 capability can always be compiled to binary code of greater or equal compute capability".
  */
-class CV_EXPORTS TargetArchs
+class CV_EXPORTS_W TargetArchs
 {
 public:
     /** @brief The following method checks whether the module was built with the support of the given feature:
@@ -614,23 +814,23 @@ class CV_EXPORTS TargetArchs
     @param major Major compute capability version.
     @param minor Minor compute capability version.
      */
-    static bool has(int major, int minor);
-    static bool hasPtx(int major, int minor);
-    static bool hasBin(int major, int minor);
-
-    static bool hasEqualOrLessPtx(int major, int minor);
-    static bool hasEqualOrGreater(int major, int minor);
-    static bool hasEqualOrGreaterPtx(int major, int minor);
-    static bool hasEqualOrGreaterBin(int major, int minor);
+    CV_WRAP static bool has(int major, int minor);
+    CV_WRAP static bool hasPtx(int major, int minor);
+    CV_WRAP static bool hasBin(int major, int minor);
+
+    CV_WRAP static bool hasEqualOrLessPtx(int major, int minor);
+    CV_WRAP static bool hasEqualOrGreater(int major, int minor);
+    CV_WRAP static bool hasEqualOrGreaterPtx(int major, int minor);
+    CV_WRAP static bool hasEqualOrGreaterBin(int major, int minor);
 };
 
 /** @brief Class providing functionality for querying the specified GPU properties.
  */
-class CV_EXPORTS DeviceInfo
+class CV_EXPORTS_W DeviceInfo
 {
 public:
     //! creates DeviceInfo object for the current GPU
-    DeviceInfo();
+    CV_WRAP DeviceInfo();
 
     /** @brief The constructors.
 
@@ -639,68 +839,68 @@ class CV_EXPORTS DeviceInfo
     Constructs the DeviceInfo object for the specified device. If device_id parameter is missed, it
     constructs an object for the current device.
      */
-    DeviceInfo(int device_id);
+    CV_WRAP DeviceInfo(int device_id);
 
     /** @brief Returns system index of the CUDA device starting with 0.
     */
-    int deviceID() const;
+    CV_WRAP int deviceID() const;
 
     //! ASCII string identifying device
     const char* name() const;
 
     //! global memory available on device in bytes
-    size_t totalGlobalMem() const;
+    CV_WRAP size_t totalGlobalMem() const;
 
     //! shared memory available per block in bytes
-    size_t sharedMemPerBlock() const;
+    CV_WRAP size_t sharedMemPerBlock() const;
 
     //! 32-bit registers available per block
-    int regsPerBlock() const;
+    CV_WRAP int regsPerBlock() const;
 
     //! warp size in threads
-    int warpSize() const;
+    CV_WRAP int warpSize() const;
 
     //! maximum pitch in bytes allowed by memory copies
-    size_t memPitch() const;
+    CV_WRAP size_t memPitch() const;
 
     //! maximum number of threads per block
-    int maxThreadsPerBlock() const;
+    CV_WRAP int maxThreadsPerBlock() const;
 
     //! maximum size of each dimension of a block
-    Vec3i maxThreadsDim() const;
+    CV_WRAP Vec3i maxThreadsDim() const;
 
     //! maximum size of each dimension of a grid
-    Vec3i maxGridSize() const;
+    CV_WRAP Vec3i maxGridSize() const;
 
     //! clock frequency in kilohertz
-    int clockRate() const;
+    CV_WRAP int clockRate() const;
 
     //! constant memory available on device in bytes
-    size_t totalConstMem() const;
+    CV_WRAP size_t totalConstMem() const;
 
     //! major compute capability
-    int majorVersion() const;
+    CV_WRAP int majorVersion() const;
 
     //! minor compute capability
-    int minorVersion() const;
+    CV_WRAP int minorVersion() const;
 
     //! alignment requirement for textures
-    size_t textureAlignment() const;
+    CV_WRAP size_t textureAlignment() const;
 
     //! pitch alignment requirement for texture references bound to pitched memory
-    size_t texturePitchAlignment() const;
+    CV_WRAP size_t texturePitchAlignment() const;
 
     //! number of multiprocessors on device
-    int multiProcessorCount() const;
+    CV_WRAP int multiProcessorCount() const;
 
     //! specified whether there is a run time limit on kernels
-    bool kernelExecTimeoutEnabled() const;
+    CV_WRAP bool kernelExecTimeoutEnabled() const;
 
     //! device is integrated as opposed to discrete
-    bool integrated() const;
+    CV_WRAP bool integrated() const;
 
     //! device can map host memory with cudaHostAlloc/cudaHostGetDevicePointer
-    bool canMapHostMemory() const;
+    CV_WRAP bool canMapHostMemory() const;
 
     enum ComputeMode
     {
@@ -711,108 +911,108 @@ class CV_EXPORTS DeviceInfo
     };
 
     //! compute mode
-    ComputeMode computeMode() const;
+    CV_WRAP DeviceInfo::ComputeMode computeMode() const;
 
     //! maximum 1D texture size
-    int maxTexture1D() const;
+    CV_WRAP int maxTexture1D() const;
 
     //! maximum 1D mipmapped texture size
-    int maxTexture1DMipmap() const;
+    CV_WRAP int maxTexture1DMipmap() const;
 
     //! maximum size for 1D textures bound to linear memory
-    int maxTexture1DLinear() const;
+    CV_WRAP int maxTexture1DLinear() const;
 
     //! maximum 2D texture dimensions
-    Vec2i maxTexture2D() const;
+    CV_WRAP Vec2i maxTexture2D() const;
 
     //! maximum 2D mipmapped texture dimensions
-    Vec2i maxTexture2DMipmap() const;
+    CV_WRAP Vec2i maxTexture2DMipmap() const;
 
     //! maximum dimensions (width, height, pitch) for 2D textures bound to pitched memory
-    Vec3i maxTexture2DLinear() const;
+    CV_WRAP Vec3i maxTexture2DLinear() const;
 
     //! maximum 2D texture dimensions if texture gather operations have to be performed
-    Vec2i maxTexture2DGather() const;
+    CV_WRAP Vec2i maxTexture2DGather() const;
 
     //! maximum 3D texture dimensions
-    Vec3i maxTexture3D() const;
+    CV_WRAP Vec3i maxTexture3D() const;
 
     //! maximum Cubemap texture dimensions
-    int maxTextureCubemap() const;
+    CV_WRAP int maxTextureCubemap() const;
 
     //! maximum 1D layered texture dimensions
-    Vec2i maxTexture1DLayered() const;
+    CV_WRAP Vec2i maxTexture1DLayered() const;
 
     //! maximum 2D layered texture dimensions
-    Vec3i maxTexture2DLayered() const;
+    CV_WRAP Vec3i maxTexture2DLayered() const;
 
     //! maximum Cubemap layered texture dimensions
-    Vec2i maxTextureCubemapLayered() const;
+    CV_WRAP Vec2i maxTextureCubemapLayered() const;
 
     //! maximum 1D surface size
-    int maxSurface1D() const;
+    CV_WRAP int maxSurface1D() const;
 
     //! maximum 2D surface dimensions
-    Vec2i maxSurface2D() const;
+    CV_WRAP Vec2i maxSurface2D() const;
 
     //! maximum 3D surface dimensions
-    Vec3i maxSurface3D() const;
+    CV_WRAP Vec3i maxSurface3D() const;
 
     //! maximum 1D layered surface dimensions
-    Vec2i maxSurface1DLayered() const;
+    CV_WRAP Vec2i maxSurface1DLayered() const;
 
     //! maximum 2D layered surface dimensions
-    Vec3i maxSurface2DLayered() const;
+    CV_WRAP Vec3i maxSurface2DLayered() const;
 
     //! maximum Cubemap surface dimensions
-    int maxSurfaceCubemap() const;
+    CV_WRAP int maxSurfaceCubemap() const;
 
     //! maximum Cubemap layered surface dimensions
-    Vec2i maxSurfaceCubemapLayered() const;
+    CV_WRAP Vec2i maxSurfaceCubemapLayered() const;
 
     //! alignment requirements for surfaces
-    size_t surfaceAlignment() const;
+    CV_WRAP size_t surfaceAlignment() const;
 
     //! device can possibly execute multiple kernels concurrently
-    bool concurrentKernels() const;
+    CV_WRAP bool concurrentKernels() const;
 
     //! device has ECC support enabled
-    bool ECCEnabled() const;
+    CV_WRAP bool ECCEnabled() const;
 
     //! PCI bus ID of the device
-    int pciBusID() const;
+    CV_WRAP int pciBusID() const;
 
     //! PCI device ID of the device
-    int pciDeviceID() const;
+    CV_WRAP int pciDeviceID() const;
 
     //! PCI domain ID of the device
-    int pciDomainID() const;
+    CV_WRAP int pciDomainID() const;
 
     //! true if device is a Tesla device using TCC driver, false otherwise
-    bool tccDriver() const;
+    CV_WRAP bool tccDriver() const;
 
     //! number of asynchronous engines
-    int asyncEngineCount() const;
+    CV_WRAP int asyncEngineCount() const;
 
     //! device shares a unified address space with the host
-    bool unifiedAddressing() const;
+    CV_WRAP bool unifiedAddressing() const;
 
     //! peak memory clock frequency in kilohertz
-    int memoryClockRate() const;
+    CV_WRAP int memoryClockRate() const;
 
     //! global memory bus width in bits
-    int memoryBusWidth() const;
+    CV_WRAP int memoryBusWidth() const;
 
     //! size of L2 cache in bytes
-    int l2CacheSize() const;
+    CV_WRAP int l2CacheSize() const;
 
     //! maximum resident threads per multiprocessor
-    int maxThreadsPerMultiProcessor() const;
+    CV_WRAP int maxThreadsPerMultiProcessor() const;
 
     //! gets free and total device memory
-    void queryMemory(size_t& totalMemory, size_t& freeMemory) const;
-    size_t freeMemory() const;
-    size_t totalMemory() const;
+    CV_WRAP void queryMemory(size_t& totalMemory, size_t& freeMemory) const;
+    CV_WRAP size_t freeMemory() const;
+    CV_WRAP size_t totalMemory() const;
 
     /** @brief Provides information on CUDA feature support.
 
@@ -827,14 +1027,23 @@ class CV_EXPORTS DeviceInfo
     This function returns true if the CUDA module can be run on the specified device. Otherwise, it
     returns false .
      */
-    bool isCompatible() const;
+    CV_WRAP bool isCompatible() const;
 
 private:
     int device_id_;
 };
 
-CV_EXPORTS void printCudaDeviceInfo(int device);
-CV_EXPORTS void printShortCudaDeviceInfo(int device);
+CV_EXPORTS_W void printCudaDeviceInfo(int device);
+CV_EXPORTS_W void printShortCudaDeviceInfo(int device);
+
+/** @brief Converts an array to half-precision floating-point numbers.
+
+@param _src input array.
+@param _dst output array.
+@param stream Stream for the asynchronous version.
+@sa convertFp16
+*/
+CV_EXPORTS void convertFp16(InputArray _src, OutputArray _dst, Stream& stream = Stream::Null());
 
 //! @} cudacore_init
 
@@ -843,4 +1052,4 @@ CV_EXPORTS void printShortCudaDeviceInfo(int device);
 
 #include "opencv2/core/cuda.inl.hpp"
 
-#endif /* __OPENCV_CORE_CUDA_HPP__ */
+#endif /* OPENCV_CORE_CUDA_HPP */
diff --git a/IPL/include/opencv/opencv2/core/cuda.inl.hpp b/IPL/include/opencv/opencv2/core/cuda.inl.hpp
index 01dc6d7..30fc0ae 100644
--- a/IPL/include/opencv/opencv2/core/cuda.inl.hpp
+++ b/IPL/include/opencv/opencv2/core/cuda.inl.hpp
@@ -41,8 +41,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_CORE_CUDAINL_HPP__
-#define __OPENCV_CORE_CUDAINL_HPP__
+#ifndef OPENCV_CORE_CUDAINL_HPP
+#define OPENCV_CORE_CUDAINL_HPP
 
 #include "opencv2/core/cuda.hpp"
 
@@ -343,6 +343,12 @@ bool GpuMat::empty() const
     return data == 0;
 }
 
+inline
+void* GpuMat::cudaPtr() const
+{
+    return data;
+}
+
 static inline
 GpuMat createContinuous(int rows, int cols, int type)
 {
@@ -628,4 +634,4 @@ Mat::Mat(const cuda::GpuMat& m)
 
 //! @endcond
 
-#endif // __OPENCV_CORE_CUDAINL_HPP__
+#endif // OPENCV_CORE_CUDAINL_HPP
diff --git a/IPL/include/opencv/opencv2/core/cuda/block.hpp b/IPL/include/opencv/opencv2/core/cuda/block.hpp
index 0c6f063..c277f0e 100644
--- a/IPL/include/opencv/opencv2/core/cuda/block.hpp
+++ b/IPL/include/opencv/opencv2/core/cuda/block.hpp
@@ -40,8 +40,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_CUDA_DEVICE_BLOCK_HPP__
-#define __OPENCV_CUDA_DEVICE_BLOCK_HPP__
+#ifndef OPENCV_CUDA_DEVICE_BLOCK_HPP
+#define OPENCV_CUDA_DEVICE_BLOCK_HPP
 
 /** @file
  * @deprecated Use @ref cudev instead.
@@ -106,7 +106,7 @@ namespace cv { namespace cuda { namespace device
         }
 
         template<typename InIt, typename OutIt, class UnOp>
-        static __device__ __forceinline__ void transfrom(InIt beg, InIt end, OutIt out, UnOp op)
+        static __device__ __forceinline__ void transform(InIt beg, InIt end, OutIt out, UnOp op)
         {
             int STRIDE = stride();
             InIt  t = beg + flattenedThreadId();
@@ -117,7 +117,7 @@ namespace cv { namespace cuda { namespace device
         }
 
         template<typename InIt1, typename InIt2, typename OutIt, class BinOp>
-        static __device__ __forceinline__ void transfrom(InIt1 beg1, InIt1 end1, InIt2 beg2, OutIt out, BinOp op)
+        static __device__ __forceinline__ void transform(InIt1 beg1, InIt1 end1, InIt2 beg2, OutIt out, BinOp op)
         {
             int STRIDE = stride();
             InIt1 t1 = beg1 + flattenedThreadId();
@@ -208,4 +208,4 @@ namespace cv { namespace cuda { namespace device
 
 //! @endcond
 
-#endif /* __OPENCV_CUDA_DEVICE_BLOCK_HPP__ */
+#endif /* OPENCV_CUDA_DEVICE_BLOCK_HPP */
diff --git a/IPL/include/opencv/opencv2/core/cuda/border_interpolate.hpp b/IPL/include/opencv/opencv2/core/cuda/border_interpolate.hpp
index a204155..874f705 100644
--- a/IPL/include/opencv/opencv2/core/cuda/border_interpolate.hpp
+++ b/IPL/include/opencv/opencv2/core/cuda/border_interpolate.hpp
@@ -40,8 +40,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_CUDA_BORDER_INTERPOLATE_HPP__
-#define __OPENCV_CUDA_BORDER_INTERPOLATE_HPP__
+#ifndef OPENCV_CUDA_BORDER_INTERPOLATE_HPP
+#define OPENCV_CUDA_BORDER_INTERPOLATE_HPP
 
 #include "saturate_cast.hpp"
 #include "vec_traits.hpp"
@@ -719,4 +719,4 @@ namespace cv { namespace cuda { namespace device
 
 //! @endcond
 
-#endif // __OPENCV_CUDA_BORDER_INTERPOLATE_HPP__
+#endif // OPENCV_CUDA_BORDER_INTERPOLATE_HPP
diff --git a/IPL/include/opencv/opencv2/core/cuda/color.hpp b/IPL/include/opencv/opencv2/core/cuda/color.hpp
index 6faf8c9..dcce280 100644
--- a/IPL/include/opencv/opencv2/core/cuda/color.hpp
+++ b/IPL/include/opencv/opencv2/core/cuda/color.hpp
@@ -40,8 +40,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_CUDA_COLOR_HPP__
-#define __OPENCV_CUDA_COLOR_HPP__
+#ifndef OPENCV_CUDA_COLOR_HPP
+#define OPENCV_CUDA_COLOR_HPP
 
 #include "detail/color_detail.hpp"
 
@@ -306,4 +306,4 @@ namespace cv { namespace cuda { namespace device
 
 //! @endcond
 
-#endif // __OPENCV_CUDA_BORDER_INTERPOLATE_HPP__
+#endif // OPENCV_CUDA_COLOR_HPP
diff --git a/IPL/include/opencv/opencv2/core/cuda/common.hpp b/IPL/include/opencv/opencv2/core/cuda/common.hpp
index b93c3ef..14b1f3f 100644
--- a/IPL/include/opencv/opencv2/core/cuda/common.hpp
+++ b/IPL/include/opencv/opencv2/core/cuda/common.hpp
@@ -40,8 +40,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_CUDA_COMMON_HPP__
-#define __OPENCV_CUDA_COMMON_HPP__
+#ifndef OPENCV_CUDA_COMMON_HPP
+#define OPENCV_CUDA_COMMON_HPP
 
 #include <cuda_runtime.h>
 #include "opencv2/core/cuda_types.hpp"
@@ -106,4 +106,4 @@ namespace cv { namespace cuda
 
 //! @endcond
 
-#endif // __OPENCV_CUDA_COMMON_HPP__
+#endif // OPENCV_CUDA_COMMON_HPP
diff --git a/IPL/include/opencv/opencv2/core/cuda/datamov_utils.hpp b/IPL/include/opencv/opencv2/core/cuda/datamov_utils.hpp
index bb02cf9..6820d0f 100644
--- a/IPL/include/opencv/opencv2/core/cuda/datamov_utils.hpp
+++ b/IPL/include/opencv/opencv2/core/cuda/datamov_utils.hpp
@@ -40,8 +40,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_CUDA_DATAMOV_UTILS_HPP__
-#define __OPENCV_CUDA_DATAMOV_UTILS_HPP__
+#ifndef OPENCV_CUDA_DATAMOV_UTILS_HPP
+#define OPENCV_CUDA_DATAMOV_UTILS_HPP
 
 #include "common.hpp"
 
@@ -110,4 +110,4 @@ namespace cv { namespace cuda { namespace device
 
 //! @endcond
 
-#endif // __OPENCV_CUDA_DATAMOV_UTILS_HPP__
+#endif // OPENCV_CUDA_DATAMOV_UTILS_HPP
diff --git a/IPL/include/opencv/opencv2/core/cuda/detail/color_detail.hpp b/IPL/include/opencv/opencv2/core/cuda/detail/color_detail.hpp
index 1151806..f4b4796 100644
--- a/IPL/include/opencv/opencv2/core/cuda/detail/color_detail.hpp
+++ b/IPL/include/opencv/opencv2/core/cuda/detail/color_detail.hpp
@@ -40,8 +40,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_CUDA_COLOR_DETAIL_HPP__
-#define __OPENCV_CUDA_COLOR_DETAIL_HPP__
+#ifndef OPENCV_CUDA_COLOR_DETAIL_HPP
+#define OPENCV_CUDA_COLOR_DETAIL_HPP
 
 #include "../common.hpp"
 #include "../vec_traits.hpp"
@@ -92,13 +92,51 @@ namespace cv { namespace cuda { namespace device
             return vec.w;
         }
 
+        //constants for conversion from/to RGB and Gray, YUV, YCrCb according to BT.601
+        constexpr float B2YF = 0.114f;
+        constexpr float G2YF = 0.587f;
+        constexpr float R2YF = 0.299f;
+
+        //to YCbCr
+        constexpr float YCBF = 0.564f; // == 1/2/(1-B2YF)
+        constexpr float YCRF = 0.713f; // == 1/2/(1-R2YF)
+        const     int   YCBI = 9241;  // == YCBF*16384
+        const     int   YCRI = 11682; // == YCRF*16384
+        //to YUV
+        constexpr float B2UF = 0.492f;
+        constexpr float R2VF = 0.877f;
+        const     int   B2UI = 8061;  // == B2UF*16384
+        const     int   R2VI = 14369; // == R2VF*16384
+        //from YUV
+        constexpr float U2BF = 2.032f;
+        constexpr float U2GF = -0.395f;
+        constexpr float V2GF = -0.581f;
+        constexpr float V2RF = 1.140f;
+        const     int   U2BI = 33292;
+        const     int   U2GI = -6472;
+        const     int   V2GI = -9519;
+        const     int   V2RI = 18678;
+        //from YCrCb
+        constexpr float CB2BF = 1.773f;
+        constexpr float CB2GF = -0.344f;
+        constexpr float CR2GF = -0.714f;
+        constexpr float CR2RF = 1.403f;
+        const     int   CB2BI = 29049;
+        const     int   CB2GI = -5636;
+        const     int   CR2GI = -11698;
+        const     int   CR2RI = 22987;
+
         enum
         {
             yuv_shift  = 14,
             xyz_shift  = 12,
+            gray_shift = 15,
             R2Y        = 4899,
             G2Y        = 9617,
             B2Y        = 1868,
+            RY15 =  9798, // == R2YF*32768 + 0.5
+            GY15 = 19235, // == G2YF*32768 + 0.5
+            BY15 =  3735, // == B2YF*32768 + 0.5
             BLOCK_SIZE = 256
         };
     }
@@ -406,7 +444,7 @@ namespace cv { namespace cuda { namespace device
         {
             static __device__ __forceinline__ uchar cvt(uint t)
             {
-                return (uchar)CV_DESCALE(((t << 3) & 0xf8) * B2Y + ((t >> 3) & 0xfc) * G2Y + ((t >> 8) & 0xf8) * R2Y, yuv_shift);
+                return (uchar)CV_DESCALE(((t << 3) & 0xf8) * BY15 + ((t >> 3) & 0xfc) * GY15 + ((t >> 8) & 0xf8) * RY15, gray_shift);
             }
         };
 
@@ -414,7 +452,7 @@ namespace cv { namespace cuda { namespace device
         {
             static __device__ __forceinline__ uchar cvt(uint t)
             {
-                return (uchar)CV_DESCALE(((t << 3) & 0xf8) * B2Y + ((t >> 2) & 0xf8) * G2Y + ((t >> 7) & 0xf8) * R2Y, yuv_shift);
+                return (uchar)CV_DESCALE(((t << 3) & 0xf8) * BY15 + ((t >> 2) & 0xf8) * GY15 + ((t >> 7) & 0xf8) * RY15, gray_shift);
             }
         };
 
@@ -443,7 +481,7 @@ namespace cv { namespace cuda { namespace device
     {
         template <int bidx, typename T> static __device__ __forceinline__ T RGB2GrayConvert(const T* src)
         {
-            return (T)CV_DESCALE((unsigned)(src[bidx] * B2Y + src[1] * G2Y + src[bidx^2] * R2Y), yuv_shift);
+            return (T)CV_DESCALE((unsigned)(src[bidx] * BY15 + src[1] * GY15 + src[bidx^2] * RY15), gray_shift);
         }
 
         template <int bidx> static __device__ __forceinline__ uchar RGB2GrayConvert(uint src)
@@ -451,12 +489,12 @@ namespace cv { namespace cuda { namespace device
             uint b = 0xffu & (src >> (bidx * 8));
             uint g = 0xffu & (src >> 8);
             uint r = 0xffu & (src >> ((bidx ^ 2) * 8));
-            return CV_DESCALE((uint)(b * B2Y + g * G2Y + r * R2Y), yuv_shift);
+            return CV_DESCALE((uint)(b * BY15 + g * GY15 + r * RY15), gray_shift);
         }
 
         template <int bidx> static __device__ __forceinline__ float RGB2GrayConvert(const float* src)
         {
-            return src[bidx] * 0.114f + src[1] * 0.587f + src[bidx^2] * 0.299f;
+            return src[bidx] * B2YF + src[1] * G2YF + src[bidx^2] * R2YF;
         }
 
         template <typename T, int scn, int bidx> struct RGB2Gray : unary_function<typename TypeVec<T, scn>::vec_type, T>
@@ -494,8 +532,8 @@ namespace cv { namespace cuda { namespace device
 
     namespace color_detail
     {
-        __constant__ float c_RGB2YUVCoeffs_f[5] = { 0.114f, 0.587f, 0.299f, 0.492f, 0.877f };
-        __constant__ int   c_RGB2YUVCoeffs_i[5] = { B2Y, G2Y, R2Y, 8061, 14369 };
+        __constant__ float c_RGB2YUVCoeffs_f[5] = { B2YF, G2YF, R2YF, B2UF, R2VF };
+        __constant__ int   c_RGB2YUVCoeffs_i[5] = { B2Y, G2Y, R2Y, B2UI, R2VI };
 
         template <int bidx, typename T, typename D> static __device__ void RGB2YUVConvert(const T* src, D& dst)
         {
@@ -543,8 +581,8 @@ namespace cv { namespace cuda { namespace device
 
     namespace color_detail
     {
-        __constant__ float c_YUV2RGBCoeffs_f[5] = { 2.032f, -0.395f, -0.581f, 1.140f };
-        __constant__ int   c_YUV2RGBCoeffs_i[5] = { 33292, -6472, -9519, 18678 };
+        __constant__ float c_YUV2RGBCoeffs_f[5] = { U2BF, U2GF, V2GF, V2RF };
+        __constant__ int   c_YUV2RGBCoeffs_i[5] = { U2BI, U2GI, V2GI, V2RI };
 
         template <int bidx, typename T, typename D> static __device__ void YUV2RGBConvert(const T& src, D* dst)
         {
@@ -633,8 +671,8 @@ namespace cv { namespace cuda { namespace device
 
     namespace color_detail
     {
-        __constant__ float c_RGB2YCrCbCoeffs_f[5] = {0.299f, 0.587f, 0.114f, 0.713f, 0.564f};
-        __constant__ int   c_RGB2YCrCbCoeffs_i[5] = {R2Y, G2Y, B2Y, 11682, 9241};
+        __constant__ float c_RGB2YCrCbCoeffs_f[5] = {R2YF, G2YF, B2YF, YCRF, YCBF};
+        __constant__ int   c_RGB2YCrCbCoeffs_i[5] = {R2Y, G2Y, B2Y, YCRI, YCBI};
 
         template <int bidx, typename T, typename D> static __device__ void RGB2YCrCbConvert(const T* src, D& dst)
         {
@@ -710,8 +748,8 @@ namespace cv { namespace cuda { namespace device
 
     namespace color_detail
     {
-        __constant__ float c_YCrCb2RGBCoeffs_f[5] = {1.403f, -0.714f, -0.344f, 1.773f};
-        __constant__ int   c_YCrCb2RGBCoeffs_i[5] = {22987, -11698, -5636, 29049};
+        __constant__ float c_YCrCb2RGBCoeffs_f[5] = {CR2RF, CR2GF, CB2GF, CB2BF};
+        __constant__ int   c_YCrCb2RGBCoeffs_i[5] = {CR2RI, CR2GI, CB2GI, CB2BI};
 
         template <int bidx, typename T, typename D> static __device__ void YCrCb2RGBConvert(const T& src, D* dst)
         {
@@ -1977,4 +2015,4 @@ namespace cv { namespace cuda { namespace device
 
 //! @endcond
 
-#endif // __OPENCV_CUDA_COLOR_DETAIL_HPP__
+#endif // OPENCV_CUDA_COLOR_DETAIL_HPP
diff --git a/IPL/include/opencv/opencv2/core/cuda/detail/reduce.hpp b/IPL/include/opencv/opencv2/core/cuda/detail/reduce.hpp
index 44400c8..8af20b0 100644
--- a/IPL/include/opencv/opencv2/core/cuda/detail/reduce.hpp
+++ b/IPL/include/opencv/opencv2/core/cuda/detail/reduce.hpp
@@ -40,8 +40,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_CUDA_REDUCE_DETAIL_HPP__
-#define __OPENCV_CUDA_REDUCE_DETAIL_HPP__
+#ifndef OPENCV_CUDA_REDUCE_DETAIL_HPP
+#define OPENCV_CUDA_REDUCE_DETAIL_HPP
 
 #include <thrust/tuple.h>
 #include "../warp.hpp"
@@ -276,8 +276,8 @@ namespace cv { namespace cuda { namespace device
             static __device__ void reduce(Pointer smem, Reference val, unsigned int tid, Op op)
             {
             #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
-                (void) smem;
-                (void) tid;
+                CV_UNUSED(smem);
+                CV_UNUSED(tid);
 
                 Unroll<N / 2, Pointer, Reference, Op>::loopShfl(val, op, N);
             #else
@@ -362,4 +362,4 @@ namespace cv { namespace cuda { namespace device
 
 //! @endcond
 
-#endif // __OPENCV_CUDA_REDUCE_DETAIL_HPP__
+#endif // OPENCV_CUDA_REDUCE_DETAIL_HPP
diff --git a/IPL/include/opencv/opencv2/core/cuda/detail/reduce_key_val.hpp b/IPL/include/opencv/opencv2/core/cuda/detail/reduce_key_val.hpp
index bab85d7..df37c17 100644
--- a/IPL/include/opencv/opencv2/core/cuda/detail/reduce_key_val.hpp
+++ b/IPL/include/opencv/opencv2/core/cuda/detail/reduce_key_val.hpp
@@ -40,8 +40,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_CUDA_PRED_VAL_REDUCE_DETAIL_HPP__
-#define __OPENCV_CUDA_PRED_VAL_REDUCE_DETAIL_HPP__
+#ifndef OPENCV_CUDA_PRED_VAL_REDUCE_DETAIL_HPP
+#define OPENCV_CUDA_PRED_VAL_REDUCE_DETAIL_HPP
 
 #include <thrust/tuple.h>
 #include "../warp.hpp"
@@ -402,9 +402,9 @@ namespace cv { namespace cuda { namespace device
             static __device__ void reduce(KP skeys, KR key, VP svals, VR val, unsigned int tid, Cmp cmp)
             {
             #if 0 // __CUDA_ARCH__ >= 300
-                (void) skeys;
-                (void) svals;
-                (void) tid;
+                CV_UNUSED(skeys);
+                CV_UNUSED(svals);
+                CV_UNUSED(tid);
 
                 Unroll<N / 2, KP, KR, VP, VR, Cmp>::loopShfl(key, val, cmp, N);
             #else
@@ -499,4 +499,4 @@ namespace cv { namespace cuda { namespace device
 
 //! @endcond
 
-#endif // __OPENCV_CUDA_PRED_VAL_REDUCE_DETAIL_HPP__
+#endif // OPENCV_CUDA_PRED_VAL_REDUCE_DETAIL_HPP
diff --git a/IPL/include/opencv/opencv2/core/cuda/detail/transform_detail.hpp b/IPL/include/opencv/opencv2/core/cuda/detail/transform_detail.hpp
index 96031c8..1919848 100644
--- a/IPL/include/opencv/opencv2/core/cuda/detail/transform_detail.hpp
+++ b/IPL/include/opencv/opencv2/core/cuda/detail/transform_detail.hpp
@@ -40,8 +40,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_CUDA_TRANSFORM_DETAIL_HPP__
-#define __OPENCV_CUDA_TRANSFORM_DETAIL_HPP__
+#ifndef OPENCV_CUDA_TRANSFORM_DETAIL_HPP
+#define OPENCV_CUDA_TRANSFORM_DETAIL_HPP
 
 #include "../common.hpp"
 #include "../vec_traits.hpp"
@@ -223,11 +223,7 @@ namespace cv { namespace cuda { namespace device
                 if (x_shifted + ft::smart_shift - 1 < src_.cols)
                 {
                     const read_type src_n_el = ((const read_type*)src)[x];
-                    write_type dst_n_el = ((const write_type*)dst)[x];
-
-                    OpUnroller<ft::smart_shift>::unroll(src_n_el, dst_n_el, mask, op, x_shifted, y);
-
-                    ((write_type*)dst)[x] = dst_n_el;
+                    OpUnroller<ft::smart_shift>::unroll(src_n_el, ((write_type*)dst)[x], mask, op, x_shifted, y);
                 }
                 else
                 {
@@ -275,11 +271,8 @@ namespace cv { namespace cuda { namespace device
                 {
                     const read_type1 src1_n_el = ((const read_type1*)src1)[x];
                     const read_type2 src2_n_el = ((const read_type2*)src2)[x];
-                    write_type dst_n_el = ((const write_type*)dst)[x];
-
-                    OpUnroller<ft::smart_shift>::unroll(src1_n_el, src2_n_el, dst_n_el, mask, op, x_shifted, y);
 
-                    ((write_type*)dst)[x] = dst_n_el;
+                    OpUnroller<ft::smart_shift>::unroll(src1_n_el, src2_n_el, ((write_type*)dst)[x], mask, op, x_shifted, y);
                 }
                 else
                 {
@@ -396,4 +389,4 @@ namespace cv { namespace cuda { namespace device
 
 //! @endcond
 
-#endif // __OPENCV_CUDA_TRANSFORM_DETAIL_HPP__
+#endif // OPENCV_CUDA_TRANSFORM_DETAIL_HPP
diff --git a/IPL/include/opencv/opencv2/core/cuda/detail/type_traits_detail.hpp b/IPL/include/opencv/opencv2/core/cuda/detail/type_traits_detail.hpp
index 3463c78..a78bd2c 100644
--- a/IPL/include/opencv/opencv2/core/cuda/detail/type_traits_detail.hpp
+++ b/IPL/include/opencv/opencv2/core/cuda/detail/type_traits_detail.hpp
@@ -40,8 +40,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_CUDA_TYPE_TRAITS_DETAIL_HPP__
-#define __OPENCV_CUDA_TYPE_TRAITS_DETAIL_HPP__
+#ifndef OPENCV_CUDA_TYPE_TRAITS_DETAIL_HPP
+#define OPENCV_CUDA_TYPE_TRAITS_DETAIL_HPP
 
 #include "../common.hpp"
 #include "../vec_traits.hpp"
@@ -188,4 +188,4 @@ namespace cv { namespace cuda { namespace device
 
 //! @endcond
 
-#endif // __OPENCV_CUDA_TYPE_TRAITS_DETAIL_HPP__
+#endif // OPENCV_CUDA_TYPE_TRAITS_DETAIL_HPP
diff --git a/IPL/include/opencv/opencv2/core/cuda/detail/vec_distance_detail.hpp b/IPL/include/opencv/opencv2/core/cuda/detail/vec_distance_detail.hpp
index 9ca85a5..8283a99 100644
--- a/IPL/include/opencv/opencv2/core/cuda/detail/vec_distance_detail.hpp
+++ b/IPL/include/opencv/opencv2/core/cuda/detail/vec_distance_detail.hpp
@@ -40,8 +40,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_CUDA_VEC_DISTANCE_DETAIL_HPP__
-#define __OPENCV_CUDA_VEC_DISTANCE_DETAIL_HPP__
+#ifndef OPENCV_CUDA_VEC_DISTANCE_DETAIL_HPP
+#define OPENCV_CUDA_VEC_DISTANCE_DETAIL_HPP
 
 #include "../datamov_utils.hpp"
 
@@ -118,4 +118,4 @@ namespace cv { namespace cuda { namespace device
 
 //! @endcond
 
-#endif // __OPENCV_CUDA_VEC_DISTANCE_DETAIL_HPP__
+#endif // OPENCV_CUDA_VEC_DISTANCE_DETAIL_HPP
diff --git a/IPL/include/opencv/opencv2/core/cuda/dynamic_smem.hpp b/IPL/include/opencv/opencv2/core/cuda/dynamic_smem.hpp
index 3488463..42570c6 100644
--- a/IPL/include/opencv/opencv2/core/cuda/dynamic_smem.hpp
+++ b/IPL/include/opencv/opencv2/core/cuda/dynamic_smem.hpp
@@ -40,8 +40,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_CUDA_DYNAMIC_SMEM_HPP__
-#define __OPENCV_CUDA_DYNAMIC_SMEM_HPP__
+#ifndef OPENCV_CUDA_DYNAMIC_SMEM_HPP
+#define OPENCV_CUDA_DYNAMIC_SMEM_HPP
 
 /** @file
  * @deprecated Use @ref cudev instead.
@@ -85,4 +85,4 @@ namespace cv { namespace cuda { namespace device
 
 //! @endcond
 
-#endif // __OPENCV_CUDA_DYNAMIC_SMEM_HPP__
+#endif // OPENCV_CUDA_DYNAMIC_SMEM_HPP
diff --git a/IPL/include/opencv/opencv2/core/cuda/emulation.hpp b/IPL/include/opencv/opencv2/core/cuda/emulation.hpp
index d346865..17dc117 100644
--- a/IPL/include/opencv/opencv2/core/cuda/emulation.hpp
+++ b/IPL/include/opencv/opencv2/core/cuda/emulation.hpp
@@ -177,8 +177,8 @@ namespace cv { namespace cuda { namespace device
                 } while (assumed != old);
                 return __longlong_as_double(old);
             #else
-                (void) address;
-                (void) val;
+                CV_UNUSED(address);
+                CV_UNUSED(val);
                 return 0.0;
             #endif
             }
@@ -199,8 +199,8 @@ namespace cv { namespace cuda { namespace device
                 } while (assumed != old);
                 return __int_as_float(old);
             #else
-                (void) address;
-                (void) val;
+                CV_UNUSED(address);
+                CV_UNUSED(val);
                 return 0.0f;
             #endif
             }
@@ -216,8 +216,8 @@ namespace cv { namespace cuda { namespace device
                 } while (assumed != old);
                 return __longlong_as_double(old);
             #else
-                (void) address;
-                (void) val;
+                CV_UNUSED(address);
+                CV_UNUSED(val);
                 return 0.0;
             #endif
             }
@@ -238,8 +238,8 @@ namespace cv { namespace cuda { namespace device
                 } while (assumed != old);
                 return __int_as_float(old);
             #else
-                (void) address;
-                (void) val;
+                CV_UNUSED(address);
+                CV_UNUSED(val);
                 return 0.0f;
             #endif
             }
@@ -255,8 +255,8 @@ namespace cv { namespace cuda { namespace device
                 } while (assumed != old);
                 return __longlong_as_double(old);
             #else
-                (void) address;
-                (void) val;
+                CV_UNUSED(address);
+                CV_UNUSED(val);
                 return 0.0;
             #endif
             }
diff --git a/IPL/include/opencv/opencv2/core/cuda/filters.hpp b/IPL/include/opencv/opencv2/core/cuda/filters.hpp
index 9adc00c..bb94212 100644
--- a/IPL/include/opencv/opencv2/core/cuda/filters.hpp
+++ b/IPL/include/opencv/opencv2/core/cuda/filters.hpp
@@ -40,8 +40,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_CUDA_FILTERS_HPP__
-#define __OPENCV_CUDA_FILTERS_HPP__
+#ifndef OPENCV_CUDA_FILTERS_HPP
+#define OPENCV_CUDA_FILTERS_HPP
 
 #include "saturate_cast.hpp"
 #include "vec_traits.hpp"
@@ -64,8 +64,8 @@ namespace cv { namespace cuda { namespace device
         explicit __host__ __device__ __forceinline__ PointFilter(const Ptr2D& src_, float fx = 0.f, float fy = 0.f)
         : src(src_)
         {
-            (void)fx;
-            (void)fy;
+            CV_UNUSED(fx);
+            CV_UNUSED(fy);
         }
 
         __device__ __forceinline__ elem_type operator ()(float y, float x) const
@@ -84,8 +84,8 @@ namespace cv { namespace cuda { namespace device
         explicit __host__ __device__ __forceinline__ LinearFilter(const Ptr2D& src_, float fx = 0.f, float fy = 0.f)
         : src(src_)
         {
-            (void)fx;
-            (void)fy;
+            CV_UNUSED(fx);
+            CV_UNUSED(fy);
         }
         __device__ __forceinline__ elem_type operator ()(float y, float x) const
         {
@@ -125,8 +125,8 @@ namespace cv { namespace cuda { namespace device
         explicit __host__ __device__ __forceinline__ CubicFilter(const Ptr2D& src_, float fx = 0.f, float fy = 0.f)
         : src(src_)
         {
-            (void)fx;
-            (void)fy;
+            CV_UNUSED(fx);
+            CV_UNUSED(fy);
         }
 
         static __device__ __forceinline__ float bicubicCoeff(float x_)
@@ -283,4 +283,4 @@ namespace cv { namespace cuda { namespace device
 
 //! @endcond
 
-#endif // __OPENCV_CUDA_FILTERS_HPP__
+#endif // OPENCV_CUDA_FILTERS_HPP
diff --git a/IPL/include/opencv/opencv2/core/cuda/funcattrib.hpp b/IPL/include/opencv/opencv2/core/cuda/funcattrib.hpp
index fbb236b..f582080 100644
--- a/IPL/include/opencv/opencv2/core/cuda/funcattrib.hpp
+++ b/IPL/include/opencv/opencv2/core/cuda/funcattrib.hpp
@@ -40,8 +40,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_CUDA_DEVICE_FUNCATTRIB_HPP_
-#define __OPENCV_CUDA_DEVICE_FUNCATTRIB_HPP_
+#ifndef OPENCV_CUDA_DEVICE_FUNCATTRIB_HPP
+#define OPENCV_CUDA_DEVICE_FUNCATTRIB_HPP
 
 #include <cstdio>
 
@@ -76,4 +76,4 @@ namespace cv { namespace cuda { namespace device
 
 //! @endcond
 
-#endif  /* __OPENCV_CUDA_DEVICE_FUNCATTRIB_HPP_ */
+#endif  /* OPENCV_CUDA_DEVICE_FUNCATTRIB_HPP */
diff --git a/IPL/include/opencv/opencv2/core/cuda/functional.hpp b/IPL/include/opencv/opencv2/core/cuda/functional.hpp
index ed3943d..9f53d87 100644
--- a/IPL/include/opencv/opencv2/core/cuda/functional.hpp
+++ b/IPL/include/opencv/opencv2/core/cuda/functional.hpp
@@ -40,14 +40,13 @@
 //
 //M*/
 
-#ifndef __OPENCV_CUDA_FUNCTIONAL_HPP__
-#define __OPENCV_CUDA_FUNCTIONAL_HPP__
+#ifndef OPENCV_CUDA_FUNCTIONAL_HPP
+#define OPENCV_CUDA_FUNCTIONAL_HPP
 
 #include <functional>
 #include "saturate_cast.hpp"
 #include "vec_traits.hpp"
 #include "type_traits.hpp"
-#include "device_functions.h"
 
 /** @file
  * @deprecated Use @ref cudev instead.
@@ -58,8 +57,17 @@
 namespace cv { namespace cuda { namespace device
 {
     // Function Objects
-    template<typename Argument, typename Result> struct unary_function : public std::unary_function<Argument, Result> {};
-    template<typename Argument1, typename Argument2, typename Result> struct binary_function : public std::binary_function<Argument1, Argument2, Result> {};
+    template<typename Argument, typename Result> struct unary_function
+    {
+        typedef Argument argument_type;
+        typedef Result result_type;
+    };
+    template<typename Argument1, typename Argument2, typename Result> struct binary_function
+    {
+        typedef Argument1 first_argument_type;
+        typedef Argument2 second_argument_type;
+        typedef Result result_type;
+    };
 
     // Arithmetic Operations
     template <typename T> struct plus : binary_function<T, T, T>
@@ -583,7 +591,7 @@ namespace cv { namespace cuda { namespace device
 
     template <typename T> struct thresh_trunc_func : unary_function<T, T>
     {
-        explicit __host__ __device__ __forceinline__ thresh_trunc_func(T thresh_, T maxVal_ = 0) : thresh(thresh_) {(void)maxVal_;}
+        explicit __host__ __device__ __forceinline__ thresh_trunc_func(T thresh_, T maxVal_ = 0) : thresh(thresh_) {CV_UNUSED(maxVal_);}
 
         __device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType src) const
         {
@@ -599,7 +607,7 @@ namespace cv { namespace cuda { namespace device
 
     template <typename T> struct thresh_to_zero_func : unary_function<T, T>
     {
-        explicit __host__ __device__ __forceinline__ thresh_to_zero_func(T thresh_, T maxVal_ = 0) : thresh(thresh_) {(void)maxVal_;}
+        explicit __host__ __device__ __forceinline__ thresh_to_zero_func(T thresh_, T maxVal_ = 0) : thresh(thresh_) {CV_UNUSED(maxVal_);}
 
         __device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType src) const
         {
@@ -615,7 +623,7 @@ namespace cv { namespace cuda { namespace device
 
     template <typename T> struct thresh_to_zero_inv_func : unary_function<T, T>
     {
-        explicit __host__ __device__ __forceinline__ thresh_to_zero_inv_func(T thresh_, T maxVal_ = 0) : thresh(thresh_) {(void)maxVal_;}
+        explicit __host__ __device__ __forceinline__ thresh_to_zero_inv_func(T thresh_, T maxVal_ = 0) : thresh(thresh_) {CV_UNUSED(maxVal_);}
 
         __device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType src) const
         {
@@ -794,4 +802,4 @@ namespace cv { namespace cuda { namespace device
 
 //! @endcond
 
-#endif // __OPENCV_CUDA_FUNCTIONAL_HPP__
+#endif // OPENCV_CUDA_FUNCTIONAL_HPP
diff --git a/IPL/include/opencv/opencv2/core/cuda/limits.hpp b/IPL/include/opencv/opencv2/core/cuda/limits.hpp
index b98bdf2..7e15ed6 100644
--- a/IPL/include/opencv/opencv2/core/cuda/limits.hpp
+++ b/IPL/include/opencv/opencv2/core/cuda/limits.hpp
@@ -40,8 +40,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_CUDA_LIMITS_HPP__
-#define __OPENCV_CUDA_LIMITS_HPP__
+#ifndef OPENCV_CUDA_LIMITS_HPP
+#define OPENCV_CUDA_LIMITS_HPP
 
 #include <limits.h>
 #include <float.h>
@@ -125,4 +125,4 @@ template <> struct numeric_limits<double>
 
 //! @endcond
 
-#endif // __OPENCV_CUDA_LIMITS_HPP__
+#endif // OPENCV_CUDA_LIMITS_HPP
diff --git a/IPL/include/opencv/opencv2/core/cuda/reduce.hpp b/IPL/include/opencv/opencv2/core/cuda/reduce.hpp
index 3133c9a..5de3650 100644
--- a/IPL/include/opencv/opencv2/core/cuda/reduce.hpp
+++ b/IPL/include/opencv/opencv2/core/cuda/reduce.hpp
@@ -40,8 +40,12 @@
 //
 //M*/
 
-#ifndef __OPENCV_CUDA_REDUCE_HPP__
-#define __OPENCV_CUDA_REDUCE_HPP__
+#ifndef OPENCV_CUDA_REDUCE_HPP
+#define OPENCV_CUDA_REDUCE_HPP
+
+#ifndef THRUST_DEBUG // eliminate -Wundef warning
+#define THRUST_DEBUG 0
+#endif
 
 #include <thrust/tuple.h>
 #include "detail/reduce.hpp"
@@ -202,4 +206,4 @@ namespace cv { namespace cuda { namespace device
 
 //! @endcond
 
-#endif // __OPENCV_CUDA_UTILITY_HPP__
+#endif // OPENCV_CUDA_REDUCE_HPP
diff --git a/IPL/include/opencv/opencv2/core/cuda/saturate_cast.hpp b/IPL/include/opencv/opencv2/core/cuda/saturate_cast.hpp
index f55ae4f..c3a3d1c 100644
--- a/IPL/include/opencv/opencv2/core/cuda/saturate_cast.hpp
+++ b/IPL/include/opencv/opencv2/core/cuda/saturate_cast.hpp
@@ -40,8 +40,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_CUDA_SATURATE_CAST_HPP__
-#define __OPENCV_CUDA_SATURATE_CAST_HPP__
+#ifndef OPENCV_CUDA_SATURATE_CAST_HPP
+#define OPENCV_CUDA_SATURATE_CAST_HPP
 
 #include "common.hpp"
 
@@ -289,4 +289,4 @@ namespace cv { namespace cuda { namespace device
 
 //! @endcond
 
-#endif /* __OPENCV_CUDA_SATURATE_CAST_HPP__ */
+#endif /* OPENCV_CUDA_SATURATE_CAST_HPP */
diff --git a/IPL/include/opencv/opencv2/core/cuda/scan.hpp b/IPL/include/opencv/opencv2/core/cuda/scan.hpp
index 687abb5..e128fb0 100644
--- a/IPL/include/opencv/opencv2/core/cuda/scan.hpp
+++ b/IPL/include/opencv/opencv2/core/cuda/scan.hpp
@@ -40,8 +40,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_CUDA_SCAN_HPP__
-#define __OPENCV_CUDA_SCAN_HPP__
+#ifndef OPENCV_CUDA_SCAN_HPP
+#define OPENCV_CUDA_SCAN_HPP
 
 #include "opencv2/core/cuda/common.hpp"
 #include "opencv2/core/cuda/utility.hpp"
@@ -61,7 +61,7 @@ namespace cv { namespace cuda { namespace device
     template <ScanKind Kind, typename T, typename F> struct WarpScan
     {
         __device__ __forceinline__ WarpScan() {}
-        __device__ __forceinline__ WarpScan(const WarpScan& other) { (void)other; }
+        __device__ __forceinline__ WarpScan(const WarpScan& other) { CV_UNUSED(other); }
 
         __device__ __forceinline__ T operator()( volatile T *ptr , const unsigned int idx)
         {
@@ -95,7 +95,7 @@ namespace cv { namespace cuda { namespace device
     template <ScanKind Kind , typename T, typename F> struct WarpScanNoComp
     {
         __device__ __forceinline__ WarpScanNoComp() {}
-        __device__ __forceinline__ WarpScanNoComp(const WarpScanNoComp& other) { (void)other; }
+        __device__ __forceinline__ WarpScanNoComp(const WarpScanNoComp& other) { CV_UNUSED(other); }
 
         __device__ __forceinline__ T operator()( volatile T *ptr , const unsigned int idx)
         {
@@ -135,7 +135,7 @@ namespace cv { namespace cuda { namespace device
     template <ScanKind Kind , typename T, typename Sc, typename F> struct BlockScan
     {
         __device__ __forceinline__ BlockScan() {}
-        __device__ __forceinline__ BlockScan(const BlockScan& other) { (void)other; }
+        __device__ __forceinline__ BlockScan(const BlockScan& other) { CV_UNUSED(other); }
 
         __device__ __forceinline__ T operator()(volatile T *ptr)
         {
@@ -255,4 +255,4 @@ namespace cv { namespace cuda { namespace device
 
 //! @endcond
 
-#endif // __OPENCV_CUDA_SCAN_HPP__
+#endif // OPENCV_CUDA_SCAN_HPP
diff --git a/IPL/include/opencv/opencv2/core/cuda/simd_functions.hpp b/IPL/include/opencv/opencv2/core/cuda/simd_functions.hpp
index b9e0041..3d8c2e0 100644
--- a/IPL/include/opencv/opencv2/core/cuda/simd_functions.hpp
+++ b/IPL/include/opencv/opencv2/core/cuda/simd_functions.hpp
@@ -70,8 +70,8 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
-#ifndef __OPENCV_CUDA_SIMD_FUNCTIONS_HPP__
-#define __OPENCV_CUDA_SIMD_FUNCTIONS_HPP__
+#ifndef OPENCV_CUDA_SIMD_FUNCTIONS_HPP
+#define OPENCV_CUDA_SIMD_FUNCTIONS_HPP
 
 #include "common.hpp"
 
@@ -866,4 +866,4 @@ namespace cv { namespace cuda { namespace device
 
 //! @endcond
 
-#endif // __OPENCV_CUDA_SIMD_FUNCTIONS_HPP__
+#endif // OPENCV_CUDA_SIMD_FUNCTIONS_HPP
diff --git a/IPL/include/opencv/opencv2/core/cuda/transform.hpp b/IPL/include/opencv/opencv2/core/cuda/transform.hpp
index 08a313d..42aa6ea 100644
--- a/IPL/include/opencv/opencv2/core/cuda/transform.hpp
+++ b/IPL/include/opencv/opencv2/core/cuda/transform.hpp
@@ -40,8 +40,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_CUDA_TRANSFORM_HPP__
-#define __OPENCV_CUDA_TRANSFORM_HPP__
+#ifndef OPENCV_CUDA_TRANSFORM_HPP
+#define OPENCV_CUDA_TRANSFORM_HPP
 
 #include "common.hpp"
 #include "utility.hpp"
@@ -72,4 +72,4 @@ namespace cv { namespace cuda { namespace device
 
 //! @endcond
 
-#endif // __OPENCV_CUDA_TRANSFORM_HPP__
+#endif // OPENCV_CUDA_TRANSFORM_HPP
diff --git a/IPL/include/opencv/opencv2/core/cuda/type_traits.hpp b/IPL/include/opencv/opencv2/core/cuda/type_traits.hpp
index f2471eb..8b7a3fd 100644
--- a/IPL/include/opencv/opencv2/core/cuda/type_traits.hpp
+++ b/IPL/include/opencv/opencv2/core/cuda/type_traits.hpp
@@ -40,8 +40,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_CUDA_TYPE_TRAITS_HPP__
-#define __OPENCV_CUDA_TYPE_TRAITS_HPP__
+#ifndef OPENCV_CUDA_TYPE_TRAITS_HPP
+#define OPENCV_CUDA_TYPE_TRAITS_HPP
 
 #include "detail/type_traits_detail.hpp"
 
@@ -87,4 +87,4 @@ namespace cv { namespace cuda { namespace device
 
 //! @endcond
 
-#endif // __OPENCV_CUDA_TYPE_TRAITS_HPP__
+#endif // OPENCV_CUDA_TYPE_TRAITS_HPP
diff --git a/IPL/include/opencv/opencv2/core/cuda/utility.hpp b/IPL/include/opencv/opencv2/core/cuda/utility.hpp
index ed60471..7f5db48 100644
--- a/IPL/include/opencv/opencv2/core/cuda/utility.hpp
+++ b/IPL/include/opencv/opencv2/core/cuda/utility.hpp
@@ -40,8 +40,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_CUDA_UTILITY_HPP__
-#define __OPENCV_CUDA_UTILITY_HPP__
+#ifndef OPENCV_CUDA_UTILITY_HPP
+#define OPENCV_CUDA_UTILITY_HPP
 
 #include "saturate_cast.hpp"
 #include "datamov_utils.hpp"
@@ -54,6 +54,15 @@
 
 namespace cv { namespace cuda { namespace device
 {
+    struct CV_EXPORTS ThrustAllocator
+    {
+        typedef uchar value_type;
+        virtual ~ThrustAllocator();
+        virtual __device__ __host__ uchar* allocate(size_t numBytes) = 0;
+        virtual __device__ __host__ void deallocate(uchar* ptr, size_t numBytes) = 0;
+        static ThrustAllocator& getAllocator();
+        static void setAllocator(ThrustAllocator* allocator);
+    };
     #define OPENCV_CUDA_LOG_WARP_SIZE        (5)
     #define OPENCV_CUDA_WARP_SIZE            (1 << OPENCV_CUDA_LOG_WARP_SIZE)
     #define OPENCV_CUDA_LOG_MEM_BANKS        ((__CUDA_ARCH__ >= 200) ? 5 : 4) // 32 banks on fermi, 16 on tesla
@@ -218,4 +227,4 @@ namespace cv { namespace cuda { namespace device
 
 //! @endcond
 
-#endif // __OPENCV_CUDA_UTILITY_HPP__
+#endif // OPENCV_CUDA_UTILITY_HPP
diff --git a/IPL/include/opencv/opencv2/core/cuda/vec_distance.hpp b/IPL/include/opencv/opencv2/core/cuda/vec_distance.hpp
index 013b747..ef6e510 100644
--- a/IPL/include/opencv/opencv2/core/cuda/vec_distance.hpp
+++ b/IPL/include/opencv/opencv2/core/cuda/vec_distance.hpp
@@ -40,8 +40,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_CUDA_VEC_DISTANCE_HPP__
-#define __OPENCV_CUDA_VEC_DISTANCE_HPP__
+#ifndef OPENCV_CUDA_VEC_DISTANCE_HPP
+#define OPENCV_CUDA_VEC_DISTANCE_HPP
 
 #include "reduce.hpp"
 #include "functional.hpp"
@@ -229,4 +229,4 @@ namespace cv { namespace cuda { namespace device
 
 //! @endcond
 
-#endif // __OPENCV_CUDA_VEC_DISTANCE_HPP__
+#endif // OPENCV_CUDA_VEC_DISTANCE_HPP
diff --git a/IPL/include/opencv/opencv2/core/cuda/vec_math.hpp b/IPL/include/opencv/opencv2/core/cuda/vec_math.hpp
index 8595fb8..80b1303 100644
--- a/IPL/include/opencv/opencv2/core/cuda/vec_math.hpp
+++ b/IPL/include/opencv/opencv2/core/cuda/vec_math.hpp
@@ -40,8 +40,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_CUDA_VECMATH_HPP__
-#define __OPENCV_CUDA_VECMATH_HPP__
+#ifndef OPENCV_CUDA_VECMATH_HPP
+#define OPENCV_CUDA_VECMATH_HPP
 
 #include "vec_traits.hpp"
 #include "saturate_cast.hpp"
@@ -199,14 +199,7 @@ CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, uint, uint)
         return VecTraits<output_type ## 4>::make(func (a.x), func (a.y), func (a.z), func (a.w)); \
     }
 
-CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, /*::abs*/, uchar, uchar)
-CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::abs, char, char)
-CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, /*::abs*/, ushort, ushort)
-CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::abs, short, short)
-CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::abs, int, int)
-CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, /*::abs*/, uint, uint)
 CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::fabsf, float, float)
-CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::fabs, double, double)
 
 CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, uchar, float)
 CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, char, float)
@@ -927,4 +920,4 @@ CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, double, double, double)
 
 //! @endcond
 
-#endif // __OPENCV_CUDA_VECMATH_HPP__
+#endif // OPENCV_CUDA_VECMATH_HPP
diff --git a/IPL/include/opencv/opencv2/core/cuda/vec_traits.hpp b/IPL/include/opencv/opencv2/core/cuda/vec_traits.hpp
index 905e37f..b5ff281 100644
--- a/IPL/include/opencv/opencv2/core/cuda/vec_traits.hpp
+++ b/IPL/include/opencv/opencv2/core/cuda/vec_traits.hpp
@@ -40,8 +40,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_CUDA_VEC_TRAITS_HPP__
-#define __OPENCV_CUDA_VEC_TRAITS_HPP__
+#ifndef OPENCV_CUDA_VEC_TRAITS_HPP
+#define OPENCV_CUDA_VEC_TRAITS_HPP
 
 #include "common.hpp"
 
@@ -285,4 +285,4 @@ namespace cv { namespace cuda { namespace device
 
 //! @endcond
 
-#endif // __OPENCV_CUDA_VEC_TRAITS_HPP__
+#endif // OPENCV_CUDA_VEC_TRAITS_HPP
diff --git a/IPL/include/opencv/opencv2/core/cuda/warp.hpp b/IPL/include/opencv/opencv2/core/cuda/warp.hpp
index d93afe7..8af7e6a 100644
--- a/IPL/include/opencv/opencv2/core/cuda/warp.hpp
+++ b/IPL/include/opencv/opencv2/core/cuda/warp.hpp
@@ -40,8 +40,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_CUDA_DEVICE_WARP_HPP__
-#define __OPENCV_CUDA_DEVICE_WARP_HPP__
+#ifndef OPENCV_CUDA_DEVICE_WARP_HPP
+#define OPENCV_CUDA_DEVICE_WARP_HPP
 
 /** @file
  * @deprecated Use @ref cudev instead.
@@ -64,7 +64,7 @@ namespace cv { namespace cuda { namespace device
         static __device__ __forceinline__ unsigned int laneId()
         {
             unsigned int ret;
-            asm("mov.u32 %0, %laneid;" : "=r"(ret) );
+            asm("mov.u32 %0, %%laneid;" : "=r"(ret) );
             return ret;
         }
 
@@ -136,4 +136,4 @@ namespace cv { namespace cuda { namespace device
 
 //! @endcond
 
-#endif /* __OPENCV_CUDA_DEVICE_WARP_HPP__ */
+#endif /* OPENCV_CUDA_DEVICE_WARP_HPP */
diff --git a/IPL/include/opencv/opencv2/core/cuda/warp_shuffle.hpp b/IPL/include/opencv/opencv2/core/cuda/warp_shuffle.hpp
index 256fc2a..0da54ae 100644
--- a/IPL/include/opencv/opencv2/core/cuda/warp_shuffle.hpp
+++ b/IPL/include/opencv/opencv2/core/cuda/warp_shuffle.hpp
@@ -40,8 +40,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_CUDA_WARP_SHUFFLE_HPP__
-#define __OPENCV_CUDA_WARP_SHUFFLE_HPP__
+#ifndef OPENCV_CUDA_WARP_SHUFFLE_HPP
+#define OPENCV_CUDA_WARP_SHUFFLE_HPP
 
 /** @file
  * @deprecated Use @ref cudev instead.
@@ -51,6 +51,11 @@
 
 namespace cv { namespace cuda { namespace device
 {
+#if __CUDACC_VER_MAJOR__ >= 9 // compiler major version 9 or newer: redirect the legacy shuffle names to their *_sync forms
+#  define __shfl(x, y, z) __shfl_sync(0xFFFFFFFFU, x, y, z) // 0xFFFFFFFFU: mask argument with all 32 bits set
+#  define __shfl_up(x, y, z) __shfl_up_sync(0xFFFFFFFFU, x, y, z) // same all-bits-set mask
+#  define __shfl_down(x, y, z) __shfl_down_sync(0xFFFFFFFFU, x, y, z) // same all-bits-set mask
+#endif // these three macros are #undef'd again near the end of this header
     template <typename T>
     __device__ __forceinline__ T shfl(T val, int srcLane, int width = warpSize)
     {
@@ -148,6 +153,10 @@ namespace cv { namespace cuda { namespace device
     }
 }}}
 
+#  undef __shfl
+#  undef __shfl_up
+#  undef __shfl_down
+
 //! @endcond
 
-#endif // __OPENCV_CUDA_WARP_SHUFFLE_HPP__
+#endif // OPENCV_CUDA_WARP_SHUFFLE_HPP
diff --git a/IPL/include/opencv/opencv2/core/cuda_stream_accessor.hpp b/IPL/include/opencv/opencv2/core/cuda_stream_accessor.hpp
index 0f8ee9b..deaf356 100644
--- a/IPL/include/opencv/opencv2/core/cuda_stream_accessor.hpp
+++ b/IPL/include/opencv/opencv2/core/cuda_stream_accessor.hpp
@@ -40,8 +40,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_CORE_CUDA_STREAM_ACCESSOR_HPP__
-#define __OPENCV_CORE_CUDA_STREAM_ACCESSOR_HPP__
+#ifndef OPENCV_CORE_CUDA_STREAM_ACCESSOR_HPP
+#define OPENCV_CORE_CUDA_STREAM_ACCESSOR_HPP
 
 #ifndef __cplusplus
 #  error cuda_stream_accessor.hpp header must be compiled as C++
@@ -83,4 +83,4 @@ namespace cv
     }
 }
 
-#endif /* __OPENCV_CORE_CUDA_STREAM_ACCESSOR_HPP__ */
+#endif /* OPENCV_CORE_CUDA_STREAM_ACCESSOR_HPP */
diff --git a/IPL/include/opencv/opencv2/core/cuda_types.hpp b/IPL/include/opencv/opencv2/core/cuda_types.hpp
index 8df816e..45dc2ca 100644
--- a/IPL/include/opencv/opencv2/core/cuda_types.hpp
+++ b/IPL/include/opencv/opencv2/core/cuda_types.hpp
@@ -40,13 +40,20 @@
 //
 //M*/
 
-#ifndef __OPENCV_CORE_CUDA_TYPES_HPP__
-#define __OPENCV_CORE_CUDA_TYPES_HPP__
+#ifndef OPENCV_CORE_CUDA_TYPES_HPP
+#define OPENCV_CORE_CUDA_TYPES_HPP
 
 #ifndef __cplusplus
 #  error cuda_types.hpp header must be compiled as C++
 #endif
 
+#if defined(__OPENCV_BUILD) && defined(__clang__)
+#pragma clang diagnostic ignored "-Winconsistent-missing-override"
+#endif
+#if defined(__OPENCV_BUILD) && defined(__GNUC__) && __GNUC__ >= 5
+#pragma GCC diagnostic ignored "-Wsuggest-override"
+#endif
+
 /** @file
  * @deprecated Use @ref cudev instead.
  */
@@ -120,10 +127,12 @@ namespace cv
         };
 
         typedef PtrStepSz<unsigned char> PtrStepSzb;
+        typedef PtrStepSz<unsigned short> PtrStepSzus;
         typedef PtrStepSz<float> PtrStepSzf;
         typedef PtrStepSz<int> PtrStepSzi;
 
         typedef PtrStep<unsigned char> PtrStepb;
+        typedef PtrStep<unsigned short> PtrStepus;
         typedef PtrStep<float> PtrStepf;
         typedef PtrStep<int> PtrStepi;
 
@@ -132,4 +141,4 @@ namespace cv
 
 //! @endcond
 
-#endif /* __OPENCV_CORE_CUDA_TYPES_HPP__ */
+#endif /* OPENCV_CORE_CUDA_TYPES_HPP */
diff --git a/IPL/include/opencv/opencv2/core/cv_cpu_dispatch.h b/IPL/include/opencv/opencv2/core/cv_cpu_dispatch.h
new file mode 100644
index 0000000..42651ae
--- /dev/null
+++ b/IPL/include/opencv/opencv2/core/cv_cpu_dispatch.h
@@ -0,0 +1,345 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#if defined __OPENCV_BUILD \
+
+#include "cv_cpu_config.h"
+#include "cv_cpu_helper.h"
+
+#ifdef CV_CPU_DISPATCH_MODE // dispatch mode given: namespace name is formed from opt_ and the mode via __CV_CAT (defined elsewhere)
+#define CV_CPU_OPTIMIZATION_NAMESPACE __CV_CAT(opt_, CV_CPU_DISPATCH_MODE)
+#define CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN namespace __CV_CAT(opt_, CV_CPU_DISPATCH_MODE) {
+#define CV_CPU_OPTIMIZATION_NAMESPACE_END }
+#else // no dispatch mode: namespace is cpu_baseline and CV_CPU_BASELINE_MODE is defined as 1
+#define CV_CPU_OPTIMIZATION_NAMESPACE cpu_baseline
+#define CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN namespace cpu_baseline {
+#define CV_CPU_OPTIMIZATION_NAMESPACE_END }
+#define CV_CPU_BASELINE_MODE 1
+#endif // CV_CPU_DISPATCH_MODE
+
+
+#define __CV_CPU_DISPATCH_CHAIN_END(fn, args, mode, ...)  /* done */
+#define __CV_CPU_DISPATCH(fn, args, mode, ...) __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
+#define __CV_CPU_DISPATCH_EXPAND(fn, args, ...) __CV_EXPAND(__CV_CPU_DISPATCH(fn, args, __VA_ARGS__))
+#define CV_CPU_DISPATCH(fn, args, ...) __CV_CPU_DISPATCH_EXPAND(fn, args, __VA_ARGS__, END) // expand macros
+
+
+#if defined CV_ENABLE_INTRINSICS \
+    && !defined CV_DISABLE_OPTIMIZATION \
+    && !defined __CUDACC__ /* do not include SSE/AVX/NEON headers for NVCC compiler */ \
+
+#ifdef CV_CPU_COMPILE_SSE2
+#  include <emmintrin.h>
+#  define CV_MMX 1
+#  define CV_SSE 1
+#  define CV_SSE2 1
+#endif
+#ifdef CV_CPU_COMPILE_SSE3
+#  include <pmmintrin.h>
+#  define CV_SSE3 1
+#endif
+#ifdef CV_CPU_COMPILE_SSSE3
+#  include <tmmintrin.h>
+#  define CV_SSSE3 1
+#endif
+#ifdef CV_CPU_COMPILE_SSE4_1
+#  include <smmintrin.h>
+#  define CV_SSE4_1 1
+#endif
+#ifdef CV_CPU_COMPILE_SSE4_2
+#  include <nmmintrin.h>
+#  define CV_SSE4_2 1
+#endif
+#ifdef CV_CPU_COMPILE_POPCNT
+#  ifdef _MSC_VER
+#    include <nmmintrin.h>
+#    if defined(_M_X64)
+#      define CV_POPCNT_U64 _mm_popcnt_u64
+#    endif
+#    define CV_POPCNT_U32 _mm_popcnt_u32
+#  else
+#    include <popcntintrin.h>
+#    if defined(__x86_64__)
+#      define CV_POPCNT_U64 __builtin_popcountll
+#    endif
+#    define CV_POPCNT_U32 __builtin_popcount
+#  endif
+#  define CV_POPCNT 1
+#endif
+#ifdef CV_CPU_COMPILE_AVX
+#  include <immintrin.h>
+#  define CV_AVX 1
+#endif
+#ifdef CV_CPU_COMPILE_FP16
+#  if defined(__arm__) || defined(__aarch64__) || defined(_M_ARM) || defined(_M_ARM64)
+#    include <arm_neon.h>
+#  else
+#    include <immintrin.h>
+#  endif
+#  define CV_FP16 1
+#endif
+#ifdef CV_CPU_COMPILE_AVX2
+#  include <immintrin.h>
+#  define CV_AVX2 1
+#endif
+#ifdef CV_CPU_COMPILE_AVX_512F
+#  include <immintrin.h>
+#  define CV_AVX_512F 1
+#endif
+#ifdef CV_CPU_COMPILE_AVX512_COMMON
+#  define CV_AVX512_COMMON 1
+#  define CV_AVX_512CD 1
+#endif
+#ifdef CV_CPU_COMPILE_AVX512_KNL
+#  define CV_AVX512_KNL 1
+#  define CV_AVX_512ER 1
+#  define CV_AVX_512PF 1
+#endif
+#ifdef CV_CPU_COMPILE_AVX512_KNM
+#  define CV_AVX512_KNM 1
+#  define CV_AVX_5124FMAPS 1
+#  define CV_AVX_5124VNNIW 1
+#  define CV_AVX_512VPOPCNTDQ 1
+#endif
+#ifdef CV_CPU_COMPILE_AVX512_SKX
+#  define CV_AVX512_SKX 1
+#  define CV_AVX_512VL 1
+#  define CV_AVX_512BW 1
+#  define CV_AVX_512DQ 1
+#endif
+#ifdef CV_CPU_COMPILE_AVX512_CNL
+#  define CV_AVX512_CNL 1
+#  define CV_AVX_512IFMA 1
+#  define CV_AVX_512VBMI 1
+#endif
+#ifdef CV_CPU_COMPILE_AVX512_CLX
+#  define CV_AVX512_CLX 1
+#  define CV_AVX_512VNNI 1
+#endif
+#ifdef CV_CPU_COMPILE_AVX512_ICL
+#  define CV_AVX512_ICL 1
+#  undef CV_AVX_512IFMA
+#  define CV_AVX_512IFMA 1
+#  undef CV_AVX_512VBMI
+#  define CV_AVX_512VBMI 1
+#  undef CV_AVX_512VNNI
+#  define CV_AVX_512VNNI 1
+#  define CV_AVX_512VBMI2 1
+#  define CV_AVX_512BITALG 1
+#  define CV_AVX_512VPOPCNTDQ 1
+#endif
+#ifdef CV_CPU_COMPILE_FMA3
+#  define CV_FMA3 1
+#endif
+
+#if defined _WIN32 && (defined(_M_ARM) || defined(_M_ARM64)) && (defined(CV_CPU_COMPILE_NEON) || !defined(_MSC_VER))
+# include <Intrin.h>
+# include <arm_neon.h>
+# define CV_NEON 1
+#elif defined(__ARM_NEON__) || (defined (__ARM_NEON) && defined(__aarch64__))
+#  include <arm_neon.h>
+#  define CV_NEON 1
+#endif
+
+#if defined(__ARM_NEON__) || defined(__aarch64__)
+#  include <arm_neon.h>
+#endif
+
+#ifdef CV_CPU_COMPILE_VSX
+#  include <altivec.h>
+#  undef vector
+#  undef pixel
+#  undef bool
+#  define CV_VSX 1
+#endif
+
+#ifdef CV_CPU_COMPILE_VSX3
+#  define CV_VSX3 1
+#endif
+
+#ifdef CV_CPU_COMPILE_MSA
+#  include "hal/msa_macros.h"
+#  define CV_MSA 1
+#endif
+
+#ifdef __EMSCRIPTEN__
+#  define CV_WASM_SIMD 1
+#  include <wasm_simd128.h>
+#endif
+
+#endif // CV_ENABLE_INTRINSICS && !CV_DISABLE_OPTIMIZATION && !__CUDACC__
+
+#if defined CV_CPU_COMPILE_AVX && !defined CV_CPU_BASELINE_COMPILE_AVX
+struct VZeroUpperGuard { // guard object: calls _mm256_zeroupper() when constructed and again when destroyed
+#ifdef __GNUC__
+    __attribute__((always_inline))
+#endif
+    inline VZeroUpperGuard() { _mm256_zeroupper(); } // first call, at construction
+#ifdef __GNUC__
+    __attribute__((always_inline))
+#endif
+    inline ~VZeroUpperGuard() { _mm256_zeroupper(); } // second call, at destruction
+}; // instantiated as a local variable by the __CV_AVX_GUARD macro defined right after this struct
+#define __CV_AVX_GUARD VZeroUpperGuard __vzeroupper_guard; CV_UNUSED(__vzeroupper_guard);
+#endif
+
+#ifdef __CV_AVX_GUARD
+#define CV_AVX_GUARD __CV_AVX_GUARD
+#else
+#define CV_AVX_GUARD
+#endif
+
+#endif // __OPENCV_BUILD
+
+
+
+#if !defined __OPENCV_BUILD /* Compatibility code */ \
+    && !defined __CUDACC__ /* do not include SSE/AVX/NEON headers for NVCC compiler */
+#if defined __SSE2__ || defined _M_X64 || (defined _M_IX86_FP && _M_IX86_FP >= 2)
+#  include <emmintrin.h>
+#  define CV_MMX 1
+#  define CV_SSE 1
+#  define CV_SSE2 1
+#elif defined _WIN32 && (defined(_M_ARM) || defined(_M_ARM64)) && (defined(CV_CPU_COMPILE_NEON) || !defined(_MSC_VER))
+# include <Intrin.h>
+# include <arm_neon.h>
+# define CV_NEON 1
+#elif defined(__ARM_NEON__) || (defined (__ARM_NEON) && defined(__aarch64__))
+#  include <arm_neon.h>
+#  define CV_NEON 1
+#elif defined(__VSX__) && defined(__PPC64__) && defined(__LITTLE_ENDIAN__)
+#  include <altivec.h>
+#  undef vector
+#  undef pixel
+#  undef bool
+#  define CV_VSX 1
+#endif
+
+#endif // !__OPENCV_BUILD && !__CUDACC__ (Compatibility code)
+
+
+
+#ifndef CV_MMX
+#  define CV_MMX 0
+#endif
+#ifndef CV_SSE
+#  define CV_SSE 0
+#endif
+#ifndef CV_SSE2
+#  define CV_SSE2 0
+#endif
+#ifndef CV_SSE3
+#  define CV_SSE3 0
+#endif
+#ifndef CV_SSSE3
+#  define CV_SSSE3 0
+#endif
+#ifndef CV_SSE4_1
+#  define CV_SSE4_1 0
+#endif
+#ifndef CV_SSE4_2
+#  define CV_SSE4_2 0
+#endif
+#ifndef CV_POPCNT
+#  define CV_POPCNT 0
+#endif
+#ifndef CV_AVX
+#  define CV_AVX 0
+#endif
+#ifndef CV_FP16
+#  define CV_FP16 0
+#endif
+#ifndef CV_AVX2
+#  define CV_AVX2 0
+#endif
+#ifndef CV_FMA3
+#  define CV_FMA3 0
+#endif
+#ifndef CV_AVX_512F
+#  define CV_AVX_512F 0
+#endif
+#ifndef CV_AVX_512BW
+#  define CV_AVX_512BW 0
+#endif
+#ifndef CV_AVX_512CD
+#  define CV_AVX_512CD 0
+#endif
+#ifndef CV_AVX_512DQ
+#  define CV_AVX_512DQ 0
+#endif
+#ifndef CV_AVX_512ER
+#  define CV_AVX_512ER 0
+#endif
+#ifndef CV_AVX_512IFMA
+#  define CV_AVX_512IFMA 0
+#endif
+#define CV_AVX_512IFMA512 CV_AVX_512IFMA // deprecated
+#ifndef CV_AVX_512PF
+#  define CV_AVX_512PF 0
+#endif
+#ifndef CV_AVX_512VBMI
+#  define CV_AVX_512VBMI 0
+#endif
+#ifndef CV_AVX_512VL
+#  define CV_AVX_512VL 0
+#endif
+#ifndef CV_AVX_5124FMAPS
+#  define CV_AVX_5124FMAPS 0
+#endif
+#ifndef CV_AVX_5124VNNIW
+#  define CV_AVX_5124VNNIW 0
+#endif
+#ifndef CV_AVX_512VPOPCNTDQ
+#  define CV_AVX_512VPOPCNTDQ 0
+#endif
+#ifndef CV_AVX_512VNNI
+#  define CV_AVX_512VNNI 0
+#endif
+#ifndef CV_AVX_512VBMI2
+#  define CV_AVX_512VBMI2 0
+#endif
+#ifndef CV_AVX_512BITALG
+#  define CV_AVX_512BITALG 0
+#endif
+#ifndef CV_AVX512_COMMON
+#  define CV_AVX512_COMMON 0
+#endif
+#ifndef CV_AVX512_KNL
+#  define CV_AVX512_KNL 0
+#endif
+#ifndef CV_AVX512_KNM
+#  define CV_AVX512_KNM 0
+#endif
+#ifndef CV_AVX512_SKX
+#  define CV_AVX512_SKX 0
+#endif
+#ifndef CV_AVX512_CNL
+#  define CV_AVX512_CNL 0
+#endif
+#ifndef CV_AVX512_CLX
+#  define CV_AVX512_CLX 0
+#endif
+#ifndef CV_AVX512_ICL
+#  define CV_AVX512_ICL 0
+#endif
+
+#ifndef CV_NEON
+#  define CV_NEON 0
+#endif
+
+#ifndef CV_VSX
+#  define CV_VSX 0
+#endif
+
+#ifndef CV_VSX3
+#  define CV_VSX3 0
+#endif
+
+#ifndef CV_MSA
+#  define CV_MSA 0
+#endif
+
+#ifndef CV_WASM_SIMD
+#  define CV_WASM_SIMD 0
+#endif
diff --git a/IPL/include/opencv/opencv2/core/cv_cpu_helper.h b/IPL/include/opencv/opencv2/core/cv_cpu_helper.h
new file mode 100644
index 0000000..aaa89ed
--- /dev/null
+++ b/IPL/include/opencv/opencv2/core/cv_cpu_helper.h
@@ -0,0 +1,487 @@
+// AUTOGENERATED, DO NOT EDIT
+
+#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE
+#  define CV_TRY_SSE 1
+#  define CV_CPU_FORCE_SSE 1
+#  define CV_CPU_HAS_SUPPORT_SSE 1
+#  define CV_CPU_CALL_SSE(fn, args) return (cpu_baseline::fn args)
+#  define CV_CPU_CALL_SSE_(fn, args) return (opt_SSE::fn args)
+#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE
+#  define CV_TRY_SSE 1
+#  define CV_CPU_FORCE_SSE 0
+#  define CV_CPU_HAS_SUPPORT_SSE (cv::checkHardwareSupport(CV_CPU_SSE))
+#  define CV_CPU_CALL_SSE(fn, args) if (CV_CPU_HAS_SUPPORT_SSE) return (opt_SSE::fn args)
+#  define CV_CPU_CALL_SSE_(fn, args) if (CV_CPU_HAS_SUPPORT_SSE) return (opt_SSE::fn args)
+#else
+#  define CV_TRY_SSE 0
+#  define CV_CPU_FORCE_SSE 0
+#  define CV_CPU_HAS_SUPPORT_SSE 0
+#  define CV_CPU_CALL_SSE(fn, args)
+#  define CV_CPU_CALL_SSE_(fn, args)
+#endif
+#define __CV_CPU_DISPATCH_CHAIN_SSE(fn, args, mode, ...)  CV_CPU_CALL_SSE(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
+
+#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE2
+#  define CV_TRY_SSE2 1
+#  define CV_CPU_FORCE_SSE2 1
+#  define CV_CPU_HAS_SUPPORT_SSE2 1
+#  define CV_CPU_CALL_SSE2(fn, args) return (cpu_baseline::fn args)
+#  define CV_CPU_CALL_SSE2_(fn, args) return (opt_SSE2::fn args)
+#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE2
+#  define CV_TRY_SSE2 1
+#  define CV_CPU_FORCE_SSE2 0
+#  define CV_CPU_HAS_SUPPORT_SSE2 (cv::checkHardwareSupport(CV_CPU_SSE2))
+#  define CV_CPU_CALL_SSE2(fn, args) if (CV_CPU_HAS_SUPPORT_SSE2) return (opt_SSE2::fn args)
+#  define CV_CPU_CALL_SSE2_(fn, args) if (CV_CPU_HAS_SUPPORT_SSE2) return (opt_SSE2::fn args)
+#else
+#  define CV_TRY_SSE2 0
+#  define CV_CPU_FORCE_SSE2 0
+#  define CV_CPU_HAS_SUPPORT_SSE2 0
+#  define CV_CPU_CALL_SSE2(fn, args)
+#  define CV_CPU_CALL_SSE2_(fn, args)
+#endif
+#define __CV_CPU_DISPATCH_CHAIN_SSE2(fn, args, mode, ...)  CV_CPU_CALL_SSE2(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
+
+#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE3
+#  define CV_TRY_SSE3 1
+#  define CV_CPU_FORCE_SSE3 1
+#  define CV_CPU_HAS_SUPPORT_SSE3 1
+#  define CV_CPU_CALL_SSE3(fn, args) return (cpu_baseline::fn args)
+#  define CV_CPU_CALL_SSE3_(fn, args) return (opt_SSE3::fn args)
+#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE3
+#  define CV_TRY_SSE3 1
+#  define CV_CPU_FORCE_SSE3 0
+#  define CV_CPU_HAS_SUPPORT_SSE3 (cv::checkHardwareSupport(CV_CPU_SSE3))
+#  define CV_CPU_CALL_SSE3(fn, args) if (CV_CPU_HAS_SUPPORT_SSE3) return (opt_SSE3::fn args)
+#  define CV_CPU_CALL_SSE3_(fn, args) if (CV_CPU_HAS_SUPPORT_SSE3) return (opt_SSE3::fn args)
+#else
+#  define CV_TRY_SSE3 0
+#  define CV_CPU_FORCE_SSE3 0
+#  define CV_CPU_HAS_SUPPORT_SSE3 0
+#  define CV_CPU_CALL_SSE3(fn, args)
+#  define CV_CPU_CALL_SSE3_(fn, args)
+#endif
+#define __CV_CPU_DISPATCH_CHAIN_SSE3(fn, args, mode, ...)  CV_CPU_CALL_SSE3(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
+
+#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSSE3
+#  define CV_TRY_SSSE3 1
+#  define CV_CPU_FORCE_SSSE3 1
+#  define CV_CPU_HAS_SUPPORT_SSSE3 1
+#  define CV_CPU_CALL_SSSE3(fn, args) return (cpu_baseline::fn args)
+#  define CV_CPU_CALL_SSSE3_(fn, args) return (opt_SSSE3::fn args)
+#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSSE3
+#  define CV_TRY_SSSE3 1
+#  define CV_CPU_FORCE_SSSE3 0
+#  define CV_CPU_HAS_SUPPORT_SSSE3 (cv::checkHardwareSupport(CV_CPU_SSSE3))
+#  define CV_CPU_CALL_SSSE3(fn, args) if (CV_CPU_HAS_SUPPORT_SSSE3) return (opt_SSSE3::fn args)
+#  define CV_CPU_CALL_SSSE3_(fn, args) if (CV_CPU_HAS_SUPPORT_SSSE3) return (opt_SSSE3::fn args)
+#else
+#  define CV_TRY_SSSE3 0
+#  define CV_CPU_FORCE_SSSE3 0
+#  define CV_CPU_HAS_SUPPORT_SSSE3 0
+#  define CV_CPU_CALL_SSSE3(fn, args)
+#  define CV_CPU_CALL_SSSE3_(fn, args)
+#endif
+#define __CV_CPU_DISPATCH_CHAIN_SSSE3(fn, args, mode, ...)  CV_CPU_CALL_SSSE3(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
+
+#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE4_1
+#  define CV_TRY_SSE4_1 1
+#  define CV_CPU_FORCE_SSE4_1 1
+#  define CV_CPU_HAS_SUPPORT_SSE4_1 1
+#  define CV_CPU_CALL_SSE4_1(fn, args) return (cpu_baseline::fn args)
+#  define CV_CPU_CALL_SSE4_1_(fn, args) return (opt_SSE4_1::fn args)
+#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE4_1
+#  define CV_TRY_SSE4_1 1
+#  define CV_CPU_FORCE_SSE4_1 0
+#  define CV_CPU_HAS_SUPPORT_SSE4_1 (cv::checkHardwareSupport(CV_CPU_SSE4_1))
+#  define CV_CPU_CALL_SSE4_1(fn, args) if (CV_CPU_HAS_SUPPORT_SSE4_1) return (opt_SSE4_1::fn args)
+#  define CV_CPU_CALL_SSE4_1_(fn, args) if (CV_CPU_HAS_SUPPORT_SSE4_1) return (opt_SSE4_1::fn args)
+#else
+#  define CV_TRY_SSE4_1 0
+#  define CV_CPU_FORCE_SSE4_1 0
+#  define CV_CPU_HAS_SUPPORT_SSE4_1 0
+#  define CV_CPU_CALL_SSE4_1(fn, args)
+#  define CV_CPU_CALL_SSE4_1_(fn, args)
+#endif
+#define __CV_CPU_DISPATCH_CHAIN_SSE4_1(fn, args, mode, ...)  CV_CPU_CALL_SSE4_1(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
+
+#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE4_2
+#  define CV_TRY_SSE4_2 1
+#  define CV_CPU_FORCE_SSE4_2 1
+#  define CV_CPU_HAS_SUPPORT_SSE4_2 1
+#  define CV_CPU_CALL_SSE4_2(fn, args) return (cpu_baseline::fn args)
+#  define CV_CPU_CALL_SSE4_2_(fn, args) return (opt_SSE4_2::fn args)
+#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE4_2
+#  define CV_TRY_SSE4_2 1
+#  define CV_CPU_FORCE_SSE4_2 0
+#  define CV_CPU_HAS_SUPPORT_SSE4_2 (cv::checkHardwareSupport(CV_CPU_SSE4_2))
+#  define CV_CPU_CALL_SSE4_2(fn, args) if (CV_CPU_HAS_SUPPORT_SSE4_2) return (opt_SSE4_2::fn args)
+#  define CV_CPU_CALL_SSE4_2_(fn, args) if (CV_CPU_HAS_SUPPORT_SSE4_2) return (opt_SSE4_2::fn args)
+#else
+#  define CV_TRY_SSE4_2 0
+#  define CV_CPU_FORCE_SSE4_2 0
+#  define CV_CPU_HAS_SUPPORT_SSE4_2 0
+#  define CV_CPU_CALL_SSE4_2(fn, args)
+#  define CV_CPU_CALL_SSE4_2_(fn, args)
+#endif
+#define __CV_CPU_DISPATCH_CHAIN_SSE4_2(fn, args, mode, ...)  CV_CPU_CALL_SSE4_2(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
+
+#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_POPCNT
+#  define CV_TRY_POPCNT 1
+#  define CV_CPU_FORCE_POPCNT 1
+#  define CV_CPU_HAS_SUPPORT_POPCNT 1
+#  define CV_CPU_CALL_POPCNT(fn, args) return (cpu_baseline::fn args)
+#  define CV_CPU_CALL_POPCNT_(fn, args) return (opt_POPCNT::fn args)
+#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_POPCNT
+#  define CV_TRY_POPCNT 1
+#  define CV_CPU_FORCE_POPCNT 0
+#  define CV_CPU_HAS_SUPPORT_POPCNT (cv::checkHardwareSupport(CV_CPU_POPCNT))
+#  define CV_CPU_CALL_POPCNT(fn, args) if (CV_CPU_HAS_SUPPORT_POPCNT) return (opt_POPCNT::fn args)
+#  define CV_CPU_CALL_POPCNT_(fn, args) if (CV_CPU_HAS_SUPPORT_POPCNT) return (opt_POPCNT::fn args)
+#else
+#  define CV_TRY_POPCNT 0
+#  define CV_CPU_FORCE_POPCNT 0
+#  define CV_CPU_HAS_SUPPORT_POPCNT 0
+#  define CV_CPU_CALL_POPCNT(fn, args)
+#  define CV_CPU_CALL_POPCNT_(fn, args)
+#endif
+#define __CV_CPU_DISPATCH_CHAIN_POPCNT(fn, args, mode, ...)  CV_CPU_CALL_POPCNT(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
+
+#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX
+#  define CV_TRY_AVX 1
+#  define CV_CPU_FORCE_AVX 1
+#  define CV_CPU_HAS_SUPPORT_AVX 1
+#  define CV_CPU_CALL_AVX(fn, args) return (cpu_baseline::fn args)
+#  define CV_CPU_CALL_AVX_(fn, args) return (opt_AVX::fn args)
+#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX
+#  define CV_TRY_AVX 1
+#  define CV_CPU_FORCE_AVX 0
+#  define CV_CPU_HAS_SUPPORT_AVX (cv::checkHardwareSupport(CV_CPU_AVX))
+#  define CV_CPU_CALL_AVX(fn, args) if (CV_CPU_HAS_SUPPORT_AVX) return (opt_AVX::fn args)
+#  define CV_CPU_CALL_AVX_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX) return (opt_AVX::fn args)
+#else
+#  define CV_TRY_AVX 0
+#  define CV_CPU_FORCE_AVX 0
+#  define CV_CPU_HAS_SUPPORT_AVX 0
+#  define CV_CPU_CALL_AVX(fn, args)
+#  define CV_CPU_CALL_AVX_(fn, args)
+#endif
+#define __CV_CPU_DISPATCH_CHAIN_AVX(fn, args, mode, ...)  CV_CPU_CALL_AVX(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
+
+#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_FP16
+#  define CV_TRY_FP16 1
+#  define CV_CPU_FORCE_FP16 1
+#  define CV_CPU_HAS_SUPPORT_FP16 1
+#  define CV_CPU_CALL_FP16(fn, args) return (cpu_baseline::fn args)
+#  define CV_CPU_CALL_FP16_(fn, args) return (opt_FP16::fn args)
+#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_FP16
+#  define CV_TRY_FP16 1
+#  define CV_CPU_FORCE_FP16 0
+#  define CV_CPU_HAS_SUPPORT_FP16 (cv::checkHardwareSupport(CV_CPU_FP16))
+#  define CV_CPU_CALL_FP16(fn, args) if (CV_CPU_HAS_SUPPORT_FP16) return (opt_FP16::fn args)
+#  define CV_CPU_CALL_FP16_(fn, args) if (CV_CPU_HAS_SUPPORT_FP16) return (opt_FP16::fn args)
+#else
+#  define CV_TRY_FP16 0
+#  define CV_CPU_FORCE_FP16 0
+#  define CV_CPU_HAS_SUPPORT_FP16 0
+#  define CV_CPU_CALL_FP16(fn, args)
+#  define CV_CPU_CALL_FP16_(fn, args)
+#endif
+#define __CV_CPU_DISPATCH_CHAIN_FP16(fn, args, mode, ...)  CV_CPU_CALL_FP16(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
+
+#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX2
+#  define CV_TRY_AVX2 1
+#  define CV_CPU_FORCE_AVX2 1
+#  define CV_CPU_HAS_SUPPORT_AVX2 1
+#  define CV_CPU_CALL_AVX2(fn, args) return (cpu_baseline::fn args)
+#  define CV_CPU_CALL_AVX2_(fn, args) return (opt_AVX2::fn args)
+#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX2
+#  define CV_TRY_AVX2 1
+#  define CV_CPU_FORCE_AVX2 0
+#  define CV_CPU_HAS_SUPPORT_AVX2 (cv::checkHardwareSupport(CV_CPU_AVX2))
+#  define CV_CPU_CALL_AVX2(fn, args) if (CV_CPU_HAS_SUPPORT_AVX2) return (opt_AVX2::fn args)
+#  define CV_CPU_CALL_AVX2_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX2) return (opt_AVX2::fn args)
+#else
+#  define CV_TRY_AVX2 0
+#  define CV_CPU_FORCE_AVX2 0
+#  define CV_CPU_HAS_SUPPORT_AVX2 0
+#  define CV_CPU_CALL_AVX2(fn, args)
+#  define CV_CPU_CALL_AVX2_(fn, args)
+#endif
+#define __CV_CPU_DISPATCH_CHAIN_AVX2(fn, args, mode, ...)  CV_CPU_CALL_AVX2(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
+
+#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_FMA3
+#  define CV_TRY_FMA3 1
+#  define CV_CPU_FORCE_FMA3 1
+#  define CV_CPU_HAS_SUPPORT_FMA3 1
+#  define CV_CPU_CALL_FMA3(fn, args) return (cpu_baseline::fn args)
+#  define CV_CPU_CALL_FMA3_(fn, args) return (opt_FMA3::fn args)
+#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_FMA3
+#  define CV_TRY_FMA3 1
+#  define CV_CPU_FORCE_FMA3 0
+#  define CV_CPU_HAS_SUPPORT_FMA3 (cv::checkHardwareSupport(CV_CPU_FMA3))
+#  define CV_CPU_CALL_FMA3(fn, args) if (CV_CPU_HAS_SUPPORT_FMA3) return (opt_FMA3::fn args)
+#  define CV_CPU_CALL_FMA3_(fn, args) if (CV_CPU_HAS_SUPPORT_FMA3) return (opt_FMA3::fn args)
+#else
+#  define CV_TRY_FMA3 0
+#  define CV_CPU_FORCE_FMA3 0
+#  define CV_CPU_HAS_SUPPORT_FMA3 0
+#  define CV_CPU_CALL_FMA3(fn, args)
+#  define CV_CPU_CALL_FMA3_(fn, args)
+#endif
+#define __CV_CPU_DISPATCH_CHAIN_FMA3(fn, args, mode, ...)  CV_CPU_CALL_FMA3(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
+
+#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX_512F
+#  define CV_TRY_AVX_512F 1
+#  define CV_CPU_FORCE_AVX_512F 1
+#  define CV_CPU_HAS_SUPPORT_AVX_512F 1
+#  define CV_CPU_CALL_AVX_512F(fn, args) return (cpu_baseline::fn args)
+#  define CV_CPU_CALL_AVX_512F_(fn, args) return (opt_AVX_512F::fn args)
+#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX_512F
+#  define CV_TRY_AVX_512F 1
+#  define CV_CPU_FORCE_AVX_512F 0
+#  define CV_CPU_HAS_SUPPORT_AVX_512F (cv::checkHardwareSupport(CV_CPU_AVX_512F))
+#  define CV_CPU_CALL_AVX_512F(fn, args) if (CV_CPU_HAS_SUPPORT_AVX_512F) return (opt_AVX_512F::fn args)
+#  define CV_CPU_CALL_AVX_512F_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX_512F) return (opt_AVX_512F::fn args)
+#else
+#  define CV_TRY_AVX_512F 0
+#  define CV_CPU_FORCE_AVX_512F 0
+#  define CV_CPU_HAS_SUPPORT_AVX_512F 0
+#  define CV_CPU_CALL_AVX_512F(fn, args)
+#  define CV_CPU_CALL_AVX_512F_(fn, args)
+#endif
+#define __CV_CPU_DISPATCH_CHAIN_AVX_512F(fn, args, mode, ...)  CV_CPU_CALL_AVX_512F(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
+
+#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX512_COMMON
+#  define CV_TRY_AVX512_COMMON 1
+#  define CV_CPU_FORCE_AVX512_COMMON 1
+#  define CV_CPU_HAS_SUPPORT_AVX512_COMMON 1
+#  define CV_CPU_CALL_AVX512_COMMON(fn, args) return (cpu_baseline::fn args)
+#  define CV_CPU_CALL_AVX512_COMMON_(fn, args) return (opt_AVX512_COMMON::fn args)
+#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX512_COMMON
+#  define CV_TRY_AVX512_COMMON 1
+#  define CV_CPU_FORCE_AVX512_COMMON 0
+#  define CV_CPU_HAS_SUPPORT_AVX512_COMMON (cv::checkHardwareSupport(CV_CPU_AVX512_COMMON))
+#  define CV_CPU_CALL_AVX512_COMMON(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_COMMON) return (opt_AVX512_COMMON::fn args)
+#  define CV_CPU_CALL_AVX512_COMMON_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_COMMON) return (opt_AVX512_COMMON::fn args)
+#else
+#  define CV_TRY_AVX512_COMMON 0
+#  define CV_CPU_FORCE_AVX512_COMMON 0
+#  define CV_CPU_HAS_SUPPORT_AVX512_COMMON 0
+#  define CV_CPU_CALL_AVX512_COMMON(fn, args)
+#  define CV_CPU_CALL_AVX512_COMMON_(fn, args)
+#endif
+#define __CV_CPU_DISPATCH_CHAIN_AVX512_COMMON(fn, args, mode, ...)  CV_CPU_CALL_AVX512_COMMON(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
+
+#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX512_KNL
+#  define CV_TRY_AVX512_KNL 1
+#  define CV_CPU_FORCE_AVX512_KNL 1
+#  define CV_CPU_HAS_SUPPORT_AVX512_KNL 1
+#  define CV_CPU_CALL_AVX512_KNL(fn, args) return (cpu_baseline::fn args)
+#  define CV_CPU_CALL_AVX512_KNL_(fn, args) return (opt_AVX512_KNL::fn args)
+#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX512_KNL
+#  define CV_TRY_AVX512_KNL 1
+#  define CV_CPU_FORCE_AVX512_KNL 0
+#  define CV_CPU_HAS_SUPPORT_AVX512_KNL (cv::checkHardwareSupport(CV_CPU_AVX512_KNL))
+#  define CV_CPU_CALL_AVX512_KNL(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_KNL) return (opt_AVX512_KNL::fn args)
+#  define CV_CPU_CALL_AVX512_KNL_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_KNL) return (opt_AVX512_KNL::fn args)
+#else
+#  define CV_TRY_AVX512_KNL 0
+#  define CV_CPU_FORCE_AVX512_KNL 0
+#  define CV_CPU_HAS_SUPPORT_AVX512_KNL 0
+#  define CV_CPU_CALL_AVX512_KNL(fn, args)
+#  define CV_CPU_CALL_AVX512_KNL_(fn, args)
+#endif
+#define __CV_CPU_DISPATCH_CHAIN_AVX512_KNL(fn, args, mode, ...)  CV_CPU_CALL_AVX512_KNL(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
+
+#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX512_KNM
+#  define CV_TRY_AVX512_KNM 1
+#  define CV_CPU_FORCE_AVX512_KNM 1
+#  define CV_CPU_HAS_SUPPORT_AVX512_KNM 1
+#  define CV_CPU_CALL_AVX512_KNM(fn, args) return (cpu_baseline::fn args)
+#  define CV_CPU_CALL_AVX512_KNM_(fn, args) return (opt_AVX512_KNM::fn args)
+#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX512_KNM
+#  define CV_TRY_AVX512_KNM 1
+#  define CV_CPU_FORCE_AVX512_KNM 0
+#  define CV_CPU_HAS_SUPPORT_AVX512_KNM (cv::checkHardwareSupport(CV_CPU_AVX512_KNM))
+#  define CV_CPU_CALL_AVX512_KNM(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_KNM) return (opt_AVX512_KNM::fn args)
+#  define CV_CPU_CALL_AVX512_KNM_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_KNM) return (opt_AVX512_KNM::fn args)
+#else
+#  define CV_TRY_AVX512_KNM 0
+#  define CV_CPU_FORCE_AVX512_KNM 0
+#  define CV_CPU_HAS_SUPPORT_AVX512_KNM 0
+#  define CV_CPU_CALL_AVX512_KNM(fn, args)
+#  define CV_CPU_CALL_AVX512_KNM_(fn, args)
+#endif
+#define __CV_CPU_DISPATCH_CHAIN_AVX512_KNM(fn, args, mode, ...)  CV_CPU_CALL_AVX512_KNM(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
+
+#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX512_SKX
+#  define CV_TRY_AVX512_SKX 1
+#  define CV_CPU_FORCE_AVX512_SKX 1
+#  define CV_CPU_HAS_SUPPORT_AVX512_SKX 1
+#  define CV_CPU_CALL_AVX512_SKX(fn, args) return (cpu_baseline::fn args)
+#  define CV_CPU_CALL_AVX512_SKX_(fn, args) return (opt_AVX512_SKX::fn args)
+#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX512_SKX
+#  define CV_TRY_AVX512_SKX 1
+#  define CV_CPU_FORCE_AVX512_SKX 0
+#  define CV_CPU_HAS_SUPPORT_AVX512_SKX (cv::checkHardwareSupport(CV_CPU_AVX512_SKX))
+#  define CV_CPU_CALL_AVX512_SKX(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_SKX) return (opt_AVX512_SKX::fn args)
+#  define CV_CPU_CALL_AVX512_SKX_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_SKX) return (opt_AVX512_SKX::fn args)
+#else
+#  define CV_TRY_AVX512_SKX 0
+#  define CV_CPU_FORCE_AVX512_SKX 0
+#  define CV_CPU_HAS_SUPPORT_AVX512_SKX 0
+#  define CV_CPU_CALL_AVX512_SKX(fn, args)
+#  define CV_CPU_CALL_AVX512_SKX_(fn, args)
+#endif
+#define __CV_CPU_DISPATCH_CHAIN_AVX512_SKX(fn, args, mode, ...)  CV_CPU_CALL_AVX512_SKX(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
+
+#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX512_CNL
+#  define CV_TRY_AVX512_CNL 1
+#  define CV_CPU_FORCE_AVX512_CNL 1
+#  define CV_CPU_HAS_SUPPORT_AVX512_CNL 1
+#  define CV_CPU_CALL_AVX512_CNL(fn, args) return (cpu_baseline::fn args)
+#  define CV_CPU_CALL_AVX512_CNL_(fn, args) return (opt_AVX512_CNL::fn args)
+#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX512_CNL
+#  define CV_TRY_AVX512_CNL 1
+#  define CV_CPU_FORCE_AVX512_CNL 0
+#  define CV_CPU_HAS_SUPPORT_AVX512_CNL (cv::checkHardwareSupport(CV_CPU_AVX512_CNL))
+#  define CV_CPU_CALL_AVX512_CNL(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_CNL) return (opt_AVX512_CNL::fn args)
+#  define CV_CPU_CALL_AVX512_CNL_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_CNL) return (opt_AVX512_CNL::fn args)
+#else
+#  define CV_TRY_AVX512_CNL 0
+#  define CV_CPU_FORCE_AVX512_CNL 0
+#  define CV_CPU_HAS_SUPPORT_AVX512_CNL 0
+#  define CV_CPU_CALL_AVX512_CNL(fn, args)
+#  define CV_CPU_CALL_AVX512_CNL_(fn, args)
+#endif
+#define __CV_CPU_DISPATCH_CHAIN_AVX512_CNL(fn, args, mode, ...)  CV_CPU_CALL_AVX512_CNL(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
+
+#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX512_CLX
+#  define CV_TRY_AVX512_CLX 1
+#  define CV_CPU_FORCE_AVX512_CLX 1
+#  define CV_CPU_HAS_SUPPORT_AVX512_CLX 1
+#  define CV_CPU_CALL_AVX512_CLX(fn, args) return (cpu_baseline::fn args)
+#  define CV_CPU_CALL_AVX512_CLX_(fn, args) return (opt_AVX512_CLX::fn args)
+#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX512_CLX
+#  define CV_TRY_AVX512_CLX 1
+#  define CV_CPU_FORCE_AVX512_CLX 0
+#  define CV_CPU_HAS_SUPPORT_AVX512_CLX (cv::checkHardwareSupport(CV_CPU_AVX512_CLX))
+#  define CV_CPU_CALL_AVX512_CLX(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_CLX) return (opt_AVX512_CLX::fn args)
+#  define CV_CPU_CALL_AVX512_CLX_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_CLX) return (opt_AVX512_CLX::fn args)
+#else
+#  define CV_TRY_AVX512_CLX 0
+#  define CV_CPU_FORCE_AVX512_CLX 0
+#  define CV_CPU_HAS_SUPPORT_AVX512_CLX 0
+#  define CV_CPU_CALL_AVX512_CLX(fn, args)
+#  define CV_CPU_CALL_AVX512_CLX_(fn, args)
+#endif
+#define __CV_CPU_DISPATCH_CHAIN_AVX512_CLX(fn, args, mode, ...)  CV_CPU_CALL_AVX512_CLX(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
+
+#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX512_ICL
+#  define CV_TRY_AVX512_ICL 1
+#  define CV_CPU_FORCE_AVX512_ICL 1
+#  define CV_CPU_HAS_SUPPORT_AVX512_ICL 1
+#  define CV_CPU_CALL_AVX512_ICL(fn, args) return (cpu_baseline::fn args)
+#  define CV_CPU_CALL_AVX512_ICL_(fn, args) return (opt_AVX512_ICL::fn args)
+#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX512_ICL
+#  define CV_TRY_AVX512_ICL 1
+#  define CV_CPU_FORCE_AVX512_ICL 0
+#  define CV_CPU_HAS_SUPPORT_AVX512_ICL (cv::checkHardwareSupport(CV_CPU_AVX512_ICL))
+#  define CV_CPU_CALL_AVX512_ICL(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_ICL) return (opt_AVX512_ICL::fn args)
+#  define CV_CPU_CALL_AVX512_ICL_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_ICL) return (opt_AVX512_ICL::fn args)
+#else
+#  define CV_TRY_AVX512_ICL 0
+#  define CV_CPU_FORCE_AVX512_ICL 0
+#  define CV_CPU_HAS_SUPPORT_AVX512_ICL 0
+#  define CV_CPU_CALL_AVX512_ICL(fn, args)
+#  define CV_CPU_CALL_AVX512_ICL_(fn, args)
+#endif
+#define __CV_CPU_DISPATCH_CHAIN_AVX512_ICL(fn, args, mode, ...)  CV_CPU_CALL_AVX512_ICL(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
+
+#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_NEON
+#  define CV_TRY_NEON 1
+#  define CV_CPU_FORCE_NEON 1
+#  define CV_CPU_HAS_SUPPORT_NEON 1
+#  define CV_CPU_CALL_NEON(fn, args) return (cpu_baseline::fn args)
+#  define CV_CPU_CALL_NEON_(fn, args) return (opt_NEON::fn args)
+#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_NEON
+#  define CV_TRY_NEON 1
+#  define CV_CPU_FORCE_NEON 0
+#  define CV_CPU_HAS_SUPPORT_NEON (cv::checkHardwareSupport(CV_CPU_NEON))
+#  define CV_CPU_CALL_NEON(fn, args) if (CV_CPU_HAS_SUPPORT_NEON) return (opt_NEON::fn args)
+#  define CV_CPU_CALL_NEON_(fn, args) if (CV_CPU_HAS_SUPPORT_NEON) return (opt_NEON::fn args)
+#else
+#  define CV_TRY_NEON 0
+#  define CV_CPU_FORCE_NEON 0
+#  define CV_CPU_HAS_SUPPORT_NEON 0
+#  define CV_CPU_CALL_NEON(fn, args)
+#  define CV_CPU_CALL_NEON_(fn, args)
+#endif
+#define __CV_CPU_DISPATCH_CHAIN_NEON(fn, args, mode, ...)  CV_CPU_CALL_NEON(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
+
+#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_MSA
+#  define CV_TRY_MSA 1
+#  define CV_CPU_FORCE_MSA 1
+#  define CV_CPU_HAS_SUPPORT_MSA 1
+#  define CV_CPU_CALL_MSA(fn, args) return (cpu_baseline::fn args)
+#  define CV_CPU_CALL_MSA_(fn, args) return (opt_MSA::fn args)
+#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_MSA
+#  define CV_TRY_MSA 1
+#  define CV_CPU_FORCE_MSA 0
+#  define CV_CPU_HAS_SUPPORT_MSA (cv::checkHardwareSupport(CV_CPU_MSA))
+#  define CV_CPU_CALL_MSA(fn, args) if (CV_CPU_HAS_SUPPORT_MSA) return (opt_MSA::fn args)
+#  define CV_CPU_CALL_MSA_(fn, args) if (CV_CPU_HAS_SUPPORT_MSA) return (opt_MSA::fn args)
+#else
+#  define CV_TRY_MSA 0
+#  define CV_CPU_FORCE_MSA 0
+#  define CV_CPU_HAS_SUPPORT_MSA 0
+#  define CV_CPU_CALL_MSA(fn, args)
+#  define CV_CPU_CALL_MSA_(fn, args)
+#endif
+#define __CV_CPU_DISPATCH_CHAIN_MSA(fn, args, mode, ...)  CV_CPU_CALL_MSA(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
+
+#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_VSX
+#  define CV_TRY_VSX 1
+#  define CV_CPU_FORCE_VSX 1
+#  define CV_CPU_HAS_SUPPORT_VSX 1
+#  define CV_CPU_CALL_VSX(fn, args) return (cpu_baseline::fn args)
+#  define CV_CPU_CALL_VSX_(fn, args) return (opt_VSX::fn args)
+#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_VSX
+#  define CV_TRY_VSX 1
+#  define CV_CPU_FORCE_VSX 0
+#  define CV_CPU_HAS_SUPPORT_VSX (cv::checkHardwareSupport(CV_CPU_VSX))
+#  define CV_CPU_CALL_VSX(fn, args) if (CV_CPU_HAS_SUPPORT_VSX) return (opt_VSX::fn args)
+#  define CV_CPU_CALL_VSX_(fn, args) if (CV_CPU_HAS_SUPPORT_VSX) return (opt_VSX::fn args)
+#else
+#  define CV_TRY_VSX 0
+#  define CV_CPU_FORCE_VSX 0
+#  define CV_CPU_HAS_SUPPORT_VSX 0
+#  define CV_CPU_CALL_VSX(fn, args)
+#  define CV_CPU_CALL_VSX_(fn, args)
+#endif
+#define __CV_CPU_DISPATCH_CHAIN_VSX(fn, args, mode, ...)  CV_CPU_CALL_VSX(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
+
+#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_VSX3
+#  define CV_TRY_VSX3 1
+#  define CV_CPU_FORCE_VSX3 1
+#  define CV_CPU_HAS_SUPPORT_VSX3 1
+#  define CV_CPU_CALL_VSX3(fn, args) return (cpu_baseline::fn args)
+#  define CV_CPU_CALL_VSX3_(fn, args) return (opt_VSX3::fn args)
+#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_VSX3
+#  define CV_TRY_VSX3 1
+#  define CV_CPU_FORCE_VSX3 0
+#  define CV_CPU_HAS_SUPPORT_VSX3 (cv::checkHardwareSupport(CV_CPU_VSX3))
+#  define CV_CPU_CALL_VSX3(fn, args) if (CV_CPU_HAS_SUPPORT_VSX3) return (opt_VSX3::fn args)
+#  define CV_CPU_CALL_VSX3_(fn, args) if (CV_CPU_HAS_SUPPORT_VSX3) return (opt_VSX3::fn args)
+#else
+#  define CV_TRY_VSX3 0
+#  define CV_CPU_FORCE_VSX3 0
+#  define CV_CPU_HAS_SUPPORT_VSX3 0
+#  define CV_CPU_CALL_VSX3(fn, args)
+#  define CV_CPU_CALL_VSX3_(fn, args)
+#endif
+#define __CV_CPU_DISPATCH_CHAIN_VSX3(fn, args, mode, ...)  CV_CPU_CALL_VSX3(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
+
+#define CV_CPU_CALL_BASELINE(fn, args) return (cpu_baseline::fn args)
+#define __CV_CPU_DISPATCH_CHAIN_BASELINE(fn, args, mode, ...)  CV_CPU_CALL_BASELINE(fn, args) /* last in sequence */
diff --git a/IPL/include/opencv/opencv2/core/cvdef.h b/IPL/include/opencv/opencv2/core/cvdef.h
index af2abfb..e66a646 100644
--- a/IPL/include/opencv/opencv2/core/cvdef.h
+++ b/IPL/include/opencv/opencv2/core/cvdef.h
@@ -42,16 +42,124 @@
 //
 //M*/
 
-#ifndef __OPENCV_CORE_CVDEF_H__
-#define __OPENCV_CORE_CVDEF_H__
+#ifndef OPENCV_CORE_CVDEF_H
+#define OPENCV_CORE_CVDEF_H
 
 //! @addtogroup core_utils
 //! @{
 
-#if !defined _CRT_SECURE_NO_DEPRECATE && defined _MSC_VER && _MSC_VER > 1300
-#  define _CRT_SECURE_NO_DEPRECATE /* to avoid multiple Visual Studio warnings */
+#ifdef OPENCV_INCLUDE_PORT_FILE  // User-provided header file with custom platform configuration
+#include OPENCV_INCLUDE_PORT_FILE
 #endif
 
+#if !defined CV_DOXYGEN && !defined CV_IGNORE_DEBUG_BUILD_GUARD
+#if (defined(_MSC_VER) && (defined(DEBUG) || defined(_DEBUG))) || \
+    (defined(_GLIBCXX_DEBUG) || defined(_GLIBCXX_DEBUG_PEDANTIC))
+// Guard to prevent the use of binary-incompatible binaries / runtimes
+// https://github.com/opencv/opencv/pull/9161
+#define CV__DEBUG_NS_BEGIN namespace debug_build_guard {
+#define CV__DEBUG_NS_END }
+namespace cv { namespace debug_build_guard { } using namespace debug_build_guard; }
+#endif
+#endif
+
+#ifndef CV__DEBUG_NS_BEGIN
+#define CV__DEBUG_NS_BEGIN
+#define CV__DEBUG_NS_END
+#endif
+
+
+#ifdef __OPENCV_BUILD
+#include "cvconfig.h"
+#endif
+
+#ifndef __CV_EXPAND
+#define __CV_EXPAND(x) x
+#endif
+
+#ifndef __CV_CAT
+#define __CV_CAT__(x, y) x ## y
+#define __CV_CAT_(x, y) __CV_CAT__(x, y)
+#define __CV_CAT(x, y) __CV_CAT_(x, y)
+#endif
+
+#define __CV_VA_NUM_ARGS_HELPER(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, N, ...) N
+#define __CV_VA_NUM_ARGS(...) __CV_EXPAND(__CV_VA_NUM_ARGS_HELPER(__VA_ARGS__, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0))
+
+#if defined __GNUC__
+#define CV_Func __func__
+#elif defined _MSC_VER
+#define CV_Func __FUNCTION__
+#else
+#define CV_Func ""
+#endif
+
+//! @cond IGNORED
+
+//////////////// static assert /////////////////
+#define CVAUX_CONCAT_EXP(a, b) a##b
+#define CVAUX_CONCAT(a, b) CVAUX_CONCAT_EXP(a,b)
+
+#if defined(__clang__)
+#  ifndef __has_extension
+#    define __has_extension __has_feature /* compatibility, for older versions of clang */
+#  endif
+#  if __has_extension(cxx_static_assert)
+#    define CV_StaticAssert(condition, reason)    static_assert((condition), reason " " #condition)
+#  elif __has_extension(c_static_assert)
+#    define CV_StaticAssert(condition, reason)    _Static_assert((condition), reason " " #condition)
+#  endif
+#elif defined(__GNUC__)
+#  if (defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103L)
+#    define CV_StaticAssert(condition, reason)    static_assert((condition), reason " " #condition)
+#  endif
+#elif defined(_MSC_VER)
+#  if _MSC_VER >= 1600 /* MSVC 10 */
+#    define CV_StaticAssert(condition, reason)    static_assert((condition), reason " " #condition)
+#  endif
+#endif
+#ifndef CV_StaticAssert
+#  if !defined(__clang__) && defined(__GNUC__) && (__GNUC__*100 + __GNUC_MINOR__ > 302)
+#    define CV_StaticAssert(condition, reason) ({ extern int __attribute__((error("CV_StaticAssert: " reason " " #condition))) CV_StaticAssert(); ((condition) ? 0 : CV_StaticAssert()); })
+#  else
+namespace cv {
+     template <bool x> struct CV_StaticAssert_failed;
+     template <> struct CV_StaticAssert_failed<true> { enum { val = 1 }; };
+     template<int x> struct CV_StaticAssert_test {};
+}
+#    define CV_StaticAssert(condition, reason)\
+       typedef cv::CV_StaticAssert_test< sizeof(cv::CV_StaticAssert_failed< static_cast<bool>(condition) >) > CVAUX_CONCAT(CV_StaticAssert_failed_at_, __LINE__)
+#  endif
+#endif
+
+// Suppress warning "-Wdeprecated-declarations" / C4996
+#if defined(_MSC_VER)
+    #define CV_DO_PRAGMA(x) __pragma(x)
+#elif defined(__GNUC__)
+    #define CV_DO_PRAGMA(x) _Pragma (#x)
+#else
+    #define CV_DO_PRAGMA(x)
+#endif
+
+#ifdef _MSC_VER
+#define CV_SUPPRESS_DEPRECATED_START \
+    CV_DO_PRAGMA(warning(push)) \
+    CV_DO_PRAGMA(warning(disable: 4996))
+#define CV_SUPPRESS_DEPRECATED_END CV_DO_PRAGMA(warning(pop))
+#elif defined (__clang__) || ((__GNUC__)  && (__GNUC__*100 + __GNUC_MINOR__ > 405))
+#define CV_SUPPRESS_DEPRECATED_START \
+    CV_DO_PRAGMA(GCC diagnostic push) \
+    CV_DO_PRAGMA(GCC diagnostic ignored "-Wdeprecated-declarations")
+#define CV_SUPPRESS_DEPRECATED_END CV_DO_PRAGMA(GCC diagnostic pop)
+#else
+#define CV_SUPPRESS_DEPRECATED_START
+#define CV_SUPPRESS_DEPRECATED_END
+#endif
+
+#define CV_UNUSED(name) (void)name
+
+//! @endcond
+
 // undef problematic defines sometimes defined by system headers (windows.h in particular)
 #undef small
 #undef min
@@ -59,11 +167,12 @@
 #undef abs
 #undef Complex
 
-#if !defined _CRT_SECURE_NO_DEPRECATE && defined _MSC_VER && _MSC_VER > 1300
-#  define _CRT_SECURE_NO_DEPRECATE /* to avoid multiple Visual Studio warnings */
+#if defined __cplusplus
+#include <limits>
+#else
+#include <limits.h>
 #endif
 
-#include <limits.h>
 #include "opencv2/core/hal/interface.h"
 
 #if defined __ICL
@@ -88,7 +197,17 @@
 #  endif
 #endif
 
-#if defined CV_ICC && !defined CV_ENABLE_UNROLLED
+#ifndef CV_ALWAYS_INLINE
+#if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
+#define CV_ALWAYS_INLINE inline __attribute__((always_inline))
+#elif defined(_MSC_VER)
+#define CV_ALWAYS_INLINE __forceinline
+#else
+#define CV_ALWAYS_INLINE inline
+#endif
+#endif
+
+#if defined CV_DISABLE_OPTIMIZATION || (defined CV_ICC && !defined CV_ENABLE_UNROLLED)
 #  define CV_ENABLE_UNROLLED 0
 #else
 #  define CV_ENABLE_UNROLLED 1
@@ -112,7 +231,7 @@
 #define CV_CPU_SSE4_1           6
 #define CV_CPU_SSE4_2           7
 #define CV_CPU_POPCNT           8
-
+#define CV_CPU_FP16             9
 #define CV_CPU_AVX              10
 #define CV_CPU_AVX2             11
 #define CV_CPU_FMA3             12
@@ -122,15 +241,36 @@
 #define CV_CPU_AVX_512CD        15
 #define CV_CPU_AVX_512DQ        16
 #define CV_CPU_AVX_512ER        17
-#define CV_CPU_AVX_512IFMA512   18
+#define CV_CPU_AVX_512IFMA512   18 // deprecated
+#define CV_CPU_AVX_512IFMA      18
 #define CV_CPU_AVX_512PF        19
 #define CV_CPU_AVX_512VBMI      20
 #define CV_CPU_AVX_512VL        21
+#define CV_CPU_AVX_512VBMI2     22
+#define CV_CPU_AVX_512VNNI      23
+#define CV_CPU_AVX_512BITALG    24
+#define CV_CPU_AVX_512VPOPCNTDQ 25
+#define CV_CPU_AVX_5124VNNIW    26
+#define CV_CPU_AVX_5124FMAPS    27
+
+#define CV_CPU_NEON             100
 
-#define CV_CPU_NEON   100
+#define CV_CPU_MSA              150
+
+#define CV_CPU_VSX              200
+#define CV_CPU_VSX3             201
+
+// CPU features groups
+#define CV_CPU_AVX512_SKX       256
+#define CV_CPU_AVX512_COMMON    257
+#define CV_CPU_AVX512_KNL       258
+#define CV_CPU_AVX512_KNM       259
+#define CV_CPU_AVX512_CNL       260
+#define CV_CPU_AVX512_CLX       261
+#define CV_CPU_AVX512_ICL       262
 
 // when adding to this list remember to update the following enum
-#define CV_HARDWARE_MAX_FEATURE 255
+#define CV_HARDWARE_MAX_FEATURE 512
 
 /** @brief Available CPU features.
 */
@@ -143,7 +283,7 @@ enum CpuFeatures {
     CPU_SSE4_1          = 6,
     CPU_SSE4_2          = 7,
     CPU_POPCNT          = 8,
-
+    CPU_FP16            = 9,
     CPU_AVX             = 10,
     CPU_AVX2            = 11,
     CPU_FMA3            = 12,
@@ -153,156 +293,69 @@ enum CpuFeatures {
     CPU_AVX_512CD       = 15,
     CPU_AVX_512DQ       = 16,
     CPU_AVX_512ER       = 17,
-    CPU_AVX_512IFMA512  = 18,
+    CPU_AVX_512IFMA512  = 18, // deprecated
+    CPU_AVX_512IFMA     = 18,
     CPU_AVX_512PF       = 19,
     CPU_AVX_512VBMI     = 20,
     CPU_AVX_512VL       = 21,
+    CPU_AVX_512VBMI2    = 22,
+    CPU_AVX_512VNNI     = 23,
+    CPU_AVX_512BITALG   = 24,
+    CPU_AVX_512VPOPCNTDQ= 25,
+    CPU_AVX_5124VNNIW   = 26,
+    CPU_AVX_5124FMAPS   = 27,
 
-    CPU_NEON            = 100
-};
+    CPU_NEON            = 100,
 
-// do not include SSE/AVX/NEON headers for NVCC compiler
-#ifndef __CUDACC__
-
-#if defined __SSE2__ || defined _M_X64 || (defined _M_IX86_FP && _M_IX86_FP >= 2)
-#  include <emmintrin.h>
-#  define CV_MMX 1
-#  define CV_SSE 1
-#  define CV_SSE2 1
-#  if defined __SSE3__ || (defined _MSC_VER && _MSC_VER >= 1500)
-#    include <pmmintrin.h>
-#    define CV_SSE3 1
-#  endif
-#  if defined __SSSE3__  || (defined _MSC_VER && _MSC_VER >= 1500)
-#    include <tmmintrin.h>
-#    define CV_SSSE3 1
-#  endif
-#  if defined __SSE4_1__ || (defined _MSC_VER && _MSC_VER >= 1500)
-#    include <smmintrin.h>
-#    define CV_SSE4_1 1
-#  endif
-#  if defined __SSE4_2__ || (defined _MSC_VER && _MSC_VER >= 1500)
-#    include <nmmintrin.h>
-#    define CV_SSE4_2 1
-#  endif
-#  if defined __POPCNT__ || (defined _MSC_VER && _MSC_VER >= 1500)
-#    ifdef _MSC_VER
-#      include <nmmintrin.h>
-#    else
-#      include <popcntintrin.h>
-#    endif
-#    define CV_POPCNT 1
-#  endif
-#  if defined __AVX__ || (defined _MSC_VER && _MSC_VER >= 1600 && 0)
-// MS Visual Studio 2010 (2012?) has no macro pre-defined to identify the use of /arch:AVX
-// See: http://connect.microsoft.com/VisualStudio/feedback/details/605858/arch-avx-should-define-a-predefined-macro-in-x64-and-set-a-unique-value-for-m-ix86-fp-in-win32
-#    include <immintrin.h>
-#    define CV_AVX 1
-#    if defined(_XCR_XFEATURE_ENABLED_MASK)
-#      define __xgetbv() _xgetbv(_XCR_XFEATURE_ENABLED_MASK)
-#    else
-#      define __xgetbv() 0
-#    endif
-#  endif
-#  if defined __AVX2__ || (defined _MSC_VER && _MSC_VER >= 1800 && 0)
-#    include <immintrin.h>
-#    define CV_AVX2 1
-#    if defined __FMA__
-#      define CV_FMA3 1
-#    endif
-#  endif
-#endif
+    CPU_MSA             = 150,
 
-#if (defined WIN32 || defined _WIN32) && defined(_M_ARM)
-# include <Intrin.h>
-# include "arm_neon.h"
-# define CV_NEON 1
-# define CPU_HAS_NEON_FEATURE (true)
-#elif defined(__ARM_NEON__) || (defined (__ARM_NEON) && defined(__aarch64__))
-#  include <arm_neon.h>
-#  define CV_NEON 1
-#endif
+    CPU_VSX             = 200,
+    CPU_VSX3            = 201,
 
-#if defined __GNUC__ && defined __arm__ && (defined __ARM_PCS_VFP || defined __ARM_VFPV3__ || defined __ARM_NEON__) && !defined __SOFTFP__
-#  define CV_VFP 1
-#endif
+    CPU_AVX512_SKX      = 256, //!< Skylake-X with AVX-512F/CD/BW/DQ/VL
+    CPU_AVX512_COMMON   = 257, //!< Common instructions AVX-512F/CD for all CPUs that support AVX-512
+    CPU_AVX512_KNL      = 258, //!< Knights Landing with AVX-512F/CD/ER/PF
+    CPU_AVX512_KNM      = 259, //!< Knights Mill with AVX-512F/CD/ER/PF/4FMAPS/4VNNIW/VPOPCNTDQ
+    CPU_AVX512_CNL      = 260, //!< Cannon Lake with AVX-512F/CD/BW/DQ/VL/IFMA/VBMI
+    CPU_AVX512_CLX      = 261, //!< Cascade Lake with AVX-512F/CD/BW/DQ/VL/VNNI
+    CPU_AVX512_ICL      = 262, //!< Ice Lake with AVX-512F/CD/BW/DQ/VL/IFMA/VBMI/VNNI/VBMI2/BITALG/VPOPCNTDQ
 
-#endif // __CUDACC__
+    CPU_MAX_FEATURE     = 512  // see CV_HARDWARE_MAX_FEATURE
+};
 
-#ifndef CV_POPCNT
-#define CV_POPCNT 0
-#endif
-#ifndef CV_MMX
-#  define CV_MMX 0
-#endif
-#ifndef CV_SSE
-#  define CV_SSE 0
-#endif
-#ifndef CV_SSE2
-#  define CV_SSE2 0
-#endif
-#ifndef CV_SSE3
-#  define CV_SSE3 0
-#endif
-#ifndef CV_SSSE3
-#  define CV_SSSE3 0
-#endif
-#ifndef CV_SSE4_1
-#  define CV_SSE4_1 0
-#endif
-#ifndef CV_SSE4_2
-#  define CV_SSE4_2 0
-#endif
-#ifndef CV_AVX
-#  define CV_AVX 0
-#endif
-#ifndef CV_AVX2
-#  define CV_AVX2 0
-#endif
-#ifndef CV_FMA3
-#  define CV_FMA3 0
-#endif
-#ifndef CV_AVX_512F
-#  define CV_AVX_512F 0
-#endif
-#ifndef CV_AVX_512BW
-#  define CV_AVX_512BW 0
-#endif
-#ifndef CV_AVX_512CD
-#  define CV_AVX_512CD 0
-#endif
-#ifndef CV_AVX_512DQ
-#  define CV_AVX_512DQ 0
-#endif
-#ifndef CV_AVX_512ER
-#  define CV_AVX_512ER 0
-#endif
-#ifndef CV_AVX_512IFMA512
-#  define CV_AVX_512IFMA512 0
-#endif
-#ifndef CV_AVX_512PF
-#  define CV_AVX_512PF 0
-#endif
-#ifndef CV_AVX_512VBMI
-#  define CV_AVX_512VBMI 0
-#endif
-#ifndef CV_AVX_512VL
-#  define CV_AVX_512VL 0
-#endif
 
-#ifndef CV_NEON
-#  define CV_NEON 0
-#endif
+#include "cv_cpu_dispatch.h"
 
-#ifndef CV_VFP
-#  define CV_VFP 0
+#if !defined(CV_STRONG_ALIGNMENT) && defined(__arm__) && !(defined(__aarch64__) || defined(_M_ARM64))
+// int*, int64* should be properly aligned pointers on ARMv7
+#define CV_STRONG_ALIGNMENT 1
+#endif
+#if !defined(CV_STRONG_ALIGNMENT)
+#define CV_STRONG_ALIGNMENT 0
 #endif
 
 /* fundamental constants */
 #define CV_PI   3.1415926535897932384626433832795
-#define CV_2PI 6.283185307179586476925286766559
+#define CV_2PI  6.283185307179586476925286766559
 #define CV_LOG2 0.69314718055994530941723212145818
 
+#if defined __ARM_FP16_FORMAT_IEEE \
+    && !defined __CUDACC__
+#  define CV_FP16_TYPE 1
+#else
+#  define CV_FP16_TYPE 0
+#endif
+
+typedef union Cv16suf  /* one storage cell overlaid by signed, unsigned and (optionally) half-float members */
+{
+    short i;   /* the stored bits read as a signed short */
+    ushort u;  /* the same storage read through the ushort typedef */
+#if CV_FP16_TYPE
+    __fp16 h;  /* half-float member; compiled in only when CV_FP16_TYPE is 1 (__ARM_FP16_FORMAT_IEEE defined, not under __CUDACC__) */
+#endif
+}
+Cv16suf;
+
 typedef union Cv32suf
 {
     int i;
@@ -319,20 +372,50 @@ typedef union Cv64suf
 }
 Cv64suf;
 
-#define OPENCV_ABI_COMPATIBILITY 300
+#define OPENCV_ABI_COMPATIBILITY 400
 
 #ifdef __OPENCV_BUILD
-#  define DISABLE_OPENCV_24_COMPATIBILITY
+#  define DISABLE_OPENCV_3_COMPATIBILITY
+#  define OPENCV_DISABLE_DEPRECATED_COMPATIBILITY
 #endif
 
-#if (defined WIN32 || defined _WIN32 || defined WINCE || defined __CYGWIN__) && defined CVAPI_EXPORTS
-#  define CV_EXPORTS __declspec(dllexport)
-#elif defined __GNUC__ && __GNUC__ >= 4
-#  define CV_EXPORTS __attribute__ ((visibility ("default")))
+#ifndef CV_EXPORTS
+# if (defined _WIN32 || defined WINCE || defined __CYGWIN__) && defined(CVAPI_EXPORTS)
+#   define CV_EXPORTS __declspec(dllexport)
+# elif defined __GNUC__ && __GNUC__ >= 4 && (defined(CVAPI_EXPORTS) || defined(__APPLE__))
+#   define CV_EXPORTS __attribute__ ((visibility ("default")))
+# endif
+#endif
+
+#ifndef CV_EXPORTS
+# define CV_EXPORTS
+#endif
+
+#ifdef _MSC_VER
+#   define CV_EXPORTS_TEMPLATE
 #else
-#  define CV_EXPORTS
+#   define CV_EXPORTS_TEMPLATE CV_EXPORTS
+#endif
+
+#ifndef CV_DEPRECATED
+#  if defined(__GNUC__)
+#    define CV_DEPRECATED __attribute__ ((deprecated))
+#  elif defined(_MSC_VER)
+#    define CV_DEPRECATED __declspec(deprecated)
+#  else
+#    define CV_DEPRECATED
+#  endif
+#endif
+
+#ifndef CV_DEPRECATED_EXTERNAL
+#  if defined(__OPENCV_BUILD)
+#    define CV_DEPRECATED_EXTERNAL /* nothing */
+#  else
+#    define CV_DEPRECATED_EXTERNAL CV_DEPRECATED
+#  endif
 #endif
 
+
 #ifndef CV_EXTERN_C
 #  ifdef __cplusplus
 #    define CV_EXTERN_C extern "C"
@@ -352,72 +435,14 @@ Cv64suf;
 #define CV_PROP_RW
 #define CV_WRAP
 #define CV_WRAP_AS(synonym)
+#define CV_WRAP_MAPPABLE(mappable)
+#define CV_WRAP_PHANTOM(phantom_header)
+#define CV_WRAP_DEFAULT(val)
 
 /****************************************************************************************\
 *                                  Matrix type (Mat)                                     *
 \****************************************************************************************/
 
-#define CV_CN_MAX     512
-#define CV_CN_SHIFT   3
-#define CV_DEPTH_MAX  (1 << CV_CN_SHIFT)
-
-#define CV_8U   0
-#define CV_8S   1
-#define CV_16U  2
-#define CV_16S  3
-#define CV_32S  4
-#define CV_32F  5
-#define CV_64F  6
-#define CV_USRTYPE1 7
-
-#define CV_MAT_DEPTH_MASK       (CV_DEPTH_MAX - 1)
-#define CV_MAT_DEPTH(flags)     ((flags) & CV_MAT_DEPTH_MASK)
-
-#define CV_MAKETYPE(depth,cn) (CV_MAT_DEPTH(depth) + (((cn)-1) << CV_CN_SHIFT))
-#define CV_MAKE_TYPE CV_MAKETYPE
-
-#define CV_8UC1 CV_MAKETYPE(CV_8U,1)
-#define CV_8UC2 CV_MAKETYPE(CV_8U,2)
-#define CV_8UC3 CV_MAKETYPE(CV_8U,3)
-#define CV_8UC4 CV_MAKETYPE(CV_8U,4)
-#define CV_8UC(n) CV_MAKETYPE(CV_8U,(n))
-
-#define CV_8SC1 CV_MAKETYPE(CV_8S,1)
-#define CV_8SC2 CV_MAKETYPE(CV_8S,2)
-#define CV_8SC3 CV_MAKETYPE(CV_8S,3)
-#define CV_8SC4 CV_MAKETYPE(CV_8S,4)
-#define CV_8SC(n) CV_MAKETYPE(CV_8S,(n))
-
-#define CV_16UC1 CV_MAKETYPE(CV_16U,1)
-#define CV_16UC2 CV_MAKETYPE(CV_16U,2)
-#define CV_16UC3 CV_MAKETYPE(CV_16U,3)
-#define CV_16UC4 CV_MAKETYPE(CV_16U,4)
-#define CV_16UC(n) CV_MAKETYPE(CV_16U,(n))
-
-#define CV_16SC1 CV_MAKETYPE(CV_16S,1)
-#define CV_16SC2 CV_MAKETYPE(CV_16S,2)
-#define CV_16SC3 CV_MAKETYPE(CV_16S,3)
-#define CV_16SC4 CV_MAKETYPE(CV_16S,4)
-#define CV_16SC(n) CV_MAKETYPE(CV_16S,(n))
-
-#define CV_32SC1 CV_MAKETYPE(CV_32S,1)
-#define CV_32SC2 CV_MAKETYPE(CV_32S,2)
-#define CV_32SC3 CV_MAKETYPE(CV_32S,3)
-#define CV_32SC4 CV_MAKETYPE(CV_32S,4)
-#define CV_32SC(n) CV_MAKETYPE(CV_32S,(n))
-
-#define CV_32FC1 CV_MAKETYPE(CV_32F,1)
-#define CV_32FC2 CV_MAKETYPE(CV_32F,2)
-#define CV_32FC3 CV_MAKETYPE(CV_32F,3)
-#define CV_32FC4 CV_MAKETYPE(CV_32F,4)
-#define CV_32FC(n) CV_MAKETYPE(CV_32F,(n))
-
-#define CV_64FC1 CV_MAKETYPE(CV_64F,1)
-#define CV_64FC2 CV_MAKETYPE(CV_64F,2)
-#define CV_64FC3 CV_MAKETYPE(CV_64F,3)
-#define CV_64FC4 CV_MAKETYPE(CV_64F,4)
-#define CV_64FC(n) CV_MAKETYPE(CV_64F,(n))
-
 #define CV_MAT_CN_MASK          ((CV_CN_MAX - 1) << CV_CN_SHIFT)
 #define CV_MAT_CN(flags)        ((((flags) & CV_MAT_CN_MASK) >> CV_CN_SHIFT) + 1)
 #define CV_MAT_TYPE_MASK        (CV_DEPTH_MAX*CV_CN_MAX - 1)
@@ -431,13 +456,10 @@ Cv64suf;
 #define CV_IS_SUBMAT(flags)     ((flags) & CV_MAT_SUBMAT_FLAG)
 
 /** Size of each channel item,
-   0x124489 = 1000 0100 0100 0010 0010 0001 0001 ~ array of sizeof(arr_type_elem) */
-#define CV_ELEM_SIZE1(type) \
-    ((((sizeof(size_t)<<28)|0x8442211) >> CV_MAT_DEPTH(type)*4) & 15)
+   0x28442211 = 0010 1000 0100 0100 0010 0010 0001 0001 ~ array of sizeof(arr_type_elem) */
+#define CV_ELEM_SIZE1(type) ((0x28442211 >> CV_MAT_DEPTH(type)*4) & 15)
 
-/** 0x3a50 = 11 10 10 01 01 00 00 ~ array of log2(sizeof(arr_type_elem)) */
-#define CV_ELEM_SIZE(type) \
-    (CV_MAT_CN(type) << ((((sizeof(size_t)/4+1)*16384|0x3a50) >> CV_MAT_DEPTH(type)*2) & 3))
+#define CV_ELEM_SIZE(type) (CV_MAT_CN(type)*CV_ELEM_SIZE1(type))
 
 #ifndef MIN
 #  define MIN(a,b)  ((a) > (b) ? (b) : (a))
@@ -447,15 +469,179 @@ Cv64suf;
 #  define MAX(a,b)  ((a) < (b) ? (b) : (a))
 #endif
 
+///////////////////////////////////////// Enum operators ///////////////////////////////////////
+
+/**
+
+Provides compatibility operators for both classical and C++11 enum classes,
+as well as exposing the C++11 enum class members for backwards compatibility
+
+@code
+    // Provides operators required for flag enums
+    CV_ENUM_FLAGS(AccessFlag)
+
+    // Exposes the listed members of the enum class AccessFlag to the current namespace
+    CV_ENUM_CLASS_EXPOSE(AccessFlag, ACCESS_READ [, ACCESS_WRITE [, ...] ]);
+@endcode
+*/
+
+#define __CV_ENUM_CLASS_EXPOSE_1(EnumType, MEMBER_CONST)                                              \
+static const EnumType MEMBER_CONST = EnumType::MEMBER_CONST;                                          \
+
+#define __CV_ENUM_CLASS_EXPOSE_2(EnumType, MEMBER_CONST, ...)                                         \
+__CV_ENUM_CLASS_EXPOSE_1(EnumType, MEMBER_CONST);                                                     \
+__CV_EXPAND(__CV_ENUM_CLASS_EXPOSE_1(EnumType, __VA_ARGS__));                                         \
+
+#define __CV_ENUM_CLASS_EXPOSE_3(EnumType, MEMBER_CONST, ...)                                         \
+__CV_ENUM_CLASS_EXPOSE_1(EnumType, MEMBER_CONST);                                                     \
+__CV_EXPAND(__CV_ENUM_CLASS_EXPOSE_2(EnumType, __VA_ARGS__));                                         \
+
+#define __CV_ENUM_CLASS_EXPOSE_4(EnumType, MEMBER_CONST, ...)                                         \
+__CV_ENUM_CLASS_EXPOSE_1(EnumType, MEMBER_CONST);                                                     \
+__CV_EXPAND(__CV_ENUM_CLASS_EXPOSE_3(EnumType, __VA_ARGS__));                                         \
+
+#define __CV_ENUM_CLASS_EXPOSE_5(EnumType, MEMBER_CONST, ...)                                         \
+__CV_ENUM_CLASS_EXPOSE_1(EnumType, MEMBER_CONST);                                                     \
+__CV_EXPAND(__CV_ENUM_CLASS_EXPOSE_4(EnumType, __VA_ARGS__));                                         \
+
+#define __CV_ENUM_CLASS_EXPOSE_6(EnumType, MEMBER_CONST, ...)                                         \
+__CV_ENUM_CLASS_EXPOSE_1(EnumType, MEMBER_CONST);                                                     \
+__CV_EXPAND(__CV_ENUM_CLASS_EXPOSE_5(EnumType, __VA_ARGS__));                                         \
+
+#define __CV_ENUM_CLASS_EXPOSE_7(EnumType, MEMBER_CONST, ...)                                         \
+__CV_ENUM_CLASS_EXPOSE_1(EnumType, MEMBER_CONST);                                                     \
+__CV_EXPAND(__CV_ENUM_CLASS_EXPOSE_6(EnumType, __VA_ARGS__));                                         \
+
+#define __CV_ENUM_CLASS_EXPOSE_8(EnumType, MEMBER_CONST, ...)                                         \
+__CV_ENUM_CLASS_EXPOSE_1(EnumType, MEMBER_CONST);                                                     \
+__CV_EXPAND(__CV_ENUM_CLASS_EXPOSE_7(EnumType, __VA_ARGS__));                                         \
+
+#define __CV_ENUM_CLASS_EXPOSE_9(EnumType, MEMBER_CONST, ...)                                         \
+__CV_ENUM_CLASS_EXPOSE_1(EnumType, MEMBER_CONST);                                                     \
+__CV_EXPAND(__CV_ENUM_CLASS_EXPOSE_8(EnumType, __VA_ARGS__));                                         \
+
+#define __CV_ENUM_FLAGS_LOGICAL_NOT(EnumType)                                                         \
+static inline bool operator!(const EnumType& val)                                                     \
+{                                                                                                     \
+    typedef std::underlying_type<EnumType>::type UnderlyingType;                                      \
+    return !static_cast<UnderlyingType>(val);                                                         \
+}                                                                                                     \
+
+#define __CV_ENUM_FLAGS_LOGICAL_NOT_EQ(Arg1Type, Arg2Type)                                            \
+static inline bool operator!=(const Arg1Type& a, const Arg2Type& b)                                   \
+{                                                                                                     \
+    return static_cast<int>(a) != static_cast<int>(b);                                                \
+}                                                                                                     \
+
+#define __CV_ENUM_FLAGS_LOGICAL_EQ(Arg1Type, Arg2Type)                                                \
+static inline bool operator==(const Arg1Type& a, const Arg2Type& b)                                   \
+{                                                                                                     \
+    return static_cast<int>(a) == static_cast<int>(b);                                                \
+}                                                                                                     \
+
+#define __CV_ENUM_FLAGS_BITWISE_NOT(EnumType)                                                         \
+static inline EnumType operator~(const EnumType& val)                                                 \
+{                                                                                                     \
+    typedef std::underlying_type<EnumType>::type UnderlyingType;                                      \
+    return static_cast<EnumType>(~static_cast<UnderlyingType>(val));                                  \
+}                                                                                                     \
+
+#define __CV_ENUM_FLAGS_BITWISE_OR(EnumType, Arg1Type, Arg2Type)                                      \
+static inline EnumType operator|(const Arg1Type& a, const Arg2Type& b)                                \
+{                                                                                                     \
+    typedef std::underlying_type<EnumType>::type UnderlyingType;                                      \
+    return static_cast<EnumType>(static_cast<UnderlyingType>(a) | static_cast<UnderlyingType>(b));    \
+}                                                                                                     \
+
+#define __CV_ENUM_FLAGS_BITWISE_AND(EnumType, Arg1Type, Arg2Type)                                     \
+static inline EnumType operator&(const Arg1Type& a, const Arg2Type& b)                                \
+{                                                                                                     \
+    typedef std::underlying_type<EnumType>::type UnderlyingType;                                      \
+    return static_cast<EnumType>(static_cast<UnderlyingType>(a) & static_cast<UnderlyingType>(b));    \
+}                                                                                                     \
+
+#define __CV_ENUM_FLAGS_BITWISE_XOR(EnumType, Arg1Type, Arg2Type)                                     \
+static inline EnumType operator^(const Arg1Type& a, const Arg2Type& b)                                \
+{                                                                                                     \
+    typedef std::underlying_type<EnumType>::type UnderlyingType;                                      \
+    return static_cast<EnumType>(static_cast<UnderlyingType>(a) ^ static_cast<UnderlyingType>(b));    \
+}                                                                                                     \
+
+#define __CV_ENUM_FLAGS_BITWISE_OR_EQ(EnumType, Arg1Type)                                             \
+static inline EnumType& operator|=(EnumType& _this, const Arg1Type& val)                              \
+{                                                                                                     \
+    _this = static_cast<EnumType>(static_cast<int>(_this) | static_cast<int>(val));                   \
+    return _this;                                                                                     \
+}                                                                                                     \
+
+#define __CV_ENUM_FLAGS_BITWISE_AND_EQ(EnumType, Arg1Type)                                            \
+static inline EnumType& operator&=(EnumType& _this, const Arg1Type& val)                              \
+{                                                                                                     \
+    _this = static_cast<EnumType>(static_cast<int>(_this) & static_cast<int>(val));                   \
+    return _this;                                                                                     \
+}                                                                                                     \
+
+#define __CV_ENUM_FLAGS_BITWISE_XOR_EQ(EnumType, Arg1Type)                                            \
+static inline EnumType& operator^=(EnumType& _this, const Arg1Type& val)                              \
+{                                                                                                     \
+    _this = static_cast<EnumType>(static_cast<int>(_this) ^ static_cast<int>(val));                   \
+    return _this;                                                                                     \
+}                                                                                                     \
+
+#define CV_ENUM_CLASS_EXPOSE(EnumType, ...)                                                           \
+__CV_EXPAND(__CV_CAT(__CV_ENUM_CLASS_EXPOSE_, __CV_VA_NUM_ARGS(__VA_ARGS__))(EnumType, __VA_ARGS__)); \
+
+#define CV_ENUM_FLAGS(EnumType)                                                                       \
+__CV_ENUM_FLAGS_LOGICAL_NOT      (EnumType)                                                           \
+__CV_ENUM_FLAGS_LOGICAL_EQ       (EnumType, int)                                                      \
+__CV_ENUM_FLAGS_LOGICAL_NOT_EQ   (EnumType, int)                                                      \
+                                                                                                      \
+__CV_ENUM_FLAGS_BITWISE_NOT      (EnumType)                                                           \
+__CV_ENUM_FLAGS_BITWISE_OR       (EnumType, EnumType, EnumType)                                       \
+__CV_ENUM_FLAGS_BITWISE_AND      (EnumType, EnumType, EnumType)                                       \
+__CV_ENUM_FLAGS_BITWISE_XOR      (EnumType, EnumType, EnumType)                                       \
+                                                                                                      \
+__CV_ENUM_FLAGS_BITWISE_OR_EQ    (EnumType, EnumType)                                                 \
+__CV_ENUM_FLAGS_BITWISE_AND_EQ   (EnumType, EnumType)                                                 \
+__CV_ENUM_FLAGS_BITWISE_XOR_EQ   (EnumType, EnumType)                                                 \
+
+/****************************************************************************************\
+*                                    static analysis                                     *
+\****************************************************************************************/
+
+// In practice, some macros are not processed correctly (noreturn is not detected).
+// We need to use simplified definitions for them.
+#ifndef CV_STATIC_ANALYSIS
+# if defined(__KLOCWORK__) || defined(__clang_analyzer__) || defined(__COVERITY__)
+#   define CV_STATIC_ANALYSIS 1
+# endif
+#else
+# if defined(CV_STATIC_ANALYSIS) && !(__CV_CAT(1, CV_STATIC_ANALYSIS) == 1)  // defined and not empty
+#   if 0 == CV_STATIC_ANALYSIS
+#     undef CV_STATIC_ANALYSIS
+#   endif
+# endif
+#endif
+
+/****************************************************************************************\
+*                                    Thread sanitizer                                    *
+\****************************************************************************************/
+#ifndef CV_THREAD_SANITIZER
+# if defined(__has_feature)
+#   if __has_feature(thread_sanitizer)
+#     define CV_THREAD_SANITIZER
+#   endif
+# endif
+#endif
+
 /****************************************************************************************\
 *          exchange-add operation for atomic operations on reference counters            *
 \****************************************************************************************/
 
-#if defined __INTEL_COMPILER && !(defined WIN32 || defined _WIN32)
-   // atomic increment on the linux version of the Intel(tm) compiler
-#  define CV_XADD(addr, delta) (int)_InterlockedExchangeAdd(const_cast<void*>(reinterpret_cast<volatile void*>(addr)), delta)
-#elif defined __GNUC__
-#  if defined __clang__ && __clang_major__ >= 3 && !defined __ANDROID__ && !defined __EMSCRIPTEN__ && !defined(__CUDACC__)
+#ifdef CV_XADD
+  // keep a user-defined CV_XADD macro if one is already defined
+#elif defined __GNUC__ || defined __clang__
+#  if defined __clang__ && __clang_major__ >= 3 && !defined __ANDROID__ && !defined __EMSCRIPTEN__ && !defined(__CUDACC__)  && !defined __INTEL_COMPILER
 #    ifdef __ATOMIC_ACQ_REL
 #      define CV_XADD(addr, delta) __c11_atomic_fetch_add((_Atomic(int)*)(addr), delta, __ATOMIC_ACQ_REL)
 #    else
@@ -473,7 +659,11 @@ Cv64suf;
 #  include <intrin.h>
 #  define CV_XADD(addr, delta) (int)_InterlockedExchangeAdd((long volatile*)addr, delta)
 #else
-   CV_INLINE CV_XADD(int* addr, int delta) { int tmp = *addr; *addr += delta; return tmp; }
+  #ifdef OPENCV_FORCE_UNSAFE_XADD
+    CV_INLINE CV_XADD(int* addr, int delta) { int tmp = *addr; *addr += delta; return tmp; }
+  #else
+    #error "OpenCV: can't define safe CV_XADD macro for current platform (unsupported). Define CV_XADD macro through custom port header (see OPENCV_INCLUDE_PORT_FILE)"
+  #endif
 #endif
 
 
@@ -493,23 +683,226 @@ Cv64suf;
 
 
 /****************************************************************************************\
-*                                    C++ Move semantics                                  *
+*                                  CV_NODISCARD attribute                                *
+* encourages the compiler to issue a warning if the return value is discarded (C++17)    *
 \****************************************************************************************/
-
-#ifndef CV_CXX_MOVE_SEMANTICS
-#  if __cplusplus >= 201103L || defined(__GXX_EXPERIMENTAL_CXX0X__) || defined(_MSC_VER) && _MSC_VER >= 1600
-#    define CV_CXX_MOVE_SEMANTICS 1
-#  elif defined(__clang)
-#    if __has_feature(cxx_rvalue_references)
-#      define CV_CXX_MOVE_SEMANTICS 1
+#ifndef CV_NODISCARD
+#  if defined(__GNUC__)
+#    define CV_NODISCARD __attribute__((__warn_unused_result__)) // at least available with GCC 3.4
+#  elif defined(__clang__) && defined(__has_attribute)
+#    if __has_attribute(__warn_unused_result__)
+#      define CV_NODISCARD __attribute__((__warn_unused_result__))
 #    endif
 #  endif
+#endif
+#ifndef CV_NODISCARD
+#  define CV_NODISCARD /* nothing by default */
+#endif
+
+
+/****************************************************************************************\
+*                                    C++ 11                                              *
+\****************************************************************************************/
+#ifndef CV_CXX11
+#  if __cplusplus >= 201103L || (defined(_MSC_VER) && _MSC_VER >= 1800)
+#    define CV_CXX11 1
+#  endif
 #else
-#  if CV_CXX_MOVE_SEMANTICS == 0
-#    undef CV_CXX_MOVE_SEMANTICS
+#  if CV_CXX11 == 0
+#    undef CV_CXX11
 #  endif
 #endif
+#ifndef CV_CXX11
+#  error "OpenCV 4.x+ requires enabled C++11 support"
+#endif
+
+#define CV_CXX_MOVE_SEMANTICS 1
+#define CV_CXX_MOVE(x) std::move(x)
+#define CV_CXX_STD_ARRAY 1
+#include <array>
+#ifndef CV_OVERRIDE
+#  define CV_OVERRIDE override
+#endif
+#ifndef CV_FINAL
+#  define CV_FINAL final
+#endif
+
+#ifndef CV_NOEXCEPT
+#  if __cplusplus >= 201103L || (defined(_MSC_VER) && _MSC_VER >= 1900/*MSVS 2015*/)
+#    define CV_NOEXCEPT noexcept
+#  endif
+#endif
+#ifndef CV_NOEXCEPT
+#  define CV_NOEXCEPT
+#endif
+
+#ifndef CV_CONSTEXPR
+#  if __cplusplus >= 201103L || (defined(_MSC_VER) && _MSC_VER >= 1900/*MSVS 2015*/)
+#    define CV_CONSTEXPR constexpr
+#  endif
+#endif
+#ifndef CV_CONSTEXPR
+#  define CV_CONSTEXPR
+#endif
+
+// Integer types portability
+#ifdef OPENCV_STDINT_HEADER
+#include OPENCV_STDINT_HEADER
+#elif defined(__cplusplus)
+#if defined(_MSC_VER) && _MSC_VER < 1600 /* MSVS 2010 */
+namespace cv {
+typedef signed char int8_t;
+typedef unsigned char uint8_t;
+typedef signed short int16_t;
+typedef unsigned short uint16_t;
+typedef signed int int32_t;
+typedef unsigned int uint32_t;
+typedef signed __int64 int64_t;
+typedef unsigned __int64 uint64_t;
+}
+#elif defined(_MSC_VER) || __cplusplus >= 201103L
+#include <cstdint>
+namespace cv {
+using std::int8_t;
+using std::uint8_t;
+using std::int16_t;
+using std::uint16_t;
+using std::int32_t;
+using std::uint32_t;
+using std::int64_t;
+using std::uint64_t;
+}
+#else
+#include <stdint.h>
+namespace cv {
+typedef ::int8_t int8_t;
+typedef ::uint8_t uint8_t;
+typedef ::int16_t int16_t;
+typedef ::uint16_t uint16_t;
+typedef ::int32_t int32_t;
+typedef ::uint32_t uint32_t;
+typedef ::int64_t int64_t;
+typedef ::uint64_t uint64_t;
+}
+#endif
+#else // pure C
+#include <stdint.h>
+#endif
+
+#ifdef __cplusplus
+namespace cv
+{
+
+class float16_t
+{
+public:
+#if CV_FP16_TYPE
+
+    float16_t() : h(0) {}
+    explicit float16_t(float x) { h = (__fp16)x; }
+    operator float() const { return (float)h; }
+    static float16_t fromBits(ushort w)
+    {
+        Cv16suf u;
+        u.u = w;
+        float16_t result;
+        result.h = u.h;
+        return result;
+    }
+    static float16_t zero()
+    {
+        float16_t result;
+        result.h = (__fp16)0;
+        return result;
+    }
+    ushort bits() const
+    {
+        Cv16suf u;
+        u.h = h;
+        return u.u;
+    }
+protected:
+    __fp16 h;
+
+#else
+    float16_t() : w(0) {}
+    explicit float16_t(float x)
+    {
+    #if CV_AVX2
+        __m128 v = _mm_load_ss(&x);
+        w = (ushort)_mm_cvtsi128_si32(_mm_cvtps_ph(v, 0));
+    #else
+        Cv32suf in;
+        in.f = x;
+        unsigned sign = in.u & 0x80000000;
+        in.u ^= sign;
+
+        if( in.u >= 0x47800000 )
+            w = (ushort)(in.u > 0x7f800000 ? 0x7e00 : 0x7c00);
+        else
+        {
+            if (in.u < 0x38800000)
+            {
+                in.f += 0.5f;
+                w = (ushort)(in.u - 0x3f000000);
+            }
+            else
+            {
+                unsigned t = in.u + 0xc8000fff;
+                w = (ushort)((t + ((in.u >> 13) & 1)) >> 13);
+            }
+        }
+
+        w = (ushort)(w | (sign >> 16));
+    #endif
+    }
+
+    operator float() const
+    {
+    #if CV_AVX2
+        float f;
+        _mm_store_ss(&f, _mm_cvtph_ps(_mm_cvtsi32_si128(w)));
+        return f;
+    #else
+        Cv32suf out;
+
+        unsigned t = ((w & 0x7fff) << 13) + 0x38000000;
+        unsigned sign = (w & 0x8000) << 16;
+        unsigned e = w & 0x7c00;
+
+        out.u = t + (1 << 23);
+        out.u = (e >= 0x7c00 ? t + 0x38000000 :
+                 e == 0 ? (static_cast<void>(out.f -= 6.103515625e-05f), out.u) : t) | sign;
+        return out.f;
+    #endif
+    }
+
+    static float16_t fromBits(ushort b)
+    {
+        float16_t result;
+        result.w = b;
+        return result;
+    }
+    static float16_t zero()
+    {
+        float16_t result;
+        result.w = (ushort)0;
+        return result;
+    }
+    ushort bits() const { return w; }
+protected:
+    ushort w;
+
+#endif
+};
+
+}
+#endif
 
 //! @}
 
-#endif // __OPENCV_CORE_CVDEF_H__
+#ifndef __cplusplus
+#include "opencv2/core/fast_math.hpp" // define cvRound(double)
+#endif
+
+#endif // OPENCV_CORE_CVDEF_H
diff --git a/IPL/include/opencv/opencv2/core/cvstd.hpp b/IPL/include/opencv/opencv2/core/cvstd.hpp
index edae954..6ce9e4b 100644
--- a/IPL/include/opencv/opencv2/core/cvstd.hpp
+++ b/IPL/include/opencv/opencv2/core/cvstd.hpp
@@ -41,25 +41,21 @@
 //
 //M*/
 
-#ifndef __OPENCV_CORE_CVSTD_HPP__
-#define __OPENCV_CORE_CVSTD_HPP__
+#ifndef OPENCV_CORE_CVSTD_HPP
+#define OPENCV_CORE_CVSTD_HPP
 
 #ifndef __cplusplus
 #  error cvstd.hpp header must be compiled as C++
 #endif
 
 #include "opencv2/core/cvdef.h"
-
 #include <cstddef>
 #include <cstring>
 #include <cctype>
 
-#ifndef OPENCV_NOSTL
-#  include <string>
-#endif
+#include <string>
 
 // import useful primitives from stl
-#ifndef OPENCV_NOSTL_TRANSITIONAL
 #  include <algorithm>
 #  include <utility>
 #  include <cstdlib> //for abs(int)
@@ -67,6 +63,11 @@
 
 namespace cv
 {
+    static inline uchar abs(uchar a) { return a; }
+    static inline ushort abs(ushort a) { return a; }
+    static inline unsigned abs(unsigned a) { return a; }
+    static inline uint64 abs(uint64 a) { return a; }
+
     using std::min;
     using std::max;
     using std::abs;
@@ -77,28 +78,7 @@ namespace cv
     using std::log;
 }
 
-namespace std
-{
-    static inline uchar abs(uchar a) { return a; }
-    static inline ushort abs(ushort a) { return a; }
-    static inline unsigned abs(unsigned a) { return a; }
-    static inline uint64 abs(uint64 a) { return a; }
-}
-
-#else
-namespace cv
-{
-    template<typename T> static inline T min(T a, T b) { return a < b ? a : b; }
-    template<typename T> static inline T max(T a, T b) { return a > b ? a : b; }
-    template<typename T> static inline T abs(T a) { return a < 0 ? -a : a; }
-    template<typename T> static inline void swap(T& a, T& b) { T tmp = a; a = b; b = tmp; }
-
-    template<> inline uchar abs(uchar a) { return a; }
-    template<> inline ushort abs(ushort a) { return a; }
-    template<> inline unsigned abs(unsigned a) { return a; }
-    template<> inline uint64 abs(uint64 a) { return a; }
-}
-#endif
+#include "cvstd_wrapper.hpp"
 
 namespace cv {
 
@@ -125,7 +105,7 @@ double memory deallocation.
 CV_EXPORTS void fastFree(void* ptr);
 
 /*!
-  The STL-compilant memory Allocator based on cv::fastMalloc() and cv::fastFree()
+  The STL-compliant memory Allocator based on cv::fastMalloc() and cv::fastFree()
 */
 template<typename _Tp> class Allocator
 {
@@ -160,910 +140,51 @@ template<typename _Tp> class Allocator
 
 //! @} core_utils
 
-//! @cond IGNORED
-
-namespace detail
-{
-
-// Metafunction to avoid taking a reference to void.
-template<typename T>
-struct RefOrVoid { typedef T& type; };
-
-template<>
-struct RefOrVoid<void>{ typedef void type; };
-
-template<>
-struct RefOrVoid<const void>{ typedef const void type; };
-
-template<>
-struct RefOrVoid<volatile void>{ typedef volatile void type; };
-
-template<>
-struct RefOrVoid<const volatile void>{ typedef const volatile void type; };
-
-// This class would be private to Ptr, if it didn't have to be a non-template.
-struct PtrOwner;
-
-}
-
-template<typename Y>
-struct DefaultDeleter
-{
-    void operator () (Y* p) const;
-};
-
 //! @endcond
 
 //! @addtogroup core_basic
 //! @{
 
-/** @brief Template class for smart pointers with shared ownership
-
-A Ptr\<T\> pretends to be a pointer to an object of type T. Unlike an ordinary pointer, however, the
-object will be automatically cleaned up once all Ptr instances pointing to it are destroyed.
-
-Ptr is similar to boost::shared_ptr that is part of the Boost library
-(<http://www.boost.org/doc/libs/release/libs/smart_ptr/shared_ptr.htm>) and std::shared_ptr from
-the [C++11](http://en.wikipedia.org/wiki/C++11) standard.
-
-This class provides the following advantages:
--   Default constructor, copy constructor, and assignment operator for an arbitrary C++ class or C
-    structure. For some objects, like files, windows, mutexes, sockets, and others, a copy
-    constructor or an assignment operator are difficult to define. For some other objects, like
-    complex classifiers in OpenCV, copy constructors are absent and not easy to implement. Finally,
-    some of complex OpenCV and your own data structures may be written in C. However, copy
-    constructors and default constructors can simplify programming a lot. Besides, they are often
-    required (for example, by STL containers). By using a Ptr to such an object instead of the
-    object itself, you automatically get all of the necessary constructors and the assignment
-    operator.
--   *O(1)* complexity of the above-mentioned operations. While some structures, like std::vector,
-    provide a copy constructor and an assignment operator, the operations may take a considerable
-    amount of time if the data structures are large. But if the structures are put into a Ptr, the
-    overhead is small and independent of the data size.
--   Automatic and customizable cleanup, even for C structures. See the example below with FILE\*.
--   Heterogeneous collections of objects. The standard STL and most other C++ and OpenCV containers
-    can store only objects of the same type and the same size. The classical solution to store
-    objects of different types in the same container is to store pointers to the base class (Base\*)
-    instead but then you lose the automatic memory management. Again, by using Ptr\<Base\> instead
-    of raw pointers, you can solve the problem.
-
-A Ptr is said to *own* a pointer - that is, for each Ptr there is a pointer that will be deleted
-once all Ptr instances that own it are destroyed. The owned pointer may be null, in which case
-nothing is deleted. Each Ptr also *stores* a pointer. The stored pointer is the pointer the Ptr
-pretends to be; that is, the one you get when you use Ptr::get or the conversion to T\*. It's
-usually the same as the owned pointer, but if you use casts or the general shared-ownership
-constructor, the two may diverge: the Ptr will still own the original pointer, but will itself point
-to something else.
-
-The owned pointer is treated as a black box. The only thing Ptr needs to know about it is how to
-delete it. This knowledge is encapsulated in the *deleter* - an auxiliary object that is associated
-with the owned pointer and shared between all Ptr instances that own it. The default deleter is an
-instance of DefaultDeleter, which uses the standard C++ delete operator; as such it will work with
-any pointer allocated with the standard new operator.
-
-However, if the pointer must be deleted in a different way, you must specify a custom deleter upon
-Ptr construction. A deleter is simply a callable object that accepts the pointer as its sole
-argument. For example, if you want to wrap FILE, you may do so as follows:
-@code
-    Ptr<FILE> f(fopen("myfile.txt", "w"), fclose);
-    if(!f) throw ...;
-    fprintf(f, ....);
-    ...
-    // the file will be closed automatically by f's destructor.
-@endcode
-Alternatively, if you want all pointers of a particular type to be deleted the same way, you can
-specialize DefaultDeleter<T>::operator() for that type, like this:
-@code
-    namespace cv {
-    template<> void DefaultDeleter<FILE>::operator ()(FILE * obj) const
-    {
-        fclose(obj);
-    }
-    }
-@endcode
-For convenience, the following types from the OpenCV C API already have such a specialization that
-calls the appropriate release function:
--   CvCapture
--   CvFileStorage
--   CvHaarClassifierCascade
--   CvMat
--   CvMatND
--   CvMemStorage
--   CvSparseMat
--   CvVideoWriter
--   IplImage
-@note The shared ownership mechanism is implemented with reference counting. As such, cyclic
-ownership (e.g. when object a contains a Ptr to object b, which contains a Ptr to object a) will
-lead to all involved objects never being cleaned up. Avoid such situations.
-@note It is safe to concurrently read (but not write) a Ptr instance from multiple threads and
-therefore it is normally safe to use it in multi-threaded applications. The same is true for Mat and
-other C++ OpenCV classes that use internal reference counts.
-*/
-template<typename T>
-struct Ptr
-{
-    /** Generic programming support. */
-    typedef T element_type;
-
-    /** The default constructor creates a null Ptr - one that owns and stores a null pointer.
-    */
-    Ptr();
-
-    /**
-    If p is null, these are equivalent to the default constructor.
-    Otherwise, these constructors assume ownership of p - that is, the created Ptr owns and stores p
-    and assumes it is the sole owner of it. Don't use them if p is already owned by another Ptr, or
-    else p will get deleted twice.
-    With the first constructor, DefaultDeleter\<Y\>() becomes the associated deleter (so p will
-    eventually be deleted with the standard delete operator). Y must be a complete type at the point
-    of invocation.
-    With the second constructor, d becomes the associated deleter.
-    Y\* must be convertible to T\*.
-    @param p Pointer to own.
-    @note It is often easier to use makePtr instead.
-     */
-    template<typename Y>
-#ifdef DISABLE_OPENCV_24_COMPATIBILITY
-    explicit
-#endif
-    Ptr(Y* p);
-
-    /** @overload
-    @param d Deleter to use for the owned pointer.
-    @param p Pointer to own.
-    */
-    template<typename Y, typename D>
-    Ptr(Y* p, D d);
-
-    /**
-    These constructors create a Ptr that shares ownership with another Ptr - that is, own the same
-    pointer as o.
-    With the first two, the same pointer is stored, as well; for the second, Y\* must be convertible
-    to T\*.
-    With the third, p is stored, and Y may be any type. This constructor allows to have completely
-    unrelated owned and stored pointers, and should be used with care to avoid confusion. A relatively
-    benign use is to create a non-owning Ptr, like this:
-    @code
-        ptr = Ptr<T>(Ptr<T>(), dont_delete_me); // owns nothing; will not delete the pointer.
-    @endcode
-    @param o Ptr to share ownership with.
-    */
-    Ptr(const Ptr& o);
-
-    /** @overload
-    @param o Ptr to share ownership with.
-    */
-    template<typename Y>
-    Ptr(const Ptr<Y>& o);
-
-    /** @overload
-    @param o Ptr to share ownership with.
-    @param p Pointer to store.
-    */
-    template<typename Y>
-    Ptr(const Ptr<Y>& o, T* p);
-
-    /** The destructor is equivalent to calling Ptr::release. */
-    ~Ptr();
-
-    /**
-    Assignment replaces the current Ptr instance with one that owns and stores same pointers as o and
-    then destroys the old instance.
-    @param o Ptr to share ownership with.
-     */
-    Ptr& operator = (const Ptr& o);
-
-    /** @overload */
-    template<typename Y>
-    Ptr& operator = (const Ptr<Y>& o);
-
-    /** If no other Ptr instance owns the owned pointer, deletes it with the associated deleter. Then sets
-    both the owned and the stored pointers to NULL.
-    */
-    void release();
-
-    /**
-    `ptr.reset(...)` is equivalent to `ptr = Ptr<T>(...)`.
-    @param p Pointer to own.
-    */
-    template<typename Y>
-    void reset(Y* p);
-
-    /** @overload
-    @param d Deleter to use for the owned pointer.
-    @param p Pointer to own.
-    */
-    template<typename Y, typename D>
-    void reset(Y* p, D d);
-
-    /**
-    Swaps the owned and stored pointers (and deleters, if any) of this and o.
-    @param o Ptr to swap with.
-    */
-    void swap(Ptr& o);
-
-    /** Returns the stored pointer. */
-    T* get() const;
-
-    /** Ordinary pointer emulation. */
-    typename detail::RefOrVoid<T>::type operator * () const;
-
-    /** Ordinary pointer emulation. */
-    T* operator -> () const;
-
-    /** Equivalent to get(). */
-    operator T* () const;
-
-    /** ptr.empty() is equivalent to `!ptr.get()`. */
-    bool empty() const;
-
-    /** Returns a Ptr that owns the same pointer as this, and stores the same
-       pointer as this, except converted via static_cast to Y*.
-    */
-    template<typename Y>
-    Ptr<Y> staticCast() const;
-
-    /** Ditto for const_cast. */
-    template<typename Y>
-    Ptr<Y> constCast() const;
-
-    /** Ditto for dynamic_cast. */
-    template<typename Y>
-    Ptr<Y> dynamicCast() const;
-
-#ifdef CV_CXX_MOVE_SEMANTICS
-    Ptr(Ptr&& o);
-    Ptr& operator = (Ptr&& o);
-#endif
-
-private:
-    detail::PtrOwner* owner;
-    T* stored;
-
-    template<typename Y>
-    friend struct Ptr; // have to do this for the cross-type copy constructor
-};
-
-/** Equivalent to ptr1.swap(ptr2). Provided to help write generic algorithms. */
-template<typename T>
-void swap(Ptr<T>& ptr1, Ptr<T>& ptr2);
-
-/** Return whether ptr1.get() and ptr2.get() are equal and not equal, respectively. */
-template<typename T>
-bool operator == (const Ptr<T>& ptr1, const Ptr<T>& ptr2);
-template<typename T>
-bool operator != (const Ptr<T>& ptr1, const Ptr<T>& ptr2);
-
-/** `makePtr<T>(...)` is equivalent to `Ptr<T>(new T(...))`. It is shorter than the latter, and it's
-marginally safer than using a constructor or Ptr::reset, since it ensures that the owned pointer
-is new and thus not owned by any other Ptr instance.
-Unfortunately, perfect forwarding is impossible to implement in C++03, and so makePtr is limited
-to constructors of T that have up to 10 arguments, none of which are non-const references.
- */
-template<typename T>
-Ptr<T> makePtr();
-/** @overload */
-template<typename T, typename A1>
-Ptr<T> makePtr(const A1& a1);
-/** @overload */
-template<typename T, typename A1, typename A2>
-Ptr<T> makePtr(const A1& a1, const A2& a2);
-/** @overload */
-template<typename T, typename A1, typename A2, typename A3>
-Ptr<T> makePtr(const A1& a1, const A2& a2, const A3& a3);
-/** @overload */
-template<typename T, typename A1, typename A2, typename A3, typename A4>
-Ptr<T> makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4);
-/** @overload */
-template<typename T, typename A1, typename A2, typename A3, typename A4, typename A5>
-Ptr<T> makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5);
-/** @overload */
-template<typename T, typename A1, typename A2, typename A3, typename A4, typename A5, typename A6>
-Ptr<T> makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const A6& a6);
-/** @overload */
-template<typename T, typename A1, typename A2, typename A3, typename A4, typename A5, typename A6, typename A7>
-Ptr<T> makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const A6& a6, const A7& a7);
-/** @overload */
-template<typename T, typename A1, typename A2, typename A3, typename A4, typename A5, typename A6, typename A7, typename A8>
-Ptr<T> makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const A6& a6, const A7& a7, const A8& a8);
-/** @overload */
-template<typename T, typename A1, typename A2, typename A3, typename A4, typename A5, typename A6, typename A7, typename A8, typename A9>
-Ptr<T> makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const A6& a6, const A7& a7, const A8& a8, const A9& a9);
-/** @overload */
-template<typename T, typename A1, typename A2, typename A3, typename A4, typename A5, typename A6, typename A7, typename A8, typename A9, typename A10>
-Ptr<T> makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const A6& a6, const A7& a7, const A8& a8, const A9& a9, const A10& a10);
-
 //////////////////////////////// string class ////////////////////////////////
 
 class CV_EXPORTS FileNode; //for string constructor from FileNode
 
-class CV_EXPORTS String
-{
-public:
-    typedef char value_type;
-    typedef char& reference;
-    typedef const char& const_reference;
-    typedef char* pointer;
-    typedef const char* const_pointer;
-    typedef ptrdiff_t difference_type;
-    typedef size_t size_type;
-    typedef char* iterator;
-    typedef const char* const_iterator;
-
-    static const size_t npos = size_t(-1);
-
-    explicit String();
-    String(const String& str);
-    String(const String& str, size_t pos, size_t len = npos);
-    String(const char* s);
-    String(const char* s, size_t n);
-    String(size_t n, char c);
-    String(const char* first, const char* last);
-    template<typename Iterator> String(Iterator first, Iterator last);
-    explicit String(const FileNode& fn);
-    ~String();
-
-    String& operator=(const String& str);
-    String& operator=(const char* s);
-    String& operator=(char c);
-
-    String& operator+=(const String& str);
-    String& operator+=(const char* s);
-    String& operator+=(char c);
-
-    size_t size() const;
-    size_t length() const;
-
-    char operator[](size_t idx) const;
-    char operator[](int idx) const;
-
-    const char* begin() const;
-    const char* end() const;
-
-    const char* c_str() const;
-
-    bool empty() const;
-    void clear();
-
-    int compare(const char* s) const;
-    int compare(const String& str) const;
-
-    void swap(String& str);
-    String substr(size_t pos = 0, size_t len = npos) const;
-
-    size_t find(const char* s, size_t pos, size_t n) const;
-    size_t find(char c, size_t pos = 0) const;
-    size_t find(const String& str, size_t pos = 0) const;
-    size_t find(const char* s, size_t pos = 0) const;
-
-    size_t rfind(const char* s, size_t pos, size_t n) const;
-    size_t rfind(char c, size_t pos = npos) const;
-    size_t rfind(const String& str, size_t pos = npos) const;
-    size_t rfind(const char* s, size_t pos = npos) const;
-
-    size_t find_first_of(const char* s, size_t pos, size_t n) const;
-    size_t find_first_of(char c, size_t pos = 0) const;
-    size_t find_first_of(const String& str, size_t pos = 0) const;
-    size_t find_first_of(const char* s, size_t pos = 0) const;
-
-    size_t find_last_of(const char* s, size_t pos, size_t n) const;
-    size_t find_last_of(char c, size_t pos = npos) const;
-    size_t find_last_of(const String& str, size_t pos = npos) const;
-    size_t find_last_of(const char* s, size_t pos = npos) const;
-
-    friend String operator+ (const String& lhs, const String& rhs);
-    friend String operator+ (const String& lhs, const char*   rhs);
-    friend String operator+ (const char*   lhs, const String& rhs);
-    friend String operator+ (const String& lhs, char          rhs);
-    friend String operator+ (char          lhs, const String& rhs);
-
-    String toLowerCase() const;
-
-#ifndef OPENCV_NOSTL
-    String(const std::string& str);
-    String(const std::string& str, size_t pos, size_t len = npos);
-    String& operator=(const std::string& str);
-    String& operator+=(const std::string& str);
-    operator std::string() const;
-
-    friend String operator+ (const String& lhs, const std::string& rhs);
-    friend String operator+ (const std::string& lhs, const String& rhs);
-#endif
+typedef std::string String;
 
-private:
-    char*  cstr_;
-    size_t len_;
-
-    char* allocate(size_t len); // len without trailing 0
-    void deallocate();
-
-    String(int); // disabled and invalid. Catch invalid usages like, commandLineParser.has(0) problem
-};
-
-//! @} core_basic
-
-////////////////////////// cv::String implementation /////////////////////////
+#ifndef OPENCV_DISABLE_STRING_LOWER_UPPER_CONVERSIONS
 
 //! @cond IGNORED
-
-inline
-String::String()
-    : cstr_(0), len_(0)
-{}
-
-inline
-String::String(const String& str)
-    : cstr_(str.cstr_), len_(str.len_)
-{
-    if (cstr_)
-        CV_XADD(((int*)cstr_)-1, 1);
-}
-
-inline
-String::String(const String& str, size_t pos, size_t len)
-    : cstr_(0), len_(0)
-{
-    pos = min(pos, str.len_);
-    len = min(str.len_ - pos, len);
-    if (!len) return;
-    if (len == str.len_)
-    {
-        CV_XADD(((int*)str.cstr_)-1, 1);
-        cstr_ = str.cstr_;
-        len_ = str.len_;
-        return;
-    }
-    memcpy(allocate(len), str.cstr_ + pos, len);
-}
-
-inline
-String::String(const char* s)
-    : cstr_(0), len_(0)
-{
-    if (!s) return;
-    size_t len = strlen(s);
-    memcpy(allocate(len), s, len);
-}
-
-inline
-String::String(const char* s, size_t n)
-    : cstr_(0), len_(0)
-{
-    if (!n) return;
-    memcpy(allocate(n), s, n);
-}
-
-inline
-String::String(size_t n, char c)
-    : cstr_(0), len_(0)
-{
-    memset(allocate(n), c, n);
-}
-
-inline
-String::String(const char* first, const char* last)
-    : cstr_(0), len_(0)
-{
-    size_t len = (size_t)(last - first);
-    memcpy(allocate(len), first, len);
-}
-
-template<typename Iterator> inline
-String::String(Iterator first, Iterator last)
-    : cstr_(0), len_(0)
-{
-    size_t len = (size_t)(last - first);
-    char* str = allocate(len);
-    while (first != last)
-    {
-        *str++ = *first;
-        ++first;
-    }
-}
-
-inline
-String::~String()
+namespace details {
+// Lower-cases one char. std::tolower is int->int and is undefined for negative values other than EOF, so convert through unsigned char first.
+static inline char char_tolower(char ch)
 {
-    deallocate();
+    return static_cast<char>(std::tolower(static_cast<unsigned char>(ch)));
 }
-
-inline
-String& String::operator=(const String& str)
+// Upper-cases one char. std::toupper is int->int and is undefined for negative values other than EOF, so convert through unsigned char first.
+static inline char char_toupper(char ch)
 {
-    if (&str == this) return *this;
-
-    deallocate();
-    if (str.cstr_) CV_XADD(((int*)str.cstr_)-1, 1);
-    cstr_ = str.cstr_;
-    len_ = str.len_;
-    return *this;
+    return static_cast<char>(std::toupper(static_cast<unsigned char>(ch)));
 }
-
-inline
-String& String::operator=(const char* s)
-{
-    deallocate();
-    if (!s) return *this;
-    size_t len = strlen(s);
-    memcpy(allocate(len), s, len);
-    return *this;
-}
-
-inline
-String& String::operator=(char c)
-{
-    deallocate();
-    allocate(1)[0] = c;
-    return *this;
-}
-
-inline
-String& String::operator+=(const String& str)
-{
-    *this = *this + str;
-    return *this;
-}
-
-inline
-String& String::operator+=(const char* s)
-{
-    *this = *this + s;
-    return *this;
-}
-
-inline
-String& String::operator+=(char c)
-{
-    *this = *this + c;
-    return *this;
-}
-
-inline
-size_t String::size() const
-{
-    return len_;
-}
-
-inline
-size_t String::length() const
-{
-    return len_;
-}
-
-inline
-char String::operator[](size_t idx) const
-{
-    return cstr_[idx];
-}
-
-inline
-char String::operator[](int idx) const
-{
-    return cstr_[idx];
-}
-
-inline
-const char* String::begin() const
-{
-    return cstr_;
-}
-
-inline
-const char* String::end() const
-{
-    return len_ ? cstr_ + 1 : 0;
-}
-
-inline
-bool String::empty() const
-{
-    return len_ == 0;
-}
-
-inline
-const char* String::c_str() const
-{
-    return cstr_ ? cstr_ : "";
-}
-
-inline
-void String::swap(String& str)
-{
-    cv::swap(cstr_, str.cstr_);
-    cv::swap(len_, str.len_);
-}
-
-inline
-void String::clear()
-{
-    deallocate();
-}
-
-inline
-int String::compare(const char* s) const
-{
-    if (cstr_ == s) return 0;
-    return strcmp(c_str(), s);
-}
-
-inline
-int String::compare(const String& str) const
-{
-    if (cstr_ == str.cstr_) return 0;
-    return strcmp(c_str(), str.c_str());
-}
-
-inline
-String String::substr(size_t pos, size_t len) const
-{
-    return String(*this, pos, len);
-}
-
-inline
-size_t String::find(const char* s, size_t pos, size_t n) const
-{
-    if (n == 0 || pos + n > len_) return npos;
-    const char* lmax = cstr_ + len_ - n;
-    for (const char* i = cstr_ + pos; i <= lmax; ++i)
-    {
-        size_t j = 0;
-        while (j < n && s[j] == i[j]) ++j;
-        if (j == n) return (size_t)(i - cstr_);
-    }
-    return npos;
-}
-
-inline
-size_t String::find(char c, size_t pos) const
-{
-    return find(&c, pos, 1);
-}
-
-inline
-size_t String::find(const String& str, size_t pos) const
-{
-    return find(str.c_str(), pos, str.len_);
-}
-
-inline
-size_t String::find(const char* s, size_t pos) const
-{
-    if (pos >= len_ || !s[0]) return npos;
-    const char* lmax = cstr_ + len_;
-    for (const char* i = cstr_ + pos; i < lmax; ++i)
-    {
-        size_t j = 0;
-        while (s[j] && s[j] == i[j])
-        {   if(i + j >= lmax) return npos;
-            ++j;
-        }
-        if (!s[j]) return (size_t)(i - cstr_);
-    }
-    return npos;
-}
-
-inline
-size_t String::rfind(const char* s, size_t pos, size_t n) const
-{
-    if (n > len_) return npos;
-    if (pos > len_ - n) pos = len_ - n;
-    for (const char* i = cstr_ + pos; i >= cstr_; --i)
-    {
-        size_t j = 0;
-        while (j < n && s[j] == i[j]) ++j;
-        if (j == n) return (size_t)(i - cstr_);
-    }
-    return npos;
-}
-
-inline
-size_t String::rfind(char c, size_t pos) const
-{
-    return rfind(&c, pos, 1);
-}
-
-inline
-size_t String::rfind(const String& str, size_t pos) const
-{
-    return rfind(str.c_str(), pos, str.len_);
-}
-
-inline
-size_t String::rfind(const char* s, size_t pos) const
-{
-    return rfind(s, pos, strlen(s));
-}
-
-inline
-size_t String::find_first_of(const char* s, size_t pos, size_t n) const
-{
-    if (n == 0 || pos + n > len_) return npos;
-    const char* lmax = cstr_ + len_;
-    for (const char* i = cstr_ + pos; i < lmax; ++i)
-    {
-        for (size_t j = 0; j < n; ++j)
-            if (s[j] == *i)
-                return (size_t)(i - cstr_);
-    }
-    return npos;
-}
-
-inline
-size_t String::find_first_of(char c, size_t pos) const
-{
-    return find_first_of(&c, pos, 1);
-}
-
-inline
-size_t String::find_first_of(const String& str, size_t pos) const
-{
-    return find_first_of(str.c_str(), pos, str.len_);
-}
-
-inline
-size_t String::find_first_of(const char* s, size_t pos) const
-{
-    if (len_ == 0) return npos;
-    if (pos >= len_ || !s[0]) return npos;
-    const char* lmax = cstr_ + len_;
-    for (const char* i = cstr_ + pos; i < lmax; ++i)
-    {
-        for (size_t j = 0; s[j]; ++j)
-            if (s[j] == *i)
-                return (size_t)(i - cstr_);
-    }
-    return npos;
-}
-
-inline
-size_t String::find_last_of(const char* s, size_t pos, size_t n) const
-{
-    if (len_ == 0) return npos;
-    if (pos >= len_) pos = len_ - 1;
-    for (const char* i = cstr_ + pos; i >= cstr_; --i)
-    {
-        for (size_t j = 0; j < n; ++j)
-            if (s[j] == *i)
-                return (size_t)(i - cstr_);
-    }
-    return npos;
-}
-
-inline
-size_t String::find_last_of(char c, size_t pos) const
-{
-    return find_last_of(&c, pos, 1);
-}
-
-inline
-size_t String::find_last_of(const String& str, size_t pos) const
-{
-    return find_last_of(str.c_str(), pos, str.len_);
-}
-
-inline
-size_t String::find_last_of(const char* s, size_t pos) const
-{
-    if (len_ == 0) return npos;
-    if (pos >= len_) pos = len_ - 1;
-    for (const char* i = cstr_ + pos; i >= cstr_; --i)
-    {
-        for (size_t j = 0; s[j]; ++j)
-            if (s[j] == *i)
-                return (size_t)(i - cstr_);
-    }
-    return npos;
-}
-
-inline
-String String::toLowerCase() const
-{
-    String res(cstr_, len_);
-
-    for (size_t i = 0; i < len_; ++i)
-        res.cstr_[i] = (char) ::tolower(cstr_[i]);
-
-    return res;
-}
-
+} // namespace details
 //! @endcond
 
-// ************************* cv::String non-member functions *************************
-
-//! @relates cv::String
-//! @{
-
-inline
-String operator + (const String& lhs, const String& rhs)
-{
-    String s;
-    s.allocate(lhs.len_ + rhs.len_);
-    memcpy(s.cstr_, lhs.cstr_, lhs.len_);
-    memcpy(s.cstr_ + lhs.len_, rhs.cstr_, rhs.len_);
-    return s;
-}
-
-inline
-String operator + (const String& lhs, const char* rhs)
+static inline std::string toLowerCase(const std::string& str)
 {
-    String s;
-    size_t rhslen = strlen(rhs);
-    s.allocate(lhs.len_ + rhslen);
-    memcpy(s.cstr_, lhs.cstr_, lhs.len_);
-    memcpy(s.cstr_ + lhs.len_, rhs, rhslen);
-    return s;
+    std::string result(str);
+    std::transform(result.begin(), result.end(), result.begin(), details::char_tolower);
+    return result;
 }
 
-inline
-String operator + (const char* lhs, const String& rhs)
+static inline std::string toUpperCase(const std::string& str)
 {
-    String s;
-    size_t lhslen = strlen(lhs);
-    s.allocate(lhslen + rhs.len_);
-    memcpy(s.cstr_, lhs, lhslen);
-    memcpy(s.cstr_ + lhslen, rhs.cstr_, rhs.len_);
-    return s;
+    std::string result(str);
+    std::transform(result.begin(), result.end(), result.begin(), details::char_toupper);
+    return result;
 }
 
-inline
-String operator + (const String& lhs, char rhs)
-{
-    String s;
-    s.allocate(lhs.len_ + 1);
-    memcpy(s.cstr_, lhs.cstr_, lhs.len_);
-    s.cstr_[lhs.len_] = rhs;
-    return s;
-}
-
-inline
-String operator + (char lhs, const String& rhs)
-{
-    String s;
-    s.allocate(rhs.len_ + 1);
-    s.cstr_[0] = lhs;
-    memcpy(s.cstr_ + 1, rhs.cstr_, rhs.len_);
-    return s;
-}
-
-static inline bool operator== (const String& lhs, const String& rhs) { return 0 == lhs.compare(rhs); }
-static inline bool operator== (const char*   lhs, const String& rhs) { return 0 == rhs.compare(lhs); }
-static inline bool operator== (const String& lhs, const char*   rhs) { return 0 == lhs.compare(rhs); }
-static inline bool operator!= (const String& lhs, const String& rhs) { return 0 != lhs.compare(rhs); }
-static inline bool operator!= (const char*   lhs, const String& rhs) { return 0 != rhs.compare(lhs); }
-static inline bool operator!= (const String& lhs, const char*   rhs) { return 0 != lhs.compare(rhs); }
-static inline bool operator<  (const String& lhs, const String& rhs) { return lhs.compare(rhs) <  0; }
-static inline bool operator<  (const char*   lhs, const String& rhs) { return rhs.compare(lhs) >  0; }
-static inline bool operator<  (const String& lhs, const char*   rhs) { return lhs.compare(rhs) <  0; }
-static inline bool operator<= (const String& lhs, const String& rhs) { return lhs.compare(rhs) <= 0; }
-static inline bool operator<= (const char*   lhs, const String& rhs) { return rhs.compare(lhs) >= 0; }
-static inline bool operator<= (const String& lhs, const char*   rhs) { return lhs.compare(rhs) <= 0; }
-static inline bool operator>  (const String& lhs, const String& rhs) { return lhs.compare(rhs) >  0; }
-static inline bool operator>  (const char*   lhs, const String& rhs) { return rhs.compare(lhs) <  0; }
-static inline bool operator>  (const String& lhs, const char*   rhs) { return lhs.compare(rhs) >  0; }
-static inline bool operator>= (const String& lhs, const String& rhs) { return lhs.compare(rhs) >= 0; }
-static inline bool operator>= (const char*   lhs, const String& rhs) { return rhs.compare(lhs) <= 0; }
-static inline bool operator>= (const String& lhs, const char*   rhs) { return lhs.compare(rhs) >= 0; }
-
-//! @} relates cv::String
+#endif // OPENCV_DISABLE_STRING_LOWER_UPPER_CONVERSIONS
 
+//! @} core_basic
 } // cv
 
-#ifndef OPENCV_NOSTL_TRANSITIONAL
-namespace std
-{
-    static inline void swap(cv::String& a, cv::String& b) { a.swap(b); }
-}
-#else
-namespace cv
-{
-    template<> inline
-    void swap<cv::String>(cv::String& a, cv::String& b)
-    {
-        a.swap(b);
-    }
-}
-#endif
-
-#include "opencv2/core/ptr.inl.hpp"
-
-#endif //__OPENCV_CORE_CVSTD_HPP__
+#endif //OPENCV_CORE_CVSTD_HPP
diff --git a/IPL/include/opencv/opencv2/core/cvstd.inl.hpp b/IPL/include/opencv/opencv2/core/cvstd.inl.hpp
index ad15406..37ad1e6 100644
--- a/IPL/include/opencv/opencv2/core/cvstd.inl.hpp
+++ b/IPL/include/opencv/opencv2/core/cvstd.inl.hpp
@@ -41,19 +41,22 @@
 //
 //M*/
 
-#ifndef __OPENCV_CORE_CVSTDINL_HPP__
-#define __OPENCV_CORE_CVSTDINL_HPP__
+#ifndef OPENCV_CORE_CVSTDINL_HPP
+#define OPENCV_CORE_CVSTDINL_HPP
 
-#ifndef OPENCV_NOSTL
-#  include <complex>
-#  include <ostream>
-#endif
+#include <complex>
+#include <ostream>
+#include <sstream>
 
 //! @cond IGNORED
 
+#ifdef _MSC_VER
+#pragma warning( push )
+#pragma warning( disable: 4127 )
+#endif
+
 namespace cv
 {
-#ifndef OPENCV_NOSTL
 
 template<typename _Tp> class DataType< std::complex<_Tp> >
 {
@@ -71,103 +74,6 @@ template<typename _Tp> class DataType< std::complex<_Tp> >
     typedef Vec<channel_type, channels> vec_type;
 };
 
-inline
-String::String(const std::string& str)
-    : cstr_(0), len_(0)
-{
-    if (!str.empty())
-    {
-        size_t len = str.size();
-        memcpy(allocate(len), str.c_str(), len);
-    }
-}
-
-inline
-String::String(const std::string& str, size_t pos, size_t len)
-    : cstr_(0), len_(0)
-{
-    size_t strlen = str.size();
-    pos = min(pos, strlen);
-    len = min(strlen - pos, len);
-    if (!len) return;
-    memcpy(allocate(len), str.c_str() + pos, len);
-}
-
-inline
-String& String::operator = (const std::string& str)
-{
-    deallocate();
-    if (!str.empty())
-    {
-        size_t len = str.size();
-        memcpy(allocate(len), str.c_str(), len);
-    }
-    return *this;
-}
-
-inline
-String& String::operator += (const std::string& str)
-{
-    *this = *this + str;
-    return *this;
-}
-
-inline
-String::operator std::string() const
-{
-    return std::string(cstr_, len_);
-}
-
-inline
-String operator + (const String& lhs, const std::string& rhs)
-{
-    String s;
-    size_t rhslen = rhs.size();
-    s.allocate(lhs.len_ + rhslen);
-    memcpy(s.cstr_, lhs.cstr_, lhs.len_);
-    memcpy(s.cstr_ + lhs.len_, rhs.c_str(), rhslen);
-    return s;
-}
-
-inline
-String operator + (const std::string& lhs, const String& rhs)
-{
-    String s;
-    size_t lhslen = lhs.size();
-    s.allocate(lhslen + rhs.len_);
-    memcpy(s.cstr_, lhs.c_str(), lhslen);
-    memcpy(s.cstr_ + lhslen, rhs.cstr_, rhs.len_);
-    return s;
-}
-
-inline
-FileNode::operator std::string() const
-{
-    String value;
-    read(*this, value, value);
-    return value;
-}
-
-template<> inline
-void operator >> (const FileNode& n, std::string& value)
-{
-    String val;
-    read(n, val, val);
-    value = val;
-}
-
-template<> inline
-FileStorage& operator << (FileStorage& fs, const std::string& value)
-{
-    return fs << cv::String(value);
-}
-
-static inline
-std::ostream& operator << (std::ostream& os, const String& str)
-{
-    return os << str.c_str();
-}
-
 static inline
 std::ostream& operator << (std::ostream& out, Ptr<Formatted> fmtd)
 {
@@ -183,6 +89,18 @@ std::ostream& operator << (std::ostream& out, const Mat& mtx)
     return out << Formatter::get()->format(mtx);
 }
 
+static inline
+std::ostream& operator << (std::ostream& out, const UMat& m)
+{
+    return out << m.getMat(ACCESS_READ);
+}
+
+template<typename _Tp> static inline
+std::ostream& operator << (std::ostream& out, const Complex<_Tp>& c)
+{
+    return out << "(" << c.re << "," << c.im << ")";
+}
+
 template<typename _Tp> static inline
 std::ostream& operator << (std::ostream& out, const std::vector<Point_<_Tp> >& vec)
 {
@@ -221,14 +139,7 @@ template<typename _Tp, int n> static inline
 std::ostream& operator << (std::ostream& out, const Vec<_Tp, n>& vec)
 {
     out << "[";
-#ifdef _MSC_VER
-#pragma warning( push )
-#pragma warning( disable: 4127 )
-#endif
-    if(Vec<_Tp, n>::depth < CV_32F)
-#ifdef _MSC_VER
-#pragma warning( pop )
-#endif
+    if (cv::traits::Depth<_Tp>::value <= CV_32S)
     {
         for (int i = 0; i < n - 1; ++i) {
             out << (int)vec[i] << ", ";
@@ -258,10 +169,29 @@ std::ostream& operator << (std::ostream& out, const Rect_<_Tp>& rect)
     return out << "[" << rect.width << " x " << rect.height << " from (" << rect.x << ", " << rect.y << ")]";
 }
 
+static inline std::ostream& operator << (std::ostream& out, const MatSize& msize)
+{
+    int i, dims = msize.dims();
+    for( i = 0; i < dims; i++ )
+    {
+        out << msize[i];
+        if( i < dims-1 )
+            out << " x ";
+    }
+    return out;
+}
+
+static inline std::ostream &operator<< (std::ostream &s, const cv::Range &r)  // const&: allows const and temporary ranges to be streamed
+{
+    return s << "[" << r.start << " : " << r.end << ")";  // written as "[start : end)"
+}
 
-#endif // OPENCV_NOSTL
 } // cv
 
+#ifdef _MSC_VER
+#pragma warning( pop )
+#endif
+
 //! @endcond
 
-#endif // __OPENCV_CORE_CVSTDINL_HPP__
+#endif // OPENCV_CORE_CVSTDINL_HPP
diff --git a/IPL/include/opencv/opencv2/core/cvstd_wrapper.hpp b/IPL/include/opencv/opencv2/core/cvstd_wrapper.hpp
new file mode 100644
index 0000000..e2c2ea5
--- /dev/null
+++ b/IPL/include/opencv/opencv2/core/cvstd_wrapper.hpp
@@ -0,0 +1,154 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_CORE_CVSTD_WRAPPER_HPP
+#define OPENCV_CORE_CVSTD_WRAPPER_HPP
+
+#include "opencv2/core/cvdef.h"
+
+#include <string>
+#include <memory>  // std::shared_ptr
+#include <type_traits>  // std::enable_if
+
+namespace cv {
+
+using std::nullptr_t;
+
+//! @addtogroup core_basic
+//! @{
+
+#ifdef CV_DOXYGEN
+
+template <typename _Tp> using Ptr = std::shared_ptr<_Tp>;  // In ideal world it should look like this, but we need some compatibility workarounds below
+
+template<typename _Tp, typename ... A1> static inline
+Ptr<_Tp> makePtr(const A1&... a1) { return std::make_shared<_Tp>(a1...); }
+
+#else  // cv::Ptr with compatibility workarounds
+
+// It should be defined for C-API types only.
+// C++ types should use regular "delete" operator.
+template<typename Y> struct DefaultDeleter;
+#if 0
+{
+    void operator()(Y* p) const;
+};
+#endif
+
+namespace sfinae {
+template<typename C, typename Ret, typename... Args>
+struct has_parenthesis_operator
+{
+private:
+    template<typename T>
+    static CV_CONSTEXPR std::true_type check(typename std::is_same<typename std::decay<decltype(std::declval<T>().operator()(std::declval<Args>()...))>::type, Ret>::type*);
+
+    template<typename> static CV_CONSTEXPR std::false_type check(...);
+
+    typedef decltype(check<C>(0)) type;
+
+public:
+#if __cplusplus >= 201103L || (defined(_MSC_VER) && _MSC_VER >= 1900/*MSVS 2015*/)
+    static CV_CONSTEXPR bool value = type::value;
+#else
+    // support MSVS 2013
+    static const int value = type::value;
+#endif
+};
+} // namespace sfinae
+
+template <typename T, typename = void>
+struct has_custom_delete
+        : public std::false_type {};
+
+// Force has_custom_delete to std::false_type when NVCC is compiling CUDA source files
+#ifndef __CUDACC__
+template <typename T>
+struct has_custom_delete<T, typename std::enable_if< sfinae::has_parenthesis_operator<DefaultDeleter<T>, void, T*>::value >::type >
+        : public std::true_type {};
+#endif
+
+template<typename T>
+struct Ptr : public std::shared_ptr<T>
+{
+#if 0
+    using std::shared_ptr<T>::shared_ptr;  // GCC 5.x can't handle this
+#else
+    inline Ptr() CV_NOEXCEPT : std::shared_ptr<T>() {}
+    inline Ptr(nullptr_t) CV_NOEXCEPT : std::shared_ptr<T>(nullptr) {}
+    template<typename Y, typename D> inline Ptr(Y* p, D d) : std::shared_ptr<T>(p, d) {}
+    template<typename D> inline Ptr(nullptr_t, D d) : std::shared_ptr<T>(nullptr, d) {}
+
+    template<typename Y> inline Ptr(const Ptr<Y>& r, T* ptr) CV_NOEXCEPT : std::shared_ptr<T>(r, ptr) {}
+
+    inline Ptr(const Ptr<T>& o) CV_NOEXCEPT : std::shared_ptr<T>(o) {}
+    inline Ptr(Ptr<T>&& o) CV_NOEXCEPT : std::shared_ptr<T>(std::move(o)) {}
+
+    template<typename Y> inline Ptr(const Ptr<Y>& o) CV_NOEXCEPT : std::shared_ptr<T>(o) {}
+    template<typename Y> inline Ptr(Ptr<Y>&& o) CV_NOEXCEPT : std::shared_ptr<T>(std::move(o)) {}
+#endif
+    inline Ptr(const std::shared_ptr<T>& o) CV_NOEXCEPT : std::shared_ptr<T>(o) {}
+    inline Ptr(std::shared_ptr<T>&& o) CV_NOEXCEPT : std::shared_ptr<T>(std::move(o)) {}
+
+    // Overload with custom DefaultDeleter: Ptr<IplImage>(...)
+    template<typename Y>
+    inline Ptr(const std::true_type&, Y* ptr) : std::shared_ptr<T>(ptr, DefaultDeleter<Y>()) {}
+
+    // Overload without custom deleter: Ptr<std::string>(...);
+    template<typename Y>
+    inline Ptr(const std::false_type&, Y* ptr) : std::shared_ptr<T>(ptr) {}
+
+    template<typename Y = T>
+    inline Ptr(Y* ptr) : Ptr(has_custom_delete<Y>(), ptr) {}
+
+    // Overload with custom DefaultDeleter: Ptr<IplImage>(...)
+    template<typename Y>
+    inline void reset(const std::true_type&, Y* ptr) { std::shared_ptr<T>::reset(ptr, DefaultDeleter<Y>()); }
+
+    // Overload without custom deleter: Ptr<std::string>(...);
+    template<typename Y>
+    inline void reset(const std::false_type&, Y* ptr) { std::shared_ptr<T>::reset(ptr); }
+
+    template<typename Y>
+    inline void reset(Y* ptr) { Ptr<T>::reset(has_custom_delete<Y>(), ptr); }
+
+    template<class Y, class Deleter>
+    void reset(Y* ptr, Deleter d) { std::shared_ptr<T>::reset(ptr, d); }
+
+    void reset() CV_NOEXCEPT { std::shared_ptr<T>::reset(); }
+
+    Ptr& operator=(const Ptr& o) { std::shared_ptr<T>::operator =(o); return *this; }
+    template<typename Y> inline Ptr& operator=(const Ptr<Y>& o) { std::shared_ptr<T>::operator =(o); return *this; }
+
+    T* operator->() const CV_NOEXCEPT { return std::shared_ptr<T>::get();}
+    typename std::add_lvalue_reference<T>::type operator*() const CV_NOEXCEPT { return *std::shared_ptr<T>::get(); }
+
+    // OpenCV 3.x methods (not a part of standard C++ library)
+    inline void release() { std::shared_ptr<T>::reset(); }
+    inline operator T* () const { return std::shared_ptr<T>::get(); }
+    inline bool empty() const { return std::shared_ptr<T>::get() == nullptr; }
+
+    template<typename Y> inline
+    Ptr<Y> staticCast() const CV_NOEXCEPT { return std::static_pointer_cast<Y>(*this); }
+
+    template<typename Y> inline
+    Ptr<Y> constCast() const CV_NOEXCEPT { return std::const_pointer_cast<Y>(*this); }
+
+    template<typename Y> inline
+    Ptr<Y> dynamicCast() const CV_NOEXCEPT { return std::dynamic_pointer_cast<Y>(*this); }
+};
+
+template<typename _Tp, typename ... A1> static inline
+Ptr<_Tp> makePtr(const A1&... a1)
+{
+    static_assert( !has_custom_delete<_Tp>::value, "Can't use this makePtr with custom DefaultDeleter");
+    return (Ptr<_Tp>)std::make_shared<_Tp>(a1...);
+}
+
+#endif // CV_DOXYGEN
+
+//! @} core_basic
+} // cv
+
+#endif //OPENCV_CORE_CVSTD_WRAPPER_HPP
diff --git a/IPL/include/opencv/opencv2/core/detail/async_promise.hpp b/IPL/include/opencv/opencv2/core/detail/async_promise.hpp
new file mode 100644
index 0000000..6eb3fb5
--- /dev/null
+++ b/IPL/include/opencv/opencv2/core/detail/async_promise.hpp
@@ -0,0 +1,71 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_CORE_ASYNC_PROMISE_HPP
+#define OPENCV_CORE_ASYNC_PROMISE_HPP
+
+#include "../async.hpp"
+
+#include "exception_ptr.hpp"
+
+namespace cv {
+
+/** @addtogroup core_async
+@{
+*/
+
+
+/** @brief Provides result of asynchronous operations
+
+*/
+class CV_EXPORTS AsyncPromise
+{
+public:
+    ~AsyncPromise() CV_NOEXCEPT;
+    AsyncPromise() CV_NOEXCEPT;
+    explicit AsyncPromise(const AsyncPromise& o) CV_NOEXCEPT;
+    AsyncPromise& operator=(const AsyncPromise& o) CV_NOEXCEPT;
+    void release() CV_NOEXCEPT;
+
+    /** Returns associated AsyncArray
+    @note Can be called once
+    */
+    AsyncArray getArrayResult();
+
+    /** Stores asynchronous result.
+    @param[in] value result
+    */
+    void setValue(InputArray value);
+
+    // TODO "move" setters
+
+#if CV__EXCEPTION_PTR
+    /** Stores exception.
+    @param[in] exception exception to be raised in AsyncArray
+    */
+    void setException(std::exception_ptr exception);
+#endif
+
+    /** Stores exception.
+    @param[in] exception exception to be raised in AsyncArray
+    */
+    void setException(const cv::Exception& exception);
+
+#ifdef CV_CXX11
+    explicit AsyncPromise(AsyncPromise&& o) { p = o.p; o.p = NULL; }
+    AsyncPromise& operator=(AsyncPromise&& o) CV_NOEXCEPT { std::swap(p, o.p); return *this; }
+#endif
+
+
+    // PImpl
+    typedef struct AsyncArray::Impl Impl; friend struct AsyncArray::Impl;
+    inline void* _getImpl() const CV_NOEXCEPT { return p; }
+protected:
+    Impl* p;
+};
+
+
+//! @}
+} // namespace
+#endif // OPENCV_CORE_ASYNC_PROMISE_HPP
diff --git a/IPL/include/opencv/opencv2/core/detail/exception_ptr.hpp b/IPL/include/opencv/opencv2/core/detail/exception_ptr.hpp
new file mode 100644
index 0000000..d98ffc4
--- /dev/null
+++ b/IPL/include/opencv/opencv2/core/detail/exception_ptr.hpp
@@ -0,0 +1,27 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_CORE_DETAILS_EXCEPTION_PTR_H
+#define OPENCV_CORE_DETAILS_EXCEPTION_PTR_H
+
+#ifndef CV__EXCEPTION_PTR  // a value defined before this header is included wins over the detection below
+#  if defined(__ANDROID__) && defined(ATOMIC_INT_LOCK_FREE) && ATOMIC_INT_LOCK_FREE < 2
+#    define CV__EXCEPTION_PTR 0  // Not supported, details: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58938
+#  elif defined(CV_CXX11)
+#    define CV__EXCEPTION_PTR 1
+#  elif defined(_MSC_VER)
+#    define CV__EXCEPTION_PTR (_MSC_VER >= 1600)
+#  elif defined(__clang__)
+#    define CV__EXCEPTION_PTR 0  // clang without CV_CXX11: C++11 only (already handled by the CV_CXX11 branch above)
+#  elif defined(__GNUC__) && defined(__GXX_EXPERIMENTAL_CXX0X__)
+#    define CV__EXCEPTION_PTR (__GXX_EXPERIMENTAL_CXX0X__ > 0)
+#  endif
+#endif
+#ifndef CV__EXCEPTION_PTR
+#  define CV__EXCEPTION_PTR 0  // no branch above matched: treat std::exception_ptr as unavailable
+#elif CV__EXCEPTION_PTR
+#  include <exception>  // std::exception_ptr
+#endif
+
+#endif // OPENCV_CORE_DETAILS_EXCEPTION_PTR_H
diff --git a/IPL/include/opencv/opencv2/core/directx.hpp b/IPL/include/opencv/opencv2/core/directx.hpp
index 764af74..056a85a 100644
--- a/IPL/include/opencv/opencv2/core/directx.hpp
+++ b/IPL/include/opencv/opencv2/core/directx.hpp
@@ -39,8 +39,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_CORE_DIRECTX_HPP__
-#define __OPENCV_CORE_DIRECTX_HPP__
+#ifndef OPENCV_CORE_DIRECTX_HPP
+#define OPENCV_CORE_DIRECTX_HPP
 
 #include "mat.hpp"
 #include "ocl.hpp"
@@ -181,4 +181,4 @@ CV_EXPORTS int getTypeFromD3DFORMAT(const int iD3DFORMAT); // enum D3DTYPE for D
 
 } } // namespace cv::directx
 
-#endif // __OPENCV_CORE_DIRECTX_HPP__
+#endif // OPENCV_CORE_DIRECTX_HPP
diff --git a/IPL/include/opencv/opencv2/core/eigen.hpp b/IPL/include/opencv/opencv2/core/eigen.hpp
index 44df04c..741648e 100644
--- a/IPL/include/opencv/opencv2/core/eigen.hpp
+++ b/IPL/include/opencv/opencv2/core/eigen.hpp
@@ -42,8 +42,8 @@
 //M*/
 
 
-#ifndef __OPENCV_CORE_EIGEN_HPP__
-#define __OPENCV_CORE_EIGEN_HPP__
+#ifndef OPENCV_CORE_EIGEN_HPP
+#define OPENCV_CORE_EIGEN_HPP
 
 #include "opencv2/core.hpp"
 
@@ -60,18 +60,18 @@ namespace cv
 //! @{
 
 template<typename _Tp, int _rows, int _cols, int _options, int _maxRows, int _maxCols> static inline
-void eigen2cv( const Eigen::Matrix<_Tp, _rows, _cols, _options, _maxRows, _maxCols>& src, Mat& dst )
+void eigen2cv( const Eigen::Matrix<_Tp, _rows, _cols, _options, _maxRows, _maxCols>& src, OutputArray dst )
 {
     if( !(src.Flags & Eigen::RowMajorBit) )
     {
-        Mat _src(src.cols(), src.rows(), DataType<_Tp>::type,
-              (void*)src.data(), src.stride()*sizeof(_Tp));
+        Mat _src(src.cols(), src.rows(), traits::Type<_Tp>::value,  // not row-major: view the buffer with rows/cols swapped
+              (void*)src.data(), src.outerStride()*sizeof(_Tp));  // Mat step = outerStride() * sizeof(_Tp)
         transpose(_src, dst);
     }
     else
     {
-        Mat _src(src.rows(), src.cols(), DataType<_Tp>::type,
-                 (void*)src.data(), src.stride()*sizeof(_Tp));
+        Mat _src(src.rows(), src.cols(), traits::Type<_Tp>::value,  // row-major: dimensions are used as-is
+                 (void*)src.data(), src.outerStride()*sizeof(_Tp));  // Mat step = outerStride() * sizeof(_Tp)
         _src.copyTo(dst);
     }
 }
@@ -98,8 +98,8 @@ void cv2eigen( const Mat& src,
     CV_DbgAssert(src.rows == _rows && src.cols == _cols);
     if( !(dst.Flags & Eigen::RowMajorBit) )
     {
-        const Mat _dst(src.cols, src.rows, DataType<_Tp>::type,
-                 dst.data(), (size_t)(dst.stride()*sizeof(_Tp)));
+        const Mat _dst(src.cols, src.rows, traits::Type<_Tp>::value,
+                 dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
         if( src.type() == _dst.type() )
             transpose(src, _dst);
         else if( src.cols == src.rows )
@@ -112,8 +112,8 @@ void cv2eigen( const Mat& src,
     }
     else
     {
-        const Mat _dst(src.rows, src.cols, DataType<_Tp>::type,
-                 dst.data(), (size_t)(dst.stride()*sizeof(_Tp)));
+        const Mat _dst(src.rows, src.cols, traits::Type<_Tp>::value,
+                 dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
         src.convertTo(_dst, _dst.type());
     }
 }
@@ -125,14 +125,14 @@ void cv2eigen( const Matx<_Tp, _rows, _cols>& src,
 {
     if( !(dst.Flags & Eigen::RowMajorBit) )
     {
-        const Mat _dst(_cols, _rows, DataType<_Tp>::type,
-                 dst.data(), (size_t)(dst.stride()*sizeof(_Tp)));
+        const Mat _dst(_cols, _rows, traits::Type<_Tp>::value,
+                 dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
         transpose(src, _dst);
     }
     else
     {
-        const Mat _dst(_rows, _cols, DataType<_Tp>::type,
-                 dst.data(), (size_t)(dst.stride()*sizeof(_Tp)));
+        const Mat _dst(_rows, _cols, traits::Type<_Tp>::value,
+                 dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
         Mat(src).copyTo(_dst);
     }
 }
@@ -144,8 +144,8 @@ void cv2eigen( const Mat& src,
     dst.resize(src.rows, src.cols);
     if( !(dst.Flags & Eigen::RowMajorBit) )
     {
-        const Mat _dst(src.cols, src.rows, DataType<_Tp>::type,
-             dst.data(), (size_t)(dst.stride()*sizeof(_Tp)));
+        const Mat _dst(src.cols, src.rows, traits::Type<_Tp>::value,
+             dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
         if( src.type() == _dst.type() )
             transpose(src, _dst);
         else if( src.cols == src.rows )
@@ -158,8 +158,8 @@ void cv2eigen( const Mat& src,
     }
     else
     {
-        const Mat _dst(src.rows, src.cols, DataType<_Tp>::type,
-                 dst.data(), (size_t)(dst.stride()*sizeof(_Tp)));
+        const Mat _dst(src.rows, src.cols, traits::Type<_Tp>::value,
+                 dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
         src.convertTo(_dst, _dst.type());
     }
 }
@@ -172,14 +172,14 @@ void cv2eigen( const Matx<_Tp, _rows, _cols>& src,
     dst.resize(_rows, _cols);
     if( !(dst.Flags & Eigen::RowMajorBit) )
     {
-        const Mat _dst(_cols, _rows, DataType<_Tp>::type,
-             dst.data(), (size_t)(dst.stride()*sizeof(_Tp)));
+        const Mat _dst(_cols, _rows, traits::Type<_Tp>::value,
+             dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
         transpose(src, _dst);
     }
     else
     {
-        const Mat _dst(_rows, _cols, DataType<_Tp>::type,
-                 dst.data(), (size_t)(dst.stride()*sizeof(_Tp)));
+        const Mat _dst(_rows, _cols, traits::Type<_Tp>::value,
+                 dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
         Mat(src).copyTo(_dst);
     }
 }
@@ -193,8 +193,8 @@ void cv2eigen( const Mat& src,
 
     if( !(dst.Flags & Eigen::RowMajorBit) )
     {
-        const Mat _dst(src.cols, src.rows, DataType<_Tp>::type,
-                 dst.data(), (size_t)(dst.stride()*sizeof(_Tp)));
+        const Mat _dst(src.cols, src.rows, traits::Type<_Tp>::value,
+                 dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
         if( src.type() == _dst.type() )
             transpose(src, _dst);
         else
@@ -202,8 +202,8 @@ void cv2eigen( const Mat& src,
     }
     else
     {
-        const Mat _dst(src.rows, src.cols, DataType<_Tp>::type,
-                 dst.data(), (size_t)(dst.stride()*sizeof(_Tp)));
+        const Mat _dst(src.rows, src.cols, traits::Type<_Tp>::value,
+                 dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
         src.convertTo(_dst, _dst.type());
     }
 }
@@ -217,14 +217,14 @@ void cv2eigen( const Matx<_Tp, _rows, 1>& src,
 
     if( !(dst.Flags & Eigen::RowMajorBit) )
     {
-        const Mat _dst(1, _rows, DataType<_Tp>::type,
-                 dst.data(), (size_t)(dst.stride()*sizeof(_Tp)));
+        const Mat _dst(1, _rows, traits::Type<_Tp>::value,
+                 dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
         transpose(src, _dst);
     }
     else
     {
-        const Mat _dst(_rows, 1, DataType<_Tp>::type,
-                 dst.data(), (size_t)(dst.stride()*sizeof(_Tp)));
+        const Mat _dst(_rows, 1, traits::Type<_Tp>::value,
+                 dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
         src.copyTo(_dst);
     }
 }
@@ -238,8 +238,8 @@ void cv2eigen( const Mat& src,
     dst.resize(src.cols);
     if( !(dst.Flags & Eigen::RowMajorBit) )
     {
-        const Mat _dst(src.cols, src.rows, DataType<_Tp>::type,
-                 dst.data(), (size_t)(dst.stride()*sizeof(_Tp)));
+        const Mat _dst(src.cols, src.rows, traits::Type<_Tp>::value,
+                 dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
         if( src.type() == _dst.type() )
             transpose(src, _dst);
         else
@@ -247,8 +247,8 @@ void cv2eigen( const Mat& src,
     }
     else
     {
-        const Mat _dst(src.rows, src.cols, DataType<_Tp>::type,
-                 dst.data(), (size_t)(dst.stride()*sizeof(_Tp)));
+        const Mat _dst(src.rows, src.cols, traits::Type<_Tp>::value,
+                 dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
         src.convertTo(_dst, _dst.type());
     }
 }
@@ -261,14 +261,14 @@ void cv2eigen( const Matx<_Tp, 1, _cols>& src,
     dst.resize(_cols);
     if( !(dst.Flags & Eigen::RowMajorBit) )
     {
-        const Mat _dst(_cols, 1, DataType<_Tp>::type,
-                 dst.data(), (size_t)(dst.stride()*sizeof(_Tp)));
+        const Mat _dst(_cols, 1, traits::Type<_Tp>::value,
+                 dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
         transpose(src, _dst);
     }
     else
     {
-        const Mat _dst(1, _cols, DataType<_Tp>::type,
-                 dst.data(), (size_t)(dst.stride()*sizeof(_Tp)));
+        const Mat _dst(1, _cols, traits::Type<_Tp>::value,
+                 dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
         Mat(src).copyTo(_dst);
     }
 }
diff --git a/IPL/include/opencv/opencv2/core/fast_math.hpp b/IPL/include/opencv/opencv2/core/fast_math.hpp
index b8b241b..0f53cf5 100644
--- a/IPL/include/opencv/opencv2/core/fast_math.hpp
+++ b/IPL/include/opencv/opencv2/core/fast_math.hpp
@@ -42,8 +42,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_CORE_FAST_MATH_HPP__
-#define __OPENCV_CORE_FAST_MATH_HPP__
+#ifndef OPENCV_CORE_FAST_MATH_HPP
+#define OPENCV_CORE_FAST_MATH_HPP
 
 #include "opencv2/core/cvdef.h"
 
@@ -54,34 +54,139 @@
 *                                      fast math                                         *
 \****************************************************************************************/
 
-#if defined __BORLANDC__
-#  include <fastmath.h>
-#elif defined __cplusplus
+#ifdef __cplusplus
 #  include <cmath>
 #else
-#  include <math.h>
-#endif
-
-#ifdef HAVE_TEGRA_OPTIMIZATION
-#  include "tegra_round.hpp"
+#  ifdef __BORLANDC__
+#    include <fastmath.h>
+#  else
+#    include <math.h>
+#  endif
 #endif
 
-#if CV_VFP
+#if defined(__CUDACC__)
+  // nothing, intrinsics/asm code is not supported
+#else
+  #if ((defined _MSC_VER && defined _M_X64) \
+      || (defined __GNUC__ && defined __x86_64__ && defined __SSE2__)) \
+      && !defined(OPENCV_SKIP_INCLUDE_EMMINTRIN_H)
+    #include <emmintrin.h>
+  #endif
+
+  #if defined __PPC64__ && defined __GNUC__ && defined _ARCH_PWR8 \
+      && !defined(OPENCV_SKIP_INCLUDE_ALTIVEC_H)
+    #include <altivec.h>
+  #endif
+
+  #if defined(CV_INLINE_ROUND_FLT)
+    // user-specified version
+    // CV_INLINE_ROUND_DBL should be defined too
+  #elif defined __GNUC__ && defined __arm__ && (defined __ARM_PCS_VFP || defined __ARM_VFPV3__ || defined __ARM_NEON__) && !defined __SOFTFP__
     // 1. general scheme
     #define ARM_ROUND(_value, _asm_string) \
         int res; \
         float temp; \
-        asm(_asm_string : [res] "=r" (res), [temp] "=w" (temp) : [value] "w" (_value)); \
+        CV_UNUSED(temp); \
+        __asm__(_asm_string : [res] "=r" (res), [temp] "=w" (temp) : [value] "w" (_value)); \
         return res
     // 2. version for double
     #ifdef __clang__
-        #define ARM_ROUND_DBL(value) ARM_ROUND(value, "vcvtr.s32.f64 %[temp], %[value] \n vmov %[res], %[temp]")
+        #define CV_INLINE_ROUND_DBL(value) ARM_ROUND(value, "vcvtr.s32.f64 %[temp], %[value] \n vmov %[res], %[temp]")
     #else
-        #define ARM_ROUND_DBL(value) ARM_ROUND(value, "vcvtr.s32.f64 %[temp], %P[value] \n vmov %[res], %[temp]")
+        #define CV_INLINE_ROUND_DBL(value) ARM_ROUND(value, "vcvtr.s32.f64 %[temp], %P[value] \n vmov %[res], %[temp]")
     #endif
     // 3. version for float
-    #define ARM_ROUND_FLT(value) ARM_ROUND(value, "vcvtr.s32.f32 %[temp], %[value]\n vmov %[res], %[temp]")
-#endif // CV_VFP
+    #define CV_INLINE_ROUND_FLT(value) ARM_ROUND(value, "vcvtr.s32.f32 %[temp], %[value]\n vmov %[res], %[temp]")
+  #elif defined __PPC64__ && defined __GNUC__ && defined _ARCH_PWR8
+    // P8 and newer machines can convert fp32/64 to int quickly.
+    #define CV_INLINE_ROUND_DBL(value) \
+        int out; \
+        double temp; \
+        __asm__( "fctiw %[temp],%[in]\n\tmfvsrwz %[out],%[temp]\n\t" : [out] "=r" (out), [temp] "=d" (temp) : [in] "d" ((double)(value)) : ); \
+        return out;
+
+    // FP32 also works with FP64 routine above
+    #define CV_INLINE_ROUND_FLT(value) CV_INLINE_ROUND_DBL(value)
+  #endif
+
+  #ifdef CV_INLINE_ISINF_FLT
+    // user-specified version
+    // CV_INLINE_ISINF_DBL should be defined too
+  #elif defined __PPC64__ && defined _ARCH_PWR9 && defined(scalar_test_data_class)
+    #define CV_INLINE_ISINF_DBL(value) return scalar_test_data_class(value, 0x30);
+    #define CV_INLINE_ISINF_FLT(value) CV_INLINE_ISINF_DBL(value)
+  #endif
+
+  #ifdef CV_INLINE_ISNAN_FLT
+    // user-specified version
+    // CV_INLINE_ISNAN_DBL should be defined too
+  #elif defined __PPC64__ && defined _ARCH_PWR9 && defined(scalar_test_data_class)
+    #define CV_INLINE_ISNAN_DBL(value) return scalar_test_data_class(value, 0x40);
+    #define CV_INLINE_ISNAN_FLT(value) CV_INLINE_ISNAN_DBL(value)
+  #endif
+
+  #if !defined(OPENCV_USE_FASTMATH_BUILTINS) \
+    && ( \
+        defined(__x86_64__) || defined(__i686__) \
+        || defined(__arm__) \
+        || defined(__PPC64__) \
+    )
+    /* Use builtin C math functions when available. Dedicated hardware is available to
+       round and convert FP values. */
+    #define OPENCV_USE_FASTMATH_BUILTINS 1
+  #endif
+
+  /* Enable builtin math functions if possible, desired, and available.
+     Note, not all math functions inline equally. E.g. lrint will not inline
+     without the -fno-math-errno option. */
+  #if defined(CV_ICC)
+    // nothing
+  #elif defined(OPENCV_USE_FASTMATH_BUILTINS) && OPENCV_USE_FASTMATH_BUILTINS
+    #if defined(__clang__)
+      #define CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS
+      #if !defined(CV_INLINE_ISNAN_DBL) && __has_builtin(__builtin_isnan)
+        #define CV_INLINE_ISNAN_DBL(value) return __builtin_isnan(value);
+      #endif
+      #if !defined(CV_INLINE_ISNAN_FLT) && __has_builtin(__builtin_isnan)
+        #define CV_INLINE_ISNAN_FLT(value) return __builtin_isnan(value);
+      #endif
+      #if !defined(CV_INLINE_ISINF_DBL) && __has_builtin(__builtin_isinf)
+        #define CV_INLINE_ISINF_DBL(value) return __builtin_isinf(value);
+      #endif
+      #if !defined(CV_INLINE_ISINF_FLT) && __has_builtin(__builtin_isinf)
+        #define CV_INLINE_ISINF_FLT(value) return __builtin_isinf(value);
+      #endif
+    #elif defined(__GNUC__)
+      #define CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS
+      #if !defined(CV_INLINE_ISNAN_DBL)
+        #define CV_INLINE_ISNAN_DBL(value) return __builtin_isnan(value);
+      #endif
+      #if !defined(CV_INLINE_ISNAN_FLT)
+        #define CV_INLINE_ISNAN_FLT(value) return __builtin_isnanf(value);
+      #endif
+      #if !defined(CV_INLINE_ISINF_DBL)
+        #define CV_INLINE_ISINF_DBL(value) return __builtin_isinf(value);
+      #endif
+      #if !defined(CV_INLINE_ISINF_FLT)
+        #define CV_INLINE_ISINF_FLT(value) return __builtin_isinff(value);
+      #endif
+    #elif defined(_MSC_VER)
+      #if !defined(CV_INLINE_ISNAN_DBL)
+        #define CV_INLINE_ISNAN_DBL(value) return isnan(value);
+      #endif
+      #if !defined(CV_INLINE_ISNAN_FLT)
+        #define CV_INLINE_ISNAN_FLT(value) return isnan(value);
+      #endif
+      #if !defined(CV_INLINE_ISINF_DBL)
+        #define CV_INLINE_ISINF_DBL(value) return isinf(value);
+      #endif
+      #if !defined(CV_INLINE_ISINF_FLT)
+        #define CV_INLINE_ISINF_FLT(value) return isinf(value);
+      #endif
+    #endif
+  #endif
+
+#endif // defined(__CUDACC__)
 
 /** @brief Rounds floating-point number to the nearest integer
 
@@ -91,8 +196,11 @@
 CV_INLINE int
 cvRound( double value )
 {
-#if ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \
-    && defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__)
+#if defined CV_INLINE_ROUND_DBL
+    CV_INLINE_ROUND_DBL(value);
+#elif ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \
+    && defined __SSE2__ && !defined __APPLE__) || CV_SSE2) \
+    && !defined(__CUDACC__)
     __m128d t = _mm_set_sd( value );
     return _mm_cvtsd_si32(t);
 #elif defined _MSC_VER && defined _M_IX86
@@ -103,15 +211,8 @@ cvRound( double value )
         fistp t;
     }
     return t;
-#elif ((defined _MSC_VER && defined _M_ARM) || defined CV_ICC || \
-        defined __GNUC__) && defined HAVE_TEGRA_OPTIMIZATION
-    TEGRA_ROUND_DBL(value);
 #elif defined CV_ICC || defined __GNUC__
-# if CV_VFP
-    ARM_ROUND_DBL(value);
-# else
-    return (int)lrint(value);
-# endif
+    return (int)(lrint(value));
 #else
     /* it's ok if round does not comply with IEEE754 standard;
        the tests should allow +/-1 difference when the tested functions use round */
@@ -129,17 +230,14 @@ cvRound( double value )
  */
 CV_INLINE int cvFloor( double value )
 {
-#if (defined _MSC_VER && defined _M_X64 || (defined __GNUC__ && defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__)
-    __m128d t = _mm_set_sd( value );
-    int i = _mm_cvtsd_si32(t);
-    return i - _mm_movemask_pd(_mm_cmplt_sd(t, _mm_cvtsi32_sd(t,i)));
-#elif defined __GNUC__
+#if (defined CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS || defined CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS) \
+    && ( \
+        defined(__PPC64__) \
+    )  // the builtin path is selected only when __PPC64__ is defined
+    return __builtin_floor(value);  // the builtin's result is converted to the int return type
+#else  // portable path: truncate toward zero, then subtract one if the truncated value is above the input
     int i = (int)value;
     return i - (i > value);
-#else
-    int i = cvRound(value);
-    float diff = (float)(value - i);
-    return i - (diff < 0);
 #endif
 }
 
@@ -152,17 +250,14 @@ CV_INLINE int cvFloor( double value )
  */
 CV_INLINE int cvCeil( double value )
 {
-#if (defined _MSC_VER && defined _M_X64 || (defined __GNUC__ && defined __SSE2__&& !defined __APPLE__)) && !defined(__CUDACC__)
-    __m128d t = _mm_set_sd( value );
-    int i = _mm_cvtsd_si32(t);
-    return i + _mm_movemask_pd(_mm_cmplt_sd(_mm_cvtsi32_sd(t,i), t));
-#elif defined __GNUC__
+#if (defined CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS || defined CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS) \
+    && ( \
+        defined(__PPC64__) \
+    )  // the builtin path is selected only when __PPC64__ is defined
+    return __builtin_ceil(value);  // the builtin's result is converted to the int return type
+#else  // portable path: truncate toward zero, then add one if the truncated value is below the input
     int i = (int)value;
     return i + (i < value);
-#else
-    int i = cvRound(value);
-    float diff = (float)(i - value);
-    return i + (diff < 0);
 #endif
 }
 
@@ -174,10 +269,14 @@ CV_INLINE int cvCeil( double value )
  otherwise. */
 CV_INLINE int cvIsNaN( double value )
 {
+#if defined CV_INLINE_ISNAN_DBL  // platform- or user-supplied implementation takes precedence
+    CV_INLINE_ISNAN_DBL(value);  // the macros defined in this header expand to a complete return statement
+#else  // portable path: inspect the bit pattern of the double through Cv64suf
     Cv64suf ieee754;
     ieee754.f = value;
     return ((unsigned)(ieee754.u >> 32) & 0x7fffffff) +
            ((unsigned)ieee754.u != 0) > 0x7ff00000;
+#endif  // CV_INLINE_ISNAN_DBL
 }
 
 /** @brief Determines if the argument is Infinity.
@@ -188,10 +287,19 @@ CV_INLINE int cvIsNaN( double value )
  and 0 otherwise. */
 CV_INLINE int cvIsInf( double value )
 {
+#if defined CV_INLINE_ISINF_DBL  // platform- or user-supplied implementation takes precedence
+    CV_INLINE_ISINF_DBL(value);  // the macros defined in this header expand to a complete return statement
+#elif defined(__x86_64__) || defined(_M_X64) || defined(__aarch64__) || defined(_M_ARM64) || defined(__PPC64__)
+    Cv64suf ieee754;
+    ieee754.f = value;
+    return (ieee754.u & 0x7fffffffffffffff) ==  // mask off the sign bit only: the low mantissa word must be zero too,
+                        0x7ff0000000000000;     // otherwise a NaN with a low-word payload would be reported as Infinity
+#else  // same test done on two 32-bit halves
     Cv64suf ieee754;
     ieee754.f = value;
     return ((unsigned)(ieee754.u >> 32) & 0x7fffffff) == 0x7ff00000 &&
             (unsigned)ieee754.u == 0;
+#endif  // CV_INLINE_ISINF_DBL
 }
 
 #ifdef __cplusplus
@@ -199,8 +307,11 @@ CV_INLINE int cvIsInf( double value )
 /** @overload */
 CV_INLINE int cvRound(float value)
 {
-#if ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ && \
-      defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__)
+#if defined CV_INLINE_ROUND_FLT
+    CV_INLINE_ROUND_FLT(value);
+#elif ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \
+    && defined __SSE2__ && !defined __APPLE__) || CV_SSE2) \
+    && !defined(__CUDACC__)
     __m128 t = _mm_set_ss( value );
     return _mm_cvtss_si32(t);
 #elif defined _MSC_VER && defined _M_IX86
@@ -211,15 +322,8 @@ CV_INLINE int cvRound(float value)
         fistp t;
     }
     return t;
-#elif ((defined _MSC_VER && defined _M_ARM) || defined CV_ICC || \
-        defined __GNUC__) && defined HAVE_TEGRA_OPTIMIZATION
-    TEGRA_ROUND_FLT(value);
 #elif defined CV_ICC || defined __GNUC__
-# if CV_VFP
-    ARM_ROUND_FLT(value);
-# else
-    return (int)lrintf(value);
-# endif
+    return (int)(lrintf(value));
 #else
     /* it's ok if round does not comply with IEEE754 standard;
      the tests should allow +/-1 difference when the tested functions use round */
@@ -236,17 +340,14 @@ CV_INLINE int cvRound( int value )
 /** @overload */
 CV_INLINE int cvFloor( float value )
 {
-#if (defined _MSC_VER && defined _M_X64 || (defined __GNUC__ && defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__)
-    __m128 t = _mm_set_ss( value );
-    int i = _mm_cvtss_si32(t);
-    return i - _mm_movemask_ps(_mm_cmplt_ss(t, _mm_cvtsi32_ss(t,i)));
-#elif defined __GNUC__
+#if (defined CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS || defined CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS) \
+    && ( \
+        defined(__PPC64__) \
+    )  // the builtin path is selected only when __PPC64__ is defined
+    return __builtin_floorf(value);  // the builtin's result is converted to the int return type
+#else  // portable path: truncate toward zero, then subtract one if the truncated value is above the input
     int i = (int)value;
     return i - (i > value);
-#else
-    int i = cvRound(value);
-    float diff = (float)(value - i);
-    return i - (diff < 0);
 #endif
 }
 
@@ -259,17 +360,14 @@ CV_INLINE int cvFloor( int value )
 /** @overload */
 CV_INLINE int cvCeil( float value )
 {
-#if (defined _MSC_VER && defined _M_X64 || (defined __GNUC__ && defined __SSE2__&& !defined __APPLE__)) && !defined(__CUDACC__)
-    __m128 t = _mm_set_ss( value );
-    int i = _mm_cvtss_si32(t);
-    return i + _mm_movemask_ps(_mm_cmplt_ss(_mm_cvtsi32_ss(t,i), t));
-#elif defined __GNUC__
+#if (defined CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS || defined CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS) \
+    && ( \
+        defined(__PPC64__) \
+    )  // the builtin path is selected only when __PPC64__ is defined
+    return __builtin_ceilf(value);  // the builtin's result is converted to the int return type
+#else  // portable path: truncate toward zero, then add one if the truncated value is below the input
     int i = (int)value;
     return i + (i < value);
-#else
-    int i = cvRound(value);
-    float diff = (float)(i - value);
-    return i + (diff < 0);
 #endif
 }
 
@@ -282,17 +380,25 @@ CV_INLINE int cvCeil( int value )
 /** @overload */
 CV_INLINE int cvIsNaN( float value )
 {
+#if defined CV_INLINE_ISNAN_FLT  // platform- or user-supplied implementation takes precedence
+    CV_INLINE_ISNAN_FLT(value);  // the macros defined in this header expand to a complete return statement
+#else  // portable path: inspect the bit pattern of the float through Cv32suf
     Cv32suf ieee754;
     ieee754.f = value;
     return (ieee754.u & 0x7fffffff) > 0x7f800000;
+#endif  // CV_INLINE_ISNAN_FLT
 }
 
 /** @overload */
 CV_INLINE int cvIsInf( float value )
 {
+#if defined CV_INLINE_ISINF_FLT  // platform- or user-supplied implementation takes precedence
+    CV_INLINE_ISINF_FLT(value);  // the macros defined in this header expand to a complete return statement
+#else  // portable path: inspect the bit pattern of the float through Cv32suf
     Cv32suf ieee754;
     ieee754.f = value;
     return (ieee754.u & 0x7fffffff) == 0x7f800000;
+#endif  // CV_INLINE_ISINF_FLT
 }
 
 #endif // __cplusplus
diff --git a/IPL/include/opencv/opencv2/core/hal/hal.hpp b/IPL/include/opencv/opencv2/core/hal/hal.hpp
index 118913e..0d68078 100644
--- a/IPL/include/opencv/opencv2/core/hal/hal.hpp
+++ b/IPL/include/opencv/opencv2/core/hal/hal.hpp
@@ -42,23 +42,13 @@
 //
 //M*/
 
-#ifndef __OPENCV_HAL_HPP__
-#define __OPENCV_HAL_HPP__
+#ifndef OPENCV_HAL_HPP
+#define OPENCV_HAL_HPP
 
 #include "opencv2/core/cvdef.h"
+#include "opencv2/core/cvstd.hpp"
 #include "opencv2/core/hal/interface.h"
 
-//! @cond IGNORED
-#define CALL_HAL(name, fun, ...) \
-    int res = fun(__VA_ARGS__); \
-    if (res == CV_HAL_ERROR_OK) \
-        return; \
-    else if (res != CV_HAL_ERROR_NOT_IMPLEMENTED) \
-        CV_Error_(cv::Error::StsInternal, \
-            ("HAL implementation " CVAUX_STR(name) " ==> " CVAUX_STR(fun) " returned %d (0x%08x)", res, res));
-//! @endcond
-
-
 namespace cv { namespace hal {
 
 //! @addtogroup core_hal_functions
@@ -74,6 +64,23 @@ CV_EXPORTS int LU32f(float* A, size_t astep, int m, float* b, size_t bstep, int
 CV_EXPORTS int LU64f(double* A, size_t astep, int m, double* b, size_t bstep, int n);
 CV_EXPORTS bool Cholesky32f(float* A, size_t astep, int m, float* b, size_t bstep, int n);
 CV_EXPORTS bool Cholesky64f(double* A, size_t astep, int m, double* b, size_t bstep, int n);
+CV_EXPORTS void SVD32f(float* At, size_t astep, float* W, float* U, size_t ustep, float* Vt, size_t vstep, int m, int n, int flags);
+CV_EXPORTS void SVD64f(double* At, size_t astep, double* W, double* U, size_t ustep, double* Vt, size_t vstep, int m, int n, int flags);
+CV_EXPORTS int QR32f(float* A, size_t astep, int m, int n, int k, float* b, size_t bstep, float* hFactors);
+CV_EXPORTS int QR64f(double* A, size_t astep, int m, int n, int k, double* b, size_t bstep, double* hFactors);
+
+CV_EXPORTS void gemm32f(const float* src1, size_t src1_step, const float* src2, size_t src2_step,
+                        float alpha, const float* src3, size_t src3_step, float beta, float* dst, size_t dst_step,
+                        int m_a, int n_a, int n_d, int flags);
+CV_EXPORTS void gemm64f(const double* src1, size_t src1_step, const double* src2, size_t src2_step,
+                        double alpha, const double* src3, size_t src3_step, double beta, double* dst, size_t dst_step,
+                        int m_a, int n_a, int n_d, int flags);
+CV_EXPORTS void gemm32fc(const float* src1, size_t src1_step, const float* src2, size_t src2_step,
+                        float alpha, const float* src3, size_t src3_step, float beta, float* dst, size_t dst_step,
+                        int m_a, int n_a, int n_d, int flags);
+CV_EXPORTS void gemm64fc(const double* src1, size_t src1_step, const double* src2, size_t src2_step,
+                        double alpha, const double* src3, size_t src3_step, double beta, double* dst, size_t dst_step,
+                        int m_a, int n_a, int n_d, int flags);
 
 CV_EXPORTS int normL1_(const uchar* a, const uchar* b, int n);
 CV_EXPORTS float normL1_(const float* a, const float* b, int n);
@@ -84,7 +91,8 @@ CV_EXPORTS void exp64f(const double* src, double* dst, int n);
 CV_EXPORTS void log32f(const float* src, float* dst, int n);
 CV_EXPORTS void log64f(const double* src, double* dst, int n);
 
-CV_EXPORTS void fastAtan2(const float* y, const float* x, float* dst, int n, bool angleInDegrees);
+CV_EXPORTS void fastAtan32f(const float* y, const float* x, float* dst, int n, bool angleInDegrees);
+CV_EXPORTS void fastAtan64f(const double* y, const double* x, double* dst, int n, bool angleInDegrees);
 CV_EXPORTS void magnitude32f(const float* x, const float* y, float* dst, int n);
 CV_EXPORTS void magnitude64f(const double* x, const double* y, double* dst, int n);
 CV_EXPORTS void sqrt32f(const float* src, float* dst, int len);
@@ -171,13 +179,13 @@ CV_EXPORTS void div32s( const int* src1, size_t step1, const int* src2, size_t s
 CV_EXPORTS void div32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* scale);
 CV_EXPORTS void div64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* scale);
 
-CV_EXPORTS void recip8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* scale);
-CV_EXPORTS void recip8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* scale);
-CV_EXPORTS void recip16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* scale);
-CV_EXPORTS void recip16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* scale);
-CV_EXPORTS void recip32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* scale);
-CV_EXPORTS void recip32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* scale);
-CV_EXPORTS void recip64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* scale);
+CV_EXPORTS void recip8u( const uchar *, size_t, const uchar * src2, size_t step2, uchar* dst, size_t step, int width, int height, void* scale);
+CV_EXPORTS void recip8s( const schar *, size_t, const schar * src2, size_t step2, schar* dst, size_t step, int width, int height, void* scale);
+CV_EXPORTS void recip16u( const ushort *, size_t, const ushort * src2, size_t step2, ushort* dst, size_t step, int width, int height, void* scale);
+CV_EXPORTS void recip16s( const short *, size_t, const short * src2, size_t step2, short* dst, size_t step, int width, int height, void* scale);
+CV_EXPORTS void recip32s( const int *, size_t, const int * src2, size_t step2, int* dst, size_t step, int width, int height, void* scale);
+CV_EXPORTS void recip32f( const float *, size_t, const float * src2, size_t step2, float* dst, size_t step, int width, int height, void* scale);
+CV_EXPORTS void recip64f( const double *, size_t, const double * src2, size_t step2, double* dst, size_t step, int width, int height, void* scale);
 
 CV_EXPORTS void addWeighted8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _scalars );
 CV_EXPORTS void addWeighted8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* scalars );
@@ -187,6 +195,35 @@ CV_EXPORTS void addWeighted32s( const int* src1, size_t step1, const int* src2,
 CV_EXPORTS void addWeighted32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* scalars );
 CV_EXPORTS void addWeighted64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* scalars );
 
+CV_EXPORTS void cvt16f32f( const float16_t* src, float* dst, int len );
+CV_EXPORTS void cvt32f16f( const float* src, float16_t* dst, int len );
+
+CV_EXPORTS void addRNGBias32f( float* arr, const float* scaleBiasPairs, int len );
+CV_EXPORTS void addRNGBias64f( double* arr, const double* scaleBiasPairs, int len );
+
+struct CV_EXPORTS DFT1D  // abstract interface: apply() is pure virtual, objects come from the static create() factory
+{
+    static Ptr<DFT1D> create(int len, int count, int depth, int flags, bool * useBuffer = 0);  // useBuffer is optional (defaults to null)
+    virtual void apply(const uchar *src, uchar *dst) = 0;  // buffers are passed as raw byte pointers
+    virtual ~DFT1D() {}
+};
+
+struct CV_EXPORTS DFT2D  // abstract interface: apply() is pure virtual, objects come from the static create() factory
+{
+    static Ptr<DFT2D> create(int width, int height, int depth,
+                             int src_channels, int dst_channels,
+                             int flags, int nonzero_rows = 0);  // nonzero_rows is optional (defaults to 0)
+    virtual void apply(const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step) = 0;  // each buffer is a byte pointer plus a step
+    virtual ~DFT2D() {}
+};
+
+struct CV_EXPORTS DCT2D  // abstract interface: apply() is pure virtual, objects come from the static create() factory
+{
+    static Ptr<DCT2D> create(int width, int height, int depth, int flags);
+    virtual void apply(const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step) = 0;  // each buffer is a byte pointer plus a step
+    virtual ~DCT2D() {}
+};
+
 //! @} core_hal
 
 //=============================================================================
@@ -204,6 +241,7 @@ CV_EXPORTS void exp(const double* src, double* dst, int n);
 CV_EXPORTS void log(const float* src, float* dst, int n);
 CV_EXPORTS void log(const double* src, double* dst, int n);
 
+CV_EXPORTS void fastAtan2(const float* y, const float* x, float* dst, int n, bool angleInDegrees);
 CV_EXPORTS void magnitude(const float* x, const float* y, float* dst, int n);
 CV_EXPORTS void magnitude(const double* x, const double* y, double* dst, int n);
 CV_EXPORTS void sqrt(const float* src, float* dst, int len);
@@ -215,4 +253,4 @@ CV_EXPORTS void invSqrt(const double* src, double* dst, int len);
 
 }} //cv::hal
 
-#endif //__OPENCV_HAL_HPP__
+#endif //OPENCV_HAL_HPP
diff --git a/IPL/include/opencv/opencv2/core/hal/interface.h b/IPL/include/opencv/opencv2/core/hal/interface.h
index 51f7606..6f0a83d 100644
--- a/IPL/include/opencv/opencv2/core/hal/interface.h
+++ b/IPL/include/opencv/opencv2/core/hal/interface.h
@@ -1,42 +1,42 @@
-#ifndef _HAL_INTERFACE_HPP_INCLUDED_
-#define _HAL_INTERFACE_HPP_INCLUDED_
+#ifndef OPENCV_CORE_HAL_INTERFACE_H
+#define OPENCV_CORE_HAL_INTERFACE_H
 
 //! @addtogroup core_hal_interface
 //! @{
 
+//! @name Return codes
+//! @{
 #define CV_HAL_ERROR_OK 0
 #define CV_HAL_ERROR_NOT_IMPLEMENTED 1
 #define CV_HAL_ERROR_UNKNOWN -1
-
-#define CV_HAL_CMP_EQ 0
-#define CV_HAL_CMP_GT 1
-#define CV_HAL_CMP_GE 2
-#define CV_HAL_CMP_LT 3
-#define CV_HAL_CMP_LE 4
-#define CV_HAL_CMP_NE 5
+//! @}
 
 #ifdef __cplusplus
 #include <cstddef>
 #else
 #include <stddef.h>
+#include <stdbool.h>
 #endif
 
-/* primitive types */
-/*
-  schar  - signed 1 byte integer
-  uchar  - unsigned 1 byte integer
-  short  - signed 2 byte integer
-  ushort - unsigned 2 byte integer
-  int    - signed 4 byte integer
-  uint   - unsigned 4 byte integer
-  int64  - signed 8 byte integer
-  uint64 - unsigned 8 byte integer
-*/
-
+//! @name Data types
+//! primitive types
+//! - schar  - signed 1 byte integer
+//! - uchar  - unsigned 1 byte integer
+//! - short  - signed 2 byte integer
+//! - ushort - unsigned 2 byte integer
+//! - int    - signed 4 byte integer
+//! - uint   - unsigned 4 byte integer
+//! - int64  - signed 8 byte integer
+//! - uint64 - unsigned 8 byte integer
+//! @{
 #if !defined _MSC_VER && !defined __BORLANDC__
 #  if defined __cplusplus && __cplusplus >= 201103L && !defined __APPLE__
 #    include <cstdint>
-     typedef std::uint32_t uint;
+#    ifdef __NEWLIB__
+        typedef unsigned int uint;
+#    else
+        typedef std::uint32_t uint;
+#    endif
 #  else
 #    include <stdint.h>
      typedef uint32_t uint;
@@ -64,6 +64,127 @@ typedef signed char schar;
 #  define CV_BIG_UINT(n)  n##ULL
 #endif
 
+#define CV_USRTYPE1 (void)"CV_USRTYPE1 support has been dropped in OpenCV 4.0"
+
+#define CV_CN_MAX     512
+#define CV_CN_SHIFT   3
+#define CV_DEPTH_MAX  (1 << CV_CN_SHIFT)
+
+#define CV_8U   0
+#define CV_8S   1
+#define CV_16U  2
+#define CV_16S  3
+#define CV_32S  4
+#define CV_32F  5
+#define CV_64F  6
+#define CV_16F  7
+
+#define CV_MAT_DEPTH_MASK       (CV_DEPTH_MAX - 1)
+#define CV_MAT_DEPTH(flags)     ((flags) & CV_MAT_DEPTH_MASK)
+
+#define CV_MAKETYPE(depth,cn) (CV_MAT_DEPTH(depth) + (((cn)-1) << CV_CN_SHIFT))
+#define CV_MAKE_TYPE CV_MAKETYPE
+
+#define CV_8UC1 CV_MAKETYPE(CV_8U,1)
+#define CV_8UC2 CV_MAKETYPE(CV_8U,2)
+#define CV_8UC3 CV_MAKETYPE(CV_8U,3)
+#define CV_8UC4 CV_MAKETYPE(CV_8U,4)
+#define CV_8UC(n) CV_MAKETYPE(CV_8U,(n))
+
+#define CV_8SC1 CV_MAKETYPE(CV_8S,1)
+#define CV_8SC2 CV_MAKETYPE(CV_8S,2)
+#define CV_8SC3 CV_MAKETYPE(CV_8S,3)
+#define CV_8SC4 CV_MAKETYPE(CV_8S,4)
+#define CV_8SC(n) CV_MAKETYPE(CV_8S,(n))
+
+#define CV_16UC1 CV_MAKETYPE(CV_16U,1)
+#define CV_16UC2 CV_MAKETYPE(CV_16U,2)
+#define CV_16UC3 CV_MAKETYPE(CV_16U,3)
+#define CV_16UC4 CV_MAKETYPE(CV_16U,4)
+#define CV_16UC(n) CV_MAKETYPE(CV_16U,(n))
+
+#define CV_16SC1 CV_MAKETYPE(CV_16S,1)
+#define CV_16SC2 CV_MAKETYPE(CV_16S,2)
+#define CV_16SC3 CV_MAKETYPE(CV_16S,3)
+#define CV_16SC4 CV_MAKETYPE(CV_16S,4)
+#define CV_16SC(n) CV_MAKETYPE(CV_16S,(n))
+
+#define CV_32SC1 CV_MAKETYPE(CV_32S,1)
+#define CV_32SC2 CV_MAKETYPE(CV_32S,2)
+#define CV_32SC3 CV_MAKETYPE(CV_32S,3)
+#define CV_32SC4 CV_MAKETYPE(CV_32S,4)
+#define CV_32SC(n) CV_MAKETYPE(CV_32S,(n))
+
+#define CV_32FC1 CV_MAKETYPE(CV_32F,1)
+#define CV_32FC2 CV_MAKETYPE(CV_32F,2)
+#define CV_32FC3 CV_MAKETYPE(CV_32F,3)
+#define CV_32FC4 CV_MAKETYPE(CV_32F,4)
+#define CV_32FC(n) CV_MAKETYPE(CV_32F,(n))
+
+#define CV_64FC1 CV_MAKETYPE(CV_64F,1)
+#define CV_64FC2 CV_MAKETYPE(CV_64F,2)
+#define CV_64FC3 CV_MAKETYPE(CV_64F,3)
+#define CV_64FC4 CV_MAKETYPE(CV_64F,4)
+#define CV_64FC(n) CV_MAKETYPE(CV_64F,(n))
+
+#define CV_16FC1 CV_MAKETYPE(CV_16F,1)
+#define CV_16FC2 CV_MAKETYPE(CV_16F,2)
+#define CV_16FC3 CV_MAKETYPE(CV_16F,3)
+#define CV_16FC4 CV_MAKETYPE(CV_16F,4)
+#define CV_16FC(n) CV_MAKETYPE(CV_16F,(n))
+//! @}
+
+//! @name Comparison operation
+//! @sa cv::CmpTypes
+//! @{
+#define CV_HAL_CMP_EQ 0
+#define CV_HAL_CMP_GT 1
+#define CV_HAL_CMP_GE 2
+#define CV_HAL_CMP_LT 3
+#define CV_HAL_CMP_LE 4
+#define CV_HAL_CMP_NE 5
+//! @}
+
+//! @name Border processing modes
+//! @sa cv::BorderTypes
+//! @{
+#define CV_HAL_BORDER_CONSTANT 0
+#define CV_HAL_BORDER_REPLICATE 1
+#define CV_HAL_BORDER_REFLECT 2
+#define CV_HAL_BORDER_WRAP 3
+#define CV_HAL_BORDER_REFLECT_101 4
+#define CV_HAL_BORDER_TRANSPARENT 5
+#define CV_HAL_BORDER_ISOLATED 16
+//! @}
+
+//! @name DFT flags
+//! @{
+#define CV_HAL_DFT_INVERSE        1
+#define CV_HAL_DFT_SCALE          2
+#define CV_HAL_DFT_ROWS           4
+#define CV_HAL_DFT_COMPLEX_OUTPUT 16
+#define CV_HAL_DFT_REAL_OUTPUT    32
+#define CV_HAL_DFT_TWO_STAGE      64
+#define CV_HAL_DFT_STAGE_COLS    128
+#define CV_HAL_DFT_IS_CONTINUOUS 512
+#define CV_HAL_DFT_IS_INPLACE 1024
+//! @}
+
+//! @name SVD flags
+//! @{
+#define CV_HAL_SVD_NO_UV    1
+#define CV_HAL_SVD_SHORT_UV 2
+#define CV_HAL_SVD_MODIFY_A 4
+#define CV_HAL_SVD_FULL_UV  8
+//! @}
+
+//! @name Gemm flags
+//! @{
+#define CV_HAL_GEMM_1_T 1
+#define CV_HAL_GEMM_2_T 2
+#define CV_HAL_GEMM_3_T 4
+//! @}
+
 //! @}
 
 #endif
diff --git a/IPL/include/opencv/opencv2/core/hal/intrin.hpp b/IPL/include/opencv/opencv2/core/hal/intrin.hpp
index 33e14b4..52f6b5d 100644
--- a/IPL/include/opencv/opencv2/core/hal/intrin.hpp
+++ b/IPL/include/opencv/opencv2/core/hal/intrin.hpp
@@ -42,8 +42,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_HAL_INTRIN_HPP__
-#define __OPENCV_HAL_INTRIN_HPP__
+#ifndef OPENCV_HAL_INTRIN_HPP
+#define OPENCV_HAL_INTRIN_HPP
 
 #include <cmath>
 #include <float.h>
@@ -55,266 +55,466 @@
 #define OPENCV_HAL_NOP(a) (a)
 #define OPENCV_HAL_1ST(a, b) (a)
 
+namespace {
+inline unsigned int trailingZeros32(unsigned int value) { // index of the lowest set bit; handling of value == 0 differs per branch
+#if defined(_MSC_VER)
+#if (_MSC_VER < 1700) || defined(_M_ARM) || defined(_M_ARM64)
+    unsigned long index = 0;
+    _BitScanForward(&index, value);
+    return (unsigned int)index;
+#elif defined(__clang__)
+    // clang-cl doesn't export _tzcnt_u32 for non BMI systems
+    return value ? __builtin_ctz(value) : 32;
+#else
+    return _tzcnt_u32(value);
+#endif
+#elif defined(__GNUC__) || defined(__GNUG__)
+    return __builtin_ctz(value); // per the GCC builtin contract the result is undefined for value == 0
+#elif defined(__ICC) || defined(__INTEL_COMPILER)
+    return _bit_scan_forward(value);
+#elif defined(__clang__)
+    return __builtin_ctz(value); // replaces llvm.cttz.i32(value, true), which is LLVM IR naming and not valid C++
+#else
+    static const int MultiplyDeBruijnBitPosition[32] = {
+        0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
+        31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 };
+    return MultiplyDeBruijnBitPosition[((uint32_t)((value & -value) * 0x077CB531U)) >> 27]; // portable fallback: isolate lowest set bit, multiply, use top 5 bits as table index
+#endif
+}
+}
+
 // unlike HAL API, which is in cv::hal,
 // we put intrinsics into cv namespace to make its
 // access from within opencv code more accessible
 namespace cv {
 
-//! @addtogroup core_hal_intrin
-//! @{
+namespace hal {
 
-//! @cond IGNORED
-template<typename _Tp> struct V_TypeTraits
+enum StoreMode
 {
-    typedef _Tp int_type;
-    typedef _Tp uint_type;
-    typedef _Tp abs_type;
-    typedef _Tp sum_type;
+    STORE_UNALIGNED = 0,
+    STORE_ALIGNED = 1,
+    STORE_ALIGNED_NOCACHE = 2
+};
 
-    enum { delta = 0, shift = 0 };
+}
 
-    static int_type reinterpret_int(_Tp x) { return x; }
-    static uint_type reinterpet_uint(_Tp x) { return x; }
-    static _Tp reinterpret_from_int(int_type x) { return (_Tp)x; }
+// TODO FIXIT: Don't use "God" traits. Split into separate cases.
+template<typename _Tp> struct V_TypeTraits
+{
 };
 
-template<> struct V_TypeTraits<uchar>
-{
-    typedef uchar value_type;
-    typedef schar int_type;
-    typedef uchar uint_type;
-    typedef uchar abs_type;
-    typedef int sum_type;
+#define CV_INTRIN_DEF_TYPE_TRAITS(type, int_type_, uint_type_, abs_type_, w_type_, q_type_, sum_type_, nlanes128_) \
+    template<> struct V_TypeTraits<type> \
+    { \
+        typedef type value_type; \
+        typedef int_type_ int_type; \
+        typedef abs_type_ abs_type; \
+        typedef uint_type_ uint_type; \
+        typedef w_type_ w_type; \
+        typedef q_type_ q_type; \
+        typedef sum_type_ sum_type; \
+        enum { nlanes128 = nlanes128_ }; \
+    \
+        static inline int_type reinterpret_int(type x) \
+        { \
+            union { type l; int_type i; } v; \
+            v.l = x; \
+            return v.i; \
+        } \
+    \
+        static inline type reinterpret_from_int(int_type x) \
+        { \
+            union { type l; int_type i; } v; \
+            v.i = x; \
+            return v.l; \
+        } \
+    }
 
-    typedef ushort w_type;
-    typedef unsigned q_type;
+#define CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(type, int_type_, uint_type_, abs_type_, w_type_, sum_type_, nlanes128_) \
+    template<> struct V_TypeTraits<type> \
+    { \
+        typedef type value_type; \
+        typedef int_type_ int_type; \
+        typedef abs_type_ abs_type; \
+        typedef uint_type_ uint_type; \
+        typedef w_type_ w_type; \
+        typedef sum_type_ sum_type; \
+        enum { nlanes128 = nlanes128_ }; \
+    \
+        static inline int_type reinterpret_int(type x) \
+        { \
+            union { type l; int_type i; } v; \
+            v.l = x; \
+            return v.i; \
+        } \
+    \
+        static inline type reinterpret_from_int(int_type x) \
+        { \
+            union { type l; int_type i; } v; \
+            v.i = x; \
+            return v.l; \
+        } \
+    }
 
-    enum { delta = 128, shift = 8 };
+CV_INTRIN_DEF_TYPE_TRAITS(uchar, schar, uchar, uchar, ushort, unsigned, unsigned, 16);
+CV_INTRIN_DEF_TYPE_TRAITS(schar, schar, uchar, uchar, short, int, int, 16);
+CV_INTRIN_DEF_TYPE_TRAITS(ushort, short, ushort, ushort, unsigned, uint64, unsigned, 8);
+CV_INTRIN_DEF_TYPE_TRAITS(short, short, ushort, ushort, int, int64, int, 8);
+CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(unsigned, int, unsigned, unsigned, uint64, unsigned, 4);
+CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(int, int, unsigned, unsigned, int64, int, 4);
+CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(float, int, unsigned, float, double, float, 4);
+CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(uint64, int64, uint64, uint64, void, uint64, 2);
+CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(int64, int64, uint64, uint64, void, int64, 2);
+CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(double, int64, uint64, double, void, double, 2);
+
+#ifndef CV_DOXYGEN
+
+#ifndef CV_CPU_OPTIMIZATION_HAL_NAMESPACE
+#ifdef CV_FORCE_SIMD128_CPP
+    #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE hal_EMULATOR_CPP
+    #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN namespace hal_EMULATOR_CPP {
+    #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END }
+#elif defined(CV_CPU_DISPATCH_MODE)
+    #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE __CV_CAT(hal_, CV_CPU_DISPATCH_MODE)
+    #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN namespace __CV_CAT(hal_, CV_CPU_DISPATCH_MODE) {
+    #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END }
+#else
+    #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE hal_baseline
+    #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN namespace hal_baseline {
+    #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END }
+#endif
+#endif // CV_CPU_OPTIMIZATION_HAL_NAMESPACE
 
-    static int_type reinterpret_int(value_type x) { return (int_type)x; }
-    static uint_type reinterpret_uint(value_type x) { return (uint_type)x; }
-    static value_type reinterpret_from_int(int_type x) { return (value_type)x; }
-};
+CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
+CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
+using namespace CV_CPU_OPTIMIZATION_HAL_NAMESPACE;
+#endif
+}
 
-template<> struct V_TypeTraits<schar>
-{
-    typedef schar value_type;
-    typedef schar int_type;
-    typedef uchar uint_type;
-    typedef uchar abs_type;
-    typedef int sum_type;
+#ifdef CV_DOXYGEN
+#   undef CV_AVX2
+#   undef CV_SSE2
+#   undef CV_NEON
+#   undef CV_VSX
+#   undef CV_FP16
+#   undef CV_MSA
+#endif
 
-    typedef short w_type;
-    typedef int q_type;
+#if (CV_SSE2 || CV_NEON || CV_VSX || CV_MSA || CV_WASM_SIMD) && !defined(CV_FORCE_SIMD128_CPP)
+#define CV__SIMD_FORWARD 128
+#include "opencv2/core/hal/intrin_forward.hpp"
+#endif
 
-    enum { delta = 128, shift = 8 };
+#if CV_SSE2 && !defined(CV_FORCE_SIMD128_CPP)
 
-    static int_type reinterpret_int(value_type x) { return (int_type)x; }
-    static uint_type reinterpret_uint(value_type x) { return (uint_type)x; }
-    static value_type reinterpret_from_int(int_type x) { return (value_type)x; }
-};
+#include "opencv2/core/hal/intrin_sse_em.hpp"
+#include "opencv2/core/hal/intrin_sse.hpp"
 
-template<> struct V_TypeTraits<ushort>
-{
-    typedef ushort value_type;
-    typedef short int_type;
-    typedef ushort uint_type;
-    typedef ushort abs_type;
-    typedef int sum_type;
+#elif CV_NEON && !defined(CV_FORCE_SIMD128_CPP)
 
-    typedef unsigned w_type;
-    typedef uchar nu_type;
+#include "opencv2/core/hal/intrin_neon.hpp"
 
-    enum { delta = 32768, shift = 16 };
+#elif CV_VSX && !defined(CV_FORCE_SIMD128_CPP)
 
-    static int_type reinterpret_int(value_type x) { return (int_type)x; }
-    static uint_type reinterpret_uint(value_type x) { return (uint_type)x; }
-    static value_type reinterpret_from_int(int_type x) { return (value_type)x; }
-};
+#include "opencv2/core/hal/intrin_vsx.hpp"
 
-template<> struct V_TypeTraits<short>
-{
-    typedef short value_type;
-    typedef short int_type;
-    typedef ushort uint_type;
-    typedef ushort abs_type;
-    typedef int sum_type;
+#elif CV_MSA && !defined(CV_FORCE_SIMD128_CPP)
 
-    typedef int w_type;
-    typedef uchar nu_type;
-    typedef schar n_type;
+#include "opencv2/core/hal/intrin_msa.hpp"
 
-    enum { delta = 128, shift = 8 };
+#elif CV_WASM_SIMD && !defined(CV_FORCE_SIMD128_CPP)
+#include "opencv2/core/hal/intrin_wasm.hpp"
 
-    static int_type reinterpret_int(value_type x) { return (int_type)x; }
-    static uint_type reinterpret_uint(value_type x) { return (uint_type)x; }
-    static value_type reinterpret_from_int(int_type x) { return (value_type)x; }
-};
+#else
 
-template<> struct V_TypeTraits<unsigned>
-{
-    typedef unsigned value_type;
-    typedef int int_type;
-    typedef unsigned uint_type;
-    typedef unsigned abs_type;
-    typedef unsigned sum_type;
-
-    typedef uint64 w_type;
-    typedef ushort nu_type;
-
-    static int_type reinterpret_int(value_type x) { return (int_type)x; }
-    static uint_type reinterpret_uint(value_type x) { return (uint_type)x; }
-    static value_type reinterpret_from_int(int_type x) { return (value_type)x; }
-};
+#include "opencv2/core/hal/intrin_cpp.hpp"
 
-template<> struct V_TypeTraits<int>
-{
-    typedef int value_type;
-    typedef int int_type;
-    typedef unsigned uint_type;
-    typedef unsigned abs_type;
-    typedef int sum_type;
-
-    typedef int64 w_type;
-    typedef short n_type;
-    typedef ushort nu_type;
-
-    static int_type reinterpret_int(value_type x) { return (int_type)x; }
-    static uint_type reinterpret_uint(value_type x) { return (uint_type)x; }
-    static value_type reinterpret_from_int(int_type x) { return (value_type)x; }
-};
+#endif
 
-template<> struct V_TypeTraits<uint64>
-{
-    typedef uint64 value_type;
-    typedef int64 int_type;
-    typedef uint64 uint_type;
-    typedef uint64 abs_type;
-    typedef uint64 sum_type;
+// AVX2 can be used together with SSE2, so
+// we define those two sets of intrinsics at once.
+// Most of the intrinsics do not conflict (the proper overloaded variant is
+// resolved by the argument types, e.g. v_float32x4 ~ SSE2, v_float32x8 ~ AVX2),
+// but some of AVX2 intrinsics get v256_ prefix instead of v_, e.g. v256_load() vs v_load().
+// Correspondingly, the wide intrinsics (which are mapped to the "widest"
+// available instruction set) will get vx_ prefix
+// (and will be mapped to v256_ counterparts) (e.g. vx_load() => v256_load())
+#if CV_AVX2
 
-    typedef unsigned nu_type;
+#define CV__SIMD_FORWARD 256
+#include "opencv2/core/hal/intrin_forward.hpp"
+#include "opencv2/core/hal/intrin_avx.hpp"
 
-    static int_type reinterpret_int(value_type x) { return (int_type)x; }
-    static uint_type reinterpret_uint(value_type x) { return (uint_type)x; }
-    static value_type reinterpret_from_int(int_type x) { return (value_type)x; }
-};
+#endif
 
-template<> struct V_TypeTraits<int64>
-{
-    typedef int64 value_type;
-    typedef int64 int_type;
-    typedef uint64 uint_type;
-    typedef uint64 abs_type;
-    typedef int64 sum_type;
+// AVX512 can be used together with SSE2 and AVX2, so
+// we define those sets of intrinsics at once.
+// Some of the AVX512 intrinsics get a v512_ prefix instead of v_, e.g. v512_load() vs v_load().
+// Wide intrinsics will be mapped to v512_ counterparts in this case (e.g. vx_load() => v512_load()).
+#if CV_AVX512_SKX
 
-    typedef int nu_type;
+#define CV__SIMD_FORWARD 512
+#include "opencv2/core/hal/intrin_forward.hpp"
+#include "opencv2/core/hal/intrin_avx512.hpp"
 
-    static int_type reinterpret_int(value_type x) { return (int_type)x; }
-    static uint_type reinterpret_uint(value_type x) { return (uint_type)x; }
-    static value_type reinterpret_from_int(int_type x) { return (value_type)x; }
-};
+#endif
 
+//! @cond IGNORED
 
-template<> struct V_TypeTraits<float>
-{
-    typedef float value_type;
-    typedef int int_type;
-    typedef unsigned uint_type;
-    typedef float abs_type;
-    typedef float sum_type;
-
-    typedef double w_type;
-
-    static int_type reinterpret_int(value_type x)
-    {
-        Cv32suf u;
-        u.f = x;
-        return u.i;
-    }
-    static uint_type reinterpet_uint(value_type x)
-    {
-        Cv32suf u;
-        u.f = x;
-        return u.u;
-    }
-    static value_type reinterpret_from_int(int_type x)
-    {
-        Cv32suf u;
-        u.i = x;
-        return u.f;
-    }
-};
+namespace cv {
 
-template<> struct V_TypeTraits<double>
-{
-    typedef double value_type;
-    typedef int64 int_type;
-    typedef uint64 uint_type;
-    typedef double abs_type;
-    typedef double sum_type;
-    static int_type reinterpret_int(value_type x)
-    {
-        Cv64suf u;
-        u.f = x;
-        return u.i;
-    }
-    static uint_type reinterpet_uint(value_type x)
-    {
-        Cv64suf u;
-        u.f = x;
-        return u.u;
-    }
-    static value_type reinterpret_from_int(int_type x)
-    {
-        Cv64suf u;
-        u.i = x;
-        return u.f;
-    }
-};
+#ifndef CV_DOXYGEN
+CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
+#endif
 
-template <typename T> struct V_SIMD128Traits
-{
-    enum { nlanes = 16 / sizeof(T) };
-};
+#ifndef CV_SIMD128
+#define CV_SIMD128 0
+#endif
 
-//! @endcond
+#ifndef CV_SIMD128_CPP
+#define CV_SIMD128_CPP 0
+#endif
 
-//! @}
+#ifndef CV_SIMD128_64F
+#define CV_SIMD128_64F 0
+#endif
 
-}
+#ifndef CV_SIMD256
+#define CV_SIMD256 0
+#endif
 
-#ifdef CV_DOXYGEN
-#   undef CV_SSE2
-#   undef CV_NEON
+#ifndef CV_SIMD256_64F
+#define CV_SIMD256_64F 0
 #endif
 
-#if CV_SSE2
+#ifndef CV_SIMD512
+#define CV_SIMD512 0
+#endif
 
-#include "opencv2/core/hal/intrin_sse.hpp"
+#ifndef CV_SIMD512_64F
+#define CV_SIMD512_64F 0
+#endif
 
-#elif CV_NEON
+#ifndef CV_SIMD128_FP16
+#define CV_SIMD128_FP16 0
+#endif
 
-#include "opencv2/core/hal/intrin_neon.hpp"
+#ifndef CV_SIMD256_FP16
+#define CV_SIMD256_FP16 0
+#endif
 
+#ifndef CV_SIMD512_FP16
+#define CV_SIMD512_FP16 0
+#endif
+
+//==================================================================================================
+
+#define CV_INTRIN_DEFINE_WIDE_INTRIN(typ, vtyp, short_typ, prefix, loadsfx) \
+    inline vtyp vx_setall_##short_typ(typ v) { return prefix##_setall_##short_typ(v); } \
+    inline vtyp vx_setzero_##short_typ() { return prefix##_setzero_##short_typ(); } \
+    inline vtyp vx_##loadsfx(const typ* ptr) { return prefix##_##loadsfx(ptr); } \
+    inline vtyp vx_##loadsfx##_aligned(const typ* ptr) { return prefix##_##loadsfx##_aligned(ptr); } \
+    inline vtyp vx_##loadsfx##_low(const typ* ptr) { return prefix##_##loadsfx##_low(ptr); } \
+    inline vtyp vx_##loadsfx##_halves(const typ* ptr0, const typ* ptr1) { return prefix##_##loadsfx##_halves(ptr0, ptr1); } \
+    inline void vx_store(typ* ptr, const vtyp& v) { return v_store(ptr, v); } \
+    inline void vx_store_aligned(typ* ptr, const vtyp& v) { return v_store_aligned(ptr, v); } \
+    inline vtyp vx_lut(const typ* ptr, const int* idx) { return prefix##_lut(ptr, idx); } \
+    inline vtyp vx_lut_pairs(const typ* ptr, const int* idx) { return prefix##_lut_pairs(ptr, idx); }
+
+#define CV_INTRIN_DEFINE_WIDE_LUT_QUAD(typ, vtyp, prefix) \
+    inline vtyp vx_lut_quads(const typ* ptr, const int* idx) { return prefix##_lut_quads(ptr, idx); }
+
+#define CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND(typ, wtyp, prefix) \
+    inline wtyp vx_load_expand(const typ* ptr) { return prefix##_load_expand(ptr); }
+
+#define CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND_Q(typ, qtyp, prefix) \
+    inline qtyp vx_load_expand_q(const typ* ptr) { return prefix##_load_expand_q(ptr); }
+
+#define CV_INTRIN_DEFINE_WIDE_INTRIN_WITH_EXPAND(typ, vtyp, short_typ, wtyp, qtyp, prefix, loadsfx) \
+    CV_INTRIN_DEFINE_WIDE_INTRIN(typ, vtyp, short_typ, prefix, loadsfx) \
+    CV_INTRIN_DEFINE_WIDE_LUT_QUAD(typ, vtyp, prefix) \
+    CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND(typ, wtyp, prefix) \
+    CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND_Q(typ, qtyp, prefix)
+
+#define CV_INTRIN_DEFINE_WIDE_INTRIN_ALL_TYPES(prefix) \
+    CV_INTRIN_DEFINE_WIDE_INTRIN_WITH_EXPAND(uchar, v_uint8, u8, v_uint16, v_uint32, prefix, load) \
+    CV_INTRIN_DEFINE_WIDE_INTRIN_WITH_EXPAND(schar, v_int8, s8, v_int16, v_int32, prefix, load) \
+    CV_INTRIN_DEFINE_WIDE_INTRIN(ushort, v_uint16, u16, prefix, load) \
+    CV_INTRIN_DEFINE_WIDE_LUT_QUAD(ushort, v_uint16, prefix) \
+    CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND(ushort, v_uint32, prefix) \
+    CV_INTRIN_DEFINE_WIDE_INTRIN(short, v_int16, s16, prefix, load) \
+    CV_INTRIN_DEFINE_WIDE_LUT_QUAD(short, v_int16, prefix) \
+    CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND(short, v_int32, prefix) \
+    CV_INTRIN_DEFINE_WIDE_INTRIN(int, v_int32, s32, prefix, load) \
+    CV_INTRIN_DEFINE_WIDE_LUT_QUAD(int, v_int32, prefix) \
+    CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND(int, v_int64, prefix) \
+    CV_INTRIN_DEFINE_WIDE_INTRIN(unsigned, v_uint32, u32, prefix, load) \
+    CV_INTRIN_DEFINE_WIDE_LUT_QUAD(unsigned, v_uint32, prefix) \
+    CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND(unsigned, v_uint64, prefix) \
+    CV_INTRIN_DEFINE_WIDE_INTRIN(float, v_float32, f32, prefix, load) \
+    CV_INTRIN_DEFINE_WIDE_LUT_QUAD(float, v_float32, prefix) \
+    CV_INTRIN_DEFINE_WIDE_INTRIN(int64, v_int64, s64, prefix, load) \
+    CV_INTRIN_DEFINE_WIDE_INTRIN(uint64, v_uint64, u64, prefix, load) \
+    CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND(float16_t, v_float32, prefix)
+
+template<typename _Tp> struct V_RegTraits
+{
+};
+
+#define CV_DEF_REG_TRAITS(prefix, _reg, lane_type, suffix, _u_reg, _w_reg, _q_reg, _int_reg, _round_reg) \
+    template<> struct V_RegTraits<_reg> \
+    { \
+        typedef _reg reg; \
+        typedef _u_reg u_reg; \
+        typedef _w_reg w_reg; \
+        typedef _q_reg q_reg; \
+        typedef _int_reg int_reg; \
+        typedef _round_reg round_reg; \
+    }
+
+#if CV_SIMD128 || CV_SIMD128_CPP
+    CV_DEF_REG_TRAITS(v, v_uint8x16, uchar, u8, v_uint8x16, v_uint16x8, v_uint32x4, v_int8x16, void);
+    CV_DEF_REG_TRAITS(v, v_int8x16, schar, s8, v_uint8x16, v_int16x8, v_int32x4, v_int8x16, void);
+    CV_DEF_REG_TRAITS(v, v_uint16x8, ushort, u16, v_uint16x8, v_uint32x4, v_uint64x2, v_int16x8, void);
+    CV_DEF_REG_TRAITS(v, v_int16x8, short, s16, v_uint16x8, v_int32x4, v_int64x2, v_int16x8, void);
+    CV_DEF_REG_TRAITS(v, v_uint32x4, unsigned, u32, v_uint32x4, v_uint64x2, void, v_int32x4, void);
+    CV_DEF_REG_TRAITS(v, v_int32x4, int, s32, v_uint32x4, v_int64x2, void, v_int32x4, void);
+#if CV_SIMD128_64F || CV_SIMD128_CPP
+    CV_DEF_REG_TRAITS(v, v_float32x4, float, f32, v_float32x4, v_float64x2, void, v_int32x4, v_int32x4);
 #else
+    CV_DEF_REG_TRAITS(v, v_float32x4, float, f32, v_float32x4, void, void, v_int32x4, v_int32x4);
+#endif
+    CV_DEF_REG_TRAITS(v, v_uint64x2, uint64, u64, v_uint64x2, void, void, v_int64x2, void);
+    CV_DEF_REG_TRAITS(v, v_int64x2, int64, s64, v_uint64x2, void, void, v_int64x2, void);
+#if CV_SIMD128_64F
+    CV_DEF_REG_TRAITS(v, v_float64x2, double, f64, v_float64x2, void, void, v_int64x2, v_int32x4);
+#endif
+#endif
 
-#include "opencv2/core/hal/intrin_cpp.hpp"
+#if CV_SIMD256
+    CV_DEF_REG_TRAITS(v256, v_uint8x32, uchar, u8, v_uint8x32, v_uint16x16, v_uint32x8, v_int8x32, void);
+    CV_DEF_REG_TRAITS(v256, v_int8x32, schar, s8, v_uint8x32, v_int16x16, v_int32x8, v_int8x32, void);
+    CV_DEF_REG_TRAITS(v256, v_uint16x16, ushort, u16, v_uint16x16, v_uint32x8, v_uint64x4, v_int16x16, void);
+    CV_DEF_REG_TRAITS(v256, v_int16x16, short, s16, v_uint16x16, v_int32x8, v_int64x4, v_int16x16, void);
+    CV_DEF_REG_TRAITS(v256, v_uint32x8, unsigned, u32, v_uint32x8, v_uint64x4, void, v_int32x8, void);
+    CV_DEF_REG_TRAITS(v256, v_int32x8, int, s32, v_uint32x8, v_int64x4, void, v_int32x8, void);
+    CV_DEF_REG_TRAITS(v256, v_float32x8, float, f32, v_float32x8, v_float64x4, void, v_int32x8, v_int32x8);
+    CV_DEF_REG_TRAITS(v256, v_uint64x4, uint64, u64, v_uint64x4, void, void, v_int64x4, void);
+    CV_DEF_REG_TRAITS(v256, v_int64x4, int64, s64, v_uint64x4, void, void, v_int64x4, void);
+    CV_DEF_REG_TRAITS(v256, v_float64x4, double, f64, v_float64x4, void, void, v_int64x4, v_int32x8);
+#endif
 
+#if CV_SIMD512
+    CV_DEF_REG_TRAITS(v512, v_uint8x64, uchar, u8, v_uint8x64, v_uint16x32, v_uint32x16, v_int8x64, void);
+    CV_DEF_REG_TRAITS(v512, v_int8x64, schar, s8, v_uint8x64, v_int16x32, v_int32x16, v_int8x64, void);
+    CV_DEF_REG_TRAITS(v512, v_uint16x32, ushort, u16, v_uint16x32, v_uint32x16, v_uint64x8, v_int16x32, void);
+    CV_DEF_REG_TRAITS(v512, v_int16x32, short, s16, v_uint16x32, v_int32x16, v_int64x8, v_int16x32, void);
+    CV_DEF_REG_TRAITS(v512, v_uint32x16, unsigned, u32, v_uint32x16, v_uint64x8, void, v_int32x16, void);
+    CV_DEF_REG_TRAITS(v512, v_int32x16, int, s32, v_uint32x16, v_int64x8, void, v_int32x16, void);
+    CV_DEF_REG_TRAITS(v512, v_float32x16, float, f32, v_float32x16, v_float64x8, void, v_int32x16, v_int32x16);
+    CV_DEF_REG_TRAITS(v512, v_uint64x8, uint64, u64, v_uint64x8, void, void, v_int64x8, void);
+    CV_DEF_REG_TRAITS(v512, v_int64x8, int64, s64, v_uint64x8, void, void, v_int64x8, void);
+    CV_DEF_REG_TRAITS(v512, v_float64x8, double, f64, v_float64x8, void, void, v_int64x8, v_int32x16);
 #endif
 
-//! @addtogroup core_hal_intrin
-//! @{
+#if CV_SIMD512 && (!defined(CV__SIMD_FORCE_WIDTH) || CV__SIMD_FORCE_WIDTH == 512)
+#define CV__SIMD_NAMESPACE simd512
+namespace CV__SIMD_NAMESPACE {
+    #define CV_SIMD 1
+    #define CV_SIMD_64F CV_SIMD512_64F
+    #define CV_SIMD_FP16 CV_SIMD512_FP16
+    #define CV_SIMD_WIDTH 64
+    typedef v_uint8x64    v_uint8;
+    typedef v_int8x64     v_int8;
+    typedef v_uint16x32   v_uint16;
+    typedef v_int16x32    v_int16;
+    typedef v_uint32x16   v_uint32;
+    typedef v_int32x16    v_int32;
+    typedef v_uint64x8    v_uint64;
+    typedef v_int64x8     v_int64;
+    typedef v_float32x16  v_float32;
+    CV_INTRIN_DEFINE_WIDE_INTRIN_ALL_TYPES(v512)
+#if CV_SIMD512_64F
+    typedef v_float64x8   v_float64;
+    CV_INTRIN_DEFINE_WIDE_INTRIN(double, v_float64, f64, v512, load)
+#endif
+        inline void vx_cleanup() { v512_cleanup(); }
+} // namespace
+using namespace CV__SIMD_NAMESPACE;
+#elif CV_SIMD256 && (!defined(CV__SIMD_FORCE_WIDTH) || CV__SIMD_FORCE_WIDTH == 256)
+#define CV__SIMD_NAMESPACE simd256
+namespace CV__SIMD_NAMESPACE {
+    #define CV_SIMD 1
+    #define CV_SIMD_64F CV_SIMD256_64F
+    #define CV_SIMD_FP16 CV_SIMD256_FP16
+    #define CV_SIMD_WIDTH 32
+    typedef v_uint8x32   v_uint8;
+    typedef v_int8x32    v_int8;
+    typedef v_uint16x16  v_uint16;
+    typedef v_int16x16   v_int16;
+    typedef v_uint32x8   v_uint32;
+    typedef v_int32x8    v_int32;
+    typedef v_uint64x4   v_uint64;
+    typedef v_int64x4    v_int64;
+    typedef v_float32x8  v_float32;
+    CV_INTRIN_DEFINE_WIDE_INTRIN_ALL_TYPES(v256)
+    #if CV_SIMD256_64F
+    typedef v_float64x4  v_float64;
+    CV_INTRIN_DEFINE_WIDE_INTRIN(double, v_float64, f64, v256, load)
+    #endif
+    inline void vx_cleanup() { v256_cleanup(); }
+} // namespace
+using namespace CV__SIMD_NAMESPACE;
+#elif (CV_SIMD128 || CV_SIMD128_CPP) && (!defined(CV__SIMD_FORCE_WIDTH) || CV__SIMD_FORCE_WIDTH == 128)
+#if CV_SIMD128_CPP  // test the value, not defined-ness: this header defaults the macro to 0 above, so `defined` was always true
+#define CV__SIMD_NAMESPACE simd128_cpp
+#else
+#define CV__SIMD_NAMESPACE simd128
+#endif
+namespace CV__SIMD_NAMESPACE {
+    #define CV_SIMD CV_SIMD128
+    #define CV_SIMD_64F CV_SIMD128_64F
+    #define CV_SIMD_WIDTH 16
+    typedef v_uint8x16  v_uint8;
+    typedef v_int8x16   v_int8;
+    typedef v_uint16x8  v_uint16;
+    typedef v_int16x8   v_int16;
+    typedef v_uint32x4  v_uint32;
+    typedef v_int32x4   v_int32;
+    typedef v_uint64x2  v_uint64;
+    typedef v_int64x2   v_int64;
+    typedef v_float32x4 v_float32;
+    CV_INTRIN_DEFINE_WIDE_INTRIN_ALL_TYPES(v)
+    #if CV_SIMD128_64F
+    typedef v_float64x2 v_float64;
+    CV_INTRIN_DEFINE_WIDE_INTRIN(double, v_float64, f64, v, load)
+    #endif
+    inline void vx_cleanup() { v_cleanup(); }
+} // namespace
+using namespace CV__SIMD_NAMESPACE;
+#endif
 
-#ifndef CV_SIMD128
-//! Set to 1 if current compiler supports vector extensions (NEON or SSE is enabled)
-#define CV_SIMD128 0
+#ifndef CV_SIMD_64F
+#define CV_SIMD_64F 0
 #endif
 
-#ifndef CV_SIMD128_64F
-//! Set to 1 if current intrinsics implementation supports 64-bit float vectors
-#define CV_SIMD128_64F 0
+#ifndef CV_SIMD_FP16
+#define CV_SIMD_FP16 0  //!< Defined to 1 on native support of operations with float16x8_t / float16x16_t (SIMD256) types
 #endif
 
-//! @}
+#ifndef CV_SIMD
+#define CV_SIMD 0
+#endif
+
+#include "simd_utils.impl.hpp"
+
+#ifndef CV_DOXYGEN
+CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
+#endif
+
+} // cv::
+
+//! @endcond
 
 #endif
diff --git a/IPL/include/opencv/opencv2/core/hal/intrin_avx.hpp b/IPL/include/opencv/opencv2/core/hal/intrin_avx.hpp
new file mode 100644
index 0000000..ca315ae
--- /dev/null
+++ b/IPL/include/opencv/opencv2/core/hal/intrin_avx.hpp
@@ -0,0 +1,3125 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html
+
+#ifndef OPENCV_HAL_INTRIN_AVX_HPP
+#define OPENCV_HAL_INTRIN_AVX_HPP
+
+#define CV_SIMD256 1
+#define CV_SIMD256_64F 1
+#define CV_SIMD256_FP16 0  // no native operations with FP16 type. Only load/store from float32x8 are available (if CV_FP16 == 1)
+
+namespace cv
+{
+
+//! @cond IGNORED
+
+CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
+
+///////// Utils ////////////
+
+inline __m256i _v256_combine(const __m128i& lo, const __m128i& hi)  // lo fills bits 0..127, hi fills bits 128..255
+{ const __m256i base = _mm256_castsi128_si256(lo); return _mm256_inserti128_si256(base, hi, 1); }
+// Single-precision overload of the same lo/hi concatenation.
+inline __m256 _v256_combine(const __m128& lo, const __m128& hi)
+{ const __m256 base = _mm256_castps128_ps256(lo); return _mm256_insertf128_ps(base, hi, 1); }
+// Double-precision overload of the same lo/hi concatenation.
+inline __m256d _v256_combine(const __m128d& lo, const __m128d& hi)
+{ const __m256d base = _mm256_castpd128_pd256(lo); return _mm256_insertf128_pd(base, hi, 1); }
+
+inline int _v_cvtsi256_si32(const __m256i& a)  // returns the lowest 32-bit lane of a
+{ const __m128i low_half = _mm256_castsi256_si128(a); return _mm_cvtsi128_si32(low_half); }
+
+inline __m256i _v256_shuffle_odd_64(const __m256i& v)  // reorders 64-bit lanes {0,1,2,3} into {0,2,1,3}
+{ enum { lane_order = _MM_SHUFFLE(3, 1, 2, 0) }; return _mm256_permute4x64_epi64(v, lane_order); }
+// Double-precision overload: same {0,2,1,3} lane order.
+inline __m256d _v256_shuffle_odd_64(const __m256d& v)
+{ enum { lane_order = _MM_SHUFFLE(3, 1, 2, 0) }; return _mm256_permute4x64_pd(v, lane_order); }
+
+template<int imm>  // imm: compile-time selector byte handed unchanged to the permute intrinsic
+inline __m256i _v256_permute2x128(const __m256i& a, const __m256i& b)
+{ return _mm256_permute2x128_si256(a, b, imm); }
+// Packed-float overload; calls the permute2f128 form of the intrinsic.
+template<int imm>
+inline __m256 _v256_permute2x128(const __m256& a, const __m256& b)
+{ return _mm256_permute2f128_ps(a, b, imm); }
+// Packed-double overload; calls the permute2f128 form of the intrinsic.
+template<int imm>
+inline __m256d _v256_permute2x128(const __m256d& a, const __m256d& b)
+{ return _mm256_permute2f128_pd(a, b, imm); }
+// Wrapper for the v_* vector structs: permutes the .val members and re-wraps the result in _Tpvec.
+template<int imm, typename _Tpvec>
+inline _Tpvec v256_permute2x128(const _Tpvec& a, const _Tpvec& b)
+{ return _Tpvec(_v256_permute2x128<imm>(a.val, b.val)); }
+
+template<int imm>  // imm: compile-time 64-bit lane selector handed unchanged to the intrinsic
+inline __m256i _v256_permute4x64(const __m256i& a)
+{ return _mm256_permute4x64_epi64(a, imm); }
+// Packed-double overload of the same 64-bit lane permutation.
+template<int imm>
+inline __m256d _v256_permute4x64(const __m256d& a)
+{ return _mm256_permute4x64_pd(a, imm); }
+// Wrapper for the v_* vector structs: permutes a.val and re-wraps the result in _Tpvec.
+template<int imm, typename _Tpvec>
+inline _Tpvec v256_permute4x64(const _Tpvec& a)
+{ return _Tpvec(_v256_permute4x64<imm>(a.val)); }
+
+inline __m128i _v256_extract_high(const __m256i& v)  // 128-bit half selected by index 1
+{ return _mm256_extracti128_si256(v, 1); }
+// Packed-float overload.
+inline __m128  _v256_extract_high(const __m256& v)
+{ return _mm256_extractf128_ps(v, 1); }
+// Packed-double overload.
+inline __m128d _v256_extract_high(const __m256d& v)
+{ return _mm256_extractf128_pd(v, 1); }
+// The low half is obtained with the 256->128 cast intrinsics rather than an extract call.
+inline __m128i _v256_extract_low(const __m256i& v)
+{ return _mm256_castsi256_si128(v); }
+// Packed-float overload.
+inline __m128  _v256_extract_low(const __m256& v)
+{ return _mm256_castps256_ps128(v); }
+// Packed-double overload.
+inline __m128d _v256_extract_low(const __m256d& v)
+{ return _mm256_castpd256_pd128(v); }
+
+inline __m256i _v256_packs_epu32(const __m256i& a, const __m256i& b)
+{
+    // Clamp every lane to 0xFFFF with an *unsigned* min first, so lanes with the top bit set
+    // are not read as negative by the signed-input pack that follows.
+    const __m256i u16_max = _mm256_set1_epi32(65535);
+    return _mm256_packus_epi32(_mm256_min_epu32(a, u16_max), _mm256_min_epu32(b, u16_max));
+}
+
+template<int i>  // i: compile-time lane index, forwarded as the intrinsic's immediate
+inline int _v256_extract_epi8(const __m256i& a)
+{
+#if defined(CV__SIMD_HAVE_mm256_extract_epi8) || (CV_AVX2 && (!defined(_MSC_VER) || _MSC_VER >= 1910/*MSVS 2017*/))
+    return _mm256_extract_epi8(a, i);
+#else
+    __m128i b = _mm256_extractf128_si256(a, ((i) >> 4));  // pick the 128-bit half (16 byte lanes each) containing lane i
+    return _mm_extract_epi8(b, i & 15);  // SSE4.1
+#endif
+}
+// 16-bit variant. NOTE(review): the guard reuses the *_extract_epi8 feature macro - presumably one switch covers the whole family; confirm.
+template<int i>
+inline int _v256_extract_epi16(const __m256i& a)
+{
+#if defined(CV__SIMD_HAVE_mm256_extract_epi8) || (CV_AVX2 && (!defined(_MSC_VER) || _MSC_VER >= 1910/*MSVS 2017*/))
+    return _mm256_extract_epi16(a, i);
+#else
+    __m128i b = _mm256_extractf128_si256(a, ((i) >> 3));  // 8 lanes per 128-bit half
+    return _mm_extract_epi16(b, i & 7);  // SSE2
+#endif
+}
+// 32-bit variant; same guard and same half-then-lane fallback.
+template<int i>
+inline int _v256_extract_epi32(const __m256i& a)
+{
+#if defined(CV__SIMD_HAVE_mm256_extract_epi8) || (CV_AVX2 && (!defined(_MSC_VER) || _MSC_VER >= 1910/*MSVS 2017*/))
+    return _mm256_extract_epi32(a, i);
+#else
+    __m128i b = _mm256_extractf128_si256(a, ((i) >> 2));  // 4 lanes per 128-bit half
+    return _mm_extract_epi32(b, i & 3);  // SSE4.1
+#endif
+}
+// 64-bit variant; returns int64 instead of int.
+template<int i>
+inline int64 _v256_extract_epi64(const __m256i& a)
+{
+#if defined(CV__SIMD_HAVE_mm256_extract_epi8) || (CV_AVX2 && (!defined(_MSC_VER) || _MSC_VER >= 1910/*MSVS 2017*/))
+    return _mm256_extract_epi64(a, i);
+#else
+    __m128i b = _mm256_extractf128_si256(a, ((i) >> 1));  // 2 lanes per 128-bit half
+    return _mm_extract_epi64(b, i & 1);  // SSE4.1
+#endif
+}
+
+///////// Types ////////////
+
+struct v_uint8x32  // 32 lanes of uchar held in one __m256i
+{
+    typedef uchar lane_type;
+    enum { nlanes = 32 };
+    __m256i val;
+    // Wraps an existing register; explicit so a raw __m256i never converts implicitly.
+    explicit v_uint8x32(__m256i v) : val(v) {}
+    v_uint8x32(uchar v0,  uchar v1,  uchar v2,  uchar v3,
+               uchar v4,  uchar v5,  uchar v6,  uchar v7,
+               uchar v8,  uchar v9,  uchar v10, uchar v11,
+               uchar v12, uchar v13, uchar v14, uchar v15,
+               uchar v16, uchar v17, uchar v18, uchar v19,
+               uchar v20, uchar v21, uchar v22, uchar v23,
+               uchar v24, uchar v25, uchar v26, uchar v27,
+               uchar v28, uchar v29, uchar v30, uchar v31)
+        : val(_mm256_setr_epi8((char)v0, (char)v1, (char)v2, (char)v3,
+            (char)v4,  (char)v5,  (char)v6 , (char)v7,  (char)v8,  (char)v9,
+            (char)v10, (char)v11, (char)v12, (char)v13, (char)v14, (char)v15,
+            (char)v16, (char)v17, (char)v18, (char)v19, (char)v20, (char)v21,
+            (char)v22, (char)v23, (char)v24, (char)v25, (char)v26, (char)v27,
+            (char)v28, (char)v29, (char)v30, (char)v31))
+    {   // arguments are passed to the setr intrinsic in declaration order, v0 first
+    }
+    v_uint8x32() : val(_mm256_setzero_si256()) {}
+    uchar get0() const { return static_cast<uchar>(_v_cvtsi256_si32(val)); }
+};
+
+struct v_int8x32  // 32 lanes of schar held in one __m256i
+{
+    typedef schar lane_type;
+    enum { nlanes = 32 };
+    __m256i val;
+    // Wraps an existing register; explicit so a raw __m256i never converts implicitly.
+    explicit v_int8x32(__m256i v) : val(v) {}
+    v_int8x32(schar v0,  schar v1,  schar v2,  schar v3,
+              schar v4,  schar v5,  schar v6,  schar v7,
+              schar v8,  schar v9,  schar v10, schar v11,
+              schar v12, schar v13, schar v14, schar v15,
+              schar v16, schar v17, schar v18, schar v19,
+              schar v20, schar v21, schar v22, schar v23,
+              schar v24, schar v25, schar v26, schar v27,
+              schar v28, schar v29, schar v30, schar v31)
+        : val(_mm256_setr_epi8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9,
+            v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20,
+            v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31))
+    {   // arguments are passed to the setr intrinsic in declaration order, v0 first
+    }
+    v_int8x32() : val(_mm256_setzero_si256()) {}
+    schar get0() const { return static_cast<schar>(_v_cvtsi256_si32(val)); }
+};
+
+struct v_uint16x16  // 16 lanes of ushort held in one __m256i
+{
+    typedef ushort lane_type;
+    enum { nlanes = 16 };
+    __m256i val;
+    // Wraps an existing register; explicit so a raw __m256i never converts implicitly.
+    explicit v_uint16x16(__m256i v) : val(v) {}
+    v_uint16x16(ushort v0,  ushort v1,  ushort v2,  ushort v3,
+                ushort v4,  ushort v5,  ushort v6,  ushort v7,
+                ushort v8,  ushort v9,  ushort v10, ushort v11,
+                ushort v12, ushort v13, ushort v14, ushort v15)
+        : val(_mm256_setr_epi16((short)v0, (short)v1, (short)v2, (short)v3,
+            (short)v4,  (short)v5,  (short)v6,  (short)v7,  (short)v8,  (short)v9,
+            (short)v10, (short)v11, (short)v12, (short)v13, (short)v14, (short)v15))
+    {   // arguments are passed to the setr intrinsic in declaration order, v0 first
+    }
+    v_uint16x16() : val(_mm256_setzero_si256()) {}
+    ushort get0() const { return static_cast<ushort>(_v_cvtsi256_si32(val)); }
+};
+
+struct v_int16x16  // 16 lanes of short held in one __m256i
+{
+    typedef short lane_type;
+    enum { nlanes = 16 };
+    __m256i val;
+    // Wraps an existing register; explicit so a raw __m256i never converts implicitly.
+    explicit v_int16x16(__m256i v) : val(v) {}
+    v_int16x16(short v0,  short v1,  short v2,  short v3,
+               short v4,  short v5,  short v6,  short v7,
+               short v8,  short v9,  short v10, short v11,
+               short v12, short v13, short v14, short v15)
+        : val(_mm256_setr_epi16(v0, v1, v2, v3, v4, v5, v6, v7,
+            v8, v9, v10, v11, v12, v13, v14, v15))
+    {   // arguments are passed to the setr intrinsic in declaration order, v0 first
+    }
+    v_int16x16() : val(_mm256_setzero_si256()) {}
+    short get0() const { return static_cast<short>(_v_cvtsi256_si32(val)); }
+};
+
+struct v_uint32x8  // 8 lanes of unsigned held in one __m256i
+{
+    typedef unsigned lane_type;
+    enum { nlanes = 8 };
+    __m256i val;
+    // Wraps an existing register; explicit so a raw __m256i never converts implicitly.
+    explicit v_uint32x8(__m256i v) : val(v) {}
+    v_uint32x8(unsigned v0, unsigned v1, unsigned v2, unsigned v3,
+               unsigned v4, unsigned v5, unsigned v6, unsigned v7)
+    {   // _mm256_setr_epi32 takes int arguments: convert explicitly, as the 8/16/64-bit unsigned types here do.
+        val = _mm256_setr_epi32((int)v0, (int)v1, (int)v2, (int)v3,
+                                (int)v4, (int)v5, (int)v6, (int)v7);
+    }
+    v_uint32x8() : val(_mm256_setzero_si256()) {}
+    unsigned get0() const { return (unsigned)_v_cvtsi256_si32(val); }
+};
+
+struct v_int32x8  // 8 lanes of int held in one __m256i
+{
+    typedef int lane_type;
+    enum { nlanes = 8 };
+    __m256i val;
+    // Wraps an existing register; explicit so a raw __m256i never converts implicitly.
+    explicit v_int32x8(__m256i v) : val(v) {}
+    v_int32x8(int v0, int v1, int v2, int v3,
+              int v4, int v5, int v6, int v7)
+        : val(_mm256_setr_epi32(v0, v1, v2, v3, v4, v5, v6, v7))
+    {   // arguments are passed to the setr intrinsic in declaration order, v0 first
+    }
+    v_int32x8() : val(_mm256_setzero_si256()) {}
+    int get0() const { const int first_lane = _v_cvtsi256_si32(val); return first_lane; }
+};
+
+struct v_float32x8  // 8 lanes of float held in one __m256
+{
+    typedef float lane_type;
+    enum { nlanes = 8 };
+    __m256 val;
+    // Wraps an existing register; explicit so a raw __m256 never converts implicitly.
+    explicit v_float32x8(__m256 v) : val(v) {}
+    v_float32x8(float v0, float v1, float v2, float v3,
+                float v4, float v5, float v6, float v7)
+        : val(_mm256_setr_ps(v0, v1, v2, v3, v4, v5, v6, v7))
+    {   // arguments are passed to the setr intrinsic in declaration order, v0 first
+    }
+    v_float32x8() : val(_mm256_setzero_ps()) {}
+    float get0() const { const __m128 low_half = _mm256_castps256_ps128(val); return _mm_cvtss_f32(low_half); }
+};
+
+struct v_uint64x4  // 4 lanes of uint64 held in one __m256i
+{
+    typedef uint64 lane_type;
+    enum { nlanes = 4 };
+    __m256i val;
+    // Wraps an existing register; explicit so a raw __m256i never converts implicitly.
+    explicit v_uint64x4(__m256i v) : val(v) {}
+    v_uint64x4(uint64 v0, uint64 v1, uint64 v2, uint64 v3)
+        : val(_mm256_setr_epi64x((int64)v0, (int64)v1, (int64)v2, (int64)v3)) {}
+    v_uint64x4() : val(_mm256_setzero_si256()) {}
+    uint64 get0() const  // first 64-bit lane
+    {
+    #if defined __x86_64__ || defined _M_X64
+        return (uint64)_mm_cvtsi128_si64(_mm256_castsi256_si128(val));
+    #else
+        const unsigned lo32 = (unsigned)_mm_cvtsi128_si32(_mm256_castsi256_si128(val));  // other targets: read the lane as two 32-bit pieces
+        const unsigned hi32 = (unsigned)_mm_cvtsi128_si32(_mm256_castsi256_si128(_mm256_srli_epi64(val, 32)));
+        return lo32 | ((uint64)hi32 << 32);
+    #endif
+    }
+};
+
+struct v_int64x4  // 4 lanes of int64 held in one __m256i
+{
+    typedef int64 lane_type;
+    enum { nlanes = 4 };
+    __m256i val;
+    // Wraps an existing register; explicit so a raw __m256i never converts implicitly.
+    explicit v_int64x4(__m256i v) : val(v) {}
+    v_int64x4(int64 v0, int64 v1, int64 v2, int64 v3)
+        : val(_mm256_setr_epi64x(v0, v1, v2, v3)) {}
+    v_int64x4() : val(_mm256_setzero_si256()) {}
+    // First 64-bit lane; targets without the x86-64 macros assemble it from two 32-bit reads.
+    int64 get0() const
+    {
+    #if defined __x86_64__ || defined _M_X64
+        return (int64)_mm_cvtsi128_si64(_mm256_castsi256_si128(val));
+    #else
+        const unsigned lo32 = (unsigned)_mm_cvtsi128_si32(_mm256_castsi256_si128(val));
+        const unsigned hi32 = (unsigned)_mm_cvtsi128_si32(_mm256_castsi256_si128(_mm256_srli_epi64(val, 32)));
+        return (int64)(lo32 | ((uint64)hi32 << 32));
+    #endif
+    }
+};
+
+struct v_float64x4  // 4 lanes of double held in one __m256d
+{
+    typedef double lane_type;
+    enum { nlanes = 4 };
+    __m256d val;
+    // Wraps an existing register; explicit so a raw __m256d never converts implicitly.
+    explicit v_float64x4(__m256d v) : val(v) {}
+    v_float64x4(double v0, double v1, double v2, double v3)
+        : val(_mm256_setr_pd(v0, v1, v2, v3)) {}
+    v_float64x4() : val(_mm256_setzero_pd()) {}
+    double get0() const { const __m128d low_half = _mm256_castpd256_pd128(val); return _mm_cvtsd_f64(low_half); }
+};
+
+//////////////// Load and store operations ///////////////
+// Integer flavour: emits v256_load* / v_store* for one vector type; all accesses cast the _Tp pointer to __m256i* / __m128i*.
+#define OPENCV_HAL_IMPL_AVX_LOADSTORE(_Tpvec, _Tp)                    \
+    inline _Tpvec v256_load(const _Tp* ptr)  /* unaligned 256-bit load */ \
+    { return _Tpvec(_mm256_loadu_si256((const __m256i*)ptr)); }       \
+    inline _Tpvec v256_load_aligned(const _Tp* ptr)  /* uses the aligned-load intrinsic */ \
+    { return _Tpvec(_mm256_load_si256((const __m256i*)ptr)); }        \
+    inline _Tpvec v256_load_low(const _Tp* ptr)  /* 128-bit load widened to 256 bits */ \
+    {                                                                 \
+        __m128i v128 = _mm_loadu_si128((const __m128i*)ptr);          \
+        return _Tpvec(_mm256_castsi128_si256(v128));  /* Intel documents the upper half of this cast as undefined */ \
+    }                                                                 \
+    inline _Tpvec v256_load_halves(const _Tp* ptr0, const _Tp* ptr1)  /* low half from ptr0, high half from ptr1 */ \
+    {                                                                 \
+        __m128i vlo = _mm_loadu_si128((const __m128i*)ptr0);          \
+        __m128i vhi = _mm_loadu_si128((const __m128i*)ptr1);          \
+        return _Tpvec(_v256_combine(vlo, vhi));                       \
+    }                                                                 \
+    inline void v_store(_Tp* ptr, const _Tpvec& a)  /* unaligned store */ \
+    { _mm256_storeu_si256((__m256i*)ptr, a.val); }                    \
+    inline void v_store_aligned(_Tp* ptr, const _Tpvec& a)            \
+    { _mm256_store_si256((__m256i*)ptr, a.val); }                     \
+    inline void v_store_aligned_nocache(_Tp* ptr, const _Tpvec& a)  /* uses the streaming-store intrinsic */ \
+    { _mm256_stream_si256((__m256i*)ptr, a.val); }                    \
+    inline void v_store(_Tp* ptr, const _Tpvec& a, hal::StoreMode mode) /* run-time choice among the three stores above */ \
+    { \
+        if( mode == hal::STORE_UNALIGNED ) \
+            _mm256_storeu_si256((__m256i*)ptr, a.val); \
+        else if( mode == hal::STORE_ALIGNED_NOCACHE )  \
+            _mm256_stream_si256((__m256i*)ptr, a.val); \
+        else /* every other mode value takes the aligned store */ \
+            _mm256_store_si256((__m256i*)ptr, a.val); \
+    } \
+    inline void v_store_low(_Tp* ptr, const _Tpvec& a)  /* writes only the low 128 bits */ \
+    { _mm_storeu_si128((__m128i*)ptr, _v256_extract_low(a.val)); }    \
+    inline void v_store_high(_Tp* ptr, const _Tpvec& a)  /* writes only the high 128 bits */ \
+    { _mm_storeu_si128((__m128i*)ptr, _v256_extract_high(a.val)); }
+
+OPENCV_HAL_IMPL_AVX_LOADSTORE(v_uint8x32,  uchar)
+OPENCV_HAL_IMPL_AVX_LOADSTORE(v_int8x32,   schar)
+OPENCV_HAL_IMPL_AVX_LOADSTORE(v_uint16x16, ushort)
+OPENCV_HAL_IMPL_AVX_LOADSTORE(v_int16x16,  short)
+OPENCV_HAL_IMPL_AVX_LOADSTORE(v_uint32x8,  unsigned)
+OPENCV_HAL_IMPL_AVX_LOADSTORE(v_int32x8,   int)
+OPENCV_HAL_IMPL_AVX_LOADSTORE(v_uint64x4,  uint64)
+OPENCV_HAL_IMPL_AVX_LOADSTORE(v_int64x4,   int64)
+
+#define OPENCV_HAL_IMPL_AVX_LOADSTORE_FLT(_Tpvec, _Tp, suffix, halfreg) /* float/double twin of the integer macro: suffix is pasted into the intrinsic names, halfreg is the 128-bit register type */ \
+    inline _Tpvec v256_load(const _Tp* ptr)  /* unaligned 256-bit load */ \
+    { return _Tpvec(_mm256_loadu_##suffix(ptr)); }                        \
+    inline _Tpvec v256_load_aligned(const _Tp* ptr)  /* uses the aligned-load intrinsic */ \
+    { return _Tpvec(_mm256_load_##suffix(ptr)); }                         \
+    inline _Tpvec v256_load_low(const _Tp* ptr)  /* 128-bit load widened to 256 bits by a cast intrinsic */ \
+    {                                                                     \
+        return _Tpvec(_mm256_cast##suffix##128_##suffix##256              \
+                     (_mm_loadu_##suffix(ptr)));                          \
+    }                                                                     \
+    inline _Tpvec v256_load_halves(const _Tp* ptr0, const _Tp* ptr1)  /* low half from ptr0, high half from ptr1 */ \
+    {                                                                     \
+        halfreg vlo = _mm_loadu_##suffix(ptr0);                           \
+        halfreg vhi = _mm_loadu_##suffix(ptr1);                           \
+        return _Tpvec(_v256_combine(vlo, vhi));                           \
+    }                                                                     \
+    inline void v_store(_Tp* ptr, const _Tpvec& a)  /* unaligned store */ \
+    { _mm256_storeu_##suffix(ptr, a.val); }                               \
+    inline void v_store_aligned(_Tp* ptr, const _Tpvec& a)                \
+    { _mm256_store_##suffix(ptr, a.val); }                                \
+    inline void v_store_aligned_nocache(_Tp* ptr, const _Tpvec& a)  /* uses the streaming-store intrinsic */ \
+    { _mm256_stream_##suffix(ptr, a.val); }                               \
+    inline void v_store(_Tp* ptr, const _Tpvec& a, hal::StoreMode mode) /* run-time choice among the three stores above */ \
+    { \
+        if( mode == hal::STORE_UNALIGNED ) \
+            _mm256_storeu_##suffix(ptr, a.val); \
+        else if( mode == hal::STORE_ALIGNED_NOCACHE )  \
+            _mm256_stream_##suffix(ptr, a.val); \
+        else /* every other mode value takes the aligned store */ \
+            _mm256_store_##suffix(ptr, a.val); \
+    } \
+    inline void v_store_low(_Tp* ptr, const _Tpvec& a)  /* writes only the low 128 bits */ \
+    { _mm_storeu_##suffix(ptr, _v256_extract_low(a.val)); }               \
+    inline void v_store_high(_Tp* ptr, const _Tpvec& a)  /* writes only the high 128 bits */ \
+    { _mm_storeu_##suffix(ptr, _v256_extract_high(a.val)); }
+
+OPENCV_HAL_IMPL_AVX_LOADSTORE_FLT(v_float32x8, float,  ps, __m128)
+OPENCV_HAL_IMPL_AVX_LOADSTORE_FLT(v_float64x4, double, pd, __m128d)
+
+#define OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, _Tpvecf, suffix, cast) /* emits v_reinterpret_as_<suffix>(_Tpvecf) returning _Tpvec built from cast(a.val) */ \
+    inline _Tpvec v_reinterpret_as_##suffix(const _Tpvecf& a)   \
+    { return _Tpvec(cast(a.val)); }
+// Integer types: zero / broadcast constructors plus reinterpret casts from every 256-bit vector type.
+#define OPENCV_HAL_IMPL_AVX_INIT(_Tpvec, _Tp, suffix, ssuffix, ctype_s)          \
+    inline _Tpvec v256_setzero_##suffix()                                        \
+    { return _Tpvec(_mm256_setzero_si256()); }                                   \
+    inline _Tpvec v256_setall_##suffix(_Tp v)  /* v is cast to ctype_s, the type handed to _mm256_set1_<ssuffix> */ \
+    { return _Tpvec(_mm256_set1_##ssuffix((ctype_s)v)); }                        \
+    OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_uint8x32,  suffix, OPENCV_HAL_NOP)  /* OPENCV_HAL_NOP is defined outside this chunk; presumably an identity macro - confirm */ \
+    OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_int8x32,   suffix, OPENCV_HAL_NOP)        \
+    OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_uint16x16, suffix, OPENCV_HAL_NOP)        \
+    OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_int16x16,  suffix, OPENCV_HAL_NOP)        \
+    OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_uint32x8,  suffix, OPENCV_HAL_NOP)        \
+    OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_int32x8,   suffix, OPENCV_HAL_NOP)        \
+    OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_uint64x4,  suffix, OPENCV_HAL_NOP)        \
+    OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_int64x4,   suffix, OPENCV_HAL_NOP)        \
+    OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_float32x8, suffix, _mm256_castps_si256)   \
+    OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_float64x4, suffix, _mm256_castpd_si256)
+
+OPENCV_HAL_IMPL_AVX_INIT(v_uint8x32,  uchar,    u8,  epi8,   char)
+OPENCV_HAL_IMPL_AVX_INIT(v_int8x32,   schar,    s8,  epi8,   char)
+OPENCV_HAL_IMPL_AVX_INIT(v_uint16x16, ushort,   u16, epi16,  short)
+OPENCV_HAL_IMPL_AVX_INIT(v_int16x16,  short,    s16, epi16,  short)
+OPENCV_HAL_IMPL_AVX_INIT(v_uint32x8,  unsigned, u32, epi32,  int)
+OPENCV_HAL_IMPL_AVX_INIT(v_int32x8,   int,      s32, epi32,  int)
+OPENCV_HAL_IMPL_AVX_INIT(v_uint64x4,  uint64,   u64, epi64x, int64)
+OPENCV_HAL_IMPL_AVX_INIT(v_int64x4,   int64,    s64, epi64x, int64)
+
+#define OPENCV_HAL_IMPL_AVX_INIT_FLT(_Tpvec, _Tp, suffix, zsuffix, cast) /* float/double twin: zero, broadcast, and casts from the eight integer vector types only */ \
+    inline _Tpvec v256_setzero_##suffix()                                \
+    { return _Tpvec(_mm256_setzero_##zsuffix()); }                       \
+    inline _Tpvec v256_setall_##suffix(_Tp v)                            \
+    { return _Tpvec(_mm256_set1_##zsuffix(v)); }                         \
+    OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_uint8x32,  suffix, cast)  /* cast converts __m256i to the float register type */ \
+    OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_int8x32,   suffix, cast)          \
+    OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_uint16x16, suffix, cast)          \
+    OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_int16x16,  suffix, cast)          \
+    OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_uint32x8,  suffix, cast)          \
+    OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_int32x8,   suffix, cast)          \
+    OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_uint64x4,  suffix, cast)          \
+    OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_int64x4,   suffix, cast)
+
+OPENCV_HAL_IMPL_AVX_INIT_FLT(v_float32x8, float,  f32, ps, _mm256_castsi256_ps)
+OPENCV_HAL_IMPL_AVX_INIT_FLT(v_float64x4, double, f64, pd, _mm256_castsi256_pd)
+
+inline v_float32x8 v_reinterpret_as_f32(const v_float32x8& a)  // already f32: plain copy
+{ return v_float32x8(a); }
+inline v_float32x8 v_reinterpret_as_f32(const v_float64x4& a)  // f64 register viewed as f32 via the cast intrinsic
+{ const __m256 as_ps = _mm256_castpd_ps(a.val); return v_float32x8(as_ps); }
+
+inline v_float64x4 v_reinterpret_as_f64(const v_float64x4& a)  // already f64: plain copy
+{ return v_float64x4(a); }
+inline v_float64x4 v_reinterpret_as_f64(const v_float32x8& a)  // f32 register viewed as f64 via the cast intrinsic
+{ const __m256d as_pd = _mm256_castps_pd(a.val); return v_float64x4(as_pd); }
+
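+/* Recombine (disabled legacy variant; the active v_combine_low/high, v_recombine and v_zip come from OPENCV_HAL_IMPL_AVX_ZIP further down) */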
+/*#define OPENCV_HAL_IMPL_AVX_COMBINE(_Tpvec, perm)                    \
+    inline _Tpvec v_combine_low(const _Tpvec& a, const _Tpvec& b)    \
+    { return _Tpvec(perm(a.val, b.val, 0x20)); }                     \
+    inline _Tpvec v_combine_high(const _Tpvec& a, const _Tpvec& b)   \
+    { return _Tpvec(perm(a.val, b.val, 0x31)); }                     \
+    inline void v_recombine(const _Tpvec& a, const _Tpvec& b,        \
+                             _Tpvec& c, _Tpvec& d)                   \
+    { c = v_combine_low(a, b); d = v_combine_high(a, b); }
+
+#define OPENCV_HAL_IMPL_AVX_UNPACKS(_Tpvec, suffix)                  \
+    OPENCV_HAL_IMPL_AVX_COMBINE(_Tpvec, _mm256_permute2x128_si256)   \
+    inline void v_zip(const _Tpvec& a0, const _Tpvec& a1,            \
+                             _Tpvec& b0, _Tpvec& b1)                 \
+    {                                                                \
+        __m256i v0 = _v256_shuffle_odd_64(a0.val);                   \
+        __m256i v1 = _v256_shuffle_odd_64(a1.val);                   \
+        b0.val = _mm256_unpacklo_##suffix(v0, v1);                   \
+        b1.val = _mm256_unpackhi_##suffix(v0, v1);                   \
+    }
+
+OPENCV_HAL_IMPL_AVX_UNPACKS(v_uint8x32,  epi8)
+OPENCV_HAL_IMPL_AVX_UNPACKS(v_int8x32,   epi8)
+OPENCV_HAL_IMPL_AVX_UNPACKS(v_uint16x16, epi16)
+OPENCV_HAL_IMPL_AVX_UNPACKS(v_int16x16,  epi16)
+OPENCV_HAL_IMPL_AVX_UNPACKS(v_uint32x8,  epi32)
+OPENCV_HAL_IMPL_AVX_UNPACKS(v_int32x8,   epi32)
+OPENCV_HAL_IMPL_AVX_UNPACKS(v_uint64x4,  epi64)
+OPENCV_HAL_IMPL_AVX_UNPACKS(v_int64x4,   epi64)
+OPENCV_HAL_IMPL_AVX_COMBINE(v_float32x8, _mm256_permute2f128_ps)
+OPENCV_HAL_IMPL_AVX_COMBINE(v_float64x4, _mm256_permute2f128_pd)
+
+inline void v_zip(const v_float32x8& a0, const v_float32x8& a1, v_float32x8& b0, v_float32x8& b1)
+{
+    __m256 v0 = _mm256_unpacklo_ps(a0.val, a1.val);
+    __m256 v1 = _mm256_unpackhi_ps(a0.val, a1.val);
+    v_recombine(v_float32x8(v0), v_float32x8(v1), b0, b1);
+}
+
+inline void v_zip(const v_float64x4& a0, const v_float64x4& a1, v_float64x4& b0, v_float64x4& b1)
+{
+    __m256d v0 = _v_shuffle_odd_64(a0.val);
+    __m256d v1 = _v_shuffle_odd_64(a1.val);
+    b0.val = _mm256_unpacklo_pd(v0, v1);
+    b1.val = _mm256_unpackhi_pd(v0, v1);
+}*/
+
+//////////////// Variant Value reordering ///////////////
+
+// unpacks: v256_unpacklo / v256_unpackhi are thin wrappers over _mm256_unpacklo_<suffix> / _mm256_unpackhi_<suffix>
+#define OPENCV_HAL_IMPL_AVX_UNPACK(_Tpvec, suffix)                 \
+    inline _Tpvec v256_unpacklo(const _Tpvec& a, const _Tpvec& b)  \
+    { return _Tpvec(_mm256_unpacklo_##suffix(a.val, b.val)); }     \
+    inline _Tpvec v256_unpackhi(const _Tpvec& a, const _Tpvec& b)  \
+    { return _Tpvec(_mm256_unpackhi_##suffix(a.val, b.val)); }
+
+OPENCV_HAL_IMPL_AVX_UNPACK(v_uint8x32,  epi8)
+OPENCV_HAL_IMPL_AVX_UNPACK(v_int8x32,   epi8)
+OPENCV_HAL_IMPL_AVX_UNPACK(v_uint16x16, epi16)
+OPENCV_HAL_IMPL_AVX_UNPACK(v_int16x16,  epi16)
+OPENCV_HAL_IMPL_AVX_UNPACK(v_uint32x8,  epi32)
+OPENCV_HAL_IMPL_AVX_UNPACK(v_int32x8,   epi32)
+OPENCV_HAL_IMPL_AVX_UNPACK(v_uint64x4,  epi64)
+OPENCV_HAL_IMPL_AVX_UNPACK(v_int64x4,   epi64)
+OPENCV_HAL_IMPL_AVX_UNPACK(v_float32x8, ps)
+OPENCV_HAL_IMPL_AVX_UNPACK(v_float64x4, pd)
+
+// blend: v256_blend<m>(a, b) forwards the compile-time mask m to _mm256_blend_<suffix>; the 8-bit types get no overload here
+#define OPENCV_HAL_IMPL_AVX_BLEND(_Tpvec, suffix)               \
+    template<int m>                                             \
+    inline _Tpvec v256_blend(const _Tpvec& a, const _Tpvec& b)  \
+    { return _Tpvec(_mm256_blend_##suffix(a.val, b.val, m)); }
+
+OPENCV_HAL_IMPL_AVX_BLEND(v_uint16x16, epi16)
+OPENCV_HAL_IMPL_AVX_BLEND(v_int16x16,  epi16)
+OPENCV_HAL_IMPL_AVX_BLEND(v_uint32x8,  epi32)
+OPENCV_HAL_IMPL_AVX_BLEND(v_int32x8,   epi32)
+OPENCV_HAL_IMPL_AVX_BLEND(v_float32x8, ps)
+OPENCV_HAL_IMPL_AVX_BLEND(v_float64x4, pd)
+
+template<int m>
+inline v_uint64x4 v256_blend(const v_uint64x4& a, const v_uint64x4& b)
+{
+    // Stretch the 4-bit per-qword mask m into the 8-bit per-dword mask used by blend_epi32.
+    enum { pairs  = (m | (m << 2)) & 0x33,           // bits 0,1 stay put; bits 2,3 land on 4,5
+           spread = (pairs | (pairs << 1)) & 0x55,   // one mask bit on each even position
+           dwords = spread | (spread << 1) };        // duplicate each bit for both dwords of a qword
+    return v_uint64x4(_mm256_blend_epi32(a.val, b.val, dwords));
+}
+template<int m>
+inline v_int64x4 v256_blend(const v_int64x4& a, const v_int64x4& b)
+{ const v_uint64x4 ua(a.val), ub(b.val); return v_int64x4(v256_blend<m>(ua, ub).val); }
+
+// shuffle: v256_shuffle<m>(a) forwards the compile-time control m to _mm256_<intrin> (shuffle_epi32 / permute_ps / permute_pd)
+// todo: emulate 64bit
+#define OPENCV_HAL_IMPL_AVX_SHUFFLE(_Tpvec, intrin)  \
+    template<int m>                                  \
+    inline _Tpvec v256_shuffle(const _Tpvec& a)      \
+    { return _Tpvec(_mm256_##intrin(a.val, m)); }
+
+OPENCV_HAL_IMPL_AVX_SHUFFLE(v_uint32x8,  shuffle_epi32)
+OPENCV_HAL_IMPL_AVX_SHUFFLE(v_int32x8,   shuffle_epi32)
+OPENCV_HAL_IMPL_AVX_SHUFFLE(v_float32x8, permute_ps)
+OPENCV_HAL_IMPL_AVX_SHUFFLE(v_float64x4, permute_pd)
+
+template<typename _Tpvec>  // raw unpack pair; v_zip (generated below) follows it with v_recombine
+inline void v256_zip(const _Tpvec& a, const _Tpvec& b, _Tpvec& ab0, _Tpvec& ab1)
+{
+    ab0 = v256_unpacklo(a, b);  // written first: if ab0 aliases a or b, the next line sees the new value
+    ab1 = v256_unpackhi(a, b);
+}
+
+template<typename _Tpvec>  // result: lower four 32-bit lanes from a, upper four from b (mask 0xf0)
+inline _Tpvec v256_combine_diagonal(const _Tpvec& a, const _Tpvec& b)
+{ const __m256i mixed = _mm256_blend_epi32(a.val, b.val, 0xf0); return _Tpvec(mixed); }
+// Float overload: same 0xf0 lane mask with the packed-single blend.
+inline v_float32x8 v256_combine_diagonal(const v_float32x8& a, const v_float32x8& b)
+{ return v_float32x8(_mm256_blend_ps(a.val, b.val, 0xf0)); }
+// Double overload: four lanes only, so the mask shrinks to 0xc.
+inline v_float64x4 v256_combine_diagonal(const v_float64x4& a, const v_float64x4& b)
+{ return v_float64x4(_mm256_blend_pd(a.val, b.val, 0xc)); }
+
+template<typename _Tpvec>  // selector 0x21: low half <- high half of a, high half <- low half of b
+inline _Tpvec v256_alignr_128(const _Tpvec& a, const _Tpvec& b)
+{ return _Tpvec(_v256_permute2x128<0x21>(a.val, b.val)); }
+// Integer form: byte-wise alignr by 8 bytes on the wrapped registers.
+template<typename _Tpvec>
+inline _Tpvec v256_alignr_64(const _Tpvec& a, const _Tpvec& b)
+{ const __m256i shifted = _mm256_alignr_epi8(a.val, b.val, 8); return _Tpvec(shifted); }
+inline v_float64x4 v256_alignr_64(const v_float64x4& a, const v_float64x4& b)
+{ enum { sel = _MM_SHUFFLE(0, 0, 1, 1) }; return v_float64x4(_mm256_shuffle_pd(b.val, a.val, sel)); }
+// todo: emulate float32
+
+template<typename _Tpvec>  // selector 1 with both sources equal to a: the two 128-bit halves trade places
+inline _Tpvec v256_swap_halves(const _Tpvec& a)
+{ return v256_permute2x128<1>(a, a); }
+// Reverses the order of the four 64-bit lanes: result lane k is source lane 3-k.
+template<typename _Tpvec>
+inline _Tpvec v256_reverse_64(const _Tpvec& a)
+{ return v256_permute4x64<_MM_SHUFFLE(0, 1, 2, 3)>(a); }
+
+// ZIP: defines v_combine_low/high, v_recombine and v_zip for one 256-bit vector type
+#define OPENCV_HAL_IMPL_AVX_ZIP(_Tpvec)                              \
+    inline _Tpvec v_combine_low(const _Tpvec& a, const _Tpvec& b)    \
+    { return v256_permute2x128<0x20>(a, b); }  /* low half of a, then low half of b */ \
+    inline _Tpvec v_combine_high(const _Tpvec& a, const _Tpvec& b)   \
+    { return v256_permute2x128<0x31>(a, b); }  /* high half of a, then high half of b */ \
+    inline void v_recombine(const _Tpvec& a, const _Tpvec& b,        \
+                             _Tpvec& c, _Tpvec& d)                   \
+    {                                                                \
+        _Tpvec a1b0 = v256_alignr_128(a, b);  /* high half of a followed by low half of b */ \
+        c = v256_combine_diagonal(a, a1b0);   /* c: low half of a, low half of b */ \
+        d = v256_combine_diagonal(a1b0, b);   /* d: high half of a, high half of b */ \
+    }                                                                \
+    inline void v_zip(const _Tpvec& a, const _Tpvec& b,              \
+                      _Tpvec& ab0, _Tpvec& ab1)                      \
+    {                                                                \
+        _Tpvec ab0ab2, ab1ab3;  /* unpack results, reordered by v_recombine below */ \
+        v256_zip(a, b, ab0ab2, ab1ab3);                              \
+        v_recombine(ab0ab2, ab1ab3, ab0, ab1);                       \
+    }
+
+OPENCV_HAL_IMPL_AVX_ZIP(v_uint8x32)
+OPENCV_HAL_IMPL_AVX_ZIP(v_int8x32)
+OPENCV_HAL_IMPL_AVX_ZIP(v_uint16x16)
+OPENCV_HAL_IMPL_AVX_ZIP(v_int16x16)
+OPENCV_HAL_IMPL_AVX_ZIP(v_uint32x8)
+OPENCV_HAL_IMPL_AVX_ZIP(v_int32x8)
+OPENCV_HAL_IMPL_AVX_ZIP(v_uint64x4)
+OPENCV_HAL_IMPL_AVX_ZIP(v_int64x4)
+OPENCV_HAL_IMPL_AVX_ZIP(v_float32x8)
+OPENCV_HAL_IMPL_AVX_ZIP(v_float64x4)
+
+////////// Arithmetic, bitwise and comparison operations /////////
+
+/* Element-wise binary and unary operations */
+
+/** Arithmetics: operator and compound-assignment pairs; 8/16-bit +,- bind the adds/subs intrinsics, 32/64-bit and float bind plain add/sub **/
+#define OPENCV_HAL_IMPL_AVX_BIN_OP(bin_op, _Tpvec, intrin)            \
+    inline _Tpvec operator bin_op (const _Tpvec& a, const _Tpvec& b)  \
+    { return _Tpvec(intrin(a.val, b.val)); }                          \
+    inline _Tpvec& operator bin_op##= (_Tpvec& a, const _Tpvec& b)    \
+    { a.val = intrin(a.val, b.val); return a; }
+
+OPENCV_HAL_IMPL_AVX_BIN_OP(+, v_uint8x32,  _mm256_adds_epu8)
+OPENCV_HAL_IMPL_AVX_BIN_OP(-, v_uint8x32,  _mm256_subs_epu8)
+OPENCV_HAL_IMPL_AVX_BIN_OP(+, v_int8x32,   _mm256_adds_epi8)
+OPENCV_HAL_IMPL_AVX_BIN_OP(-, v_int8x32,   _mm256_subs_epi8)
+OPENCV_HAL_IMPL_AVX_BIN_OP(+, v_uint16x16, _mm256_adds_epu16)
+OPENCV_HAL_IMPL_AVX_BIN_OP(-, v_uint16x16, _mm256_subs_epu16)
+OPENCV_HAL_IMPL_AVX_BIN_OP(+, v_int16x16,  _mm256_adds_epi16)
+OPENCV_HAL_IMPL_AVX_BIN_OP(-, v_int16x16,  _mm256_subs_epi16)
+OPENCV_HAL_IMPL_AVX_BIN_OP(+, v_uint32x8,  _mm256_add_epi32)
+OPENCV_HAL_IMPL_AVX_BIN_OP(-, v_uint32x8,  _mm256_sub_epi32)
+OPENCV_HAL_IMPL_AVX_BIN_OP(*, v_uint32x8,  _mm256_mullo_epi32)
+OPENCV_HAL_IMPL_AVX_BIN_OP(+, v_int32x8,   _mm256_add_epi32)
+OPENCV_HAL_IMPL_AVX_BIN_OP(-, v_int32x8,   _mm256_sub_epi32)
+OPENCV_HAL_IMPL_AVX_BIN_OP(*, v_int32x8,   _mm256_mullo_epi32)
+OPENCV_HAL_IMPL_AVX_BIN_OP(+, v_uint64x4,  _mm256_add_epi64)
+OPENCV_HAL_IMPL_AVX_BIN_OP(-, v_uint64x4,  _mm256_sub_epi64)
+OPENCV_HAL_IMPL_AVX_BIN_OP(+, v_int64x4,   _mm256_add_epi64)
+OPENCV_HAL_IMPL_AVX_BIN_OP(-, v_int64x4,   _mm256_sub_epi64)
+
+OPENCV_HAL_IMPL_AVX_BIN_OP(+, v_float32x8, _mm256_add_ps)
+OPENCV_HAL_IMPL_AVX_BIN_OP(-, v_float32x8, _mm256_sub_ps)
+OPENCV_HAL_IMPL_AVX_BIN_OP(*, v_float32x8, _mm256_mul_ps)
+OPENCV_HAL_IMPL_AVX_BIN_OP(/, v_float32x8, _mm256_div_ps)
+OPENCV_HAL_IMPL_AVX_BIN_OP(+, v_float64x4, _mm256_add_pd)
+OPENCV_HAL_IMPL_AVX_BIN_OP(-, v_float64x4, _mm256_sub_pd)
+OPENCV_HAL_IMPL_AVX_BIN_OP(*, v_float64x4, _mm256_mul_pd)
+OPENCV_HAL_IMPL_AVX_BIN_OP(/, v_float64x4, _mm256_div_pd)
+
+// saturating multiply 8-bit, 16-bit
+inline v_uint8x32 operator * (const v_uint8x32& a, const v_uint8x32& b)
+{
+    v_uint16x16 wide0, wide1;          // 16-bit products filled in by v_mul_expand
+    v_mul_expand(a, b, wide0, wide1);
+    return v_pack(wide0, wide1);       // narrow both product vectors back to 8-bit lanes
+}
+inline v_int8x32 operator * (const v_int8x32& a, const v_int8x32& b)
+{
+    v_int16x16 wide0, wide1;           // 16-bit products filled in by v_mul_expand
+    v_mul_expand(a, b, wide0, wide1);
+    return v_pack(wide0, wide1);       // narrow both product vectors back to 8-bit lanes
+}
+inline v_uint16x16 operator * (const v_uint16x16& a, const v_uint16x16& b)
+{
+    const __m256i low_words  = _mm256_mullo_epi16(a.val, b.val);
+    const __m256i high_words = _mm256_mulhi_epu16(a.val, b.val);
+    // Interleave low/high words into 32-bit products, then narrow with the unsigned-clamping pack helper.
+    return v_uint16x16(_v256_packs_epu32(_mm256_unpacklo_epi16(low_words, high_words),
+                                         _mm256_unpackhi_epi16(low_words, high_words)));
+}
+inline v_int16x16 operator * (const v_int16x16& a, const v_int16x16& b)
+{
+    const __m256i low_words  = _mm256_mullo_epi16(a.val, b.val);
+    const __m256i high_words = _mm256_mulhi_epi16(a.val, b.val);
+    // Interleave low/high words into 32-bit products, then narrow with the signed packs intrinsic.
+    return v_int16x16(_mm256_packs_epi32(_mm256_unpacklo_epi16(low_words, high_words),
+                                         _mm256_unpackhi_epi16(low_words, high_words)));
+}
+inline v_uint8x32& operator *= (v_uint8x32& a, const v_uint8x32& b)  // compound forms reuse the matching operator *
+{ return a = a * b; }
+inline v_int8x32& operator *= (v_int8x32& a, const v_int8x32& b)
+{ return a = a * b; }
+inline v_uint16x16& operator *= (v_uint16x16& a, const v_uint16x16& b)
+{ return a = a * b; }
+inline v_int16x16& operator *= (v_int16x16& a, const v_int16x16& b)
+{ return a = a * b; }
+
+/** Non-saturating arithmetics: named free functions bound to the plain add / sub / mullo intrinsics **/
+#define OPENCV_HAL_IMPL_AVX_BIN_FUNC(func, _Tpvec, intrin) \
+    inline _Tpvec func(const _Tpvec& a, const _Tpvec& b)   \
+    { return _Tpvec(intrin(a.val, b.val)); }
+
+OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_add_wrap, v_uint8x32,  _mm256_add_epi8)
+OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_add_wrap, v_int8x32,   _mm256_add_epi8)
+OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_add_wrap, v_uint16x16, _mm256_add_epi16)
+OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_add_wrap, v_int16x16,  _mm256_add_epi16)
+OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_sub_wrap, v_uint8x32,  _mm256_sub_epi8)
+OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_sub_wrap, v_int8x32,   _mm256_sub_epi8)
+OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_sub_wrap, v_uint16x16, _mm256_sub_epi16)
+OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_sub_wrap, v_int16x16,  _mm256_sub_epi16)
+OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_mul_wrap, v_uint16x16, _mm256_mullo_epi16)
+OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_mul_wrap, v_int16x16,  _mm256_mullo_epi16)
+
+// Wrapping 8-bit multiply built from 16-bit multiplies: even and odd bytes
+// are multiplied separately and then blended back together.
+inline v_uint8x32 v_mul_wrap(const v_uint8x32& a, const v_uint8x32& b)
+{
+    // Blend selector: high bit set in every odd byte.
+    const __m256i odd_bytes = _mm256_set1_epi32(0xFF00FF00);
+
+    // Even bytes: the low byte of each 16-bit product is the wanted result.
+    const __m256i even_prod = _mm256_mullo_epi16(a.val, b.val);
+
+    // Odd bytes: move them into the low byte of each 16-bit lane, multiply,
+    // and shift the product back into the odd byte position.
+    const __m256i a_odd = _mm256_srai_epi16(a.val, 8);
+    const __m256i b_odd = _mm256_srai_epi16(b.val, 8);
+    const __m256i odd_prod = _mm256_slli_epi16(_mm256_mullo_epi16(a_odd, b_odd), 8);
+
+    return v_uint8x32(_mm256_blendv_epi8(even_prod, odd_prod, odd_bytes));
+}
+// Signed wrapping 8-bit multiply: reuses the unsigned implementation on the
+// reinterpreted bits.
+inline v_int8x32 v_mul_wrap(const v_int8x32& a, const v_int8x32& b)
+{
+    const v_uint8x32 ua = v_reinterpret_as_u8(a);
+    const v_uint8x32 ub = v_reinterpret_as_u8(b);
+    return v_reinterpret_as_s8(v_mul_wrap(ua, ub));
+}
+
+//  Multiply and expand
+// Widening multiply u8 -> u16: expands both inputs to 16 bits and multiplies
+// the matching halves; results are written to c and d.
+inline void v_mul_expand(const v_uint8x32& a, const v_uint8x32& b,
+                         v_uint16x16& c, v_uint16x16& d)
+{
+    v_uint16x16 a_first, a_second;
+    v_uint16x16 b_first, b_second;
+    v_expand(a, a_first, a_second);
+    v_expand(b, b_first, b_second);
+    c = v_mul_wrap(a_first, b_first);
+    d = v_mul_wrap(a_second, b_second);
+}
+
+// Widening multiply s8 -> s16: expands both inputs to 16 bits and multiplies
+// the matching halves; results are written to c and d.
+inline void v_mul_expand(const v_int8x32& a, const v_int8x32& b,
+                         v_int16x16& c, v_int16x16& d)
+{
+    v_int16x16 a_first, a_second;
+    v_int16x16 b_first, b_second;
+    v_expand(a, a_first, a_second);
+    v_expand(b, b_first, b_second);
+    c = v_mul_wrap(a_first, b_first);
+    d = v_mul_wrap(a_second, b_second);
+}
+
+// Widening multiply s16 -> s32: the low and high 16-bit halves of each
+// product are computed separately and zipped into 32-bit lanes.
+inline void v_mul_expand(const v_int16x16& a, const v_int16x16& b,
+                         v_int32x8& c, v_int32x8& d)
+{
+    const v_int16x16 prod_hi = v_int16x16(_mm256_mulhi_epi16(a.val, b.val));
+    const v_int16x16 prod_lo = v_mul_wrap(a, b);
+
+    v_int16x16 zipped0, zipped1;
+    v_zip(prod_lo, prod_hi, zipped0, zipped1);
+
+    c = v_reinterpret_as_s32(zipped0);
+    d = v_reinterpret_as_s32(zipped1);
+}
+
+// Widening multiply u16 -> u32: the low and high 16-bit halves of each
+// product are computed separately and zipped into 32-bit lanes.
+inline void v_mul_expand(const v_uint16x16& a, const v_uint16x16& b,
+                         v_uint32x8& c, v_uint32x8& d)
+{
+    const v_uint16x16 prod_hi = v_uint16x16(_mm256_mulhi_epu16(a.val, b.val));
+    const v_uint16x16 prod_lo = v_mul_wrap(a, b);
+
+    v_uint16x16 zipped0, zipped1;
+    v_zip(prod_lo, prod_hi, zipped0, zipped1);
+
+    c = v_reinterpret_as_u32(zipped0);
+    d = v_reinterpret_as_u32(zipped1);
+}
+
+// Widening multiply u32 -> u64. _mm256_mul_epu32 only multiplies the low
+// 32 bits of each 64-bit lane, so the odd elements are shifted down first.
+inline void v_mul_expand(const v_uint32x8& a, const v_uint32x8& b,
+                         v_uint64x4& c, v_uint64x4& d)
+{
+    const __m256i even_prod = _mm256_mul_epu32(a.val, b.val);
+
+    const __m256i a_odd = _mm256_srli_epi64(a.val, 32);
+    const __m256i b_odd = _mm256_srli_epi64(b.val, 32);
+    const __m256i odd_prod = _mm256_mul_epu32(a_odd, b_odd);
+
+    // Interleave even/odd products back into element order.
+    v_zip(v_uint64x4(even_prod), v_uint64x4(odd_prod), c, d);
+}
+
+// High 16 bits of the 32-bit product of each 16-bit lane (signed / unsigned).
+inline v_int16x16 v_mul_hi(const v_int16x16& a, const v_int16x16& b)
+{
+    return v_int16x16(_mm256_mulhi_epi16(a.val, b.val));
+}
+inline v_uint16x16 v_mul_hi(const v_uint16x16& a, const v_uint16x16& b)
+{
+    return v_uint16x16(_mm256_mulhi_epu16(a.val, b.val));
+}
+
+/** Bitwise shifts **/
+// Emits run-time shifts (operator <<, operator >>) and compile-time shifts
+// (v_shl<imm>, v_shr<imm>) for an unsigned/signed vector pair. Left shifts
+// and unsigned right shifts use the logical intrinsics for `suffix`; signed
+// right shifts go through `srai`.
+#define OPENCV_HAL_IMPL_AVX_SHIFT_OP(_Tpuvec, _Tpsvec, suffix, srai)  \
+    /* unsigned lanes */                                              \
+    inline _Tpuvec operator << (const _Tpuvec& a, int imm)            \
+    {                                                                 \
+        return _Tpuvec(_mm256_slli_##suffix(a.val, imm));             \
+    }                                                                 \
+    inline _Tpuvec operator >> (const _Tpuvec& a, int imm)            \
+    {                                                                 \
+        return _Tpuvec(_mm256_srli_##suffix(a.val, imm));             \
+    }                                                                 \
+    template<int imm>                                                 \
+    inline _Tpuvec v_shl(const _Tpuvec& a)                            \
+    {                                                                 \
+        return _Tpuvec(_mm256_slli_##suffix(a.val, imm));             \
+    }                                                                 \
+    template<int imm>                                                 \
+    inline _Tpuvec v_shr(const _Tpuvec& a)                            \
+    {                                                                 \
+        return _Tpuvec(_mm256_srli_##suffix(a.val, imm));             \
+    }                                                                 \
+    /* signed lanes */                                                \
+    inline _Tpsvec operator << (const _Tpsvec& a, int imm)            \
+    {                                                                 \
+        return _Tpsvec(_mm256_slli_##suffix(a.val, imm));             \
+    }                                                                 \
+    inline _Tpsvec operator >> (const _Tpsvec& a, int imm)            \
+    {                                                                 \
+        return _Tpsvec(srai(a.val, imm));                             \
+    }                                                                 \
+    template<int imm>                                                 \
+    inline _Tpsvec v_shl(const _Tpsvec& a)                            \
+    {                                                                 \
+        return _Tpsvec(_mm256_slli_##suffix(a.val, imm));             \
+    }                                                                 \
+    template<int imm>                                                 \
+    inline _Tpsvec v_shr(const _Tpsvec& a)                            \
+    {                                                                 \
+        return _Tpsvec(srai(a.val, imm));                             \
+    }
+
+OPENCV_HAL_IMPL_AVX_SHIFT_OP(v_uint16x16, v_int16x16, epi16, _mm256_srai_epi16)
+OPENCV_HAL_IMPL_AVX_SHIFT_OP(v_uint32x8,  v_int32x8,  epi32, _mm256_srai_epi32)
+
+// Emulated 64-bit arithmetic right shift, built from a logical shift:
+// add a 2^63 bias, shift logically, then subtract the equally shifted bias.
+inline __m256i _mm256_srai_epi64xx(const __m256i a, int imm)
+{
+    const __m256i sign_bias = _mm256_set1_epi64x((int64)1 << 63);
+    const __m256i biased    = _mm256_add_epi64(a, sign_bias);
+    const __m256i shifted   = _mm256_srli_epi64(biased, imm);
+    return _mm256_sub_epi64(shifted, _mm256_srli_epi64(sign_bias, imm));
+}
+OPENCV_HAL_IMPL_AVX_SHIFT_OP(v_uint64x4,  v_int64x4,  epi64, _mm256_srai_epi64xx)
+
+
+/** Bitwise logic **/
+// Emits &, |, ^ (through OPENCV_HAL_IMPL_AVX_BIN_OP) and unary ~ for _Tpvec.
+// `not_const` is an all-ones register of the matching kind; XOR with it
+// inverts every bit.
+#define OPENCV_HAL_IMPL_AVX_LOGIC_OP(_Tpvec, suffix, not_const)  \
+    OPENCV_HAL_IMPL_AVX_BIN_OP(&, _Tpvec, _mm256_and_##suffix)   \
+    OPENCV_HAL_IMPL_AVX_BIN_OP(|, _Tpvec, _mm256_or_##suffix)    \
+    OPENCV_HAL_IMPL_AVX_BIN_OP(^, _Tpvec, _mm256_xor_##suffix)   \
+    inline _Tpvec operator ~ (const _Tpvec& a)                   \
+    {                                                            \
+        return _Tpvec(_mm256_xor_##suffix(a.val, not_const));    \
+    }
+
+// Integer vectors
+OPENCV_HAL_IMPL_AVX_LOGIC_OP(v_uint8x32,   si256, _mm256_set1_epi32(-1))
+OPENCV_HAL_IMPL_AVX_LOGIC_OP(v_int8x32,    si256, _mm256_set1_epi32(-1))
+OPENCV_HAL_IMPL_AVX_LOGIC_OP(v_uint16x16,  si256, _mm256_set1_epi32(-1))
+OPENCV_HAL_IMPL_AVX_LOGIC_OP(v_int16x16,   si256, _mm256_set1_epi32(-1))
+OPENCV_HAL_IMPL_AVX_LOGIC_OP(v_uint32x8,   si256, _mm256_set1_epi32(-1))
+OPENCV_HAL_IMPL_AVX_LOGIC_OP(v_int32x8,    si256, _mm256_set1_epi32(-1))
+OPENCV_HAL_IMPL_AVX_LOGIC_OP(v_uint64x4,   si256, _mm256_set1_epi64x(-1))
+OPENCV_HAL_IMPL_AVX_LOGIC_OP(v_int64x4,    si256, _mm256_set1_epi64x(-1))
+// Floating-point vectors (bit patterns are manipulated directly)
+OPENCV_HAL_IMPL_AVX_LOGIC_OP(v_float32x8,  ps,    _mm256_castsi256_ps(_mm256_set1_epi32(-1)))
+OPENCV_HAL_IMPL_AVX_LOGIC_OP(v_float64x4,  pd,    _mm256_castsi256_pd(_mm256_set1_epi32(-1)))
+
+/** Select **/
+// v_select(mask, a, b): takes `a` where the mask selects and `b` elsewhere,
+// using the blendv intrinsic for `suffix` (note the swapped a/b order that
+// blendv expects).
+#define OPENCV_HAL_IMPL_AVX_SELECT(_Tpvec, suffix)                               \
+    inline _Tpvec v_select(const _Tpvec& mask, const _Tpvec& a, const _Tpvec& b) \
+    {                                                                            \
+        return _Tpvec(_mm256_blendv_##suffix(b.val, a.val, mask.val));           \
+    }
+
+// Integer vectors all blend at byte granularity.
+OPENCV_HAL_IMPL_AVX_SELECT(v_uint8x32,  epi8)
+OPENCV_HAL_IMPL_AVX_SELECT(v_int8x32,   epi8)
+OPENCV_HAL_IMPL_AVX_SELECT(v_uint16x16, epi8)
+OPENCV_HAL_IMPL_AVX_SELECT(v_int16x16,  epi8)
+OPENCV_HAL_IMPL_AVX_SELECT(v_uint32x8,  epi8)
+OPENCV_HAL_IMPL_AVX_SELECT(v_int32x8,   epi8)
+OPENCV_HAL_IMPL_AVX_SELECT(v_float32x8, ps)
+OPENCV_HAL_IMPL_AVX_SELECT(v_float64x4, pd)
+
+/** Comparison **/
+// Derives !=, <, >=, <= for _Tpvec from operator == and operator >, which
+// must already be declared for that type.
+#define OPENCV_HAL_IMPL_AVX_CMP_OP_OV(_Tpvec)                     \
+    inline _Tpvec operator != (const _Tpvec& a, const _Tpvec& b)  \
+    {                                                             \
+        return ~(a == b);                                         \
+    }                                                             \
+    inline _Tpvec operator <  (const _Tpvec& a, const _Tpvec& b)  \
+    {                                                             \
+        return b > a;                                             \
+    }                                                             \
+    inline _Tpvec operator >= (const _Tpvec& a, const _Tpvec& b)  \
+    {                                                             \
+        return ~(a < b);                                          \
+    }                                                             \
+    inline _Tpvec operator <= (const _Tpvec& a, const _Tpvec& b)  \
+    {                                                             \
+        return b >= a;                                            \
+    }
+
+// Emits == and > for an unsigned/signed integer vector pair, then derives the
+// remaining comparisons. AVX2 only offers a signed greater-than, so the
+// unsigned variant flips the sign bit (`sbit`) of both operands first.
+#define OPENCV_HAL_IMPL_AVX_CMP_OP_INT(_Tpuvec, _Tpsvec, suffix, sbit)   \
+    inline _Tpuvec operator == (const _Tpuvec& a, const _Tpuvec& b)      \
+    {                                                                    \
+        return _Tpuvec(_mm256_cmpeq_##suffix(a.val, b.val));             \
+    }                                                                    \
+    inline _Tpuvec operator > (const _Tpuvec& a, const _Tpuvec& b)       \
+    {                                                                    \
+        const __m256i sign_flip = _mm256_set1_##suffix(sbit);            \
+        const __m256i lhs = _mm256_xor_si256(a.val, sign_flip);          \
+        const __m256i rhs = _mm256_xor_si256(b.val, sign_flip);          \
+        return _Tpuvec(_mm256_cmpgt_##suffix(lhs, rhs));                 \
+    }                                                                    \
+    inline _Tpsvec operator == (const _Tpsvec& a, const _Tpsvec& b)      \
+    {                                                                    \
+        return _Tpsvec(_mm256_cmpeq_##suffix(a.val, b.val));             \
+    }                                                                    \
+    inline _Tpsvec operator > (const _Tpsvec& a, const _Tpsvec& b)       \
+    {                                                                    \
+        return _Tpsvec(_mm256_cmpgt_##suffix(a.val, b.val));             \
+    }                                                                    \
+    OPENCV_HAL_IMPL_AVX_CMP_OP_OV(_Tpuvec)                               \
+    OPENCV_HAL_IMPL_AVX_CMP_OP_OV(_Tpsvec)
+
+OPENCV_HAL_IMPL_AVX_CMP_OP_INT(v_uint8x32,  v_int8x32,  epi8,  (char)-128)
+OPENCV_HAL_IMPL_AVX_CMP_OP_INT(v_uint16x16, v_int16x16, epi16, (short)-32768)
+OPENCV_HAL_IMPL_AVX_CMP_OP_INT(v_uint32x8,  v_int32x8,  epi32, (int)0x80000000)
+
+// 64-bit integer vectors only get equality and inequality here.
+#define OPENCV_HAL_IMPL_AVX_CMP_OP_64BIT(_Tpvec)                 \
+    inline _Tpvec operator == (const _Tpvec& a, const _Tpvec& b) \
+    {                                                            \
+        return _Tpvec(_mm256_cmpeq_epi64(a.val, b.val));         \
+    }                                                            \
+    inline _Tpvec operator != (const _Tpvec& a, const _Tpvec& b) \
+    {                                                            \
+        return ~(a == b);                                        \
+    }
+
+OPENCV_HAL_IMPL_AVX_CMP_OP_64BIT(v_uint64x4)
+OPENCV_HAL_IMPL_AVX_CMP_OP_64BIT(v_int64x4)
+
+// Emits one floating-point comparison operator `bin_op` for _Tpvec, built on
+// _mm256_cmp_<suffix> with the comparison predicate `imm8`.
+#define OPENCV_HAL_IMPL_AVX_CMP_FLT(bin_op, imm8, _Tpvec, suffix)    \
+    inline _Tpvec operator bin_op (const _Tpvec& a, const _Tpvec& b) \
+    { return _Tpvec(_mm256_cmp_##suffix(a.val, b.val, imm8)); }
+
+// Emits all six comparison operators for a floating-point vector type.
+// ==, <, >, <=, >= use ordered quiet predicates, which are false whenever an
+// operand is NaN. != must instead be true when an operand is NaN (as scalar
+// C++ `!=` is, and so that `a != b` agrees with `~(a == b)`), so it uses the
+// unordered predicate _CMP_NEQ_UQ rather than _CMP_NEQ_OQ.
+#define OPENCV_HAL_IMPL_AVX_CMP_OP_FLT(_Tpvec, suffix)               \
+    OPENCV_HAL_IMPL_AVX_CMP_FLT(==, _CMP_EQ_OQ,  _Tpvec, suffix)     \
+    OPENCV_HAL_IMPL_AVX_CMP_FLT(!=, _CMP_NEQ_UQ, _Tpvec, suffix)     \
+    OPENCV_HAL_IMPL_AVX_CMP_FLT(<,  _CMP_LT_OQ,  _Tpvec, suffix)     \
+    OPENCV_HAL_IMPL_AVX_CMP_FLT(>,  _CMP_GT_OQ,  _Tpvec, suffix)     \
+    OPENCV_HAL_IMPL_AVX_CMP_FLT(<=, _CMP_LE_OQ,  _Tpvec, suffix)     \
+    OPENCV_HAL_IMPL_AVX_CMP_FLT(>=, _CMP_GE_OQ,  _Tpvec, suffix)
+
+OPENCV_HAL_IMPL_AVX_CMP_OP_FLT(v_float32x8, ps)
+OPENCV_HAL_IMPL_AVX_CMP_OP_FLT(v_float64x4, pd)
+
+// Lane mask that is set where `a` is not NaN: a value compares as "ordered"
+// with itself exactly when it is not NaN.
+inline v_float32x8 v_not_nan(const v_float32x8& a)
+{
+    return v_float32x8(_mm256_cmp_ps(a.val, a.val, _CMP_ORD_Q));
+}
+inline v_float64x4 v_not_nan(const v_float64x4& a)
+{
+    return v_float64x4(_mm256_cmp_pd(a.val, a.val, _CMP_ORD_Q));
+}
+
+/** min/max **/
+// Element-wise minimum
+OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_min, v_uint8x32,  _mm256_min_epu8)
+OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_min, v_int8x32,   _mm256_min_epi8)
+OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_min, v_uint16x16, _mm256_min_epu16)
+OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_min, v_int16x16,  _mm256_min_epi16)
+OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_min, v_uint32x8,  _mm256_min_epu32)
+OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_min, v_int32x8,   _mm256_min_epi32)
+OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_min, v_float32x8, _mm256_min_ps)
+OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_min, v_float64x4, _mm256_min_pd)
+// Element-wise maximum
+OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_max, v_uint8x32,  _mm256_max_epu8)
+OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_max, v_int8x32,   _mm256_max_epi8)
+OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_max, v_uint16x16, _mm256_max_epu16)
+OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_max, v_int16x16,  _mm256_max_epi16)
+OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_max, v_uint32x8,  _mm256_max_epu32)
+OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_max, v_int32x8,   _mm256_max_epi32)
+OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_max, v_float32x8, _mm256_max_ps)
+OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_max, v_float64x4, _mm256_max_pd)
+
+/** Rotate **/
+// Two-vector byte rotate left by `imm` bytes (compile-time). imm == 0 yields
+// a, imm == 32 yields b, imm > 32 yields a default-constructed vector.
+// Every branch is compiled for every imm, so the alignr counts are masked to
+// stay valid immediates even on the paths that are not taken.
+template<int imm>
+inline v_uint8x32 v_rotate_left(const v_uint8x32& a, const v_uint8x32& b)
+{
+    enum {
+        COUNT_BELOW_16 = (16 - imm) & 0xFF,
+        COUNT_ABOVE_16 = (32 - imm) & 0xFF
+    };
+
+    if (imm == 0)  return a;
+    if (imm == 32) return b;
+    if (imm > 32)  return v_uint8x32();
+
+    // Low 128 bits = upper half of b, high 128 bits = lower half of a.
+    const __m256i bridge = _mm256_permute2x128_si256(a.val, b.val, 0x03);
+    if (imm == 16)
+        return v_uint8x32(bridge);
+    if (imm < 16)
+        return v_uint8x32(_mm256_alignr_epi8(a.val, bridge, COUNT_BELOW_16));
+    // 16 < imm < 32
+    return v_uint8x32(_mm256_alignr_epi8(bridge, b.val, COUNT_ABOVE_16));
+}
+
+// Two-vector byte rotate right by `imm` bytes (compile-time). imm == 0 yields
+// a, imm == 32 yields b, imm > 32 yields a default-constructed vector.
+template<int imm>
+inline v_uint8x32 v_rotate_right(const v_uint8x32& a, const v_uint8x32& b)
+{
+    // Masked so it stays a valid immediate on the paths that are not taken.
+    enum { COUNT_ABOVE_16 = (imm - 16) & 0xFF };
+
+    if (imm == 0)  return a;
+    if (imm == 32) return b;
+    if (imm > 32)  return v_uint8x32();
+
+    // Low 128 bits = upper half of a, high 128 bits = lower half of b.
+    const __m256i bridge = _mm256_permute2x128_si256(a.val, b.val, 0x21);
+    if (imm == 16)
+        return v_uint8x32(bridge);
+    if (imm < 16)
+        return v_uint8x32(_mm256_alignr_epi8(bridge, a.val, imm));
+    // 16 < imm < 32
+    return v_uint8x32(_mm256_alignr_epi8(b.val, bridge, COUNT_ABOVE_16));
+}
+
+// Single-vector byte rotate left by `imm` bytes with zero fill. imm == 0
+// yields a, imm > 32 yields a default-constructed vector.
+template<int imm>
+inline v_uint8x32 v_rotate_left(const v_uint8x32& a)
+{
+    // Masked so they stay valid immediates on the paths that are not taken.
+    enum {
+        COUNT_ABOVE_16 = (imm - 16) & 0xFF,
+        COUNT_BELOW_16 = (16 - imm) & 0xFF
+    };
+
+    if (imm == 0) return a;
+    if (imm > 32) return v_uint8x32();
+
+    // Control bit 3 zeroes the low 128 bits; the high 128 bits receive the
+    // lower half of a (i.e. a moved up by 16 bytes).
+    const __m256i up16 = _mm256_permute2x128_si256(a.val, a.val, _MM_SHUFFLE(0, 0, 2, 0));
+    if (imm == 16)
+        return v_uint8x32(up16);
+    if (imm < 16)
+        return v_uint8x32(_mm256_alignr_epi8(a.val, up16, COUNT_BELOW_16));
+    return v_uint8x32(_mm256_slli_si256(up16, COUNT_ABOVE_16));
+}
+
+// Single-vector byte rotate right by `imm` bytes with zero fill. imm == 0
+// yields a, imm > 32 yields a default-constructed vector.
+template<int imm>
+inline v_uint8x32 v_rotate_right(const v_uint8x32& a)
+{
+    // Masked so it stays a valid immediate on the paths that are not taken.
+    enum { COUNT_ABOVE_16 = (imm - 16) & 0xFF };
+
+    if (imm == 0) return a;
+    if (imm > 32) return v_uint8x32();
+
+    // Control bit 7 zeroes the high 128 bits; the low 128 bits receive the
+    // upper half of a (i.e. a moved down by 16 bytes).
+    const __m256i down16 = _mm256_permute2x128_si256(a.val, a.val, _MM_SHUFFLE(2, 0, 0, 1));
+    if (imm == 16)
+        return v_uint8x32(down16);
+    if (imm < 16)
+        return v_uint8x32(_mm256_alignr_epi8(down16, a.val, imm));
+    return v_uint8x32(_mm256_srli_si256(down16, COUNT_ABOVE_16));
+}
+
+// Emits the one- and two-argument forms of `intrin` (v_rotate_left or
+// v_rotate_right) for _Tpvec by delegating to the v_uint8x32 version. The
+// element count `imm` is converted to a byte count via the lane size, and
+// `cast` converts the resulting __m256i back to the register type of _Tpvec.
+#define OPENCV_HAL_IMPL_AVX_ROTATE_CAST(intrin, _Tpvec, cast)            \
+    template<int imm>                                                    \
+    inline _Tpvec intrin(const _Tpvec& a, const _Tpvec& b)               \
+    {                                                                    \
+        enum {BYTE_IMM = imm * sizeof(typename _Tpvec::lane_type)};      \
+        const v_uint8x32 rotated = intrin<BYTE_IMM>(                     \
+            v_reinterpret_as_u8(a), v_reinterpret_as_u8(b));             \
+        return _Tpvec(cast(rotated.val));                                \
+    }                                                                    \
+    template<int imm>                                                    \
+    inline _Tpvec intrin(const _Tpvec& a)                                \
+    {                                                                    \
+        enum {BYTE_IMM = imm * sizeof(typename _Tpvec::lane_type)};      \
+        const v_uint8x32 rotated = intrin<BYTE_IMM>(                     \
+            v_reinterpret_as_u8(a));                                     \
+        return _Tpvec(cast(rotated.val));                                \
+    }
+
+// Integer vectors need no register cast.
+#define OPENCV_HAL_IMPL_AVX_ROTATE(_Tpvec)                                  \
+    OPENCV_HAL_IMPL_AVX_ROTATE_CAST(v_rotate_left,  _Tpvec, OPENCV_HAL_NOP) \
+    OPENCV_HAL_IMPL_AVX_ROTATE_CAST(v_rotate_right, _Tpvec, OPENCV_HAL_NOP)
+
+OPENCV_HAL_IMPL_AVX_ROTATE(v_int8x32)
+OPENCV_HAL_IMPL_AVX_ROTATE(v_uint16x16)
+OPENCV_HAL_IMPL_AVX_ROTATE(v_int16x16)
+OPENCV_HAL_IMPL_AVX_ROTATE(v_uint32x8)
+OPENCV_HAL_IMPL_AVX_ROTATE(v_int32x8)
+OPENCV_HAL_IMPL_AVX_ROTATE(v_uint64x4)
+OPENCV_HAL_IMPL_AVX_ROTATE(v_int64x4)
+
+// Floating-point vectors cast the integer result back to ps / pd.
+OPENCV_HAL_IMPL_AVX_ROTATE_CAST(v_rotate_left,  v_float32x8, _mm256_castsi256_ps)
+OPENCV_HAL_IMPL_AVX_ROTATE_CAST(v_rotate_right, v_float32x8, _mm256_castsi256_ps)
+OPENCV_HAL_IMPL_AVX_ROTATE_CAST(v_rotate_left,  v_float64x4, _mm256_castsi256_pd)
+OPENCV_HAL_IMPL_AVX_ROTATE_CAST(v_rotate_right, v_float64x4, _mm256_castsi256_pd)
+
+/** Reverse **/
+// Reverses all 32 bytes: reverse the bytes inside each 128-bit half, then
+// swap the two halves.
+inline v_uint8x32 v_reverse(const v_uint8x32 &a)
+{
+    static const __m256i byte_order = _mm256_setr_epi8(
+            15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
+            15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+    const __m256i half_reversed = _mm256_shuffle_epi8(a.val, byte_order);
+    return v_uint8x32(_mm256_permute2x128_si256(half_reversed, half_reversed, 1));
+}
+
+// Signed 8-bit reverse: delegates to the unsigned implementation.
+inline v_int8x32 v_reverse(const v_int8x32 &a)
+{
+    const v_uint8x32 reversed = v_reverse(v_reinterpret_as_u8(a));
+    return v_reinterpret_as_s8(reversed);
+}
+
+// Reverses all 16 lanes: reverse the 16-bit lanes inside each 128-bit half
+// (byte pairs stay in order), then swap the two halves.
+inline v_uint16x16 v_reverse(const v_uint16x16 &a)
+{
+    static const __m256i lane_order = _mm256_setr_epi8(
+            14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1,
+            14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1);
+    const __m256i half_reversed = _mm256_shuffle_epi8(a.val, lane_order);
+    return v_uint16x16(_mm256_permute2x128_si256(half_reversed, half_reversed, 1));
+}
+
+// Signed 16-bit reverse: delegates to the unsigned implementation.
+inline v_int16x16 v_reverse(const v_int16x16 &a)
+{
+    const v_uint16x16 reversed = v_reverse(v_reinterpret_as_u16(a));
+    return v_reinterpret_as_s16(reversed);
+}
+
+// Reverses the eight 32-bit lanes with a single cross-lane permute.
+inline v_uint32x8 v_reverse(const v_uint32x8 &a)
+{
+    static const __m256i lane_order = _mm256_setr_epi32(7, 6, 5, 4, 3, 2, 1, 0);
+    const __m256i reversed = _mm256_permutevar8x32_epi32(a.val, lane_order);
+    return v_uint32x8(reversed);
+}
+
+// 32-bit signed and float reverse: both delegate to the v_uint32x8 version.
+inline v_int32x8 v_reverse(const v_int32x8 &a)
+{
+    const v_uint32x8 reversed = v_reverse(v_reinterpret_as_u32(a));
+    return v_reinterpret_as_s32(reversed);
+}
+
+inline v_float32x8 v_reverse(const v_float32x8 &a)
+{
+    const v_uint32x8 reversed = v_reverse(v_reinterpret_as_u32(a));
+    return v_reinterpret_as_f32(reversed);
+}
+
+// Reverses the four 64-bit lanes with a single immediate permute.
+inline v_uint64x4 v_reverse(const v_uint64x4 &a)
+{
+    const __m256i reversed = _mm256_permute4x64_epi64(a.val, _MM_SHUFFLE(0, 1, 2, 3));
+    return v_uint64x4(reversed);
+}
+
+// 64-bit signed and double reverse: both delegate to the v_uint64x4 version.
+inline v_int64x4 v_reverse(const v_int64x4 &a)
+{
+    const v_uint64x4 reversed = v_reverse(v_reinterpret_as_u64(a));
+    return v_reinterpret_as_s64(reversed);
+}
+
+inline v_float64x4 v_reverse(const v_float64x4 &a)
+{
+    const v_uint64x4 reversed = v_reverse(v_reinterpret_as_u64(a));
+    return v_reinterpret_as_f64(reversed);
+}
+
+////////// Reduce and mask /////////
+
+/** Reduce **/
+// Sum of all 32 unsigned bytes. SAD against zero yields one partial sum per
+// 64-bit group; the four partial sums are then folded together.
+inline unsigned v_reduce_sum(const v_uint8x32& a)
+{
+    const __m256i partial = _mm256_sad_epu8(a.val, _mm256_setzero_si256());
+    const __m128i folded  = _mm_add_epi32(_v256_extract_low(partial), _v256_extract_high(partial));
+    const __m128i total   = _mm_add_epi32(folded, _mm_unpackhi_epi64(folded, folded));
+    return (unsigned)_mm_cvtsi128_si32(total);
+}
+// Sum of all 32 signed bytes. Flipping the sign bit biases every byte by
+// +128 so the unsigned SAD can be used; the total bias (32 * 128 = 4096) is
+// removed at the end.
+inline int v_reduce_sum(const v_int8x32& a)
+{
+    const __m256i biased  = _mm256_xor_si256(a.val, _mm256_set1_epi8((schar)-128));
+    const __m256i partial = _mm256_sad_epu8(biased, _mm256_setzero_si256());
+    const __m128i folded  = _mm_add_epi32(_v256_extract_low(partial), _v256_extract_high(partial));
+    const __m128i total   = _mm_add_epi32(folded, _mm_unpackhi_epi64(folded, folded));
+    return (unsigned)_mm_cvtsi128_si32(total) - 4096;
+}
+// Horizontal min/max over 32 byte lanes: combine the two 128-bit halves, then
+// keep halving the active width (8, 4, 2, 1 bytes) until lane 0 holds the
+// result.
+#define OPENCV_HAL_IMPL_AVX_REDUCE_32(_Tpvec, sctype, func, intrin) \
+    inline sctype v_reduce_##func(const _Tpvec& a) \
+    { \
+        const __m128i lo = _v256_extract_low(a.val); \
+        const __m128i hi = _v256_extract_high(a.val); \
+        __m128i acc = intrin(lo, hi); \
+        acc = intrin(acc, _mm_srli_si128(acc, 8)); \
+        acc = intrin(acc, _mm_srli_si128(acc, 4)); \
+        acc = intrin(acc, _mm_srli_si128(acc, 2)); \
+        acc = intrin(acc, _mm_srli_si128(acc, 1)); \
+        return (sctype)_mm_cvtsi128_si32(acc); \
+    }
+
+OPENCV_HAL_IMPL_AVX_REDUCE_32(v_uint8x32, uchar, min, _mm_min_epu8)
+OPENCV_HAL_IMPL_AVX_REDUCE_32(v_int8x32,  schar, min, _mm_min_epi8)
+OPENCV_HAL_IMPL_AVX_REDUCE_32(v_uint8x32, uchar, max, _mm_max_epu8)
+OPENCV_HAL_IMPL_AVX_REDUCE_32(v_int8x32,  schar, max, _mm_max_epi8)
+
+// Horizontal min/max over 16 lanes of 16 bits: combine the two 128-bit
+// halves, then fold by 8, 4 and 2 bytes until lane 0 holds the result.
+#define OPENCV_HAL_IMPL_AVX_REDUCE_16(_Tpvec, sctype, func, intrin) \
+    inline sctype v_reduce_##func(const _Tpvec& a)                  \
+    {                                                               \
+        __m128i acc = intrin(_v256_extract_low(a.val),              \
+                             _v256_extract_high(a.val));            \
+        acc = intrin(acc, _mm_srli_si128(acc, 8));                  \
+        acc = intrin(acc, _mm_srli_si128(acc, 4));                  \
+        acc = intrin(acc, _mm_srli_si128(acc, 2));                  \
+        return (sctype) _mm_cvtsi128_si32(acc);                     \
+    }
+
+OPENCV_HAL_IMPL_AVX_REDUCE_16(v_uint16x16, ushort, min, _mm_min_epu16)
+OPENCV_HAL_IMPL_AVX_REDUCE_16(v_int16x16,  short,  min, _mm_min_epi16)
+OPENCV_HAL_IMPL_AVX_REDUCE_16(v_uint16x16, ushort, max, _mm_max_epu16)
+OPENCV_HAL_IMPL_AVX_REDUCE_16(v_int16x16,  short,  max, _mm_max_epi16)
+
+// Horizontal min/max over 8 lanes of 32 bits: combine the two 128-bit
+// halves, then fold by 8 and 4 bytes until lane 0 holds the result.
+#define OPENCV_HAL_IMPL_AVX_REDUCE_8(_Tpvec, sctype, func, intrin) \
+    inline sctype v_reduce_##func(const _Tpvec& a)                 \
+    {                                                              \
+        __m128i acc = intrin(_v256_extract_low(a.val),             \
+                             _v256_extract_high(a.val));           \
+        acc = intrin(acc, _mm_srli_si128(acc, 8));                 \
+        acc = intrin(acc, _mm_srli_si128(acc, 4));                 \
+        return (sctype) _mm_cvtsi128_si32(acc);                    \
+    }
+
+OPENCV_HAL_IMPL_AVX_REDUCE_8(v_uint32x8, unsigned, min, _mm_min_epu32)
+OPENCV_HAL_IMPL_AVX_REDUCE_8(v_int32x8,  int,      min, _mm_min_epi32)
+OPENCV_HAL_IMPL_AVX_REDUCE_8(v_uint32x8, unsigned, max, _mm_max_epu32)
+OPENCV_HAL_IMPL_AVX_REDUCE_8(v_int32x8,  int,      max, _mm_max_epi32)
+
+// Horizontal min/max over 8 floats: combine the two 128-bit halves, then
+// fold the upper pair onto the lower pair and finally element 1 onto
+// element 0.
+#define OPENCV_HAL_IMPL_AVX_REDUCE_FLT(func, intrin)                    \
+    inline float v_reduce_##func(const v_float32x8& a)                  \
+    {                                                                   \
+        __m128 acc = intrin(_v256_extract_low(a.val),                   \
+                            _v256_extract_high(a.val));                 \
+        acc = intrin(acc, _mm_permute_ps(acc, _MM_SHUFFLE(0, 0, 3, 2))); \
+        acc = intrin(acc, _mm_permute_ps(acc, _MM_SHUFFLE(0, 0, 0, 1))); \
+        return _mm_cvtss_f32(acc);                                      \
+    }
+
+OPENCV_HAL_IMPL_AVX_REDUCE_FLT(min, _mm_min_ps)
+OPENCV_HAL_IMPL_AVX_REDUCE_FLT(max, _mm_max_ps)
+
+// Sum of the eight 32-bit lanes: two horizontal-add rounds reduce each
+// 128-bit half to its own sum, then the two halves are added.
+inline int v_reduce_sum(const v_int32x8& a)
+{
+    const __m256i pairs = _mm256_hadd_epi32(a.val, a.val);
+    const __m256i quads = _mm256_hadd_epi32(pairs, pairs);
+
+    const __m128i upper = _v256_extract_high(quads);
+    const __m128i total = _mm_add_epi32(_v256_extract_low(quads), upper);
+
+    return _mm_cvtsi128_si32(total);
+}
+
+// Unsigned 32-bit sum: reuses the signed reduction on the same bits and
+// converts the result back to unsigned.
+inline unsigned v_reduce_sum(const v_uint32x8& a)
+{
+    const int signed_sum = v_reduce_sum(v_reinterpret_as_s32(a));
+    return signed_sum;
+}
+
+// 16-bit sums: widen both halves to 32 bits, add them, and reduce the
+// resulting 32-bit vector.
+inline int v_reduce_sum(const v_int16x16& a)
+{
+    return v_reduce_sum(v_expand_low(a) + v_expand_high(a));
+}
+inline unsigned v_reduce_sum(const v_uint16x16& a)
+{
+    return v_reduce_sum(v_expand_low(a) + v_expand_high(a));
+}
+
+// Sum of the eight floats: two horizontal-add rounds reduce each 128-bit
+// half to its own sum, then the two halves are added.
+inline float v_reduce_sum(const v_float32x8& a)
+{
+    const __m256 pairs = _mm256_hadd_ps(a.val, a.val);
+    const __m256 quads = _mm256_hadd_ps(pairs, pairs);
+
+    const __m128 upper = _v256_extract_high(quads);
+    const __m128 total = _mm_add_ps(_v256_extract_low(quads), upper);
+
+    return _mm_cvtss_f32(total);
+}
+
+// Sum of the four unsigned 64-bit lanes: add the two 128-bit halves, spill
+// the pair to an aligned buffer and finish in scalar code.
+inline uint64 v_reduce_sum(const v_uint64x4& a)
+{
+    uint64 CV_DECL_ALIGNED(32) pair[2];
+    const __m128i halves_sum = _mm_add_epi64(_v256_extract_low(a.val), _v256_extract_high(a.val));
+    _mm_store_si128((__m128i*)pair, halves_sum);
+    return pair[0] + pair[1];
+}
+// Sum of the four signed 64-bit lanes: add the two 128-bit halves, spill the
+// pair to an aligned buffer and finish in scalar code.
+inline int64 v_reduce_sum(const v_int64x4& a)
+{
+    int64 CV_DECL_ALIGNED(32) pair[2];
+    const __m128i halves_sum = _mm_add_epi64(_v256_extract_low(a.val), _v256_extract_high(a.val));
+    _mm_store_si128((__m128i*)pair, halves_sum);
+    return pair[0] + pair[1];
+}
+// Sum of the four doubles: one horizontal add sums the pair inside each
+// 128-bit half, then the two halves are added.
+inline double v_reduce_sum(const v_float64x4& a)
+{
+    const __m256d pairs = _mm256_hadd_pd(a.val, a.val);
+    const __m128d total = _mm_add_pd(_v256_extract_low(pairs), _v256_extract_high(pairs));
+    return _mm_cvtsd_f64(total);
+}
+
+// Reduces four float vectors at once with two rounds of horizontal adds
+// (a with b and c with d, then the two intermediate results).
+inline v_float32x8 v_reduce_sum4(const v_float32x8& a, const v_float32x8& b,
+                                 const v_float32x8& c, const v_float32x8& d)
+{
+    const __m256 sum_ab = _mm256_hadd_ps(a.val, b.val);
+    const __m256 sum_cd = _mm256_hadd_ps(c.val, d.val);
+    const __m256 sum_abcd = _mm256_hadd_ps(sum_ab, sum_cd);
+    return v_float32x8(sum_abcd);
+}
+
+// Sum of absolute differences over 32 unsigned bytes: the SAD instruction
+// yields one partial sum per 64-bit group; the four are then folded.
+inline unsigned v_reduce_sad(const v_uint8x32& a, const v_uint8x32& b)
+{
+    const __m256i partial = _mm256_sad_epu8(a.val, b.val);
+    const __m128i folded  = _mm_add_epi32(_v256_extract_low(partial), _v256_extract_high(partial));
+    const __m128i total   = _mm_add_epi32(folded, _mm_unpackhi_epi64(folded, folded));
+    return (unsigned)_mm_cvtsi128_si32(total);
+}
+// Sum of absolute differences over 32 signed bytes.
+// _mm256_sad_epu8 works on unsigned bytes, so both inputs are first mapped
+// from [-128, 127] onto [0, 255] by flipping the sign bit. That mapping is
+// order-preserving, so |a - b| is unchanged for every input pair. (Adding
+// 0x7f instead wraps -128 around to 255 and gives wrong distances whenever
+// an input equals -128.)
+inline unsigned v_reduce_sad(const v_int8x32& a, const v_int8x32& b)
+{
+    const __m256i sign_flip = _mm256_set1_epi8((schar)-128);
+    const __m256i half = _mm256_sad_epu8(_mm256_xor_si256(a.val, sign_flip),
+                                         _mm256_xor_si256(b.val, sign_flip));
+    // Fold the four 64-bit partial sums into one.
+    const __m128i quarter = _mm_add_epi32(_v256_extract_low(half), _v256_extract_high(half));
+    return (unsigned)_mm_cvtsi128_si32(_mm_add_epi32(quarter, _mm_unpackhi_epi64(quarter, quarter)));
+}
+// Sum of absolute differences over 16 unsigned 16-bit lanes. |a - b| is
+// formed from the two one-sided differences, widened to 32 bits and summed.
+inline unsigned v_reduce_sad(const v_uint16x16& a, const v_uint16x16& b)
+{
+    const v_uint16x16 abs_diff = v_add_wrap(a - b, b - a);
+    v_uint32x8 wide_lo, wide_hi;
+    v_expand(abs_diff, wide_lo, wide_hi);
+    return v_reduce_sum(wide_lo + wide_hi);
+}
+// Sum of absolute differences over 16 signed 16-bit lanes. max - min (with
+// wrap-around) reinterpreted as unsigned is |a - b|; it is widened to 32 bits
+// and summed.
+inline unsigned v_reduce_sad(const v_int16x16& a, const v_int16x16& b)
+{
+    const v_int16x16 span = v_sub_wrap(v_max(a, b), v_min(a, b));
+    v_uint32x8 wide_lo, wide_hi;
+    v_expand(v_reinterpret_as_u16(span), wide_lo, wide_hi);
+    return v_reduce_sum(wide_lo + wide_hi);
+}
+// Sum of absolute differences over 8 unsigned 32-bit lanes (max - min).
+inline unsigned v_reduce_sad(const v_uint32x8& a, const v_uint32x8& b)
+{
+    const v_uint32x8 abs_diff = v_max(a, b) - v_min(a, b);
+    return v_reduce_sum(abs_diff);
+}
+// Sum of absolute differences over 8 signed 32-bit lanes. Where a < b the
+// mask is all ones, and (x ^ mask) - mask negates x, giving |a - b|.
+inline unsigned v_reduce_sad(const v_int32x8& a, const v_int32x8& b)
+{
+    const v_int32x8 neg_mask = a < b;
+    const v_int32x8 abs_diff = ((a - b) ^ neg_mask) - neg_mask;
+    return v_reduce_sum(v_reinterpret_as_u32(abs_diff));
+}
+// Sum of absolute differences over 8 floats: the sign bit of a - b is
+// cleared with a bit mask before summing.
+inline float v_reduce_sad(const v_float32x8& a, const v_float32x8& b)
+{
+    const v_float32x8 clear_sign = v_float32x8(_mm256_castsi256_ps(_mm256_set1_epi32(0x7fffffff)));
+    return v_reduce_sum((a - b) & clear_sign);
+}
+
+/** Popcount **/
+// Per-byte population count via a 16-entry nibble lookup table: the bit
+// counts of the low and high nibble of every byte are looked up and added.
+inline v_uint8x32 v_popcount(const v_uint8x32& a)
+{
+    const __m256i nibble_bits = _mm256_setr_epi8(0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
+                                                 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4);
+    const __m256i nibble_mask = _mm256_set1_epi8(0x0F);
+
+    const __m256i lo_nibbles = _mm256_and_si256(a.val, nibble_mask);
+    const __m256i hi_nibbles = _mm256_and_si256(_mm256_srli_epi16(a.val, 4), nibble_mask);
+
+    return v_uint8x32(_mm256_add_epi8(_mm256_shuffle_epi8(nibble_bits, lo_nibbles),
+                                      _mm256_shuffle_epi8(nibble_bits, hi_nibbles)));
+}
+// Per-16-bit-lane population count: add each byte count to its neighbour,
+// then keep only the low byte of every lane.
+inline v_uint16x16 v_popcount(const v_uint16x16& a)
+{
+    v_uint8x32 counts = v_popcount(v_reinterpret_as_u8(a));
+    counts += v_rotate_right<1>(counts);
+    const v_uint16x16 low_byte = v256_setall_u16(0x00ff);
+    return v_reinterpret_as_u16(counts) & low_byte;
+}
+// Per-32-bit-lane population count: accumulate the four byte counts of each
+// lane into its lowest byte (shift by 1, then by 2), then mask that byte.
+inline v_uint32x8 v_popcount(const v_uint32x8& a)
+{
+    v_uint8x32 counts = v_popcount(v_reinterpret_as_u8(a));
+    counts += v_rotate_right<1>(counts);
+    counts += v_rotate_right<2>(counts);
+    const v_uint32x8 low_byte = v256_setall_u32(0x000000ff);
+    return v_reinterpret_as_u32(counts) & low_byte;
+}
+// Per-64-bit-lane population count: SAD against zero sums the eight byte
+// counts of every 64-bit lane.
+inline v_uint64x4 v_popcount(const v_uint64x4& a)
+{
+    const v_uint8x32 counts = v_popcount(v_reinterpret_as_u8(a));
+    return v_uint64x4(_mm256_sad_epu8(counts.val, _mm256_setzero_si256()));
+}
+// Signed overloads: population count depends only on the bits, so each one
+// forwards to the unsigned overload of the same width.
+inline v_uint8x32 v_popcount(const v_int8x32& a)
+{
+    return v_popcount(v_reinterpret_as_u8(a));
+}
+inline v_uint16x16 v_popcount(const v_int16x16& a)
+{
+    return v_popcount(v_reinterpret_as_u16(a));
+}
+inline v_uint32x8 v_popcount(const v_int32x8& a)
+{
+    return v_popcount(v_reinterpret_as_u32(a));
+}
+inline v_uint64x4 v_popcount(const v_int64x4& a)
+{
+    return v_popcount(v_reinterpret_as_u64(a));
+}
+
+/** Mask **/
+// v_signmask collects the top (sign) bit of every lane into an int bit mask.
+
+// 8-bit lanes: one mask bit per byte.
+inline int v_signmask(const v_int8x32& a)
+{
+    return _mm256_movemask_epi8(a.val);
+}
+inline int v_signmask(const v_uint8x32& a)
+{
+    return v_signmask(v_reinterpret_as_s8(a));
+}
+
+// 16-bit lanes: pack to bytes (sign is kept) and use the low 16 mask bits.
+inline int v_signmask(const v_int16x16& a)
+{
+    return v_signmask(v_pack(a, a)) & 0xFFFF;
+}
+inline int v_signmask(const v_uint16x16& a)
+{
+    return v_signmask(v_reinterpret_as_s16(a));
+}
+
+// Float lanes have native movemask instructions.
+inline int v_signmask(const v_float32x8& a)
+{
+    return _mm256_movemask_ps(a.val);
+}
+inline int v_signmask(const v_float64x4& a)
+{
+    return _mm256_movemask_pd(a.val);
+}
+
+// 32-bit integer lanes reuse the float movemask on the same bits.
+inline int v_signmask(const v_int32x8& a)
+{
+    return v_signmask(v_reinterpret_as_f32(a));
+}
+inline int v_signmask(const v_uint32x8& a)
+{
+    return v_signmask(v_reinterpret_as_f32(a));
+}
+
+// 64-bit integer lanes reuse the double movemask on the same bits.
+inline int v_signmask(const v_int64x4& a)
+{
+    return v_signmask(v_reinterpret_as_f64(a));
+}
+inline int v_signmask(const v_uint64x4& a)
+{
+    return v_signmask(v_reinterpret_as_f64(a));
+}
+
+// v_scan_forward: index of the first lane whose sign bit is set. The byte
+// sign mask is scanned for its lowest set bit and the byte position is
+// divided by the lane width in bytes.
+
+// 1-byte lanes
+inline int v_scan_forward(const v_int8x32& a)
+{ return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))); }
+inline int v_scan_forward(const v_uint8x32& a)
+{ return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))); }
+
+// 2-byte lanes
+inline int v_scan_forward(const v_int16x16& a)
+{ return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 2; }
+inline int v_scan_forward(const v_uint16x16& a)
+{ return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 2; }
+
+// 4-byte lanes
+inline int v_scan_forward(const v_int32x8& a)
+{ return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 4; }
+inline int v_scan_forward(const v_uint32x8& a)
+{ return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 4; }
+inline int v_scan_forward(const v_float32x8& a)
+{ return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 4; }
+
+// 8-byte lanes
+inline int v_scan_forward(const v_int64x4& a)
+{ return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 8; }
+inline int v_scan_forward(const v_uint64x4& a)
+{ return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 8; }
+inline int v_scan_forward(const v_float64x4& a)
+{ return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 8; }
+
+/** Checks **/
+// v_check_all: every lane has its sign bit set (sign mask equals `allmask`).
+// v_check_any: at least one lane has its sign bit set.
+#define OPENCV_HAL_IMPL_AVX_CHECK(_Tpvec, allmask) \
+    inline bool v_check_all(const _Tpvec& a)       \
+    {                                              \
+        return v_signmask(a) == allmask;           \
+    }                                              \
+    inline bool v_check_any(const _Tpvec& a)       \
+    {                                              \
+        return v_signmask(a) != 0;                 \
+    }
+OPENCV_HAL_IMPL_AVX_CHECK(v_uint8x32, -1)
+OPENCV_HAL_IMPL_AVX_CHECK(v_int8x32, -1)
+OPENCV_HAL_IMPL_AVX_CHECK(v_uint32x8, 255)
+OPENCV_HAL_IMPL_AVX_CHECK(v_int32x8, 255)
+OPENCV_HAL_IMPL_AVX_CHECK(v_uint64x4, 15)
+OPENCV_HAL_IMPL_AVX_CHECK(v_int64x4, 15)
+OPENCV_HAL_IMPL_AVX_CHECK(v_float32x8, 255)
+OPENCV_HAL_IMPL_AVX_CHECK(v_float64x4, 15)
+
+// 16-bit variant: works on the byte sign mask and only looks at the odd
+// bytes (0xaaaaaaaa), i.e. the high byte of every 16-bit lane.
+#define OPENCV_HAL_IMPL_AVX_CHECK_SHORT(_Tpvec)  \
+    inline bool v_check_all(const _Tpvec& a)     \
+    {                                            \
+        return (v_signmask(v_reinterpret_as_s8(a)) & 0xaaaaaaaa) == 0xaaaaaaaa; \
+    }                                            \
+    inline bool v_check_any(const _Tpvec& a)     \
+    {                                            \
+        return (v_signmask(v_reinterpret_as_s8(a)) & 0xaaaaaaaa) != 0; \
+    }
+OPENCV_HAL_IMPL_AVX_CHECK_SHORT(v_uint16x16)
+OPENCV_HAL_IMPL_AVX_CHECK_SHORT(v_int16x16)
+
+////////// Other math /////////
+
+/** Some frequent operations **/
+// Floating-point helpers for _Tpvec:
+//   v_fma / v_muladd   a * b + c using the fused multiply-add intrinsic
+//   v_sqrt             element-wise square root
+//   v_sqr_magnitude    a*a + b*b
+//   v_magnitude        sqrt(a*a + b*b)
+#define OPENCV_HAL_IMPL_AVX_MULADD(_Tpvec, suffix)                            \
+    inline _Tpvec v_fma(const _Tpvec& a, const _Tpvec& b, const _Tpvec& c)    \
+    {                                                                         \
+        return _Tpvec(_mm256_fmadd_##suffix(a.val, b.val, c.val));            \
+    }                                                                         \
+    inline _Tpvec v_muladd(const _Tpvec& a, const _Tpvec& b, const _Tpvec& c) \
+    {                                                                         \
+        return v_fma(a, b, c);                                                \
+    }                                                                         \
+    inline _Tpvec v_sqrt(const _Tpvec& x)                                     \
+    {                                                                         \
+        return _Tpvec(_mm256_sqrt_##suffix(x.val));                           \
+    }                                                                         \
+    inline _Tpvec v_sqr_magnitude(const _Tpvec& a, const _Tpvec& b)           \
+    {                                                                         \
+        return v_fma(a, a, b * b);                                            \
+    }                                                                         \
+    inline _Tpvec v_magnitude(const _Tpvec& a, const _Tpvec& b)               \
+    {                                                                         \
+        return v_sqrt(v_sqr_magnitude(a, b));                                 \
+    }
+
+OPENCV_HAL_IMPL_AVX_MULADD(v_float32x8, ps)
+OPENCV_HAL_IMPL_AVX_MULADD(v_float64x4, pd)
+
+// Integer multiply-add: a * b + c using the vector operators (not fused).
+inline v_int32x8 v_fma(const v_int32x8& a, const v_int32x8& b, const v_int32x8& c)
+{
+    const v_int32x8 product = a * b;
+    return product + c;
+}
+
+// Alias of the integer v_fma.
+inline v_int32x8 v_muladd(const v_int32x8& a, const v_int32x8& b, const v_int32x8& c)
+{ return v_fma(a, b, c); }
+
+// Approximate 1/sqrt(x): hardware reciprocal-sqrt estimate refined by one
+// Newton-Raphson step, est * (1.5 - 0.5 * x * est * est).
+inline v_float32x8 v_invsqrt(const v_float32x8& x)
+{
+    const v_float32x8 half_x = x * v256_setall_f32(0.5);
+    v_float32x8 est = v_float32x8(_mm256_rsqrt_ps(x.val));
+    // todo: _mm256_fnmsub_ps
+    est *= v256_setall_f32(1.5) - ((est * est) * half_x);
+    return est;
+}
+
+// Double-precision 1/sqrt(x), computed as a division by the square root.
+inline v_float64x4 v_invsqrt(const v_float64x4& x)
+{
+    const v_float64x4 one = v256_setall_f64(1.);
+    return one / v_sqrt(x);
+}
+
+/** Absolute values **/
+// v_abs for signed integer vectors; the result is returned as the unsigned
+// vector type of the same shape (v_<T> -> v_u<T>).
+#define OPENCV_HAL_IMPL_AVX_ABS(_Tpvec, suffix)         \
+    inline v_u##_Tpvec v_abs(const v_##_Tpvec& x)       \
+    {                                                   \
+        return v_u##_Tpvec(_mm256_abs_##suffix(x.val)); \
+    }
+
+OPENCV_HAL_IMPL_AVX_ABS(int8x32,  epi8)
+OPENCV_HAL_IMPL_AVX_ABS(int16x16, epi16)
+OPENCV_HAL_IMPL_AVX_ABS(int32x8,  epi32)
+
+// Floating-point absolute value: clear the sign bit with a bit mask.
+inline v_float32x8 v_abs(const v_float32x8& x)
+{
+    const v_float32x8 clear_sign = v_float32x8(_mm256_castsi256_ps(_mm256_set1_epi32(0x7fffffff)));
+    return x & clear_sign;
+}
+inline v_float64x4 v_abs(const v_float64x4& x)
+{
+    // All ones shifted right by one bit: every bit set except the sign bit.
+    const v_float64x4 clear_sign = v_float64x4(_mm256_castsi256_pd(_mm256_srli_epi64(_mm256_set1_epi64x(-1), 1)));
+    return x & clear_sign;
+}
+
+/** Absolute difference **/
+// Unsigned 8/16-bit: combine the two one-sided differences a - b and b - a.
+inline v_uint8x32 v_absdiff(const v_uint8x32& a, const v_uint8x32& b)
+{
+    return v_add_wrap(a - b,  b - a);
+}
+inline v_uint16x16 v_absdiff(const v_uint16x16& a, const v_uint16x16& b)
+{
+    return v_add_wrap(a - b,  b - a);
+}
+// Unsigned 32-bit: larger value minus smaller value.
+inline v_uint32x8 v_absdiff(const v_uint32x8& a, const v_uint32x8& b)
+{
+    return v_max(a, b) - v_min(a, b);
+}
+
+// Signed 8-bit |a - b| as unsigned. Where a < b the mask is all ones, and
+// (x ^ mask) - mask negates the wrapped difference.
+inline v_uint8x32 v_absdiff(const v_int8x32& a, const v_int8x32& b)
+{
+    const v_int8x32 diff = v_sub_wrap(a, b);
+    const v_int8x32 neg_mask = a < b;
+    const v_int8x32 abs_diff = v_sub_wrap(diff ^ neg_mask, neg_mask);
+    return v_reinterpret_as_u8(abs_diff);
+}
+
+inline v_uint16x16 v_absdiff(const v_int16x16& a, const v_int16x16& b) // wrapping max - min, reinterpreted as unsigned
+{ const v_int16x16 hi = v_max(a, b), lo = v_min(a, b); return v_reinterpret_as_u16(v_sub_wrap(hi, lo)); }
+
+inline v_uint32x8 v_absdiff(const v_int32x8& a, const v_int32x8& b)
+{
+    const v_int32x8 neg = a < b;    // comparison mask for lanes whose difference is negative
+    const v_int32x8 diff = a - b;
+    return v_reinterpret_as_u32((diff ^ neg) - neg); // (d ^ m) - m: negates lanes where the mask is all-ones
+}
+
+inline v_float32x8 v_absdiff(const v_float32x8& a, const v_float32x8& b) // |a - b| per float32 lane
+{ const v_float32x8 diff = a - b; return v_abs(diff); }
+
+inline v_float64x4 v_absdiff(const v_float64x4& a, const v_float64x4& b) // |a - b| per float64 lane
+{ const v_float64x4 diff = a - b; return v_abs(diff); }
+
+/** Saturating absolute difference **/
+inline v_int8x32 v_absdiffs(const v_int8x32& a, const v_int8x32& b)
+{
+    const v_int8x32 neg = a < b;   // comparison mask for lanes where a - b is negative
+    const v_int8x32 diff = a - b;  // uses the vector operator-, unlike the _wrap form in v_absdiff
+    return (diff ^ neg) - neg;     // conditional negate; the result stays a signed vector
+}
+inline v_int16x16 v_absdiffs(const v_int16x16& a, const v_int16x16& b) // max - min through the vector operator-, kept signed
+{ const v_int16x16 hi = v_max(a, b), lo = v_min(a, b); return hi - lo; }
+
+////////// Conversions /////////
+
+/** Rounding **/
+inline v_int32x8 v_round(const v_float32x8& a) // float32 -> int32 via cvtps (rounds per the current MXCSR rounding mode)
+{ const __m256i as_int = _mm256_cvtps_epi32(a.val); return v_int32x8(as_int); }
+
+inline v_int32x8 v_round(const v_float64x4& a) // 4 doubles -> low 4 int32 lanes; the widening cast leaves the upper 128 bits undefined
+{ const __m128i low4 = _mm256_cvtpd_epi32(a.val); return v_int32x8(_mm256_castsi128_si256(low4)); }
+
+inline v_int32x8 v_round(const v_float64x4& a, const v_float64x4& b)
+{
+    const __m128i from_a = _mm256_cvtpd_epi32(a.val), from_b = _mm256_cvtpd_epi32(b.val);
+    return v_int32x8(_v256_combine(from_a, from_b)); // round both inputs, then join the two 4-lane halves
+}
+
+inline v_int32x8 v_trunc(const v_float32x8& a) // float32 -> int32 with the truncating conversion (cvttps)
+{ const __m256i as_int = _mm256_cvttps_epi32(a.val); return v_int32x8(as_int); }
+
+inline v_int32x8 v_trunc(const v_float64x4& a) // 4 doubles -> low 4 int32 lanes; the widening cast leaves the upper 128 bits undefined
+{ const __m128i low4 = _mm256_cvttpd_epi32(a.val); return v_int32x8(_mm256_castsi128_si256(low4)); }
+
+inline v_int32x8 v_floor(const v_float32x8& a) // floor in floating point first, then convert to int32
+{ const __m256 floored = _mm256_floor_ps(a.val); return v_int32x8(_mm256_cvttps_epi32(floored)); }
+
+inline v_int32x8 v_floor(const v_float64x4& a) // floor in floating point first, then reuse the float64 v_trunc
+{ const v_float64x4 floored = v_float64x4(_mm256_floor_pd(a.val)); return v_trunc(floored); }
+
+inline v_int32x8 v_ceil(const v_float32x8& a) // ceil in floating point first, then convert to int32
+{ const __m256 raised = _mm256_ceil_ps(a.val); return v_int32x8(_mm256_cvttps_epi32(raised)); }
+
+inline v_int32x8 v_ceil(const v_float64x4& a) // ceil in floating point first, then reuse the float64 v_trunc
+{ const v_float64x4 raised = v_float64x4(_mm256_ceil_pd(a.val)); return v_trunc(raised); }
+
+/** To float **/
+inline v_float32x8 v_cvt_f32(const v_int32x8& a) // int32 -> float32 on all 8 lanes
+{ const __m256 as_float = _mm256_cvtepi32_ps(a.val); return v_float32x8(as_float); }
+
+inline v_float32x8 v_cvt_f32(const v_float64x4& a) // 4 doubles -> low 4 float lanes; the widening cast leaves the upper 128 bits undefined
+{ const __m128 low4 = _mm256_cvtpd_ps(a.val); return v_float32x8(_mm256_castps128_ps256(low4)); }
+
+inline v_float32x8 v_cvt_f32(const v_float64x4& a, const v_float64x4& b)
+{
+    const __m128 from_a = _mm256_cvtpd_ps(a.val), from_b = _mm256_cvtpd_ps(b.val);
+    return v_float32x8(_v256_combine(from_a, from_b)); // narrow both inputs, then join the two 4-lane halves
+}
+
+inline v_float64x4 v_cvt_f64(const v_int32x8& a) // int32 -> float64 for the four lanes taken by _v256_extract_low
+{ const __m128i half = _v256_extract_low(a.val); return v_float64x4(_mm256_cvtepi32_pd(half)); }
+
+inline v_float64x4 v_cvt_f64_high(const v_int32x8& a) // int32 -> float64 for the four lanes taken by _v256_extract_high
+{ const __m128i half = _v256_extract_high(a.val); return v_float64x4(_mm256_cvtepi32_pd(half)); }
+
+inline v_float64x4 v_cvt_f64(const v_float32x8& a) // float32 -> float64 for the four lanes taken by _v256_extract_low
+{ const __m128 half = _v256_extract_low(a.val); return v_float64x4(_mm256_cvtps_pd(half)); }
+
+inline v_float64x4 v_cvt_f64_high(const v_float32x8& a) // float32 -> float64 for the four lanes taken by _v256_extract_high
+{ const __m128 half = _v256_extract_high(a.val); return v_float64x4(_mm256_cvtps_pd(half)); }
+
+// Signed int64 -> float64 without a native AVX2 instruction; from (Mysticial and wim) https://stackoverflow.com/q/41144668
+inline v_float64x4 v_cvt_f64(const v_int64x4& v)
+{
+    // constants: bit patterns of doubles, used below both as integers and as floating-point values
+    __m256i magic_i_lo   = _mm256_set1_epi64x(0x4330000000000000); // 2^52
+    __m256i magic_i_hi32 = _mm256_set1_epi64x(0x4530000080000000); // 2^84 + 2^63
+    __m256i magic_i_all  = _mm256_set1_epi64x(0x4530000080100000); // 2^84 + 2^63 + 2^52
+    __m256d magic_d_all  = _mm256_castsi256_pd(magic_i_all);
+
+    // Blend the 32 least significant bits of v with magic_i_lo (mask 0x55 takes the even dwords from v)
+    __m256i v_lo         = _mm256_blend_epi32(magic_i_lo, v.val, 0x55);
+    // Extract the 32 most significant bits of v
+    __m256i v_hi         = _mm256_srli_epi64(v.val, 32);
+    // Flip the msb of v_hi and blend with 0x45300000
+            v_hi         = _mm256_xor_si256(v_hi, magic_i_hi32);
+    // Compute in double precision
+    __m256d v_hi_dbl     = _mm256_sub_pd(_mm256_castsi256_pd(v_hi), magic_d_all);
+    // (v_hi - magic_d_all) + v_lo  Do not assume associativity of floating point addition
+    __m256d result       = _mm256_add_pd(v_hi_dbl, _mm256_castsi256_pd(v_lo));
+    return v_float64x4(result);
+}
+
+////////////// Lookup table access ////////////////////
+
+inline v_int8x32 v256_lut(const schar* tab, const int* idx) // 32-entry byte lookup: lane i = tab[idx[i]], one scalar read per lane
+{
+    return v_int8x32(_mm256_setr_epi8(tab[idx[ 0]], tab[idx[ 1]], tab[idx[ 2]], tab[idx[ 3]], tab[idx[ 4]], tab[idx[ 5]], tab[idx[ 6]], tab[idx[ 7]],
+                                      tab[idx[ 8]], tab[idx[ 9]], tab[idx[10]], tab[idx[11]], tab[idx[12]], tab[idx[13]], tab[idx[14]], tab[idx[15]],
+                                      tab[idx[16]], tab[idx[17]], tab[idx[18]], tab[idx[19]], tab[idx[20]], tab[idx[21]], tab[idx[22]], tab[idx[23]],
+                                      tab[idx[24]], tab[idx[25]], tab[idx[26]], tab[idx[27]], tab[idx[28]], tab[idx[29]], tab[idx[30]], tab[idx[31]]));
+}
+inline v_int8x32 v256_lut_pairs(const schar* tab, const int* idx) // 16 byte pairs: pair i is the two bytes at tab + idx[i], read through a short pointer
+{
+    return v_int8x32(_mm256_setr_epi16(*(const short*)(tab + idx[ 0]), *(const short*)(tab + idx[ 1]), *(const short*)(tab + idx[ 2]), *(const short*)(tab + idx[ 3]),
+                                       *(const short*)(tab + idx[ 4]), *(const short*)(tab + idx[ 5]), *(const short*)(tab + idx[ 6]), *(const short*)(tab + idx[ 7]),
+                                       *(const short*)(tab + idx[ 8]), *(const short*)(tab + idx[ 9]), *(const short*)(tab + idx[10]), *(const short*)(tab + idx[11]),
+                                       *(const short*)(tab + idx[12]), *(const short*)(tab + idx[13]), *(const short*)(tab + idx[14]), *(const short*)(tab + idx[15])));
+}
+// Quad lookup: for each of the 8 entries of idx, fetch the 4 consecutive bytes at tab + idx[i].
+// Done as a single 32-bit gather with scale 1, so the indices act as byte offsets.
+inline v_int8x32 v256_lut_quads(const schar* tab, const int* idx)
+{ const __m256i offs = _mm256_loadu_si256((const __m256i*)idx); return v_int8x32(_mm256_i32gather_epi32((const int*)tab, offs, 1)); }
+inline v_uint8x32 v256_lut(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v256_lut((const schar *)tab, idx)); } // unsigned wrapper over the schar overload
+inline v_uint8x32 v256_lut_pairs(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v256_lut_pairs((const schar *)tab, idx)); } // unsigned wrapper over the schar overload
+inline v_uint8x32 v256_lut_quads(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v256_lut_quads((const schar *)tab, idx)); } // unsigned wrapper over the schar overload
+
+inline v_int16x16 v256_lut(const short* tab, const int* idx) // 16-entry lookup: lane i = tab[idx[i]], one scalar read per lane
+{
+    return v_int16x16(_mm256_setr_epi16(tab[idx[0]], tab[idx[1]], tab[idx[ 2]], tab[idx[ 3]], tab[idx[ 4]], tab[idx[ 5]], tab[idx[ 6]], tab[idx[ 7]],
+                                        tab[idx[8]], tab[idx[9]], tab[idx[10]], tab[idx[11]], tab[idx[12]], tab[idx[13]], tab[idx[14]], tab[idx[15]]));
+}
+// Pair lookup: 8 indices, each fetching the two consecutive shorts at tab + idx[i].
+// One 32-bit gather with scale 2, so the indices count 16-bit elements.
+inline v_int16x16 v256_lut_pairs(const short* tab, const int* idx)
+{ const __m256i offs = _mm256_loadu_si256((const __m256i*)idx); return v_int16x16(_mm256_i32gather_epi32((const int*)tab, offs, 2)); }
+inline v_int16x16 v256_lut_quads(const short* tab, const int* idx) // 4 indices, each fetching 4 consecutive shorts (one 64-bit gather, scale 2)
+{
+#if defined(__GNUC__) // this branch casts the base pointer to const long long int* instead of const int64*
+    return v_int16x16(_mm256_i32gather_epi64((const long long int*)tab, _mm_loadu_si128((const __m128i*)idx), 2));//Looks like intrinsic has wrong definition
+#else
+    return v_int16x16(_mm256_i32gather_epi64((const int64*)tab, _mm_loadu_si128((const __m128i*)idx), 2));
+#endif
+}
+inline v_uint16x16 v256_lut(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v256_lut((const short *)tab, idx)); } // unsigned wrapper over the short overload
+inline v_uint16x16 v256_lut_pairs(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v256_lut_pairs((const short *)tab, idx)); } // unsigned wrapper over the short overload
+inline v_uint16x16 v256_lut_quads(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v256_lut_quads((const short *)tab, idx)); } // unsigned wrapper over the short overload
+
+// 8-entry int32 lookup: lane i = tab[idx[i]].
+// The indices are loaded unaligned and fed to one 32-bit gather with scale 4 (sizeof(int)).
+inline v_int32x8 v256_lut(const int* tab, const int* idx)
+{ const __m256i offs = _mm256_loadu_si256((const __m256i*)idx); return v_int32x8(_mm256_i32gather_epi32(tab, offs, 4)); }
+inline v_int32x8 v256_lut_pairs(const int* tab, const int* idx) // 4 indices, each fetching 2 consecutive ints (one 64-bit gather, scale 4)
+{
+#if defined(__GNUC__) // this branch casts the base pointer to const long long int* instead of const int64*
+    return v_int32x8(_mm256_i32gather_epi64((const long long int*)tab, _mm_loadu_si128((const __m128i*)idx), 4));
+#else
+    return v_int32x8(_mm256_i32gather_epi64((const int64*)tab, _mm_loadu_si128((const __m128i*)idx), 4));
+#endif
+}
+// Quad lookup: two indices, each fetching 4 consecutive ints with an unaligned 128-bit load;
+// the two loads are then joined by _v256_combine in (idx[0], idx[1]) argument order.
+inline v_int32x8 v256_lut_quads(const int* tab, const int* idx)
+{ const __m128i q0 = _mm_loadu_si128((const __m128i*)(tab + idx[0])), q1 = _mm_loadu_si128((const __m128i*)(tab + idx[1])); return v_int32x8(_v256_combine(q0, q1)); }
+inline v_uint32x8 v256_lut(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v256_lut((const int *)tab, idx)); } // unsigned wrapper over the int overload
+inline v_uint32x8 v256_lut_pairs(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v256_lut_pairs((const int *)tab, idx)); } // unsigned wrapper over the int overload
+inline v_uint32x8 v256_lut_quads(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v256_lut_quads((const int *)tab, idx)); } // unsigned wrapper over the int overload
+
+inline v_int64x4 v256_lut(const int64* tab, const int* idx) // 4-entry int64 lookup: lane i = tab[idx[i]] (one 64-bit gather, scale 8)
+{
+#if defined(__GNUC__) // this branch casts the base pointer to const long long int*; the other passes tab unchanged
+    return v_int64x4(_mm256_i32gather_epi64((const long long int*)tab, _mm_loadu_si128((const __m128i*)idx), 8));
+#else
+    return v_int64x4(_mm256_i32gather_epi64(tab, _mm_loadu_si128((const __m128i*)idx), 8));
+#endif
+}
+// Pair lookup: two indices, each fetching 2 consecutive int64 values with an unaligned
+// 128-bit load; the two loads are joined by _v256_combine in (idx[0], idx[1]) argument order.
+inline v_int64x4 v256_lut_pairs(const int64* tab, const int* idx)
+{ const __m128i p0 = _mm_loadu_si128((const __m128i*)(tab + idx[0])), p1 = _mm_loadu_si128((const __m128i*)(tab + idx[1])); return v_int64x4(_v256_combine(p0, p1)); }
+inline v_uint64x4 v256_lut(const uint64* tab, const int* idx) { return v_reinterpret_as_u64(v256_lut((const int64 *)tab, idx)); } // unsigned wrapper over the int64 overload
+inline v_uint64x4 v256_lut_pairs(const uint64* tab, const int* idx) { return v_reinterpret_as_u64(v256_lut_pairs((const int64 *)tab, idx)); } // unsigned wrapper over the int64 overload
+
+// 8-entry float lookup: lane i = tab[idx[i]].
+// The indices are loaded unaligned and fed to one float gather with scale 4 (sizeof(float)).
+inline v_float32x8 v256_lut(const float* tab, const int* idx)
+{ const __m256i offs = _mm256_loadu_si256((const __m256i*)idx); return v_float32x8(_mm256_i32gather_ps(tab, offs, 4)); }
+inline v_float32x8 v256_lut_pairs(const float* tab, const int* idx) { return v_reinterpret_as_f32(v256_lut_pairs((const int *)tab, idx)); } // reuses the int overload; only the bit patterns are moved
+inline v_float32x8 v256_lut_quads(const float* tab, const int* idx) { return v_reinterpret_as_f32(v256_lut_quads((const int *)tab, idx)); } // reuses the int overload; only the bit patterns are moved
+
+// 4-entry double lookup: lane i = tab[idx[i]].
+// Four indices are loaded unaligned and fed to one double gather with scale 8 (sizeof(double)).
+inline v_float64x4 v256_lut(const double* tab, const int* idx)
+{ const __m128i offs = _mm_loadu_si128((const __m128i*)idx); return v_float64x4(_mm256_i32gather_pd(tab, offs, 8)); }
+inline v_float64x4 v256_lut_pairs(const double* tab, const int* idx) { return v_float64x4(_v256_combine(_mm_loadu_pd(tab + idx[0]), _mm_loadu_pd(tab + idx[1]))); } // two unaligned 2-double loads at idx[0] and idx[1], joined into one vector
+
+// Vector-indexed lookup: lane i = tab[idxvec[i]], done as one 32-bit gather with scale 4.
+// The indices come straight from the vector register instead of from memory.
+inline v_int32x8 v_lut(const int* tab, const v_int32x8& idxvec)
+{ const __m256i gathered = _mm256_i32gather_epi32(tab, idxvec.val, 4); return v_int32x8(gathered); }
+
+// Unsigned variant: forwards to the int overload through a pointer cast and
+// reinterprets the gathered lanes as unsigned.
+inline v_uint32x8 v_lut(const unsigned* tab, const v_int32x8& idxvec)
+{ const v_int32x8 gathered = v_lut((const int *)tab, idxvec); return v_reinterpret_as_u32(gathered); }
+
+// Vector-indexed float lookup: lane i = tab[idxvec[i]], done as one float gather
+// with scale 4.
+inline v_float32x8 v_lut(const float* tab, const v_int32x8& idxvec)
+{ const __m256 gathered = _mm256_i32gather_ps(tab, idxvec.val, 4); return v_float32x8(gathered); }
+
+// Vector-indexed double lookup: only the low four indices of idxvec are used
+// (the index register is narrowed to 128 bits); gather scale is 8.
+inline v_float64x4 v_lut(const double* tab, const v_int32x8& idxvec)
+{ const __m128i low_idx = _mm256_castsi256_si128(idxvec.val); return v_float64x4(_mm256_i32gather_pd(tab, low_idx, 8)); }
+
+inline void v_lut_deinterleave(const float* tab, const v_int32x8& idxvec, v_float32x8& x, v_float32x8& y) // loads 8 adjacent float pairs at tab + idx[i] and splits them into x (first) and y (second)
+{
+    int CV_DECL_ALIGNED(32) idx[8];
+    v_store_aligned(idx, idxvec); // spill the indices to memory so each can form a scalar address
+    __m128 z = _mm_setzero_ps();
+    __m128 xy01, xy45, xy23, xy67;
+    xy01 = _mm_loadl_pi(z, (const __m64*)(tab + idx[0])); // 64-bit load = two consecutive floats into the low half
+    xy01 = _mm_loadh_pi(xy01, (const __m64*)(tab + idx[1])); // second pair into the high half
+    xy45 = _mm_loadl_pi(z, (const __m64*)(tab + idx[4]));
+    xy45 = _mm_loadh_pi(xy45, (const __m64*)(tab + idx[5]));
+    __m256 xy0145 = _v256_combine(xy01, xy45); // pairs 0,1 with pairs 4,5
+    xy23 = _mm_loadl_pi(z, (const __m64*)(tab + idx[2]));
+    xy23 = _mm_loadh_pi(xy23, (const __m64*)(tab + idx[3]));
+    xy67 = _mm_loadl_pi(z, (const __m64*)(tab + idx[6]));
+    xy67 = _mm_loadh_pi(xy67, (const __m64*)(tab + idx[7]));
+    __m256 xy2367 = _v256_combine(xy23, xy67); // pairs 2,3 with pairs 6,7
+
+    __m256 xxyy0145 = _mm256_unpacklo_ps(xy0145, xy2367); // first unpack round, applied per 128-bit lane
+    __m256 xxyy2367 = _mm256_unpackhi_ps(xy0145, xy2367);
+
+    x = v_float32x8(_mm256_unpacklo_ps(xxyy0145, xxyy2367)); // second round separates first components ...
+    y = v_float32x8(_mm256_unpackhi_ps(xxyy0145, xxyy2367)); // ... from second components
+}
+
+inline void v_lut_deinterleave(const double* tab, const v_int32x8& idxvec, v_float64x4& x, v_float64x4& y)
+{
+    // Four indices are stored out (v_store_low); each addresses an adjacent pair of doubles.
+    int CV_DECL_ALIGNED(32) offs[4];
+    v_store_low(offs, idxvec);
+    const __m128d pair0 = _mm_loadu_pd(tab + offs[0]), pair1 = _mm_loadu_pd(tab + offs[1]);
+    const __m128d pair2 = _mm_loadu_pd(tab + offs[2]), pair3 = _mm_loadu_pd(tab + offs[3]);
+    // Group pairs (0,2) and (1,3) so the per-128-bit-lane unpacks below interleave them back in index order.
+    const __m256d even_pairs = _v256_combine(pair0, pair2);
+    const __m256d odd_pairs  = _v256_combine(pair1, pair3);
+
+    x = v_float64x4(_mm256_unpacklo_pd(even_pairs, odd_pairs));
+    y = v_float64x4(_mm256_unpackhi_pd(even_pairs, odd_pairs));
+}
+
+inline v_int8x32 v_interleave_pairs(const v_int8x32& vec) // byte shuffle, per 128-bit lane: output order 0,2,1,3, 4,6,5,7, 8,10,9,11, 12,14,13,15
+{
+    return v_int8x32(_mm256_shuffle_epi8(vec.val, _mm256_set_epi64x(0x0f0d0e0c0b090a08, 0x0705060403010200, 0x0f0d0e0c0b090a08, 0x0705060403010200)));
+}
+inline v_uint8x32 v_interleave_pairs(const v_uint8x32& vec) { return v_reinterpret_as_u8(v_interleave_pairs(v_reinterpret_as_s8(vec))); } // unsigned wrapper over the signed overload
+inline v_int8x32 v_interleave_quads(const v_int8x32& vec) // byte shuffle, per 128-bit lane: output order 0,4,1,5, 2,6,3,7, 8,12,9,13, 10,14,11,15
+{
+    return v_int8x32(_mm256_shuffle_epi8(vec.val, _mm256_set_epi64x(0x0f0b0e0a0d090c08, 0x0703060205010400, 0x0f0b0e0a0d090c08, 0x0703060205010400)));
+}
+inline v_uint8x32 v_interleave_quads(const v_uint8x32& vec) { return v_reinterpret_as_u8(v_interleave_quads(v_reinterpret_as_s8(vec))); } // unsigned wrapper over the signed overload
+
+inline v_int16x16 v_interleave_pairs(const v_int16x16& vec) // byte shuffle moving whole 16-bit elements, per 128-bit lane: output order 0,2,1,3, 4,6,5,7
+{
+    return v_int16x16(_mm256_shuffle_epi8(vec.val, _mm256_set_epi64x(0x0f0e0b0a0d0c0908, 0x0706030205040100, 0x0f0e0b0a0d0c0908, 0x0706030205040100)));
+}
+inline v_uint16x16 v_interleave_pairs(const v_uint16x16& vec) { return v_reinterpret_as_u16(v_interleave_pairs(v_reinterpret_as_s16(vec))); } // unsigned wrapper over the signed overload
+inline v_int16x16 v_interleave_quads(const v_int16x16& vec) // byte shuffle moving whole 16-bit elements, per 128-bit lane: output order 0,4,1,5, 2,6,3,7
+{
+    return v_int16x16(_mm256_shuffle_epi8(vec.val, _mm256_set_epi64x(0x0f0e07060d0c0504, 0x0b0a030209080100, 0x0f0e07060d0c0504, 0x0b0a030209080100)));
+}
+inline v_uint16x16 v_interleave_quads(const v_uint16x16& vec) { return v_reinterpret_as_u16(v_interleave_quads(v_reinterpret_as_s16(vec))); } // unsigned wrapper over the signed overload
+
+inline v_int32x8 v_interleave_pairs(const v_int32x8& vec) // dword shuffle, per 128-bit lane: output order 0,2,1,3
+{
+    return v_int32x8(_mm256_shuffle_epi32(vec.val, _MM_SHUFFLE(3, 1, 2, 0)));
+}
+inline v_uint32x8 v_interleave_pairs(const v_uint32x8& vec) { return v_reinterpret_as_u32(v_interleave_pairs(v_reinterpret_as_s32(vec))); } // unsigned wrapper over the int32 overload
+inline v_float32x8 v_interleave_pairs(const v_float32x8& vec) { return v_reinterpret_as_f32(v_interleave_pairs(v_reinterpret_as_s32(vec))); } // float wrapper: same shuffle on the raw bits
+
+inline v_int8x32 v_pack_triplets(const v_int8x32& vec) // keeps the first 3 bytes of every 4-byte group and packs them contiguously; the tail lanes are filler
+{
+    return v_int8x32(_mm256_permutevar8x32_epi32(_mm256_shuffle_epi8(vec.val, _mm256_broadcastsi128_si256(_mm_set_epi64x(0xffffff0f0e0d0c0a, 0x0908060504020100))), // per lane: bytes 0,1,2,4,5,6,8,9,10,12,13,14 first
+                                                 _mm256_set_epi64x(0x0000000700000007, 0x0000000600000005, 0x0000000400000002, 0x0000000100000000))); // dwords 0,1,2,4,5,6,7,7: closes the gap between the lanes
+}
+inline v_uint8x32 v_pack_triplets(const v_uint8x32& vec) { return v_reinterpret_as_u8(v_pack_triplets(v_reinterpret_as_s8(vec))); } // unsigned wrapper over the signed overload
+
+inline v_int16x16 v_pack_triplets(const v_int16x16& vec) // keeps the first 3 elements of every 4-element group and packs them contiguously; the tail lanes are filler
+{
+    return v_int16x16(_mm256_permutevar8x32_epi32(_mm256_shuffle_epi8(vec.val, _mm256_broadcastsi128_si256(_mm_set_epi64x(0xffff0f0e0d0c0b0a, 0x0908050403020100))), // per lane: 16-bit elements 0,1,2,4,5,6 first
+                                                  _mm256_set_epi64x(0x0000000700000007, 0x0000000600000005, 0x0000000400000002, 0x0000000100000000))); // dwords 0,1,2,4,5,6,7,7: closes the gap between the lanes
+}
+inline v_uint16x16 v_pack_triplets(const v_uint16x16& vec) { return v_reinterpret_as_u16(v_pack_triplets(v_reinterpret_as_s16(vec))); } // unsigned wrapper over the signed overload
+
+inline v_int32x8 v_pack_triplets(const v_int32x8& vec) // cross-lane dword permute selecting elements 0,1,2,4,5,6,7,7
+{
+    return v_int32x8(_mm256_permutevar8x32_epi32(vec.val, _mm256_set_epi64x(0x0000000700000007, 0x0000000600000005, 0x0000000400000002, 0x0000000100000000)));
+}
+inline v_uint32x8 v_pack_triplets(const v_uint32x8& vec) { return v_reinterpret_as_u32(v_pack_triplets(v_reinterpret_as_s32(vec))); } // unsigned wrapper over the int32 overload
+inline v_float32x8 v_pack_triplets(const v_float32x8& vec) // same index pattern, using the float permute
+{
+    return v_float32x8(_mm256_permutevar8x32_ps(vec.val, _mm256_set_epi64x(0x0000000700000007, 0x0000000600000005, 0x0000000400000002, 0x0000000100000000)));
+}
+
+////////// Matrix operations /////////
+
+//////// Dot Product ////////
+
+// 16 >> 32
+inline v_int32x8 v_dotprod(const v_int16x16& a, const v_int16x16& b) // madd: int32 lane i = a[2i]*b[2i] + a[2i+1]*b[2i+1]
+{ const __m256i pair_sums = _mm256_madd_epi16(a.val, b.val); return v_int32x8(pair_sums); }
+inline v_int32x8 v_dotprod(const v_int16x16& a, const v_int16x16& b, const v_int32x8& c) // accumulating form: dot product plus c
+{ const v_int32x8 dot = v_dotprod(a, b); return dot + c; }
+
+// 32 >> 64
+inline v_int64x4 v_dotprod(const v_int32x8& a, const v_int32x8& b)
+{
+    const __m256i prod_even = _mm256_mul_epi32(a.val, b.val); // signed 32x32->64 on the even dwords
+    const __m256i prod_odd  = _mm256_mul_epi32(_mm256_srli_epi64(a.val, 32), _mm256_srli_epi64(b.val, 32)); // odd dwords shifted into place
+    return v_int64x4(_mm256_add_epi64(prod_even, prod_odd));
+}
+inline v_int64x4 v_dotprod(const v_int32x8& a, const v_int32x8& b, const v_int64x4& c) // accumulating form: dot product plus c
+{ const v_int64x4 dot = v_dotprod(a, b); return dot + c; }
+
+// 8 >> 32
+inline v_uint32x8 v_dotprod_expand(const v_uint8x32& a, const v_uint8x32& b)
+{
+    // Split each operand into its even and odd bytes, zero-extended into 16-bit lanes.
+    const __m256i odd_byte_mask = _mm256_set1_epi32(0xFF00FF00);
+    const __m256i zero = _mm256_setzero_si256();
+    const __m256i a_even = _mm256_blendv_epi8(a.val, zero, odd_byte_mask), a_odd = _mm256_srli_epi16(a.val, 8);
+    const __m256i b_even = _mm256_blendv_epi8(b.val, zero, odd_byte_mask), b_odd = _mm256_srli_epi16(b.val, 8);
+    // madd sums adjacent 16-bit products, so even + odd covers four source bytes per 32-bit lane.
+    const __m256i even_sum = _mm256_madd_epi16(a_even, b_even);
+    const __m256i odd_sum  = _mm256_madd_epi16(a_odd, b_odd);
+
+    return v_uint32x8(_mm256_add_epi32(even_sum, odd_sum));
+}
+inline v_uint32x8 v_dotprod_expand(const v_uint8x32& a, const v_uint8x32& b, const v_uint32x8& c) // accumulating form: dot product plus c
+{ const v_uint32x8 dot = v_dotprod_expand(a, b); return dot + c; }
+
+inline v_int32x8 v_dotprod_expand(const v_int8x32& a, const v_int8x32& b)
+{
+    // Sign-extend bytes into 16-bit lanes: even bytes are first moved up one byte, then both use an arithmetic shift.
+    const __m256i a_even = _mm256_srai_epi16(_mm256_bslli_epi128(a.val, 1), 8);
+    const __m256i a_odd  = _mm256_srai_epi16(a.val, 8);
+    const __m256i b_even = _mm256_srai_epi16(_mm256_bslli_epi128(b.val, 1), 8);
+    const __m256i b_odd  = _mm256_srai_epi16(b.val, 8);
+    // madd sums adjacent 16-bit products, so even + odd covers four source bytes per 32-bit lane.
+    const __m256i even_sum = _mm256_madd_epi16(a_even, b_even);
+    const __m256i odd_sum  = _mm256_madd_epi16(a_odd, b_odd);
+    return v_int32x8(_mm256_add_epi32(even_sum, odd_sum));
+}
+inline v_int32x8 v_dotprod_expand(const v_int8x32& a, const v_int8x32& b, const v_int32x8& c) // accumulating form: dot product plus c
+{ const v_int32x8 dot = v_dotprod_expand(a, b); return dot + c; }
+
+// 16 >> 64
+inline v_uint64x4 v_dotprod_expand(const v_uint16x16& a, const v_uint16x16& b) // each 64-bit result lane = sum of four consecutive 16x16 unsigned products
+{
+    __m256i mullo = _mm256_mullo_epi16(a.val, b.val); // low 16 bits of every product
+    __m256i mulhi = _mm256_mulhi_epu16(a.val, b.val); // high 16 bits of every unsigned product
+    __m256i mul0  = _mm256_unpacklo_epi16(mullo, mulhi); // full 32-bit products of elements 0..3 of each 128-bit lane
+    __m256i mul1  = _mm256_unpackhi_epi16(mullo, mulhi); // full 32-bit products of elements 4..7 of each 128-bit lane
+
+    __m256i p02   = _mm256_blend_epi32(mul0, _mm256_setzero_si256(), 0xAA); // even products, zero-extended to 64 bits
+    __m256i p13   = _mm256_srli_epi64(mul0, 32); // odd products, moved down and zero-extended
+    __m256i p46   = _mm256_blend_epi32(mul1, _mm256_setzero_si256(), 0xAA);
+    __m256i p57   = _mm256_srli_epi64(mul1, 32);
+
+    __m256i p15_  = _mm256_add_epi64(p02, p13); // per lane: p0+p1, p2+p3
+    __m256i p9d_  = _mm256_add_epi64(p46, p57); // per lane: p4+p5, p6+p7
+
+    return v_uint64x4(_mm256_add_epi64(
+        _mm256_unpacklo_epi64(p15_, p9d_), // p0+p1, p4+p5
+        _mm256_unpackhi_epi64(p15_, p9d_)  // p2+p3, p6+p7
+    ));
+}
+inline v_uint64x4 v_dotprod_expand(const v_uint16x16& a, const v_uint16x16& b, const v_uint64x4& c) // accumulating form: dot product plus c
+{ return v_dotprod_expand(a, b) + c; }
+
+inline v_int64x4 v_dotprod_expand(const v_int16x16& a, const v_int16x16& b)
+{
+    // madd yields 32-bit sums of adjacent products; widen them to 64 bits by pairing each with its sign word.
+    const __m256i pair_sums = _mm256_madd_epi16(a.val, b.val);
+    const __m256i sign_fill = _mm256_srai_epi32(pair_sums, 31);
+    const __m256i wide_lo = _mm256_unpacklo_epi32(pair_sums, sign_fill);
+    const __m256i wide_hi = _mm256_unpackhi_epi32(pair_sums, sign_fill);
+    // Regroup so that each 64-bit lane adds the two pair sums covering four consecutive inputs.
+    const __m256i first  = _mm256_unpacklo_epi64(wide_lo, wide_hi);
+    const __m256i second = _mm256_unpackhi_epi64(wide_lo, wide_hi);
+
+    return v_int64x4(_mm256_add_epi64(first, second));
+}
+inline v_int64x4 v_dotprod_expand(const v_int16x16& a, const v_int16x16& b, const v_int64x4& c) // accumulating form: dot product plus c
+{ const v_int64x4 dot = v_dotprod_expand(a, b); return dot + c; }
+
+// 32 >> 64f
+inline v_float64x4 v_dotprod_expand(const v_int32x8& a, const v_int32x8& b) // int32 dot product (int64 lanes), then converted to float64
+{ const v_int64x4 dot = v_dotprod(a, b); return v_cvt_f64(dot); }
+inline v_float64x4 v_dotprod_expand(const v_int32x8& a, const v_int32x8& b, const v_float64x4& c) // accumulating form: the addition happens in float64
+{ const v_float64x4 dot = v_dotprod_expand(a, b); return dot + c; }
+
+//////// Fast Dot Product ////////
+
+// 16 >> 32
+inline v_int32x8 v_dotprod_fast(const v_int16x16& a, const v_int16x16& b) // no separate fast path: forwards to v_dotprod
+{ return v_dotprod(a, b); }
+inline v_int32x8 v_dotprod_fast(const v_int16x16& a, const v_int16x16& b, const v_int32x8& c) // accumulating form, also forwarded
+{ return v_dotprod(a, b, c); }
+
+// 32 >> 64
+inline v_int64x4 v_dotprod_fast(const v_int32x8& a, const v_int32x8& b) // no separate fast path: forwards to v_dotprod
+{ return v_dotprod(a, b); }
+inline v_int64x4 v_dotprod_fast(const v_int32x8& a, const v_int32x8& b, const v_int64x4& c) // accumulating form, also forwarded
+{ return v_dotprod(a, b, c); }
+
+// 8 >> 32
+inline v_uint32x8 v_dotprod_expand_fast(const v_uint8x32& a, const v_uint8x32& b) // no separate fast path: forwards to v_dotprod_expand
+{ return v_dotprod_expand(a, b); }
+inline v_uint32x8 v_dotprod_expand_fast(const v_uint8x32& a, const v_uint8x32& b, const v_uint32x8& c) // accumulating form, also forwarded
+{ return v_dotprod_expand(a, b, c); }
+
+inline v_int32x8 v_dotprod_expand_fast(const v_int8x32& a, const v_int8x32& b) // signed 8-bit: forwards to v_dotprod_expand
+{ return v_dotprod_expand(a, b); }
+inline v_int32x8 v_dotprod_expand_fast(const v_int8x32& a, const v_int8x32& b, const v_int32x8& c) // accumulating form, also forwarded
+{ return v_dotprod_expand(a, b, c); }
+
+// 16 >> 64
+inline v_uint64x4 v_dotprod_expand_fast(const v_uint16x16& a, const v_uint16x16& b) // same products as v_dotprod_expand, minus the final 64-bit regrouping
+{
+    __m256i mullo = _mm256_mullo_epi16(a.val, b.val); // low 16 bits of every product
+    __m256i mulhi = _mm256_mulhi_epu16(a.val, b.val); // high 16 bits of every unsigned product
+    __m256i mul0  = _mm256_unpacklo_epi16(mullo, mulhi); // full 32-bit products of elements 0..3 of each 128-bit lane
+    __m256i mul1  = _mm256_unpackhi_epi16(mullo, mulhi); // full 32-bit products of elements 4..7 of each 128-bit lane
+
+    __m256i p02   = _mm256_blend_epi32(mul0, _mm256_setzero_si256(), 0xAA); // even products, zero-extended to 64 bits
+    __m256i p13   = _mm256_srli_epi64(mul0, 32); // odd products, moved down and zero-extended
+    __m256i p46   = _mm256_blend_epi32(mul1, _mm256_setzero_si256(), 0xAA);
+    __m256i p57   = _mm256_srli_epi64(mul1, 32);
+
+    __m256i p15_  = _mm256_add_epi64(p02, p13); // per lane: p0+p1, p2+p3
+    __m256i p9d_  = _mm256_add_epi64(p46, p57); // per lane: p4+p5, p6+p7
+
+    return v_uint64x4(_mm256_add_epi64(p15_, p9d_)); // per lane: p0+p1+p4+p5, p2+p3+p6+p7 (not four consecutive products)
+}
+inline v_uint64x4 v_dotprod_expand_fast(const v_uint16x16& a, const v_uint16x16& b, const v_uint64x4& c) // accumulating form: fast dot product plus c
+{ return v_dotprod_expand_fast(a, b) + c; }
+
+inline v_int64x4 v_dotprod_expand_fast(const v_int16x16& a, const v_int16x16& b)
+{
+    const __m256i pair_sums = _mm256_madd_epi16(a.val, b.val);  // 32-bit sums of adjacent products
+    const __m256i sign_fill = _mm256_srai_epi32(pair_sums, 31); // 0 or -1 per lane: the upper word for sign extension
+    const __m256i wide_lo = _mm256_unpacklo_epi32(pair_sums, sign_fill);
+    const __m256i wide_hi = _mm256_unpackhi_epi32(pair_sums, sign_fill);
+    return v_int64x4(_mm256_add_epi64(wide_lo, wide_hi)); // added without regrouping, so lanes mix non-adjacent pair sums
+}
+inline v_int64x4 v_dotprod_expand_fast(const v_int16x16& a, const v_int16x16& b, const v_int64x4& c) // accumulating form: fast dot product plus c
+{ const v_int64x4 dot = v_dotprod_expand_fast(a, b); return dot + c; }
+
+// 32 >> 64f
+inline v_float64x4 v_dotprod_expand_fast(const v_int32x8& a, const v_int32x8& b) // no separate fast path: forwards to v_dotprod_expand
+{ return v_dotprod_expand(a, b); }
+inline v_float64x4 v_dotprod_expand_fast(const v_int32x8& a, const v_int32x8& b, const v_float64x4& c) // accumulating form, also forwarded
+{ return v_dotprod_expand(a, b, c); }
+
+#define OPENCV_HAL_AVX_SPLAT2_PS(a, im) /* broadcast float element im (0..3) within each 128-bit half of a */ \
+    v_float32x8(_mm256_permute_ps(a.val, _MM_SHUFFLE(im, im, im, im))) // `a` must be an object with a .val member; im must be a compile-time constant
+
+inline v_float32x8 v_matmul(const v_float32x8& v, const v_float32x8& m0,
+                            const v_float32x8& m1, const v_float32x8& m2,
+                            const v_float32x8& m3)
+{
+    // v[0]*m0 + v[1]*m1 + v[2]*m2 + v[3]*m3 in each 128-bit half, accumulated from the last term inwards.
+    v_float32x8 acc = OPENCV_HAL_AVX_SPLAT2_PS(v, 3) * m3;
+    acc = v_fma(OPENCV_HAL_AVX_SPLAT2_PS(v, 2), m2, acc);
+    acc = v_fma(OPENCV_HAL_AVX_SPLAT2_PS(v, 1), m1, acc);
+    return v_fma(OPENCV_HAL_AVX_SPLAT2_PS(v, 0), m0, acc);
+}
+
+inline v_float32x8 v_matmuladd(const v_float32x8& v, const v_float32x8& m0,
+                               const v_float32x8& m1, const v_float32x8& m2,
+                               const v_float32x8& a)
+{
+    // v[0]*m0 + v[1]*m1 + v[2]*m2 + a in each 128-bit half; v[3] is not used.
+    v_float32x8 acc = v_fma(OPENCV_HAL_AVX_SPLAT2_PS(v, 2), m2, a);
+    acc = v_fma(OPENCV_HAL_AVX_SPLAT2_PS(v, 1), m1, acc);
+    return v_fma(OPENCV_HAL_AVX_SPLAT2_PS(v, 0), m0, acc);
+}
+
+#define OPENCV_HAL_IMPL_AVX_TRANSPOSE4x4(_Tpvec, suffix, cast_from, cast_to)    \
+    inline void v_transpose4x4(const _Tpvec& a0, const _Tpvec& a1,              \
+                               const _Tpvec& a2, const _Tpvec& a3,              \
+                               _Tpvec& b0, _Tpvec& b1, _Tpvec& b2, _Tpvec& b3)  \
+    { /* two unpack rounds; both act per 128-bit lane, so each half is transposed as its own 4x4 block */ \
+        __m256i t0 = cast_from(_mm256_unpacklo_##suffix(a0.val, a1.val));       \
+        __m256i t1 = cast_from(_mm256_unpacklo_##suffix(a2.val, a3.val));       \
+        __m256i t2 = cast_from(_mm256_unpackhi_##suffix(a0.val, a1.val));       \
+        __m256i t3 = cast_from(_mm256_unpackhi_##suffix(a2.val, a3.val));       \
+        b0.val = cast_to(_mm256_unpacklo_epi64(t0, t1));                        \
+        b1.val = cast_to(_mm256_unpackhi_epi64(t0, t1));                        \
+        b2.val = cast_to(_mm256_unpacklo_epi64(t2, t3));                        \
+        b3.val = cast_to(_mm256_unpackhi_epi64(t2, t3));                        \
+    }
+
+OPENCV_HAL_IMPL_AVX_TRANSPOSE4x4(v_uint32x8,  epi32, OPENCV_HAL_NOP, OPENCV_HAL_NOP) // integer vectors are already __m256i, so OPENCV_HAL_NOP is passed for both casts
+OPENCV_HAL_IMPL_AVX_TRANSPOSE4x4(v_int32x8,   epi32, OPENCV_HAL_NOP, OPENCV_HAL_NOP)
+OPENCV_HAL_IMPL_AVX_TRANSPOSE4x4(v_float32x8, ps, _mm256_castps_si256, _mm256_castsi256_ps) // float: bit-casts around the 64-bit integer unpacks
+
+//////////////// Value reordering ///////////////
+
+/* Expand: widen lanes to the next larger integer type (v_expand, v_expand_low, v_expand_high, v256_load_expand) */
+#define OPENCV_HAL_IMPL_AVX_EXPAND(_Tpvec, _Tpwvec, _Tp, intrin)    \
+    inline void v_expand(const _Tpvec& a, _Tpwvec& b0, _Tpwvec& b1) \
+    {                                                               \
+        b0.val = intrin(_v256_extract_low(a.val));                  \
+        b1.val = intrin(_v256_extract_high(a.val));                 \
+    }                                                               \
+    inline _Tpwvec v_expand_low(const _Tpvec& a)                    \
+    { return _Tpwvec(intrin(_v256_extract_low(a.val))); }           \
+    inline _Tpwvec v_expand_high(const _Tpvec& a)                   \
+    { return _Tpwvec(intrin(_v256_extract_high(a.val))); }          \
+    inline _Tpwvec v256_load_expand(const _Tp* ptr)                 \
+    {                                                               \
+        __m128i a = _mm_loadu_si128((const __m128i*)ptr);           \
+        return _Tpwvec(intrin(a));                                  \
+    }
+
+OPENCV_HAL_IMPL_AVX_EXPAND(v_uint8x32,  v_uint16x16, uchar,    _mm256_cvtepu8_epi16)  // zero-extend 8 -> 16
+OPENCV_HAL_IMPL_AVX_EXPAND(v_int8x32,   v_int16x16,  schar,    _mm256_cvtepi8_epi16)  // sign-extend 8 -> 16
+OPENCV_HAL_IMPL_AVX_EXPAND(v_uint16x16, v_uint32x8,  ushort,   _mm256_cvtepu16_epi32) // zero-extend 16 -> 32
+OPENCV_HAL_IMPL_AVX_EXPAND(v_int16x16,  v_int32x8,   short,    _mm256_cvtepi16_epi32) // sign-extend 16 -> 32
+OPENCV_HAL_IMPL_AVX_EXPAND(v_uint32x8,  v_uint64x4,  unsigned, _mm256_cvtepu32_epi64) // zero-extend 32 -> 64
+OPENCV_HAL_IMPL_AVX_EXPAND(v_int32x8,   v_int64x4,   int,      _mm256_cvtepi32_epi64) // sign-extend 32 -> 64
+
+#define OPENCV_HAL_IMPL_AVX_EXPAND_Q(_Tpvec, _Tp, intrin)   \
+    inline _Tpvec v256_load_expand_q(const _Tp* ptr)        \
+    { /* reads 8 bytes from ptr and widens each to a 32-bit lane */ \
+        __m128i a = _mm_loadl_epi64((const __m128i*)ptr);   \
+        return _Tpvec(intrin(a));                           \
+    }
+
+OPENCV_HAL_IMPL_AVX_EXPAND_Q(v_uint32x8, uchar, _mm256_cvtepu8_epi32) // zero-extend 8 -> 32
+OPENCV_HAL_IMPL_AVX_EXPAND_Q(v_int32x8,  schar, _mm256_cvtepi8_epi32) // sign-extend 8 -> 32
+
+/* pack */
+// 16
+inline v_int8x32 v_pack(const v_int16x16& a, const v_int16x16& b) // signed-saturating 16 -> 8 pack, followed by the 64-bit reorder helper
+{ const __m256i packed = _mm256_packs_epi16(a.val, b.val); return v_int8x32(_v256_shuffle_odd_64(packed)); }
+
+inline v_uint8x32 v_pack(const v_uint16x16& a, const v_uint16x16& b)
+{
+    const __m256i u8_max = _mm256_set1_epi16(255);
+    // clamp first: packus reads its inputs as signed 16-bit, so values above 32767 would otherwise pack to 0
+    const __m256i a_clamped = _mm256_min_epu16(a.val, u8_max), b_clamped = _mm256_min_epu16(b.val, u8_max);
+    return v_uint8x32(_v256_shuffle_odd_64(_mm256_packus_epi16(a_clamped, b_clamped)));
+}
+
+// Signed 16-bit -> unsigned 8-bit with unsigned saturation (packus); the packed
+// result then goes through the _v256_shuffle_odd_64 reorder helper.
+inline v_uint8x32 v_pack_u(const v_int16x16& a, const v_int16x16& b)
+{ const __m256i packed = _mm256_packus_epi16(a.val, b.val); return v_uint8x32(_v256_shuffle_odd_64(packed)); }
+
+inline void v_pack_store(schar* ptr, const v_int16x16& a) // packs a with itself and stores via v_store_low
+{ const v_int8x32 packed = v_pack(a, a); v_store_low(ptr, packed); }
+
+inline void v_pack_store(uchar* ptr, const v_uint16x16& a)
+{
+    // clamp to 255 before packus, which would otherwise read values above 32767 as negative
+    const __m256i clamped = _mm256_min_epu16(a.val, _mm256_set1_epi16(255));
+    const __m256i packed = _v256_shuffle_odd_64(_mm256_packus_epi16(clamped, clamped));
+    v_store_low(ptr, v_uint8x32(packed));
+}
+
+inline void v_pack_u_store(uchar* ptr, const v_int16x16& a) // packs a with itself (v_pack_u) and stores via v_store_low
+{ const v_uint8x32 packed = v_pack_u(a, a); v_store_low(ptr, packed); }
+
+template<int n> inline
+v_uint8x32 v_rshr_pack(const v_uint16x16& a, const v_uint16x16& b)
+{
+    // Rounding right shift: add 2^(n-1) before shifting by n. We assume that n > 0, so the
+    // shifted 16-bit values can be treated as signed numbers when handed to v_pack_u.
+    const v_uint16x16 half = v256_setall_u16((short)(1 << (n-1)));
+    const v_int16x16 a_sh = v_reinterpret_as_s16((a + half) >> n), b_sh = v_reinterpret_as_s16((b + half) >> n);
+    return v_pack_u(a_sh, b_sh);
+}
+
+template<int n> inline
+void v_rshr_pack_store(uchar* ptr, const v_uint16x16& a)
+{
+    const v_int16x16 shifted = v_reinterpret_as_s16((a + v256_setall_u16((short)(1 << (n-1)))) >> n);
+    v_pack_u_store(ptr, shifted); // same rounding shift as v_rshr_pack, then pack and store
+}
+
+template<int n> inline
+v_uint8x32 v_rshr_pack_u(const v_int16x16& a, const v_int16x16& b)
+{
+    const v_int16x16 half = v256_setall_s16((short)(1 << (n-1))); // rounding term 2^(n-1)
+    return v_pack_u((a + half) >> n, (b + half) >> n);
+}
+
+template<int n> inline
+void v_rshr_pack_u_store(uchar* ptr, const v_int16x16& a)
+{
+    const v_int16x16 half = v256_setall_s16((short)(1 << (n-1))); // rounding term 2^(n-1)
+    v_pack_u_store(ptr, (a + half) >> n);
+}
+
+template<int n> inline
+v_int8x32 v_rshr_pack(const v_int16x16& a, const v_int16x16& b)
+{
+    const v_int16x16 half = v256_setall_s16((short)(1 << (n-1))); // rounding term 2^(n-1)
+    return v_pack((a + half) >> n, (b + half) >> n);
+}
+
+template<int n> inline
+void v_rshr_pack_store(schar* ptr, const v_int16x16& a)
+{
+    const v_int16x16 half = v256_setall_s16((short)(1 << (n-1))); // rounding term 2^(n-1)
+    v_pack_store(ptr, (a + half) >> n);
+}
+
+// 32
+inline v_int16x16 v_pack(const v_int32x8& a, const v_int32x8& b) // signed-saturating 32 -> 16 pack, then the 64-bit reorder helper
+{ const __m256i packed = _mm256_packs_epi32(a.val, b.val); return v_int16x16(_v256_shuffle_odd_64(packed)); }
+
+inline v_uint16x16 v_pack(const v_uint32x8& a, const v_uint32x8& b) // unsigned 32 -> 16 through the _v256_packs_epu32 helper, then the reorder
+{ const __m256i packed = _v256_packs_epu32(a.val, b.val); return v_uint16x16(_v256_shuffle_odd_64(packed)); }
+
+inline v_uint16x16 v_pack_u(const v_int32x8& a, const v_int32x8& b) // signed 32 -> unsigned-saturated 16 (packus), then the reorder
+{ const __m256i packed = _mm256_packus_epi32(a.val, b.val); return v_uint16x16(_v256_shuffle_odd_64(packed)); }
+
+inline void v_pack_store(short* ptr, const v_int32x8& a) // packs a with itself and stores via v_store_low
+{ const v_int16x16 packed = v_pack(a, a); v_store_low(ptr, packed); }
+
+inline void v_pack_store(ushort* ptr, const v_uint32x8& a)
+{
+    // clamp to 65535 before packus, which reads its inputs as signed 32-bit values
+    const __m256i clamped = _mm256_min_epu32(a.val, _mm256_set1_epi32(65535));
+    const __m256i packed = _v256_shuffle_odd_64(_mm256_packus_epi32(clamped, clamped));
+    v_store_low(ptr, v_uint16x16(packed));
+}
+
+inline void v_pack_u_store(ushort* ptr, const v_int32x8& a) // packs a with itself (v_pack_u) and stores via v_store_low
+{ const v_uint16x16 packed = v_pack_u(a, a); v_store_low(ptr, packed); }
+
+
+template<int n> inline
+v_uint16x16 v_rshr_pack(const v_uint32x8& a, const v_uint32x8& b)
+{
+    // Rounding right shift: add 2^(n-1) before shifting by n. We assume that n > 0, so the
+    // shifted 32-bit values can be treated as signed numbers when handed to v_pack_u.
+    const v_uint32x8 half = v256_setall_u32(1 << (n-1));
+    const v_int32x8 a_sh = v_reinterpret_as_s32((a + half) >> n), b_sh = v_reinterpret_as_s32((b + half) >> n);
+    return v_pack_u(a_sh, b_sh);
+}
+
+template<int n> inline
+void v_rshr_pack_store(ushort* ptr, const v_uint32x8& a)
+{
+    const v_int32x8 shifted = v_reinterpret_as_s32((a + v256_setall_u32(1 << (n-1))) >> n);
+    v_pack_u_store(ptr, shifted); // same rounding shift as v_rshr_pack, then pack and store
+}
+
+template<int n> inline
+v_uint16x16 v_rshr_pack_u(const v_int32x8& a, const v_int32x8& b)
+{
+    const v_int32x8 half = v256_setall_s32(1 << (n-1)); // rounding term 2^(n-1)
+    return v_pack_u((a + half) >> n, (b + half) >> n);
+}
+
+template<int n> inline
+void v_rshr_pack_u_store(ushort* ptr, const v_int32x8& a)
+{
+    const v_int32x8 half = v256_setall_s32(1 << (n-1)); // rounding term 2^(n-1)
+    v_pack_u_store(ptr, (a + half) >> n);
+}
+
+template<int n> inline
+v_int16x16 v_rshr_pack(const v_int32x8& a, const v_int32x8& b)
+{
+    const v_int32x8 half = v256_setall_s32(1 << (n-1)); // rounding term 2^(n-1)
+    return v_pack((a + half) >> n, (b + half) >> n);
+}
+
+template<int n> inline
+void v_rshr_pack_store(short* ptr, const v_int32x8& a)
+{
+    const v_int32x8 half = v256_setall_s32(1 << (n-1)); // rounding term 2^(n-1)
+    v_pack_store(ptr, (a + half) >> n);
+}
+
+// 64
+// Non-saturating pack
+inline v_uint32x8 v_pack(const v_uint64x4& a, const v_uint64x4& b)
+{
+    // Per 128-bit lane, move the low dword of each qword into the lane's low 64 bits (no saturation).
+    const int low_dwords = _MM_SHUFFLE(0, 0, 2, 0);
+    const __m256i lanes = _mm256_unpacklo_epi64(_mm256_shuffle_epi32(a.val, low_dwords),
+                                                _mm256_shuffle_epi32(b.val, low_dwords)); // a0, a1, b0, b1, a2, a3, b2, b3
+    return v_uint32x8(_v256_shuffle_odd_64(lanes));
+}
+
+inline v_int32x8 v_pack(const v_int64x4& a, const v_int64x4& b) // signed overload of the non-saturating 64 -> 32 bit pack
+{ return v_reinterpret_as_s32(v_pack(v_reinterpret_as_u64(a), v_reinterpret_as_u64(b))); } // forwards to the unsigned overload through bit reinterpretation
+
+inline void v_pack_store(unsigned* ptr, const v_uint64x4& a)
+{
+    const __m256i low_dwords = _mm256_shuffle_epi32(a.val, _MM_SHUFFLE(0, 0, 2, 0)); // low dword of each qword -> low 64 bits of its lane
+    v_store_low(ptr, v_uint32x8(_v256_shuffle_odd_64(low_dwords)));
+}
+
+inline void v_pack_store(int* ptr, const v_int64x4& b) // signed overload of the non-saturating 64 -> 32 bit pack-and-store
+{ v_pack_store((unsigned*)ptr, v_reinterpret_as_u64(b)); } // forwards to the unsigned overload; only the bit pattern matters here
+
+// Rounding right shift of a and b by n bits, then non-saturating pack to 32-bit via v_pack.
+template<int n> inline v_uint32x8 v_rshr_pack(const v_uint64x4& a, const v_uint64x4& b)
+{
+    const v_uint64x4 half = v256_setall_u64((uint64)1 << (n-1));
+    return v_pack((a + half) >> n, (b + half) >> n);
+}
+
+// Rounding right shift of a by n bits, then non-saturating pack to 32-bit and store at ptr.
+template<int n> inline void v_rshr_pack_store(unsigned* ptr, const v_uint64x4& a)
+{
+    const v_uint64x4 half = v256_setall_u64((uint64)1 << (n-1));
+    v_pack_store(ptr, (a + half) >> n);
+}
+
+// Rounding right shift of a and b by n bits, then non-saturating pack to 32-bit via v_pack.
+template<int n> inline v_int32x8 v_rshr_pack(const v_int64x4& a, const v_int64x4& b)
+{
+    const v_int64x4 half = v256_setall_s64((int64)1 << (n-1));
+    return v_pack((a + half) >> n, (b + half) >> n);
+}
+
+// Rounding right shift of a by n bits, then non-saturating pack to 32-bit and store at ptr.
+template<int n> inline void v_rshr_pack_store(int* ptr, const v_int64x4& a)
+{
+    const v_int64x4 half = v256_setall_s64((int64)1 << (n-1));
+    v_pack_store(ptr, (a + half) >> n);
+}
+
+// pack boolean
+inline v_uint8x32 v_pack_b(const v_uint16x16& a, const v_uint16x16& b)
+{ // Narrows two 16-bit vectors to one 8-bit vector. NOTE(review): inputs are presumably all-ones/all-zeros masks -- confirm
+    __m256i ab = _mm256_packs_epi16(a.val, b.val); // signed saturation per 128-bit lane: 0xFFFF (-1) -> 0xFF, 0 -> 0
+    return v_uint8x32(_v256_shuffle_odd_64(ab)); // helper defined outside this view; presumably restores a-then-b order
+}
+
+inline v_uint8x32 v_pack_b(const v_uint32x8& a, const v_uint32x8& b,
+                           const v_uint32x8& c, const v_uint32x8& d)
+{ // Narrows four 32-bit vectors to one 8-bit vector. NOTE(review): inputs are presumably boolean masks -- confirm
+    __m256i ab = _mm256_packs_epi32(a.val, b.val); // 32 -> 16 bits, signed saturation, per 128-bit lane
+    __m256i cd = _mm256_packs_epi32(c.val, d.val);
+    // 16 -> 8 bits; the two shuffles below undo the per-lane pack order (intended result order: a, b, c, d)
+    __m256i abcd = _v256_shuffle_odd_64(_mm256_packs_epi16(ab, cd));
+    return v_uint8x32(_mm256_shuffle_epi32(abcd, _MM_SHUFFLE(3, 1, 2, 0))); // swaps the two middle dwords of each lane
+}
+
+inline v_uint8x32 v_pack_b(const v_uint64x4& a, const v_uint64x4& b, const v_uint64x4& c,
+                           const v_uint64x4& d, const v_uint64x4& e, const v_uint64x4& f,
+                           const v_uint64x4& g, const v_uint64x4& h)
+{ // Narrows eight 64-bit vectors to one 8-bit vector. NOTE(review): inputs are presumably boolean masks -- confirm
+    __m256i ab = _mm256_packs_epi32(a.val, b.val); // each 64-bit element is treated as two 32-bit halves here
+    __m256i cd = _mm256_packs_epi32(c.val, d.val);
+    __m256i ef = _mm256_packs_epi32(e.val, f.val);
+    __m256i gh = _mm256_packs_epi32(g.val, h.val);
+    // Second narrowing step: one 16-bit value per original 64-bit element
+    __m256i abcd = _mm256_packs_epi32(ab, cd);
+    __m256i efgh = _mm256_packs_epi32(ef, gh);
+    __m256i pkall = _v256_shuffle_odd_64(_mm256_packs_epi16(abcd, efgh)); // 16 -> 8 bits
+    // Rotate each lane by 8 bytes and interleave 16-bit pairs to finish the reordering (intended order: a .. h)
+    __m256i rev = _mm256_alignr_epi8(pkall, pkall, 8);
+    return v_uint8x32(_mm256_unpacklo_epi16(pkall, rev));
+}
+
+/* Recombine */
+// it's up there with the load and store operations
+
+/* Extract: for each vector type, v_extract<s>(a, b) simply forwards to v_rotate_right<s>(a, b) */
+#define OPENCV_HAL_IMPL_AVX_EXTRACT(_Tpvec)                    \
+    template<int s>                                            \
+    inline _Tpvec v_extract(const _Tpvec& a, const _Tpvec& b)  \
+    { return v_rotate_right<s>(a, b); }
+
+OPENCV_HAL_IMPL_AVX_EXTRACT(v_uint8x32)
+OPENCV_HAL_IMPL_AVX_EXTRACT(v_int8x32)
+OPENCV_HAL_IMPL_AVX_EXTRACT(v_uint16x16)
+OPENCV_HAL_IMPL_AVX_EXTRACT(v_int16x16)
+OPENCV_HAL_IMPL_AVX_EXTRACT(v_uint32x8)
+OPENCV_HAL_IMPL_AVX_EXTRACT(v_int32x8)
+OPENCV_HAL_IMPL_AVX_EXTRACT(v_uint64x4)
+OPENCV_HAL_IMPL_AVX_EXTRACT(v_int64x4)
+OPENCV_HAL_IMPL_AVX_EXTRACT(v_float32x8)
+OPENCV_HAL_IMPL_AVX_EXTRACT(v_float64x4)
+
+// Returns element i of a as a scalar.
+template<int i> inline uchar v_extract_n(v_uint8x32 a)
+{
+    return static_cast<uchar>(_v256_extract_epi8<i>(a.val));
+}
+
+// Returns element i of a; reuses the unsigned overload and converts the result to schar.
+template<int i> inline schar v_extract_n(v_int8x32 a)
+{
+    return static_cast<schar>(v_extract_n<i>(v_reinterpret_as_u8(a)));
+}
+
+// Returns element i of a as a scalar.
+template<int i> inline ushort v_extract_n(v_uint16x16 a)
+{
+    return static_cast<ushort>(_v256_extract_epi16<i>(a.val));
+}
+
+// Returns element i of a; reuses the unsigned overload and converts the result to short.
+template<int i> inline short v_extract_n(v_int16x16 a)
+{
+    return static_cast<short>(v_extract_n<i>(v_reinterpret_as_u16(a)));
+}
+
+// Returns element i of a as a scalar.
+template<int i> inline uint v_extract_n(v_uint32x8 a)
+{
+    return static_cast<uint>(_v256_extract_epi32<i>(a.val));
+}
+
+// Returns element i of a; reuses the unsigned overload and converts the result to int.
+template<int i> inline int v_extract_n(v_int32x8 a)
+{
+    return static_cast<int>(v_extract_n<i>(v_reinterpret_as_u32(a)));
+}
+
+// Returns element i of a as a scalar.
+template<int i> inline uint64 v_extract_n(v_uint64x4 a)
+{
+    return static_cast<uint64>(_v256_extract_epi64<i>(a.val));
+}
+
+// Returns element i of v; reuses the unsigned overload and converts the result to int64.
+template<int i> inline int64 v_extract_n(v_int64x4 v)
+{
+    return static_cast<int64>(v_extract_n<i>(v_reinterpret_as_u64(v)));
+}
+
+// Returns element i of v: the element is fetched as raw 32 bits and read back as a float.
+template<int i> inline float v_extract_n(v_float32x8 v)
+{
+    union { uint bits; float value; } pun;
+    pun.bits = v_extract_n<i>(v_reinterpret_as_u32(v));
+    return pun.value;
+}
+
+// Returns element i of v: the element is fetched as raw 64 bits and read back as a double.
+template<int i> inline double v_extract_n(v_float64x4 v)
+{
+    union { uint64 bits; double value; } pun;
+    pun.bits = v_extract_n<i>(v_reinterpret_as_u64(v));
+    return pun.value;
+}
+
+template<int i> // returns a vector whose 8 elements all equal element i of a
+inline v_uint32x8 v_broadcast_element(v_uint32x8 a)
+{
+    const __m256i perm = _mm256_set1_epi32((char)i); // every output element selects source element i; a plain local avoids the guarded init of a function-local static
+    return v_uint32x8(_mm256_permutevar8x32_epi32(a.val, perm));
+}
+
+template<int i> // signed overload: forwards to the v_uint32x8 version through bit reinterpretation
+inline v_int32x8 v_broadcast_element(const v_int32x8 &a)
+{ return v_reinterpret_as_s32(v_broadcast_element<i>(v_reinterpret_as_u32(a))); }
+
+template<int i> // float overload: forwards to the v_uint32x8 version through bit reinterpretation
+inline v_float32x8 v_broadcast_element(const v_float32x8 &a)
+{ return v_reinterpret_as_f32(v_broadcast_element<i>(v_reinterpret_as_u32(a))); }
+
+
+///////////////////// load deinterleave /////////////////////////////
+
+inline void v_load_deinterleave( const uchar* ptr, v_uint8x32& a, v_uint8x32& b )
+{ // Loads 64 bytes from ptr (unaligned) and splits them: even-indexed bytes -> a, odd-indexed bytes -> b
+    __m256i ab0 = _mm256_loadu_si256((const __m256i*)ptr);
+    __m256i ab1 = _mm256_loadu_si256((const __m256i*)(ptr + 32));
+    // sh: within each 128-bit lane, even-indexed bytes go to the low 8 bytes, odd-indexed bytes to the high 8
+    const __m256i sh = _mm256_setr_epi8(0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15,
+                                               0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15);
+    __m256i p0 = _mm256_shuffle_epi8(ab0, sh);
+    __m256i p1 = _mm256_shuffle_epi8(ab1, sh);
+    __m256i pl = _mm256_permute2x128_si256(p0, p1, 0 + 2*16); // low lanes of p0 and p1
+    __m256i ph = _mm256_permute2x128_si256(p0, p1, 1 + 3*16); // high lanes of p0 and p1
+    __m256i a0 = _mm256_unpacklo_epi64(pl, ph); // the "even" halves, in memory order
+    __m256i b0 = _mm256_unpackhi_epi64(pl, ph); // the "odd" halves, in memory order
+    a = v_uint8x32(a0);
+    b = v_uint8x32(b0);
+}
+
+inline void v_load_deinterleave( const ushort* ptr, v_uint16x16& a, v_uint16x16& b )
+{ // Loads 32 ushort values from ptr (unaligned) and splits them: even-indexed -> a, odd-indexed -> b
+    __m256i ab0 = _mm256_loadu_si256((const __m256i*)ptr);
+    __m256i ab1 = _mm256_loadu_si256((const __m256i*)(ptr + 16));
+    // sh: within each 128-bit lane, even-indexed 16-bit values go to the low 8 bytes, odd-indexed to the high 8
+    const __m256i sh = _mm256_setr_epi8(0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15,
+                                               0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15);
+    __m256i p0 = _mm256_shuffle_epi8(ab0, sh);
+    __m256i p1 = _mm256_shuffle_epi8(ab1, sh);
+    __m256i pl = _mm256_permute2x128_si256(p0, p1, 0 + 2*16); // low lanes of p0 and p1
+    __m256i ph = _mm256_permute2x128_si256(p0, p1, 1 + 3*16); // high lanes of p0 and p1
+    __m256i a0 = _mm256_unpacklo_epi64(pl, ph); // the "even" halves, in memory order
+    __m256i b0 = _mm256_unpackhi_epi64(pl, ph); // the "odd" halves, in memory order
+    a = v_uint16x16(a0);
+    b = v_uint16x16(b0);
+}
+
+inline void v_load_deinterleave( const unsigned* ptr, v_uint32x8& a, v_uint32x8& b )
+{ // Loads 16 unsigned values from ptr (unaligned) and splits them: even-indexed -> a, odd-indexed -> b
+    __m256i ab0 = _mm256_loadu_si256((const __m256i*)ptr);
+    __m256i ab1 = _mm256_loadu_si256((const __m256i*)(ptr + 8));
+    // sh selects dwords 0, 2, 1, 3 of each 128-bit lane: even-indexed values first, then odd-indexed
+    const int sh = 0+2*4+1*16+3*64;
+    __m256i p0 = _mm256_shuffle_epi32(ab0, sh);
+    __m256i p1 = _mm256_shuffle_epi32(ab1, sh);
+    __m256i pl = _mm256_permute2x128_si256(p0, p1, 0 + 2*16); // low lanes of p0 and p1
+    __m256i ph = _mm256_permute2x128_si256(p0, p1, 1 + 3*16); // high lanes of p0 and p1
+    __m256i a0 = _mm256_unpacklo_epi64(pl, ph); // the "even" halves, in memory order
+    __m256i b0 = _mm256_unpackhi_epi64(pl, ph); // the "odd" halves, in memory order
+    a = v_uint32x8(a0);
+    b = v_uint32x8(b0);
+}
+
+inline void v_load_deinterleave( const uint64* ptr, v_uint64x4& a, v_uint64x4& b )
+{ // Loads 8 uint64 values from ptr (unaligned) and splits them: even-indexed -> a, odd-indexed -> b
+    __m256i ab0 = _mm256_loadu_si256((const __m256i*)ptr);
+    __m256i ab1 = _mm256_loadu_si256((const __m256i*)(ptr + 4));
+    // Each 128-bit lane already holds one (a, b) pair, so only lane regrouping and a 64-bit unpack are needed
+    __m256i pl = _mm256_permute2x128_si256(ab0, ab1, 0 + 2*16); // low lanes of ab0 and ab1
+    __m256i ph = _mm256_permute2x128_si256(ab0, ab1, 1 + 3*16); // high lanes of ab0 and ab1
+    __m256i a0 = _mm256_unpacklo_epi64(pl, ph);
+    __m256i b0 = _mm256_unpackhi_epi64(pl, ph);
+    a = v_uint64x4(a0);
+    b = v_uint64x4(b0);
+}
+
+inline void v_load_deinterleave( const uchar* ptr, v_uint8x32& a, v_uint8x32& b, v_uint8x32& c )
+{ // Loads 96 bytes of 3-channel interleaved data from ptr (unaligned) and splits the channels into a, b and c
+    __m256i bgr0 = _mm256_loadu_si256((const __m256i*)ptr);
+    __m256i bgr1 = _mm256_loadu_si256((const __m256i*)(ptr + 32));
+    __m256i bgr2 = _mm256_loadu_si256((const __m256i*)(ptr + 64));
+    // Pair up the low lanes and the high lanes of the first and last 32-byte blocks
+    __m256i s02_low = _mm256_permute2x128_si256(bgr0, bgr2, 0 + 2*16);
+    __m256i s02_high = _mm256_permute2x128_si256(bgr0, bgr2, 1 + 3*16);
+    // m0/m1: byte masks for blendv; a -1 byte takes that byte from the second blend operand
+    const __m256i m0 = _mm256_setr_epi8(0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0,
+                                               0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0);
+    const __m256i m1 = _mm256_setr_epi8(0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0,
+                                               -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1);
+    // Gather the bytes of one channel per vector (still out of order within each lane)
+    __m256i b0 = _mm256_blendv_epi8(_mm256_blendv_epi8(s02_low, s02_high, m0), bgr1, m1);
+    __m256i g0 = _mm256_blendv_epi8(_mm256_blendv_epi8(s02_high, s02_low, m1), bgr1, m0);
+    __m256i r0 = _mm256_blendv_epi8(_mm256_blendv_epi8(bgr1, s02_low, m0), s02_high, m1);
+    // Per-lane byte shuffles that put each channel's bytes back into element order
+    const __m256i
+    sh_b = _mm256_setr_epi8(0, 3, 6, 9, 12, 15, 2, 5, 8, 11, 14, 1, 4, 7, 10, 13,
+                            0, 3, 6, 9, 12, 15, 2, 5, 8, 11, 14, 1, 4, 7, 10, 13),
+    sh_g = _mm256_setr_epi8(1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15, 2, 5, 8, 11, 14,
+                            1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15, 2, 5, 8, 11, 14),
+    sh_r = _mm256_setr_epi8(2, 5, 8, 11, 14, 1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15,
+                            2, 5, 8, 11, 14, 1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15);
+    b0 = _mm256_shuffle_epi8(b0, sh_b);
+    g0 = _mm256_shuffle_epi8(g0, sh_g);
+    r0 = _mm256_shuffle_epi8(r0, sh_r);
+
+    a = v_uint8x32(b0);
+    b = v_uint8x32(g0);
+    c = v_uint8x32(r0);
+}
+
+inline void v_load_deinterleave( const ushort* ptr, v_uint16x16& a, v_uint16x16& b, v_uint16x16& c )
+{ // Loads 48 ushort values of 3-channel interleaved data from ptr (unaligned) and splits the channels into a, b and c
+    __m256i bgr0 = _mm256_loadu_si256((const __m256i*)ptr);
+    __m256i bgr1 = _mm256_loadu_si256((const __m256i*)(ptr + 16));
+    __m256i bgr2 = _mm256_loadu_si256((const __m256i*)(ptr + 32));
+    // Pair up the low lanes and the high lanes of the first and last 32-byte blocks
+    __m256i s02_low = _mm256_permute2x128_si256(bgr0, bgr2, 0 + 2*16);
+    __m256i s02_high = _mm256_permute2x128_si256(bgr0, bgr2, 1 + 3*16);
+    // m0/m1: byte masks for blendv (set in byte pairs, one pair per 16-bit element); -1 takes the second operand
+    const __m256i m0 = _mm256_setr_epi8(0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1,
+                                               0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0);
+    const __m256i m1 = _mm256_setr_epi8(0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0,
+                                               -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0);
+    __m256i b0 = _mm256_blendv_epi8(_mm256_blendv_epi8(s02_low, s02_high, m0), bgr1, m1); // one channel per vector, still out of order
+    __m256i g0 = _mm256_blendv_epi8(_mm256_blendv_epi8(bgr1, s02_low, m0), s02_high, m1);
+    __m256i r0 = _mm256_blendv_epi8(_mm256_blendv_epi8(s02_high, s02_low, m1), bgr1, m0);
+    const __m256i sh_b = _mm256_setr_epi8(0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5, 10, 11,
+                                                 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5, 10, 11);
+    const __m256i sh_g = _mm256_setr_epi8(2, 3, 8, 9, 14, 15, 4, 5, 10, 11, 0, 1, 6, 7, 12, 13,
+                                                 2, 3, 8, 9, 14, 15, 4, 5, 10, 11, 0, 1, 6, 7, 12, 13);
+    const __m256i sh_r = _mm256_setr_epi8(4, 5, 10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15,
+                                                 4, 5, 10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15);
+    b0 = _mm256_shuffle_epi8(b0, sh_b); // per-lane shuffles that put each channel back into element order
+    g0 = _mm256_shuffle_epi8(g0, sh_g);
+    r0 = _mm256_shuffle_epi8(r0, sh_r);
+
+    a = v_uint16x16(b0);
+    b = v_uint16x16(g0);
+    c = v_uint16x16(r0);
+}
+
+inline void v_load_deinterleave( const unsigned* ptr, v_uint32x8& a, v_uint32x8& b, v_uint32x8& c )
+{ // Loads 24 unsigned values of 3-channel interleaved data from ptr (unaligned) and splits the channels into a, b and c
+    __m256i bgr0 = _mm256_loadu_si256((const __m256i*)ptr);
+    __m256i bgr1 = _mm256_loadu_si256((const __m256i*)(ptr + 8));
+    __m256i bgr2 = _mm256_loadu_si256((const __m256i*)(ptr + 16));
+    // Pair up the low lanes and the high lanes of the first and last 32-byte blocks
+    __m256i s02_low = _mm256_permute2x128_si256(bgr0, bgr2, 0 + 2*16);
+    __m256i s02_high = _mm256_permute2x128_si256(bgr0, bgr2, 1 + 3*16);
+    // blend_epi32: a set bit k in the immediate takes dword k from the second operand
+    __m256i b0 = _mm256_blend_epi32(_mm256_blend_epi32(s02_low, s02_high, 0x24), bgr1, 0x92);
+    __m256i g0 = _mm256_blend_epi32(_mm256_blend_epi32(s02_high, s02_low, 0x92), bgr1, 0x24);
+    __m256i r0 = _mm256_blend_epi32(_mm256_blend_epi32(bgr1, s02_low, 0x24), s02_high, 0x92);
+    // Per-lane dword shuffles that put each channel back into element order
+    b0 = _mm256_shuffle_epi32(b0, 0x6c);
+    g0 = _mm256_shuffle_epi32(g0, 0xb1);
+    r0 = _mm256_shuffle_epi32(r0, 0xc6);
+
+    a = v_uint32x8(b0);
+    b = v_uint32x8(g0);
+    c = v_uint32x8(r0);
+}
+
+inline void v_load_deinterleave( const uint64* ptr, v_uint64x4& a, v_uint64x4& b, v_uint64x4& c )
+{ // Loads 12 uint64 values of 3-channel interleaved data from ptr (unaligned) and splits the channels into a, b and c
+    __m256i bgr0 = _mm256_loadu_si256((const __m256i*)ptr);
+    __m256i bgr1 = _mm256_loadu_si256((const __m256i*)(ptr + 4));
+    __m256i bgr2 = _mm256_loadu_si256((const __m256i*)(ptr + 8));
+    // 0xf0 keeps the low 128 bits of the first operand and takes the high 128 bits of the second
+    __m256i s01 = _mm256_blend_epi32(bgr0, bgr1, 0xf0);
+    __m256i s12 = _mm256_blend_epi32(bgr1, bgr2, 0xf0);
+    __m256i s20r = _mm256_permute4x64_epi64(_mm256_blend_epi32(bgr2, bgr0, 0xf0), 0x1b); // 0x1b reverses the four qwords
+    __m256i b0 = _mm256_unpacklo_epi64(s01, s20r);
+    __m256i g0 = _mm256_alignr_epi8(s12, s01, 8);
+    __m256i r0 = _mm256_unpackhi_epi64(s20r, s12);
+
+    a = v_uint64x4(b0);
+    b = v_uint64x4(g0);
+    c = v_uint64x4(r0);
+}
+
+inline void v_load_deinterleave( const uchar* ptr, v_uint8x32& a, v_uint8x32& b, v_uint8x32& c, v_uint8x32& d )
+{ // Loads 128 bytes of 4-channel interleaved data from ptr (unaligned) and splits the channels into a, b, c and d
+    __m256i bgr0 = _mm256_loadu_si256((const __m256i*)ptr);
+    __m256i bgr1 = _mm256_loadu_si256((const __m256i*)(ptr + 32));
+    __m256i bgr2 = _mm256_loadu_si256((const __m256i*)(ptr + 64));
+    __m256i bgr3 = _mm256_loadu_si256((const __m256i*)(ptr + 96));
+    const __m256i sh = _mm256_setr_epi8(0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15,
+                                               0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15);
+    // sh groups the bytes of each 128-bit lane by channel: one 32-bit group of four bytes per channel
+    __m256i p0 = _mm256_shuffle_epi8(bgr0, sh);
+    __m256i p1 = _mm256_shuffle_epi8(bgr1, sh);
+    __m256i p2 = _mm256_shuffle_epi8(bgr2, sh);
+    __m256i p3 = _mm256_shuffle_epi8(bgr3, sh);
+    // The unpack / lane-permute / unpack sequence below gathers the 32-bit groups of each channel into one vector
+    __m256i p01l = _mm256_unpacklo_epi32(p0, p1);
+    __m256i p01h = _mm256_unpackhi_epi32(p0, p1);
+    __m256i p23l = _mm256_unpacklo_epi32(p2, p3);
+    __m256i p23h = _mm256_unpackhi_epi32(p2, p3);
+
+    __m256i pll = _mm256_permute2x128_si256(p01l, p23l, 0 + 2*16);
+    __m256i plh = _mm256_permute2x128_si256(p01l, p23l, 1 + 3*16);
+    __m256i phl = _mm256_permute2x128_si256(p01h, p23h, 0 + 2*16);
+    __m256i phh = _mm256_permute2x128_si256(p01h, p23h, 1 + 3*16);
+
+    __m256i b0 = _mm256_unpacklo_epi32(pll, plh);
+    __m256i g0 = _mm256_unpackhi_epi32(pll, plh);
+    __m256i r0 = _mm256_unpacklo_epi32(phl, phh);
+    __m256i a0 = _mm256_unpackhi_epi32(phl, phh);
+
+    a = v_uint8x32(b0);
+    b = v_uint8x32(g0);
+    c = v_uint8x32(r0);
+    d = v_uint8x32(a0);
+}
+
+inline void v_load_deinterleave( const ushort* ptr, v_uint16x16& a, v_uint16x16& b, v_uint16x16& c, v_uint16x16& d )
+{ // Loads 64 ushort values of 4-channel interleaved data from ptr (unaligned) and splits the channels into a, b, c and d
+    __m256i bgr0 = _mm256_loadu_si256((const __m256i*)ptr);
+    __m256i bgr1 = _mm256_loadu_si256((const __m256i*)(ptr + 16));
+    __m256i bgr2 = _mm256_loadu_si256((const __m256i*)(ptr + 32));
+    __m256i bgr3 = _mm256_loadu_si256((const __m256i*)(ptr + 48));
+    const __m256i sh = _mm256_setr_epi8(0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15,
+                                               0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15);
+    __m256i p0 = _mm256_shuffle_epi8(bgr0, sh); // per lane: each 32-bit group now holds the two samples of one channel
+    __m256i p1 = _mm256_shuffle_epi8(bgr1, sh);
+    __m256i p2 = _mm256_shuffle_epi8(bgr2, sh);
+    __m256i p3 = _mm256_shuffle_epi8(bgr3, sh);
+    // The unpack / lane-permute / unpack sequence below gathers the 32-bit groups of each channel into one vector
+    __m256i p01l = _mm256_unpacklo_epi32(p0, p1);
+    __m256i p01h = _mm256_unpackhi_epi32(p0, p1);
+    __m256i p23l = _mm256_unpacklo_epi32(p2, p3);
+    __m256i p23h = _mm256_unpackhi_epi32(p2, p3);
+
+    __m256i pll = _mm256_permute2x128_si256(p01l, p23l, 0 + 2*16);
+    __m256i plh = _mm256_permute2x128_si256(p01l, p23l, 1 + 3*16);
+    __m256i phl = _mm256_permute2x128_si256(p01h, p23h, 0 + 2*16);
+    __m256i phh = _mm256_permute2x128_si256(p01h, p23h, 1 + 3*16);
+
+    __m256i b0 = _mm256_unpacklo_epi32(pll, plh);
+    __m256i g0 = _mm256_unpackhi_epi32(pll, plh);
+    __m256i r0 = _mm256_unpacklo_epi32(phl, phh);
+    __m256i a0 = _mm256_unpackhi_epi32(phl, phh);
+
+    a = v_uint16x16(b0);
+    b = v_uint16x16(g0);
+    c = v_uint16x16(r0);
+    d = v_uint16x16(a0);
+}
+
+inline void v_load_deinterleave( const unsigned* ptr, v_uint32x8& a, v_uint32x8& b, v_uint32x8& c, v_uint32x8& d )
+{ // Loads 32 unsigned values of 4-channel interleaved data from ptr (unaligned) and splits the channels into a, b, c and d
+    __m256i p0 = _mm256_loadu_si256((const __m256i*)ptr);
+    __m256i p1 = _mm256_loadu_si256((const __m256i*)(ptr + 8));
+    __m256i p2 = _mm256_loadu_si256((const __m256i*)(ptr + 16));
+    __m256i p3 = _mm256_loadu_si256((const __m256i*)(ptr + 24));
+    // Step 1: interleave dwords of neighbouring blocks (per 128-bit lane)
+    __m256i p01l = _mm256_unpacklo_epi32(p0, p1);
+    __m256i p01h = _mm256_unpackhi_epi32(p0, p1);
+    __m256i p23l = _mm256_unpacklo_epi32(p2, p3);
+    __m256i p23h = _mm256_unpackhi_epi32(p2, p3);
+    // Step 2: regroup 128-bit lanes (all low lanes together, all high lanes together)
+    __m256i pll = _mm256_permute2x128_si256(p01l, p23l, 0 + 2*16);
+    __m256i plh = _mm256_permute2x128_si256(p01l, p23l, 1 + 3*16);
+    __m256i phl = _mm256_permute2x128_si256(p01h, p23h, 0 + 2*16);
+    __m256i phh = _mm256_permute2x128_si256(p01h, p23h, 1 + 3*16);
+    // Step 3: a second dword interleave leaves one channel per vector, in element order
+    __m256i b0 = _mm256_unpacklo_epi32(pll, plh);
+    __m256i g0 = _mm256_unpackhi_epi32(pll, plh);
+    __m256i r0 = _mm256_unpacklo_epi32(phl, phh);
+    __m256i a0 = _mm256_unpackhi_epi32(phl, phh);
+
+    a = v_uint32x8(b0);
+    b = v_uint32x8(g0);
+    c = v_uint32x8(r0);
+    d = v_uint32x8(a0);
+}
+
+inline void v_load_deinterleave( const uint64* ptr, v_uint64x4& a, v_uint64x4& b, v_uint64x4& c, v_uint64x4& d )
+{ // Loads 16 uint64 values of 4-channel interleaved data from ptr (unaligned) and splits the channels into a, b, c and d
+    __m256i bgra0 = _mm256_loadu_si256((const __m256i*)ptr);
+    __m256i bgra1 = _mm256_loadu_si256((const __m256i*)(ptr + 4));
+    __m256i bgra2 = _mm256_loadu_si256((const __m256i*)(ptr + 8));
+    __m256i bgra3 = _mm256_loadu_si256((const __m256i*)(ptr + 12));
+    // Regroup 128-bit lanes: l* hold the low lanes (first two channels), h* the high lanes (last two channels)
+    __m256i l02 = _mm256_permute2x128_si256(bgra0, bgra2, 0 + 2*16);
+    __m256i h02 = _mm256_permute2x128_si256(bgra0, bgra2, 1 + 3*16);
+    __m256i l13 = _mm256_permute2x128_si256(bgra1, bgra3, 0 + 2*16);
+    __m256i h13 = _mm256_permute2x128_si256(bgra1, bgra3, 1 + 3*16);
+    // 64-bit unpacks pick one channel out of each lane pair
+    __m256i b0 = _mm256_unpacklo_epi64(l02, l13);
+    __m256i g0 = _mm256_unpackhi_epi64(l02, l13);
+    __m256i r0 = _mm256_unpacklo_epi64(h02, h13);
+    __m256i a0 = _mm256_unpackhi_epi64(h02, h13);
+
+    a = v_uint64x4(b0);
+    b = v_uint64x4(g0);
+    c = v_uint64x4(r0);
+    d = v_uint64x4(a0);
+}
+
+///////////////////////////// store interleave /////////////////////////////////////
+
+inline void v_store_interleave( uchar* ptr, const v_uint8x32& x, const v_uint8x32& y,
+                                hal::StoreMode mode=hal::STORE_UNALIGNED )
+{
+    const __m256i lo_pairs = _mm256_unpacklo_epi8(x.val, y.val); // x/y interleaved within each 128-bit lane
+    const __m256i hi_pairs = _mm256_unpackhi_epi8(x.val, y.val);
+    // Regroup the 128-bit lanes so that out0 precedes out1 in memory order.
+    const __m256i out0 = _mm256_permute2x128_si256(lo_pairs, hi_pairs, 0 + 2*16);
+    const __m256i out1 = _mm256_permute2x128_si256(lo_pairs, hi_pairs, 1 + 3*16);
+
+    switch( mode )
+    {
+    case hal::STORE_ALIGNED_NOCACHE:
+        _mm256_stream_si256((__m256i*)ptr, out0);
+        _mm256_stream_si256((__m256i*)(ptr + 32), out1);
+        break;
+    case hal::STORE_ALIGNED:
+        _mm256_store_si256((__m256i*)ptr, out0);
+        _mm256_store_si256((__m256i*)(ptr + 32), out1);
+        break;
+    default: // unaligned store for every other mode
+        _mm256_storeu_si256((__m256i*)ptr, out0);
+        _mm256_storeu_si256((__m256i*)(ptr + 32), out1);
+        break;
+    }
+}
+
+inline void v_store_interleave( ushort* ptr, const v_uint16x16& x, const v_uint16x16& y,
+                                hal::StoreMode mode=hal::STORE_UNALIGNED )
+{
+    const __m256i lo_pairs = _mm256_unpacklo_epi16(x.val, y.val); // x/y interleaved within each 128-bit lane
+    const __m256i hi_pairs = _mm256_unpackhi_epi16(x.val, y.val);
+    // Regroup the 128-bit lanes so that out0 precedes out1 in memory order.
+    const __m256i out0 = _mm256_permute2x128_si256(lo_pairs, hi_pairs, 0 + 2*16);
+    const __m256i out1 = _mm256_permute2x128_si256(lo_pairs, hi_pairs, 1 + 3*16);
+
+    switch( mode )
+    {
+    case hal::STORE_ALIGNED_NOCACHE:
+        _mm256_stream_si256((__m256i*)ptr, out0);
+        _mm256_stream_si256((__m256i*)(ptr + 16), out1);
+        break;
+    case hal::STORE_ALIGNED:
+        _mm256_store_si256((__m256i*)ptr, out0);
+        _mm256_store_si256((__m256i*)(ptr + 16), out1);
+        break;
+    default: // unaligned store for every other mode
+        _mm256_storeu_si256((__m256i*)ptr, out0);
+        _mm256_storeu_si256((__m256i*)(ptr + 16), out1);
+        break;
+    }
+}
+
+inline void v_store_interleave( unsigned* ptr, const v_uint32x8& x, const v_uint32x8& y,
+                                hal::StoreMode mode=hal::STORE_UNALIGNED )
+{
+    const __m256i lo_pairs = _mm256_unpacklo_epi32(x.val, y.val); // x/y interleaved within each 128-bit lane
+    const __m256i hi_pairs = _mm256_unpackhi_epi32(x.val, y.val);
+    // Regroup the 128-bit lanes so that out0 precedes out1 in memory order.
+    const __m256i out0 = _mm256_permute2x128_si256(lo_pairs, hi_pairs, 0 + 2*16);
+    const __m256i out1 = _mm256_permute2x128_si256(lo_pairs, hi_pairs, 1 + 3*16);
+
+    switch( mode )
+    {
+    case hal::STORE_ALIGNED_NOCACHE:
+        _mm256_stream_si256((__m256i*)ptr, out0);
+        _mm256_stream_si256((__m256i*)(ptr + 8), out1);
+        break;
+    case hal::STORE_ALIGNED:
+        _mm256_store_si256((__m256i*)ptr, out0);
+        _mm256_store_si256((__m256i*)(ptr + 8), out1);
+        break;
+    default: // unaligned store for every other mode
+        _mm256_storeu_si256((__m256i*)ptr, out0);
+        _mm256_storeu_si256((__m256i*)(ptr + 8), out1);
+        break;
+    }
+}
+
+inline void v_store_interleave( uint64* ptr, const v_uint64x4& x, const v_uint64x4& y,
+                                hal::StoreMode mode=hal::STORE_UNALIGNED )
+{
+    const __m256i lo_pairs = _mm256_unpacklo_epi64(x.val, y.val); // x/y interleaved within each 128-bit lane
+    const __m256i hi_pairs = _mm256_unpackhi_epi64(x.val, y.val);
+    // Regroup the 128-bit lanes so that out0 precedes out1 in memory order.
+    const __m256i out0 = _mm256_permute2x128_si256(lo_pairs, hi_pairs, 0 + 2*16);
+    const __m256i out1 = _mm256_permute2x128_si256(lo_pairs, hi_pairs, 1 + 3*16);
+
+    switch( mode )
+    {
+    case hal::STORE_ALIGNED_NOCACHE:
+        _mm256_stream_si256((__m256i*)ptr, out0);
+        _mm256_stream_si256((__m256i*)(ptr + 4), out1);
+        break;
+    case hal::STORE_ALIGNED:
+        _mm256_store_si256((__m256i*)ptr, out0);
+        _mm256_store_si256((__m256i*)(ptr + 4), out1);
+        break;
+    default: // unaligned store for every other mode
+        _mm256_storeu_si256((__m256i*)ptr, out0);
+        _mm256_storeu_si256((__m256i*)(ptr + 4), out1);
+        break;
+    }
+}
+
+inline void v_store_interleave( uchar* ptr, const v_uint8x32& a, const v_uint8x32& b, const v_uint8x32& c,
+                                hal::StoreMode mode=hal::STORE_UNALIGNED )
+{
+    const __m256i perm_a = _mm256_setr_epi8(
+            0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15, 10, 5,
+            0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15, 10, 5);
+    const __m256i perm_b = _mm256_setr_epi8(
+            5, 0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15, 10,
+            5, 0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15, 10);
+    const __m256i perm_c = _mm256_setr_epi8(
+            10, 5, 0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15,
+            10, 5, 0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15);
+    // Pre-place every channel's bytes at the positions they will occupy after the blends.
+    const __m256i ca = _mm256_shuffle_epi8(a.val, perm_a);
+    const __m256i cb = _mm256_shuffle_epi8(b.val, perm_b);
+    const __m256i cc = _mm256_shuffle_epi8(c.val, perm_c);
+    // Byte masks for blendv: a -1 byte takes that byte from the second blend operand.
+    const __m256i sel0 = _mm256_setr_epi8(0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0,
+                                          0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0);
+    const __m256i sel1 = _mm256_setr_epi8(0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0,
+                                          0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0);
+
+    const __m256i mix0 = _mm256_blendv_epi8(_mm256_blendv_epi8(ca, cb, sel0), cc, sel1);
+    const __m256i mix1 = _mm256_blendv_epi8(_mm256_blendv_epi8(cb, cc, sel0), ca, sel1);
+    const __m256i mix2 = _mm256_blendv_epi8(_mm256_blendv_epi8(cc, ca, sel0), cb, sel1);
+    // Put the 128-bit lanes into memory order.
+    const __m256i out0 = _mm256_permute2x128_si256(mix0, mix1, 0 + 2*16);
+    const __m256i out1 = _mm256_permute2x128_si256(mix2, mix0, 0 + 3*16);
+    const __m256i out2 = _mm256_permute2x128_si256(mix1, mix2, 1 + 3*16);
+
+    switch( mode )
+    {
+    case hal::STORE_ALIGNED_NOCACHE:
+        _mm256_stream_si256((__m256i*)ptr, out0);
+        _mm256_stream_si256((__m256i*)(ptr + 32), out1);
+        _mm256_stream_si256((__m256i*)(ptr + 64), out2);
+        break;
+    case hal::STORE_ALIGNED:
+        _mm256_store_si256((__m256i*)ptr, out0);
+        _mm256_store_si256((__m256i*)(ptr + 32), out1);
+        _mm256_store_si256((__m256i*)(ptr + 64), out2);
+        break;
+    default: // unaligned store for every other mode
+        _mm256_storeu_si256((__m256i*)ptr, out0);
+        _mm256_storeu_si256((__m256i*)(ptr + 32), out1);
+        _mm256_storeu_si256((__m256i*)(ptr + 64), out2);
+        break;
+    }
+}
+
+inline void v_store_interleave( ushort* ptr, const v_uint16x16& a, const v_uint16x16& b, const v_uint16x16& c,
+                                hal::StoreMode mode=hal::STORE_UNALIGNED )
+{
+    const __m256i perm_a = _mm256_setr_epi8(
+         0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5, 10, 11,
+         0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5, 10, 11);
+    const __m256i perm_b = _mm256_setr_epi8(
+         10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5,
+         10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5);
+    const __m256i perm_c = _mm256_setr_epi8(
+         4, 5, 10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15,
+         4, 5, 10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15);
+    // Pre-place every channel's 16-bit elements at the positions they will occupy after the blends.
+    const __m256i ca = _mm256_shuffle_epi8(a.val, perm_a);
+    const __m256i cb = _mm256_shuffle_epi8(b.val, perm_b);
+    const __m256i cc = _mm256_shuffle_epi8(c.val, perm_c);
+    // Byte masks for blendv (set in pairs, one pair per 16-bit element); -1 takes the second operand.
+    const __m256i sel0 = _mm256_setr_epi8(0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1,
+                                          0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0);
+    const __m256i sel1 = _mm256_setr_epi8(0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0,
+                                          -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0);
+
+    const __m256i mix0 = _mm256_blendv_epi8(_mm256_blendv_epi8(ca, cb, sel0), cc, sel1);
+    const __m256i mix1 = _mm256_blendv_epi8(_mm256_blendv_epi8(cb, cc, sel0), ca, sel1);
+    const __m256i mix2 = _mm256_blendv_epi8(_mm256_blendv_epi8(cc, ca, sel0), cb, sel1);
+
+    const __m256i out0 = _mm256_permute2x128_si256(mix0, mix2, 0 + 2*16);
+    // mix1 is already in its final layout and is stored as the middle 32 bytes.
+    const __m256i out2 = _mm256_permute2x128_si256(mix0, mix2, 1 + 3*16);
+
+    switch( mode )
+    {
+    case hal::STORE_ALIGNED_NOCACHE:
+        _mm256_stream_si256((__m256i*)ptr, out0);
+        _mm256_stream_si256((__m256i*)(ptr + 16), mix1);
+        _mm256_stream_si256((__m256i*)(ptr + 32), out2);
+        break;
+    case hal::STORE_ALIGNED:
+        _mm256_store_si256((__m256i*)ptr, out0);
+        _mm256_store_si256((__m256i*)(ptr + 16), mix1);
+        _mm256_store_si256((__m256i*)(ptr + 32), out2);
+        break;
+    default: // unaligned store for every other mode
+        _mm256_storeu_si256((__m256i*)ptr, out0);
+        _mm256_storeu_si256((__m256i*)(ptr + 16), mix1);
+        _mm256_storeu_si256((__m256i*)(ptr + 32), out2);
+        break;
+    }
+}
+
+inline void v_store_interleave( unsigned* ptr, const v_uint32x8& a, const v_uint32x8& b, const v_uint32x8& c,
+                                hal::StoreMode mode=hal::STORE_UNALIGNED )
+{
+    const __m256i ca = _mm256_shuffle_epi32(a.val, 0x6c); // pre-place each channel's dwords for the blends
+    const __m256i cb = _mm256_shuffle_epi32(b.val, 0xb1);
+    const __m256i cc = _mm256_shuffle_epi32(c.val, 0xc6);
+    // blend_epi32: a set bit k in the immediate takes dword k from the second operand.
+    const __m256i mix0 = _mm256_blend_epi32(_mm256_blend_epi32(ca, cb, 0x92), cc, 0x24);
+    const __m256i mix1 = _mm256_blend_epi32(_mm256_blend_epi32(cb, cc, 0x92), ca, 0x24);
+    const __m256i mix2 = _mm256_blend_epi32(_mm256_blend_epi32(cc, ca, 0x92), cb, 0x24);
+
+    const __m256i out0 = _mm256_permute2x128_si256(mix0, mix1, 0 + 2*16);
+    // mix2 is already in its final layout and is stored as the middle 32 bytes.
+    const __m256i out2 = _mm256_permute2x128_si256(mix0, mix1, 1 + 3*16);
+
+    switch( mode )
+    {
+    case hal::STORE_ALIGNED_NOCACHE:
+        _mm256_stream_si256((__m256i*)ptr, out0);
+        _mm256_stream_si256((__m256i*)(ptr + 8), mix2);
+        _mm256_stream_si256((__m256i*)(ptr + 16), out2);
+        break;
+    case hal::STORE_ALIGNED:
+        _mm256_store_si256((__m256i*)ptr, out0);
+        _mm256_store_si256((__m256i*)(ptr + 8), mix2);
+        _mm256_store_si256((__m256i*)(ptr + 16), out2);
+        break;
+    default: // unaligned store for every other mode
+        _mm256_storeu_si256((__m256i*)ptr, out0);
+        _mm256_storeu_si256((__m256i*)(ptr + 8), mix2);
+        _mm256_storeu_si256((__m256i*)(ptr + 16), out2);
+        break;
+    }
+}
+
+inline void v_store_interleave( uint64* ptr, const v_uint64x4& a, const v_uint64x4& b, const v_uint64x4& c,
+                                hal::StoreMode mode=hal::STORE_UNALIGNED )
+{
+    const __m256i ab_lo = _mm256_unpacklo_epi64(a.val, b.val); // per lane: low qword of a, low qword of b
+    const __m256i bc_hi = _mm256_unpackhi_epi64(b.val, c.val); // per lane: high qword of b, high qword of c
+    const __m256i ca_mix = _mm256_blend_epi32(c.val, a.val, 0xcc); // per lane: low qword of c, high qword of a
+    // Assemble the three 32-byte output blocks in memory order.
+    const __m256i out0 = _mm256_permute2x128_si256(ab_lo, ca_mix, 0 + 2*16);
+    const __m256i out1 = _mm256_blend_epi32(ab_lo, bc_hi, 0x0f);
+    const __m256i out2 = _mm256_permute2x128_si256(ca_mix, bc_hi, 1 + 3*16);
+
+    switch( mode )
+    {
+    case hal::STORE_ALIGNED_NOCACHE:
+        _mm256_stream_si256((__m256i*)ptr, out0);
+        _mm256_stream_si256((__m256i*)(ptr + 4), out1);
+        _mm256_stream_si256((__m256i*)(ptr + 8), out2);
+        break;
+    case hal::STORE_ALIGNED:
+        _mm256_store_si256((__m256i*)ptr, out0);
+        _mm256_store_si256((__m256i*)(ptr + 4), out1);
+        _mm256_store_si256((__m256i*)(ptr + 8), out2);
+        break;
+    default: // unaligned store for every other mode
+        _mm256_storeu_si256((__m256i*)ptr, out0);
+        _mm256_storeu_si256((__m256i*)(ptr + 4), out1);
+        _mm256_storeu_si256((__m256i*)(ptr + 8), out2);
+        break;
+    }
+}
+
+inline void v_store_interleave( uchar* ptr, const v_uint8x32& a, const v_uint8x32& b,
+                                const v_uint8x32& c, const v_uint8x32& d,
+                                hal::StoreMode mode=hal::STORE_UNALIGNED )
+{
+    const __m256i ab_lo = _mm256_unpacklo_epi8(a.val, b.val); // byte pairs (a, b) per 128-bit lane
+    const __m256i ab_hi = _mm256_unpackhi_epi8(a.val, b.val);
+    const __m256i cd_lo = _mm256_unpacklo_epi8(c.val, d.val); // byte pairs (c, d) per 128-bit lane
+    const __m256i cd_hi = _mm256_unpackhi_epi8(c.val, d.val);
+    // Combine the pairs into (a, b, c, d) quads, still grouped per lane.
+    const __m256i quad0 = _mm256_unpacklo_epi16(ab_lo, cd_lo);
+    const __m256i quad1 = _mm256_unpackhi_epi16(ab_lo, cd_lo);
+    const __m256i quad2 = _mm256_unpacklo_epi16(ab_hi, cd_hi);
+    const __m256i quad3 = _mm256_unpackhi_epi16(ab_hi, cd_hi);
+    // Regroup the 128-bit lanes into memory order.
+    const __m256i out0 = _mm256_permute2x128_si256(quad0, quad1, 0 + 2*16);
+    const __m256i out1 = _mm256_permute2x128_si256(quad2, quad3, 0 + 2*16);
+    const __m256i out2 = _mm256_permute2x128_si256(quad0, quad1, 1 + 3*16);
+    const __m256i out3 = _mm256_permute2x128_si256(quad2, quad3, 1 + 3*16);
+
+    switch( mode )
+    {
+    case hal::STORE_ALIGNED_NOCACHE:
+        _mm256_stream_si256((__m256i*)ptr, out0);
+        _mm256_stream_si256((__m256i*)(ptr + 32), out1);
+        _mm256_stream_si256((__m256i*)(ptr + 64), out2);
+        _mm256_stream_si256((__m256i*)(ptr + 96), out3);
+        break;
+    case hal::STORE_ALIGNED:
+        _mm256_store_si256((__m256i*)ptr, out0);
+        _mm256_store_si256((__m256i*)(ptr + 32), out1);
+        _mm256_store_si256((__m256i*)(ptr + 64), out2);
+        _mm256_store_si256((__m256i*)(ptr + 96), out3);
+        break;
+    default: // unaligned store for every other mode
+        _mm256_storeu_si256((__m256i*)ptr, out0);
+        _mm256_storeu_si256((__m256i*)(ptr + 32), out1);
+        _mm256_storeu_si256((__m256i*)(ptr + 64), out2);
+        _mm256_storeu_si256((__m256i*)(ptr + 96), out3);
+        break;
+    }
+}
+
+inline void v_store_interleave( ushort* ptr, const v_uint16x16& a, const v_uint16x16& b,
+                                const v_uint16x16& c, const v_uint16x16& d,
+                                hal::StoreMode mode=hal::STORE_UNALIGNED )
+{
+    const __m256i ab_lo = _mm256_unpacklo_epi16(a.val, b.val); // 16-bit pairs (a, b) per 128-bit lane
+    const __m256i ab_hi = _mm256_unpackhi_epi16(a.val, b.val);
+    const __m256i cd_lo = _mm256_unpacklo_epi16(c.val, d.val); // 16-bit pairs (c, d) per 128-bit lane
+    const __m256i cd_hi = _mm256_unpackhi_epi16(c.val, d.val);
+    // Combine the pairs into (a, b, c, d) quads, still grouped per lane.
+    const __m256i quad0 = _mm256_unpacklo_epi32(ab_lo, cd_lo);
+    const __m256i quad1 = _mm256_unpackhi_epi32(ab_lo, cd_lo);
+    const __m256i quad2 = _mm256_unpacklo_epi32(ab_hi, cd_hi);
+    const __m256i quad3 = _mm256_unpackhi_epi32(ab_hi, cd_hi);
+    // Regroup the 128-bit lanes into memory order.
+    const __m256i out0 = _mm256_permute2x128_si256(quad0, quad1, 0 + 2*16);
+    const __m256i out1 = _mm256_permute2x128_si256(quad2, quad3, 0 + 2*16);
+    const __m256i out2 = _mm256_permute2x128_si256(quad0, quad1, 1 + 3*16);
+    const __m256i out3 = _mm256_permute2x128_si256(quad2, quad3, 1 + 3*16);
+
+    switch( mode )
+    {
+    case hal::STORE_ALIGNED_NOCACHE:
+        _mm256_stream_si256((__m256i*)ptr, out0);
+        _mm256_stream_si256((__m256i*)(ptr + 16), out1);
+        _mm256_stream_si256((__m256i*)(ptr + 32), out2);
+        _mm256_stream_si256((__m256i*)(ptr + 48), out3);
+        break;
+    case hal::STORE_ALIGNED:
+        _mm256_store_si256((__m256i*)ptr, out0);
+        _mm256_store_si256((__m256i*)(ptr + 16), out1);
+        _mm256_store_si256((__m256i*)(ptr + 32), out2);
+        _mm256_store_si256((__m256i*)(ptr + 48), out3);
+        break;
+    default: // unaligned store for every other mode
+        _mm256_storeu_si256((__m256i*)ptr, out0);
+        _mm256_storeu_si256((__m256i*)(ptr + 16), out1);
+        _mm256_storeu_si256((__m256i*)(ptr + 32), out2);
+        _mm256_storeu_si256((__m256i*)(ptr + 48), out3);
+        break;
+    }
+}
+
+inline void v_store_interleave( unsigned* ptr, const v_uint32x8& a, const v_uint32x8& b,
+                                const v_uint32x8& c, const v_uint32x8& d,
+                                hal::StoreMode mode=hal::STORE_UNALIGNED )
+{
+    // Writes 32 unsigned values to ptr; the sequence below orders them a0,b0,c0,d0, a1,b1,c1,d1, ...
+    const __m256i ab_lo = _mm256_unpacklo_epi32(a.val, b.val);
+    const __m256i ab_hi = _mm256_unpackhi_epi32(a.val, b.val);
+    const __m256i cd_lo = _mm256_unpacklo_epi32(c.val, d.val);
+    const __m256i cd_hi = _mm256_unpackhi_epi32(c.val, d.val);
+    const __m256i q0 = _mm256_unpacklo_epi64(ab_lo, cd_lo);
+    const __m256i q1 = _mm256_unpackhi_epi64(ab_lo, cd_lo);
+    const __m256i q2 = _mm256_unpacklo_epi64(ab_hi, cd_hi);
+    const __m256i q3 = _mm256_unpackhi_epi64(ab_hi, cd_hi);
+    // The unpacks work per 128-bit half: gather the low halves (0x20) and the high halves (0x31).
+    const __m256i out0 = _mm256_permute2x128_si256(q0, q1, 0x20);
+    const __m256i out2 = _mm256_permute2x128_si256(q0, q1, 0x31);
+    const __m256i out1 = _mm256_permute2x128_si256(q2, q3, 0x20);
+    const __m256i out3 = _mm256_permute2x128_si256(q2, q3, 0x31);
+    __m256i* const dst = (__m256i*)ptr;  // dst + k addresses element offset 8*k
+    if( mode == hal::STORE_ALIGNED )
+    {
+        _mm256_store_si256(dst, out0);
+        _mm256_store_si256(dst + 1, out1);
+        _mm256_store_si256(dst + 2, out2);
+        _mm256_store_si256(dst + 3, out3);
+    }
+    else if( mode == hal::STORE_ALIGNED_NOCACHE )
+    {
+        _mm256_stream_si256(dst, out0);
+        _mm256_stream_si256(dst + 1, out1);
+        _mm256_stream_si256(dst + 2, out2);
+        _mm256_stream_si256(dst + 3, out3);
+    }
+    else
+    {
+        _mm256_storeu_si256(dst, out0);
+        _mm256_storeu_si256(dst + 1, out1);
+        _mm256_storeu_si256(dst + 2, out2);
+        _mm256_storeu_si256(dst + 3, out3);
+    }
+}
+
+inline void v_store_interleave( uint64* ptr, const v_uint64x4& a, const v_uint64x4& b,
+                                const v_uint64x4& c, const v_uint64x4& d,
+                                hal::StoreMode mode=hal::STORE_UNALIGNED )
+{
+    // Writes 16 uint64 values to ptr in the order a0,b0,c0,d0, a1,b1,c1,d1, ... (one a/b/c/d group per store).
+    const __m256i ab_lo = _mm256_unpacklo_epi64(a.val, b.val);
+    const __m256i ab_hi = _mm256_unpackhi_epi64(a.val, b.val);
+    const __m256i cd_lo = _mm256_unpacklo_epi64(c.val, d.val);
+    const __m256i cd_hi = _mm256_unpackhi_epi64(c.val, d.val);
+    const __m256i out0 = _mm256_permute2x128_si256(ab_lo, cd_lo, 0x20);
+    const __m256i out1 = _mm256_permute2x128_si256(ab_hi, cd_hi, 0x20);
+    const __m256i out2 = _mm256_permute2x128_si256(ab_lo, cd_lo, 0x31);
+    const __m256i out3 = _mm256_permute2x128_si256(ab_hi, cd_hi, 0x31);
+    __m256i* const dst = (__m256i*)ptr;  // dst + k addresses element offset 4*k
+    if( mode == hal::STORE_ALIGNED )
+    {
+        _mm256_store_si256(dst, out0);
+        _mm256_store_si256(dst + 1, out1);
+        _mm256_store_si256(dst + 2, out2);
+        _mm256_store_si256(dst + 3, out3);
+    }
+    else if( mode == hal::STORE_ALIGNED_NOCACHE )
+    {
+        _mm256_stream_si256(dst, out0);
+        _mm256_stream_si256(dst + 1, out1);
+        _mm256_stream_si256(dst + 2, out2);
+        _mm256_stream_si256(dst + 3, out3);
+    }
+    else
+    {
+        _mm256_storeu_si256(dst, out0);
+        _mm256_storeu_si256(dst + 1, out1);
+        _mm256_storeu_si256(dst + 2, out2);
+        _mm256_storeu_si256(dst + 3, out3);
+    }
+}
+
+#define OPENCV_HAL_IMPL_AVX_LOADSTORE_INTERLEAVE(_Tpvec0, _Tp0, suffix0, _Tpvec1, _Tp1, suffix1) \
+inline void v_load_deinterleave( const _Tp0* ptr, _Tpvec0& a0, _Tpvec0& b0 ) \
+{ \
+    _Tpvec1 ta, tb; /* filled by the _Tp1 overload, then reinterpreted as suffix0 */ \
+    v_load_deinterleave((const _Tp1*)ptr, ta, tb); \
+    a0 = v_reinterpret_as_##suffix0(ta); \
+    b0 = v_reinterpret_as_##suffix0(tb); \
+} \
+inline void v_load_deinterleave( const _Tp0* ptr, _Tpvec0& a0, _Tpvec0& b0, _Tpvec0& c0 ) \
+{ \
+    _Tpvec1 ta, tb, tc; /* three-channel variant of the same forwarding */ \
+    v_load_deinterleave((const _Tp1*)ptr, ta, tb, tc); \
+    a0 = v_reinterpret_as_##suffix0(ta); \
+    b0 = v_reinterpret_as_##suffix0(tb); \
+    c0 = v_reinterpret_as_##suffix0(tc); \
+} \
+inline void v_load_deinterleave( const _Tp0* ptr, _Tpvec0& a0, _Tpvec0& b0, _Tpvec0& c0, _Tpvec0& d0 ) \
+{ \
+    _Tpvec1 ta, tb, tc, td; /* four-channel variant of the same forwarding */ \
+    v_load_deinterleave((const _Tp1*)ptr, ta, tb, tc, td); \
+    a0 = v_reinterpret_as_##suffix0(ta); \
+    b0 = v_reinterpret_as_##suffix0(tb); \
+    c0 = v_reinterpret_as_##suffix0(tc); \
+    d0 = v_reinterpret_as_##suffix0(td); \
+} \
+inline void v_store_interleave( _Tp0* ptr, const _Tpvec0& a0, const _Tpvec0& b0, \
+                                hal::StoreMode mode=hal::STORE_UNALIGNED ) \
+{ \
+    _Tpvec1 ta = v_reinterpret_as_##suffix1(a0); /* inputs reinterpreted as suffix1 ... */ \
+    _Tpvec1 tb = v_reinterpret_as_##suffix1(b0); \
+    v_store_interleave((_Tp1*)ptr, ta, tb, mode); /* ... and handed to the _Tp1 overload */ \
+} \
+inline void v_store_interleave( _Tp0* ptr, const _Tpvec0& a0, const _Tpvec0& b0, const _Tpvec0& c0, \
+                                hal::StoreMode mode=hal::STORE_UNALIGNED ) \
+{ \
+    _Tpvec1 ta = v_reinterpret_as_##suffix1(a0); \
+    _Tpvec1 tb = v_reinterpret_as_##suffix1(b0); \
+    _Tpvec1 tc = v_reinterpret_as_##suffix1(c0); \
+    v_store_interleave((_Tp1*)ptr, ta, tb, tc, mode); \
+} \
+inline void v_store_interleave( _Tp0* ptr, const _Tpvec0& a0, const _Tpvec0& b0, \
+                                const _Tpvec0& c0, const _Tpvec0& d0, \
+                                hal::StoreMode mode=hal::STORE_UNALIGNED ) \
+{ \
+    _Tpvec1 ta = v_reinterpret_as_##suffix1(a0); \
+    _Tpvec1 tb = v_reinterpret_as_##suffix1(b0); \
+    _Tpvec1 tc = v_reinterpret_as_##suffix1(c0); \
+    _Tpvec1 td = v_reinterpret_as_##suffix1(d0); \
+    v_store_interleave((_Tp1*)ptr, ta, tb, tc, td, mode); \
+}
+
+OPENCV_HAL_IMPL_AVX_LOADSTORE_INTERLEAVE(v_int8x32, schar, s8, v_uint8x32, uchar, u8)
+OPENCV_HAL_IMPL_AVX_LOADSTORE_INTERLEAVE(v_int16x16, short, s16, v_uint16x16, ushort, u16)
+OPENCV_HAL_IMPL_AVX_LOADSTORE_INTERLEAVE(v_int32x8, int, s32, v_uint32x8, unsigned, u32)
+OPENCV_HAL_IMPL_AVX_LOADSTORE_INTERLEAVE(v_float32x8, float, f32, v_uint32x8, unsigned, u32)
+OPENCV_HAL_IMPL_AVX_LOADSTORE_INTERLEAVE(v_int64x4, int64, s64, v_uint64x4, uint64, u64)
+OPENCV_HAL_IMPL_AVX_LOADSTORE_INTERLEAVE(v_float64x4, double, f64, v_uint64x4, uint64, u64)
+
+// FP16: load 8 half-precision values from ptr (unaligned 128-bit load) and widen them to float32
+inline v_float32x8 v256_load_expand(const float16_t* ptr)
+{
+    const __m128i packed = _mm_loadu_si128((const __m128i*)ptr);
+    return v_float32x8(_mm256_cvtph_ps(packed));
+}
+// FP16: convert the 8 floats of a to half precision (rounding immediate 0) and store them unaligned
+inline void v_pack_store(float16_t* ptr, const v_float32x8& a)
+{
+    _mm_storeu_si128((__m128i*)ptr, _mm256_cvtps_ph(a.val, 0));
+}
+// Issues _mm256_zeroall(), which clears every YMM register.
+inline void v256_cleanup() { _mm256_zeroall(); }
+
+CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
+
+//! @endcond
+
+} // cv::
+
+#endif // OPENCV_HAL_INTRIN_AVX_HPP
diff --git a/IPL/include/opencv/opencv2/core/hal/intrin_avx512.hpp b/IPL/include/opencv/opencv2/core/hal/intrin_avx512.hpp
new file mode 100644
index 0000000..e189582
--- /dev/null
+++ b/IPL/include/opencv/opencv2/core/hal/intrin_avx512.hpp
@@ -0,0 +1,3049 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html
+
+#ifndef OPENCV_HAL_INTRIN_AVX512_HPP
+#define OPENCV_HAL_INTRIN_AVX512_HPP
+
+#if defined(_MSC_VER) && (_MSC_VER < 1920/*MSVS2019*/)
+# pragma warning(disable:4146)  // unary minus operator applied to unsigned type, result still unsigned
+# pragma warning(disable:4309)  // 'argument': truncation of constant value
+# pragma warning(disable:4310)  // cast truncates constant value
+#endif
+
+#define CVT_ROUND_MODES_IMPLEMENTED 0
+
+#define CV_SIMD512 1
+#define CV_SIMD512_64F 1
+#define CV_SIMD512_FP16 0  // no native operations with FP16 type. Only load/store from float32x8 are available (if CV_FP16 == 1)
+
+#define _v512_set_epu64(a7, a6, a5, a4, a3, a2, a1, a0) _mm512_set_epi64((int64)(a7),(int64)(a6),(int64)(a5),(int64)(a4),(int64)(a3),(int64)(a2),(int64)(a1),(int64)(a0)) // unsigned-friendly wrapper: casts each argument to int64 and forwards it in the same (a7 first) order
+#define _v512_set_epu32(a15, a14, a13, a12, a11, a10,  a9,  a8,  a7,  a6,  a5,  a4,  a3,  a2,  a1,  a0) /* packs each pair (a15,a14)..(a1,a0) into one 64-bit element, first of the pair in the upper 32 bits; arguments are widened to int64 unmasked, so a negative signed argument would sign-extend over its partner - pass unsigned values */ \
+        _mm512_set_epi64(((int64)(a15)<<32)|(int64)(a14), ((int64)(a13)<<32)|(int64)(a12), ((int64)(a11)<<32)|(int64)(a10), ((int64)( a9)<<32)|(int64)( a8), \
+                         ((int64)( a7)<<32)|(int64)( a6), ((int64)( a5)<<32)|(int64)( a4), ((int64)( a3)<<32)|(int64)( a2), ((int64)( a1)<<32)|(int64)( a0))
+#define _v512_set_epu16(a31, a30, a29, a28, a27, a26, a25, a24, a23, a22, a21, a20, a19, a18, a17, a16, \
+                        a15, a14, a13, a12, a11, a10,  a9,  a8,  a7,  a6,  a5,  a4,  a3,  a2,  a1,  a0) /* packs adjacent pairs into 32-bit values (first of the pair shifted left by 16) and forwards them to _v512_set_epu32; values are not masked to 16 bits */ \
+        _v512_set_epu32(((unsigned)(a31)<<16)|(unsigned)(a30), ((unsigned)(a29)<<16)|(unsigned)(a28), ((unsigned)(a27)<<16)|(unsigned)(a26), ((unsigned)(a25)<<16)|(unsigned)(a24), \
+                        ((unsigned)(a23)<<16)|(unsigned)(a22), ((unsigned)(a21)<<16)|(unsigned)(a20), ((unsigned)(a19)<<16)|(unsigned)(a18), ((unsigned)(a17)<<16)|(unsigned)(a16), \
+                        ((unsigned)(a15)<<16)|(unsigned)(a14), ((unsigned)(a13)<<16)|(unsigned)(a12), ((unsigned)(a11)<<16)|(unsigned)(a10), ((unsigned)( a9)<<16)|(unsigned)( a8), \
+                        ((unsigned)( a7)<<16)|(unsigned)( a6), ((unsigned)( a5)<<16)|(unsigned)( a4), ((unsigned)( a3)<<16)|(unsigned)( a2), ((unsigned)( a1)<<16)|(unsigned)( a0))
+#define _v512_set_epu8(a63, a62, a61, a60, a59, a58, a57, a56, a55, a54, a53, a52, a51, a50, a49, a48, \
+                       a47, a46, a45, a44, a43, a42, a41, a40, a39, a38, a37, a36, a35, a34, a33, a32, \
+                       a31, a30, a29, a28, a27, a26, a25, a24, a23, a22, a21, a20, a19, a18, a17, a16, \
+                       a15, a14, a13, a12, a11, a10,  a9,  a8,  a7,  a6,  a5,  a4,  a3,  a2,  a1,  a0) /* packs groups of four into 32-bit values (first of the group in the top byte) and forwards them to _v512_set_epu32; values are not masked to 8 bits */ \
+        _v512_set_epu32(((unsigned)(a63)<<24)|((unsigned)(a62)<<16)|((unsigned)(a61)<<8)|(unsigned)(a60),((unsigned)(a59)<<24)|((unsigned)(a58)<<16)|((unsigned)(a57)<<8)|(unsigned)(a56), \
+                        ((unsigned)(a55)<<24)|((unsigned)(a54)<<16)|((unsigned)(a53)<<8)|(unsigned)(a52),((unsigned)(a51)<<24)|((unsigned)(a50)<<16)|((unsigned)(a49)<<8)|(unsigned)(a48), \
+                        ((unsigned)(a47)<<24)|((unsigned)(a46)<<16)|((unsigned)(a45)<<8)|(unsigned)(a44),((unsigned)(a43)<<24)|((unsigned)(a42)<<16)|((unsigned)(a41)<<8)|(unsigned)(a40), \
+                        ((unsigned)(a39)<<24)|((unsigned)(a38)<<16)|((unsigned)(a37)<<8)|(unsigned)(a36),((unsigned)(a35)<<24)|((unsigned)(a34)<<16)|((unsigned)(a33)<<8)|(unsigned)(a32), \
+                        ((unsigned)(a31)<<24)|((unsigned)(a30)<<16)|((unsigned)(a29)<<8)|(unsigned)(a28),((unsigned)(a27)<<24)|((unsigned)(a26)<<16)|((unsigned)(a25)<<8)|(unsigned)(a24), \
+                        ((unsigned)(a23)<<24)|((unsigned)(a22)<<16)|((unsigned)(a21)<<8)|(unsigned)(a20),((unsigned)(a19)<<24)|((unsigned)(a18)<<16)|((unsigned)(a17)<<8)|(unsigned)(a16), \
+                        ((unsigned)(a15)<<24)|((unsigned)(a14)<<16)|((unsigned)(a13)<<8)|(unsigned)(a12),((unsigned)(a11)<<24)|((unsigned)(a10)<<16)|((unsigned)( a9)<<8)|(unsigned)( a8), \
+                        ((unsigned)( a7)<<24)|((unsigned)( a6)<<16)|((unsigned)( a5)<<8)|(unsigned)( a4),((unsigned)( a3)<<24)|((unsigned)( a2)<<16)|((unsigned)( a1)<<8)|(unsigned)( a0))
+#define _v512_set_epi8(a63, a62, a61, a60, a59, a58, a57, a56, a55, a54, a53, a52, a51, a50, a49, a48, \
+                       a47, a46, a45, a44, a43, a42, a41, a40, a39, a38, a37, a36, a35, a34, a33, a32, \
+                       a31, a30, a29, a28, a27, a26, a25, a24, a23, a22, a21, a20, a19, a18, a17, a16, \
+                       a15, a14, a13, a12, a11, a10,  a9,  a8,  a7,  a6,  a5,  a4,  a3,  a2,  a1,  a0) /* signed-friendly variant: every argument is cast to uchar first, so only its low 8 bits reach _v512_set_epu8 */ \
+        _v512_set_epu8((uchar)(a63), (uchar)(a62), (uchar)(a61), (uchar)(a60), (uchar)(a59), (uchar)(a58), (uchar)(a57), (uchar)(a56), \
+                       (uchar)(a55), (uchar)(a54), (uchar)(a53), (uchar)(a52), (uchar)(a51), (uchar)(a50), (uchar)(a49), (uchar)(a48), \
+                       (uchar)(a47), (uchar)(a46), (uchar)(a45), (uchar)(a44), (uchar)(a43), (uchar)(a42), (uchar)(a41), (uchar)(a40), \
+                       (uchar)(a39), (uchar)(a38), (uchar)(a37), (uchar)(a36), (uchar)(a35), (uchar)(a34), (uchar)(a33), (uchar)(a32), \
+                       (uchar)(a31), (uchar)(a30), (uchar)(a29), (uchar)(a28), (uchar)(a27), (uchar)(a26), (uchar)(a25), (uchar)(a24), \
+                       (uchar)(a23), (uchar)(a22), (uchar)(a21), (uchar)(a20), (uchar)(a19), (uchar)(a18), (uchar)(a17), (uchar)(a16), \
+                       (uchar)(a15), (uchar)(a14), (uchar)(a13), (uchar)(a12), (uchar)(a11), (uchar)(a10), (uchar)( a9), (uchar)( a8), \
+                       (uchar)( a7), (uchar)( a6), (uchar)( a5), (uchar)( a4), (uchar)( a3), (uchar)( a2), (uchar)( a1), (uchar)( a0))
+
+#ifndef _mm512_cvtpd_pslo  // define a replacement only when no macro of this name exists
+#ifdef _mm512_zextsi256_si512  // NOTE(review): the guard names the si256 macro while the ps256 variant is used below - confirm both are always defined together
+#define _mm512_cvtpd_pslo(a) _mm512_zextps256_ps512(_mm512_cvtpd_ps(a))
+#else
+// Fallback when the zero-extending intrinsic is not detected as a macro: a plain widening cast, which does not guarantee zeroed upper bits.
+#define _mm512_cvtpd_pslo(a) _mm512_castps256_ps512(_mm512_cvtpd_ps(a))
+#endif
+#endif
+///////// Utils ////////////
+
+namespace  // NOTE(review): unnamed namespace in a header - each including translation unit gets its own copy of these helpers
+{
+// _v512_combine: builds a 512-bit register with lo in bits 0..255 and hi inserted at 256-bit position 1.
+inline __m512i _v512_combine(const __m256i& lo, const __m256i& hi)
+{ return _mm512_inserti32x8(_mm512_castsi256_si512(lo), hi, 1); }
+
+inline __m512 _v512_combine(const __m256& lo, const __m256& hi)
+{ return _mm512_insertf32x8(_mm512_castps256_ps512(lo), hi, 1); }
+
+inline __m512d _v512_combine(const __m256d& lo, const __m256d& hi)
+{ return _mm512_insertf64x4(_mm512_castpd256_pd512(lo), hi, 1); }
+// _v_cvtsi512_si32: returns the lowest 32-bit element of a as an int.
+inline int _v_cvtsi512_si32(const __m512i& a)
+{ return _mm_cvtsi128_si32(_mm512_castsi512_si128(a)); }
+// _v512_extract_high: returns the 256-bit half at position 1 of v.
+inline __m256i _v512_extract_high(const __m512i& v)
+{ return _mm512_extracti32x8_epi32(v, 1); }
+
+inline __m256  _v512_extract_high(const __m512& v)
+{ return _mm512_extractf32x8_ps(v, 1); }
+
+inline __m256d _v512_extract_high(const __m512d& v)
+{ return _mm512_extractf64x4_pd(v, 1); }
+// _v512_extract_low: returns the lower 256 bits of v via a narrowing cast.
+inline __m256i _v512_extract_low(const __m512i& v)
+{ return _mm512_castsi512_si256(v); }
+
+inline __m256  _v512_extract_low(const __m512& v)
+{ return _mm512_castps512_ps256(v); }
+
+inline __m256d _v512_extract_low(const __m512d& v)
+{ return _mm512_castpd512_pd256(v); }
+// _v512_insert: returns a with its 256-bit half at position 0 replaced by b.
+inline __m512i _v512_insert(const __m512i& a, const __m256i& b)
+{ return _mm512_inserti32x8(a, b, 0); }
+
+inline __m512 _v512_insert(const __m512& a, const __m256& b)
+{ return _mm512_insertf32x8(a, b, 0); }
+
+inline __m512d _v512_insert(const __m512d& a, const __m256d& b)
+{ return _mm512_insertf64x4(a, b, 0); }
+
+}
+
+namespace cv
+{
+
+//! @cond IGNORED
+
+CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
+
+///////// Types ////////////
+
+struct v_uint8x64
+{
+    typedef uchar lane_type;
+    enum { nlanes = 64 };  // 64 x 8-bit lanes in one 512-bit register
+    __m512i val;
+    // Constructors: wrap a raw register, list all 64 lanes (v0 ends up in the lowest byte), or zero-fill.
+    explicit v_uint8x64(__m512i v) : val(v) {}
+    v_uint8x64(uchar v0,  uchar v1,  uchar v2,  uchar v3,
+               uchar v4,  uchar v5,  uchar v6,  uchar v7,
+               uchar v8,  uchar v9,  uchar v10, uchar v11,
+               uchar v12, uchar v13, uchar v14, uchar v15,
+               uchar v16, uchar v17, uchar v18, uchar v19,
+               uchar v20, uchar v21, uchar v22, uchar v23,
+               uchar v24, uchar v25, uchar v26, uchar v27,
+               uchar v28, uchar v29, uchar v30, uchar v31,
+               uchar v32, uchar v33, uchar v34, uchar v35,
+               uchar v36, uchar v37, uchar v38, uchar v39,
+               uchar v40, uchar v41, uchar v42, uchar v43,
+               uchar v44, uchar v45, uchar v46, uchar v47,
+               uchar v48, uchar v49, uchar v50, uchar v51,
+               uchar v52, uchar v53, uchar v54, uchar v55,
+               uchar v56, uchar v57, uchar v58, uchar v59,
+               uchar v60, uchar v61, uchar v62, uchar v63)
+        : val(_v512_set_epu8(v63, v62, v61, v60, v59, v58, v57, v56, v55, v54, v53, v52, v51, v50, v49, v48,
+                             v47, v46, v45, v44, v43, v42, v41, v40, v39, v38, v37, v36, v35, v34, v33, v32,
+                             v31, v30, v29, v28, v27, v26, v25, v24, v23, v22, v21, v20, v19, v18, v17, v16,
+                             v15, v14, v13, v12, v11, v10, v9,  v8,  v7,  v6,  v5,  v4,  v3,  v2,  v1,  v0))
+    {
+    }
+    v_uint8x64() : val(_mm512_setzero_si512()) {}
+    uchar get0() const { const int low32 = _v_cvtsi512_si32(val); return (uchar)low32; }
+};
+
+struct v_int8x64
+{
+    typedef schar lane_type;
+    enum { nlanes = 64 };  // 64 x 8-bit lanes in one 512-bit register
+    __m512i val;
+    // Constructors: wrap a raw register, list all 64 lanes (v0 ends up in the lowest byte), or zero-fill.
+    explicit v_int8x64(__m512i v) : val(v) {}
+    v_int8x64(schar v0,  schar v1,  schar v2,  schar v3,
+              schar v4,  schar v5,  schar v6,  schar v7,
+              schar v8,  schar v9,  schar v10, schar v11,
+              schar v12, schar v13, schar v14, schar v15,
+              schar v16, schar v17, schar v18, schar v19,
+              schar v20, schar v21, schar v22, schar v23,
+              schar v24, schar v25, schar v26, schar v27,
+              schar v28, schar v29, schar v30, schar v31,
+              schar v32, schar v33, schar v34, schar v35,
+              schar v36, schar v37, schar v38, schar v39,
+              schar v40, schar v41, schar v42, schar v43,
+              schar v44, schar v45, schar v46, schar v47,
+              schar v48, schar v49, schar v50, schar v51,
+              schar v52, schar v53, schar v54, schar v55,
+              schar v56, schar v57, schar v58, schar v59,
+              schar v60, schar v61, schar v62, schar v63)
+        : val(_v512_set_epi8(v63, v62, v61, v60, v59, v58, v57, v56, v55, v54, v53, v52, v51, v50, v49, v48,
+                             v47, v46, v45, v44, v43, v42, v41, v40, v39, v38, v37, v36, v35, v34, v33, v32,
+                             v31, v30, v29, v28, v27, v26, v25, v24, v23, v22, v21, v20, v19, v18, v17, v16,
+                             v15, v14, v13, v12, v11, v10, v9,  v8,  v7,  v6,  v5,  v4,  v3,  v2,  v1,  v0))
+    {
+    }
+    v_int8x64() : val(_mm512_setzero_si512()) {}
+    schar get0() const { const int low32 = _v_cvtsi512_si32(val); return (schar)low32; }
+};
+
+struct v_uint16x32
+{
+    typedef ushort lane_type;
+    enum { nlanes = 32 };  // 32 x 16-bit lanes in one 512-bit register
+    __m512i val;
+    // Constructors: wrap a raw register, list all 32 lanes (v0 ends up in the lowest 16 bits), or zero-fill.
+    explicit v_uint16x32(__m512i v) : val(v) {}
+    v_uint16x32(ushort v0,  ushort v1,  ushort v2,  ushort v3,
+                ushort v4,  ushort v5,  ushort v6,  ushort v7,
+                ushort v8,  ushort v9,  ushort v10, ushort v11,
+                ushort v12, ushort v13, ushort v14, ushort v15,
+                ushort v16, ushort v17, ushort v18, ushort v19,
+                ushort v20, ushort v21, ushort v22, ushort v23,
+                ushort v24, ushort v25, ushort v26, ushort v27,
+                ushort v28, ushort v29, ushort v30, ushort v31)
+        : val(_v512_set_epu16(v31, v30, v29, v28, v27, v26, v25, v24, v23, v22, v21, v20, v19, v18, v17, v16,
+                              v15, v14, v13, v12, v11, v10, v9,  v8,  v7,  v6,  v5,  v4,  v3,  v2,  v1,  v0))
+    {
+    }
+    v_uint16x32() : val(_mm512_setzero_si512()) {}
+    ushort get0() const { const int low32 = _v_cvtsi512_si32(val); return (ushort)low32; }
+};
+
+struct v_int16x32
+{
+    typedef short lane_type;
+    enum { nlanes = 32 };  // 32 x 16-bit lanes in one 512-bit register
+    __m512i val;
+    // Constructors: wrap a raw register, list all 32 lanes (each cast to ushort before packing), or zero-fill.
+    explicit v_int16x32(__m512i v) : val(v) {}
+    v_int16x32(short v0,  short v1,  short v2,  short v3,  short v4,  short v5,  short v6,  short v7,
+               short v8,  short v9,  short v10, short v11, short v12, short v13, short v14, short v15,
+               short v16, short v17, short v18, short v19, short v20, short v21, short v22, short v23,
+               short v24, short v25, short v26, short v27, short v28, short v29, short v30, short v31)
+        : val(_v512_set_epu16((ushort)v31, (ushort)v30, (ushort)v29, (ushort)v28, (ushort)v27, (ushort)v26, (ushort)v25, (ushort)v24,
+                              (ushort)v23, (ushort)v22, (ushort)v21, (ushort)v20, (ushort)v19, (ushort)v18, (ushort)v17, (ushort)v16,
+                              (ushort)v15, (ushort)v14, (ushort)v13, (ushort)v12, (ushort)v11, (ushort)v10, (ushort)v9 , (ushort)v8,
+                              (ushort)v7 , (ushort)v6 , (ushort)v5 , (ushort)v4 , (ushort)v3 , (ushort)v2 , (ushort)v1 , (ushort)v0))
+    {
+    }
+    v_int16x32() : val(_mm512_setzero_si512()) {}
+    short get0() const { const int low32 = _v_cvtsi512_si32(val); return (short)low32; }
+};
+
+struct v_uint32x16
+{
+    typedef unsigned lane_type;
+    enum { nlanes = 16 };  // 16 x 32-bit lanes in one 512-bit register
+    __m512i val;
+    // Constructors: wrap a raw register, list all 16 lanes in memory order (setr), or zero-fill.
+    explicit v_uint32x16(__m512i v) : val(v) {}
+    v_uint32x16(unsigned v0,  unsigned v1,  unsigned v2,  unsigned v3,
+                unsigned v4,  unsigned v5,  unsigned v6,  unsigned v7,
+                unsigned v8,  unsigned v9,  unsigned v10, unsigned v11,
+                unsigned v12, unsigned v13, unsigned v14, unsigned v15)
+        : val(_mm512_setr_epi32((int)v0,  (int)v1,  (int)v2,  (int)v3, (int)v4,  (int)v5,  (int)v6,  (int)v7,
+                                (int)v8,  (int)v9,  (int)v10, (int)v11, (int)v12, (int)v13, (int)v14, (int)v15))
+    {
+    }
+    v_uint32x16() : val(_mm512_setzero_si512()) {}
+    unsigned get0() const { const int low32 = _v_cvtsi512_si32(val); return (unsigned)low32; }
+};
+
+struct v_int32x16
+{
+    typedef int lane_type;
+    enum { nlanes = 16 };  // 16 x 32-bit lanes in one 512-bit register
+    __m512i val;
+    // Constructors: wrap a raw register, list all 16 lanes in memory order (setr), or zero-fill.
+    explicit v_int32x16(__m512i v) : val(v) {}
+    v_int32x16(int v0, int v1, int v2,  int v3,  int v4,  int v5,  int v6,  int v7,
+               int v8, int v9, int v10, int v11, int v12, int v13, int v14, int v15)
+        : val(_mm512_setr_epi32(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15))
+    {
+    }
+    v_int32x16() : val(_mm512_setzero_si512()) {}
+    int get0() const { const int low32 = _v_cvtsi512_si32(val); return low32; }
+};
+
+struct v_float32x16
+{
+    typedef float lane_type;
+    enum { nlanes = 16 };  // 16 x float lanes in one 512-bit register
+    __m512 val;
+    // Constructors: wrap a raw register, list all 16 lanes in memory order (setr), or zero-fill.
+    explicit v_float32x16(__m512 v) : val(v) {}
+    v_float32x16(float v0, float v1, float v2,  float v3,  float v4,  float v5,  float v6,  float v7,
+                 float v8, float v9, float v10, float v11, float v12, float v13, float v14, float v15)
+        : val(_mm512_setr_ps(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15))
+    {
+    }
+    v_float32x16() : val(_mm512_setzero_ps()) {}
+    float get0() const { const __m128 low4 = _mm512_castps512_ps128(val); return _mm_cvtss_f32(low4); }
+};
+
+struct v_uint64x8
+{
+    typedef uint64 lane_type;
+    enum { nlanes = 8 };  // 8 x 64-bit lanes in one 512-bit register
+    __m512i val;
+    // Constructors: wrap a raw register, list all 8 lanes in memory order (setr), or zero-fill.
+    explicit v_uint64x8(__m512i v) : val(v) {}
+    v_uint64x8(uint64 v0, uint64 v1, uint64 v2, uint64 v3, uint64 v4, uint64 v5, uint64 v6, uint64 v7)
+        : val(_mm512_setr_epi64((int64)v0, (int64)v1, (int64)v2, (int64)v3, (int64)v4, (int64)v5, (int64)v6, (int64)v7)) {}
+    v_uint64x8() : val(_mm512_setzero_si512()) {}
+    uint64 get0() const
+    {
+    #if defined __x86_64__ || defined _M_X64
+        return (uint64)_mm_cvtsi128_si64(_mm512_castsi512_si128(val));
+    #else
+        const unsigned lo32 = (unsigned)_mm_cvtsi128_si32(_mm512_castsi512_si128(val));  // no 64-bit move here: read lane 0 as two halves
+        const unsigned hi32 = (unsigned)_mm_cvtsi128_si32(_mm512_castsi512_si128(_mm512_srli_epi64(val, 32)));
+        return ((uint64)hi32 << 32) | lo32;
+    #endif
+    }
+};
+
+struct v_int64x8
+{
+    typedef int64 lane_type;
+    enum { nlanes = 8 };  // 8 x 64-bit lanes in one 512-bit register
+    __m512i val;
+    // Constructors: wrap a raw register, list all 8 lanes in memory order (setr), or zero-fill.
+    explicit v_int64x8(__m512i v) : val(v) {}
+    v_int64x8(int64 v0, int64 v1, int64 v2, int64 v3, int64 v4, int64 v5, int64 v6, int64 v7)
+        : val(_mm512_setr_epi64(v0, v1, v2, v3, v4, v5, v6, v7)) {}
+    v_int64x8() : val(_mm512_setzero_si512()) {}
+    // Returns lane 0; without a 64-bit target it is assembled from two 32-bit reads.
+    int64 get0() const
+    {
+    #if defined __x86_64__ || defined _M_X64
+        return (int64)_mm_cvtsi128_si64(_mm512_castsi512_si128(val));
+    #else
+        const unsigned lo32 = (unsigned)_mm_cvtsi128_si32(_mm512_castsi512_si128(val));
+        const unsigned hi32 = (unsigned)_mm_cvtsi128_si32(_mm512_castsi512_si128(_mm512_srli_epi64(val, 32)));
+        return (int64)(((uint64)hi32 << 32) | lo32);
+    #endif
+    }
+};
+
+struct v_float64x8
+{
+    typedef double lane_type;
+    enum { nlanes = 8 };  // 8 x double lanes in one 512-bit register
+    __m512d val;
+    // Constructors: wrap a raw register, list all 8 lanes in memory order (setr), or zero-fill.
+    explicit v_float64x8(__m512d v) : val(v) {}
+    v_float64x8(double v0, double v1, double v2, double v3, double v4, double v5, double v6, double v7)
+        : val(_mm512_setr_pd(v0, v1, v2, v3, v4, v5, v6, v7)) {}
+    v_float64x8() : val(_mm512_setzero_pd()) {}
+    double get0() const { const __m128d low2 = _mm512_castpd512_pd128(val); return _mm_cvtsd_f64(low2); }
+};
+
+//////////////// Load and store operations ///////////////
+
+#define OPENCV_HAL_IMPL_AVX512_LOADSTORE(_Tpvec, _Tp) \
+    inline _Tpvec v512_load(const _Tp* ptr) /* full 512-bit load through the unaligned intrinsic */ \
+    { return _Tpvec(_mm512_loadu_si512((const __m512i*)ptr)); } \
+    inline _Tpvec v512_load_aligned(const _Tp* ptr) /* full 512-bit load through the aligned intrinsic */ \
+    { return _Tpvec(_mm512_load_si512((const __m512i*)ptr)); } \
+    inline _Tpvec v512_load_low(const _Tp* ptr) \
+    { \
+        /* reads 256 bits; the widening cast leaves the upper 256 bits unspecified */ \
+        return _Tpvec(_mm512_castsi256_si512(_mm256_loadu_si256((const __m256i*)ptr))); \
+    } \
+    inline _Tpvec v512_load_halves(const _Tp* ptr0, const _Tp* ptr1) \
+    { \
+        const __m256i lower = _mm256_loadu_si256((const __m256i*)ptr0); \
+        const __m256i upper = _mm256_loadu_si256((const __m256i*)ptr1); \
+        return _Tpvec(_v512_combine(lower, upper)); \
+    } \
+    inline void v_store(_Tp* ptr, const _Tpvec& a) \
+    { _mm512_storeu_si512((__m512i*)ptr, a.val); } \
+    inline void v_store_aligned(_Tp* ptr, const _Tpvec& a) \
+    { _mm512_store_si512((__m512i*)ptr, a.val); } \
+    inline void v_store_aligned_nocache(_Tp* ptr, const _Tpvec& a) \
+    { _mm512_stream_si512((__m512i*)ptr, a.val); } \
+    inline void v_store(_Tp* ptr, const _Tpvec& a, hal::StoreMode mode) \
+    { \
+        if( mode == hal::STORE_ALIGNED_NOCACHE ) \
+            _mm512_stream_si512((__m512i*)ptr, a.val); \
+        else if( mode == hal::STORE_UNALIGNED ) \
+            _mm512_storeu_si512((__m512i*)ptr, a.val); \
+        else /* every remaining mode takes the aligned store */ \
+            _mm512_store_si512((__m512i*)ptr, a.val); \
+    } \
+    inline void v_store_low(_Tp* ptr, const _Tpvec& a) \
+    { const __m256i lower = _v512_extract_low(a.val); _mm256_storeu_si256((__m256i*)ptr, lower); } \
+    inline void v_store_high(_Tp* ptr, const _Tpvec& a) \
+    { const __m256i upper = _v512_extract_high(a.val); _mm256_storeu_si256((__m256i*)ptr, upper); }
+
+OPENCV_HAL_IMPL_AVX512_LOADSTORE(v_uint8x64,  uchar)
+OPENCV_HAL_IMPL_AVX512_LOADSTORE(v_int8x64,   schar)
+OPENCV_HAL_IMPL_AVX512_LOADSTORE(v_uint16x32, ushort)
+OPENCV_HAL_IMPL_AVX512_LOADSTORE(v_int16x32,  short)
+OPENCV_HAL_IMPL_AVX512_LOADSTORE(v_uint32x16,  unsigned)
+OPENCV_HAL_IMPL_AVX512_LOADSTORE(v_int32x16,   int)
+OPENCV_HAL_IMPL_AVX512_LOADSTORE(v_uint64x8,  uint64)
+OPENCV_HAL_IMPL_AVX512_LOADSTORE(v_int64x8,   int64)
+
+#define OPENCV_HAL_IMPL_AVX512_LOADSTORE_FLT(_Tpvec, _Tp, suffix, halfreg) \
+    inline _Tpvec v512_load(const _Tp* ptr) /* full 512-bit load through the unaligned intrinsic */ \
+    { return _Tpvec(_mm512_loadu_##suffix(ptr)); } \
+    inline _Tpvec v512_load_aligned(const _Tp* ptr) /* full 512-bit load through the aligned intrinsic */ \
+    { return _Tpvec(_mm512_load_##suffix(ptr)); } \
+    inline _Tpvec v512_load_low(const _Tp* ptr) \
+    { \
+        const halfreg lower = _mm256_loadu_##suffix(ptr); /* upper 256 bits stay unspecified after the cast */ \
+        return _Tpvec(_mm512_cast##suffix##256_##suffix##512(lower)); \
+    } \
+    inline _Tpvec v512_load_halves(const _Tp* ptr0, const _Tp* ptr1) \
+    { \
+        const halfreg lower = _mm256_loadu_##suffix(ptr0); \
+        const halfreg upper = _mm256_loadu_##suffix(ptr1); \
+        return _Tpvec(_v512_combine(lower, upper)); \
+    } \
+    inline void v_store(_Tp* ptr, const _Tpvec& a) \
+    { _mm512_storeu_##suffix(ptr, a.val); } \
+    inline void v_store_aligned(_Tp* ptr, const _Tpvec& a) \
+    { _mm512_store_##suffix(ptr, a.val); } \
+    inline void v_store_aligned_nocache(_Tp* ptr, const _Tpvec& a) \
+    { _mm512_stream_##suffix(ptr, a.val); } \
+    inline void v_store(_Tp* ptr, const _Tpvec& a, hal::StoreMode mode) \
+    { \
+        if( mode == hal::STORE_ALIGNED_NOCACHE ) \
+            _mm512_stream_##suffix(ptr, a.val); \
+        else if( mode == hal::STORE_UNALIGNED ) \
+            _mm512_storeu_##suffix(ptr, a.val); \
+        else /* every remaining mode takes the aligned store */ \
+            _mm512_store_##suffix(ptr, a.val); \
+    } \
+    inline void v_store_low(_Tp* ptr, const _Tpvec& a) \
+    { const halfreg lower = _v512_extract_low(a.val); _mm256_storeu_##suffix(ptr, lower); } \
+    inline void v_store_high(_Tp* ptr, const _Tpvec& a) \
+    { const halfreg upper = _v512_extract_high(a.val); _mm256_storeu_##suffix(ptr, upper); }
+
+OPENCV_HAL_IMPL_AVX512_LOADSTORE_FLT(v_float32x16, float,  ps, __m256)
+OPENCV_HAL_IMPL_AVX512_LOADSTORE_FLT(v_float64x8, double, pd, __m256d)
+
+#define OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, _Tpvecf, suffix, cast) /* defines v_reinterpret_as_<suffix>(const _Tpvecf&) returning _Tpvec */ \
+    inline _Tpvec v_reinterpret_as_##suffix(const _Tpvecf& a)   \
+    { return _Tpvec(cast(a.val)); }  /* cast is applied to the raw register before it is wrapped */
+
+#define OPENCV_HAL_IMPL_AVX512_INIT(_Tpvec, _Tp, suffix, ssuffix, ctype_s)         \
+    inline _Tpvec v512_setzero_##suffix()  /* register from _mm512_setzero_si512 */ \
+    { return _Tpvec(_mm512_setzero_si512()); }                                     \
+    inline _Tpvec v512_setall_##suffix(_Tp v)  /* v is cast to ctype_s, then broadcast with _mm512_set1_<ssuffix> */ \
+    { return _Tpvec(_mm512_set1_##ssuffix((ctype_s)v)); }                          \
+    OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, v_uint8x64,   suffix, OPENCV_HAL_NOP)  /* integer sources go through OPENCV_HAL_NOP (defined elsewhere) */ \
+    OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, v_int8x64,    suffix, OPENCV_HAL_NOP)      \
+    OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, v_uint16x32,  suffix, OPENCV_HAL_NOP)      \
+    OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, v_int16x32,   suffix, OPENCV_HAL_NOP)      \
+    OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, v_uint32x16,  suffix, OPENCV_HAL_NOP)      \
+    OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, v_int32x16,   suffix, OPENCV_HAL_NOP)      \
+    OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, v_uint64x8,   suffix, OPENCV_HAL_NOP)      \
+    OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, v_int64x8,    suffix, OPENCV_HAL_NOP)      \
+    OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, v_float32x16, suffix, _mm512_castps_si512) /* floating-point sources use the ps/pd -> si512 cast intrinsics */ \
+    OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, v_float64x8,  suffix, _mm512_castpd_si512)
+
+OPENCV_HAL_IMPL_AVX512_INIT(v_uint8x64,  uchar,    u8,  epi8,   char)
+OPENCV_HAL_IMPL_AVX512_INIT(v_int8x64,   schar,    s8,  epi8,   char)
+OPENCV_HAL_IMPL_AVX512_INIT(v_uint16x32, ushort,   u16, epi16,  short)
+OPENCV_HAL_IMPL_AVX512_INIT(v_int16x32,  short,    s16, epi16,  short)
+OPENCV_HAL_IMPL_AVX512_INIT(v_uint32x16, unsigned, u32, epi32,  int)
+OPENCV_HAL_IMPL_AVX512_INIT(v_int32x16,  int,      s32, epi32,  int)
+OPENCV_HAL_IMPL_AVX512_INIT(v_uint64x8,  uint64,   u64, epi64,  int64)
+OPENCV_HAL_IMPL_AVX512_INIT(v_int64x8,   int64,    s64, epi64,  int64)
+// Floating-point counterpart: setzero/setall use the ps/pd intrinsics; reinterprets from the integer vector types go through 'cast'.
+#define OPENCV_HAL_IMPL_AVX512_INIT_FLT(_Tpvec, _Tp, suffix, zsuffix, cast) \
+    inline _Tpvec v512_setzero_##suffix()                                   \
+    { return _Tpvec(_mm512_setzero_##zsuffix()); }                          \
+    inline _Tpvec v512_setall_##suffix(_Tp v)                               \
+    { return _Tpvec(_mm512_set1_##zsuffix(v)); }                            \
+    OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, v_uint8x64,  suffix, cast)          \
+    OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, v_int8x64,   suffix, cast)          \
+    OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, v_uint16x32, suffix, cast)          \
+    OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, v_int16x32,  suffix, cast)          \
+    OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, v_uint32x16, suffix, cast)          \
+    OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, v_int32x16,  suffix, cast)          \
+    OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, v_uint64x8,  suffix, cast)          \
+    OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, v_int64x8,   suffix, cast)
+// Only reinterprets from the eight integer vector types are generated by this macro.
+OPENCV_HAL_IMPL_AVX512_INIT_FLT(v_float32x16, float,  f32, ps, _mm512_castsi512_ps)
+OPENCV_HAL_IMPL_AVX512_INIT_FLT(v_float64x8,  double, f64, pd, _mm512_castsi512_pd)
+// Reinterprets between the two floating-point vector types; the same-type overloads simply return their argument.
+inline v_float32x16 v_reinterpret_as_f32(const v_float32x16& a)
+{ return a; }
+inline v_float32x16 v_reinterpret_as_f32(const v_float64x8& a)
+{ const __m512 as_single = _mm512_castpd_ps(a.val); return v_float32x16(as_single); }
+
+inline v_float64x8 v_reinterpret_as_f64(const v_float64x8& a)
+{ return a; }
+inline v_float64x8 v_reinterpret_as_f64(const v_float32x16& a)
+{ const __m512d as_double = _mm512_castps_pd(a.val); return v_float64x8(as_double); }
+
+// FP16: convert between 16 packed half-precision values in memory and a v_float32x16
+inline v_float32x16 v512_load_expand(const float16_t* ptr)
+{
+    const __m256i packed_halves = _mm256_loadu_si256((const __m256i*)ptr); // unaligned 256-bit load
+    return v_float32x16(_mm512_cvtph_ps(packed_halves));
+}
+
+inline void v_pack_store(float16_t* ptr, const v_float32x16& a)
+{
+    _mm256_storeu_si256((__m256i*)ptr, _mm512_cvtps_ph(a.val, 0)); // 0 is the rounding-control immediate; unaligned store
+}
+
+/* Recombine & ZIP: v_zip interleaves the lanes of a and b; ab0 is built from their lower halves, ab1 from their upper halves */
+inline void v_zip(const v_int8x64& a, const v_int8x64& b, v_int8x64& ab0, v_int8x64& ab1)
+{   // Both results are formed before either output is written, so ab0/ab1 may alias a or b on either path.
+#if CV_AVX_512VBMI
+    const __m512i lo = _mm512_permutex2var_epi8(a.val, _v512_set_epu8( 95,  31,  94,  30,  93,  29,  92,  28,  91,  27,  90,  26,  89,  25,  88,  24,
+                                    87,  23,  86,  22,  85,  21,  84,  20,  83,  19,  82,  18,  81,  17,  80,  16,
+                                    79,  15,  78,  14,  77,  13,  76,  12,  75,  11,  74,  10,  73,   9,  72,   8,
+                                    71,   7,  70,   6,  69,   5,  68,   4,  67,   3,  66,   2,  65,   1,  64,   0), b.val); // indices 0..63 pick from a, 64..127 from b
+    const __m512i hi = _mm512_permutex2var_epi8(a.val, _v512_set_epu8(127,  63, 126,  62, 125,  61, 124,  60, 123,  59, 122,  58, 121,  57, 120,  56,
+                                   119,  55, 118,  54, 117,  53, 116,  52, 115,  51, 114,  50, 113,  49, 112,  48,
+                                   111,  47, 110,  46, 109,  45, 108,  44, 107,  43, 106,  42, 105,  41, 104,  40,
+                                   103,  39, 102,  38, 101,  37, 100,  36,  99,  35,  98,  34,  97,  33,  96,  32), b.val);
+    ab0 = v_int8x64(lo);
+    ab1 = v_int8x64(hi);
+#else
+    __m512i low  = _mm512_unpacklo_epi8(a.val, b.val); // interleave within each 128-bit lane ...
+    __m512i high = _mm512_unpackhi_epi8(a.val, b.val);
+    ab0 = v_int8x64(_mm512_permutex2var_epi64(low, _v512_set_epu64(11, 10, 3, 2,  9,  8, 1, 0), high)); // ... then put the 128-bit pieces in order
+    ab1 = v_int8x64(_mm512_permutex2var_epi64(low, _v512_set_epu64(15, 14, 7, 6, 13, 12, 5, 4), high));
+#endif
+}
+inline void v_zip(const v_int16x32& a, const v_int16x32& b, v_int16x32& ab0, v_int16x32& ab1)
+{   // Interleaves 16-bit lanes of a and b (indices 0..31 pick from a, 32..63 from b); outputs are written last so they may alias a or b.
+    const __m512i lo = _mm512_permutex2var_epi16(a.val, _v512_set_epu16(47, 15, 46, 14, 45, 13, 44, 12, 43, 11, 42, 10, 41,  9, 40,  8,
+                                    39,  7, 38,  6, 37,  5, 36,  4, 35,  3, 34,  2, 33,  1, 32,  0), b.val);
+    const __m512i hi = _mm512_permutex2var_epi16(a.val, _v512_set_epu16(63, 31, 62, 30, 61, 29, 60, 28, 59, 27, 58, 26, 57, 25, 56, 24,
+                                    55, 23, 54, 22, 53, 21, 52, 20, 51, 19, 50, 18, 49, 17, 48, 16), b.val);
+    ab0 = v_int16x32(lo);
+    ab1 = v_int16x32(hi);
+}
+inline void v_zip(const v_int32x16& a, const v_int32x16& b, v_int32x16& ab0, v_int32x16& ab1)
+{   // Interleaves 32-bit lanes of a and b (indices 0..15 pick from a, 16..31 from b); outputs are written last so they may alias a or b.
+    const __m512i lo = _mm512_permutex2var_epi32(a.val, _v512_set_epu32(23,  7, 22,  6, 21,  5, 20,  4, 19,  3, 18,  2, 17, 1, 16, 0), b.val);
+    const __m512i hi = _mm512_permutex2var_epi32(a.val, _v512_set_epu32(31, 15, 30, 14, 29, 13, 28, 12, 27, 11, 26, 10, 25, 9, 24, 8), b.val);
+    ab0 = v_int32x16(lo);
+    ab1 = v_int32x16(hi);
+}
+inline void v_zip(const v_int64x8& a, const v_int64x8& b, v_int64x8& ab0, v_int64x8& ab1)
+{   // Interleaves 64-bit lanes of a and b (indices 0..7 pick from a, 8..15 from b); outputs are written last so they may alias a or b.
+    const __m512i lo = _mm512_permutex2var_epi64(a.val, _v512_set_epu64(11, 3, 10, 2,  9, 1,  8, 0), b.val);
+    const __m512i hi = _mm512_permutex2var_epi64(a.val, _v512_set_epu64(15, 7, 14, 6, 13, 5, 12, 4), b.val);
+    ab0 = v_int64x8(lo);
+    ab1 = v_int64x8(hi);
+}
+// Unsigned and floating-point v_zip overloads reuse the signed implementation of the same lane width via reinterpret casts.
+inline void v_zip(const v_uint8x64& a, const v_uint8x64& b, v_uint8x64& ab0, v_uint8x64& ab1)
+{
+    v_int8x64 lo, hi;
+    v_zip(v_reinterpret_as_s8(a), v_reinterpret_as_s8(b), lo, hi);
+    ab0 = v_reinterpret_as_u8(lo);
+    ab1 = v_reinterpret_as_u8(hi);
+}
+inline void v_zip(const v_uint16x32& a, const v_uint16x32& b, v_uint16x32& ab0, v_uint16x32& ab1)
+{
+    v_int16x32 lo, hi;
+    v_zip(v_reinterpret_as_s16(a), v_reinterpret_as_s16(b), lo, hi);
+    ab0 = v_reinterpret_as_u16(lo);
+    ab1 = v_reinterpret_as_u16(hi);
+}
+inline void v_zip(const v_uint32x16& a, const v_uint32x16& b, v_uint32x16& ab0, v_uint32x16& ab1)
+{
+    v_int32x16 lo, hi;
+    v_zip(v_reinterpret_as_s32(a), v_reinterpret_as_s32(b), lo, hi);
+    ab0 = v_reinterpret_as_u32(lo);
+    ab1 = v_reinterpret_as_u32(hi);
+}
+inline void v_zip(const v_uint64x8& a, const v_uint64x8& b, v_uint64x8& ab0, v_uint64x8& ab1)
+{
+    v_int64x8 lo, hi;
+    v_zip(v_reinterpret_as_s64(a), v_reinterpret_as_s64(b), lo, hi);
+    ab0 = v_reinterpret_as_u64(lo);
+    ab1 = v_reinterpret_as_u64(hi);
+}
+inline void v_zip(const v_float32x16& a, const v_float32x16& b, v_float32x16& ab0, v_float32x16& ab1)
+{
+    v_int32x16 lo, hi;
+    v_zip(v_reinterpret_as_s32(a), v_reinterpret_as_s32(b), lo, hi);
+    ab0 = v_reinterpret_as_f32(lo);
+    ab1 = v_reinterpret_as_f32(hi);
+}
+inline void v_zip(const v_float64x8& a, const v_float64x8& b, v_float64x8& ab0, v_float64x8& ab1)
+{
+    v_int64x8 lo, hi;
+    v_zip(v_reinterpret_as_s64(a), v_reinterpret_as_s64(b), lo, hi);
+    ab0 = v_reinterpret_as_f64(lo);
+    ab1 = v_reinterpret_as_f64(hi);
+}
+// v_combine_low / v_combine_high / v_recombine built from the 256-bit half helpers; the 'suffix' macro argument is not used in the body.
+#define OPENCV_HAL_IMPL_AVX512_COMBINE(_Tpvec, suffix)                                    \
+    inline _Tpvec v_combine_low(const _Tpvec& a, const _Tpvec& b)                         \
+    { return _Tpvec(_v512_combine(_v512_extract_low(a.val), _v512_extract_low(b.val))); } \
+    inline _Tpvec v_combine_high(const _Tpvec& a, const _Tpvec& b)                        \
+    { return _Tpvec(_v512_insert(b.val, _v512_extract_high(a.val))); }                    \
+    inline void v_recombine(const _Tpvec& a, const _Tpvec& b,                             \
+                                  _Tpvec& c, _Tpvec& d)                                   \
+    {                                                                                     \
+        c.val = _v512_combine(_v512_extract_low(a.val),_v512_extract_low(b.val));         \
+        d.val = _v512_insert(b.val,_v512_extract_high(a.val));                            \
+    }
+// NOTE(review): v_recombine writes c before it computes d, so passing c aliased to a or b changes d - confirm callers never do this.
+// v_recombine stores the same two expressions that v_combine_low (into c) and v_combine_high (into d) return.
+OPENCV_HAL_IMPL_AVX512_COMBINE(v_uint8x64,   epi8)
+OPENCV_HAL_IMPL_AVX512_COMBINE(v_int8x64,    epi8)
+OPENCV_HAL_IMPL_AVX512_COMBINE(v_uint16x32,  epi16)
+OPENCV_HAL_IMPL_AVX512_COMBINE(v_int16x32,   epi16)
+OPENCV_HAL_IMPL_AVX512_COMBINE(v_uint32x16,  epi32)
+OPENCV_HAL_IMPL_AVX512_COMBINE(v_int32x16,   epi32)
+OPENCV_HAL_IMPL_AVX512_COMBINE(v_uint64x8,   epi64)
+OPENCV_HAL_IMPL_AVX512_COMBINE(v_int64x8,    epi64)
+OPENCV_HAL_IMPL_AVX512_COMBINE(v_float32x16, ps)
+OPENCV_HAL_IMPL_AVX512_COMBINE(v_float64x8,  pd)
+
+////////// Arithmetic, bitwise and comparison operations /////////
+
+/* Element-wise binary and unary operations */
+// OPENCV_HAL_IMPL_AVX512_BIN_FUNC defines func(a, b) as 'intrin' applied to the two wrapped registers.
+/** Non-saturating (wrap-around) arithmetic: v_add_wrap / v_sub_wrap for 8- and 16-bit lanes, v_mul_wrap for 16-bit lanes **/
+#define OPENCV_HAL_IMPL_AVX512_BIN_FUNC(func, _Tpvec, intrin) \
+    inline _Tpvec func(const _Tpvec& a, const _Tpvec& b)      \
+    { return _Tpvec(intrin(a.val, b.val)); }
+
+OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_add_wrap, v_uint8x64, _mm512_add_epi8)
+OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_add_wrap, v_int8x64, _mm512_add_epi8)
+OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_add_wrap, v_uint16x32, _mm512_add_epi16)
+OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_add_wrap, v_int16x32, _mm512_add_epi16)
+OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_sub_wrap, v_uint8x64, _mm512_sub_epi8)
+OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_sub_wrap, v_int8x64, _mm512_sub_epi8)
+OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_sub_wrap, v_uint16x32, _mm512_sub_epi16)
+OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_sub_wrap, v_int16x32, _mm512_sub_epi16)
+OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_mul_wrap, v_uint16x32, _mm512_mullo_epi16)
+OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_mul_wrap, v_int16x32, _mm512_mullo_epi16)
+// 8-bit wrapping multiply, assembled from two 16-bit multiplies (one for the even bytes, one for the odd bytes)
+inline v_uint8x64 v_mul_wrap(const v_uint8x64& a, const v_uint8x64& b)
+{
+    const __m512i a_odd = _mm512_srai_epi16(a.val, 8); // bring the odd (upper) byte of each 16-bit lane down
+    const __m512i b_odd = _mm512_srai_epi16(b.val, 8);
+    const __m512i prod_even = _mm512_mullo_epi16(a.val, b.val); // low byte of each lane is the even-byte product
+    const __m512i prod_odd  = _mm512_slli_epi16(_mm512_mullo_epi16(a_odd, b_odd), 8); // moved back to the upper byte
+    return v_uint8x64(_mm512_mask_blend_epi8(0xAAAAAAAAAAAAAAAA, prod_even, prod_odd));
+}
+inline v_int8x64 v_mul_wrap(const v_int8x64& a, const v_int8x64& b)
+{   // forwards to the unsigned overload on the reinterpreted operands
+    return v_reinterpret_as_s8(v_mul_wrap(v_reinterpret_as_u8(a), v_reinterpret_as_u8(b)));
+}
+// OPENCV_HAL_IMPL_AVX512_BIN_OP defines both 'a op b' and 'a op= b' for _Tpvec on top of a single intrinsic.
+#define OPENCV_HAL_IMPL_AVX512_BIN_OP(bin_op, _Tpvec, intrin)            \
+    inline _Tpvec operator bin_op (const _Tpvec& a, const _Tpvec& b)     \
+    { return _Tpvec(intrin(a.val, b.val)); }                             \
+    inline _Tpvec& operator bin_op##= (_Tpvec& a, const _Tpvec& b)       \
+    { a.val = intrin(a.val, b.val); return a; }
+// 32- and 64-bit integer add/subtract
+OPENCV_HAL_IMPL_AVX512_BIN_OP(+, v_uint32x16, _mm512_add_epi32)
+OPENCV_HAL_IMPL_AVX512_BIN_OP(-, v_uint32x16, _mm512_sub_epi32)
+OPENCV_HAL_IMPL_AVX512_BIN_OP(+, v_int32x16, _mm512_add_epi32)
+OPENCV_HAL_IMPL_AVX512_BIN_OP(-, v_int32x16, _mm512_sub_epi32)
+OPENCV_HAL_IMPL_AVX512_BIN_OP(+, v_uint64x8, _mm512_add_epi64)
+OPENCV_HAL_IMPL_AVX512_BIN_OP(-, v_uint64x8, _mm512_sub_epi64)
+OPENCV_HAL_IMPL_AVX512_BIN_OP(+, v_int64x8, _mm512_add_epi64)
+OPENCV_HAL_IMPL_AVX512_BIN_OP(-, v_int64x8, _mm512_sub_epi64)
+// 32- and 64-bit multiply keeps the low half of each product (mullo intrinsics)
+OPENCV_HAL_IMPL_AVX512_BIN_OP(*, v_uint32x16, _mm512_mullo_epi32)
+OPENCV_HAL_IMPL_AVX512_BIN_OP(*, v_int32x16, _mm512_mullo_epi32)
+OPENCV_HAL_IMPL_AVX512_BIN_OP(*, v_uint64x8, _mm512_mullo_epi64)
+OPENCV_HAL_IMPL_AVX512_BIN_OP(*, v_int64x8, _mm512_mullo_epi64)
+
+/** Saturating arithmetic for 8- and 16-bit lanes (adds/subs intrinsics) **/
+OPENCV_HAL_IMPL_AVX512_BIN_OP(+, v_uint8x64,  _mm512_adds_epu8)
+OPENCV_HAL_IMPL_AVX512_BIN_OP(-, v_uint8x64,  _mm512_subs_epu8)
+OPENCV_HAL_IMPL_AVX512_BIN_OP(+, v_int8x64,   _mm512_adds_epi8)
+OPENCV_HAL_IMPL_AVX512_BIN_OP(-, v_int8x64,   _mm512_subs_epi8)
+OPENCV_HAL_IMPL_AVX512_BIN_OP(+, v_uint16x32, _mm512_adds_epu16)
+OPENCV_HAL_IMPL_AVX512_BIN_OP(-, v_uint16x32, _mm512_subs_epu16)
+OPENCV_HAL_IMPL_AVX512_BIN_OP(+, v_int16x32,  _mm512_adds_epi16)
+OPENCV_HAL_IMPL_AVX512_BIN_OP(-, v_int16x32,  _mm512_subs_epi16)
+// Floating-point arithmetic
+OPENCV_HAL_IMPL_AVX512_BIN_OP(+, v_float32x16, _mm512_add_ps)
+OPENCV_HAL_IMPL_AVX512_BIN_OP(-, v_float32x16, _mm512_sub_ps)
+OPENCV_HAL_IMPL_AVX512_BIN_OP(*, v_float32x16, _mm512_mul_ps)
+OPENCV_HAL_IMPL_AVX512_BIN_OP(/, v_float32x16, _mm512_div_ps)
+OPENCV_HAL_IMPL_AVX512_BIN_OP(+, v_float64x8, _mm512_add_pd)
+OPENCV_HAL_IMPL_AVX512_BIN_OP(-, v_float64x8, _mm512_sub_pd)
+OPENCV_HAL_IMPL_AVX512_BIN_OP(*, v_float64x8, _mm512_mul_pd)
+OPENCV_HAL_IMPL_AVX512_BIN_OP(/, v_float64x8, _mm512_div_pd)
+
+// Saturating multiply for 8- and 16-bit lanes: form the widened products, then narrow them with saturation
+inline v_uint8x64 operator * (const v_uint8x64& a, const v_uint8x64& b)
+{
+    v_uint16x32 wide0, wide1;
+    v_mul_expand(a, b, wide0, wide1);
+    return v_pack(wide0, wide1);
+}
+inline v_int8x64 operator * (const v_int8x64& a, const v_int8x64& b)
+{
+    v_int16x32 wide0, wide1;
+    v_mul_expand(a, b, wide0, wide1);
+    return v_pack(wide0, wide1);
+}
+inline v_uint16x32 operator * (const v_uint16x32& a, const v_uint16x32& b)
+{
+    const __m512i lo16 = _mm512_mullo_epi16(a.val, b.val);
+    const __m512i hi16 = _mm512_mulhi_epu16(a.val, b.val);
+    const __m512i limit = _mm512_set1_epi32(65535);
+    // pair low/high words into 32-bit products, clamp them to 65535, then narrow back to 16 bits
+    const __m512i wide0 = _mm512_min_epu32(_mm512_unpacklo_epi16(lo16, hi16), limit);
+    const __m512i wide1 = _mm512_min_epu32(_mm512_unpackhi_epi16(lo16, hi16), limit);
+    return v_uint16x32(_mm512_packus_epi32(wide0, wide1));
+}
+inline v_int16x32 operator * (const v_int16x32& a, const v_int16x32& b)
+{
+    const __m512i lo16 = _mm512_mullo_epi16(a.val, b.val);
+    const __m512i hi16 = _mm512_mulhi_epi16(a.val, b.val);
+    const __m512i wide0 = _mm512_unpacklo_epi16(lo16, hi16); // 32-bit products
+    const __m512i wide1 = _mm512_unpackhi_epi16(lo16, hi16);
+    return v_int16x32(_mm512_packs_epi32(wide0, wide1)); // signed-saturating narrow
+}
+// Compound assignment for 8- and 16-bit lanes: multiplies with operator * and stores the result back into a.
+inline v_uint8x64& operator *= (v_uint8x64& a, const v_uint8x64& b)
+{ return a = a * b; }
+inline v_int8x64& operator *= (v_int8x64& a, const v_int8x64& b)
+{ return a = a * b; }
+inline v_uint16x32& operator *= (v_uint16x32& a, const v_uint16x32& b)
+{ return a = a * b; }
+inline v_int16x32& operator *= (v_int16x32& a, const v_int16x32& b)
+{ return a = a * b; }
+// v_mul_hi: upper 16 bits of each 16x16-bit product (signed and unsigned variants)
+inline v_int16x32 v_mul_hi(const v_int16x32& a, const v_int16x32& b) { const __m512i upper = _mm512_mulhi_epi16(a.val, b.val); return v_int16x32(upper); }
+inline v_uint16x32 v_mul_hi(const v_uint16x32& a, const v_uint16x32& b) { const __m512i upper = _mm512_mulhi_epu16(a.val, b.val); return v_uint16x32(upper); }
+
+// Multiply and expand: products are returned at twice the lane width, split across the two output vectors c and d
+inline void v_mul_expand(const v_uint8x64& a, const v_uint8x64& b,
+                         v_uint16x32& c, v_uint16x32& d)
+{
+    v_uint16x32 a_w0, a_w1, b_w0, b_w1;
+    v_expand(a, a_w0, a_w1);
+    v_expand(b, b_w0, b_w1);
+    c = v_mul_wrap(a_w0, b_w0);
+    d = v_mul_wrap(a_w1, b_w1);
+}
+
+inline void v_mul_expand(const v_int8x64& a, const v_int8x64& b,
+                         v_int16x32& c, v_int16x32& d)
+{
+    v_int16x32 a_w0, a_w1, b_w0, b_w1;
+    v_expand(a, a_w0, a_w1);
+    v_expand(b, b_w0, b_w1);
+    c = v_mul_wrap(a_w0, b_w0);
+    d = v_mul_wrap(a_w1, b_w1);
+}
+
+inline void v_mul_expand(const v_int16x32& a, const v_int16x32& b,
+                         v_int32x16& c, v_int32x16& d)
+{
+    v_int16x32 pairs0, pairs1;
+    // zipping the low and high 16-bit halves of the products yields complete 32-bit values
+    v_zip(v_mul_wrap(a, b), v_mul_hi(a, b), pairs0, pairs1);
+    c = v_reinterpret_as_s32(pairs0);
+    d = v_reinterpret_as_s32(pairs1);
+}
+
+inline void v_mul_expand(const v_uint16x32& a, const v_uint16x32& b,
+                         v_uint32x16& c, v_uint32x16& d)
+{
+    v_uint16x32 pairs0, pairs1;
+    // zipping the low and high 16-bit halves of the products yields complete 32-bit values
+    v_zip(v_mul_wrap(a, b), v_mul_hi(a, b), pairs0, pairs1);
+    c = v_reinterpret_as_u32(pairs0);
+    d = v_reinterpret_as_u32(pairs1);
+}
+
+inline void v_mul_expand(const v_uint32x16& a, const v_uint32x16& b, v_uint64x8& c, v_uint64x8& d)
+{
+    const v_uint64x8 even(_mm512_mul_epu32(a.val, b.val)); // products of the even-indexed 32-bit lanes
+    const v_uint64x8 odd(_mm512_mul_epu32(_mm512_srli_epi64(a.val, 32), _mm512_srli_epi64(b.val, 32))); // odd lanes, shifted down first
+    v_zip(even, odd, c, d);
+}
+
+inline void v_mul_expand(const v_int32x16& a, const v_int32x16& b, v_int64x8& c, v_int64x8& d)
+{
+    const v_int64x8 even(_mm512_mul_epi32(a.val, b.val)); // products of the even-indexed 32-bit lanes
+    const v_int64x8 odd(_mm512_mul_epi32(_mm512_srli_epi64(a.val, 32), _mm512_srli_epi64(b.val, 32))); // odd lanes, shifted down first
+    v_zip(even, odd, c, d);
+}
+
+/** Bitwise shifts (run-time count via << and >>, compile-time count via v_shl/v_shr); right shifts are logical for unsigned and arithmetic for signed vectors **/
+#define OPENCV_HAL_IMPL_AVX512_SHIFT_OP(_Tpuvec, _Tpsvec, suffix) \
+    inline _Tpuvec operator << (const _Tpuvec& a, int imm)        \
+    { return _Tpuvec(_mm512_slli_##suffix(a.val, imm)); }         \
+    inline _Tpsvec operator << (const _Tpsvec& a, int imm)        \
+    { return _Tpsvec(_mm512_slli_##suffix(a.val, imm)); }         \
+    inline _Tpuvec operator >> (const _Tpuvec& a, int imm)        \
+    { return _Tpuvec(_mm512_srli_##suffix(a.val, imm)); }         \
+    inline _Tpsvec operator >> (const _Tpsvec& a, int imm)        \
+    { return _Tpsvec(_mm512_srai_##suffix(a.val, imm)); }         \
+    template<int imm>                                             \
+    inline _Tpuvec v_shl(const _Tpuvec& a)                        \
+    { return _Tpuvec(_mm512_slli_##suffix(a.val, imm)); }         \
+    template<int imm>                                             \
+    inline _Tpsvec v_shl(const _Tpsvec& a)                        \
+    { return _Tpsvec(_mm512_slli_##suffix(a.val, imm)); }         \
+    template<int imm>                                             \
+    inline _Tpuvec v_shr(const _Tpuvec& a)                        \
+    { return _Tpuvec(_mm512_srli_##suffix(a.val, imm)); }         \
+    template<int imm>                                             \
+    inline _Tpsvec v_shr(const _Tpsvec& a)                        \
+    { return _Tpsvec(_mm512_srai_##suffix(a.val, imm)); }
+// Only 16-, 32- and 64-bit lanes are instantiated here.
+OPENCV_HAL_IMPL_AVX512_SHIFT_OP(v_uint16x32, v_int16x32, epi16)
+OPENCV_HAL_IMPL_AVX512_SHIFT_OP(v_uint32x16, v_int32x16, epi32)
+OPENCV_HAL_IMPL_AVX512_SHIFT_OP(v_uint64x8,  v_int64x8,  epi64)
+
+
+/** Bitwise logic: &, | and ^ (with their compound-assignment forms) plus ~, where ~a is computed as a XOR not_const **/
+#define OPENCV_HAL_IMPL_AVX512_LOGIC_OP(_Tpvec, suffix, not_const) \
+    OPENCV_HAL_IMPL_AVX512_BIN_OP(&, _Tpvec, _mm512_and_##suffix)  \
+    OPENCV_HAL_IMPL_AVX512_BIN_OP(|, _Tpvec, _mm512_or_##suffix)   \
+    OPENCV_HAL_IMPL_AVX512_BIN_OP(^, _Tpvec, _mm512_xor_##suffix)  \
+    inline _Tpvec operator ~ (const _Tpvec& a)                     \
+    { return _Tpvec(_mm512_xor_##suffix(a.val, not_const)); }
+// not_const is an all-ones pattern for every type; the float variants obtain it by reinterpreting an all-ones integer vector.
+OPENCV_HAL_IMPL_AVX512_LOGIC_OP(v_uint8x64,   si512, _mm512_set1_epi32(-1))
+OPENCV_HAL_IMPL_AVX512_LOGIC_OP(v_int8x64,    si512, _mm512_set1_epi32(-1))
+OPENCV_HAL_IMPL_AVX512_LOGIC_OP(v_uint16x32,  si512, _mm512_set1_epi32(-1))
+OPENCV_HAL_IMPL_AVX512_LOGIC_OP(v_int16x32,   si512, _mm512_set1_epi32(-1))
+OPENCV_HAL_IMPL_AVX512_LOGIC_OP(v_uint32x16,  si512, _mm512_set1_epi32(-1))
+OPENCV_HAL_IMPL_AVX512_LOGIC_OP(v_int32x16,   si512, _mm512_set1_epi32(-1))
+OPENCV_HAL_IMPL_AVX512_LOGIC_OP(v_uint64x8,   si512, _mm512_set1_epi64(-1))
+OPENCV_HAL_IMPL_AVX512_LOGIC_OP(v_int64x8,    si512, _mm512_set1_epi64(-1))
+OPENCV_HAL_IMPL_AVX512_LOGIC_OP(v_float32x16, ps,    _mm512_castsi512_ps(_mm512_set1_epi32(-1)))
+OPENCV_HAL_IMPL_AVX512_LOGIC_OP(v_float64x8,  pd,    _mm512_castsi512_pd(_mm512_set1_epi32(-1)))
+
+/** Select: per lane, the result is b where the mask lane compares equal to zero and a otherwise **/
+#define OPENCV_HAL_IMPL_AVX512_SELECT(_Tpvec, suffix, zsuf)                      \
+    inline _Tpvec v_select(const _Tpvec& mask, const _Tpvec& a, const _Tpvec& b) \
+    { return _Tpvec(_mm512_mask_blend_##suffix(_mm512_cmp_##suffix##_mask(mask.val, _mm512_setzero_##zsuf(), _MM_CMPINT_EQ), a.val, b.val)); }
+// NOTE(review): the ps/pd variants compare the mask as floating-point values and pass the integer predicate constant _MM_CMPINT_EQ - confirm that is intended.
+OPENCV_HAL_IMPL_AVX512_SELECT(v_uint8x64,   epi8, si512)
+OPENCV_HAL_IMPL_AVX512_SELECT(v_int8x64,    epi8, si512)
+OPENCV_HAL_IMPL_AVX512_SELECT(v_uint16x32, epi16, si512)
+OPENCV_HAL_IMPL_AVX512_SELECT(v_int16x32,  epi16, si512)
+OPENCV_HAL_IMPL_AVX512_SELECT(v_uint32x16, epi32, si512)
+OPENCV_HAL_IMPL_AVX512_SELECT(v_int32x16,  epi32, si512)
+OPENCV_HAL_IMPL_AVX512_SELECT(v_uint64x8,  epi64, si512)
+OPENCV_HAL_IMPL_AVX512_SELECT(v_int64x8,   epi64, si512)
+OPENCV_HAL_IMPL_AVX512_SELECT(v_float32x16,   ps,    ps)
+OPENCV_HAL_IMPL_AVX512_SELECT(v_float64x8,    pd,    pd)
+
+/** Comparison: each operator returns a vector whose lanes are tval (all bits set) where the predicate holds and zero elsewhere **/
+#define OPENCV_HAL_IMPL_AVX512_CMP_INT(bin_op, imm8, _Tpvec, sufcmp, sufset, tval) \
+    inline _Tpvec operator bin_op (const _Tpvec& a, const _Tpvec& b)               \
+    { return _Tpvec(_mm512_maskz_set1_##sufset(_mm512_cmp_##sufcmp##_mask(a.val, b.val, imm8), tval)); }
+// Generates all six relational operators for one integer vector type.
+#define OPENCV_HAL_IMPL_AVX512_CMP_OP_INT(_Tpvec, sufcmp, sufset, tval)              \
+    OPENCV_HAL_IMPL_AVX512_CMP_INT(==, _MM_CMPINT_EQ,  _Tpvec, sufcmp, sufset, tval) \
+    OPENCV_HAL_IMPL_AVX512_CMP_INT(!=, _MM_CMPINT_NE,  _Tpvec, sufcmp, sufset, tval) \
+    OPENCV_HAL_IMPL_AVX512_CMP_INT(<,  _MM_CMPINT_LT,  _Tpvec, sufcmp, sufset, tval) \
+    OPENCV_HAL_IMPL_AVX512_CMP_INT(>,  _MM_CMPINT_NLE, _Tpvec, sufcmp, sufset, tval) \
+    OPENCV_HAL_IMPL_AVX512_CMP_INT(<=, _MM_CMPINT_LE,  _Tpvec, sufcmp, sufset, tval) \
+    OPENCV_HAL_IMPL_AVX512_CMP_INT(>=, _MM_CMPINT_NLT, _Tpvec, sufcmp, sufset, tval)
+// Unsigned types pass an epu* compare suffix and signed types an epi* one; the set suffix only selects the lane width.
+OPENCV_HAL_IMPL_AVX512_CMP_OP_INT(v_uint8x64,   epu8,  epi8, (char)-1)
+OPENCV_HAL_IMPL_AVX512_CMP_OP_INT(v_int8x64,    epi8,  epi8, (char)-1)
+OPENCV_HAL_IMPL_AVX512_CMP_OP_INT(v_uint16x32, epu16, epi16, (short)-1)
+OPENCV_HAL_IMPL_AVX512_CMP_OP_INT(v_int16x32,  epi16, epi16, (short)-1)
+OPENCV_HAL_IMPL_AVX512_CMP_OP_INT(v_uint32x16, epu32, epi32, (int)-1)
+OPENCV_HAL_IMPL_AVX512_CMP_OP_INT(v_int32x16,  epi32, epi32, (int)-1)
+OPENCV_HAL_IMPL_AVX512_CMP_OP_INT(v_uint64x8,  epu64, epi64, (int64)-1)
+OPENCV_HAL_IMPL_AVX512_CMP_OP_INT(v_int64x8,   epi64, epi64, (int64)-1)
+// Floating-point variant: same scheme, with the integer result reinterpreted as a float vector.
+#define OPENCV_HAL_IMPL_AVX512_CMP_FLT(bin_op, imm8, _Tpvec, sufcmp, sufset, tval) \
+    inline _Tpvec operator bin_op (const _Tpvec& a, const _Tpvec& b)               \
+    { return _Tpvec(_mm512_castsi512_##sufcmp(_mm512_maskz_set1_##sufset(_mm512_cmp_##sufcmp##_mask(a.val, b.val, imm8), tval))); }
+// All predicates are the ordered, quiet (_OQ) forms. NOTE(review): per Intel's predicate table these are false when either operand is NaN, including != - confirm intended.
+#define OPENCV_HAL_IMPL_AVX512_CMP_OP_FLT(_Tpvec, sufcmp, sufset, tval)           \
+    OPENCV_HAL_IMPL_AVX512_CMP_FLT(==, _CMP_EQ_OQ,  _Tpvec, sufcmp, sufset, tval) \
+    OPENCV_HAL_IMPL_AVX512_CMP_FLT(!=, _CMP_NEQ_OQ, _Tpvec, sufcmp, sufset, tval) \
+    OPENCV_HAL_IMPL_AVX512_CMP_FLT(<,  _CMP_LT_OQ,  _Tpvec, sufcmp, sufset, tval) \
+    OPENCV_HAL_IMPL_AVX512_CMP_FLT(>,  _CMP_GT_OQ,  _Tpvec, sufcmp, sufset, tval) \
+    OPENCV_HAL_IMPL_AVX512_CMP_FLT(<=, _CMP_LE_OQ,  _Tpvec, sufcmp, sufset, tval) \
+    OPENCV_HAL_IMPL_AVX512_CMP_FLT(>=, _CMP_GE_OQ,  _Tpvec, sufcmp, sufset, tval)
+
+OPENCV_HAL_IMPL_AVX512_CMP_OP_FLT(v_float32x16, ps, epi32, (int)-1)
+OPENCV_HAL_IMPL_AVX512_CMP_OP_FLT(v_float64x8,  pd, epi64, (int64)-1)
+// v_not_nan: compares a with itself using the ordered predicate; lanes where it holds are all ones, the others zero
+inline v_float32x16 v_not_nan(const v_float32x16& a)
+{ const __mmask16 ordered = _mm512_cmp_ps_mask(a.val, a.val, _CMP_ORD_Q); return v_float32x16(_mm512_castsi512_ps(_mm512_maskz_set1_epi32(ordered, (int)-1))); }
+inline v_float64x8 v_not_nan(const v_float64x8& a)
+{ const __mmask8 ordered = _mm512_cmp_pd_mask(a.val, a.val, _CMP_ORD_Q); return v_float64x8(_mm512_castsi512_pd(_mm512_maskz_set1_epi64(ordered, (int64)-1))); }
+
+/** min/max: lane-wise v_min / v_max for every vector type, each mapped directly onto one intrinsic **/
+OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_min, v_uint8x64,   _mm512_min_epu8)
+OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_max, v_uint8x64,   _mm512_max_epu8)
+OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_min, v_int8x64,    _mm512_min_epi8)
+OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_max, v_int8x64,    _mm512_max_epi8)
+OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_min, v_uint16x32,  _mm512_min_epu16)
+OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_max, v_uint16x32,  _mm512_max_epu16)
+OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_min, v_int16x32,   _mm512_min_epi16)
+OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_max, v_int16x32,   _mm512_max_epi16)
+OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_min, v_uint32x16,  _mm512_min_epu32)
+OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_max, v_uint32x16,  _mm512_max_epu32)
+OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_min, v_int32x16,   _mm512_min_epi32)
+OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_max, v_int32x16,   _mm512_max_epi32)
+OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_min, v_uint64x8,   _mm512_min_epu64)
+OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_max, v_uint64x8,   _mm512_max_epu64)
+OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_min, v_int64x8,    _mm512_min_epi64)
+OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_max, v_int64x8,    _mm512_max_epi64)
+OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_min, v_float32x16, _mm512_min_ps)
+OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_max, v_float32x16, _mm512_max_ps)
+OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_min, v_float64x8,  _mm512_min_pd)
+OPENCV_HAL_IMPL_AVX512_BIN_FUNC(v_max, v_float64x8,  _mm512_max_pd)
+
+/** Rotate: helper used when VBMI is unavailable; shifts right by whole 32-bit lanes with alignr and by the leftover bytes with bit shifts **/
+namespace {
+    template<bool prec, int imm4, bool part, int imm32> // prec: leftover bytes present; imm4: leftover byte count; part: only b contributes; imm32: whole 32-bit lanes
+    struct _v_rotate_right { static inline v_int8x64 eval(const v_int8x64&, const v_int8x64&) { return v_int8x64(); }};
+    template<int imm4, int imm32> // leftover bytes, a and b both contribute: OR together two neighbouring lane-aligned windows
+    struct _v_rotate_right<true, imm4, false, imm32> { static inline v_int8x64 eval(const v_int8x64& a, const v_int8x64& b)
+    {
+        return v_int8x64(_mm512_or_si512(_mm512_srli_epi32(_mm512_alignr_epi32(b.val, a.val, imm32    ),    imm4 *8),
+                                         _mm512_slli_epi32(_mm512_alignr_epi32(b.val, a.val, imm32 + 1), (4-imm4)*8)));
+    }};
+    template<int imm4> // as above for imm32 == 15: the second window is b itself
+    struct _v_rotate_right<true, imm4, false, 15> { static inline v_int8x64 eval(const v_int8x64& a, const v_int8x64& b)
+    {
+        return v_int8x64(_mm512_or_si512(_mm512_srli_epi32(_mm512_alignr_epi32(b.val, a.val, 15),    imm4 *8),
+                                         _mm512_slli_epi32(                                b.val, (4-imm4)*8)));
+    }};
+    template<int imm4, int imm32> // leftover bytes, only b contributes: the windows are taken over b followed by zeros
+    struct _v_rotate_right<true, imm4, true, imm32> { static inline v_int8x64 eval(const v_int8x64&, const v_int8x64& b)
+    {
+        return v_int8x64(_mm512_or_si512(_mm512_srli_epi32(_mm512_alignr_epi32(_mm512_setzero_si512(), b.val, imm32 - 16),    imm4 *8),
+                                         _mm512_slli_epi32(_mm512_alignr_epi32(_mm512_setzero_si512(), b.val, imm32 - 15), (4-imm4)*8)));
+    }};
+    template<int imm4> // as above for imm32 == 31: only one window is used
+    struct _v_rotate_right<true, imm4, true, 31> { static inline v_int8x64 eval(const v_int8x64&, const v_int8x64& b)
+    { return v_int8x64(_mm512_srli_epi32(_mm512_alignr_epi32(_mm512_setzero_si512(), b.val, 15), imm4*8)); }};
+    template<int imm32> // whole-lane shift over a and b: a single alignr
+    struct _v_rotate_right<false, 0, false, imm32> { static inline v_int8x64 eval(const v_int8x64& a, const v_int8x64& b)
+    { return v_int8x64(_mm512_alignr_epi32(b.val, a.val, imm32)); }};
+    template<> // zero shift: a is returned unchanged
+    struct _v_rotate_right<false, 0, false, 0> { static inline v_int8x64 eval(const v_int8x64& a, const v_int8x64&) { return a; }};
+    template<int imm32> // whole-lane shift where only b contributes
+    struct _v_rotate_right<false, 0, true, imm32> { static inline v_int8x64 eval(const v_int8x64&, const v_int8x64& b)
+    { return v_int8x64(_mm512_alignr_epi32(_mm512_setzero_si512(), b.val, imm32 - 16)); }};
+    template<> // shift of exactly 16 lanes: b is returned unchanged
+    struct _v_rotate_right<false, 0, true, 16> { static inline v_int8x64 eval(const v_int8x64&, const v_int8x64& b) { return b; }};
+    template<> // shift of 32 lanes: returns a default-constructed vector
+    struct _v_rotate_right<false, 0, true, 32> { static inline v_int8x64 eval(const v_int8x64&, const v_int8x64&) { return v_int8x64(); }};
+}
+template<int imm> inline v_int8x64 v_rotate_right(const v_int8x64& a, const v_int8x64& b)
+{   // Result lane i is byte (i + imm) of the 128-byte sequence formed by a (bytes 0..63) followed by b; positions past its end are zero.
+    return imm >= 128 ? v512_setzero_s8() : // everything shifted out: explicit zero, independent of what the default constructor does
+#if CV_AVX_512VBMI
+    v_int8x64(_mm512_maskz_permutex2var_epi8(0xFFFFFFFFFFFFFFFF >> ((imm > 64 && imm < 128) ? imm - 64 : 0), a.val, // indices use 7 bits only: zero the lanes that would wrap back into a
+    _v512_set_epu8(0x3f + imm, 0x3e + imm, 0x3d + imm, 0x3c + imm, 0x3b + imm, 0x3a + imm, 0x39 + imm, 0x38 + imm,
+                   0x37 + imm, 0x36 + imm, 0x35 + imm, 0x34 + imm, 0x33 + imm, 0x32 + imm, 0x31 + imm, 0x30 + imm,
+                   0x2f + imm, 0x2e + imm, 0x2d + imm, 0x2c + imm, 0x2b + imm, 0x2a + imm, 0x29 + imm, 0x28 + imm,
+                   0x27 + imm, 0x26 + imm, 0x25 + imm, 0x24 + imm, 0x23 + imm, 0x22 + imm, 0x21 + imm, 0x20 + imm,
+                   0x1f + imm, 0x1e + imm, 0x1d + imm, 0x1c + imm, 0x1b + imm, 0x1a + imm, 0x19 + imm, 0x18 + imm,
+                   0x17 + imm, 0x16 + imm, 0x15 + imm, 0x14 + imm, 0x13 + imm, 0x12 + imm, 0x11 + imm, 0x10 + imm,
+                   0x0f + imm, 0x0e + imm, 0x0d + imm, 0x0c + imm, 0x0b + imm, 0x0a + imm, 0x09 + imm, 0x08 + imm,
+                   0x07 + imm, 0x06 + imm, 0x05 + imm, 0x04 + imm, 0x03 + imm, 0x02 + imm, 0x01 + imm, 0x00 + imm), b.val));
+#else // split imm into whole 32-bit lanes (imm/4) and leftover bytes (imm%4); imm/4 > 15 means only b contributes
+    _v_rotate_right<imm%4!=0, imm%4, (imm/4 > 15), imm/4>::eval(a, b);
+#endif
+}
+template<int imm> // imm: shift distance in bytes
+inline v_int8x64 v_rotate_left(const v_int8x64& a, const v_int8x64& b)
+{   // Result lane i is a[i - imm] when that index is in range, otherwise b[i - imm + 64] when that is in range, otherwise zero.
+    if (imm == 0) return a;
+    if (imm == 64) return b;
+    if (imm >= 128) return v512_setzero_s8(); // explicit zero, independent of what the default constructor does
+#if CV_AVX_512VBMI
+    return v_int8x64(_mm512_maskz_permutex2var_epi8(0xFFFFFFFFFFFFFFFF << ((imm > 64 && imm < 128) ? imm - 64 : 0), b.val, // indices use 7 bits only: zero the lanes that would wrap
+           _v512_set_epi8(0x7f - imm,0x7e - imm,0x7d - imm,0x7c - imm,0x7b - imm,0x7a - imm,0x79 - imm,0x78 - imm,
+                          0x77 - imm,0x76 - imm,0x75 - imm,0x74 - imm,0x73 - imm,0x72 - imm,0x71 - imm,0x70 - imm,
+                          0x6f - imm,0x6e - imm,0x6d - imm,0x6c - imm,0x6b - imm,0x6a - imm,0x69 - imm,0x68 - imm,
+                          0x67 - imm,0x66 - imm,0x65 - imm,0x64 - imm,0x63 - imm,0x62 - imm,0x61 - imm,0x60 - imm,
+                          0x5f - imm,0x5e - imm,0x5d - imm,0x5c - imm,0x5b - imm,0x5a - imm,0x59 - imm,0x58 - imm,
+                          0x57 - imm,0x56 - imm,0x55 - imm,0x54 - imm,0x53 - imm,0x52 - imm,0x51 - imm,0x50 - imm,
+                          0x4f - imm,0x4e - imm,0x4d - imm,0x4c - imm,0x4b - imm,0x4a - imm,0x49 - imm,0x48 - imm,
+                          0x47 - imm,0x46 - imm,0x45 - imm,0x44 - imm,0x43 - imm,0x42 - imm,0x41 - imm,0x40 - imm), a.val));
+#else // the template argument of the branch not taken is clamped to 0 so v_rotate_right is never instantiated with a negative shift
+    return imm < 64 ? v_rotate_right<(imm < 64 ? 64 - imm : 0)>(b, a) : v_rotate_right<((imm > 64 && imm < 128) ? 128 - imm : 0)>(v512_setzero_s8(), b);
+#endif
+}
+template<int imm> // imm: shift distance in bytes
+inline v_int8x64 v_rotate_right(const v_int8x64& a)
+{   // Result lane i is a[i + imm] while that index is below 64 and zero otherwise.
+    if (imm == 0) return a;
+    if (imm >= 64) return v512_setzero_s8(); // explicit zero, independent of what the default constructor does
+#if CV_AVX_512VBMI
+    return v_int8x64(_mm512_maskz_permutexvar_epi8(0xFFFFFFFFFFFFFFFF >> (imm & 63), // '& 63' keeps the count valid in instantiations that returned above
+           _v512_set_epu8(0x3f + imm,0x3e + imm,0x3d + imm,0x3c + imm,0x3b + imm,0x3a + imm,0x39 + imm,0x38 + imm,
+                          0x37 + imm,0x36 + imm,0x35 + imm,0x34 + imm,0x33 + imm,0x32 + imm,0x31 + imm,0x30 + imm,
+                          0x2f + imm,0x2e + imm,0x2d + imm,0x2c + imm,0x2b + imm,0x2a + imm,0x29 + imm,0x28 + imm,
+                          0x27 + imm,0x26 + imm,0x25 + imm,0x24 + imm,0x23 + imm,0x22 + imm,0x21 + imm,0x20 + imm,
+                          0x1f + imm,0x1e + imm,0x1d + imm,0x1c + imm,0x1b + imm,0x1a + imm,0x19 + imm,0x18 + imm,
+                          0x17 + imm,0x16 + imm,0x15 + imm,0x14 + imm,0x13 + imm,0x12 + imm,0x11 + imm,0x10 + imm,
+                          0x0f + imm,0x0e + imm,0x0d + imm,0x0c + imm,0x0b + imm,0x0a + imm,0x09 + imm,0x08 + imm,
+                          0x07 + imm,0x06 + imm,0x05 + imm,0x04 + imm,0x03 + imm,0x02 + imm,0x01 + imm,0x00 + imm), a.val));
+#else // same as the two-operand rotate with an all-zero second operand
+    return v_rotate_right<imm>(a, v512_setzero_s8());
+#endif
+}
+template<int imm> // imm: shift distance in bytes
+inline v_int8x64 v_rotate_left(const v_int8x64& a)
+{   // Result lane i is a[i - imm] for i >= imm and zero below that.
+    if (imm == 0) return a;
+    if (imm >= 64) return v512_setzero_s8(); // explicit zero, independent of what the default constructor does
+#if CV_AVX_512VBMI
+    return v_int8x64(_mm512_maskz_permutexvar_epi8(0xFFFFFFFFFFFFFFFF << (imm & 63), // '& 63' keeps the count valid in instantiations that returned above
+           _v512_set_epi8(0x3f - imm,0x3e - imm,0x3d - imm,0x3c - imm,0x3b - imm,0x3a - imm,0x39 - imm,0x38 - imm,
+                          0x37 - imm,0x36 - imm,0x35 - imm,0x34 - imm,0x33 - imm,0x32 - imm,0x31 - imm,0x30 - imm,
+                          0x2f - imm,0x2e - imm,0x2d - imm,0x2c - imm,0x2b - imm,0x2a - imm,0x29 - imm,0x28 - imm,
+                          0x27 - imm,0x26 - imm,0x25 - imm,0x24 - imm,0x23 - imm,0x22 - imm,0x21 - imm,0x20 - imm,
+                          0x1f - imm,0x1e - imm,0x1d - imm,0x1c - imm,0x1b - imm,0x1a - imm,0x19 - imm,0x18 - imm,
+                          0x17 - imm,0x16 - imm,0x15 - imm,0x14 - imm,0x13 - imm,0x12 - imm,0x11 - imm,0x10 - imm,
+                          0x0f - imm,0x0e - imm,0x0d - imm,0x0c - imm,0x0b - imm,0x0a - imm,0x09 - imm,0x08 - imm,
+                          0x07 - imm,0x06 - imm,0x05 - imm,0x04 - imm,0x03 - imm,0x02 - imm,0x01 - imm,0x00 - imm), a.val));
+#else // argument clamped so that instantiations with imm > 64 (which returned above) never request a negative right shift
+    return v_rotate_right<(imm < 64 ? 64 - imm : 0)>(v512_setzero_s8(), a);
+#endif
+}
+
+#define OPENCV_HAL_IMPL_AVX512_ROTATE_PM(_Tpvec, suffix) /* lane rotates for _Tpvec, forwarded to the v_int8x64 byte rotates */              \
+template<int imm> inline _Tpvec v_rotate_left(const _Tpvec& a, const _Tpvec& b)                                                            \
+{ return v_reinterpret_as_##suffix(v_rotate_left<imm * sizeof(_Tpvec::lane_type)>(v_reinterpret_as_s8(a), v_reinterpret_as_s8(b))); }      \
+template<int imm> inline _Tpvec v_rotate_right(const _Tpvec& a, const _Tpvec& b)                                                           \
+{ return v_reinterpret_as_##suffix(v_rotate_right<imm * sizeof(_Tpvec::lane_type)>(v_reinterpret_as_s8(a), v_reinterpret_as_s8(b))); }     \
+template<int imm> inline _Tpvec v_rotate_left(const _Tpvec& a)                                                                             \
+{ return v_reinterpret_as_##suffix(v_rotate_left<imm * sizeof(_Tpvec::lane_type)>(v_reinterpret_as_s8(a))); }                              \
+template<int imm> inline _Tpvec v_rotate_right(const _Tpvec& a)                                                                            \
+{ return v_reinterpret_as_##suffix(v_rotate_right<imm * sizeof(_Tpvec::lane_type)>(v_reinterpret_as_s8(a))); } /* imm counts lanes; the byte rotate receives imm * sizeof(lane_type) */
+
+#define OPENCV_HAL_IMPL_AVX512_ROTATE_EC(_Tpvec, suffix) /* lane rotates for _Tpvec built from the masked compress / expand intrinsics */    \
+template<int imm>                                                                                                                          \
+inline _Tpvec v_rotate_left(const _Tpvec& a, const _Tpvec& b)                                                                              \
+{                                                                                                                                          \
+    enum { SHIFT2 = (_Tpvec::nlanes - imm) }; /* number of lanes of the result that still come from a */                                   \
+    enum { MASK = ((1 << _Tpvec::nlanes) - 1) }; /* one mask bit per lane */                                                               \
+    if (imm == 0) return a;                                                                                                                \
+    if (imm == _Tpvec::nlanes) return b;                                                                                                   \
+    if (imm >= 2*_Tpvec::nlanes) return _Tpvec();                                                                                          \
+    return _Tpvec(_mm512_mask_expand_##suffix(_mm512_maskz_compress_##suffix((MASK << SHIFT2)&MASK, b.val), (MASK << (imm))&MASK, a.val)); \
+}                                                                                                                                          \
+template<int imm>                                                                                                                          \
+inline _Tpvec v_rotate_right(const _Tpvec& a, const _Tpvec& b)                                                                             \
+{                                                                                                                                          \
+    enum { SHIFT2 = (_Tpvec::nlanes - imm) }; /* first result lane that is filled from b */                                                \
+    enum { MASK = ((1 << _Tpvec::nlanes) - 1) }; /* one mask bit per lane */                                                               \
+    if (imm == 0) return a;                                                                                                                \
+    if (imm == _Tpvec::nlanes) return b;                                                                                                   \
+    if (imm >= 2*_Tpvec::nlanes) return _Tpvec();                                                                                          \
+    return _Tpvec(_mm512_mask_expand_##suffix(_mm512_maskz_compress_##suffix((MASK << (imm))&MASK, a.val), (MASK << SHIFT2)&MASK, b.val)); \
+}                                                                                                                                          \
+template<int imm>                                                                                                                          \
+inline _Tpvec v_rotate_left(const _Tpvec& a)                                                                                               \
+{                                                                                                                                          \
+    if (imm == 0) return a;                                                                                                                \
+    if (imm >= _Tpvec::nlanes) return _Tpvec();                                                                                            \
+    return _Tpvec(_mm512_maskz_expand_##suffix((1 << _Tpvec::nlanes) - (1 << (imm)), a.val)); /* mask has bits imm..nlanes-1 set */        \
+}                                                                                                                                          \
+template<int imm>                                                                                                                          \
+inline _Tpvec v_rotate_right(const _Tpvec& a)                                                                                              \
+{                                                                                                                                          \
+    if (imm == 0) return a;                                                                                                                \
+    if (imm >= _Tpvec::nlanes) return _Tpvec();                                                                                            \
+    return _Tpvec(_mm512_maskz_compress_##suffix((1 << _Tpvec::nlanes) - (1 << (imm)), a.val)); /* mask has bits imm..nlanes-1 set */      \
+}
+
+OPENCV_HAL_IMPL_AVX512_ROTATE_PM(v_uint8x64,   u8)   // 8- and 16-bit lane types go through the v_int8x64 byte rotates
+OPENCV_HAL_IMPL_AVX512_ROTATE_PM(v_uint16x32,  u16)
+OPENCV_HAL_IMPL_AVX512_ROTATE_PM(v_int16x32,   s16)
+OPENCV_HAL_IMPL_AVX512_ROTATE_EC(v_uint32x16,  epi32) // 32- and 64-bit lane types use the compress / expand variant
+OPENCV_HAL_IMPL_AVX512_ROTATE_EC(v_int32x16,   epi32)
+OPENCV_HAL_IMPL_AVX512_ROTATE_EC(v_uint64x8,   epi64)
+OPENCV_HAL_IMPL_AVX512_ROTATE_EC(v_int64x8,    epi64)
+OPENCV_HAL_IMPL_AVX512_ROTATE_EC(v_float32x16, ps)
+OPENCV_HAL_IMPL_AVX512_ROTATE_EC(v_float64x8,  pd)
+
+/** Reverse: return a vector holding the lanes of the argument in the opposite order **/
+inline v_uint8x64 v_reverse(const v_uint8x64 &a)
+{
+#if CV_AVX_512VBMI
+    static const __m512i perm = _mm512_set_epi32( // byte indices 0x3f..0x00 ascending through the register: result byte k reads byte 63 - k
+            0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f,
+            0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f,
+            0x20212223, 0x24252627, 0x28292a2b, 0x2c2d2e2f,
+            0x30313233, 0x34353637, 0x38393a3b, 0x3c3d3e3f);
+    return v_uint8x64(_mm512_permutexvar_epi8(perm, a.val));
+#else // no VBMI: two steps, bytes inside each 128-bit block first, then the blocks themselves
+    static const __m512i shuf = _mm512_set_epi32( // same 16-byte reversal pattern repeated for each of the four 128-bit blocks
+            0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f,
+            0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f,
+            0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f,
+            0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f);
+    static const __m512i perm = _mm512_set_epi64(1, 0, 3, 2, 5, 4, 7, 6); // qword pairs (6,7),(4,5),(2,3),(0,1): reverses the 128-bit blocks, keeps each pair's order
+    __m512i vec = _mm512_shuffle_epi8(a.val, shuf);
+    return v_uint8x64(_mm512_permutexvar_epi64(perm, vec));
+#endif
+}
+
+inline v_int8x64 v_reverse(const v_int8x64 &a) // signed bytes: reinterpret, reuse the v_uint8x64 overload, reinterpret back
+{ return v_reinterpret_as_s8(v_reverse(v_reinterpret_as_u8(a))); }
+
+inline v_uint16x32 v_reverse(const v_uint16x32 &a) // reverses the order of the 32 16-bit lanes
+{
+#if CV_AVX_512VBMI
+    static const __m512i perm = _mm512_set_epi32( // 16-bit lane indices 0x1f..0x00 ascending through the register
+            0x00000001, 0x00020003, 0x00040005, 0x00060007,
+            0x00080009, 0x000a000b, 0x000c000d, 0x000e000f,
+            0x00100011, 0x00120013, 0x00140015, 0x00160017,
+            0x00180019, 0x001a001b, 0x001c001d, 0x001e001f);
+    return v_uint16x32(_mm512_permutexvar_epi16(perm, a.val));
+#else // fallback: reverse the 16-bit lanes inside each 128-bit block, then reverse the blocks
+    static const __m512i shuf = _mm512_set_epi32( // byte pairs stay in order (.., 0x0e,0x0f first), so each 16-bit value is moved intact
+            0x01000302, 0x05040706, 0x09080b0a, 0x0d0c0f0e,
+            0x01000302, 0x05040706, 0x09080b0a, 0x0d0c0f0e,
+            0x01000302, 0x05040706, 0x09080b0a, 0x0d0c0f0e,
+            0x01000302, 0x05040706, 0x09080b0a, 0x0d0c0f0e);
+    static const __m512i perm = _mm512_set_epi64(1, 0, 3, 2, 5, 4, 7, 6); // reverses the 128-bit blocks, keeps the qword order inside each
+    __m512i vec = _mm512_shuffle_epi8(a.val, shuf);
+    return v_uint16x32(_mm512_permutexvar_epi64(perm, vec));
+#endif
+}
+
+inline v_int16x32 v_reverse(const v_int16x32 &a) // signed 16-bit: delegate to the v_uint16x32 overload through reinterprets
+{ return v_reinterpret_as_s16(v_reverse(v_reinterpret_as_u16(a))); }
+
+inline v_uint32x16 v_reverse(const v_uint32x16 &a) // reverses the order of the 16 32-bit lanes with one permute
+{
+    static const __m512i perm = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,14, 15); // set_epi32 lists the top lane first, so result lane 0 reads lane 15
+    return v_uint32x16(_mm512_permutexvar_epi32(perm, a.val));
+}
+
+inline v_int32x16 v_reverse(const v_int32x16 &a) // signed 32-bit: delegate to the v_uint32x16 overload through reinterprets
+{ return v_reinterpret_as_s32(v_reverse(v_reinterpret_as_u32(a))); }
+
+inline v_float32x16 v_reverse(const v_float32x16 &a) // float lanes: reversed as raw 32-bit patterns via the v_uint32x16 overload
+{ return v_reinterpret_as_f32(v_reverse(v_reinterpret_as_u32(a))); }
+
+inline v_uint64x8 v_reverse(const v_uint64x8 &a) // reverses the order of the 8 64-bit lanes with one permute
+{
+    static const __m512i perm = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); // set_epi64 lists the top lane first, so result lane 0 reads lane 7
+    return v_uint64x8(_mm512_permutexvar_epi64(perm, a.val));
+}
+
+inline v_int64x8 v_reverse(const v_int64x8 &a) // signed 64-bit: delegate to the v_uint64x8 overload through reinterprets
+{ return v_reinterpret_as_s64(v_reverse(v_reinterpret_as_u64(a))); }
+
+inline v_float64x8 v_reverse(const v_float64x8 &a) // double lanes: reversed as raw 64-bit patterns via the v_uint64x8 overload
+{ return v_reinterpret_as_f64(v_reverse(v_reinterpret_as_u64(a))); }
+
+////////// Reduce /////////
+
+/** Reduce: fold all lanes of one vector into a single scalar (min / max / sum) **/
+#define OPENCV_HAL_IMPL_AVX512_REDUCE_ADD64(a, b) a + b // scalar addition in call form so it can be passed as `scop`; NOTE(review): expansion is unparenthesized
+#define OPENCV_HAL_IMPL_AVX512_REDUCE_8(sctype, func, _Tpvec, ifunc, scop) /* 8 x 64-bit integer lanes -> one sctype */ \
+    inline sctype v_reduce_##func(const _Tpvec& a)                                                                  \
+    { __m256i half = _mm256_##ifunc(_v512_extract_low(a.val), _v512_extract_high(a.val)); /* 8 -> 4 lanes */        \
+      sctype CV_DECL_ALIGNED(64) idx[2];                                                                            \
+      _mm_store_si128((__m128i*)idx, _mm_##ifunc(_mm256_castsi256_si128(half), _mm256_extracti128_si256(half, 1))); \
+      return scop(idx[0], idx[1]); } /* the last two lanes are combined in scalar code */
+OPENCV_HAL_IMPL_AVX512_REDUCE_8(uint64, min, v_uint64x8, min_epu64, min)
+OPENCV_HAL_IMPL_AVX512_REDUCE_8(uint64, max, v_uint64x8, max_epu64, max)
+OPENCV_HAL_IMPL_AVX512_REDUCE_8(uint64, sum, v_uint64x8, add_epi64, OPENCV_HAL_IMPL_AVX512_REDUCE_ADD64)
+OPENCV_HAL_IMPL_AVX512_REDUCE_8(int64,  min, v_int64x8,  min_epi64, min)
+OPENCV_HAL_IMPL_AVX512_REDUCE_8(int64,  max, v_int64x8,  max_epi64, max)
+OPENCV_HAL_IMPL_AVX512_REDUCE_8(int64,  sum, v_int64x8,  add_epi64, OPENCV_HAL_IMPL_AVX512_REDUCE_ADD64)
+
+#define OPENCV_HAL_IMPL_AVX512_REDUCE_8F(func, ifunc, scop) /* 8 x double lanes -> one double */    \
+    inline double v_reduce_##func(const v_float64x8& a)                                             \
+    { __m256d half = _mm256_##ifunc(_v512_extract_low(a.val), _v512_extract_high(a.val));           \
+      double CV_DECL_ALIGNED(64) idx[2];                                                            \
+      _mm_store_pd(idx, _mm_##ifunc(_mm256_castpd256_pd128(half), _mm256_extractf128_pd(half, 1))); \
+      return scop(idx[0], idx[1]); } /* the last two lanes are combined in scalar code */
+OPENCV_HAL_IMPL_AVX512_REDUCE_8F(min, min_pd, min)
+OPENCV_HAL_IMPL_AVX512_REDUCE_8F(max, max_pd, max)
+OPENCV_HAL_IMPL_AVX512_REDUCE_8F(sum, add_pd, OPENCV_HAL_IMPL_AVX512_REDUCE_ADD64)
+
+#define OPENCV_HAL_IMPL_AVX512_REDUCE_16(sctype, func, _Tpvec, ifunc) /* 16 x 32-bit integer lanes -> one sctype */ \
+    inline sctype v_reduce_##func(const _Tpvec& a)                                                    \
+    { __m256i half = _mm256_##ifunc(_v512_extract_low(a.val), _v512_extract_high(a.val));             \
+      __m128i quarter = _mm_##ifunc(_mm256_castsi256_si128(half), _mm256_extracti128_si256(half, 1)); \
+      quarter = _mm_##ifunc(quarter, _mm_srli_si128(quarter, 8)); /* fold the upper 8 bytes onto the lower */ \
+      quarter = _mm_##ifunc(quarter, _mm_srli_si128(quarter, 4)); /* fold lane 1 onto lane 0 */       \
+      return (sctype)_mm_cvtsi128_si32(quarter); } /* result is read from lane 0 */
+OPENCV_HAL_IMPL_AVX512_REDUCE_16(uint, min, v_uint32x16, min_epu32)
+OPENCV_HAL_IMPL_AVX512_REDUCE_16(uint, max, v_uint32x16, max_epu32)
+OPENCV_HAL_IMPL_AVX512_REDUCE_16(int,  min, v_int32x16,  min_epi32)
+OPENCV_HAL_IMPL_AVX512_REDUCE_16(int,  max, v_int32x16,  max_epi32)
+
+#define OPENCV_HAL_IMPL_AVX512_REDUCE_16F(func, ifunc) /* 16 x float lanes -> one float (min / max) */ \
+    inline float v_reduce_##func(const v_float32x16& a)                                           \
+    { __m256 half = _mm256_##ifunc(_v512_extract_low(a.val), _v512_extract_high(a.val));          \
+      __m128 quarter = _mm_##ifunc(_mm256_castps256_ps128(half), _mm256_extractf128_ps(half, 1)); \
+      quarter = _mm_##ifunc(quarter, _mm_permute_ps(quarter, _MM_SHUFFLE(0, 0, 3, 2))); /* lanes 2,3 brought down to 0,1 */ \
+      quarter = _mm_##ifunc(quarter, _mm_permute_ps(quarter, _MM_SHUFFLE(0, 0, 0, 1))); /* lane 1 brought down to 0 */ \
+      return _mm_cvtss_f32(quarter); } /* result is read from lane 0 */
+OPENCV_HAL_IMPL_AVX512_REDUCE_16F(min, min_ps)
+OPENCV_HAL_IMPL_AVX512_REDUCE_16F(max, max_ps)
+
+inline float v_reduce_sum(const v_float32x16& a) // horizontal sum of the 16 float lanes
+{
+    __m256 sum8 = _mm256_add_ps(_v512_extract_low(a.val), _v512_extract_high(a.val)); // 16 -> 8 partial sums
+    __m128 sum4 = _mm_add_ps(_mm256_castps256_ps128(sum8), _mm256_extractf128_ps(sum8, 1)); // 8 -> 4
+    sum4 = _mm_hadd_ps(sum4, sum4); // 4 -> 2 via pairwise horizontal add
+    return _mm_cvtss_f32(_mm_hadd_ps(sum4, sum4)); // 2 -> 1, total read from lane 0
+}
+inline int v_reduce_sum(const v_int32x16& a) // horizontal sum of the 16 int32 lanes
+{
+    __m256i sum8 = _mm256_add_epi32(_v512_extract_low(a.val), _v512_extract_high(a.val)); // 16 -> 8 partial sums
+    __m128i sum4 = _mm_add_epi32(_mm256_castsi256_si128(sum8), _mm256_extracti128_si256(sum8, 1)); // 8 -> 4
+    sum4 = _mm_hadd_epi32(sum4, sum4); // 4 -> 2 via pairwise horizontal add
+    return _mm_cvtsi128_si32(_mm_hadd_epi32(sum4, sum4)); // 2 -> 1, total read from lane 0
+}
+inline uint v_reduce_sum(const v_uint32x16& a) // unsigned sum: same bits as the signed sum, so reuse the v_int32x16 overload and cast
+{ return (uint)v_reduce_sum(v_reinterpret_as_s32(a)); }
+
+#define OPENCV_HAL_IMPL_AVX512_REDUCE_32(sctype, func, _Tpvec, ifunc) /* 32 x 16-bit lanes -> one sctype (min / max) */ \
+    inline sctype v_reduce_##func(const _Tpvec& a)                                                    \
+    { __m256i half = _mm256_##ifunc(_v512_extract_low(a.val), _v512_extract_high(a.val));             \
+      __m128i quarter = _mm_##ifunc(_mm256_castsi256_si128(half), _mm256_extracti128_si256(half, 1)); \
+      quarter = _mm_##ifunc(quarter, _mm_srli_si128(quarter, 8));                                     \
+      quarter = _mm_##ifunc(quarter, _mm_srli_si128(quarter, 4));                                     \
+      quarter = _mm_##ifunc(quarter, _mm_srli_si128(quarter, 2)); /* final fold: 16-bit lane 1 onto lane 0 */ \
+      return (sctype)_mm_cvtsi128_si32(quarter); } /* the cast keeps only the low 16 bits, i.e. lane 0 */
+OPENCV_HAL_IMPL_AVX512_REDUCE_32(ushort, min, v_uint16x32, min_epu16)
+OPENCV_HAL_IMPL_AVX512_REDUCE_32(ushort, max, v_uint16x32, max_epu16)
+OPENCV_HAL_IMPL_AVX512_REDUCE_32(short,  min, v_int16x32,  min_epi16)
+OPENCV_HAL_IMPL_AVX512_REDUCE_32(short,  max, v_int16x32,  max_epi16)
+
+inline int v_reduce_sum(const v_int16x32& a) // 16-bit sum: expand both halves to wider lanes first, then reuse the wider v_reduce_sum
+{ return v_reduce_sum(v_expand_low(a) + v_expand_high(a)); }
+inline uint v_reduce_sum(const v_uint16x32& a) // unsigned counterpart, same expand-then-sum scheme
+{ return v_reduce_sum(v_expand_low(a) + v_expand_high(a)); }
+
+#define OPENCV_HAL_IMPL_AVX512_REDUCE_64(sctype, func, _Tpvec, ifunc) /* 64 x 8-bit lanes -> one sctype (min / max) */ \
+    inline sctype v_reduce_##func(const _Tpvec& a)                                                    \
+    { __m256i half = _mm256_##ifunc(_v512_extract_low(a.val), _v512_extract_high(a.val));             \
+      __m128i quarter = _mm_##ifunc(_mm256_castsi256_si128(half), _mm256_extracti128_si256(half, 1)); \
+      quarter = _mm_##ifunc(quarter, _mm_srli_si128(quarter, 8));                                     \
+      quarter = _mm_##ifunc(quarter, _mm_srli_si128(quarter, 4));                                     \
+      quarter = _mm_##ifunc(quarter, _mm_srli_si128(quarter, 2));                                     \
+      quarter = _mm_##ifunc(quarter, _mm_srli_si128(quarter, 1)); /* final fold: byte 1 onto byte 0 */ \
+      return (sctype)_mm_cvtsi128_si32(quarter); } /* the cast keeps only the low byte, i.e. lane 0 */
+OPENCV_HAL_IMPL_AVX512_REDUCE_64(uchar, min, v_uint8x64, min_epu8)
+OPENCV_HAL_IMPL_AVX512_REDUCE_64(uchar, max, v_uint8x64, max_epu8)
+OPENCV_HAL_IMPL_AVX512_REDUCE_64(schar, min, v_int8x64,  min_epi8)
+OPENCV_HAL_IMPL_AVX512_REDUCE_64(schar, max, v_int8x64,  max_epi8)
+
+#define OPENCV_HAL_IMPL_AVX512_REDUCE_64_SUM(sctype, _Tpvec, suffix) /* sum of 64 x 8-bit lanes, widened so it cannot overflow */ \
+    inline sctype v_reduce_sum(const _Tpvec& a)                                                         \
+    {   __m512i a16 = _mm512_add_epi16(_mm512_cvt##suffix##_epi16(_v512_extract_low(a.val)),            \
+                                       _mm512_cvt##suffix##_epi16(_v512_extract_high(a.val)));          \
+        a16 = _mm512_cvtepi16_epi32(_mm256_add_epi16(_v512_extract_low(a16), _v512_extract_high(a16))); /* 16 partial sums, widened to 32 bits */ \
+        __m256i a8 = _mm256_add_epi32(_v512_extract_low(a16), _v512_extract_high(a16));                 \
+        __m128i a4 = _mm_add_epi32(_mm256_castsi256_si128(a8), _mm256_extracti128_si256(a8, 1));        \
+        a4 = _mm_hadd_epi32(a4, a4);                                                                    \
+        return (sctype)_mm_cvtsi128_si32(_mm_hadd_epi32(a4, a4)); } /* total read from lane 0 */
+OPENCV_HAL_IMPL_AVX512_REDUCE_64_SUM(uint, v_uint8x64, epu8)
+OPENCV_HAL_IMPL_AVX512_REDUCE_64_SUM(int,  v_int8x64,  epi8)
+
+inline v_float32x16 v_reduce_sum4(const v_float32x16& a, const v_float32x16& b, // sums groups of 4 adjacent lanes of a, b, c and d
+                                  const v_float32x16& c, const v_float32x16& d)
+{
+    __m256 abl = _mm256_hadd_ps(_v512_extract_low(a.val), _v512_extract_low(b.val)); // pairwise sums of a and b, low 256 bits
+    __m256 abh = _mm256_hadd_ps(_v512_extract_high(a.val), _v512_extract_high(b.val)); // same for the high 256 bits
+    __m256 cdl = _mm256_hadd_ps(_v512_extract_low(c.val), _v512_extract_low(d.val));
+    __m256 cdh = _mm256_hadd_ps(_v512_extract_high(c.val), _v512_extract_high(d.val));
+    return v_float32x16(_v512_combine(_mm256_hadd_ps(abl, cdl), _mm256_hadd_ps(abh, cdh))); // second hadd: each 4-lane group becomes {sum a, sum b, sum c, sum d} of that group
+}
+
+inline unsigned v_reduce_sad(const v_uint8x64& a, const v_uint8x64& b) // sum of absolute differences over all 64 byte lanes
+{
+    __m512i sad8 = _mm512_sad_epu8(a.val, b.val); // one partial sum per 64-bit lane
+    __m256i sad4 = _mm256_add_epi32(_v512_extract_low(sad8), _v512_extract_high(sad8));
+    __m128i sad2 = _mm_add_epi32(_mm256_castsi256_si128(sad4), _mm256_extracti128_si256(sad4, 1));
+    return (unsigned)_mm_cvtsi128_si32(_mm_add_epi32(sad2, _mm_unpackhi_epi64(sad2, sad2))); // add the upper qword onto the lower, read lane 0
+}
+inline unsigned v_reduce_sad(const v_int8x64& a, const v_int8x64& b) // signed variant: bias both inputs, then use the unsigned SAD
+{
+    const __m512i bias = _mm512_set1_epi8(-128); // adding -128 (mod 256) maps signed bytes onto unsigned ones, order preserved
+    const __m512i sad8 = _mm512_sad_epu8(_mm512_add_epi8(a.val, bias), _mm512_add_epi8(b.val, bias));
+    __m256i sad4 = _mm256_add_epi32(_v512_extract_low(sad8), _v512_extract_high(sad8));
+    __m128i sad2 = _mm_add_epi32(_mm256_castsi256_si128(sad4), _mm256_extracti128_si256(sad4, 1));
+    return (unsigned)_mm_cvtsi128_si32(_mm_add_epi32(sad2, _mm_unpackhi_epi64(sad2, sad2))); // add the upper qword onto the lower, read lane 0
+}
+inline unsigned v_reduce_sad(const v_uint16x32& a, const v_uint16x32& b) // presumably relies on operator- saturating at 0, so one of the two differences is 0 - TODO confirm
+{ return v_reduce_sum(v_add_wrap(a - b, b - a)); }
+inline unsigned v_reduce_sad(const v_int16x32& a, const v_int16x32& b) // max - min with wrap-around, reinterpreted as unsigned before summing
+{ return v_reduce_sum(v_reinterpret_as_u16(v_sub_wrap(v_max(a, b), v_min(a, b)))); }
+inline unsigned v_reduce_sad(const v_uint32x16& a, const v_uint32x16& b) // |a - b| computed as max - min
+{ return v_reduce_sum(v_max(a, b) - v_min(a, b)); }
+inline unsigned v_reduce_sad(const v_int32x16& a, const v_int32x16& b) // max - min, reinterpreted as unsigned before summing
+{ return v_reduce_sum(v_reinterpret_as_u32(v_max(a, b) - v_min(a, b))); }
+inline float v_reduce_sad(const v_float32x16& a, const v_float32x16& b) // |a - b| by masking off the sign bit (0x7fffffff per lane)
+{ return v_reduce_sum((a - b) & v_float32x16(_mm512_castsi512_ps(_mm512_set1_epi32(0x7fffffff)))); }
+inline double v_reduce_sad(const v_float64x8& a, const v_float64x8& b) // same sign-bit masking for 64-bit lanes
+{ return v_reduce_sum((a - b) & v_float64x8(_mm512_castsi512_pd(_mm512_set1_epi64(0x7fffffffffffffff)))); }
+
+/** Popcount: number of set bits in every lane **/
+inline v_uint8x64 v_popcount(const v_int8x64& a) // per-byte bit count; the wider overloads build on this one
+{
+#if CV_AVX_512BITALG
+    return v_uint8x64(_mm512_popcnt_epi8(a.val)); // native per-byte popcount
+#elif CV_AVX_512VBMI
+    __m512i _popcnt_table0 = _v512_set_epu8(6, 5, 5, 4, 5, 4, 4, 3, 5, 4, 4, 3, 4, 3, 3, 2, // entries 63..48 (fixed: this row used to duplicate table1's 127..112 row)
+                                            5, 4, 4, 3, 4, 3, 3, 2, 4, 3, 3, 2, 3, 2, 2, 1, // entries 47..32
+                                            5, 4, 4, 3, 4, 3, 3, 2, 4, 3, 3, 2, 3, 2, 2, 1, // entries 31..16
+                                            4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0); // entries 15..0
+    __m512i _popcnt_table1 = _v512_set_epu8(7, 6, 6, 5, 6, 5, 5, 4, 6, 5, 5, 4, 5, 4, 4, 3, // entries 127..112
+                                            6, 5, 5, 4, 5, 4, 4, 3, 5, 4, 4, 3, 4, 3, 3, 2, // entries 111..96
+                                            6, 5, 5, 4, 5, 4, 4, 3, 5, 4, 4, 3, 4, 3, 3, 2, // entries 95..80
+                                            5, 4, 4, 3, 4, 3, 3, 2, 4, 3, 3, 2, 3, 2, 2, 1); // entries 79..64
+    return v_uint8x64(_mm512_sub_epi8(_mm512_permutex2var_epi8(_popcnt_table0, a.val, _popcnt_table1), _mm512_movm_epi8(_mm512_movepi8_mask(a.val)))); // bits 0..6 index the 128-entry table (bit 6 picks table1); subtracting the all-ones sign mask adds 1 for bit 7
+#else
+    __m512i _popcnt_table = _mm512_set4_epi32(0x04030302, 0x03020201, 0x03020201, 0x02010100); // 16-entry nibble popcount table, repeated per 128-bit block
+    __m512i _popcnt_mask = _mm512_set1_epi8(0x0F);
+
+    return v_uint8x64(_mm512_add_epi8(_mm512_shuffle_epi8(_popcnt_table, _mm512_and_si512(                  a.val,     _popcnt_mask)), // count of the low nibble
+                                      _mm512_shuffle_epi8(_popcnt_table, _mm512_and_si512(_mm512_srli_epi16(a.val, 4), _popcnt_mask)))); // plus count of the high nibble
+#endif
+}
+inline v_uint16x32 v_popcount(const v_int16x32& a) // bit count of every 16-bit lane
+{
+#if CV_AVX_512BITALG
+    return v_uint16x32(_mm512_popcnt_epi16(a.val));
+#elif CV_AVX_512VPOPCNTDQ
+    __m512i zero = _mm512_setzero_si512();
+    return v_uint16x32(_mm512_packs_epi32(_mm512_popcnt_epi32(_mm512_unpacklo_epi16(a.val, zero)), // zero-extend to 32 bits, count, pack back to 16 bits
+                                          _mm512_popcnt_epi32(_mm512_unpackhi_epi16(a.val, zero))));
+#else // fallback: per-byte counts, then add the two bytes of each 16-bit lane
+    v_uint8x64 p = v_popcount(v_reinterpret_as_s8(a));
+    p += v_rotate_right<1>(p); // byte k now also holds the count of byte k + 1
+    return v_reinterpret_as_u16(p) & v512_setall_u16(0x00ff); // keep only the low byte of each lane
+#endif
+}
+inline v_uint32x16 v_popcount(const v_int32x16& a) // bit count of every 32-bit lane
+{
+#if CV_AVX_512VPOPCNTDQ
+    return v_uint32x16(_mm512_popcnt_epi32(a.val));
+#else // fallback: per-byte counts, accumulated into the lowest byte of each lane in two steps
+    v_uint8x64 p = v_popcount(v_reinterpret_as_s8(a));
+    p += v_rotate_right<1>(p); // pairs of bytes
+    p += v_rotate_right<2>(p); // pairs of pairs: byte 0 of each lane now sums all four bytes
+    return v_reinterpret_as_u32(p) & v512_setall_u32(0x000000ff); // keep only the low byte of each lane
+#endif
+}
+inline v_uint64x8 v_popcount(const v_int64x8& a) // bit count of every 64-bit lane
+{
+#if CV_AVX_512VPOPCNTDQ
+    return v_uint64x8(_mm512_popcnt_epi64(a.val));
+#else // fallback: SAD against zero adds up the eight per-byte counts of each 64-bit lane
+    return v_uint64x8(_mm512_sad_epu8(v_popcount(v_reinterpret_as_s8(a)).val, _mm512_setzero_si512()));
+#endif
+}
+
+
+inline v_uint8x64  v_popcount(const v_uint8x64&  a) { return v_popcount(v_reinterpret_as_s8 (a)); } // unsigned inputs: bit counts do not depend on signedness,
+inline v_uint16x32 v_popcount(const v_uint16x32& a) { return v_popcount(v_reinterpret_as_s16(a)); } // so each overload reinterprets to the signed type
+inline v_uint32x16 v_popcount(const v_uint32x16& a) { return v_popcount(v_reinterpret_as_s32(a)); } // of the same width and forwards to it
+inline v_uint64x8  v_popcount(const v_uint64x8&  a) { return v_popcount(v_reinterpret_as_s64(a)); }
+
+
+////////// Other math /////////
+
+/** Some frequent operations: fused multiply-add, square root and 2-D magnitude for float vectors **/
+#define OPENCV_HAL_IMPL_AVX512_MULADD(_Tpvec, suffix) /* generated per floating-point vector type */ \
+    inline _Tpvec v_fma(const _Tpvec& a, const _Tpvec& b, const _Tpvec& c)    \
+    { return _Tpvec(_mm512_fmadd_##suffix(a.val, b.val, c.val)); } /* a * b + c */ \
+    inline _Tpvec v_muladd(const _Tpvec& a, const _Tpvec& b, const _Tpvec& c) \
+    { return _Tpvec(_mm512_fmadd_##suffix(a.val, b.val, c.val)); } /* same instruction as v_fma */ \
+    inline _Tpvec v_sqrt(const _Tpvec& x)                                     \
+    { return _Tpvec(_mm512_sqrt_##suffix(x.val)); }                           \
+    inline _Tpvec v_sqr_magnitude(const _Tpvec& a, const _Tpvec& b)           \
+    { return v_fma(a, a, b * b); } /* a*a + b*b */                            \
+    inline _Tpvec v_magnitude(const _Tpvec& a, const _Tpvec& b)               \
+    { return v_sqrt(v_fma(a, a, b * b)); } /* sqrt(a*a + b*b) */
+
+OPENCV_HAL_IMPL_AVX512_MULADD(v_float32x16, ps)
+OPENCV_HAL_IMPL_AVX512_MULADD(v_float64x8,  pd)
+
+inline v_int32x16 v_fma(const v_int32x16& a, const v_int32x16& b, const v_int32x16& c) // integer a * b + c, written as separate multiply and add
+{ return a * b + c; }
+inline v_int32x16 v_muladd(const v_int32x16& a, const v_int32x16& b, const v_int32x16& c) // alias of the integer v_fma
+{ return v_fma(a, b, c); }
+
+inline v_float32x16 v_invsqrt(const v_float32x16& x) // approximate 1 / sqrt(x) for every float lane
+{
+#if CV_AVX_512ER
+    return v_float32x16(_mm512_rsqrt28_ps(x.val)); // ER builds use the rsqrt28 estimate as-is
+#else
+    v_float32x16 x_half = x * v512_setall_f32(0.5);
+    v_float32x16 est(_mm512_rsqrt14_ps(x.val)); // initial rsqrt14 estimate
+    est *= v512_setall_f32(1.5) - ((est * est) * x_half); // one Newton-Raphson step: est * (1.5 - 0.5 * x * est^2)
+    return est;
+#endif
+}
+
+inline v_float64x8 v_invsqrt(const v_float64x8& x) // 1 / sqrt(x) for every double lane
+{
+#if CV_AVX_512ER
+    return v_float64x8(_mm512_rsqrt28_pd(x.val));
+#else
+    return v512_setall_f64(1.) / v_sqrt(x); // full sqrt followed by a division; the estimate-based variant below is kept disabled
+//    v_float64x8 half = x * v512_setall_f64(0.5);
+//    v_float64x8 t = v_float64x8(_mm512_rsqrt14_pd(x.val));
+//    t *= v512_setall_f64(1.5) - ((t * t) * half);
+//    t *= v512_setall_f64(1.5) - ((t * t) * half);
+//    return t;
+#endif
+}
+
+/** Absolute values: signed integer input, result returned in the unsigned type of the same width **/
+#define OPENCV_HAL_IMPL_AVX512_ABS(_Tpvec, _Tpuvec, suffix) \
+    inline _Tpuvec v_abs(const _Tpvec& x)                   \
+    { return _Tpuvec(_mm512_abs_##suffix(x.val)); } /* wraps the matching _mm512_abs_* intrinsic */
+
+OPENCV_HAL_IMPL_AVX512_ABS(v_int8x64,    v_uint8x64,    epi8)
+OPENCV_HAL_IMPL_AVX512_ABS(v_int16x32,   v_uint16x32,  epi16)
+OPENCV_HAL_IMPL_AVX512_ABS(v_int32x16,   v_uint32x16,  epi32)
+OPENCV_HAL_IMPL_AVX512_ABS(v_int64x8,    v_uint64x8,   epi64)
+
+inline v_float32x16 v_abs(const v_float32x16& x) // absolute value of every float lane
+{
+#ifdef _mm512_abs_ps // guard on the intrinsic that is actually called below (the guard used to test _mm512_abs_pd)
+    return v_float32x16(_mm512_abs_ps(x.val));
+#else // portable path: clear the sign bit of both 32-bit floats packed in each 64-bit constant
+    return v_float32x16(_mm512_castsi512_ps(_mm512_and_si512(_mm512_castps_si512(x.val),
+                        _v512_set_epu64(0x7FFFFFFF7FFFFFFF, 0x7FFFFFFF7FFFFFFF, 0x7FFFFFFF7FFFFFFF, 0x7FFFFFFF7FFFFFFF,
+                                        0x7FFFFFFF7FFFFFFF, 0x7FFFFFFF7FFFFFFF, 0x7FFFFFFF7FFFFFFF, 0x7FFFFFFF7FFFFFFF))));
+#endif
+}
+
+inline v_float64x8 v_abs(const v_float64x8& x) // absolute value of every double lane
+{
+#ifdef _mm512_abs_pd // only taken when the compiler exposes _mm512_abs_pd as a macro
+    #if defined __GNUC__ && (__GNUC__ < 7 || (__GNUC__ == 7 && __GNUC_MINOR__ <= 3) || (__GNUC__ == 8 && __GNUC_MINOR__ <= 2))
+        // Workaround for https://gcc.gnu.org/bugzilla/show_bug.cgi?id=87476
+        return v_float64x8(_mm512_abs_pd(_mm512_castpd_ps(x.val))); // the affected GCC versions get a __m512 argument here
+    #else
+        return v_float64x8(_mm512_abs_pd(x.val));
+    #endif
+#else // portable path: clear the sign bit of every 64-bit lane
+    return v_float64x8(_mm512_castsi512_pd(_mm512_and_si512(_mm512_castpd_si512(x.val),
+                       _v512_set_epu64(0x7FFFFFFFFFFFFFFF, 0x7FFFFFFFFFFFFFFF, 0x7FFFFFFFFFFFFFFF, 0x7FFFFFFFFFFFFFFF,
+                                       0x7FFFFFFFFFFFFFFF, 0x7FFFFFFFFFFFFFFF, 0x7FFFFFFFFFFFFFFF, 0x7FFFFFFFFFFFFFFF))));
+#endif
+}
+
+/** Absolute difference: |a - b| per lane, returned in the unsigned (or floating-point) type **/
+inline v_uint8x64 v_absdiff(const v_uint8x64& a, const v_uint8x64& b) // presumably operator- saturates at 0, so one of the two terms is 0 - TODO confirm
+{ return v_add_wrap(a - b,  b - a); }
+inline v_uint16x32 v_absdiff(const v_uint16x32& a, const v_uint16x32& b) // same two-sided subtraction scheme as the 8-bit overload
+{ return v_add_wrap(a - b,  b - a); }
+inline v_uint32x16 v_absdiff(const v_uint32x16& a, const v_uint32x16& b) // 32-bit lanes: larger value minus smaller value
+{ return v_max(a, b) - v_min(a, b); }
+
+inline v_uint8x64 v_absdiff(const v_int8x64& a, const v_int8x64& b) // |a - b| for signed bytes, returned as unsigned
+{
+    v_int8x64 diff = v_sub_wrap(a, b);    // wrapping a - b
+    v_int8x64 negate = a < b;             // comparison mask for the lanes where a < b
+    return v_reinterpret_as_u8(v_sub_wrap(diff ^ negate, negate)); // (diff ^ mask) - mask negates exactly the masked lanes
+}
+
+inline v_uint16x32 v_absdiff(const v_int16x32& a, const v_int16x32& b) // wrapping max - min, reinterpreted as unsigned
+{ return v_reinterpret_as_u16(v_sub_wrap(v_max(a, b), v_min(a, b))); }
+
+inline v_uint32x16 v_absdiff(const v_int32x16& a, const v_int32x16& b) // |a - b| for signed 32-bit lanes, returned as unsigned
+{
+    v_int32x16 diff = a - b;
+    v_int32x16 negate = a < b;            // comparison mask for the lanes where a < b
+    return v_reinterpret_as_u32((diff ^ negate) - negate); // (diff ^ mask) - mask negates exactly the masked lanes
+}
+
+inline v_float32x16 v_absdiff(const v_float32x16& a, const v_float32x16& b) // float lanes: subtract, then v_abs
+{ return v_abs(a - b); }
+
+inline v_float64x8 v_absdiff(const v_float64x8& a, const v_float64x8& b) // double lanes: subtract, then v_abs
+{ return v_abs(a - b); }
+
+/** Saturating absolute difference: |a - b| kept in the signed type **/
+inline v_int8x64 v_absdiffs(const v_int8x64& a, const v_int8x64& b)
+{
+    v_int8x64 diff = a - b;
+    v_int8x64 negate = a < b;             // comparison mask for the lanes where a < b
+    return (diff ^ negate) - negate;      // conditional negate of the masked lanes, using the type's own operator-
+}
+inline v_int16x32 v_absdiffs(const v_int16x32& a, const v_int16x32& b) // 16-bit lanes: larger minus smaller, using the type's own operator-
+{ return v_max(a, b) - v_min(a, b); }
+
+////////// Conversions /////////
+
+/** Rounding: floating-point lanes converted to 32-bit integers **/
+inline v_int32x16 v_round(const v_float32x16& a) // 16 floats -> 16 int32 via _mm512_cvtps_epi32
+{ return v_int32x16(_mm512_cvtps_epi32(a.val)); }
+
+inline v_int32x16 v_round(const v_float64x8& a) // 8 doubles -> low 8 int32 lanes; NOTE(review): the upper 8 lanes come from a cast intrinsic and should not be relied on
+{ return v_int32x16(_mm512_castsi256_si512(_mm512_cvtpd_epi32(a.val))); }
+
+inline v_int32x16 v_round(const v_float64x8& a, const v_float64x8& b) // a fills the low 8 lanes, b the high 8
+{ return v_int32x16(_v512_combine(_mm512_cvtpd_epi32(a.val), _mm512_cvtpd_epi32(b.val))); }
+
+inline v_int32x16 v_trunc(const v_float32x16& a) // 16 floats -> 16 int32 using the truncating conversion (cvtt)
+{ return v_int32x16(_mm512_cvttps_epi32(a.val)); }
+
+inline v_int32x16 v_trunc(const v_float64x8& a) // 8 doubles -> low 8 int32 lanes, truncating; upper lanes come from a cast intrinsic
+{ return v_int32x16(_mm512_castsi256_si512(_mm512_cvttpd_epi32(a.val))); }
+
+#if CVT_ROUND_MODES_IMPLEMENTED // variant that passes the rounding direction straight to the conversion intrinsic
+inline v_int32x16 v_floor(const v_float32x16& a)
+{ return v_int32x16(_mm512_cvt_roundps_epi32(a.val, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC)); }
+
+inline v_int32x16 v_floor(const v_float64x8& a) // result occupies the low 8 lanes
+{ return v_int32x16(_mm512_castsi256_si512(_mm512_cvt_roundpd_epi32(a.val, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC))); }
+
+inline v_int32x16 v_ceil(const v_float32x16& a)
+{ return v_int32x16(_mm512_cvt_roundps_epi32(a.val, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC)); }
+
+inline v_int32x16 v_ceil(const v_float64x8& a) // result occupies the low 8 lanes
+{ return v_int32x16(_mm512_castsi256_si512(_mm512_cvt_roundpd_epi32(a.val, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC))); }
+#else // default variant: round in floating point with roundscale first, then convert
+inline v_int32x16 v_floor(const v_float32x16& a) // roundscale immediate 1 is used for floor
+{ return v_int32x16(_mm512_cvtps_epi32(_mm512_roundscale_ps(a.val, 1))); }
+
+inline v_int32x16 v_floor(const v_float64x8& a) // result occupies the low 8 lanes
+{ return v_int32x16(_mm512_castsi256_si512(_mm512_cvtpd_epi32(_mm512_roundscale_pd(a.val, 1)))); }
+
+inline v_int32x16 v_ceil(const v_float32x16& a) // roundscale immediate 2 is used for ceil
+{ return v_int32x16(_mm512_cvtps_epi32(_mm512_roundscale_ps(a.val, 2))); }
+
+inline v_int32x16 v_ceil(const v_float64x8& a) // result occupies the low 8 lanes
+{ return v_int32x16(_mm512_castsi256_si512(_mm512_cvtpd_epi32(_mm512_roundscale_pd(a.val, 2)))); }
+#endif
+
+/** To float: conversions producing floating-point vectors **/
+inline v_float32x16 v_cvt_f32(const v_int32x16& a) // 16 int32 -> 16 floats
+{ return v_float32x16(_mm512_cvtepi32_ps(a.val)); }
+
+inline v_float32x16 v_cvt_f32(const v_float64x8& a) // 8 doubles -> floats placed in the low half (cvtpd_pslo)
+{ return v_float32x16(_mm512_cvtpd_pslo(a.val)); }
+
+inline v_float32x16 v_cvt_f32(const v_float64x8& a, const v_float64x8& b) // a fills the low 8 lanes, b the high 8
+{ return v_float32x16(_v512_combine(_mm512_cvtpd_ps(a.val), _mm512_cvtpd_ps(b.val))); }
+
+inline v_float64x8 v_cvt_f64(const v_int32x16& a) // converts the low 8 int32 lanes to doubles
+{ return v_float64x8(_mm512_cvtepi32_pd(_v512_extract_low(a.val))); }
+
+inline v_float64x8 v_cvt_f64_high(const v_int32x16& a) // converts the high 8 int32 lanes to doubles
+{ return v_float64x8(_mm512_cvtepi32_pd(_v512_extract_high(a.val))); }
+
+inline v_float64x8 v_cvt_f64(const v_float32x16& a) // widens the low 8 float lanes to doubles
+{ return v_float64x8(_mm512_cvtps_pd(_v512_extract_low(a.val))); }
+
+inline v_float64x8 v_cvt_f64_high(const v_float32x16& a) // widens the high 8 float lanes to doubles
+{ return v_float64x8(_mm512_cvtps_pd(_v512_extract_high(a.val))); }
+
+// Signed 64-bit integer lanes to doubles; the fallback is from (Mysticial and wim) https://stackoverflow.com/q/41144668
+inline v_float64x8 v_cvt_f64(const v_int64x8& v)
+{
+#if CV_AVX_512DQ
+    return v_float64x8(_mm512_cvtepi64_pd(v.val)); // DQ provides the conversion directly
+#else
+    // constants below are integer bit patterns that are later reinterpreted as doubles
+    __m512i magic_i_lo   = _mm512_set1_epi64(0x4330000000000000); // 2^52
+    __m512i magic_i_hi32 = _mm512_set1_epi64(0x4530000080000000); // 2^84 + 2^63
+    __m512i magic_i_all  = _mm512_set1_epi64(0x4530000080100000); // 2^84 + 2^63 + 2^52
+    __m512d magic_d_all  = _mm512_castsi512_pd(magic_i_all);
+
+    // Blend the 32 least significant bits of v into magic_i_lo (mask 0x5555 selects the even dwords from v)
+    __m512i v_lo         = _mm512_mask_blend_epi32(0x5555, magic_i_lo, v.val);
+    // Extract the 32 most significant bits of v
+    __m512i v_hi         = _mm512_srli_epi64(v.val, 32);
+    // Flip the msb of v_hi and merge in the 0x45300000 exponent pattern (single xor with magic_i_hi32)
+            v_hi         = _mm512_xor_si512(v_hi, magic_i_hi32);
+    // Compute in double precision
+    __m512d v_hi_dbl     = _mm512_sub_pd(_mm512_castsi512_pd(v_hi), magic_d_all);
+    // (v_hi - magic_d_all) + v_lo  Do not assume associativity of floating point addition
+    __m512d result       = _mm512_add_pd(v_hi_dbl, _mm512_castsi512_pd(v_lo));
+    return v_float64x8(result);
+#endif
+}
+
+////////////// Lookup table access ////////////////////
+
+inline v_int8x64 v512_lut(const schar* tab, const int* idx) // reads 64 indices from idx (four 512-bit loads); one table byte per index
+{
+    __m128i p0 = _mm512_cvtepi32_epi8(_mm512_i32gather_epi32(_mm512_loadu_si512((const __m512i*)idx    ), (const int *)tab, 1)); // scale 1: a dword is fetched at tab + idx[i] and only its low byte is kept. NOTE(review): the gather touches 3 bytes past each addressed entry - confirm tables are padded
+    __m128i p1 = _mm512_cvtepi32_epi8(_mm512_i32gather_epi32(_mm512_loadu_si512((const __m512i*)idx + 1), (const int *)tab, 1));
+    __m128i p2 = _mm512_cvtepi32_epi8(_mm512_i32gather_epi32(_mm512_loadu_si512((const __m512i*)idx + 2), (const int *)tab, 1));
+    __m128i p3 = _mm512_cvtepi32_epi8(_mm512_i32gather_epi32(_mm512_loadu_si512((const __m512i*)idx + 3), (const int *)tab, 1));
+    return v_int8x64(_mm512_inserti32x4(_mm512_inserti32x4(_mm512_inserti32x4(_mm512_castsi128_si512(p0), p1, 1), p2, 2), p3, 3)); // p0..p3 become 128-bit lanes 0..3
+}
+inline v_int8x64 v512_lut_pairs(const schar* tab, const int* idx) // reads 32 indices; keeps the 2 bytes starting at tab + idx[i]
+{
+    __m256i p0 = _mm512_cvtepi32_epi16(_mm512_i32gather_epi32(_mm512_loadu_si512((const __m512i*)idx    ), (const int *)tab, 1)); // low 16 bits of every gathered dword
+    __m256i p1 = _mm512_cvtepi32_epi16(_mm512_i32gather_epi32(_mm512_loadu_si512((const __m512i*)idx + 1), (const int *)tab, 1));
+    return v_int8x64(_v512_combine(p0, p1));
+}
+inline v_int8x64 v512_lut_quads(const schar* tab, const int* idx) // reads 16 indices; keeps all 4 bytes starting at tab + idx[i]
+{
+    return v_int8x64(_mm512_i32gather_epi32(_mm512_loadu_si512((const __m512i*)idx), (const int *)tab, 1));
+}
+inline v_uint8x64 v512_lut(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v512_lut((const schar *)tab, idx)); } // unsigned overloads reuse the signed versions through a bit-preserving reinterpret
+inline v_uint8x64 v512_lut_pairs(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v512_lut_pairs((const schar *)tab, idx)); }
+inline v_uint8x64 v512_lut_quads(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v512_lut_quads((const schar *)tab, idx)); }
+
+inline v_int16x32 v512_lut(const short* tab, const int* idx) // reads 32 indices; one 16-bit table entry per index
+{
+    __m256i p0 = _mm512_cvtepi32_epi16(_mm512_i32gather_epi32(_mm512_loadu_si512((const __m512i*)idx    ), (const int *)tab, 2)); // scale 2: dword fetched at &tab[idx[i]], low 16 bits kept
+    __m256i p1 = _mm512_cvtepi32_epi16(_mm512_i32gather_epi32(_mm512_loadu_si512((const __m512i*)idx + 1), (const int *)tab, 2));
+    return v_int16x32(_v512_combine(p0, p1));
+}
+inline v_int16x32 v512_lut_pairs(const short* tab, const int* idx) // reads 16 indices; keeps the 2 entries starting at tab[idx[i]]
+{
+    return v_int16x32(_mm512_i32gather_epi32(_mm512_loadu_si512((const __m512i*)idx), (const int *)tab, 2));
+}
+inline v_int16x32 v512_lut_quads(const short* tab, const int* idx) // reads 8 indices; keeps the 4 entries (one qword) starting at tab[idx[i]]
+{
+#if defined(__GNUC__)
+    return v_int16x32(_mm512_i32gather_epi64(_mm256_loadu_si256((const __m256i*)idx), (const long long int*)tab, 2)); // presumably GCC's prototype wants long long*, hence the separate cast
+#else
+    return v_int16x32(_mm512_i32gather_epi64(_mm256_loadu_si256((const __m256i*)idx), (const int64*)tab, 2));
+#endif
+}
+inline v_uint16x32 v512_lut(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v512_lut((const short *)tab, idx)); } // unsigned overloads reuse the signed versions through a bit-preserving reinterpret
+inline v_uint16x32 v512_lut_pairs(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v512_lut_pairs((const short *)tab, idx)); }
+inline v_uint16x32 v512_lut_quads(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v512_lut_quads((const short *)tab, idx)); }
+
+inline v_int32x16 v512_lut(const int* tab, const int* idx) // reads 16 indices; result element i is tab[idx[i]]
+{
+    return v_int32x16(_mm512_i32gather_epi32(_mm512_loadu_si512((const __m512i*)idx), tab, 4));
+}
+inline v_int32x16 v512_lut_pairs(const int* tab, const int* idx) // reads 8 indices; each 64-bit gather brings tab[idx[i]] and tab[idx[i]+1]
+{
+#if defined(__GNUC__)
+    return v_int32x16(_mm512_i32gather_epi64(_mm256_loadu_si256((const __m256i*)idx), (const long long int*)tab, 4)); // presumably GCC's prototype wants long long*, hence the separate cast
+#else
+    return v_int32x16(_mm512_i32gather_epi64(_mm256_loadu_si256((const __m256i*)idx), (const int64*)tab, 4));
+#endif
+}
+inline v_int32x16 v512_lut_quads(const int* tab, const int* idx) // uses idx[0..3]; each index contributes 4 consecutive ints as one 128-bit lane
+{
+    return v_int32x16(_mm512_inserti32x4(_mm512_inserti32x4(_mm512_inserti32x4(_mm512_castsi128_si512(
+                          _mm_loadu_si128((const __m128i*)(tab + idx[0]))),
+                          _mm_loadu_si128((const __m128i*)(tab + idx[1])), 1),
+                          _mm_loadu_si128((const __m128i*)(tab + idx[2])), 2),
+                          _mm_loadu_si128((const __m128i*)(tab + idx[3])), 3));
+}
+inline v_uint32x16 v512_lut(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v512_lut((const int *)tab, idx)); } // unsigned overloads reuse the signed versions through a bit-preserving reinterpret
+inline v_uint32x16 v512_lut_pairs(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v512_lut_pairs((const int *)tab, idx)); }
+inline v_uint32x16 v512_lut_quads(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v512_lut_quads((const int *)tab, idx)); }
+
+inline v_int64x8 v512_lut(const int64* tab, const int* idx) // reads 8 indices; result element i is tab[idx[i]]
+{
+#if defined(__GNUC__)
+    return v_int64x8(_mm512_i32gather_epi64(_mm256_loadu_si256((const __m256i*)idx), (const long long int*)tab, 8)); // presumably GCC's prototype wants long long*, hence the separate cast
+#else
+    return v_int64x8(_mm512_i32gather_epi64(_mm256_loadu_si256((const __m256i*)idx), tab , 8));
+#endif
+}
+inline v_int64x8 v512_lut_pairs(const int64* tab, const int* idx) // uses idx[0..3]; each index contributes 2 consecutive int64 values as one 128-bit lane
+{
+    return v_int64x8(_mm512_inserti32x4(_mm512_inserti32x4(_mm512_inserti32x4(_mm512_castsi128_si512(
+                         _mm_loadu_si128((const __m128i*)(tab + idx[0]))),
+                         _mm_loadu_si128((const __m128i*)(tab + idx[1])), 1),
+                         _mm_loadu_si128((const __m128i*)(tab + idx[2])), 2),
+                         _mm_loadu_si128((const __m128i*)(tab + idx[3])), 3));
+}
+inline v_uint64x8 v512_lut(const uint64* tab, const int* idx) { return v_reinterpret_as_u64(v512_lut((const int64 *)tab, idx)); } // unsigned overloads reuse the signed versions through a bit-preserving reinterpret
+inline v_uint64x8 v512_lut_pairs(const uint64* tab, const int* idx) { return v_reinterpret_as_u64(v512_lut_pairs((const int64 *)tab, idx)); }
+
+inline v_float32x16 v512_lut(const float* tab, const int* idx) // reads 16 indices; result element i is tab[idx[i]]
+{
+    return v_float32x16(_mm512_i32gather_ps(_mm512_loadu_si512((const __m512i*)idx), tab, 4));
+}
+inline v_float32x16 v512_lut_pairs(const float* tab, const int* idx) { return v_reinterpret_as_f32(v512_lut_pairs((const int *)tab, idx)); } // pairs/quads only move bits, so the int versions are reused
+inline v_float32x16 v512_lut_quads(const float* tab, const int* idx) { return v_reinterpret_as_f32(v512_lut_quads((const int *)tab, idx)); }
+
+inline v_float64x8 v512_lut(const double* tab, const int* idx) // reads 8 indices; result element i is tab[idx[i]]
+{
+    return v_float64x8(_mm512_i32gather_pd(_mm256_loadu_si256((const __m256i*)idx), tab, 8));
+}
+inline v_float64x8 v512_lut_pairs(const double* tab, const int* idx) // uses idx[0..3]; each index contributes 2 consecutive doubles as one 128-bit lane
+{
+        return v_float64x8(_mm512_insertf64x2(_mm512_insertf64x2(_mm512_insertf64x2(_mm512_castpd128_pd512(
+                               _mm_loadu_pd(tab + idx[0])),
+                               _mm_loadu_pd(tab + idx[1]), 1),
+                               _mm_loadu_pd(tab + idx[2]), 2),
+                               _mm_loadu_pd(tab + idx[3]), 3));
+}
+
+inline v_int32x16 v_lut(const int* tab, const v_int32x16& idxvec) // gather with indices already in a register: element i is tab[idxvec[i]]
+{
+    return v_int32x16(_mm512_i32gather_epi32(idxvec.val, tab, 4));
+}
+
+inline v_uint32x16 v_lut(const unsigned* tab, const v_int32x16& idxvec) // unsigned overload, forwards to the int version
+{
+    return v_reinterpret_as_u32(v_lut((const int *)tab, idxvec));
+}
+
+inline v_float32x16 v_lut(const float* tab, const v_int32x16& idxvec) // element i is tab[idxvec[i]]
+{
+    return v_float32x16(_mm512_i32gather_ps(idxvec.val, tab, 4));
+}
+
+inline v_float64x8 v_lut(const double* tab, const v_int32x16& idxvec) // only the low 8 indices of idxvec are used
+{
+    return v_float64x8(_mm512_i32gather_pd(_v512_extract_low(idxvec.val), tab, 8));
+}
+
+inline void v_lut_deinterleave(const float* tab, const v_int32x16& idxvec, v_float32x16& x, v_float32x16& y) // x[i] = tab[idxvec[i]], y[i] = tab[idxvec[i] + 1]
+{
+    x.val = _mm512_i32gather_ps(idxvec.val, tab, 4);
+    y.val = _mm512_i32gather_ps(idxvec.val, &tab[1], 4); // same indices, base shifted by one element
+}
+
+inline void v_lut_deinterleave(const double* tab, const v_int32x16& idxvec, v_float64x8& x, v_float64x8& y) // as above, using only the low 8 indices
+{
+    x.val = _mm512_i32gather_pd(_v512_extract_low(idxvec.val), tab, 8);
+    y.val = _mm512_i32gather_pd(_v512_extract_low(idxvec.val), &tab[1], 8);
+}
+
+inline v_int8x64 v_interleave_pairs(const v_int8x64& vec) // every group of 4 bytes {0,1,2,3} is reordered to {0,2,1,3}
+{
+    return v_int8x64(_mm512_shuffle_epi8(vec.val, _mm512_set4_epi32(0x0f0d0e0c, 0x0b090a08, 0x07050604, 0x03010200))); // the 16-byte pattern is repeated for all four 128-bit lanes
+}
+inline v_uint8x64 v_interleave_pairs(const v_uint8x64& vec) { return v_reinterpret_as_u8(v_interleave_pairs(v_reinterpret_as_s8(vec))); } // unsigned overload, same byte shuffle
+inline v_int8x64 v_interleave_quads(const v_int8x64& vec) // every group of 8 bytes {0..7} is reordered to {0,4,1,5,2,6,3,7}
+{
+    return v_int8x64(_mm512_shuffle_epi8(vec.val, _mm512_set4_epi32(0x0f0b0e0a, 0x0d090c08, 0x07030602, 0x05010400)));
+}
+inline v_uint8x64 v_interleave_quads(const v_uint8x64& vec) { return v_reinterpret_as_u8(v_interleave_quads(v_reinterpret_as_s8(vec))); } // unsigned overload, same byte shuffle
+
+inline v_int16x32 v_interleave_pairs(const v_int16x32& vec) // every group of 4 words {0,1,2,3} is reordered to {0,2,1,3}
+{
+    return v_int16x32(_mm512_shuffle_epi8(vec.val, _mm512_set4_epi32(0x0f0e0b0a, 0x0d0c0908, 0x07060302, 0x05040100))); // byte indices move in pairs so each 16-bit word stays intact
+}
+inline v_uint16x32 v_interleave_pairs(const v_uint16x32& vec) { return v_reinterpret_as_u16(v_interleave_pairs(v_reinterpret_as_s16(vec))); } // unsigned overload, same shuffle
+inline v_int16x32 v_interleave_quads(const v_int16x32& vec) // the 8 words of each 128-bit lane are reordered to {0,4,1,5,2,6,3,7}
+{
+    return v_int16x32(_mm512_shuffle_epi8(vec.val, _mm512_set4_epi32(0x0f0e0706, 0x0d0c0504, 0x0b0a0302, 0x09080100)));
+}
+inline v_uint16x32 v_interleave_quads(const v_uint16x32& vec) { return v_reinterpret_as_u16(v_interleave_quads(v_reinterpret_as_s16(vec))); } // unsigned overload, same shuffle
+
+inline v_int32x16 v_interleave_pairs(const v_int32x16& vec) // every 128-bit lane {e0,e1,e2,e3} becomes {e0,e2,e1,e3}, matching the 8- and 16-bit overloads
+{
+    return v_int32x16(_mm512_shuffle_epi32(vec.val, _MM_PERM_DBCA)); // letters name the source for dst3..dst0 with A = element 0, so DBCA (0xD8) swaps the middle pair; ACBD (0x27) would swap e0 and e3 instead
+}
+inline v_uint32x16 v_interleave_pairs(const v_uint32x16& vec) { return v_reinterpret_as_u32(v_interleave_pairs(v_reinterpret_as_s32(vec))); } // unsigned overload, forwards to the int32 shuffle
+inline v_float32x16 v_interleave_pairs(const v_float32x16& vec) { return v_reinterpret_as_f32(v_interleave_pairs(v_reinterpret_as_s32(vec))); } // float overload, forwards to the int32 shuffle
+
+inline v_int8x64 v_pack_triplets(const v_int8x64& vec) // drops every 4th byte and packs the remaining triplets toward the start of the vector
+{
+    return v_int8x64(_mm512_permutexvar_epi32(_v512_set_epu64(0x0000000f0000000f, 0x0000000f0000000f, 0x0000000e0000000d, 0x0000000c0000000a, // step 2: dwords 0,1,2 of each lane are collected; the tail repeats dword 15 as filler
+                                                              0x0000000900000008, 0x0000000600000005, 0x0000000400000002, 0x0000000100000000),
+                                              _mm512_shuffle_epi8(vec.val, _mm512_set4_epi32(0xffffff0f, 0x0e0d0c0a, 0x09080605, 0x04020100)))); // step 1: inside each lane bytes 0,1,2,4,5,6,8,9,10,12,13,14 move to the front
+}
+inline v_uint8x64 v_pack_triplets(const v_uint8x64& vec) { return v_reinterpret_as_u8(v_pack_triplets(v_reinterpret_as_s8(vec))); } // unsigned overload, same permutation
+
+inline v_int16x32 v_pack_triplets(const v_int16x32& vec) // keeps words 0,1,2 of every group of 4 (single word permute); the tail repeats word 31 as filler
+{
+    return v_int16x32(_mm512_permutexvar_epi16(_v512_set_epu64(0x001f001f001f001f, 0x001f001f001f001f, 0x001e001d001c001a, 0x0019001800160015,
+                                                               0x0014001200110010, 0x000e000d000c000a, 0x0009000800060005, 0x0004000200010000), vec.val));
+}
+inline v_uint16x32 v_pack_triplets(const v_uint16x32& vec) { return v_reinterpret_as_u16(v_pack_triplets(v_reinterpret_as_s16(vec))); } // unsigned overload, same permutation
+
+inline v_int32x16 v_pack_triplets(const v_int32x16& vec) // keeps dwords 0,1,2 of every group of 4; the tail repeats dword 15 as filler
+{
+    return v_int32x16(_mm512_permutexvar_epi32(_v512_set_epu64(0x0000000f0000000f, 0x0000000f0000000f, 0x0000000e0000000d, 0x0000000c0000000a,
+                                                               0x0000000900000008, 0x0000000600000005, 0x0000000400000002, 0x0000000100000000), vec.val));
+}
+inline v_uint32x16 v_pack_triplets(const v_uint32x16& vec) { return v_reinterpret_as_u32(v_pack_triplets(v_reinterpret_as_s32(vec))); } // unsigned overload, same permutation
+inline v_float32x16 v_pack_triplets(const v_float32x16& vec) // float version with the same index table as the int32 one
+{
+    return v_float32x16(_mm512_permutexvar_ps(_v512_set_epu64(0x0000000f0000000f, 0x0000000f0000000f, 0x0000000e0000000d, 0x0000000c0000000a,
+                                                              0x0000000900000008, 0x0000000600000005, 0x0000000400000002, 0x0000000100000000), vec.val));
+}
+
+////////// Matrix operations /////////
+
+//////// Dot Product ////////
+
+// 16 >> 32
+inline v_int32x16 v_dotprod(const v_int16x32& a, const v_int16x32& b) // each 32-bit lane: a[2k]*b[2k] + a[2k+1]*b[2k+1] (one madd instruction)
+{ return v_int32x16(_mm512_madd_epi16(a.val, b.val)); }
+inline v_int32x16 v_dotprod(const v_int16x32& a, const v_int16x32& b, const v_int32x16& c) // same, with accumulator c added lane-wise
+{ return v_dotprod(a, b) + c; }
+
+// 32 >> 64
+inline v_int64x8 v_dotprod(const v_int32x16& a, const v_int32x16& b) // each 64-bit lane: a[2k]*b[2k] + a[2k+1]*b[2k+1]
+{
+    const __m512i prod_lo = _mm512_mul_epi32(a.val, b.val); // signed 32x32 -> 64 on the even dwords
+    const __m512i prod_hi = _mm512_mul_epi32(_mm512_srli_epi64(a.val, 32), _mm512_srli_epi64(b.val, 32)); // odd dwords shifted down first
+    return v_int64x8(_mm512_add_epi64(prod_lo, prod_hi));
+}
+inline v_int64x8 v_dotprod(const v_int32x16& a, const v_int32x16& b, const v_int64x8& c) // same, with accumulator c added lane-wise
+{ const v_int64x8 ab = v_dotprod(a, b); return ab + c; }
+
+// 8 >> 32
+inline v_uint32x16 v_dotprod_expand(const v_uint8x64& a, const v_uint8x64& b) // each 32-bit lane sums the 4 byte products of its group
+{
+    const __mmask64 odd_bytes = 0xAAAAAAAAAAAAAAAA; // bytes 1, 3, 5, ... get replaced by zero
+    const __m512i zero = _mm512_setzero_si512();
+    const __m512i a_even = _mm512_mask_blend_epi8(odd_bytes, a.val, zero); // even bytes, zero-extended to 16 bits
+    const __m512i a_odd  = _mm512_srli_epi16(a.val, 8);                    // odd bytes, zero-extended to 16 bits
+    const __m512i b_even = _mm512_mask_blend_epi8(odd_bytes, b.val, zero);
+    const __m512i b_odd  = _mm512_srli_epi16(b.val, 8);
+
+    const __m512i sum_even = _mm512_madd_epi16(a_even, b_even);
+    return v_uint32x16(_mm512_add_epi32(sum_even, _mm512_madd_epi16(a_odd, b_odd)));
+}
+inline v_uint32x16 v_dotprod_expand(const v_uint8x64& a, const v_uint8x64& b, const v_uint32x16& c) // same, with accumulator c added lane-wise
+{ const v_uint32x16 ab = v_dotprod_expand(a, b); return ab + c; }
+
+inline v_int32x16 v_dotprod_expand(const v_int8x64& a, const v_int8x64& b) // each 32-bit lane sums the 4 signed byte products of its group
+{
+    // sign-extend bytes to 16 bits: odd bytes by an arithmetic shift, even bytes after moving them up one byte
+    const __m512i a_odd  = _mm512_srai_epi16(a.val, 8);
+    const __m512i b_odd  = _mm512_srai_epi16(b.val, 8);
+    const __m512i a_even = _mm512_srai_epi16(_mm512_bslli_epi128(a.val, 1), 8);
+    const __m512i b_even = _mm512_srai_epi16(_mm512_bslli_epi128(b.val, 1), 8);
+
+    const __m512i sum_even = _mm512_madd_epi16(a_even, b_even);
+    const __m512i sum_odd  = _mm512_madd_epi16(a_odd, b_odd);
+    return v_int32x16(_mm512_add_epi32(sum_even, sum_odd));
+}
+inline v_int32x16 v_dotprod_expand(const v_int8x64& a, const v_int8x64& b, const v_int32x16& c) // same, with accumulator c added lane-wise
+{ const v_int32x16 ab = v_dotprod_expand(a, b); return ab + c; }
+
+// 16 >> 64
+inline v_uint64x8 v_dotprod_expand(const v_uint16x32& a, const v_uint16x32& b)
+{
+    // full 32-bit products, rebuilt from the low and high 16-bit halves of each multiplication
+    const __m512i lo16 = _mm512_mullo_epi16(a.val, b.val);
+    const __m512i hi16 = _mm512_mulhi_epu16(a.val, b.val);
+    const __m512i prod_a = _mm512_unpacklo_epi16(lo16, hi16);
+    const __m512i prod_b = _mm512_unpackhi_epi16(lo16, hi16);
+    const __m512i zero = _mm512_setzero_si512();
+
+    // zero-extend to 64 bits: even dwords by masking out their neighbour, odd dwords by a shift
+    const __m512i a_even = _mm512_mask_blend_epi32(0xAAAA, prod_a, zero);
+    const __m512i a_odd  = _mm512_srli_epi64(prod_a, 32);
+    const __m512i b_even = _mm512_mask_blend_epi32(0xAAAA, prod_b, zero);
+    const __m512i b_odd  = _mm512_srli_epi64(prod_b, 32);
+
+    const __m512i pair_a = _mm512_add_epi64(a_even, a_odd);
+    const __m512i pair_b = _mm512_add_epi64(b_even, b_odd);
+    // regroup so that each 64-bit lane ends up with the sum of four neighbouring products
+    return v_uint64x8(_mm512_add_epi64(_mm512_unpacklo_epi64(pair_a, pair_b), _mm512_unpackhi_epi64(pair_a, pair_b)));
+}
+inline v_uint64x8 v_dotprod_expand(const v_uint16x32& a, const v_uint16x32& b, const v_uint64x8& c) // same, with accumulator c added lane-wise
+{ const v_uint64x8 ab = v_dotprod_expand(a, b); return ab + c; }
+
+inline v_int64x8 v_dotprod_expand(const v_int16x32& a, const v_int16x32& b) // each 64-bit lane sums the 4 signed word products of its group
+{
+    const __m512i pair_sums = _mm512_madd_epi16(a.val, b.val); // 32-bit a[2k]*b[2k] + a[2k+1]*b[2k+1]
+    const __m512i hi = _mm512_srai_epi64(pair_sums, 32); // odd dwords, sign-extended to 64 bits
+    const __m512i lo = _mm512_srai_epi64(_mm512_bslli_epi128(pair_sums, 4), 32); // even dwords, moved up one dword first
+    return v_int64x8(_mm512_add_epi64(lo, hi));
+}
+inline v_int64x8 v_dotprod_expand(const v_int16x32& a, const v_int16x32& b, const v_int64x8& c) // same, with accumulator c added lane-wise
+{ const v_int64x8 ab = v_dotprod_expand(a, b); return ab + c; }
+
+// 32 >> 64f
+inline v_float64x8 v_dotprod_expand(const v_int32x16& a, const v_int32x16& b) // integer 64-bit dot product (v_dotprod) converted to double afterwards
+{ return v_cvt_f64(v_dotprod(a, b)); }
+inline v_float64x8 v_dotprod_expand(const v_int32x16& a, const v_int32x16& b, const v_float64x8& c) // same, with c added in double precision
+{ return v_dotprod_expand(a, b) + c; }
+
+//////// Fast Dot Product ////////
+
+// 16 >> 32
+inline v_int32x16 v_dotprod_fast(const v_int16x32& a, const v_int16x32& b) // plain forwarder: the "fast" name maps to the regular v_dotprod here
+{ return v_dotprod(a, b); }
+inline v_int32x16 v_dotprod_fast(const v_int16x32& a, const v_int16x32& b, const v_int32x16& c) // forwarder, accumulating variant
+{ return v_dotprod(a, b, c); }
+
+// 32 >> 64
+inline v_int64x8 v_dotprod_fast(const v_int32x16& a, const v_int32x16& b) // plain forwarder to v_dotprod
+{ return v_dotprod(a, b); }
+inline v_int64x8 v_dotprod_fast(const v_int32x16& a, const v_int32x16& b, const v_int64x8& c) // forwarder, accumulating variant
+{ return v_dotprod(a, b, c); }
+
+// 8 >> 32
+inline v_uint32x16 v_dotprod_expand_fast(const v_uint8x64& a, const v_uint8x64& b) // plain forwarder to v_dotprod_expand
+{ return v_dotprod_expand(a, b); }
+inline v_uint32x16 v_dotprod_expand_fast(const v_uint8x64& a, const v_uint8x64& b, const v_uint32x16& c) // forwarder, accumulating variant
+{ return v_dotprod_expand(a, b, c); }
+
+inline v_int32x16 v_dotprod_expand_fast(const v_int8x64& a, const v_int8x64& b) // plain forwarder to v_dotprod_expand
+{ return v_dotprod_expand(a, b); }
+inline v_int32x16 v_dotprod_expand_fast(const v_int8x64& a, const v_int8x64& b, const v_int32x16& c) // forwarder, accumulating variant
+{ return v_dotprod_expand(a, b, c); }
+
+// 16 >> 64
+inline v_uint64x8 v_dotprod_expand_fast(const v_uint16x32& a, const v_uint16x32& b)
+{
+    // full 32-bit products, rebuilt from the low and high 16-bit halves of each multiplication
+    const __m512i lo16 = _mm512_mullo_epi16(a.val, b.val);
+    const __m512i hi16 = _mm512_mulhi_epu16(a.val, b.val);
+    const __m512i prod_a = _mm512_unpacklo_epi16(lo16, hi16);
+    const __m512i prod_b = _mm512_unpackhi_epi16(lo16, hi16);
+    const __m512i zero = _mm512_setzero_si512();
+    // zero-extend to 64 bits: even dwords by masking out their neighbour, odd dwords by a shift
+    const __m512i a_even = _mm512_mask_blend_epi32(0xAAAA, prod_a, zero);
+    const __m512i a_odd  = _mm512_srli_epi64(prod_a, 32);
+    const __m512i b_even = _mm512_mask_blend_epi32(0xAAAA, prod_b, zero);
+    const __m512i b_odd  = _mm512_srli_epi64(prod_b, 32);
+    const __m512i pair_a = _mm512_add_epi64(a_even, a_odd);
+    // no regrouping step: the two partial vectors are added lane by lane as they are
+    return v_uint64x8(_mm512_add_epi64(pair_a, _mm512_add_epi64(b_even, b_odd)));
+}
+inline v_uint64x8 v_dotprod_expand_fast(const v_uint16x32& a, const v_uint16x32& b, const v_uint64x8& c)
+
+inline v_int64x8 v_dotprod_expand_fast(const v_int16x32& a, const v_int16x32& b) // plain forwarder to v_dotprod_expand
+{ return v_dotprod_expand(a, b); }
+inline v_int64x8 v_dotprod_expand_fast(const v_int16x32& a, const v_int16x32& b, const v_int64x8& c) // forwarder, accumulating variant
+{ return v_dotprod_expand(a, b, c); }
+
+// 32 >> 64f
+inline v_float64x8 v_dotprod_expand_fast(const v_int32x16& a, const v_int32x16& b) // plain forwarder to v_dotprod_expand
+{ return v_dotprod_expand(a, b); }
+inline v_float64x8 v_dotprod_expand_fast(const v_int32x16& a, const v_int32x16& b, const v_float64x8& c) // regular result plus c, added in double precision
+{ return v_dotprod_expand(a, b) + c; }
+
+
+#define OPENCV_HAL_AVX512_SPLAT2_PS(a, im) /* within every 128-bit lane of a, replicates float number im (0..3) to all four positions */ \
+    v_float32x16(_mm512_permute_ps(a.val, _MM_SHUFFLE(im, im, im, im)))
+
+inline v_float32x16 v_matmul(const v_float32x16& v,
+                             const v_float32x16& m0, const v_float32x16& m1,
+                             const v_float32x16& m2, const v_float32x16& m3)
+{
+    // per 128-bit lane: v[0]*m0 + v[1]*m1 + v[2]*m2 + v[3]*m3, built from the innermost term outwards
+    v_float32x16 acc = OPENCV_HAL_AVX512_SPLAT2_PS(v, 3) * m3;
+    acc = v_fma(OPENCV_HAL_AVX512_SPLAT2_PS(v, 2), m2, acc);
+    acc = v_fma(OPENCV_HAL_AVX512_SPLAT2_PS(v, 1), m1, acc);
+    return v_fma(OPENCV_HAL_AVX512_SPLAT2_PS(v, 0), m0, acc);
+}
+
+inline v_float32x16 v_matmuladd(const v_float32x16& v,
+                                const v_float32x16& m0, const v_float32x16& m1,
+                                const v_float32x16& m2, const v_float32x16& a)
+{
+    // per 128-bit lane: v[0]*m0 + v[1]*m1 + v[2]*m2 + a, built from the innermost term outwards
+    v_float32x16 acc = v_fma(OPENCV_HAL_AVX512_SPLAT2_PS(v, 2), m2, a);
+    acc = v_fma(OPENCV_HAL_AVX512_SPLAT2_PS(v, 1), m1, acc);
+    return v_fma(OPENCV_HAL_AVX512_SPLAT2_PS(v, 0), m0, acc);
+}
+
+#define OPENCV_HAL_IMPL_AVX512_TRANSPOSE4x4(_Tpvec, suffix, cast_from, cast_to) /* emits v_transpose4x4 for _Tpvec; cast_from/cast_to convert to and from __m512i */ \
+    inline void v_transpose4x4(const _Tpvec& a0, const _Tpvec& a1,              \
+                               const _Tpvec& a2, const _Tpvec& a3,              \
+                               _Tpvec& b0, _Tpvec& b1, _Tpvec& b2, _Tpvec& b3)  \
+    {                                                                           \
+        __m512i t0 = cast_from(_mm512_unpacklo_##suffix(a0.val, a1.val));       \
+        __m512i t1 = cast_from(_mm512_unpacklo_##suffix(a2.val, a3.val));       \
+        __m512i t2 = cast_from(_mm512_unpackhi_##suffix(a0.val, a1.val));       \
+        __m512i t3 = cast_from(_mm512_unpackhi_##suffix(a2.val, a3.val));       \
+        b0.val = cast_to(_mm512_unpacklo_epi64(t0, t1));                        \
+        b1.val = cast_to(_mm512_unpackhi_epi64(t0, t1));                        \
+        b2.val = cast_to(_mm512_unpacklo_epi64(t2, t3));                        \
+        b3.val = cast_to(_mm512_unpackhi_epi64(t2, t3));                        \
+    }
+
+OPENCV_HAL_IMPL_AVX512_TRANSPOSE4x4(v_uint32x16,  epi32, OPENCV_HAL_NOP, OPENCV_HAL_NOP) // integer vectors are already __m512i, so no casts
+OPENCV_HAL_IMPL_AVX512_TRANSPOSE4x4(v_int32x16,   epi32, OPENCV_HAL_NOP, OPENCV_HAL_NOP)
+OPENCV_HAL_IMPL_AVX512_TRANSPOSE4x4(v_float32x16, ps, _mm512_castps_si512, _mm512_castsi512_ps) // float data goes through the integer 64-bit unpacks via bit casts
+
+//////////////// Value reordering ///////////////
+
+/* Expand */
+#define OPENCV_HAL_IMPL_AVX512_EXPAND(_Tpvec, _Tpwvec, _Tp, intrin) /* emits v_expand, v_expand_low, v_expand_high and v512_load_expand; intrin widens a 256-bit half to 512 bits */ \
+    inline void v_expand(const _Tpvec& a, _Tpwvec& b0, _Tpwvec& b1) \
+    {                                                               \
+        b0.val = intrin(_v512_extract_low(a.val));                  \
+        b1.val = intrin(_v512_extract_high(a.val));                 \
+    }                                                               \
+    inline _Tpwvec v_expand_low(const _Tpvec& a)                    \
+    { return _Tpwvec(intrin(_v512_extract_low(a.val))); }           \
+    inline _Tpwvec v_expand_high(const _Tpvec& a)                   \
+    { return _Tpwvec(intrin(_v512_extract_high(a.val))); }          \
+    inline _Tpwvec v512_load_expand(const _Tp* ptr)                 \
+    {                                                               \
+        __m256i a = _mm256_loadu_si256((const __m256i*)ptr);        \
+        return _Tpwvec(intrin(a));                                  \
+    }
+
+OPENCV_HAL_IMPL_AVX512_EXPAND(v_uint8x64,  v_uint16x32, uchar,    _mm512_cvtepu8_epi16) // cvtepu* for unsigned sources, cvtepi* for signed ones
+OPENCV_HAL_IMPL_AVX512_EXPAND(v_int8x64,   v_int16x32,  schar,    _mm512_cvtepi8_epi16)
+OPENCV_HAL_IMPL_AVX512_EXPAND(v_uint16x32, v_uint32x16, ushort,   _mm512_cvtepu16_epi32)
+OPENCV_HAL_IMPL_AVX512_EXPAND(v_int16x32,  v_int32x16,  short,    _mm512_cvtepi16_epi32)
+OPENCV_HAL_IMPL_AVX512_EXPAND(v_uint32x16, v_uint64x8,  unsigned, _mm512_cvtepu32_epi64)
+OPENCV_HAL_IMPL_AVX512_EXPAND(v_int32x16,  v_int64x8,   int,      _mm512_cvtepi32_epi64)
+
+#define OPENCV_HAL_IMPL_AVX512_EXPAND_Q(_Tpvec, _Tp, intrin) /* emits v512_load_expand_q: unaligned 128-bit load from ptr, widened by intrin */ \
+    inline _Tpvec v512_load_expand_q(const _Tp* ptr)         \
+    {                                                        \
+        __m128i a = _mm_loadu_si128((const __m128i*)ptr);    \
+        return _Tpvec(intrin(a));                            \
+    }
+
+OPENCV_HAL_IMPL_AVX512_EXPAND_Q(v_uint32x16, uchar, _mm512_cvtepu8_epi32) // 16 bytes -> 16 x uint32
+OPENCV_HAL_IMPL_AVX512_EXPAND_Q(v_int32x16,  schar, _mm512_cvtepi8_epi32) // 16 bytes -> 16 x int32
+
+/* pack */
+// 16
+inline v_int8x64 v_pack(const v_int16x32& a, const v_int16x32& b) // signed-saturating narrow; packs works per 128-bit lane, so the qword permute (even qwords first) regroups a's and b's results
+{ return v_int8x64(_mm512_permutexvar_epi64(_v512_set_epu64(7, 5, 3, 1, 6, 4, 2, 0), _mm512_packs_epi16(a.val, b.val))); }
+
+inline v_uint8x64 v_pack(const v_uint16x32& a, const v_uint16x32& b) // unsigned narrow with saturation at 255
+{
+    const __m512i t = _mm512_set1_epi16(255); // clamp limit applied with min_epu16 before the narrowing conversion
+    return v_uint8x64(_v512_combine(_mm512_cvtepi16_epi8(_mm512_min_epu16(a.val, t)), _mm512_cvtepi16_epi8(_mm512_min_epu16(b.val, t))));
+}
+
+inline v_uint8x64 v_pack_u(const v_int16x32& a, const v_int16x32& b) // signed input, unsigned-saturating narrow (packus), same regrouping permute as v_pack
+{
+    return v_uint8x64(_mm512_permutexvar_epi64(_v512_set_epu64(7, 5, 3, 1, 6, 4, 2, 0), _mm512_packus_epi16(a.val, b.val)));
+}
+
+inline void v_pack_store(schar* ptr, const v_int16x32& a) // packs a with itself and stores the low half of the result to ptr
+{ v_store_low(ptr, v_pack(a, a)); }
+
+inline void v_pack_store(uchar* ptr, const v_uint16x32& a) // clamps to 255, narrows and stores 32 bytes (unaligned) to ptr
+{
+    const __m512i m = _mm512_set1_epi16(255);
+    _mm256_storeu_si256((__m256i*)ptr, _mm512_cvtepi16_epi8(_mm512_min_epu16(a.val, m)));
+}
+
+inline void v_pack_u_store(uchar* ptr, const v_int16x32& a) // packs a with itself via v_pack_u and stores the low half to ptr
+{ v_store_low(ptr, v_pack_u(a, a)); }
+
+template<int n> inline
+v_uint8x64 v_rshr_pack(const v_uint16x32& a, const v_uint16x32& b)
+{
+    // n > 0 is assumed, so after the shift every 16-bit value also fits the signed range.
+    const v_uint16x32 half = v512_setall_u16((short)(1 << (n-1)));
+    const v_int16x32 a_shifted = v_reinterpret_as_s16((a + half) >> n);
+    return v_pack_u(a_shifted, v_reinterpret_as_s16((b + half) >> n));
+}
+
+template<int n> inline
+void v_rshr_pack_store(uchar* ptr, const v_uint16x32& a)
+{
+    const v_uint16x32 half = v512_setall_u16((short)(1 << (n-1))); // rounding term: 2^(n-1)
+    v_pack_u_store(ptr, v_reinterpret_as_s16((a + half) >> n));
+}
+
+template<int n> inline
+v_uint8x64 v_rshr_pack_u(const v_int16x32& a, const v_int16x32& b)
+{
+    const v_int16x32 half = v512_setall_s16((short)(1 << (n-1))); // rounding term: 2^(n-1)
+    return v_pack_u((a + half) >> n, (b + half) >> n);
+}
+
+template<int n> inline
+void v_rshr_pack_u_store(uchar* ptr, const v_int16x32& a)
+{
+    const v_int16x32 half = v512_setall_s16((short)(1 << (n-1))); // rounding term: 2^(n-1)
+    v_pack_u_store(ptr, (a + half) >> n);
+}
+
+template<int n> inline
+v_int8x64 v_rshr_pack(const v_int16x32& a, const v_int16x32& b)
+{
+    const v_int16x32 half = v512_setall_s16((short)(1 << (n-1))); // rounding term: 2^(n-1)
+    return v_pack((a + half) >> n, (b + half) >> n);
+}
+
+template<int n> inline
+void v_rshr_pack_store(schar* ptr, const v_int16x32& a)
+{
+    const v_int16x32 half = v512_setall_s16((short)(1 << (n-1))); // rounding term: 2^(n-1)
+    v_pack_store(ptr, (a + half) >> n);
+}
+
+// 32
+inline v_int16x32 v_pack(const v_int32x16& a, const v_int32x16& b) // signed-saturating narrow; the qword permute (even qwords first) regroups the per-lane packs output
+{ return v_int16x32(_mm512_permutexvar_epi64(_v512_set_epu64(7, 5, 3, 1, 6, 4, 2, 0), _mm512_packs_epi32(a.val, b.val))); }
+
+inline v_uint16x32 v_pack(const v_uint32x16& a, const v_uint32x16& b) // unsigned narrow with saturation at 65535
+{
+    const __m512i m = _mm512_set1_epi32(65535); // clamp limit applied with min_epu32 before the narrowing conversion
+    return v_uint16x32(_v512_combine(_mm512_cvtepi32_epi16(_mm512_min_epu32(a.val, m)), _mm512_cvtepi32_epi16(_mm512_min_epu32(b.val, m))));
+}
+
+inline v_uint16x32 v_pack_u(const v_int32x16& a, const v_int32x16& b) // signed input, unsigned-saturating narrow (packus), same regrouping permute as v_pack
+{ return v_uint16x32(_mm512_permutexvar_epi64(_v512_set_epu64(7, 5, 3, 1, 6, 4, 2, 0), _mm512_packus_epi32(a.val, b.val))); }
+
+inline void v_pack_store(short* ptr, const v_int32x16& a) // packs a with itself and stores the low half of the result to ptr
+{ v_store_low(ptr, v_pack(a, a)); }
+
+inline void v_pack_store(ushort* ptr, const v_uint32x16& a) // clamps to 65535, narrows and stores 16 values (unaligned) to ptr
+{
+    const __m512i m = _mm512_set1_epi32(65535);
+    _mm256_storeu_si256((__m256i*)ptr, _mm512_cvtepi32_epi16(_mm512_min_epu32(a.val, m)));
+}
+
+inline void v_pack_u_store(ushort* ptr, const v_int32x16& a) // packs a with itself via v_pack_u and stores the low half to ptr
+{ v_store_low(ptr, v_pack_u(a, a)); }
+
+
+template<int n> inline
+v_uint16x32 v_rshr_pack(const v_uint32x16& a, const v_uint32x16& b)
+{
+    const v_uint32x16 half = v512_setall_u32(1 << (n-1)); // rounding term: 2^(n-1)
+    const v_int32x16 a_shifted = v_reinterpret_as_s32((a + half) >> n);
+    return v_pack_u(a_shifted, v_reinterpret_as_s32((b + half) >> n));
+}
+
+template<int n> inline
+void v_rshr_pack_store(ushort* ptr, const v_uint32x16& a)
+{
+    const v_uint32x16 half = v512_setall_u32(1 << (n-1)); // rounding term: 2^(n-1)
+    v_pack_u_store(ptr, v_reinterpret_as_s32((a + half) >> n));
+}
+
+template<int n> inline
+v_uint16x32 v_rshr_pack_u(const v_int32x16& a, const v_int32x16& b)
+{
+    const v_int32x16 half = v512_setall_s32(1 << (n-1)); // rounding term: 2^(n-1)
+    return v_pack_u((a + half) >> n, (b + half) >> n);
+}
+
+template<int n> inline
+void v_rshr_pack_u_store(ushort* ptr, const v_int32x16& a)
+{
+    const v_int32x16 half = v512_setall_s32(1 << (n-1)); // rounding term: 2^(n-1)
+    v_pack_u_store(ptr, (a + half) >> n);
+}
+
+template<int n> inline
+v_int16x32 v_rshr_pack(const v_int32x16& a, const v_int32x16& b)
+{
+    const v_int32x16 half = v512_setall_s32(1 << (n-1)); // rounding term: 2^(n-1)
+    return v_pack((a + half) >> n, (b + half) >> n);
+}
+
+template<int n> inline
+void v_rshr_pack_store(short* ptr, const v_int32x16& a)
+{
+    const v_int32x16 half = v512_setall_s32(1 << (n-1)); // rounding term: 2^(n-1)
+    v_pack_store(ptr, (a + half) >> n);
+}
+
+// 64
+// Non-saturating pack
+inline v_uint32x16 v_pack(const v_uint64x8& a, const v_uint64x8& b) // truncating narrow (no saturation): a's and b's narrowed halves are joined by _v512_combine
+{ return v_uint32x16(_v512_combine(_mm512_cvtepi64_epi32(a.val), _mm512_cvtepi64_epi32(b.val))); }
+
+inline v_int32x16 v_pack(const v_int64x8& a, const v_int64x8& b) // signed overload, forwards to the unsigned truncating version
+{ return v_reinterpret_as_s32(v_pack(v_reinterpret_as_u64(a), v_reinterpret_as_u64(b))); }
+
+inline void v_pack_store(unsigned* ptr, const v_uint64x8& a) // truncates to 32 bits and stores 8 values (unaligned) to ptr
+{ _mm256_storeu_si256((__m256i*)ptr, _mm512_cvtepi64_epi32(a.val)); }
+
+inline void v_pack_store(int* ptr, const v_int64x8& b) // signed overload, forwards to the unsigned store
+{ v_pack_store((unsigned*)ptr, v_reinterpret_as_u64(b)); }
+
+template<int n> inline
+v_uint32x16 v_rshr_pack(const v_uint64x8& a, const v_uint64x8& b)
+{
+    const v_uint64x8 half = v512_setall_u64((uint64)1 << (n-1)); // rounding term: 2^(n-1)
+    return v_pack((a + half) >> n, (b + half) >> n);
+}
+
+template<int n> inline
+void v_rshr_pack_store(unsigned* ptr, const v_uint64x8& a)
+{
+    const v_uint64x8 half = v512_setall_u64((uint64)1 << (n-1)); // rounding term: 2^(n-1)
+    v_pack_store(ptr, (a + half) >> n);
+}
+
+template<int n> inline
+v_int32x16 v_rshr_pack(const v_int64x8& a, const v_int64x8& b)
+{
+    const v_int64x8 half = v512_setall_s64((int64)1 << (n-1)); // rounding term: 2^(n-1)
+    return v_pack((a + half) >> n, (b + half) >> n);
+}
+
+template<int n> inline
+void v_rshr_pack_store(int* ptr, const v_int64x8& a)
+{
+    const v_int64x8 half = v512_setall_s64((int64)1 << (n-1)); // rounding term: 2^(n-1)
+    v_pack_store(ptr, (a + half) >> n);
+}
+
+// pack boolean
+inline v_uint8x64 v_pack_b(const v_uint16x32& a, const v_uint16x32& b) // narrows two 16-bit mask vectors to bytes; NOTE(review): signed packs is used, so inputs are presumably all-ones/all-zero lanes - confirm with callers
+{ return v_uint8x64(_mm512_permutexvar_epi64(_v512_set_epu64(7, 5, 3, 1, 6, 4, 2, 0), _mm512_packs_epi16(a.val, b.val))); }
+
+inline v_uint8x64 v_pack_b(const v_uint32x16& a, const v_uint32x16& b,
+                           const v_uint32x16& c, const v_uint32x16& d) // narrows four 32-bit mask vectors to one byte vector
+{
+    __m512i ab = _mm512_packs_epi32(a.val, b.val); // 32 -> 16 bits, per 128-bit lane
+    __m512i cd = _mm512_packs_epi32(c.val, d.val);
+
+    return v_uint8x64(_mm512_permutexvar_epi32(_v512_set_epu32(15, 11, 7, 3, 14, 10, 6, 2, 13, 9, 5, 1, 12, 8, 4, 0), _mm512_packs_epi16(ab, cd))); // 16 -> 8 bits, then a dword permute regroups the lane-wise output
+}
+
+inline v_uint8x64 v_pack_b(const v_uint64x8& a, const v_uint64x8& b, const v_uint64x8& c,
+                           const v_uint64x8& d, const v_uint64x8& e, const v_uint64x8& f,
+                           const v_uint64x8& g, const v_uint64x8& h) // narrows eight 64-bit mask vectors to one byte vector
+{
+    __m512i ab = _mm512_packs_epi32(a.val, b.val); // first halving: each 64-bit lane is treated as two dwords
+    __m512i cd = _mm512_packs_epi32(c.val, d.val);
+    __m512i ef = _mm512_packs_epi32(e.val, f.val);
+    __m512i gh = _mm512_packs_epi32(g.val, h.val);
+
+    __m512i abcd = _mm512_packs_epi32(ab, cd); // second halving, again with the 32-bit pack
+    __m512i efgh = _mm512_packs_epi32(ef, gh);
+
+    return v_uint8x64(_mm512_permutexvar_epi16(_v512_set_epu16(31, 23, 15, 7, 30, 22, 14, 6, 29, 21, 13, 5, 28, 20, 12, 4, // final halving to bytes, then a word permute regroups the lane-wise output
+                                                               27, 19, 11, 3, 26, 18, 10, 2, 25, 17,  9, 1, 24, 16,  8, 0), _mm512_packs_epi16(abcd, efgh)));
+}
+
+/* Recombine */
+// it's up there with the load and store operations
+
+/* Extract */
+#define OPENCV_HAL_IMPL_AVX512_EXTRACT(_Tpvec)                /* emits v_extract<s>(a, b) as a thin wrapper over the two-vector v_rotate_right<s> */ \
+    template<int s>                                           \
+    inline _Tpvec v_extract(const _Tpvec& a, const _Tpvec& b) \
+    { return v_rotate_right<s>(a, b); }
+
+OPENCV_HAL_IMPL_AVX512_EXTRACT(v_uint8x64) // one instantiation per vector type
+OPENCV_HAL_IMPL_AVX512_EXTRACT(v_int8x64)
+OPENCV_HAL_IMPL_AVX512_EXTRACT(v_uint16x32)
+OPENCV_HAL_IMPL_AVX512_EXTRACT(v_int16x32)
+OPENCV_HAL_IMPL_AVX512_EXTRACT(v_uint32x16)
+OPENCV_HAL_IMPL_AVX512_EXTRACT(v_int32x16)
+OPENCV_HAL_IMPL_AVX512_EXTRACT(v_uint64x8)
+OPENCV_HAL_IMPL_AVX512_EXTRACT(v_int64x8)
+OPENCV_HAL_IMPL_AVX512_EXTRACT(v_float32x16)
+OPENCV_HAL_IMPL_AVX512_EXTRACT(v_float64x8)
+
+#define OPENCV_HAL_IMPL_AVX512_EXTRACT_N(_Tpvec, _Tp) /* emits v_extract_n<i>(v): rotates v right by i and returns get0() of the result as a scalar _Tp */ \
+template<int i> inline _Tp v_extract_n(_Tpvec v) { return v_rotate_right<i>(v).get0(); }
+
+OPENCV_HAL_IMPL_AVX512_EXTRACT_N(v_uint8x64, uchar) // second argument is the scalar return type for that vector
+OPENCV_HAL_IMPL_AVX512_EXTRACT_N(v_int8x64, schar)
+OPENCV_HAL_IMPL_AVX512_EXTRACT_N(v_uint16x32, ushort)
+OPENCV_HAL_IMPL_AVX512_EXTRACT_N(v_int16x32, short)
+OPENCV_HAL_IMPL_AVX512_EXTRACT_N(v_uint32x16, uint)
+OPENCV_HAL_IMPL_AVX512_EXTRACT_N(v_int32x16, int)
+OPENCV_HAL_IMPL_AVX512_EXTRACT_N(v_uint64x8, uint64)
+OPENCV_HAL_IMPL_AVX512_EXTRACT_N(v_int64x8, int64)
+OPENCV_HAL_IMPL_AVX512_EXTRACT_N(v_float32x16, float)
+OPENCV_HAL_IMPL_AVX512_EXTRACT_N(v_float64x8, double)
+
+template<int i> // i: index of the 32-bit element of a that is copied into every element of the result
+inline v_uint32x16 v_broadcast_element(v_uint32x16 a)
+{
+    const __m512i perm = _mm512_set1_epi32((char)i); // plain local on purpose: a function-local static would add an init guard to every call
+    return v_uint32x16(_mm512_permutexvar_epi32(perm, a.val)); // all 16 index slots hold the same value, so all outputs read the same source element
+}
+
+template<int i> // signed flavour: reinterpret as unsigned, broadcast there, reinterpret back
+inline v_int32x16 v_broadcast_element(const v_int32x16 &a)
+{ const v_uint32x16 bits = v_reinterpret_as_u32(a); return v_reinterpret_as_s32(v_broadcast_element<i>(bits)); }
+
+template<int i> // float flavour: reinterpret as unsigned, broadcast there, reinterpret back
+inline v_float32x16 v_broadcast_element(const v_float32x16 &a)
+{ const v_uint32x16 bits = v_reinterpret_as_u32(a); return v_reinterpret_as_f32(v_broadcast_element<i>(bits)); }
+
+
+///////////////////// load deinterleave /////////////////////////////
+
+inline void v_load_deinterleave( const uchar* ptr, v_uint8x64& a, v_uint8x64& b ) // splits 128 bytes read from ptr into two vectors, a and b
+{
+    __m512i ab0 = _mm512_loadu_si512((const __m512i*)ptr); // first 64 bytes, unaligned load
+    __m512i ab1 = _mm512_loadu_si512((const __m512i*)(ptr + 64)); // following 64 bytes
+#if CV_AVX_512VBMI // this branch uses the two-source byte permute _mm512_permutex2var_epi8
+    __m512i mask0 = _v512_set_epu8(126, 124, 122, 120, 118, 116, 114, 112, 110, 108, 106, 104, 102, 100, 98, 96, // all even byte indices 0..126
+                                    94,  92,  90,  88,  86,  84,  82,  80,  78,  76,  74,  72,  70,  68, 66, 64,
+                                    62,  60,  58,  56,  54,  52,  50,  48,  46,  44,  42,  40,  38,  36, 34, 32,
+                                    30,  28,  26,  24,  22,  20,  18,  16,  14,  12,  10,   8,   6,   4,  2,  0);
+    __m512i mask1 = _v512_set_epu8(127, 125, 123, 121, 119, 117, 115, 113, 111, 109, 107, 105, 103, 101, 99, 97, // all odd byte indices 1..127
+                                    95,  93,  91,  89,  87,  85,  83,  81,  79,  77,  75,  73,  71,  69, 67, 65,
+                                    63,  61,  59,  57,  55,  53,  51,  49,  47,  45,  43,  41,  39,  37, 35, 33,
+                                    31,  29,  27,  25,  23,  21,  19,  17,  15,  13,  11,   9,   7,   5,  3,  1);
+    a = v_uint8x64(_mm512_permutex2var_epi8(ab0, mask0, ab1)); // bytes at the even indices of ab0:ab1
+    b = v_uint8x64(_mm512_permutex2var_epi8(ab0, mask1, ab1)); // bytes at the odd indices of ab0:ab1
+#else // fallback: byte shuffle inside each 128-bit lane, then a 64-bit two-source permute
+    __m512i mask0 = _mm512_set4_epi32(0x0f0d0b09, 0x07050301, 0x0e0c0a08, 0x06040200); // per 16-byte lane: even bytes to the low 8 bytes, odd bytes to the high 8
+    __m512i a0b0 = _mm512_shuffle_epi8(ab0, mask0);
+    __m512i a1b1 = _mm512_shuffle_epi8(ab1, mask0);
+    __m512i mask1 = _v512_set_epu64(14, 12, 10, 8, 6, 4, 2, 0); // even 64-bit indices across both shuffled registers
+    __m512i mask2 = _v512_set_epu64(15, 13, 11, 9, 7, 5, 3, 1); // odd 64-bit indices across both shuffled registers
+    a = v_uint8x64(_mm512_permutex2var_epi64(a0b0, mask1, a1b1));
+    b = v_uint8x64(_mm512_permutex2var_epi64(a0b0, mask2, a1b1));
+#endif
+}
+
+inline void v_load_deinterleave( const ushort* ptr, v_uint16x32& a, v_uint16x32& b )
+{   // Two unaligned 512-bit loads (ptr, ptr + 32) are split by index parity: even indices -> a, odd indices -> b.
+    const __m512i idx_even = _v512_set_epu16(62, 60, 58, 56, 54, 52, 50, 48, 46, 44, 42, 40, 38, 36, 34, 32,
+                                             30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10,  8,  6,  4,  2,  0);
+    const __m512i idx_odd  = _v512_set_epu16(63, 61, 59, 57, 55, 53, 51, 49, 47, 45, 43, 41, 39, 37, 35, 33,
+                                             31, 29, 27, 25, 23, 21, 19, 17, 15, 13, 11,  9,  7,  5,  3,  1);
+    const __m512i src_lo = _mm512_loadu_si512((const __m512i*)ptr);
+    const __m512i src_hi = _mm512_loadu_si512((const __m512i*)(ptr + 32));
+    a = v_uint16x32(_mm512_permutex2var_epi16(src_lo, idx_even, src_hi));
+    b = v_uint16x32(_mm512_permutex2var_epi16(src_lo, idx_odd, src_hi));
+}
+
+inline void v_load_deinterleave( const unsigned* ptr, v_uint32x16& a, v_uint32x16& b )
+{   // Two unaligned 512-bit loads (ptr, ptr + 16) are split by index parity: even indices -> a, odd indices -> b.
+    const __m512i idx_even = _v512_set_epu32(30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0);
+    const __m512i idx_odd  = _v512_set_epu32(31, 29, 27, 25, 23, 21, 19, 17, 15, 13, 11, 9, 7, 5, 3, 1);
+    const __m512i src_lo = _mm512_loadu_si512((const __m512i*)ptr);
+    const __m512i src_hi = _mm512_loadu_si512((const __m512i*)(ptr + 16));
+    a = v_uint32x16(_mm512_permutex2var_epi32(src_lo, idx_even, src_hi));
+    b = v_uint32x16(_mm512_permutex2var_epi32(src_lo, idx_odd, src_hi));
+}
+
+inline void v_load_deinterleave( const uint64* ptr, v_uint64x8& a, v_uint64x8& b )
+{   // Two unaligned 512-bit loads (ptr, ptr + 8) are split by index parity: even indices -> a, odd indices -> b.
+    const __m512i idx_even = _v512_set_epu64(14, 12, 10, 8, 6, 4, 2, 0);
+    const __m512i idx_odd  = _v512_set_epu64(15, 13, 11, 9, 7, 5, 3, 1);
+    const __m512i src_lo = _mm512_loadu_si512((const __m512i*)ptr);
+    const __m512i src_hi = _mm512_loadu_si512((const __m512i*)(ptr + 8));
+    a = v_uint64x8(_mm512_permutex2var_epi64(src_lo, idx_even, src_hi));
+    b = v_uint64x8(_mm512_permutex2var_epi64(src_lo, idx_odd, src_hi));
+}
+
+inline void v_load_deinterleave( const uchar* ptr, v_uint8x64& a, v_uint8x64& b, v_uint8x64& c ) // splits 192 bytes read from ptr into three vectors a, b, c
+{
+    __m512i bgr0 = _mm512_loadu_si512((const __m512i*)ptr); // bytes 0..63, unaligned load
+    __m512i bgr1 = _mm512_loadu_si512((const __m512i*)(ptr + 64)); // bytes 64..127
+    __m512i bgr2 = _mm512_loadu_si512((const __m512i*)(ptr + 128)); // bytes 128..191
+
+#if CV_AVX_512VBMI2 // variant built on byte permutes plus byte compress/expand
+    __m512i mask0 = _v512_set_epu8(126, 123, 120, 117, 114, 111, 108, 105, 102,  99,  96,  93,  90,  87,  84, 81, // index table stepping by 3 over a two-register pair
+                                    78,  75,  72,  69,  66,  63,  60,  57,  54,  51,  48,  45,  42,  39,  36, 33,
+                                    30,  27,  24,  21,  18,  15,  12,   9,   6,   3,   0,  62,  59,  56,  53, 50,
+                                    47,  44,  41,  38,  35,  32,  29,  26,  23,  20,  17,  14,  11,   8,   5,  2);
+    __m512i r0b01 = _mm512_permutex2var_epi8(bgr0, mask0, bgr1); // names presumably list the channels/registers gathered (b, g, r = 1st, 2nd, 3rd channel) - confirm
+    __m512i b1g12 = _mm512_permutex2var_epi8(bgr1, mask0, bgr2);
+    __m512i r12b2 = _mm512_permutex2var_epi8(bgr1,
+                    _v512_set_epu8(125, 122, 119, 116, 113, 110, 107, 104, 101,  98,  95,  92,  89,  86,  83, 80,
+                                    77,  74,  71,  68,  65, 127, 124, 121, 118, 115, 112, 109, 106, 103, 100, 97,
+                                    94,  91,  88,  85,  82,  79,  76,  73,  70,  67,  64,  61,  58,  55,  52, 49,
+                                    46,  43,  40,  37,  34,  31,  28,  25,  22,  19,  16,  13,  10,   7,   4,  1), bgr2);
+    a = v_uint8x64(_mm512_mask_compress_epi8(r12b2, 0xffffffffffe00000, r0b01)); // mask has bits 21..63 set
+    b = v_uint8x64(_mm512_mask_compress_epi8(b1g12, 0x2492492492492492, bgr0)); // mask selects every third byte of bgr0, starting at byte 1
+    c = v_uint8x64(_mm512_mask_expand_epi8(r0b01, 0xffffffffffe00000, r12b2));
+#elif CV_AVX_512VBMI // variant built on byte blends followed by two-source byte permutes
+    __m512i b0g0b1 = _mm512_mask_blend_epi8(0xb6db6db6db6db6db, bgr1, bgr0); // the same blend mask is applied to each rotated register pair
+    __m512i g1r1g2 = _mm512_mask_blend_epi8(0xb6db6db6db6db6db, bgr2, bgr1);
+    __m512i r2b2r0 = _mm512_mask_blend_epi8(0xb6db6db6db6db6db, bgr0, bgr2);
+    a = v_uint8x64(_mm512_permutex2var_epi8(b0g0b1, _v512_set_epu8(125, 122, 119, 116, 113, 110, 107, 104, 101,  98,  95,  92,  89,  86,  83,  80,
+                                                                    77,  74,  71,  68,  65,  63,  61,  60,  58,  57,  55,  54,  52,  51,  49,  48,
+                                                                    46,  45,  43,  42,  40,  39,  37,  36,  34,  33,  31,  30,  28,  27,  25,  24,
+                                                                    23,  21,  20,  18,  17,  15,  14,  12,  11,   9,   8,   6,   5,   3,   2,   0), bgr2));
+    b = v_uint8x64(_mm512_permutex2var_epi8(g1r1g2, _v512_set_epu8( 63,  61,  60,  58,  57,  55,  54,  52,  51,  49,  48,  46,  45,  43,  42,  40,
+                                                                    39,  37,  36,  34,  33,  31,  30,  28,  27,  25,  24,  23,  21,  20,  18,  17,
+                                                                    15,  14,  12,  11,   9,   8,   6,   5,   3,   2,   0, 126, 123, 120, 117, 114,
+                                                                   111, 108, 105, 102,  99,  96,  93,  90,  87,  84,  81,  78,  75,  72,  69,  66), bgr0));
+    c = v_uint8x64(_mm512_permutex2var_epi8(r2b2r0, _v512_set_epu8( 63,  60,  57,  54,  51,  48,  45,  42,  39,  36,  33,  30,  27,  24,  21,  18,
+                                                                    15,  12,   9,   6,   3,   0, 125, 122, 119, 116, 113, 110, 107, 104, 101,  98,
+                                                                    95,  92,  89,  86,  83,  80,  77,  74,  71,  68,  65,  62,  59,  56,  53,  50,
+                                                                    47,  44,  41,  38,  35,  32,  29,  26,  23,  20,  17,  14,  11,   8,   5,   2), bgr1));
+#else // fallback: 16-bit permutes first, then byte blends to separate the two bytes of each 16-bit element
+    __m512i mask0 = _v512_set_epu16(61, 58, 55, 52, 49, 46, 43, 40, 37, 34, 63, 60, 57, 54, 51, 48, // 16-bit index table stepping by 3
+                                    45, 42, 39, 36, 33, 30, 27, 24, 21, 18, 15, 12,  9,  6,  3,  0);
+    __m512i b01g1 = _mm512_permutex2var_epi16(bgr0, mask0, bgr1);
+    __m512i r12b2 = _mm512_permutex2var_epi16(bgr1, mask0, bgr2);
+    __m512i g20r0 = _mm512_permutex2var_epi16(bgr2, mask0, bgr0);
+
+    __m512i b0g0 = _mm512_mask_blend_epi32(0xf800, b01g1, r12b2); // top five 32-bit elements come from r12b2, the rest from b01g1
+    __m512i r0b1 = _mm512_permutex2var_epi16(bgr1, _v512_set_epu16(42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 29, 26, 23, 20, 17,
+                                                                   14, 11,  8,  5,  2, 53, 52, 51, 50, 49, 48, 47, 46, 45, 44, 43), g20r0);
+    __m512i g1r1 = _mm512_alignr_epi32(r12b2, g20r0, 11); // concatenate r12b2:g20r0 and shift right by eleven 32-bit elements
+    a = v_uint8x64(_mm512_mask_blend_epi8(0xAAAAAAAAAAAAAAAA, b0g0, r0b1)); // 0xAAAA... takes odd-numbered bytes from the second operand
+    c = v_uint8x64(_mm512_mask_blend_epi8(0xAAAAAAAAAAAAAAAA, r0b1, g1r1));
+    b = v_uint8x64(_mm512_shuffle_epi8(_mm512_mask_blend_epi8(0xAAAAAAAAAAAAAAAA, g1r1, b0g0), _mm512_set4_epi32(0x0e0f0c0d, 0x0a0b0809, 0x06070405, 0x02030001))); // the shuffle swaps the two bytes of every 16-bit pair
+#endif
+}
+
+inline void v_load_deinterleave( const ushort* ptr, v_uint16x32& a, v_uint16x32& b, v_uint16x32& c )
+{
+    // Three unaligned 512-bit loads (ptr, ptr + 32, ptr + 64) are regrouped into a, b, c via a step-3 index table.
+    const __m512i blk0 = _mm512_loadu_si512((const __m512i*)ptr);
+    const __m512i blk1 = _mm512_loadu_si512((const __m512i*)(ptr + 32));
+    const __m512i blk2 = _mm512_loadu_si512((const __m512i*)(ptr + 64));
+    const __m512i idx_step3 = _v512_set_epu16(61, 58, 55, 52, 49, 46, 43, 40, 37, 34, 63, 60, 57, 54, 51, 48,
+                                              45, 42, 39, 36, 33, 30, 27, 24, 21, 18, 15, 12,  9,  6,  3,  0);
+    const __m512i idx_b = _v512_set_epu16(42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 29, 26, 23, 20, 17,
+                                          14, 11,  8,  5,  2, 53, 52, 51, 50, 49, 48, 47, 46, 45, 44, 43);
+    const __m512i mix01 = _mm512_permutex2var_epi16(blk0, idx_step3, blk1);
+    const __m512i mix12 = _mm512_permutex2var_epi16(blk1, idx_step3, blk2);
+    const __m512i mix20 = _mm512_permutex2var_epi16(blk2, idx_step3, blk0);
+    a = v_uint16x32(_mm512_mask_blend_epi32(0xf800, mix01, mix12));
+    b = v_uint16x32(_mm512_permutex2var_epi16(blk1, idx_b, mix20));
+    c = v_uint16x32(_mm512_alignr_epi32(mix12, mix20, 11));
+}
+
+inline void v_load_deinterleave( const unsigned* ptr, v_uint32x16& a, v_uint32x16& b, v_uint32x16& c )
+{
+    // Three unaligned 512-bit loads (ptr, ptr + 16, ptr + 32) are regrouped into a, b, c via a step-3 index table.
+    const __m512i blk0 = _mm512_loadu_si512((const __m512i*)ptr);
+    const __m512i blk1 = _mm512_loadu_si512((const __m512i*)(ptr + 16));
+    const __m512i blk2 = _mm512_loadu_si512((const __m512i*)(ptr + 32));
+    const __m512i idx_step3 = _v512_set_epu32(29, 26, 23, 20, 17, 30, 27, 24, 21, 18, 15, 12, 9, 6, 3, 0);
+    const __m512i idx_c = _v512_set_epu32(21, 20, 19, 18, 17, 16, 13, 10, 7, 4, 1, 26, 25, 24, 23, 22);
+    const __m512i mix01 = _mm512_permutex2var_epi32(blk0, idx_step3, blk1);
+    const __m512i mix12 = _mm512_permutex2var_epi32(blk1, idx_step3, blk2);
+    const __m512i mix20 = _mm512_permutex2var_epi32(blk2, idx_step3, blk0);
+    a = v_uint32x16(_mm512_mask_blend_epi32(0xf800, mix01, mix12));
+    b = v_uint32x16(_mm512_alignr_epi32(mix12, mix20, 11));
+    c = v_uint32x16(_mm512_permutex2var_epi32(blk1, idx_c, mix20));
+}
+
+inline void v_load_deinterleave( const uint64* ptr, v_uint64x8& a, v_uint64x8& b, v_uint64x8& c )
+{
+    // Three unaligned 512-bit loads (ptr, ptr + 8, ptr + 16) are regrouped into a, b, c via a step-3 index table.
+    const __m512i blk0 = _mm512_loadu_si512((const __m512i*)ptr);
+    const __m512i blk1 = _mm512_loadu_si512((const __m512i*)(ptr + 8));
+    const __m512i blk2 = _mm512_loadu_si512((const __m512i*)(ptr + 16));
+    const __m512i idx_step3 = _v512_set_epu64(13, 10, 15, 12, 9, 6, 3, 0);
+    const __m512i idx_b = _v512_set_epu64(10, 9, 8, 5, 2, 13, 12, 11);
+    const __m512i mix01 = _mm512_permutex2var_epi64(blk0, idx_step3, blk1);
+    const __m512i mix12 = _mm512_permutex2var_epi64(blk1, idx_step3, blk2);
+    const __m512i mix20 = _mm512_permutex2var_epi64(blk2, idx_step3, blk0);
+    a = v_uint64x8(_mm512_mask_blend_epi64(0xc0, mix01, mix12));
+    c = v_uint64x8(_mm512_alignr_epi64(mix12, mix20, 6));
+    b = v_uint64x8(_mm512_permutex2var_epi64(blk1, idx_b, mix20));
+}
+
+inline void v_load_deinterleave( const uchar* ptr, v_uint8x64& a, v_uint8x64& b, v_uint8x64& c, v_uint8x64& d ) // splits 256 bytes read from ptr into four vectors a, b, c, d
+{
+    __m512i bgra0 = _mm512_loadu_si512((const __m512i*)ptr); // bytes 0..63, unaligned load
+    __m512i bgra1 = _mm512_loadu_si512((const __m512i*)(ptr + 64)); // bytes 64..127
+    __m512i bgra2 = _mm512_loadu_si512((const __m512i*)(ptr + 128)); // bytes 128..191
+    __m512i bgra3 = _mm512_loadu_si512((const __m512i*)(ptr + 192)); // bytes 192..255
+
+#if CV_AVX_512VBMI // two rounds of even/odd selection with the byte-granular two-source permute
+    __m512i mask0 = _v512_set_epu8(126, 124, 122, 120, 118, 116, 114, 112, 110, 108, 106, 104, 102, 100, 98, 96, // all even byte indices 0..126
+                                    94,  92,  90,  88,  86,  84,  82,  80,  78,  76,  74,  72,  70,  68, 66, 64,
+                                    62,  60,  58,  56,  54,  52,  50,  48,  46,  44,  42,  40,  38,  36, 34, 32,
+                                    30,  28,  26,  24,  22,  20,  18,  16,  14,  12,  10,   8,   6,   4,  2,  0);
+    __m512i mask1 = _v512_set_epu8(127, 125, 123, 121, 119, 117, 115, 113, 111, 109, 107, 105, 103, 101, 99, 97, // all odd byte indices 1..127
+                                    95,  93,  91,  89,  87,  85,  83,  81,  79,  77,  75,  73,  71,  69, 67, 65,
+                                    63,  61,  59,  57,  55,  53,  51,  49,  47,  45,  43,  41,  39,  37, 35, 33,
+                                    31,  29,  27,  25,  23,  21,  19,  17,  15,  13,  11,   9,   7,   5,  3,  1);
+
+    __m512i br01 = _mm512_permutex2var_epi8(bgra0, mask0, bgra1); // round 1: even-index bytes of registers 0 and 1
+    __m512i ga01 = _mm512_permutex2var_epi8(bgra0, mask1, bgra1); // round 1: odd-index bytes of registers 0 and 1
+    __m512i br23 = _mm512_permutex2var_epi8(bgra2, mask0, bgra3); // same for registers 2 and 3
+    __m512i ga23 = _mm512_permutex2var_epi8(bgra2, mask1, bgra3);
+
+    a = v_uint8x64(_mm512_permutex2var_epi8(br01, mask0, br23)); // round 2 splits each intermediate by parity again
+    c = v_uint8x64(_mm512_permutex2var_epi8(br01, mask1, br23));
+    b = v_uint8x64(_mm512_permutex2var_epi8(ga01, mask0, ga23));
+    d = v_uint8x64(_mm512_permutex2var_epi8(ga01, mask1, ga23));
+#else // fallback: group bytes inside each 128-bit lane first, then run the two parity rounds on 32-bit elements
+    __m512i mask = _mm512_set4_epi32(0x0f0b0703, 0x0e0a0602, 0x0d090501, 0x0c080400); // per 16-byte lane: gather bytes 0,4,8,12 then 1,5,9,13 then 2,6,10,14 then 3,7,11,15
+    __m512i b0g0r0a0 = _mm512_shuffle_epi8(bgra0, mask);
+    __m512i b1g1r1a1 = _mm512_shuffle_epi8(bgra1, mask);
+    __m512i b2g2r2a2 = _mm512_shuffle_epi8(bgra2, mask);
+    __m512i b3g3r3a3 = _mm512_shuffle_epi8(bgra3, mask);
+
+    __m512i mask0 = _v512_set_epu32(30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0); // even 32-bit indices
+    __m512i mask1 = _v512_set_epu32(31, 29, 27, 25, 23, 21, 19, 17, 15, 13, 11, 9, 7, 5, 3, 1); // odd 32-bit indices
+
+    __m512i br01 = _mm512_permutex2var_epi32(b0g0r0a0, mask0, b1g1r1a1);
+    __m512i ga01 = _mm512_permutex2var_epi32(b0g0r0a0, mask1, b1g1r1a1);
+    __m512i br23 = _mm512_permutex2var_epi32(b2g2r2a2, mask0, b3g3r3a3);
+    __m512i ga23 = _mm512_permutex2var_epi32(b2g2r2a2, mask1, b3g3r3a3);
+
+    a = v_uint8x64(_mm512_permutex2var_epi32(br01, mask0, br23));
+    c = v_uint8x64(_mm512_permutex2var_epi32(br01, mask1, br23));
+    b = v_uint8x64(_mm512_permutex2var_epi32(ga01, mask0, ga23));
+    d = v_uint8x64(_mm512_permutex2var_epi32(ga01, mask1, ga23));
+#endif
+}
+
+inline void v_load_deinterleave( const ushort* ptr, v_uint16x32& a, v_uint16x32& b, v_uint16x32& c, v_uint16x32& d )
+{
+    // Four unaligned 512-bit loads (ptr, ptr + 32, ptr + 64, ptr + 96) go through two rounds of
+    // even/odd index selection; the second round produces the four outputs.
+    const __m512i q0 = _mm512_loadu_si512((const __m512i*)ptr);
+    const __m512i q1 = _mm512_loadu_si512((const __m512i*)(ptr + 32));
+    const __m512i q2 = _mm512_loadu_si512((const __m512i*)(ptr + 64));
+    const __m512i q3 = _mm512_loadu_si512((const __m512i*)(ptr + 96));
+    const __m512i idx_even = _v512_set_epu16(62, 60, 58, 56, 54, 52, 50, 48, 46, 44, 42, 40, 38, 36, 34, 32,
+                                             30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10,  8,  6,  4,  2,  0);
+    const __m512i idx_odd  = _v512_set_epu16(63, 61, 59, 57, 55, 53, 51, 49, 47, 45, 43, 41, 39, 37, 35, 33,
+                                             31, 29, 27, 25, 23, 21, 19, 17, 15, 13, 11,  9,  7,  5,  3,  1);
+    const __m512i even01 = _mm512_permutex2var_epi16(q0, idx_even, q1);
+    const __m512i odd01  = _mm512_permutex2var_epi16(q0, idx_odd, q1);
+    const __m512i even23 = _mm512_permutex2var_epi16(q2, idx_even, q3);
+    const __m512i odd23  = _mm512_permutex2var_epi16(q2, idx_odd, q3);
+
+    a = v_uint16x32(_mm512_permutex2var_epi16(even01, idx_even, even23));
+    c = v_uint16x32(_mm512_permutex2var_epi16(even01, idx_odd, even23));
+    b = v_uint16x32(_mm512_permutex2var_epi16(odd01, idx_even, odd23));
+    d = v_uint16x32(_mm512_permutex2var_epi16(odd01, idx_odd, odd23));
+}
+
+inline void v_load_deinterleave( const unsigned* ptr, v_uint32x16& a, v_uint32x16& b, v_uint32x16& c, v_uint32x16& d )
+{
+    // Four unaligned 512-bit loads (ptr, ptr + 16, ptr + 32, ptr + 48) go through two rounds of
+    // even/odd index selection; the second round produces the four outputs.
+    const __m512i q0 = _mm512_loadu_si512((const __m512i*)ptr);
+    const __m512i q1 = _mm512_loadu_si512((const __m512i*)(ptr + 16));
+    const __m512i q2 = _mm512_loadu_si512((const __m512i*)(ptr + 32));
+    const __m512i q3 = _mm512_loadu_si512((const __m512i*)(ptr + 48));
+    const __m512i idx_even = _v512_set_epu32(30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0);
+    const __m512i idx_odd  = _v512_set_epu32(31, 29, 27, 25, 23, 21, 19, 17, 15, 13, 11, 9, 7, 5, 3, 1);
+    const __m512i even01 = _mm512_permutex2var_epi32(q0, idx_even, q1);
+    const __m512i odd01  = _mm512_permutex2var_epi32(q0, idx_odd, q1);
+    const __m512i even23 = _mm512_permutex2var_epi32(q2, idx_even, q3);
+    const __m512i odd23  = _mm512_permutex2var_epi32(q2, idx_odd, q3);
+
+    a = v_uint32x16(_mm512_permutex2var_epi32(even01, idx_even, even23));
+    c = v_uint32x16(_mm512_permutex2var_epi32(even01, idx_odd, even23));
+    b = v_uint32x16(_mm512_permutex2var_epi32(odd01, idx_even, odd23));
+    d = v_uint32x16(_mm512_permutex2var_epi32(odd01, idx_odd, odd23));
+}
+
+inline void v_load_deinterleave( const uint64* ptr, v_uint64x8& a, v_uint64x8& b, v_uint64x8& c, v_uint64x8& d )
+{
+    // Four unaligned 512-bit loads (ptr, ptr + 8, ptr + 16, ptr + 24) go through two rounds of
+    // even/odd index selection; the second round produces the four outputs.
+    const __m512i q0 = _mm512_loadu_si512((const __m512i*)ptr);
+    const __m512i q1 = _mm512_loadu_si512((const __m512i*)(ptr + 8));
+    const __m512i q2 = _mm512_loadu_si512((const __m512i*)(ptr + 16));
+    const __m512i q3 = _mm512_loadu_si512((const __m512i*)(ptr + 24));
+    const __m512i idx_even = _v512_set_epu64(14, 12, 10, 8, 6, 4, 2, 0);
+    const __m512i idx_odd  = _v512_set_epu64(15, 13, 11, 9, 7, 5, 3, 1);
+    const __m512i even01 = _mm512_permutex2var_epi64(q0, idx_even, q1);
+    const __m512i odd01  = _mm512_permutex2var_epi64(q0, idx_odd, q1);
+    const __m512i even23 = _mm512_permutex2var_epi64(q2, idx_even, q3);
+    const __m512i odd23  = _mm512_permutex2var_epi64(q2, idx_odd, q3);
+
+    a = v_uint64x8(_mm512_permutex2var_epi64(even01, idx_even, even23));
+    c = v_uint64x8(_mm512_permutex2var_epi64(even01, idx_odd, even23));
+    b = v_uint64x8(_mm512_permutex2var_epi64(odd01, idx_even, odd23));
+    d = v_uint64x8(_mm512_permutex2var_epi64(odd01, idx_odd, odd23));
+}
+
+///////////////////////////// store interleave /////////////////////////////////////
+
+// Zips x with y via v_zip and writes both resulting registers to ptr / ptr + 64 using the store intrinsic chosen by mode.
+inline void v_store_interleave( uchar* ptr, const v_uint8x64& x, const v_uint8x64& y,
+                                hal::StoreMode mode=hal::STORE_UNALIGNED )
+{
+    v_uint8x64 lo, hi;
+    v_zip(x, y, lo, hi);
+    switch( mode )
+    {
+    case hal::STORE_ALIGNED_NOCACHE:
+        _mm512_stream_si512((__m512i*)ptr, lo.val);
+        _mm512_stream_si512((__m512i*)(ptr + 64), hi.val);
+        break;
+    case hal::STORE_ALIGNED:
+        _mm512_store_si512((__m512i*)ptr, lo.val);
+        _mm512_store_si512((__m512i*)(ptr + 64), hi.val);
+        break;
+    default:
+        _mm512_storeu_si512((__m512i*)ptr, lo.val);
+        _mm512_storeu_si512((__m512i*)(ptr + 64), hi.val);
+    }
+}
+
+// Zips x with y via v_zip and writes both resulting registers to ptr / ptr + 32 using the store intrinsic chosen by mode.
+inline void v_store_interleave( ushort* ptr, const v_uint16x32& x, const v_uint16x32& y,
+                                hal::StoreMode mode=hal::STORE_UNALIGNED )
+{
+    v_uint16x32 lo, hi;
+    v_zip(x, y, lo, hi);
+    switch( mode )
+    {
+    case hal::STORE_ALIGNED_NOCACHE:
+        _mm512_stream_si512((__m512i*)ptr, lo.val);
+        _mm512_stream_si512((__m512i*)(ptr + 32), hi.val);
+        break;
+    case hal::STORE_ALIGNED:
+        _mm512_store_si512((__m512i*)ptr, lo.val);
+        _mm512_store_si512((__m512i*)(ptr + 32), hi.val);
+        break;
+    default:
+        _mm512_storeu_si512((__m512i*)ptr, lo.val);
+        _mm512_storeu_si512((__m512i*)(ptr + 32), hi.val);
+    }
+}
+
+// Zips x with y via v_zip and writes both resulting registers to ptr / ptr + 16 using the store intrinsic chosen by mode.
+inline void v_store_interleave( unsigned* ptr, const v_uint32x16& x, const v_uint32x16& y,
+                                hal::StoreMode mode=hal::STORE_UNALIGNED )
+{
+    v_uint32x16 lo, hi;
+    v_zip(x, y, lo, hi);
+    switch( mode )
+    {
+    case hal::STORE_ALIGNED_NOCACHE:
+        _mm512_stream_si512((__m512i*)ptr, lo.val);
+        _mm512_stream_si512((__m512i*)(ptr + 16), hi.val);
+        break;
+    case hal::STORE_ALIGNED:
+        _mm512_store_si512((__m512i*)ptr, lo.val);
+        _mm512_store_si512((__m512i*)(ptr + 16), hi.val);
+        break;
+    default:
+        _mm512_storeu_si512((__m512i*)ptr, lo.val);
+        _mm512_storeu_si512((__m512i*)(ptr + 16), hi.val);
+    }
+}
+
+// Zips x with y via v_zip and writes both resulting registers to ptr / ptr + 8 using the store intrinsic chosen by mode.
+inline void v_store_interleave( uint64* ptr, const v_uint64x8& x, const v_uint64x8& y,
+                                hal::StoreMode mode=hal::STORE_UNALIGNED )
+{
+    v_uint64x8 lo, hi;
+    v_zip(x, y, lo, hi);
+    switch( mode )
+    {
+    case hal::STORE_ALIGNED_NOCACHE:
+        _mm512_stream_si512((__m512i*)ptr, lo.val);
+        _mm512_stream_si512((__m512i*)(ptr + 8), hi.val);
+        break;
+    case hal::STORE_ALIGNED:
+        _mm512_store_si512((__m512i*)ptr, lo.val);
+        _mm512_store_si512((__m512i*)(ptr + 8), hi.val);
+        break;
+    default:
+        _mm512_storeu_si512((__m512i*)ptr, lo.val);
+        _mm512_storeu_si512((__m512i*)(ptr + 8), hi.val);
+    }
+}
+
+inline void v_store_interleave( uchar* ptr, const v_uint8x64& a, const v_uint8x64& b, const v_uint8x64& c, // combines a, b, c into three registers and writes 192 bytes at ptr
+                                hal::StoreMode mode=hal::STORE_UNALIGNED ) // mode picks the store intrinsic; unaligned by default
+{
+#if CV_AVX_512VBMI // variant built on two-source byte permutes followed by byte blends
+    __m512i mask0 = _v512_set_epu8(127,  84,  20, 126,  83,  19, 125,  82,  18, 124,  81,  17, 123,  80,  16, 122, // index table for the (b, c) pair
+                                    79,  15, 121,  78,  14, 120,  77,  13, 119,  76,  12, 118,  75,  11, 117,  74,
+                                    10, 116,  73,   9, 115,  72,   8, 114,  71,   7, 113,  70,   6, 112,  69,   5,
+                                   111,  68,   4, 110,  67,   3, 109,  66,   2, 108,  65,   1, 107,  64,   0, 106);
+    __m512i mask1 = _v512_set_epu8( 21,  42, 105,  20,  41, 104,  19,  40, 103,  18,  39, 102,  17,  38, 101,  16, // index table for the (a, c) pair
+                                    37, 100,  15,  36,  99,  14,  35,  98,  13,  34,  97,  12,  33,  96,  11,  32,
+                                    95,  10,  31,  94,   9,  30,  93,   8,  29,  92,   7,  28,  91,   6,  27,  90,
+                                     5,  26,  89,   4,  25,  88,   3,  24,  87,   2,  23,  86,   1,  22,  85,   0);
+    __m512i mask2 = _v512_set_epu8(106, 127,  63, 105, 126,  62, 104, 125,  61, 103, 124,  60, 102, 123,  59, 101, // index table for the (a, b) pair
+                                   122,  58, 100, 121,  57,  99, 120,  56,  98, 119,  55,  97, 118,  54,  96, 117,
+                                    53,  95, 116,  52,  94, 115,  51,  93, 114,  50,  92, 113,  49,  91, 112,  48,
+                                    90, 111,  47,  89, 110,  46,  88, 109,  45,  87, 108,  44,  86, 107,  43,  85);
+    __m512i r2g0r0 = _mm512_permutex2var_epi8(b.val, mask0, c.val); // names presumably list the channel pieces held (b, g, r = a, b, c inputs) - confirm
+    __m512i b0r1b1 = _mm512_permutex2var_epi8(a.val, mask1, c.val);
+    __m512i g1b2g2 = _mm512_permutex2var_epi8(a.val, mask2, b.val);
+
+    __m512i bgr0 = _mm512_mask_blend_epi8(0x9249249249249249, r2g0r0, b0r1b1); // the mask selects every third byte, starting at byte 0, from the second operand
+    __m512i bgr1 = _mm512_mask_blend_epi8(0x9249249249249249, b0r1b1, g1b2g2);
+    __m512i bgr2 = _mm512_mask_blend_epi8(0x9249249249249249, g1b2g2, r2g0r0);
+#else // fallback: pair bytes into 16-bit units with byte blends, then reuse 16-bit permutes and blends
+    __m512i g1g0 = _mm512_shuffle_epi8(b.val, _mm512_set4_epi32(0x0e0f0c0d, 0x0a0b0809, 0x06070405, 0x02030001)); // swap the two bytes of every 16-bit pair of b
+    __m512i b0g0 = _mm512_mask_blend_epi8(0xAAAAAAAAAAAAAAAA, a.val, g1g0); // 0xAAAA... takes odd-numbered bytes from the second operand
+    __m512i r0b1 = _mm512_mask_blend_epi8(0xAAAAAAAAAAAAAAAA, c.val, a.val);
+    __m512i g1r1 = _mm512_mask_blend_epi8(0xAAAAAAAAAAAAAAAA, g1g0, c.val);
+
+    __m512i mask0 = _v512_set_epu16(42, 10, 31, 41,  9, 30, 40,  8, 29, 39,  7, 28, 38,  6, 27, 37, // the three 16-bit index tables below have the same values as in the ushort three-vector store
+                                     5, 26, 36,  4, 25, 35,  3, 24, 34,  2, 23, 33,  1, 22, 32,  0);
+    __m512i mask1 = _v512_set_epu16(21, 52, 41, 20, 51, 40, 19, 50, 39, 18, 49, 38, 17, 48, 37, 16,
+                                    47, 36, 15, 46, 35, 14, 45, 34, 13, 44, 33, 12, 43, 32, 11, 42);
+    __m512i mask2 = _v512_set_epu16(63, 31, 20, 62, 30, 19, 61, 29, 18, 60, 28, 17, 59, 27, 16, 58,
+                                    26, 15, 57, 25, 14, 56, 24, 13, 55, 23, 12, 54, 22, 11, 53, 21);
+    __m512i b0g0b2 = _mm512_permutex2var_epi16(b0g0, mask0, r0b1);
+    __m512i r1b1r0 = _mm512_permutex2var_epi16(b0g0, mask1, g1r1);
+    __m512i g2r2g1 = _mm512_permutex2var_epi16(r0b1, mask2, g1r1);
+
+    __m512i bgr0 = _mm512_mask_blend_epi16(0x24924924, b0g0b2, r1b1r0); // the mask selects every third 16-bit element, starting at element 2, from the second operand
+    __m512i bgr1 = _mm512_mask_blend_epi16(0x24924924, r1b1r0, g2r2g1);
+    __m512i bgr2 = _mm512_mask_blend_epi16(0x24924924, g2r2g1, b0g0b2);
+#endif
+
+    if( mode == hal::STORE_ALIGNED_NOCACHE ) // streaming-store intrinsic; caller presumably guarantees a suitably aligned ptr - confirm
+    {
+        _mm512_stream_si512((__m512i*)ptr, bgr0);
+        _mm512_stream_si512((__m512i*)(ptr + 64), bgr1);
+        _mm512_stream_si512((__m512i*)(ptr + 128), bgr2);
+    }
+    else if( mode == hal::STORE_ALIGNED ) // aligned-store intrinsic
+    {
+        _mm512_store_si512((__m512i*)ptr, bgr0);
+        _mm512_store_si512((__m512i*)(ptr + 64), bgr1);
+        _mm512_store_si512((__m512i*)(ptr + 128), bgr2);
+    }
+    else // any other mode, including the default STORE_UNALIGNED: unaligned-store intrinsic
+    {
+        _mm512_storeu_si512((__m512i*)ptr, bgr0);
+        _mm512_storeu_si512((__m512i*)(ptr + 64), bgr1);
+        _mm512_storeu_si512((__m512i*)(ptr + 128), bgr2);
+    }
+}
+
+// Combines a, b, c with 16-bit permutes and blends, then writes three registers at ptr, ptr + 32, ptr + 64 per mode.
+inline void v_store_interleave( ushort* ptr, const v_uint16x32& a, const v_uint16x32& b, const v_uint16x32& c,
+                                hal::StoreMode mode=hal::STORE_UNALIGNED )
+{
+    const __m512i idx_ab = _v512_set_epu16(42, 10, 31, 41,  9, 30, 40,  8, 29, 39,  7, 28, 38,  6, 27, 37,
+                                            5, 26, 36,  4, 25, 35,  3, 24, 34,  2, 23, 33,  1, 22, 32,  0);
+    const __m512i idx_ac = _v512_set_epu16(21, 52, 41, 20, 51, 40, 19, 50, 39, 18, 49, 38, 17, 48, 37, 16,
+                                           47, 36, 15, 46, 35, 14, 45, 34, 13, 44, 33, 12, 43, 32, 11, 42);
+    const __m512i idx_bc = _v512_set_epu16(63, 31, 20, 62, 30, 19, 61, 29, 18, 60, 28, 17, 59, 27, 16, 58,
+                                           26, 15, 57, 25, 14, 56, 24, 13, 55, 23, 12, 54, 22, 11, 53, 21);
+    const __m512i part_ab = _mm512_permutex2var_epi16(a.val, idx_ab, b.val);
+    const __m512i part_ac = _mm512_permutex2var_epi16(a.val, idx_ac, c.val);
+    const __m512i part_bc = _mm512_permutex2var_epi16(b.val, idx_bc, c.val);
+
+    const __m512i out0 = _mm512_mask_blend_epi16(0x24924924, part_ab, part_ac);
+    const __m512i out1 = _mm512_mask_blend_epi16(0x24924924, part_ac, part_bc);
+    const __m512i out2 = _mm512_mask_blend_epi16(0x24924924, part_bc, part_ab);
+
+    switch( mode )
+    {
+    case hal::STORE_ALIGNED_NOCACHE:
+        _mm512_stream_si512((__m512i*)ptr, out0);
+        _mm512_stream_si512((__m512i*)(ptr + 32), out1);
+        _mm512_stream_si512((__m512i*)(ptr + 64), out2);
+        break;
+    case hal::STORE_ALIGNED:
+        _mm512_store_si512((__m512i*)ptr, out0);
+        _mm512_store_si512((__m512i*)(ptr + 32), out1);
+        _mm512_store_si512((__m512i*)(ptr + 64), out2);
+        break;
+    default:
+        _mm512_storeu_si512((__m512i*)ptr, out0);
+        _mm512_storeu_si512((__m512i*)(ptr + 32), out1);
+        _mm512_storeu_si512((__m512i*)(ptr + 64), out2);
+    }
+}
+
+// Combines a, b, c (expand for the first register, permute + blend for the rest) and writes them at ptr, ptr + 16, ptr + 32 per mode.
+inline void v_store_interleave( unsigned* ptr, const v_uint32x16& a, const v_uint32x16& b, const v_uint32x16& c,
+                                hal::StoreMode mode=hal::STORE_UNALIGNED )
+{
+    const __m512i idx_ab = _v512_set_epu32(26, 31, 15, 25, 30, 14, 24, 29, 13, 23, 28, 12, 22, 27, 11, 21);
+    const __m512i idx_ac = _v512_set_epu32(31, 10, 25, 30,  9, 24, 29,  8, 23, 28,  7, 22, 27,  6, 21, 26);
+    const __m512i part_ab = _mm512_permutex2var_epi32(a.val, idx_ab, b.val);
+    const __m512i part_ac = _mm512_permutex2var_epi32(a.val, idx_ac, c.val);
+    __m512i out0 = _mm512_maskz_expand_epi32(0x9249, a.val);
+    out0 = _mm512_mask_expand_epi32(out0, 0x2492, b.val);
+    out0 = _mm512_mask_expand_epi32(out0, 0x4924, c.val);
+    const __m512i out1 = _mm512_mask_blend_epi32(0x9249, part_ac, part_ab);
+    const __m512i out2 = _mm512_mask_blend_epi32(0x9249, part_ab, part_ac);
+    switch( mode )
+    {
+    case hal::STORE_ALIGNED_NOCACHE:
+        _mm512_stream_si512((__m512i*)ptr, out0);
+        _mm512_stream_si512((__m512i*)(ptr + 16), out1);
+        _mm512_stream_si512((__m512i*)(ptr + 32), out2);
+        break;
+    case hal::STORE_ALIGNED:
+        _mm512_store_si512((__m512i*)ptr, out0);
+        _mm512_store_si512((__m512i*)(ptr + 16), out1);
+        _mm512_store_si512((__m512i*)(ptr + 32), out2);
+        break;
+    default:
+        _mm512_storeu_si512((__m512i*)ptr, out0);
+        _mm512_storeu_si512((__m512i*)(ptr + 16), out1);
+        _mm512_storeu_si512((__m512i*)(ptr + 32), out2);
+    }
+}
+
+// Combines a, b, c (expand for the first register, permute + blend for the rest) and writes them at ptr, ptr + 8, ptr + 16 per mode.
+inline void v_store_interleave( uint64* ptr, const v_uint64x8& a, const v_uint64x8& b, const v_uint64x8& c,
+                                hal::StoreMode mode=hal::STORE_UNALIGNED )
+{
+    const __m512i idx_ac = _v512_set_epu64( 5, 12,  7,  4, 11,  6,  3, 10);
+    const __m512i idx_bc = _v512_set_epu64(15,  7,  4, 14,  6,  3, 13,  5);
+    const __m512i part_ac = _mm512_permutex2var_epi64(a.val, idx_ac, c.val);
+    const __m512i part_bc = _mm512_permutex2var_epi64(b.val, idx_bc, c.val);
+    __m512i out0 = _mm512_maskz_expand_epi64(0x49, a.val);
+    out0 = _mm512_mask_expand_epi64(out0, 0x92, b.val);
+    out0 = _mm512_mask_expand_epi64(out0, 0x24, c.val);
+    const __m512i out1 = _mm512_mask_blend_epi64(0xdb, part_bc, part_ac);
+    const __m512i out2 = _mm512_mask_blend_epi64(0xdb, part_ac, part_bc);
+    switch( mode )
+    {
+    case hal::STORE_ALIGNED_NOCACHE:
+        _mm512_stream_si512((__m512i*)ptr, out0);
+        _mm512_stream_si512((__m512i*)(ptr + 8), out1);
+        _mm512_stream_si512((__m512i*)(ptr + 16), out2);
+        break;
+    case hal::STORE_ALIGNED:
+        _mm512_store_si512((__m512i*)ptr, out0);
+        _mm512_store_si512((__m512i*)(ptr + 8), out1);
+        _mm512_store_si512((__m512i*)(ptr + 16), out2);
+        break;
+    default:
+        _mm512_storeu_si512((__m512i*)ptr, out0);
+        _mm512_storeu_si512((__m512i*)(ptr + 8), out1);
+        _mm512_storeu_si512((__m512i*)(ptr + 16), out2);
+    }
+}
+
+// Two rounds of v_zip ((a,c) and (b,d), then their results) build four registers written at ptr + 0/64/128/192 per mode.
+inline void v_store_interleave( uchar* ptr, const v_uint8x64& a, const v_uint8x64& b,
+                                const v_uint8x64& c, const v_uint8x64& d,
+                                hal::StoreMode mode=hal::STORE_UNALIGNED )
+{
+    v_uint8x64 ac_lo, ac_hi, bd_lo, bd_hi;
+    v_zip(a, c, ac_lo, ac_hi);
+    v_zip(b, d, bd_lo, bd_hi);
+    v_uint8x64 out0, out1, out2, out3;
+    v_zip(ac_lo, bd_lo, out0, out1);
+    v_zip(ac_hi, bd_hi, out2, out3);
+
+    switch( mode )
+    {
+    case hal::STORE_ALIGNED_NOCACHE:
+        _mm512_stream_si512((__m512i*)ptr, out0.val);
+        _mm512_stream_si512((__m512i*)(ptr + 64), out1.val);
+        _mm512_stream_si512((__m512i*)(ptr + 128), out2.val);
+        _mm512_stream_si512((__m512i*)(ptr + 192), out3.val);
+        break;
+    case hal::STORE_ALIGNED:
+        _mm512_store_si512((__m512i*)ptr, out0.val);
+        _mm512_store_si512((__m512i*)(ptr + 64), out1.val);
+        _mm512_store_si512((__m512i*)(ptr + 128), out2.val);
+        _mm512_store_si512((__m512i*)(ptr + 192), out3.val);
+        break;
+    default:
+        _mm512_storeu_si512((__m512i*)ptr, out0.val);
+        _mm512_storeu_si512((__m512i*)(ptr + 64), out1.val);
+        _mm512_storeu_si512((__m512i*)(ptr + 128), out2.val);
+        _mm512_storeu_si512((__m512i*)(ptr + 192), out3.val);
+    }
+}
+
+// Two rounds of v_zip ((a,c) and (b,d), then their results) build four registers written at ptr + 0/32/64/96 per mode.
+inline void v_store_interleave( ushort* ptr, const v_uint16x32& a, const v_uint16x32& b,
+                                const v_uint16x32& c, const v_uint16x32& d,
+                                hal::StoreMode mode=hal::STORE_UNALIGNED )
+{
+    v_uint16x32 ac_lo, ac_hi, bd_lo, bd_hi;
+    v_zip(a, c, ac_lo, ac_hi);
+    v_zip(b, d, bd_lo, bd_hi);
+    v_uint16x32 out0, out1, out2, out3;
+    v_zip(ac_lo, bd_lo, out0, out1);
+    v_zip(ac_hi, bd_hi, out2, out3);
+
+    switch( mode )
+    {
+    case hal::STORE_ALIGNED_NOCACHE:
+        _mm512_stream_si512((__m512i*)ptr, out0.val);
+        _mm512_stream_si512((__m512i*)(ptr + 32), out1.val);
+        _mm512_stream_si512((__m512i*)(ptr + 64), out2.val);
+        _mm512_stream_si512((__m512i*)(ptr + 96), out3.val);
+        break;
+    case hal::STORE_ALIGNED:
+        _mm512_store_si512((__m512i*)ptr, out0.val);
+        _mm512_store_si512((__m512i*)(ptr + 32), out1.val);
+        _mm512_store_si512((__m512i*)(ptr + 64), out2.val);
+        _mm512_store_si512((__m512i*)(ptr + 96), out3.val);
+        break;
+    default:
+        _mm512_storeu_si512((__m512i*)ptr, out0.val);
+        _mm512_storeu_si512((__m512i*)(ptr + 32), out1.val);
+        _mm512_storeu_si512((__m512i*)(ptr + 64), out2.val);
+        _mm512_storeu_si512((__m512i*)(ptr + 96), out3.val);
+    }
+}
+
+inline void v_store_interleave( unsigned* ptr, const v_uint32x16& a, const v_uint32x16& b,
+                                const v_uint32x16& c, const v_uint32x16& d,
+                                hal::StoreMode mode=hal::STORE_UNALIGNED )
+{
+    // Two zip rounds: (a,c) and (b,d) first, then those results against each other.
+    v_uint32x16 ac_lo, ac_hi, bd_lo, bd_hi, q0, q1, q2, q3;
+    v_zip(a, c, ac_lo, ac_hi);
+    v_zip(b, d, bd_lo, bd_hi);
+    v_zip(ac_lo, bd_lo, q0, q1);
+    v_zip(ac_hi, bd_hi, q2, q3);
+    __m512i* const dst = (__m512i*)ptr; // q0..q3 fill four consecutive 64-byte slots (16 unsigneds each)
+    switch( mode ) // the mode only picks the store intrinsic, never the layout
+    {
+    case hal::STORE_ALIGNED_NOCACHE:
+        _mm512_stream_si512(dst, q0.val);
+        _mm512_stream_si512(dst + 1, q1.val);
+        _mm512_stream_si512(dst + 2, q2.val);
+        _mm512_stream_si512(dst + 3, q3.val);
+        break;
+    case hal::STORE_ALIGNED:
+        _mm512_store_si512(dst, q0.val);
+        _mm512_store_si512(dst + 1, q1.val);
+        _mm512_store_si512(dst + 2, q2.val);
+        _mm512_store_si512(dst + 3, q3.val);
+        break;
+    default:
+        _mm512_storeu_si512(dst, q0.val);
+        _mm512_storeu_si512(dst + 1, q1.val);
+        _mm512_storeu_si512(dst + 2, q2.val);
+        _mm512_storeu_si512(dst + 3, q3.val);
+        break;
+    }
+}
+
+inline void v_store_interleave( uint64* ptr, const v_uint64x8& a, const v_uint64x8& b,
+                                const v_uint64x8& c, const v_uint64x8& d,
+                                hal::StoreMode mode=hal::STORE_UNALIGNED )
+{
+    // Two zip rounds: (a,c) and (b,d) first, then those results against each other.
+    v_uint64x8 ac_lo, ac_hi, bd_lo, bd_hi, q0, q1, q2, q3;
+    v_zip(a, c, ac_lo, ac_hi);
+    v_zip(b, d, bd_lo, bd_hi);
+    v_zip(ac_lo, bd_lo, q0, q1);
+    v_zip(ac_hi, bd_hi, q2, q3);
+    __m512i* const dst = (__m512i*)ptr; // q0..q3 fill four consecutive 64-byte slots (8 uint64s each)
+    switch( mode ) // the mode only picks the store intrinsic, never the layout
+    {
+    case hal::STORE_ALIGNED_NOCACHE:
+        _mm512_stream_si512(dst, q0.val);
+        _mm512_stream_si512(dst + 1, q1.val);
+        _mm512_stream_si512(dst + 2, q2.val);
+        _mm512_stream_si512(dst + 3, q3.val);
+        break;
+    case hal::STORE_ALIGNED:
+        _mm512_store_si512(dst, q0.val);
+        _mm512_store_si512(dst + 1, q1.val);
+        _mm512_store_si512(dst + 2, q2.val);
+        _mm512_store_si512(dst + 3, q3.val);
+        break;
+    default:
+        _mm512_storeu_si512(dst, q0.val);
+        _mm512_storeu_si512(dst + 1, q1.val);
+        _mm512_storeu_si512(dst + 2, q2.val);
+        _mm512_storeu_si512(dst + 3, q3.val);
+        break;
+    }
+}
+
+#define OPENCV_HAL_IMPL_AVX512_LOADSTORE_INTERLEAVE(_Tpvec0, _Tp0, suffix0, _Tpvec1, _Tp1, suffix1) \
+inline void v_load_deinterleave( const _Tp0* ptr, _Tpvec0& a0, _Tpvec0& b0 ) \
+{ \
+    _Tpvec1 a1, b1; \
+    v_load_deinterleave((const _Tp1*)ptr, a1, b1); \
+    a0 = v_reinterpret_as_##suffix0(a1); \
+    b0 = v_reinterpret_as_##suffix0(b1); \
+} \
+inline void v_load_deinterleave( const _Tp0* ptr, _Tpvec0& a0, _Tpvec0& b0, _Tpvec0& c0 ) \
+{ \
+    _Tpvec1 a1, b1, c1; \
+    v_load_deinterleave((const _Tp1*)ptr, a1, b1, c1); \
+    a0 = v_reinterpret_as_##suffix0(a1); \
+    b0 = v_reinterpret_as_##suffix0(b1); \
+    c0 = v_reinterpret_as_##suffix0(c1); \
+} \
+inline void v_load_deinterleave( const _Tp0* ptr, _Tpvec0& a0, _Tpvec0& b0, _Tpvec0& c0, _Tpvec0& d0 ) \
+{ \
+    _Tpvec1 a1, b1, c1, d1; \
+    v_load_deinterleave((const _Tp1*)ptr, a1, b1, c1, d1); \
+    a0 = v_reinterpret_as_##suffix0(a1); \
+    b0 = v_reinterpret_as_##suffix0(b1); \
+    c0 = v_reinterpret_as_##suffix0(c1); \
+    d0 = v_reinterpret_as_##suffix0(d1); \
+} \
+inline void v_store_interleave( _Tp0* ptr, const _Tpvec0& a0, const _Tpvec0& b0, \
+                                hal::StoreMode mode=hal::STORE_UNALIGNED ) \
+{ \
+    _Tpvec1 a1 = v_reinterpret_as_##suffix1(a0); \
+    _Tpvec1 b1 = v_reinterpret_as_##suffix1(b0); \
+    v_store_interleave((_Tp1*)ptr, a1, b1, mode);      \
+} \
+inline void v_store_interleave( _Tp0* ptr, const _Tpvec0& a0, const _Tpvec0& b0, const _Tpvec0& c0, \
+                                hal::StoreMode mode=hal::STORE_UNALIGNED ) \
+{ \
+    _Tpvec1 a1 = v_reinterpret_as_##suffix1(a0); \
+    _Tpvec1 b1 = v_reinterpret_as_##suffix1(b0); \
+    _Tpvec1 c1 = v_reinterpret_as_##suffix1(c0); \
+    v_store_interleave((_Tp1*)ptr, a1, b1, c1, mode);  \
+} \
+inline void v_store_interleave( _Tp0* ptr, const _Tpvec0& a0, const _Tpvec0& b0, \
+                                const _Tpvec0& c0, const _Tpvec0& d0, \
+                                hal::StoreMode mode=hal::STORE_UNALIGNED ) \
+{ \
+    _Tpvec1 a1 = v_reinterpret_as_##suffix1(a0); \
+    _Tpvec1 b1 = v_reinterpret_as_##suffix1(b0); \
+    _Tpvec1 c1 = v_reinterpret_as_##suffix1(c0); \
+    _Tpvec1 d1 = v_reinterpret_as_##suffix1(d0); \
+    v_store_interleave((_Tp1*)ptr, a1, b1, c1, d1, mode); \
+}
+// Signed-integer and floating-point overloads forward to the same-width unsigned implementation via v_reinterpret_as_*.
+OPENCV_HAL_IMPL_AVX512_LOADSTORE_INTERLEAVE(v_int8x64, schar, s8, v_uint8x64, uchar, u8)
+OPENCV_HAL_IMPL_AVX512_LOADSTORE_INTERLEAVE(v_int16x32, short, s16, v_uint16x32, ushort, u16)
+OPENCV_HAL_IMPL_AVX512_LOADSTORE_INTERLEAVE(v_int32x16, int, s32, v_uint32x16, unsigned, u32)
+OPENCV_HAL_IMPL_AVX512_LOADSTORE_INTERLEAVE(v_float32x16, float, f32, v_uint32x16, unsigned, u32)
+OPENCV_HAL_IMPL_AVX512_LOADSTORE_INTERLEAVE(v_int64x8, int64, s64, v_uint64x8, uint64, u64)
+OPENCV_HAL_IMPL_AVX512_LOADSTORE_INTERLEAVE(v_float64x8, double, f64, v_uint64x8, uint64, u64)
+
+////////// Mask and checks /////////
+
+/** Mask: bit i of the result is set when lane i has its sign (most significant) bit set **/
+inline int64 v_signmask(const v_int8x64& a) { return (int64)_mm512_movepi8_mask(a.val); }
+inline int v_signmask(const v_int16x32& a) { return (int)_mm512_cmp_epi16_mask(a.val, _mm512_setzero_si512(), _MM_CMPINT_LT); }
+inline int v_signmask(const v_int32x16& a) { return (int)_mm512_cmp_epi32_mask(a.val, _mm512_setzero_si512(), _MM_CMPINT_LT); }
+inline int v_signmask(const v_int64x8& a) { return (int)_mm512_cmp_epi64_mask(a.val, _mm512_setzero_si512(), _MM_CMPINT_LT); }
+
+inline int64 v_signmask(const v_uint8x64& a) { return v_signmask(v_reinterpret_as_s8(a)); } // unsigned/float: forward to the signed overload of the same lane width
+inline int v_signmask(const v_uint16x32& a) { return v_signmask(v_reinterpret_as_s16(a)); }
+inline int v_signmask(const v_uint32x16& a) { return v_signmask(v_reinterpret_as_s32(a)); }
+inline int v_signmask(const v_uint64x8& a) { return v_signmask(v_reinterpret_as_s64(a)); }
+inline int v_signmask(const v_float32x16& a) { return v_signmask(v_reinterpret_as_s32(a)); }
+inline int v_signmask(const v_float64x8& a) { return v_signmask(v_reinterpret_as_s64(a)); }
+
+/** Checks: v_check_all is true when no lane compares >= 0 (every sign bit set); v_check_any when at least one lane compares < 0 **/
+inline bool v_check_all(const v_int8x64& a) { return !(bool)_mm512_cmp_epi8_mask(a.val, _mm512_setzero_si512(), _MM_CMPINT_NLT); }
+inline bool v_check_any(const v_int8x64& a) { return (bool)_mm512_movepi8_mask(a.val); }
+inline bool v_check_all(const v_int16x32& a) { return !(bool)_mm512_cmp_epi16_mask(a.val, _mm512_setzero_si512(), _MM_CMPINT_NLT); }
+inline bool v_check_any(const v_int16x32& a) { return (bool)_mm512_cmp_epi16_mask(a.val, _mm512_setzero_si512(), _MM_CMPINT_LT); }
+inline bool v_check_all(const v_int32x16& a) { return !(bool)_mm512_cmp_epi32_mask(a.val, _mm512_setzero_si512(), _MM_CMPINT_NLT); }
+inline bool v_check_any(const v_int32x16& a) { return (bool)_mm512_cmp_epi32_mask(a.val, _mm512_setzero_si512(), _MM_CMPINT_LT); }
+inline bool v_check_all(const v_int64x8& a) { return !(bool)_mm512_cmp_epi64_mask(a.val, _mm512_setzero_si512(), _MM_CMPINT_NLT); }
+inline bool v_check_any(const v_int64x8& a) { return (bool)_mm512_cmp_epi64_mask(a.val, _mm512_setzero_si512(), _MM_CMPINT_LT); }
+
+inline bool v_check_all(const v_float32x16& a) { return v_check_all(v_reinterpret_as_s32(a)); } // float/unsigned: forward to the signed overload of the same lane width
+inline bool v_check_any(const v_float32x16& a) { return v_check_any(v_reinterpret_as_s32(a)); }
+inline bool v_check_all(const v_float64x8& a) { return v_check_all(v_reinterpret_as_s64(a)); }
+inline bool v_check_any(const v_float64x8& a) { return v_check_any(v_reinterpret_as_s64(a)); }
+inline bool v_check_all(const v_uint8x64& a) { return v_check_all(v_reinterpret_as_s8(a)); }
+inline bool v_check_all(const v_uint16x32& a) { return v_check_all(v_reinterpret_as_s16(a)); }
+inline bool v_check_all(const v_uint32x16& a) { return v_check_all(v_reinterpret_as_s32(a)); }
+inline bool v_check_all(const v_uint64x8& a) { return v_check_all(v_reinterpret_as_s64(a)); }
+inline bool v_check_any(const v_uint8x64& a) { return v_check_any(v_reinterpret_as_s8(a)); }
+inline bool v_check_any(const v_uint16x32& a) { return v_check_any(v_reinterpret_as_s16(a)); }
+inline bool v_check_any(const v_uint32x16& a) { return v_check_any(v_reinterpret_as_s32(a)); }
+inline bool v_check_any(const v_uint64x8& a) { return v_check_any(v_reinterpret_as_s64(a)); }
+
+inline int v_scan_forward(const v_int8x64& a) // position of the lowest set bit in the 64-bit byte sign mask; 0 when the mask is empty
+{
+    int64 mask = _mm512_movepi8_mask(a.val);
+    int mask32 = (int)mask; // low 32 lanes; the high half is only inspected when this half is empty
+    return mask != 0 ? mask32 != 0 ? trailingZeros32(mask32) : 32 + trailingZeros32((int)(mask >> 32)) : 0;
+}
+inline int v_scan_forward(const v_uint8x64& a) { return v_scan_forward(v_reinterpret_as_s8(a)); }
+inline int v_scan_forward(const v_int16x32& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s16(a))); } // trailingZeros32 is defined elsewhere; presumably counts trailing zero bits
+inline int v_scan_forward(const v_uint16x32& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s16(a))); }
+inline int v_scan_forward(const v_int32x16& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s16(a))) / 2; } // 32-bit lanes: two 16-bit sub-lanes per lane, hence /2
+inline int v_scan_forward(const v_uint32x16& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s16(a))) / 2; }
+inline int v_scan_forward(const v_float32x16& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s16(a))) / 2; }
+inline int v_scan_forward(const v_int64x8& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s16(a))) / 4; } // 64-bit lanes: four 16-bit sub-lanes per lane, hence /4
+inline int v_scan_forward(const v_uint64x8& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s16(a))) / 4; }
+inline int v_scan_forward(const v_float64x8& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s16(a))) / 4; }
+
+inline void v512_cleanup() { _mm256_zeroall(); } // _mm256_zeroall zeroes the contents of all YMM registers
+
+CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
+
+//! @endcond
+
+} // cv::
+
+#endif // OPENCV_HAL_INTRIN_AVX_HPP
diff --git a/IPL/include/opencv/opencv2/core/hal/intrin_cpp.hpp b/IPL/include/opencv/opencv2/core/hal/intrin_cpp.hpp
index 3929e0d..859bfd7 100644
--- a/IPL/include/opencv/opencv2/core/hal/intrin_cpp.hpp
+++ b/IPL/include/opencv/opencv2/core/hal/intrin_cpp.hpp
@@ -42,17 +42,29 @@
 //
 //M*/
 
-#ifndef __OPENCV_HAL_INTRIN_CPP_HPP__
-#define __OPENCV_HAL_INTRIN_CPP_HPP__
+#ifndef OPENCV_HAL_INTRIN_CPP_HPP
+#define OPENCV_HAL_INTRIN_CPP_HPP
 
 #include <limits>
 #include <cstring>
 #include <algorithm>
 #include "opencv2/core/saturate.hpp"
 
+//! @cond IGNORED
+#define CV_SIMD128_CPP 1 // defined whenever this header is included
+#if defined(CV_FORCE_SIMD128_CPP) || defined(CV_DOXYGEN)
+#define CV_SIMD128 1
+#define CV_SIMD128_64F 1
+#endif // CV_FORCE_SIMD128_CPP || CV_DOXYGEN
+//! @endcond
+
 namespace cv
 {
 
+#ifndef CV_DOXYGEN
+CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
+#endif
+
 /** @addtogroup core_hal_intrin
 
 "Universal intrinsics" is a types and functions set intended to simplify vectorization of code on
@@ -69,10 +81,10 @@ implemented as a structure based on a one SIMD register.
 
 - cv::v_uint8x16 and cv::v_int8x16: sixteen 8-bit integer values (unsigned/signed) - char
 - cv::v_uint16x8 and cv::v_int16x8: eight 16-bit integer values (unsigned/signed) - short
-- cv::v_uint32x4 and cv::v_int32x4: four 32-bit integer values (unsgined/signed) - int
+- cv::v_uint32x4 and cv::v_int32x4: four 32-bit integer values (unsigned/signed) - int
 - cv::v_uint64x2 and cv::v_int64x2: two 64-bit integer values (unsigned/signed) - int64
 - cv::v_float32x4: four 32-bit floating point values (signed) - float
-- cv::v_float64x2: two 64-bit floating point valies (signed) - double
+- cv::v_float64x2: two 64-bit floating point values (signed) - double
 
 @note
 cv::v_float64x2 is not implemented in NEON variant, if you want to use this type, don't forget to
@@ -95,7 +107,7 @@ block and to save contents of the register to memory block.
 @ref v_setall_s8, @ref v_setall_u8, ...,
 @ref v_setzero_u8, @ref v_setzero_s8, ...
 - Memory operations:
-@ref v_load, @ref v_load_aligned, @ref v_load_halves,
+@ref v_load, @ref v_load_aligned, @ref v_load_low, @ref v_load_halves,
 @ref v_store, @ref v_store_aligned,
 @ref v_store_high, @ref v_store_low
 
@@ -103,11 +115,12 @@ block and to save contents of the register to memory block.
 
 These operations allow to reorder or recombine elements in one or multiple vectors.
 
-- Interleave, deinterleave (3 and 4 channels): @ref v_load_deinterleave, @ref v_store_interleave
-- Expand: @ref v_load_expand, @ref v_load_expand_q, @ref v_expand
-- Pack: @ref v_pack, @ref v_pack_u, @ref v_rshr_pack, @ref v_rshr_pack_u,
+- Interleave, deinterleave (2, 3 and 4 channels): @ref v_load_deinterleave, @ref v_store_interleave
+- Expand: @ref v_load_expand, @ref v_load_expand_q, @ref v_expand, @ref v_expand_low, @ref v_expand_high
+- Pack: @ref v_pack, @ref v_pack_u, @ref v_pack_b, @ref v_rshr_pack, @ref v_rshr_pack_u,
 @ref v_pack_store, @ref v_pack_u_store, @ref v_rshr_pack_store, @ref v_rshr_pack_u_store
 - Recombine: @ref v_zip, @ref v_recombine, @ref v_combine_low, @ref v_combine_high
+- Reverse: @ref v_reverse
 - Extract: @ref v_extract
 
 
@@ -116,32 +129,32 @@ These operations allow to reorder or recombine elements in one or multiple vecto
 Element-wise binary and unary operations.
 
 - Arithmetics:
-@ref operator+(const v_reg &a, const v_reg &b) "+",
-@ref operator-(const v_reg &a, const v_reg &b) "-",
-@ref operator*(const v_reg &a, const v_reg &b) "*",
-@ref operator/(const v_reg &a, const v_reg &b) "/",
+@ref operator +(const v_reg &a, const v_reg &b) "+",
+@ref operator -(const v_reg &a, const v_reg &b) "-",
+@ref operator *(const v_reg &a, const v_reg &b) "*",
+@ref operator /(const v_reg &a, const v_reg &b) "/",
 @ref v_mul_expand
 
 - Non-saturating arithmetics: @ref v_add_wrap, @ref v_sub_wrap
 
 - Bitwise shifts:
-@ref operator<<(const v_reg &a, int s) "<<",
-@ref operator>>(const v_reg &a, int s) ">>",
+@ref operator <<(const v_reg &a, int s) "<<",
+@ref operator >>(const v_reg &a, int s) ">>",
 @ref v_shl, @ref v_shr
 
 - Bitwise logic:
-@ref operator&(const v_reg &a, const v_reg &b) "&",
-@ref operator|(const v_reg &a, const v_reg &b) "|",
-@ref operator^(const v_reg &a, const v_reg &b) "^",
-@ref operator~(const v_reg &a) "~"
+@ref operator &(const v_reg &a, const v_reg &b) "&",
+@ref operator |(const v_reg &a, const v_reg &b) "|",
+@ref operator ^(const v_reg &a, const v_reg &b) "^",
+@ref operator ~(const v_reg &a) "~"
 
 - Comparison:
-@ref operator>(const v_reg &a, const v_reg &b) ">",
-@ref operator>=(const v_reg &a, const v_reg &b) ">=",
-@ref operator<(const v_reg &a, const v_reg &b) "<",
-@ref operator<=(const v_reg &a, const v_reg &b) "<=",
+@ref operator >(const v_reg &a, const v_reg &b) ">",
+@ref operator >=(const v_reg &a, const v_reg &b) ">=",
+@ref operator <(const v_reg &a, const v_reg &b) "<",
+@ref operator <=(const v_reg &a, const v_reg &b) "<=",
 @ref operator==(const v_reg &a, const v_reg &b) "==",
-@ref operator!=(const v_reg &a, const v_reg &b) "!="
+@ref operator !=(const v_reg &a, const v_reg &b) "!="
 
 - min/max: @ref v_min, @ref v_max
 
@@ -149,13 +162,13 @@ Element-wise binary and unary operations.
 
 Most of these operations return only one value.
 
-- Reduce: @ref v_reduce_min, @ref v_reduce_max, @ref v_reduce_sum
+- Reduce: @ref v_reduce_min, @ref v_reduce_max, @ref v_reduce_sum, @ref v_popcount
 - Mask: @ref v_signmask, @ref v_check_all, @ref v_check_any, @ref v_select
 
 ### Other math
 
 - Some frequent operations: @ref v_sqrt, @ref v_invsqrt, @ref v_magnitude, @ref v_sqr_magnitude
-- Absolute values: @ref v_abs, @ref v_absdiff
+- Absolute values: @ref v_abs, @ref v_absdiff, @ref v_absdiffs
 
 ### Conversions
 
@@ -167,7 +180,8 @@ Different type conversions and casts:
 
 ### Matrix operations
 
-In these operations vectors represent matrix rows/columns: @ref v_dotprod, @ref v_matmul, @ref v_transpose4x4
+In these operations vectors represent matrix rows/columns: @ref v_dotprod, @ref v_dotprod_fast,
+@ref v_dotprod_expand, @ref v_dotprod_expand_fast, @ref v_matmul, @ref v_transpose4x4
 
 ### Usability
 
@@ -181,26 +195,38 @@ Regular integers:
 |load, store        | x | x | x | x | x | x |
 |interleave         | x | x | x | x | x | x |
 |expand             | x | x | x | x | x | x |
+|expand_low         | x | x | x | x | x | x |
+|expand_high        | x | x | x | x | x | x |
 |expand_q           | x | x |   |   |   |   |
 |add, sub           | x | x | x | x | x | x |
 |add_wrap, sub_wrap | x | x | x | x |   |   |
-|mul                |   |   | x | x | x | x |
-|mul_expand         |   |   | x | x | x |   |
+|mul_wrap           | x | x | x | x |   |   |
+|mul                | x | x | x | x | x | x |
+|mul_expand         | x | x | x | x | x |   |
 |compare            | x | x | x | x | x | x |
 |shift              |   |   | x | x | x | x |
-|dotprod            |   |   |   | x |   |   |
+|dotprod            |   |   |   | x |   | x |
+|dotprod_fast       |   |   |   | x |   | x |
+|dotprod_expand     | x | x | x | x |   | x |
+|dotprod_expand_fast| x | x | x | x |   | x |
 |logical            | x | x | x | x | x | x |
 |min, max           | x | x | x | x | x | x |
 |absdiff            | x | x | x | x | x | x |
-|reduce             |   |   |   |   | x | x |
+|absdiffs           |   | x |   | x |   |   |
+|reduce             | x | x | x | x | x | x |
 |mask               | x | x | x | x | x | x |
 |pack               | x | x | x | x | x | x |
 |pack_u             | x |   | x |   |   |   |
+|pack_b             | x |   |   |   |   |   |
 |unpack             | x | x | x | x | x | x |
 |extract            | x | x | x | x | x | x |
+|rotate (lanes)     | x | x | x | x | x | x |
 |cvt_flt32          |   |   |   |   |   | x |
 |cvt_flt64          |   |   |   |   |   | x |
 |transpose4x4       |   |   |   |   | x | x |
+|reverse            | x | x | x | x | x | x |
+|extract_n          | x | x | x | x | x | x |
+|broadcast_element  |   |   |   |   | x | x |
 
 Big integers:
 
@@ -210,7 +236,11 @@ Big integers:
 |add, sub           | x | x |
 |shift              | x | x |
 |logical            | x | x |
+|reverse            | x | x |
 |extract            | x | x |
+|rotate (lanes)     | x | x |
+|cvt_flt64          |   | x |
+|extract_n          | x | x |
 
 Floating point:
 
@@ -232,7 +262,11 @@ Floating point:
 |sqrt, abs          | x | x |
 |float math         | x | x |
 |transpose4x4       | x |   |
-
+|extract            | x | x |
+|rotate (lanes)     | x | x |
+|reverse            | x | x |
+|extract_n          | x | x |
+|broadcast_element  | x |   |
 
  @{ */
 
@@ -240,8 +274,6 @@ template<typename _Tp, int n> struct v_reg
 {
 //! @cond IGNORED
     typedef _Tp lane_type;
-    typedef v_reg<typename V_TypeTraits<_Tp>::int_type, n> int_vec;
-    typedef v_reg<typename V_TypeTraits<_Tp>::abs_type, n> abs_vec;
     enum { nlanes = n };
 // !@endcond
 
@@ -346,6 +378,13 @@ template<typename _Tp, int n> struct v_reg
         return c;
     }
 
+    v_reg& operator=(const v_reg<_Tp, n> & r)
+    {
+        // Element-wise copy of all n lanes; assigning an object to itself is harmless.
+        for( int lane = 0; lane != n; ++lane ) s[lane] = r.s[lane];
+        return *this;
+    }
+
     _Tp s[n];
 //! @endcond
 };
@@ -371,50 +410,102 @@ typedef v_reg<uint64, 2> v_uint64x2;
 /** @brief Two 64-bit signed integer values */
 typedef v_reg<int64, 2> v_int64x2;
 
-//! @brief Helper macro
-//! @ingroup core_hal_intrin_impl
-#define OPENCV_HAL_IMPL_BIN_OP(bin_op) \
-template<typename _Tp, int n> inline v_reg<_Tp, n> \
-    operator bin_op (const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
-{ \
-    v_reg<_Tp, n> c; \
-    for( int i = 0; i < n; i++ ) \
-        c.s[i] = saturate_cast<_Tp>(a.s[i] bin_op b.s[i]); \
-    return c; \
-} \
-template<typename _Tp, int n> inline v_reg<_Tp, n>& \
-    operator bin_op##= (v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
-{ \
-    for( int i = 0; i < n; i++ ) \
-        a.s[i] = saturate_cast<_Tp>(a.s[i] bin_op b.s[i]); \
-    return a; \
-}
-
 /** @brief Add values
 
 For all types. */
-OPENCV_HAL_IMPL_BIN_OP(+)
+template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator+(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
+template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator+=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
 
 /** @brief Subtract values
 
 For all types. */
-OPENCV_HAL_IMPL_BIN_OP(-)
+template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator-(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
+template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator-=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
 
 /** @brief Multiply values
 
 For 16- and 32-bit integer types and floating types. */
-OPENCV_HAL_IMPL_BIN_OP(*)
+template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator*(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
+template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator*=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
 
 /** @brief Divide values
 
 For floating types only. */
-OPENCV_HAL_IMPL_BIN_OP(/)
+template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator/(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
+template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator/=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
 
-//! @brief Helper macro
-//! @ingroup core_hal_intrin_impl
-#define OPENCV_HAL_IMPL_BIT_OP(bit_op) \
-template<typename _Tp, int n> inline v_reg<_Tp, n> operator bit_op \
-    (const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
+
+/** @brief Bitwise AND
+
+Intended for integer types; float/double overloads are currently generated too (see the TODO at CV__HAL_INTRIN_IMPL_BIT_OP). */
+template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator&(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
+template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator&=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
+
+/** @brief Bitwise OR
+
+Intended for integer types; float/double overloads are currently generated too (see the TODO at CV__HAL_INTRIN_IMPL_BIT_OP). */
+template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator|(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
+template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator|=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
+
+/** @brief Bitwise XOR
+
+Intended for integer types; float/double overloads are currently generated too (see the TODO at CV__HAL_INTRIN_IMPL_BIT_OP). */
+template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator^(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
+template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator^=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
+
+/** @brief Bitwise NOT
+
+Only for integer types.*/
+template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator~(const v_reg<_Tp, n>& a);
+
+
+#ifndef CV_DOXYGEN
+
+#define CV__HAL_INTRIN_EXPAND_WITH_INTEGER_TYPES(macro_name, ...) \
+__CV_EXPAND(macro_name(uchar, __VA_ARGS__)) \
+__CV_EXPAND(macro_name(schar, __VA_ARGS__)) \
+__CV_EXPAND(macro_name(ushort, __VA_ARGS__)) \
+__CV_EXPAND(macro_name(short, __VA_ARGS__)) \
+__CV_EXPAND(macro_name(unsigned, __VA_ARGS__)) \
+__CV_EXPAND(macro_name(int, __VA_ARGS__)) \
+__CV_EXPAND(macro_name(uint64, __VA_ARGS__)) \
+__CV_EXPAND(macro_name(int64, __VA_ARGS__)) \
+
+#define CV__HAL_INTRIN_EXPAND_WITH_FP_TYPES(macro_name, ...) \
+__CV_EXPAND(macro_name(float, __VA_ARGS__)) \
+__CV_EXPAND(macro_name(double, __VA_ARGS__)) \
+
+#define CV__HAL_INTRIN_EXPAND_WITH_ALL_TYPES(macro_name, ...) \
+CV__HAL_INTRIN_EXPAND_WITH_INTEGER_TYPES(macro_name, __VA_ARGS__) \
+CV__HAL_INTRIN_EXPAND_WITH_FP_TYPES(macro_name, __VA_ARGS__) \
+
+#define CV__HAL_INTRIN_IMPL_BIN_OP_(_Tp, bin_op) /* lane-wise binary op and its compound-assignment form, each result saturate_cast to _Tp */ \
+template<int n> inline \
+v_reg<_Tp, n> operator bin_op (const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
+{ \
+    v_reg<_Tp, n> c; \
+    for( int i = 0; i < n; i++ ) \
+        c.s[i] = saturate_cast<_Tp>(a.s[i] bin_op b.s[i]); \
+    return c; \
+} \
+template<int n> inline \
+v_reg<_Tp, n>& operator bin_op##= (v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
+{ \
+    for( int i = 0; i < n; i++ ) \
+        a.s[i] = saturate_cast<_Tp>(a.s[i] bin_op b.s[i]); \
+    return a; \
+}
+
+#define CV__HAL_INTRIN_IMPL_BIN_OP(bin_op) CV__HAL_INTRIN_EXPAND_WITH_ALL_TYPES(CV__HAL_INTRIN_IMPL_BIN_OP_, bin_op)
+// +, - and * are generated for every integer and floating-point lane type; / only for float and double.
+CV__HAL_INTRIN_IMPL_BIN_OP(+)
+CV__HAL_INTRIN_IMPL_BIN_OP(-)
+CV__HAL_INTRIN_IMPL_BIN_OP(*)
+CV__HAL_INTRIN_EXPAND_WITH_FP_TYPES(CV__HAL_INTRIN_IMPL_BIN_OP_, /)
+
+#define CV__HAL_INTRIN_IMPL_BIT_OP_(_Tp, bit_op) \
+template<int n> CV_INLINE \
+v_reg<_Tp, n> operator bit_op (const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
 { \
     v_reg<_Tp, n> c; \
     typedef typename V_TypeTraits<_Tp>::int_type itype; \
@@ -423,8 +514,8 @@ template<typename _Tp, int n> inline v_reg<_Tp, n> operator bit_op \
                                                         V_TypeTraits<_Tp>::reinterpret_int(b.s[i]))); \
     return c; \
 } \
-template<typename _Tp, int n> inline v_reg<_Tp, n>& operator \
-    bit_op##= (v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
+template<int n> CV_INLINE \
+v_reg<_Tp, n>& operator bit_op##= (v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
 { \
     typedef typename V_TypeTraits<_Tp>::int_type itype; \
     for( int i = 0; i < n; i++ ) \
@@ -433,31 +524,29 @@ template<typename _Tp, int n> inline v_reg<_Tp, n>& operator \
     return a; \
 }
 
-/** @brief Bitwise AND
-
-Only for integer types. */
-OPENCV_HAL_IMPL_BIT_OP(&)
+#define CV__HAL_INTRIN_IMPL_BIT_OP(bit_op) \
+CV__HAL_INTRIN_EXPAND_WITH_INTEGER_TYPES(CV__HAL_INTRIN_IMPL_BIT_OP_, bit_op) \
+CV__HAL_INTRIN_EXPAND_WITH_FP_TYPES(CV__HAL_INTRIN_IMPL_BIT_OP_, bit_op) /* TODO: FIXIT remove this after masks refactoring */
 
-/** @brief Bitwise OR
 
-Only for integer types. */
-OPENCV_HAL_IMPL_BIT_OP(|)
+CV__HAL_INTRIN_IMPL_BIT_OP(&)
+CV__HAL_INTRIN_IMPL_BIT_OP(|)
+CV__HAL_INTRIN_IMPL_BIT_OP(^)
 
-/** @brief Bitwise XOR
+#define CV__HAL_INTRIN_IMPL_BITWISE_NOT_(_Tp, dummy) \
+template<int n> CV_INLINE \
+v_reg<_Tp, n> operator ~ (const v_reg<_Tp, n>& a) \
+{ \
+    v_reg<_Tp, n> c; \
+    for( int i = 0; i < n; i++ ) \
+        c.s[i] = V_TypeTraits<_Tp>::reinterpret_from_int(~V_TypeTraits<_Tp>::reinterpret_int(a.s[i])); \
+    return c; \
+} \
 
-Only for integer types.*/
-OPENCV_HAL_IMPL_BIT_OP(^)
+CV__HAL_INTRIN_EXPAND_WITH_INTEGER_TYPES(CV__HAL_INTRIN_IMPL_BITWISE_NOT_, ~)
 
-/** @brief Bitwise NOT
+#endif  // !CV_DOXYGEN
 
-Only for integer types.*/
-template<typename _Tp, int n> inline v_reg<_Tp, n> operator ~ (const v_reg<_Tp, n>& a)
-{
-    v_reg<_Tp, n> c;
-    for( int i = 0; i < n; i++ )
-        c.s[i] = V_TypeTraits<_Tp>::reinterpret_from_int(~V_TypeTraits<_Tp>::reinterpret_int(a.s[i]));
-        return c;
-}
 
 //! @brief Helper macro
 //! @ingroup core_hal_intrin_impl
@@ -470,6 +559,27 @@ template<typename _Tp, int n> inline v_reg<_Tp2, n> func(const v_reg<_Tp, n>& a)
     return c; \
 }
 
+//! @brief Helper macro: float x4 and double x2 overloads that both return int x4 (for double the two upper lanes are set to 0)
+//! @ingroup core_hal_intrin_impl
+#define OPENCV_HAL_IMPL_MATH_FUNC_FLOAT(func, cfunc) \
+inline v_reg<int, 4> func(const v_reg<float, 4>& a) \
+{ \
+    v_reg<int, 4> c; \
+    for( int i = 0; i < 4; i++ ) \
+        c.s[i] = cfunc(a.s[i]); \
+    return c; \
+} \
+inline v_reg<int, 4> func(const v_reg<double, 2>& a) \
+{ \
+    v_reg<int, 4> c; \
+    for( int i = 0; i < 2; i++ ) \
+    { \
+        c.s[i] = cfunc(a.s[i]); \
+        c.s[i + 2] = 0; \
+    } \
+    return c; \
+}
+
 /** @brief Square root of elements
 
 Only for floating point types.*/
@@ -491,22 +601,22 @@ OPENCV_HAL_IMPL_MATH_FUNC(v_abs, (typename V_TypeTraits<_Tp>::abs_type)std::abs,
 /** @brief Round elements
 
 Only for floating point types.*/
-OPENCV_HAL_IMPL_MATH_FUNC(v_round, cvRound, int)
+OPENCV_HAL_IMPL_MATH_FUNC_FLOAT(v_round, cvRound)
 
 /** @brief Floor elements
 
 Only for floating point types.*/
-OPENCV_HAL_IMPL_MATH_FUNC(v_floor, cvFloor, int)
+OPENCV_HAL_IMPL_MATH_FUNC_FLOAT(v_floor, cvFloor)
 
 /** @brief Ceil elements
 
 Only for floating point types.*/
-OPENCV_HAL_IMPL_MATH_FUNC(v_ceil, cvCeil, int)
+OPENCV_HAL_IMPL_MATH_FUNC_FLOAT(v_ceil, cvCeil)
 
 /** @brief Truncate elements
 
 Only for floating point types.*/
-OPENCV_HAL_IMPL_MATH_FUNC(v_trunc, int, int)
+OPENCV_HAL_IMPL_MATH_FUNC_FLOAT(v_trunc, int)
 
 //! @brief Helper macro
 //! @ingroup core_hal_intrin_impl
@@ -560,7 +670,7 @@ OPENCV_HAL_IMPL_MINMAX_FUNC(v_max, std::max)
 @code
 {A1 A2 A3 ...} => min(A1,A2,A3,...)
 @endcode
-For 32-bit integer and 32-bit floating point types. */
+For all types except 64-bit integer and 64-bit floating point types. */
 OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC(v_reduce_min, std::min)
 
 /** @brief Find one max value
@@ -569,9 +679,45 @@ OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC(v_reduce_min, std::min)
 @code
 {A1 A2 A3 ...} => max(A1,A2,A3,...)
 @endcode
-For 32-bit integer and 32-bit floating point types. */
+For all types except 64-bit integer and 64-bit floating point types. */
 OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC(v_reduce_max, std::max)
 
+static const unsigned char popCountTable[] =
+{
+    0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
+    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+    4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8,
+};
+/** @brief Count the 1 bits in the vector lanes and return result as corresponding unsigned type
+
+Scheme:
+@code
+{A1 A2 A3 ...} => {popcount(A1), popcount(A2), popcount(A3), ...}
+@endcode
+For all integer types. */
+template<typename _Tp, int n> inline v_reg<typename V_TypeTraits<_Tp>::abs_type, n> v_popcount(const v_reg<_Tp, n>& a)
+{
+    v_reg<typename V_TypeTraits<_Tp>::abs_type, n> counts = v_reg<typename V_TypeTraits<_Tp>::abs_type, n>::zero();
+    for (int lane = 0; lane < n; lane++) // per lane: add up the table-driven bit counts of each of its bytes
+        for (int k = 0; k < (int)sizeof(_Tp); k++)
+            counts.s[lane] += popCountTable[v_reinterpret_as_u8(a).s[lane*(int)sizeof(_Tp) + k]];
+    return counts;
+}
+
+
 //! @cond IGNORED
 template<typename _Tp, int n>
 inline void v_minmax( const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b,
@@ -628,9 +774,28 @@ OPENCV_HAL_IMPL_CMP_OP(==)
 For all types except 64-bit integer values. */
 OPENCV_HAL_IMPL_CMP_OP(!=)
 
+template<int n>
+inline v_reg<float, n> v_not_nan(const v_reg<float, n>& a)
+{
+    typedef typename V_TypeTraits<float>::int_type lane_int;
+    v_reg<float, n> mask;
+    for (int k = 0; k < n; ++k) // a lane compares equal to itself unless it holds NaN
+        mask.s[k] = V_TypeTraits<float>::reinterpret_from_int(a.s[k] == a.s[k] ? (lane_int)-1 : (lane_int)0);
+    return mask;
+}
+template<int n>
+inline v_reg<double, n> v_not_nan(const v_reg<double, n>& a)
+{
+    typedef typename V_TypeTraits<double>::int_type lane_int;
+    v_reg<double, n> mask;
+    for (int k = 0; k < n; ++k) // a lane compares equal to itself unless it holds NaN
+        mask.s[k] = V_TypeTraits<double>::reinterpret_from_int(a.s[k] == a.s[k] ? (lane_int)-1 : (lane_int)0);
+    return mask;
+}
+
 //! @brief Helper macro
 //! @ingroup core_hal_intrin_impl
-#define OPENCV_HAL_IMPL_ADD_SUB_OP(func, bin_op, cast_op, _Tp2) \
+#define OPENCV_HAL_IMPL_ARITHM_OP(func, bin_op, cast_op, _Tp2) \
 template<typename _Tp, int n> \
 inline v_reg<_Tp2, n> func(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
 { \
@@ -644,12 +809,17 @@ inline v_reg<_Tp2, n> func(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
 /** @brief Add values without saturation
 
 For 8- and 16-bit integer values. */
-OPENCV_HAL_IMPL_ADD_SUB_OP(v_add_wrap, +, (_Tp), _Tp)
+OPENCV_HAL_IMPL_ARITHM_OP(v_add_wrap, +, (_Tp), _Tp)
 
 /** @brief Subtract values without saturation
 
 For 8- and 16-bit integer values. */
-OPENCV_HAL_IMPL_ADD_SUB_OP(v_sub_wrap, -, (_Tp), _Tp)
+OPENCV_HAL_IMPL_ARITHM_OP(v_sub_wrap, -, (_Tp), _Tp)
+
+/** @brief Multiply values without saturation
+
+For 8- and 16-bit integer values. Instantiated through OPENCV_HAL_IMPL_ARITHM_OP with `*` as bin_op and `(_Tp)` as cast_op. */
+OPENCV_HAL_IMPL_ARITHM_OP(v_mul_wrap, *, (_Tp), _Tp)
 
 //! @cond IGNORED
 template<typename T> inline T _absdiff(T a, T b)
@@ -672,7 +842,7 @@ inline v_reg<typename V_TypeTraits<_Tp>::abs_type, n> v_absdiff(const v_reg<_Tp,
 {
     typedef typename V_TypeTraits<_Tp>::abs_type rtype;
     v_reg<rtype, n> c;
-    const rtype mask = std::numeric_limits<_Tp>::is_signed ? (1 << (sizeof(rtype)*8 - 1)) : 0;
+    const rtype mask = (rtype)(std::numeric_limits<_Tp>::is_signed ? (1 << (sizeof(rtype)*8 - 1)) : 0);
     for( int i = 0; i < n; i++ )
     {
         rtype ua = a.s[i] ^ mask;
@@ -704,6 +874,19 @@ inline v_float64x2 v_absdiff(const v_float64x2& a, const v_float64x2& b)
     return c;
 }
 
+/** @brief Saturating absolute difference
+
+Computes \f$ saturate(|a - b|) \f$ for every lane.
+For 8-, 16-bit signed integer source types. */
+template<typename _Tp, int n>
+inline v_reg<_Tp, n> v_absdiffs(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
+{
+    v_reg<_Tp, n> result;
+    for( int lane = 0; lane < n; ++lane )
+        result.s[lane] = saturate_cast<_Tp>(std::abs(a.s[lane] - b.s[lane]));
+    return result;
+}
+
 /** @brief Inversed square root
 
 Returns \f$ 1/sqrt(a) \f$
@@ -745,11 +928,11 @@ inline v_reg<_Tp, n> v_sqr_magnitude(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>
 
 /** @brief Multiply and add
 
-Returns \f$ a*b + c \f$
-For floating point types only. */
+ Returns \f$ a*b + c \f$
+ For floating point types and signed 32bit int only. */
 template<typename _Tp, int n>
-inline v_reg<_Tp, n> v_muladd(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b,
-                              const v_reg<_Tp, n>& c)
+inline v_reg<_Tp, n> v_fma(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b,
+                           const v_reg<_Tp, n>& c)
 {
     v_reg<_Tp, n> d;
     for( int i = 0; i < n; i++ )
@@ -757,20 +940,29 @@ inline v_reg<_Tp, n> v_muladd(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b,
     return d;
 }
 
+/** @brief Same operation as v_fma, available under the name v_muladd */
+template<typename _Tp, int n>
+inline v_reg<_Tp, n> v_muladd(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b, const v_reg<_Tp, n>& c)
+{
+    const v_reg<_Tp, n> fused = v_fma(a, b, c);
+    return fused;
+}
+
 /** @brief Dot product of elements
 
 Multiply values in two registers and sum adjacent result pairs.
+
 Scheme:
 @code
   {A1 A2 ...} // 16-bit
 x {B1 B2 ...} // 16-bit
 -------------
 {A1B1+A2B2 ...} // 32-bit
+
 @endcode
-Implemented only for 16-bit signed source type (v_int16x8).
 */
 template<typename _Tp, int n> inline v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>
-    v_dotprod(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
+v_dotprod(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
 {
     typedef typename V_TypeTraits<_Tp>::w_type w_type;
     v_reg<w_type, n/2> c;
@@ -779,6 +971,117 @@ template<typename _Tp, int n> inline v_reg<typename V_TypeTraits<_Tp>::w_type, n
     return c;
 }
 
+/** @brief Dot product of elements with accumulation
+
+Same as cv::v_dotprod, but adds the matching lane of a third register to each sum of adjacent pairs.
+Scheme:
+@code
+  {A1 A2 ...} // 16-bit
+x {B1 B2 ...} // 16-bit
+-------------
+  {A1B1+A2B2+C1 ...} // 32-bit
+@endcode
+*/
+template<typename _Tp, int n> inline v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>
+v_dotprod(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b,
+          const v_reg<typename V_TypeTraits<_Tp>::w_type, n / 2>& c)
+{
+    typedef typename V_TypeTraits<_Tp>::w_type wide_t;
+    v_reg<wide_t, n/2> acc;
+    for( int pair = 0, src = 0; pair < (n/2); ++pair, src += 2 )
+        acc.s[pair] = (wide_t)a.s[src]*b.s[src] + (wide_t)a.s[src + 1]*b.s[src + 1] + c.s[pair];
+    return acc;
+}
+
+/** @brief Fast Dot product of elements
+
+Same as cv::v_dotprod, but it may perform an unordered sum between result pairs on some platforms.
+Use this intrinsic when only the sum over all lanes matters;
+it should yield better performance on the affected platforms. */
+template<typename _Tp, int n> inline v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>
+v_dotprod_fast(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
+{
+    return v_dotprod(a, b);
+}
+
+/** @brief Fast Dot product of elements with accumulation
+
+Same as cv::v_dotprod_fast, but adds a third element to the sum of adjacent pairs. */
+template<typename _Tp, int n> inline v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>
+v_dotprod_fast(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b, const v_reg<typename V_TypeTraits<_Tp>::w_type, n / 2>& c)
+{
+    return v_dotprod(a, b, c);
+}
+
+/** @brief Dot product of elements and expand
+
+Multiplies values in two registers and sums each group of four adjacent products into one wider lane.
+
+Scheme:
+@code
+  {A1 A2 A3 A4 ...} // 8-bit
+x {B1 B2 B3 B4 ...} // 8-bit
+-------------
+  {A1B1+A2B2+A3B3+A4B4 ...} // 32-bit
+
+@endcode
+*/
+template<typename _Tp, int n> inline v_reg<typename V_TypeTraits<_Tp>::q_type, n/4>
+v_dotprod_expand(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
+{
+    typedef typename V_TypeTraits<_Tp>::q_type quad_t;
+    v_reg<quad_t, n/4> acc;
+    for( int group = 0, k = 0; group < (n/4); ++group, k += 4 )
+        acc.s[group] = (quad_t)a.s[k    ]*b.s[k    ] + (quad_t)a.s[k + 1]*b.s[k + 1] +
+                       (quad_t)a.s[k + 2]*b.s[k + 2] + (quad_t)a.s[k + 3]*b.s[k + 3];
+    return acc;
+}
+
+/** @brief Dot product of elements and expand, with accumulation
+
+Same as cv::v_dotprod_expand, but adds the matching lane of a third register to each group sum.
+Scheme:
+@code
+  {A1 A2 A3 A4 ...} // 8-bit
+x {B1 B2 B3 B4 ...} // 8-bit
+-------------
+  {A1B1+A2B2+A3B3+A4B4+C1 ...} // 32-bit
+@endcode
+*/
+template<typename _Tp, int n> inline v_reg<typename V_TypeTraits<_Tp>::q_type, n/4>
+v_dotprod_expand(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b,
+                 const v_reg<typename V_TypeTraits<_Tp>::q_type, n / 4>& c)
+{
+    typedef typename V_TypeTraits<_Tp>::q_type quad_t;
+    v_reg<quad_t, n/4> acc;
+    for( int group = 0, k = 0; group < (n/4); ++group, k += 4 )
+        acc.s[group] = (quad_t)a.s[k    ]*b.s[k    ] + (quad_t)a.s[k + 1]*b.s[k + 1] +
+                       (quad_t)a.s[k + 2]*b.s[k + 2] + (quad_t)a.s[k + 3]*b.s[k + 3] + c.s[group];
+    return acc;
+}
+
+/** @brief Fast Dot product of elements and expand
+
+Multiply values in two registers and expand the sum of adjacent result pairs.
+
+Same as cv::v_dotprod_expand, but it may perform an unordered sum between result pairs on some platforms.
+Use this intrinsic when only the sum over all lanes matters;
+it should yield better performance on the affected platforms. */
+template<typename _Tp, int n> inline v_reg<typename V_TypeTraits<_Tp>::q_type, n/4>
+v_dotprod_expand_fast(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
+{
+    return v_dotprod_expand(a, b);
+}
+
+/** @brief Fast Dot product of elements and expand, with accumulation
+
+Same as cv::v_dotprod_expand_fast, but adds a third element to the sum of adjacent pairs. */
+template<typename _Tp, int n> inline v_reg<typename V_TypeTraits<_Tp>::q_type, n/4>
+v_dotprod_expand_fast(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b, const v_reg<typename V_TypeTraits<_Tp>::q_type, n / 4>& c)
+{
+    return v_dotprod_expand(a, b, c);
+}
+
 /** @brief Multiply and expand
 
 Multiply values two registers and store results in two registers with wider pack type.
@@ -810,6 +1113,20 @@ template<typename _Tp, int n> inline void v_mul_expand(const v_reg<_Tp, n>& a, c
     }
 }
 
+/** @brief Multiply and extract high part
+
+Multiply values in two registers and store the high part of the results.
+Implemented only for 16-bit source types (v_int16x8, v_uint16x8). Returns \f$ a*b >> 16 \f$
+*/
+template<typename _Tp, int n> inline v_reg<_Tp, n> v_mul_hi(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
+{
+    typedef typename V_TypeTraits<_Tp>::w_type wide_t;
+    v_reg<_Tp, n> hi;
+    for (int lane = 0; lane < n; ++lane)
+        hi.s[lane] = (_Tp)(((wide_t)a.s[lane] * b.s[lane]) >> sizeof(_Tp)*8);
+    return hi;
+}
+
 //! @cond IGNORED
 template<typename _Tp, int n> inline void v_hsum(const v_reg<_Tp, n>& a,
                                                  v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>& c)
@@ -836,12 +1153,65 @@ template<typename _Tp, int n> inline v_reg<_Tp, n> operator shift_op(const v_reg
 /** @brief Bitwise shift left
 
 For 16-, 32- and 64-bit integer values. */
-OPENCV_HAL_IMPL_SHIFT_OP(<<)
+OPENCV_HAL_IMPL_SHIFT_OP(<< )
 
 /** @brief Bitwise shift right
 
 For 16-, 32- and 64-bit integer values. */
-OPENCV_HAL_IMPL_SHIFT_OP(>>)
+OPENCV_HAL_IMPL_SHIFT_OP(>> )
+
+//! @brief Helper macro: defines v_rotate_<suffix> in a one-register form (lanes with no source become 0) and a two-register form (such lanes are taken from b, or 0 if neither a nor b supplies them)
+//! @ingroup core_hal_intrin_impl
+#define OPENCV_HAL_IMPL_ROTATE_SHIFT_OP(suffix,opA,opB) \
+template<int imm, typename _Tp, int n> inline v_reg<_Tp, n> v_rotate_##suffix(const v_reg<_Tp, n>& a) \
+{ \
+    v_reg<_Tp, n> b; \
+    for (int i = 0; i < n; i++) \
+    { \
+        int sIndex = i opA imm; \
+        if (0 <= sIndex && sIndex < n) \
+        { \
+            b.s[i] = a.s[sIndex]; \
+        } \
+        else \
+        { \
+            b.s[i] = 0; \
+        } \
+    } \
+    return b; \
+} \
+template<int imm, typename _Tp, int n> inline v_reg<_Tp, n> v_rotate_##suffix(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
+{ \
+    v_reg<_Tp, n> c; \
+    for (int i = 0; i < n; i++) \
+    { \
+        int aIndex = i opA imm; \
+        int bIndex = i opA imm opB n; \
+        if (0 <= bIndex && bIndex < n) \
+        { \
+            c.s[i] = b.s[bIndex]; \
+        } \
+        else if (0 <= aIndex && aIndex < n) \
+        { \
+            c.s[i] = a.s[aIndex]; \
+        } \
+        else \
+        { \
+            c.s[i] = 0; \
+        } \
+    } \
+    return c; \
+}
+
+/** @brief Element shift left among vector
+
+For all types */
+OPENCV_HAL_IMPL_ROTATE_SHIFT_OP(left,  -, +)
+
+/** @brief Element shift right among vector
+
+For all types */
+OPENCV_HAL_IMPL_ROTATE_SHIFT_OP(right, +, -)
 
 /** @brief Sum packed values
 
@@ -849,7 +1219,7 @@ OPENCV_HAL_IMPL_SHIFT_OP(>>)
 @code
 {A1 A2 A3 ...} => sum{A1,A2,A3,...}
 @endcode
-For 32-bit integer and 32-bit floating point types.*/
+*/
 template<typename _Tp, int n> inline typename V_TypeTraits<_Tp>::sum_type v_reduce_sum(const v_reg<_Tp, n>& a)
 {
     typename V_TypeTraits<_Tp>::sum_type c = a.s[0];
@@ -858,7 +1228,44 @@ template<typename _Tp, int n> inline typename V_TypeTraits<_Tp>::sum_type v_redu
     return c;
 }
 
+/** @brief Sums all elements of each input vector, returns the vector of sums
+
+ Scheme:
+ @code
+ result[0] = a[0] + a[1] + a[2] + a[3]
+ result[1] = b[0] + b[1] + b[2] + b[3]
+ result[2] = c[0] + c[1] + c[2] + c[3]
+ result[3] = d[0] + d[1] + d[2] + d[3]
+ @endcode
+*/
+inline v_float32x4 v_reduce_sum4(const v_float32x4& a, const v_float32x4& b,
+                                 const v_float32x4& c, const v_float32x4& d)
+{
+    // inputs listed in the order their sums are written to the result lanes
+    const v_float32x4* const inputs[4] = { &a, &b, &c, &d };
+    v_float32x4 sums;
+    for (int k = 0; k < 4; k++)
+        sums.s[k] = inputs[k]->s[0] + inputs[k]->s[1] + inputs[k]->s[2] + inputs[k]->s[3];
+    return sums;
+}
+
+/** @brief Sum of absolute differences over all lanes
+
+Scheme:
+@code
+{A1 A2 A3 ...} {B1 B2 B3 ...} => sum{abs(A1-B1),abs(A2-B2),abs(A3-B3),...}
+@endcode
+For all types except 64-bit types.*/
+template<typename _Tp, int n> inline typename V_TypeTraits< typename V_TypeTraits<_Tp>::abs_type >::sum_type v_reduce_sad(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
+{
+    typename V_TypeTraits< typename V_TypeTraits<_Tp>::abs_type >::sum_type total = _absdiff(a.s[0], b.s[0]);
+    for (int lane = 1; lane < n; ++lane)
+        total += _absdiff(a.s[lane], b.s[lane]);
+    return total;
+}
+
 /** @brief Get negative values mask
+@deprecated v_signmask depends on a lane count heavily and therefore isn't universal enough
 
 Returned value is a bit mask with bits set to 1 on places corresponding to negative packed values indexes.
 Example:
@@ -866,7 +1273,7 @@ Returned value is a bit mask with bits set to 1 on places corresponding to negat
 v_int32x4 r; // set to {-1, -1, 1, 1}
 int mask = v_signmask(r); // mask = 3 <== 00000000 00000000 00000000 00000011
 @endcode
-For all types except 64-bit. */
+*/
 template<typename _Tp, int n> inline int v_signmask(const v_reg<_Tp, n>& a)
 {
     int mask = 0;
@@ -875,10 +1282,27 @@ template<typename _Tp, int n> inline int v_signmask(const v_reg<_Tp, n>& a)
     return mask;
 }
 
+/** @brief Get first negative lane index
+
+Returns the index of the first negative lane (unspecified when no lane is negative; this implementation returns 0)
+Example:
+@code{.cpp}
+v_int32x4 r; // set to {0, 0, -1, -1}
+int idx = v_scan_forward(r); // idx = 2
+@endcode
+*/
+template <typename _Tp, int n> inline int v_scan_forward(const v_reg<_Tp, n>& a)
+{
+    int lane = 0;
+    while (lane < n && !(V_TypeTraits<_Tp>::reinterpret_int(a.s[lane]) < 0))
+        ++lane;
+    return lane < n ? lane : 0;
+}
+
 /** @brief Check if all packed values are less than zero
 
 Unsigned values will be casted to signed: `uchar 254 => char -2`.
-For all types except 64-bit. */
+*/
 template<typename _Tp, int n> inline bool v_check_all(const v_reg<_Tp, n>& a)
 {
     for( int i = 0; i < n; i++ )
@@ -890,7 +1314,7 @@ template<typename _Tp, int n> inline bool v_check_all(const v_reg<_Tp, n>& a)
 /** @brief Check if any of packed values is less than zero
 
 Unsigned values will be casted to signed: `uchar 254 => char -2`.
-For all types except 64-bit. */
+*/
 template<typename _Tp, int n> inline bool v_check_any(const v_reg<_Tp, n>& a)
 {
     for( int i = 0; i < n; i++ )
@@ -899,13 +1323,16 @@ template<typename _Tp, int n> inline bool v_check_any(const v_reg<_Tp, n>& a)
     return false;
 }
 
-/** @brief Bitwise select
+/** @brief Per-element select (blend operation)
+
+Return value will be built by combining values _a_ and _b_ using the following scheme:
+    result[i] = mask[i] ? a[i] : b[i];
 
-Return value will be built by combining values a and b using the following scheme:
-If the i-th bit in _mask_ is 1
-    select i-th bit from _a_
-else
-    select i-th bit from _b_ */
+@note: _mask_ element values are restricted to these values:
+- 0: select element from _b_
+- 0xff/0xffff/etc: select element from _a_
+(fully compatible with bitwise-based operator)
+*/
 template<typename _Tp, int n> inline v_reg<_Tp, n> v_select(const v_reg<_Tp, n>& mask,
                                                            const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
 {
@@ -915,8 +1342,8 @@ template<typename _Tp, int n> inline v_reg<_Tp, n> v_select(const v_reg<_Tp, n>&
     for( int i = 0; i < n; i++ )
     {
         int_type m = Traits::reinterpret_int(mask.s[i]);
-        c.s[i] =  Traits::reinterpret_from_int((Traits::reinterpret_int(a.s[i]) & m)
-                                             | (Traits::reinterpret_int(b.s[i]) & ~m));
+        CV_DbgAssert(m == 0 || m == (~(int_type)0));  // restrict mask values: 0 or 0xff/0xffff/etc
+        c.s[i] = m ? a.s[i] : b.s[i];
     }
     return c;
 }
@@ -940,13 +1367,51 @@ template<typename _Tp, int n> inline void v_expand(const v_reg<_Tp, n>& a,
     }
 }
 
-//! @cond IGNORED
-template<typename _Tp, int n> inline v_reg<typename V_TypeTraits<_Tp>::int_type, n>
-    v_reinterpret_as_int(const v_reg<_Tp, n>& a)
-{
-    v_reg<typename V_TypeTraits<_Tp>::int_type, n> c;
-    for( int i = 0; i < n; i++ )
-        c.s[i] = V_TypeTraits<_Tp>::reinterpret_int(a.s[i]);
+/** @brief Expand lower values to the wider pack type
+
+Same as cv::v_expand, but returns only the expanded lower half of the vector.
+
+Scheme:
+@code
+ int32x4     int64x2
+{A B C D} ==> {A B}
+@endcode */
+template<typename _Tp, int n>
+inline v_reg<typename V_TypeTraits<_Tp>::w_type, n/2> v_expand_low(const v_reg<_Tp, n>& a)
+{
+    typedef v_reg<typename V_TypeTraits<_Tp>::w_type, n/2> wide_reg;
+    wide_reg wide;
+    for( int lane = 0; lane < (n/2); ++lane )
+        wide.s[lane] = a.s[lane];
+    return wide;
+}
+
+/** @brief Expand higher values to the wider pack type
+
+Same as cv::v_expand_low, but expands the higher half of the vector instead.
+
+Scheme:
+@code
+ int32x4     int64x2
+{A B C D} ==> {C D}
+@endcode */
+template<typename _Tp, int n>
+inline v_reg<typename V_TypeTraits<_Tp>::w_type, n/2> v_expand_high(const v_reg<_Tp, n>& a)
+{
+    typedef v_reg<typename V_TypeTraits<_Tp>::w_type, n/2> wide_reg;
+    wide_reg wide;
+    for( int lane = 0; lane < (n/2); ++lane )
+        wide.s[lane] = a.s[lane+(n/2)];
+    return wide;
+}
+
+//! @cond IGNORED
+template<typename _Tp, int n> inline v_reg<typename V_TypeTraits<_Tp>::int_type, n>
+    v_reinterpret_as_int(const v_reg<_Tp, n>& a)
+{
+    v_reg<typename V_TypeTraits<_Tp>::int_type, n> c;
+    for( int i = 0; i < n; i++ )
+        c.s[i] = V_TypeTraits<_Tp>::reinterpret_int(a.s[i]);
     return c;
 }
 
@@ -993,21 +1458,52 @@ template<typename _Tp, int n> inline void v_zip( const v_reg<_Tp, n>& a0, const
 @return register object
 
 @note Returned type will be detected from passed pointer type, for example uchar ==> cv::v_uint8x16, int ==> cv::v_int32x4, etc.
+
+@note Alignment requirement:
+if CV_STRONG_ALIGNMENT=1 then passed pointer must be aligned (`sizeof(lane type)` should be enough).
+Do not cast pointer types without runtime check for pointer alignment (like `uchar*` => `int*`).
  */
 template<typename _Tp>
-inline v_reg<_Tp, V_SIMD128Traits<_Tp>::nlanes> v_load(const _Tp* ptr)
+inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_load(const _Tp* ptr)
 {
-    return v_reg<_Tp, V_SIMD128Traits<_Tp>::nlanes>(ptr);
+#if CV_STRONG_ALIGNMENT
+    CV_Assert(isAligned<sizeof(_Tp)>(ptr));
+#endif
+    return v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128>(ptr);
 }
 
 /** @brief Load register contents from memory (aligned)
 
-similar to cv::v_load, but source memory block should be aligned (to 16-byte boundary)
+similar to cv::v_load, but source memory block should be aligned (to 16-byte boundary in case of SIMD128, 32-byte - SIMD256, etc)
+ */
+template<typename _Tp>
+inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_load_aligned(const _Tp* ptr)
+{
+    CV_Assert(isAligned<sizeof(v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128>)>(ptr)); // checked unconditionally; the alignment argument is the size of the whole register type
+    return v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128>(ptr); // same pointer constructor that v_load uses
+}
+
+/** @brief Load 64-bits of data to lower part (high part is undefined).
+
+@param ptr memory block containing data for first half (0..n/2)
+
+@code{.cpp}
+int lo[2] = { 1, 2 };
+v_int32x4 r = v_load_low(lo);
+@endcode
  */
 template<typename _Tp>
-inline v_reg<_Tp, V_SIMD128Traits<_Tp>::nlanes> v_load_aligned(const _Tp* ptr)
+inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_load_low(const _Tp* ptr)
 {
-    return v_reg<_Tp, V_SIMD128Traits<_Tp>::nlanes>(ptr);
+#if CV_STRONG_ALIGNMENT
+    CV_Assert(isAligned<sizeof(_Tp)>(ptr));
+#endif
+    v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> c;
+    for( int i = 0; i < c.nlanes/2; i++ )
+    {
+        c.s[i] = ptr[i];
+    }
+    return c;
 }
 
 /** @brief Load register contents from two memory blocks
@@ -1021,9 +1517,13 @@ v_int32x4 r = v_load_halves(lo, hi);
 @endcode
  */
 template<typename _Tp>
-inline v_reg<_Tp, V_SIMD128Traits<_Tp>::nlanes> v_load_halves(const _Tp* loptr, const _Tp* hiptr)
+inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_load_halves(const _Tp* loptr, const _Tp* hiptr)
 {
-    v_reg<_Tp, V_SIMD128Traits<_Tp>::nlanes> c;
+#if CV_STRONG_ALIGNMENT
+    CV_Assert(isAligned<sizeof(_Tp)>(loptr));
+    CV_Assert(isAligned<sizeof(_Tp)>(hiptr));
+#endif
+    v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> c;
     for( int i = 0; i < c.nlanes/2; i++ )
     {
         c.s[i] = loptr[i];
@@ -1042,11 +1542,14 @@ v_int32x4 r = v_load_expand(buf); // r = {1, 2, 3, 4} - type is int32
 @endcode
 For 8-, 16-, 32-bit integer source types. */
 template<typename _Tp>
-inline v_reg<typename V_TypeTraits<_Tp>::w_type, V_SIMD128Traits<_Tp>::nlanes / 2>
+inline v_reg<typename V_TypeTraits<_Tp>::w_type, V_TypeTraits<_Tp>::nlanes128 / 2>
 v_load_expand(const _Tp* ptr)
 {
+#if CV_STRONG_ALIGNMENT
+    CV_Assert(isAligned<sizeof(_Tp)>(ptr));
+#endif
     typedef typename V_TypeTraits<_Tp>::w_type w_type;
-    v_reg<w_type, V_SIMD128Traits<w_type>::nlanes> c;
+    v_reg<w_type, V_TypeTraits<w_type>::nlanes128> c;
     for( int i = 0; i < c.nlanes; i++ )
     {
         c.s[i] = ptr[i];
@@ -1063,11 +1566,14 @@ v_int32x4 r = v_load_q(buf); // r = {1, 2, 3, 4} - type is int32
 @endcode
 For 8-bit integer source types. */
 template<typename _Tp>
-inline v_reg<typename V_TypeTraits<_Tp>::q_type, V_SIMD128Traits<_Tp>::nlanes / 4>
+inline v_reg<typename V_TypeTraits<_Tp>::q_type, V_TypeTraits<_Tp>::nlanes128 / 4>
 v_load_expand_q(const _Tp* ptr)
 {
+#if CV_STRONG_ALIGNMENT
+    CV_Assert(isAligned<sizeof(_Tp)>(ptr));
+#endif
     typedef typename V_TypeTraits<_Tp>::q_type q_type;
-    v_reg<q_type, V_SIMD128Traits<q_type>::nlanes> c;
+    v_reg<q_type, V_TypeTraits<q_type>::nlanes128> c;
     for( int i = 0; i < c.nlanes; i++ )
     {
         c.s[i] = ptr[i];
@@ -1075,17 +1581,42 @@ v_load_expand_q(const _Tp* ptr)
     return c;
 }
 
-/** @brief Load and deinterleave (4 channels)
+/** @brief Load and deinterleave (2 channels)
 
-Load data from memory deinterleave and store to 4 registers.
+Load data from memory deinterleave and store to 2 registers.
 Scheme:
 @code
-{A1 B1 C1 D1 A2 B2 C2 D2 ...} ==> {A1 A2 ...}, {B1 B2 ...}, {C1 C2 ...}, {D1 D2 ...}
+{A1 B1 A2 B2 ...} ==> {A1 A2 ...}, {B1 B2 ...}
+@endcode
+For all types except 64-bit. */
+template<typename _Tp, int n> inline void v_load_deinterleave(const _Tp* ptr, v_reg<_Tp, n>& a,
+                                                            v_reg<_Tp, n>& b)
+{
+#if CV_STRONG_ALIGNMENT
+    CV_Assert(isAligned<sizeof(_Tp)>(ptr));
+#endif
+    const _Tp* src = ptr; // walks the interleaved {A, B} pairs
+    for( int lane = 0; lane < n; ++lane, src += 2 )
+    {
+        a.s[lane] = src[0];
+        b.s[lane] = src[1];
+    }
+}
+
+/** @brief Load and deinterleave (3 channels)
+
+Load data from memory deinterleave and store to 3 registers.
+Scheme:
+@code
+{A1 B1 C1 A2 B2 C2 ...} ==> {A1 A2 ...}, {B1 B2 ...}, {C1 C2 ...}
 @endcode
 For all types except 64-bit. */
 template<typename _Tp, int n> inline void v_load_deinterleave(const _Tp* ptr, v_reg<_Tp, n>& a,
                                                             v_reg<_Tp, n>& b, v_reg<_Tp, n>& c)
 {
+#if CV_STRONG_ALIGNMENT
+    CV_Assert(isAligned<sizeof(_Tp)>(ptr));
+#endif
     int i, i3;
     for( i = i3 = 0; i < n; i++, i3 += 3 )
     {
@@ -1095,12 +1626,12 @@ template<typename _Tp, int n> inline void v_load_deinterleave(const _Tp* ptr, v_
     }
 }
 
-/** @brief Load and deinterleave (3 channels)
+/** @brief Load and deinterleave (4 channels)
 
-Load data from memory deinterleave and store to 3 registers.
+Load data from memory deinterleave and store to 4 registers.
 Scheme:
 @code
-{A1 B1 C1 A2 B2 C2 ...} ==> {A1 A2 ...}, {B1 B2 ...}, {C1 C2 ...}
+{A1 B1 C1 D1 A2 B2 C2 D2 ...} ==> {A1 A2 ...}, {B1 B2 ...}, {C1 C2 ...}, {D1 D2 ...}
 @endcode
 For all types except 64-bit. */
 template<typename _Tp, int n>
@@ -1108,6 +1639,9 @@ inline void v_load_deinterleave(const _Tp* ptr, v_reg<_Tp, n>& a,
                                 v_reg<_Tp, n>& b, v_reg<_Tp, n>& c,
                                 v_reg<_Tp, n>& d)
 {
+#if CV_STRONG_ALIGNMENT
+    CV_Assert(isAligned<sizeof(_Tp)>(ptr));
+#endif
     int i, i4;
     for( i = i4 = 0; i < n; i++, i4 += 4 )
     {
@@ -1118,18 +1652,46 @@ inline void v_load_deinterleave(const _Tp* ptr, v_reg<_Tp, n>& a,
     }
 }
 
+/** @brief Interleave and store (2 channels)
+
+Writes the lanes of 2 registers to memory in alternating order.
+Scheme:
+@code
+{A1 A2 ...}, {B1 B2 ...} ==> {A1 B1 A2 B2 ...}
+@endcode
+For all types except 64-bit. */
+template<typename _Tp, int n>
+inline void v_store_interleave( _Tp* ptr, const v_reg<_Tp, n>& a,
+                               const v_reg<_Tp, n>& b,
+                               hal::StoreMode /*mode*/=hal::STORE_UNALIGNED)
+{
+#if CV_STRONG_ALIGNMENT
+    CV_Assert(isAligned<sizeof(_Tp)>(ptr));
+#endif
+    _Tp* dst = ptr; // walks the interleaved {A, B} output pairs
+    for( int lane = 0; lane < n; ++lane, dst += 2 )
+    {
+        dst[0] = a.s[lane];
+        dst[1] = b.s[lane];
+    }
+}
+
 /** @brief Interleave and store (3 channels)
 
 Interleave and store data from 3 registers to memory.
 Scheme:
 @code
-{A1 A2 ...}, {B1 B2 ...}, {C1 C2 ...}, {D1 D2 ...} ==> {A1 B1 C1 D1 A2 B2 C2 D2 ...}
+{A1 A2 ...}, {B1 B2 ...}, {C1 C2 ...} ==> {A1 B1 C1 A2 B2 C2 ...}
 @endcode
 For all types except 64-bit. */
 template<typename _Tp, int n>
 inline void v_store_interleave( _Tp* ptr, const v_reg<_Tp, n>& a,
-                                const v_reg<_Tp, n>& b, const v_reg<_Tp, n>& c)
+                                const v_reg<_Tp, n>& b, const v_reg<_Tp, n>& c,
+                                hal::StoreMode /*mode*/=hal::STORE_UNALIGNED)
 {
+#if CV_STRONG_ALIGNMENT
+    CV_Assert(isAligned<sizeof(_Tp)>(ptr));
+#endif
     int i, i3;
     for( i = i3 = 0; i < n; i++, i3 += 3 )
     {
@@ -1149,8 +1711,12 @@ Interleave and store data from 4 registers to memory.
 For all types except 64-bit. */
 template<typename _Tp, int n> inline void v_store_interleave( _Tp* ptr, const v_reg<_Tp, n>& a,
                                                             const v_reg<_Tp, n>& b, const v_reg<_Tp, n>& c,
-                                                            const v_reg<_Tp, n>& d)
+                                                            const v_reg<_Tp, n>& d,
+                                                            hal::StoreMode /*mode*/=hal::STORE_UNALIGNED)
 {
+#if CV_STRONG_ALIGNMENT
+    CV_Assert(isAligned<sizeof(_Tp)>(ptr));
+#endif
     int i, i4;
     for( i = i4 = 0; i < n; i++, i4 += 4 )
     {
@@ -1172,10 +1738,22 @@ Pointer can be unaligned. */
 template<typename _Tp, int n>
 inline void v_store(_Tp* ptr, const v_reg<_Tp, n>& a)
 {
+#if CV_STRONG_ALIGNMENT
+    CV_Assert(isAligned<sizeof(_Tp)>(ptr));
+#endif
     for( int i = 0; i < n; i++ )
         ptr[i] = a.s[i];
 }
 
+template<typename _Tp, int n>
+inline void v_store(_Tp* ptr, const v_reg<_Tp, n>& a, hal::StoreMode /*mode*/) // the store mode argument is unnamed and not used here
+{
+#if CV_STRONG_ALIGNMENT
+    CV_Assert(isAligned<sizeof(_Tp)>(ptr));
+#endif
+    v_store(ptr, a); // forwards to the two-argument v_store
+}
+
 /** @brief Store data to memory (lower half)
 
 Store lower half of register contents to memory.
@@ -1186,6 +1764,9 @@ Store lower half of register contents to memory.
 template<typename _Tp, int n>
 inline void v_store_low(_Tp* ptr, const v_reg<_Tp, n>& a)
 {
+#if CV_STRONG_ALIGNMENT
+    CV_Assert(isAligned<sizeof(_Tp)>(ptr));
+#endif
     for( int i = 0; i < (n/2); i++ )
         ptr[i] = a.s[i];
 }
@@ -1200,6 +1781,9 @@ Store higher half of register contents to memory.
 template<typename _Tp, int n>
 inline void v_store_high(_Tp* ptr, const v_reg<_Tp, n>& a)
 {
+#if CV_STRONG_ALIGNMENT
+    CV_Assert(isAligned<sizeof(_Tp)>(ptr));
+#endif
     for( int i = 0; i < (n/2); i++ )
         ptr[i] = a.s[i+(n/2)];
 }
@@ -1215,8 +1799,22 @@ Pointer __should__ be aligned by 16-byte boundary. */
 template<typename _Tp, int n>
 inline void v_store_aligned(_Tp* ptr, const v_reg<_Tp, n>& a)
 {
-    for( int i = 0; i < n; i++ )
-        ptr[i] = a.s[i];
+    CV_Assert(isAligned<sizeof(v_reg<_Tp, n>)>(ptr));
+    v_store(ptr, a);
+}
+
+template<typename _Tp, int n>
+inline void v_store_aligned_nocache(_Tp* ptr, const v_reg<_Tp, n>& a)
+{
+    CV_Assert(isAligned<sizeof(v_reg<_Tp, n>)>(ptr)); // ptr must be aligned to the size of the whole register
+    v_store(ptr, a); // plain forwarding to v_store; nothing cache-specific is done in this implementation
+}
+
+template<typename _Tp, int n>
+inline void v_store_aligned(_Tp* ptr, const v_reg<_Tp, n>& a, hal::StoreMode /*mode*/)
+{
+    CV_Assert(isAligned<sizeof(v_reg<_Tp, n>)>(ptr));
+    v_store(ptr, a);
 }
 
 /** @brief Combine vector from first elements of two vectors
@@ -1282,6 +1880,23 @@ inline void v_recombine(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b,
     }
 }
 
+/** @brief Vector reverse order
+
+Returns a register whose lanes are those of the input in reversed order.
+Scheme:
+@code
+  REG {A1 ... An} ==> REG {An ... A1}
+@endcode
+For all types. */
+template<typename _Tp, int n>
+inline v_reg<_Tp, n> v_reverse(const v_reg<_Tp, n>& a)
+{
+    v_reg<_Tp, n> reversed;
+    for( int dst = 0, src = n - 1; dst < n; ++dst, --src )
+        reversed.s[dst] = a.s[src];
+    return reversed;
+}
+
 /** @brief Vector extract
 
 Scheme:
@@ -1300,7 +1915,7 @@ Restriction: 0 <= shift < nlanes
 v_int32x4 a, b, c;
 c = v_extract<2>(a, b);
 @endcode
-For integer types only. */
+For all types. */
 template<int s, typename _Tp, int n>
 inline v_reg<_Tp, n> v_extract(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
 {
@@ -1314,6 +1929,42 @@ inline v_reg<_Tp, n> v_extract(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
     return r;
 }
 
+/** @brief Extract a single element of a vector as a scalar
+
+Scheme:
+Return the s-th element of v.
+Restriction: 0 <= s < nlanes
+
+Usage:
+@code
+v_int32x4 a;
+int r;
+r = v_extract_n<2>(a);
+@endcode
+For all types. */
+template<int s, typename _Tp, int n>
+inline _Tp v_extract_n(const v_reg<_Tp, n>& v)
+{
+    CV_DbgAssert(s >= 0 && s < n); // s is a template argument; the range is checked through CV_DbgAssert
+    return v.s[s];
+}
+
+/** @brief Broadcast i-th element of vector to every lane of the result
+
+Scheme:
+@code
+{ v[0] v[1] v[2] ... v[SZ] } => { v[i], v[i], v[i] ... v[i] }
+@endcode
+Restriction: 0 <= i < nlanes
+Supported types: 32-bit integers and floats (s32/u32/f32)
+ */
+template<int i, typename _Tp, int n>
+inline v_reg<_Tp, n> v_broadcast_element(const v_reg<_Tp, n>& a)
+{
+    CV_DbgAssert(i >= 0 && i < n); // i is a template argument; the range is checked through CV_DbgAssert
+    return v_reg<_Tp, n>::all(a.s[i]); // result is built by v_reg::all from the selected lane
+}
+
 /** @brief Round
 
 Rounds each value. Input type is float vector ==> output type is int vector.*/
@@ -1325,6 +1976,18 @@ template<int n> inline v_reg<int, n> v_round(const v_reg<float, n>& a)
     return c;
 }
 
+/** @overload */
+template<int n> inline v_reg<int, n*2> v_round(const v_reg<double, n>& a, const v_reg<double, n>& b)
+{
+    v_reg<int, n*2> rounded;
+    // lower half takes the rounded lanes of a, upper half the rounded lanes of b
+    for( int lane = 0; lane < n; ++lane )
+        rounded.s[lane] = cvRound(a.s[lane]);
+    for( int lane = 0; lane < n; ++lane )
+        rounded.s[n + lane] = cvRound(b.s[lane]);
+    return rounded;
+}
+
 /** @brief Floor
 
 Floor each value. Input type is float vector ==> output type is int vector.*/
@@ -1417,28 +2080,241 @@ template<int n> inline v_reg<float, n> v_cvt_f32(const v_reg<int, n>& a)
     return c;
 }
 
+template<int n> inline v_reg<float, n*2> v_cvt_f32(const v_reg<double, n>& a)
+{
+    v_reg<float, n*2> narrowed;
+    // Lower n lanes: a narrowed to float. Upper n lanes: zero.
+    for( int k = 0; k < n; ++k )
+        narrowed.s[k] = (float)a.s[k];
+    for( int k = n; k < n*2; ++k )
+        narrowed.s[k] = 0;
+    return narrowed;
+}
+
+template<int n> inline v_reg<float, n*2> v_cvt_f32(const v_reg<double, n>& a, const v_reg<double, n>& b)
+{
+    v_reg<float, n*2> narrowed;
+    // Lower n lanes come from a, upper n lanes from b, each narrowed to float.
+    for( int k = 0; k < n; ++k )
+        narrowed.s[k] = (float)a.s[k];
+    for( int k = 0; k < n; ++k )
+        narrowed.s[n + k] = (float)b.s[k];
+    return narrowed;
+}
+
 /** @brief Convert to double
 
 Supported input type is cv::v_int32x4. */
-template<int n> inline v_reg<double, n> v_cvt_f64(const v_reg<int, n*2>& a)
+CV_INLINE v_reg<double, 2> v_cvt_f64(const v_reg<int, 4>& a)
 {
+    enum { n = 2 };
     v_reg<double, n> c;
     for( int i = 0; i < n; i++ )
         c.s[i] = (double)a.s[i];
     return c;
 }
 
+/** @brief Convert the high half of an integer vector to double
+
+Lanes 2 and 3 of the cv::v_int32x4 input become lanes 0 and 1 of the result. */
+CV_INLINE v_reg<double, 2> v_cvt_f64_high(const v_reg<int, 4>& a)
+{
+    enum { half = 2 }; // output lane count, and offset of the high half in a
+    v_reg<double, half> widened;
+    for( int k = half; k < 2*half; ++k )
+        widened.s[k - half] = (double)a.s[k];
+    return widened;
+}
+
 /** @brief Convert to double
 
 Supported input type is cv::v_float32x4. */
-template<int n> inline v_reg<double, n> v_cvt_f64(const v_reg<float, n*2>& a)
+CV_INLINE v_reg<double, 2> v_cvt_f64(const v_reg<float, 4>& a)
+{
+    enum { n = 2 };
+    v_reg<double, n> c;
+    for( int i = 0; i < n; i++ )
+        c.s[i] = (double)a.s[i];
+    return c;
+}
+
+/** @brief Convert the high half of a float vector to double
+
+Lanes 2 and 3 of the cv::v_float32x4 input become lanes 0 and 1 of the result. */
+CV_INLINE v_reg<double, 2> v_cvt_f64_high(const v_reg<float, 4>& a)
+{
+    enum { half = 2 }; // output lane count, and offset of the high half in a
+    v_reg<double, half> widened;
+    for( int k = half; k < 2*half; ++k )
+        widened.s[k - half] = (double)a.s[k];
+    return widened;
+}
+
+/** @brief Convert to double
+
+Supported input type is cv::v_int64x2. */
+CV_INLINE v_reg<double, 2> v_cvt_f64(const v_reg<int64, 2>& a)
 {
+    enum { n = 2 };
     v_reg<double, n> c;
     for( int i = 0; i < n; i++ )
         c.s[i] = (double)a.s[i];
     return c;
 }
 
+/** @brief Convert to double (64-bit integer input)
+
+Supported input type is cv::v_int64x2. Both lanes are converted; there is no separate high half. */
+CV_INLINE v_reg<double, 2> v_cvt_f64_high(const v_reg<int64, 2>& a)
+{
+    enum { n = 2 };
+    v_reg<double, n> c;
+    for( int i = 0; i < n; i++ )
+        c.s[i] = (double)a.s[i]; // lanes 0 and 1, the same lanes the int64 v_cvt_f64 reads
+    return c;
+}
+
+
+template<typename _Tp> inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_lut(const _Tp* tab, const int* idx)
+{
+    v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> gathered; // lane k = tab[idx[k]]
+    for (int k = 0; k < V_TypeTraits<_Tp>::nlanes128; ++k)
+        gathered.s[k] = tab[idx[k]];
+    return gathered;
+}
+template<typename _Tp> inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_lut_pairs(const _Tp* tab, const int* idx)
+{
+    v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> gathered; // each index supplies two consecutive table entries
+    for (int k = 0; k < V_TypeTraits<_Tp>::nlanes128; ++k)
+        gathered.s[k] = tab[idx[k >> 1] + (k & 1)];
+    return gathered;
+}
+template<typename _Tp> inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_lut_quads(const _Tp* tab, const int* idx)
+{
+    v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> gathered; // each index supplies four consecutive table entries
+    for (int k = 0; k < V_TypeTraits<_Tp>::nlanes128; ++k)
+        gathered.s[k] = tab[idx[k >> 2] + (k & 3)];
+    return gathered;
+}
+
+template<int n> inline v_reg<int, n> v_lut(const int* tab, const v_reg<int, n>& idx)
+{
+    v_reg<int, n> picked; // lane k = tab[idx.s[k]]
+    for( int k = 0; k < n; ++k )
+        picked.s[k] = tab[idx.s[k]];
+    return picked;
+}
+
+template<int n> inline v_reg<unsigned, n> v_lut(const unsigned* tab, const v_reg<int, n>& idx) // lane i = tab[idx.s[i]]
+{
+    v_reg<unsigned, n> c; // must be the declared return type; a v_reg<int, n> local does not match it
+    for (int i = 0; i < n; i++)
+        c.s[i] = tab[idx.s[i]];
+    return c;
+}
+
+template<int n> inline v_reg<float, n> v_lut(const float* tab, const v_reg<int, n>& idx)
+{
+    v_reg<float, n> picked; // lane k = tab[idx.s[k]]
+    for( int k = 0; k < n; ++k )
+        picked.s[k] = tab[idx.s[k]];
+    return picked;
+}
+
+template<int n> inline v_reg<double, n> v_lut(const double* tab, const v_reg<int, n*2>& idx)
+{
+    // Only the first n of the 2*n index lanes are consulted. n appears solely inside n*2, so it must be given explicitly.
+    v_reg<double, n> looked_up;
+    for( int lane = 0; lane < n; ++lane ) looked_up.s[lane] = tab[idx.s[lane]];
+    return looked_up;
+}
+
+
+inline v_int32x4 v_lut(const int* tab, const v_int32x4& idxvec)
+{
+    return v_lut(tab, idxvec.s); // hands the index lanes over as a plain int array
+}
+
+inline v_uint32x4 v_lut(const unsigned* tab, const v_int32x4& idxvec)
+{
+    return v_lut(tab, idxvec.s); // hands the index lanes over as a plain int array
+}
+
+inline v_float32x4 v_lut(const float* tab, const v_int32x4& idxvec)
+{
+    return v_lut(tab, idxvec.s); // hands the index lanes over as a plain int array
+}
+
+inline v_float64x2 v_lut(const double* tab, const v_int32x4& idxvec)
+{
+    return v_lut(tab, idxvec.s); // presumably only the first two index lanes are read (one per double lane) - confirm
+}
+
+
+template<int n> inline void v_lut_deinterleave(const float* tab, const v_reg<int, n>& idx,
+                                               v_reg<float, n>& x, v_reg<float, n>& y)
+{
+    for( int lane = 0; lane < n; ++lane )
+    {
+        const float* const pair = tab + idx.s[lane]; // two adjacent table entries
+        x.s[lane] = pair[0];
+        y.s[lane] = pair[1];
+    }
+}
+
+template<int n> inline void v_lut_deinterleave(const double* tab, const v_reg<int, n*2>& idx,
+                                               v_reg<double, n>& x, v_reg<double, n>& y)
+{
+    for( int lane = 0; lane < n; ++lane )
+    {
+        const double* const pair = tab + idx.s[lane]; // two adjacent table entries
+        x.s[lane] = pair[0];
+        y.s[lane] = pair[1];
+    }
+}
+
+template<typename _Tp, int n> inline v_reg<_Tp, n> v_interleave_pairs(const v_reg<_Tp, n>& vec)
+{
+    v_reg<_Tp, n> c;
+    for (int base = 0; base + 4 <= n; base += 4)
+    {   // every full group of four lanes {p, q, r, s} is emitted as {p, r, q, s}
+        const _Tp* src = vec.s + base;
+        _Tp* dst = c.s + base;
+        dst[0] = src[0]; dst[1] = src[2];
+        dst[2] = src[1]; dst[3] = src[3];
+    }
+    return c;
+}
+
+template<typename _Tp, int n> inline v_reg<_Tp, n> v_interleave_quads(const v_reg<_Tp, n>& vec)
+{
+    v_reg<_Tp, n> c;
+    // In every full group of eight lanes the two quads are zipped: {q0 q1 q2 q3 r0 r1 r2 r3} -> {q0 r0 q1 r1 q2 r2 q3 r3}.
+    for (int base = 0; base + 8 <= n; base += 8)
+    {
+        const _Tp* src = vec.s + base;
+        _Tp* dst = c.s + base;
+        for (int k = 0; k < 4; ++k)
+        {
+            dst[2*k]     = src[k];
+            dst[2*k + 1] = src[k + 4];
+        }
+    }
+    return c;
+}
+
+template<typename _Tp, int n> inline v_reg<_Tp, n> v_pack_triplets(const v_reg<_Tp, n>& vec)
+{
+    v_reg<_Tp, n> c; // only the first 3*(n/4) lanes are assigned below
+    for (int quad = 0; quad < n/4; ++quad)
+    {
+        const _Tp* src = vec.s + 4*quad; // first three lanes of every input group of four
+        _Tp* dst = c.s + 3*quad;
+        for (int k = 0; k < 3; ++k) dst[k] = src[k];
+    }
+    return c;
+}
+
 /** @brief Transpose 4x4 matrix
 
 Scheme:
@@ -1586,14 +2462,14 @@ OPENCV_HAL_IMPL_C_RSHIFTR(v_int64x2, int64)
 
 //! @brief Helper macro
 //! @ingroup core_hal_intrin_impl
-#define OPENCV_HAL_IMPL_C_PACK(_Tpvec, _Tpnvec, _Tpn, pack_suffix) \
+#define OPENCV_HAL_IMPL_C_PACK(_Tpvec, _Tpnvec, _Tpn, pack_suffix, cast) \
 inline _Tpnvec v_##pack_suffix(const _Tpvec& a, const _Tpvec& b) \
 { \
     _Tpnvec c; \
     for( int i = 0; i < _Tpvec::nlanes; i++ ) \
     { \
-        c.s[i] = saturate_cast<_Tpn>(a.s[i]); \
-        c.s[i+_Tpvec::nlanes] = saturate_cast<_Tpn>(b.s[i]); \
+        c.s[i] = cast<_Tpn>(a.s[i]); \
+        c.s[i+_Tpvec::nlanes] = cast<_Tpn>(b.s[i]); \
     } \
     return c; \
 }
@@ -1607,26 +2483,28 @@ inline _Tpnvec v_##pack_suffix(const _Tpvec& a, const _Tpvec& b) \
 //!
 //! - pack: for 16-, 32- and 64-bit integer input types
 //! - pack_u: for 16- and 32-bit signed integer input types
-OPENCV_HAL_IMPL_C_PACK(v_uint16x8, v_uint8x16, uchar, pack)
-OPENCV_HAL_IMPL_C_PACK(v_int16x8, v_int8x16, schar, pack)
-OPENCV_HAL_IMPL_C_PACK(v_uint32x4, v_uint16x8, ushort, pack)
-OPENCV_HAL_IMPL_C_PACK(v_int32x4, v_int16x8, short, pack)
-OPENCV_HAL_IMPL_C_PACK(v_uint64x2, v_uint32x4, unsigned, pack)
-OPENCV_HAL_IMPL_C_PACK(v_int64x2, v_int32x4, int, pack)
-OPENCV_HAL_IMPL_C_PACK(v_int16x8, v_uint8x16, uchar, pack_u)
-OPENCV_HAL_IMPL_C_PACK(v_int32x4, v_uint16x8, ushort, pack_u)
+//!
+//! @note All variants except 64-bit use saturation.
+OPENCV_HAL_IMPL_C_PACK(v_uint16x8, v_uint8x16, uchar, pack, saturate_cast)
+OPENCV_HAL_IMPL_C_PACK(v_int16x8, v_int8x16, schar, pack, saturate_cast)
+OPENCV_HAL_IMPL_C_PACK(v_uint32x4, v_uint16x8, ushort, pack, saturate_cast)
+OPENCV_HAL_IMPL_C_PACK(v_int32x4, v_int16x8, short, pack, saturate_cast)
+OPENCV_HAL_IMPL_C_PACK(v_uint64x2, v_uint32x4, unsigned, pack, static_cast)
+OPENCV_HAL_IMPL_C_PACK(v_int64x2, v_int32x4, int, pack, static_cast)
+OPENCV_HAL_IMPL_C_PACK(v_int16x8, v_uint8x16, uchar, pack_u, saturate_cast)
+OPENCV_HAL_IMPL_C_PACK(v_int32x4, v_uint16x8, ushort, pack_u, saturate_cast)
 //! @}
 
 //! @brief Helper macro
 //! @ingroup core_hal_intrin_impl
-#define OPENCV_HAL_IMPL_C_RSHR_PACK(_Tpvec, _Tp, _Tpnvec, _Tpn, pack_suffix) \
+#define OPENCV_HAL_IMPL_C_RSHR_PACK(_Tpvec, _Tp, _Tpnvec, _Tpn, pack_suffix, cast) \
 template<int n> inline _Tpnvec v_rshr_##pack_suffix(const _Tpvec& a, const _Tpvec& b) \
 { \
     _Tpnvec c; \
     for( int i = 0; i < _Tpvec::nlanes; i++ ) \
     { \
-        c.s[i] = saturate_cast<_Tpn>((a.s[i] + ((_Tp)1 << (n - 1))) >> n); \
-        c.s[i+_Tpvec::nlanes] = saturate_cast<_Tpn>((b.s[i] + ((_Tp)1 << (n - 1))) >> n); \
+        c.s[i] = cast<_Tpn>((a.s[i] + ((_Tp)1 << (n - 1))) >> n); \
+        c.s[i+_Tpvec::nlanes] = cast<_Tpn>((b.s[i] + ((_Tp)1 << (n - 1))) >> n); \
     } \
     return c; \
 }
@@ -1640,51 +2518,55 @@ template<int n> inline _Tpnvec v_rshr_##pack_suffix(const _Tpvec& a, const _Tpve
 //!
 //! - pack: for 16-, 32- and 64-bit integer input types
 //! - pack_u: for 16- and 32-bit signed integer input types
-OPENCV_HAL_IMPL_C_RSHR_PACK(v_uint16x8, ushort, v_uint8x16, uchar, pack)
-OPENCV_HAL_IMPL_C_RSHR_PACK(v_int16x8, short, v_int8x16, schar, pack)
-OPENCV_HAL_IMPL_C_RSHR_PACK(v_uint32x4, unsigned, v_uint16x8, ushort, pack)
-OPENCV_HAL_IMPL_C_RSHR_PACK(v_int32x4, int, v_int16x8, short, pack)
-OPENCV_HAL_IMPL_C_RSHR_PACK(v_uint64x2, uint64, v_uint32x4, unsigned, pack)
-OPENCV_HAL_IMPL_C_RSHR_PACK(v_int64x2, int64, v_int32x4, int, pack)
-OPENCV_HAL_IMPL_C_RSHR_PACK(v_int16x8, short, v_uint8x16, uchar, pack_u)
-OPENCV_HAL_IMPL_C_RSHR_PACK(v_int32x4, int, v_uint16x8, ushort, pack_u)
+//!
+//! @note All variants except 64-bit use saturation.
+OPENCV_HAL_IMPL_C_RSHR_PACK(v_uint16x8, ushort, v_uint8x16, uchar, pack, saturate_cast)
+OPENCV_HAL_IMPL_C_RSHR_PACK(v_int16x8, short, v_int8x16, schar, pack, saturate_cast)
+OPENCV_HAL_IMPL_C_RSHR_PACK(v_uint32x4, unsigned, v_uint16x8, ushort, pack, saturate_cast)
+OPENCV_HAL_IMPL_C_RSHR_PACK(v_int32x4, int, v_int16x8, short, pack, saturate_cast)
+OPENCV_HAL_IMPL_C_RSHR_PACK(v_uint64x2, uint64, v_uint32x4, unsigned, pack, static_cast)
+OPENCV_HAL_IMPL_C_RSHR_PACK(v_int64x2, int64, v_int32x4, int, pack, static_cast)
+OPENCV_HAL_IMPL_C_RSHR_PACK(v_int16x8, short, v_uint8x16, uchar, pack_u, saturate_cast)
+OPENCV_HAL_IMPL_C_RSHR_PACK(v_int32x4, int, v_uint16x8, ushort, pack_u, saturate_cast)
 //! @}
 
 //! @brief Helper macro
 //! @ingroup core_hal_intrin_impl
-#define OPENCV_HAL_IMPL_C_PACK_STORE(_Tpvec, _Tp, _Tpnvec, _Tpn, pack_suffix) \
+#define OPENCV_HAL_IMPL_C_PACK_STORE(_Tpvec, _Tp, _Tpnvec, _Tpn, pack_suffix, cast) \
 inline void v_##pack_suffix##_store(_Tpn* ptr, const _Tpvec& a) \
 { \
     for( int i = 0; i < _Tpvec::nlanes; i++ ) \
-        ptr[i] = saturate_cast<_Tpn>(a.s[i]); \
+        ptr[i] = cast<_Tpn>(a.s[i]); \
 }
 
 //! @name Pack and store
 //! @{
 //! @brief Store values from the input vector into memory with pack
 //!
-//! Values will be stored into memory with saturating conversion to narrower type.
+//! Values will be stored into memory with conversion to narrower type.
 //! Variant with _u_ suffix converts to corresponding unsigned type.
 //!
 //! - pack: for 16-, 32- and 64-bit integer input types
 //! - pack_u: for 16- and 32-bit signed integer input types
-OPENCV_HAL_IMPL_C_PACK_STORE(v_uint16x8, ushort, v_uint8x16, uchar, pack)
-OPENCV_HAL_IMPL_C_PACK_STORE(v_int16x8, short, v_int8x16, schar, pack)
-OPENCV_HAL_IMPL_C_PACK_STORE(v_uint32x4, unsigned, v_uint16x8, ushort, pack)
-OPENCV_HAL_IMPL_C_PACK_STORE(v_int32x4, int, v_int16x8, short, pack)
-OPENCV_HAL_IMPL_C_PACK_STORE(v_uint64x2, uint64, v_uint32x4, unsigned, pack)
-OPENCV_HAL_IMPL_C_PACK_STORE(v_int64x2, int64, v_int32x4, int, pack)
-OPENCV_HAL_IMPL_C_PACK_STORE(v_int16x8, short, v_uint8x16, uchar, pack_u)
-OPENCV_HAL_IMPL_C_PACK_STORE(v_int32x4, int, v_uint16x8, ushort, pack_u)
+//!
+//! @note All variants except 64-bit use saturation.
+OPENCV_HAL_IMPL_C_PACK_STORE(v_uint16x8, ushort, v_uint8x16, uchar, pack, saturate_cast)
+OPENCV_HAL_IMPL_C_PACK_STORE(v_int16x8, short, v_int8x16, schar, pack, saturate_cast)
+OPENCV_HAL_IMPL_C_PACK_STORE(v_uint32x4, unsigned, v_uint16x8, ushort, pack, saturate_cast)
+OPENCV_HAL_IMPL_C_PACK_STORE(v_int32x4, int, v_int16x8, short, pack, saturate_cast)
+OPENCV_HAL_IMPL_C_PACK_STORE(v_uint64x2, uint64, v_uint32x4, unsigned, pack, static_cast)
+OPENCV_HAL_IMPL_C_PACK_STORE(v_int64x2, int64, v_int32x4, int, pack, static_cast)
+OPENCV_HAL_IMPL_C_PACK_STORE(v_int16x8, short, v_uint8x16, uchar, pack_u, saturate_cast)
+OPENCV_HAL_IMPL_C_PACK_STORE(v_int32x4, int, v_uint16x8, ushort, pack_u, saturate_cast)
 //! @}
 
 //! @brief Helper macro
 //! @ingroup core_hal_intrin_impl
-#define OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(_Tpvec, _Tp, _Tpnvec, _Tpn, pack_suffix) \
+#define OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(_Tpvec, _Tp, _Tpnvec, _Tpn, pack_suffix, cast) \
 template<int n> inline void v_rshr_##pack_suffix##_store(_Tpn* ptr, const _Tpvec& a) \
 { \
     for( int i = 0; i < _Tpvec::nlanes; i++ ) \
-        ptr[i] = saturate_cast<_Tpn>((a.s[i] + ((_Tp)1 << (n - 1))) >> n); \
+        ptr[i] = cast<_Tpn>((a.s[i] + ((_Tp)1 << (n - 1))) >> n); \
 }
 
 //! @name Pack and store with rounding shift
@@ -1696,14 +2578,113 @@ template<int n> inline void v_rshr_##pack_suffix##_store(_Tpn* ptr, const _Tpvec
 //!
 //! - pack: for 16-, 32- and 64-bit integer input types
 //! - pack_u: for 16- and 32-bit signed integer input types
-OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_uint16x8, ushort, v_uint8x16, uchar, pack)
-OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int16x8, short, v_int8x16, schar, pack)
-OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_uint32x4, unsigned, v_uint16x8, ushort, pack)
-OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int32x4, int, v_int16x8, short, pack)
-OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_uint64x2, uint64, v_uint32x4, unsigned, pack)
-OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int64x2, int64, v_int32x4, int, pack)
-OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int16x8, short, v_uint8x16, uchar, pack_u)
-OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int32x4, int, v_uint16x8, ushort, pack_u)
+//!
+//! @note All variants except 64-bit use saturation.
+OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_uint16x8, ushort, v_uint8x16, uchar, pack, saturate_cast)
+OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int16x8, short, v_int8x16, schar, pack, saturate_cast)
+OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_uint32x4, unsigned, v_uint16x8, ushort, pack, saturate_cast)
+OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int32x4, int, v_int16x8, short, pack, saturate_cast)
+OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_uint64x2, uint64, v_uint32x4, unsigned, pack, static_cast)
+OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int64x2, int64, v_int32x4, int, pack, static_cast)
+OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int16x8, short, v_uint8x16, uchar, pack_u, saturate_cast)
+OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int32x4, int, v_uint16x8, ushort, pack_u, saturate_cast)
+//! @}
+
+//! @cond IGNORED
+template<typename _Tpm, typename _Tp, int n>
+inline void _pack_b(_Tpm* mptr, const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
+{
+    for (int k = 0; k < n; ++k)
+    {
+        mptr[k]     = static_cast<_Tpm>(a.s[k]); // first n outputs come from a
+        mptr[n + k] = static_cast<_Tpm>(b.s[k]); // next n outputs come from b
+    }
+}
+//! @endcond
+
+//! @name Pack boolean values
+//! @{
+//! @brief Pack boolean values from multiple vectors to one unsigned 8-bit integer vector
+//!
+//! @note Must provide valid boolean values to guarantee same result for all architectures.
+
+/** @brief
+For 16-bit boolean values
+
+Scheme:
+@code
+a  {0xFFFF 0 0 0xFFFF 0 0xFFFF 0xFFFF 0}
+b  {0xFFFF 0 0xFFFF 0 0 0xFFFF 0 0xFFFF}
+===============
+{
+   0xFF 0 0 0xFF 0 0xFF 0xFF 0
+   0xFF 0 0xFF 0 0 0xFF 0 0xFF
+}
+@endcode */
+
+inline v_uint8x16 v_pack_b(const v_uint16x8& a, const v_uint16x8& b)
+{
+    v_uint8x16 mask;
+    _pack_b(mask.s, a, b); // a's lanes are cast into the first half of mask.s, b's lanes into the second half
+    return mask;
+}
+
+/** @overload
+For 32-bit boolean values
+
+Scheme:
+@code
+a  {0xFFFF.. 0 0 0xFFFF..}
+b  {0 0xFFFF.. 0xFFFF.. 0}
+c  {0xFFFF.. 0 0xFFFF.. 0}
+d  {0 0xFFFF.. 0 0xFFFF..}
+===============
+{
+   0xFF 0 0 0xFF 0 0xFF 0xFF 0
+   0xFF 0 0xFF 0 0 0xFF 0 0xFF
+}
+@endcode */
+
+inline v_uint8x16 v_pack_b(const v_uint32x4& a, const v_uint32x4& b,
+                           const v_uint32x4& c, const v_uint32x4& d)
+{
+    v_uint8x16 mask;
+    _pack_b(mask.s, a, b); // a then b fill mask.s[0..7]
+    _pack_b(mask.s + 8, c, d); // c then d fill mask.s[8..15]
+    return mask;
+}
+
+/** @overload
+For 64-bit boolean values
+
+Scheme:
+@code
+a  {0xFFFF.. 0}
+b  {0 0xFFFF..}
+c  {0xFFFF.. 0}
+d  {0 0xFFFF..}
+
+e  {0xFFFF.. 0}
+f  {0xFFFF.. 0}
+g  {0 0xFFFF..}
+h  {0 0xFFFF..}
+===============
+{
+   0xFF 0 0 0xFF 0xFF 0 0 0xFF
+   0xFF 0 0xFF 0 0 0xFF 0 0xFF
+}
+@endcode */
+inline v_uint8x16 v_pack_b(const v_uint64x2& a, const v_uint64x2& b, const v_uint64x2& c,
+                           const v_uint64x2& d, const v_uint64x2& e, const v_uint64x2& f,
+                           const v_uint64x2& g, const v_uint64x2& h)
+{
+    v_uint8x16 mask;
+    _pack_b(mask.s, a, b); // a then b fill mask.s[0..3]
+    _pack_b(mask.s + 4, c, d); // c then d fill mask.s[4..7]
+    _pack_b(mask.s + 8, e, f); // e then f fill mask.s[8..11]
+    _pack_b(mask.s + 12, g, h); // g then h fill mask.s[12..15]
+    return mask;
+}
 //! @}
 
 /** @brief Matrix multiplication
@@ -1731,8 +2712,70 @@ inline v_float32x4 v_matmul(const v_float32x4& v, const v_float32x4& m0,
                        v.s[0]*m0.s[3] + v.s[1]*m1.s[3] + v.s[2]*m2.s[3] + v.s[3]*m3.s[3]);
 }
 
+/** @brief Matrix multiplication and add
+
+Scheme:
+@code
+{A0 A1 A2   }   |V0|   |D0|
+{B0 B1 B2   }   |V1|   |D1|
+{C0 C1 C2   } x |V2| + |D2|
+====================
+{R0 R1 R2 R3}, where:
+R0 = A0V0 + A1V1 + A2V2 + D0,
+R1 = B0V0 + B1V1 + B2V2 + D1
+...
+@endcode
+*/
+inline v_float32x4 v_matmuladd(const v_float32x4& v, const v_float32x4& m0,
+                               const v_float32x4& m1, const v_float32x4& m2,
+                               const v_float32x4& m3) // m3 is added unscaled; v.s[3] is never read
+{
+    return v_float32x4(v.s[0]*m0.s[0] + v.s[1]*m1.s[0] + v.s[2]*m2.s[0] + m3.s[0], // result lane 0
+                       v.s[0]*m0.s[1] + v.s[1]*m1.s[1] + v.s[2]*m2.s[1] + m3.s[1], // result lane 1
+                       v.s[0]*m0.s[2] + v.s[1]*m1.s[2] + v.s[2]*m2.s[2] + m3.s[2], // result lane 2
+                       v.s[0]*m0.s[3] + v.s[1]*m1.s[3] + v.s[2]*m2.s[3] + m3.s[3]); // result lane 3
+}
+
+
+inline v_float64x2 v_dotprod_expand(const v_int32x4& a, const v_int32x4& b)
+{ return v_fma(v_cvt_f64(a), v_cvt_f64(b), v_cvt_f64_high(a) * v_cvt_f64_high(b)); } // low-half and high-half products combined; assumes v_fma(x, y, z) is x*y + z - confirm
+inline v_float64x2 v_dotprod_expand(const v_int32x4& a, const v_int32x4& b, const v_float64x2& c)
+{ return v_fma(v_cvt_f64(a), v_cvt_f64(b), v_fma(v_cvt_f64_high(a), v_cvt_f64_high(b), c)); } // same as above with c as the innermost addend
+
+inline v_float64x2 v_dotprod_expand_fast(const v_int32x4& a, const v_int32x4& b)
+{ return v_dotprod_expand(a, b); } // plain forward to v_dotprod_expand
+inline v_float64x2 v_dotprod_expand_fast(const v_int32x4& a, const v_int32x4& b, const v_float64x2& c)
+{ return v_dotprod_expand(a, b, c); } // plain forward to v_dotprod_expand
+
+////// FP16 support ///////
+
+inline v_reg<float, V_TypeTraits<float>::nlanes128>
+v_load_expand(const float16_t* ptr)
+{
+    // Reads one float16_t per lane and stores it as float
+    // (the conversion is whatever the assignment to a float lane performs).
+    v_reg<float, V_TypeTraits<float>::nlanes128> widened;
+    for( int lane = 0; lane < widened.nlanes; ++lane )
+        widened.s[lane] = ptr[lane];
+    return widened;
+}
+
+inline void
+v_pack_store(float16_t* ptr, const v_reg<float, V_TypeTraits<float>::nlanes128>& v)
+{
+    // Each float lane goes through the float16_t constructor and is stored in lane order.
+    float16_t* out = ptr;
+    for( int lane = 0; lane < v.nlanes; ++lane, ++out )
+        *out = float16_t(v.s[lane]);
+}
+
+inline void v_cleanup() {}
+
 //! @}
 
+#ifndef CV_DOXYGEN
+CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
+#endif
 }
 
 #endif
diff --git a/IPL/include/opencv/opencv2/core/hal/intrin_forward.hpp b/IPL/include/opencv/opencv2/core/hal/intrin_forward.hpp
new file mode 100644
index 0000000..979f15a
--- /dev/null
+++ b/IPL/include/opencv/opencv2/core/hal/intrin_forward.hpp
@@ -0,0 +1,191 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html
+
+#ifndef CV__SIMD_FORWARD
+#error "Need to pre-define forward width"
+#endif
+
+namespace cv
+{
+
+//! @cond IGNORED
+
+CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
+
+/** Types **/
+#if CV__SIMD_FORWARD == 1024
+// [todo] 1024
+#error "1024-long ops not implemented yet"
+#elif CV__SIMD_FORWARD == 512
+// 512
+#define __CV_VX(fun)   v512_##fun
+#define __CV_V_UINT8   v_uint8x64
+#define __CV_V_INT8    v_int8x64
+#define __CV_V_UINT16  v_uint16x32
+#define __CV_V_INT16   v_int16x32
+#define __CV_V_UINT32  v_uint32x16
+#define __CV_V_INT32   v_int32x16
+#define __CV_V_UINT64  v_uint64x8
+#define __CV_V_INT64   v_int64x8
+#define __CV_V_FLOAT32 v_float32x16
+#define __CV_V_FLOAT64 v_float64x8
+struct v_uint8x64;
+struct v_int8x64;
+struct v_uint16x32;
+struct v_int16x32;
+struct v_uint32x16;
+struct v_int32x16;
+struct v_uint64x8;
+struct v_int64x8;
+struct v_float32x16;
+struct v_float64x8;
+#elif CV__SIMD_FORWARD == 256
+// 256
+#define __CV_VX(fun)   v256_##fun
+#define __CV_V_UINT8   v_uint8x32
+#define __CV_V_INT8    v_int8x32
+#define __CV_V_UINT16  v_uint16x16
+#define __CV_V_INT16   v_int16x16
+#define __CV_V_UINT32  v_uint32x8
+#define __CV_V_INT32   v_int32x8
+#define __CV_V_UINT64  v_uint64x4
+#define __CV_V_INT64   v_int64x4
+#define __CV_V_FLOAT32 v_float32x8
+#define __CV_V_FLOAT64 v_float64x4
+struct v_uint8x32;
+struct v_int8x32;
+struct v_uint16x16;
+struct v_int16x16;
+struct v_uint32x8;
+struct v_int32x8;
+struct v_uint64x4;
+struct v_int64x4;
+struct v_float32x8;
+struct v_float64x4;
+#else
+// 128
+#define __CV_VX(fun)   v_##fun
+#define __CV_V_UINT8   v_uint8x16
+#define __CV_V_INT8    v_int8x16
+#define __CV_V_UINT16  v_uint16x8
+#define __CV_V_INT16   v_int16x8
+#define __CV_V_UINT32  v_uint32x4
+#define __CV_V_INT32   v_int32x4
+#define __CV_V_UINT64  v_uint64x2
+#define __CV_V_INT64   v_int64x2
+#define __CV_V_FLOAT32 v_float32x4
+#define __CV_V_FLOAT64 v_float64x2
+struct v_uint8x16;
+struct v_int8x16;
+struct v_uint16x8;
+struct v_int16x8;
+struct v_uint32x4;
+struct v_int32x4;
+struct v_uint64x2;
+struct v_int64x2;
+struct v_float32x4;
+struct v_float64x2;
+#endif
+
+/** Value reordering **/
+
+// Expansion
+void v_expand(const __CV_V_UINT8&,  __CV_V_UINT16&, __CV_V_UINT16&);
+void v_expand(const __CV_V_INT8&,   __CV_V_INT16&,  __CV_V_INT16&);
+void v_expand(const __CV_V_UINT16&, __CV_V_UINT32&, __CV_V_UINT32&);
+void v_expand(const __CV_V_INT16&,  __CV_V_INT32&,  __CV_V_INT32&);
+void v_expand(const __CV_V_UINT32&, __CV_V_UINT64&, __CV_V_UINT64&);
+void v_expand(const __CV_V_INT32&,  __CV_V_INT64&,  __CV_V_INT64&);
+// Low Expansion
+__CV_V_UINT16 v_expand_low(const __CV_V_UINT8&);
+__CV_V_INT16  v_expand_low(const __CV_V_INT8&);
+__CV_V_UINT32 v_expand_low(const __CV_V_UINT16&);
+__CV_V_INT32  v_expand_low(const __CV_V_INT16&);
+__CV_V_UINT64 v_expand_low(const __CV_V_UINT32&);
+__CV_V_INT64  v_expand_low(const __CV_V_INT32&);
+// High Expansion
+__CV_V_UINT16 v_expand_high(const __CV_V_UINT8&);
+__CV_V_INT16  v_expand_high(const __CV_V_INT8&);
+__CV_V_UINT32 v_expand_high(const __CV_V_UINT16&);
+__CV_V_INT32  v_expand_high(const __CV_V_INT16&);
+__CV_V_UINT64 v_expand_high(const __CV_V_UINT32&);
+__CV_V_INT64  v_expand_high(const __CV_V_INT32&);
+// Load & Low Expansion
+__CV_V_UINT16 __CV_VX(load_expand)(const uchar*);
+__CV_V_INT16  __CV_VX(load_expand)(const schar*);
+__CV_V_UINT32 __CV_VX(load_expand)(const ushort*);
+__CV_V_INT32  __CV_VX(load_expand)(const short*);
+__CV_V_UINT64 __CV_VX(load_expand)(const uint*);
+__CV_V_INT64  __CV_VX(load_expand)(const int*);
+// Load lower 8-bit and expand into 32-bit
+__CV_V_UINT32 __CV_VX(load_expand_q)(const uchar*);
+__CV_V_INT32  __CV_VX(load_expand_q)(const schar*);
+
+// Saturating Pack
+__CV_V_UINT8  v_pack(const __CV_V_UINT16&, const __CV_V_UINT16&);
+__CV_V_INT8   v_pack(const __CV_V_INT16&,  const __CV_V_INT16&);
+__CV_V_UINT16 v_pack(const __CV_V_UINT32&, const __CV_V_UINT32&);
+__CV_V_INT16  v_pack(const __CV_V_INT32&,  const __CV_V_INT32&);
+// Non-saturating Pack
+__CV_V_UINT32 v_pack(const __CV_V_UINT64&, const __CV_V_UINT64&);
+__CV_V_INT32  v_pack(const __CV_V_INT64&,  const __CV_V_INT64&);
+// Pack signed integers with unsigned saturation
+__CV_V_UINT8  v_pack_u(const __CV_V_INT16&, const __CV_V_INT16&);
+__CV_V_UINT16 v_pack_u(const __CV_V_INT32&, const __CV_V_INT32&);
+
+/** Arithmetic, bitwise and comparison operations **/
+
+// Non-saturating multiply
+#if CV_VSX
+template<typename Tvec>
+Tvec v_mul_wrap(const Tvec& a, const Tvec& b);
+#else
+__CV_V_UINT8  v_mul_wrap(const __CV_V_UINT8&,  const __CV_V_UINT8&);
+__CV_V_INT8   v_mul_wrap(const __CV_V_INT8&,   const __CV_V_INT8&);
+__CV_V_UINT16 v_mul_wrap(const __CV_V_UINT16&, const __CV_V_UINT16&);
+__CV_V_INT16  v_mul_wrap(const __CV_V_INT16&,  const __CV_V_INT16&);
+#endif
+
+//  Multiply and expand
+#if CV_VSX
+template<typename Tvec, typename Twvec>
+void v_mul_expand(const Tvec& a, const Tvec& b, Twvec& c, Twvec& d);
+#else
+void v_mul_expand(const __CV_V_UINT8&,  const __CV_V_UINT8&,  __CV_V_UINT16&, __CV_V_UINT16&);
+void v_mul_expand(const __CV_V_INT8&,   const __CV_V_INT8&,   __CV_V_INT16&,  __CV_V_INT16&);
+void v_mul_expand(const __CV_V_UINT16&, const __CV_V_UINT16&, __CV_V_UINT32&, __CV_V_UINT32&);
+void v_mul_expand(const __CV_V_INT16&,  const __CV_V_INT16&,  __CV_V_INT32&,  __CV_V_INT32&);
+void v_mul_expand(const __CV_V_UINT32&, const __CV_V_UINT32&, __CV_V_UINT64&, __CV_V_UINT64&);
+void v_mul_expand(const __CV_V_INT32&,  const __CV_V_INT32&,  __CV_V_INT64&,  __CV_V_INT64&);
+#endif
+
+// Conversions
+__CV_V_FLOAT32 v_cvt_f32(const __CV_V_INT32& a);
+__CV_V_FLOAT32 v_cvt_f32(const __CV_V_FLOAT64& a);
+__CV_V_FLOAT32 v_cvt_f32(const __CV_V_FLOAT64& a, const __CV_V_FLOAT64& b);
+__CV_V_FLOAT64 v_cvt_f64(const __CV_V_INT32& a);
+__CV_V_FLOAT64 v_cvt_f64_high(const __CV_V_INT32& a);
+__CV_V_FLOAT64 v_cvt_f64(const __CV_V_FLOAT32& a);
+__CV_V_FLOAT64 v_cvt_f64_high(const __CV_V_FLOAT32& a);
+__CV_V_FLOAT64 v_cvt_f64(const __CV_V_INT64& a);
+
+/** Cleanup **/
+#undef CV__SIMD_FORWARD
+#undef __CV_VX
+#undef __CV_V_UINT8
+#undef __CV_V_INT8
+#undef __CV_V_UINT16
+#undef __CV_V_INT16
+#undef __CV_V_UINT32
+#undef __CV_V_INT32
+#undef __CV_V_UINT64
+#undef __CV_V_INT64
+#undef __CV_V_FLOAT32
+#undef __CV_V_FLOAT64
+
+CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
+
+//! @endcond
+
+} // cv::
\ No newline at end of file
diff --git a/IPL/include/opencv/opencv2/core/hal/intrin_msa.hpp b/IPL/include/opencv/opencv2/core/hal/intrin_msa.hpp
new file mode 100644
index 0000000..260350c
--- /dev/null
+++ b/IPL/include/opencv/opencv2/core/hal/intrin_msa.hpp
@@ -0,0 +1,1872 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_HAL_INTRIN_MSA_HPP
+#define OPENCV_HAL_INTRIN_MSA_HPP
+
+#include <algorithm>
+#include "opencv2/core/utility.hpp"
+
+namespace cv
+{
+
+//! @cond IGNORED
+CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
+
+#define CV_SIMD128 1
+
+// MSA implements 128-bit wide vector registers that are shared with the 64-bit wide floating-point unit registers.
+// MSA and an FPU cannot both be present unless the FPU has 64-bit floating-point registers.
+#define CV_SIMD128_64F 1
+
+struct v_uint8x16 // wrapper around a single v16u8 value with 16 uchar lanes
+{
+    typedef uchar lane_type;
+    enum { nlanes = 16 };
+
+    v_uint8x16() : val(msa_dupq_n_u8(0)) {} // presumably every lane starts at zero (msa_dupq_n_u8 is defined elsewhere)
+    explicit v_uint8x16(v16u8 v) : val(v) {}
+    v_uint8x16(uchar v0, uchar v1, uchar v2, uchar v3, uchar v4, uchar v5, uchar v6, uchar v7,
+               uchar v8, uchar v9, uchar v10, uchar v11, uchar v12, uchar v13, uchar v14, uchar v15)
+    {
+        uchar v[] = {v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15}; // arguments staged in order
+        val = msa_ld1q_u8(v);
+    }
+    uchar get0() const
+    {
+        return msa_getq_lane_u8(val, 0); // lane index 0
+    }
+
+    v16u8 val;
+};
+
+struct v_int8x16 // wrapper around a single v16i8 value with 16 schar lanes
+{
+    typedef schar lane_type;
+    enum { nlanes = 16 };
+
+    v_int8x16() : val(msa_dupq_n_s8(0)) {} // presumably every lane starts at zero (msa_dupq_n_s8 is defined elsewhere)
+    explicit v_int8x16(v16i8 v) : val(v) {}
+    v_int8x16(schar v0, schar v1, schar v2, schar v3, schar v4, schar v5, schar v6, schar v7,
+               schar v8, schar v9, schar v10, schar v11, schar v12, schar v13, schar v14, schar v15)
+    {
+        schar v[] = {v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15}; // arguments staged in order
+        val = msa_ld1q_s8(v);
+    }
+    schar get0() const
+    {
+        return msa_getq_lane_s8(val, 0); // lane index 0
+    }
+
+    v16i8 val;
+};
+
+struct v_uint16x8 // wrapper around a single v8u16 value with 8 ushort lanes
+{
+    typedef ushort lane_type;
+    enum { nlanes = 8 };
+
+    v_uint16x8() : val(msa_dupq_n_u16(0)) {} // presumably every lane starts at zero (msa_dupq_n_u16 is defined elsewhere)
+    explicit v_uint16x8(v8u16 v) : val(v) {}
+    v_uint16x8(ushort v0, ushort v1, ushort v2, ushort v3, ushort v4, ushort v5, ushort v6, ushort v7)
+    {
+        ushort v[] = {v0, v1, v2, v3, v4, v5, v6, v7}; // arguments staged in order
+        val = msa_ld1q_u16(v);
+    }
+    ushort get0() const
+    {
+        return msa_getq_lane_u16(val, 0); // lane index 0
+    }
+
+    v8u16 val;
+};
+
+struct v_int16x8 // wrapper around a single v8i16 value with 8 short lanes
+{
+    typedef short lane_type;
+    enum { nlanes = 8 };
+
+    v_int16x8() : val(msa_dupq_n_s16(0)) {} // presumably every lane starts at zero (msa_dupq_n_s16 is defined elsewhere)
+    explicit v_int16x8(v8i16 v) : val(v) {}
+    v_int16x8(short v0, short v1, short v2, short v3, short v4, short v5, short v6, short v7)
+    {
+        short v[] = {v0, v1, v2, v3, v4, v5, v6, v7}; // arguments staged in order
+        val = msa_ld1q_s16(v);
+    }
+    short get0() const
+    {
+        return msa_getq_lane_s16(val, 0); // lane index 0
+    }
+
+    v8i16 val;
+};
+
+struct v_uint32x4 // wrapper around a single v4u32 value with 4 unsigned int lanes
+{
+    typedef unsigned int lane_type;
+    enum { nlanes = 4 };
+
+    v_uint32x4() : val(msa_dupq_n_u32(0)) {} // presumably every lane starts at zero (msa_dupq_n_u32 is defined elsewhere)
+    explicit v_uint32x4(v4u32 v) : val(v) {}
+    v_uint32x4(unsigned int v0, unsigned int v1, unsigned int v2, unsigned int v3)
+    {
+        unsigned int v[] = {v0, v1, v2, v3}; // arguments staged in order
+        val = msa_ld1q_u32(v);
+    }
+    unsigned int get0() const
+    {
+        return msa_getq_lane_u32(val, 0); // lane index 0
+    }
+
+    v4u32 val;
+};
+
+struct v_int32x4 // wrapper around a single v4i32 value with 4 int lanes
+{
+    typedef int lane_type;
+    enum { nlanes = 4 };
+
+    v_int32x4() : val(msa_dupq_n_s32(0)) {} // presumably every lane starts at zero (msa_dupq_n_s32 is defined elsewhere)
+    explicit v_int32x4(v4i32 v) : val(v) {}
+    v_int32x4(int v0, int v1, int v2, int v3)
+    {
+        int v[] = {v0, v1, v2, v3}; // arguments staged in order
+        val = msa_ld1q_s32(v);
+    }
+    int get0() const
+    {
+        return msa_getq_lane_s32(val, 0); // lane index 0
+    }
+    v4i32 val;
+};
+
+struct v_float32x4 // wrapper around a single v4f32 value with 4 float lanes
+{
+    typedef float lane_type;
+    enum { nlanes = 4 };
+
+    v_float32x4() : val(msa_dupq_n_f32(0.0f)) {} // presumably every lane starts at zero (msa_dupq_n_f32 is defined elsewhere)
+    explicit v_float32x4(v4f32 v) : val(v) {}
+    v_float32x4(float v0, float v1, float v2, float v3)
+    {
+        float v[] = {v0, v1, v2, v3}; // arguments staged in order
+        val = msa_ld1q_f32(v);
+    }
+    float get0() const
+    {
+        return msa_getq_lane_f32(val, 0); // lane index 0
+    }
+    v4f32 val;
+};
+
+struct v_uint64x2 // wrapper around a single v2u64 value with 2 uint64 lanes
+{
+    typedef uint64 lane_type;
+    enum { nlanes = 2 };
+
+    v_uint64x2() : val(msa_dupq_n_u64(0)) {} // presumably every lane starts at zero (msa_dupq_n_u64 is defined elsewhere)
+    explicit v_uint64x2(v2u64 v) : val(v) {}
+    v_uint64x2(uint64 v0, uint64 v1)
+    {
+        uint64 v[] = {v0, v1}; // arguments staged in order
+        val = msa_ld1q_u64(v);
+    }
+    uint64 get0() const
+    {
+        return msa_getq_lane_u64(val, 0); // lane index 0
+    }
+    v2u64 val;
+};
+
+struct v_int64x2 // wrapper around a single v2i64 value with 2 int64 lanes
+{
+    typedef int64 lane_type;
+    enum { nlanes = 2 };
+
+    v_int64x2() : val(msa_dupq_n_s64(0)) {} // presumably every lane starts at zero (msa_dupq_n_s64 is defined elsewhere)
+    explicit v_int64x2(v2i64 v) : val(v) {}
+    v_int64x2(int64 v0, int64 v1)
+    {
+        int64 v[] = {v0, v1}; // arguments staged in order
+        val = msa_ld1q_s64(v);
+    }
+    int64 get0() const
+    {
+        return msa_getq_lane_s64(val, 0); // lane index 0
+    }
+    v2i64 val;
+};
+
+struct v_float64x2 // wrapper around a single v2f64 value with 2 double lanes
+{
+    typedef double lane_type;
+    enum { nlanes = 2 };
+
+    v_float64x2() : val(msa_dupq_n_f64(0.0f)) {} // presumably every lane starts at zero (msa_dupq_n_f64 is defined elsewhere)
+    explicit v_float64x2(v2f64 v) : val(v) {}
+    v_float64x2(double v0, double v1)
+    {
+        double v[] = {v0, v1}; // arguments staged in order
+        val = msa_ld1q_f64(v);
+    }
+    double get0() const
+    {
+        return msa_getq_lane_f64(val, 0); // lane index 0
+    }
+    v2f64 val;
+};
+// Per vector type: v_setzero_<suffix>(), v_setall_<suffix>(v), and v_reinterpret_as_<x>() overloads that pass .val through MSA_TPV_REINTERPRET to each of the ten vector types.
+#define OPENCV_HAL_IMPL_MSA_INIT(_Tpv, _Tp, suffix) \
+inline v_##_Tpv v_setzero_##suffix() { return v_##_Tpv(msa_dupq_n_##suffix((_Tp)0)); } \
+inline v_##_Tpv v_setall_##suffix(_Tp v) { return v_##_Tpv(msa_dupq_n_##suffix(v)); } \
+inline v_uint8x16 v_reinterpret_as_u8(const v_##_Tpv& v) { return v_uint8x16(MSA_TPV_REINTERPRET(v16u8, v.val)); } \
+inline v_int8x16 v_reinterpret_as_s8(const v_##_Tpv& v) { return v_int8x16(MSA_TPV_REINTERPRET(v16i8, v.val)); } \
+inline v_uint16x8 v_reinterpret_as_u16(const v_##_Tpv& v) { return v_uint16x8(MSA_TPV_REINTERPRET(v8u16, v.val)); } \
+inline v_int16x8 v_reinterpret_as_s16(const v_##_Tpv& v) { return v_int16x8(MSA_TPV_REINTERPRET(v8i16, v.val)); } \
+inline v_uint32x4 v_reinterpret_as_u32(const v_##_Tpv& v) { return v_uint32x4(MSA_TPV_REINTERPRET(v4u32, v.val)); } \
+inline v_int32x4 v_reinterpret_as_s32(const v_##_Tpv& v) { return v_int32x4(MSA_TPV_REINTERPRET(v4i32, v.val)); } \
+inline v_uint64x2 v_reinterpret_as_u64(const v_##_Tpv& v) { return v_uint64x2(MSA_TPV_REINTERPRET(v2u64, v.val)); } \
+inline v_int64x2 v_reinterpret_as_s64(const v_##_Tpv& v) { return v_int64x2(MSA_TPV_REINTERPRET(v2i64, v.val)); } \
+inline v_float32x4 v_reinterpret_as_f32(const v_##_Tpv& v) { return v_float32x4(MSA_TPV_REINTERPRET(v4f32, v.val)); } \
+inline v_float64x2 v_reinterpret_as_f64(const v_##_Tpv& v) { return v_float64x2(MSA_TPV_REINTERPRET(v2f64, v.val)); }
+// Instantiate the helpers above for all ten vector types (vector name without the v_ prefix, lane type, suffix).
+OPENCV_HAL_IMPL_MSA_INIT(uint8x16, uchar, u8)
+OPENCV_HAL_IMPL_MSA_INIT(int8x16, schar, s8)
+OPENCV_HAL_IMPL_MSA_INIT(uint16x8, ushort, u16)
+OPENCV_HAL_IMPL_MSA_INIT(int16x8, short, s16)
+OPENCV_HAL_IMPL_MSA_INIT(uint32x4, unsigned int, u32)
+OPENCV_HAL_IMPL_MSA_INIT(int32x4, int, s32)
+OPENCV_HAL_IMPL_MSA_INIT(uint64x2, uint64, u64)
+OPENCV_HAL_IMPL_MSA_INIT(int64x2, int64, s64)
+OPENCV_HAL_IMPL_MSA_INIT(float32x4, float, f32)
+OPENCV_HAL_IMPL_MSA_INIT(float64x2, double, f64)
+// Defines v_<pack>(a, b), which combines two _Tpwvec vectors into one _Tpvec through `mov`, and v_rshr_<pack><n>(a, b), which does so through `rshr` with the compile-time argument n.
+#define OPENCV_HAL_IMPL_MSA_PACK(_Tpvec, _Tpwvec, pack, mov, rshr) \
+inline _Tpvec v_##pack(const _Tpwvec& a, const _Tpwvec& b) \
+{ \
+    return _Tpvec(mov(a.val, b.val)); \
+} \
+template<int n> inline \
+_Tpvec v_rshr_##pack(const _Tpwvec& a, const _Tpwvec& b) \
+{ \
+    return _Tpvec(rshr(a.val, b.val, n)); \
+}
+// Arguments: result vector, input vector, function-name stem, intrinsic for v_<pack>, intrinsic for v_rshr_<pack>.
+OPENCV_HAL_IMPL_MSA_PACK(v_uint8x16, v_uint16x8, pack, msa_qpack_u16, msa_qrpackr_u16)
+OPENCV_HAL_IMPL_MSA_PACK(v_int8x16, v_int16x8, pack, msa_qpack_s16, msa_qrpackr_s16)
+OPENCV_HAL_IMPL_MSA_PACK(v_uint16x8, v_uint32x4, pack, msa_qpack_u32, msa_qrpackr_u32)
+OPENCV_HAL_IMPL_MSA_PACK(v_int16x8, v_int32x4, pack, msa_qpack_s32, msa_qrpackr_s32)
+OPENCV_HAL_IMPL_MSA_PACK(v_uint32x4, v_uint64x2, pack, msa_pack_u64, msa_rpackr_u64)
+OPENCV_HAL_IMPL_MSA_PACK(v_int32x4, v_int64x2, pack, msa_pack_s64, msa_rpackr_s64)
+OPENCV_HAL_IMPL_MSA_PACK(v_uint8x16, v_int16x8, pack_u, msa_qpacku_s16, msa_qrpackru_s16)
+OPENCV_HAL_IMPL_MSA_PACK(v_uint16x8, v_int32x4, pack_u, msa_qpacku_s32, msa_qrpackru_s32)
+// Defines v_<pack>_store(ptr, a) and v_rshr_<pack>_store<n>(ptr, a): convert a.val through `mov` / `rshr` into an hreg value and write it to ptr with msa_st1_<suffix>.
+#define OPENCV_HAL_IMPL_MSA_PACK_STORE(_Tpvec, _Tp, hreg, suffix, _Tpwvec, pack, mov, rshr) \
+inline void v_##pack##_store(_Tp* ptr, const _Tpwvec& a) \
+{ \
+    hreg a1 = mov(a.val); \
+    msa_st1_##suffix(ptr, a1); \
+} \
+template<int n> inline \
+void v_rshr_##pack##_store(_Tp* ptr, const _Tpwvec& a) \
+{ \
+    hreg a1 = rshr(a.val, n); \
+    msa_st1_##suffix(ptr, a1); \
+}
+// NOTE(review): the first macro argument (_Tpvec) is not referenced inside the macro body.
+OPENCV_HAL_IMPL_MSA_PACK_STORE(v_uint8x16, uchar, v8u8, u8, v_uint16x8, pack, msa_qmovn_u16, msa_qrshrn_n_u16)
+OPENCV_HAL_IMPL_MSA_PACK_STORE(v_int8x16, schar, v8i8, s8, v_int16x8, pack, msa_qmovn_s16, msa_qrshrn_n_s16)
+OPENCV_HAL_IMPL_MSA_PACK_STORE(v_uint16x8, ushort, v4u16, u16, v_uint32x4, pack, msa_qmovn_u32, msa_qrshrn_n_u32)
+OPENCV_HAL_IMPL_MSA_PACK_STORE(v_int16x8, short, v4i16, s16, v_int32x4, pack, msa_qmovn_s32, msa_qrshrn_n_s32)
+OPENCV_HAL_IMPL_MSA_PACK_STORE(v_uint32x4, unsigned, v2u32, u32, v_uint64x2, pack, msa_movn_u64, msa_rshrn_n_u64)
+OPENCV_HAL_IMPL_MSA_PACK_STORE(v_int32x4, int, v2i32, s32, v_int64x2, pack, msa_movn_s64, msa_rshrn_n_s64)
+OPENCV_HAL_IMPL_MSA_PACK_STORE(v_uint8x16, uchar, v8u8, u8, v_int16x8, pack_u, msa_qmovun_s16, msa_qrshrun_n_s16)
+OPENCV_HAL_IMPL_MSA_PACK_STORE(v_uint16x8, ushort, v4u16, u16, v_int32x4, pack_u, msa_qmovun_s32, msa_qrshrun_n_s32)
+
+// pack boolean: fold two 16-bit-lane vectors into one 8-bit-lane vector via msa_pack_u16.
+inline v_uint8x16 v_pack_b(const v_uint16x8& a, const v_uint16x8& b) {
+    v_uint8x16 packed(msa_pack_u16(a.val, b.val));
+    return packed;
+}
+// pack boolean, four 32-bit-lane inputs: msa_pack_u32 on (a, b) and (c, d), then msa_pack_u16 on the two results.
+inline v_uint8x16 v_pack_b(const v_uint32x4& a, const v_uint32x4& b,
+                           const v_uint32x4& c, const v_uint32x4& d) {
+    v8u16 ab = msa_pack_u32(a.val, b.val), cd = msa_pack_u32(c.val, d.val);
+    return v_uint8x16(msa_pack_u16(ab, cd));
+}
+// pack boolean, eight 64-bit-lane inputs: three msa_pack_* stages (u64, then u32, then u16) down to 8-bit lanes.
+inline v_uint8x16 v_pack_b(const v_uint64x2& a, const v_uint64x2& b, const v_uint64x2& c,
+                           const v_uint64x2& d, const v_uint64x2& e, const v_uint64x2& f,
+                           const v_uint64x2& g, const v_uint64x2& h)
+{
+    v8u16 first_half  = msa_pack_u32(msa_pack_u64(a.val, b.val), msa_pack_u64(c.val, d.val));
+    v8u16 second_half = msa_pack_u32(msa_pack_u64(e.val, f.val), msa_pack_u64(g.val, h.val));
+    return v_uint8x16(msa_pack_u16(first_half, second_half));
+}
+// Combines m0..m3 with lanes 0..3 of v (presumably m0*v[0] + m1*v[1] + m2*v[2] + m3*v[3]): one lane multiply, three lane multiply-accumulates.
+inline v_float32x4 v_matmul(const v_float32x4& v, const v_float32x4& m0,
+                            const v_float32x4& m1, const v_float32x4& m2,
+                            const v_float32x4& m3)
+{
+    v4f32 coeffs = v.val;
+    v4f32 acc = msa_mulq_lane_f32(m0.val, coeffs, 0);
+    acc = msa_mlaq_lane_f32(acc, m1.val, coeffs, 1);
+    acc = msa_mlaq_lane_f32(acc, m2.val, coeffs, 2);
+    acc = msa_mlaq_lane_f32(acc, m3.val, coeffs, 3);
+    return v_float32x4(acc);
+}
+// Like v_matmul for m0..m2 and lanes 0..2 of v, but the last step adds the vector a with msa_addq_f32 (lane 3 of v is not read).
+inline v_float32x4 v_matmuladd(const v_float32x4& v, const v_float32x4& m0,
+                               const v_float32x4& m1, const v_float32x4& m2,
+                               const v_float32x4& a)
+{
+    v4f32 coeffs = v.val;
+    v4f32 acc = msa_mulq_lane_f32(m0.val, coeffs, 0);
+    acc = msa_mlaq_lane_f32(acc, m1.val, coeffs, 1);
+    acc = msa_mlaq_lane_f32(acc, m2.val, coeffs, 2);
+    acc = msa_addq_f32(acc, a.val);
+    return v_float32x4(acc);
+}
+// Defines binary operator `bin_op` and its compound-assignment form for _Tpvec, both forwarding to `intrin` on the .val members.
+#define OPENCV_HAL_IMPL_MSA_BIN_OP(bin_op, _Tpvec, intrin) \
+inline _Tpvec operator bin_op (const _Tpvec& a, const _Tpvec& b) \
+{ \
+    return _Tpvec(intrin(a.val, b.val)); \
+} \
+inline _Tpvec& operator bin_op##= (_Tpvec& a, const _Tpvec& b) \
+{ \
+    a.val = intrin(a.val, b.val); \
+    return a; \
+}
+// 8/16-bit + and - use msa_qaddq / msa_qsubq (presumably saturating; cf. the v_add_wrap family); wider integer and float types use msa_addq / msa_subq / msa_mulq / msa_divq.
+OPENCV_HAL_IMPL_MSA_BIN_OP(+, v_uint8x16, msa_qaddq_u8)
+OPENCV_HAL_IMPL_MSA_BIN_OP(-, v_uint8x16, msa_qsubq_u8)
+OPENCV_HAL_IMPL_MSA_BIN_OP(+, v_int8x16, msa_qaddq_s8)
+OPENCV_HAL_IMPL_MSA_BIN_OP(-, v_int8x16, msa_qsubq_s8)
+OPENCV_HAL_IMPL_MSA_BIN_OP(+, v_uint16x8, msa_qaddq_u16)
+OPENCV_HAL_IMPL_MSA_BIN_OP(-, v_uint16x8, msa_qsubq_u16)
+OPENCV_HAL_IMPL_MSA_BIN_OP(+, v_int16x8, msa_qaddq_s16)
+OPENCV_HAL_IMPL_MSA_BIN_OP(-, v_int16x8, msa_qsubq_s16)
+OPENCV_HAL_IMPL_MSA_BIN_OP(+, v_int32x4, msa_addq_s32)
+OPENCV_HAL_IMPL_MSA_BIN_OP(-, v_int32x4, msa_subq_s32)
+OPENCV_HAL_IMPL_MSA_BIN_OP(*, v_int32x4, msa_mulq_s32)
+OPENCV_HAL_IMPL_MSA_BIN_OP(+, v_uint32x4, msa_addq_u32)
+OPENCV_HAL_IMPL_MSA_BIN_OP(-, v_uint32x4, msa_subq_u32)
+OPENCV_HAL_IMPL_MSA_BIN_OP(*, v_uint32x4, msa_mulq_u32)
+OPENCV_HAL_IMPL_MSA_BIN_OP(+, v_float32x4, msa_addq_f32)
+OPENCV_HAL_IMPL_MSA_BIN_OP(-, v_float32x4, msa_subq_f32)
+OPENCV_HAL_IMPL_MSA_BIN_OP(*, v_float32x4, msa_mulq_f32)
+OPENCV_HAL_IMPL_MSA_BIN_OP(+, v_int64x2, msa_addq_s64)
+OPENCV_HAL_IMPL_MSA_BIN_OP(-, v_int64x2, msa_subq_s64)
+OPENCV_HAL_IMPL_MSA_BIN_OP(+, v_uint64x2, msa_addq_u64)
+OPENCV_HAL_IMPL_MSA_BIN_OP(-, v_uint64x2, msa_subq_u64)
+OPENCV_HAL_IMPL_MSA_BIN_OP(/, v_float32x4, msa_divq_f32)
+OPENCV_HAL_IMPL_MSA_BIN_OP(+, v_float64x2, msa_addq_f64)
+OPENCV_HAL_IMPL_MSA_BIN_OP(-, v_float64x2, msa_subq_f64)
+OPENCV_HAL_IMPL_MSA_BIN_OP(*, v_float64x2, msa_mulq_f64)
+OPENCV_HAL_IMPL_MSA_BIN_OP(/, v_float64x2, msa_divq_f64)
+
+// saturating multiply 8-bit, 16-bit: operator* widens with v_mul_expand and narrows back with v_pack; operator*= reuses operator*.
+#define OPENCV_HAL_IMPL_MSA_MUL_SAT(_Tpvec, _Tpwvec)         \
+inline _Tpvec operator * (const _Tpvec& a, const _Tpvec& b)  \
+{                                                            \
+    _Tpwvec c, d;                                            \
+    v_mul_expand(a, b, c, d);                                \
+    return v_pack(c, d);                                     \
+}                                                            \
+inline _Tpvec& operator *= (_Tpvec& a, const _Tpvec& b)      \
+{a = a * b; return a; }
+// Arguments: the vector type, and the wide vector type that receives the expanded products.
+OPENCV_HAL_IMPL_MSA_MUL_SAT(v_int8x16,  v_int16x8)
+OPENCV_HAL_IMPL_MSA_MUL_SAT(v_uint8x16, v_uint16x8)
+OPENCV_HAL_IMPL_MSA_MUL_SAT(v_int16x8,  v_int32x4)
+OPENCV_HAL_IMPL_MSA_MUL_SAT(v_uint16x8, v_uint32x4)
+
+//  Multiply and expand (int8 -> int16): c receives the widened products built from the *_lo halves, d those from the *_hi halves.
+inline void v_mul_expand(const v_int8x16& a, const v_int8x16& b,
+                         v_int16x8& c, v_int16x8& d)
+{
+    v16i8 lhs_lo, lhs_hi;
+    v16i8 rhs_lo, rhs_hi;
+    ILVRL_B2_SB(a.val, msa_dupq_n_s8(0), lhs_lo, lhs_hi);
+    ILVRL_B2_SB(b.val, msa_dupq_n_s8(0), rhs_lo, rhs_hi);
+    c.val = msa_mulq_s16(msa_paddlq_s8(lhs_lo), msa_paddlq_s8(rhs_lo));
+    d.val = msa_mulq_s16(msa_paddlq_s8(lhs_hi), msa_paddlq_s8(rhs_hi));
+}
+// Multiply and expand (uint8 -> uint16): c receives the widened products built from the *_lo halves, d those from the *_hi halves.
+inline void v_mul_expand(const v_uint8x16& a, const v_uint8x16& b,
+                         v_uint16x8& c, v_uint16x8& d)
+{
+    v16u8 lhs_lo, lhs_hi;
+    v16u8 rhs_lo, rhs_hi;
+    ILVRL_B2_UB(a.val, msa_dupq_n_u8(0), lhs_lo, lhs_hi);
+    ILVRL_B2_UB(b.val, msa_dupq_n_u8(0), rhs_lo, rhs_hi);
+    c.val = msa_mulq_u16(msa_paddlq_u8(lhs_lo), msa_paddlq_u8(rhs_lo));
+    d.val = msa_mulq_u16(msa_paddlq_u8(lhs_hi), msa_paddlq_u8(rhs_hi));
+}
+// Multiply and expand (int16 -> int32): c receives the widened products built from the *_lo halves, d those from the *_hi halves.
+inline void v_mul_expand(const v_int16x8& a, const v_int16x8& b,
+                         v_int32x4& c, v_int32x4& d)
+{
+    v8i16 lhs_lo, lhs_hi;
+    v8i16 rhs_lo, rhs_hi;
+    ILVRL_H2_SH(a.val, msa_dupq_n_s16(0), lhs_lo, lhs_hi);
+    ILVRL_H2_SH(b.val, msa_dupq_n_s16(0), rhs_lo, rhs_hi);
+    c.val = msa_mulq_s32(msa_paddlq_s16(lhs_lo), msa_paddlq_s16(rhs_lo));
+    d.val = msa_mulq_s32(msa_paddlq_s16(lhs_hi), msa_paddlq_s16(rhs_hi));
+}
+// Multiply and expand (uint16 -> uint32): c receives the widened products built from the *_lo halves, d those from the *_hi halves.
+inline void v_mul_expand(const v_uint16x8& a, const v_uint16x8& b,
+                         v_uint32x4& c, v_uint32x4& d)
+{
+    v8u16 lhs_lo, lhs_hi;
+    v8u16 rhs_lo, rhs_hi;
+    ILVRL_H2_UH(a.val, msa_dupq_n_u16(0), lhs_lo, lhs_hi);
+    ILVRL_H2_UH(b.val, msa_dupq_n_u16(0), rhs_lo, rhs_hi);
+    c.val = msa_mulq_u32(msa_paddlq_u16(lhs_lo), msa_paddlq_u16(rhs_lo));
+    d.val = msa_mulq_u32(msa_paddlq_u16(lhs_hi), msa_paddlq_u16(rhs_hi));
+}
+// Multiply and expand (uint32 -> uint64): c receives the widened products built from the *_lo halves, d those from the *_hi halves.
+inline void v_mul_expand(const v_uint32x4& a, const v_uint32x4& b,
+                         v_uint64x2& c, v_uint64x2& d)
+{
+    v4u32 lhs_lo, lhs_hi;
+    v4u32 rhs_lo, rhs_hi;
+    ILVRL_W2_UW(a.val, msa_dupq_n_u32(0), lhs_lo, lhs_hi);
+    ILVRL_W2_UW(b.val, msa_dupq_n_u32(0), rhs_lo, rhs_hi);
+    c.val = msa_mulq_u64(msa_paddlq_u32(lhs_lo), msa_paddlq_u32(rhs_lo));
+    d.val = msa_mulq_u64(msa_paddlq_u32(lhs_hi), msa_paddlq_u32(rhs_hi));
+}
+// v_mul_hi (int16): widen both operands, multiply as 32-bit lanes, then msa_packr_s32(..., 16) narrows back (presumably keeping the upper 16 bits of each product).
+inline v_int16x8 v_mul_hi(const v_int16x8& a, const v_int16x8& b)
+{
+    v8i16 lhs_lo, lhs_hi;
+    v8i16 rhs_lo, rhs_hi;
+    ILVRL_H2_SH(a.val, msa_dupq_n_s16(0), lhs_lo, lhs_hi);
+    ILVRL_H2_SH(b.val, msa_dupq_n_s16(0), rhs_lo, rhs_hi);
+    v4i32 wide_lo = msa_mulq_s32(msa_paddlq_s16(lhs_lo), msa_paddlq_s16(rhs_lo));
+    v4i32 wide_hi = msa_mulq_s32(msa_paddlq_s16(lhs_hi), msa_paddlq_s16(rhs_hi));
+    return v_int16x8(msa_packr_s32(wide_lo, wide_hi, 16));
+}
+// v_mul_hi (uint16): widen both operands, multiply as 32-bit lanes, then msa_packr_u32(..., 16) narrows back (presumably keeping the upper 16 bits of each product).
+inline v_uint16x8 v_mul_hi(const v_uint16x8& a, const v_uint16x8& b)
+{
+    v8u16 lhs_lo, lhs_hi;
+    v8u16 rhs_lo, rhs_hi;
+    ILVRL_H2_UH(a.val, msa_dupq_n_u16(0), lhs_lo, lhs_hi);
+    ILVRL_H2_UH(b.val, msa_dupq_n_u16(0), rhs_lo, rhs_hi);
+    v4u32 wide_lo = msa_mulq_u32(msa_paddlq_u16(lhs_lo), msa_paddlq_u16(rhs_lo));
+    v4u32 wide_hi = msa_mulq_u32(msa_paddlq_u16(lhs_hi), msa_paddlq_u16(rhs_hi));
+    return v_uint16x8(msa_packr_u32(wide_lo, wide_hi, 16));
+}
+
+//////// Dot Product ////////
+
+// 16 >> 32: int16 lanes in, int32 lanes out (msa_dotp_s_w); the c overload hands c to msa_dpadd_s_w as its first operand.
+inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b)
+{ v_int32x4 sums(msa_dotp_s_w(a.val, b.val)); return sums; }
+inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b, const v_int32x4& c)
+{ v_int32x4 sums(msa_dpadd_s_w(c.val , a.val, b.val)); return sums; }
+
+// 32 >> 64: int32 lanes in, int64 lanes out (msa_dotp_s_d); the c overload hands c to msa_dpadd_s_d as its first operand.
+inline v_int64x2 v_dotprod(const v_int32x4& a, const v_int32x4& b)
+{ v_int64x2 sums(msa_dotp_s_d(a.val, b.val)); return sums; }
+inline v_int64x2 v_dotprod(const v_int32x4& a, const v_int32x4& b, const v_int64x2& c)
+{ v_int64x2 sums(msa_dpadd_s_d(c.val , a.val, b.val)); return sums; }
+
+// 8 >> 32 (unsigned): each 16-bit lane is split into its low byte (shl 8, then shr 8) and its high byte (shr 8) before the two dot-product steps.
+inline v_uint32x4 v_dotprod_expand(const v_uint8x16& a, const v_uint8x16& b)
+{
+    v8u16 a_even = msa_shrq_n_u16(msa_shlq_n_u16(MSA_TPV_REINTERPRET(v8u16, a.val), 8), 8);
+    v8u16 b_even = msa_shrq_n_u16(msa_shlq_n_u16(MSA_TPV_REINTERPRET(v8u16, b.val), 8), 8);
+    v8u16 a_odd  = msa_shrq_n_u16(MSA_TPV_REINTERPRET(v8u16, a.val), 8);
+    v8u16 b_odd  = msa_shrq_n_u16(MSA_TPV_REINTERPRET(v8u16, b.val), 8);
+    v4u32 acc    = msa_dotp_u_w(a_even, b_even);
+    return v_uint32x4(msa_dpadd_u_w(acc, a_odd, b_odd));
+}
+inline v_uint32x4 v_dotprod_expand(const v_uint8x16& a, const v_uint8x16& b, const v_uint32x4& c)
+{   // Same low-byte / high-byte split as the two-operand overload, but the first msa_dpadd_u_w starts from c.
+    v8u16 a_even = msa_shrq_n_u16(msa_shlq_n_u16(MSA_TPV_REINTERPRET(v8u16, a.val), 8), 8);
+    v8u16 b_even = msa_shrq_n_u16(msa_shlq_n_u16(MSA_TPV_REINTERPRET(v8u16, b.val), 8), 8);
+    v8u16 a_odd  = msa_shrq_n_u16(MSA_TPV_REINTERPRET(v8u16, a.val), 8);
+    v8u16 b_odd  = msa_shrq_n_u16(MSA_TPV_REINTERPRET(v8u16, b.val), 8);
+    v4u32 acc    = msa_dpadd_u_w(c.val, a_even, b_even);
+    return v_uint32x4(msa_dpadd_u_w(acc, a_odd, b_odd));
+}
+// 8 >> 32 (signed): msa_dotp_s_h yields 16-bit lanes, which msa_hadd_s32 then folds into 32-bit lanes.
+inline v_int32x4 v_dotprod_expand(const v_int8x16& a, const v_int8x16& b)
+{
+    v8i16 pair_sums = msa_dotp_s_h(a.val, b.val);
+    return v_int32x4(msa_hadd_s32(pair_sums, pair_sums));
+}
+// Accumulating overload: the two-operand result plus c.
+inline v_int32x4 v_dotprod_expand(const v_int8x16& a, const v_int8x16& b, const v_int32x4& c)
+{ v_int32x4 total = v_dotprod_expand(a, b) + c; return total; }
+
+// 16 >> 64 (unsigned): each 32-bit lane is split into its low half (shl 16, then shr 16) and its high half (shr 16) before the two dot-product steps.
+inline v_uint64x2 v_dotprod_expand(const v_uint16x8& a, const v_uint16x8& b)
+{
+    v4u32 a_even = msa_shrq_n_u32(msa_shlq_n_u32(MSA_TPV_REINTERPRET(v4u32, a.val), 16), 16);
+    v4u32 b_even = msa_shrq_n_u32(msa_shlq_n_u32(MSA_TPV_REINTERPRET(v4u32, b.val), 16), 16);
+    v4u32 a_odd  = msa_shrq_n_u32(MSA_TPV_REINTERPRET(v4u32, a.val), 16);
+    v4u32 b_odd  = msa_shrq_n_u32(MSA_TPV_REINTERPRET(v4u32, b.val), 16);
+    v2u64 acc    = msa_dotp_u_d(a_even, b_even);
+    return v_uint64x2(msa_dpadd_u_d(acc, a_odd, b_odd));
+}
+// Accumulating overload: same low-half / high-half split, but the first msa_dpadd_u_d starts from c.
+inline v_uint64x2 v_dotprod_expand(const v_uint16x8& a, const v_uint16x8& b, const v_uint64x2& c)
+{
+    v4u32 a_even = msa_shrq_n_u32(msa_shlq_n_u32(MSA_TPV_REINTERPRET(v4u32, a.val), 16), 16);
+    v4u32 b_even = msa_shrq_n_u32(msa_shlq_n_u32(MSA_TPV_REINTERPRET(v4u32, b.val), 16), 16);
+    v4u32 a_odd  = msa_shrq_n_u32(MSA_TPV_REINTERPRET(v4u32, a.val), 16);
+    v4u32 b_odd  = msa_shrq_n_u32(MSA_TPV_REINTERPRET(v4u32, b.val), 16);
+    v2u64 acc    = msa_dpadd_u_d(c.val, a_even, b_even);
+    return v_uint64x2(msa_dpadd_u_d(acc, a_odd, b_odd));
+}
+// 16 >> 64 (signed): msa_dotp_s_w yields 32-bit lanes, which msa_hadd_s64 then folds into 64-bit lanes; the c overload adds c afterwards.
+inline v_int64x2 v_dotprod_expand(const v_int16x8& a, const v_int16x8& b)
+{
+    v4i32 pair_sums = msa_dotp_s_w(a.val, b.val);
+    return v_int64x2(msa_hadd_s64(pair_sums, pair_sums));
+}
+inline v_int64x2 v_dotprod_expand(const v_int16x8& a, const v_int16x8& b, const v_int64x2& c)
+{ v_int64x2 total = v_dotprod_expand(a, b) + c; return total; }
+
+// 32 >> 64f: the integer v_dotprod result converted with v_cvt_f64; the c overload adds c to that result.
+inline v_float64x2 v_dotprod_expand(const v_int32x4& a, const v_int32x4& b)
+{ v_float64x2 as_f64 = v_cvt_f64(v_dotprod(a, b)); return as_f64; }
+inline v_float64x2 v_dotprod_expand(const v_int32x4& a, const v_int32x4& b, const v_float64x2& c)
+{ v_float64x2 total = v_dotprod_expand(a, b) + c; return total; }
+
+
+//////// Fast Dot Product ////////
+
+// v_dotprod_fast: on this backend every overload simply forwards
+// its arguments to the v_dotprod overload with the same signature.
+// 16 >> 32
+inline v_int32x4 v_dotprod_fast(const v_int16x8& a, const v_int16x8& b) { return v_dotprod(a, b); }
+inline v_int32x4 v_dotprod_fast(const v_int16x8& a, const v_int16x8& b, const v_int32x4& c)
+{ return v_dotprod(a, b, c); }
+
+// 32 >> 64
+inline v_int64x2 v_dotprod_fast(const v_int32x4& a, const v_int32x4& b) { return v_dotprod(a, b); }
+inline v_int64x2 v_dotprod_fast(const v_int32x4& a, const v_int32x4& b, const v_int64x2& c)
+{ return v_dotprod(a, b, c); }
+
+// v_dotprod_expand_fast: on this backend every overload simply forwards
+// its arguments to the v_dotprod_expand overload with the same signature;
+// the three-argument forms pass the accumulator c through unchanged.
+// 8 >> 32 (unsigned)
+inline v_uint32x4 v_dotprod_expand_fast(const v_uint8x16& a, const v_uint8x16& b) { return v_dotprod_expand(a, b); }
+inline v_uint32x4 v_dotprod_expand_fast(const v_uint8x16& a, const v_uint8x16& b, const v_uint32x4& c)
+{ return v_dotprod_expand(a, b, c); }
+// 8 >> 32 (signed)
+inline v_int32x4 v_dotprod_expand_fast(const v_int8x16& a, const v_int8x16& b) { return v_dotprod_expand(a, b); }
+inline v_int32x4 v_dotprod_expand_fast(const v_int8x16& a, const v_int8x16& b, const v_int32x4& c)
+{ return v_dotprod_expand(a, b, c); }
+
+// 16 >> 64 (unsigned)
+inline v_uint64x2 v_dotprod_expand_fast(const v_uint16x8& a, const v_uint16x8& b) { return v_dotprod_expand(a, b); }
+inline v_uint64x2 v_dotprod_expand_fast(const v_uint16x8& a, const v_uint16x8& b, const v_uint64x2& c)
+{ return v_dotprod_expand(a, b, c); }
+// 16 >> 64 (signed)
+inline v_int64x2 v_dotprod_expand_fast(const v_int16x8& a, const v_int16x8& b) { return v_dotprod_expand(a, b); }
+inline v_int64x2 v_dotprod_expand_fast(const v_int16x8& a, const v_int16x8& b, const v_int64x2& c)
+{ return v_dotprod_expand(a, b, c); }
+
+// 32 >> 64f
+inline v_float64x2 v_dotprod_expand_fast(const v_int32x4& a, const v_int32x4& b) { return v_dotprod_expand(a, b); }
+inline v_float64x2 v_dotprod_expand_fast(const v_int32x4& a, const v_int32x4& b, const v_float64x2& c)
+{ return v_dotprod_expand(a, b, c); }
+// Integer bitwise operators: &, | and ^ come from OPENCV_HAL_IMPL_MSA_BIN_OP with msa_andq / msa_orrq / msa_eorq; ~ reinterprets to v16u8, applies msa_mvnq_u8 and reinterprets back.
+#define OPENCV_HAL_IMPL_MSA_LOGIC_OP(_Tpvec, _Tpv, suffix) \
+OPENCV_HAL_IMPL_MSA_BIN_OP(&, _Tpvec, msa_andq_##suffix)   \
+OPENCV_HAL_IMPL_MSA_BIN_OP(|, _Tpvec, msa_orrq_##suffix)   \
+OPENCV_HAL_IMPL_MSA_BIN_OP(^, _Tpvec, msa_eorq_##suffix)   \
+inline _Tpvec operator ~ (const _Tpvec& a) \
+{ \
+    return _Tpvec(MSA_TPV_REINTERPRET(_Tpv, msa_mvnq_u8(MSA_TPV_REINTERPRET(v16u8, a.val)))); \
+}
+// Arguments: vector type, its raw MSA vector type, intrinsic suffix.
+OPENCV_HAL_IMPL_MSA_LOGIC_OP(v_uint8x16, v16u8, u8)
+OPENCV_HAL_IMPL_MSA_LOGIC_OP(v_int8x16, v16i8, s8)
+OPENCV_HAL_IMPL_MSA_LOGIC_OP(v_uint16x8, v8u16, u16)
+OPENCV_HAL_IMPL_MSA_LOGIC_OP(v_int16x8, v8i16, s16)
+OPENCV_HAL_IMPL_MSA_LOGIC_OP(v_uint32x4, v4u32, u32)
+OPENCV_HAL_IMPL_MSA_LOGIC_OP(v_int32x4, v4i32, s32)
+OPENCV_HAL_IMPL_MSA_LOGIC_OP(v_uint64x2, v2u64, u64)
+OPENCV_HAL_IMPL_MSA_LOGIC_OP(v_int64x2, v2i64, s64)
+// Bitwise operator and compound assignment for v_float32x4: operands are reinterpreted as v4i32, combined with `intrin`, and the result is reinterpreted back to v4f32.
+#define OPENCV_HAL_IMPL_MSA_FLT_BIT_OP(bin_op, intrin) \
+inline v_float32x4 operator bin_op (const v_float32x4& a, const v_float32x4& b) \
+{ \
+    return v_float32x4(MSA_TPV_REINTERPRET(v4f32, intrin(MSA_TPV_REINTERPRET(v4i32, a.val), MSA_TPV_REINTERPRET(v4i32, b.val)))); \
+} \
+inline v_float32x4& operator bin_op##= (v_float32x4& a, const v_float32x4& b) \
+{ \
+    a.val = MSA_TPV_REINTERPRET(v4f32, intrin(MSA_TPV_REINTERPRET(v4i32, a.val), MSA_TPV_REINTERPRET(v4i32, b.val))); \
+    return a; \
+}
+// &, | and ^ (plus their compound forms) for v_float32x4.
+OPENCV_HAL_IMPL_MSA_FLT_BIT_OP(&, msa_andq_s32)
+OPENCV_HAL_IMPL_MSA_FLT_BIT_OP(|, msa_orrq_s32)
+OPENCV_HAL_IMPL_MSA_FLT_BIT_OP(^, msa_eorq_s32)
+// operator~ for v_float32x4: view the lanes as v4i32, apply msa_mvnq_s32, view the result as v4f32 again.
+inline v_float32x4 operator ~ (const v_float32x4& a) {
+    v_float32x4 flipped(MSA_TPV_REINTERPRET(v4f32, msa_mvnq_s32(MSA_TPV_REINTERPRET(v4i32, a.val))));
+    return flipped;
+}
+
+/* v_abs for signed integer vectors: msa_absq_<ssuffix> on the signed value, then v_reinterpret_as_<usuffix> to the unsigned vector type */
+#define OPENCV_HAL_IMPL_MSA_ABS(_Tpuvec, _Tpsvec, usuffix, ssuffix) \
+inline _Tpuvec v_abs(const _Tpsvec& a) \
+{ \
+    return v_reinterpret_as_##usuffix(_Tpsvec(msa_absq_##ssuffix(a.val))); \
+}
+// Arguments: unsigned result type, signed input type, unsigned suffix, signed suffix.
+OPENCV_HAL_IMPL_MSA_ABS(v_uint8x16, v_int8x16, u8, s8)
+OPENCV_HAL_IMPL_MSA_ABS(v_uint16x8, v_int16x8, u16, s16)
+OPENCV_HAL_IMPL_MSA_ABS(v_uint32x4, v_int32x4, u32, s32)
+
+/* v_abs(float), v_sqrt, v_invsqrt: defines the unary function `func`, which forwards a.val to `intrin` and wraps the result */
+#define OPENCV_HAL_IMPL_MSA_BASIC_FUNC(_Tpvec, func, intrin) \
+inline _Tpvec func(const _Tpvec& a) \
+{ \
+    return _Tpvec(intrin(a.val)); \
+}
+// Instantiations for v_float32x4 and v_float64x2.
+OPENCV_HAL_IMPL_MSA_BASIC_FUNC(v_float32x4, v_abs, msa_absq_f32)
+OPENCV_HAL_IMPL_MSA_BASIC_FUNC(v_float64x2, v_abs, msa_absq_f64)
+OPENCV_HAL_IMPL_MSA_BASIC_FUNC(v_float32x4, v_sqrt, msa_sqrtq_f32)
+OPENCV_HAL_IMPL_MSA_BASIC_FUNC(v_float32x4, v_invsqrt, msa_rsqrtq_f32)
+OPENCV_HAL_IMPL_MSA_BASIC_FUNC(v_float64x2, v_sqrt, msa_sqrtq_f64)
+OPENCV_HAL_IMPL_MSA_BASIC_FUNC(v_float64x2, v_invsqrt, msa_rsqrtq_f64)
+// Bitwise operator and compound assignment for v_float64x2: operands are reinterpreted as v2i64, combined with `intrin`, and the result is reinterpreted back to v2f64.
+#define OPENCV_HAL_IMPL_MSA_DBL_BIT_OP(bin_op, intrin) \
+inline v_float64x2 operator bin_op (const v_float64x2& a, const v_float64x2& b) \
+{ \
+    return v_float64x2(MSA_TPV_REINTERPRET(v2f64, intrin(MSA_TPV_REINTERPRET(v2i64, a.val), MSA_TPV_REINTERPRET(v2i64, b.val)))); \
+} \
+inline v_float64x2& operator bin_op##= (v_float64x2& a, const v_float64x2& b) \
+{ \
+    a.val = MSA_TPV_REINTERPRET(v2f64, intrin(MSA_TPV_REINTERPRET(v2i64, a.val), MSA_TPV_REINTERPRET(v2i64, b.val))); \
+    return a; \
+}
+// &, | and ^ (plus their compound forms) for v_float64x2.
+OPENCV_HAL_IMPL_MSA_DBL_BIT_OP(&, msa_andq_s64)
+OPENCV_HAL_IMPL_MSA_DBL_BIT_OP(|, msa_orrq_s64)
+OPENCV_HAL_IMPL_MSA_DBL_BIT_OP(^, msa_eorq_s64)
+// operator~ for v_float64x2: routed through a v4i32 view and msa_mvnq_s32 (the lane width should not matter for a bitwise NOT), then viewed as v2f64 again.
+inline v_float64x2 operator ~ (const v_float64x2& a) {
+    v_float64x2 flipped(MSA_TPV_REINTERPRET(v2f64, msa_mvnq_s32(MSA_TPV_REINTERPRET(v4i32, a.val))));
+    return flipped;
+}
+
+// TODO: exp, log, sin, cos
+// Defines func(a, b) for _Tpvec as a direct call of `intrin` on the two .val members, wrapped back into _Tpvec.
+#define OPENCV_HAL_IMPL_MSA_BIN_FUNC(_Tpvec, func, intrin) \
+inline _Tpvec func(const _Tpvec& a, const _Tpvec& b) \
+{ \
+    return _Tpvec(intrin(a.val, b.val)); \
+}
+// v_min / v_max for the 8-, 16- and 32-bit integer vectors and for both floating-point vectors.
+OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_uint8x16, v_min, msa_minq_u8)
+OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_uint8x16, v_max, msa_maxq_u8)
+OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_int8x16, v_min, msa_minq_s8)
+OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_int8x16, v_max, msa_maxq_s8)
+OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_uint16x8, v_min, msa_minq_u16)
+OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_uint16x8, v_max, msa_maxq_u16)
+OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_int16x8, v_min, msa_minq_s16)
+OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_int16x8, v_max, msa_maxq_s16)
+OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_uint32x4, v_min, msa_minq_u32)
+OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_uint32x4, v_max, msa_maxq_u32)
+OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_int32x4, v_min, msa_minq_s32)
+OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_int32x4, v_max, msa_maxq_s32)
+OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_float32x4, v_min, msa_minq_f32)
+OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_float32x4, v_max, msa_maxq_f32)
+OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_float64x2, v_min, msa_minq_f64)
+OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_float64x2, v_max, msa_maxq_f64)
+// Comparison operators (==, !=, <, >, <=, >=): the msa_c*q_<suffix> result is reinterpreted to the operand's own raw type _Tpv; != is msa_mvnq_<not_suffix> applied to the == result.
+#define OPENCV_HAL_IMPL_MSA_INT_CMP_OP(_Tpvec, _Tpv, suffix, not_suffix) \
+inline _Tpvec operator == (const _Tpvec& a, const _Tpvec& b) \
+{ return _Tpvec(MSA_TPV_REINTERPRET(_Tpv, msa_ceqq_##suffix(a.val, b.val))); } \
+inline _Tpvec operator != (const _Tpvec& a, const _Tpvec& b) \
+{ return _Tpvec(MSA_TPV_REINTERPRET(_Tpv, msa_mvnq_##not_suffix(msa_ceqq_##suffix(a.val, b.val)))); } \
+inline _Tpvec operator < (const _Tpvec& a, const _Tpvec& b) \
+{ return _Tpvec(MSA_TPV_REINTERPRET(_Tpv, msa_cltq_##suffix(a.val, b.val))); } \
+inline _Tpvec operator > (const _Tpvec& a, const _Tpvec& b) \
+{ return _Tpvec(MSA_TPV_REINTERPRET(_Tpv, msa_cgtq_##suffix(a.val, b.val))); } \
+inline _Tpvec operator <= (const _Tpvec& a, const _Tpvec& b) \
+{ return _Tpvec(MSA_TPV_REINTERPRET(_Tpv, msa_cleq_##suffix(a.val, b.val))); } \
+inline _Tpvec operator >= (const _Tpvec& a, const _Tpvec& b) \
+{ return _Tpvec(MSA_TPV_REINTERPRET(_Tpv, msa_cgeq_##suffix(a.val, b.val))); }
+// Despite the INT in the macro name, the v_float32x4 and v_float64x2 instantiations below use it as well.
+OPENCV_HAL_IMPL_MSA_INT_CMP_OP(v_uint8x16, v16u8, u8, u8)
+OPENCV_HAL_IMPL_MSA_INT_CMP_OP(v_int8x16, v16i8, s8, u8)
+OPENCV_HAL_IMPL_MSA_INT_CMP_OP(v_uint16x8, v8u16, u16, u16)
+OPENCV_HAL_IMPL_MSA_INT_CMP_OP(v_int16x8, v8i16, s16, u16)
+OPENCV_HAL_IMPL_MSA_INT_CMP_OP(v_uint32x4, v4u32, u32, u32)
+OPENCV_HAL_IMPL_MSA_INT_CMP_OP(v_int32x4, v4i32, s32, u32)
+OPENCV_HAL_IMPL_MSA_INT_CMP_OP(v_float32x4, v4f32, f32, u32)
+OPENCV_HAL_IMPL_MSA_INT_CMP_OP(v_uint64x2, v2u64, u64, u64)
+OPENCV_HAL_IMPL_MSA_INT_CMP_OP(v_int64x2, v2i64, s64, u64)
+OPENCV_HAL_IMPL_MSA_INT_CMP_OP(v_float64x2, v2f64, f64, u64)
+// v_not_nan: compares every lane with itself via msa_ceqq_* (a NaN lane never compares equal to itself) and returns the mask in the operand's own type.
+inline v_float32x4 v_not_nan(const v_float32x4& a)
+{ v_float32x4 mask(MSA_TPV_REINTERPRET(v4f32, msa_ceqq_f32(a.val, a.val))); return mask; }
+inline v_float64x2 v_not_nan(const v_float64x2& a)
+{ v_float64x2 mask(MSA_TPV_REINTERPRET(v2f64, msa_ceqq_f64(a.val, a.val))); return mask; }
+// v_add_wrap / v_sub_wrap / v_mul_wrap for 8- and 16-bit lanes, via the plain msa_addq / msa_subq / msa_mulq intrinsics (presumably wrap-around, judging by the names).
+OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_uint8x16, v_add_wrap, msa_addq_u8)
+OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_int8x16, v_add_wrap, msa_addq_s8)
+OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_uint16x8, v_add_wrap, msa_addq_u16)
+OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_int16x8, v_add_wrap, msa_addq_s16)
+OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_uint8x16, v_sub_wrap, msa_subq_u8)
+OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_int8x16, v_sub_wrap, msa_subq_s8)
+OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_uint16x8, v_sub_wrap, msa_subq_u16)
+OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_int16x8, v_sub_wrap, msa_subq_s16)
+OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_uint8x16, v_mul_wrap, msa_mulq_u8)
+OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_int8x16, v_mul_wrap, msa_mulq_s8)
+OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_uint16x8, v_mul_wrap, msa_mulq_u16)
+OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_int16x8, v_mul_wrap, msa_mulq_s16)
+// v_absdiff for unsigned integer and floating-point vectors (same vector type in and out).
+OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_uint8x16, v_absdiff, msa_abdq_u8)
+OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_uint16x8, v_absdiff, msa_abdq_u16)
+OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_uint32x4, v_absdiff, msa_abdq_u32)
+OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_float32x4, v_absdiff, msa_abdq_f32)
+OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_float64x2, v_absdiff, msa_abdq_f64)
+
+/** Saturating absolute difference for signed 8- and 16-bit lanes; the result keeps the signed vector type **/
+OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_int8x16, v_absdiffs, msa_qabdq_s8)
+OPENCV_HAL_IMPL_MSA_BIN_FUNC(v_int16x8, v_absdiffs, msa_qabdq_s16)
+// Like OPENCV_HAL_IMPL_MSA_BIN_FUNC, but the intrinsic result is reinterpreted to _Tpv and wrapped in a different vector type, _Tpvec2.
+#define OPENCV_HAL_IMPL_MSA_BIN_FUNC2(_Tpvec, _Tpvec2, _Tpv, func, intrin) \
+inline _Tpvec2 func(const _Tpvec& a, const _Tpvec& b) \
+{ \
+    return _Tpvec2(MSA_TPV_REINTERPRET(_Tpv, intrin(a.val, b.val))); \
+}
+// v_absdiff on signed integer vectors returns the unsigned vector type of the same lane width.
+OPENCV_HAL_IMPL_MSA_BIN_FUNC2(v_int8x16, v_uint8x16, v16u8, v_absdiff, msa_abdq_s8)
+OPENCV_HAL_IMPL_MSA_BIN_FUNC2(v_int16x8, v_uint16x8, v8u16, v_absdiff, msa_abdq_s16)
+OPENCV_HAL_IMPL_MSA_BIN_FUNC2(v_int32x4, v_uint32x4, v4u32, v_absdiff, msa_abdq_s32)
+
+/* v_magnitude, v_sqr_magnitude, v_fma, v_muladd. v_magnitude: v_sqrt of msa_mlaq_f32(a*a, b, b), presumably sqrt(a*a + b*b) per lane */
+inline v_float32x4 v_magnitude(const v_float32x4& a, const v_float32x4& b)
+{
+    return v_sqrt(v_float32x4(
+        msa_mlaq_f32(msa_mulq_f32(a.val, a.val), b.val, b.val)));
+}
+// v_sqr_magnitude (float32): msa_mlaq_f32(a*a, b, b) without the square root, presumably a*a + b*b per lane.
+inline v_float32x4 v_sqr_magnitude(const v_float32x4& a, const v_float32x4& b) {
+    v_float32x4 sq_sum(msa_mlaq_f32(msa_mulq_f32(a.val, a.val), b.val, b.val));
+    return sq_sum;
+}
+// v_fma(a, b, c) on float32 lanes: forwards to msa_mlaq_f32 with c as the first operand (presumably a * b + c per lane).
+inline v_float32x4 v_fma(const v_float32x4& a, const v_float32x4& b, const v_float32x4& c) {
+    v_float32x4 fused(msa_mlaq_f32(c.val, a.val, b.val));
+    return fused;
+}
+// v_fma(a, b, c) on int32 lanes: same argument order, using msa_mlaq_s32.
+inline v_int32x4 v_fma(const v_int32x4& a, const v_int32x4& b, const v_int32x4& c) {
+    v_int32x4 fused(msa_mlaq_s32(c.val, a.val, b.val));
+    return fused;
+}
+// v_muladd on float32 lanes: identical to v_fma(a, b, c) on this backend.
+inline v_float32x4 v_muladd(const v_float32x4& a, const v_float32x4& b, const v_float32x4& c) {
+    v_float32x4 result = v_fma(a, b, c);
+    return result;
+}
+// v_muladd on int32 lanes: identical to v_fma(a, b, c) on this backend.
+inline v_int32x4 v_muladd(const v_int32x4& a, const v_int32x4& b, const v_int32x4& c) {
+    v_int32x4 result = v_fma(a, b, c);
+    return result;
+}
+// v_magnitude (float64): v_sqrt of msa_mlaq_f64(a*a, b, b), presumably sqrt(a*a + b*b) per lane.
+inline v_float64x2 v_magnitude(const v_float64x2& a, const v_float64x2& b)
+{
+    return v_sqrt(v_float64x2(
+        msa_mlaq_f64(msa_mulq_f64(a.val, a.val), b.val, b.val)));
+}
+// v_sqr_magnitude (float64): msa_mlaq_f64(a*a, b, b) without the square root, presumably a*a + b*b per lane.
+inline v_float64x2 v_sqr_magnitude(const v_float64x2& a, const v_float64x2& b) {
+    v_float64x2 sq_sum(msa_mlaq_f64(msa_mulq_f64(a.val, a.val), b.val, b.val));
+    return sq_sum;
+}
+// v_fma(a, b, c) on float64 lanes: forwards to msa_mlaq_f64 with c as the first operand (presumably a * b + c per lane).
+inline v_float64x2 v_fma(const v_float64x2& a, const v_float64x2& b, const v_float64x2& c) {
+    v_float64x2 fused(msa_mlaq_f64(c.val, a.val, b.val));
+    return fused;
+}
+// v_muladd on float64 lanes: identical to v_fma(a, b, c) on this backend.
+inline v_float64x2 v_muladd(const v_float64x2& a, const v_float64x2& b, const v_float64x2& c) {
+    v_float64x2 result = v_fma(a, b, c);
+    return result;
+}
+
+// trade efficiency for convenience: operator<< / operator>> take a run-time count and splat it with msa_dupq_n first; v_shl / v_shr / v_rshr take the count as template argument n and call the msa_*_n_ intrinsics.
+#define OPENCV_HAL_IMPL_MSA_SHIFT_OP(_Tpvec, suffix, _Tps, ssuffix) \
+inline _Tpvec operator << (const _Tpvec& a, int n) \
+{ return _Tpvec(msa_shlq_##suffix(a.val, msa_dupq_n_##ssuffix((_Tps)n))); } \
+inline _Tpvec operator >> (const _Tpvec& a, int n) \
+{ return _Tpvec(msa_shrq_##suffix(a.val, msa_dupq_n_##ssuffix((_Tps)n))); } \
+template<int n> inline _Tpvec v_shl(const _Tpvec& a) \
+{ return _Tpvec(msa_shlq_n_##suffix(a.val, n)); } \
+template<int n> inline _Tpvec v_shr(const _Tpvec& a) \
+{ return _Tpvec(msa_shrq_n_##suffix(a.val, n)); } \
+template<int n> inline _Tpvec v_rshr(const _Tpvec& a) \
+{ return _Tpvec(msa_rshrq_n_##suffix(a.val, n)); }
+// Arguments: vector type, intrinsic suffix, scalar type the count is cast to, suffix of the splat intrinsic.
+OPENCV_HAL_IMPL_MSA_SHIFT_OP(v_uint8x16, u8, schar, s8)
+OPENCV_HAL_IMPL_MSA_SHIFT_OP(v_int8x16, s8, schar, s8)
+OPENCV_HAL_IMPL_MSA_SHIFT_OP(v_uint16x8, u16, short, s16)
+OPENCV_HAL_IMPL_MSA_SHIFT_OP(v_int16x8, s16, short, s16)
+OPENCV_HAL_IMPL_MSA_SHIFT_OP(v_uint32x4, u32, int, s32)
+OPENCV_HAL_IMPL_MSA_SHIFT_OP(v_int32x4, s32, int, s32)
+OPENCV_HAL_IMPL_MSA_SHIFT_OP(v_uint64x2, u64, int64, s64)
+OPENCV_HAL_IMPL_MSA_SHIFT_OP(v_int64x2, s64, int64, s64)
+
+/* v_rotate_right, v_rotate_left: built on msa_extq_<suffix>; the one-vector forms pair `a` with a zero vector, the two-vector forms pair it with `b`; v_rotate_left<0> is specialised to return `a` unchanged */
+#define OPENCV_HAL_IMPL_MSA_ROTATE_OP(_Tpvec, _Tpv, _Tpvs, suffix) \
+template<int n> inline _Tpvec v_rotate_right(const _Tpvec& a) \
+{ \
+    return _Tpvec(MSA_TPV_REINTERPRET(_Tpv, msa_extq_##suffix(MSA_TPV_REINTERPRET(_Tpvs, a.val), msa_dupq_n_##suffix(0), n))); \
+} \
+template<int n> inline _Tpvec v_rotate_left(const _Tpvec& a) \
+{ \
+    return _Tpvec(MSA_TPV_REINTERPRET(_Tpv, msa_extq_##suffix(msa_dupq_n_##suffix(0), MSA_TPV_REINTERPRET(_Tpvs, a.val), _Tpvec::nlanes - n))); \
+} \
+template<> inline _Tpvec v_rotate_left<0>(const _Tpvec& a) \
+{ \
+    return a; \
+} \
+template<int n> inline _Tpvec v_rotate_right(const _Tpvec& a, const _Tpvec& b) \
+{ \
+    return _Tpvec(MSA_TPV_REINTERPRET(_Tpv, msa_extq_##suffix(MSA_TPV_REINTERPRET(_Tpvs, a.val), MSA_TPV_REINTERPRET(_Tpvs, b.val), n))); \
+} \
+template<int n> inline _Tpvec v_rotate_left(const _Tpvec& a, const _Tpvec& b) \
+{ \
+    return _Tpvec(MSA_TPV_REINTERPRET(_Tpv, msa_extq_##suffix(MSA_TPV_REINTERPRET(_Tpvs, b.val), MSA_TPV_REINTERPRET(_Tpvs, a.val), _Tpvec::nlanes - n))); \
+} \
+template<> inline _Tpvec v_rotate_left<0>(const _Tpvec& a, const _Tpvec& b) \
+{ \
+    CV_UNUSED(b); \
+    return a; \
+}
+// Arguments: vector type, its raw vector type, the signed raw type handed to msa_extq, and that signed type's suffix.
+OPENCV_HAL_IMPL_MSA_ROTATE_OP(v_uint8x16, v16u8, v16i8, s8)
+OPENCV_HAL_IMPL_MSA_ROTATE_OP(v_int8x16, v16i8, v16i8, s8)
+OPENCV_HAL_IMPL_MSA_ROTATE_OP(v_uint16x8, v8u16, v8i16, s16)
+OPENCV_HAL_IMPL_MSA_ROTATE_OP(v_int16x8, v8i16, v8i16, s16)
+OPENCV_HAL_IMPL_MSA_ROTATE_OP(v_uint32x4, v4u32, v4i32, s32)
+OPENCV_HAL_IMPL_MSA_ROTATE_OP(v_int32x4, v4i32, v4i32, s32)
+OPENCV_HAL_IMPL_MSA_ROTATE_OP(v_float32x4, v4f32, v4i32, s32)
+OPENCV_HAL_IMPL_MSA_ROTATE_OP(v_uint64x2, v2u64, v2i64, s64)
+OPENCV_HAL_IMPL_MSA_ROTATE_OP(v_int64x2, v2i64, v2i64, s64)
+OPENCV_HAL_IMPL_MSA_ROTATE_OP(v_float64x2, v2f64, v2i64, s64)
+// Load/store family. The aligned, nocache and StoreMode variants call the same msa_ld1q / msa_st1q intrinsics as plain v_load / v_store; v_store_low / v_store_high copy half of the lanes with a scalar loop.
+#define OPENCV_HAL_IMPL_MSA_LOADSTORE_OP(_Tpvec, _Tp, suffix) \
+inline _Tpvec v_load(const _Tp* ptr) \
+{ return _Tpvec(msa_ld1q_##suffix(ptr)); } \
+inline _Tpvec v_load_aligned(const _Tp* ptr) \
+{ return _Tpvec(msa_ld1q_##suffix(ptr)); } \
+inline _Tpvec v_load_low(const _Tp* ptr) \
+{ return _Tpvec(msa_combine_##suffix(msa_ld1_##suffix(ptr), msa_dup_n_##suffix((_Tp)0))); } \
+inline _Tpvec v_load_halves(const _Tp* ptr0, const _Tp* ptr1) \
+{ return _Tpvec(msa_combine_##suffix(msa_ld1_##suffix(ptr0), msa_ld1_##suffix(ptr1))); } \
+inline void v_store(_Tp* ptr, const _Tpvec& a) \
+{ msa_st1q_##suffix(ptr, a.val); } \
+inline void v_store_aligned(_Tp* ptr, const _Tpvec& a) \
+{ msa_st1q_##suffix(ptr, a.val); } \
+inline void v_store_aligned_nocache(_Tp* ptr, const _Tpvec& a) \
+{ msa_st1q_##suffix(ptr, a.val); } \
+inline void v_store(_Tp* ptr, const _Tpvec& a, hal::StoreMode /*mode*/) \
+{ msa_st1q_##suffix(ptr, a.val); } \
+inline void v_store_low(_Tp* ptr, const _Tpvec& a) \
+{ \
+    int n  = _Tpvec::nlanes; \
+    for( int i = 0; i < (n/2); i++ ) \
+        ptr[i] = a.val[i]; \
+} \
+inline void v_store_high(_Tp* ptr, const _Tpvec& a) \
+{ \
+    int n  = _Tpvec::nlanes; \
+    for( int i = 0; i < (n/2); i++ ) \
+        ptr[i] = a.val[i+(n/2)]; \
+}
+// Arguments: vector type, element type of the pointer, intrinsic suffix.
+OPENCV_HAL_IMPL_MSA_LOADSTORE_OP(v_uint8x16, uchar, u8)
+OPENCV_HAL_IMPL_MSA_LOADSTORE_OP(v_int8x16, schar, s8)
+OPENCV_HAL_IMPL_MSA_LOADSTORE_OP(v_uint16x8, ushort, u16)
+OPENCV_HAL_IMPL_MSA_LOADSTORE_OP(v_int16x8, short, s16)
+OPENCV_HAL_IMPL_MSA_LOADSTORE_OP(v_uint32x4, unsigned, u32)
+OPENCV_HAL_IMPL_MSA_LOADSTORE_OP(v_int32x4, int, s32)
+OPENCV_HAL_IMPL_MSA_LOADSTORE_OP(v_uint64x2, uint64, u64)
+OPENCV_HAL_IMPL_MSA_LOADSTORE_OP(v_int64x2, int64, s64)
+OPENCV_HAL_IMPL_MSA_LOADSTORE_OP(v_float32x4, float, f32)
+OPENCV_HAL_IMPL_MSA_LOADSTORE_OP(v_float64x2, double, f64)
+
+
+/** Reverse **/
+inline v_uint8x16 v_reverse(const v_uint8x16 &a)
+{
+    const v16i8 order = (v16i8)((v2i64){0x08090A0B0C0D0E0F, 0x0001020304050607}); /* shuffle control; lists indices 15..0 from the low byte up, assuming little-endian lane layout */
+    return v_uint8x16((v16u8)__builtin_msa_vshf_b(order, msa_dupq_n_s8(0), (v16i8)a.val));
+}
+
+inline v_int8x16 v_reverse(const v_int8x16 &a)
+{ return v_reinterpret_as_s8(v_reverse(v_reinterpret_as_u8(a))); } /* reuses the unsigned overload on the same bits */
+
+inline v_uint16x8 v_reverse(const v_uint16x8 &a)
+{
+    const v8i16 order = (v8i16)((v2i64){0x0004000500060007, 0x0000000100020003}); /* shuffle control; lists indices 7..0 from the low halfword up, assuming little-endian lane layout */
+    return v_uint16x8((v8u16)__builtin_msa_vshf_h(order, msa_dupq_n_s16(0), (v8i16)a.val));
+}
+
+inline v_int16x8 v_reverse(const v_int16x8 &a)
+{ return v_reinterpret_as_s16(v_reverse(v_reinterpret_as_u16(a))); } /* reuses the unsigned overload on the same bits */
+
+inline v_uint32x4 v_reverse(const v_uint32x4 &a)
+{
+    /* Gather the source lanes back to front, then wrap them in the result type. */
+    const v4u32 flipped = {
+        a.val[3], a.val[2],
+        a.val[1], a.val[0]
+    };
+    return v_uint32x4(flipped);
+}
+
+inline v_int32x4 v_reverse(const v_int32x4 &a)
+{ return v_reinterpret_as_s32(v_reverse(v_reinterpret_as_u32(a))); } /* reuses the unsigned overload on the same bits */
+
+inline v_float32x4 v_reverse(const v_float32x4 &a)
+{ return v_reinterpret_as_f32(v_reverse(v_reinterpret_as_u32(a))); } /* reuses the unsigned overload on the same bits */
+
+inline v_uint64x2 v_reverse(const v_uint64x2 &a)
+{
+    /* Two lanes only: exchanging them is the whole reversal. */
+    const v2u64 swapped = {a.val[1],
+                           a.val[0]};
+    return v_uint64x2(swapped);
+}
+
+inline v_int64x2 v_reverse(const v_int64x2 &a)
+{ return v_reinterpret_as_s64(v_reverse(v_reinterpret_as_u64(a))); } /* reuses the unsigned overload on the same bits */
+
+inline v_float64x2 v_reverse(const v_float64x2 &a)
+{ return v_reinterpret_as_f64(v_reverse(v_reinterpret_as_u64(a))); } /* reuses the unsigned overload on the same bits */
+
+
+#define OPENCV_HAL_IMPL_MSA_REDUCE_OP_8U(func, cfunc) /* horizontal min/max of a v_uint16x8, widening to 32 then 64 bits on the way */ \
+inline unsigned short v_reduce_##func(const v_uint16x8& a) \
+{ \
+    v8u16 a_lo, a_hi; \
+    ILVRL_H2_UH(a.val, msa_dupq_n_u16(0), a_lo, a_hi); /* interleave a with a zero vector into two halves */ \
+    v4u32 b = msa_##func##q_u32(msa_paddlq_u16(a_lo), msa_paddlq_u16(a_hi)); /* lane-wise func over the widened halves */ \
+    v4u32 b_lo, b_hi; \
+    ILVRL_W2_UW(b, msa_dupq_n_u32(0), b_lo, b_hi); /* same interleave-with-zero step, one width up */ \
+    v2u64 c = msa_##func##q_u64(msa_paddlq_u32(b_lo), msa_paddlq_u32(b_hi)); \
+    return (unsigned short)cfunc(c[0], c[1]); /* scalar cfunc over the last two lanes, narrowed back to 16 bits */ \
+}
+
+OPENCV_HAL_IMPL_MSA_REDUCE_OP_8U(max, std::max)
+OPENCV_HAL_IMPL_MSA_REDUCE_OP_8U(min, std::min)
+
+#define OPENCV_HAL_IMPL_MSA_REDUCE_OP_8S(func, cfunc) /* horizontal min/max of a v_int16x8, widening to 32 then 64 bits on the way */ \
+inline short v_reduce_##func(const v_int16x8& a) \
+{ \
+    v8i16 a_lo, a_hi; \
+    ILVRL_H2_SH(a.val, msa_dupq_n_s16(0), a_lo, a_hi); /* interleave a with a zero vector into two halves */ \
+    v4i32 b = msa_##func##q_s32(msa_paddlq_s16(a_lo), msa_paddlq_s16(a_hi)); /* lane-wise func over the widened halves */ \
+    v4i32 b_lo, b_hi; \
+    ILVRL_W2_SW(b, msa_dupq_n_s32(0), b_lo, b_hi); /* same interleave-with-zero step, one width up */ \
+    v2i64 c = msa_##func##q_s64(msa_paddlq_s32(b_lo), msa_paddlq_s32(b_hi)); \
+    return (short)cfunc(c[0], c[1]); /* scalar cfunc over the last two lanes, narrowed back to 16 bits */ \
+}
+
+OPENCV_HAL_IMPL_MSA_REDUCE_OP_8S(max, std::max)
+OPENCV_HAL_IMPL_MSA_REDUCE_OP_8S(min, std::min)
+
+#define OPENCV_HAL_IMPL_MSA_REDUCE_OP_4(_Tpvec, scalartype, func, cfunc) /* horizontal min/max over four lanes using the scalar cfunc */ \
+inline scalartype v_reduce_##func(const _Tpvec& a) \
+{ const scalartype front = cfunc(a.val[0], a.val[1]); /* lanes 0-1 */ \
+  const scalartype back = cfunc(a.val[2], a.val[3]);  /* lanes 2-3 */ \
+  return (scalartype)cfunc(front, back); }
+
+OPENCV_HAL_IMPL_MSA_REDUCE_OP_4(v_uint32x4, unsigned, max, std::max)
+OPENCV_HAL_IMPL_MSA_REDUCE_OP_4(v_uint32x4, unsigned, min, std::min)
+OPENCV_HAL_IMPL_MSA_REDUCE_OP_4(v_int32x4, int, max, std::max)
+OPENCV_HAL_IMPL_MSA_REDUCE_OP_4(v_int32x4, int, min, std::min)
+OPENCV_HAL_IMPL_MSA_REDUCE_OP_4(v_float32x4, float, max, std::max)
+OPENCV_HAL_IMPL_MSA_REDUCE_OP_4(v_float32x4, float, min, std::min)
+
+
+#define OPENCV_HAL_IMPL_MSA_REDUCE_OP_16(_Tpvec, scalartype, _Tpvec2, func) /* horizontal min/max of a 16-lane vector via the 8-lane reduction */ \
+inline scalartype v_reduce_##func(const _Tpvec& a) \
+{ \
+    _Tpvec2 a1, a2; \
+    v_expand(a, a1, a2); /* split into two wider vectors */ \
+    return (scalartype)v_reduce_##func(v_##func(a1, a2)); /* lane-wise func of the halves, then reduce and narrow */ \
+}
+
+OPENCV_HAL_IMPL_MSA_REDUCE_OP_16(v_uint8x16, uchar, v_uint16x8, min)
+OPENCV_HAL_IMPL_MSA_REDUCE_OP_16(v_uint8x16, uchar, v_uint16x8, max)
+OPENCV_HAL_IMPL_MSA_REDUCE_OP_16(v_int8x16, char, v_int16x8, min) // NOTE(review): returns plain char while schar is used for s8 elsewhere in this file -- confirm intended
+OPENCV_HAL_IMPL_MSA_REDUCE_OP_16(v_int8x16, char, v_int16x8, max)
+
+
+
+#define OPENCV_HAL_IMPL_MSA_REDUCE_SUM(_Tpvec, scalartype, suffix) /* horizontal sum: forwards to msa_sum_<suffix> and casts to scalartype */ \
+inline scalartype v_reduce_sum(const _Tpvec& a) \
+{ \
+    return (scalartype)msa_sum_##suffix(a.val); \
+}
+
+OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_uint8x16, unsigned char, u8) // NOTE(review): result is cast to the 8-bit lane type -- confirm wrap-around on large sums is intended
+OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_int8x16, char, s8)
+OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_uint16x8, unsigned short, u16)
+OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_int16x8, short, s16)
+OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_uint32x4, unsigned, u32)
+OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_int32x4, int, s32)
+OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_float32x4, float, f32)
+
+inline uint64 v_reduce_sum(const v_uint64x2& a) // two-lane vectors: the sum is simply lane 0 + lane 1
+{ return (uint64)(msa_getq_lane_u64(a.val, 0) + msa_getq_lane_u64(a.val, 1)); }
+inline int64 v_reduce_sum(const v_int64x2& a)
+{ return (int64)(msa_getq_lane_s64(a.val, 0) + msa_getq_lane_s64(a.val, 1)); }
+inline double v_reduce_sum(const v_float64x2& a)
+{
+    return msa_getq_lane_f64(a.val, 0) + msa_getq_lane_f64(a.val, 1);
+}
+
+/* v_reduce_sum4, v_reduce_sad */
+inline v_float32x4 v_reduce_sum4(const v_float32x4& a, const v_float32x4& b, // sums each of the four inputs horizontally; the per-step lane layout is given in the comments below
+                                 const v_float32x4& c, const v_float32x4& d)
+{
+    v4f32 u0 = msa_addq_f32(MSA_TPV_REINTERPRET(v4f32, msa_ilvevq_s32(MSA_TPV_REINTERPRET(v4i32, b.val), MSA_TPV_REINTERPRET(v4i32, a.val))),
+                            MSA_TPV_REINTERPRET(v4f32, msa_ilvodq_s32(MSA_TPV_REINTERPRET(v4i32, b.val), MSA_TPV_REINTERPRET(v4i32, a.val)))); // a0+a1 b0+b1 a2+a3 b2+b3
+    v4f32 u1 = msa_addq_f32(MSA_TPV_REINTERPRET(v4f32, msa_ilvevq_s32(MSA_TPV_REINTERPRET(v4i32, d.val), MSA_TPV_REINTERPRET(v4i32, c.val))),
+                            MSA_TPV_REINTERPRET(v4f32, msa_ilvodq_s32(MSA_TPV_REINTERPRET(v4i32, d.val), MSA_TPV_REINTERPRET(v4i32, c.val)))); // c0+c1 d0+d1 c2+c3 d2+d3
+
+    return v_float32x4(msa_addq_f32(MSA_TPV_REINTERPRET(v4f32, msa_ilvrq_s64(MSA_TPV_REINTERPRET(v2i64, u1), MSA_TPV_REINTERPRET(v2i64, u0))), // adds the 64-bit halves of u0/u1 interleaved two ways
+                                    MSA_TPV_REINTERPRET(v4f32, msa_ilvlq_s64(MSA_TPV_REINTERPRET(v2i64, u1), MSA_TPV_REINTERPRET(v2i64, u0)))));
+}
+
+inline unsigned v_reduce_sad(const v_uint8x16& a, const v_uint8x16& b) // msa_abdq of the inputs, widened pairwise to 32 bits, then summed
+{
+    v16u8 t0 = msa_abdq_u8(a.val, b.val);
+    v8u16 t1 = msa_paddlq_u8(t0);   // 8 -> 16 bit pairwise add
+    v4u32 t2 = msa_paddlq_u16(t1);  // 16 -> 32 bit pairwise add
+    return msa_sum_u32(t2);
+}
+inline unsigned v_reduce_sad(const v_int8x16& a, const v_int8x16& b)
+{
+    v16u8 t0 = MSA_TPV_REINTERPRET(v16u8, msa_abdq_s8(a.val, b.val)); // signed msa_abdq result is treated as unsigned from here on
+    v8u16 t1 = msa_paddlq_u8(t0);
+    v4u32 t2 = msa_paddlq_u16(t1);
+    return msa_sum_u32(t2);
+}
+inline unsigned v_reduce_sad(const v_uint16x8& a, const v_uint16x8& b)
+{
+    v8u16 t0 = msa_abdq_u16(a.val, b.val);
+    v4u32 t1 = msa_paddlq_u16(t0);  // one widening step is enough for 16-bit lanes
+    return msa_sum_u32(t1);
+}
+inline unsigned v_reduce_sad(const v_int16x8& a, const v_int16x8& b)
+{
+    v8u16 t0 = MSA_TPV_REINTERPRET(v8u16, msa_abdq_s16(a.val, b.val));
+    v4u32 t1 = msa_paddlq_u16(t0);
+    return msa_sum_u32(t1);
+}
+inline unsigned v_reduce_sad(const v_uint32x4& a, const v_uint32x4& b)
+{
+    v4u32 t0 = msa_abdq_u32(a.val, b.val); // already 32-bit: summed directly
+    return msa_sum_u32(t0);
+}
+inline unsigned v_reduce_sad(const v_int32x4& a, const v_int32x4& b)
+{
+    v4u32 t0 = MSA_TPV_REINTERPRET(v4u32, msa_abdq_s32(a.val, b.val));
+    return msa_sum_u32(t0);
+}
+inline float v_reduce_sad(const v_float32x4& a, const v_float32x4& b)
+{
+    v4f32 t0 = msa_abdq_f32(a.val, b.val);
+    return msa_sum_f32(t0); // float overload returns a float sum
+}
+
+/* v_popcount */
+#define OPENCV_HAL_IMPL_MSA_POPCOUNT_SIZE8(_Tpvec) /* per-lane count via msa_cntq_s8; the result is always the unsigned vector type */ \
+inline v_uint8x16 v_popcount(const _Tpvec& a) \
+{ \
+    v16u8 t = MSA_TPV_REINTERPRET(v16u8, msa_cntq_s8(MSA_TPV_REINTERPRET(v16i8, a.val))); \
+    return v_uint8x16(t); \
+}
+OPENCV_HAL_IMPL_MSA_POPCOUNT_SIZE8(v_uint8x16)
+OPENCV_HAL_IMPL_MSA_POPCOUNT_SIZE8(v_int8x16)
+
+#define OPENCV_HAL_IMPL_MSA_POPCOUNT_SIZE16(_Tpvec) /* 16-bit lanes: msa_cntq_s16 */ \
+inline v_uint16x8 v_popcount(const _Tpvec& a) \
+{ \
+    v8u16 t = MSA_TPV_REINTERPRET(v8u16, msa_cntq_s16(MSA_TPV_REINTERPRET(v8i16, a.val))); \
+    return v_uint16x8(t); \
+}
+OPENCV_HAL_IMPL_MSA_POPCOUNT_SIZE16(v_uint16x8)
+OPENCV_HAL_IMPL_MSA_POPCOUNT_SIZE16(v_int16x8)
+
+#define OPENCV_HAL_IMPL_MSA_POPCOUNT_SIZE32(_Tpvec) /* 32-bit lanes: msa_cntq_s32 */ \
+inline v_uint32x4 v_popcount(const _Tpvec& a) \
+{ \
+    v4u32 t = MSA_TPV_REINTERPRET(v4u32, msa_cntq_s32(MSA_TPV_REINTERPRET(v4i32, a.val))); \
+    return v_uint32x4(t); \
+}
+OPENCV_HAL_IMPL_MSA_POPCOUNT_SIZE32(v_uint32x4)
+OPENCV_HAL_IMPL_MSA_POPCOUNT_SIZE32(v_int32x4)
+
+#define OPENCV_HAL_IMPL_MSA_POPCOUNT_SIZE64(_Tpvec) /* 64-bit lanes: msa_cntq_s64 */ \
+inline v_uint64x2 v_popcount(const _Tpvec& a) \
+{ \
+    v2u64 t = MSA_TPV_REINTERPRET(v2u64, msa_cntq_s64(MSA_TPV_REINTERPRET(v2i64, a.val))); \
+    return v_uint64x2(t); \
+}
+OPENCV_HAL_IMPL_MSA_POPCOUNT_SIZE64(v_uint64x2)
+OPENCV_HAL_IMPL_MSA_POPCOUNT_SIZE64(v_int64x2)
+
+inline int v_signmask(const v_uint8x16& a) // packs the top bit of every lane into an int, one bit per lane
+{
+    v8i8 m0 = msa_create_s8(CV_BIG_UINT(0x0706050403020100)); // per-byte shift amounts 0..7
+    v16u8 v0 = msa_shlq_u8(msa_shrq_n_u8(a.val, 7), msa_combine_s8(m0, m0)); // isolate each top bit, then shift it by its byte's amount
+    v8u16 v1 = msa_paddlq_u8(v0);
+    v4u32 v2 = msa_paddlq_u16(v1);
+    v2u64 v3 = msa_paddlq_u32(v2); // pairwise widening adds collapse the bits into two 64-bit lanes
+    return (int)msa_getq_lane_u64(v3, 0) + ((int)msa_getq_lane_u64(v3, 1) << 8); // lane 0 gives bits 0-7, lane 1 gives bits 8-15
+}
+inline int v_signmask(const v_int8x16& a)
+{ return v_signmask(v_reinterpret_as_u8(a)); }
+
+inline int v_signmask(const v_uint16x8& a)
+{
+    v4i16 m0 = msa_create_s16(CV_BIG_UINT(0x0003000200010000)); // per-halfword shift amounts 0..3
+    v8u16 v0 = msa_shlq_u16(msa_shrq_n_u16(a.val, 15), msa_combine_s16(m0, m0));
+    v4u32 v1 = msa_paddlq_u16(v0);
+    v2u64 v2 = msa_paddlq_u32(v1);
+    return (int)msa_getq_lane_u64(v2, 0) + ((int)msa_getq_lane_u64(v2, 1) << 4); // lane 0 gives bits 0-3, lane 1 gives bits 4-7
+}
+inline int v_signmask(const v_int16x8& a)
+{ return v_signmask(v_reinterpret_as_u16(a)); }
+
+inline int v_signmask(const v_uint32x4& a)
+{
+    v2i32 m0 = msa_create_s32(CV_BIG_UINT(0x0000000100000000)); // per-word shift amounts 0..1
+    v4u32 v0 = msa_shlq_u32(msa_shrq_n_u32(a.val, 31), msa_combine_s32(m0, m0));
+    v2u64 v1 = msa_paddlq_u32(v0);
+    return (int)msa_getq_lane_u64(v1, 0) + ((int)msa_getq_lane_u64(v1, 1) << 2); // lane 0 gives bits 0-1, lane 1 gives bits 2-3
+}
+inline int v_signmask(const v_int32x4& a)
+{ return v_signmask(v_reinterpret_as_u32(a)); }
+inline int v_signmask(const v_float32x4& a)
+{ return v_signmask(v_reinterpret_as_u32(a)); }
+
+inline int v_signmask(const v_uint64x2& a)
+{
+    v2u64 v0 = msa_shrq_n_u64(a.val, 63); // only the top bit of each 64-bit lane survives
+    return (int)msa_getq_lane_u64(v0, 0) + ((int)msa_getq_lane_u64(v0, 1) << 1);
+}
+inline int v_signmask(const v_int64x2& a)
+{ return v_signmask(v_reinterpret_as_u64(a)); }
+inline int v_signmask(const v_float64x2& a)
+{ return v_signmask(v_reinterpret_as_u64(a)); }
+
+inline int v_scan_forward(const v_int8x16& a) { return trailingZeros32(v_signmask(a)); } // every overload: trailing-zero count of the lane sign mask
+inline int v_scan_forward(const v_uint8x16& a) { return trailingZeros32(v_signmask(a)); }
+inline int v_scan_forward(const v_int16x8& a) { return trailingZeros32(v_signmask(a)); }
+inline int v_scan_forward(const v_uint16x8& a) { return trailingZeros32(v_signmask(a)); }
+inline int v_scan_forward(const v_int32x4& a) { return trailingZeros32(v_signmask(a)); }
+inline int v_scan_forward(const v_uint32x4& a) { return trailingZeros32(v_signmask(a)); }
+inline int v_scan_forward(const v_float32x4& a) { return trailingZeros32(v_signmask(a)); }
+inline int v_scan_forward(const v_int64x2& a) { return trailingZeros32(v_signmask(a)); }
+inline int v_scan_forward(const v_uint64x2& a) { return trailingZeros32(v_signmask(a)); }
+inline int v_scan_forward(const v_float64x2& a) { return trailingZeros32(v_signmask(a)); } // NOTE(review): result for an all-zero mask depends on trailingZeros32 -- confirm
+
+#define OPENCV_HAL_IMPL_MSA_CHECK_ALLANY(_Tpvec, _Tpvec2, suffix, shift) /* tests the bit selected by `shift` in every lane */ \
+inline bool v_check_all(const v_##_Tpvec& a) \
+{ \
+    _Tpvec2 v0 = msa_shrq_n_##suffix(msa_mvnq_##suffix(a.val), shift); /* msa_mvnq is presumably bitwise NOT, so a lane stays non-zero only if its tested bit was clear */ \
+    v2u64 v1 = MSA_TPV_REINTERPRET(v2u64, v0); \
+    return (msa_getq_lane_u64(v1, 0) | msa_getq_lane_u64(v1, 1)) == 0; /* true when nothing is left in either 64-bit half */ \
+} \
+inline bool v_check_any(const v_##_Tpvec& a) \
+{ \
+    _Tpvec2 v0 = msa_shrq_n_##suffix(a.val, shift); \
+    v2u64 v1 = MSA_TPV_REINTERPRET(v2u64, v0); \
+    return (msa_getq_lane_u64(v1, 0) | msa_getq_lane_u64(v1, 1)) != 0; /* true when any lane kept its tested bit */ \
+}
+
+OPENCV_HAL_IMPL_MSA_CHECK_ALLANY(uint8x16, v16u8, u8, 7)
+OPENCV_HAL_IMPL_MSA_CHECK_ALLANY(uint16x8, v8u16, u16, 15)
+OPENCV_HAL_IMPL_MSA_CHECK_ALLANY(uint32x4, v4u32, u32, 31)
+OPENCV_HAL_IMPL_MSA_CHECK_ALLANY(uint64x2, v2u64, u64, 63)
+
+inline bool v_check_all(const v_int8x16& a) // signed and float overloads forward to the unsigned ones on the same bits
+{ return v_check_all(v_reinterpret_as_u8(a)); }
+inline bool v_check_all(const v_int16x8& a)
+{ return v_check_all(v_reinterpret_as_u16(a)); }
+inline bool v_check_all(const v_int32x4& a)
+{ return v_check_all(v_reinterpret_as_u32(a)); }
+inline bool v_check_all(const v_float32x4& a)
+{ return v_check_all(v_reinterpret_as_u32(a)); }
+
+inline bool v_check_any(const v_int8x16& a)
+{ return v_check_any(v_reinterpret_as_u8(a)); }
+inline bool v_check_any(const v_int16x8& a)
+{ return v_check_any(v_reinterpret_as_u16(a)); }
+inline bool v_check_any(const v_int32x4& a)
+{ return v_check_any(v_reinterpret_as_u32(a)); }
+inline bool v_check_any(const v_float32x4& a)
+{ return v_check_any(v_reinterpret_as_u32(a)); }
+
+inline bool v_check_all(const v_int64x2& a)
+{ return v_check_all(v_reinterpret_as_u64(a)); }
+inline bool v_check_all(const v_float64x2& a)
+{ return v_check_all(v_reinterpret_as_u64(a)); }
+inline bool v_check_any(const v_int64x2& a)
+{ return v_check_any(v_reinterpret_as_u64(a)); }
+inline bool v_check_any(const v_float64x2& a)
+{ return v_check_any(v_reinterpret_as_u64(a)); }
+
+/* v_select */
+#define OPENCV_HAL_IMPL_MSA_SELECT(_Tpvec, _Tpv, _Tpvu) /* v_select(mask, a, b) implemented as a byte-wise msa_bslq_u8 on reinterpreted operands */ \
+inline _Tpvec v_select(const _Tpvec& mask, const _Tpvec& a, const _Tpvec& b) \
+{ \
+    return _Tpvec(MSA_TPV_REINTERPRET(_Tpv, msa_bslq_u8(MSA_TPV_REINTERPRET(_Tpvu, mask.val), /* note the operand order: mask, b, a */ \
+                  MSA_TPV_REINTERPRET(_Tpvu, b.val), MSA_TPV_REINTERPRET(_Tpvu, a.val)))); \
+}
+
+OPENCV_HAL_IMPL_MSA_SELECT(v_uint8x16, v16u8, v16u8)
+OPENCV_HAL_IMPL_MSA_SELECT(v_int8x16, v16i8, v16u8)
+OPENCV_HAL_IMPL_MSA_SELECT(v_uint16x8, v8u16, v16u8)
+OPENCV_HAL_IMPL_MSA_SELECT(v_int16x8, v8i16, v16u8)
+OPENCV_HAL_IMPL_MSA_SELECT(v_uint32x4, v4u32, v16u8)
+OPENCV_HAL_IMPL_MSA_SELECT(v_int32x4, v4i32, v16u8)
+OPENCV_HAL_IMPL_MSA_SELECT(v_float32x4, v4f32, v16u8)
+OPENCV_HAL_IMPL_MSA_SELECT(v_float64x2, v2f64, v16u8) // NOTE(review): no v_uint64x2 / v_int64x2 instantiation here -- confirm that is intended
+
+#define OPENCV_HAL_IMPL_MSA_EXPAND(_Tpvec, _Tpwvec, _Tp, suffix, ssuffix, _Tpv, _Tpvs) /* widening conversions from _Tpvec to the double-width _Tpwvec */ \
+inline void v_expand(const _Tpvec& a, _Tpwvec& b0, _Tpwvec& b1) \
+{ \
+    _Tpv a_lo = MSA_TPV_REINTERPRET(_Tpv, msa_ilvrq_##ssuffix(MSA_TPV_REINTERPRET(_Tpvs, a.val), msa_dupq_n_##ssuffix(0))); /* interleave a with zeros (ilvr variant) */ \
+    _Tpv a_hi = MSA_TPV_REINTERPRET(_Tpv, msa_ilvlq_##ssuffix(MSA_TPV_REINTERPRET(_Tpvs, a.val), msa_dupq_n_##ssuffix(0))); /* interleave a with zeros (ilvl variant) */ \
+    b0.val = msa_paddlq_##suffix(a_lo); /* pairwise widening add of (element, zero) pairs */ \
+    b1.val = msa_paddlq_##suffix(a_hi); \
+} \
+inline _Tpwvec v_expand_low(const _Tpvec& a) /* same computation as b0 of v_expand */ \
+{ \
+    _Tpv a_lo = MSA_TPV_REINTERPRET(_Tpv, msa_ilvrq_##ssuffix(MSA_TPV_REINTERPRET(_Tpvs, a.val), msa_dupq_n_##ssuffix(0))); \
+    return _Tpwvec(msa_paddlq_##suffix(a_lo)); \
+} \
+inline _Tpwvec v_expand_high(const _Tpvec& a) /* same computation as b1 of v_expand */ \
+{ \
+    _Tpv a_hi = MSA_TPV_REINTERPRET(_Tpv, msa_ilvlq_##ssuffix(MSA_TPV_REINTERPRET(_Tpvs, a.val), msa_dupq_n_##ssuffix(0))); \
+    return _Tpwvec(msa_paddlq_##suffix(a_hi)); \
+} \
+inline _Tpwvec v_load_expand(const _Tp* ptr) /* loads via msa_ld1 and widens via msa_movl */ \
+{ \
+    return _Tpwvec(msa_movl_##suffix(msa_ld1_##suffix(ptr))); \
+}
+
+OPENCV_HAL_IMPL_MSA_EXPAND(v_uint8x16, v_uint16x8, uchar, u8, s8, v16u8, v16i8)
+OPENCV_HAL_IMPL_MSA_EXPAND(v_int8x16, v_int16x8, schar, s8, s8, v16i8, v16i8)
+OPENCV_HAL_IMPL_MSA_EXPAND(v_uint16x8, v_uint32x4, ushort, u16, s16, v8u16, v8i16)
+OPENCV_HAL_IMPL_MSA_EXPAND(v_int16x8, v_int32x4, short, s16, s16, v8i16, v8i16)
+OPENCV_HAL_IMPL_MSA_EXPAND(v_uint32x4, v_uint64x2, uint, u32, s32, v4u32, v4i32)
+OPENCV_HAL_IMPL_MSA_EXPAND(v_int32x4, v_int64x2, int, s32, s32, v4i32, v4i32)
+
+inline v_uint32x4 v_load_expand_q(const uchar* ptr) // reads ptr[0..3] and places each byte in its own 32-bit lane
+{
+    return v_uint32x4((v4u32){ptr[0], ptr[1], ptr[2], ptr[3]});
+}
+
+inline v_int32x4 v_load_expand_q(const schar* ptr) // signed counterpart: ptr[0..3] into four 32-bit lanes
+{
+    return v_int32x4((v4i32){ptr[0], ptr[1], ptr[2], ptr[3]});
+}
+
+/* v_zip, v_combine_low, v_combine_high, v_recombine */
+#define OPENCV_HAL_IMPL_MSA_UNPACKS(_Tpvec, _Tpv, _Tpvs, ssuffix) /* v_zip / v_combine_* / v_recombine built on the ilvr/ilvl interleaves */ \
+inline void v_zip(const _Tpvec& a0, const _Tpvec& a1, _Tpvec& b0, _Tpvec& b1) \
+{ \
+    b0.val = MSA_TPV_REINTERPRET(_Tpv, msa_ilvrq_##ssuffix(MSA_TPV_REINTERPRET(_Tpvs, a1.val), MSA_TPV_REINTERPRET(_Tpvs, a0.val))); /* element-wise interleave; a1 is passed first */ \
+    b1.val = MSA_TPV_REINTERPRET(_Tpv, msa_ilvlq_##ssuffix(MSA_TPV_REINTERPRET(_Tpvs, a1.val), MSA_TPV_REINTERPRET(_Tpvs, a0.val))); \
+} \
+inline _Tpvec v_combine_low(const _Tpvec& a, const _Tpvec& b) /* 64-bit ilvr interleave of a and b */ \
+{ \
+    return _Tpvec(MSA_TPV_REINTERPRET(_Tpv, msa_ilvrq_s64(MSA_TPV_REINTERPRET(v2i64, b.val), MSA_TPV_REINTERPRET(v2i64, a.val)))); \
+} \
+inline _Tpvec v_combine_high(const _Tpvec& a, const _Tpvec& b) /* 64-bit ilvl interleave of a and b */ \
+{ \
+    return _Tpvec(MSA_TPV_REINTERPRET(_Tpv, msa_ilvlq_s64(MSA_TPV_REINTERPRET(v2i64, b.val), MSA_TPV_REINTERPRET(v2i64, a.val)))); \
+} \
+inline void v_recombine(const _Tpvec& a, const _Tpvec& b, _Tpvec& c, _Tpvec& d) /* c matches v_combine_low, d matches v_combine_high */ \
+{ \
+    c.val = MSA_TPV_REINTERPRET(_Tpv, msa_ilvrq_s64(MSA_TPV_REINTERPRET(v2i64, b.val), MSA_TPV_REINTERPRET(v2i64, a.val))); \
+    d.val = MSA_TPV_REINTERPRET(_Tpv, msa_ilvlq_s64(MSA_TPV_REINTERPRET(v2i64, b.val), MSA_TPV_REINTERPRET(v2i64, a.val))); \
+}
+
+OPENCV_HAL_IMPL_MSA_UNPACKS(v_uint8x16, v16u8, v16i8, s8)
+OPENCV_HAL_IMPL_MSA_UNPACKS(v_int8x16, v16i8, v16i8, s8)
+OPENCV_HAL_IMPL_MSA_UNPACKS(v_uint16x8, v8u16, v8i16, s16)
+OPENCV_HAL_IMPL_MSA_UNPACKS(v_int16x8, v8i16, v8i16, s16)
+OPENCV_HAL_IMPL_MSA_UNPACKS(v_uint32x4, v4u32, v4i32, s32)
+OPENCV_HAL_IMPL_MSA_UNPACKS(v_int32x4, v4i32, v4i32, s32)
+OPENCV_HAL_IMPL_MSA_UNPACKS(v_float32x4, v4f32, v4i32, s32)
+OPENCV_HAL_IMPL_MSA_UNPACKS(v_float64x2, v2f64, v2i64, s64)
+
+/* v_extract */
+#define OPENCV_HAL_IMPL_MSA_EXTRACT(_Tpvec, _Tpv, _Tpvs, suffix) /* v_extract<s>(a, b): forwards to msa_extq_<suffix> with the compile-time offset s */ \
+template <int s> \
+inline _Tpvec v_extract(const _Tpvec& a, const _Tpvec& b) \
+{ \
+    return _Tpvec(MSA_TPV_REINTERPRET(_Tpv, msa_extq_##suffix(MSA_TPV_REINTERPRET(_Tpvs, a.val), MSA_TPV_REINTERPRET(_Tpvs, b.val), s))); \
+}
+
+OPENCV_HAL_IMPL_MSA_EXTRACT(v_uint8x16, v16u8, v16i8, s8)
+OPENCV_HAL_IMPL_MSA_EXTRACT(v_int8x16, v16i8, v16i8, s8)
+OPENCV_HAL_IMPL_MSA_EXTRACT(v_uint16x8, v8u16, v8i16, s16)
+OPENCV_HAL_IMPL_MSA_EXTRACT(v_int16x8, v8i16, v8i16, s16)
+OPENCV_HAL_IMPL_MSA_EXTRACT(v_uint32x4, v4u32, v4i32, s32)
+OPENCV_HAL_IMPL_MSA_EXTRACT(v_int32x4, v4i32, v4i32, s32)
+OPENCV_HAL_IMPL_MSA_EXTRACT(v_uint64x2, v2u64, v2i64, s64)
+OPENCV_HAL_IMPL_MSA_EXTRACT(v_int64x2, v2i64, v2i64, s64)
+OPENCV_HAL_IMPL_MSA_EXTRACT(v_float32x4, v4f32, v4i32, s32)
+OPENCV_HAL_IMPL_MSA_EXTRACT(v_float64x2, v2f64, v2i64, s64)
+
+/* v_round, v_floor, v_ceil, v_trunc */
+inline v_int32x4 v_round(const v_float32x4& a) // float -> int32 through msa_cvttintq (rounding behaviour is that of the intrinsic)
+{
+    return v_int32x4(msa_cvttintq_s32_f32(a.val));
+}
+
+inline v_int32x4 v_floor(const v_float32x4& a)
+{
+    v4i32 a1 = msa_cvttintq_s32_f32(a.val);
+    return v_int32x4(msa_addq_s32(a1, MSA_TPV_REINTERPRET(v4i32, msa_cgtq_f32(msa_cvtfintq_f32_s32(a1), a.val)))); // adds the compare mask (presumably -1 per true lane) where the converted value exceeds a
+}
+
+inline v_int32x4 v_ceil(const v_float32x4& a)
+{
+    v4i32 a1 = msa_cvttintq_s32_f32(a.val);
+    return v_int32x4(msa_subq_s32(a1, MSA_TPV_REINTERPRET(v4i32, msa_cgtq_f32(a.val, msa_cvtfintq_f32_s32(a1))))); // subtracts the compare mask where a exceeds the converted value
+}
+
+inline v_int32x4 v_trunc(const v_float32x4& a) // uses the msa_cvttruncq conversion instead of msa_cvttintq
+{
+    return v_int32x4(msa_cvttruncq_s32_f32(a.val));
+}
+
+inline v_int32x4 v_round(const v_float64x2& a) // single-input form: the second pack operand is a zero vector
+{
+    return v_int32x4(msa_pack_s64(msa_cvttintq_s64_f64(a.val), msa_dupq_n_s64(0)));
+}
+
+inline v_int32x4 v_round(const v_float64x2& a, const v_float64x2& b) // two-input form: packs the conversions of a and b together
+{
+    return v_int32x4(msa_pack_s64(msa_cvttintq_s64_f64(a.val), msa_cvttintq_s64_f64(b.val)));
+}
+
+inline v_int32x4 v_floor(const v_float64x2& a)
+{
+    v2f64 a1 = msa_cvtrintq_f64(a.val); // a1 stays in floating point so it can be compared against a
+    return v_int32x4(msa_pack_s64(msa_addq_s64(msa_cvttruncq_s64_f64(a1), MSA_TPV_REINTERPRET(v2i64, msa_cgtq_f64(a1, a.val))), msa_dupq_n_s64(0)));
+}
+
+inline v_int32x4 v_ceil(const v_float64x2& a)
+{
+    v2f64 a1 = msa_cvtrintq_f64(a.val);
+    return v_int32x4(msa_pack_s64(msa_subq_s64(msa_cvttruncq_s64_f64(a1), MSA_TPV_REINTERPRET(v2i64, msa_cgtq_f64(a.val, a1))), msa_dupq_n_s64(0)));
+}
+
+inline v_int32x4 v_trunc(const v_float64x2& a)
+{
+    return v_int32x4(msa_pack_s64(msa_cvttruncq_s64_f64(a.val), msa_dupq_n_s64(0)));
+}
+
+#define OPENCV_HAL_IMPL_MSA_TRANSPOSE4x4(_Tpvec, _Tpv, _Tpvs, ssuffix) /* 4x4 transpose: rows a0..a3 in, b0..b3 out, done in two interleave stages */ \
+inline void v_transpose4x4(const _Tpvec& a0, const _Tpvec& a1, \
+                           const _Tpvec& a2, const _Tpvec& a3, \
+                           _Tpvec& b0, _Tpvec& b1, \
+                           _Tpvec& b2, _Tpvec& b3) \
+{ \
+    _Tpv t00 = MSA_TPV_REINTERPRET(_Tpv, msa_ilvrq_##ssuffix(MSA_TPV_REINTERPRET(_Tpvs, a1.val), MSA_TPV_REINTERPRET(_Tpvs, a0.val))); /* stage 1: element interleave of row pairs (a0,a1) and (a2,a3) */ \
+    _Tpv t01 = MSA_TPV_REINTERPRET(_Tpv, msa_ilvlq_##ssuffix(MSA_TPV_REINTERPRET(_Tpvs, a1.val), MSA_TPV_REINTERPRET(_Tpvs, a0.val))); \
+    _Tpv t10 = MSA_TPV_REINTERPRET(_Tpv, msa_ilvrq_##ssuffix(MSA_TPV_REINTERPRET(_Tpvs, a3.val), MSA_TPV_REINTERPRET(_Tpvs, a2.val))); \
+    _Tpv t11 = MSA_TPV_REINTERPRET(_Tpv, msa_ilvlq_##ssuffix(MSA_TPV_REINTERPRET(_Tpvs, a3.val), MSA_TPV_REINTERPRET(_Tpvs, a2.val))); \
+    b0.val = MSA_TPV_REINTERPRET(_Tpv, msa_ilvrq_s64(MSA_TPV_REINTERPRET(v2i64, t10), MSA_TPV_REINTERPRET(v2i64, t00))); /* stage 2: 64-bit interleave of the stage-1 results */ \
+    b1.val = MSA_TPV_REINTERPRET(_Tpv, msa_ilvlq_s64(MSA_TPV_REINTERPRET(v2i64, t10), MSA_TPV_REINTERPRET(v2i64, t00))); \
+    b2.val = MSA_TPV_REINTERPRET(_Tpv, msa_ilvrq_s64(MSA_TPV_REINTERPRET(v2i64, t11), MSA_TPV_REINTERPRET(v2i64, t01))); \
+    b3.val = MSA_TPV_REINTERPRET(_Tpv, msa_ilvlq_s64(MSA_TPV_REINTERPRET(v2i64, t11), MSA_TPV_REINTERPRET(v2i64, t01))); \
+}
+
+OPENCV_HAL_IMPL_MSA_TRANSPOSE4x4(v_uint32x4, v4u32, v4i32, s32)
+OPENCV_HAL_IMPL_MSA_TRANSPOSE4x4(v_int32x4, v4i32, v4i32, s32)
+OPENCV_HAL_IMPL_MSA_TRANSPOSE4x4(v_float32x4, v4f32, v4i32, s32)
+
+#define OPENCV_HAL_IMPL_MSA_INTERLEAVED(_Tpvec, _Tp, suffix) /* 2/3/4-channel deinterleaving loads and interleaving stores via msa_ldNq / msa_stNq */ \
+inline void v_load_deinterleave(const _Tp* ptr, v_##_Tpvec& a, v_##_Tpvec& b) \
+{ \
+    msa_ld2q_##suffix(ptr, &a.val, &b.val); \
+} \
+inline void v_load_deinterleave(const _Tp* ptr, v_##_Tpvec& a, v_##_Tpvec& b, v_##_Tpvec& c) \
+{ \
+    msa_ld3q_##suffix(ptr, &a.val, &b.val, &c.val); \
+} \
+inline void v_load_deinterleave(const _Tp* ptr, v_##_Tpvec& a, v_##_Tpvec& b, \
+                                v_##_Tpvec& c, v_##_Tpvec& d) \
+{ \
+    msa_ld4q_##suffix(ptr, &a.val, &b.val, &c.val, &d.val); \
+} \
+inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& b, \
+                                hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \
+{ /* the store mode is accepted but not used */ \
+    msa_st2q_##suffix(ptr, a.val, b.val); \
+} \
+inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& b, \
+                                const v_##_Tpvec& c, hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \
+{ \
+    msa_st3q_##suffix(ptr, a.val, b.val, c.val); \
+} \
+inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& b, \
+                                const v_##_Tpvec& c, const v_##_Tpvec& d, \
+                                hal::StoreMode /*mode*/=hal::STORE_UNALIGNED ) \
+{ \
+    msa_st4q_##suffix(ptr, a.val, b.val, c.val, d.val); \
+}
+
+OPENCV_HAL_IMPL_MSA_INTERLEAVED(uint8x16, uchar, u8)
+OPENCV_HAL_IMPL_MSA_INTERLEAVED(int8x16, schar, s8)
+OPENCV_HAL_IMPL_MSA_INTERLEAVED(uint16x8, ushort, u16)
+OPENCV_HAL_IMPL_MSA_INTERLEAVED(int16x8, short, s16)
+OPENCV_HAL_IMPL_MSA_INTERLEAVED(uint32x4, unsigned, u32)
+OPENCV_HAL_IMPL_MSA_INTERLEAVED(int32x4, int, s32)
+OPENCV_HAL_IMPL_MSA_INTERLEAVED(float32x4, float, f32)
+OPENCV_HAL_IMPL_MSA_INTERLEAVED(uint64x2, uint64, u64)
+OPENCV_HAL_IMPL_MSA_INTERLEAVED(int64x2, int64, s64)
+OPENCV_HAL_IMPL_MSA_INTERLEAVED(float64x2, double, f64)
+
+/* v_cvt_f32, v_cvt_f64, v_cvt_f64_high */
+inline v_float32x4 v_cvt_f32(const v_int32x4& a) // int32 lanes -> float32 lanes
+{
+    return v_float32x4(msa_cvtfintq_f32_s32(a.val));
+}
+
+inline v_float32x4 v_cvt_f32(const v_float64x2& a) // single-input form: the second operand is a zero vector
+{
+    return v_float32x4(msa_cvtfq_f32_f64(a.val, msa_dupq_n_f64(0.0f)));
+}
+
+inline v_float32x4 v_cvt_f32(const v_float64x2& a, const v_float64x2& b) // two-input form: narrows a and b into one vector
+{
+    return v_float32x4(msa_cvtfq_f32_f64(a.val, b.val));
+}
+
+inline v_float64x2 v_cvt_f64(const v_int32x4& a)
+{   /* widen the low half to 64-bit integers first: an f32 intermediate cannot represent every 32-bit integer exactly */
+    return v_float64x2(msa_cvtfintq_f64_s64(v_expand_low(a).val));
+}
+
+inline v_float64x2 v_cvt_f64_high(const v_int32x4& a)
+{   /* same exact int64 -> f64 path for the high half */
+    return v_float64x2(msa_cvtfintq_f64_s64(v_expand_high(a).val));
+}
+
+inline v_float64x2 v_cvt_f64(const v_float32x4& a) // widens via msa_cvtflq
+{
+    return v_float64x2(msa_cvtflq_f64_f32(a.val));
+}
+
+inline v_float64x2 v_cvt_f64_high(const v_float32x4& a) // widens via msa_cvtfhq
+{
+    return v_float64x2(msa_cvtfhq_f64_f32(a.val));
+}
+
+inline v_float64x2 v_cvt_f64(const v_int64x2& a) // int64 lanes -> float64 lanes
+{
+    return v_float64x2(msa_cvtfintq_f64_s64(a.val));
+}
+
+////////////// Lookup table access ////////////////////
+inline v_int8x16 v_lut(const schar* tab, const int* idx) // gathers tab[idx[i]] for i = 0..15 into a temporary array, then loads it as one vector
+{
+    schar CV_DECL_ALIGNED(32) elems[16] =
+    {
+        tab[idx[ 0]],
+        tab[idx[ 1]],
+        tab[idx[ 2]],
+        tab[idx[ 3]],
+        tab[idx[ 4]],
+        tab[idx[ 5]],
+        tab[idx[ 6]],
+        tab[idx[ 7]],
+        tab[idx[ 8]],
+        tab[idx[ 9]],
+        tab[idx[10]],
+        tab[idx[11]],
+        tab[idx[12]],
+        tab[idx[13]],
+        tab[idx[14]],
+        tab[idx[15]]
+    };
+    return v_int8x16(msa_ld1q_s8(elems));
+}
+inline v_int8x16 v_lut_pairs(const schar* tab, const int* idx) // eight indices, each contributing two consecutive table entries
+{
+    schar CV_DECL_ALIGNED(32) elems[16] =
+    {
+        tab[idx[0]],
+        tab[idx[0] + 1],
+        tab[idx[1]],
+        tab[idx[1] + 1],
+        tab[idx[2]],
+        tab[idx[2] + 1],
+        tab[idx[3]],
+        tab[idx[3] + 1],
+        tab[idx[4]],
+        tab[idx[4] + 1],
+        tab[idx[5]],
+        tab[idx[5] + 1],
+        tab[idx[6]],
+        tab[idx[6] + 1],
+        tab[idx[7]],
+        tab[idx[7] + 1]
+    };
+    return v_int8x16(msa_ld1q_s8(elems));
+}
+inline v_int8x16 v_lut_quads(const schar* tab, const int* idx) // four indices, each contributing four consecutive table entries
+{
+    schar CV_DECL_ALIGNED(32) elems[16] =
+    {
+        tab[idx[0]],
+        tab[idx[0] + 1],
+        tab[idx[0] + 2],
+        tab[idx[0] + 3],
+        tab[idx[1]],
+        tab[idx[1] + 1],
+        tab[idx[1] + 2],
+        tab[idx[1] + 3],
+        tab[idx[2]],
+        tab[idx[2] + 1],
+        tab[idx[2] + 2],
+        tab[idx[2] + 3],
+        tab[idx[3]],
+        tab[idx[3] + 1],
+        tab[idx[3] + 2],
+        tab[idx[3] + 3]
+    };
+    return v_int8x16(msa_ld1q_s8(elems));
+}
+inline v_uint8x16 v_lut(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut((const schar*)tab, idx)); } // unsigned wrappers reuse the signed gathers; the cast keeps the table const
+inline v_uint8x16 v_lut_pairs(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut_pairs((const schar*)tab, idx)); }
+inline v_uint8x16 v_lut_quads(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut_quads((const schar*)tab, idx)); }
+
+
+inline v_int16x8 v_lut(const short* tab, const int* idx) // gathers tab[idx[i]] for i = 0..7
+{
+    short CV_DECL_ALIGNED(32) elems[8] =
+    {
+        tab[idx[0]],
+        tab[idx[1]],
+        tab[idx[2]],
+        tab[idx[3]],
+        tab[idx[4]],
+        tab[idx[5]],
+        tab[idx[6]],
+        tab[idx[7]]
+    };
+    return v_int16x8(msa_ld1q_s16(elems));
+}
+inline v_int16x8 v_lut_pairs(const short* tab, const int* idx) // four indices, each contributing two consecutive table entries
+{
+    short CV_DECL_ALIGNED(32) elems[8] =
+    {
+        tab[idx[0]],
+        tab[idx[0] + 1],
+        tab[idx[1]],
+        tab[idx[1] + 1],
+        tab[idx[2]],
+        tab[idx[2] + 1],
+        tab[idx[3]],
+        tab[idx[3] + 1]
+    };
+    return v_int16x8(msa_ld1q_s16(elems));
+}
+inline v_int16x8 v_lut_quads(const short* tab, const int* idx) // two half-vector loads, at tab + idx[0] and tab + idx[1], combined
+{
+    return v_int16x8(msa_combine_s16(msa_ld1_s16(tab + idx[0]), msa_ld1_s16(tab + idx[1])));
+}
+inline v_uint16x8 v_lut(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut((const short*)tab, idx)); } // unsigned wrappers reuse the signed gathers; the cast keeps the table const
+inline v_uint16x8 v_lut_pairs(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut_pairs((const short*)tab, idx)); }
+inline v_uint16x8 v_lut_quads(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut_quads((const short*)tab, idx)); }
+
+inline v_int32x4 v_lut(const int* tab, const int* idx) // gathers tab[idx[i]] for i = 0..3
+{
+    int CV_DECL_ALIGNED(32) elems[4] =
+    {
+        tab[idx[0]],
+        tab[idx[1]],
+        tab[idx[2]],
+        tab[idx[3]]
+    };
+    return v_int32x4(msa_ld1q_s32(elems));
+}
+inline v_int32x4 v_lut_pairs(const int* tab, const int* idx) // two half-vector loads, at tab + idx[0] and tab + idx[1], combined
+{
+    return v_int32x4(msa_combine_s32(msa_ld1_s32(tab + idx[0]), msa_ld1_s32(tab + idx[1])));
+}
+inline v_int32x4 v_lut_quads(const int* tab, const int* idx) // one full-vector load starting at tab + idx[0]
+{
+    return v_int32x4(msa_ld1q_s32(tab + idx[0]));
+}
+inline v_uint32x4 v_lut(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut((const int*)tab, idx)); } // unsigned wrappers reuse the signed gathers; the cast keeps the table const
+inline v_uint32x4 v_lut_pairs(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut_pairs((const int*)tab, idx)); }
+inline v_uint32x4 v_lut_quads(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut_quads((const int*)tab, idx)); }
+
+inline v_int64x2 v_lut(const int64_t* tab, const int* idx) // gathers tab[idx[0]] and tab[idx[1]]
+{
+    return v_int64x2(msa_combine_s64(msa_create_s64(tab[idx[0]]), msa_create_s64(tab[idx[1]])));
+}
+inline v_int64x2 v_lut_pairs(const int64_t* tab, const int* idx) // one full-vector load starting at tab + idx[0]
+{
+    return v_int64x2(msa_ld1q_s64(tab + idx[0]));
+}
+inline v_uint64x2 v_lut(const uint64_t* tab, const int* idx) { return v_reinterpret_as_u64(v_lut((const int64_t *)tab, idx)); } // unsigned wrappers reuse the signed gathers
+inline v_uint64x2 v_lut_pairs(const uint64_t* tab, const int* idx) { return v_reinterpret_as_u64(v_lut_pairs((const int64_t *)tab, idx)); }
+
+inline v_float32x4 v_lut(const float* tab, const int* idx) // gathers tab[idx[i]] for i = 0..3
+{
+    float CV_DECL_ALIGNED(32) elems[4] =
+    {
+        tab[idx[0]],
+        tab[idx[1]],
+        tab[idx[2]],
+        tab[idx[3]]
+    };
+    return v_float32x4(msa_ld1q_f32(elems));
+}
+inline v_float32x4 v_lut_pairs(const float* tab, const int* idx)
+{
+    float CV_DECL_ALIGNED(32) elems[4] = /* read as floats: no aliasing cast, no dropped const, no 64-bit alignment requirement on tab */
+    {
+        tab[idx[0]], tab[idx[0] + 1], /* pair selected by idx[0] */
+        tab[idx[1]], tab[idx[1] + 1]  /* pair selected by idx[1] */
+    };
+    return v_float32x4(msa_ld1q_f32(elems));
+}
+inline v_float32x4 v_lut_quads(const float* tab, const int* idx) // one full-vector load starting at tab + idx[0]
+{
+    return v_float32x4(msa_ld1q_f32(tab + idx[0]));
+}
+
+inline v_int32x4 v_lut(const int* tab, const v_int32x4& idxvec) // vector-index gather: spills the indices to an array, then reads tab[idx[i]]
+{
+    int CV_DECL_ALIGNED(32) idx[4];
+    v_store_aligned(idx, idxvec);
+
+    return v_int32x4(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]);
+}
+
+inline v_uint32x4 v_lut(const unsigned* tab, const v_int32x4& idxvec) // same gather, reading each index straight from its lane
+{
+    unsigned CV_DECL_ALIGNED(32) elems[4] =
+    {
+        tab[msa_getq_lane_s32(idxvec.val, 0)],
+        tab[msa_getq_lane_s32(idxvec.val, 1)],
+        tab[msa_getq_lane_s32(idxvec.val, 2)],
+        tab[msa_getq_lane_s32(idxvec.val, 3)]
+    };
+    return v_uint32x4(msa_ld1q_u32(elems));
+}
+
+inline v_float32x4 v_lut(const float* tab, const v_int32x4& idxvec) // float table, same spill-then-gather approach as the int overload
+{
+    int CV_DECL_ALIGNED(32) idx[4];
+    v_store_aligned(idx, idxvec);
+
+    return v_float32x4(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]);
+}
+
+inline void v_lut_deinterleave(const float* tab, const v_int32x4& idxvec, v_float32x4& x, v_float32x4& y) // loads a float pair at each of four indices and splits them into x and y
+{
+    int CV_DECL_ALIGNED(32) idx[4];
+    v_store_aligned(idx, idxvec);
+
+    v4f32 xy02 = msa_combine_f32(msa_ld1_f32(tab + idx[0]), msa_ld1_f32(tab + idx[2])); // pairs for indices 0 and 2
+    v4f32 xy13 = msa_combine_f32(msa_ld1_f32(tab + idx[1]), msa_ld1_f32(tab + idx[3])); // pairs for indices 1 and 3
+    x = v_float32x4(MSA_TPV_REINTERPRET(v4f32, msa_ilvevq_s32(MSA_TPV_REINTERPRET(v4i32, xy13), MSA_TPV_REINTERPRET(v4i32, xy02)))); // even-lane interleave
+    y = v_float32x4(MSA_TPV_REINTERPRET(v4f32, msa_ilvodq_s32(MSA_TPV_REINTERPRET(v4i32, xy13), MSA_TPV_REINTERPRET(v4i32, xy02)))); // odd-lane interleave
+}
+
+inline v_int8x16 v_interleave_pairs(const v_int8x16& vec)
+{
+    const v16i8 order = (v16i8)((v2i64){0x0705060403010200, 0x0F0D0E0C0B090A08}); /* byte shuffle control for the pair interleave */
+    return v_int8x16(__builtin_msa_vshf_b(order, msa_dupq_n_s8(0), vec.val));
+}
+inline v_uint8x16 v_interleave_pairs(const v_uint8x16& vec)
+{ return v_reinterpret_as_u8(v_interleave_pairs(v_reinterpret_as_s8(vec))); } /* reuses the signed overload on the same bits */
+inline v_int8x16 v_interleave_quads(const v_int8x16& vec)
+{
+    // shuffle indices, read from the least significant byte of each constant: 0 4 1 5 2 6 3 7 | 8 12 9 13 10 14 11 15
+    return v_int8x16(__builtin_msa_vshf_b((v16i8)((v2i64){0x0703060205010400, 0x0F0B0E0A0D090C08}), msa_dupq_n_s8(0), vec.val));
+}
+inline v_uint8x16 v_interleave_quads(const v_uint8x16& vec) { return v_reinterpret_as_u8(v_interleave_quads(v_reinterpret_as_s8(vec))); } // signed view
+
+inline v_int16x8 v_interleave_pairs(const v_int16x8& vec)
+{
+    // shuffle indices, read from the least significant halfword of each constant: 0 2 1 3 | 4 6 5 7
+    return v_int16x8(__builtin_msa_vshf_h((v8i16)((v2i64){0x0003000100020000, 0x0007000500060004}), msa_dupq_n_s16(0), vec.val));
+}
+inline v_uint16x8 v_interleave_pairs(const v_uint16x8& vec) // same shuffle, routed through the signed view
+{ return v_reinterpret_as_u16(v_interleave_pairs(v_reinterpret_as_s16(vec))); }
+
+inline v_int16x8 v_interleave_quads(const v_int16x8& vec)
+{
+    // shuffle indices, read from the least significant halfword of each constant: 0 4 1 5 | 2 6 3 7
+    return v_int16x8(__builtin_msa_vshf_h((v8i16)((v2i64){0x0005000100040000, 0x0007000300060002}), msa_dupq_n_s16(0), vec.val));
+}
+inline v_uint16x8 v_interleave_quads(const v_uint16x8& vec) // same shuffle, routed through the signed view
+{ return v_reinterpret_as_u16(v_interleave_quads(v_reinterpret_as_s16(vec))); }
+
+inline v_int32x4 v_interleave_pairs(const v_int32x4& vec)
+{
+    v_int32x4 swapped;              // resulting lane order: 0 2 1 3
+    swapped.val[0] = vec.val[0];
+    swapped.val[3] = vec.val[3];
+    swapped.val[1] = vec.val[2];
+    swapped.val[2] = vec.val[1];
+    return swapped;
+}
+// unsigned and float lanes reuse the s32 permutation through reinterpretation
+inline v_uint32x4 v_interleave_pairs(const v_uint32x4& vec) { return v_reinterpret_as_u32(v_interleave_pairs(v_reinterpret_as_s32(vec))); }
+inline v_float32x4 v_interleave_pairs(const v_float32x4& vec) { return v_reinterpret_as_f32(v_interleave_pairs(v_reinterpret_as_s32(vec))); }
+
+inline v_int8x16 v_pack_triplets(const v_int8x16& vec)
+{
+    // shuffle indices 0 1 2 4 5 6 8 9 | 10 12 13 14 16 17 18 19; 16..19 lie past vec's 16 lanes (presumably the zero operand)
+    return v_int8x16(__builtin_msa_vshf_b((v16i8)((v2i64){0x0908060504020100, 0x131211100E0D0C0A}), msa_dupq_n_s8(0), vec.val));
+}
+inline v_uint8x16 v_pack_triplets(const v_uint8x16& vec) // same shuffle, routed through the signed view
+{ return v_reinterpret_as_u8(v_pack_triplets(v_reinterpret_as_s8(vec))); }
+
+inline v_int16x8 v_pack_triplets(const v_int16x8& vec)
+{
+    // shuffle indices 0 1 2 4 | 5 6 8 9; 8 and 9 lie past vec's eight lanes (presumably the zero operand)
+    return v_int16x8(__builtin_msa_vshf_h((v8i16)((v2i64){0x0004000200010000, 0x0009000800060005}), msa_dupq_n_s16(0), vec.val));
+}
+inline v_uint16x8 v_pack_triplets(const v_uint16x8& vec) // same shuffle, routed through the signed view
+{ return v_reinterpret_as_u16(v_pack_triplets(v_reinterpret_as_s16(vec))); }
+inline v_int32x4 v_pack_triplets(const v_int32x4& vec) { return vec; } // 32-bit lanes are returned unchanged
+inline v_uint32x4 v_pack_triplets(const v_uint32x4& vec) { return vec; }
+inline v_float32x4 v_pack_triplets(const v_float32x4& vec) { return vec; }
+
+inline v_float64x2 v_lut(const double* tab, const int* idx)
+{
+    double CV_DECL_ALIGNED(32) gathered[2] = // two independently indexed table entries
+    {
+        tab[idx[0]],
+        tab[idx[1]]
+    };
+    return v_float64x2(msa_ld1q_f64(gathered));
+}
+
+inline v_float64x2 v_lut_pairs(const double* tab, const int* idx)
+{
+    return v_float64x2(msa_ld1q_f64(tab + idx[0])); // only idx[0] is read; one vector load starting at that entry
+}
+
+inline v_float64x2 v_lut(const double* tab, const v_int32x4& idxvec)
+{
+    int CV_DECL_ALIGNED(32) lane[4];
+    v_store_aligned(lane, idxvec);
+    // only the first two of the four stored indices are used
+    return v_float64x2(tab[lane[0]], tab[lane[1]]);
+}
+
+inline void v_lut_deinterleave(const double* tab, const v_int32x4& idxvec, v_float64x2& x, v_float64x2& y)
+{
+    int CV_DECL_ALIGNED(32) lane[4];
+    v_store_aligned(lane, idxvec);
+    // vector loads at the first two indices only; stored indices 2 and 3 are not used
+    v2f64 pair0 = msa_ld1q_f64(tab + lane[0]);
+    v2f64 pair1 = msa_ld1q_f64(tab + lane[1]);
+    x = v_float64x2(MSA_TPV_REINTERPRET(v2f64, msa_ilvevq_s64(MSA_TPV_REINTERPRET(v2i64, pair1), MSA_TPV_REINTERPRET(v2i64, pair0))));
+    y = v_float64x2(MSA_TPV_REINTERPRET(v2f64, msa_ilvodq_s64(MSA_TPV_REINTERPRET(v2i64, pair1), MSA_TPV_REINTERPRET(v2i64, pair0))));
+}
+
+// Scalar value of one lane: rotate right by i lanes, then read lane 0.
+template<int i, typename _Tp> inline typename _Tp::lane_type v_extract_n(const _Tp& a)
+{
+    return v_rotate_right<i>(a).get0();
+}
+
+// Extract lane i of `a` and replicate it across every lane (u32).
+template<int i> inline v_uint32x4 v_broadcast_element(const v_uint32x4& a)
+{
+    return v_setall_u32(v_extract_n<i>(a));
+}
+// Extract lane i of `a` and replicate it across every lane (s32).
+template<int i> inline v_int32x4 v_broadcast_element(const v_int32x4& a)
+{
+    return v_setall_s32(v_extract_n<i>(a));
+}
+// Extract lane i of `a` and replicate it across every lane (f32).
+template<int i> inline v_float32x4 v_broadcast_element(const v_float32x4& a)
+{
+    return v_setall_f32(v_extract_n<i>(a));
+}
+
+////// FP16 support ///////
+#if CV_FP16
+inline v_float32x4 v_load_expand(const float16_t* ptr)
+{
+#ifndef msa_ld1_f16
+    v4f16 half4 = (v4f16)msa_ld1_s16((const short*)ptr); // no f16 load available: fetch as 16-bit integers and cast
+#else
+    v4f16 half4 = msa_ld1_f16((const __fp16*)ptr);
+#endif
+    return v_float32x4(msa_cvt_f32_f16(half4));
+}
+
+inline void v_pack_store(float16_t* ptr, const v_float32x4& v)
+{
+    v4f16 half4 = msa_cvt_f16_f32(v.val);
+    // no f16 store available: write the converted lanes as 16-bit integers
+#ifndef msa_st1_f16
+    msa_st1_s16((short*)ptr, (int16x4_t)half4);
+#else
+    msa_st1_f16((__fp16*)ptr, half4);
+#endif
+}
+#else
+inline v_float32x4 v_load_expand(const float16_t* ptr)
+{
+    float widened[4]; // scalar fallback: convert the four halves one by one, then load
+    for( int k = 0; k != 4; ++k )
+        widened[k] = (float)ptr[k];
+    return v_load(widened);
+}
+
+inline void v_pack_store(float16_t* ptr, const v_float32x4& v)
+{
+    float lanes[4]; // scalar fallback: spill the vector, then narrow lane by lane
+    v_store(lanes, v);
+    for( int k = 0; k != 4; ++k )
+        ptr[k] = (float16_t)lanes[k];
+}
+#endif
+
+inline void v_cleanup() {} // intentionally empty: this backend has nothing to do here
+
+CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
+
+//! @endcond
+
+}
+
+#endif
diff --git a/IPL/include/opencv/opencv2/core/hal/intrin_neon.hpp b/IPL/include/opencv/opencv2/core/hal/intrin_neon.hpp
index f3e47ca..280691b 100644
--- a/IPL/include/opencv/opencv2/core/hal/intrin_neon.hpp
+++ b/IPL/include/opencv/opencv2/core/hal/intrin_neon.hpp
@@ -42,17 +42,82 @@
 //
 //M*/
 
-#ifndef __OPENCV_HAL_INTRIN_NEON_HPP__
-#define __OPENCV_HAL_INTRIN_NEON_HPP__
+#ifndef OPENCV_HAL_INTRIN_NEON_HPP
+#define OPENCV_HAL_INTRIN_NEON_HPP
 
 #include <algorithm>
+#include "opencv2/core/utility.hpp"
 
 namespace cv
 {
 
 //! @cond IGNORED
 
+CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
+
 #define CV_SIMD128 1
+#if defined(__aarch64__) || defined(_M_ARM64)
+#define CV_SIMD128_64F 1
+#else
+#define CV_SIMD128_64F 0
+#endif
+
+// TODO: CV_NEON_DOT is hard-wired to 0, so the vdotq_* code paths guarded by it are never compiled
+#define CV_NEON_DOT 0
+
+//////////// Utils ////////////
+
+#if CV_SIMD128_64F // AArch64 build: separate vuzp1 / vuzp2 intrinsics, one result each
+#define OPENCV_HAL_IMPL_NEON_UNZIP(_Tpv, _Tpvx2, suffix) \
+    inline void _v128_unzip(const _Tpv& a, const _Tpv& b, _Tpv& c, _Tpv& d) \
+    { c = vuzp1q_##suffix(a, b); d = vuzp2q_##suffix(a, b); }
+#define OPENCV_HAL_IMPL_NEON_UNZIP_L(_Tpv, _Tpvx2, suffix) \
+    inline void _v128_unzip(const _Tpv&a, const _Tpv&b, _Tpv& c, _Tpv& d) \
+    { c = vuzp1_##suffix(a, b); d = vuzp2_##suffix(a, b); }
+#else // otherwise: vuzp returns both results in a two-register struct, unpacked into c and d
+#define OPENCV_HAL_IMPL_NEON_UNZIP(_Tpv, _Tpvx2, suffix) \
+    inline void _v128_unzip(const _Tpv& a, const _Tpv& b, _Tpv& c, _Tpv& d) \
+    { _Tpvx2 ab = vuzpq_##suffix(a, b); c = ab.val[0]; d = ab.val[1]; }
+#define OPENCV_HAL_IMPL_NEON_UNZIP_L(_Tpv, _Tpvx2, suffix) \
+    inline void _v128_unzip(const _Tpv& a, const _Tpv& b, _Tpv& c, _Tpv& d) \
+    { _Tpvx2 ab = vuzp_##suffix(a, b); c = ab.val[0]; d = ab.val[1]; }
+#endif
+
+#if CV_SIMD128_64F // define f64 <-> other-type reinterprets as plain vector casts
+#define OPENCV_HAL_IMPL_NEON_REINTERPRET(_Tpv, suffix) \
+    template <typename T> static inline \
+    _Tpv vreinterpretq_##suffix##_f64(T a) { return (_Tpv) a; } \
+    template <typename T> static inline \
+    float64x2_t vreinterpretq_f64_##suffix(T a) { return (float64x2_t) a; }
+#else // 64-bit float vectors disabled: the macro expands to nothing
+#define OPENCV_HAL_IMPL_NEON_REINTERPRET(_Tpv, suffix)
+#endif
+
+#define OPENCV_HAL_IMPL_NEON_UTILS_SUFFIX(_Tpv, _Tpvl, suffix) \
+    OPENCV_HAL_IMPL_NEON_UNZIP(_Tpv##_t, _Tpv##x2_t, suffix) \
+    OPENCV_HAL_IMPL_NEON_UNZIP_L(_Tpvl##_t, _Tpvl##x2_t, suffix) \
+    OPENCV_HAL_IMPL_NEON_REINTERPRET(_Tpv##_t, suffix)
+
+#define OPENCV_HAL_IMPL_NEON_UTILS_SUFFIX_I64(_Tpv, _Tpvl, suffix) \
+    OPENCV_HAL_IMPL_NEON_REINTERPRET(_Tpv##_t, suffix)
+
+#define OPENCV_HAL_IMPL_NEON_UTILS_SUFFIX_F64(_Tpv, _Tpvl, suffix) \
+    OPENCV_HAL_IMPL_NEON_UNZIP(_Tpv##_t, _Tpv##x2_t, suffix)
+
+OPENCV_HAL_IMPL_NEON_UTILS_SUFFIX(uint8x16, uint8x8,  u8)
+OPENCV_HAL_IMPL_NEON_UTILS_SUFFIX(int8x16,  int8x8,   s8)
+OPENCV_HAL_IMPL_NEON_UTILS_SUFFIX(uint16x8, uint16x4, u16)
+OPENCV_HAL_IMPL_NEON_UTILS_SUFFIX(int16x8,  int16x4,  s16)
+OPENCV_HAL_IMPL_NEON_UTILS_SUFFIX(uint32x4, uint32x2, u32)
+OPENCV_HAL_IMPL_NEON_UTILS_SUFFIX(int32x4,  int32x2,  s32)
+OPENCV_HAL_IMPL_NEON_UTILS_SUFFIX(float32x4, float32x2, f32)
+OPENCV_HAL_IMPL_NEON_UTILS_SUFFIX_I64(uint64x2, uint64x1, u64)
+OPENCV_HAL_IMPL_NEON_UTILS_SUFFIX_I64(int64x2,  int64x1,  s64)
+#if CV_SIMD128_64F
+OPENCV_HAL_IMPL_NEON_UTILS_SUFFIX_F64(float64x2, float64x1,f64)
+#endif
+
+//////////// Types ////////////
 
 struct v_uint8x16
 {
@@ -201,7 +266,7 @@ struct v_uint64x2
 
     v_uint64x2() {}
     explicit v_uint64x2(uint64x2_t v) : val(v) {}
-    v_uint64x2(unsigned v0, unsigned v1)
+    v_uint64x2(uint64 v0, uint64 v1)
     {
         uint64 v[] = {v0, v1};
         val = vld1q_u64(v);
@@ -220,7 +285,7 @@ struct v_int64x2
 
     v_int64x2() {}
     explicit v_int64x2(int64x2_t v) : val(v) {}
-    v_int64x2(int v0, int v1)
+    v_int64x2(int64 v0, int64 v1)
     {
         int64 v[] = {v0, v1};
         val = vld1q_s64(v);
@@ -232,6 +297,27 @@ struct v_int64x2
     int64x2_t val;
 };
 
+#if CV_SIMD128_64F
+struct v_float64x2
+{
+    typedef double lane_type;
+    enum { nlanes = 2 };
+
+    v_float64x2() {} // val is left uninitialized
+    explicit v_float64x2(float64x2_t reg) : val(reg) {}
+    v_float64x2(double v0, double v1)
+    {
+        const double lanes[2] = {v0, v1};
+        val = vld1q_f64(lanes);
+    }
+    // value of lane 0
+    double get0() const { return vgetq_lane_f64(val, 0); }
+
+    // underlying NEON register
+    float64x2_t val;
+};
+#endif
+
 #define OPENCV_HAL_IMPL_NEON_INIT(_Tpv, _Tp, suffix) \
 inline v_##_Tpv v_setzero_##suffix() { return v_##_Tpv(vdupq_n_##suffix((_Tp)0)); } \
 inline v_##_Tpv v_setall_##suffix(_Tp v) { return v_##_Tpv(vdupq_n_##suffix(v)); } \
@@ -255,41 +341,85 @@ OPENCV_HAL_IMPL_NEON_INIT(int32x4, int, s32)
 OPENCV_HAL_IMPL_NEON_INIT(uint64x2, uint64, u64)
 OPENCV_HAL_IMPL_NEON_INIT(int64x2, int64, s64)
 OPENCV_HAL_IMPL_NEON_INIT(float32x4, float, f32)
+#if CV_SIMD128_64F
+#define OPENCV_HAL_IMPL_NEON_INIT_64(_Tpv, suffix) \
+inline v_float64x2 v_reinterpret_as_f64(const v_##_Tpv& v) { return v_float64x2(vreinterpretq_f64_##suffix(v.val)); }
+OPENCV_HAL_IMPL_NEON_INIT(float64x2, double, f64)
+OPENCV_HAL_IMPL_NEON_INIT_64(uint8x16, u8)
+OPENCV_HAL_IMPL_NEON_INIT_64(int8x16, s8)
+OPENCV_HAL_IMPL_NEON_INIT_64(uint16x8, u16)
+OPENCV_HAL_IMPL_NEON_INIT_64(int16x8, s16)
+OPENCV_HAL_IMPL_NEON_INIT_64(uint32x4, u32)
+OPENCV_HAL_IMPL_NEON_INIT_64(int32x4, s32)
+OPENCV_HAL_IMPL_NEON_INIT_64(uint64x2, u64)
+OPENCV_HAL_IMPL_NEON_INIT_64(int64x2, s64)
+OPENCV_HAL_IMPL_NEON_INIT_64(float32x4, f32)
+OPENCV_HAL_IMPL_NEON_INIT_64(float64x2, f64)
+#endif
 
-#define OPENCV_HAL_IMPL_NEON_PACK(_Tpvec, _Tp, hreg, suffix, _Tpwvec, wsuffix, pack, op) \
+#define OPENCV_HAL_IMPL_NEON_PACK(_Tpvec, _Tp, hreg, suffix, _Tpwvec, pack, mov, rshr) \
 inline _Tpvec v_##pack(const _Tpwvec& a, const _Tpwvec& b) \
 { \
-    hreg a1 = vqmov##op##_##wsuffix(a.val), b1 = vqmov##op##_##wsuffix(b.val); \
+    hreg a1 = mov(a.val), b1 = mov(b.val); \
     return _Tpvec(vcombine_##suffix(a1, b1)); \
 } \
 inline void v_##pack##_store(_Tp* ptr, const _Tpwvec& a) \
 { \
-    hreg a1 = vqmov##op##_##wsuffix(a.val); \
+    hreg a1 = mov(a.val); \
     vst1_##suffix(ptr, a1); \
 } \
 template<int n> inline \
 _Tpvec v_rshr_##pack(const _Tpwvec& a, const _Tpwvec& b) \
 { \
-    hreg a1 = vqrshr##op##_n_##wsuffix(a.val, n); \
-    hreg b1 = vqrshr##op##_n_##wsuffix(b.val, n); \
+    hreg a1 = rshr(a.val, n); \
+    hreg b1 = rshr(b.val, n); \
     return _Tpvec(vcombine_##suffix(a1, b1)); \
 } \
 template<int n> inline \
 void v_rshr_##pack##_store(_Tp* ptr, const _Tpwvec& a) \
 { \
-    hreg a1 = vqrshr##op##_n_##wsuffix(a.val, n); \
+    hreg a1 = rshr(a.val, n); \
     vst1_##suffix(ptr, a1); \
 }
 
-OPENCV_HAL_IMPL_NEON_PACK(v_uint8x16, uchar, uint8x8_t, u8, v_uint16x8, u16, pack, n)
-OPENCV_HAL_IMPL_NEON_PACK(v_int8x16, schar, int8x8_t, s8, v_int16x8, s16, pack, n)
-OPENCV_HAL_IMPL_NEON_PACK(v_uint16x8, ushort, uint16x4_t, u16, v_uint32x4, u32, pack, n)
-OPENCV_HAL_IMPL_NEON_PACK(v_int16x8, short, int16x4_t, s16, v_int32x4, s32, pack, n)
-OPENCV_HAL_IMPL_NEON_PACK(v_uint32x4, unsigned, uint32x2_t, u32, v_uint64x2, u64, pack, n)
-OPENCV_HAL_IMPL_NEON_PACK(v_int32x4, int, int32x2_t, s32, v_int64x2, s64, pack, n)
+OPENCV_HAL_IMPL_NEON_PACK(v_uint8x16, uchar, uint8x8_t, u8, v_uint16x8, pack, vqmovn_u16, vqrshrn_n_u16)
+OPENCV_HAL_IMPL_NEON_PACK(v_int8x16, schar, int8x8_t, s8, v_int16x8, pack, vqmovn_s16, vqrshrn_n_s16)
+OPENCV_HAL_IMPL_NEON_PACK(v_uint16x8, ushort, uint16x4_t, u16, v_uint32x4, pack, vqmovn_u32, vqrshrn_n_u32)
+OPENCV_HAL_IMPL_NEON_PACK(v_int16x8, short, int16x4_t, s16, v_int32x4, pack, vqmovn_s32, vqrshrn_n_s32)
+OPENCV_HAL_IMPL_NEON_PACK(v_uint32x4, unsigned, uint32x2_t, u32, v_uint64x2, pack, vmovn_u64, vrshrn_n_u64)
+OPENCV_HAL_IMPL_NEON_PACK(v_int32x4, int, int32x2_t, s32, v_int64x2, pack, vmovn_s64, vrshrn_n_s64)
+
+OPENCV_HAL_IMPL_NEON_PACK(v_uint8x16, uchar, uint8x8_t, u8, v_int16x8, pack_u, vqmovun_s16, vqrshrun_n_s16)
+OPENCV_HAL_IMPL_NEON_PACK(v_uint16x8, ushort, uint16x4_t, u16, v_int32x4, pack_u, vqmovun_s32, vqrshrun_n_s32)
 
-OPENCV_HAL_IMPL_NEON_PACK(v_uint8x16, uchar, uint8x8_t, u8, v_int16x8, s16, pack_u, un)
-OPENCV_HAL_IMPL_NEON_PACK(v_uint16x8, ushort, uint16x4_t, u16, v_int32x4, s32, pack_u, un)
+// pack boolean
+inline v_uint8x16 v_pack_b(const v_uint16x8& a, const v_uint16x8& b)
+{
+    // vmovn narrows without saturation; a supplies the low half of the result, b the high half
+    return v_uint8x16(vcombine_u8(vmovn_u16(a.val), vmovn_u16(b.val)));
+}
+
+inline v_uint8x16 v_pack_b(const v_uint32x4& a, const v_uint32x4& b,
+                           const v_uint32x4& c, const v_uint32x4& d)
+{
+    const uint8x8_t lo = vmovn_u16(vcombine_u16(vmovn_u32(a.val), vmovn_u32(b.val))); // 32 -> 16 -> 8 bit
+    const uint8x8_t hi = vmovn_u16(vcombine_u16(vmovn_u32(c.val), vmovn_u32(d.val)));
+    return v_uint8x16(vcombine_u8(lo, hi));
+}
+
+inline v_uint8x16 v_pack_b(const v_uint64x2& a, const v_uint64x2& b, const v_uint64x2& c,
+                           const v_uint64x2& d, const v_uint64x2& e, const v_uint64x2& f,
+                           const v_uint64x2& g, const v_uint64x2& h)
+{
+    uint32x4_t n_ab = vcombine_u32(vmovn_u64(a.val), vmovn_u64(b.val));
+    uint32x4_t n_cd = vcombine_u32(vmovn_u64(c.val), vmovn_u64(d.val));
+    uint32x4_t n_ef = vcombine_u32(vmovn_u64(e.val), vmovn_u64(f.val));
+    uint32x4_t n_gh = vcombine_u32(vmovn_u64(g.val), vmovn_u64(h.val));
+    // 64 -> 32 bit above, 32 -> 16 bit here, 16 -> 8 bit in the return expression
+    uint16x8_t lo = vcombine_u16(vmovn_u32(n_ab), vmovn_u32(n_cd));
+    uint16x8_t hi = vcombine_u16(vmovn_u32(n_ef), vmovn_u32(n_gh));
+    return v_uint8x16(vcombine_u8(vmovn_u16(lo), vmovn_u16(hi)));
+}
 
 inline v_float32x4 v_matmul(const v_float32x4& v, const v_float32x4& m0,
                             const v_float32x4& m1, const v_float32x4& m2,
@@ -303,6 +433,18 @@ inline v_float32x4 v_matmul(const v_float32x4& v, const v_float32x4& m0,
     return v_float32x4(res);
 }
 
+inline v_float32x4 v_matmuladd(const v_float32x4& v, const v_float32x4& m0,
+                               const v_float32x4& m1, const v_float32x4& m2,
+                               const v_float32x4& a)
+{
+    const float32x2_t lo = vget_low_f32(v.val);  // v[0], v[1]
+    const float32x2_t hi = vget_high_f32(v.val); // v[2]; v[3] is not used
+    float32x4_t acc = vmulq_lane_f32(m0.val, lo, 0);
+    acc = vmlaq_lane_f32(acc, m1.val, lo, 1);
+    acc = vmlaq_lane_f32(acc, m2.val, hi, 0);
+    return v_float32x4(vaddq_f32(acc, a.val));
+}
+
 #define OPENCV_HAL_IMPL_NEON_BIN_OP(bin_op, _Tpvec, intrin) \
 inline _Tpvec operator bin_op (const _Tpvec& a, const _Tpvec& b) \
 { \
@@ -320,10 +462,8 @@ OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_int8x16, vqaddq_s8)
 OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_int8x16, vqsubq_s8)
 OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_uint16x8, vqaddq_u16)
 OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_uint16x8, vqsubq_u16)
-OPENCV_HAL_IMPL_NEON_BIN_OP(*, v_uint16x8, vmulq_u16)
 OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_int16x8, vqaddq_s16)
 OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_int16x8, vqsubq_s16)
-OPENCV_HAL_IMPL_NEON_BIN_OP(*, v_int16x8, vmulq_s16)
 OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_int32x4, vaddq_s32)
 OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_int32x4, vsubq_s32)
 OPENCV_HAL_IMPL_NEON_BIN_OP(*, v_int32x4, vmulq_s32)
@@ -337,7 +477,13 @@ OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_int64x2, vaddq_s64)
 OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_int64x2, vsubq_s64)
 OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_uint64x2, vaddq_u64)
 OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_uint64x2, vsubq_u64)
-
+#if CV_SIMD128_64F
+OPENCV_HAL_IMPL_NEON_BIN_OP(/, v_float32x4, vdivq_f32)
+OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_float64x2, vaddq_f64)
+OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_float64x2, vsubq_f64)
+OPENCV_HAL_IMPL_NEON_BIN_OP(*, v_float64x2, vmulq_f64)
+OPENCV_HAL_IMPL_NEON_BIN_OP(/, v_float64x2, vdivq_f64)
+#else
 inline v_float32x4 operator / (const v_float32x4& a, const v_float32x4& b)
 {
     float32x4_t reciprocal = vrecpeq_f32(b.val);
@@ -353,6 +499,38 @@ inline v_float32x4& operator /= (v_float32x4& a, const v_float32x4& b)
     a.val = vmulq_f32(a.val, reciprocal);
     return a;
 }
+#endif
+
+// saturating multiply 8-bit, 16-bit
+#define OPENCV_HAL_IMPL_NEON_MUL_SAT(_Tpvec, _Tpwvec)            \
+    inline _Tpvec operator * (const _Tpvec& a, const _Tpvec& b)  \
+    {                                                            \
+        _Tpwvec c, d;                                            \
+        v_mul_expand(a, b, c, d);                                \
+        return v_pack(c, d);                                     \
+    }                                                            \
+    inline _Tpvec& operator *= (_Tpvec& a, const _Tpvec& b)      \
+    { a = a * b; return a; }
+
+OPENCV_HAL_IMPL_NEON_MUL_SAT(v_int8x16,  v_int16x8)
+OPENCV_HAL_IMPL_NEON_MUL_SAT(v_uint8x16, v_uint16x8)
+OPENCV_HAL_IMPL_NEON_MUL_SAT(v_int16x8,  v_int32x4)
+OPENCV_HAL_IMPL_NEON_MUL_SAT(v_uint16x8, v_uint32x4)
+
+//  Multiply and expand
+inline void v_mul_expand(const v_int8x16& a, const v_int8x16& b,
+                         v_int16x8& c, v_int16x8& d)
+{
+    c = v_int16x8(vmull_s8(vget_low_s8(a.val), vget_low_s8(b.val)));   // widened products of lanes 0..7
+    d = v_int16x8(vmull_s8(vget_high_s8(a.val), vget_high_s8(b.val))); // widened products of lanes 8..15
+}
+
+inline void v_mul_expand(const v_uint8x16& a, const v_uint8x16& b,
+                         v_uint16x8& c, v_uint16x8& d)
+{
+    c = v_uint16x8(vmull_u8(vget_low_u8(a.val), vget_low_u8(b.val)));   // widened products of lanes 0..7
+    d = v_uint16x8(vmull_u8(vget_high_u8(a.val), vget_high_u8(b.val))); // widened products of lanes 8..15
+}
 
 inline void v_mul_expand(const v_int16x8& a, const v_int16x8& b,
                          v_int32x4& c, v_int32x4& d)
@@ -375,13 +553,286 @@ inline void v_mul_expand(const v_uint32x4& a, const v_uint32x4& b,
     d.val = vmull_u32(vget_high_u32(a.val), vget_high_u32(b.val));
 }
 
+inline v_int16x8 v_mul_hi(const v_int16x8& a, const v_int16x8& b)
+{
+    // full 32-bit products; the narrowing shift by 16 keeps only their upper halves
+    const int32x4_t lo = vmull_s16( vget_low_s16(a.val),  vget_low_s16(b.val));
+    const int32x4_t hi = vmull_s16(vget_high_s16(a.val), vget_high_s16(b.val));
+    return v_int16x8(vcombine_s16(vshrn_n_s32(lo, 16), vshrn_n_s32(hi, 16)));
+}
+inline v_uint16x8 v_mul_hi(const v_uint16x8& a, const v_uint16x8& b)
+{
+    // full 32-bit products; the narrowing shift by 16 keeps only their upper halves
+    const uint32x4_t lo = vmull_u16( vget_low_u16(a.val),  vget_low_u16(b.val));
+    const uint32x4_t hi = vmull_u16(vget_high_u16(a.val), vget_high_u16(b.val));
+    return v_uint16x8(vcombine_u16(vshrn_n_u32(lo, 16), vshrn_n_u32(hi, 16)));
+}
+
+//////// Dot Product ////////
+
+// 16 >> 32
 inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b)
 {
-    int32x4_t c = vmull_s16(vget_low_s16(a.val), vget_low_s16(b.val));
-    int32x4_t d = vmull_s16(vget_high_s16(a.val), vget_high_s16(b.val));
-    int32x4x2_t cd = vuzpq_s32(c, d);
-    return v_int32x4(vaddq_s32(cd.val[0], cd.val[1]));
+    int16x8_t uzp1, uzp2;
+    _v128_unzip(a.val, b.val, uzp1, uzp2);
+    int16x4_t a0 = vget_low_s16(uzp1);
+    int16x4_t b0 = vget_high_s16(uzp1);
+    int16x4_t a1 = vget_low_s16(uzp2);
+    int16x4_t b1 = vget_high_s16(uzp2);
+    int32x4_t p = vmull_s16(a0, b0);
+    return v_int32x4(vmlal_s16(p, a1, b1));
+}
+inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b, const v_int32x4& c)
+{
+    int16x8_t even, odd;
+    _v128_unzip(a.val, b.val, even, odd);
+    // after the unzip, each low half comes from a and each high half from b
+    int16x4_t a_even = vget_low_s16(even);
+    int16x4_t b_even = vget_high_s16(even);
+    int16x4_t a_odd = vget_low_s16(odd);
+    int16x4_t b_odd = vget_high_s16(odd);
+    return v_int32x4(vmlal_s16(vmlal_s16(c.val, a_even, b_even), a_odd, b_odd));
+}
+
+// 32 >> 64
+inline v_int64x2 v_dotprod(const v_int32x4& a, const v_int32x4& b)
+{
+    int32x4_t even, odd;
+    _v128_unzip(a.val, b.val, even, odd);
+    // after the unzip, each low half comes from a and each high half from b
+    int32x2_t a_even = vget_low_s32(even);
+    int32x2_t b_even = vget_high_s32(even);
+    int32x2_t a_odd = vget_low_s32(odd);
+    int32x2_t b_odd = vget_high_s32(odd);
+    return v_int64x2(vmlal_s32(vmull_s32(a_even, b_even), a_odd, b_odd));
+}
+inline v_int64x2 v_dotprod(const v_int32x4& a, const v_int32x4& b, const v_int64x2& c)
+{
+    int32x4_t even, odd;
+    _v128_unzip(a.val, b.val, even, odd);
+    // after the unzip, each low half comes from a and each high half from b
+    int32x2_t a_even = vget_low_s32(even);
+    int32x2_t b_even = vget_high_s32(even);
+    int32x2_t a_odd = vget_low_s32(odd);
+    int32x2_t b_odd = vget_high_s32(odd);
+    return v_int64x2(vmlal_s32(vmlal_s32(c.val, a_even, b_even), a_odd, b_odd));
+}
+
+// 8 >> 32
+inline v_uint32x4 v_dotprod_expand(const v_uint8x16& a, const v_uint8x16& b)
+{
+#if CV_NEON_DOT
+    return v_uint32x4(vdotq_u32(vdupq_n_u32(0), a.val, b.val));
+#else
+    const uint8x16_t zero   = vreinterpretq_u8_u32(vdupq_n_u32(0));
+    const uint8x16_t mask   = vreinterpretq_u8_u32(vdupq_n_u32(0x00FF00FF)); // keeps the low byte of every 16-bit lane
+    const uint16x8_t zero32 = vreinterpretq_u16_u32(vdupq_n_u32(0));
+    const uint16x8_t mask32 = vreinterpretq_u16_u32(vdupq_n_u32(0x0000FFFF)); // keeps the low 16 bits of every 32-bit lane
+
+    uint16x8_t even = vmulq_u16(vreinterpretq_u16_u8(vbslq_u8(mask, a.val, zero)), // products of the low bytes
+                                vreinterpretq_u16_u8(vbslq_u8(mask, b.val, zero)));
+    uint16x8_t odd  = vmulq_u16(vshrq_n_u16(vreinterpretq_u16_u8(a.val), 8), // products of the high bytes
+                                vshrq_n_u16(vreinterpretq_u16_u8(b.val), 8));
+
+    uint32x4_t s0 = vaddq_u32(vreinterpretq_u32_u16(vbslq_u16(mask32, even, zero32)), // low 16-bit product of each 32-bit lane
+                              vreinterpretq_u32_u16(vbslq_u16(mask32, odd,  zero32)));
+    uint32x4_t s1 = vaddq_u32(vshrq_n_u32(vreinterpretq_u32_u16(even), 16), // high 16-bit product of each 32-bit lane
+                              vshrq_n_u32(vreinterpretq_u32_u16(odd),  16));
+    return v_uint32x4(vaddq_u32(s0, s1)); // four products summed per 32-bit lane
+#endif
+}
+inline v_uint32x4 v_dotprod_expand(const v_uint8x16& a, const v_uint8x16& b,
+                                   const v_uint32x4& c)
+{
+#if CV_NEON_DOT
+    return v_uint32x4(vdotq_u32(c.val, a.val, b.val));
+#else
+    return v_dotprod_expand(a, b) + c;
+#endif
+}
+
+inline v_int32x4 v_dotprod_expand(const v_int8x16& a, const v_int8x16& b)
+{
+#if CV_NEON_DOT
+    return v_int32x4(vdotq_s32(vdupq_n_s32(0), a.val, b.val));
+#else
+    // Pairwise-add the 16-bit products straight into 32-bit lanes: adding them
+    // in 16 bit first wraps when a pair sums to 32768, i.e. (-128 * -128) * 2.
+    int32x4_t q0 = vpaddlq_s16(vmull_s8(vget_low_s8(a.val), vget_low_s8(b.val)));
+    int32x4_t q1 = vpaddlq_s16(vmull_s8(vget_high_s8(a.val), vget_high_s8(b.val)));
+    // fold neighbouring 32-bit sums: result lane k covers input lanes 4k .. 4k+3
+    int32x2_t lo = vpadd_s32(vget_low_s32(q0), vget_high_s32(q0));
+    int32x2_t hi = vpadd_s32(vget_low_s32(q1), vget_high_s32(q1));
+    return v_int32x4(vcombine_s32(lo, hi));
+#endif
+}
+inline v_int32x4 v_dotprod_expand(const v_int8x16& a, const v_int8x16& b,
+                                  const v_int32x4& c)
+{
+#if CV_NEON_DOT
+    return v_int32x4(vdotq_s32(c.val, a.val, b.val));
+#else
+    return v_dotprod_expand(a, b) + c;
+#endif
+}
+
+// 16 >> 64
+inline v_uint64x2 v_dotprod_expand(const v_uint16x8& a, const v_uint16x8& b)
+{
+    const uint16x8_t zero = vreinterpretq_u16_u32(vdupq_n_u32(0));
+    const uint16x8_t mask = vreinterpretq_u16_u32(vdupq_n_u32(0x0000FFFF)); // keeps the low 16 bits of every 32-bit lane
+
+    uint32x4_t even = vmulq_u32(vreinterpretq_u32_u16(vbslq_u16(mask, a.val, zero)), // products of the low 16-bit halves
+                                vreinterpretq_u32_u16(vbslq_u16(mask, b.val, zero)));
+    uint32x4_t odd  = vmulq_u32(vshrq_n_u32(vreinterpretq_u32_u16(a.val), 16), // products of the high 16-bit halves
+                                vshrq_n_u32(vreinterpretq_u32_u16(b.val), 16));
+    uint32x4_t uzp1, uzp2;
+    _v128_unzip(even, odd, uzp1, uzp2);
+    uint64x2_t s0  = vaddl_u32(vget_low_u32(uzp1), vget_high_u32(uzp1)); // widening add: sums are formed in 64 bit
+    uint64x2_t s1  = vaddl_u32(vget_low_u32(uzp2), vget_high_u32(uzp2));
+    return v_uint64x2(vaddq_u64(s0, s1));
+}
+inline v_uint64x2 v_dotprod_expand(const v_uint16x8& a, const v_uint16x8& b, const v_uint64x2& c)
+{ return v_dotprod_expand(a, b) + c; }
+
+inline v_int64x2 v_dotprod_expand(const v_int16x8& a, const v_int16x8& b)
+{
+    int32x4_t p0  = vmull_s16(vget_low_s16(a.val),  vget_low_s16(b.val));
+    int32x4_t p1  = vmull_s16(vget_high_s16(a.val), vget_high_s16(b.val));
+
+    // Widen while adding: summing a product pair in 32 bit wraps for
+    // (-32768 * -32768) * 2, although the 64-bit result can represent it.
+    int64x2_t q0 = vpaddlq_s32(p0);
+    int64x2_t q1 = vpaddlq_s32(p1);
+    int64x1_t lo = vadd_s64(vget_low_s64(q0), vget_high_s64(q0)); // lanes 0..3
+    int64x1_t hi = vadd_s64(vget_low_s64(q1), vget_high_s64(q1)); // lanes 4..7
+    return v_int64x2(vcombine_s64(lo, hi));
 }
+inline v_int64x2 v_dotprod_expand(const v_int16x8& a, const v_int16x8& b,
+                                  const v_int64x2& c)
+{ return v_dotprod_expand(a, b) + c; }
+
+// 32 >> 64f
+#if CV_SIMD128_64F
+inline v_float64x2 v_dotprod_expand(const v_int32x4& a, const v_int32x4& b)
+{ return v_cvt_f64(v_dotprod(a, b)); }
+inline v_float64x2 v_dotprod_expand(const v_int32x4& a,   const v_int32x4& b,
+                                    const v_float64x2& c)
+{ return v_dotprod_expand(a, b) + c; }
+#endif
+
+//////// Fast Dot Product ////////
+
+// 16 >> 32
+inline v_int32x4 v_dotprod_fast(const v_int16x8& a, const v_int16x8& b)
+{
+    // no unzip here: result lane k is a[k]*b[k] + a[k+4]*b[k+4]
+    const int16x4_t a_lo = vget_low_s16(a.val), a_hi = vget_high_s16(a.val);
+    const int16x4_t b_lo = vget_low_s16(b.val), b_hi = vget_high_s16(b.val);
+    int32x4_t acc = vmull_s16(a_lo, b_lo);
+    acc = vmlal_s16(acc, a_hi, b_hi);
+    return v_int32x4(acc);
+}
+inline v_int32x4 v_dotprod_fast(const v_int16x8& a, const v_int16x8& b, const v_int32x4& c)
+{
+    // same products as the two-argument overload, accumulated on top of c
+    const int16x4_t a_lo = vget_low_s16(a.val), a_hi = vget_high_s16(a.val);
+    const int16x4_t b_lo = vget_low_s16(b.val), b_hi = vget_high_s16(b.val);
+    int32x4_t acc = vmlal_s16(c.val, a_lo, b_lo);
+    acc = vmlal_s16(acc, a_hi, b_hi);
+    return v_int32x4(acc);
+}
+
+// 32 >> 64
+inline v_int64x2 v_dotprod_fast(const v_int32x4& a, const v_int32x4& b)
+{
+    // no unzip here: result lane k is a[k]*b[k] + a[k+2]*b[k+2]
+    const int32x2_t a_lo = vget_low_s32(a.val), a_hi = vget_high_s32(a.val);
+    const int32x2_t b_lo = vget_low_s32(b.val), b_hi = vget_high_s32(b.val);
+    int64x2_t acc = vmull_s32(a_lo, b_lo);
+    acc = vmlal_s32(acc, a_hi, b_hi);
+    return v_int64x2(acc);
+}
+inline v_int64x2 v_dotprod_fast(const v_int32x4& a, const v_int32x4& b, const v_int64x2& c)
+{
+    // same products as the two-argument overload, accumulated on top of c
+    const int32x2_t a_lo = vget_low_s32(a.val), a_hi = vget_high_s32(a.val);
+    const int32x2_t b_lo = vget_low_s32(b.val), b_hi = vget_high_s32(b.val);
+    int64x2_t acc = vmlal_s32(c.val, a_lo, b_lo);
+    acc = vmlal_s32(acc, a_hi, b_hi);
+    return v_int64x2(acc);
+}
+
+// 8 >> 32
+inline v_uint32x4 v_dotprod_expand_fast(const v_uint8x16& a, const v_uint8x16& b)
+{
+#if CV_NEON_DOT
+    return v_uint32x4(vdotq_u32(vdupq_n_u32(0), a.val, b.val));
+#else
+    const uint16x8_t prod_lo = vmull_u8(vget_low_u8(a.val), vget_low_u8(b.val));
+    const uint16x8_t prod_hi = vmull_u8(vget_high_u8(a.val), vget_high_u8(b.val));
+    const uint32x4_t sum_front = vaddl_u16(vget_low_u16(prod_lo), vget_low_u16(prod_hi));
+    const uint32x4_t sum_back = vaddl_u16(vget_high_u16(prod_lo), vget_high_u16(prod_hi));
+    return v_uint32x4(vaddq_u32(sum_front, sum_back));
+#endif
+}
+inline v_uint32x4 v_dotprod_expand_fast(const v_uint8x16& a, const v_uint8x16& b, const v_uint32x4& c)
+{
+#if CV_NEON_DOT // c is passed to the dot-product intrinsic as its accumulator
+    return v_uint32x4(vdotq_u32(c.val, a.val, b.val));
+#else // otherwise add c to the result of the two-argument overload
+    return v_dotprod_expand_fast(a, b) + c;
+#endif
+}
+
+inline v_int32x4 v_dotprod_expand_fast(const v_int8x16& a, const v_int8x16& b)
+{
+#if CV_NEON_DOT
+    return v_int32x4(vdotq_s32(vdupq_n_s32(0), a.val, b.val));
+#else
+    int16x8_t acc16 = vmull_s8(vget_low_s8(a.val), vget_low_s8(b.val));
+    acc16 = vmlal_s8(acc16, vget_high_s8(a.val), vget_high_s8(b.val)); // accumulation stays in 16-bit lanes
+    return v_int32x4(vaddl_s16(vget_low_s16(acc16), vget_high_s16(acc16)));
+#endif
+}
+inline v_int32x4 v_dotprod_expand_fast(const v_int8x16& a, const v_int8x16& b, const v_int32x4& c)
+{
+#if CV_NEON_DOT // c is passed to the dot-product intrinsic as its accumulator
+    return v_int32x4(vdotq_s32(c.val, a.val, b.val));
+#else // otherwise add c to the result of the two-argument overload
+    return v_dotprod_expand_fast(a, b) + c;
+#endif
+}
+
+// 16 >> 64
+inline v_uint64x2 v_dotprod_expand_fast(const v_uint16x8& a, const v_uint16x8& b)
+{
+    const uint32x4_t prod_lo = vmull_u16(vget_low_u16(a.val),  vget_low_u16(b.val));
+    const uint32x4_t prod_hi = vmull_u16(vget_high_u16(a.val), vget_high_u16(b.val));
+    const uint64x2_t sum_lo = vaddl_u32(vget_low_u32(prod_lo), vget_high_u32(prod_lo)); // widening adds
+    const uint64x2_t sum_hi = vaddl_u32(vget_low_u32(prod_hi), vget_high_u32(prod_hi));
+    return v_uint64x2(vaddq_u64(sum_lo, sum_hi));
+}
+inline v_uint64x2 v_dotprod_expand_fast(const v_uint16x8& a, const v_uint16x8& b, const v_uint64x2& c)
+{ return v_dotprod_expand_fast(a, b) + c; } // two-argument result plus c
+
+inline v_int64x2 v_dotprod_expand_fast(const v_int16x8& a, const v_int16x8& b)
+{
+    int32x4_t acc32 = vmull_s16(vget_low_s16(a.val), vget_low_s16(b.val));
+    acc32 = vmlal_s16(acc32, vget_high_s16(a.val), vget_high_s16(b.val)); // accumulation stays in 32-bit lanes
+    return v_int64x2(vaddl_s32(vget_low_s32(acc32), vget_high_s32(acc32)));
+}
+inline v_int64x2 v_dotprod_expand_fast(const v_int16x8& a, const v_int16x8& b, const v_int64x2& c)
+{ return v_dotprod_expand_fast(a, b) + c; } // two-argument result plus c
+
+// 32 >> 64f
+#if CV_SIMD128_64F
+inline v_float64x2 v_dotprod_expand_fast(const v_int32x4& a, const v_int32x4& b)
+{ return v_cvt_f64(v_dotprod_fast(a, b)); }
+inline v_float64x2 v_dotprod_expand_fast(const v_int32x4& a, const v_int32x4& b, const v_float64x2& c)
+{ return v_dotprod_expand_fast(a, b) + c; }
+#endif
+
 
 #define OPENCV_HAL_IMPL_NEON_LOGIC_OP(_Tpvec, suffix) \
     OPENCV_HAL_IMPL_NEON_BIN_OP(&, _Tpvec, vandq_##suffix) \
@@ -421,6 +872,18 @@ inline v_float32x4 operator ~ (const v_float32x4& a)
     return v_float32x4(vreinterpretq_f32_s32(vmvnq_s32(vreinterpretq_s32_f32(a.val))));
 }
 
+#if CV_SIMD128_64F
+// CV_SIMD128_64F branch: square root through the vsqrtq_f32 intrinsic
+inline v_float32x4 v_sqrt(const v_float32x4& x)
+{
+    return v_float32x4(vsqrtq_f32(x.val));
+}
+
+inline v_float32x4 v_invsqrt(const v_float32x4& x)
+{
+    return v_setall_f32(1.0f) / v_sqrt(x);
+}
+#else
 inline v_float32x4 v_sqrt(const v_float32x4& x)
 {
     float32x4_t x1 = vmaxq_f32(x.val, vdupq_n_f32(FLT_MIN));
@@ -437,10 +900,54 @@ inline v_float32x4 v_invsqrt(const v_float32x4& x)
     e = vmulq_f32(vrsqrtsq_f32(vmulq_f32(x.val, e), e), e);
     return v_float32x4(e);
 }
+#endif
+
+#define OPENCV_HAL_IMPL_NEON_ABS(_Tpuvec, _Tpsvec, usuffix, ssuffix) \
+inline _Tpuvec v_abs(const _Tpsvec& a) { return v_reinterpret_as_##usuffix(_Tpsvec(vabsq_##ssuffix(a.val))); }
+
+OPENCV_HAL_IMPL_NEON_ABS(v_uint8x16, v_int8x16, u8, s8)
+OPENCV_HAL_IMPL_NEON_ABS(v_uint16x8, v_int16x8, u16, s16)
+OPENCV_HAL_IMPL_NEON_ABS(v_uint32x4, v_int32x4, u32, s32)
 
 inline v_float32x4 v_abs(v_float32x4 x)
 { return v_float32x4(vabsq_f32(x.val)); }
 
+#if CV_SIMD128_64F
+#define OPENCV_HAL_IMPL_NEON_DBL_BIT_OP(bin_op, intrin) \
+inline v_float64x2 operator bin_op (const v_float64x2& a, const v_float64x2& b) \
+{ \
+    return v_float64x2(vreinterpretq_f64_s64(intrin(vreinterpretq_s64_f64(a.val), vreinterpretq_s64_f64(b.val)))); \
+} \
+inline v_float64x2& operator bin_op##= (v_float64x2& a, const v_float64x2& b) \
+{ \
+    a.val = vreinterpretq_f64_s64(intrin(vreinterpretq_s64_f64(a.val), vreinterpretq_s64_f64(b.val))); \
+    return a; \
+}
+
+OPENCV_HAL_IMPL_NEON_DBL_BIT_OP(&, vandq_s64)
+OPENCV_HAL_IMPL_NEON_DBL_BIT_OP(|, vorrq_s64)
+OPENCV_HAL_IMPL_NEON_DBL_BIT_OP(^, veorq_s64)
+
+inline v_float64x2 operator ~ (const v_float64x2& a)
+{
+    return v_float64x2(vreinterpretq_f64_s32(vmvnq_s32(vreinterpretq_s32_f64(a.val)))); // bitwise NOT carried out on the 32-bit integer view
+}
+
+// double-precision counterparts, built on vsqrtq_f64 / vabsq_f64
+inline v_float64x2 v_sqrt(const v_float64x2& x)
+{ return v_float64x2(vsqrtq_f64(x.val)); }
+
+inline v_float64x2 v_invsqrt(const v_float64x2& x)
+{
+    // 1 / sqrt(x) by plain division
+    return v_setall_f64(1.0) / v_sqrt(x);
+}
+
+inline v_float64x2 v_abs(v_float64x2 x)
+{
+    return v_float64x2(vabsq_f64(x.val));
+}
+#endif
+
 // TODO: exp, log, sin, cos
 
 #define OPENCV_HAL_IMPL_NEON_BIN_FUNC(_Tpvec, func, intrin) \
@@ -463,8 +970,23 @@ OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int32x4, v_min, vminq_s32)
 OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int32x4, v_max, vmaxq_s32)
 OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_float32x4, v_min, vminq_f32)
 OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_float32x4, v_max, vmaxq_f32)
+#if CV_SIMD128_64F
+OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_float64x2, v_min, vminq_f64)
+OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_float64x2, v_max, vmaxq_f64)
+#endif
 
-
+#if CV_SIMD128_64F
+inline int64x2_t vmvnq_s64(int64x2_t a)
+{
+    const int64x2_t all_ones = vdupq_n_s64(-1); // every bit set in both lanes
+    return veorq_s64(a, all_ones);              // XOR with all-ones == bitwise NOT
+}
+inline uint64x2_t vmvnq_u64(uint64x2_t a)
+{
+    const uint64x2_t all_ones = vdupq_n_u64(0xFFFFFFFFFFFFFFFFull); // every bit set in both lanes
+    return veorq_u64(a, all_ones);                                  // XOR with all-ones == bitwise NOT
+}
+#endif
 #define OPENCV_HAL_IMPL_NEON_INT_CMP_OP(_Tpvec, cast, suffix, not_suffix) \
 inline _Tpvec operator == (const _Tpvec& a, const _Tpvec& b) \
 { return _Tpvec(cast(vceqq_##suffix(a.val, b.val))); } \
@@ -486,6 +1008,18 @@ OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_int16x8, vreinterpretq_s16_u16, s16, u16)
 OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_uint32x4, OPENCV_HAL_NOP, u32, u32)
 OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_int32x4, vreinterpretq_s32_u32, s32, u32)
 OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_float32x4, vreinterpretq_f32_u32, f32, u32)
+#if CV_SIMD128_64F
+OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_uint64x2, OPENCV_HAL_NOP, u64, u64)
+OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_int64x2, vreinterpretq_s64_u64, s64, u64)
+OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_float64x2, vreinterpretq_f64_u64, f64, u64)
+#endif
+
+inline v_float32x4 v_not_nan(const v_float32x4& a) // per-lane mask of (a == a): all bits set for ordinary values, zero for NaN lanes
+{ return v_float32x4(vreinterpretq_f32_u32(vceqq_f32(a.val, a.val))); }
+#if CV_SIMD128_64F
+inline v_float64x2 v_not_nan(const v_float64x2& a) // double-precision counterpart: same self-comparison on two 64-bit lanes
+{ return v_float64x2(vreinterpretq_f64_u64(vceqq_f64(a.val, a.val))); }
+#endif
 
 OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint8x16, v_add_wrap, vaddq_u8)
 OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int8x16, v_add_wrap, vaddq_s8)
@@ -495,12 +1029,24 @@ OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint8x16, v_sub_wrap, vsubq_u8)
 OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int8x16, v_sub_wrap, vsubq_s8)
 OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint16x8, v_sub_wrap, vsubq_u16)
 OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int16x8, v_sub_wrap, vsubq_s16)
+OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint8x16, v_mul_wrap, vmulq_u8)
+OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int8x16, v_mul_wrap, vmulq_s8)
+OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint16x8, v_mul_wrap, vmulq_u16)
+OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int16x8, v_mul_wrap, vmulq_s16)
 
-// TODO: absdiff for signed integers
 OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint8x16, v_absdiff, vabdq_u8)
 OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint16x8, v_absdiff, vabdq_u16)
 OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint32x4, v_absdiff, vabdq_u32)
 OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_float32x4, v_absdiff, vabdq_f32)
+#if CV_SIMD128_64F
+OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_float64x2, v_absdiff, vabdq_f64)
+#endif
+
+/** Saturating absolute difference **/
+inline v_int8x16 v_absdiffs(const v_int8x16& a, const v_int8x16& b)
+{ return v_int8x16(vqabsq_s8(vqsubq_s8(a.val, b.val))); } // saturating subtract, then saturating abs, so the result stays in the signed 8-bit range
+inline v_int16x8 v_absdiffs(const v_int16x8& a, const v_int16x8& b)
+{ return v_int16x8(vqabsq_s16(vqsubq_s16(a.val, b.val))); } // 16-bit variant of the same two-step saturation
 
 #define OPENCV_HAL_IMPL_NEON_BIN_FUNC2(_Tpvec, _Tpvec2, cast, func, intrin) \
 inline _Tpvec2 func(const _Tpvec& a, const _Tpvec& b) \
@@ -523,10 +1069,54 @@ inline v_float32x4 v_sqr_magnitude(const v_float32x4& a, const v_float32x4& b)
     return v_float32x4(vmlaq_f32(vmulq_f32(a.val, a.val), b.val, b.val));
 }
 
-inline v_float32x4 v_muladd(const v_float32x4& a, const v_float32x4& b, const v_float32x4& c)
+inline v_float32x4 v_fma(const v_float32x4& a, const v_float32x4& b, const v_float32x4& c)
 {
+#if CV_SIMD128_64F
+    // ARMv8, which adds support for 64-bit floating-point (so CV_SIMD128_64F is defined),
+    // also adds FMA support both for single- and double-precision floating-point vectors
+    return v_float32x4(vfmaq_f32(c.val, a.val, b.val));
+#else
     return v_float32x4(vmlaq_f32(c.val, a.val, b.val));
+#endif
+}
+
+inline v_int32x4 v_fma(const v_int32x4& a, const v_int32x4& b, const v_int32x4& c)
+{
+    return v_int32x4(vmlaq_s32(c.val, a.val, b.val)); // integer multiply-accumulate: c + a * b per lane
+}
+
+inline v_float32x4 v_muladd(const v_float32x4& a, const v_float32x4& b, const v_float32x4& c)
+{
+    return v_fma(a, b, c);
+}
+
+inline v_int32x4 v_muladd(const v_int32x4& a, const v_int32x4& b, const v_int32x4& c)
+{
+    return v_fma(a, b, c);
+}
+
+#if CV_SIMD128_64F
+inline v_float64x2 v_magnitude(const v_float64x2& a, const v_float64x2& b)
+{
+    const float64x2_t sum_sq = vaddq_f64(vmulq_f64(a.val, a.val), vmulq_f64(b.val, b.val)); // a*a + b*b per lane
+    return v_sqrt(v_float64x2(sum_sq));
+}
+
+inline v_float64x2 v_sqr_magnitude(const v_float64x2& a, const v_float64x2& b)
+{
+    return v_float64x2(vaddq_f64(vmulq_f64(a.val, a.val), vmulq_f64(b.val, b.val)));
+}
+
+inline v_float64x2 v_fma(const v_float64x2& a, const v_float64x2& b, const v_float64x2& c)
+{
+    return v_float64x2(vfmaq_f64(c.val, a.val, b.val));
+}
+
+inline v_float64x2 v_muladd(const v_float64x2& a, const v_float64x2& b, const v_float64x2& c)
+{
+    return v_fma(a, b, c);
 }
+#endif
 
 // trade efficiency for convenience
 #define OPENCV_HAL_IMPL_NEON_SHIFT_OP(_Tpvec, suffix, _Tps, ssuffix) \
@@ -550,17 +1140,64 @@ OPENCV_HAL_IMPL_NEON_SHIFT_OP(v_int32x4, s32, int, s32)
 OPENCV_HAL_IMPL_NEON_SHIFT_OP(v_uint64x2, u64, int64, s64)
 OPENCV_HAL_IMPL_NEON_SHIFT_OP(v_int64x2, s64, int64, s64)
 
+#define OPENCV_HAL_IMPL_NEON_ROTATE_OP(_Tpvec, suffix) \
+template<int n> inline _Tpvec v_rotate_right(const _Tpvec& a) /* lanes move n positions toward lane 0; vacated upper lanes are filled with 0 */ \
+{ return _Tpvec(vextq_##suffix(a.val, vdupq_n_##suffix(0), n)); } \
+template<int n> inline _Tpvec v_rotate_left(const _Tpvec& a) /* lanes move n positions away from lane 0; vacated lower lanes are filled with 0 */ \
+{ return _Tpvec(vextq_##suffix(vdupq_n_##suffix(0), a.val, _Tpvec::nlanes - n)); } \
+template<> inline _Tpvec v_rotate_left<0>(const _Tpvec& a) /* n == 0 is specialized: the generic form would pass nlanes as the vext index */ \
+{ return a; } \
+template<int n> inline _Tpvec v_rotate_right(const _Tpvec& a, const _Tpvec& b) /* two-operand form: the low lanes of b are shifted in at the top instead of zeros */ \
+{ return _Tpvec(vextq_##suffix(a.val, b.val, n)); } \
+template<int n> inline _Tpvec v_rotate_left(const _Tpvec& a, const _Tpvec& b) /* two-operand form: the high lanes of b are shifted in at the bottom instead of zeros */ \
+{ return _Tpvec(vextq_##suffix(b.val, a.val, _Tpvec::nlanes - n)); } \
+template<> inline _Tpvec v_rotate_left<0>(const _Tpvec& a, const _Tpvec& b) /* n == 0: result is a unchanged, b is ignored */ \
+{ CV_UNUSED(b); return a; }
+
+OPENCV_HAL_IMPL_NEON_ROTATE_OP(v_uint8x16, u8)
+OPENCV_HAL_IMPL_NEON_ROTATE_OP(v_int8x16, s8)
+OPENCV_HAL_IMPL_NEON_ROTATE_OP(v_uint16x8, u16)
+OPENCV_HAL_IMPL_NEON_ROTATE_OP(v_int16x8, s16)
+OPENCV_HAL_IMPL_NEON_ROTATE_OP(v_uint32x4, u32)
+OPENCV_HAL_IMPL_NEON_ROTATE_OP(v_int32x4, s32)
+OPENCV_HAL_IMPL_NEON_ROTATE_OP(v_float32x4, f32)
+OPENCV_HAL_IMPL_NEON_ROTATE_OP(v_uint64x2, u64)
+OPENCV_HAL_IMPL_NEON_ROTATE_OP(v_int64x2, s64)
+#if CV_SIMD128_64F
+OPENCV_HAL_IMPL_NEON_ROTATE_OP(v_float64x2, f64)
+#endif
+
+#if defined(__clang__) && defined(__aarch64__)
+// avoid LD2 instruction. details: https://github.com/opencv/opencv/issues/14863
+#define OPENCV_HAL_IMPL_NEON_LOAD_LOW_OP(_Tpvec, _Tp, suffix) \
+inline _Tpvec v_load_low(const _Tp* ptr) \
+{ \
+typedef uint64 CV_DECL_ALIGNED(1) unaligned_uint64; /* alias declared with alignment 1, used for the 64-bit read from ptr */ \
+uint64 v = *(unaligned_uint64*)ptr; /* the 8 bytes at ptr become the low half of the vector */ \
+return _Tpvec(v_reinterpret_as_##suffix(v_uint64x2(v, (uint64)123456))); /* NOTE(review): upper half is the constant 123456 here but zero in the #else branch; presumably callers must not rely on it - confirm */ \
+}
+#else
+#define OPENCV_HAL_IMPL_NEON_LOAD_LOW_OP(_Tpvec, _Tp, suffix) \
+inline _Tpvec v_load_low(const _Tp* ptr) \
+{ return _Tpvec(vcombine_##suffix(vld1_##suffix(ptr), vdup_n_##suffix((_Tp)0))); } // low half loaded from ptr, high half zero-filled
+#endif
+
 #define OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(_Tpvec, _Tp, suffix) \
 inline _Tpvec v_load(const _Tp* ptr) \
 { return _Tpvec(vld1q_##suffix(ptr)); } \
 inline _Tpvec v_load_aligned(const _Tp* ptr) \
 { return _Tpvec(vld1q_##suffix(ptr)); } \
+OPENCV_HAL_IMPL_NEON_LOAD_LOW_OP(_Tpvec, _Tp, suffix) \
 inline _Tpvec v_load_halves(const _Tp* ptr0, const _Tp* ptr1) \
 { return _Tpvec(vcombine_##suffix(vld1_##suffix(ptr0), vld1_##suffix(ptr1))); } \
 inline void v_store(_Tp* ptr, const _Tpvec& a) \
 { vst1q_##suffix(ptr, a.val); } \
 inline void v_store_aligned(_Tp* ptr, const _Tpvec& a) \
 { vst1q_##suffix(ptr, a.val); } \
+inline void v_store_aligned_nocache(_Tp* ptr, const _Tpvec& a) \
+{ vst1q_##suffix(ptr, a.val); } \
+inline void v_store(_Tp* ptr, const _Tpvec& a, hal::StoreMode /*mode*/) \
+{ vst1q_##suffix(ptr, a.val); } \
 inline void v_store_low(_Tp* ptr, const _Tpvec& a) \
 { vst1_##suffix(ptr, vget_low_##suffix(a.val)); } \
 inline void v_store_high(_Tp* ptr, const _Tpvec& a) \
@@ -575,26 +1212,164 @@ OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_int32x4, int, s32)
 OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_uint64x2, uint64, u64)
 OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_int64x2, int64, s64)
 OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_float32x4, float, f32)
+#if CV_SIMD128_64F
+OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_float64x2, double, f64)
+#endif
+
+inline unsigned v_reduce_sum(const v_uint8x16& a)
+{
+    const uint32x4_t quads = vpaddlq_u16(vpaddlq_u8(a.val)); // widening pairwise adds: 16 x u8 -> 8 x u16 -> 4 x u32
+    const uint32x2_t pairs = vpadd_u32(vget_low_u32(quads), vget_high_u32(quads));
+    return vget_lane_u32(vpadd_u32(pairs, pairs), 0);        // lane 0 holds the total
+}
+inline int v_reduce_sum(const v_int8x16& a)
+{
+    const int32x4_t quads = vpaddlq_s16(vpaddlq_s8(a.val));  // widening pairwise adds: 16 x s8 -> 8 x s16 -> 4 x s32
+    const int32x2_t pairs = vpadd_s32(vget_low_s32(quads), vget_high_s32(quads));
+    return vget_lane_s32(vpadd_s32(pairs, pairs), 0);        // lane 0 holds the total
+}
+inline unsigned v_reduce_sum(const v_uint16x8& a)
+{
+    const uint32x4_t quads = vpaddlq_u16(a.val);             // widening pairwise add: 8 x u16 -> 4 x u32
+    const uint32x2_t pairs = vpadd_u32(vget_low_u32(quads), vget_high_u32(quads));
+    return vget_lane_u32(vpadd_u32(pairs, pairs), 0);        // lane 0 holds the total
+}
+inline int v_reduce_sum(const v_int16x8& a)
+{
+    const int32x4_t quads = vpaddlq_s16(a.val);              // widening pairwise add: 8 x s16 -> 4 x s32
+    const int32x2_t pairs = vpadd_s32(vget_low_s32(quads), vget_high_s32(quads));
+    return vget_lane_s32(vpadd_s32(pairs, pairs), 0);        // lane 0 holds the total
+}
 
-#define OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(_Tpvec, scalartype, func, scalar_func) \
+#define OPENCV_HAL_IMPL_NEON_REDUCE_OP_16(_Tpvec, _Tpnvec, scalartype, func, vectorfunc, suffix) \
 inline scalartype v_reduce_##func(const _Tpvec& a) \
 { \
-    scalartype CV_DECL_ALIGNED(16) buf[4]; \
-    v_store_aligned(buf, a); \
-    scalartype s0 = scalar_func(buf[0], buf[1]); \
-    scalartype s1 = scalar_func(buf[2], buf[3]); \
-    return scalar_func(s0, s1); \
+    _Tpnvec##_t a0 = vp##vectorfunc##_##suffix(vget_low_##suffix(a.val), vget_high_##suffix(a.val)); \
+    a0 = vp##vectorfunc##_##suffix(a0, a0); \
+    a0 = vp##vectorfunc##_##suffix(a0, a0); \
+    return (scalartype)vget_lane_##suffix(vp##vectorfunc##_##suffix(a0, a0),0); \
 }
 
-OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_uint32x4, unsigned, sum, OPENCV_HAL_ADD)
-OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_uint32x4, unsigned, max, std::max)
-OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_uint32x4, unsigned, min, std::min)
-OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_int32x4, int, sum, OPENCV_HAL_ADD)
-OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_int32x4, int, max, std::max)
-OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_int32x4, int, min, std::min)
-OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_float32x4, float, sum, OPENCV_HAL_ADD)
-OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_float32x4, float, max, std::max)
-OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_float32x4, float, min, std::min)
+OPENCV_HAL_IMPL_NEON_REDUCE_OP_16(v_uint8x16, uint8x8, uchar, max, max, u8)
+OPENCV_HAL_IMPL_NEON_REDUCE_OP_16(v_uint8x16, uint8x8, uchar, min, min, u8)
+OPENCV_HAL_IMPL_NEON_REDUCE_OP_16(v_int8x16, int8x8, schar, max, max, s8)
+OPENCV_HAL_IMPL_NEON_REDUCE_OP_16(v_int8x16, int8x8, schar, min, min, s8)
+
+#define OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(_Tpvec, _Tpnvec, scalartype, func, vectorfunc, suffix) \
+inline scalartype v_reduce_##func(const _Tpvec& a) \
+{ \
+    _Tpnvec##_t a0 = vp##vectorfunc##_##suffix(vget_low_##suffix(a.val), vget_high_##suffix(a.val)); \
+    a0 = vp##vectorfunc##_##suffix(a0, a0); \
+    return (scalartype)vget_lane_##suffix(vp##vectorfunc##_##suffix(a0, a0),0); \
+}
+
+OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_uint16x8, uint16x4, ushort, max, max, u16)
+OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_uint16x8, uint16x4, ushort, min, min, u16)
+OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_int16x8, int16x4, short, max, max, s16)
+OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_int16x8, int16x4, short, min, min, s16)
+
+#define OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(_Tpvec, _Tpnvec, scalartype, func, vectorfunc, suffix) \
+inline scalartype v_reduce_##func(const _Tpvec& a) /* horizontal reduction (pairwise add/min/max) of a 4-lane vector to one scalar */ \
+{ \
+    _Tpnvec##_t a0 = vp##vectorfunc##_##suffix(vget_low_##suffix(a.val), vget_high_##suffix(a.val)); /* { op(a[0],a[1]), op(a[2],a[3]) } */ \
+    return (scalartype)vget_lane_##suffix(vp##vectorfunc##_##suffix(a0, a0),0); /* lane 0 = op of both partials; second operand only feeds lane 1, so (a0, a0) mirrors the 8/16-lane variants */ \
+}
+
+OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_uint32x4, uint32x2, unsigned, sum, add, u32)
+OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_uint32x4, uint32x2, unsigned, max, max, u32)
+OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_uint32x4, uint32x2, unsigned, min, min, u32)
+OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_int32x4, int32x2, int, sum, add, s32)
+OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_int32x4, int32x2, int, max, max, s32)
+OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_int32x4, int32x2, int, min, min, s32)
+OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_float32x4, float32x2, float, sum, add, f32)
+OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_float32x4, float32x2, float, max, max, f32)
+OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_float32x4, float32x2, float, min, min, f32)
+
+inline uint64 v_reduce_sum(const v_uint64x2& a)
+{ return vget_lane_u64(vadd_u64(vget_low_u64(a.val), vget_high_u64(a.val)),0); }
+inline int64 v_reduce_sum(const v_int64x2& a)
+{ return vget_lane_s64(vadd_s64(vget_low_s64(a.val), vget_high_s64(a.val)),0); }
+#if CV_SIMD128_64F
+inline double v_reduce_sum(const v_float64x2& a)
+{
+    return vgetq_lane_f64(a.val, 0) + vgetq_lane_f64(a.val, 1);
+}
+#endif
+
+inline v_float32x4 v_reduce_sum4(const v_float32x4& a, const v_float32x4& b,
+                                 const v_float32x4& c, const v_float32x4& d)
+{
+    float32x4x2_t ab = vtrnq_f32(a.val, b.val);
+    float32x4x2_t cd = vtrnq_f32(c.val, d.val);
+
+    float32x4_t u0 = vaddq_f32(ab.val[0], ab.val[1]); // a0+a1 b0+b1 a2+a3 b2+b3
+    float32x4_t u1 = vaddq_f32(cd.val[0], cd.val[1]); // c0+c1 d0+d1 c2+c3 d2+d3
+
+    float32x4_t v0 = vcombine_f32(vget_low_f32(u0), vget_low_f32(u1));
+    float32x4_t v1 = vcombine_f32(vget_high_f32(u0), vget_high_f32(u1));
+
+    return v_float32x4(vaddq_f32(v0, v1));
+}
+
+inline unsigned v_reduce_sad(const v_uint8x16& a, const v_uint8x16& b)
+{
+    uint32x4_t t0 = vpaddlq_u16(vpaddlq_u8(vabdq_u8(a.val, b.val)));
+    uint32x2_t t1 = vpadd_u32(vget_low_u32(t0), vget_high_u32(t0));
+    return vget_lane_u32(vpadd_u32(t1, t1), 0);
+}
+inline unsigned v_reduce_sad(const v_int8x16& a, const v_int8x16& b)
+{
+    uint32x4_t t0 = vpaddlq_u16(vpaddlq_u8(vreinterpretq_u8_s8(vabdq_s8(a.val, b.val))));
+    uint32x2_t t1 = vpadd_u32(vget_low_u32(t0), vget_high_u32(t0));
+    return vget_lane_u32(vpadd_u32(t1, t1), 0);
+}
+inline unsigned v_reduce_sad(const v_uint16x8& a, const v_uint16x8& b)
+{
+    uint32x4_t t0 = vpaddlq_u16(vabdq_u16(a.val, b.val));
+    uint32x2_t t1 = vpadd_u32(vget_low_u32(t0), vget_high_u32(t0));
+    return vget_lane_u32(vpadd_u32(t1, t1), 0);
+}
+inline unsigned v_reduce_sad(const v_int16x8& a, const v_int16x8& b)
+{
+    uint32x4_t t0 = vpaddlq_u16(vreinterpretq_u16_s16(vabdq_s16(a.val, b.val)));
+    uint32x2_t t1 = vpadd_u32(vget_low_u32(t0), vget_high_u32(t0));
+    return vget_lane_u32(vpadd_u32(t1, t1), 0);
+}
+inline unsigned v_reduce_sad(const v_uint32x4& a, const v_uint32x4& b)
+{
+    uint32x4_t t0 = vabdq_u32(a.val, b.val);
+    uint32x2_t t1 = vpadd_u32(vget_low_u32(t0), vget_high_u32(t0));
+    return vget_lane_u32(vpadd_u32(t1, t1), 0);
+}
+inline unsigned v_reduce_sad(const v_int32x4& a, const v_int32x4& b)
+{
+    uint32x4_t t0 = vreinterpretq_u32_s32(vabdq_s32(a.val, b.val));
+    uint32x2_t t1 = vpadd_u32(vget_low_u32(t0), vget_high_u32(t0));
+    return vget_lane_u32(vpadd_u32(t1, t1), 0);
+}
+inline float v_reduce_sad(const v_float32x4& a, const v_float32x4& b)
+{
+    float32x4_t t0 = vabdq_f32(a.val, b.val);
+    float32x2_t t1 = vpadd_f32(vget_low_f32(t0), vget_high_f32(t0));
+    return vget_lane_f32(vpadd_f32(t1, t1), 0);
+}
+
+inline v_uint8x16 v_popcount(const v_uint8x16& a) // vcntq_u8 counts the set bits of every byte
+{ return v_uint8x16(vcntq_u8(a.val)); }
+inline v_uint8x16 v_popcount(const v_int8x16& a) // signed input is reinterpreted as bytes; the count is returned unsigned
+{ return v_uint8x16(vcntq_u8(vreinterpretq_u8_s8(a.val))); }
+inline v_uint16x8 v_popcount(const v_uint16x8& a) // per-byte counts, then one widening pairwise add merges them per 16-bit lane
+{ return v_uint16x8(vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u16(a.val)))); }
+inline v_uint16x8 v_popcount(const v_int16x8& a)
+{ return v_uint16x8(vpaddlq_u8(vcntq_u8(vreinterpretq_u8_s16(a.val)))); }
+inline v_uint32x4 v_popcount(const v_uint32x4& a) // two widening pairwise adds: byte counts -> 16-bit -> 32-bit lanes
+{ return v_uint32x4(vpaddlq_u16(vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u32(a.val))))); }
+inline v_uint32x4 v_popcount(const v_int32x4& a)
+{ return v_uint32x4(vpaddlq_u16(vpaddlq_u8(vcntq_u8(vreinterpretq_u8_s32(a.val))))); }
+inline v_uint64x2 v_popcount(const v_uint64x2& a) // three widening pairwise adds: byte counts -> 16-bit -> 32-bit -> 64-bit lanes
+{ return v_uint64x2(vpaddlq_u32(vpaddlq_u16(vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(a.val)))))); }
+inline v_uint64x2 v_popcount(const v_int64x2& a)
+{ return v_uint64x2(vpaddlq_u32(vpaddlq_u16(vpaddlq_u8(vcntq_u8(vreinterpretq_u8_s64(a.val)))))); }
 
 inline int v_signmask(const v_uint8x16& a)
 {
@@ -627,6 +1402,31 @@ inline int v_signmask(const v_int32x4& a)
 { return v_signmask(v_reinterpret_as_u32(a)); }
 inline int v_signmask(const v_float32x4& a)
 { return v_signmask(v_reinterpret_as_u32(a)); }
+inline int v_signmask(const v_uint64x2& a)
+{
+    // Bring each lane's top bit down to bit 0 (no further per-lane shift is needed); lane i then supplies bit i of the mask.
+    uint64x2_t v0 = vshrq_n_u64(a.val, 63);
+    return (int)vgetq_lane_u64(v0, 0) + ((int)vgetq_lane_u64(v0, 1) << 1);
+}
+inline int v_signmask(const v_int64x2& a)
+{ return v_signmask(v_reinterpret_as_u64(a)); }
+#if CV_SIMD128_64F
+inline int v_signmask(const v_float64x2& a)
+{ return v_signmask(v_reinterpret_as_u64(a)); }
+#endif
+
+inline int v_scan_forward(const v_int8x16& a) { return trailingZeros32(v_signmask(a)); } // trailingZeros32 of the sign mask - presumably the index of the first lane whose top bit is set
+inline int v_scan_forward(const v_uint8x16& a) { return trailingZeros32(v_signmask(a)); }
+inline int v_scan_forward(const v_int16x8& a) { return trailingZeros32(v_signmask(a)); }
+inline int v_scan_forward(const v_uint16x8& a) { return trailingZeros32(v_signmask(a)); }
+inline int v_scan_forward(const v_int32x4& a) { return trailingZeros32(v_signmask(a)); }
+inline int v_scan_forward(const v_uint32x4& a) { return trailingZeros32(v_signmask(a)); }
+inline int v_scan_forward(const v_float32x4& a) { return trailingZeros32(v_signmask(a)); }
+inline int v_scan_forward(const v_int64x2& a) { return trailingZeros32(v_signmask(a)); }
+inline int v_scan_forward(const v_uint64x2& a) { return trailingZeros32(v_signmask(a)); }
+#if CV_SIMD128_64F
+inline int v_scan_forward(const v_float64x2& a) { return trailingZeros32(v_signmask(a)); } // NOTE(review): result for an all-zero mask depends on trailingZeros32 - confirm callers guard that case
+#endif
+#endif
 
 #define OPENCV_HAL_IMPL_NEON_CHECK_ALLANY(_Tpvec, suffix, shift) \
 inline bool v_check_all(const v_##_Tpvec& a) \
@@ -646,6 +1446,17 @@ OPENCV_HAL_IMPL_NEON_CHECK_ALLANY(uint8x16, u8, 7)
 OPENCV_HAL_IMPL_NEON_CHECK_ALLANY(uint16x8, u16, 15)
 OPENCV_HAL_IMPL_NEON_CHECK_ALLANY(uint32x4, u32, 31)
 
+inline bool v_check_all(const v_uint64x2& a)
+{
+    const uint64x2_t top_bits = vshrq_n_u64(a.val, 63); // each lane becomes 0 or 1 (its former top bit)
+    return vgetq_lane_u64(top_bits, 0) == 1 && vgetq_lane_u64(top_bits, 1) == 1;
+}
+inline bool v_check_any(const v_uint64x2& a)
+{
+    const uint64x2_t top_bits = vshrq_n_u64(a.val, 63); // each lane becomes 0 or 1 (its former top bit)
+    return vgetq_lane_u64(top_bits, 0) != 0 || vgetq_lane_u64(top_bits, 1) != 0;
+}
+
 inline bool v_check_all(const v_int8x16& a)
 { return v_check_all(v_reinterpret_as_u8(a)); }
 inline bool v_check_all(const v_int16x8& a)
@@ -664,6 +1475,17 @@ inline bool v_check_any(const v_int32x4& a)
 inline bool v_check_any(const v_float32x4& a)
 { return v_check_any(v_reinterpret_as_u32(a)); }
 
+inline bool v_check_all(const v_int64x2& a)
+{ return v_check_all(v_reinterpret_as_u64(a)); }
+inline bool v_check_any(const v_int64x2& a)
+{ return v_check_any(v_reinterpret_as_u64(a)); }
+#if CV_SIMD128_64F
+inline bool v_check_all(const v_float64x2& a)
+{ return v_check_all(v_reinterpret_as_u64(a)); }
+inline bool v_check_any(const v_float64x2& a)
+{ return v_check_any(v_reinterpret_as_u64(a)); }
+#endif
+
 #define OPENCV_HAL_IMPL_NEON_SELECT(_Tpvec, suffix, usuffix) \
 inline _Tpvec v_select(const _Tpvec& mask, const _Tpvec& a, const _Tpvec& b) \
 { \
@@ -677,6 +1499,9 @@ OPENCV_HAL_IMPL_NEON_SELECT(v_int16x8, s16, u16)
 OPENCV_HAL_IMPL_NEON_SELECT(v_uint32x4, u32, u32)
 OPENCV_HAL_IMPL_NEON_SELECT(v_int32x4, s32, u32)
 OPENCV_HAL_IMPL_NEON_SELECT(v_float32x4, f32, u32)
+#if CV_SIMD128_64F
+OPENCV_HAL_IMPL_NEON_SELECT(v_float64x2, f64, u64)
+#endif
 
 #define OPENCV_HAL_IMPL_NEON_EXPAND(_Tpvec, _Tpwvec, _Tp, suffix) \
 inline void v_expand(const _Tpvec& a, _Tpwvec& b0, _Tpwvec& b1) \
@@ -684,6 +1509,14 @@ inline void v_expand(const _Tpvec& a, _Tpwvec& b0, _Tpwvec& b1) \
     b0.val = vmovl_##suffix(vget_low_##suffix(a.val)); \
     b1.val = vmovl_##suffix(vget_high_##suffix(a.val)); \
 } \
+inline _Tpwvec v_expand_low(const _Tpvec& a) \
+{ \
+    return _Tpwvec(vmovl_##suffix(vget_low_##suffix(a.val))); \
+} \
+inline _Tpwvec v_expand_high(const _Tpvec& a) \
+{ \
+    return _Tpwvec(vmovl_##suffix(vget_high_##suffix(a.val))); \
+} \
 inline _Tpwvec v_load_expand(const _Tp* ptr) \
 { \
     return _Tpwvec(vmovl_##suffix(vld1_##suffix(ptr))); \
@@ -698,18 +1531,41 @@ OPENCV_HAL_IMPL_NEON_EXPAND(v_int32x4, v_int64x2, int, s32)
 
 inline v_uint32x4 v_load_expand_q(const uchar* ptr)
 {
-    uint8x8_t v0 = vcreate_u8(*(unsigned*)ptr);
+    typedef unsigned int CV_DECL_ALIGNED(1) unaligned_uint;
+    uint8x8_t v0 = vcreate_u8(*(unaligned_uint*)ptr);
     uint16x4_t v1 = vget_low_u16(vmovl_u8(v0));
     return v_uint32x4(vmovl_u16(v1));
 }
 
 inline v_int32x4 v_load_expand_q(const schar* ptr)
 {
-    int8x8_t v0 = vcreate_s8(*(unsigned*)ptr);
+    typedef unsigned int CV_DECL_ALIGNED(1) unaligned_uint;
+    int8x8_t v0 = vcreate_s8(*(unaligned_uint*)ptr);
     int16x4_t v1 = vget_low_s16(vmovl_s8(v0));
     return v_int32x4(vmovl_s16(v1));
 }
 
+#if defined(__aarch64__) || defined(_M_ARM64)
+#define OPENCV_HAL_IMPL_NEON_UNPACKS(_Tpvec, suffix) \
+inline void v_zip(const v_##_Tpvec& a0, const v_##_Tpvec& a1, v_##_Tpvec& b0, v_##_Tpvec& b1) \
+{ \
+    b0.val = vzip1q_##suffix(a0.val, a1.val); \
+    b1.val = vzip2q_##suffix(a0.val, a1.val); \
+} \
+inline v_##_Tpvec v_combine_low(const v_##_Tpvec& a, const v_##_Tpvec& b) \
+{ \
+    return v_##_Tpvec(vcombine_##suffix(vget_low_##suffix(a.val), vget_low_##suffix(b.val))); \
+} \
+inline v_##_Tpvec v_combine_high(const v_##_Tpvec& a, const v_##_Tpvec& b) \
+{ \
+    return v_##_Tpvec(vcombine_##suffix(vget_high_##suffix(a.val), vget_high_##suffix(b.val))); \
+} \
+inline void v_recombine(const v_##_Tpvec& a, const v_##_Tpvec& b, v_##_Tpvec& c, v_##_Tpvec& d) \
+{ \
+    c.val = vcombine_##suffix(vget_low_##suffix(a.val), vget_low_##suffix(b.val)); \
+    d.val = vcombine_##suffix(vget_high_##suffix(a.val), vget_high_##suffix(b.val)); \
+}
+#else
 #define OPENCV_HAL_IMPL_NEON_UNPACKS(_Tpvec, suffix) \
 inline void v_zip(const v_##_Tpvec& a0, const v_##_Tpvec& a1, v_##_Tpvec& b0, v_##_Tpvec& b1) \
 { \
@@ -730,6 +1586,7 @@ inline void v_recombine(const v_##_Tpvec& a, const v_##_Tpvec& b, v_##_Tpvec& c,
     c.val = vcombine_##suffix(vget_low_##suffix(a.val), vget_low_##suffix(b.val)); \
     d.val = vcombine_##suffix(vget_high_##suffix(a.val), vget_high_##suffix(b.val)); \
 }
+#endif
 
 OPENCV_HAL_IMPL_NEON_UNPACKS(uint8x16, u8)
 OPENCV_HAL_IMPL_NEON_UNPACKS(int8x16, s8)
@@ -738,6 +1595,55 @@ OPENCV_HAL_IMPL_NEON_UNPACKS(int16x8, s16)
 OPENCV_HAL_IMPL_NEON_UNPACKS(uint32x4, u32)
 OPENCV_HAL_IMPL_NEON_UNPACKS(int32x4, s32)
 OPENCV_HAL_IMPL_NEON_UNPACKS(float32x4, f32)
+#if CV_SIMD128_64F
+OPENCV_HAL_IMPL_NEON_UNPACKS(float64x2, f64)
+#endif
+
+inline v_uint8x16 v_reverse(const v_uint8x16 &a)
+{
+    uint8x16_t vec = vrev64q_u8(a.val);
+    return v_uint8x16(vextq_u8(vec, vec, 8));
+}
+
+inline v_int8x16 v_reverse(const v_int8x16 &a)
+{ return v_reinterpret_as_s8(v_reverse(v_reinterpret_as_u8(a))); }
+
+inline v_uint16x8 v_reverse(const v_uint16x8 &a)
+{
+    uint16x8_t vec = vrev64q_u16(a.val);
+    return v_uint16x8(vextq_u16(vec, vec, 4));
+}
+
+inline v_int16x8 v_reverse(const v_int16x8 &a)
+{ return v_reinterpret_as_s16(v_reverse(v_reinterpret_as_u16(a))); }
+
+inline v_uint32x4 v_reverse(const v_uint32x4 &a)
+{
+    uint32x4_t vec = vrev64q_u32(a.val);
+    return v_uint32x4(vextq_u32(vec, vec, 2));
+}
+
+inline v_int32x4 v_reverse(const v_int32x4 &a)
+{ return v_reinterpret_as_s32(v_reverse(v_reinterpret_as_u32(a))); }
+
+inline v_float32x4 v_reverse(const v_float32x4 &a)
+{ return v_reinterpret_as_f32(v_reverse(v_reinterpret_as_u32(a))); }
+
+inline v_uint64x2 v_reverse(const v_uint64x2 &a)
+{
+    // With only two lanes, reversing is just swapping the 64-bit halves.
+    const uint64x1_t upper = vget_high_u64(a.val);
+    const uint64x1_t lower = vget_low_u64(a.val);
+    return v_uint64x2(vcombine_u64(upper, lower));
+}
+
+inline v_int64x2 v_reverse(const v_int64x2 &a)
+{ return v_reinterpret_as_s64(v_reverse(v_reinterpret_as_u64(a))); }
+
+#if CV_SIMD128_64F
+inline v_float64x2 v_reverse(const v_float64x2 &a)
+{ return v_reinterpret_as_f64(v_reverse(v_reinterpret_as_u64(a))); }
+#endif
 
 #define OPENCV_HAL_IMPL_NEON_EXTRACT(_Tpvec, suffix) \
 template <int s> \
@@ -755,7 +1661,54 @@ OPENCV_HAL_IMPL_NEON_EXTRACT(int32x4, s32)
 OPENCV_HAL_IMPL_NEON_EXTRACT(uint64x2, u64)
 OPENCV_HAL_IMPL_NEON_EXTRACT(int64x2, s64)
 OPENCV_HAL_IMPL_NEON_EXTRACT(float32x4, f32)
+#if CV_SIMD128_64F
+OPENCV_HAL_IMPL_NEON_EXTRACT(float64x2, f64)
+#endif
+
+#define OPENCV_HAL_IMPL_NEON_EXTRACT_N(_Tpvec, _Tp, suffix) \
+template<int i> inline _Tp v_extract_n(_Tpvec v) { return vgetq_lane_##suffix(v.val, i); }
+
+OPENCV_HAL_IMPL_NEON_EXTRACT_N(v_uint8x16, uchar, u8)
+OPENCV_HAL_IMPL_NEON_EXTRACT_N(v_int8x16, schar, s8)
+OPENCV_HAL_IMPL_NEON_EXTRACT_N(v_uint16x8, ushort, u16)
+OPENCV_HAL_IMPL_NEON_EXTRACT_N(v_int16x8, short, s16)
+OPENCV_HAL_IMPL_NEON_EXTRACT_N(v_uint32x4, uint, u32)
+OPENCV_HAL_IMPL_NEON_EXTRACT_N(v_int32x4, int, s32)
+OPENCV_HAL_IMPL_NEON_EXTRACT_N(v_uint64x2, uint64, u64)
+OPENCV_HAL_IMPL_NEON_EXTRACT_N(v_int64x2, int64, s64)
+OPENCV_HAL_IMPL_NEON_EXTRACT_N(v_float32x4, float, f32)
+#if CV_SIMD128_64F
+OPENCV_HAL_IMPL_NEON_EXTRACT_N(v_float64x2, double, f64)
+#endif
 
+#define OPENCV_HAL_IMPL_NEON_BROADCAST(_Tpvec, _Tp, suffix) \
+template<int i> inline _Tpvec v_broadcast_element(_Tpvec v) { _Tp t = v_extract_n<i>(v); return v_setall_##suffix(t); }
+
+OPENCV_HAL_IMPL_NEON_BROADCAST(v_uint8x16, uchar, u8)
+OPENCV_HAL_IMPL_NEON_BROADCAST(v_int8x16, schar, s8)
+OPENCV_HAL_IMPL_NEON_BROADCAST(v_uint16x8, ushort, u16)
+OPENCV_HAL_IMPL_NEON_BROADCAST(v_int16x8, short, s16)
+OPENCV_HAL_IMPL_NEON_BROADCAST(v_uint32x4, uint, u32)
+OPENCV_HAL_IMPL_NEON_BROADCAST(v_int32x4, int, s32)
+OPENCV_HAL_IMPL_NEON_BROADCAST(v_uint64x2, uint64, u64)
+OPENCV_HAL_IMPL_NEON_BROADCAST(v_int64x2, int64, s64)
+OPENCV_HAL_IMPL_NEON_BROADCAST(v_float32x4, float, f32)
+#if CV_SIMD128_64F
+OPENCV_HAL_IMPL_NEON_BROADCAST(v_float64x2, double, f64)
+#endif
+
+#if CV_SIMD128_64F
+inline v_int32x4 v_round(const v_float32x4& a)
+{
+    // FCVTNS: convert to signed integers, rounding to nearest with ties to even.
+    // The vcvtnq_s32_f32 intrinsic maps to the same instruction as the former
+    // GNU-style inline assembly, but it is also accepted by compilers that have
+    // no GCC asm extension, and it stays visible to the optimizer (scheduling,
+    // constant folding) instead of being an opaque asm statement.
+    const int32x4_t rounded = vcvtnq_s32_f32(a.val);
+    return v_int32x4(rounded);
+}
+#else
 inline v_int32x4 v_round(const v_float32x4& a)
 {
     static const int32x4_t v_sign = vdupq_n_s32(1 << 31),
@@ -764,7 +1717,7 @@ inline v_int32x4 v_round(const v_float32x4& a)
     int32x4_t v_addition = vorrq_s32(v_05, vandq_s32(v_sign, vreinterpretq_s32_f32(a.val)));
     return v_int32x4(vcvtq_s32_f32(vaddq_f32(a.val, vreinterpretq_f32_s32(v_addition))));
 }
-
+#endif
 inline v_int32x4 v_floor(const v_float32x4& a)
 {
     int32x4_t a1 = vcvtq_s32_f32(a.val);
@@ -782,6 +1735,43 @@ inline v_int32x4 v_ceil(const v_float32x4& a)
 inline v_int32x4 v_trunc(const v_float32x4& a)
 { return v_int32x4(vcvtq_s32_f32(a.val)); }
 
+#if CV_SIMD128_64F
+inline v_int32x4 v_round(const v_float64x2& a)
+{
+    static const int32x2_t zero = vdup_n_s32(0); // fills the two unused upper result lanes
+    return v_int32x4(vcombine_s32(vmovn_s64(vcvtnq_s64_f64(a.val)), zero)); // vcvtnq: nearest, ties to even (same mode as the float32 v_round); vcvtaq would round ties away from zero
+}
+
+inline v_int32x4 v_round(const v_float64x2& a, const v_float64x2& b)
+{
+    return v_int32x4(vcombine_s32(vmovn_s64(vcvtnq_s64_f64(a.val)), vmovn_s64(vcvtnq_s64_f64(b.val)))); // a -> lanes 0-1, b -> lanes 2-3; vcvtnq rounds to nearest, ties to even (same mode as the float32 v_round)
+}
+
+inline v_int32x4 v_floor(const v_float64x2& a)
+{
+    static const int32x2_t zero = vdup_n_s32(0);
+    int64x2_t a1 = vcvtq_s64_f64(a.val); // convert toward zero first
+    uint64x2_t mask = vcgtq_f64(vcvtq_f64_s64(a1), a.val); // all-ones (-1 as s64) in lanes where the converted value is above a
+    a1 = vaddq_s64(a1, vreinterpretq_s64_u64(mask)); // adding -1 in those lanes steps down to the floor
+    return v_int32x4(vcombine_s32(vmovn_s64(a1), zero)); // narrow to 32 bits; the two upper result lanes are zero
+}
+
+inline v_int32x4 v_ceil(const v_float64x2& a)
+{
+    static const int32x2_t zero = vdup_n_s32(0);
+    int64x2_t a1 = vcvtq_s64_f64(a.val); // convert toward zero first
+    uint64x2_t mask = vcgtq_f64(a.val, vcvtq_f64_s64(a1)); // all-ones (-1 as s64) in lanes where a is above the converted value
+    a1 = vsubq_s64(a1, vreinterpretq_s64_u64(mask)); // subtracting -1 in those lanes steps up to the ceiling
+    return v_int32x4(vcombine_s32(vmovn_s64(a1), zero)); // narrow to 32 bits; the two upper result lanes are zero
+}
+
+inline v_int32x4 v_trunc(const v_float64x2& a)
+{
+    static const int32x2_t zero = vdup_n_s32(0); // fills the two unused upper result lanes
+    return v_int32x4(vcombine_s32(vmovn_s64(vcvtq_s64_f64(a.val)), zero)); // vcvtq converts toward zero (true truncation); vcvtaq rounds to nearest and would turn 0.7 into 1
+}
+#endif
+
 #define OPENCV_HAL_IMPL_NEON_TRANSPOSE4x4(_Tpvec, suffix) \
 inline void v_transpose4x4(const v_##_Tpvec& a0, const v_##_Tpvec& a1, \
                          const v_##_Tpvec& a2, const v_##_Tpvec& a3, \
@@ -809,6 +1799,12 @@ OPENCV_HAL_IMPL_NEON_TRANSPOSE4x4(int32x4, s32)
 OPENCV_HAL_IMPL_NEON_TRANSPOSE4x4(float32x4, f32)
 
 #define OPENCV_HAL_IMPL_NEON_INTERLEAVED(_Tpvec, _Tp, suffix) \
+inline void v_load_deinterleave(const _Tp* ptr, v_##_Tpvec& a, v_##_Tpvec& b) \
+{ \
+    _Tpvec##x2_t v = vld2q_##suffix(ptr); \
+    a.val = v.val[0]; \
+    b.val = v.val[1]; \
+} \
 inline void v_load_deinterleave(const _Tp* ptr, v_##_Tpvec& a, v_##_Tpvec& b, v_##_Tpvec& c) \
 { \
     _Tpvec##x3_t v = vld3q_##suffix(ptr); \
@@ -825,7 +1821,16 @@ inline void v_load_deinterleave(const _Tp* ptr, v_##_Tpvec& a, v_##_Tpvec& b, \
     c.val = v.val[2]; \
     d.val = v.val[3]; \
 } \
-inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& b, const v_##_Tpvec& c) \
+inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& b, \
+                                hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \
+{ \
+    _Tpvec##x2_t v; \
+    v.val[0] = a.val; \
+    v.val[1] = b.val; \
+    vst2q_##suffix(ptr, v); \
+} \
+inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& b, \
+                                const v_##_Tpvec& c, hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \
 { \
     _Tpvec##x3_t v; \
     v.val[0] = a.val; \
@@ -834,7 +1839,8 @@ inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec&
     vst3q_##suffix(ptr, v); \
 } \
 inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& b, \
-                               const v_##_Tpvec& c, const v_##_Tpvec& d) \
+                                const v_##_Tpvec& c, const v_##_Tpvec& d, \
+                                hal::StoreMode /*mode*/=hal::STORE_UNALIGNED ) \
 { \
     _Tpvec##x4_t v; \
     v.val[0] = a.val; \
@@ -844,6 +1850,83 @@ inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec&
     vst4q_##suffix(ptr, v); \
 }
 
+#define OPENCV_HAL_IMPL_NEON_INTERLEAVED_INT64(tp, suffix) \
+inline void v_load_deinterleave( const tp* ptr, v_##tp##x2& a, v_##tp##x2& b ) \
+{ \
+    tp##x1_t a0 = vld1_##suffix(ptr); \
+    tp##x1_t b0 = vld1_##suffix(ptr + 1); \
+    tp##x1_t a1 = vld1_##suffix(ptr + 2); \
+    tp##x1_t b1 = vld1_##suffix(ptr + 3); \
+    a = v_##tp##x2(vcombine_##suffix(a0, a1)); \
+    b = v_##tp##x2(vcombine_##suffix(b0, b1)); \
+} \
+ \
+inline void v_load_deinterleave( const tp* ptr, v_##tp##x2& a, \
+                                 v_##tp##x2& b, v_##tp##x2& c ) \
+{ \
+    tp##x1_t a0 = vld1_##suffix(ptr); \
+    tp##x1_t b0 = vld1_##suffix(ptr + 1); \
+    tp##x1_t c0 = vld1_##suffix(ptr + 2); \
+    tp##x1_t a1 = vld1_##suffix(ptr + 3); \
+    tp##x1_t b1 = vld1_##suffix(ptr + 4); \
+    tp##x1_t c1 = vld1_##suffix(ptr + 5); \
+    a = v_##tp##x2(vcombine_##suffix(a0, a1)); \
+    b = v_##tp##x2(vcombine_##suffix(b0, b1)); \
+    c = v_##tp##x2(vcombine_##suffix(c0, c1)); \
+} \
+ \
+inline void v_load_deinterleave( const tp* ptr, v_##tp##x2& a, v_##tp##x2& b, \
+                                 v_##tp##x2& c, v_##tp##x2& d ) \
+{ \
+    tp##x1_t a0 = vld1_##suffix(ptr); \
+    tp##x1_t b0 = vld1_##suffix(ptr + 1); \
+    tp##x1_t c0 = vld1_##suffix(ptr + 2); \
+    tp##x1_t d0 = vld1_##suffix(ptr + 3); \
+    tp##x1_t a1 = vld1_##suffix(ptr + 4); \
+    tp##x1_t b1 = vld1_##suffix(ptr + 5); \
+    tp##x1_t c1 = vld1_##suffix(ptr + 6); \
+    tp##x1_t d1 = vld1_##suffix(ptr + 7); \
+    a = v_##tp##x2(vcombine_##suffix(a0, a1)); \
+    b = v_##tp##x2(vcombine_##suffix(b0, b1)); \
+    c = v_##tp##x2(vcombine_##suffix(c0, c1)); \
+    d = v_##tp##x2(vcombine_##suffix(d0, d1)); \
+} \
+ \
+inline void v_store_interleave( tp* ptr, const v_##tp##x2& a, const v_##tp##x2& b, \
+                                hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \
+{ \
+    vst1_##suffix(ptr, vget_low_##suffix(a.val)); \
+    vst1_##suffix(ptr + 1, vget_low_##suffix(b.val)); \
+    vst1_##suffix(ptr + 2, vget_high_##suffix(a.val)); \
+    vst1_##suffix(ptr + 3, vget_high_##suffix(b.val)); \
+} \
+ \
+inline void v_store_interleave( tp* ptr, const v_##tp##x2& a, \
+                                const v_##tp##x2& b, const v_##tp##x2& c, \
+                                hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \
+{ \
+    vst1_##suffix(ptr, vget_low_##suffix(a.val)); \
+    vst1_##suffix(ptr + 1, vget_low_##suffix(b.val)); \
+    vst1_##suffix(ptr + 2, vget_low_##suffix(c.val)); \
+    vst1_##suffix(ptr + 3, vget_high_##suffix(a.val)); \
+    vst1_##suffix(ptr + 4, vget_high_##suffix(b.val)); \
+    vst1_##suffix(ptr + 5, vget_high_##suffix(c.val)); \
+} \
+ \
+inline void v_store_interleave( tp* ptr, const v_##tp##x2& a, const v_##tp##x2& b, \
+                                const v_##tp##x2& c, const v_##tp##x2& d, \
+                                hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \
+{ \
+    vst1_##suffix(ptr, vget_low_##suffix(a.val)); \
+    vst1_##suffix(ptr + 1, vget_low_##suffix(b.val)); \
+    vst1_##suffix(ptr + 2, vget_low_##suffix(c.val)); \
+    vst1_##suffix(ptr + 3, vget_low_##suffix(d.val)); \
+    vst1_##suffix(ptr + 4, vget_high_##suffix(a.val)); \
+    vst1_##suffix(ptr + 5, vget_high_##suffix(b.val)); \
+    vst1_##suffix(ptr + 6, vget_high_##suffix(c.val)); \
+    vst1_##suffix(ptr + 7, vget_high_##suffix(d.val)); \
+}
+
 OPENCV_HAL_IMPL_NEON_INTERLEAVED(uint8x16, uchar, u8)
 OPENCV_HAL_IMPL_NEON_INTERLEAVED(int8x16, schar, s8)
 OPENCV_HAL_IMPL_NEON_INTERLEAVED(uint16x8, ushort, u16)
@@ -851,12 +1934,411 @@ OPENCV_HAL_IMPL_NEON_INTERLEAVED(int16x8, short, s16)
 OPENCV_HAL_IMPL_NEON_INTERLEAVED(uint32x4, unsigned, u32)
 OPENCV_HAL_IMPL_NEON_INTERLEAVED(int32x4, int, s32)
 OPENCV_HAL_IMPL_NEON_INTERLEAVED(float32x4, float, f32)
+#if CV_SIMD128_64F
+OPENCV_HAL_IMPL_NEON_INTERLEAVED(float64x2, double, f64)
+#endif
+
+OPENCV_HAL_IMPL_NEON_INTERLEAVED_INT64(int64, s64)
+OPENCV_HAL_IMPL_NEON_INTERLEAVED_INT64(uint64, u64)
 
 inline v_float32x4 v_cvt_f32(const v_int32x4& a)
 {
     return v_float32x4(vcvtq_f32_s32(a.val));
 }
 
+#if CV_SIMD128_64F
+inline v_float32x4 v_cvt_f32(const v_float64x2& a)
+{
+    float32x2_t zero = vdup_n_f32(0.0f);
+    return v_float32x4(vcombine_f32(vcvt_f32_f64(a.val), zero));
+}
+
+inline v_float32x4 v_cvt_f32(const v_float64x2& a, const v_float64x2& b)
+{
+    return v_float32x4(vcombine_f32(vcvt_f32_f64(a.val), vcvt_f32_f64(b.val)));
+}
+
+inline v_float64x2 v_cvt_f64(const v_int32x4& a)
+{
+    return v_float64x2(vcvt_f64_f32(vcvt_f32_s32(vget_low_s32(a.val))));
+}
+
+inline v_float64x2 v_cvt_f64_high(const v_int32x4& a)
+{
+    return v_float64x2(vcvt_f64_f32(vcvt_f32_s32(vget_high_s32(a.val))));
+}
+
+inline v_float64x2 v_cvt_f64(const v_float32x4& a)
+{
+    return v_float64x2(vcvt_f64_f32(vget_low_f32(a.val)));
+}
+
+inline v_float64x2 v_cvt_f64_high(const v_float32x4& a)
+{
+    return v_float64x2(vcvt_f64_f32(vget_high_f32(a.val)));
+}
+
+inline v_float64x2 v_cvt_f64(const v_int64x2& a)
+{  return v_float64x2(vcvtq_f64_s64(a.val)); }
+
+#endif
+
+////////////// Lookup table access ////////////////////
+
+inline v_int8x16 v_lut(const schar* tab, const int* idx)
+{
+    schar CV_DECL_ALIGNED(32) elems[16] =
+    {
+        tab[idx[ 0]],
+        tab[idx[ 1]],
+        tab[idx[ 2]],
+        tab[idx[ 3]],
+        tab[idx[ 4]],
+        tab[idx[ 5]],
+        tab[idx[ 6]],
+        tab[idx[ 7]],
+        tab[idx[ 8]],
+        tab[idx[ 9]],
+        tab[idx[10]],
+        tab[idx[11]],
+        tab[idx[12]],
+        tab[idx[13]],
+        tab[idx[14]],
+        tab[idx[15]]
+    };
+    return v_int8x16(vld1q_s8(elems));
+}
+inline v_int8x16 v_lut_pairs(const schar* tab, const int* idx)
+{
+    schar CV_DECL_ALIGNED(32) elems[16] =
+    {
+        tab[idx[0]],
+        tab[idx[0] + 1],
+        tab[idx[1]],
+        tab[idx[1] + 1],
+        tab[idx[2]],
+        tab[idx[2] + 1],
+        tab[idx[3]],
+        tab[idx[3] + 1],
+        tab[idx[4]],
+        tab[idx[4] + 1],
+        tab[idx[5]],
+        tab[idx[5] + 1],
+        tab[idx[6]],
+        tab[idx[6] + 1],
+        tab[idx[7]],
+        tab[idx[7] + 1]
+    };
+    return v_int8x16(vld1q_s8(elems));
+}
+inline v_int8x16 v_lut_quads(const schar* tab, const int* idx)
+{
+    schar CV_DECL_ALIGNED(32) elems[16] =
+    {
+        tab[idx[0]],
+        tab[idx[0] + 1],
+        tab[idx[0] + 2],
+        tab[idx[0] + 3],
+        tab[idx[1]],
+        tab[idx[1] + 1],
+        tab[idx[1] + 2],
+        tab[idx[1] + 3],
+        tab[idx[2]],
+        tab[idx[2] + 1],
+        tab[idx[2] + 2],
+        tab[idx[2] + 3],
+        tab[idx[3]],
+        tab[idx[3] + 1],
+        tab[idx[3] + 2],
+        tab[idx[3] + 3]
+    };
+    return v_int8x16(vld1q_s8(elems));
+}
+inline v_uint8x16 v_lut(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut((schar*)tab, idx)); }
+inline v_uint8x16 v_lut_pairs(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut_pairs((schar*)tab, idx)); }
+inline v_uint8x16 v_lut_quads(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut_quads((schar*)tab, idx)); }
+
+inline v_int16x8 v_lut(const short* tab, const int* idx)
+{
+    short CV_DECL_ALIGNED(32) elems[8] =
+    {
+        tab[idx[0]],
+        tab[idx[1]],
+        tab[idx[2]],
+        tab[idx[3]],
+        tab[idx[4]],
+        tab[idx[5]],
+        tab[idx[6]],
+        tab[idx[7]]
+    };
+    return v_int16x8(vld1q_s16(elems));
+}
+inline v_int16x8 v_lut_pairs(const short* tab, const int* idx)
+{
+    short CV_DECL_ALIGNED(32) elems[8] =
+    {
+        tab[idx[0]],
+        tab[idx[0] + 1],
+        tab[idx[1]],
+        tab[idx[1] + 1],
+        tab[idx[2]],
+        tab[idx[2] + 1],
+        tab[idx[3]],
+        tab[idx[3] + 1]
+    };
+    return v_int16x8(vld1q_s16(elems));
+}
+inline v_int16x8 v_lut_quads(const short* tab, const int* idx)
+{
+    return v_int16x8(vcombine_s16(vld1_s16(tab + idx[0]), vld1_s16(tab + idx[1])));
+}
+inline v_uint16x8 v_lut(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut((short*)tab, idx)); }
+inline v_uint16x8 v_lut_pairs(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut_pairs((short*)tab, idx)); }
+inline v_uint16x8 v_lut_quads(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut_quads((short*)tab, idx)); }
+
+inline v_int32x4 v_lut(const int* tab, const int* idx)
+{
+    int CV_DECL_ALIGNED(32) elems[4] =
+    {
+        tab[idx[0]],
+        tab[idx[1]],
+        tab[idx[2]],
+        tab[idx[3]]
+    };
+    return v_int32x4(vld1q_s32(elems));
+}
+inline v_int32x4 v_lut_pairs(const int* tab, const int* idx)
+{
+    return v_int32x4(vcombine_s32(vld1_s32(tab + idx[0]), vld1_s32(tab + idx[1])));
+}
+inline v_int32x4 v_lut_quads(const int* tab, const int* idx)
+{
+    return v_int32x4(vld1q_s32(tab + idx[0]));
+}
+inline v_uint32x4 v_lut(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut((int*)tab, idx)); }
+inline v_uint32x4 v_lut_pairs(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut_pairs((int*)tab, idx)); }
+inline v_uint32x4 v_lut_quads(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut_quads((int*)tab, idx)); }
+
+inline v_int64x2 v_lut(const int64_t* tab, const int* idx)
+{
+    return v_int64x2(vcombine_s64(vcreate_s64(tab[idx[0]]), vcreate_s64(tab[idx[1]])));
+}
+inline v_int64x2 v_lut_pairs(const int64_t* tab, const int* idx)
+{
+    return v_int64x2(vld1q_s64(tab + idx[0]));
+}
+inline v_uint64x2 v_lut(const uint64_t* tab, const int* idx) { return v_reinterpret_as_u64(v_lut((const int64_t *)tab, idx)); }
+inline v_uint64x2 v_lut_pairs(const uint64_t* tab, const int* idx) { return v_reinterpret_as_u64(v_lut_pairs((const int64_t *)tab, idx)); }
+
+inline v_float32x4 v_lut(const float* tab, const int* idx)
+{
+    float CV_DECL_ALIGNED(32) elems[4] =
+    {
+        tab[idx[0]],
+        tab[idx[1]],
+        tab[idx[2]],
+        tab[idx[3]]
+    };
+    return v_float32x4(vld1q_f32(elems));
+}
+inline v_float32x4 v_lut_pairs(const float* tab, const int* idx)
+{
+    typedef uint64 CV_DECL_ALIGNED(1) unaligned_uint64;
+
+    uint64 CV_DECL_ALIGNED(32) elems[2] =
+    {
+        *(unaligned_uint64*)(tab + idx[0]),
+        *(unaligned_uint64*)(tab + idx[1])
+    };
+    return v_float32x4(vreinterpretq_f32_u64(vld1q_u64(elems)));
+}
+inline v_float32x4 v_lut_quads(const float* tab, const int* idx)
+{
+    return v_float32x4(vld1q_f32(tab + idx[0]));
+}
+
+inline v_int32x4 v_lut(const int* tab, const v_int32x4& idxvec)
+{
+    int CV_DECL_ALIGNED(32) elems[4] =
+    {
+        tab[vgetq_lane_s32(idxvec.val, 0)],
+        tab[vgetq_lane_s32(idxvec.val, 1)],
+        tab[vgetq_lane_s32(idxvec.val, 2)],
+        tab[vgetq_lane_s32(idxvec.val, 3)]
+    };
+    return v_int32x4(vld1q_s32(elems));
+}
+
+inline v_uint32x4 v_lut(const unsigned* tab, const v_int32x4& idxvec)
+{
+    unsigned CV_DECL_ALIGNED(32) elems[4] =
+    {
+        tab[vgetq_lane_s32(idxvec.val, 0)],
+        tab[vgetq_lane_s32(idxvec.val, 1)],
+        tab[vgetq_lane_s32(idxvec.val, 2)],
+        tab[vgetq_lane_s32(idxvec.val, 3)]
+    };
+    return v_uint32x4(vld1q_u32(elems));
+}
+
+inline v_float32x4 v_lut(const float* tab, const v_int32x4& idxvec)
+{
+    float CV_DECL_ALIGNED(32) elems[4] =
+    {
+        tab[vgetq_lane_s32(idxvec.val, 0)],
+        tab[vgetq_lane_s32(idxvec.val, 1)],
+        tab[vgetq_lane_s32(idxvec.val, 2)],
+        tab[vgetq_lane_s32(idxvec.val, 3)]
+    };
+    return v_float32x4(vld1q_f32(elems));
+}
+
+inline void v_lut_deinterleave(const float* tab, const v_int32x4& idxvec, v_float32x4& x, v_float32x4& y)
+{
+    /*int CV_DECL_ALIGNED(32) idx[4];
+    v_store(idx, idxvec);
+
+    float32x4_t xy02 = vcombine_f32(vld1_f32(tab + idx[0]), vld1_f32(tab + idx[2]));
+    float32x4_t xy13 = vcombine_f32(vld1_f32(tab + idx[1]), vld1_f32(tab + idx[3]));
+
+    float32x4x2_t xxyy = vuzpq_f32(xy02, xy13);
+    x = v_float32x4(xxyy.val[0]);
+    y = v_float32x4(xxyy.val[1]);*/
+    int CV_DECL_ALIGNED(32) idx[4];
+    v_store_aligned(idx, idxvec);
+
+    x = v_float32x4(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]);
+    y = v_float32x4(tab[idx[0]+1], tab[idx[1]+1], tab[idx[2]+1], tab[idx[3]+1]);
+}
+
+inline v_int8x16 v_interleave_pairs(const v_int8x16& vec)
+{
+    return v_int8x16(vcombine_s8(vtbl1_s8(vget_low_s8(vec.val), vcreate_s8(0x0705060403010200)), vtbl1_s8(vget_high_s8(vec.val), vcreate_s8(0x0705060403010200))));
+}
+inline v_uint8x16 v_interleave_pairs(const v_uint8x16& vec) { return v_reinterpret_as_u8(v_interleave_pairs(v_reinterpret_as_s8(vec))); }
+inline v_int8x16 v_interleave_quads(const v_int8x16& vec)
+{
+    return v_int8x16(vcombine_s8(vtbl1_s8(vget_low_s8(vec.val), vcreate_s8(0x0703060205010400)), vtbl1_s8(vget_high_s8(vec.val), vcreate_s8(0x0703060205010400))));
+}
+inline v_uint8x16 v_interleave_quads(const v_uint8x16& vec) { return v_reinterpret_as_u8(v_interleave_quads(v_reinterpret_as_s8(vec))); }
+
+inline v_int16x8 v_interleave_pairs(const v_int16x8& vec)
+{
+    return v_int16x8(vreinterpretq_s16_s8(vcombine_s8(vtbl1_s8(vget_low_s8(vreinterpretq_s8_s16(vec.val)), vcreate_s8(0x0706030205040100)), vtbl1_s8(vget_high_s8(vreinterpretq_s8_s16(vec.val)), vcreate_s8(0x0706030205040100)))));
+}
+inline v_uint16x8 v_interleave_pairs(const v_uint16x8& vec) { return v_reinterpret_as_u16(v_interleave_pairs(v_reinterpret_as_s16(vec))); }
+inline v_int16x8 v_interleave_quads(const v_int16x8& vec)
+{
+    int16x4x2_t res = vzip_s16(vget_low_s16(vec.val), vget_high_s16(vec.val));
+    return v_int16x8(vcombine_s16(res.val[0], res.val[1]));
+}
+inline v_uint16x8 v_interleave_quads(const v_uint16x8& vec) { return v_reinterpret_as_u16(v_interleave_quads(v_reinterpret_as_s16(vec))); }
+
+inline v_int32x4 v_interleave_pairs(const v_int32x4& vec)
+{
+    int32x2x2_t res = vzip_s32(vget_low_s32(vec.val), vget_high_s32(vec.val));
+    return v_int32x4(vcombine_s32(res.val[0], res.val[1]));
+}
+inline v_uint32x4 v_interleave_pairs(const v_uint32x4& vec) { return v_reinterpret_as_u32(v_interleave_pairs(v_reinterpret_as_s32(vec))); }
+inline v_float32x4 v_interleave_pairs(const v_float32x4& vec) { return v_reinterpret_as_f32(v_interleave_pairs(v_reinterpret_as_s32(vec))); }
+
+inline v_int8x16 v_pack_triplets(const v_int8x16& vec)
+{
+    return v_int8x16(vextq_s8(vcombine_s8(vtbl1_s8(vget_low_s8(vec.val), vcreate_s8(0x0605040201000000)), vtbl1_s8(vget_high_s8(vec.val), vcreate_s8(0x0807060504020100))), vdupq_n_s8(0), 2));
+}
+inline v_uint8x16 v_pack_triplets(const v_uint8x16& vec) { return v_reinterpret_as_u8(v_pack_triplets(v_reinterpret_as_s8(vec))); }
+
+inline v_int16x8 v_pack_triplets(const v_int16x8& vec)
+{
+    return v_int16x8(vreinterpretq_s16_s8(vextq_s8(vcombine_s8(vtbl1_s8(vget_low_s8(vreinterpretq_s8_s16(vec.val)), vcreate_s8(0x0504030201000000)), vget_high_s8(vreinterpretq_s8_s16(vec.val))), vdupq_n_s8(0), 2)));
+}
+inline v_uint16x8 v_pack_triplets(const v_uint16x8& vec) { return v_reinterpret_as_u16(v_pack_triplets(v_reinterpret_as_s16(vec))); }
+
+inline v_int32x4 v_pack_triplets(const v_int32x4& vec) { return vec; }
+inline v_uint32x4 v_pack_triplets(const v_uint32x4& vec) { return vec; }
+inline v_float32x4 v_pack_triplets(const v_float32x4& vec) { return vec; }
+
+#if CV_SIMD128_64F
+inline v_float64x2 v_lut(const double* tab, const int* idx)
+{
+    double CV_DECL_ALIGNED(32) elems[2] =
+    {
+        tab[idx[0]],
+        tab[idx[1]]
+    };
+    return v_float64x2(vld1q_f64(elems));
+}
+
+inline v_float64x2 v_lut_pairs(const double* tab, const int* idx)
+{
+    return v_float64x2(vld1q_f64(tab + idx[0]));
+}
+
+inline v_float64x2 v_lut(const double* tab, const v_int32x4& idxvec)
+{
+    double CV_DECL_ALIGNED(32) elems[2] =
+    {
+        tab[vgetq_lane_s32(idxvec.val, 0)],
+        tab[vgetq_lane_s32(idxvec.val, 1)],
+    };
+    return v_float64x2(vld1q_f64(elems));
+}
+
+inline void v_lut_deinterleave(const double* tab, const v_int32x4& idxvec, v_float64x2& x, v_float64x2& y)
+{
+    int CV_DECL_ALIGNED(32) idx[4];
+    v_store_aligned(idx, idxvec);
+
+    x = v_float64x2(tab[idx[0]], tab[idx[1]]);
+    y = v_float64x2(tab[idx[0]+1], tab[idx[1]+1]);
+}
+#endif
+
+////// FP16 support ///////
+#if CV_FP16
+inline v_float32x4 v_load_expand(const float16_t* ptr)
+{
+    float16x4_t v =
+    #ifndef vld1_f16 // APPLE compiler defines vld1_f16 as macro
+        (float16x4_t)vld1_s16((const short*)ptr);
+    #else
+        vld1_f16((const __fp16*)ptr);
+    #endif
+    return v_float32x4(vcvt_f32_f16(v));
+}
+
+inline void v_pack_store(float16_t* ptr, const v_float32x4& v)
+{
+    float16x4_t hv = vcvt_f16_f32(v.val);
+
+    #ifndef vst1_f16 // APPLE compiler defines vst1_f16 as macro
+        vst1_s16((short*)ptr, (int16x4_t)hv);
+    #else
+        vst1_f16((__fp16*)ptr, hv);
+    #endif
+}
+#else
+inline v_float32x4 v_load_expand(const float16_t* ptr)
+{
+    const int N = 4;
+    float buf[N];
+    for( int i = 0; i < N; i++ ) buf[i] = (float)ptr[i];
+    return v_load(buf);
+}
+
+inline void v_pack_store(float16_t* ptr, const v_float32x4& v)
+{
+    const int N = 4;
+    float buf[N];
+    v_store(buf, v);
+    for( int i = 0; i < N; i++ ) ptr[i] = float16_t(buf[i]);
+}
+#endif
+
+inline void v_cleanup() {}
+
+CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
+
 //! @endcond
 
 }
diff --git a/IPL/include/opencv/opencv2/core/hal/intrin_sse.hpp b/IPL/include/opencv/opencv2/core/hal/intrin_sse.hpp
index 1840e03..867ff55 100644
--- a/IPL/include/opencv/opencv2/core/hal/intrin_sse.hpp
+++ b/IPL/include/opencv/opencv2/core/hal/intrin_sse.hpp
@@ -42,25 +42,40 @@
 //
 //M*/
 
-#ifndef __OPENCV_HAL_SSE_HPP__
-#define __OPENCV_HAL_SSE_HPP__
+#ifndef OPENCV_HAL_SSE_HPP
+#define OPENCV_HAL_SSE_HPP
 
 #include <algorithm>
+#include "opencv2/core/utility.hpp"
 
 #define CV_SIMD128 1
 #define CV_SIMD128_64F 1
+#define CV_SIMD128_FP16 0  // no native operations with FP16 type.
 
 namespace cv
 {
 
 //! @cond IGNORED
 
+//
+// Compilation troubleshooting:
+// - MSVC: error C2719: 'a': formal parameter with requested alignment of 16 won't be aligned
+//   Replace parameter declaration to const reference:
+//   -v_int32x4 a
+//   +const v_int32x4& a
+//
+
+CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
+
+///////// Types ////////////
+
 struct v_uint8x16
 {
     typedef uchar lane_type;
+    typedef __m128i vector_type;
     enum { nlanes = 16 };
 
-    v_uint8x16() {}
+    v_uint8x16() : val(_mm_setzero_si128()) {}
     explicit v_uint8x16(__m128i v) : val(v) {}
     v_uint8x16(uchar v0, uchar v1, uchar v2, uchar v3, uchar v4, uchar v5, uchar v6, uchar v7,
                uchar v8, uchar v9, uchar v10, uchar v11, uchar v12, uchar v13, uchar v14, uchar v15)
@@ -81,9 +96,10 @@ struct v_uint8x16
 struct v_int8x16
 {
     typedef schar lane_type;
+    typedef __m128i vector_type;
     enum { nlanes = 16 };
 
-    v_int8x16() {}
+    v_int8x16() : val(_mm_setzero_si128()) {}
     explicit v_int8x16(__m128i v) : val(v) {}
     v_int8x16(schar v0, schar v1, schar v2, schar v3, schar v4, schar v5, schar v6, schar v7,
               schar v8, schar v9, schar v10, schar v11, schar v12, schar v13, schar v14, schar v15)
@@ -104,9 +120,10 @@ struct v_int8x16
 struct v_uint16x8
 {
     typedef ushort lane_type;
+    typedef __m128i vector_type;
     enum { nlanes = 8 };
 
-    v_uint16x8() {}
+    v_uint16x8() : val(_mm_setzero_si128()) {}
     explicit v_uint16x8(__m128i v) : val(v) {}
     v_uint16x8(ushort v0, ushort v1, ushort v2, ushort v3, ushort v4, ushort v5, ushort v6, ushort v7)
     {
@@ -124,9 +141,10 @@ struct v_uint16x8
 struct v_int16x8
 {
     typedef short lane_type;
+    typedef __m128i vector_type;
     enum { nlanes = 8 };
 
-    v_int16x8() {}
+    v_int16x8() : val(_mm_setzero_si128()) {}
     explicit v_int16x8(__m128i v) : val(v) {}
     v_int16x8(short v0, short v1, short v2, short v3, short v4, short v5, short v6, short v7)
     {
@@ -137,15 +155,17 @@ struct v_int16x8
     {
         return (short)_mm_cvtsi128_si32(val);
     }
+
     __m128i val;
 };
 
 struct v_uint32x4
 {
     typedef unsigned lane_type;
+    typedef __m128i vector_type;
     enum { nlanes = 4 };
 
-    v_uint32x4() {}
+    v_uint32x4() : val(_mm_setzero_si128()) {}
     explicit v_uint32x4(__m128i v) : val(v) {}
     v_uint32x4(unsigned v0, unsigned v1, unsigned v2, unsigned v3)
     {
@@ -155,15 +175,17 @@ struct v_uint32x4
     {
         return (unsigned)_mm_cvtsi128_si32(val);
     }
+
     __m128i val;
 };
 
 struct v_int32x4
 {
     typedef int lane_type;
+    typedef __m128i vector_type;
     enum { nlanes = 4 };
 
-    v_int32x4() {}
+    v_int32x4() : val(_mm_setzero_si128()) {}
     explicit v_int32x4(__m128i v) : val(v) {}
     v_int32x4(int v0, int v1, int v2, int v3)
     {
@@ -173,15 +195,17 @@ struct v_int32x4
     {
         return _mm_cvtsi128_si32(val);
     }
+
     __m128i val;
 };
 
 struct v_float32x4
 {
     typedef float lane_type;
+    typedef __m128 vector_type;
     enum { nlanes = 4 };
 
-    v_float32x4() {}
+    v_float32x4() : val(_mm_setzero_ps()) {}
     explicit v_float32x4(__m128 v) : val(v) {}
     v_float32x4(float v0, float v1, float v2, float v3)
     {
@@ -191,15 +215,17 @@ struct v_float32x4
     {
         return _mm_cvtss_f32(val);
     }
+
     __m128 val;
 };
 
 struct v_uint64x2
 {
     typedef uint64 lane_type;
+    typedef __m128i vector_type;
     enum { nlanes = 2 };
 
-    v_uint64x2() {}
+    v_uint64x2() : val(_mm_setzero_si128()) {}
     explicit v_uint64x2(__m128i v) : val(v) {}
     v_uint64x2(uint64 v0, uint64 v1)
     {
@@ -207,19 +233,25 @@ struct v_uint64x2
     }
     uint64 get0() const
     {
+    #if !defined(__x86_64__) && !defined(_M_X64)
         int a = _mm_cvtsi128_si32(val);
         int b = _mm_cvtsi128_si32(_mm_srli_epi64(val, 32));
         return (unsigned)a | ((uint64)(unsigned)b << 32);
+    #else
+        return (uint64)_mm_cvtsi128_si64(val);
+    #endif
     }
+
     __m128i val;
 };
 
 struct v_int64x2
 {
     typedef int64 lane_type;
+    typedef __m128i vector_type;
     enum { nlanes = 2 };
 
-    v_int64x2() {}
+    v_int64x2() : val(_mm_setzero_si128()) {}
     explicit v_int64x2(__m128i v) : val(v) {}
     v_int64x2(int64 v0, int64 v1)
     {
@@ -227,19 +259,25 @@ struct v_int64x2
     }
     int64 get0() const
     {
+    #if !defined(__x86_64__) && !defined(_M_X64)
         int a = _mm_cvtsi128_si32(val);
         int b = _mm_cvtsi128_si32(_mm_srli_epi64(val, 32));
         return (int64)((unsigned)a | ((uint64)(unsigned)b << 32));
+    #else
+        return _mm_cvtsi128_si64(val);
+    #endif
     }
+
     __m128i val;
 };
 
 struct v_float64x2
 {
     typedef double lane_type;
+    typedef __m128d vector_type;
     enum { nlanes = 2 };
 
-    v_float64x2() {}
+    v_float64x2() : val(_mm_setzero_pd()) {}
     explicit v_float64x2(__m128d v) : val(v) {}
     v_float64x2(double v0, double v1)
     {
@@ -249,17 +287,39 @@ struct v_float64x2
     {
         return _mm_cvtsd_f64(val);
     }
+
     __m128d val;
 };
 
+namespace hal_sse_internal
+{
+    template <typename to_sse_type, typename from_sse_type>
+    to_sse_type v_sse_reinterpret_as(const from_sse_type& val);
+
+#define OPENCV_HAL_IMPL_SSE_REINTERPRET_RAW(to_sse_type, from_sse_type, sse_cast_intrin) \
+    template<> inline \
+    to_sse_type v_sse_reinterpret_as(const from_sse_type& a) \
+    { return sse_cast_intrin(a); }
+
+    OPENCV_HAL_IMPL_SSE_REINTERPRET_RAW(__m128i, __m128i, OPENCV_HAL_NOP)
+    OPENCV_HAL_IMPL_SSE_REINTERPRET_RAW(__m128i, __m128, _mm_castps_si128)
+    OPENCV_HAL_IMPL_SSE_REINTERPRET_RAW(__m128i, __m128d, _mm_castpd_si128)
+    OPENCV_HAL_IMPL_SSE_REINTERPRET_RAW(__m128, __m128i, _mm_castsi128_ps)
+    OPENCV_HAL_IMPL_SSE_REINTERPRET_RAW(__m128, __m128, OPENCV_HAL_NOP)
+    OPENCV_HAL_IMPL_SSE_REINTERPRET_RAW(__m128, __m128d, _mm_castpd_ps)
+    OPENCV_HAL_IMPL_SSE_REINTERPRET_RAW(__m128d, __m128i, _mm_castsi128_pd)
+    OPENCV_HAL_IMPL_SSE_REINTERPRET_RAW(__m128d, __m128, _mm_castps_pd)
+    OPENCV_HAL_IMPL_SSE_REINTERPRET_RAW(__m128d, __m128d, OPENCV_HAL_NOP)
+}
+
 #define OPENCV_HAL_IMPL_SSE_INITVEC(_Tpvec, _Tp, suffix, zsuffix, ssuffix, _Tps, cast) \
 inline _Tpvec v_setzero_##suffix() { return _Tpvec(_mm_setzero_##zsuffix()); } \
 inline _Tpvec v_setall_##suffix(_Tp v) { return _Tpvec(_mm_set1_##ssuffix((_Tps)v)); } \
 template<typename _Tpvec0> inline _Tpvec v_reinterpret_as_##suffix(const _Tpvec0& a) \
 { return _Tpvec(cast(a.val)); }
 
-OPENCV_HAL_IMPL_SSE_INITVEC(v_uint8x16, uchar, u8, si128, epi8, char, OPENCV_HAL_NOP)
-OPENCV_HAL_IMPL_SSE_INITVEC(v_int8x16, schar, s8, si128, epi8, char, OPENCV_HAL_NOP)
+OPENCV_HAL_IMPL_SSE_INITVEC(v_uint8x16, uchar, u8, si128, epi8, schar, OPENCV_HAL_NOP)
+OPENCV_HAL_IMPL_SSE_INITVEC(v_int8x16, schar, s8, si128, epi8, schar, OPENCV_HAL_NOP)
 OPENCV_HAL_IMPL_SSE_INITVEC(v_uint16x8, ushort, u16, si128, epi16, short, OPENCV_HAL_NOP)
 OPENCV_HAL_IMPL_SSE_INITVEC(v_int16x8, short, s16, si128, epi16, short, OPENCV_HAL_NOP)
 OPENCV_HAL_IMPL_SSE_INITVEC(v_uint32x4, unsigned, u32, si128, epi32, int, OPENCV_HAL_NOP)
@@ -362,7 +422,7 @@ void v_rshr_pack_u_store(uchar* ptr, const v_int16x8& a)
 inline v_int8x16 v_pack(const v_int16x8& a, const v_int16x8& b)
 { return v_int8x16(_mm_packs_epi16(a.val, b.val)); }
 
-inline void v_pack_store(schar* ptr, v_int16x8& a)
+inline void v_pack_store(schar* ptr, const v_int16x8& a)
 { _mm_storel_epi64((__m128i*)ptr, _mm_packs_epi16(a.val, a.val)); }
 
 template<int n> inline
@@ -383,20 +443,18 @@ void v_rshr_pack_store(schar* ptr, const v_int16x8& a)
 }
 
 
-// bit-wise "mask ? a : b"
+// byte-wise "mask ? a : b"
 inline __m128i v_select_si128(__m128i mask, __m128i a, __m128i b)
 {
+#if CV_SSE4_1
+    return _mm_blendv_epi8(b, a, mask);
+#else
     return _mm_xor_si128(b, _mm_and_si128(_mm_xor_si128(a, b), mask));
+#endif
 }
 
 inline v_uint16x8 v_pack(const v_uint32x4& a, const v_uint32x4& b)
-{
-    __m128i z = _mm_setzero_si128(), maxval32 = _mm_set1_epi32(65535), delta32 = _mm_set1_epi32(32768);
-    __m128i a1 = _mm_sub_epi32(v_select_si128(_mm_cmpgt_epi32(z, a.val), maxval32, a.val), delta32);
-    __m128i b1 = _mm_sub_epi32(v_select_si128(_mm_cmpgt_epi32(z, b.val), maxval32, b.val), delta32);
-    __m128i r = _mm_packs_epi32(a1, b1);
-    return v_uint16x8(_mm_sub_epi16(r, _mm_set1_epi16(-32768)));
-}
+{ return v_uint16x8(_v128_packs_epu32(a.val, b.val)); }
 
 inline void v_pack_store(ushort* ptr, const v_uint32x4& a)
 {
@@ -426,37 +484,62 @@ void v_rshr_pack_store(ushort* ptr, const v_uint32x4& a)
 
 inline v_uint16x8 v_pack_u(const v_int32x4& a, const v_int32x4& b)
 {
+#if CV_SSE4_1
+    return v_uint16x8(_mm_packus_epi32(a.val, b.val));
+#else
     __m128i delta32 = _mm_set1_epi32(32768);
-    __m128i r = _mm_packs_epi32(_mm_sub_epi32(a.val, delta32), _mm_sub_epi32(b.val, delta32));
+
+    // preliminary saturate negative values to zero
+    __m128i a1 = _mm_and_si128(a.val, _mm_cmpgt_epi32(a.val, _mm_set1_epi32(0)));
+    __m128i b1 = _mm_and_si128(b.val, _mm_cmpgt_epi32(b.val, _mm_set1_epi32(0)));
+
+    __m128i r = _mm_packs_epi32(_mm_sub_epi32(a1, delta32), _mm_sub_epi32(b1, delta32));
     return v_uint16x8(_mm_sub_epi16(r, _mm_set1_epi16(-32768)));
+#endif
 }
 
 inline void v_pack_u_store(ushort* ptr, const v_int32x4& a)
 {
+#if CV_SSE4_1
+    _mm_storel_epi64((__m128i*)ptr, _mm_packus_epi32(a.val, a.val));
+#else
     __m128i delta32 = _mm_set1_epi32(32768);
     __m128i a1 = _mm_sub_epi32(a.val, delta32);
     __m128i r = _mm_sub_epi16(_mm_packs_epi32(a1, a1), _mm_set1_epi16(-32768));
     _mm_storel_epi64((__m128i*)ptr, r);
+#endif
 }
 
 template<int n> inline
 v_uint16x8 v_rshr_pack_u(const v_int32x4& a, const v_int32x4& b)
 {
+#if CV_SSE4_1
+    __m128i delta = _mm_set1_epi32(1 << (n - 1));
+    return v_uint16x8(_mm_packus_epi32(_mm_srai_epi32(_mm_add_epi32(a.val, delta), n),
+                                       _mm_srai_epi32(_mm_add_epi32(b.val, delta), n)));
+#else
     __m128i delta = _mm_set1_epi32(1 << (n-1)), delta32 = _mm_set1_epi32(32768);
     __m128i a1 = _mm_sub_epi32(_mm_srai_epi32(_mm_add_epi32(a.val, delta), n), delta32);
     __m128i a2 = _mm_sub_epi16(_mm_packs_epi32(a1, a1), _mm_set1_epi16(-32768));
     __m128i b1 = _mm_sub_epi32(_mm_srai_epi32(_mm_add_epi32(b.val, delta), n), delta32);
     __m128i b2 = _mm_sub_epi16(_mm_packs_epi32(b1, b1), _mm_set1_epi16(-32768));
     return v_uint16x8(_mm_unpacklo_epi64(a2, b2));
+#endif
 }
 
 template<int n> inline
 void v_rshr_pack_u_store(ushort* ptr, const v_int32x4& a)
 {
+#if CV_SSE4_1
+    __m128i delta = _mm_set1_epi32(1 << (n - 1));
+    __m128i a1 = _mm_srai_epi32(_mm_add_epi32(a.val, delta), n);
+    _mm_storel_epi64((__m128i*)ptr, _mm_packus_epi32(a1, a1));
+#else
     __m128i delta = _mm_set1_epi32(1 << (n-1)), delta32 = _mm_set1_epi32(32768);
     __m128i a1 = _mm_sub_epi32(_mm_srai_epi32(_mm_add_epi32(a.val, delta), n), delta32);
     __m128i a2 = _mm_sub_epi16(_mm_packs_epi32(a1, a1), _mm_set1_epi16(-32768));
     _mm_storel_epi64((__m128i*)ptr, a2);
+#endif
 }
 
 inline v_int16x8 v_pack(const v_int32x4& a, const v_int32x4& b)
@@ -567,6 +650,35 @@ void v_rshr_pack_store(int* ptr, const v_int64x2& a)
     _mm_storel_epi64((__m128i*)ptr, a2);
 }
 
+// pack boolean
+inline v_uint8x16 v_pack_b(const v_uint16x8& a, const v_uint16x8& b)
+{
+    __m128i ab = _mm_packs_epi16(a.val, b.val);
+    return v_uint8x16(ab);
+}
+
+inline v_uint8x16 v_pack_b(const v_uint32x4& a, const v_uint32x4& b,
+                           const v_uint32x4& c, const v_uint32x4& d)
+{
+    __m128i ab = _mm_packs_epi32(a.val, b.val);
+    __m128i cd = _mm_packs_epi32(c.val, d.val);
+    return v_uint8x16(_mm_packs_epi16(ab, cd));
+}
+
+inline v_uint8x16 v_pack_b(const v_uint64x2& a, const v_uint64x2& b, const v_uint64x2& c,
+                           const v_uint64x2& d, const v_uint64x2& e, const v_uint64x2& f,
+                           const v_uint64x2& g, const v_uint64x2& h)
+{
+    __m128i ab = _mm_packs_epi32(a.val, b.val);
+    __m128i cd = _mm_packs_epi32(c.val, d.val);
+    __m128i ef = _mm_packs_epi32(e.val, f.val);
+    __m128i gh = _mm_packs_epi32(g.val, h.val);
+
+    __m128i abcd = _mm_packs_epi32(ab, cd);
+    __m128i efgh = _mm_packs_epi32(ef, gh);
+    return v_uint8x16(_mm_packs_epi16(abcd, efgh));
+}
+
 inline v_float32x4 v_matmul(const v_float32x4& v, const v_float32x4& m0,
                             const v_float32x4& m1, const v_float32x4& m2,
                             const v_float32x4& m3)
@@ -579,6 +691,16 @@ inline v_float32x4 v_matmul(const v_float32x4& v, const v_float32x4& m0,
     return v_float32x4(_mm_add_ps(_mm_add_ps(v0, v1), _mm_add_ps(v2, v3)));
 }
 
+inline v_float32x4 v_matmuladd(const v_float32x4& v, const v_float32x4& m0,
+                               const v_float32x4& m1, const v_float32x4& m2,
+                               const v_float32x4& a)
+{
+    __m128 v0 = _mm_mul_ps(_mm_shuffle_ps(v.val, v.val, _MM_SHUFFLE(0, 0, 0, 0)), m0.val);
+    __m128 v1 = _mm_mul_ps(_mm_shuffle_ps(v.val, v.val, _MM_SHUFFLE(1, 1, 1, 1)), m1.val);
+    __m128 v2 = _mm_mul_ps(_mm_shuffle_ps(v.val, v.val, _MM_SHUFFLE(2, 2, 2, 2)), m2.val);
+
+    return v_float32x4(_mm_add_ps(_mm_add_ps(v0, v1), _mm_add_ps(v2, a.val)));
+}
 
 #define OPENCV_HAL_IMPL_SSE_BIN_OP(bin_op, _Tpvec, intrin) \
     inline _Tpvec operator bin_op (const _Tpvec& a, const _Tpvec& b) \
@@ -597,14 +719,14 @@ OPENCV_HAL_IMPL_SSE_BIN_OP(+, v_int8x16, _mm_adds_epi8)
 OPENCV_HAL_IMPL_SSE_BIN_OP(-, v_int8x16, _mm_subs_epi8)
 OPENCV_HAL_IMPL_SSE_BIN_OP(+, v_uint16x8, _mm_adds_epu16)
 OPENCV_HAL_IMPL_SSE_BIN_OP(-, v_uint16x8, _mm_subs_epu16)
-OPENCV_HAL_IMPL_SSE_BIN_OP(*, v_uint16x8, _mm_mullo_epi16)
 OPENCV_HAL_IMPL_SSE_BIN_OP(+, v_int16x8, _mm_adds_epi16)
 OPENCV_HAL_IMPL_SSE_BIN_OP(-, v_int16x8, _mm_subs_epi16)
-OPENCV_HAL_IMPL_SSE_BIN_OP(*, v_int16x8, _mm_mullo_epi16)
 OPENCV_HAL_IMPL_SSE_BIN_OP(+, v_uint32x4, _mm_add_epi32)
 OPENCV_HAL_IMPL_SSE_BIN_OP(-, v_uint32x4, _mm_sub_epi32)
+OPENCV_HAL_IMPL_SSE_BIN_OP(*, v_uint32x4, _v128_mullo_epi32) // 32-bit operator* now goes through the _v128_mullo_epi32 helper
 OPENCV_HAL_IMPL_SSE_BIN_OP(+, v_int32x4, _mm_add_epi32)
 OPENCV_HAL_IMPL_SSE_BIN_OP(-, v_int32x4, _mm_sub_epi32)
+OPENCV_HAL_IMPL_SSE_BIN_OP(*, v_int32x4, _v128_mullo_epi32) // same helper for the signed 32-bit vector type
 OPENCV_HAL_IMPL_SSE_BIN_OP(+, v_float32x4, _mm_add_ps)
 OPENCV_HAL_IMPL_SSE_BIN_OP(-, v_float32x4, _mm_sub_ps)
 OPENCV_HAL_IMPL_SSE_BIN_OP(*, v_float32x4, _mm_mul_ps)
@@ -618,31 +740,41 @@ OPENCV_HAL_IMPL_SSE_BIN_OP(-, v_uint64x2, _mm_sub_epi64)
 OPENCV_HAL_IMPL_SSE_BIN_OP(+, v_int64x2, _mm_add_epi64)
 OPENCV_HAL_IMPL_SSE_BIN_OP(-, v_int64x2, _mm_sub_epi64)
 
-inline v_uint32x4 operator * (const v_uint32x4& a, const v_uint32x4& b)
-{
-    __m128i c0 = _mm_mul_epu32(a.val, b.val);
-    __m128i c1 = _mm_mul_epu32(_mm_srli_epi64(a.val, 32), _mm_srli_epi64(b.val, 32));
-    __m128i d0 = _mm_unpacklo_epi32(c0, c1);
-    __m128i d1 = _mm_unpackhi_epi32(c0, c1);
-    return v_uint32x4(_mm_unpacklo_epi64(d0, d1));
-}
-inline v_int32x4 operator * (const v_int32x4& a, const v_int32x4& b)
-{
-    __m128i c0 = _mm_mul_epu32(a.val, b.val);
-    __m128i c1 = _mm_mul_epu32(_mm_srli_epi64(a.val, 32), _mm_srli_epi64(b.val, 32));
-    __m128i d0 = _mm_unpacklo_epi32(c0, c1);
-    __m128i d1 = _mm_unpackhi_epi32(c0, c1);
-    return v_int32x4(_mm_unpacklo_epi64(d0, d1));
-}
-inline v_uint32x4& operator *= (v_uint32x4& a, const v_uint32x4& b)
+// saturating multiply 8-bit, 16-bit: operator* widens the products with v_mul_expand and narrows them back with v_pack
+#define OPENCV_HAL_IMPL_SSE_MUL_SAT(_Tpvec, _Tpwvec)             \
+    inline _Tpvec operator * (const _Tpvec& a, const _Tpvec& b)  \
+    {                                                            \
+        _Tpwvec c, d; /* the two widened product vectors */      \
+        v_mul_expand(a, b, c, d);                                \
+        return v_pack(c, d); /* narrow back to _Tpvec */         \
+    }                                                            \
+    inline _Tpvec& operator *= (_Tpvec& a, const _Tpvec& b)      \
+    { a = a * b; return a; }
+
+OPENCV_HAL_IMPL_SSE_MUL_SAT(v_uint8x16, v_uint16x8) // 8-bit unsigned lanes, 16-bit intermediates
+OPENCV_HAL_IMPL_SSE_MUL_SAT(v_int8x16,  v_int16x8) // 8-bit signed lanes, 16-bit intermediates
+OPENCV_HAL_IMPL_SSE_MUL_SAT(v_uint16x8, v_uint32x4) // 16-bit unsigned lanes, 32-bit intermediates
+OPENCV_HAL_IMPL_SSE_MUL_SAT(v_int16x8,  v_int32x4) // 16-bit signed lanes, 32-bit intermediates
+
+//  Multiply and expand: 8-bit inputs are widened to 16-bit vectors first, then multiplied lane by lane
+inline void v_mul_expand(const v_uint8x16& a, const v_uint8x16& b,
+                         v_uint16x8& c, v_uint16x8& d)
 {
-    a = a * b;
-    return a;
+    v_uint16x8 a0, a1, b0, b1; // widened pieces of a and b
+    v_expand(a, a0, a1); // split a into two 16-bit vectors
+    v_expand(b, b0, b1); // split b the same way
+    c = v_mul_wrap(a0, b0); // products of the first pieces
+    d = v_mul_wrap(a1, b1); // products of the second pieces
 }
-inline v_int32x4& operator *= (v_int32x4& a, const v_int32x4& b)
+
+inline void v_mul_expand(const v_int8x16& a, const v_int8x16& b, // signed 8-bit variant: outputs are 16-bit products
+                         v_int16x8& c, v_int16x8& d)
 {
-    a = a * b;
-    return a;
+    v_int16x8 a0, a1, b0, b1; // widened pieces of a and b
+    v_expand(a, a0, a1); // split a into two 16-bit vectors
+    v_expand(b, b0, b1); // split b the same way
+    c = v_mul_wrap(a0, b0); // products of the first pieces
+    d = v_mul_wrap(a1, b1); // products of the second pieces
 }
 
 inline void v_mul_expand(const v_int16x8& a, const v_int16x8& b,
@@ -672,10 +804,198 @@ inline void v_mul_expand(const v_uint32x4& a, const v_uint32x4& b,
     d.val = _mm_unpackhi_epi64(c0, c1);
 }
 
+inline v_int16x8 v_mul_hi(const v_int16x8& a, const v_int16x8& b) { return v_int16x8(_mm_mulhi_epi16(a.val, b.val)); } // high 16 bits of each signed 16x16 product
+inline v_uint16x8 v_mul_hi(const v_uint16x8& a, const v_uint16x8& b) { return v_uint16x8(_mm_mulhi_epu16(a.val, b.val)); } // high 16 bits of each unsigned 16x16 product
+
+//////// Dot Product ////////
+
+// 16 >> 32: products of adjacent 16-bit pairs are added into one 32-bit lane
 inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b)
+{ return v_int32x4(_mm_madd_epi16(a.val, b.val)); } // multiply-add of adjacent signed 16-bit pairs
+inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b, const v_int32x4& c)
+{ return v_dotprod(a, b) + c; } // same dot product, accumulated onto c
+
+// 32 >> 64: each 64-bit lane receives a[2i]*b[2i] + a[2i+1]*b[2i+1]
+inline v_int64x2 v_dotprod(const v_int32x4& a, const v_int32x4& b)
+{
+#if CV_SSE4_1 // the signed 32x32 -> 64 multiply (_mm_mul_epi32) is available
+    __m128i even = _mm_mul_epi32(a.val, b.val); // products of lanes 0 and 2
+    __m128i odd = _mm_mul_epi32(_mm_srli_epi64(a.val, 32), _mm_srli_epi64(b.val, 32)); // lanes 1 and 3, shifted into the even positions
+    return v_int64x2(_mm_add_epi64(even, odd));
+#else // only the unsigned multiply exists here; its result is corrected to the signed product
+    __m128i even_u = _mm_mul_epu32(a.val, b.val);
+    __m128i odd_u = _mm_mul_epu32(_mm_srli_epi64(a.val, 32), _mm_srli_epi64(b.val, 32));
+    // convert unsigned to signed high multiplication (from: Agner Fog(veclib) and H S Warren: Hacker's delight, 2003, p. 132)
+    __m128i a_sign = _mm_srai_epi32(a.val, 31); // all-ones in lanes where a is negative
+    __m128i b_sign = _mm_srai_epi32(b.val, 31); // all-ones in lanes where b is negative
+    // each operand, kept only in the lanes where the other operand is negative
+    __m128i axb  = _mm_and_si128(a.val, b_sign);
+    __m128i bxa  = _mm_and_si128(b.val, a_sign);
+    // sum of sign corrections
+    __m128i ssum = _mm_add_epi32(bxa, axb);
+    __m128i even_ssum = _mm_slli_epi64(ssum, 32); // even-lane correction moved into the high 32 bits
+    __m128i odd_ssum = _mm_and_si128(ssum, _mm_set_epi32(-1, 0, -1, 0)); // odd-lane correction, already in the high 32 bits
+    // subtract the corrections from the unsigned products, then add the even and odd products
+    return v_int64x2(_mm_add_epi64(_mm_sub_epi64(even_u, even_ssum), _mm_sub_epi64(odd_u, odd_ssum)));
+#endif
+}
+inline v_int64x2 v_dotprod(const v_int32x4& a, const v_int32x4& b, const v_int64x2& c)
+{ return v_dotprod(a, b) + c; } // same dot product, accumulated onto c
+
+// 8 >> 32: each 32-bit lane receives the sum of four adjacent 8-bit products
+inline v_uint32x4 v_dotprod_expand(const v_uint8x16& a, const v_uint8x16& b)
+{
+    __m128i a0 = _mm_srli_epi16(_mm_slli_si128(a.val, 1), 8); // even bytes, zero-extended to 16 bits
+    __m128i a1 = _mm_srli_epi16(a.val, 8); // odd bytes, zero-extended to 16 bits
+    __m128i b0 = _mm_srli_epi16(_mm_slli_si128(b.val, 1), 8); // even bytes of b
+    __m128i b1 = _mm_srli_epi16(b.val, 8); // odd bytes of b
+    __m128i p0 = _mm_madd_epi16(a0, b0); // per 32-bit lane: sum of the two even-byte products
+    __m128i p1 = _mm_madd_epi16(a1, b1); // per 32-bit lane: sum of the two odd-byte products
+    return v_uint32x4(_mm_add_epi32(p0, p1));
+}
+inline v_uint32x4 v_dotprod_expand(const v_uint8x16& a, const v_uint8x16& b, const v_uint32x4& c)
+{ return v_dotprod_expand(a, b) + c; } // same dot product, accumulated onto c
+
+inline v_int32x4 v_dotprod_expand(const v_int8x16& a, const v_int8x16& b) // signed 8 >> 32 variant
+{
+    __m128i a0 = _mm_srai_epi16(_mm_slli_si128(a.val, 1), 8); // even bytes, sign-extended to 16 bits
+    __m128i a1 = _mm_srai_epi16(a.val, 8); // odd bytes, sign-extended to 16 bits
+    __m128i b0 = _mm_srai_epi16(_mm_slli_si128(b.val, 1), 8); // even bytes of b
+    __m128i b1 = _mm_srai_epi16(b.val, 8); // odd bytes of b
+    __m128i p0 = _mm_madd_epi16(a0, b0); // per 32-bit lane: sum of the two even-byte products
+    __m128i p1 = _mm_madd_epi16(a1, b1); // per 32-bit lane: sum of the two odd-byte products
+    return v_int32x4(_mm_add_epi32(p0, p1));
+}
+inline v_int32x4 v_dotprod_expand(const v_int8x16& a, const v_int8x16& b, const v_int32x4& c)
+{ return v_dotprod_expand(a, b) + c; } // same dot product, accumulated onto c
+
+// 16 >> 64: 16-bit products are widened to 64 bits and summed into two 64-bit lanes
+inline v_uint64x2 v_dotprod_expand(const v_uint16x8& a, const v_uint16x8& b)
+{
+    v_uint32x4 c, d; // 32-bit products from v_mul_expand
+    v_mul_expand(a, b, c, d);
+
+    v_uint64x2 c0, c1, d0, d1; // the products widened to 64 bits
+    v_expand(c, c0, c1);
+    v_expand(d, d0, d1);
+
+    c0 += c1; d0 += d1; // partial sums for the c and d groups
+    return v_uint64x2(_mm_add_epi64( // lane 0 = c0[0] + c0[1], lane 1 = d0[0] + d0[1]
+        _mm_unpacklo_epi64(c0.val, d0.val),
+        _mm_unpackhi_epi64(c0.val, d0.val)
+    ));
+}
+inline v_uint64x2 v_dotprod_expand(const v_uint16x8& a, const v_uint16x8& b, const v_uint64x2& c)
+{ return v_dotprod_expand(a, b) + c; } // same dot product, accumulated onto c
+
+inline v_int64x2 v_dotprod_expand(const v_int16x8& a, const v_int16x8& b) // signed 16 >> 64 variant
+{
+    v_int32x4 prod = v_dotprod(a, b); // 32-bit sums of adjacent 16-bit products
+    v_int64x2 c, d; // prod widened to 64 bits
+    v_expand(prod, c, d);
+    return v_int64x2(_mm_add_epi64( // lane 0 = c[0] + c[1], lane 1 = d[0] + d[1]
+        _mm_unpacklo_epi64(c.val, d.val),
+        _mm_unpackhi_epi64(c.val, d.val)
+    ));
+}
+inline v_int64x2 v_dotprod_expand(const v_int16x8& a, const v_int16x8& b, const v_int64x2& c)
+{ return v_dotprod_expand(a, b) + c; } // same dot product, accumulated onto c
+
+// 32 >> 64f: pairwise 32-bit integer dot product returned as two doubles
+inline v_float64x2 v_dotprod_expand(const v_int32x4& a, const v_int32x4& b)
+{
+#if CV_SSE4_1 // reuse the integer 32 >> 64 dot product and convert the result
+    return v_cvt_f64(v_dotprod(a, b));
+#else // convert to double first, multiply, then add neighbouring products
+    v_float64x2 c = v_cvt_f64(a) * v_cvt_f64(b); // products from v_cvt_f64
+    v_float64x2 d = v_cvt_f64_high(a) * v_cvt_f64_high(b); // products from v_cvt_f64_high
+
+    return v_float64x2(_mm_add_pd( // lane 0 = c[0] + c[1], lane 1 = d[0] + d[1]
+        _mm_unpacklo_pd(c.val, d.val),
+        _mm_unpackhi_pd(c.val, d.val)
+    ));
+#endif
+}
+inline v_float64x2 v_dotprod_expand(const v_int32x4& a, const v_int32x4& b, const v_float64x2& c)
+{ return v_dotprod_expand(a, b) + c; } // same dot product, accumulated onto c
+
+//////// Fast Dot Product ////////
+
+// 16 >> 32: forwards to v_dotprod
+inline v_int32x4 v_dotprod_fast(const v_int16x8& a, const v_int16x8& b)
+{ return v_dotprod(a, b); }
+inline v_int32x4 v_dotprod_fast(const v_int16x8& a, const v_int16x8& b, const v_int32x4& c)
+{ return v_dotprod(a, b) + c; } // accumulating form
+
+// 32 >> 64: forwards to v_dotprod
+inline v_int64x2 v_dotprod_fast(const v_int32x4& a, const v_int32x4& b)
+{ return v_dotprod(a, b); }
+inline v_int64x2 v_dotprod_fast(const v_int32x4& a, const v_int32x4& b, const v_int64x2& c)
+{ return v_dotprod_fast(a, b) + c; } // accumulating form
+
+// 8 >> 32: widens whole halves instead of even/odd bytes, so products are grouped into lanes differently from v_dotprod_expand
+inline v_uint32x4 v_dotprod_expand_fast(const v_uint8x16& a, const v_uint8x16& b)
+{
+    __m128i a0 = v_expand_low(a).val; // widened via v_expand_low
+    __m128i a1 = v_expand_high(a).val; // widened via v_expand_high
+    __m128i b0 = v_expand_low(b).val;
+    __m128i b1 = v_expand_high(b).val;
+    __m128i p0 = _mm_madd_epi16(a0, b0); // pairwise product sums from the first halves
+    __m128i p1 = _mm_madd_epi16(a1, b1); // pairwise product sums from the second halves
+    return v_uint32x4(_mm_add_epi32(p0, p1));
+}
+inline v_uint32x4 v_dotprod_expand_fast(const v_uint8x16& a, const v_uint8x16& b, const v_uint32x4& c)
+{ return v_dotprod_expand_fast(a, b) + c; } // accumulating form
+
+inline v_int32x4 v_dotprod_expand_fast(const v_int8x16& a, const v_int8x16& b) // signed 8 >> 32 variant
+{
+#if CV_SSE4_1 // sign-extend the low eight bytes directly with _mm_cvtepi8_epi16
+    __m128i a0 = _mm_cvtepi8_epi16(a.val); // low eight bytes of a as 16-bit lanes
+    __m128i a1 = v_expand_high(a).val; // widened via v_expand_high
+    __m128i b0 = _mm_cvtepi8_epi16(b.val); // low eight bytes of b as 16-bit lanes
+    __m128i b1 = v_expand_high(b).val;
+    __m128i p0 = _mm_madd_epi16(a0, b0); // pairwise product sums from the low halves
+    __m128i p1 = _mm_madd_epi16(a1, b1); // pairwise product sums from the other halves
+    return v_int32x4(_mm_add_epi32(p0, p1));
+#else // without SSE4.1, fall back to the regular v_dotprod_expand
+    return v_dotprod_expand(a, b);
+#endif
+}
+inline v_int32x4 v_dotprod_expand_fast(const v_int8x16& a, const v_int8x16& b, const v_int32x4& c)
+{ return v_dotprod_expand_fast(a, b) + c; } // accumulating form
+
+// 16 >> 64: like v_dotprod_expand, but the partial sums are added directly without the final unpack step
+inline v_uint64x2 v_dotprod_expand_fast(const v_uint16x8& a, const v_uint16x8& b)
+{
+    v_uint32x4 c, d; // 32-bit products from v_mul_expand
+    v_mul_expand(a, b, c, d);
+
+    v_uint64x2 c0, c1, d0, d1; // the products widened to 64 bits
+    v_expand(c, c0, c1);
+    v_expand(d, d0, d1);
+
+    c0 += c1; d0 += d1; // partial sums for the c and d groups
+    return c0 + d0; // lane-wise sum of the two groups
+}
+inline v_uint64x2 v_dotprod_expand_fast(const v_uint16x8& a, const v_uint16x8& b, const v_uint64x2& c)
+{ return v_dotprod_expand_fast(a, b) + c; } // accumulating form
+
+inline v_int64x2 v_dotprod_expand_fast(const v_int16x8& a, const v_int16x8& b) // signed 16 >> 64 variant
 {
-    return v_int32x4(_mm_madd_epi16(a.val, b.val));
+    v_int32x4 prod = v_dotprod(a, b); // 32-bit sums of adjacent 16-bit products
+    v_int64x2 c, d; // prod widened to 64 bits
+    v_expand(prod, c, d);
+    return c + d; // lane-wise sum of the two widened vectors
 }
+inline v_int64x2 v_dotprod_expand_fast(const v_int16x8& a, const v_int16x8& b, const v_int64x2& c)
+{ return v_dotprod_expand_fast(a, b) + c; } // accumulating form
+
+// 32 >> 64f: built from v_fma on the converted operands
+v_float64x2 v_fma(const v_float64x2& a, const v_float64x2& b, const v_float64x2& c); // forward declaration: v_fma is used here before its definition
+inline v_float64x2 v_dotprod_expand_fast(const v_int32x4& a, const v_int32x4& b)
+{ return v_fma(v_cvt_f64(a), v_cvt_f64(b), v_cvt_f64_high(a) * v_cvt_f64_high(b)); } // cvt(a)*cvt(b) + cvt_high(a)*cvt_high(b)
+inline v_float64x2 v_dotprod_expand_fast(const v_int32x4& a,   const v_int32x4& b, const v_float64x2& c)
+{ return v_fma(v_cvt_f64(a), v_cvt_f64(b), v_fma(v_cvt_f64_high(a), v_cvt_f64_high(b), c)); } // two chained v_fma calls, starting from c
 
 #define OPENCV_HAL_IMPL_SSE_LOGIC_OP(_Tpvec, suffix, not_const) \
     OPENCV_HAL_IMPL_SSE_BIN_OP(&, _Tpvec, _mm_and_##suffix) \
@@ -702,7 +1022,7 @@ inline v_float32x4 v_sqrt(const v_float32x4& x)
 
 inline v_float32x4 v_invsqrt(const v_float32x4& x)
 {
-    static const __m128 _0_5 = _mm_set1_ps(0.5f), _1_5 = _mm_set1_ps(1.5f);
+    const __m128 _0_5 = _mm_set1_ps(0.5f), _1_5 = _mm_set1_ps(1.5f);
     __m128 t = x.val;
     __m128 h = _mm_mul_ps(t, _0_5);
     t = _mm_rsqrt_ps(t);
@@ -715,10 +1035,22 @@ inline v_float64x2 v_sqrt(const v_float64x2& x)
 
 inline v_float64x2 v_invsqrt(const v_float64x2& x)
 {
-    static const __m128d v_1 = _mm_set1_pd(1.);
+    const __m128d v_1 = _mm_set1_pd(1.); // 1.0 in both lanes; now a plain local instead of a function-local static
     return v_float64x2(_mm_div_pd(v_1, _mm_sqrt_pd(x.val)));
 }
 
+#define OPENCV_HAL_IMPL_SSE_ABS_INT_FUNC(_Tpuvec, _Tpsvec, func, suffix, subWidth) \
+inline _Tpuvec v_abs(const _Tpsvec& x) /* combines x with (0 - x) using _mm_<func>_ep<suffix> */ \
+{ return _Tpuvec(_mm_##func##_ep##suffix(x.val, _mm_sub_ep##subWidth(_mm_setzero_si128(), x.val))); }
+
+OPENCV_HAL_IMPL_SSE_ABS_INT_FUNC(v_uint8x16, v_int8x16, min, u8, i8) // 8-bit: unsigned min of x and -x
+OPENCV_HAL_IMPL_SSE_ABS_INT_FUNC(v_uint16x8, v_int16x8, max, i16, i16) // 16-bit: signed max of x and -x
+inline v_uint32x4 v_abs(const v_int32x4& x)
+{   // (x ^ mask) + bit, where mask = x >> 31 (arithmetic) and bit = x >> 31 (logical).
+    const __m128i sign_bit  = _mm_srli_epi32(x.val, 31);
+    const __m128i sign_mask = _mm_srai_epi32(x.val, 31);
+    return v_uint32x4(_mm_add_epi32(_mm_xor_si128(x.val, sign_mask), sign_bit));
+}
 inline v_float32x4 v_abs(const v_float32x4& x)
 { return v_float32x4(_mm_and_ps(x.val, _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff)))); }
 inline v_float64x2 v_abs(const v_float64x2& x)
@@ -746,43 +1078,75 @@ OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_float64x2, v_max, _mm_max_pd)
 
 inline v_int8x16 v_min(const v_int8x16& a, const v_int8x16& b)
 {
+#if CV_SSE4_1 // use the signed 8-bit min intrinsic directly
+    return v_int8x16(_mm_min_epi8(a.val, b.val));
+#else // otherwise: flip the sign bits, take the unsigned min, flip back
     __m128i delta = _mm_set1_epi8((char)-128);
     return v_int8x16(_mm_xor_si128(delta, _mm_min_epu8(_mm_xor_si128(a.val, delta),
                                                        _mm_xor_si128(b.val, delta))));
+#endif // CV_SSE4_1
 }
 inline v_int8x16 v_max(const v_int8x16& a, const v_int8x16& b)
 {
+#if CV_SSE4_1 // use the signed 8-bit max intrinsic directly
+    return v_int8x16(_mm_max_epi8(a.val, b.val));
+#else // otherwise: flip the sign bits, take the unsigned max, flip back
     __m128i delta = _mm_set1_epi8((char)-128);
     return v_int8x16(_mm_xor_si128(delta, _mm_max_epu8(_mm_xor_si128(a.val, delta),
                                                        _mm_xor_si128(b.val, delta))));
+#endif // CV_SSE4_1
 }
 inline v_uint16x8 v_min(const v_uint16x8& a, const v_uint16x8& b)
 {
+#if CV_SSE4_1 // use the unsigned 16-bit min intrinsic directly
+    return v_uint16x8(_mm_min_epu16(a.val, b.val));
+#else // otherwise: a - saturating(a - b) yields the smaller value
     return v_uint16x8(_mm_subs_epu16(a.val, _mm_subs_epu16(a.val, b.val)));
+#endif // CV_SSE4_1
 }
 inline v_uint16x8 v_max(const v_uint16x8& a, const v_uint16x8& b)
 {
+#if CV_SSE4_1 // use the unsigned 16-bit max intrinsic directly
+    return v_uint16x8(_mm_max_epu16(a.val, b.val));
+#else // otherwise: saturating(a - b) + b yields the larger value
     return v_uint16x8(_mm_adds_epu16(_mm_subs_epu16(a.val, b.val), b.val));
+#endif // CV_SSE4_1
 }
 inline v_uint32x4 v_min(const v_uint32x4& a, const v_uint32x4& b)
 {
+#if CV_SSE4_1 // use the unsigned 32-bit min intrinsic directly
+    return v_uint32x4(_mm_min_epu32(a.val, b.val));
+#else // otherwise: flip the sign bits, do a signed compare, and select with the mask
     __m128i delta = _mm_set1_epi32((int)0x80000000);
     __m128i mask = _mm_cmpgt_epi32(_mm_xor_si128(a.val, delta), _mm_xor_si128(b.val, delta));
     return v_uint32x4(v_select_si128(mask, b.val, a.val));
+#endif // CV_SSE4_1
 }
 inline v_uint32x4 v_max(const v_uint32x4& a, const v_uint32x4& b)
 {
+#if CV_SSE4_1 // use the unsigned 32-bit max intrinsic directly
+    return v_uint32x4(_mm_max_epu32(a.val, b.val));
+#else // otherwise: flip the sign bits, do a signed compare, and select with the mask
     __m128i delta = _mm_set1_epi32((int)0x80000000);
     __m128i mask = _mm_cmpgt_epi32(_mm_xor_si128(a.val, delta), _mm_xor_si128(b.val, delta));
     return v_uint32x4(v_select_si128(mask, a.val, b.val));
+#endif // CV_SSE4_1
 }
 inline v_int32x4 v_min(const v_int32x4& a, const v_int32x4& b)
 {
+#if CV_SSE4_1 // use the signed 32-bit min intrinsic directly
+    return v_int32x4(_mm_min_epi32(a.val, b.val));
+#else // otherwise: signed compare, then select with the mask
     return v_int32x4(v_select_si128(_mm_cmpgt_epi32(a.val, b.val), b.val, a.val));
+#endif // CV_SSE4_1
 }
 inline v_int32x4 v_max(const v_int32x4& a, const v_int32x4& b)
 {
+#if CV_SSE4_1 // use the signed 32-bit max intrinsic directly
+    return v_int32x4(_mm_max_epi32(a.val, b.val));
+#else // otherwise: signed compare, then select with the mask
     return v_int32x4(v_select_si128(_mm_cmpgt_epi32(a.val, b.val), a.val, b.val));
+#endif // CV_SSE4_1
 }
 
 #define OPENCV_HAL_IMPL_SSE_INT_CMP_OP(_Tpuvec, _Tpsvec, suffix, sbit) \
@@ -864,6 +1228,29 @@ inline _Tpvec operator >= (const _Tpvec& a, const _Tpvec& b) \
 OPENCV_HAL_IMPL_SSE_FLT_CMP_OP(v_float32x4, ps)
 OPENCV_HAL_IMPL_SSE_FLT_CMP_OP(v_float64x2, pd)
 
+#if CV_SSE4_1 // a 64-bit equality compare (_mm_cmpeq_epi64) is available
+#define OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(_Tpvec) \
+inline _Tpvec operator == (const _Tpvec& a, const _Tpvec& b) \
+{ return _Tpvec(_mm_cmpeq_epi64(a.val, b.val)); } \
+inline _Tpvec operator != (const _Tpvec& a, const _Tpvec& b) \
+{ return ~(a == b); }
+#else // otherwise compare 32-bit halves and require both halves of a 64-bit lane to match
+#define OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(_Tpvec) \
+inline _Tpvec operator == (const _Tpvec& a, const _Tpvec& b) \
+{ __m128i cmp = _mm_cmpeq_epi32(a.val, b.val); /* AND each half's result with its neighbour's */ \
+  return _Tpvec(_mm_and_si128(cmp, _mm_shuffle_epi32(cmp, _MM_SHUFFLE(2, 3, 0, 1)))); } \
+inline _Tpvec operator != (const _Tpvec& a, const _Tpvec& b) \
+{ return ~(a == b); }
+#endif // CV_SSE4_1
+
+OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(v_uint64x2) // == and != for unsigned 64-bit lanes
+OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(v_int64x2) // == and != for signed 64-bit lanes
+
+inline v_float32x4 v_not_nan(const v_float32x4& a) // lane mask: set where a is not NaN
+{ return v_float32x4(_mm_cmpord_ps(a.val, a.val)); } // an ordered compare of a with itself is false only for NaN
+inline v_float64x2 v_not_nan(const v_float64x2& a) // double-precision variant
+{ return v_float64x2(_mm_cmpord_pd(a.val, a.val)); }
+
 OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_uint8x16, v_add_wrap, _mm_add_epi8)
 OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_int8x16, v_add_wrap, _mm_add_epi8)
 OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_uint16x8, v_add_wrap, _mm_add_epi16)
@@ -872,33 +1259,86 @@ OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_uint8x16, v_sub_wrap, _mm_sub_epi8)
 OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_int8x16, v_sub_wrap, _mm_sub_epi8)
 OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_uint16x8, v_sub_wrap, _mm_sub_epi16)
 OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_int16x8, v_sub_wrap, _mm_sub_epi16)
+OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_uint16x8, v_mul_wrap, _mm_mullo_epi16) // v_mul_wrap keeps the low 16 bits of each product
+OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_int16x8, v_mul_wrap, _mm_mullo_epi16) // same intrinsic for the signed vector type
 
-#define OPENCV_HAL_IMPL_SSE_ABSDIFF_8_16(_Tpuvec, _Tpsvec, bits, smask32) \
-inline _Tpuvec v_absdiff(const _Tpuvec& a, const _Tpuvec& b) \
-{ \
-    return _Tpuvec(_mm_add_epi##bits(_mm_subs_epu##bits(a.val, b.val), _mm_subs_epu##bits(b.val, a.val))); \
-} \
-inline _Tpuvec v_absdiff(const _Tpsvec& a, const _Tpsvec& b) \
-{ \
-    __m128i smask = _mm_set1_epi32(smask32); \
-    __m128i a1 = _mm_xor_si128(a.val, smask); \
-    __m128i b1 = _mm_xor_si128(b.val, smask); \
-    return _Tpuvec(_mm_add_epi##bits(_mm_subs_epu##bits(a1, b1), _mm_subs_epu##bits(b1, a1))); \
+inline v_uint8x16 v_mul_wrap(const v_uint8x16& a, const v_uint8x16& b) // 8-bit multiply built from 16-bit multiplies
+{
+    __m128i ad = _mm_srai_epi16(a.val, 8); // odd bytes of a moved into the low byte of each 16-bit lane
+    __m128i bd = _mm_srai_epi16(b.val, 8); // odd bytes of b likewise
+    __m128i p0 = _mm_mullo_epi16(a.val, b.val); // even
+    __m128i p1 = _mm_slli_epi16(_mm_mullo_epi16(ad, bd), 8); // odd
+    const __m128i b01 = _mm_set1_epi32(0xFF00FF00); // mask covering the odd byte positions
+    return v_uint8x16(_v128_blendv_epi8(p0, p1, b01)); // presumably takes p1 where the mask is set and p0 elsewhere - confirm against _v128_blendv_epi8
+}
+inline v_int8x16 v_mul_wrap(const v_int8x16& a, const v_int8x16& b) // signed variant: reuses the unsigned implementation
+{
+    return v_reinterpret_as_s8(v_mul_wrap(v_reinterpret_as_u8(a), v_reinterpret_as_u8(b))); // reinterpret, multiply, reinterpret back
 }
 
-OPENCV_HAL_IMPL_SSE_ABSDIFF_8_16(v_uint8x16, v_int8x16, 8, (int)0x80808080)
-OPENCV_HAL_IMPL_SSE_ABSDIFF_8_16(v_uint16x8, v_int16x8, 16, (int)0x80008000)
+/** Absolute difference **/
 
+inline v_uint8x16 v_absdiff(const v_uint8x16& a, const v_uint8x16& b)
+{ return v_add_wrap(a - b,  b - a); } // combines both subtraction orders (a - b and b - a)
+inline v_uint16x8 v_absdiff(const v_uint16x8& a, const v_uint16x8& b)
+{ return v_add_wrap(a - b,  b - a); } // same construction for 16-bit lanes
 inline v_uint32x4 v_absdiff(const v_uint32x4& a, const v_uint32x4& b)
+{ return v_max(a, b) - v_min(a, b); } // larger minus smaller
+
+inline v_uint8x16 v_absdiff(const v_int8x16& a, const v_int8x16& b) // signed inputs, unsigned result
 {
-    return v_max(a, b) - v_min(a, b);
+    v_int8x16 d = v_sub_wrap(a, b); // wrapping difference a - b
+    v_int8x16 m = a < b; // comparison mask for lanes where a < b
+    return v_reinterpret_as_u8(v_sub_wrap(d ^ m, m)); // (d ^ m) - m negates d in the masked lanes
+}
+inline v_uint16x8 v_absdiff(const v_int16x8& a, const v_int16x8& b) // signed inputs, unsigned result
+{
+    return v_reinterpret_as_u16(v_sub_wrap(v_max(a, b), v_min(a, b))); // wrapping (max - min), reinterpreted as unsigned
 }
-
 inline v_uint32x4 v_absdiff(const v_int32x4& a, const v_int32x4& b)
 {
-    __m128i d = _mm_sub_epi32(a.val, b.val);
-    __m128i m = _mm_cmpgt_epi32(b.val, a.val);
-    return v_uint32x4(_mm_sub_epi32(_mm_xor_si128(d, m), m));
+    v_int32x4 d = a - b; // difference a - b
+    v_int32x4 m = a < b; // comparison mask for lanes where a < b
+    return v_reinterpret_as_u32((d ^ m) - m); // same xor/subtract negate as before, written with the vector operators
+}
+
+/** Saturating absolute difference **/
+inline v_int8x16 v_absdiffs(const v_int8x16& a, const v_int8x16& b)
+{   // Negate (a - b) in the lanes where a < b, using operator- for both subtractions.
+    const v_int8x16 diff = a - b;
+    const v_int8x16 neg = a < b;
+    return (diff ^ neg) - neg;
+}
+inline v_int16x8 v_absdiffs(const v_int16x8& a, const v_int16x8& b) // signed result, unlike v_absdiff
+{ return v_max(a, b) - v_min(a, b); } // larger minus smaller, via the v_int16x8 operator-
+
+
+inline v_int32x4 v_fma(const v_int32x4& a, const v_int32x4& b, const v_int32x4& c) // integer a*b + c
+{
+    return a * b + c; // uses the v_int32x4 operator* and operator+
+}
+
+inline v_int32x4 v_muladd(const v_int32x4& a, const v_int32x4& b, const v_int32x4& c) // alias for the integer v_fma
+{
+    return v_fma(a, b, c); // forwards unchanged
+}
+
+inline v_float32x4 v_fma(const v_float32x4& a, const v_float32x4& b, const v_float32x4& c) // a*b + c per lane
+{
+#if CV_FMA3 // use the fused multiply-add intrinsic
+    return v_float32x4(_mm_fmadd_ps(a.val, b.val, c.val));
+#else // otherwise a separate multiply followed by an add
+    return v_float32x4(_mm_add_ps(_mm_mul_ps(a.val, b.val), c.val));
+#endif // CV_FMA3
+}
+
+inline v_float64x2 v_fma(const v_float64x2& a, const v_float64x2& b, const v_float64x2& c) // a*b + c per lane
+{
+#if CV_FMA3 // use the fused multiply-add intrinsic
+    return v_float64x2(_mm_fmadd_pd(a.val, b.val, c.val));
+#else // otherwise a separate multiply followed by an add
+    return v_float64x2(_mm_add_pd(_mm_mul_pd(a.val, b.val), c.val));
+#endif // CV_FMA3
 }
 
 #define OPENCV_HAL_IMPL_SSE_MISC_FLT_OP(_Tpvec, _Tp, _Tpreg, suffix, absmask_vec) \
@@ -909,17 +1349,16 @@ inline _Tpvec v_absdiff(const _Tpvec& a, const _Tpvec& b) \
 } \
 inline _Tpvec v_magnitude(const _Tpvec& a, const _Tpvec& b) \
 { \
-    _Tpreg res = _mm_add_##suffix(_mm_mul_##suffix(a.val, a.val), _mm_mul_##suffix(b.val, b.val)); \
-    return _Tpvec(_mm_sqrt_##suffix(res)); \
+    _Tpvec res = v_fma(a, a, b*b); \
+    return _Tpvec(_mm_sqrt_##suffix(res.val)); \
 } \
 inline _Tpvec v_sqr_magnitude(const _Tpvec& a, const _Tpvec& b) \
 { \
-    _Tpreg res = _mm_add_##suffix(_mm_mul_##suffix(a.val, a.val), _mm_mul_##suffix(b.val, b.val)); \
-    return _Tpvec(res); \
+    return v_fma(a, a, b*b); \
 } \
 inline _Tpvec v_muladd(const _Tpvec& a, const _Tpvec& b, const _Tpvec& c) \
 { \
-    return _Tpvec(_mm_add_##suffix(_mm_mul_##suffix(a.val, b.val), c.val)); \
+    return v_fma(a, b, c); \
 }
 
 OPENCV_HAL_IMPL_SSE_MISC_FLT_OP(v_float32x4, float, __m128, ps, _mm_set1_epi32((int)0x7fffffff))
@@ -967,11 +1406,125 @@ OPENCV_HAL_IMPL_SSE_SHIFT_OP(v_uint16x8, v_int16x8, epi16, _mm_srai_epi16)
 OPENCV_HAL_IMPL_SSE_SHIFT_OP(v_uint32x4, v_int32x4, epi32, _mm_srai_epi32)
 OPENCV_HAL_IMPL_SSE_SHIFT_OP(v_uint64x2, v_int64x2, epi64, v_srai_epi64)
 
+namespace hal_sse_internal // helpers for byte-wise concatenate-and-shift of two registers
+{
+    template <int imm, // byte offset; the bool parameters below classify it and select a specialization
+        bool is_invalid = ((imm < 0) || (imm > 16)),
+        bool is_first = (imm == 0),
+        bool is_half = (imm == 8),
+        bool is_second = (imm == 16),
+        bool is_other = (((imm > 0) && (imm < 8)) || ((imm > 8) && (imm < 16)))>
+    class v_sse_palignr_u8_class;
+
+    template <int imm>
+    class v_sse_palignr_u8_class<imm, true, false, false, false, false>; // invalid offset: declared only, never defined
+
+    template <int imm>
+    class v_sse_palignr_u8_class<imm, false, true, false, false, false> // imm == 0
+    {
+    public:
+        inline __m128i operator()(const __m128i& a, const __m128i&) const
+        {
+            return a; // offset 0: the result is a itself
+        }
+    };
+
+    template <int imm>
+    class v_sse_palignr_u8_class<imm, false, false, true, false, false> // imm == 8
+    {
+    public:
+        inline __m128i operator()(const __m128i& a, const __m128i& b) const
+        {
+            return _mm_unpacklo_epi64(_mm_unpackhi_epi64(a, a), b); // high half of a followed by low half of b
+        }
+    };
+
+    template <int imm>
+    class v_sse_palignr_u8_class<imm, false, false, false, true, false> // imm == 16
+    {
+    public:
+        inline __m128i operator()(const __m128i&, const __m128i& b) const
+        {
+            return b; // offset 16: the result is b itself
+        }
+    };
+
+    template <int imm>
+    class v_sse_palignr_u8_class<imm, false, false, false, false, true> // any other offset in 1..15
+    {
+#if CV_SSSE3 // use the byte-align intrinsic
+    public:
+        inline __m128i operator()(const __m128i& a, const __m128i& b) const
+        {
+            return _mm_alignr_epi8(b, a, imm); // note the operand order: b is the upper part, a the lower
+        }
+#else // otherwise combine two byte shifts with OR
+    public:
+        inline __m128i operator()(const __m128i& a, const __m128i& b) const
+        {
+            enum { imm2 = (sizeof(__m128i) - imm) }; // complementary byte count
+            return _mm_or_si128(_mm_srli_si128(a, imm), _mm_slli_si128(b, imm2)); // (a >> imm bytes) | (b << imm2 bytes)
+        }
+#endif // CV_SSSE3
+    };
+
+    template <int imm>
+    inline __m128i v_sse_palignr_u8(const __m128i& a, const __m128i& b) // entry point: dispatches on imm at compile time
+    {
+        CV_StaticAssert((imm >= 0) && (imm <= 16), "Invalid imm for v_sse_palignr_u8.");
+        return v_sse_palignr_u8_class<imm>()(a, b);
+    }
+}
+
+template<int imm, typename _Tpvec> // imm: shift distance in lanes
+inline _Tpvec v_rotate_right(const _Tpvec &a) // single-operand form: a byte shift, so vacated lanes become zero
+{
+    using namespace hal_sse_internal;
+    enum { imm2 = (imm * sizeof(typename _Tpvec::lane_type)) }; // lane count converted to bytes
+    return _Tpvec(v_sse_reinterpret_as<typename _Tpvec::vector_type>(
+        _mm_srli_si128(
+            v_sse_reinterpret_as<__m128i>(a.val), imm2)));
+}
+
+template<int imm, typename _Tpvec> // imm: shift distance in lanes
+inline _Tpvec v_rotate_left(const _Tpvec &a) // single-operand form: a byte shift, so vacated lanes become zero
+{
+    using namespace hal_sse_internal;
+    enum { imm2 = (imm * sizeof(typename _Tpvec::lane_type)) }; // lane count converted to bytes
+    return _Tpvec(v_sse_reinterpret_as<typename _Tpvec::vector_type>(
+        _mm_slli_si128(
+            v_sse_reinterpret_as<__m128i>(a.val), imm2)));
+}
+
+template<int imm, typename _Tpvec> // imm: shift distance in lanes
+inline _Tpvec v_rotate_right(const _Tpvec &a, const _Tpvec &b) // two-operand form: vacated lanes are filled from b
+{
+    using namespace hal_sse_internal;
+    enum { imm2 = (imm * sizeof(typename _Tpvec::lane_type)) }; // lane count converted to bytes
+    return _Tpvec(v_sse_reinterpret_as<typename _Tpvec::vector_type>(
+        v_sse_palignr_u8<imm2>( // a shifted right by imm2 bytes, with b supplying the upper bytes
+            v_sse_reinterpret_as<__m128i>(a.val),
+            v_sse_reinterpret_as<__m128i>(b.val))));
+}
+
+template<int imm, typename _Tpvec> // imm: shift distance in lanes
+inline _Tpvec v_rotate_left(const _Tpvec &a, const _Tpvec &b) // two-operand form: vacated lanes are filled from b
+{
+    using namespace hal_sse_internal;
+    enum { imm2 = ((_Tpvec::nlanes - imm) * sizeof(typename _Tpvec::lane_type)) }; // complementary right-shift distance in bytes
+    return _Tpvec(v_sse_reinterpret_as<typename _Tpvec::vector_type>(
+        v_sse_palignr_u8<imm2>( // operands swapped: b is the lower part, a the upper
+            v_sse_reinterpret_as<__m128i>(b.val),
+            v_sse_reinterpret_as<__m128i>(a.val))));
+}
+
 #define OPENCV_HAL_IMPL_SSE_LOADSTORE_INT_OP(_Tpvec, _Tp) \
 inline _Tpvec v_load(const _Tp* ptr) \
 { return _Tpvec(_mm_loadu_si128((const __m128i*)ptr)); } \
 inline _Tpvec v_load_aligned(const _Tp* ptr) \
 { return _Tpvec(_mm_load_si128((const __m128i*)ptr)); } \
+inline _Tpvec v_load_low(const _Tp* ptr) \
+{ return _Tpvec(_mm_loadl_epi64((const __m128i*)ptr)); } \
 inline _Tpvec v_load_halves(const _Tp* ptr0, const _Tp* ptr1) \
 { \
     return _Tpvec(_mm_unpacklo_epi64(_mm_loadl_epi64((const __m128i*)ptr0), \
@@ -981,6 +1534,17 @@ inline void v_store(_Tp* ptr, const _Tpvec& a) \
 { _mm_storeu_si128((__m128i*)ptr, a.val); } \
 inline void v_store_aligned(_Tp* ptr, const _Tpvec& a) \
 { _mm_store_si128((__m128i*)ptr, a.val); } \
+inline void v_store_aligned_nocache(_Tp* ptr, const _Tpvec& a) \
+{ _mm_stream_si128((__m128i*)ptr, a.val); } \
+inline void v_store(_Tp* ptr, const _Tpvec& a, hal::StoreMode mode) \
+{ \
+    if( mode == hal::STORE_UNALIGNED ) \
+        _mm_storeu_si128((__m128i*)ptr, a.val); \
+    else if( mode == hal::STORE_ALIGNED_NOCACHE )  \
+        _mm_stream_si128((__m128i*)ptr, a.val); \
+    else \
+        _mm_store_si128((__m128i*)ptr, a.val); \
+} \
 inline void v_store_low(_Tp* ptr, const _Tpvec& a) \
 { _mm_storel_epi64((__m128i*)ptr, a.val); } \
 inline void v_store_high(_Tp* ptr, const _Tpvec& a) \
@@ -1000,6 +1564,8 @@ inline _Tpvec v_load(const _Tp* ptr) \
 { return _Tpvec(_mm_loadu_##suffix(ptr)); } \
 inline _Tpvec v_load_aligned(const _Tp* ptr) \
 { return _Tpvec(_mm_load_##suffix(ptr)); } \
+inline _Tpvec v_load_low(const _Tp* ptr) \
+{ return _Tpvec(_mm_castsi128_##suffix(_mm_loadl_epi64((const __m128i*)ptr))); } \
 inline _Tpvec v_load_halves(const _Tp* ptr0, const _Tp* ptr1) \
 { \
     return _Tpvec(_mm_castsi128_##suffix( \
@@ -1010,6 +1576,17 @@ inline void v_store(_Tp* ptr, const _Tpvec& a) \
 { _mm_storeu_##suffix(ptr, a.val); } \
 inline void v_store_aligned(_Tp* ptr, const _Tpvec& a) \
 { _mm_store_##suffix(ptr, a.val); } \
+inline void v_store_aligned_nocache(_Tp* ptr, const _Tpvec& a) \
+{ _mm_stream_##suffix(ptr, a.val); } \
+inline void v_store(_Tp* ptr, const _Tpvec& a, hal::StoreMode mode) \
+{ \
+    if( mode == hal::STORE_UNALIGNED ) \
+        _mm_storeu_##suffix(ptr, a.val); \
+    else if( mode == hal::STORE_ALIGNED_NOCACHE )  \
+        _mm_stream_##suffix(ptr, a.val); \
+    else \
+        _mm_store_##suffix(ptr, a.val); \
+} \
 inline void v_store_low(_Tp* ptr, const _Tpvec& a) \
 { _mm_storel_epi64((__m128i*)ptr, _mm_cast##suffix##_si128(a.val)); } \
 inline void v_store_high(_Tp* ptr, const _Tpvec& a) \
@@ -1021,6 +1598,72 @@ inline void v_store_high(_Tp* ptr, const _Tpvec& a) \
 OPENCV_HAL_IMPL_SSE_LOADSTORE_FLT_OP(v_float32x4, float, ps)
 OPENCV_HAL_IMPL_SSE_LOADSTORE_FLT_OP(v_float64x2, double, pd)
 
+inline unsigned v_reduce_sum(const v_uint8x16& a)
+{   // SAD against zero yields one byte-sum per 64-bit half; fold the two halves together.
+    const __m128i sums = _mm_sad_epu8(a.val, _mm_setzero_si128());
+    return (unsigned)_mm_cvtsi128_si32(_mm_add_epi32(sums, _mm_unpackhi_epi64(sums, sums)));
+}
+inline int v_reduce_sum(const v_int8x16& a)
+{   // Bias every lane by +128 (xor 0x80) so the unsigned SAD applies, then remove 16 * 128.
+    const __m128i bias = _mm_set1_epi8((schar)-128);
+    const __m128i sums = _mm_sad_epu8(_mm_xor_si128(a.val, bias), _mm_setzero_si128());
+    return _mm_cvtsi128_si32(_mm_add_epi32(sums, _mm_unpackhi_epi64(sums, sums))) - 2048;
+}
+#define OPENCV_HAL_IMPL_SSE_REDUCE_OP_16(func) \
+inline schar v_reduce_##func(const v_int8x16& a) \
+{ \
+    __m128i val = a.val; \
+    __m128i smask = _mm_set1_epi8((schar)-128); \
+    val = _mm_xor_si128(val, smask); /* flip sign bits so the unsigned op orders signed values */ \
+    val = _mm_##func##_epu8(val, _mm_srli_si128(val,8)); /* fold 16 candidates into 8 */ \
+    val = _mm_##func##_epu8(val, _mm_srli_si128(val,4)); /* 8 into 4 */ \
+    val = _mm_##func##_epu8(val, _mm_srli_si128(val,2)); /* 4 into 2 */ \
+    val = _mm_##func##_epu8(val, _mm_srli_si128(val,1)); /* 2 into 1; result is in byte 0 */ \
+    return (schar)_mm_cvtsi128_si32(val) ^ (schar)-128; /* undo the sign flip */ \
+} \
+inline uchar v_reduce_##func(const v_uint8x16& a) \
+{ \
+    __m128i val = a.val; \
+    val = _mm_##func##_epu8(val, _mm_srli_si128(val,8)); /* same folding, no bias needed */ \
+    val = _mm_##func##_epu8(val, _mm_srli_si128(val,4)); \
+    val = _mm_##func##_epu8(val, _mm_srli_si128(val,2)); \
+    val = _mm_##func##_epu8(val, _mm_srli_si128(val,1)); \
+    return (uchar)_mm_cvtsi128_si32(val); /* byte 0 holds the result */ \
+}
+OPENCV_HAL_IMPL_SSE_REDUCE_OP_16(max) // defines v_reduce_max for v_int8x16 and v_uint8x16
+OPENCV_HAL_IMPL_SSE_REDUCE_OP_16(min) // defines v_reduce_min for v_int8x16 and v_uint8x16
+
+#define OPENCV_HAL_IMPL_SSE_REDUCE_OP_8(_Tpvec, scalartype, func, suffix, sbit) \
+inline scalartype v_reduce_##func(const v_##_Tpvec& a) \
+{ \
+    __m128i val = a.val; \
+    val = _mm_##func##_##suffix(val, _mm_srli_si128(val,8)); /* fold 8 candidates into 4 */ \
+    val = _mm_##func##_##suffix(val, _mm_srli_si128(val,4)); /* 4 into 2 */ \
+    val = _mm_##func##_##suffix(val, _mm_srli_si128(val,2)); /* 2 into 1; result is in lane 0 */ \
+    return (scalartype)_mm_cvtsi128_si32(val); \
+} \
+inline unsigned scalartype v_reduce_##func(const v_u##_Tpvec& a) \
+{ \
+    __m128i val = a.val; \
+    __m128i smask = _mm_set1_epi16(sbit); \
+    val = _mm_xor_si128(val, smask); /* flip sign bits so the signed op orders unsigned values */ \
+    val = _mm_##func##_##suffix(val, _mm_srli_si128(val,8)); \
+    val = _mm_##func##_##suffix(val, _mm_srli_si128(val,4)); \
+    val = _mm_##func##_##suffix(val, _mm_srli_si128(val,2)); \
+    return (unsigned scalartype)(_mm_cvtsi128_si32(val) ^  sbit); /* undo the sign flip */ \
+}
+OPENCV_HAL_IMPL_SSE_REDUCE_OP_8(int16x8, short, max, epi16, (short)-32768) // v_reduce_max for v_int16x8 and v_uint16x8
+OPENCV_HAL_IMPL_SSE_REDUCE_OP_8(int16x8, short, min, epi16, (short)-32768) // v_reduce_min for v_int16x8 and v_uint16x8
+
+#define OPENCV_HAL_IMPL_SSE_REDUCE_OP_4_SUM(_Tpvec, scalartype, regtype, suffix, cast_from, cast_to, extract) \
+inline scalartype v_reduce_sum(const _Tpvec& a) \
+{ \
+    regtype val = a.val; \
+    val = _mm_add_##suffix(val, cast_to(_mm_srli_si128(cast_from(val), 8))); /* add the upper two lanes onto the lower two */ \
+    val = _mm_add_##suffix(val, cast_to(_mm_srli_si128(cast_from(val), 4))); /* add lane 1 onto lane 0 */ \
+    return (scalartype)_mm_cvt##extract(val); /* extract lane 0 */ \
+}
+
 #define OPENCV_HAL_IMPL_SSE_REDUCE_OP_4(_Tpvec, scalartype, func, scalar_func) \
 inline scalartype v_reduce_##func(const _Tpvec& a) \
 { \
@@ -1031,41 +1674,178 @@ inline scalartype v_reduce_##func(const _Tpvec& a) \
     return scalar_func(s0, s1); \
 }
 
-OPENCV_HAL_IMPL_SSE_REDUCE_OP_4(v_uint32x4, unsigned, sum, OPENCV_HAL_ADD)
+OPENCV_HAL_IMPL_SSE_REDUCE_OP_4_SUM(v_uint32x4, unsigned, __m128i, epi32, OPENCV_HAL_NOP, OPENCV_HAL_NOP, si128_si32)
+OPENCV_HAL_IMPL_SSE_REDUCE_OP_4_SUM(v_int32x4, int, __m128i, epi32, OPENCV_HAL_NOP, OPENCV_HAL_NOP, si128_si32)
+OPENCV_HAL_IMPL_SSE_REDUCE_OP_4_SUM(v_float32x4, float, __m128, ps, _mm_castps_si128, _mm_castsi128_ps, ss_f32)
+
+inline int v_reduce_sum(const v_int16x8& a)
+{ return v_reduce_sum(v_expand_low(a) + v_expand_high(a)); }
+inline unsigned v_reduce_sum(const v_uint16x8& a)
+{ return v_reduce_sum(v_expand_low(a) + v_expand_high(a)); }
+
+inline uint64 v_reduce_sum(const v_uint64x2& a)
+{
+    uint64 CV_DECL_ALIGNED(32) idx[2];
+    v_store_aligned(idx, a);
+    return idx[0] + idx[1];
+}
+inline int64 v_reduce_sum(const v_int64x2& a)
+{
+    int64 CV_DECL_ALIGNED(32) idx[2];
+    v_store_aligned(idx, a);
+    return idx[0] + idx[1];
+}
+inline double v_reduce_sum(const v_float64x2& a)
+{
+    double CV_DECL_ALIGNED(32) idx[2];
+    v_store_aligned(idx, a);
+    return idx[0] + idx[1];
+}
+
+inline v_float32x4 v_reduce_sum4(const v_float32x4& a, const v_float32x4& b,
+                                 const v_float32x4& c, const v_float32x4& d)
+{ // Returns one vector whose lanes are (sum(a), sum(b), sum(c), sum(d)).
+#if CV_SSE3
+    __m128 ab = _mm_hadd_ps(a.val, b.val); // a0+a1 a2+a3 b0+b1 b2+b3
+    __m128 cd = _mm_hadd_ps(c.val, d.val); // c0+c1 c2+c3 d0+d1 d2+d3
+    return v_float32x4(_mm_hadd_ps(ab, cd)); // second horizontal add completes each 4-lane sum
+#else
+    __m128 ac = _mm_add_ps(_mm_unpacklo_ps(a.val, c.val), _mm_unpackhi_ps(a.val, c.val)); // a0+a2 c0+c2 a1+a3 c1+c3
+    __m128 bd = _mm_add_ps(_mm_unpacklo_ps(b.val, d.val), _mm_unpackhi_ps(b.val, d.val)); // b0+b2 d0+d2 b1+b3 d1+d3
+    return v_float32x4(_mm_add_ps(_mm_unpacklo_ps(ac, bd), _mm_unpackhi_ps(ac, bd))); // (even-lane sums) + (odd-lane sums) in a,b,c,d order
+#endif
+}
+
 OPENCV_HAL_IMPL_SSE_REDUCE_OP_4(v_uint32x4, unsigned, max, std::max)
 OPENCV_HAL_IMPL_SSE_REDUCE_OP_4(v_uint32x4, unsigned, min, std::min)
-OPENCV_HAL_IMPL_SSE_REDUCE_OP_4(v_int32x4, int, sum, OPENCV_HAL_ADD)
 OPENCV_HAL_IMPL_SSE_REDUCE_OP_4(v_int32x4, int, max, std::max)
 OPENCV_HAL_IMPL_SSE_REDUCE_OP_4(v_int32x4, int, min, std::min)
-OPENCV_HAL_IMPL_SSE_REDUCE_OP_4(v_float32x4, float, sum, OPENCV_HAL_ADD)
 OPENCV_HAL_IMPL_SSE_REDUCE_OP_4(v_float32x4, float, max, std::max)
 OPENCV_HAL_IMPL_SSE_REDUCE_OP_4(v_float32x4, float, min, std::min)
 
-#define OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(_Tpvec, suffix, pack_op, and_op, signmask, allmask) \
-inline int v_signmask(const _Tpvec& a) \
-{ \
-    return and_op(_mm_movemask_##suffix(pack_op(a.val)), signmask); \
-} \
-inline bool v_check_all(const _Tpvec& a) \
-{ return and_op(_mm_movemask_##suffix(a.val), allmask) == allmask; } \
-inline bool v_check_any(const _Tpvec& a) \
-{ return and_op(_mm_movemask_##suffix(a.val), allmask) != 0; }
+inline unsigned v_reduce_sad(const v_uint8x16& a, const v_uint8x16& b)
+{ // Sum of absolute differences over all 16 unsigned byte lanes.
+    __m128i half = _mm_sad_epu8(a.val, b.val); // one partial sum per 8-byte half, stored in the low bits of each 64-bit lane
+    return (unsigned)_mm_cvtsi128_si32(_mm_add_epi32(half, _mm_unpackhi_epi64(half, half))); // low-half sum + high-half sum
+}
+inline unsigned v_reduce_sad(const v_int8x16& a, const v_int8x16& b)
+{ // Sum of absolute differences over all 16 signed byte lanes.
+    __m128i half = _mm_set1_epi8((char)-128); // sign-bit mask 0x80: XOR maps [-128,127] onto [0,255] keeping order (a wrapping +0x7f bias sends -128 to 255)
+    half = _mm_sad_epu8(_mm_xor_si128(a.val, half), _mm_xor_si128(b.val, half)); // |a-b| is unchanged by the common offset; one partial sum per 8-byte half
+    return (unsigned)_mm_cvtsi128_si32(_mm_add_epi32(half, _mm_unpackhi_epi64(half, half))); // low-half sum + high-half sum
+}
+inline unsigned v_reduce_sad(const v_uint16x8& a, const v_uint16x8& b)
+{
+    v_uint32x4 l, h;
+    v_expand(v_absdiff(a, b), l, h);
+    return v_reduce_sum(l + h);
+}
+inline unsigned v_reduce_sad(const v_int16x8& a, const v_int16x8& b)
+{
+    v_uint32x4 l, h;
+    v_expand(v_absdiff(a, b), l, h);
+    return v_reduce_sum(l + h);
+}
+inline unsigned v_reduce_sad(const v_uint32x4& a, const v_uint32x4& b)
+{
+    return v_reduce_sum(v_absdiff(a, b));
+}
+inline unsigned v_reduce_sad(const v_int32x4& a, const v_int32x4& b)
+{
+    return v_reduce_sum(v_absdiff(a, b));
+}
+inline float v_reduce_sad(const v_float32x4& a, const v_float32x4& b)
+{
+    return v_reduce_sum(v_absdiff(a, b));
+}
 
-#define OPENCV_HAL_PACKS(a) _mm_packs_epi16(a, a)
-inline __m128i v_packq_epi32(__m128i a)
+inline v_uint8x16 v_popcount(const v_uint8x16& a)
+{ // Per-byte population count using the parallel bit-summing trick.
+    __m128i m1 = _mm_set1_epi32(0x55555555); // low bit of every 2-bit field
+    __m128i m2 = _mm_set1_epi32(0x33333333); // low 2 bits of every nibble
+    __m128i m4 = _mm_set1_epi32(0x0f0f0f0f); // low nibble of every byte
+    __m128i p = a.val;
+    p = _mm_add_epi32(_mm_and_si128(_mm_srli_epi32(p, 1), m1), _mm_and_si128(p, m1)); // each 2-bit field = count of its two bits
+    p = _mm_add_epi32(_mm_and_si128(_mm_srli_epi32(p, 2), m2), _mm_and_si128(p, m2)); // each nibble = count of its four bits
+    p = _mm_add_epi32(_mm_and_si128(_mm_srli_epi32(p, 4), m4), _mm_and_si128(p, m4)); // each byte = count of its eight bits; masking stops carries crossing bytes
+    return v_uint8x16(p);
+}
+inline v_uint16x8 v_popcount(const v_uint16x8& a)
+{
+    v_uint8x16 p = v_popcount(v_reinterpret_as_u8(a));
+    p += v_rotate_right<1>(p);
+    return v_reinterpret_as_u16(p) & v_setall_u16(0x00ff);
+}
+inline v_uint32x4 v_popcount(const v_uint32x4& a)
+{
+    v_uint8x16 p = v_popcount(v_reinterpret_as_u8(a));
+    p += v_rotate_right<1>(p);
+    p += v_rotate_right<2>(p);
+    return v_reinterpret_as_u32(p) & v_setall_u32(0x000000ff);
+}
+inline v_uint64x2 v_popcount(const v_uint64x2& a)
 {
-    __m128i b = _mm_packs_epi32(a, a);
-    return _mm_packs_epi16(b, b);
+    return v_uint64x2(_mm_sad_epu8(v_popcount(v_reinterpret_as_u8(a)).val, _mm_setzero_si128()));
+}
+inline v_uint8x16 v_popcount(const v_int8x16& a)
+{ return v_popcount(v_reinterpret_as_u8(a)); }
+inline v_uint16x8 v_popcount(const v_int16x8& a)
+{ return v_popcount(v_reinterpret_as_u16(a)); }
+inline v_uint32x4 v_popcount(const v_int32x4& a)
+{ return v_popcount(v_reinterpret_as_u32(a)); }
+inline v_uint64x2 v_popcount(const v_int64x2& a)
+{ return v_popcount(v_reinterpret_as_u64(a)); }
+
+#define OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(_Tpvec, suffix, cast_op, allmask) \
+inline int v_signmask(const _Tpvec& a)   { return _mm_movemask_##suffix(cast_op(a.val)); } \
+inline bool v_check_all(const _Tpvec& a) { return _mm_movemask_##suffix(cast_op(a.val)) == allmask; } \
+inline bool v_check_any(const _Tpvec& a) { return _mm_movemask_##suffix(cast_op(a.val)) != 0; }
+OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_uint8x16, epi8, OPENCV_HAL_NOP, 65535)
+OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_int8x16, epi8, OPENCV_HAL_NOP, 65535)
+OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_uint32x4, ps, _mm_castsi128_ps, 15)
+OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_int32x4, ps, _mm_castsi128_ps, 15)
+OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_uint64x2, pd, _mm_castsi128_pd, 3)
+OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_int64x2, pd, _mm_castsi128_pd, 3)
+OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_float32x4, ps, OPENCV_HAL_NOP, 15)
+OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_float64x2, pd, OPENCV_HAL_NOP, 3)
+
+#define OPENCV_HAL_IMPL_SSE_CHECK_SIGNS_SHORT(_Tpvec) \
+inline int v_signmask(const _Tpvec& a) { return _mm_movemask_epi8(_mm_packs_epi16(a.val, a.val)) & 255; } \
+inline bool v_check_all(const _Tpvec& a) { return (_mm_movemask_epi8(a.val) & 0xaaaa) == 0xaaaa; } \
+inline bool v_check_any(const _Tpvec& a) { return (_mm_movemask_epi8(a.val) & 0xaaaa) != 0; }
+OPENCV_HAL_IMPL_SSE_CHECK_SIGNS_SHORT(v_uint16x8)
+OPENCV_HAL_IMPL_SSE_CHECK_SIGNS_SHORT(v_int16x8)
+
+inline int v_scan_forward(const v_int8x16& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))); }
+inline int v_scan_forward(const v_uint8x16& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))); }
+inline int v_scan_forward(const v_int16x8& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 2; }
+inline int v_scan_forward(const v_uint16x8& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 2; }
+inline int v_scan_forward(const v_int32x4& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 4; }
+inline int v_scan_forward(const v_uint32x4& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 4; }
+inline int v_scan_forward(const v_float32x4& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 4; }
+inline int v_scan_forward(const v_int64x2& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 8; }
+inline int v_scan_forward(const v_uint64x2& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 8; }
+inline int v_scan_forward(const v_float64x2& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 8; }
+
+#if CV_SSE4_1
+#define OPENCV_HAL_IMPL_SSE_SELECT(_Tpvec, cast_ret, cast, suffix) \
+inline _Tpvec v_select(const _Tpvec& mask, const _Tpvec& a, const _Tpvec& b) \
+{ \
+    return _Tpvec(cast_ret(_mm_blendv_##suffix(cast(b.val), cast(a.val), cast(mask.val)))); \
 }
 
-OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_uint8x16, epi8, OPENCV_HAL_NOP, OPENCV_HAL_1ST, 65535, 65535)
-OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_int8x16, epi8, OPENCV_HAL_NOP, OPENCV_HAL_1ST, 65535, 65535)
-OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_uint16x8, epi8, OPENCV_HAL_PACKS, OPENCV_HAL_AND, 255, (int)0xaaaa)
-OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_int16x8, epi8, OPENCV_HAL_PACKS, OPENCV_HAL_AND, 255, (int)0xaaaa)
-OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_uint32x4, epi8, v_packq_epi32, OPENCV_HAL_AND, 15, (int)0x8888)
-OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_int32x4, epi8, v_packq_epi32, OPENCV_HAL_AND, 15, (int)0x8888)
-OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_float32x4, ps, OPENCV_HAL_NOP, OPENCV_HAL_1ST, 15, 15)
-OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_float64x2, pd, OPENCV_HAL_NOP, OPENCV_HAL_1ST, 3, 3)
+OPENCV_HAL_IMPL_SSE_SELECT(v_uint8x16, OPENCV_HAL_NOP, OPENCV_HAL_NOP, epi8)
+OPENCV_HAL_IMPL_SSE_SELECT(v_int8x16, OPENCV_HAL_NOP, OPENCV_HAL_NOP, epi8)
+OPENCV_HAL_IMPL_SSE_SELECT(v_uint16x8, OPENCV_HAL_NOP, OPENCV_HAL_NOP, epi8)
+OPENCV_HAL_IMPL_SSE_SELECT(v_int16x8, OPENCV_HAL_NOP, OPENCV_HAL_NOP, epi8)
+OPENCV_HAL_IMPL_SSE_SELECT(v_uint32x4, _mm_castps_si128, _mm_castsi128_ps, ps)
+OPENCV_HAL_IMPL_SSE_SELECT(v_int32x4, _mm_castps_si128, _mm_castsi128_ps, ps)
+// OPENCV_HAL_IMPL_SSE_SELECT(v_uint64x2, TBD, TBD, pd)
+// OPENCV_HAL_IMPL_SSE_SELECT(v_int64x2, TBD, TBD, ps)
+OPENCV_HAL_IMPL_SSE_SELECT(v_float32x4, OPENCV_HAL_NOP, OPENCV_HAL_NOP, ps)
+OPENCV_HAL_IMPL_SSE_SELECT(v_float64x2, OPENCV_HAL_NOP, OPENCV_HAL_NOP, pd)
+
+#else // CV_SSE4_1
 
 #define OPENCV_HAL_IMPL_SSE_SELECT(_Tpvec, suffix) \
 inline _Tpvec v_select(const _Tpvec& mask, const _Tpvec& a, const _Tpvec& b) \
@@ -1083,71 +1863,41 @@ OPENCV_HAL_IMPL_SSE_SELECT(v_int32x4, si128)
 // OPENCV_HAL_IMPL_SSE_SELECT(v_int64x2, si128)
 OPENCV_HAL_IMPL_SSE_SELECT(v_float32x4, ps)
 OPENCV_HAL_IMPL_SSE_SELECT(v_float64x2, pd)
+#endif
 
-#define OPENCV_HAL_IMPL_SSE_EXPAND(_Tpuvec, _Tpwuvec, _Tpu, _Tpsvec, _Tpwsvec, _Tps, suffix, wsuffix, shift) \
-inline void v_expand(const _Tpuvec& a, _Tpwuvec& b0, _Tpwuvec& b1) \
-{ \
-    __m128i z = _mm_setzero_si128(); \
-    b0.val = _mm_unpacklo_##suffix(a.val, z); \
-    b1.val = _mm_unpackhi_##suffix(a.val, z); \
-} \
-inline _Tpwuvec v_load_expand(const _Tpu* ptr) \
-{ \
-    __m128i z = _mm_setzero_si128(); \
-    return _Tpwuvec(_mm_unpacklo_##suffix(_mm_loadl_epi64((const __m128i*)ptr), z)); \
-} \
-inline void v_expand(const _Tpsvec& a, _Tpwsvec& b0, _Tpwsvec& b1) \
-{ \
-    b0.val = _mm_srai_##wsuffix(_mm_unpacklo_##suffix(a.val, a.val), shift); \
-    b1.val = _mm_srai_##wsuffix(_mm_unpackhi_##suffix(a.val, a.val), shift); \
-} \
-inline _Tpwsvec v_load_expand(const _Tps* ptr) \
-{ \
-    __m128i a = _mm_loadl_epi64((const __m128i*)ptr); \
-    return _Tpwsvec(_mm_srai_##wsuffix(_mm_unpacklo_##suffix(a, a), shift)); \
-}
-
-OPENCV_HAL_IMPL_SSE_EXPAND(v_uint8x16, v_uint16x8, uchar, v_int8x16, v_int16x8, schar, epi8, epi16, 8)
-OPENCV_HAL_IMPL_SSE_EXPAND(v_uint16x8, v_uint32x4, ushort, v_int16x8, v_int32x4, short, epi16, epi32, 16)
-
-inline void v_expand(const v_uint32x4& a, v_uint64x2& b0, v_uint64x2& b1)
-{
-    __m128i z = _mm_setzero_si128();
-    b0.val = _mm_unpacklo_epi32(a.val, z);
-    b1.val = _mm_unpackhi_epi32(a.val, z);
-}
-inline v_uint64x2 v_load_expand(const unsigned* ptr)
-{
-    __m128i z = _mm_setzero_si128();
-    return v_uint64x2(_mm_unpacklo_epi32(_mm_loadl_epi64((const __m128i*)ptr), z));
-}
-inline void v_expand(const v_int32x4& a, v_int64x2& b0, v_int64x2& b1)
-{
-    __m128i s = _mm_srai_epi32(a.val, 31);
-    b0.val = _mm_unpacklo_epi32(a.val, s);
-    b1.val = _mm_unpackhi_epi32(a.val, s);
-}
-inline v_int64x2 v_load_expand(const int* ptr)
-{
-    __m128i a = _mm_loadl_epi64((const __m128i*)ptr);
-    __m128i s = _mm_srai_epi32(a, 31);
-    return v_int64x2(_mm_unpacklo_epi32(a, s));
-}
+/* Expand */
+#define OPENCV_HAL_IMPL_SSE_EXPAND(_Tpvec, _Tpwvec, _Tp, intrin)    \
+    inline void v_expand(const _Tpvec& a, _Tpwvec& b0, _Tpwvec& b1) \
+    {                                                               \
+        b0.val = intrin(a.val);                                     \
+        b1.val = __CV_CAT(intrin, _high)(a.val);                    \
+    }                                                               \
+    inline _Tpwvec v_expand_low(const _Tpvec& a)                    \
+    { return _Tpwvec(intrin(a.val)); }                              \
+    inline _Tpwvec v_expand_high(const _Tpvec& a)                   \
+    { return _Tpwvec(__CV_CAT(intrin, _high)(a.val)); }             \
+    inline _Tpwvec v_load_expand(const _Tp* ptr)                    \
+    {                                                               \
+        __m128i a = _mm_loadl_epi64((const __m128i*)ptr);           \
+        return _Tpwvec(intrin(a));                                  \
+    }
 
-inline v_uint32x4 v_load_expand_q(const uchar* ptr)
-{
-    __m128i z = _mm_setzero_si128();
-    __m128i a = _mm_cvtsi32_si128(*(const int*)ptr);
-    return v_uint32x4(_mm_unpacklo_epi16(_mm_unpacklo_epi8(a, z), z));
-}
+OPENCV_HAL_IMPL_SSE_EXPAND(v_uint8x16, v_uint16x8,  uchar,    _v128_cvtepu8_epi16)
+OPENCV_HAL_IMPL_SSE_EXPAND(v_int8x16,  v_int16x8,   schar,    _v128_cvtepi8_epi16)
+OPENCV_HAL_IMPL_SSE_EXPAND(v_uint16x8, v_uint32x4,  ushort,   _v128_cvtepu16_epi32)
+OPENCV_HAL_IMPL_SSE_EXPAND(v_int16x8,  v_int32x4,   short,    _v128_cvtepi16_epi32)
+OPENCV_HAL_IMPL_SSE_EXPAND(v_uint32x4, v_uint64x2,  unsigned, _v128_cvtepu32_epi64)
+OPENCV_HAL_IMPL_SSE_EXPAND(v_int32x4,  v_int64x2,   int,      _v128_cvtepi32_epi64)
+
+#define OPENCV_HAL_IMPL_SSE_EXPAND_Q(_Tpvec, _Tp, intrin)  \
+    inline _Tpvec v_load_expand_q(const _Tp* ptr)          \
+    {                                                      \
+        __m128i a = _mm_cvtsi32_si128(*(const int*)ptr);   \
+        return _Tpvec(intrin(a));                          \
+    }
 
-inline v_int32x4 v_load_expand_q(const schar* ptr)
-{
-    __m128i a = _mm_cvtsi32_si128(*(const int*)ptr);
-    a = _mm_unpacklo_epi8(a, a);
-    a = _mm_unpacklo_epi8(a, a);
-    return v_int32x4(_mm_srai_epi32(a, 24));
-}
+OPENCV_HAL_IMPL_SSE_EXPAND_Q(v_uint32x4, uchar, _v128_cvtepu8_epi32)
+OPENCV_HAL_IMPL_SSE_EXPAND_Q(v_int32x4,  schar, _v128_cvtepi8_epi32)
 
 #define OPENCV_HAL_IMPL_SSE_UNPACKS(_Tpvec, suffix, cast_from, cast_to) \
 inline void v_zip(const _Tpvec& a0, const _Tpvec& a1, _Tpvec& b0, _Tpvec& b1) \
@@ -1181,15 +1931,63 @@ OPENCV_HAL_IMPL_SSE_UNPACKS(v_int32x4, epi32, OPENCV_HAL_NOP, OPENCV_HAL_NOP)
 OPENCV_HAL_IMPL_SSE_UNPACKS(v_float32x4, ps, _mm_castps_si128, _mm_castsi128_ps)
 OPENCV_HAL_IMPL_SSE_UNPACKS(v_float64x2, pd, _mm_castpd_si128, _mm_castsi128_pd)
 
+inline v_uint8x16 v_reverse(const v_uint8x16 &a)
+{ // Returns the 16 byte lanes of a in reverse order.
+#if CV_SSSE3
+    const __m128i perm = _mm_setr_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); // output byte i takes input byte 15-i; plain const, no guarded static init
+    return v_uint8x16(_mm_shuffle_epi8(a.val, perm));
+#else
+    uchar CV_DECL_ALIGNED(32) d[16]; // fallback without SSSE3: spill to memory and rebuild back-to-front
+    v_store_aligned(d, a);
+    return v_uint8x16(d[15], d[14], d[13], d[12], d[11], d[10], d[9], d[8], d[7], d[6], d[5], d[4], d[3], d[2], d[1], d[0]);
+#endif
+}
+
+inline v_int8x16 v_reverse(const v_int8x16 &a)
+{ return v_reinterpret_as_s8(v_reverse(v_reinterpret_as_u8(a))); }
+
+inline v_uint16x8 v_reverse(const v_uint16x8 &a)
+{ // Returns the eight 16-bit lanes of a in reverse order.
+#if CV_SSSE3
+    const __m128i perm = _mm_setr_epi8(14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1); // byte pairs reversed, byte order inside each lane kept; plain const, no guarded static init
+    return v_uint16x8(_mm_shuffle_epi8(a.val, perm));
+#else
+    __m128i r = _mm_shuffle_epi32(a.val, _MM_SHUFFLE(0, 1, 2, 3)); // reverse the four 32-bit groups
+    r = _mm_shufflelo_epi16(r, _MM_SHUFFLE(2, 3, 0, 1)); // swap the two 16-bit lanes inside each low 32-bit group
+    r = _mm_shufflehi_epi16(r, _MM_SHUFFLE(2, 3, 0, 1)); // same for the high 64 bits
+    return v_uint16x8(r);
+#endif
+}
+
+inline v_int16x8 v_reverse(const v_int16x8 &a)
+{ return v_reinterpret_as_s16(v_reverse(v_reinterpret_as_u16(a))); }
+
+inline v_uint32x4 v_reverse(const v_uint32x4 &a)
+{
+    return v_uint32x4(_mm_shuffle_epi32(a.val, _MM_SHUFFLE(0, 1, 2, 3)));
+}
+
+inline v_int32x4 v_reverse(const v_int32x4 &a)
+{ return v_reinterpret_as_s32(v_reverse(v_reinterpret_as_u32(a))); }
+
+inline v_float32x4 v_reverse(const v_float32x4 &a)
+{ return v_reinterpret_as_f32(v_reverse(v_reinterpret_as_u32(a))); }
+
+inline v_uint64x2 v_reverse(const v_uint64x2 &a)
+{
+    return v_uint64x2(_mm_shuffle_epi32(a.val, _MM_SHUFFLE(1, 0, 3, 2)));
+}
+
+inline v_int64x2 v_reverse(const v_int64x2 &a)
+{ return v_reinterpret_as_s64(v_reverse(v_reinterpret_as_u64(a))); }
+
+inline v_float64x2 v_reverse(const v_float64x2 &a)
+{ return v_reinterpret_as_f64(v_reverse(v_reinterpret_as_u64(a))); }
+
 template<int s, typename _Tpvec>
 inline _Tpvec v_extract(const _Tpvec& a, const _Tpvec& b)
 {
-    const int w = sizeof(typename _Tpvec::lane_type);
-    const int n = _Tpvec::nlanes;
-    __m128i ra, rb;
-    ra = _mm_srli_si128(a.val, s*w);
-    rb = _mm_slli_si128(b.val, (n-s)*w);
-    return _Tpvec(_mm_or_si128(ra, rb));
+    return v_rotate_right<s>(a, b);
 }
 
 inline v_int32x4 v_round(const v_float32x4& a)
@@ -1215,6 +2013,12 @@ inline v_int32x4 v_trunc(const v_float32x4& a)
 inline v_int32x4 v_round(const v_float64x2& a)
 { return v_int32x4(_mm_cvtpd_epi32(a.val)); }
 
+inline v_int32x4 v_round(const v_float64x2& a, const v_float64x2& b)
+{ // Converts two double pairs to int32 and packs them into one vector.
+    __m128i ai = _mm_cvtpd_epi32(a.val), bi = _mm_cvtpd_epi32(b.val); // each conversion leaves its two int32 results in the low 64 bits
+    return v_int32x4(_mm_unpacklo_epi64(ai, bi)); // lanes: a0 a1 b0 b1
+}
+
 inline v_int32x4 v_floor(const v_float64x2& a)
 {
     __m128i a1 = _mm_cvtpd_epi32(a.val);
@@ -1255,12 +2059,70 @@ OPENCV_HAL_IMPL_SSE_TRANSPOSE4x4(v_uint32x4, epi32, OPENCV_HAL_NOP, OPENCV_HAL_N
 OPENCV_HAL_IMPL_SSE_TRANSPOSE4x4(v_int32x4, epi32, OPENCV_HAL_NOP, OPENCV_HAL_NOP)
 OPENCV_HAL_IMPL_SSE_TRANSPOSE4x4(v_float32x4, ps, _mm_castps_si128, _mm_castsi128_ps)
 
-// adopted from sse_utils.hpp
-inline void v_load_deinterleave(const uchar* ptr, v_uint8x16& a, v_uint8x16& b, v_uint8x16& c)
+// load deinterleave
+inline void v_load_deinterleave(const uchar* ptr, v_uint8x16& a, v_uint8x16& b)
 {
     __m128i t00 = _mm_loadu_si128((const __m128i*)ptr);
     __m128i t01 = _mm_loadu_si128((const __m128i*)(ptr + 16));
-    __m128i t02 = _mm_loadu_si128((const __m128i*)(ptr + 32));
+
+    __m128i t10 = _mm_unpacklo_epi8(t00, t01);
+    __m128i t11 = _mm_unpackhi_epi8(t00, t01);
+
+    __m128i t20 = _mm_unpacklo_epi8(t10, t11);
+    __m128i t21 = _mm_unpackhi_epi8(t10, t11);
+
+    __m128i t30 = _mm_unpacklo_epi8(t20, t21);
+    __m128i t31 = _mm_unpackhi_epi8(t20, t21);
+
+    a.val = _mm_unpacklo_epi8(t30, t31);
+    b.val = _mm_unpackhi_epi8(t30, t31);
+}
+
+inline void v_load_deinterleave(const uchar* ptr, v_uint8x16& a, v_uint8x16& b, v_uint8x16& c)
+{
+#if CV_SSE4_1
+    const __m128i m0 = _mm_setr_epi8(0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0);
+    const __m128i m1 = _mm_setr_epi8(0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0);
+    __m128i s0 = _mm_loadu_si128((const __m128i*)ptr);
+    __m128i s1 = _mm_loadu_si128((const __m128i*)(ptr + 16));
+    __m128i s2 = _mm_loadu_si128((const __m128i*)(ptr + 32));
+    __m128i a0 = _mm_blendv_epi8(_mm_blendv_epi8(s0, s1, m0), s2, m1);
+    __m128i b0 = _mm_blendv_epi8(_mm_blendv_epi8(s1, s2, m0), s0, m1);
+    __m128i c0 = _mm_blendv_epi8(_mm_blendv_epi8(s2, s0, m0), s1, m1);
+    const __m128i sh_b = _mm_setr_epi8(0, 3, 6, 9, 12, 15, 2, 5, 8, 11, 14, 1, 4, 7, 10, 13);
+    const __m128i sh_g = _mm_setr_epi8(1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15, 2, 5, 8, 11, 14);
+    const __m128i sh_r = _mm_setr_epi8(2, 5, 8, 11, 14, 1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15);
+    a0 = _mm_shuffle_epi8(a0, sh_b);
+    b0 = _mm_shuffle_epi8(b0, sh_g);
+    c0 = _mm_shuffle_epi8(c0, sh_r);
+    a.val = a0;
+    b.val = b0;
+    c.val = c0;
+#elif CV_SSSE3
+    const __m128i m0 = _mm_setr_epi8(0, 3, 6, 9, 12, 15, 1, 4, 7, 10, 13, 2, 5, 8, 11, 14);
+    const __m128i m1 = _mm_alignr_epi8(m0, m0, 11);
+    const __m128i m2 = _mm_alignr_epi8(m0, m0, 6);
+
+    __m128i t0 = _mm_loadu_si128((const __m128i*)ptr);
+    __m128i t1 = _mm_loadu_si128((const __m128i*)(ptr + 16));
+    __m128i t2 = _mm_loadu_si128((const __m128i*)(ptr + 32));
+
+    __m128i s0 = _mm_shuffle_epi8(t0, m0);
+    __m128i s1 = _mm_shuffle_epi8(t1, m1);
+    __m128i s2 = _mm_shuffle_epi8(t2, m2);
+
+    t0 = _mm_alignr_epi8(s1, _mm_slli_si128(s0, 10), 5);
+    a.val = _mm_alignr_epi8(s2, t0, 5);
+
+    t1 = _mm_alignr_epi8(_mm_srli_si128(s1, 5), _mm_slli_si128(s0, 5), 6);
+    b.val = _mm_alignr_epi8(_mm_srli_si128(s2, 5), t1, 5);
+
+    t2 = _mm_alignr_epi8(_mm_srli_si128(s2, 10), s1, 11);
+    c.val = _mm_alignr_epi8(t2, s0, 11);
+#else
+    __m128i t00 = _mm_loadu_si128((const __m128i*)ptr);
+    __m128i t01 = _mm_loadu_si128((const __m128i*)(ptr + 16));
+    __m128i t02 = _mm_loadu_si128((const __m128i*)(ptr + 32));
 
     __m128i t10 = _mm_unpacklo_epi8(t00, _mm_unpackhi_epi64(t01, t01));
     __m128i t11 = _mm_unpacklo_epi8(_mm_unpackhi_epi64(t00, t00), t02);
@@ -1277,6 +2139,7 @@ inline void v_load_deinterleave(const uchar* ptr, v_uint8x16& a, v_uint8x16& b,
     a.val = _mm_unpacklo_epi8(t30, _mm_unpackhi_epi64(t31, t31));
     b.val = _mm_unpacklo_epi8(_mm_unpackhi_epi64(t30, t30), t32);
     c.val = _mm_unpacklo_epi8(t31, _mm_unpackhi_epi64(t32, t32));
+#endif
 }
 
 inline void v_load_deinterleave(const uchar* ptr, v_uint8x16& a, v_uint8x16& b, v_uint8x16& c, v_uint8x16& d)
@@ -1307,8 +2170,41 @@ inline void v_load_deinterleave(const uchar* ptr, v_uint8x16& a, v_uint8x16& b,
     d.val = _mm_unpackhi_epi8(v2, v3);
 }
 
+inline void v_load_deinterleave(const ushort* ptr, v_uint16x8& a, v_uint16x8& b)
+{ // Splits 16 interleaved ushort values (a0 b0 a1 b1 ...) into a and b with three rounds of unpacks.
+    __m128i v0 = _mm_loadu_si128((__m128i*)(ptr));     // a0 b0 a1 b1 a2 b2 a3 b3
+    __m128i v1 = _mm_loadu_si128((__m128i*)(ptr + 8)); // a4 b4 a5 b5 a6 b6 a7 b7
+
+    __m128i v2 = _mm_unpacklo_epi16(v0, v1); // a0 a4 b0 b4 a1 a5 b1 b5
+    __m128i v3 = _mm_unpackhi_epi16(v0, v1); // a2 a6 b2 b6 a3 a7 b3 b7
+    __m128i v4 = _mm_unpacklo_epi16(v2, v3); // a0 a2 a4 a6 b0 b2 b4 b6
+    __m128i v5 = _mm_unpackhi_epi16(v2, v3); // a1 a3 a5 a7 b1 b3 b5 b7
+
+    a.val = _mm_unpacklo_epi16(v4, v5); // a0 a1 a2 a3 a4 a5 a6 a7
+    b.val = _mm_unpackhi_epi16(v4, v5); // b0 b1 b2 b3 b4 b5 b6 b7
+}
+
 inline void v_load_deinterleave(const ushort* ptr, v_uint16x8& a, v_uint16x8& b, v_uint16x8& c)
 {
+#if CV_SSE4_1
+    __m128i v0 = _mm_loadu_si128((__m128i*)(ptr));
+    __m128i v1 = _mm_loadu_si128((__m128i*)(ptr + 8));
+    __m128i v2 = _mm_loadu_si128((__m128i*)(ptr + 16));
+    __m128i a0 = _mm_blend_epi16(_mm_blend_epi16(v0, v1, 0x92), v2, 0x24);
+    __m128i b0 = _mm_blend_epi16(_mm_blend_epi16(v2, v0, 0x92), v1, 0x24);
+    __m128i c0 = _mm_blend_epi16(_mm_blend_epi16(v1, v2, 0x92), v0, 0x24);
+
+    const __m128i sh_a = _mm_setr_epi8(0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5, 10, 11);
+    const __m128i sh_b = _mm_setr_epi8(2, 3, 8, 9, 14, 15, 4, 5, 10, 11, 0, 1, 6, 7, 12, 13);
+    const __m128i sh_c = _mm_setr_epi8(4, 5, 10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15);
+    a0 = _mm_shuffle_epi8(a0, sh_a);
+    b0 = _mm_shuffle_epi8(b0, sh_b);
+    c0 = _mm_shuffle_epi8(c0, sh_c);
+
+    a.val = a0;
+    b.val = b0;
+    c.val = c0;
+#else
     __m128i t00 = _mm_loadu_si128((const __m128i*)ptr);
     __m128i t01 = _mm_loadu_si128((const __m128i*)(ptr + 8));
     __m128i t02 = _mm_loadu_si128((const __m128i*)(ptr + 16));
@@ -1324,6 +2220,7 @@ inline void v_load_deinterleave(const ushort* ptr, v_uint16x8& a, v_uint16x8& b,
     a.val = _mm_unpacklo_epi16(t20, _mm_unpackhi_epi64(t21, t21));
     b.val = _mm_unpacklo_epi16(_mm_unpackhi_epi64(t20, t20), t22);
     c.val = _mm_unpacklo_epi16(t21, _mm_unpackhi_epi64(t22, t22));
+#endif
 }
 
 inline void v_load_deinterleave(const ushort* ptr, v_uint16x8& a, v_uint16x8& b, v_uint16x8& c, v_uint16x8& d)
@@ -1349,6 +2246,18 @@ inline void v_load_deinterleave(const ushort* ptr, v_uint16x8& a, v_uint16x8& b,
     d.val = _mm_unpackhi_epi16(u2, u3);
 }
 
+inline void v_load_deinterleave(const unsigned* ptr, v_uint32x4& a, v_uint32x4& b)
+{ // Splits 8 interleaved 32-bit values (a0 b0 a1 b1 ...) into a and b with two rounds of unpacks.
+    __m128i v0 = _mm_loadu_si128((__m128i*)(ptr));     // a0 b0 a1 b1
+    __m128i v1 = _mm_loadu_si128((__m128i*)(ptr + 4)); // a2 b2 a3 b3
+
+    __m128i v2 = _mm_unpacklo_epi32(v0, v1); // a0 a2 b0 b2
+    __m128i v3 = _mm_unpackhi_epi32(v0, v1); // a1 a3 b1 b3
+
+    a.val = _mm_unpacklo_epi32(v2, v3); // a0 a1 a2 a3
+    b.val = _mm_unpackhi_epi32(v2, v3); // b0 b1 b2 b3
+}
+
 inline void v_load_deinterleave(const unsigned* ptr, v_uint32x4& a, v_uint32x4& b, v_uint32x4& c)
 {
     __m128i t00 = _mm_loadu_si128((const __m128i*)ptr);
@@ -1366,17 +2275,150 @@ inline void v_load_deinterleave(const unsigned* ptr, v_uint32x4& a, v_uint32x4&
 
 inline void v_load_deinterleave(const unsigned* ptr, v_uint32x4& a, v_uint32x4& b, v_uint32x4& c, v_uint32x4& d)
 {
-    v_uint32x4 u0(_mm_loadu_si128((const __m128i*)ptr));        // a0 b0 c0 d0
-    v_uint32x4 u1(_mm_loadu_si128((const __m128i*)(ptr + 4))); // a1 b1 c1 d1
-    v_uint32x4 u2(_mm_loadu_si128((const __m128i*)(ptr + 8))); // a2 b2 c2 d2
-    v_uint32x4 u3(_mm_loadu_si128((const __m128i*)(ptr + 12))); // a3 b3 c3 d3
+    v_uint32x4 s0(_mm_loadu_si128((const __m128i*)ptr));        // a0 b0 c0 d0
+    v_uint32x4 s1(_mm_loadu_si128((const __m128i*)(ptr + 4)));  // a1 b1 c1 d1
+    v_uint32x4 s2(_mm_loadu_si128((const __m128i*)(ptr + 8)));  // a2 b2 c2 d2
+    v_uint32x4 s3(_mm_loadu_si128((const __m128i*)(ptr + 12))); // a3 b3 c3 d3
+
+    v_transpose4x4(s0, s1, s2, s3, a, b, c, d);
+}
+
+inline void v_load_deinterleave(const float* ptr, v_float32x4& a, v_float32x4& b)
+{ // Splits 8 interleaved floats (a0 b0 a1 b1 ...) into a (even positions) and b (odd positions).
+    __m128 u0 = _mm_loadu_ps(ptr);       // a0 b0 a1 b1
+    __m128 u1 = _mm_loadu_ps((ptr + 4)); // a2 b2 a3 b3
 
-    v_transpose4x4(u0, u1, u2, u3, a, b, c, d);
+    a.val = _mm_shuffle_ps(u0, u1, _MM_SHUFFLE(2, 0, 2, 0)); // a0 a1 a2 a3
+    b.val = _mm_shuffle_ps(u0, u1, _MM_SHUFFLE(3, 1, 3, 1)); // b0 b1 b2 b3
 }
 
+inline void v_load_deinterleave(const float* ptr, v_float32x4& a, v_float32x4& b, v_float32x4& c)
+{
+    __m128 t0 = _mm_loadu_ps(ptr + 0);
+    __m128 t1 = _mm_loadu_ps(ptr + 4);
+    __m128 t2 = _mm_loadu_ps(ptr + 8);
+
+    __m128 at12 = _mm_shuffle_ps(t1, t2, _MM_SHUFFLE(0, 1, 0, 2));
+    a.val = _mm_shuffle_ps(t0, at12, _MM_SHUFFLE(2, 0, 3, 0));
+
+    __m128 bt01 = _mm_shuffle_ps(t0, t1, _MM_SHUFFLE(0, 0, 0, 1));
+    __m128 bt12 = _mm_shuffle_ps(t1, t2, _MM_SHUFFLE(0, 2, 0, 3));
+    b.val = _mm_shuffle_ps(bt01, bt12, _MM_SHUFFLE(2, 0, 2, 0));
+
+    __m128 ct01 = _mm_shuffle_ps(t0, t1, _MM_SHUFFLE(0, 1, 0, 2));
+    c.val = _mm_shuffle_ps(ct01, t2, _MM_SHUFFLE(3, 0, 2, 0));
+}
+
+inline void v_load_deinterleave(const float* ptr, v_float32x4& a, v_float32x4& b, v_float32x4& c, v_float32x4& d)
+{
+    __m128 t0 = _mm_loadu_ps(ptr +  0);
+    __m128 t1 = _mm_loadu_ps(ptr +  4);
+    __m128 t2 = _mm_loadu_ps(ptr +  8);
+    __m128 t3 = _mm_loadu_ps(ptr + 12);
+    __m128 t02lo = _mm_unpacklo_ps(t0, t2);
+    __m128 t13lo = _mm_unpacklo_ps(t1, t3);
+    __m128 t02hi = _mm_unpackhi_ps(t0, t2);
+    __m128 t13hi = _mm_unpackhi_ps(t1, t3);
+    a.val = _mm_unpacklo_ps(t02lo, t13lo);
+    b.val = _mm_unpackhi_ps(t02lo, t13lo);
+    c.val = _mm_unpacklo_ps(t02hi, t13hi);
+    d.val = _mm_unpackhi_ps(t02hi, t13hi);
+}
+
+inline void v_load_deinterleave(const uint64 *ptr, v_uint64x2& a, v_uint64x2& b)
+{
+    __m128i t0 = _mm_loadu_si128((const __m128i*)ptr);
+    __m128i t1 = _mm_loadu_si128((const __m128i*)(ptr + 2));
+
+    a = v_uint64x2(_mm_unpacklo_epi64(t0, t1));
+    b = v_uint64x2(_mm_unpackhi_epi64(t0, t1));
+}
+
+inline void v_load_deinterleave(const uint64 *ptr, v_uint64x2& a, v_uint64x2& b, v_uint64x2& c)
+{ // Splits 6 interleaved 64-bit values (a0 b0 c0 a1 b1 c1) into a, b and c.
+    __m128i t0 = _mm_loadu_si128((const __m128i*)ptr); // a0, b0
+    __m128i t1 = _mm_loadu_si128((const __m128i*)(ptr + 2)); // c0, a1
+    __m128i t2 = _mm_loadu_si128((const __m128i*)(ptr + 4)); // b1, c1
+
+    t1 = _mm_shuffle_epi32(t1, 0x4e); // a1, c0 (0x4e swaps the two 64-bit halves)
+
+    a = v_uint64x2(_mm_unpacklo_epi64(t0, t1)); // a0, a1
+    b = v_uint64x2(_mm_unpacklo_epi64(_mm_unpackhi_epi64(t0, t0), t2)); // b0, b1
+    c = v_uint64x2(_mm_unpackhi_epi64(t1, t2)); // c0, c1
+}
+
+inline void v_load_deinterleave(const uint64 *ptr, v_uint64x2& a,
+                                v_uint64x2& b, v_uint64x2& c, v_uint64x2& d)
+{
+    __m128i t0 = _mm_loadu_si128((const __m128i*)ptr); // a0 b0
+    __m128i t1 = _mm_loadu_si128((const __m128i*)(ptr + 2)); // c0 d0
+    __m128i t2 = _mm_loadu_si128((const __m128i*)(ptr + 4)); // a1 b1
+    __m128i t3 = _mm_loadu_si128((const __m128i*)(ptr + 6)); // c1 d1
+
+    a = v_uint64x2(_mm_unpacklo_epi64(t0, t2));
+    b = v_uint64x2(_mm_unpackhi_epi64(t0, t2));
+    c = v_uint64x2(_mm_unpacklo_epi64(t1, t3));
+    d = v_uint64x2(_mm_unpackhi_epi64(t1, t3));
+}
+
+// store interleave
+
 inline void v_store_interleave( uchar* ptr, const v_uint8x16& a, const v_uint8x16& b,
-                                const v_uint8x16& c )
+                                hal::StoreMode mode = hal::STORE_UNALIGNED)
 {
+    __m128i v0 = _mm_unpacklo_epi8(a.val, b.val);
+    __m128i v1 = _mm_unpackhi_epi8(a.val, b.val);
+
+    if( mode == hal::STORE_ALIGNED_NOCACHE )
+    {
+        _mm_stream_si128((__m128i*)(ptr), v0);
+        _mm_stream_si128((__m128i*)(ptr + 16), v1);
+    }
+    else if( mode == hal::STORE_ALIGNED )
+    {
+        _mm_store_si128((__m128i*)(ptr), v0);
+        _mm_store_si128((__m128i*)(ptr + 16), v1);
+    }
+    else
+    {
+        _mm_storeu_si128((__m128i*)(ptr), v0);
+        _mm_storeu_si128((__m128i*)(ptr + 16), v1);
+    }
+}
+
+inline void v_store_interleave( uchar* ptr, const v_uint8x16& a, const v_uint8x16& b,
+                                const v_uint8x16& c, hal::StoreMode mode = hal::STORE_UNALIGNED)
+{
+#if CV_SSE4_1
+    const __m128i sh_a = _mm_setr_epi8(0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15, 10, 5);
+    const __m128i sh_b = _mm_setr_epi8(5, 0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15, 10);
+    const __m128i sh_c = _mm_setr_epi8(10, 5, 0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15);
+    __m128i a0 = _mm_shuffle_epi8(a.val, sh_a);
+    __m128i b0 = _mm_shuffle_epi8(b.val, sh_b);
+    __m128i c0 = _mm_shuffle_epi8(c.val, sh_c);
+
+    const __m128i m0 = _mm_setr_epi8(0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0);
+    const __m128i m1 = _mm_setr_epi8(0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0);
+    __m128i v0 = _mm_blendv_epi8(_mm_blendv_epi8(a0, b0, m1), c0, m0);
+    __m128i v1 = _mm_blendv_epi8(_mm_blendv_epi8(b0, c0, m1), a0, m0);
+    __m128i v2 = _mm_blendv_epi8(_mm_blendv_epi8(c0, a0, m1), b0, m0);
+#elif CV_SSSE3
+    const __m128i m0 = _mm_setr_epi8(0, 6, 11, 1, 7, 12, 2, 8, 13, 3, 9, 14, 4, 10, 15, 5);
+    const __m128i m1 = _mm_setr_epi8(5, 11, 0, 6, 12, 1, 7, 13, 2, 8, 14, 3, 9, 15, 4, 10);
+    const __m128i m2 = _mm_setr_epi8(10, 0, 5, 11, 1, 6, 12, 2, 7, 13, 3, 8, 14, 4, 9, 15);
+
+    __m128i t0 = _mm_alignr_epi8(b.val, _mm_slli_si128(a.val, 10), 5);
+    t0 = _mm_alignr_epi8(c.val, t0, 5);
+    __m128i v0 = _mm_shuffle_epi8(t0, m0);
+
+    __m128i t1 = _mm_alignr_epi8(_mm_srli_si128(b.val, 5), _mm_slli_si128(a.val, 5), 6);
+    t1 = _mm_alignr_epi8(_mm_srli_si128(c.val, 5), t1, 5);
+    __m128i v1 = _mm_shuffle_epi8(t1, m1);
+
+    __m128i t2 = _mm_alignr_epi8(_mm_srli_si128(c.val, 10), b.val, 11);
+    t2 = _mm_alignr_epi8(t2, a.val, 11);
+    __m128i v2 = _mm_shuffle_epi8(t2, m2);
+#else
     __m128i z = _mm_setzero_si128();
     __m128i ab0 = _mm_unpacklo_epi8(a.val, b.val);
     __m128i ab1 = _mm_unpackhi_epi8(a.val, b.val);
@@ -1414,14 +2456,31 @@ inline void v_store_interleave( uchar* ptr, const v_uint8x16& a, const v_uint8x1
     __m128i v0 = _mm_or_si128(_mm_srli_si128(p40, 2), _mm_slli_si128(p41, 10));
     __m128i v1 = _mm_or_si128(_mm_srli_si128(p41, 6), _mm_slli_si128(p42, 6));
     __m128i v2 = _mm_or_si128(_mm_srli_si128(p42, 10), _mm_slli_si128(p43, 2));
+#endif
 
-    _mm_storeu_si128((__m128i*)(ptr), v0);
-    _mm_storeu_si128((__m128i*)(ptr + 16), v1);
-    _mm_storeu_si128((__m128i*)(ptr + 32), v2);
+    if( mode == hal::STORE_ALIGNED_NOCACHE )
+    {
+        _mm_stream_si128((__m128i*)(ptr), v0);
+        _mm_stream_si128((__m128i*)(ptr + 16), v1);
+        _mm_stream_si128((__m128i*)(ptr + 32), v2);
+    }
+    else if( mode == hal::STORE_ALIGNED )
+    {
+        _mm_store_si128((__m128i*)(ptr), v0);
+        _mm_store_si128((__m128i*)(ptr + 16), v1);
+        _mm_store_si128((__m128i*)(ptr + 32), v2);
+    }
+    else
+    {
+        _mm_storeu_si128((__m128i*)(ptr), v0);
+        _mm_storeu_si128((__m128i*)(ptr + 16), v1);
+        _mm_storeu_si128((__m128i*)(ptr + 32), v2);
+    }
 }
 
 inline void v_store_interleave( uchar* ptr, const v_uint8x16& a, const v_uint8x16& b,
-                                const v_uint8x16& c, const v_uint8x16& d)
+                                const v_uint8x16& c, const v_uint8x16& d,
+                                hal::StoreMode mode = hal::STORE_UNALIGNED)
 {
     // a0 a1 a2 a3 ....
     // b0 b1 b2 b3 ....
@@ -1433,20 +2492,72 @@ inline void v_store_interleave( uchar* ptr, const v_uint8x16& a, const v_uint8x1
     __m128i u3 = _mm_unpackhi_epi8(b.val, d.val); // b8 d8 b9 d9 ...
 
     __m128i v0 = _mm_unpacklo_epi8(u0, u2); // a0 b0 c0 d0 ...
-    __m128i v1 = _mm_unpacklo_epi8(u1, u3); // a8 b8 c8 d8 ...
-    __m128i v2 = _mm_unpackhi_epi8(u0, u2); // a4 b4 c4 d4 ...
+    __m128i v1 = _mm_unpackhi_epi8(u0, u2); // a4 b4 c4 d4 ...
+    __m128i v2 = _mm_unpacklo_epi8(u1, u3); // a8 b8 c8 d8 ...
     __m128i v3 = _mm_unpackhi_epi8(u1, u3); // a12 b12 c12 d12 ...
 
-    _mm_storeu_si128((__m128i*)ptr, v0);
-    _mm_storeu_si128((__m128i*)(ptr + 16), v2);
-    _mm_storeu_si128((__m128i*)(ptr + 32), v1);
-    _mm_storeu_si128((__m128i*)(ptr + 48), v3);
+    if( mode == hal::STORE_ALIGNED_NOCACHE )
+    {
+        _mm_stream_si128((__m128i*)(ptr), v0);
+        _mm_stream_si128((__m128i*)(ptr + 16), v1);
+        _mm_stream_si128((__m128i*)(ptr + 32), v2);
+        _mm_stream_si128((__m128i*)(ptr + 48), v3);
+    }
+    else if( mode == hal::STORE_ALIGNED )
+    {
+        _mm_store_si128((__m128i*)(ptr), v0);
+        _mm_store_si128((__m128i*)(ptr + 16), v1);
+        _mm_store_si128((__m128i*)(ptr + 32), v2);
+        _mm_store_si128((__m128i*)(ptr + 48), v3);
+    }
+    else
+    {
+        _mm_storeu_si128((__m128i*)(ptr), v0);
+        _mm_storeu_si128((__m128i*)(ptr + 16), v1);
+        _mm_storeu_si128((__m128i*)(ptr + 32), v2);
+        _mm_storeu_si128((__m128i*)(ptr + 48), v3);
+    }
 }
 
-inline void v_store_interleave( ushort* ptr, const v_uint16x8& a,
-                                const v_uint16x8& b,
-                                const v_uint16x8& c )
+inline void v_store_interleave( ushort* ptr, const v_uint16x8& a, const v_uint16x8& b,
+                                hal::StoreMode mode = hal::STORE_UNALIGNED) // 2-channel: writes a0 b0 a1 b1 ... a7 b7 (16 ushort values) starting at ptr
 {
+    __m128i v0 = _mm_unpacklo_epi16(a.val, b.val); // a0 b0 a1 b1 a2 b2 a3 b3
+    __m128i v1 = _mm_unpackhi_epi16(a.val, b.val); // a4 b4 a5 b5 a6 b6 a7 b7
+
+    if( mode == hal::STORE_ALIGNED_NOCACHE ) // streaming (non-temporal) stores; ptr must be 16-byte aligned
+    {
+        _mm_stream_si128((__m128i*)(ptr), v0);
+        _mm_stream_si128((__m128i*)(ptr + 8), v1);
+    }
+    else if( mode == hal::STORE_ALIGNED ) // aligned stores; ptr must be 16-byte aligned
+    {
+        _mm_store_si128((__m128i*)(ptr), v0);
+        _mm_store_si128((__m128i*)(ptr + 8), v1);
+    }
+    else // any other mode, including the default STORE_UNALIGNED
+    {
+        _mm_storeu_si128((__m128i*)(ptr), v0);
+        _mm_storeu_si128((__m128i*)(ptr + 8), v1);
+    }
+}
+
+inline void v_store_interleave( ushort* ptr, const v_uint16x8& a,
+                                const v_uint16x8& b, const v_uint16x8& c,
+                                hal::StoreMode mode = hal::STORE_UNALIGNED)
+{
+#if CV_SSE4_1
+    const __m128i sh_a = _mm_setr_epi8(0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5, 10, 11);
+    const __m128i sh_b = _mm_setr_epi8(10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5);
+    const __m128i sh_c = _mm_setr_epi8(4, 5, 10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15);
+    __m128i a0 = _mm_shuffle_epi8(a.val, sh_a);
+    __m128i b0 = _mm_shuffle_epi8(b.val, sh_b);
+    __m128i c0 = _mm_shuffle_epi8(c.val, sh_c);
+
+    __m128i v0 = _mm_blend_epi16(_mm_blend_epi16(a0, b0, 0x92), c0, 0x24);
+    __m128i v1 = _mm_blend_epi16(_mm_blend_epi16(c0, a0, 0x92), b0, 0x24);
+    __m128i v2 = _mm_blend_epi16(_mm_blend_epi16(b0, c0, 0x92), a0, 0x24);
+#else
     __m128i z = _mm_setzero_si128();
     __m128i ab0 = _mm_unpacklo_epi16(a.val, b.val);
     __m128i ab1 = _mm_unpackhi_epi16(a.val, b.val);
@@ -1474,14 +2585,30 @@ inline void v_store_interleave( ushort* ptr, const v_uint16x8& a,
     __m128i v0 = _mm_or_si128(_mm_srli_si128(p30, 2), _mm_slli_si128(p31, 10));
     __m128i v1 = _mm_or_si128(_mm_srli_si128(p31, 6), _mm_slli_si128(p32, 6));
     __m128i v2 = _mm_or_si128(_mm_srli_si128(p32, 10), _mm_slli_si128(p33, 2));
-
-    _mm_storeu_si128((__m128i*)(ptr), v0);
-    _mm_storeu_si128((__m128i*)(ptr + 8), v1);
-    _mm_storeu_si128((__m128i*)(ptr + 16), v2);
+#endif
+    if( mode == hal::STORE_ALIGNED_NOCACHE )
+    {
+        _mm_stream_si128((__m128i*)(ptr), v0);
+        _mm_stream_si128((__m128i*)(ptr + 8), v1);
+        _mm_stream_si128((__m128i*)(ptr + 16), v2);
+    }
+    else if( mode == hal::STORE_ALIGNED )
+    {
+        _mm_store_si128((__m128i*)(ptr), v0);
+        _mm_store_si128((__m128i*)(ptr + 8), v1);
+        _mm_store_si128((__m128i*)(ptr + 16), v2);
+    }
+    else
+    {
+        _mm_storeu_si128((__m128i*)(ptr), v0);
+        _mm_storeu_si128((__m128i*)(ptr + 8), v1);
+        _mm_storeu_si128((__m128i*)(ptr + 16), v2);
+    }
 }
 
 inline void v_store_interleave( ushort* ptr, const v_uint16x8& a, const v_uint16x8& b,
-                                const v_uint16x8& c, const v_uint16x8& d)
+                                const v_uint16x8& c, const v_uint16x8& d,
+                                hal::StoreMode mode = hal::STORE_UNALIGNED)
 {
     // a0 a1 a2 a3 ....
     // b0 b1 b2 b3 ....
@@ -1493,18 +2620,58 @@ inline void v_store_interleave( ushort* ptr, const v_uint16x8& a, const v_uint16
     __m128i u3 = _mm_unpackhi_epi16(b.val, d.val); // b4 d4 b5 d5 ...
 
     __m128i v0 = _mm_unpacklo_epi16(u0, u2); // a0 b0 c0 d0 ...
-    __m128i v1 = _mm_unpacklo_epi16(u1, u3); // a4 b4 c4 d4 ...
-    __m128i v2 = _mm_unpackhi_epi16(u0, u2); // a2 b2 c2 d2 ...
+    __m128i v1 = _mm_unpackhi_epi16(u0, u2); // a2 b2 c2 d2 ...
+    __m128i v2 = _mm_unpacklo_epi16(u1, u3); // a4 b4 c4 d4 ...
     __m128i v3 = _mm_unpackhi_epi16(u1, u3); // a6 b6 c6 d6 ...
 
-    _mm_storeu_si128((__m128i*)ptr, v0);
-    _mm_storeu_si128((__m128i*)(ptr + 8), v2);
-    _mm_storeu_si128((__m128i*)(ptr + 16), v1);
-    _mm_storeu_si128((__m128i*)(ptr + 24), v3);
+    if( mode == hal::STORE_ALIGNED_NOCACHE )
+    {
+        _mm_stream_si128((__m128i*)(ptr), v0);
+        _mm_stream_si128((__m128i*)(ptr + 8), v1);
+        _mm_stream_si128((__m128i*)(ptr + 16), v2);
+        _mm_stream_si128((__m128i*)(ptr + 24), v3);
+    }
+    else if( mode == hal::STORE_ALIGNED )
+    {
+        _mm_store_si128((__m128i*)(ptr), v0);
+        _mm_store_si128((__m128i*)(ptr + 8), v1);
+        _mm_store_si128((__m128i*)(ptr + 16), v2);
+        _mm_store_si128((__m128i*)(ptr + 24), v3);
+    }
+    else
+    {
+        _mm_storeu_si128((__m128i*)(ptr), v0);
+        _mm_storeu_si128((__m128i*)(ptr + 8), v1);
+        _mm_storeu_si128((__m128i*)(ptr + 16), v2);
+        _mm_storeu_si128((__m128i*)(ptr + 24), v3);
+    }
+}
+
+inline void v_store_interleave( unsigned* ptr, const v_uint32x4& a, const v_uint32x4& b,
+                                hal::StoreMode mode = hal::STORE_UNALIGNED) // 2-channel: writes a0 b0 a1 b1 a2 b2 a3 b3 (8 values) starting at ptr
+{
+    __m128i v0 = _mm_unpacklo_epi32(a.val, b.val); // a0 b0 a1 b1
+    __m128i v1 = _mm_unpackhi_epi32(a.val, b.val); // a2 b2 a3 b3
+
+    if( mode == hal::STORE_ALIGNED_NOCACHE ) // streaming (non-temporal) stores; ptr must be 16-byte aligned
+    {
+        _mm_stream_si128((__m128i*)(ptr), v0);
+        _mm_stream_si128((__m128i*)(ptr + 4), v1);
+    }
+    else if( mode == hal::STORE_ALIGNED ) // aligned stores; ptr must be 16-byte aligned
+    {
+        _mm_store_si128((__m128i*)(ptr), v0);
+        _mm_store_si128((__m128i*)(ptr + 4), v1);
+    }
+    else // any other mode, including the default STORE_UNALIGNED
+    {
+        _mm_storeu_si128((__m128i*)(ptr), v0);
+        _mm_storeu_si128((__m128i*)(ptr + 4), v1);
+    }
 }
 
 inline void v_store_interleave( unsigned* ptr, const v_uint32x4& a, const v_uint32x4& b,
-                                const v_uint32x4& c )
+                                const v_uint32x4& c, hal::StoreMode mode = hal::STORE_UNALIGNED)
 {
     v_uint32x4 z = v_setzero_u32(), u0, u1, u2, u3;
     v_transpose4x4(a, b, c, z, u0, u1, u2, u3);
@@ -1513,64 +2680,287 @@ inline void v_store_interleave( unsigned* ptr, const v_uint32x4& a, const v_uint
     __m128i v1 = _mm_or_si128(_mm_srli_si128(u1.val, 4), _mm_slli_si128(u2.val, 8));
     __m128i v2 = _mm_or_si128(_mm_srli_si128(u2.val, 8), _mm_slli_si128(u3.val, 4));
 
-    _mm_storeu_si128((__m128i*)ptr, v0);
-    _mm_storeu_si128((__m128i*)(ptr + 4), v1);
-    _mm_storeu_si128((__m128i*)(ptr + 8), v2);
+    if( mode == hal::STORE_ALIGNED_NOCACHE )
+    {
+        _mm_stream_si128((__m128i*)(ptr), v0);
+        _mm_stream_si128((__m128i*)(ptr + 4), v1);
+        _mm_stream_si128((__m128i*)(ptr + 8), v2);
+    }
+    else if( mode == hal::STORE_ALIGNED )
+    {
+        _mm_store_si128((__m128i*)(ptr), v0);
+        _mm_store_si128((__m128i*)(ptr + 4), v1);
+        _mm_store_si128((__m128i*)(ptr + 8), v2);
+    }
+    else
+    {
+        _mm_storeu_si128((__m128i*)(ptr), v0);
+        _mm_storeu_si128((__m128i*)(ptr + 4), v1);
+        _mm_storeu_si128((__m128i*)(ptr + 8), v2);
+    }
 }
 
 inline void v_store_interleave(unsigned* ptr, const v_uint32x4& a, const v_uint32x4& b,
-                               const v_uint32x4& c, const v_uint32x4& d)
+                               const v_uint32x4& c, const v_uint32x4& d,
+                               hal::StoreMode mode = hal::STORE_UNALIGNED) // 4-channel: writes 16 values starting at ptr
+{
+    v_uint32x4 v0, v1, v2, v3;
+    v_transpose4x4(a, b, c, d, v0, v1, v2, v3); // presumably vN = aN bN cN dN; v_transpose4x4 is defined outside this chunk - confirm
+
+    if( mode == hal::STORE_ALIGNED_NOCACHE ) // streaming (non-temporal) stores; ptr must be 16-byte aligned
+    {
+        _mm_stream_si128((__m128i*)(ptr), v0.val);
+        _mm_stream_si128((__m128i*)(ptr + 4), v1.val);
+        _mm_stream_si128((__m128i*)(ptr + 8), v2.val);
+        _mm_stream_si128((__m128i*)(ptr + 12), v3.val);
+    }
+    else if( mode == hal::STORE_ALIGNED ) // aligned stores; ptr must be 16-byte aligned
+    {
+        _mm_store_si128((__m128i*)(ptr), v0.val);
+        _mm_store_si128((__m128i*)(ptr + 4), v1.val);
+        _mm_store_si128((__m128i*)(ptr + 8), v2.val);
+        _mm_store_si128((__m128i*)(ptr + 12), v3.val);
+    }
+    else // any other mode, including the default STORE_UNALIGNED
+    {
+        _mm_storeu_si128((__m128i*)(ptr), v0.val);
+        _mm_storeu_si128((__m128i*)(ptr + 4), v1.val);
+        _mm_storeu_si128((__m128i*)(ptr + 8), v2.val);
+        _mm_storeu_si128((__m128i*)(ptr + 12), v3.val);
+    }
+}
+
+// 2-channel, float only: writes a0 b0 a1 b1 a2 b2 a3 b3 (8 floats) starting at ptr
+inline void v_store_interleave(float* ptr, const v_float32x4& a, const v_float32x4& b,
+                               hal::StoreMode mode = hal::STORE_UNALIGNED)
+{
+    __m128 v0 = _mm_unpacklo_ps(a.val, b.val); // a0 b0 a1 b1
+    __m128 v1 = _mm_unpackhi_ps(a.val, b.val); // a2 b2 a3 b3
+
+    if( mode == hal::STORE_ALIGNED_NOCACHE ) // streaming (non-temporal) stores; ptr must be 16-byte aligned
+    {
+        _mm_stream_ps(ptr, v0);
+        _mm_stream_ps(ptr + 4, v1);
+    }
+    else if( mode == hal::STORE_ALIGNED ) // aligned stores; ptr must be 16-byte aligned
+    {
+        _mm_store_ps(ptr, v0);
+        _mm_store_ps(ptr + 4, v1);
+    }
+    else // any other mode, including the default STORE_UNALIGNED
+    {
+        _mm_storeu_ps(ptr, v0);
+        _mm_storeu_ps(ptr + 4, v1);
+    }
+}
+
+inline void v_store_interleave(float* ptr, const v_float32x4& a, const v_float32x4& b,
+                               const v_float32x4& c, hal::StoreMode mode = hal::STORE_UNALIGNED) // 3-channel: writes a0 b0 c0 a1 b1 c1 ... (12 floats) starting at ptr
+{
+    __m128 u0 = _mm_shuffle_ps(a.val, b.val, _MM_SHUFFLE(0, 0, 0, 0)); // a0 a0 b0 b0
+    __m128 u1 = _mm_shuffle_ps(c.val, a.val, _MM_SHUFFLE(1, 1, 0, 0)); // c0 c0 a1 a1
+    __m128 v0 = _mm_shuffle_ps(u0, u1, _MM_SHUFFLE(2, 0, 2, 0)); // a0 b0 c0 a1
+    __m128 u2 = _mm_shuffle_ps(b.val, c.val, _MM_SHUFFLE(1, 1, 1, 1)); // b1 b1 c1 c1
+    __m128 u3 = _mm_shuffle_ps(a.val, b.val, _MM_SHUFFLE(2, 2, 2, 2)); // a2 a2 b2 b2
+    __m128 v1 = _mm_shuffle_ps(u2, u3, _MM_SHUFFLE(2, 0, 2, 0)); // b1 c1 a2 b2
+    __m128 u4 = _mm_shuffle_ps(c.val, a.val, _MM_SHUFFLE(3, 3, 2, 2)); // c2 c2 a3 a3
+    __m128 u5 = _mm_shuffle_ps(b.val, c.val, _MM_SHUFFLE(3, 3, 3, 3)); // b3 b3 c3 c3
+    __m128 v2 = _mm_shuffle_ps(u4, u5, _MM_SHUFFLE(2, 0, 2, 0)); // c2 a3 b3 c3
+
+    if( mode == hal::STORE_ALIGNED_NOCACHE ) // streaming (non-temporal) stores; ptr must be 16-byte aligned
+    {
+        _mm_stream_ps(ptr, v0);
+        _mm_stream_ps(ptr + 4, v1);
+        _mm_stream_ps(ptr + 8, v2);
+    }
+    else if( mode == hal::STORE_ALIGNED ) // aligned stores; ptr must be 16-byte aligned
+    {
+        _mm_store_ps(ptr, v0);
+        _mm_store_ps(ptr + 4, v1);
+        _mm_store_ps(ptr + 8, v2);
+    }
+    else // any other mode, including the default STORE_UNALIGNED
+    {
+        _mm_storeu_ps(ptr, v0);
+        _mm_storeu_ps(ptr + 4, v1);
+        _mm_storeu_ps(ptr + 8, v2);
+    }
+}
+
+inline void v_store_interleave(float* ptr, const v_float32x4& a, const v_float32x4& b,
+                               const v_float32x4& c, const v_float32x4& d,
+                               hal::StoreMode mode = hal::STORE_UNALIGNED) // 4-channel: writes a0 b0 c0 d0 a1 b1 c1 d1 ... (16 floats) starting at ptr
+{
+    __m128 u0 = _mm_unpacklo_ps(a.val, c.val); // a0 c0 a1 c1
+    __m128 u1 = _mm_unpacklo_ps(b.val, d.val); // b0 d0 b1 d1
+    __m128 u2 = _mm_unpackhi_ps(a.val, c.val); // a2 c2 a3 c3
+    __m128 u3 = _mm_unpackhi_ps(b.val, d.val); // b2 d2 b3 d3
+    __m128 v0 = _mm_unpacklo_ps(u0, u1); // a0 b0 c0 d0
+    __m128 v2 = _mm_unpacklo_ps(u2, u3); // a2 b2 c2 d2
+    __m128 v1 = _mm_unpackhi_ps(u0, u1); // a1 b1 c1 d1
+    __m128 v3 = _mm_unpackhi_ps(u2, u3); // a3 b3 c3 d3
+
+    if( mode == hal::STORE_ALIGNED_NOCACHE ) // streaming (non-temporal) stores; ptr must be 16-byte aligned
+    {
+        _mm_stream_ps(ptr, v0);
+        _mm_stream_ps(ptr + 4, v1);
+        _mm_stream_ps(ptr + 8, v2);
+        _mm_stream_ps(ptr + 12, v3);
+    }
+    else if( mode == hal::STORE_ALIGNED ) // aligned stores; ptr must be 16-byte aligned
+    {
+        _mm_store_ps(ptr, v0);
+        _mm_store_ps(ptr + 4, v1);
+        _mm_store_ps(ptr + 8, v2);
+        _mm_store_ps(ptr + 12, v3);
+    }
+    else // any other mode, including the default STORE_UNALIGNED
+    {
+        _mm_storeu_ps(ptr, v0);
+        _mm_storeu_ps(ptr + 4, v1);
+        _mm_storeu_ps(ptr + 8, v2);
+        _mm_storeu_ps(ptr + 12, v3);
+    }
+}
+
+inline void v_store_interleave(uint64 *ptr, const v_uint64x2& a, const v_uint64x2& b,
+                               hal::StoreMode mode = hal::STORE_UNALIGNED) // 2-channel: writes a0 b0 a1 b1 (4 values) starting at ptr
+{
+    __m128i v0 = _mm_unpacklo_epi64(a.val, b.val); // a0 b0
+    __m128i v1 = _mm_unpackhi_epi64(a.val, b.val); // a1 b1
+
+    if( mode == hal::STORE_ALIGNED_NOCACHE ) // streaming (non-temporal) stores; ptr must be 16-byte aligned
+    {
+        _mm_stream_si128((__m128i*)(ptr), v0);
+        _mm_stream_si128((__m128i*)(ptr + 2), v1);
+    }
+    else if( mode == hal::STORE_ALIGNED ) // aligned stores; ptr must be 16-byte aligned
+    {
+        _mm_store_si128((__m128i*)(ptr), v0);
+        _mm_store_si128((__m128i*)(ptr + 2), v1);
+    }
+    else // any other mode, including the default STORE_UNALIGNED
+    {
+        _mm_storeu_si128((__m128i*)(ptr), v0);
+        _mm_storeu_si128((__m128i*)(ptr + 2), v1);
+    }
+}
+
+inline void v_store_interleave(uint64 *ptr, const v_uint64x2& a, const v_uint64x2& b,
+                               const v_uint64x2& c, hal::StoreMode mode = hal::STORE_UNALIGNED) // 3-channel: writes a0 b0 c0 a1 b1 c1 (6 values) starting at ptr
 {
-    v_uint32x4 t0, t1, t2, t3;
-    v_transpose4x4(a, b, c, d, t0, t1, t2, t3);
-    v_store(ptr, t0);
-    v_store(ptr + 4, t1);
-    v_store(ptr + 8, t2);
-    v_store(ptr + 12, t3);
+    __m128i v0 = _mm_unpacklo_epi64(a.val, b.val); // a0 b0
+    __m128i v1 = _mm_unpacklo_epi64(c.val, _mm_unpackhi_epi64(a.val, a.val)); // c0 a1 (inner unpackhi yields a1 a1)
+    __m128i v2 = _mm_unpackhi_epi64(b.val, c.val); // b1 c1
+
+    if( mode == hal::STORE_ALIGNED_NOCACHE ) // streaming (non-temporal) stores; ptr must be 16-byte aligned
+    {
+        _mm_stream_si128((__m128i*)(ptr), v0);
+        _mm_stream_si128((__m128i*)(ptr + 2), v1);
+        _mm_stream_si128((__m128i*)(ptr + 4), v2);
+    }
+    else if( mode == hal::STORE_ALIGNED ) // aligned stores; ptr must be 16-byte aligned
+    {
+        _mm_store_si128((__m128i*)(ptr), v0);
+        _mm_store_si128((__m128i*)(ptr + 2), v1);
+        _mm_store_si128((__m128i*)(ptr + 4), v2);
+    }
+    else // any other mode, including the default STORE_UNALIGNED
+    {
+        _mm_storeu_si128((__m128i*)(ptr), v0);
+        _mm_storeu_si128((__m128i*)(ptr + 2), v1);
+        _mm_storeu_si128((__m128i*)(ptr + 4), v2);
+    }
 }
 
-#define OPENCV_HAL_IMPL_SSE_LOADSTORE_INTERLEAVE(_Tpvec, _Tp, suffix, _Tpuvec, _Tpu, usuffix) \
-inline void v_load_deinterleave( const _Tp* ptr, _Tpvec& a0, \
-                                 _Tpvec& b0, _Tpvec& c0 ) \
+inline void v_store_interleave(uint64 *ptr, const v_uint64x2& a, const v_uint64x2& b,
+                               const v_uint64x2& c, const v_uint64x2& d,
+                               hal::StoreMode mode = hal::STORE_UNALIGNED) // 4-channel: writes a0 b0 c0 d0 a1 b1 c1 d1 (8 values) starting at ptr
+{
+    __m128i v0 = _mm_unpacklo_epi64(a.val, b.val); // a0 b0
+    __m128i v1 = _mm_unpacklo_epi64(c.val, d.val); // c0 d0
+    __m128i v2 = _mm_unpackhi_epi64(a.val, b.val); // a1 b1
+    __m128i v3 = _mm_unpackhi_epi64(c.val, d.val); // c1 d1
+
+    if( mode == hal::STORE_ALIGNED_NOCACHE ) // streaming (non-temporal) stores; ptr must be 16-byte aligned
+    {
+        _mm_stream_si128((__m128i*)(ptr), v0);
+        _mm_stream_si128((__m128i*)(ptr + 2), v1);
+        _mm_stream_si128((__m128i*)(ptr + 4), v2);
+        _mm_stream_si128((__m128i*)(ptr + 6), v3);
+    }
+    else if( mode == hal::STORE_ALIGNED ) // aligned stores; ptr must be 16-byte aligned
+    {
+        _mm_store_si128((__m128i*)(ptr), v0);
+        _mm_store_si128((__m128i*)(ptr + 2), v1);
+        _mm_store_si128((__m128i*)(ptr + 4), v2);
+        _mm_store_si128((__m128i*)(ptr + 6), v3);
+    }
+    else // any other mode, including the default STORE_UNALIGNED
+    {
+        _mm_storeu_si128((__m128i*)(ptr), v0);
+        _mm_storeu_si128((__m128i*)(ptr + 2), v1);
+        _mm_storeu_si128((__m128i*)(ptr + 4), v2);
+        _mm_storeu_si128((__m128i*)(ptr + 6), v3);
+    }
+}
+
+#define OPENCV_HAL_IMPL_SSE_LOADSTORE_INTERLEAVE(_Tpvec0, _Tp0, suffix0, _Tpvec1, _Tp1, suffix1) /* defines 2/3/4-channel load/store overloads for _Tp0 that forward to the _Tp1 overloads */ \
+inline void v_load_deinterleave( const _Tp0* ptr, _Tpvec0& a0, _Tpvec0& b0 ) \
+{ \
+    _Tpvec1 a1, b1; \
+    v_load_deinterleave((const _Tp1*)ptr, a1, b1); /* load through the _Tp1 overload, then reinterpret each vector back to _Tpvec0 */ \
+    a0 = v_reinterpret_as_##suffix0(a1); \
+    b0 = v_reinterpret_as_##suffix0(b1); \
+} \
+inline void v_load_deinterleave( const _Tp0* ptr, _Tpvec0& a0, _Tpvec0& b0, _Tpvec0& c0 ) \
+{ \
+    _Tpvec1 a1, b1, c1; \
+    v_load_deinterleave((const _Tp1*)ptr, a1, b1, c1); \
+    a0 = v_reinterpret_as_##suffix0(a1); \
+    b0 = v_reinterpret_as_##suffix0(b1); \
+    c0 = v_reinterpret_as_##suffix0(c1); \
+} \
+inline void v_load_deinterleave( const _Tp0* ptr, _Tpvec0& a0, _Tpvec0& b0, _Tpvec0& c0, _Tpvec0& d0 ) \
 { \
-    _Tpuvec a1, b1, c1; \
-    v_load_deinterleave((const _Tpu*)ptr, a1, b1, c1); \
-    a0 = v_reinterpret_as_##suffix(a1); \
-    b0 = v_reinterpret_as_##suffix(b1); \
-    c0 = v_reinterpret_as_##suffix(c1); \
+    _Tpvec1 a1, b1, c1, d1; \
+    v_load_deinterleave((const _Tp1*)ptr, a1, b1, c1, d1); \
+    a0 = v_reinterpret_as_##suffix0(a1); \
+    b0 = v_reinterpret_as_##suffix0(b1); \
+    c0 = v_reinterpret_as_##suffix0(c1); \
+    d0 = v_reinterpret_as_##suffix0(d1); \
 } \
-inline void v_load_deinterleave( const _Tp* ptr, _Tpvec& a0, \
-                                 _Tpvec& b0, _Tpvec& c0, _Tpvec& d0 ) \
+inline void v_store_interleave( _Tp0* ptr, const _Tpvec0& a0, const _Tpvec0& b0, \
+                                hal::StoreMode mode = hal::STORE_UNALIGNED ) \
 { \
-    _Tpuvec a1, b1, c1, d1; \
-    v_load_deinterleave((const _Tpu*)ptr, a1, b1, c1, d1); \
-    a0 = v_reinterpret_as_##suffix(a1); \
-    b0 = v_reinterpret_as_##suffix(b1); \
-    c0 = v_reinterpret_as_##suffix(c1); \
-    d0 = v_reinterpret_as_##suffix(d1); \
+    _Tpvec1 a1 = v_reinterpret_as_##suffix1(a0); /* reinterpret to _Tpvec1, then store through the _Tp1 overload with the same mode */ \
+    _Tpvec1 b1 = v_reinterpret_as_##suffix1(b0); \
+    v_store_interleave((_Tp1*)ptr, a1, b1, mode);      \
 } \
-inline void v_store_interleave( _Tp* ptr, const _Tpvec& a0, \
-                               const _Tpvec& b0, const _Tpvec& c0 ) \
+inline void v_store_interleave( _Tp0* ptr, const _Tpvec0& a0, const _Tpvec0& b0, \
+                                const _Tpvec0& c0, hal::StoreMode mode = hal::STORE_UNALIGNED ) \
 { \
-    _Tpuvec a1 = v_reinterpret_as_##usuffix(a0); \
-    _Tpuvec b1 = v_reinterpret_as_##usuffix(b0); \
-    _Tpuvec c1 = v_reinterpret_as_##usuffix(c0); \
-    v_store_interleave((_Tpu*)ptr, a1, b1, c1); \
+    _Tpvec1 a1 = v_reinterpret_as_##suffix1(a0); \
+    _Tpvec1 b1 = v_reinterpret_as_##suffix1(b0); \
+    _Tpvec1 c1 = v_reinterpret_as_##suffix1(c0); \
+    v_store_interleave((_Tp1*)ptr, a1, b1, c1, mode);  \
 } \
-inline void v_store_interleave( _Tp* ptr, const _Tpvec& a0, const _Tpvec& b0, \
-                               const _Tpvec& c0, const _Tpvec& d0 ) \
+inline void v_store_interleave( _Tp0* ptr, const _Tpvec0& a0, const _Tpvec0& b0, \
+                                const _Tpvec0& c0, const _Tpvec0& d0, \
+                                hal::StoreMode mode = hal::STORE_UNALIGNED ) \
 { \
-    _Tpuvec a1 = v_reinterpret_as_##usuffix(a0); \
-    _Tpuvec b1 = v_reinterpret_as_##usuffix(b0); \
-    _Tpuvec c1 = v_reinterpret_as_##usuffix(c0); \
-    _Tpuvec d1 = v_reinterpret_as_##usuffix(d0); \
-    v_store_interleave((_Tpu*)ptr, a1, b1, c1, d1); \
+    _Tpvec1 a1 = v_reinterpret_as_##suffix1(a0); \
+    _Tpvec1 b1 = v_reinterpret_as_##suffix1(b0); \
+    _Tpvec1 c1 = v_reinterpret_as_##suffix1(c0); \
+    _Tpvec1 d1 = v_reinterpret_as_##suffix1(d0); \
+    v_store_interleave((_Tp1*)ptr, a1, b1, c1, d1, mode); \
 }
 
 OPENCV_HAL_IMPL_SSE_LOADSTORE_INTERLEAVE(v_int8x16, schar, s8, v_uint8x16, uchar, u8)
 OPENCV_HAL_IMPL_SSE_LOADSTORE_INTERLEAVE(v_int16x8, short, s16, v_uint16x8, ushort, u16)
 OPENCV_HAL_IMPL_SSE_LOADSTORE_INTERLEAVE(v_int32x4, int, s32, v_uint32x4, unsigned, u32)
-OPENCV_HAL_IMPL_SSE_LOADSTORE_INTERLEAVE(v_float32x4, float, f32, v_uint32x4, unsigned, u32)
+OPENCV_HAL_IMPL_SSE_LOADSTORE_INTERLEAVE(v_int64x2, int64, s64, v_uint64x2, uint64, u64) // int64 overloads forward to the uint64 ones
+OPENCV_HAL_IMPL_SSE_LOADSTORE_INTERLEAVE(v_float64x2, double, f64, v_uint64x2, uint64, u64) // double overloads forward to the uint64 ones
 
 inline v_float32x4 v_cvt_f32(const v_int32x4& a)
 {
@@ -1582,16 +2972,462 @@ inline v_float32x4 v_cvt_f32(const v_float64x2& a)
     return v_float32x4(_mm_cvtpd_ps(a.val));
 }
 
+inline v_float32x4 v_cvt_f32(const v_float64x2& a, const v_float64x2& b) // narrows two double vectors into one float vector: a0 a1 b0 b1
+{
+    return v_float32x4(_mm_movelh_ps(_mm_cvtpd_ps(a.val), _mm_cvtpd_ps(b.val))); // each cvtpd_ps fills its low two lanes; movelh joins the two low halves
+}
+
 inline v_float64x2 v_cvt_f64(const v_int32x4& a)
 {
     return v_float64x2(_mm_cvtepi32_pd(a.val));
 }
 
+inline v_float64x2 v_cvt_f64_high(const v_int32x4& a) // converts the upper two int32 lanes (a2, a3) to double
+{
+    return v_float64x2(_mm_cvtepi32_pd(_mm_srli_si128(a.val,8))); // byte-shift right by 8 moves a2, a3 into the low lanes that cvtepi32_pd reads
+}
+
 inline v_float64x2 v_cvt_f64(const v_float32x4& a)
 {
     return v_float64x2(_mm_cvtps_pd(a.val));
 }
 
+inline v_float64x2 v_cvt_f64_high(const v_float32x4& a) // converts the upper two float lanes (a2, a3) to double
+{
+    return v_float64x2(_mm_cvtps_pd(_mm_movehl_ps(a.val, a.val))); // movehl copies a2, a3 into the low lanes that cvtps_pd reads
+}
+
+// Converts two signed 64-bit integers to double. From (Mysticial and wim) https://stackoverflow.com/q/41144668
+inline v_float64x2 v_cvt_f64(const v_int64x2& v)
+{
+    // constants encoded as floating-point bit patterns
+    __m128i magic_i_hi32 = _mm_set1_epi64x(0x4530000080000000); // 2^84 + 2^63
+    __m128i magic_i_all  = _mm_set1_epi64x(0x4530000080100000); // 2^84 + 2^63 + 2^52
+    __m128d magic_d_all  = _mm_castsi128_pd(magic_i_all);
+    // Blend the 32 least significant bits of v with magic_i_lo
+#if CV_SSE4_1
+    __m128i magic_i_lo   = _mm_set1_epi64x(0x4330000000000000); // 2^52
+    __m128i v_lo         = _mm_blend_epi16(v.val, magic_i_lo, 0xcc); // mask 0xcc: upper dword of each lane from magic_i_lo, lower dword from v
+#else
+    __m128i magic_i_lo   = _mm_set1_epi32(0x43300000); // upper dword of the double 2^52
+    __m128i v_lo         = _mm_unpacklo_epi32(_mm_shuffle_epi32(v.val, _MM_SHUFFLE(0, 0, 2, 0)), magic_i_lo); // same result without SSE4.1: low dwords of v paired with 0x43300000
+#endif
+    // Extract the 32 most significant bits of v
+    __m128i v_hi         = _mm_srli_epi64(v.val, 32);
+    // Flip the msb of v_hi and blend with 0x45300000
+            v_hi         = _mm_xor_si128(v_hi, magic_i_hi32);
+    // Compute in double precision
+    __m128d v_hi_dbl     = _mm_sub_pd(_mm_castsi128_pd(v_hi), magic_d_all);
+    // (v_hi - magic_d_all) + v_lo  Do not assume associativity of floating point addition
+    __m128d result       = _mm_add_pd(v_hi_dbl, _mm_castsi128_pd(v_lo));
+    return v_float64x2(result);
+}
+
+////////////// Lookup table access ////////////////////
+
+inline v_int8x16 v_lut(const schar* tab, const int* idx) // gather: lane i = tab[idx[i]] for i = 0..15
+{
+#if defined(_MSC_VER)
+    return v_int8x16(_mm_setr_epi8(tab[idx[0]], tab[idx[1]], tab[idx[ 2]], tab[idx[ 3]], tab[idx[ 4]], tab[idx[ 5]], tab[idx[ 6]], tab[idx[ 7]],
+                                   tab[idx[8]], tab[idx[9]], tab[idx[10]], tab[idx[11]], tab[idx[12]], tab[idx[13]], tab[idx[14]], tab[idx[15]]));
+#else // build the two 64-bit halves separately, then combine them
+    return v_int8x16(_mm_setr_epi64( // NOTE(review): _mm_setr_pi8 operates on __m64 (MMX) values - confirm no _mm_empty() is needed on the targeted compilers
+                        _mm_setr_pi8(tab[idx[0]], tab[idx[1]], tab[idx[ 2]], tab[idx[ 3]], tab[idx[ 4]], tab[idx[ 5]], tab[idx[ 6]], tab[idx[ 7]]),
+                        _mm_setr_pi8(tab[idx[8]], tab[idx[9]], tab[idx[10]], tab[idx[11]], tab[idx[12]], tab[idx[13]], tab[idx[14]], tab[idx[15]])
+                    ));
+#endif
+}
+inline v_int8x16 v_lut_pairs(const schar* tab, const int* idx) // reads idx[0..7]; each index fetches 2 consecutive bytes starting at tab + idx[i]
+{
+#if defined(_MSC_VER) // NOTE(review): both branches read through a casted short* at tab + idx[i], which may be unaligned - confirm this is acceptable on the supported targets
+    return v_int8x16(_mm_setr_epi16(*(const short*)(tab + idx[0]), *(const short*)(tab + idx[1]), *(const short*)(tab + idx[2]), *(const short*)(tab + idx[3]),
+                                    *(const short*)(tab + idx[4]), *(const short*)(tab + idx[5]), *(const short*)(tab + idx[6]), *(const short*)(tab + idx[7])));
+#else // build the two 64-bit halves separately, then combine them
+    return v_int8x16(_mm_setr_epi64(
+                        _mm_setr_pi16(*(const short*)(tab + idx[0]), *(const short*)(tab + idx[1]), *(const short*)(tab + idx[2]), *(const short*)(tab + idx[3])),
+                        _mm_setr_pi16(*(const short*)(tab + idx[4]), *(const short*)(tab + idx[5]), *(const short*)(tab + idx[6]), *(const short*)(tab + idx[7]))
+                    ));
+#endif
+}
+inline v_int8x16 v_lut_quads(const schar* tab, const int* idx) // reads idx[0..3]; each index fetches 4 consecutive bytes starting at tab + idx[i]
+{
+#if defined(_MSC_VER)
+    return v_int8x16(_mm_setr_epi32(*(const int*)(tab + idx[0]), *(const int*)(tab + idx[1]),
+                                    *(const int*)(tab + idx[2]), *(const int*)(tab + idx[3])));
+#else // build the two 64-bit halves separately, then combine them
+    return v_int8x16(_mm_setr_epi64(
+                        _mm_setr_pi32(*(const int*)(tab + idx[0]), *(const int*)(tab + idx[1])),
+                        _mm_setr_pi32(*(const int*)(tab + idx[2]), *(const int*)(tab + idx[3]))
+                    ));
+#endif
+}
+inline v_uint8x16 v_lut(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut((const schar *)tab, idx)); } // unsigned variant: forwards to the schar overload
+inline v_uint8x16 v_lut_pairs(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut_pairs((const schar *)tab, idx)); } // unsigned variant: forwards to the schar overload
+inline v_uint8x16 v_lut_quads(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut_quads((const schar *)tab, idx)); } // unsigned variant: forwards to the schar overload
+
+inline v_int16x8 v_lut(const short* tab, const int* idx) // gather: lane i = tab[idx[i]] for i = 0..7
+{
+#if defined(_MSC_VER)
+    return v_int16x8(_mm_setr_epi16(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]],
+                                    tab[idx[4]], tab[idx[5]], tab[idx[6]], tab[idx[7]]));
+#else // build the two 64-bit halves separately, then combine them
+    return v_int16x8(_mm_setr_epi64(
+                        _mm_setr_pi16(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]),
+                        _mm_setr_pi16(tab[idx[4]], tab[idx[5]], tab[idx[6]], tab[idx[7]])
+                    ));
+#endif
+}
+inline v_int16x8 v_lut_pairs(const short* tab, const int* idx) // reads idx[0..3]; each index fetches 2 consecutive shorts starting at tab + idx[i]
+{
+#if defined(_MSC_VER)
+    return v_int16x8(_mm_setr_epi32(*(const int*)(tab + idx[0]), *(const int*)(tab + idx[1]),
+                                    *(const int*)(tab + idx[2]), *(const int*)(tab + idx[3])));
+#else // build the two 64-bit halves separately, then combine them
+    return v_int16x8(_mm_setr_epi64(
+                        _mm_setr_pi32(*(const int*)(tab + idx[0]), *(const int*)(tab + idx[1])),
+                        _mm_setr_pi32(*(const int*)(tab + idx[2]), *(const int*)(tab + idx[3]))
+                    ));
+#endif
+}
+inline v_int16x8 v_lut_quads(const short* tab, const int* idx) // reads idx[0..1]; each index fetches 4 consecutive shorts starting at tab + idx[i]
+{
+    return v_int16x8(_mm_set_epi64x(*(const int64_t*)(tab + idx[1]), *(const int64_t*)(tab + idx[0]))); // _mm_set_epi64x takes the high half first, so the idx[0] group lands in the low lanes
+}
+inline v_uint16x8 v_lut(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut((const short *)tab, idx)); } // unsigned variant: forwards to the short overload
+inline v_uint16x8 v_lut_pairs(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut_pairs((const short *)tab, idx)); } // unsigned variant: forwards to the short overload
+inline v_uint16x8 v_lut_quads(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut_quads((const short *)tab, idx)); } // unsigned variant: forwards to the short overload
+
+inline v_int32x4 v_lut(const int* tab, const int* idx) // gather: lane i = tab[idx[i]] for i = 0..3
+{
+#if defined(_MSC_VER)
+    return v_int32x4(_mm_setr_epi32(tab[idx[0]], tab[idx[1]],
+                                    tab[idx[2]], tab[idx[3]]));
+#else // build the two 64-bit halves separately, then combine them
+    return v_int32x4(_mm_setr_epi64(
+                        _mm_setr_pi32(tab[idx[0]], tab[idx[1]]),
+                        _mm_setr_pi32(tab[idx[2]], tab[idx[3]])
+                    ));
+#endif
+}
+inline v_int32x4 v_lut_pairs(const int* tab, const int* idx) // reads idx[0..1]; each index fetches 2 consecutive ints starting at tab + idx[i]
+{
+    return v_int32x4(_mm_set_epi64x(*(const int64_t*)(tab + idx[1]), *(const int64_t*)(tab + idx[0]))); // _mm_set_epi64x takes the high half first, so the idx[0] pair lands in the low lanes
+}
+inline v_int32x4 v_lut_quads(const int* tab, const int* idx) // reads only idx[0]; returns the 4 consecutive ints starting at tab + idx[0]
+{
+    return v_int32x4(_mm_loadu_si128((const __m128i*)(tab + idx[0]))); // unaligned 128-bit load
+}
+inline v_uint32x4 v_lut(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut((const int *)tab, idx)); } // unsigned variant: forwards to the int overload
+inline v_uint32x4 v_lut_pairs(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut_pairs((const int *)tab, idx)); } // unsigned variant: forwards to the int overload
+inline v_uint32x4 v_lut_quads(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut_quads((const int *)tab, idx)); } // unsigned variant: forwards to the int overload
+
+inline v_int64x2 v_lut(const int64_t* tab, const int* idx) // gather: lane 0 = tab[idx[0]], lane 1 = tab[idx[1]]
+{
+    return v_int64x2(_mm_set_epi64x(tab[idx[1]], tab[idx[0]])); // _mm_set_epi64x takes the high lane first
+}
+inline v_int64x2 v_lut_pairs(const int64_t* tab, const int* idx) // reads only idx[0]; returns the 2 consecutive values starting at tab + idx[0]
+{
+    return v_int64x2(_mm_loadu_si128((const __m128i*)(tab + idx[0]))); // unaligned 128-bit load
+}
+inline v_uint64x2 v_lut(const uint64_t* tab, const int* idx) { return v_reinterpret_as_u64(v_lut((const int64_t *)tab, idx)); } // unsigned variant: forwards to the int64_t overload
+inline v_uint64x2 v_lut_pairs(const uint64_t* tab, const int* idx) { return v_reinterpret_as_u64(v_lut_pairs((const int64_t *)tab, idx)); } // unsigned variant: forwards to the int64_t overload
+
+inline v_float32x4 v_lut(const float* tab, const int* idx) // gather: lane i = tab[idx[i]] for i = 0..3
+{
+    return v_float32x4(_mm_setr_ps(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]));
+}
+inline v_float32x4 v_lut_pairs(const float* tab, const int* idx) { return v_reinterpret_as_f32(v_lut_pairs((const int *)tab, idx)); } // float variant: forwards to the int overload and reinterprets the result
+inline v_float32x4 v_lut_quads(const float* tab, const int* idx) { return v_reinterpret_as_f32(v_lut_quads((const int *)tab, idx)); } // float variant: forwards to the int overload and reinterprets the result
+
+inline v_float64x2 v_lut(const double* tab, const int* idx) // gather: lane 0 = tab[idx[0]], lane 1 = tab[idx[1]]
+{
+    return v_float64x2(_mm_setr_pd(tab[idx[0]], tab[idx[1]]));
+}
+inline v_float64x2 v_lut_pairs(const double* tab, const int* idx) { return v_float64x2(_mm_castsi128_pd(_mm_loadu_si128((const __m128i*)(tab + idx[0])))); } // reads only idx[0]; unaligned load of the 2 consecutive doubles at tab + idx[0]
+
+inline v_int32x4 v_lut(const int* tab, const v_int32x4& idxvec) // gather with indices held in a vector: lane i = tab[idxvec lane i]
+{
+    int CV_DECL_ALIGNED(32) idx[4];
+    v_store_aligned(idx, idxvec); // spill the indices to an aligned stack array so they can be read as scalars
+    return v_int32x4(_mm_setr_epi32(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]));
+}
+
+inline v_uint32x4 v_lut(const unsigned* tab, const v_int32x4& idxvec) // unsigned variant of the vector-index gather
+{
+    return v_reinterpret_as_u32(v_lut((const int *)tab, idxvec)); // forwards to the int overload and reinterprets the result
+}
+
+inline v_float32x4 v_lut(const float* tab, const v_int32x4& idxvec) // gather with indices held in a vector: lane i = tab[idxvec lane i]
+{
+    int CV_DECL_ALIGNED(32) idx[4];
+    v_store_aligned(idx, idxvec); // spill the indices to an aligned stack array so they can be read as scalars
+    return v_float32x4(_mm_setr_ps(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]));
+}
+
+inline v_float64x2 v_lut(const double* tab, const v_int32x4& idxvec) // gather of two doubles; only two indices are consumed
+{
+    int idx[2];
+    v_store_low(idx, idxvec); // presumably writes the low two lanes of idxvec; v_store_low is defined outside this chunk - confirm
+    return v_float64x2(_mm_setr_pd(tab[idx[0]], tab[idx[1]]));
+}
+
+// loads pairs from the table and deinterleaves them, e.g. returns:
+//   x = (tab[idxvec[0]], tab[idxvec[1]], tab[idxvec[2]], tab[idxvec[3]]),
+//   y = (tab[idxvec[0]+1], tab[idxvec[1]+1], tab[idxvec[2]+1], tab[idxvec[3]+1])
+// note that the indices are float's indices, not the float-pair indices.
+// in theory, this function can be used to implement bilinear interpolation,
+// when idxvec are the offsets within the image.
+inline void v_lut_deinterleave(const float* tab, const v_int32x4& idxvec, v_float32x4& x, v_float32x4& y)
+{
+    int CV_DECL_ALIGNED(32) idx[4];
+    v_store_aligned(idx, idxvec); // spill the indices to an aligned stack array so they can be read as scalars
+    __m128 z = _mm_setzero_ps();
+    __m128 xy01 = _mm_loadl_pi(z, (__m64*)(tab + idx[0])); // x0 y0 0 0
+    __m128 xy23 = _mm_loadl_pi(z, (__m64*)(tab + idx[2])); // x2 y2 0 0
+    xy01 = _mm_loadh_pi(xy01, (__m64*)(tab + idx[1])); // x0 y0 x1 y1
+    xy23 = _mm_loadh_pi(xy23, (__m64*)(tab + idx[3])); // x2 y2 x3 y3
+    __m128 xxyy02 = _mm_unpacklo_ps(xy01, xy23); // x0 x2 y0 y2
+    __m128 xxyy13 = _mm_unpackhi_ps(xy01, xy23); // x1 x3 y1 y3
+    x = v_float32x4(_mm_unpacklo_ps(xxyy02, xxyy13)); // x0 x1 x2 x3
+    y = v_float32x4(_mm_unpackhi_ps(xxyy02, xxyy13)); // y0 y1 y2 y3
+}
+
+// Loads the pairs (tab[i], tab[i+1]) for the two indices written by v_store_low and
+// deinterleaves them: x receives the first elements, y the second elements.
+inline void v_lut_deinterleave(const double* tab, const v_int32x4& idxvec, v_float64x2& x, v_float64x2& y)
+{
+    int lanes[2]; v_store_low(lanes, idxvec);
+    const __m128d pair0 = _mm_loadu_pd(tab + lanes[0]), pair1 = _mm_loadu_pd(tab + lanes[1]);
+    x = v_float64x2(_mm_unpacklo_pd(pair0, pair1));
+    y = v_float64x2(_mm_unpackhi_pd(pair0, pair1));
+}
+
+inline v_int8x16 v_interleave_pairs(const v_int8x16& vec) // output byte order within each group of 4 bytes: 0,2,1,3
+{
+#if CV_SSSE3
+    return v_int8x16(_mm_shuffle_epi8(vec.val, _mm_set_epi64x(0x0f0d0e0c0b090a08, 0x0705060403010200))); // single byte shuffle; the constants list the source byte indices
+#else
+    __m128i a = _mm_shufflelo_epi16(vec.val, _MM_SHUFFLE(3, 1, 2, 0)); // low half: words 0,2,1,3
+    a = _mm_shufflehi_epi16(a, _MM_SHUFFLE(3, 1, 2, 0)); // high half: words 4,6,5,7
+    a = _mm_shuffle_epi32(a, _MM_SHUFFLE(3, 1, 2, 0)); // words 0,2,4,6 in the low 8 bytes, words 1,3,5,7 in the high 8 bytes
+    return v_int8x16(_mm_unpacklo_epi8(a, _mm_unpackhi_epi64(a, a))); // interleave the bytes of the two halves
+#endif
+}
+inline v_uint8x16 v_interleave_pairs(const v_uint8x16& vec) { return v_reinterpret_as_u8(v_interleave_pairs(v_reinterpret_as_s8(vec))); } // unsigned: same permutation via the signed overload
+inline v_int8x16 v_interleave_quads(const v_int8x16& vec) // output byte order within each group of 8 bytes: 0,4,1,5,2,6,3,7
+{
+#if CV_SSSE3
+    return v_int8x16(_mm_shuffle_epi8(vec.val, _mm_set_epi64x(0x0f0b0e0a0d090c08, 0x0703060205010400))); // single byte shuffle; the constants list the source byte indices
+#else
+    __m128i a = _mm_shuffle_epi32(vec.val, _MM_SHUFFLE(3, 1, 2, 0)); // dwords 0,2,1,3
+    return v_int8x16(_mm_unpacklo_epi8(a, _mm_unpackhi_epi64(a, a))); // interleave the bytes of the low and high 8-byte halves
+#endif
+}
+inline v_uint8x16 v_interleave_quads(const v_uint8x16& vec) { return v_reinterpret_as_u8(v_interleave_quads(v_reinterpret_as_s8(vec))); } // unsigned: same permutation via the signed overload
+
+inline v_int16x8 v_interleave_pairs(const v_int16x8& vec) // output word order within each group of 4 words: 0,2,1,3
+{
+#if CV_SSSE3
+    return v_int16x8(_mm_shuffle_epi8(vec.val, _mm_set_epi64x(0x0f0e0b0a0d0c0908, 0x0706030205040100))); // single byte shuffle; the constants list the source byte indices
+#else
+    __m128i a = _mm_shufflelo_epi16(vec.val, _MM_SHUFFLE(3, 1, 2, 0)); // low half: words 0,2,1,3
+    return v_int16x8(_mm_shufflehi_epi16(a, _MM_SHUFFLE(3, 1, 2, 0))); // high half: words 4,6,5,7
+#endif
+}
+inline v_uint16x8 v_interleave_pairs(const v_uint16x8& vec) { return v_reinterpret_as_u16(v_interleave_pairs(v_reinterpret_as_s16(vec))); } // unsigned: same permutation via the signed overload
+inline v_int16x8 v_interleave_quads(const v_int16x8& vec) // output word order: 0,4,1,5,2,6,3,7
+{
+#if CV_SSSE3
+    return v_int16x8(_mm_shuffle_epi8(vec.val, _mm_set_epi64x(0x0f0e07060d0c0504, 0x0b0a030209080100))); // single byte shuffle; the constants list the source byte indices
+#else
+    return v_int16x8(_mm_unpacklo_epi16(vec.val, _mm_unpackhi_epi64(vec.val, vec.val))); // interleave words of the low half with words of the high half
+#endif
+}
+inline v_uint16x8 v_interleave_quads(const v_uint16x8& vec) { return v_reinterpret_as_u16(v_interleave_quads(v_reinterpret_as_s16(vec))); } // unsigned: same permutation via the signed overload
+
+inline v_int32x4 v_interleave_pairs(const v_int32x4& vec) // output lane order: 0,2,1,3
+{
+    return v_int32x4(_mm_shuffle_epi32(vec.val, _MM_SHUFFLE(3, 1, 2, 0))); // swaps the two middle 32-bit lanes
+}
+inline v_uint32x4 v_interleave_pairs(const v_uint32x4& vec) { return v_reinterpret_as_u32(v_interleave_pairs(v_reinterpret_as_s32(vec))); } // unsigned: via the signed overload
+inline v_float32x4 v_interleave_pairs(const v_float32x4& vec) { return v_reinterpret_as_f32(v_interleave_pairs(v_reinterpret_as_s32(vec))); } // float: via the signed integer overload
+
+inline v_int8x16 v_pack_triplets(const v_int8x16& vec) // first 12 output bytes = input bytes 0,1,2, 4,5,6, 8,9,10, 12,13,14
+{
+#if CV_SSSE3
+    return v_int8x16(_mm_shuffle_epi8(vec.val, _mm_set_epi64x(0xffffff0f0e0d0c0a, 0x0908060504020100))); // 0xff indices produce zero bytes at the tail
+#else
+    __m128i mask = _mm_set1_epi64x(0x00000000FFFFFFFF); // selects the low dword of each 64-bit half
+    __m128i a = _mm_srli_si128(_mm_or_si128(_mm_andnot_si128(mask, vec.val), _mm_and_si128(mask, _mm_sll_epi32(vec.val, _mm_set_epi64x(0, 8)))), 1); // low dword of each half shifted up one byte, then the register shifted down one byte: removes bytes 3 and 11
+    return v_int8x16(_mm_srli_si128(_mm_shufflelo_epi16(a, _MM_SHUFFLE(2, 1, 0, 3)), 2)); // rotate word 3 (leftover byte 7) to the front and shift it out
+#endif
+}
+inline v_uint8x16 v_pack_triplets(const v_uint8x16& vec) { return v_reinterpret_as_u8(v_pack_triplets(v_reinterpret_as_s8(vec))); } // unsigned: via the signed overload
+
+inline v_int16x8 v_pack_triplets(const v_int16x8& vec) // first 6 output words = input words 0,1,2, 4,5,6
+{
+#if CV_SSSE3
+    return v_int16x8(_mm_shuffle_epi8(vec.val, _mm_set_epi64x(0xffff0f0e0d0c0b0a, 0x0908050403020100))); // 0xff indices produce a zero last word
+#else
+    return v_int16x8(_mm_srli_si128(_mm_shufflelo_epi16(vec.val, _MM_SHUFFLE(2, 1, 0, 3)), 2)); // rotate word 3 to the front, then shift it out by 2 bytes
+#endif
+}
+inline v_uint16x8 v_pack_triplets(const v_uint16x8& vec) { return v_reinterpret_as_u16(v_pack_triplets(v_reinterpret_as_s16(vec))); } // unsigned: via the signed overload
+
+inline v_int32x4 v_pack_triplets(const v_int32x4& vec) { return vec; } // 4-lane vectors are returned unchanged
+inline v_uint32x4 v_pack_triplets(const v_uint32x4& vec) { return vec; } // identity, as for v_int32x4
+inline v_float32x4 v_pack_triplets(const v_float32x4& vec) { return vec; } // identity, as for v_int32x4
+
+// Lane i of v as a scalar: native extract with SSE4.1, else v_rotate_right<i> + get0().
+template<int i> inline uchar v_extract_n(const v_uint8x16& v)
+{
+#if CV_SSE4_1
+    return (uchar)_mm_extract_epi8(v.val, i);
+#else
+    return v_rotate_right<i>(v).get0();
+#endif
+}
+
+// Signed variant: extracts through the unsigned overload and casts to schar.
+template<int i> inline schar v_extract_n(const v_int8x16& v)
+{
+    return (schar)v_extract_n<i>(v_reinterpret_as_u8(v));
+}
+
+// Lane i of v as a scalar (via _mm_extract_epi16).
+template<int i> inline ushort v_extract_n(const v_uint16x8& v)
+{
+    return (ushort)_mm_extract_epi16(v.val, i);
+}
+
+// Signed variant: extracts through the unsigned overload and casts to short.
+template<int i> inline short v_extract_n(const v_int16x8& v)
+{
+    return (short)v_extract_n<i>(v_reinterpret_as_u16(v));
+}
+
+// Lane i of v as a scalar: native extract with SSE4.1, else v_rotate_right<i> + get0().
+template<int i> inline uint v_extract_n(const v_uint32x4& v)
+{
+#if CV_SSE4_1
+    return (uint)_mm_extract_epi32(v.val, i);
+#else
+    return v_rotate_right<i>(v).get0();
+#endif
+}
+
+// Signed variant: extracts through the unsigned overload and casts to int.
+template<int i> inline int v_extract_n(const v_int32x4& v)
+{
+    return (int)v_extract_n<i>(v_reinterpret_as_u32(v));
+}
+
+// Lane i of v: _v128_extract_epi64 when flagged native, else v_rotate_right<i> + get0().
+template<int i> inline uint64 v_extract_n(const v_uint64x2& v)
+{
+#ifdef CV__SIMD_NATIVE_mm_extract_epi64
+    return (uint64)_v128_extract_epi64<i>(v.val);
+#else
+    return v_rotate_right<i>(v).get0();
+#endif
+}
+
+// Signed variant: extracts through the unsigned overload and casts to int64.
+template<int i> inline int64 v_extract_n(const v_int64x2& v)
+{
+    return (int64)v_extract_n<i>(v_reinterpret_as_u64(v));
+}
+
+// Float lane i: extracted as uint through the u32 overload, then read back via a union.
+template<int i> inline float v_extract_n(const v_float32x4& v)
+{
+    union { uint iv; float fv; } bits;
+    bits.iv = v_extract_n<i>(v_reinterpret_as_u32(v));
+    return bits.fv;
+}
+
+// Double lane i: extracted as uint64 through the u64 overload, then read back via a union.
+template<int i> inline double v_extract_n(const v_float64x2& v)
+{
+    union { uint64 iv; double dv; } bits;
+    bits.iv = v_extract_n<i>(v_reinterpret_as_u64(v));
+    return bits.dv;
+}
+
+// Replicates 32-bit lane i of v into all four lanes.
+template<int i> inline v_int32x4 v_broadcast_element(const v_int32x4& v)
+{
+    return v_int32x4(_mm_shuffle_epi32(v.val, _MM_SHUFFLE(i,i,i,i)));
+}
+
+// Replicates 32-bit lane i of v into all four lanes.
+template<int i> inline v_uint32x4 v_broadcast_element(const v_uint32x4& v)
+{
+    return v_uint32x4(_mm_shuffle_epi32(v.val, _MM_SHUFFLE(i,i,i,i)));
+}
+
+// Replicates float lane i of v into all four lanes (v is both shuffle operands).
+template<int i> inline v_float32x4 v_broadcast_element(const v_float32x4& v)
+{
+    return v_float32x4(_mm_shuffle_ps(v.val, v.val, _MM_SHUFFLE((char)i,(char)i,(char)i,(char)i)));
+}
+
+////////////// FP16 support ///////////////////////////
+
+// Loads 4 half-precision values (8 bytes) from ptr and widens them to float32.
+inline v_float32x4 v_load_expand(const float16_t* ptr)
+{
+#if CV_FP16
+    return v_float32x4(_mm_cvtph_ps(_mm_loadl_epi64((const __m128i*)ptr))); // 64-bit load: only 4 halves are converted, so never read 8 bytes past them
+#else
+    const __m128i z = _mm_setzero_si128(), delta = _mm_set1_epi32(0x38000000);
+    const __m128i signmask = _mm_set1_epi32(0x80000000), maxexp = _mm_set1_epi32(0x7c000000);
+    const __m128 deltaf = _mm_castsi128_ps(_mm_set1_epi32(0x38800000));
+    __m128i bits = _mm_unpacklo_epi16(z, _mm_loadl_epi64((const __m128i*)ptr)); // h << 16
+    __m128i e = _mm_and_si128(bits, maxexp), sign = _mm_and_si128(bits, signmask); // exponent field and sign bit of each lane
+    __m128i t = _mm_add_epi32(_mm_srli_epi32(_mm_xor_si128(bits, sign), 3), delta); // ((h & 0x7fff) << 13) + delta
+    __m128i zt = _mm_castps_si128(_mm_sub_ps(_mm_castsi128_ps(_mm_add_epi32(t, _mm_set1_epi32(1 << 23))), deltaf)); // candidate for lanes whose exponent field is zero
+    t = _mm_add_epi32(t, _mm_and_si128(delta, _mm_cmpeq_epi32(maxexp, e))); // exponent field all ones: add delta once more
+    __m128i zmask = _mm_cmpeq_epi32(e, z);
+    __m128i ft = v_select_si128(zmask, zt, t);
+    return v_float32x4(_mm_castsi128_ps(_mm_or_si128(ft, sign))); // re-attach the sign bit
+#endif
+}
+
+inline void v_pack_store(float16_t* ptr, const v_float32x4& v) // narrows 4 floats to half precision and stores 8 bytes at ptr
+{
+#if CV_FP16
+    __m128i fp16_value = _mm_cvtps_ph(v.val, 0); // hardware conversion, rounding-control immediate 0
+    _mm_storel_epi64((__m128i*)ptr, fp16_value); // only the low 64 bits (4 halves) are written
+#else
+    const __m128i signmask = _mm_set1_epi32(0x80000000);
+    const __m128i rval = _mm_set1_epi32(0x3f000000);
+
+    __m128i t = _mm_castps_si128(v.val);
+    __m128i sign = _mm_srai_epi32(_mm_and_si128(t, signmask), 16); // sign moved to bit 15; the arithmetic shift also fills bits 16..31 for negatives
+    t = _mm_andnot_si128(signmask, t); // |v| as raw bits
+
+    __m128i finitemask = _mm_cmpgt_epi32(_mm_set1_epi32(0x47800000), t); // |v| below 0x47800000 (65536.0f)
+    __m128i isnan = _mm_cmpgt_epi32(t, _mm_set1_epi32(0x7f800000)); // above the float32 infinity pattern
+    __m128i naninf = v_select_si128(isnan, _mm_set1_epi32(0x7e00), _mm_set1_epi32(0x7c00)); // half-precision NaN / Inf bit patterns
+    __m128i tinymask = _mm_cmpgt_epi32(_mm_set1_epi32(0x38800000), t); // |v| below 0x38800000 (2^-14)
+    __m128i tt = _mm_castps_si128(_mm_add_ps(_mm_castsi128_ps(t), _mm_castsi128_ps(rval))); // small values: float add of rval (0.5f) ...
+    tt = _mm_sub_epi32(tt, rval); // ... followed by an integer subtract of rval
+    __m128i odd = _mm_and_si128(_mm_srli_epi32(t, 13), _mm_set1_epi32(1)); // lowest mantissa bit that survives the 13-bit shift
+    __m128i nt = _mm_add_epi32(t, _mm_set1_epi32(0xc8000fff)); // rebias the exponent and add 0xfff as rounding bias
+    nt = _mm_srli_epi32(_mm_add_epi32(nt, odd), 13); // + odd gives round-half-to-even; then drop 13 mantissa bits
+    t = v_select_si128(tinymask, tt, nt);
+    t = v_select_si128(finitemask, t, naninf);
+    t = _mm_or_si128(t, sign);
+    t = _mm_packs_epi32(t, t); // signed 32 -> 16 pack; the sign-filled upper bits keep negative lanes in range
+    _mm_storel_epi64((__m128i*)ptr, t);
+#endif
+}
+
+inline void v_cleanup() {} // intentionally empty in this backend
+
+CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
+
 //! @endcond
 
 }
diff --git a/IPL/include/opencv/opencv2/core/hal/intrin_sse_em.hpp b/IPL/include/opencv/opencv2/core/hal/intrin_sse_em.hpp
new file mode 100644
index 0000000..6fb0881
--- /dev/null
+++ b/IPL/include/opencv/opencv2/core/hal/intrin_sse_em.hpp
@@ -0,0 +1,180 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html
+
+#ifndef OPENCV_HAL_INTRIN_SSE_EM_HPP
+#define OPENCV_HAL_INTRIN_SSE_EM_HPP
+
+namespace cv
+{
+
+//! @cond IGNORED
+
+CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
+
+// Defines _v128_<fun>(a) as a thin forwarder to the native _mm_<fun>(a).
+#define OPENCV_HAL_SSE_WRAP_1(fun, tp) \
+    inline tp _v128_##fun(const tp& a) { return _mm_##fun(a); }
+
+// Defines _v128_<fun>(a, b) as a thin forwarder to the native _mm_<fun>(a, b).
+#define OPENCV_HAL_SSE_WRAP_2(fun, tp) \
+    inline tp _v128_##fun(const tp& a, const tp& b) { return _mm_##fun(a, b); }
+
+// Defines _v128_<fun>(a, b, c) as a thin forwarder to the native _mm_<fun>(a, b, c).
+#define OPENCV_HAL_SSE_WRAP_3(fun, tp) \
+    inline tp _v128_##fun(const tp& a, const tp& b, const tp& c) { return _mm_##fun(a, b, c); }
+
+///////////////////////////// XOP /////////////////////////////
+
+// [todo] define CV_XOP
+#if 1 // !CV_XOP (always taken until CV_XOP exists): emulate the unsigned compare
+// Unsigned a > b per 32-bit lane: flipping the sign bit of both operands maps
+// unsigned order onto signed order, so the signed compare can be used.
+inline __m128i _v128_comgt_epu32(const __m128i& a, const __m128i& b)
+{ const __m128i delta = _mm_set1_epi32((int)0x80000000);
+  return _mm_cmpgt_epi32(_mm_xor_si128(a, delta), _mm_xor_si128(b, delta)); }
+// wrapping XOP: the macro takes the bare suffix and adds the _v128_ / _mm_ prefixes itself
+#else
+OPENCV_HAL_SSE_WRAP_2(comgt_epu32, __m128i)
+#endif // !CV_XOP
+
+///////////////////////////// SSE4.1 /////////////////////////////
+
+#if !CV_SSE4_1
+
+/** Swizzle **/
+inline __m128i _v128_blendv_epi8(const __m128i& a, const __m128i& b, const __m128i& mask)
+{ return _mm_xor_si128(a, _mm_and_si128(_mm_xor_si128(b, a), mask)); } // bit-wise select: b where a mask bit is 1, a elsewhere (matches the native per-byte blend only for all-ones/all-zeros mask bytes)
+
+/** Convert **/
+// 8 >> 16
+// Zero-extends the 8 low bytes of a to 16-bit lanes by interleaving with zero bytes.
+inline __m128i _v128_cvtepu8_epi16(const __m128i& a)
+{ return _mm_unpacklo_epi8(a, _mm_setzero_si128()); }
+// Sign-extends the 8 low bytes of a: each byte is duplicated into both halves of a
+// 16-bit lane, then an arithmetic shift by 8 leaves the sign-extended byte.
+inline __m128i _v128_cvtepi8_epi16(const __m128i& a)
+{ return _mm_srai_epi16(_mm_unpacklo_epi8(a, a), 8); }
+// 8 >> 32
+// Zero-extends the 4 low bytes of a to 32-bit lanes (two rounds of interleaving with zero).
+inline __m128i _v128_cvtepu8_epi32(const __m128i& a)
+{
+    const __m128i zero = _mm_setzero_si128();
+    return _mm_unpacklo_epi16(_mm_unpacklo_epi8(a, zero), zero);
+}
+// Sign-extends the 4 low bytes of a: each byte is replicated across a 32-bit lane,
+// then an arithmetic shift by 24 leaves the sign-extended byte.
+inline __m128i _v128_cvtepi8_epi32(const __m128i& a)
+{ const __m128i twice = _mm_unpacklo_epi8(a, a);
+  return _mm_srai_epi32(_mm_unpacklo_epi8(twice, twice), 24); }
+// 16 >> 32
+// Zero-extends the 4 low 16-bit lanes of a to 32-bit lanes.
+inline __m128i _v128_cvtepu16_epi32(const __m128i& a)
+{ return _mm_unpacklo_epi16(a, _mm_setzero_si128()); }
+// Sign-extends the 4 low 16-bit lanes of a: each lane is duplicated into both halves
+// of a 32-bit lane, then an arithmetic shift by 16 leaves the sign-extended value.
+inline __m128i _v128_cvtepi16_epi32(const __m128i& a)
+{ return _mm_srai_epi32(_mm_unpacklo_epi16(a, a), 16); }
+// 32 >> 64
+// Zero-extends the 2 low 32-bit lanes of a to 64-bit lanes.
+inline __m128i _v128_cvtepu32_epi64(const __m128i& a)
+{ return _mm_unpacklo_epi32(a, _mm_setzero_si128()); }
+// Sign-extends the 2 low 32-bit lanes of a: the upper half of each 64-bit lane is
+// the lane's sign mask (arithmetic shift by 31).
+inline __m128i _v128_cvtepi32_epi64(const __m128i& a)
+{ return _mm_unpacklo_epi32(a, _mm_srai_epi32(a, 31)); }
+
+/** Arithmetic **/
+inline __m128i _v128_mullo_epi32(const __m128i& a, const __m128i& b) // low 32 bits of a*b for each of the four lanes
+{
+    __m128i c0 = _mm_mul_epu32(a, b); // 64-bit products of lanes 0 and 2
+    __m128i c1 = _mm_mul_epu32(_mm_srli_epi64(a, 32), _mm_srli_epi64(b, 32)); // 64-bit products of lanes 1 and 3
+    __m128i d0 = _mm_unpacklo_epi32(c0, c1); // (lo p0, lo p1, hi p0, hi p1)
+    __m128i d1 = _mm_unpackhi_epi32(c0, c1); // (lo p2, lo p3, hi p2, hi p3)
+    return _mm_unpacklo_epi64(d0, d1); // (lo p0, lo p1, lo p2, lo p3)
+}
+
+/** Math **/
+inline __m128i _v128_min_epu32(const __m128i& a, const __m128i& b)
+{ return _v128_blendv_epi8(a, b, _v128_comgt_epu32(a, b)); } // per lane: take b where a > b (unsigned), otherwise a
+
+// wrapping SSE4.1
+#else
+OPENCV_HAL_SSE_WRAP_1(cvtepu8_epi16, __m128i) // SSE4.1 build: each _v128_<fun> below forwards to the native _mm_<fun>
+OPENCV_HAL_SSE_WRAP_1(cvtepi8_epi16, __m128i)
+OPENCV_HAL_SSE_WRAP_1(cvtepu8_epi32, __m128i)
+OPENCV_HAL_SSE_WRAP_1(cvtepi8_epi32, __m128i)
+OPENCV_HAL_SSE_WRAP_1(cvtepu16_epi32, __m128i)
+OPENCV_HAL_SSE_WRAP_1(cvtepi16_epi32, __m128i)
+OPENCV_HAL_SSE_WRAP_1(cvtepu32_epi64, __m128i)
+OPENCV_HAL_SSE_WRAP_1(cvtepi32_epi64, __m128i)
+OPENCV_HAL_SSE_WRAP_2(min_epu32, __m128i)
+OPENCV_HAL_SSE_WRAP_2(mullo_epi32, __m128i)
+OPENCV_HAL_SSE_WRAP_3(blendv_epi8, __m128i) // same names as the emulated versions in the !CV_SSE4_1 branch
+#endif // !CV_SSE4_1
+
+///////////////////////////// Revolutionary /////////////////////////////
+
+/** Convert **/
+// 16 << 8
+// Zero-extends the 8 high bytes of a to 16-bit lanes by interleaving with zero bytes.
+inline __m128i _v128_cvtepu8_epi16_high(const __m128i& a)
+{ return _mm_unpackhi_epi8(a, _mm_setzero_si128()); }
+// Sign-extends the 8 high bytes of a: each byte is duplicated into both halves of a
+// 16-bit lane, then an arithmetic shift by 8 leaves the sign-extended byte.
+inline __m128i _v128_cvtepi8_epi16_high(const __m128i& a)
+{ return _mm_srai_epi16(_mm_unpackhi_epi8(a, a), 8); }
+// 32 << 16
+// Zero-extends the 4 high 16-bit lanes of a to 32-bit lanes.
+inline __m128i _v128_cvtepu16_epi32_high(const __m128i& a)
+{ return _mm_unpackhi_epi16(a, _mm_setzero_si128()); }
+// Sign-extends the 4 high 16-bit lanes of a: each lane is duplicated into both halves
+// of a 32-bit lane, then an arithmetic shift by 16 leaves the sign-extended value.
+inline __m128i _v128_cvtepi16_epi32_high(const __m128i& a)
+{ return _mm_srai_epi32(_mm_unpackhi_epi16(a, a), 16); }
+// 64 << 32
+// Zero-extends the 2 high 32-bit lanes of a to 64-bit lanes.
+inline __m128i _v128_cvtepu32_epi64_high(const __m128i& a)
+{ return _mm_unpackhi_epi32(a, _mm_setzero_si128()); }
+// Sign-extends the 2 high 32-bit lanes of a: the upper half of each 64-bit lane is
+// the lane's sign mask (arithmetic shift by 31).
+inline __m128i _v128_cvtepi32_epi64_high(const __m128i& a)
+{ return _mm_unpackhi_epi32(a, _mm_srai_epi32(a, 31)); }
+
+/** Miscellaneous **/
+inline __m128i _v128_packs_epu32(const __m128i& a, const __m128i& b) // unsigned 32 -> 16 bit pack of a then b, clamped to 65535
+{
+    const __m128i m = _mm_set1_epi32(65535);
+    __m128i am = _v128_min_epu32(a, m); // clamp with an unsigned min so large values are not treated as negative
+    __m128i bm = _v128_min_epu32(b, m);
+#if CV_SSE4_1
+    return _mm_packus_epi32(am, bm);
+#else
+    const __m128i d = _mm_set1_epi32(32768), nd = _mm_set1_epi16(-32768);
+    am = _mm_sub_epi32(am, d); // shift [0, 65535] into the signed 16-bit range
+    bm = _mm_sub_epi32(bm, d);
+    am = _mm_packs_epi32(am, bm); // the signed pack can no longer saturate
+    return _mm_sub_epi16(am, nd); // subtracting -32768 undoes the bias (mod 2^16)
+#endif
+}
+
+template<int i>
+inline int64 _v128_extract_epi64(const __m128i& a) // returns 64-bit lane i of a
+{
+#if defined(CV__SIMD_HAVE_mm_extract_epi64) || (CV_SSE4_1 && (defined(__x86_64__)/*GCC*/ || defined(_M_X64)/*MSVC*/))
+#define CV__SIMD_NATIVE_mm_extract_epi64 1 // flag tested with #ifdef by v_extract_n(v_uint64x2) in intrin_sse.hpp
+    return _mm_extract_epi64(a, i);
+#else
+    CV_DECL_ALIGNED(16) int64 tmp[2]; // fallback: spill the register to memory and index it
+    _mm_store_si128((__m128i*)tmp, a);
+    return tmp[i];
+#endif
+}
+
+CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
+
+//! @endcond
+
+} // cv::
+
+#endif // OPENCV_HAL_INTRIN_SSE_EM_HPP
diff --git a/IPL/include/opencv/opencv2/core/hal/intrin_vsx.hpp b/IPL/include/opencv/opencv2/core/hal/intrin_vsx.hpp
new file mode 100644
index 0000000..e0f6cbf
--- /dev/null
+++ b/IPL/include/opencv/opencv2/core/hal/intrin_vsx.hpp
@@ -0,0 +1,1578 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html
+
+#ifndef OPENCV_HAL_VSX_HPP
+#define OPENCV_HAL_VSX_HPP
+
+#include <algorithm>
+#include "opencv2/core/utility.hpp"
+
+#define CV_SIMD128 1
+#define CV_SIMD128_64F 1
+
+namespace cv
+{
+
+//! @cond IGNORED
+
+CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
+
+///////// Types ////////////
+
+// VSX vector wrapper: 16 lanes of uchar stored in `val`.
+struct v_uint8x16
+{
+    typedef uchar lane_type;
+    enum { nlanes = 16 };
+    vec_uchar16 val;
+
+    // wraps an existing native vector
+    explicit v_uint8x16(const vec_uchar16& v) : val(v) {}
+    // default: initialised from vec_uchar16_z
+    v_uint8x16() : val(vec_uchar16_z) {}
+    // implicit conversion from vec_bchar16 through vec_uchar16_c
+    v_uint8x16(vec_bchar16 v) : val(vec_uchar16_c(v)) {}
+    // element-wise construction, arguments forwarded in order to vec_uchar16_set
+    v_uint8x16(uchar v0, uchar v1, uchar v2, uchar v3, uchar v4, uchar v5, uchar v6, uchar v7,
+               uchar v8, uchar v9, uchar v10, uchar v11, uchar v12, uchar v13, uchar v14, uchar v15)
+        : val(vec_uchar16_set(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15)) {}
+    uchar get0() const { return vec_extract(val, 0); } // element 0 of val
+};
+
+// VSX vector wrapper: 16 lanes of schar stored in `val`.
+struct v_int8x16
+{
+    typedef schar lane_type;
+    enum { nlanes = 16 };
+    vec_char16 val;
+
+    // wraps an existing native vector
+    explicit v_int8x16(const vec_char16& v) : val(v) {}
+    // default: initialised from vec_char16_z
+    v_int8x16() : val(vec_char16_z) {}
+    // implicit conversion from vec_bchar16 through vec_char16_c
+    v_int8x16(vec_bchar16 v) : val(vec_char16_c(v)) {}
+    // element-wise construction, arguments forwarded in order to vec_char16_set
+    v_int8x16(schar v0, schar v1, schar v2, schar v3, schar v4, schar v5, schar v6, schar v7,
+              schar v8, schar v9, schar v10, schar v11, schar v12, schar v13, schar v14, schar v15)
+        : val(vec_char16_set(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15)) {}
+    schar get0() const { return vec_extract(val, 0); } // element 0 of val
+};
+
+// VSX vector wrapper: 8 lanes of ushort stored in `val`.
+struct v_uint16x8
+{
+    typedef ushort lane_type;
+    enum { nlanes = 8 };
+    vec_ushort8 val;
+
+    // wraps an existing native vector
+    explicit v_uint16x8(const vec_ushort8& v) : val(v) {}
+    // default: initialised from vec_ushort8_z
+    v_uint16x8() : val(vec_ushort8_z) {}
+    // implicit conversion from vec_bshort8 through vec_ushort8_c
+    v_uint16x8(vec_bshort8 v) : val(vec_ushort8_c(v)) {}
+    // element-wise construction, arguments forwarded in order to vec_ushort8_set
+    v_uint16x8(ushort v0, ushort v1, ushort v2, ushort v3, ushort v4, ushort v5, ushort v6, ushort v7)
+        : val(vec_ushort8_set(v0, v1, v2, v3, v4, v5, v6, v7)) {}
+    ushort get0() const { return vec_extract(val, 0); } // element 0 of val
+};
+
+// VSX vector wrapper: 8 lanes of short stored in `val`.
+struct v_int16x8
+{
+    typedef short lane_type;
+    enum { nlanes = 8 };
+    vec_short8 val;
+
+    // wraps an existing native vector
+    explicit v_int16x8(const vec_short8& v) : val(v) {}
+    // default: initialised from vec_short8_z
+    v_int16x8() : val(vec_short8_z) {}
+    // implicit conversion from vec_bshort8 through vec_short8_c
+    v_int16x8(vec_bshort8 v) : val(vec_short8_c(v)) {}
+    // element-wise construction, arguments forwarded in order to vec_short8_set
+    v_int16x8(short v0, short v1, short v2, short v3, short v4, short v5, short v6, short v7)
+        : val(vec_short8_set(v0, v1, v2, v3, v4, v5, v6, v7)) {}
+    short get0() const { return vec_extract(val, 0); } // element 0 of val
+};
+
+// VSX vector wrapper: 4 lanes of unsigned stored in `val`.
+struct v_uint32x4
+{
+    typedef unsigned lane_type;
+    enum { nlanes = 4 };
+    vec_uint4 val;
+
+    // wraps an existing native vector
+    explicit v_uint32x4(const vec_uint4& v) : val(v) {}
+    // default: initialised from vec_uint4_z
+    v_uint32x4() : val(vec_uint4_z) {}
+    // implicit conversion from vec_bint4 through vec_uint4_c
+    v_uint32x4(vec_bint4 v) : val(vec_uint4_c(v)) {}
+    // element-wise construction, arguments forwarded in order to vec_uint4_set
+    v_uint32x4(unsigned v0, unsigned v1, unsigned v2, unsigned v3) : val(vec_uint4_set(v0, v1, v2, v3)) {}
+    uint get0() const { return vec_extract(val, 0); } // element 0 of val
+};
+
+// VSX vector wrapper: 4 lanes of int stored in `val`.
+struct v_int32x4
+{
+    typedef int lane_type;
+    enum { nlanes = 4 };
+    vec_int4 val;
+
+    // wraps an existing native vector
+    explicit v_int32x4(const vec_int4& v) : val(v) {}
+    // default: initialised from vec_int4_z
+    v_int32x4() : val(vec_int4_z) {}
+    // implicit conversion from vec_bint4 through vec_int4_c
+    v_int32x4(vec_bint4 v) : val(vec_int4_c(v)) {}
+    // element-wise construction, arguments forwarded in order to vec_int4_set
+    v_int32x4(int v0, int v1, int v2, int v3) : val(vec_int4_set(v0, v1, v2, v3)) {}
+    int get0() const { return vec_extract(val, 0); } // element 0 of val
+};
+
+// VSX vector wrapper: 4 lanes of float stored in `val`.
+struct v_float32x4
+{
+    typedef float lane_type;
+    enum { nlanes = 4 };
+    vec_float4 val;
+
+    // wraps an existing native vector
+    explicit v_float32x4(const vec_float4& v) : val(v) {}
+    // default: initialised from vec_float4_z
+    v_float32x4() : val(vec_float4_z) {}
+    // implicit conversion from vec_bint4 through vec_float4_c
+    v_float32x4(vec_bint4 v) : val(vec_float4_c(v)) {}
+    // element-wise construction, arguments forwarded in order to vec_float4_set
+    v_float32x4(float v0, float v1, float v2, float v3) : val(vec_float4_set(v0, v1, v2, v3)) {}
+    float get0() const { return vec_extract(val, 0); } // element 0 of val
+};
+
+// VSX vector wrapper: 2 lanes of uint64 stored in `val`.
+struct v_uint64x2
+{
+    typedef uint64 lane_type;
+    enum { nlanes = 2 };
+    vec_udword2 val;
+
+    // wraps an existing native vector
+    explicit v_uint64x2(const vec_udword2& v) : val(v) {}
+    // default: initialised from vec_udword2_z
+    v_uint64x2() : val(vec_udword2_z) {}
+    // implicit conversion from vec_bdword2 through vec_udword2_c
+    v_uint64x2(vec_bdword2 v) : val(vec_udword2_c(v)) {}
+    // element-wise construction, arguments forwarded in order to vec_udword2_set
+    v_uint64x2(uint64 v0, uint64 v1) : val(vec_udword2_set(v0, v1)) {}
+    uint64 get0() const { return vec_extract(val, 0); } // element 0 of val
+};
+
+// VSX vector wrapper: 2 lanes of int64 stored in `val`.
+struct v_int64x2
+{
+    typedef int64 lane_type;
+    enum { nlanes = 2 };
+    vec_dword2 val;
+
+    // wraps an existing native vector
+    explicit v_int64x2(const vec_dword2& v) : val(v) {}
+    // default: initialised from vec_dword2_z
+    v_int64x2() : val(vec_dword2_z) {}
+    // implicit conversion from vec_bdword2 through vec_dword2_c
+    v_int64x2(vec_bdword2 v) : val(vec_dword2_c(v)) {}
+    // element-wise construction, arguments forwarded in order to vec_dword2_set
+    v_int64x2(int64 v0, int64 v1) : val(vec_dword2_set(v0, v1)) {}
+    int64 get0() const { return vec_extract(val, 0); } // element 0 of val
+};
+
+// VSX vector wrapper: 2 lanes of double stored in `val`.
+struct v_float64x2
+{
+    typedef double lane_type;
+    enum { nlanes = 2 };
+    vec_double2 val;
+
+    // wraps an existing native vector
+    explicit v_float64x2(const vec_double2& v) : val(v) {}
+    // default: initialised from vec_double2_z
+    v_float64x2() : val(vec_double2_z) {}
+    // implicit conversion from vec_bdword2 through vec_double2_c
+    v_float64x2(vec_bdword2 v) : val(vec_double2_c(v)) {}
+    // element-wise construction, arguments forwarded in order to vec_double2_set
+    v_float64x2(double v0, double v1) : val(vec_double2_set(v0, v1)) {}
+    double get0() const { return vec_extract(val, 0); } // element 0 of val
+};
+
+#define OPENCV_HAL_IMPL_VSX_EXTRACT_N(_Tpvec, _Tp) \
+template<int i> inline _Tp v_extract_n(VSX_UNUSED(_Tpvec v)) { return vec_extract(v.val, i); } // v_extract_n<i>(v): element i of v.val as a scalar of type _Tp
+
+OPENCV_HAL_IMPL_VSX_EXTRACT_N(v_uint8x16, uchar) // one instantiation per vector type: (vector wrapper, scalar return type)
+OPENCV_HAL_IMPL_VSX_EXTRACT_N(v_int8x16, schar)
+OPENCV_HAL_IMPL_VSX_EXTRACT_N(v_uint16x8, ushort)
+OPENCV_HAL_IMPL_VSX_EXTRACT_N(v_int16x8, short)
+OPENCV_HAL_IMPL_VSX_EXTRACT_N(v_uint32x4, uint)
+OPENCV_HAL_IMPL_VSX_EXTRACT_N(v_int32x4, int)
+OPENCV_HAL_IMPL_VSX_EXTRACT_N(v_uint64x2, uint64)
+OPENCV_HAL_IMPL_VSX_EXTRACT_N(v_int64x2, int64)
+OPENCV_HAL_IMPL_VSX_EXTRACT_N(v_float32x4, float)
+OPENCV_HAL_IMPL_VSX_EXTRACT_N(v_float64x2, double)
+
+//////////////// Load and store operations ///////////////
+
+/*
+ * clang-5 aborts while parsing "vec_xxx_c", but only when it is used
+ * inside a function template that is defined through a preprocessor macro.
+ *
+ * If vec_xxx_c is defined as a C++ cast, clang-5 accepts it.
+*/
+#define OPENCV_HAL_IMPL_VSX_INITVEC(_Tpvec, _Tp, suffix, cast)                        \
+inline _Tpvec v_setzero_##suffix() { return _Tpvec(); }                               \
+inline _Tpvec v_setall_##suffix(_Tp v) { return _Tpvec(vec_splats((_Tp)v));}          \
+template<typename _Tpvec0> inline _Tpvec v_reinterpret_as_##suffix(const _Tpvec0 &a)  \
+{ return _Tpvec((cast)a.val); } // generates v_setzero_<suffix>, v_setall_<suffix> and v_reinterpret_as_<suffix>; the latter is a C-style cast of a.val to `cast`
+
+OPENCV_HAL_IMPL_VSX_INITVEC(v_uint8x16, uchar, u8, vec_uchar16) // arguments: (vector wrapper, scalar type, name suffix, native vector type used for the cast)
+OPENCV_HAL_IMPL_VSX_INITVEC(v_int8x16, schar, s8, vec_char16)
+OPENCV_HAL_IMPL_VSX_INITVEC(v_uint16x8, ushort, u16, vec_ushort8)
+OPENCV_HAL_IMPL_VSX_INITVEC(v_int16x8, short, s16, vec_short8)
+OPENCV_HAL_IMPL_VSX_INITVEC(v_uint32x4, uint, u32, vec_uint4)
+OPENCV_HAL_IMPL_VSX_INITVEC(v_int32x4, int, s32, vec_int4)
+OPENCV_HAL_IMPL_VSX_INITVEC(v_uint64x2, uint64, u64, vec_udword2)
+OPENCV_HAL_IMPL_VSX_INITVEC(v_int64x2, int64, s64, vec_dword2)
+OPENCV_HAL_IMPL_VSX_INITVEC(v_float32x4, float, f32, vec_float4)
+OPENCV_HAL_IMPL_VSX_INITVEC(v_float64x2, double, f64, vec_double2)
+
+// Generates the load/store family for one vector type:
+//   ld / st     - used by the unaligned entry points (v_load, v_store)
+//   ld_a / st_a - used by the *_aligned entry points and by non-unaligned StoreMode values
+// v_load_low / v_load_halves / v_store_low / v_store_high go through
+// vec_ld_l8 / vec_st_l8 / vec_st_h8 (presumably 8-byte halves, per the helper names).
+#define OPENCV_HAL_IMPL_VSX_LOADSTORE_C(_Tpvec, _Tp, ld, ld_a, st, st_a)    \
+inline _Tpvec v_load(const _Tp* ptr) { return _Tpvec(ld(0, ptr)); }         \
+inline _Tpvec v_load_aligned(VSX_UNUSED(const _Tp* ptr))                    \
+{ return _Tpvec(ld_a(0, ptr)); }                                            \
+inline _Tpvec v_load_low(const _Tp* ptr) { return _Tpvec(vec_ld_l8(ptr)); } \
+inline _Tpvec v_load_halves(const _Tp* ptr0, const _Tp* ptr1)               \
+{ return _Tpvec(vec_mergesqh(vec_ld_l8(ptr0), vec_ld_l8(ptr1))); }          \
+inline void v_store(_Tp* ptr, const _Tpvec& a) { st(a.val, 0, ptr); }       \
+inline void v_store_aligned(VSX_UNUSED(_Tp* ptr), const _Tpvec& a)          \
+{ st_a(a.val, 0, ptr); }                                                    \
+inline void v_store_aligned_nocache(VSX_UNUSED(_Tp* ptr), const _Tpvec& a)  \
+{ st_a(a.val, 0, ptr); }                                                    \
+inline void v_store(_Tp* ptr, const _Tpvec& a, hal::StoreMode mode)         \
+{ if(mode == hal::STORE_UNALIGNED) st(a.val, 0, ptr); else st_a(a.val, 0, ptr); } \
+inline void v_store_low(_Tp* ptr, const _Tpvec& a) { vec_st_l8(a.val, ptr); } \
+inline void v_store_high(_Tp* ptr, const _Tpvec& a) { vec_st_h8(a.val, ptr); }
+
+// working around gcc bug for aligned ld/st
+// if the runtime check for vec_ld/st fails we fall back to unaligned ld/st
+// https://github.com/opencv/opencv/issues/13211
+#ifdef CV_COMPILER_VSX_BROKEN_ALIGNED // workaround build: the aligned slots also get vsx_ld / vsx_st
+    #define OPENCV_HAL_IMPL_VSX_LOADSTORE(_Tpvec, _Tp) \
+    OPENCV_HAL_IMPL_VSX_LOADSTORE_C(_Tpvec, _Tp, vsx_ld, vsx_ld, vsx_st, vsx_st)
+#else // normal build: aligned slots use vec_ld / vec_st, unaligned slots use vsx_ld / vsx_st
+    #define OPENCV_HAL_IMPL_VSX_LOADSTORE(_Tpvec, _Tp) \
+    OPENCV_HAL_IMPL_VSX_LOADSTORE_C(_Tpvec, _Tp, vsx_ld, vec_ld, vsx_st, vec_st)
+#endif
+
+OPENCV_HAL_IMPL_VSX_LOADSTORE(v_uint8x16,  uchar) // 8/16/32-bit lane types use the aligned/unaligned selection made above
+OPENCV_HAL_IMPL_VSX_LOADSTORE(v_int8x16,   schar)
+OPENCV_HAL_IMPL_VSX_LOADSTORE(v_uint16x8,  ushort)
+OPENCV_HAL_IMPL_VSX_LOADSTORE(v_int16x8,   short)
+OPENCV_HAL_IMPL_VSX_LOADSTORE(v_uint32x4,  uint)
+OPENCV_HAL_IMPL_VSX_LOADSTORE(v_int32x4,   int)
+OPENCV_HAL_IMPL_VSX_LOADSTORE(v_float32x4, float)
+
+OPENCV_HAL_IMPL_VSX_LOADSTORE_C(v_float64x2, double, vsx_ld,  vsx_ld,  vsx_st,  vsx_st) // 64-bit lane types: the same primitive fills both the unaligned and the aligned slot
+OPENCV_HAL_IMPL_VSX_LOADSTORE_C(v_uint64x2,  uint64, vsx_ld2, vsx_ld2, vsx_st2, vsx_st2)
+OPENCV_HAL_IMPL_VSX_LOADSTORE_C(v_int64x2,    int64, vsx_ld2, vsx_ld2, vsx_st2, vsx_st2)
+
+//////////////// Value reordering ///////////////
+
+/* de&interleave */
+#define OPENCV_HAL_IMPL_VSX_INTERLEAVE(_Tp, _Tpvec)                          \
+inline void v_load_deinterleave(const _Tp* ptr, _Tpvec& a, _Tpvec& b)        \
+{ vec_ld_deinterleave(ptr, a.val, b.val);}                                   \
+inline void v_load_deinterleave(const _Tp* ptr, _Tpvec& a,                   \
+                                _Tpvec& b, _Tpvec& c)                        \
+{ vec_ld_deinterleave(ptr, a.val, b.val, c.val); }                           \
+inline void v_load_deinterleave(const _Tp* ptr, _Tpvec& a, _Tpvec& b,        \
+                                                _Tpvec& c, _Tpvec& d)        \
+{ vec_ld_deinterleave(ptr, a.val, b.val, c.val, d.val); }                    \
+inline void v_store_interleave(_Tp* ptr, const _Tpvec& a, const _Tpvec& b,   \
+                               hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \
+{ vec_st_interleave(a.val, b.val, ptr); }                                    \
+inline void v_store_interleave(_Tp* ptr, const _Tpvec& a,                    \
+                               const _Tpvec& b, const _Tpvec& c,             \
+                               hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \
+{ vec_st_interleave(a.val, b.val, c.val, ptr); }                             \
+inline void v_store_interleave(_Tp* ptr, const _Tpvec& a, const _Tpvec& b,   \
+                                         const _Tpvec& c, const _Tpvec& d,   \
+                               hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \
+{ vec_st_interleave(a.val, b.val, c.val, d.val, ptr); } // 2/3/4-channel wrappers over vec_ld_deinterleave / vec_st_interleave; the StoreMode argument is accepted but ignored
+
+OPENCV_HAL_IMPL_VSX_INTERLEAVE(uchar, v_uint8x16) // arguments: (scalar type, vector wrapper)
+OPENCV_HAL_IMPL_VSX_INTERLEAVE(schar, v_int8x16)
+OPENCV_HAL_IMPL_VSX_INTERLEAVE(ushort, v_uint16x8)
+OPENCV_HAL_IMPL_VSX_INTERLEAVE(short, v_int16x8)
+OPENCV_HAL_IMPL_VSX_INTERLEAVE(uint, v_uint32x4)
+OPENCV_HAL_IMPL_VSX_INTERLEAVE(int, v_int32x4)
+OPENCV_HAL_IMPL_VSX_INTERLEAVE(float, v_float32x4)
+OPENCV_HAL_IMPL_VSX_INTERLEAVE(double, v_float64x2)
+OPENCV_HAL_IMPL_VSX_INTERLEAVE(int64, v_int64x2)
+OPENCV_HAL_IMPL_VSX_INTERLEAVE(uint64, v_uint64x2)
+
+/* Expand */
+#define OPENCV_HAL_IMPL_VSX_EXPAND(_Tpvec, _Tpwvec, _Tp, fl, fh)  \
+inline void v_expand(const _Tpvec& a, _Tpwvec& b0, _Tpwvec& b1)   \
+{                                                                 \
+    b0.val = fh(a.val);                                           \
+    b1.val = fl(a.val);                                           \
+}                                                                 \
+inline _Tpwvec v_expand_low(const _Tpvec& a)                      \
+{ return _Tpwvec(fh(a.val)); }                                    \
+inline _Tpwvec v_expand_high(const _Tpvec& a)                     \
+{ return _Tpwvec(fl(a.val)); }                                    \
+inline _Tpwvec v_load_expand(const _Tp* ptr)                      \
+{ return _Tpwvec(fh(vec_ld_l8(ptr))); } // note: the *_low results come from fh and the *_high results from fl (NOTE(review): presumably due to VSX element numbering - confirm)
+
+OPENCV_HAL_IMPL_VSX_EXPAND(v_uint8x16, v_uint16x8, uchar, vec_unpacklu, vec_unpackhu) // arguments: (narrow vector, wide vector, narrow scalar, fl, fh)
+OPENCV_HAL_IMPL_VSX_EXPAND(v_int8x16, v_int16x8, schar, vec_unpackl, vec_unpackh)
+OPENCV_HAL_IMPL_VSX_EXPAND(v_uint16x8, v_uint32x4, ushort, vec_unpacklu, vec_unpackhu)
+OPENCV_HAL_IMPL_VSX_EXPAND(v_int16x8, v_int32x4, short, vec_unpackl, vec_unpackh)
+OPENCV_HAL_IMPL_VSX_EXPAND(v_uint32x4, v_uint64x2, uint, vec_unpacklu, vec_unpackhu)
+OPENCV_HAL_IMPL_VSX_EXPAND(v_int32x4, v_int64x2, int, vec_unpackl, vec_unpackh)
+
+/* Load and zero expand a 4 byte value into the second dword, first is don't care. */
+#if !defined(CV_COMPILER_VSX_BROKEN_ASM) // default: a single lxsiwzx instruction through inline asm
+    #define _LXSIWZX(out, ptr, T) __asm__ ("lxsiwzx %x0, 0, %1\r\n" : "=wa"(out) : "r" (ptr) : "memory"); // T is unused in this variant
+#else
+    /* This is compiler-agnostic, but will introduce an unneeded splat on the critical path. */
+    #define _LXSIWZX(out, ptr, T) out = (T)vec_udword2_sp(*(uint32_t*)(ptr)); // reads 32 bits through a uint32_t pointer, then casts the result to T
+#endif
+
+inline v_uint32x4 v_load_expand_q(const uchar* ptr) // loads 4 bytes from ptr and widens each one to an unsigned 32-bit lane
+{
+    // Zero-extend each byte by 24 bits with a single permute instead of unpacking. Usually faster in small kernels.
+    // Note: the loaded word is zero-extended, so the 4 bytes above it are zero.
+    vec_uchar16 pmu = {8, 12, 12, 12, 9, 12, 12, 12, 10, 12, 12, 12, 11, 12, 12, 12}; // bytes 8..11 are the sources; index 12 picks one of the zeroed bytes as filler
+    vec_uchar16 out;
+
+    _LXSIWZX(out, ptr, vec_uchar16);
+    out = vec_perm(out, out, pmu);
+    return v_uint32x4((vec_uint4)out);
+}
+
+// Loads 4 signed bytes from ptr (via _LXSIWZX) and widens them to four int lanes
+// in two unpack steps: vec_unpackl (8 -> 16 bit), then vec_unpackh (16 -> 32 bit).
+inline v_int32x4 v_load_expand_q(const schar* ptr)
+{
+    vec_char16 raw;
+    _LXSIWZX(raw, ptr, vec_char16);
+
+    const vec_short8 half = vec_unpackl(raw);
+    const vec_int4 wide = vec_unpackh(half);
+    return v_int32x4(wide);
+}
+
+/* pack */
+// Generates v_<pack>, v_<pack>_store and the rounding-shift variants v_rshr_<pack>[_store]:
+// the rshr forms add 1 << (n-1) with addfnc, shift right by n with sfnc, then narrow with pkfnc.
+#define OPENCV_HAL_IMPL_VSX_PACK(_Tpvec, _Tp, _Tpwvec, _Tpvn, _Tpdel, sfnc, pkfnc, addfnc, pack)    \
+inline _Tpvec v_##pack(const _Tpwvec& a, const _Tpwvec& b)                                          \
+{                                                                                                   \
+    return _Tpvec(pkfnc(a.val, b.val));                                                             \
+}                                                                                                   \
+inline void v_##pack##_store(_Tp* ptr, const _Tpwvec& a)                                            \
+{                                                                                                   \
+    vec_st_l8(pkfnc(a.val, a.val), ptr);                                                            \
+}                                                                                                   \
+template<int n> inline _Tpvec v_rshr_##pack(const _Tpwvec& a, const _Tpwvec& b)                     \
+{                                                                                                   \
+    const __vector _Tpvn vn = vec_splats((_Tpvn)n);                                                 \
+    const __vector _Tpdel delta = vec_splats((_Tpdel)((_Tpdel)1 << (n-1)));                         \
+    return _Tpvec(pkfnc(sfnc(addfnc(a.val, delta), vn), sfnc(addfnc(b.val, delta), vn)));           \
+}                                                                                                   \
+template<int n> inline void v_rshr_##pack##_store(_Tp* ptr, const _Tpwvec& a)                       \
+{                                                                                                   \
+    const __vector _Tpvn vn = vec_splats((_Tpvn)n);                                                 \
+    const __vector _Tpdel delta = vec_splats((_Tpdel)((_Tpdel)1 << (n-1)));                         \
+    vec_st_l8(pkfnc(sfnc(addfnc(a.val, delta), vn), delta), ptr);                                   \
+}
+
+OPENCV_HAL_IMPL_VSX_PACK(v_uint8x16, uchar, v_uint16x8, unsigned short, unsigned short,
+                         vec_sr, vec_packs, vec_adds, pack)
+OPENCV_HAL_IMPL_VSX_PACK(v_int8x16, schar, v_int16x8, unsigned short, short,
+                         vec_sra, vec_packs, vec_adds, pack)
+
+OPENCV_HAL_IMPL_VSX_PACK(v_uint16x8, ushort, v_uint32x4, unsigned int, unsigned int,
+                         vec_sr, vec_packs, vec_add, pack)
+OPENCV_HAL_IMPL_VSX_PACK(v_int16x8, short, v_int32x4, unsigned int, int,
+                         vec_sra, vec_packs, vec_add, pack)
+
+OPENCV_HAL_IMPL_VSX_PACK(v_uint32x4, uint, v_uint64x2, unsigned long long, unsigned long long,
+                         vec_sr, vec_pack, vec_add, pack)
+OPENCV_HAL_IMPL_VSX_PACK(v_int32x4, int, v_int64x2, unsigned long long, long long,
+                         vec_sra, vec_pack, vec_add, pack)
+
+OPENCV_HAL_IMPL_VSX_PACK(v_uint8x16, uchar, v_int16x8, unsigned short, short,
+                         vec_sra, vec_packsu, vec_adds, pack_u)
+OPENCV_HAL_IMPL_VSX_PACK(v_uint16x8, ushort, v_int32x4, unsigned int, int,
+                         vec_sra, vec_packsu, vec_add, pack_u)
+// Following variant is not implemented on other platforms:
+//OPENCV_HAL_IMPL_VSX_PACK(v_uint32x4, uint, v_int64x2, unsigned long long, long long,
+//                         vec_sra, vec_packsu, vec_add, pack_u)
+
+// pack boolean
+inline v_uint8x16 v_pack_b(const v_uint16x8& a, const v_uint16x8& b)
+{
+    const vec_uchar16 packed = vec_pack(a.val, b.val); // two 16-bit masks narrowed into one byte vector
+    return v_uint8x16(packed);
+}
+
+inline v_uint8x16 v_pack_b(const v_uint32x4& a, const v_uint32x4& b,
+                           const v_uint32x4& c, const v_uint32x4& d)
+{
+    const vec_ushort8 first16 = vec_pack(a.val, b.val);  // a, b narrowed to 16-bit lanes
+    const vec_ushort8 second16 = vec_pack(c.val, d.val); // c, d narrowed to 16-bit lanes
+    return v_uint8x16(vec_pack(first16, second16));
+}
+
+inline v_uint8x16 v_pack_b(const v_uint64x2& a, const v_uint64x2& b, const v_uint64x2& c,
+                           const v_uint64x2& d, const v_uint64x2& e, const v_uint64x2& f,
+                           const v_uint64x2& g, const v_uint64x2& h)
+{
+    const vec_uint4 w0 = vec_pack(a.val, b.val); // first narrowing step: 64-bit pairs -> 32-bit lanes
+    const vec_uint4 w1 = vec_pack(c.val, d.val);
+    const vec_uint4 w2 = vec_pack(e.val, f.val);
+    const vec_uint4 w3 = vec_pack(g.val, h.val);
+
+    const vec_ushort8 h0 = vec_pack(w0, w1);     // second narrowing step: 32-bit -> 16-bit lanes
+    const vec_ushort8 h1 = vec_pack(w2, w3);
+    return v_uint8x16(vec_pack(h0, h1));
+}
+
+/* Recombine */
+template <typename _Tpvec>
+inline void v_zip(const _Tpvec& a0, const _Tpvec& a1, _Tpvec& b0, _Tpvec& b1) // interleave a0 with a1: b0 <- vec_mergeh, b1 <- vec_mergel
+{
+    b0.val = vec_mergeh(a0.val, a1.val);
+    b1.val = vec_mergel(a0.val, a1.val); // a0/a1 are read again after b0 was written, so b0 must not refer to an input
+}
+
+template <typename _Tpvec>
+inline _Tpvec v_combine_high(const _Tpvec& a, const _Tpvec& b) // one half of a joined with the same half of b; note that "high" maps to vec_mergesql
+{ return _Tpvec(vec_mergesql(a.val, b.val)); }
+
+template <typename _Tpvec>
+inline _Tpvec v_combine_low(const _Tpvec& a, const _Tpvec& b) // counterpart of v_combine_high; "low" maps to vec_mergesqh
+{ return _Tpvec(vec_mergesqh(a.val, b.val)); }
+
+template <typename _Tpvec>
+inline void v_recombine(const _Tpvec& a, const _Tpvec& b, _Tpvec& c, _Tpvec& d) // c gets what v_combine_low returns, d what v_combine_high returns
+{
+    c.val = vec_mergesqh(a.val, b.val);
+    d.val = vec_mergesql(a.val, b.val); // a/b are read again after c was written, so c must not refer to an input
+}
+
+////////// Arithmetic, bitwise and comparison operations /////////
+
+/* Element-wise binary and unary operations */
+/** Arithmetics: OPENCV_HAL_IMPL_VSX_BIN_OP(bin_op, _Tpvec, intrin) defines operator bin_op and operator bin_op= on _Tpvec by applying intrin to the .val members **/
+#define OPENCV_HAL_IMPL_VSX_BIN_OP(bin_op, _Tpvec, intrin)       \
+inline _Tpvec operator bin_op (const _Tpvec& a, const _Tpvec& b) \
+{ return _Tpvec(intrin(a.val, b.val)); }                         \
+inline _Tpvec& operator bin_op##= (_Tpvec& a, const _Tpvec& b)   \
+{ a.val = intrin(a.val, b.val); return a; }
+
+OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_uint8x16, vec_adds) // 8- and 16-bit lanes: + and - use vec_adds / vec_subs (the saturating AltiVec forms)
+OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_uint8x16, vec_subs)
+OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_int8x16,  vec_adds)
+OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_int8x16, vec_subs)
+OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_uint16x8, vec_adds)
+OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_uint16x8, vec_subs)
+OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_int16x8, vec_adds)
+OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_int16x8, vec_subs)
+OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_uint32x4, vec_add) // 32-bit lanes and wider, and the float types: plain vec_add / vec_sub / vec_mul
+OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_uint32x4, vec_sub)
+OPENCV_HAL_IMPL_VSX_BIN_OP(*, v_uint32x4, vec_mul)
+OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_int32x4, vec_add)
+OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_int32x4, vec_sub)
+OPENCV_HAL_IMPL_VSX_BIN_OP(*, v_int32x4, vec_mul)
+OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_float32x4, vec_add)
+OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_float32x4, vec_sub)
+OPENCV_HAL_IMPL_VSX_BIN_OP(*, v_float32x4, vec_mul)
+OPENCV_HAL_IMPL_VSX_BIN_OP(/, v_float32x4, vec_div) // division is generated for the two float types only
+OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_float64x2, vec_add)
+OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_float64x2, vec_sub)
+OPENCV_HAL_IMPL_VSX_BIN_OP(*, v_float64x2, vec_mul)
+OPENCV_HAL_IMPL_VSX_BIN_OP(/, v_float64x2, vec_div)
+OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_uint64x2, vec_add) // 64-bit integer lanes get + and - only in this list
+OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_uint64x2, vec_sub)
+OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_int64x2, vec_add)
+OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_int64x2, vec_sub)
+
+// saturating multiply: operator* widens with v_mul_expand and narrows the two products again with v_pack; generated for 8- and 16-bit lanes only
+#define OPENCV_HAL_IMPL_VSX_MUL_SAT(_Tpvec, _Tpwvec)             \
+    inline _Tpvec operator * (const _Tpvec& a, const _Tpvec& b)  \
+    {                                                            \
+        _Tpwvec c, d;                                            \
+        v_mul_expand(a, b, c, d);                                \
+        return v_pack(c, d);                                     \
+    }                                                            \
+    inline _Tpvec& operator *= (_Tpvec& a, const _Tpvec& b)      \
+    { a = a * b; return a; }
+
+OPENCV_HAL_IMPL_VSX_MUL_SAT(v_int8x16,  v_int16x8)   // second argument is the wide type that holds the intermediate products
+OPENCV_HAL_IMPL_VSX_MUL_SAT(v_uint8x16, v_uint16x8)
+OPENCV_HAL_IMPL_VSX_MUL_SAT(v_int16x8,  v_int32x4)
+OPENCV_HAL_IMPL_VSX_MUL_SAT(v_uint16x8, v_uint32x4)
+
+template<typename Tvec, typename Twvec>
+inline void v_mul_expand(const Tvec& a, const Tvec& b, Twvec& c, Twvec& d)
+{
+    const Twvec from_mule(vec_mule(a.val, b.val)); // widening products delivered by vec_mule
+    const Twvec from_mulo(vec_mulo(a.val, b.val)); // widening products delivered by vec_mulo
+    v_zip(from_mule, from_mulo, c, d);             // interleave the two product sets into c and d
+}
+
+inline v_int16x8 v_mul_hi(const v_int16x8& a, const v_int16x8& b) // keeps one 16-bit half of every 32-bit product a[i] * b[i]
+{
+    vec_int4 p0 = vec_mule(a.val, b.val);
+    vec_int4 p1 = vec_mulo(a.val, b.val);
+    static const vec_uchar16 perm = {2, 3, 18, 19, 6, 7, 22, 23, 10, 11, 26, 27, 14, 15, 30, 31}; // bytes 2-3 of each 32-bit product, alternating p0 (0..15) and p1 (16..31); presumably the upper half on the supported byte order - confirm
+    return v_int16x8(vec_perm(vec_short8_c(p0), vec_short8_c(p1), perm));
+}
+inline v_uint16x8 v_mul_hi(const v_uint16x8& a, const v_uint16x8& b) // unsigned counterpart, same permutation table
+{
+    vec_uint4 p0 = vec_mule(a.val, b.val);
+    vec_uint4 p1 = vec_mulo(a.val, b.val);
+    static const vec_uchar16 perm = {2, 3, 18, 19, 6, 7, 22, 23, 10, 11, 26, 27, 14, 15, 30, 31};
+    return v_uint16x8(vec_perm(vec_ushort8_c(p0), vec_ushort8_c(p1), perm));
+}
+
+/** Non-saturating arithmetics: OPENCV_HAL_IMPL_VSX_BIN_FUNC(func, intrin) defines a function template func(a, b) returning _Tpvec(intrin(a.val, b.val)) **/
+#define OPENCV_HAL_IMPL_VSX_BIN_FUNC(func, intrin)    \
+template<typename _Tpvec>                             \
+inline _Tpvec func(const _Tpvec& a, const _Tpvec& b)  \
+{ return _Tpvec(intrin(a.val, b.val)); }
+
+OPENCV_HAL_IMPL_VSX_BIN_FUNC(v_add_wrap, vec_add) // plain vec_add / vec_sub / vec_mul for every lane width
+OPENCV_HAL_IMPL_VSX_BIN_FUNC(v_sub_wrap, vec_sub)
+OPENCV_HAL_IMPL_VSX_BIN_FUNC(v_mul_wrap, vec_mul)
+
+/** Bitwise shifts: per type, operator<< / operator>> take a run-time count and v_shl<imm> / v_shr<imm> a compile-time count; the count is broadcast with splfunc **/
+#define OPENCV_HAL_IMPL_VSX_SHIFT_OP(_Tpvec, shr, splfunc)   \
+inline _Tpvec operator << (const _Tpvec& a, int imm)         \
+{ return _Tpvec(vec_sl(a.val, splfunc(imm))); }              \
+inline _Tpvec operator >> (const _Tpvec& a, int imm)         \
+{ return _Tpvec(shr(a.val, splfunc(imm))); }                 \
+template<int imm> inline _Tpvec v_shl(const _Tpvec& a)       \
+{ return _Tpvec(vec_sl(a.val, splfunc(imm))); }              \
+template<int imm> inline _Tpvec v_shr(const _Tpvec& a)       \
+{ return _Tpvec(shr(a.val, splfunc(imm))); }
+
+OPENCV_HAL_IMPL_VSX_SHIFT_OP(v_uint8x16, vec_sr, vec_uchar16_sp) // unsigned types shift right with vec_sr
+OPENCV_HAL_IMPL_VSX_SHIFT_OP(v_uint16x8, vec_sr, vec_ushort8_sp)
+OPENCV_HAL_IMPL_VSX_SHIFT_OP(v_uint32x4, vec_sr, vec_uint4_sp)
+OPENCV_HAL_IMPL_VSX_SHIFT_OP(v_uint64x2, vec_sr, vec_udword2_sp)
+// algebraic right shift: the signed types use vec_sra; the count is still splatted as an unsigned vector
+OPENCV_HAL_IMPL_VSX_SHIFT_OP(v_int8x16, vec_sra, vec_uchar16_sp)
+OPENCV_HAL_IMPL_VSX_SHIFT_OP(v_int16x8, vec_sra, vec_ushort8_sp)
+OPENCV_HAL_IMPL_VSX_SHIFT_OP(v_int32x4, vec_sra, vec_uint4_sp)
+OPENCV_HAL_IMPL_VSX_SHIFT_OP(v_int64x2, vec_sra, vec_udword2_sp)
+
+/** Bitwise logic: operators &, | and ^ (with their compound forms) through OPENCV_HAL_IMPL_VSX_BIN_OP, plus unary ~ through vec_not **/
+#define OPENCV_HAL_IMPL_VSX_LOGIC_OP(_Tpvec)    \
+OPENCV_HAL_IMPL_VSX_BIN_OP(&, _Tpvec, vec_and)  \
+OPENCV_HAL_IMPL_VSX_BIN_OP(|, _Tpvec, vec_or)   \
+OPENCV_HAL_IMPL_VSX_BIN_OP(^, _Tpvec, vec_xor)  \
+inline _Tpvec operator ~ (const _Tpvec& a)      \
+{ return _Tpvec(vec_not(a.val)); }
+
+OPENCV_HAL_IMPL_VSX_LOGIC_OP(v_uint8x16)
+OPENCV_HAL_IMPL_VSX_LOGIC_OP(v_int8x16)
+OPENCV_HAL_IMPL_VSX_LOGIC_OP(v_uint16x8)
+OPENCV_HAL_IMPL_VSX_LOGIC_OP(v_int16x8)
+OPENCV_HAL_IMPL_VSX_LOGIC_OP(v_uint32x4)
+OPENCV_HAL_IMPL_VSX_LOGIC_OP(v_int32x4)
+OPENCV_HAL_IMPL_VSX_LOGIC_OP(v_uint64x2)
+OPENCV_HAL_IMPL_VSX_LOGIC_OP(v_int64x2)
+OPENCV_HAL_IMPL_VSX_LOGIC_OP(v_float32x4) // the float types get the same bitwise operators
+OPENCV_HAL_IMPL_VSX_LOGIC_OP(v_float64x2)
+
+/** Bitwise select: v_select(mask, a, b) forwards to vec_sel(b.val, a.val, cast(mask.val)), i.e. a is passed as the operand chosen where mask bits are set **/
+#define OPENCV_HAL_IMPL_VSX_SELECT(_Tpvec, cast)                             \
+inline _Tpvec v_select(const _Tpvec& mask, const _Tpvec& a, const _Tpvec& b) \
+{ return _Tpvec(vec_sel(b.val, a.val, cast(mask.val))); }
+
+OPENCV_HAL_IMPL_VSX_SELECT(v_uint8x16, vec_bchar16_c) // second argument: cast that turns mask.val into the matching boolean vector type
+OPENCV_HAL_IMPL_VSX_SELECT(v_int8x16, vec_bchar16_c)
+OPENCV_HAL_IMPL_VSX_SELECT(v_uint16x8, vec_bshort8_c)
+OPENCV_HAL_IMPL_VSX_SELECT(v_int16x8, vec_bshort8_c)
+OPENCV_HAL_IMPL_VSX_SELECT(v_uint32x4, vec_bint4_c)
+OPENCV_HAL_IMPL_VSX_SELECT(v_int32x4, vec_bint4_c)
+OPENCV_HAL_IMPL_VSX_SELECT(v_float32x4, vec_bint4_c)
+OPENCV_HAL_IMPL_VSX_SELECT(v_float64x2, vec_bdword2_c) // no v_uint64x2 / v_int64x2 instantiation in this list
+
+/** Comparison: the six relational operators, each wrapping the matching vec_cmp* result in a _Tpvec; despite INT in the name the macro is also instantiated for the float types **/
+#define OPENCV_HAL_IMPL_VSX_INT_CMP_OP(_Tpvec)                 \
+inline _Tpvec operator == (const _Tpvec& a, const _Tpvec& b)   \
+{ return _Tpvec(vec_cmpeq(a.val, b.val)); }                    \
+inline _Tpvec operator != (const _Tpvec& a, const _Tpvec& b)   \
+{ return _Tpvec(vec_cmpne(a.val, b.val)); }                    \
+inline _Tpvec operator < (const _Tpvec& a, const _Tpvec& b)    \
+{ return _Tpvec(vec_cmplt(a.val, b.val)); }                    \
+inline _Tpvec operator > (const _Tpvec& a, const _Tpvec& b)    \
+{ return _Tpvec(vec_cmpgt(a.val, b.val)); }                    \
+inline _Tpvec operator <= (const _Tpvec& a, const _Tpvec& b)   \
+{ return _Tpvec(vec_cmple(a.val, b.val)); }                    \
+inline _Tpvec operator >= (const _Tpvec& a, const _Tpvec& b)   \
+{ return _Tpvec(vec_cmpge(a.val, b.val)); }
+
+OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_uint8x16)
+OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_int8x16)
+OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_uint16x8)
+OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_int16x8)
+OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_uint32x4)
+OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_int32x4)
+OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_float32x4)
+OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_float64x2)
+OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_uint64x2)
+OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_int64x2)
+
+inline v_float32x4 v_not_nan(const v_float32x4& a) // lane mask from a == a; a NaN lane does not compare equal to itself
+{ return v_float32x4(vec_cmpeq(a.val, a.val)); }
+inline v_float64x2 v_not_nan(const v_float64x2& a) // double-precision counterpart
+{ return v_float64x2(vec_cmpeq(a.val, a.val)); }
+
+/** min/max: function templates v_min / v_max generated by OPENCV_HAL_IMPL_VSX_BIN_FUNC from vec_min / vec_max **/
+OPENCV_HAL_IMPL_VSX_BIN_FUNC(v_min, vec_min)
+OPENCV_HAL_IMPL_VSX_BIN_FUNC(v_max, vec_max)
+
+/** Rotate: single-operand v_rotate_left<imm> / v_rotate_right<imm>; imm counts lanes, and a shift of 16 bytes or more returns a default-constructed _Tpvec **/
+#define OPENCV_IMPL_VSX_ROTATE(_Tpvec, suffix, shf, cast)                       \
+template<int imm>                                                               \
+inline _Tpvec v_rotate_##suffix(const _Tpvec& a)                                \
+{                                                                               \
+    const int wd = imm * sizeof(typename _Tpvec::lane_type); /* shift distance in bytes */ \
+    if (wd > 15)                                                                \
+        return _Tpvec();                                                        \
+    return _Tpvec((cast)shf(vec_uchar16_c(a.val), vec_uchar16_sp(wd << 3))); /* wd << 3 turns the byte count into a bit count */ \
+}
+
+#define OPENCV_IMPL_VSX_ROTATE_LR(_Tpvec, cast)     \
+OPENCV_IMPL_VSX_ROTATE(_Tpvec, left, vec_slo, cast) \
+OPENCV_IMPL_VSX_ROTATE(_Tpvec, right, vec_sro, cast)
+
+OPENCV_IMPL_VSX_ROTATE_LR(v_uint8x16, vec_uchar16) // second argument: raw vector type the byte-shifted result is cast back to
+OPENCV_IMPL_VSX_ROTATE_LR(v_int8x16,  vec_char16)
+OPENCV_IMPL_VSX_ROTATE_LR(v_uint16x8, vec_ushort8)
+OPENCV_IMPL_VSX_ROTATE_LR(v_int16x8,  vec_short8)
+OPENCV_IMPL_VSX_ROTATE_LR(v_uint32x4, vec_uint4)
+OPENCV_IMPL_VSX_ROTATE_LR(v_int32x4,  vec_int4)
+OPENCV_IMPL_VSX_ROTATE_LR(v_float32x4, vec_float4)
+OPENCV_IMPL_VSX_ROTATE_LR(v_uint64x2, vec_udword2)
+OPENCV_IMPL_VSX_ROTATE_LR(v_int64x2,  vec_dword2)
+OPENCV_IMPL_VSX_ROTATE_LR(v_float64x2, vec_double2)
+
+template<int imm, typename _Tpvec>
+inline _Tpvec v_rotate_right(const _Tpvec& a, const _Tpvec& b) // two-operand form: shifts across the b/a pair with vec_sld
+{
+    enum { CV_SHIFT = 16 - imm * (sizeof(typename _Tpvec::lane_type)) }; // byte offset handed to vec_sld
+    if (CV_SHIFT == 16) // imm == 0: nothing to shift
+        return a;
+#ifdef __IBMCPP__
+    return _Tpvec(vec_sld(b.val, a.val, CV_SHIFT & 15)); // under __IBMCPP__ the offset is masked into 0..15
+#else
+    return _Tpvec(vec_sld(b.val, a.val, CV_SHIFT));
+#endif
+}
+
+template<int imm, typename _Tpvec>
+inline _Tpvec v_rotate_left(const _Tpvec& a, const _Tpvec& b) // two-operand form: shifts across the a/b pair with vec_sld
+{
+    enum { CV_SHIFT = imm * (sizeof(typename _Tpvec::lane_type)) }; // byte offset handed to vec_sld
+    if (CV_SHIFT == 16) // a full-vector shift leaves exactly b
+        return b;
+    return _Tpvec(vec_sld(a.val, b.val, CV_SHIFT));
+}
+
+#define OPENCV_IMPL_VSX_ROTATE_64_2RG(_Tpvec, suffix, rg1, rg2) /* two-operand rotate for the 2-lane types: only imm == 1 needs a real permute */ \
+template<int imm>                                                 \
+inline _Tpvec v_rotate_##suffix(const _Tpvec& a, const _Tpvec& b) \
+{                                                                 \
+    if (imm == 1)                                                 \
+        return _Tpvec(vec_permi(rg1.val, rg2.val, 2));            \
+    return imm ? b : a; /* imm == 0 returns a, any other value returns b */ \
+}
+
+#define OPENCV_IMPL_VSX_ROTATE_64_2RG_LR(_Tpvec)    \
+OPENCV_IMPL_VSX_ROTATE_64_2RG(_Tpvec, left,  b, a)  \
+OPENCV_IMPL_VSX_ROTATE_64_2RG(_Tpvec, right, a, b)
+
+OPENCV_IMPL_VSX_ROTATE_64_2RG_LR(v_float64x2)
+OPENCV_IMPL_VSX_ROTATE_64_2RG_LR(v_uint64x2)
+OPENCV_IMPL_VSX_ROTATE_64_2RG_LR(v_int64x2)
+
+/* Reverse */
+inline v_uint8x16 v_reverse(const v_uint8x16 &a)
+{
+    static const vec_uchar16 mirror = {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}; // output byte i takes input byte 15 - i
+    const vec_uchar16 bytes = (vec_uchar16)a.val;
+    return v_uint8x16(vec_perm(bytes, bytes, mirror));
+}
+
+inline v_int8x16 v_reverse(const v_int8x16 &a)
+{ const v_uint8x16 flipped = v_reverse(v_reinterpret_as_u8(a)); return v_reinterpret_as_s8(flipped); } // delegates to the unsigned overload
+
+inline v_uint16x8 v_reverse(const v_uint16x8 &a)
+{
+    static const vec_uchar16 mirror = {14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1}; // 2-byte groups in reverse order, bytes inside a group keep their order
+    const vec_uchar16 bytes = (vec_uchar16)a.val;
+    return v_reinterpret_as_u16(v_uint8x16(vec_perm(bytes, bytes, mirror)));
+}
+
+inline v_int16x8 v_reverse(const v_int16x8 &a)
+{ const v_uint16x8 flipped = v_reverse(v_reinterpret_as_u16(a)); return v_reinterpret_as_s16(flipped); } // delegates to the unsigned overload
+
+inline v_uint32x4 v_reverse(const v_uint32x4 &a)
+{
+    static const vec_uchar16 mirror = {12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3}; // 4-byte groups in reverse order, bytes inside a group keep their order
+    const vec_uchar16 bytes = (vec_uchar16)a.val;
+    return v_reinterpret_as_u32(v_uint8x16(vec_perm(bytes, bytes, mirror)));
+}
+
+inline v_int32x4 v_reverse(const v_int32x4 &a)
+{ const v_uint32x4 flipped = v_reverse(v_reinterpret_as_u32(a)); return v_reinterpret_as_s32(flipped); } // delegates to the unsigned overload
+
+inline v_float32x4 v_reverse(const v_float32x4 &a)
+{ const v_uint32x4 flipped = v_reverse(v_reinterpret_as_u32(a)); return v_reinterpret_as_f32(flipped); } // delegates to the unsigned overload
+
+inline v_uint64x2 v_reverse(const v_uint64x2 &a)
+{
+    static const vec_uchar16 mirror = {8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7}; // the two 8-byte groups trade places
+    const vec_uchar16 bytes = (vec_uchar16)a.val;
+    return v_reinterpret_as_u64(v_uint8x16(vec_perm(bytes, bytes, mirror)));
+}
+
+inline v_int64x2 v_reverse(const v_int64x2 &a)
+{ const v_uint64x2 flipped = v_reverse(v_reinterpret_as_u64(a)); return v_reinterpret_as_s64(flipped); } // delegates to the unsigned overload
+
+inline v_float64x2 v_reverse(const v_float64x2 &a)
+{ const v_uint64x2 flipped = v_reverse(v_reinterpret_as_u64(a)); return v_reinterpret_as_f64(flipped); } // delegates to the unsigned overload
+
+/* Extract: v_extract<s>(a, b) simply forwards to the two-operand v_rotate_right<s>(a, b) */
+template<int s, typename _Tpvec>
+inline _Tpvec v_extract(const _Tpvec& a, const _Tpvec& b)
+{ return v_rotate_right<s>(a, b); }
+
+////////// Reduce and mask /////////
+
+/** Reduce **/
+inline uint v_reduce_sum(const v_uint8x16& a)
+{
+    const vec_uint4 no_bias = vec_uint4_z;
+    const vec_uint4 quad_sums = vec_sum4s(a.val, no_bias); // four partial sums, one per group of four bytes
+    return (uint)vec_extract(vec_sums(vec_int4_c(quad_sums), vec_int4_c(no_bias)), 3);
+}
+inline int v_reduce_sum(const v_int8x16& a)
+{
+    const vec_int4 no_bias = vec_int4_z;
+    const vec_int4 quad_sums = vec_sum4s(a.val, no_bias);  // four partial sums, one per group of four bytes
+    return (int)vec_extract(vec_sums(quad_sums, no_bias), 3);
+}
+inline int v_reduce_sum(const v_int16x8& a)
+{
+    const vec_int4 no_bias = vec_int4_z; // vec_sum4s, then vec_sums, both fed a zero addend
+    return saturate_cast<int>(vec_extract(vec_sums(vec_sum4s(a.val, no_bias), no_bias), 3));
+}
+inline uint v_reduce_sum(const v_uint16x8& a) // exact sum of the eight 16-bit lanes of a
+{
+    const vec_int4 v4 = vec_int4_c(vec_unpackhu(a.val)) + vec_int4_c(vec_unpacklu(a.val)); // widen to 32 bits BEFORE adding: a 16-bit vec_adds clamps every pair sum at 65535
+    return saturate_cast<uint>(vec_extract(vec_sums(v4, vec_int4_z), 3)); // total is at most 8 * 65535, so the signed vec_sums cannot saturate
+}
+
+#define OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(_Tpvec, _Tpvec2, scalartype, suffix, func) /* v_reduce_<suffix> for 4-lane types: fold with func in two vec_sld steps */ \
+inline scalartype v_reduce_##suffix(const _Tpvec& a)                               \
+{                                                                                  \
+    const _Tpvec2 rs = func(a.val, vec_sld(a.val, a.val, 8)); /* combine with a copy moved by 8 bytes */ \
+    return vec_extract(func(rs, vec_sld(rs, rs, 4)), 0); /* then by 4 bytes; every lane now holds the result, lane 0 is read */ \
+}
+OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_uint32x4, vec_uint4, uint, sum, vec_add)
+OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_uint32x4, vec_uint4, uint, max, vec_max)
+OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_uint32x4, vec_uint4, uint, min, vec_min)
+OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_int32x4, vec_int4, int, sum, vec_add)
+OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_int32x4, vec_int4, int, max, vec_max)
+OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_int32x4, vec_int4, int, min, vec_min)
+OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_float32x4, vec_float4, float, sum, vec_add)
+OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_float32x4, vec_float4, float, max, vec_max)
+OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_float32x4, vec_float4, float, min, vec_min)
+
+inline uint64 v_reduce_sum(const v_uint64x2& a) // sum of the two 64-bit lanes
+{
+    return vec_extract(vec_add(a.val, vec_permi(a.val, a.val, 3)), 0); // vec_permi(a, a, 3) supplies the other lane for lane 0 - presumably a broadcast of lane 1; confirm in vsx_utils.hpp
+}
+inline int64 v_reduce_sum(const v_int64x2& a) // signed counterpart, same sequence
+{
+    return vec_extract(vec_add(a.val, vec_permi(a.val, a.val, 3)), 0);
+}
+inline double v_reduce_sum(const v_float64x2& a) // double-precision counterpart, same sequence
+{
+    return vec_extract(vec_add(a.val, vec_permi(a.val, a.val, 3)), 0);
+}
+
+#define OPENCV_HAL_IMPL_VSX_REDUCE_OP_8(_Tpvec, _Tpvec2, scalartype, suffix, func) /* v_reduce_<suffix> for 8-lane types: three fold steps (8, 4, 2 bytes) */ \
+inline scalartype v_reduce_##suffix(const _Tpvec& a)                               \
+{                                                                                  \
+    _Tpvec2 rs = func(a.val, vec_sld(a.val, a.val, 8));                            \
+    rs = func(rs, vec_sld(rs, rs, 4));                                             \
+    return vec_extract(func(rs, vec_sld(rs, rs, 2)), 0);                           \
+}
+OPENCV_HAL_IMPL_VSX_REDUCE_OP_8(v_uint16x8, vec_ushort8, ushort, max, vec_max) // only min and max are generated for the 16-bit types
+OPENCV_HAL_IMPL_VSX_REDUCE_OP_8(v_uint16x8, vec_ushort8, ushort, min, vec_min)
+OPENCV_HAL_IMPL_VSX_REDUCE_OP_8(v_int16x8, vec_short8, short, max, vec_max)
+OPENCV_HAL_IMPL_VSX_REDUCE_OP_8(v_int16x8, vec_short8, short, min, vec_min)
+
+#define OPENCV_HAL_IMPL_VSX_REDUCE_OP_16(_Tpvec, _Tpvec2, scalartype, suffix, func) /* v_reduce_<suffix> for 16-lane types: four fold steps (8, 4, 2, 1 bytes) */ \
+inline scalartype v_reduce_##suffix(const _Tpvec& a)                               \
+{                                                                                  \
+    _Tpvec2 rs = func(a.val, vec_sld(a.val, a.val, 8));                            \
+    rs = func(rs, vec_sld(rs, rs, 4));                                             \
+    rs = func(rs, vec_sld(rs, rs, 2));                                             \
+    return vec_extract(func(rs, vec_sld(rs, rs, 1)), 0);                           \
+}
+OPENCV_HAL_IMPL_VSX_REDUCE_OP_16(v_uint8x16, vec_uchar16, uchar, max, vec_max) // only min and max are generated for the 8-bit types
+OPENCV_HAL_IMPL_VSX_REDUCE_OP_16(v_uint8x16, vec_uchar16, uchar, min, vec_min)
+OPENCV_HAL_IMPL_VSX_REDUCE_OP_16(v_int8x16, vec_char16, schar, max, vec_max)
+OPENCV_HAL_IMPL_VSX_REDUCE_OP_16(v_int8x16, vec_char16, schar, min, vec_min)
+
+inline v_float32x4 v_reduce_sum4(const v_float32x4& a, const v_float32x4& b, // reduces four vectors at once; result lanes appear to be {sum(a), sum(b), sum(c), sum(d)}
+                                 const v_float32x4& c, const v_float32x4& d)
+{
+    vec_float4 ac = vec_add(vec_mergel(a.val, c.val), vec_mergeh(a.val, c.val)); // a and c interleaved, one half added onto the other
+    ac = vec_add(ac, vec_sld(ac, ac, 8));                                        // fold the remaining two partial sums per input
+
+    vec_float4 bd = vec_add(vec_mergel(b.val, d.val), vec_mergeh(b.val, d.val)); // same treatment for b and d
+    bd = vec_add(bd, vec_sld(bd, bd, 8));
+    return v_float32x4(vec_mergeh(ac, bd));                                      // interleave the a/c totals with the b/d totals
+}
+
+inline unsigned v_reduce_sad(const v_uint8x16& a, const v_uint8x16& b)
+{
+    const vec_uint4 no_bias = vec_uint4_z;
+    const vec_uint4 quad_sums = vec_sum4s(vec_absd(a.val, b.val), no_bias); // absolute differences, summed four bytes at a time
+    return (unsigned)vec_extract(vec_sums(vec_int4_c(quad_sums), vec_int4_c(no_bias)), 3);
+}
+inline unsigned v_reduce_sad(const v_int8x16& a, const v_int8x16& b) // exact sum of |a[i] - b[i]| over the 16 signed byte lanes
+{
+    const vec_uint4 zero4 = vec_uint4_z;
+    vec_uchar16 ad = vec_uchar16_c(vec_sub(vec_max(a.val, b.val), vec_min(a.val, b.val))); // wrapping max - min is the true 0..255 distance; vec_abss(vec_subs()) capped it at 127
+    vec_uint4 sum4 = vec_sum4s(ad, zero4); // unsigned partial sums, four bytes at a time
+    return (unsigned)vec_extract(vec_sums(vec_int4_c(sum4), vec_int4_c(zero4)), 3); // total is at most 16 * 255
+}
+inline unsigned v_reduce_sad(const v_uint16x8& a, const v_uint16x8& b) // sum of absolute differences over the eight 16-bit lanes
+{
+    vec_ushort8 ad = vec_absd(a.val, b.val);
+    VSX_UNUSED(vec_int4) sum = vec_sums(vec_int4_c(vec_unpackhu(ad)) + vec_int4_c(vec_unpacklu(ad)), vec_int4_z); // both halves are widened to 32 bits before they are added
+    return (unsigned)vec_extract(sum, 3);
+}
+inline unsigned v_reduce_sad(const v_int16x8& a, const v_int16x8& b) // exact sum of |a[i] - b[i]| over the eight signed 16-bit lanes
+{
+    // Wrapping max - min is the true 0..65535 distance; the former vec_abss(vec_subs()) capped each lane at 32767.
+    vec_ushort8 ad = vec_ushort8_c(vec_sub(vec_max(a.val, b.val), vec_min(a.val, b.val)));
+    VSX_UNUSED(vec_int4) sum = vec_sums(vec_int4_c(vec_unpackhu(ad)) + vec_int4_c(vec_unpacklu(ad)), vec_int4_z); // widen, add the halves, reduce (at most 8 * 65535)
+    return (unsigned)vec_extract(sum, 3);
+}
+inline unsigned v_reduce_sad(const v_uint32x4& a, const v_uint32x4& b)
+{
+    const vec_uint4 dist = vec_absd(a.val, b.val);                     // per-lane absolute difference
+    const vec_uint4 folded = vec_add(dist, vec_sld(dist, dist, 8));    // first fold: copy moved by 8 bytes
+    return vec_extract(vec_add(folded, vec_sld(folded, folded, 4)), 0); // second fold: copy moved by 4 bytes
+}
+inline unsigned v_reduce_sad(const v_int32x4& a, const v_int32x4& b) // sum of |a[i] - b[i]| over the four signed 32-bit lanes
+{
+    vec_int4 ad = vec_abss(vec_sub(a.val, b.val)); // NOTE(review): vec_sub can wrap and vec_abss / vec_sums are saturating forms, so very large differences or totals are presumably not exact - confirm
+    return (unsigned)vec_extract(vec_sums(ad, vec_int4_z), 3);
+}
+inline float v_reduce_sad(const v_float32x4& a, const v_float32x4& b)
+{
+    const vec_float4 dist = vec_abs(vec_sub(a.val, b.val));            // per-lane |a - b|
+    const vec_float4 folded = vec_add(dist, vec_sld(dist, dist, 8));   // first fold: copy moved by 8 bytes
+    return vec_extract(vec_add(folded, vec_sld(folded, folded, 4)), 0); // second fold: copy moved by 4 bytes
+}
+
+/** Popcount: every overload wraps vec_popcntu; the signed inputs also return the unsigned vector type of the same lane width **/
+inline v_uint8x16 v_popcount(const v_uint8x16& a)
+{ return v_uint8x16(vec_popcntu(a.val)); }
+inline v_uint8x16 v_popcount(const v_int8x16& a)
+{ return v_uint8x16(vec_popcntu(a.val)); }
+inline v_uint16x8 v_popcount(const v_uint16x8& a)
+{ return v_uint16x8(vec_popcntu(a.val)); }
+inline v_uint16x8 v_popcount(const v_int16x8& a)
+{ return v_uint16x8(vec_popcntu(a.val)); }
+inline v_uint32x4 v_popcount(const v_uint32x4& a)
+{ return v_uint32x4(vec_popcntu(a.val)); }
+inline v_uint32x4 v_popcount(const v_int32x4& a)
+{ return v_uint32x4(vec_popcntu(a.val)); }
+inline v_uint64x2 v_popcount(const v_uint64x2& a)
+{ return v_uint64x2(vec_popcntu(a.val)); }
+inline v_uint64x2 v_popcount(const v_int64x2& a)
+{ return v_uint64x2(vec_popcntu(a.val)); }
+
+/** Mask: v_signmask gathers one bit per lane into an int; the 8/16/32-bit versions use vec_vbpermq with a table of bit positions, the 64-bit version uses a shift **/
+inline int v_signmask(const v_uint8x16& a)
+{
+    static const vec_uchar16 qperm = {120, 112, 104, 96, 88, 80, 72, 64, 56, 48, 40, 32, 24, 16, 8, 0}; // one bit position per byte lane (multiples of 8) - presumably each lane's top bit; confirm the bit numbering
+    return vec_extract((vec_int4)vec_vbpermq(v_reinterpret_as_u8(a).val, qperm), 2); // the gathered bits are read from 32-bit element 2 of the result
+}
+inline int v_signmask(const v_int8x16& a)
+{ return v_signmask(v_reinterpret_as_u8(a)); }
+
+inline int v_signmask(const v_int16x8& a)
+{
+    static const vec_uchar16 qperm = {112, 96, 80, 64, 48, 32, 16, 0, 128, 128, 128, 128, 128, 128, 128, 128}; // eight positions (multiples of 16); the 128 entries are presumably out-of-range fillers yielding 0 - confirm
+    return vec_extract((vec_int4)vec_vbpermq(v_reinterpret_as_u8(a).val, qperm), 2);
+}
+inline int v_signmask(const v_uint16x8& a)
+{ return v_signmask(v_reinterpret_as_s16(a)); }
+
+inline int v_signmask(const v_int32x4& a)
+{
+    static const vec_uchar16 qperm = {96, 64, 32, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}; // four positions (multiples of 32), rest filled with 128
+    return vec_extract((vec_int4)vec_vbpermq(v_reinterpret_as_u8(a).val, qperm), 2);
+}
+inline int v_signmask(const v_uint32x4& a)
+{ return v_signmask(v_reinterpret_as_s32(a)); }
+inline int v_signmask(const v_float32x4& a)
+{ return v_signmask(v_reinterpret_as_s32(a)); }
+
+inline int v_signmask(const v_int64x2& a)
+{
+    VSX_UNUSED(const vec_dword2) sv = vec_sr(a.val, vec_udword2_sp(63)); // shift each lane right by 63 so only its top bit is left
+    return (int)vec_extract(sv, 0) | (int)vec_extract(sv, 1) << 1;       // lane 0 -> bit 0, lane 1 -> bit 1
+}
+inline int v_signmask(const v_uint64x2& a)
+{ return v_signmask(v_reinterpret_as_s64(a)); }
+inline int v_signmask(const v_float64x2& a)
+{ return v_signmask(v_reinterpret_as_s64(a)); }
+
+inline int v_scan_forward(const v_int8x16& a) { return trailingZeros32(v_signmask(a)); } // every overload: count of trailing zero bits in v_signmask(a), i.e. the index of the lowest set mask bit
+inline int v_scan_forward(const v_uint8x16& a) { return trailingZeros32(v_signmask(a)); }
+inline int v_scan_forward(const v_int16x8& a) { return trailingZeros32(v_signmask(a)); }
+inline int v_scan_forward(const v_uint16x8& a) { return trailingZeros32(v_signmask(a)); }
+inline int v_scan_forward(const v_int32x4& a) { return trailingZeros32(v_signmask(a)); }
+inline int v_scan_forward(const v_uint32x4& a) { return trailingZeros32(v_signmask(a)); }
+inline int v_scan_forward(const v_float32x4& a) { return trailingZeros32(v_signmask(a)); }
+inline int v_scan_forward(const v_int64x2& a) { return trailingZeros32(v_signmask(a)); }
+inline int v_scan_forward(const v_uint64x2& a) { return trailingZeros32(v_signmask(a)); }
+inline int v_scan_forward(const v_float64x2& a) { return trailingZeros32(v_signmask(a)); }
+
+template<typename _Tpvec>
+inline bool v_check_all(const _Tpvec& a) // true when every lane compares less than a default-constructed _Tpvec (assumed to hold zeros - confirm the default constructor)
+{ return vec_all_lt(a.val, _Tpvec().val); }
+inline bool v_check_all(const v_uint8x16& a) // unsigned and float inputs are reinterpreted as signed lanes of the same width so the template above applies
+{ return v_check_all(v_reinterpret_as_s8(a)); }
+inline bool v_check_all(const v_uint16x8& a)
+{ return v_check_all(v_reinterpret_as_s16(a)); }
+inline bool v_check_all(const v_uint32x4& a)
+{ return v_check_all(v_reinterpret_as_s32(a)); }
+inline bool v_check_all(const v_uint64x2& a)
+{ return v_check_all(v_reinterpret_as_s64(a)); }
+inline bool v_check_all(const v_float32x4& a)
+{ return v_check_all(v_reinterpret_as_s32(a)); }
+inline bool v_check_all(const v_float64x2& a)
+{ return v_check_all(v_reinterpret_as_s64(a)); }
+
+template<typename _Tpvec>
+inline bool v_check_any(const _Tpvec& a) // true when at least one lane compares less than a default-constructed _Tpvec (assumed to hold zeros - confirm the default constructor)
+{ return vec_any_lt(a.val, _Tpvec().val); }
+inline bool v_check_any(const v_uint8x16& a) // unsigned and float inputs are reinterpreted as signed lanes of the same width so the template above applies
+{ return v_check_any(v_reinterpret_as_s8(a)); }
+inline bool v_check_any(const v_uint16x8& a)
+{ return v_check_any(v_reinterpret_as_s16(a)); }
+inline bool v_check_any(const v_uint32x4& a)
+{ return v_check_any(v_reinterpret_as_s32(a)); }
+inline bool v_check_any(const v_uint64x2& a)
+{ return v_check_any(v_reinterpret_as_s64(a)); }
+inline bool v_check_any(const v_float32x4& a)
+{ return v_check_any(v_reinterpret_as_s32(a)); }
+inline bool v_check_any(const v_float64x2& a)
+{ return v_check_any(v_reinterpret_as_s64(a)); }
+
+////////// Other math /////////
+
+/** Some frequent operations: v_sqrt wraps vec_sqrt and v_invsqrt wraps vec_rsqrt, for the float and double vector types **/
+inline v_float32x4 v_sqrt(const v_float32x4& x)
+{ return v_float32x4(vec_sqrt(x.val)); }
+inline v_float64x2 v_sqrt(const v_float64x2& x)
+{ return v_float64x2(vec_sqrt(x.val)); }
+
+inline v_float32x4 v_invsqrt(const v_float32x4& x)
+{ return v_float32x4(vec_rsqrt(x.val)); }
+inline v_float64x2 v_invsqrt(const v_float64x2& x)
+{ return v_float64x2(vec_rsqrt(x.val)); }
+
+#define OPENCV_HAL_IMPL_VSX_MULADD(_Tpvec) /* float helpers built on vec_madd: v_magnitude, v_sqr_magnitude, v_fma, v_muladd */ \
+inline _Tpvec v_magnitude(const _Tpvec& a, const _Tpvec& b) /* sqrt(a*a + b*b) */ \
+{ return _Tpvec(vec_sqrt(vec_madd(a.val, a.val, vec_mul(b.val, b.val)))); } \
+inline _Tpvec v_sqr_magnitude(const _Tpvec& a, const _Tpvec& b) /* a*a + b*b */ \
+{ return _Tpvec(vec_madd(a.val, a.val, vec_mul(b.val, b.val))); }           \
+inline _Tpvec v_fma(const _Tpvec& a, const _Tpvec& b, const _Tpvec& c) /* a*b + c through vec_madd */ \
+{ return _Tpvec(vec_madd(a.val, b.val, c.val)); }                           \
+inline _Tpvec v_muladd(const _Tpvec& a, const _Tpvec& b, const _Tpvec& c) /* same body as v_fma */ \
+{ return _Tpvec(vec_madd(a.val, b.val, c.val)); }
+
+OPENCV_HAL_IMPL_VSX_MULADD(v_float32x4)
+OPENCV_HAL_IMPL_VSX_MULADD(v_float64x2)
+
+inline v_int32x4 v_muladd(const v_int32x4& a, const v_int32x4& b, const v_int32x4& c) // integer overload: composed from the v_int32x4 operator* and operator+
+{ return a * b + c; }
+
+// TODO: exp, log, sin, cos
+
+/** Absolute values: the signed integer overloads apply vec_abs and return the result cast to the unsigned vector type of the same lane width **/
+inline v_uint8x16 v_abs(const v_int8x16& x)
+{ return v_uint8x16(vec_uchar16_c(vec_abs(x.val))); }
+
+inline v_uint16x8 v_abs(const v_int16x8& x)
+{ return v_uint16x8(vec_ushort8_c(vec_abs(x.val))); }
+
+inline v_uint32x4 v_abs(const v_int32x4& x)
+{ return v_uint32x4(vec_uint4_c(vec_abs(x.val))); }
+
+inline v_float32x4 v_abs(const v_float32x4& x) // the float overloads keep their own vector type
+{ return v_float32x4(vec_abs(x.val)); }
+
+inline v_float64x2 v_abs(const v_float64x2& x)
+{ return v_float64x2(vec_abs(x.val)); }
+
+/** Absolute difference: a generic template through vec_absd, plus overloads for the signed integer and float types **/
+// unsigned: the function template below matches any _Tpvec; the non-template overloads that follow take over for the signed and float types
+OPENCV_HAL_IMPL_VSX_BIN_FUNC(v_absdiff, vec_absd)
+
+inline v_uint8x16 v_absdiff(const v_int8x16& a, const v_int8x16& b) // signed inputs: max - min with the wrapping subtract, reinterpreted as unsigned
+{ return v_reinterpret_as_u8(v_sub_wrap(v_max(a, b), v_min(a, b))); }
+inline v_uint16x8 v_absdiff(const v_int16x8& a, const v_int16x8& b)
+{ return v_reinterpret_as_u16(v_sub_wrap(v_max(a, b), v_min(a, b))); }
+inline v_uint32x4 v_absdiff(const v_int32x4& a, const v_int32x4& b) // operator- on v_int32x4 is vec_sub, so no v_sub_wrap is needed here
+{ return v_reinterpret_as_u32(v_max(a, b) - v_min(a, b)); }
+
+inline v_float32x4 v_absdiff(const v_float32x4& a, const v_float32x4& b) // float inputs: |a - b|
+{ return v_abs(a - b); }
+inline v_float64x2 v_absdiff(const v_float64x2& a, const v_float64x2& b)
+{ return v_abs(a - b); }
+
+/** Absolute difference for signed integers: stays in the signed type, computed as vec_abss(vec_subs(a, b)) (the saturating AltiVec forms) **/
+inline v_int8x16 v_absdiffs(const v_int8x16& a, const v_int8x16& b)
+{ return v_int8x16(vec_abss(vec_subs(a.val, b.val))); }
+inline v_int16x8 v_absdiffs(const v_int16x8& a, const v_int16x8& b)
+{ return v_int16x8(vec_abss(vec_subs(a.val, b.val))); }
+
+////////// Conversions /////////
+
+/** Rounding: float -> v_int32x4 conversions; the float32 versions use vec_cts, the float64 versions use vec_ctso and merge the result with vec_mergesqo **/
+inline v_int32x4 v_round(const v_float32x4& a) // vec_rint first, then convert
+{ return v_int32x4(vec_cts(vec_rint(a.val))); }
+
+inline v_int32x4 v_round(const v_float64x2& a) // two doubles only: the second merge operand is vec_int4_z
+{ return v_int32x4(vec_mergesqo(vec_ctso(vec_rint(a.val)), vec_int4_z)); }
+
+inline v_int32x4 v_round(const v_float64x2& a, const v_float64x2& b) // four doubles: conversions of a and b merged into one vector
+{ return v_int32x4(vec_mergesqo(vec_ctso(vec_rint(a.val)), vec_ctso(vec_rint(b.val)))); }
+
+inline v_int32x4 v_floor(const v_float32x4& a) // vec_floor first, then convert
+{ return v_int32x4(vec_cts(vec_floor(a.val))); }
+
+inline v_int32x4 v_floor(const v_float64x2& a)
+{ return v_int32x4(vec_mergesqo(vec_ctso(vec_floor(a.val)), vec_int4_z)); }
+
+inline v_int32x4 v_ceil(const v_float32x4& a) // vec_ceil first, then convert
+{ return v_int32x4(vec_cts(vec_ceil(a.val))); }
+
+inline v_int32x4 v_ceil(const v_float64x2& a)
+{ return v_int32x4(vec_mergesqo(vec_ctso(vec_ceil(a.val)), vec_int4_z)); }
+
+inline v_int32x4 v_trunc(const v_float32x4& a) // converts directly, with no rounding call in front
+{ return v_int32x4(vec_cts(a.val)); }
+
+inline v_int32x4 v_trunc(const v_float64x2& a)
+{ return v_int32x4(vec_mergesqo(vec_ctso(a.val), vec_int4_z)); }
+
+/** To float: conversions to v_float32x4 and v_float64x2; the *_high variants start from vec_mergel instead of vec_mergeh **/
+inline v_float32x4 v_cvt_f32(const v_int32x4& a)
+{ return v_float32x4(vec_ctf(a.val)); }
+
+inline v_float32x4 v_cvt_f32(const v_float64x2& a) // two doubles only: the second merge operand is vec_float4_z
+{ return v_float32x4(vec_mergesqo(vec_cvfo(a.val), vec_float4_z)); }
+
+inline v_float32x4 v_cvt_f32(const v_float64x2& a, const v_float64x2& b) // four doubles: conversions of a and b merged into one vector
+{ return v_float32x4(vec_mergesqo(vec_cvfo(a.val), vec_cvfo(b.val))); }
+
+inline v_float64x2 v_cvt_f64(const v_int32x4& a) // lanes are duplicated with vec_mergeh(a, a) before vec_ctdo converts them
+{ return v_float64x2(vec_ctdo(vec_mergeh(a.val, a.val))); }
+
+inline v_float64x2 v_cvt_f64_high(const v_int32x4& a) // same, starting from vec_mergel(a, a)
+{ return v_float64x2(vec_ctdo(vec_mergel(a.val, a.val))); }
+
+inline v_float64x2 v_cvt_f64(const v_float32x4& a) // float source: vec_cvfo instead of vec_ctdo
+{ return v_float64x2(vec_cvfo(vec_mergeh(a.val, a.val))); }
+
+inline v_float64x2 v_cvt_f64_high(const v_float32x4& a)
+{ return v_float64x2(vec_cvfo(vec_mergel(a.val, a.val))); }
+
+inline v_float64x2 v_cvt_f64(const v_int64x2& a) // 64-bit integers convert lane for lane with vec_ctd
+{ return v_float64x2(vec_ctd(a.val)); }
+
+////////////// Lookup table access ////////////////////
+
+inline v_int8x16 v_lut(const schar* tab, const int* idx) // gather 16 bytes: lane i = tab[idx[i]]
+{
+    return v_int8x16(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]], tab[idx[4]], tab[idx[5]], tab[idx[6]], tab[idx[7]],
+                     tab[idx[8]], tab[idx[9]], tab[idx[10]], tab[idx[11]], tab[idx[12]], tab[idx[13]], tab[idx[14]], tab[idx[15]]);
+}
+inline v_int8x16 v_lut_pairs(const schar* tab, const int* idx) // 8 indices; each fetches the 2 adjacent bytes starting at tab + idx[i]
+{
+    return v_reinterpret_as_s8(v_int16x8(*(const short*)(tab+idx[0]), *(const short*)(tab+idx[1]), *(const short*)(tab+idx[2]), *(const short*)(tab+idx[3]), // each pair is read as one short through a pointer cast
+                                       *(const short*)(tab+idx[4]), *(const short*)(tab+idx[5]), *(const short*)(tab+idx[6]), *(const short*)(tab+idx[7])));
+}
+inline v_int8x16 v_lut_quads(const schar* tab, const int* idx) // 4 indices; each fetches the 4 adjacent bytes starting at tab + idx[i]
+{
+    return v_reinterpret_as_s8(v_int32x4(*(const int*)(tab+idx[0]), *(const int*)(tab+idx[1]), *(const int*)(tab+idx[2]), *(const int*)(tab+idx[3]))); // each quad is read as one int
+}
+inline v_uint8x16 v_lut(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut((const schar*)tab, idx)); } // unsigned forms: cast the table to schar, reuse the signed code, reinterpret the result
+inline v_uint8x16 v_lut_pairs(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut_pairs((const schar*)tab, idx)); }
+inline v_uint8x16 v_lut_quads(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut_quads((const schar*)tab, idx)); }
+
+inline v_int16x8 v_lut(const short* tab, const int* idx) // gather 8 shorts: lane i = tab[idx[i]]
+{
+    return v_int16x8(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]], tab[idx[4]], tab[idx[5]], tab[idx[6]], tab[idx[7]]);
+}
+inline v_int16x8 v_lut_pairs(const short* tab, const int* idx) // 4 indices; each fetches 2 adjacent shorts starting at tab + idx[i]
+{
+    return v_reinterpret_as_s16(v_int32x4(*(const int*)(tab + idx[0]), *(const int*)(tab + idx[1]), *(const int*)(tab + idx[2]), *(const int*)(tab + idx[3]))); // each pair is read as one int
+}
+inline v_int16x8 v_lut_quads(const short* tab, const int* idx) // 2 indices; each fetches 4 adjacent shorts starting at tab + idx[i]
+{
+    return v_reinterpret_as_s16(v_int64x2(*(const int64*)(tab + idx[0]), *(const int64*)(tab + idx[1]))); // each quad is read as one int64
+}
+inline v_uint16x8 v_lut(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut((const short*)tab, idx)); } // unsigned forms: cast the table to short, reuse the signed code, reinterpret the result
+inline v_uint16x8 v_lut_pairs(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut_pairs((const short*)tab, idx)); }
+inline v_uint16x8 v_lut_quads(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut_quads((const short*)tab, idx)); }
+
+inline v_int32x4 v_lut(const int* tab, const int* idx) // gather 4 ints: lane i = tab[idx[i]]
+{
+    return v_int32x4(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]);
+}
+inline v_int32x4 v_lut_pairs(const int* tab, const int* idx) // 2 indices; each fetches 2 adjacent ints starting at tab + idx[i]
+{
+    return v_reinterpret_as_s32(v_int64x2(*(const int64*)(tab + idx[0]), *(const int64*)(tab + idx[1]))); // each pair is read as one int64
+}
+inline v_int32x4 v_lut_quads(const int* tab, const int* idx) // only idx[0] is read: one vector load starting at tab + idx[0]
+{
+    return v_int32x4(vsx_ld(0, tab + idx[0]));
+}
+inline v_uint32x4 v_lut(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut((const int*)tab, idx)); } // unsigned forms: cast the table to int, reuse the signed code, reinterpret the result
+inline v_uint32x4 v_lut_pairs(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut_pairs((const int*)tab, idx)); }
+inline v_uint32x4 v_lut_quads(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut_quads((const int*)tab, idx)); }
+
+inline v_int64x2 v_lut(const int64_t* tab, const int* idx) // gather 2 values: lane i = tab[idx[i]]
+{
+    return v_int64x2(tab[idx[0]], tab[idx[1]]);
+}
+inline v_int64x2 v_lut_pairs(const int64_t* tab, const int* idx) // only idx[0] is read: one vsx_ld2 load starting at tab + idx[0]
+{
+    return v_int64x2(vsx_ld2(0, tab + idx[0]));
+}
+inline v_uint64x2 v_lut(const uint64_t* tab, const int* idx) { return v_reinterpret_as_u64(v_lut((const int64_t *)tab, idx)); } // unsigned forms: cast the table to int64_t, reuse the signed code, reinterpret the result
+inline v_uint64x2 v_lut_pairs(const uint64_t* tab, const int* idx) { return v_reinterpret_as_u64(v_lut_pairs((const int64_t *)tab, idx)); }
+
+inline v_float32x4 v_lut(const float* tab, const int* idx) // gather 4 floats: lane i = tab[idx[i]]
+{
+    return v_float32x4(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]);
+}
+inline v_float32x4 v_lut_pairs(const float* tab, const int* idx) { return v_reinterpret_as_f32(v_lut_pairs((const int*)tab, idx)); } // reuses the int pair lookup on the same bytes
+inline v_float32x4 v_lut_quads(const float* tab, const int* idx) { return v_load(tab + *idx); } // only the first index is read: one v_load at tab + idx[0]
+
+inline v_float64x2 v_lut(const double* tab, const int* idx) // gather 2 doubles: lane i = tab[idx[i]]
+{
+    return v_float64x2(tab[idx[0]], tab[idx[1]]);
+}
+inline v_float64x2 v_lut_pairs(const double* tab, const int* idx) { return v_load(tab + *idx); } // only the first index is read: one v_load at tab + idx[0]
+
+inline v_int32x4 v_lut(const int* tab, const v_int32x4& idxvec) // gather with indices held in a vector instead of an array
+{
+    const int idx[4] = { // pull the four indices out as scalars
+        vec_extract(idxvec.val, 0),
+        vec_extract(idxvec.val, 1),
+        vec_extract(idxvec.val, 2),
+        vec_extract(idxvec.val, 3)
+    };
+    return v_int32x4(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]); // lane i = tab[idx[i]]
+}
+
+inline v_uint32x4 v_lut(const unsigned* tab, const v_int32x4& idxvec) // unsigned table, same scheme as the int overload
+{
+    const int idx[4] = {
+        vec_extract(idxvec.val, 0),
+        vec_extract(idxvec.val, 1),
+        vec_extract(idxvec.val, 2),
+        vec_extract(idxvec.val, 3)
+    };
+    return v_uint32x4(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]);
+}
+
+inline v_float32x4 v_lut(const float* tab, const v_int32x4& idxvec) // float table, same scheme as the int overload
+{
+    const int idx[4] = {
+        vec_extract(idxvec.val, 0),
+        vec_extract(idxvec.val, 1),
+        vec_extract(idxvec.val, 2),
+        vec_extract(idxvec.val, 3)
+    };
+    return v_float32x4(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]);
+}
+
+inline v_float64x2 v_lut(const double* tab, const v_int32x4& idxvec) // double table: only lanes 0 and 1 of idxvec are used
+{
+    const int idx[2] = {
+        vec_extract(idxvec.val, 0),
+        vec_extract(idxvec.val, 1)
+    };
+    return v_float64x2(tab[idx[0]], tab[idx[1]]);
+}
+
+inline void v_lut_deinterleave(const float* tab, const v_int32x4& idxvec, v_float32x4& x, v_float32x4& y) // look up 4 (x, y) float pairs and split them into x and y
+{
+    vec_float4 xy0 = vec_ld_l8(tab + vec_extract(idxvec.val, 0)); // one load per index lane, starting at tab + index
+    vec_float4 xy1 = vec_ld_l8(tab + vec_extract(idxvec.val, 1));
+    vec_float4 xy2 = vec_ld_l8(tab + vec_extract(idxvec.val, 2));
+    vec_float4 xy3 = vec_ld_l8(tab + vec_extract(idxvec.val, 3));
+    vec_float4 xy02 = vec_mergeh(xy0, xy2); // x0, x2, y0, y2
+    vec_float4 xy13 = vec_mergeh(xy1, xy3); // x1, x3, y1, y3
+    x.val = vec_mergeh(xy02, xy13); // from the layouts noted above: x0, x1, x2, x3
+    y.val = vec_mergel(xy02, xy13); // from the layouts noted above: y0, y1, y2, y3
+}
+inline void v_lut_deinterleave(const double* tab, const v_int32x4& idxvec, v_float64x2& x, v_float64x2& y) // double variant: uses index lanes 0 and 1 only
+{
+    vec_double2 xy0 = vsx_ld(vec_extract(idxvec.val, 0), tab); // the index is passed as the offset argument of vsx_ld
+    vec_double2 xy1 = vsx_ld(vec_extract(idxvec.val, 1), tab);
+    x.val = vec_mergeh(xy0, xy1); // first elements of both loads
+    y.val = vec_mergel(xy0, xy1); // second elements of both loads
+}
+
+inline v_int8x16 v_interleave_pairs(const v_int8x16& vec) // byte shuffle: within each group of 4 bytes the two middle bytes trade places
+{
+    static const vec_uchar16 perm = {0, 2, 1, 3, 4, 6, 5, 7, 8, 10, 9, 11, 12, 14, 13, 15}; // source byte index for every destination byte
+    return v_int8x16(vec_perm(vec.val, vec.val, perm)); // both permute inputs are the same vector
+}
+inline v_uint8x16 v_interleave_pairs(const v_uint8x16& vec) // unsigned form forwards to the signed one via reinterpret
+{ return v_reinterpret_as_u8(v_interleave_pairs(v_reinterpret_as_s8(vec))); }
+
+inline v_int8x16 v_interleave_quads(const v_int8x16& vec) // byte shuffle: within each 8-byte half, bytes 0-3 alternate with bytes 4-7
+{
+    static const vec_uchar16 perm = {0, 4, 1, 5, 2, 6, 3, 7, 8, 12, 9, 13, 10, 14, 11, 15}; // source byte index for every destination byte
+    return v_int8x16(vec_perm(vec.val, vec.val, perm)); // both permute inputs are the same vector
+}
+inline v_uint8x16 v_interleave_quads(const v_uint8x16& vec) // unsigned form forwards to the signed one via reinterpret
+{ return v_reinterpret_as_u8(v_interleave_quads(v_reinterpret_as_s8(vec))); }
+
+inline v_int16x8 v_interleave_pairs(const v_int16x8& vec) // 16-bit lanes reordered to 0,2,1,3, 4,6,5,7
+{
+    static const vec_uchar16 perm = {0,1, 4,5, 2,3, 6,7, 8,9, 12,13, 10,11, 14,15}; // byte indices, two per 16-bit lane
+    return v_int16x8(vec_perm(vec.val, vec.val, perm));
+}
+inline v_uint16x8 v_interleave_pairs(const v_uint16x8& vec) // unsigned form forwards to the signed one via reinterpret
+{ return v_reinterpret_as_u16(v_interleave_pairs(v_reinterpret_as_s16(vec))); }
+
+inline v_int16x8 v_interleave_quads(const v_int16x8& vec) // 16-bit lanes reordered to 0,4,1,5,2,6,3,7
+{
+    static const vec_uchar16 perm = {0,1, 8,9, 2,3, 10,11, 4,5, 12,13, 6,7, 14,15}; // byte indices, two per 16-bit lane
+    return v_int16x8(vec_perm(vec.val, vec.val, perm));
+}
+inline v_uint16x8 v_interleave_quads(const v_uint16x8& vec) // unsigned form forwards to the signed one via reinterpret
+{ return v_reinterpret_as_u16(v_interleave_quads(v_reinterpret_as_s16(vec))); }
+
+inline v_int32x4 v_interleave_pairs(const v_int32x4& vec) // 32-bit lanes reordered to 0,2,1,3
+{
+    static const vec_uchar16 perm = {0,1,2,3, 8,9,10,11, 4,5,6,7, 12,13,14,15}; // byte indices, four per 32-bit lane
+    return v_int32x4(vec_perm(vec.val, vec.val, perm));
+}
+inline v_uint32x4 v_interleave_pairs(const v_uint32x4& vec) // unsigned form forwards to the s32 one via reinterpret
+{ return v_reinterpret_as_u32(v_interleave_pairs(v_reinterpret_as_s32(vec))); }
+inline v_float32x4 v_interleave_pairs(const v_float32x4& vec) // float form forwards to the s32 one via reinterpret
+{ return v_reinterpret_as_f32(v_interleave_pairs(v_reinterpret_as_s32(vec))); }
+
+inline v_int8x16 v_pack_triplets(const v_int8x16& vec) // keeps the first 3 bytes of each group of 4, packed contiguously
+{
+    static const vec_uchar16 perm = {0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 15, 15, 15, 15}; // source bytes 3, 7 and 11 are dropped; the last 4 slots repeat byte 15
+    return v_int8x16(vec_perm(vec.val, vec.val, perm));
+}
+inline v_uint8x16 v_pack_triplets(const v_uint8x16& vec) // unsigned form forwards to the signed one via reinterpret
+{ return v_reinterpret_as_u8(v_pack_triplets(v_reinterpret_as_s8(vec))); }
+
+inline v_int16x8 v_pack_triplets(const v_int16x8& vec) // 16-bit lanes reordered to 0,1,2,4,5,6,7,7
+{
+    static const vec_uchar16 perm = {0,1, 2,3, 4,5, 8,9, 10,11, 12,13, 14,15, 14,15}; // source lane 3 (bytes 6,7) is dropped; the last slot repeats lane 7
+    return v_int16x8(vec_perm(vec.val, vec.val, perm));
+}
+inline v_uint16x8 v_pack_triplets(const v_uint16x8& vec) // unsigned form forwards to the signed one via reinterpret
+{ return v_reinterpret_as_u16(v_pack_triplets(v_reinterpret_as_s16(vec))); }
+
+inline v_int32x4 v_pack_triplets(const v_int32x4& vec) // 32-bit forms return the input unchanged
+{ return vec; }
+inline v_uint32x4 v_pack_triplets(const v_uint32x4& vec)
+{ return vec; }
+inline v_float32x4 v_pack_triplets(const v_float32x4& vec)
+{ return vec; }
+
+/////// FP16 support ////////
+
+inline v_float32x4 v_load_expand(const float16_t* ptr) // load half-precision values from ptr and widen them to f32x4
+{
+    vec_ushort8 vf16 = vec_ld_l8((const ushort*)ptr); // raw half bits, held as ushort lanes
+#if CV_VSX3 && defined(vec_extract_fp_from_shorth) // path 1: conversion helper available as a macro
+    return v_float32x4(vec_extract_fp_from_shorth(vf16));
+#elif CV_VSX3 && !defined(CV_COMPILER_VSX_BROKEN_ASM) // path 2: xvcvhpsp issued through inline asm
+    vec_float4 vf32;
+    __asm__ __volatile__ ("xvcvhpsp %x0,%x1" : "=wa" (vf32) : "wa" (vec_mergeh(vf16, vf16)));
+    return v_float32x4(vf32);
+#else // path 3: conversion done with integer bit manipulation
+    const vec_int4 z = vec_int4_z, delta = vec_int4_sp(0x38000000);
+    const vec_int4 signmask = vec_int4_sp(0x80000000);
+    const vec_int4 maxexp = vec_int4_sp(0x7c000000); // half exponent field, positioned in the top 16 bits of a 32-bit lane
+    const vec_float4 deltaf = vec_float4_c(vec_int4_sp(0x38800000));
+
+    vec_int4 bits = vec_int4_c(vec_mergeh(vec_short8_c(z), vec_short8_c(vf16))); // interleave zero shorts with the half values to form 32-bit lanes
+    vec_int4 e = vec_and(bits, maxexp), sign = vec_and(bits, signmask); // isolated exponent field and sign bit
+    vec_int4 t = vec_add(vec_sr(vec_xor(bits, sign), vec_uint4_sp(3)), delta); // ((h & 0x7fff) << 13) + delta
+    vec_int4 zt = vec_int4_c(vec_sub(vec_float4_c(vec_add(t, vec_int4_sp(1 << 23))), deltaf)); // alternative result, used where the exponent field is zero
+
+    t = vec_add(t, vec_and(delta, vec_cmpeq(maxexp, e))); // add delta a second time where the exponent field is all ones
+    vec_bint4 zmask = vec_cmpeq(e, z);
+    vec_int4 ft = vec_sel(t, zt, zmask); // zt where e == 0, t elsewhere
+    return v_float32x4(vec_float4_c(vec_or(ft, sign))); // put the sign bit back
+#endif
+}
+
+inline void v_pack_store(float16_t* ptr, const v_float32x4& v) // narrow the f32 lanes of v to half precision and store them at ptr
+{
+// fixme: Is there any builtin op or intrinsic that covers "xvcvsphp"?
+#if CV_VSX3 && !defined(CV_COMPILER_VSX_BROKEN_ASM) // path 1: xvcvsphp issued through inline asm
+    vec_ushort8 vf16;
+    __asm__ __volatile__ ("xvcvsphp %x0,%x1" : "=wa" (vf16) : "wa" (v.val));
+    vec_st_l8(vec_mergesqe(vf16, vf16), ptr);
+#else // path 2: conversion done with integer bit manipulation
+    const vec_int4 signmask = vec_int4_sp(0x80000000);
+    const vec_int4 rval = vec_int4_sp(0x3f000000); // bit pattern of 0.5f
+
+    vec_int4 t = vec_int4_c(v.val);
+    vec_int4 sign = vec_sra(vec_and(t, signmask), vec_uint4_sp(16)); // sign bit shifted down by 16
+    t = vec_and(vec_nor(signmask, signmask), t); // nor(x, x) is ~x, so this clears the sign bit
+
+    vec_bint4 finitemask = vec_cmpgt(vec_int4_sp(0x47800000), t); // magnitude below 0x47800000 (bit pattern of 65536.0f)
+    vec_bint4 isnan = vec_cmpgt(t, vec_int4_sp(0x7f800000)); // magnitude bits above the f32 infinity pattern
+    vec_int4 naninf = vec_sel(vec_int4_sp(0x7c00), vec_int4_sp(0x7e00), isnan); // 0x7e00 where isnan, otherwise 0x7c00
+    vec_bint4 tinymask = vec_cmpgt(vec_int4_sp(0x38800000), t); // magnitude below 0x38800000 (bit pattern of 2^-14)
+    vec_int4 tt = vec_int4_c(vec_add(vec_float4_c(t), vec_float4_c(rval))); // float add of 0.5f ...
+    tt = vec_sub(tt, rval); // ... then integer subtract of the same bit pattern
+    vec_int4 odd = vec_and(vec_sr(t, vec_uint4_sp(13)), vec_int4_sp(1)); // bit 13 of t
+    vec_int4 nt = vec_add(t, vec_int4_sp(0xc8000fff));
+    nt = vec_sr(vec_add(nt, odd), vec_uint4_sp(13)); // NOTE(review): looks like rebias plus round-to-nearest-even before dropping 13 bits - confirm
+    t = vec_sel(nt, tt, tinymask); // tt for tiny inputs, nt otherwise
+    t = vec_sel(naninf, t, finitemask); // keep t for in-range inputs, naninf otherwise
+    t = vec_or(t, sign);
+    vec_st_l8(vec_packs(t, t), ptr); // pack the 32-bit lanes and store through vec_st_l8
+#endif
+}
+
+inline void v_cleanup() {} // intentionally empty in this backend
+
+
+/** Reinterpret **/
+/** it's up there with the load and store operations **/
+
+////////// Matrix operations /////////
+
+//////// Dot Product ////////
+// 16 >> 32
+inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b) // 16-bit products accumulated into 32-bit lanes by vec_msum, zero accumulator
+{ return v_int32x4(vec_msum(a.val, b.val, vec_int4_z)); }
+inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b, const v_int32x4& c) // same, with c passed as the vec_msum accumulator
+{ return v_int32x4(vec_msum(a.val, b.val, c.val)); }
+
+// 32 >> 64
+inline v_int64x2 v_dotprod(const v_int32x4& a, const v_int32x4& b) // 32-bit products widened to 64 bits, even and odd products summed
+{
+    vec_dword2 even = vec_mule(a.val, b.val); // products of the even-numbered lanes
+    vec_dword2 odd = vec_mulo(a.val, b.val); // products of the odd-numbered lanes
+    return v_int64x2(vec_add(even, odd));
+}
+inline v_int64x2 v_dotprod(const v_int32x4& a, const v_int32x4& b, const v_int64x2& c) // two-argument form plus c
+{ return v_dotprod(a, b) + c; }
+
+// 8 >> 32
+inline v_uint32x4 v_dotprod_expand(const v_uint8x16& a, const v_uint8x16& b, const v_uint32x4& c) // u8 products accumulated into u32 lanes by vec_msum, c as accumulator
+{ return v_uint32x4(vec_msum(a.val, b.val, c.val)); }
+inline v_uint32x4 v_dotprod_expand(const v_uint8x16& a, const v_uint8x16& b) // same with a zero accumulator
+{ return v_uint32x4(vec_msum(a.val, b.val, vec_uint4_z)); }
+
+inline v_int32x4 v_dotprod_expand(const v_int8x16& a, const v_int8x16& b) // s8 variant: bytes are widened to shorts first, then two chained vec_msum calls
+{
+    const vec_ushort8 eight = vec_ushort8_sp(8);
+    vec_short8 a0 = vec_sra((vec_short8)vec_sld(a.val, a.val, 1), eight); // even
+    vec_short8 a1 = vec_sra((vec_short8)a.val, eight); // odd
+    vec_short8 b0 = vec_sra((vec_short8)vec_sld(b.val, b.val, 1), eight); // same split for b: shifted-by-one-byte copy, arithmetic shift right by 8
+    vec_short8 b1 = vec_sra((vec_short8)b.val, eight);
+    return v_int32x4(vec_msum(a0, b0, vec_msum(a1, b1, vec_int4_z))); // inner msum result feeds the outer one as accumulator
+}
+
+inline v_int32x4 v_dotprod_expand(const v_int8x16& a, const v_int8x16& b, const v_int32x4& c) // as above, but the inner vec_msum starts from c
+{
+    const vec_ushort8 eight = vec_ushort8_sp(8);
+    vec_short8 a0 = vec_sra((vec_short8)vec_sld(a.val, a.val, 1), eight); // even
+    vec_short8 a1 = vec_sra((vec_short8)a.val, eight); // odd
+    vec_short8 b0 = vec_sra((vec_short8)vec_sld(b.val, b.val, 1), eight);
+    vec_short8 b1 = vec_sra((vec_short8)b.val, eight);
+    return v_int32x4(vec_msum(a0, b0, vec_msum(a1, b1, c.val)));
+}
+
+// 16 >> 64
+inline v_uint64x2 v_dotprod_expand(const v_uint16x8& a, const v_uint16x8& b) // u16 products summed into two u64 lanes
+{
+    const vec_uint4 zero = vec_uint4_z;
+    vec_uint4 even = vec_mule(a.val, b.val); // 32-bit products of the even-numbered lanes
+    vec_uint4 odd  = vec_mulo(a.val, b.val); // 32-bit products of the odd-numbered lanes
+    vec_udword2 e0 = (vec_udword2)vec_mergee(even, zero); // merging with zero and viewing as 64-bit lanes widens the products
+    vec_udword2 e1 = (vec_udword2)vec_mergeo(even, zero);
+    vec_udword2 o0 = (vec_udword2)vec_mergee(odd, zero);
+    vec_udword2 o1 = (vec_udword2)vec_mergeo(odd, zero);
+    vec_udword2 s0 = vec_add(e0, o0);
+    vec_udword2 s1 = vec_add(e1, o1);
+    return v_uint64x2(vec_add(s0, s1)); // all four widened partial vectors added together
+}
+inline v_uint64x2 v_dotprod_expand(const v_uint16x8& a, const v_uint16x8& b, const v_uint64x2& c) // two-argument form plus c
+{ return v_dotprod_expand(a, b) + c; }
+
+inline v_int64x2 v_dotprod_expand(const v_int16x8& a, const v_int16x8& b) // s16 variant built on the 16 >> 32 v_dotprod
+{
+    v_int32x4 prod = v_dotprod(a, b);
+    v_int64x2 c, d;
+    v_expand(prod, c, d); // widen the four 32-bit sums into two 64-bit vectors
+    return v_int64x2(vec_add(vec_mergeh(c.val, d.val), vec_mergel(c.val, d.val))); // yields {c0 + c1, d0 + d1}
+}
+inline v_int64x2 v_dotprod_expand(const v_int16x8& a, const v_int16x8& b, const v_int64x2& c) // two-argument form plus c
+{ return v_dotprod_expand(a, b) + c; }
+
+// 32 >> 64f
+inline v_float64x2 v_dotprod_expand(const v_int32x4& a, const v_int32x4& b) // 32 >> 64 integer v_dotprod, then converted to f64 by v_cvt_f64
+{ return v_cvt_f64(v_dotprod(a, b)); }
+inline v_float64x2 v_dotprod_expand(const v_int32x4& a, const v_int32x4& b, const v_float64x2& c) // two-argument form plus c (added in f64)
+{ return v_dotprod_expand(a, b) + c; }
+
+//////// Fast Dot Product ////////
+
+// 16 >> 32
+inline v_int32x4 v_dotprod_fast(const v_int16x8& a, const v_int16x8& b) // forwards to v_dotprod
+{ return v_dotprod(a, b); }
+inline v_int32x4 v_dotprod_fast(const v_int16x8& a, const v_int16x8& b, const v_int32x4& c) // vec_msum with a zero accumulator, c added afterwards with operator+
+{ return v_int32x4(vec_msum(a.val, b.val, vec_int4_z)) + c; }
+// 32 >> 64
+inline v_int64x2 v_dotprod_fast(const v_int32x4& a, const v_int32x4& b) // forwards to v_dotprod
+{ return v_dotprod(a, b); }
+inline v_int64x2 v_dotprod_fast(const v_int32x4& a, const v_int32x4& b, const v_int64x2& c) // forwards to the three-argument v_dotprod
+{ return v_dotprod(a, b, c); }
+
+// 8 >> 32
+inline v_uint32x4 v_dotprod_expand_fast(const v_uint8x16& a, const v_uint8x16& b) // forwards to v_dotprod_expand
+{ return v_dotprod_expand(a, b); }
+inline v_uint32x4 v_dotprod_expand_fast(const v_uint8x16& a, const v_uint8x16& b, const v_uint32x4& c) // vec_msum with a zero accumulator, c added afterwards with operator+
+{ return v_uint32x4(vec_msum(a.val, b.val, vec_uint4_z)) + c; }
+
+inline v_int32x4 v_dotprod_expand_fast(const v_int8x16& a, const v_int8x16& b) // s8 variant: widens with vec_unpackh/vec_unpackl rather than the even/odd split of v_dotprod_expand
+{
+    vec_short8 a0 = vec_unpackh(a.val);
+    vec_short8 a1 = vec_unpackl(a.val);
+    vec_short8 b0 = vec_unpackh(b.val);
+    vec_short8 b1 = vec_unpackl(b.val);
+    return v_int32x4(vec_msum(a0, b0, vec_msum(a1, b1, vec_int4_z))); // NOTE(review): products land in different lanes than in v_dotprod_expand - confirm callers only rely on the total
+}
+inline v_int32x4 v_dotprod_expand_fast(const v_int8x16& a, const v_int8x16& b, const v_int32x4& c) // two-argument form plus c
+{ return v_dotprod_expand_fast(a, b) + c; }
+
+// 16 >> 64
+inline v_uint64x2 v_dotprod_expand_fast(const v_uint16x8& a, const v_uint16x8& b) // forwards to v_dotprod_expand
+{ return v_dotprod_expand(a, b); }
+inline v_uint64x2 v_dotprod_expand_fast(const v_uint16x8& a, const v_uint16x8& b, const v_uint64x2& c) // forwards to the three-argument v_dotprod_expand
+{ return v_dotprod_expand(a, b, c); }
+
+inline v_int64x2 v_dotprod_expand_fast(const v_int16x8& a, const v_int16x8& b) // s16 variant built on the 16 >> 32 v_dotprod
+{
+    v_int32x4 prod = v_dotprod(a, b);
+    v_int64x2 c, d;
+    v_expand(prod, c, d);
+    return c + d; // plain lane-wise sum {c0 + d0, c1 + d1}; no vec_mergeh/vec_mergel regrouping as in v_dotprod_expand
+}
+inline v_int64x2 v_dotprod_expand_fast(const v_int16x8& a, const v_int16x8& b, const v_int64x2& c) // two-argument form plus c
+{ return v_dotprod_expand_fast(a, b) + c; }
+
+// 32 >> 64f
+inline v_float64x2 v_dotprod_expand_fast(const v_int32x4& a, const v_int32x4& b) // forwards to v_dotprod_expand
+{ return v_dotprod_expand(a, b); }
+inline v_float64x2 v_dotprod_expand_fast(const v_int32x4& a, const v_int32x4& b, const v_float64x2& c) // forwards to the three-argument v_dotprod_expand
+{ return v_dotprod_expand(a, b, c); }
+
+inline v_float32x4 v_matmul(const v_float32x4& v, const v_float32x4& m0, // computes v[0]*m0 + v[1]*m1 + v[2]*m2 + v[3]*m3
+                            const v_float32x4& m1, const v_float32x4& m2,
+                            const v_float32x4& m3)
+{
+    const vec_float4 v0 = vec_splat(v.val, 0); // lane i of v replicated across a whole vector
+    const vec_float4 v1 = vec_splat(v.val, 1);
+    const vec_float4 v2 = vec_splat(v.val, 2);
+    VSX_UNUSED(const vec_float4) v3 = vec_splat(v.val, 3); // NOTE(review): VSX_UNUSED presumably silences an unused-variable warning - confirm in vsx_utils.hpp
+    return v_float32x4(vec_madd(v0, m0.val, vec_madd(v1, m1.val, vec_madd(v2, m2.val, vec_mul(v3, m3.val))))); // innermost product first, then nested vec_madd calls
+}
+
+inline v_float32x4 v_matmuladd(const v_float32x4& v, const v_float32x4& m0, // computes v[0]*m0 + v[1]*m1 + v[2]*m2 + a
+                               const v_float32x4& m1, const v_float32x4& m2,
+                               const v_float32x4& a)
+{
+    const vec_float4 v0 = vec_splat(v.val, 0); // lane i of v replicated across a whole vector; lane 3 of v is never read
+    const vec_float4 v1 = vec_splat(v.val, 1);
+    const vec_float4 v2 = vec_splat(v.val, 2);
+    return v_float32x4(vec_madd(v0, m0.val, vec_madd(v1, m1.val, vec_madd(v2, m2.val, a.val)))); // a seeds the innermost vec_madd
+}
+
+#define OPENCV_HAL_IMPL_VSX_TRANSPOSE4x4(_Tpvec, _Tpvec2)                        \
+inline void v_transpose4x4(const _Tpvec& a0, const _Tpvec& a1,                   \
+                           const _Tpvec& a2, const _Tpvec& a3,                   \
+                           _Tpvec& b0, _Tpvec& b1, _Tpvec& b2, _Tpvec& b3)       \
+{                                                                                \
+    _Tpvec2 a02 = vec_mergeh(a0.val, a2.val);                                    \
+    _Tpvec2 a13 = vec_mergeh(a1.val, a3.val);                                    \
+    b0.val = vec_mergeh(a02, a13);                                               \
+    b1.val = vec_mergel(a02, a13);                                               \
+    a02 = vec_mergel(a0.val, a2.val);                                            \
+    a13 = vec_mergel(a1.val, a3.val);                                            \
+    b2.val  = vec_mergeh(a02, a13);                                              \
+    b3.val  = vec_mergel(a02, a13);                                              \
+} // the macro defines v_transpose4x4: inputs a0..a3, outputs b0..b3, built from two rounds of vec_mergeh/vec_mergel
+OPENCV_HAL_IMPL_VSX_TRANSPOSE4x4(v_uint32x4, vec_uint4) // one overload per 32-bit lane type: wrapper type, raw vector type
+OPENCV_HAL_IMPL_VSX_TRANSPOSE4x4(v_int32x4, vec_int4)
+OPENCV_HAL_IMPL_VSX_TRANSPOSE4x4(v_float32x4, vec_float4)
+
+template<int i, typename Tvec> // i: compile-time lane index; Tvec: any wrapper type with a .val member
+inline Tvec v_broadcast_element(const Tvec& v) // returns a vector built from vec_splat(v.val, i)
+{ return Tvec(vec_splat(v.val, i)); }
+
+
+CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
+
+//! @endcond
+
+}
+
+#endif // OPENCV_HAL_VSX_HPP
diff --git a/IPL/include/opencv/opencv2/core/hal/intrin_wasm.hpp b/IPL/include/opencv/opencv2/core/hal/intrin_wasm.hpp
new file mode 100644
index 0000000..b8c250f
--- /dev/null
+++ b/IPL/include/opencv/opencv2/core/hal/intrin_wasm.hpp
@@ -0,0 +1,4260 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_HAL_INTRIN_WASM_HPP
+#define OPENCV_HAL_INTRIN_WASM_HPP
+
+#include <limits>
+#include <cstring>
+#include <algorithm>
+#include "opencv2/core/saturate.hpp"
+
+#define CV_SIMD128 1
+#define CV_SIMD128_64F 0 // All f64 implementations currently use the fallback, so this stays disabled.
+#define CV_SIMD128_FP16 0
+
+namespace cv
+{
+
+//! @cond IGNORED
+
+CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
+
+#if (__EMSCRIPTEN_major__ * 1000000 + __EMSCRIPTEN_minor__ * 1000 + __EMSCRIPTEN_tiny__) < (1038046)
+// handle renames (each new-style name below is defined to expand to its pre-rename spelling): https://github.com/emscripten-core/emscripten/pull/9440 (https://github.com/emscripten-core/emscripten/commit/755d5b46cb84d0aa120c10981b11d05646c29673)
+#define wasm_i32x4_trunc_saturate_f32x4 wasm_trunc_saturate_i32x4_f32x4
+#define wasm_u32x4_trunc_saturate_f32x4 wasm_trunc_saturate_u32x4_f32x4
+#define wasm_i64x2_trunc_saturate_f64x2 wasm_trunc_saturate_i64x2_f64x2
+#define wasm_u64x2_trunc_saturate_f64x2 wasm_trunc_saturate_u64x2_f64x2
+#define wasm_f32x4_convert_i32x4 wasm_convert_f32x4_i32x4
+#define wasm_f32x4_convert_u32x4 wasm_convert_f32x4_u32x4
+#define wasm_f64x2_convert_i64x2 wasm_convert_f64x2_i64x2
+#define wasm_f64x2_convert_u64x2 wasm_convert_f64x2_u64x2
+#endif // COMPATIBILITY: Emscripten versions below 1.38.46
+
+///////// Types ///////////
+
+struct v_uint8x16 // 16 uchar lanes held in one v128_t
+{
+    typedef uchar lane_type;
+    typedef v128_t vector_type;
+    enum { nlanes = 16 };
+
+    v_uint8x16() : val(wasm_i8x16_splat(0)) {} // default: every lane zero
+    explicit v_uint8x16(v128_t v) : val(v) {} // wrap an existing v128_t as-is
+    v_uint8x16(uchar v0, uchar v1, uchar v2, uchar v3, uchar v4, uchar v5, uchar v6, uchar v7,
+            uchar v8, uchar v9, uchar v10, uchar v11, uchar v12, uchar v13, uchar v14, uchar v15) // per-lane constructor
+    {
+        uchar v[] = {v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15}; // stage the lanes in a local array
+        val = wasm_v128_load(v); // then load the array as one vector
+    }
+    uchar get0() const // value of lane 0
+    {
+        return (uchar)wasm_i8x16_extract_lane(val, 0);
+    }
+
+    v128_t val;
+};
+
+struct v_int8x16 // 16 schar lanes held in one v128_t
+{
+    typedef schar lane_type;
+    typedef v128_t vector_type;
+    enum { nlanes = 16 };
+
+    v_int8x16() : val(wasm_i8x16_splat(0)) {} // default: every lane zero
+    explicit v_int8x16(v128_t v) : val(v) {} // wrap an existing v128_t as-is
+    v_int8x16(schar v0, schar v1, schar v2, schar v3, schar v4, schar v5, schar v6, schar v7,
+            schar v8, schar v9, schar v10, schar v11, schar v12, schar v13, schar v14, schar v15) // per-lane constructor
+    {
+        schar v[] = {v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15}; // stage the lanes in a local array
+        val = wasm_v128_load(v); // then load the array as one vector
+    }
+    schar get0() const // value of lane 0
+    {
+        return wasm_i8x16_extract_lane(val, 0);
+    }
+
+    v128_t val;
+};
+
+struct v_uint16x8 // 8 ushort lanes held in one v128_t
+{
+    typedef ushort lane_type;
+    typedef v128_t vector_type;
+    enum { nlanes = 8 };
+
+    v_uint16x8() : val(wasm_i16x8_splat(0)) {} // default: every lane zero
+    explicit v_uint16x8(v128_t v) : val(v) {} // wrap an existing v128_t as-is
+    v_uint16x8(ushort v0, ushort v1, ushort v2, ushort v3, ushort v4, ushort v5, ushort v6, ushort v7) // per-lane constructor
+    {
+        ushort v[] = {v0, v1, v2, v3, v4, v5, v6, v7}; // stage the lanes in a local array
+        val = wasm_v128_load(v); // then load the array as one vector
+    }
+    ushort get0() const // value of lane 0
+    {
+        return (ushort)wasm_i16x8_extract_lane(val, 0);    // wasm_u16x8_extract_lane() is not implemented yet, so extract as signed and cast
+    }
+
+    v128_t val;
+};
+
+struct v_int16x8 // 8 short lanes held in one v128_t
+{
+    typedef short lane_type;
+    typedef v128_t vector_type;
+    enum { nlanes = 8 };
+
+    v_int16x8() : val(wasm_i16x8_splat(0)) {} // default: every lane zero
+    explicit v_int16x8(v128_t v) : val(v) {} // wrap an existing v128_t as-is
+    v_int16x8(short v0, short v1, short v2, short v3, short v4, short v5, short v6, short v7) // per-lane constructor
+    {
+        short v[] = {v0, v1, v2, v3, v4, v5, v6, v7}; // stage the lanes in a local array
+        val = wasm_v128_load(v); // then load the array as one vector
+    }
+    short get0() const // value of lane 0
+    {
+        return wasm_i16x8_extract_lane(val, 0);
+    }
+
+    v128_t val;
+};
+
+struct v_uint32x4 // 4 unsigned lanes held in one v128_t
+{
+    typedef unsigned lane_type;
+    typedef v128_t vector_type;
+    enum { nlanes = 4 };
+
+    v_uint32x4() : val(wasm_i32x4_splat(0)) {} // default: every lane zero
+    explicit v_uint32x4(v128_t v) : val(v) {} // wrap an existing v128_t as-is
+    v_uint32x4(unsigned v0, unsigned v1, unsigned v2, unsigned v3) // per-lane constructor
+    {
+        unsigned v[] = {v0, v1, v2, v3}; // stage the lanes in a local array
+        val = wasm_v128_load(v); // then load the array as one vector
+    }
+    unsigned get0() const // value of lane 0, extracted as i32 and cast
+    {
+        return (unsigned)wasm_i32x4_extract_lane(val, 0);
+    }
+
+    v128_t val;
+};
+
+struct v_int32x4 // 4 int lanes held in one v128_t
+{
+    typedef int lane_type;
+    typedef v128_t vector_type;
+    enum { nlanes = 4 };
+
+    v_int32x4() : val(wasm_i32x4_splat(0)) {} // default: every lane zero
+    explicit v_int32x4(v128_t v) : val(v) {} // wrap an existing v128_t as-is
+    v_int32x4(int v0, int v1, int v2, int v3) // per-lane constructor
+    {
+        int v[] = {v0, v1, v2, v3}; // stage the lanes in a local array
+        val = wasm_v128_load(v); // then load the array as one vector
+    }
+    int get0() const // value of lane 0
+    {
+        return wasm_i32x4_extract_lane(val, 0);
+    }
+
+    v128_t val;
+};
+
+struct v_float32x4 // 4 float lanes held in one v128_t
+{
+    typedef float lane_type;
+    typedef v128_t vector_type;
+    enum { nlanes = 4 };
+
+    v_float32x4() : val(wasm_f32x4_splat(0)) {} // default: every lane zero
+    explicit v_float32x4(v128_t v) : val(v) {} // wrap an existing v128_t as-is
+    v_float32x4(float v0, float v1, float v2, float v3) // per-lane constructor
+    {
+        float v[] = {v0, v1, v2, v3}; // stage the lanes in a local array
+        val = wasm_v128_load(v); // then load the array as one vector
+    }
+    float get0() const // value of lane 0
+    {
+        return wasm_f32x4_extract_lane(val, 0);
+    }
+
+    v128_t val;
+};
+
+struct v_uint64x2 // 2 uint64 lanes held in one v128_t
+{
+    typedef uint64 lane_type;
+    typedef v128_t vector_type;
+    enum { nlanes = 2 };
+
+#ifdef __wasm_unimplemented_simd128__ // 64-bit lane intrinsics are used only when this macro is defined
+    v_uint64x2() : val(wasm_i64x2_splat(0)) {}
+#else
+    v_uint64x2() : val(wasm_i32x4_splat(0)) {} // zero through the 32-bit splat instead
+#endif
+    explicit v_uint64x2(v128_t v) : val(v) {} // wrap an existing v128_t as-is
+    v_uint64x2(uint64 v0, uint64 v1) // per-lane constructor
+    {
+        uint64 v[] = {v0, v1}; // stage the lanes in a local array
+        val = wasm_v128_load(v); // then load the array as one vector
+    }
+    uint64 get0() const // value of lane 0
+    {
+#ifdef __wasm_unimplemented_simd128__
+        return (uint64)wasm_i64x2_extract_lane(val, 0);
+#else
+        uint64 des[2]; // without the 64-bit extract: store the vector to memory and read element 0
+        wasm_v128_store(des, val);
+        return des[0];
+#endif
+    }
+
+    v128_t val;
+};
+
+struct v_int64x2 // 2 int64 lanes held in one v128_t
+{
+    typedef int64 lane_type;
+    typedef v128_t vector_type;
+    enum { nlanes = 2 };
+
+#ifdef __wasm_unimplemented_simd128__ // 64-bit lane intrinsics are used only when this macro is defined
+    v_int64x2() : val(wasm_i64x2_splat(0)) {}
+#else
+    v_int64x2() : val(wasm_i32x4_splat(0)) {} // zero through the 32-bit splat instead
+#endif
+    explicit v_int64x2(v128_t v) : val(v) {} // wrap an existing v128_t as-is
+    v_int64x2(int64 v0, int64 v1) // per-lane constructor
+    {
+        int64 v[] = {v0, v1}; // stage the lanes in a local array
+        val = wasm_v128_load(v); // then load the array as one vector
+    }
+    int64 get0() const // value of lane 0
+    {
+#ifdef __wasm_unimplemented_simd128__
+        return wasm_i64x2_extract_lane(val, 0);
+#else
+        int64 des[2]; // without the 64-bit extract: store the vector to memory and read element 0
+        wasm_v128_store(des, val);
+        return des[0];
+#endif
+    }
+
+    v128_t val;
+};
+
+struct v_float64x2 // 2 double lanes held in one v128_t
+{
+    typedef double lane_type;
+    typedef v128_t vector_type;
+    enum { nlanes = 2 };
+
+#ifdef __wasm_unimplemented_simd128__ // f64x2 intrinsics are used only when this macro is defined
+    v_float64x2() : val(wasm_f64x2_splat(0)) {}
+#else
+    v_float64x2() : val(wasm_f32x4_splat(0)) {} // zero through the f32 splat instead
+#endif
+    explicit v_float64x2(v128_t v) : val(v) {} // wrap an existing v128_t as-is
+    v_float64x2(double v0, double v1) // per-lane constructor
+    {
+        double v[] = {v0, v1}; // stage the lanes in a local array
+        val = wasm_v128_load(v); // then load the array as one vector
+    }
+    double get0() const // value of lane 0
+    {
+#ifdef __wasm_unimplemented_simd128__
+        return wasm_f64x2_extract_lane(val, 0);
+#else
+        double des[2]; // without the f64 extract: store the vector to memory and read element 0
+        wasm_v128_store(des, val);
+        return des[0];
+#endif
+    }
+
+    v128_t val;
+};
+
+namespace fallback
+{
+
+template<typename _Tp, int n> struct v_reg // scalar fallback register: n lanes of _Tp kept in a plain array
+{
+    typedef _Tp lane_type;
+    enum { nlanes = n };
+
+    explicit v_reg(const _Tp* ptr) { for( int i = 0; i < n; i++ ) s[i] = ptr[i]; } // copy n elements from ptr
+
+    v_reg(_Tp s0, _Tp s1) { s[0] = s0; s[1] = s1; } // per-lane constructors for 2, 4, 8 and 16 lanes follow
+
+    v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3) { s[0] = s0; s[1] = s1; s[2] = s2; s[3] = s3; }
+
+    v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3,
+           _Tp s4, _Tp s5, _Tp s6, _Tp s7)
+    {
+        s[0] = s0; s[1] = s1; s[2] = s2; s[3] = s3;
+        s[4] = s4; s[5] = s5; s[6] = s6; s[7] = s7;
+    }
+
+    v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3,
+           _Tp s4, _Tp s5, _Tp s6, _Tp s7,
+           _Tp s8, _Tp s9, _Tp s10, _Tp s11,
+           _Tp s12, _Tp s13, _Tp s14, _Tp s15)
+    {
+        s[0] = s0; s[1] = s1; s[2] = s2; s[3] = s3;
+        s[4] = s4; s[5] = s5; s[6] = s6; s[7] = s7;
+        s[8] = s8; s[9] = s9; s[10] = s10; s[11] = s11;
+        s[12] = s12; s[13] = s13; s[14] = s14; s[15] = s15;
+    }
+
+    v_reg() {} // default constructor does not initialize the lanes
+
+    v_reg(const v_reg<_Tp, n> & r) // element-wise copy
+    {
+        for( int i = 0; i < n; i++ )
+            s[i] = r.s[i];
+    }
+
+    _Tp get0() const { return s[0]; } // value of lane 0
+
+    _Tp get(const int i) const { return s[i]; } // value of lane i, no bounds check
+    v_reg<_Tp, n> high() const // upper half moved into the lower half, upper half zeroed
+    {
+        v_reg<_Tp, n> c;
+        int i;
+        for( i = 0; i < n/2; i++ )
+        {
+            c.s[i] = s[i+(n/2)];
+            c.s[i+(n/2)] = 0;
+        }
+        return c;
+    }
+
+    static v_reg<_Tp, n> zero() // register with every lane set to 0
+    {
+        v_reg<_Tp, n> c;
+        for( int i = 0; i < n; i++ )
+            c.s[i] = (_Tp)0;
+        return c;
+    }
+
+    static v_reg<_Tp, n> all(_Tp s) // register with every lane set to s
+    {
+        v_reg<_Tp, n> c;
+        for( int i = 0; i < n; i++ )
+            c.s[i] = s;
+        return c;
+    }
+
+    template<typename _Tp2, int n2> v_reg<_Tp2, n2> reinterpret_as() const // byte-wise copy into a register of another lane type/count
+    {
+        size_t bytes = std::min(sizeof(_Tp2)*n2, sizeof(_Tp)*n); // copy only as many bytes as both registers hold
+        v_reg<_Tp2, n2> c; // default-constructed, so any bytes beyond the copy stay uninitialized
+        std::memcpy(&c.s[0], &s[0], bytes);
+        return c;
+    }
+
+    v_reg(const cv::v_uint8x16& v) { wasm_v128_store(&s, v.val); } // from wasm-backed vectors: store the whole v128_t into s
+    v_reg(const cv::v_int8x16& v) { wasm_v128_store(&s, v.val); } // NOTE(review): these stores assume s spans a full v128_t (16 bytes) - confirm for every instantiation
+    v_reg(const cv::v_uint16x8& v) { wasm_v128_store(&s, v.val); }
+    v_reg(const cv::v_int16x8& v) { wasm_v128_store(&s, v.val); }
+    v_reg(const cv::v_uint32x4& v) { wasm_v128_store(&s, v.val); }
+    v_reg(const cv::v_int32x4& v) { wasm_v128_store(&s, v.val); }
+    v_reg(const cv::v_float32x4& v) { wasm_v128_store(&s, v.val); }
+    v_reg(const cv::v_float64x2& v) { wasm_v128_store(&s, v.val); }
+    v_reg(const cv::v_uint64x2& v) { wasm_v128_store(&s, v.val); }
+    v_reg(const cv::v_int64x2& v) { wasm_v128_store(&s, v.val); }
+
+    operator cv::v_uint8x16() const { return cv::v_uint8x16(wasm_v128_load(&s)); } // to wasm-backed vectors: load s as one v128_t, regardless of _Tp
+    operator cv::v_int8x16() const { return cv::v_int8x16(wasm_v128_load(&s)); }
+    operator cv::v_uint16x8() const { return cv::v_uint16x8(wasm_v128_load(&s)); }
+    operator cv::v_int16x8() const { return cv::v_int16x8(wasm_v128_load(&s)); }
+    operator cv::v_uint32x4() const { return cv::v_uint32x4(wasm_v128_load(&s)); }
+    operator cv::v_int32x4() const { return cv::v_int32x4(wasm_v128_load(&s)); }
+    operator cv::v_float32x4() const { return cv::v_float32x4(wasm_v128_load(&s)); }
+    operator cv::v_float64x2() const { return cv::v_float64x2(wasm_v128_load(&s)); }
+    operator cv::v_uint64x2() const { return cv::v_uint64x2(wasm_v128_load(&s)); }
+    operator cv::v_int64x2() const { return cv::v_int64x2(wasm_v128_load(&s)); }
+
+    _Tp s[n]; // lane storage
+};
+
+typedef v_reg<uchar, 16> v_uint8x16; // array-backed counterparts of the cv:: vector types; each totals 16 bytes
+typedef v_reg<schar, 16> v_int8x16;
+typedef v_reg<ushort, 8> v_uint16x8;
+typedef v_reg<short, 8> v_int16x8;
+typedef v_reg<unsigned, 4> v_uint32x4;
+typedef v_reg<int, 4> v_int32x4;
+typedef v_reg<float, 4> v_float32x4;
+typedef v_reg<double, 2> v_float64x2;
+typedef v_reg<uint64, 2> v_uint64x2;
+typedef v_reg<int64, 2> v_int64x2;
+
+#define OPENCV_HAL_IMPL_BIN_OP(bin_op) \
+template<typename _Tp, int n> inline v_reg<_Tp, n> \
+    operator bin_op (const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
+{ \
+    v_reg<_Tp, n> c; \
+    for( int i = 0; i < n; i++ ) \
+        c.s[i] = saturate_cast<_Tp>(a.s[i] bin_op b.s[i]); \
+    return c; \
+} \
+template<typename _Tp, int n> inline v_reg<_Tp, n>& \
+    operator bin_op##= (v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
+{ \
+    for( int i = 0; i < n; i++ ) \
+        a.s[i] = saturate_cast<_Tp>(a.s[i] bin_op b.s[i]); \
+    return a; \
+} // the macro defines lane-wise operator bin_op and its compound-assignment form; every result goes through saturate_cast
+
+OPENCV_HAL_IMPL_BIN_OP(+) // operator+ and operator+=
+OPENCV_HAL_IMPL_BIN_OP(-) // operator- and operator-=
+OPENCV_HAL_IMPL_BIN_OP(*) // operator* and operator*=
+OPENCV_HAL_IMPL_BIN_OP(/) // operator/ and operator/=
+
+#define OPENCV_HAL_IMPL_BIT_OP(bit_op) /* lane-wise bit operation applied to the integer image of each lane */ \
+template<typename _Tp, int n> inline v_reg<_Tp, n> operator bit_op \
+    (const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
+{ \
+    typedef typename V_TypeTraits<_Tp>::int_type bits_t; \
+    v_reg<_Tp, n> res; \
+    for( int k = 0; k < n; ++k ) \
+        res.s[k] = V_TypeTraits<_Tp>::reinterpret_from_int((bits_t)(V_TypeTraits<_Tp>::reinterpret_int(a.s[k]) bit_op \
+                                                          V_TypeTraits<_Tp>::reinterpret_int(b.s[k]))); \
+    return res; \
+} \
+template<typename _Tp, int n> inline v_reg<_Tp, n>& operator \
+    bit_op##= (v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
+{ \
+    typedef typename V_TypeTraits<_Tp>::int_type bits_t; \
+    for( int k = 0; k < n; ++k ) /* compound form: updates a in place */ \
+        a.s[k] = V_TypeTraits<_Tp>::reinterpret_from_int((bits_t)(V_TypeTraits<_Tp>::reinterpret_int(a.s[k]) bit_op \
+                                                        V_TypeTraits<_Tp>::reinterpret_int(b.s[k]))); \
+    return a; \
+}
+
+OPENCV_HAL_IMPL_BIT_OP(&)
+OPENCV_HAL_IMPL_BIT_OP(|)
+OPENCV_HAL_IMPL_BIT_OP(^)
+
+template<typename _Tp, int n> inline v_reg<_Tp, n> operator ~ (const v_reg<_Tp, n>& a)
+{
+    // Lane-wise bitwise NOT, performed on the integer image of every lane.
+    typedef V_TypeTraits<_Tp> Traits;
+    v_reg<_Tp, n> res;
+    for( int k = 0; k < n; ++k )
+        res.s[k] = Traits::reinterpret_from_int(~Traits::reinterpret_int(a.s[k]));
+    return res;
+}
+
+#define OPENCV_HAL_IMPL_MATH_FUNC(func, cfunc, _Tp2) /* func(v): applies cfunc to each lane, result lanes are _Tp2 */ \
+template<typename _Tp, int n> inline v_reg<_Tp2, n> func(const v_reg<_Tp, n>& a) \
+{ \
+    v_reg<_Tp2, n> res; \
+    for( int k = 0; k < n; ++k ) \
+        res.s[k] = cfunc(a.s[k]); \
+    return res; \
+}
+
+OPENCV_HAL_IMPL_MATH_FUNC(v_sqrt, std::sqrt, _Tp)
+OPENCV_HAL_IMPL_MATH_FUNC(v_sin, std::sin, _Tp)
+OPENCV_HAL_IMPL_MATH_FUNC(v_cos, std::cos, _Tp)
+OPENCV_HAL_IMPL_MATH_FUNC(v_exp, std::exp, _Tp)
+OPENCV_HAL_IMPL_MATH_FUNC(v_log, std::log, _Tp)
+OPENCV_HAL_IMPL_MATH_FUNC(v_abs, (typename V_TypeTraits<_Tp>::abs_type)std::abs,
+                          typename V_TypeTraits<_Tp>::abs_type)
+OPENCV_HAL_IMPL_MATH_FUNC(v_round, cvRound, int)
+OPENCV_HAL_IMPL_MATH_FUNC(v_floor, cvFloor, int)
+OPENCV_HAL_IMPL_MATH_FUNC(v_ceil, cvCeil, int)
+OPENCV_HAL_IMPL_MATH_FUNC(v_trunc, int, int)
+
+#define OPENCV_HAL_IMPL_MINMAX_FUNC(func, cfunc) /* func(a, b): lane-wise cfunc(a[k], b[k]) */ \
+template<typename _Tp, int n> inline v_reg<_Tp, n> func(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
+{ \
+    v_reg<_Tp, n> res; \
+    for( int k = 0; k < n; ++k ) \
+        res.s[k] = cfunc(a.s[k], b.s[k]); \
+    return res; \
+}
+
+#define OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC(func, cfunc) /* func(a): folds all lanes of a with cfunc, seeded by lane 0 */ \
+template<typename _Tp, int n> inline _Tp func(const v_reg<_Tp, n>& a) \
+{ \
+    _Tp acc = a.s[0]; \
+    for( int k = 1; k < n; ++k ) \
+        acc = cfunc(acc, a.s[k]); \
+    return acc; \
+}
+
+OPENCV_HAL_IMPL_MINMAX_FUNC(v_min, std::min)
+OPENCV_HAL_IMPL_MINMAX_FUNC(v_max, std::max)
+OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC(v_reduce_min, std::min)
+OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC(v_reduce_max, std::max)
+
+static const unsigned char popCountTable[] =  // popCountTable[b] is the number of set bits in byte value b (256 entries)
+{
+    0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
+    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+    4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8,
+};
+
+template<typename _Tp, int n> inline v_reg<typename V_TypeTraits<_Tp>::abs_type, n> v_popcount(const v_reg<_Tp, n>& a)
+{
+    // Per-lane population count: every byte of a is looked up in popCountTable and added to its owning lane.
+    v_reg<typename V_TypeTraits<_Tp>::abs_type, n> b = v_reg<typename V_TypeTraits<_Tp>::abs_type, n>::zero();
+    const v_uint8x16 bytes = v_reinterpret_as_u8(a);  // byte view built once instead of once per loop iteration
+    for (int i = 0; i < (int)(n*sizeof(_Tp)); i++)
+        b.s[i/sizeof(_Tp)] += popCountTable[bytes.s[i]];
+    return b;
+}
+
+template<typename _Tp, int n>  // lane-wise min and max of a and b, written to the two output registers
+inline void v_minmax( const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b,
+                      v_reg<_Tp, n>& minval, v_reg<_Tp, n>& maxval )
+{
+    for( int k = 0; k < n; ++k )
+    {
+        minval.s[k] = std::min(a.s[k], b.s[k]);
+        maxval.s[k] = std::max(a.s[k], b.s[k]);
+    }
+}
+
+#define OPENCV_HAL_IMPL_CMP_OP(cmp_op) /* lane-wise compare; a true lane becomes integer -1 reinterpreted as _Tp, a false lane 0 */ \
+template<typename _Tp, int n> \
+inline v_reg<_Tp, n> operator cmp_op(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
+{ \
+    typedef typename V_TypeTraits<_Tp>::int_type bits_t; \
+    v_reg<_Tp, n> res; \
+    for( int k = 0; k < n; ++k ) \
+        res.s[k] = V_TypeTraits<_Tp>::reinterpret_from_int((bits_t)-(int)(a.s[k] cmp_op b.s[k])); \
+    return res; \
+}
+
+OPENCV_HAL_IMPL_CMP_OP(<)
+OPENCV_HAL_IMPL_CMP_OP(>)
+OPENCV_HAL_IMPL_CMP_OP(<=)
+OPENCV_HAL_IMPL_CMP_OP(>=)
+OPENCV_HAL_IMPL_CMP_OP(==)
+OPENCV_HAL_IMPL_CMP_OP(!=)
+
+template<int n>  // mask register: integer -1 image where the lane is not NaN, 0 where it is
+inline v_reg<float, n> v_not_nan(const v_reg<float, n>& a)
+{
+    typedef V_TypeTraits<float> Traits;
+    v_reg<float, n> res;
+    for (int k = 0; k < n; ++k)  // a value compares equal to itself unless it is NaN
+        res.s[k] = Traits::reinterpret_from_int((typename Traits::int_type)-(int)(a.s[k] == a.s[k]));
+    return res;
+}
+template<int n>  // mask register: integer -1 image where the lane is not NaN, 0 where it is
+inline v_reg<double, n> v_not_nan(const v_reg<double, n>& a)
+{
+    typedef V_TypeTraits<double> Traits;
+    v_reg<double, n> res;
+    for (int k = 0; k < n; ++k)  // a value compares equal to itself unless it is NaN
+        res.s[k] = Traits::reinterpret_from_int((typename Traits::int_type)-(int)(a.s[k] == a.s[k]));
+    return res;
+}
+
+#define OPENCV_HAL_IMPL_ARITHM_OP(func, bin_op, cast_op, _Tp2) /* func(a, b): lane-wise cast_op(a bin_op b), no saturation added here */ \
+template<typename _Tp, int n> \
+inline v_reg<_Tp2, n> func(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
+{ \
+    typedef _Tp2 rtype; \
+    v_reg<rtype, n> res; \
+    for( int k = 0; k < n; ++k ) \
+        res.s[k] = cast_op(a.s[k] bin_op b.s[k]); \
+    return res; \
+}
+
+OPENCV_HAL_IMPL_ARITHM_OP(v_add_wrap, +, (_Tp), _Tp)
+OPENCV_HAL_IMPL_ARITHM_OP(v_sub_wrap, -, (_Tp), _Tp)
+OPENCV_HAL_IMPL_ARITHM_OP(v_mul_wrap, *, (_Tp), _Tp)
+
+template<typename T> inline T _absdiff(T a, T b)
+{
+    return (b < a) ? a - b : b - a;  // larger minus smaller; equal or unordered operands take the b - a branch
+}
+
+template<typename _Tp, int n>  // lane-wise absolute difference of a and b, returned in abs_type lanes
+inline v_reg<typename V_TypeTraits<_Tp>::abs_type, n> v_absdiff(const v_reg<_Tp, n>& a, const v_reg<_Tp, n> & b)
+{
+    typedef typename V_TypeTraits<_Tp>::abs_type rtype;
+    v_reg<rtype, n> c;
+    // Sign-bit mask for signed lanes, 0 otherwise. The 1 is converted to rtype before shifting so 32/64-bit lanes do not overflow a plain int.
+    const rtype mask = (rtype)(std::numeric_limits<_Tp>::is_signed ? ((rtype)1 << (sizeof(rtype)*8 - 1)) : 0);
+    for( int i = 0; i < n; i++ )
+    {
+        rtype ua = a.s[i] ^ mask, ub = b.s[i] ^ mask;  // flip the sign bit so _absdiff works on biased rtype values
+        c.s[i] = _absdiff(ua, ub);
+    }
+    return c;
+}
+
+inline v_float32x4 v_absdiff(const v_float32x4& a, const v_float32x4& b)  // float lanes: plain _absdiff, no sign-bit trick
+{
+    v_float32x4 res;
+    for( int k = 0; k < res.nlanes; ++k )
+        res.s[k] = _absdiff(a.s[k], b.s[k]);
+    return res;
+}
+
+inline v_float64x2 v_absdiff(const v_float64x2& a, const v_float64x2& b)  // double lanes: plain _absdiff, no sign-bit trick
+{
+    v_float64x2 res;
+    for( int k = 0; k < res.nlanes; ++k )
+        res.s[k] = _absdiff(a.s[k], b.s[k]);
+    return res;
+}
+
+template<typename _Tp, int n>  // lane-wise |a - b| passed through saturate_cast<_Tp>
+inline v_reg<_Tp, n> v_absdiffs(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
+{
+    v_reg<_Tp, n> res;
+    for( int k = 0; k < n; ++k )
+        res.s[k] = saturate_cast<_Tp>(std::abs(a.s[k] - b.s[k]));
+    return res;
+}
+
+template<typename _Tp, int n>  // lane-wise reciprocal square root
+inline v_reg<_Tp, n> v_invsqrt(const v_reg<_Tp, n>& a)
+{
+    v_reg<_Tp, n> res;
+    for( int k = 0; k < n; ++k )
+        res.s[k] = 1.f/std::sqrt(a.s[k]);
+    return res;
+}
+
+template<typename _Tp, int n>  // lane-wise sqrt(a*a + b*b)
+inline v_reg<_Tp, n> v_magnitude(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
+{
+    v_reg<_Tp, n> res;
+    for( int k = 0; k < n; ++k )
+        res.s[k] = std::sqrt(a.s[k]*a.s[k] + b.s[k]*b.s[k]);
+    return res;
+}
+
+template<typename _Tp, int n>  // lane-wise a*a + b*b (no square root taken)
+inline v_reg<_Tp, n> v_sqr_magnitude(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
+{
+    v_reg<_Tp, n> res;
+    for( int k = 0; k < n; ++k )
+        res.s[k] = a.s[k]*a.s[k] + b.s[k]*b.s[k];
+    return res;
+}
+
+template<typename _Tp, int n>  // lane-wise a*b + c, written as a separate multiply and add
+inline v_reg<_Tp, n> v_fma(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b,
+                           const v_reg<_Tp, n>& c)
+{
+    v_reg<_Tp, n> res;
+    for( int k = 0; k < n; ++k )
+        res.s[k] = a.s[k]*b.s[k] + c.s[k];
+    return res;
+}
+
+template<typename _Tp, int n>  // alias of v_fma: returns a*b + c per lane
+inline v_reg<_Tp, n> v_muladd(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b,
+                              const v_reg<_Tp, n>& c)
+{
+    return v_fma(a, b, c);
+}
+
+template<typename _Tp, int n> inline v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>
+    v_dotprod(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
+{
+    typedef typename V_TypeTraits<_Tp>::w_type wide_t;
+    v_reg<wide_t, n/2> res;
+    for( int k = 0; k < (n/2); ++k )  // output lane k sums the products of input lanes 2k and 2k+1
+        res.s[k] = (wide_t)a.s[2*k]*b.s[2*k] + (wide_t)a.s[2*k+1]*b.s[2*k+1];
+    return res;
+}
+
+template<typename _Tp, int n> inline v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>
+    v_dotprod(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b, const v_reg<typename V_TypeTraits<_Tp>::w_type, n / 2>& c)
+{
+    typedef typename V_TypeTraits<_Tp>::w_type wide_t;
+    v_reg<wide_t, n/2> res;
+    for( int k = 0; k < (n/2); ++k )  // pairwise products as in the two-argument form, plus the accumulator lane c[k]
+        res.s[k] = (wide_t)a.s[2*k]*b.s[2*k] + (wide_t)a.s[2*k+1]*b.s[2*k+1] + c.s[k];
+    return res;
+}
+
+template<typename _Tp, int n> inline v_reg<typename V_TypeTraits<_Tp>::q_type, n/4>
+    v_dotprod_expand(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
+{
+    typedef typename V_TypeTraits<_Tp>::q_type quad_t;
+    v_reg<quad_t, n/4> res;
+    for( int k = 0; k < (n/4); ++k )  // output lane k sums the products of input lanes 4k .. 4k+3
+        res.s[k] = (quad_t)a.s[4*k    ]*b.s[4*k    ] + (quad_t)a.s[4*k + 1]*b.s[4*k + 1] +
+                   (quad_t)a.s[4*k + 2]*b.s[4*k + 2] + (quad_t)a.s[4*k + 3]*b.s[4*k + 3];
+    return res;
+}
+
+template<typename _Tp, int n> inline v_reg<typename V_TypeTraits<_Tp>::q_type, n/4>
+    v_dotprod_expand(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b,
+                     const v_reg<typename V_TypeTraits<_Tp>::q_type, n / 4>& c)
+{
+    typedef typename V_TypeTraits<_Tp>::q_type quad_t;
+    v_reg<quad_t, n/4> res;
+    for( int k = 0; k < (n/4); ++k )  // four products per output lane, plus the accumulator lane c[k]
+        res.s[k] = (quad_t)a.s[4*k    ]*b.s[4*k    ] + (quad_t)a.s[4*k + 1]*b.s[4*k + 1] +
+                   (quad_t)a.s[4*k + 2]*b.s[4*k + 2] + (quad_t)a.s[4*k + 3]*b.s[4*k + 3] + c.s[k];
+    return res;
+}
+
+template<typename _Tp, int n> inline void v_mul_expand(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b,
+                                                       v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>& c,
+                                                       v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>& d)
+{
+    typedef typename V_TypeTraits<_Tp>::w_type wide_t;
+    for( int k = 0; k < (n/2); ++k )  // c takes the widened products of the lower half, d those of the upper half
+    {
+        c.s[k] = (wide_t)a.s[k]*b.s[k];
+        d.s[k] = (wide_t)a.s[k+(n/2)]*b.s[k+(n/2)];
+    }
+}
+
+template<typename _Tp, int n> inline v_reg<_Tp, n> v_mul_hi(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
+{
+    typedef typename V_TypeTraits<_Tp>::w_type wide_t;
+    v_reg<_Tp, n> res;
+    for (int k = 0; k < n; ++k)  // widen, multiply, then keep what remains after shifting right by the lane width
+        res.s[k] = (_Tp)(((wide_t)a.s[k] * b.s[k]) >> sizeof(_Tp)*8);
+    return res;
+}
+
+template<typename _Tp, int n> inline void v_hsum(const v_reg<_Tp, n>& a,
+                                                 v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>& c)
+{
+    // Output lane k is the widened sum of the adjacent input lanes 2k and 2k+1.
+    typedef typename V_TypeTraits<_Tp>::w_type wide_t;
+    for( int k = 0; k < (n/2); ++k )
+        c.s[k] = (wide_t)a.s[2*k] + a.s[2*k+1];
+
+}
+
+#define OPENCV_HAL_IMPL_SHIFT_OP(shift_op) /* lane-wise shift of every lane by the same run-time count imm */ \
+template<typename _Tp, int n> inline v_reg<_Tp, n> operator shift_op(const v_reg<_Tp, n>& a, int imm) \
+{ \
+    v_reg<_Tp, n> res; \
+    for( int k = 0; k < n; ++k ) \
+        res.s[k] = (_Tp)(a.s[k] shift_op imm); \
+    return res; \
+}
+
+OPENCV_HAL_IMPL_SHIFT_OP(<< )
+OPENCV_HAL_IMPL_SHIFT_OP(>> )
+
+#define OPENCV_HAL_IMPL_ROTATE_SHIFT_OP(suffix,opA,opB) /* generates both v_rotate_<suffix> overloads; imm is the lane shift */ \
+template<int imm, typename _Tp, int n> inline v_reg<_Tp, n> v_rotate_##suffix(const v_reg<_Tp, n>& a) \
+{ \
+    v_reg<_Tp, n> b; \
+    for (int i = 0; i < n; i++) \
+    { \
+        int sIndex = i opA imm; /* source lane feeding output lane i */ \
+        if (0 <= sIndex && sIndex < n) \
+        { \
+            b.s[i] = a.s[sIndex]; \
+        } \
+        else \
+        { \
+            b.s[i] = 0; /* source lane lies outside a: fill with zero */ \
+        } \
+    } \
+    return b; \
+} \
+template<int imm, typename _Tp, int n> inline v_reg<_Tp, n> v_rotate_##suffix(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
+{ \
+    v_reg<_Tp, n> c; \
+    for (int i = 0; i < n; i++) \
+    { \
+        int aIndex = i opA imm; /* candidate source lane in a */ \
+        int bIndex = i opA imm opB n; /* same position moved by n lanes: candidate source lane in b */ \
+        if (0 <= bIndex && bIndex < n) /* b is checked first, then a */ \
+        { \
+            c.s[i] = b.s[bIndex]; \
+        } \
+        else if (0 <= aIndex && aIndex < n) \
+        { \
+            c.s[i] = a.s[aIndex]; \
+        } \
+        else \
+        { \
+            c.s[i] = 0; /* neither index is in range: fill with zero */ \
+        } \
+    } \
+    return c; \
+}
+
+OPENCV_HAL_IMPL_ROTATE_SHIFT_OP(left,  -, +)
+OPENCV_HAL_IMPL_ROTATE_SHIFT_OP(right, +, -)
+
+template<typename _Tp, int n> inline typename V_TypeTraits<_Tp>::sum_type v_reduce_sum(const v_reg<_Tp, n>& a)
+{
+    typename V_TypeTraits<_Tp>::sum_type total = a.s[0];  // accumulate in sum_type, seeded with lane 0
+    for( int k = 1; k < n; ++k )
+        total += a.s[k];
+    return total;
+}
+
+inline v_float32x4 v_reduce_sum4(const v_float32x4& a, const v_float32x4& b,
+                                 const v_float32x4& c, const v_float32x4& d)
+{
+    v_float32x4 sums;  // lane 0..3 hold the horizontal sums of a, b, c and d respectively
+    sums.s[0] = a.s[0] + a.s[1] + a.s[2] + a.s[3];
+    sums.s[1] = b.s[0] + b.s[1] + b.s[2] + b.s[3];
+    sums.s[2] = c.s[0] + c.s[1] + c.s[2] + c.s[3];
+    sums.s[3] = d.s[0] + d.s[1] + d.s[2] + d.s[3];
+    return sums;
+}
+
+template<typename _Tp, int n> inline typename V_TypeTraits< typename V_TypeTraits<_Tp>::abs_type >::sum_type v_reduce_sad(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
+{
+    // _absdiff returns _Tp, so each term is cast to abs_type first: a difference above the signed maximum must not be sign-extended into the sum.
+    typename V_TypeTraits< typename V_TypeTraits<_Tp>::abs_type >::sum_type c = (typename V_TypeTraits<_Tp>::abs_type)_absdiff(a.s[0], b.s[0]);
+    for (int i = 1; i < n; i++) c += (typename V_TypeTraits<_Tp>::abs_type)_absdiff(a.s[i], b.s[i]);
+    return c;
+}
+
+template<typename _Tp, int n> inline int v_signmask(const v_reg<_Tp, n>& a)
+{
+    int bits = 0;  // bit k is set when the integer image of lane k is negative
+    for( int k = 0; k < n; ++k )
+        bits |= (V_TypeTraits<_Tp>::reinterpret_int(a.s[k]) < 0) << k;
+    return bits;
+}
+
+template<typename _Tp, int n> inline bool v_check_all(const v_reg<_Tp, n>& a)
+{
+    for( int k = 0; k < n; ++k )  // fail on the first lane whose integer image is not negative
+        if( !(V_TypeTraits<_Tp>::reinterpret_int(a.s[k]) < 0) )
+            return false;
+    return true;
+}
+
+template<typename _Tp, int n> inline bool v_check_any(const v_reg<_Tp, n>& a)
+{
+    for( int k = 0; k < n; ++k )  // succeed on the first lane whose integer image is negative
+        if( V_TypeTraits<_Tp>::reinterpret_int(a.s[k]) < 0 )
+            return true;
+    return false;
+}
+
+template<typename _Tp, int n> inline v_reg<_Tp, n> v_select(const v_reg<_Tp, n>& mask,
+                                                           const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
+{
+    typedef V_TypeTraits<_Tp> Traits;
+    typedef typename Traits::int_type int_type;
+    v_reg<_Tp, n> picked;  // lane from a where the mask lane is non-zero, otherwise lane from b
+    for( int k = 0; k < n; ++k )
+    {
+        int_type m = Traits::reinterpret_int(mask.s[k]);
+        CV_DbgAssert(m == 0 || m == (~(int_type)0));  // restrict mask values: 0 or 0xff/0xffff/etc
+        picked.s[k] = (m != 0) ? a.s[k] : b.s[k];
+    }
+    return picked;
+}
+
+template<typename _Tp, int n> inline void v_expand(const v_reg<_Tp, n>& a,
+                            v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>& b0,
+                            v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>& b1)
+{
+    for( int k = 0; k < (n/2); ++k )  // b0 gets the lower half of a, b1 the upper half, both stored as w_type
+    {
+        b0.s[k] = a.s[k];
+        b1.s[k] = a.s[k+(n/2)];
+    }
+}
+
+template<typename _Tp, int n>  // lower half of a, stored in w_type lanes
+inline v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>
+v_expand_low(const v_reg<_Tp, n>& a)
+{
+    v_reg<typename V_TypeTraits<_Tp>::w_type, n/2> wide;
+    for( int k = 0; k < (n/2); ++k )
+        wide.s[k] = a.s[k];
+    return wide;
+}
+
+template<typename _Tp, int n>  // upper half of a, stored in w_type lanes
+inline v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>
+v_expand_high(const v_reg<_Tp, n>& a)
+{
+    v_reg<typename V_TypeTraits<_Tp>::w_type, n/2> wide;
+    for( int k = 0; k < (n/2); ++k )
+        wide.s[k] = a.s[k+(n/2)];
+    return wide;
+}
+
+template<typename _Tp, int n> inline v_reg<typename V_TypeTraits<_Tp>::int_type, n>
+    v_reinterpret_as_int(const v_reg<_Tp, n>& a)
+{
+    v_reg<typename V_TypeTraits<_Tp>::int_type, n> res;
+    for( int k = 0; k < n; ++k )  // each lane converted via V_TypeTraits<_Tp>::reinterpret_int
+        res.s[k] = V_TypeTraits<_Tp>::reinterpret_int(a.s[k]);
+    return res;
+}
+
+template<typename _Tp, int n> inline v_reg<typename V_TypeTraits<_Tp>::uint_type, n>
+    v_reinterpret_as_uint(const v_reg<_Tp, n>& a)
+{
+    v_reg<typename V_TypeTraits<_Tp>::uint_type, n> res;
+    for( int k = 0; k < n; ++k )  // each lane converted via V_TypeTraits<_Tp>::reinterpret_uint
+        res.s[k] = V_TypeTraits<_Tp>::reinterpret_uint(a.s[k]);
+    return res;
+}
+
+template<typename _Tp, int n> inline void v_zip( const v_reg<_Tp, n>& a0, const v_reg<_Tp, n>& a1,
+                                               v_reg<_Tp, n>& b0, v_reg<_Tp, n>& b1 )
+{
+    int k = 0;  // shared by both loops: the second one resumes where the first stops
+    for( ; k < n/2; ++k )
+    {
+        b0.s[2*k] = a0.s[k];
+        b0.s[2*k+1] = a1.s[k];
+    }
+    for( ; k < n; ++k )
+    {
+        b1.s[2*k-n] = a0.s[k];
+        b1.s[2*k-n+1] = a1.s[k];
+    }
+}
+
+template<typename _Tp>  // builds a register of V_TypeTraits<_Tp>::nlanes128 lanes by passing ptr to the v_reg constructor
+inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_load(const _Tp* ptr)
+{
+    return v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128>(ptr);
+}
+
+template<typename _Tp>  // same body as v_load; no alignment check is performed in this function
+inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_load_aligned(const _Tp* ptr)
+{
+    return v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128>(ptr);
+}
+
+template<typename _Tp>
+inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_load_low(const _Tp* ptr)
+{
+    // Only the lower half of the lanes is assigned from ptr; the upper half is not written here.
+    v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> res;
+    for( int k = 0; k < res.nlanes/2; ++k )
+        res.s[k] = ptr[k];
+
+    return res;
+}
+
+template<typename _Tp>  // lower half of the lanes comes from loptr, upper half from hiptr
+inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_load_halves(const _Tp* loptr, const _Tp* hiptr)
+{
+    v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> res;
+    for( int k = 0; k < res.nlanes/2; ++k )
+    {
+        res.s[k] = loptr[k];
+        res.s[k+res.nlanes/2] = hiptr[k];
+    }
+    return res;
+}
+
+template<typename _Tp>
+inline v_reg<typename V_TypeTraits<_Tp>::w_type, V_TypeTraits<_Tp>::nlanes128 / 2>
+v_load_expand(const _Tp* ptr)
+{
+    // Reads as many _Tp elements as the w_type register has lanes and stores each one as w_type.
+    typedef typename V_TypeTraits<_Tp>::w_type wide_t;
+    v_reg<wide_t, V_TypeTraits<wide_t>::nlanes128> res;
+    for( int k = 0; k < res.nlanes; ++k )
+        res.s[k] = ptr[k];
+
+    return res;
+}
+
+template<typename _Tp>
+inline v_reg<typename V_TypeTraits<_Tp>::q_type, V_TypeTraits<_Tp>::nlanes128 / 4>
+v_load_expand_q(const _Tp* ptr)
+{
+    // Reads as many _Tp elements as the q_type register has lanes and stores each one as q_type.
+    typedef typename V_TypeTraits<_Tp>::q_type quad_t;
+    v_reg<quad_t, V_TypeTraits<quad_t>::nlanes128> res;
+    for( int k = 0; k < res.nlanes; ++k )
+        res.s[k] = ptr[k];
+
+    return res;
+}
+
+template<typename _Tp, int n> inline void v_load_deinterleave(const _Tp* ptr, v_reg<_Tp, n>& a,
+                                                            v_reg<_Tp, n>& b)
+{
+    // ptr holds 2*n elements laid out as a0 b0 a1 b1 ...
+    for( int k = 0; k < n; ++k )
+    {
+        a.s[k] = ptr[2*k];
+        b.s[k] = ptr[2*k+1];
+    }
+}
+
+template<typename _Tp, int n> inline void v_load_deinterleave(const _Tp* ptr, v_reg<_Tp, n>& a,
+                                                            v_reg<_Tp, n>& b, v_reg<_Tp, n>& c)
+{
+    // ptr holds 3*n elements laid out as a0 b0 c0 a1 b1 c1 ...
+    for( int k = 0; k < n; ++k )
+    {
+        a.s[k] = ptr[3*k];
+        b.s[k] = ptr[3*k+1];
+        c.s[k] = ptr[3*k+2];
+    }
+}
+
+template<typename _Tp, int n>
+inline void v_load_deinterleave(const _Tp* ptr, v_reg<_Tp, n>& a,
+                                v_reg<_Tp, n>& b, v_reg<_Tp, n>& c,
+                                v_reg<_Tp, n>& d)
+{
+    // ptr holds 4*n elements laid out as a0 b0 c0 d0 a1 b1 c1 d1 ...
+    for( int k = 0; k < n; ++k )
+    {
+        a.s[k] = ptr[4*k];
+        b.s[k] = ptr[4*k+1];
+        c.s[k] = ptr[4*k+2];
+        d.s[k] = ptr[4*k+3];
+    }
+}
+
+template<typename _Tp, int n>
+inline void v_store_interleave( _Tp* ptr, const v_reg<_Tp, n>& a,
+                               const v_reg<_Tp, n>& b,
+                               hal::StoreMode /*mode*/=hal::STORE_UNALIGNED)
+{
+    // Writes 2*n elements as a0 b0 a1 b1 ...; the store mode argument is ignored.
+    for( int k = 0; k < n; ++k )
+    {
+        ptr[2*k] = a.s[k];
+        ptr[2*k+1] = b.s[k];
+    }
+}
+
+template<typename _Tp, int n>
+inline void v_store_interleave( _Tp* ptr, const v_reg<_Tp, n>& a,
+                                const v_reg<_Tp, n>& b, const v_reg<_Tp, n>& c,
+                                hal::StoreMode /*mode*/=hal::STORE_UNALIGNED)
+{
+    // Writes 3*n elements as a0 b0 c0 a1 b1 c1 ...; the store mode argument is ignored.
+    for( int k = 0; k < n; ++k )
+    {
+        ptr[3*k] = a.s[k];
+        ptr[3*k+1] = b.s[k];
+        ptr[3*k+2] = c.s[k];
+    }
+}
+
+template<typename _Tp, int n> inline void v_store_interleave( _Tp* ptr, const v_reg<_Tp, n>& a,
+                                                            const v_reg<_Tp, n>& b, const v_reg<_Tp, n>& c,
+                                                            const v_reg<_Tp, n>& d,
+                                                            hal::StoreMode /*mode*/=hal::STORE_UNALIGNED)
+{
+    // Writes 4*n elements as a0 b0 c0 d0 a1 b1 c1 d1 ...; the store mode argument is ignored.
+    for( int k = 0; k < n; ++k )
+    {
+        ptr[4*k] = a.s[k];
+        ptr[4*k+1] = b.s[k];
+        ptr[4*k+2] = c.s[k];
+        ptr[4*k+3] = d.s[k];
+    }
+}
+
+template<typename _Tp, int n>  // copies all n lanes of a to ptr; the store mode argument is ignored
+inline void v_store(_Tp* ptr, const v_reg<_Tp, n>& a, hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED)
+{
+    for( int k = 0; k < n; ++k )
+        ptr[k] = a.s[k];
+}
+
+template<typename _Tp, int n>  // copies the lower n/2 lanes of a to ptr
+inline void v_store_low(_Tp* ptr, const v_reg<_Tp, n>& a)
+{
+    for( int k = 0; k < (n/2); ++k )
+        ptr[k] = a.s[k];
+}
+
+template<typename _Tp, int n>  // copies the upper n/2 lanes of a to ptr[0 .. n/2-1]
+inline void v_store_high(_Tp* ptr, const v_reg<_Tp, n>& a)
+{
+    for( int k = 0; k < (n/2); ++k )
+        ptr[k] = a.s[k+(n/2)];
+}
+
+template<typename _Tp, int n>  // copies all n lanes of a to ptr; no alignment check is performed here
+inline void v_store_aligned(_Tp* ptr, const v_reg<_Tp, n>& a)
+{
+    for( int k = 0; k < n; ++k )
+        ptr[k] = a.s[k];
+}
+
+template<typename _Tp, int n>  // same element-wise copy as v_store_aligned; no cache hint is issued here
+inline void v_store_aligned_nocache(_Tp* ptr, const v_reg<_Tp, n>& a)
+{
+    for( int k = 0; k < n; ++k )
+        ptr[k] = a.s[k];
+}
+
+template<typename _Tp, int n>  // overload taking a store mode, which is ignored: plain element-wise copy
+inline void v_store_aligned(_Tp* ptr, const v_reg<_Tp, n>& a, hal::StoreMode /*mode*/)
+{
+    for( int k = 0; k < n; ++k )
+        ptr[k] = a.s[k];
+}
+
+template<typename _Tp, int n>  // result = [lower half of a, lower half of b]
+inline v_reg<_Tp, n> v_combine_low(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
+{
+    v_reg<_Tp, n> res;
+    for( int k = 0; k < (n/2); ++k )
+    {
+        res.s[k] = a.s[k];
+        res.s[k+(n/2)] = b.s[k];
+    }
+    return res;
+}
+
+template<typename _Tp, int n>  // result = [upper half of a, upper half of b]
+inline v_reg<_Tp, n> v_combine_high(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
+{
+    v_reg<_Tp, n> res;
+    for( int k = 0; k < (n/2); ++k )
+    {
+        res.s[k] = a.s[k+(n/2)];
+        res.s[k+(n/2)] = b.s[k+(n/2)];
+    }
+    return res;
+}
+
+template<typename _Tp, int n>  // low = [lower a, lower b], high = [upper a, upper b]
+inline void v_recombine(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b,
+                        v_reg<_Tp, n>& low, v_reg<_Tp, n>& high)
+{
+    for( int k = 0; k < (n/2); ++k )
+    {
+        low.s[k] = a.s[k];
+        low.s[k+(n/2)] = b.s[k];
+        high.s[k] = a.s[k+(n/2)];
+        high.s[k+(n/2)] = b.s[k+(n/2)];
+    }
+}
+
+template<int s, typename _Tp, int n>  // result = lanes s..n-1 of a followed by lanes 0..s-1 of b
+inline v_reg<_Tp, n> v_extract(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
+{
+    v_reg<_Tp, n> res;
+    const int fromA = n - s;  // number of lanes taken from a
+    int k = 0;
+    for (; k < fromA; ++k)
+        res.s[k] = a.s[k+s];
+    for (; k < n; ++k)
+        res.s[k] = b.s[k-fromA];
+    return res;
+}
+
+template<int n> inline v_reg<int, n> v_round(const v_reg<float, n>& a)  // lane-wise cvRound
+{
+    v_reg<int, n> res;
+    for( int k = 0; k < n; ++k )
+        res.s[k] = cvRound(a.s[k]);
+    return res;
+}
+
+template<int n> inline v_reg<int, n*2> v_round(const v_reg<double, n>& a, const v_reg<double, n>& b)
+{
+    v_reg<int, n*2> res;  // lanes 0..n-1 from a, lanes n..2n-1 from b
+    for( int k = 0; k < n; ++k )
+    {
+        res.s[k] = cvRound(a.s[k]);
+        res.s[k+n] = cvRound(b.s[k]);
+    }
+    return res;
+}
+
+template<int n> inline v_reg<int, n> v_floor(const v_reg<float, n>& a)  // lane-wise cvFloor
+{
+    v_reg<int, n> res;
+    for( int k = 0; k < n; ++k )
+        res.s[k] = cvFloor(a.s[k]);
+    return res;
+}
+
+template<int n> inline v_reg<int, n> v_ceil(const v_reg<float, n>& a)  // lane-wise cvCeil
+{
+    v_reg<int, n> res;
+    for( int k = 0; k < n; ++k )
+        res.s[k] = cvCeil(a.s[k]);
+    return res;
+}
+
+template<int n> inline v_reg<int, n> v_trunc(const v_reg<float, n>& a)  // lane-wise C-style cast to int
+{
+    v_reg<int, n> res;
+    for( int k = 0; k < n; ++k )
+        res.s[k] = (int)(a.s[k]);
+    return res;
+}
+
+template<int n> inline v_reg<int, n*2> v_round(const v_reg<double, n>& a)
+{
+    v_reg<int, n*2> res;  // lanes 0..n-1 hold the rounded values, lanes n..2n-1 are set to 0
+    for( int k = 0; k < n; ++k )
+    {
+        res.s[k] = cvRound(a.s[k]);
+        res.s[k+n] = 0;
+    }
+    return res;
+}
+
+template<int n> inline v_reg<int, n*2> v_floor(const v_reg<double, n>& a)
+{
+    v_reg<int, n*2> res;  // lanes 0..n-1 hold the floored values, lanes n..2n-1 are set to 0
+    for( int k = 0; k < n; ++k )
+    {
+        res.s[k] = cvFloor(a.s[k]);
+        res.s[k+n] = 0;
+    }
+    return res;
+}
+
+template<int n> inline v_reg<int, n*2> v_ceil(const v_reg<double, n>& a)
+{
+    v_reg<int, n*2> res;  // lanes 0..n-1 hold the ceiled values, lanes n..2n-1 are set to 0
+    for( int k = 0; k < n; ++k )
+    {
+        res.s[k] = cvCeil(a.s[k]);
+        res.s[k+n] = 0;
+    }
+    return res;
+}
+
+template<int n> inline v_reg<int, n*2> v_trunc(const v_reg<double, n>& a)
+{
+    v_reg<int, n*2> res;  // lanes 0..n-1 hold the values cast to int, lanes n..2n-1 are set to 0
+    for( int k = 0; k < n; ++k )
+    {
+        res.s[k] = (int)(a.s[k]);
+        res.s[k+n] = 0;
+    }
+    return res;
+}
+
+template<int n> inline v_reg<float, n> v_cvt_f32(const v_reg<int, n>& a)  // lane-wise int -> float
+{
+    v_reg<float, n> res;
+    for( int k = 0; k < n; ++k )
+        res.s[k] = (float)a.s[k];
+    return res;
+}
+
+template<int n> inline v_reg<float, n*2> v_cvt_f32(const v_reg<double, n>& a)
+{
+    v_reg<float, n*2> res;  // lanes 0..n-1 hold the converted values, lanes n..2n-1 are set to 0
+    for( int k = 0; k < n; ++k )
+    {
+        res.s[k] = (float)a.s[k];
+        res.s[k+n] = 0;
+    }
+    return res;
+}
+
+template<int n> inline v_reg<float, n*2> v_cvt_f32(const v_reg<double, n>& a, const v_reg<double, n>& b)
+{
+    v_reg<float, n*2> res;  // lanes 0..n-1 from a, lanes n..2n-1 from b
+    for( int k = 0; k < n; ++k )
+    {
+        res.s[k] = (float)a.s[k];
+        res.s[k+n] = (float)b.s[k];
+    }
+    return res;
+}
+
+inline v_float64x2 v_cvt_f64(const v_int32x4& a)  // converts lanes 0 and 1 of a to double
+{
+    v_float64x2 res;
+    res.s[0] = (double)a.s[0];
+    res.s[1] = (double)a.s[1];
+    return res;
+}
+
+inline v_float64x2 v_cvt_f64_high(const v_int32x4& a)  // converts lanes 2 and 3 of a to double
+{
+    v_float64x2 res;
+    res.s[0] = (double)a.s[2];
+    res.s[1] = (double)a.s[3];
+    return res;
+}
+
+inline v_float64x2 v_cvt_f64(const v_float32x4& a)  // converts lanes 0 and 1 of a to double
+{
+    v_float64x2 res;
+    res.s[0] = (double)a.s[0];
+    res.s[1] = (double)a.s[1];
+    return res;
+}
+
+inline v_float64x2 v_cvt_f64_high(const v_float32x4& a)  // converts lanes 2 and 3 of a to double
+{
+    v_float64x2 res;
+    res.s[0] = (double)a.s[2];
+    res.s[1] = (double)a.s[3];
+    return res;
+}
+
+inline v_float64x2 v_cvt_f64(const v_int64x2& a)  // converts both int64 lanes of a to double
+{
+    v_float64x2 res;
+    res.s[0] = (double)a.s[0];
+    res.s[1] = (double)a.s[1];
+    return res;
+}
+
+template<typename _Tp> inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_lut(const _Tp* tab, const int* idx)
+{
+    v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> res;  // lane k = tab[idx[k]]
+    for (int k = 0; k < V_TypeTraits<_Tp>::nlanes128; ++k)
+        res.s[k] = tab[idx[k]];
+    return res;
+}
+template<typename _Tp> inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_lut_pairs(const _Tp* tab, const int* idx)
+{
+    v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> res;  // each idx entry supplies two consecutive table elements
+    for (int k = 0; k < V_TypeTraits<_Tp>::nlanes128; ++k)
+        res.s[k] = tab[idx[k / 2] + k % 2];
+    return res;
+}
+template<typename _Tp> inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_lut_quads(const _Tp* tab, const int* idx)
+{
+    v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> res;  // each idx entry supplies four consecutive table elements
+    for (int k = 0; k < V_TypeTraits<_Tp>::nlanes128; ++k)
+        res.s[k] = tab[idx[k / 4] + k % 4];
+    return res;
+}
+
+template<int n> inline v_reg<int, n> v_lut(const int* tab, const v_reg<int, n>& idx)  // lane k = tab[idx lane k]
+{
+    v_reg<int, n> res;
+    for( int k = 0; k < n; ++k )
+        res.s[k] = tab[idx.s[k]];
+    return res;
+}
+
+template<int n> inline v_reg<unsigned, n> v_lut(const unsigned* tab, const v_reg<int, n>& idx)  // lane i = tab[idx lane i]
+{
+    v_reg<unsigned, n> c;  // lane type must match the declared return type (was v_reg<int, n>)
+    for (int i = 0; i < n; i++)
+        c.s[i] = tab[idx.s[i]];
+    return c;
+}
+
+template<int n> inline v_reg<float, n> v_lut(const float* tab, const v_reg<int, n>& idx)  // lane k = tab[idx lane k]
+{
+    v_reg<float, n> res;
+    for( int k = 0; k < n; ++k )
+        res.s[k] = tab[idx.s[k]];
+    return res;
+}
+
+template<int n> inline v_reg<double, n> v_lut(const double* tab, const v_reg<int, n*2>& idx)  // NOTE(review): n appears only as n*2 in the parameters, so it looks non-deducible (callers would need v_lut<n>(...)) - confirm
+{
+    v_reg<double, n> c;
+    for( int i = 0; i < n; i++ )  // only the first n of the 2n index lanes are used
+        c.s[i] = tab[idx.s[i]];
+    return c;
+}
+
+template<int n> inline void v_lut_deinterleave(const float* tab, const v_reg<int, n>& idx,
+                                               v_reg<float, n>& x, v_reg<float, n>& y)
+{
+    for( int k = 0; k < n; ++k )  // x takes tab[j], y takes the element right after it
+    {
+        const int j = idx.s[k];
+        x.s[k] = tab[j];
+        y.s[k] = tab[j+1];
+    }
+}
+
+template<int n> inline void v_lut_deinterleave(const double* tab, const v_reg<int, n*2>& idx,
+                                               v_reg<double, n>& x, v_reg<double, n>& y)
+{
+    for( int k = 0; k < n; ++k )  // uses the first n index lanes; x takes tab[j], y the element right after it
+    {
+        const int j = idx.s[k];
+        x.s[k] = tab[j];
+        y.s[k] = tab[j+1];
+    }
+}
+
+template<typename _Tp, int n> inline v_reg<_Tp, n> v_interleave_pairs(const v_reg<_Tp, n>& vec)
+{
+    v_reg<_Tp, n> res;
+    for (int g = 0; g < n/4; ++g)  // within each group of four lanes, the two middle lanes swap places
+    {
+        res.s[4*g  ] = vec.s[4*g  ];
+        res.s[4*g+1] = vec.s[4*g+2];
+        res.s[4*g+2] = vec.s[4*g+1];
+        res.s[4*g+3] = vec.s[4*g+3];
+    }
+    return res;
+}
+
+template<typename _Tp, int n> inline v_reg<_Tp, n> v_interleave_quads(const v_reg<_Tp, n>& vec)
+{
+    v_reg<_Tp, n> res;
+    for (int g = 0; g < n/8; ++g)  // within each group of eight: output order is lanes 0,4,1,5,2,6,3,7
+    {
+        res.s[8*g  ] = vec.s[8*g  ];
+        res.s[8*g+1] = vec.s[8*g+4];
+        res.s[8*g+2] = vec.s[8*g+1];
+        res.s[8*g+3] = vec.s[8*g+5];
+        res.s[8*g+4] = vec.s[8*g+2];
+        res.s[8*g+5] = vec.s[8*g+6];
+        res.s[8*g+6] = vec.s[8*g+3];
+        res.s[8*g+7] = vec.s[8*g+7];
+    }
+    return res;
+}
+
+template<typename _Tp, int n> inline v_reg<_Tp, n> v_pack_triplets(const v_reg<_Tp, n>& vec)
+{
+    v_reg<_Tp, n> res;  // only lanes 0 .. 3*(n/4)-1 are assigned below
+    for (int g = 0; g < n/4; ++g)  // keep the first three lanes of every group of four, packed contiguously
+    {
+        res.s[3*g  ] = vec.s[4*g  ];
+        res.s[3*g+1] = vec.s[4*g+1];
+        res.s[3*g+2] = vec.s[4*g+2];
+    }
+    return res;
+}
+
+template<typename _Tp>  // 4x4 transpose: output register bK collects lane K of a0, a1, a2 and a3
+inline void v_transpose4x4( const v_reg<_Tp, 4>& a0, const v_reg<_Tp, 4>& a1,
+                            const v_reg<_Tp, 4>& a2, const v_reg<_Tp, 4>& a3,
+                            v_reg<_Tp, 4>& b0, v_reg<_Tp, 4>& b1,
+                            v_reg<_Tp, 4>& b2, v_reg<_Tp, 4>& b3 )
+{
+    b0 = v_reg<_Tp, 4>(a0.s[0], a1.s[0], a2.s[0], a3.s[0]);  // a0 is only read, so it is taken by const reference like a1..a3
+    b1 = v_reg<_Tp, 4>(a0.s[1], a1.s[1], a2.s[1], a3.s[1]);
+    b2 = v_reg<_Tp, 4>(a0.s[2], a1.s[2], a2.s[2], a3.s[2]);
+    b3 = v_reg<_Tp, 4>(a0.s[3], a1.s[3], a2.s[3], a3.s[3]);
+}
+
+#define OPENCV_HAL_IMPL_C_INIT_ZERO(_Tpvec, _Tp, suffix) /* defines v_setzero_<suffix>() returning _Tpvec::zero(); _Tp is not used in the body */ \
+inline _Tpvec v_setzero_##suffix() { return _Tpvec::zero(); }
+
+OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint8x16, uchar, u8)
+OPENCV_HAL_IMPL_C_INIT_ZERO(v_int8x16, schar, s8)
+OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint16x8, ushort, u16)
+OPENCV_HAL_IMPL_C_INIT_ZERO(v_int16x8, short, s16)
+OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint32x4, unsigned, u32)
+OPENCV_HAL_IMPL_C_INIT_ZERO(v_int32x4, int, s32)
+OPENCV_HAL_IMPL_C_INIT_ZERO(v_float32x4, float, f32)
+OPENCV_HAL_IMPL_C_INIT_ZERO(v_float64x2, double, f64)
+OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint64x2, uint64, u64)
+OPENCV_HAL_IMPL_C_INIT_ZERO(v_int64x2, int64, s64)
+
+#define OPENCV_HAL_IMPL_C_INIT_VAL(_Tpvec, _Tp, suffix) \
+inline _Tpvec v_setall_##suffix(_Tp val) { return _Tpvec::all(val); }
+
+OPENCV_HAL_IMPL_C_INIT_VAL(v_int8x16, schar, s8)
+OPENCV_HAL_IMPL_C_INIT_VAL(v_uint16x8, ushort, u16)
+OPENCV_HAL_IMPL_C_INIT_VAL(v_int16x8, short, s16)
+OPENCV_HAL_IMPL_C_INIT_VAL(v_uint32x4, unsigned, u32)
+OPENCV_HAL_IMPL_C_INIT_VAL(v_int32x4, int, s32)
+OPENCV_HAL_IMPL_C_INIT_VAL(v_float32x4, float, f32)
+OPENCV_HAL_IMPL_C_INIT_VAL(v_float64x2, double, f64)
+OPENCV_HAL_IMPL_C_INIT_VAL(v_uint64x2, uint64, u64)
+OPENCV_HAL_IMPL_C_INIT_VAL(v_int64x2, int64, s64)
+
+#define OPENCV_HAL_IMPL_C_REINTERPRET(_Tpvec, _Tp, suffix) /* generates v_reinterpret_as_<suffix>(a) for any source v_reg */ \
+template<typename _Tp0, int n0> inline _Tpvec \
+    v_reinterpret_as_##suffix(const v_reg<_Tp0, n0>& a) \
+{ return a.template reinterpret_as<_Tp, _Tpvec::nlanes>(); } // forwards to v_reg::reinterpret_as with the target lane type and count
+
+OPENCV_HAL_IMPL_C_REINTERPRET(v_uint8x16, uchar, u8) // one v_reinterpret_as_* per fallback vector type
+OPENCV_HAL_IMPL_C_REINTERPRET(v_int8x16, schar, s8)
+OPENCV_HAL_IMPL_C_REINTERPRET(v_uint16x8, ushort, u16)
+OPENCV_HAL_IMPL_C_REINTERPRET(v_int16x8, short, s16)
+OPENCV_HAL_IMPL_C_REINTERPRET(v_uint32x4, unsigned, u32)
+OPENCV_HAL_IMPL_C_REINTERPRET(v_int32x4, int, s32)
+OPENCV_HAL_IMPL_C_REINTERPRET(v_float32x4, float, f32)
+OPENCV_HAL_IMPL_C_REINTERPRET(v_float64x2, double, f64)
+OPENCV_HAL_IMPL_C_REINTERPRET(v_uint64x2, uint64, u64)
+OPENCV_HAL_IMPL_C_REINTERPRET(v_int64x2, int64, s64)
+
+#define OPENCV_HAL_IMPL_C_SHIFTL(_Tpvec, _Tp) /* generates v_shl<n>(a) with a compile-time shift count; _Tp is unused */ \
+template<int n> inline _Tpvec v_shl(const _Tpvec& a) \
+{ return a << n; } // delegates to the vector type's operator<<
+
+OPENCV_HAL_IMPL_C_SHIFTL(v_uint16x8, ushort) // instantiated for the 16-, 32- and 64-bit integer vectors only
+OPENCV_HAL_IMPL_C_SHIFTL(v_int16x8, short)
+OPENCV_HAL_IMPL_C_SHIFTL(v_uint32x4, unsigned)
+OPENCV_HAL_IMPL_C_SHIFTL(v_int32x4, int)
+OPENCV_HAL_IMPL_C_SHIFTL(v_uint64x2, uint64)
+OPENCV_HAL_IMPL_C_SHIFTL(v_int64x2, int64)
+
+#define OPENCV_HAL_IMPL_C_SHIFTR(_Tpvec, _Tp) /* generates v_shr<n>(a) with a compile-time shift count; _Tp is unused */ \
+template<int n> inline _Tpvec v_shr(const _Tpvec& a) \
+{ return a >> n; } // delegates to the vector type's operator>>
+
+OPENCV_HAL_IMPL_C_SHIFTR(v_uint16x8, ushort) // instantiated for the 16-, 32- and 64-bit integer vectors only
+OPENCV_HAL_IMPL_C_SHIFTR(v_int16x8, short)
+OPENCV_HAL_IMPL_C_SHIFTR(v_uint32x4, unsigned)
+OPENCV_HAL_IMPL_C_SHIFTR(v_int32x4, int)
+OPENCV_HAL_IMPL_C_SHIFTR(v_uint64x2, uint64)
+OPENCV_HAL_IMPL_C_SHIFTR(v_int64x2, int64)
+
+#define OPENCV_HAL_IMPL_C_RSHIFTR(_Tpvec, _Tp) /* generates v_rshr<n>(a): per-lane right shift by n with rounding */ \
+template<int n> inline _Tpvec v_rshr(const _Tpvec& a) \
+{ \
+    _Tpvec c; \
+    for( int i = 0; i < _Tpvec::nlanes; i++ ) \
+        c.s[i] = (_Tp)((a.s[i] + ((_Tp)1 << (n - 1))) >> n); /* adds 2^(n-1) before shifting, then casts back to _Tp */ \
+    return c; \
+}
+
+OPENCV_HAL_IMPL_C_RSHIFTR(v_uint16x8, ushort) // instantiated for the 16-, 32- and 64-bit integer vectors only
+OPENCV_HAL_IMPL_C_RSHIFTR(v_int16x8, short)
+OPENCV_HAL_IMPL_C_RSHIFTR(v_uint32x4, unsigned)
+OPENCV_HAL_IMPL_C_RSHIFTR(v_int32x4, int)
+OPENCV_HAL_IMPL_C_RSHIFTR(v_uint64x2, uint64)
+OPENCV_HAL_IMPL_C_RSHIFTR(v_int64x2, int64)
+
+#define OPENCV_HAL_IMPL_C_PACK(_Tpvec, _Tpnvec, _Tpn, pack_suffix, cast) /* generates v_<pack_suffix>(a, b): narrows two _Tpvec into one _Tpnvec */ \
+inline _Tpnvec v_##pack_suffix(const _Tpvec& a, const _Tpvec& b) \
+{ \
+    _Tpnvec c; \
+    for( int i = 0; i < _Tpvec::nlanes; i++ ) \
+    { \
+        c.s[i] = cast<_Tpn>(a.s[i]); /* a fills the first _Tpvec::nlanes lanes */ \
+        c.s[i+_Tpvec::nlanes] = cast<_Tpn>(b.s[i]); /* b fills the lanes after them */ \
+    } \
+    return c; \
+}
+
+OPENCV_HAL_IMPL_C_PACK(v_uint16x8, v_uint8x16, uchar, pack, saturate_cast) // 16- and 32-bit sources narrow with saturate_cast
+OPENCV_HAL_IMPL_C_PACK(v_int16x8, v_int8x16, schar, pack, saturate_cast)
+OPENCV_HAL_IMPL_C_PACK(v_uint32x4, v_uint16x8, ushort, pack, saturate_cast)
+OPENCV_HAL_IMPL_C_PACK(v_int32x4, v_int16x8, short, pack, saturate_cast)
+OPENCV_HAL_IMPL_C_PACK(v_uint64x2, v_uint32x4, unsigned, pack, static_cast) // 64-bit sources narrow with static_cast (no saturation)
+OPENCV_HAL_IMPL_C_PACK(v_int64x2, v_int32x4, int, pack, static_cast)
+OPENCV_HAL_IMPL_C_PACK(v_int16x8, v_uint8x16, uchar, pack_u, saturate_cast) // pack_u: signed source, unsigned result
+OPENCV_HAL_IMPL_C_PACK(v_int32x4, v_uint16x8, ushort, pack_u, saturate_cast)
+
+#define OPENCV_HAL_IMPL_C_RSHR_PACK(_Tpvec, _Tp, _Tpnvec, _Tpn, pack_suffix, cast) /* generates v_rshr_<pack_suffix><n>(a, b): rounding shift, then narrowing */ \
+template<int n> inline _Tpnvec v_rshr_##pack_suffix(const _Tpvec& a, const _Tpvec& b) \
+{ \
+    _Tpnvec c; \
+    for( int i = 0; i < _Tpvec::nlanes; i++ ) \
+    { \
+        c.s[i] = cast<_Tpn>((a.s[i] + ((_Tp)1 << (n - 1))) >> n); /* add 2^(n-1), shift right by n, then narrow with cast */ \
+        c.s[i+_Tpvec::nlanes] = cast<_Tpn>((b.s[i] + ((_Tp)1 << (n - 1))) >> n); /* same for b, stored after a's lanes */ \
+    } \
+    return c; \
+}
+
+OPENCV_HAL_IMPL_C_RSHR_PACK(v_uint16x8, ushort, v_uint8x16, uchar, pack, saturate_cast) // same type/cast table as the plain pack family
+OPENCV_HAL_IMPL_C_RSHR_PACK(v_int16x8, short, v_int8x16, schar, pack, saturate_cast)
+OPENCV_HAL_IMPL_C_RSHR_PACK(v_uint32x4, unsigned, v_uint16x8, ushort, pack, saturate_cast)
+OPENCV_HAL_IMPL_C_RSHR_PACK(v_int32x4, int, v_int16x8, short, pack, saturate_cast)
+OPENCV_HAL_IMPL_C_RSHR_PACK(v_uint64x2, uint64, v_uint32x4, unsigned, pack, static_cast)
+OPENCV_HAL_IMPL_C_RSHR_PACK(v_int64x2, int64, v_int32x4, int, pack, static_cast)
+OPENCV_HAL_IMPL_C_RSHR_PACK(v_int16x8, short, v_uint8x16, uchar, pack_u, saturate_cast)
+OPENCV_HAL_IMPL_C_RSHR_PACK(v_int32x4, int, v_uint16x8, ushort, pack_u, saturate_cast)
+
+#define OPENCV_HAL_IMPL_C_PACK_STORE(_Tpvec, _Tp, _Tpnvec, _Tpn, pack_suffix, cast) /* generates v_<pack_suffix>_store(ptr, a); _Tp and _Tpnvec are unused */ \
+inline void v_##pack_suffix##_store(_Tpn* ptr, const _Tpvec& a) \
+{ \
+    for( int i = 0; i < _Tpvec::nlanes; i++ ) \
+        ptr[i] = cast<_Tpn>(a.s[i]); /* writes exactly _Tpvec::nlanes narrowed elements */ \
+}
+
+OPENCV_HAL_IMPL_C_PACK_STORE(v_uint16x8, ushort, v_uint8x16, uchar, pack, saturate_cast) // same type/cast table as the plain pack family
+OPENCV_HAL_IMPL_C_PACK_STORE(v_int16x8, short, v_int8x16, schar, pack, saturate_cast)
+OPENCV_HAL_IMPL_C_PACK_STORE(v_uint32x4, unsigned, v_uint16x8, ushort, pack, saturate_cast)
+OPENCV_HAL_IMPL_C_PACK_STORE(v_int32x4, int, v_int16x8, short, pack, saturate_cast)
+OPENCV_HAL_IMPL_C_PACK_STORE(v_uint64x2, uint64, v_uint32x4, unsigned, pack, static_cast)
+OPENCV_HAL_IMPL_C_PACK_STORE(v_int64x2, int64, v_int32x4, int, pack, static_cast)
+OPENCV_HAL_IMPL_C_PACK_STORE(v_int16x8, short, v_uint8x16, uchar, pack_u, saturate_cast)
+OPENCV_HAL_IMPL_C_PACK_STORE(v_int32x4, int, v_uint16x8, ushort, pack_u, saturate_cast)
+
+#define OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(_Tpvec, _Tp, _Tpnvec, _Tpn, pack_suffix, cast) /* generates v_rshr_<pack_suffix>_store<n>(ptr, a); _Tpnvec is unused */ \
+template<int n> inline void v_rshr_##pack_suffix##_store(_Tpn* ptr, const _Tpvec& a) \
+{ \
+    for( int i = 0; i < _Tpvec::nlanes; i++ ) \
+        ptr[i] = cast<_Tpn>((a.s[i] + ((_Tp)1 << (n - 1))) >> n); /* add 2^(n-1), shift right by n, narrow, store */ \
+}
+
+OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_uint16x8, ushort, v_uint8x16, uchar, pack, saturate_cast) // same type/cast table as the plain pack family
+OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int16x8, short, v_int8x16, schar, pack, saturate_cast)
+OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_uint32x4, unsigned, v_uint16x8, ushort, pack, saturate_cast)
+OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int32x4, int, v_int16x8, short, pack, saturate_cast)
+OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_uint64x2, uint64, v_uint32x4, unsigned, pack, static_cast)
+OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int64x2, int64, v_int32x4, int, pack, static_cast)
+OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int16x8, short, v_uint8x16, uchar, pack_u, saturate_cast)
+OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int32x4, int, v_uint16x8, ushort, pack_u, saturate_cast)
+
+template<typename _Tpm, typename _Tp, int n> // helper for v_pack_b: plain per-lane cast, no saturation
+inline void _pack_b(_Tpm* mptr, const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
+{
+    for (int lane = 0; lane < n; ++lane)
+    {
+        mptr[lane] = static_cast<_Tpm>(a.s[lane]);      // a goes to mptr[0 .. n)
+        mptr[n + lane] = static_cast<_Tpm>(b.s[lane]);  // b goes to mptr[n .. 2n)
+    }
+}
+
+inline v_uint8x16 v_pack_b(const v_uint16x8& a, const v_uint16x8& b) // two 16-bit vectors -> one 8-bit vector
+{
+    v_uint8x16 packed;
+    _pack_b(packed.s, a, b); // a -> lanes 0-7, b -> lanes 8-15
+    return packed;
+}
+
+inline v_uint8x16 v_pack_b(const v_uint32x4& a, const v_uint32x4& b, // four 32-bit vectors -> one 8-bit vector
+                           const v_uint32x4& c, const v_uint32x4& d)
+{
+    v_uint8x16 packed;
+    _pack_b(packed.s, a, b);     // a -> lanes 0-3,  b -> lanes 4-7
+    _pack_b(packed.s + 8, c, d); // c -> lanes 8-11, d -> lanes 12-15
+    return packed;
+}
+
+inline v_uint8x16 v_pack_b(const v_uint64x2& a, const v_uint64x2& b, const v_uint64x2& c, // eight 64-bit vectors -> one 8-bit vector
+                           const v_uint64x2& d, const v_uint64x2& e, const v_uint64x2& f,
+                           const v_uint64x2& g, const v_uint64x2& h)
+{
+    v_uint8x16 packed;
+    _pack_b(packed.s, a, b);      // a -> lanes 0-1,   b -> lanes 2-3
+    _pack_b(packed.s + 4, c, d);  // c -> lanes 4-5,   d -> lanes 6-7
+    _pack_b(packed.s + 8, e, f);  // e -> lanes 8-9,   f -> lanes 10-11
+    _pack_b(packed.s + 12, g, h); // g -> lanes 12-13, h -> lanes 14-15
+    return packed;
+}
+
+inline v_float32x4 v_matmul(const v_float32x4& v, const v_float32x4& m0,
+                            const v_float32x4& m1, const v_float32x4& m2,
+                            const v_float32x4& m3)
+{
+    float r[4]; // r[j] = v[0]*m0[j] + v[1]*m1[j] + v[2]*m2[j] + v[3]*m3[j]
+    for (int j = 0; j < 4; j++)
+        r[j] = v.s[0]*m0.s[j] + v.s[1]*m1.s[j] + v.s[2]*m2.s[j] + v.s[3]*m3.s[j];
+    return v_float32x4(r[0], r[1], r[2], r[3]);
+}
+
+inline v_float32x4 v_matmuladd(const v_float32x4& v, const v_float32x4& m0,
+                               const v_float32x4& m1, const v_float32x4& m2,
+                               const v_float32x4& m3)
+{
+    float r[4]; // r[j] = v[0]*m0[j] + v[1]*m1[j] + v[2]*m2[j] + m3[j]; v[3] is not used
+    for (int j = 0; j < 4; j++)
+        r[j] = v.s[0]*m0.s[j] + v.s[1]*m1.s[j] + v.s[2]*m2.s[j] + m3.s[j];
+    return v_float32x4(r[0], r[1], r[2], r[3]);
+}
+
+inline v_reg<float, V_TypeTraits<float>::nlanes128>
+v_load_expand(const float16_t* ptr)
+{
+    // Reads nlanes consecutive float16_t values from ptr and stores one per float lane.
+    v_reg<float, V_TypeTraits<float>::nlanes128> widened;
+    const float16_t* src = ptr;
+    for( int lane = 0; lane < widened.nlanes; lane++, src++ )
+        widened.s[lane] = *src;
+    return widened;
+}
+
+inline void
+v_pack_store(float16_t* ptr, const v_reg<float, V_TypeTraits<float>::nlanes128>& v)
+{
+    // Converts every float lane of v to float16_t and writes it to consecutive slots of ptr.
+    float16_t* dst = ptr;
+    for( int lane = 0; lane < v.nlanes; lane++ )
+        *dst++ = float16_t(v.s[lane]);
+}
+
+inline void v_cleanup() {} // no-op in the scalar fallback: the body is empty
+}  // namespace fallback
+
+static v128_t wasm_unpacklo_i8x16(v128_t a, v128_t b) { // interleaves bytes 0-7 of a with bytes 0-7 of b: a0,b0,a1,b1,...
+    return wasm_v8x16_shuffle(a, b, 0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23); // indices 0-15 select from a, 16-31 from b
+}
+
+static v128_t wasm_unpacklo_i16x8(v128_t a, v128_t b) { // interleaves the four low 16-bit lanes of a and b
+    return wasm_v8x16_shuffle(a, b, 0,1,16,17,2,3,18,19,4,5,20,21,6,7,22,23);
+}
+
+static v128_t wasm_unpacklo_i32x4(v128_t a, v128_t b) { // interleaves the two low 32-bit lanes of a and b
+    return wasm_v8x16_shuffle(a, b, 0,1,2,3,16,17,18,19,4,5,6,7,20,21,22,23);
+}
+
+static v128_t wasm_unpacklo_i64x2(v128_t a, v128_t b) { // low 64 bits of a followed by low 64 bits of b
+    return wasm_v8x16_shuffle(a, b, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23);
+}
+
+static v128_t wasm_unpackhi_i8x16(v128_t a, v128_t b) { // interleaves bytes 8-15 of a with bytes 8-15 of b: a8,b8,a9,b9,...
+    return wasm_v8x16_shuffle(a, b, 8,24,9,25,10,26,11,27,12,28,13,29,14,30,15,31); // indices 0-15 select from a, 16-31 from b
+}
+
+static v128_t wasm_unpackhi_i16x8(v128_t a, v128_t b) { // interleaves the four high 16-bit lanes of a and b
+    return wasm_v8x16_shuffle(a, b, 8,9,24,25,10,11,26,27,12,13,28,29,14,15,30,31);
+}
+
+static v128_t wasm_unpackhi_i32x4(v128_t a, v128_t b) { // interleaves the two high 32-bit lanes of a and b
+    return wasm_v8x16_shuffle(a, b, 8,9,10,11,24,25,26,27,12,13,14,15,28,29,30,31);
+}
+
+static v128_t wasm_unpackhi_i64x2(v128_t a, v128_t b) { // high 64 bits of a followed by high 64 bits of b
+    return wasm_v8x16_shuffle(a, b, 8,9,10,11,12,13,14,15,24,25,26,27,28,29,30,31);
+}
+
+/** Widening conversions of the low half of a vector **/
+// 8 -> 16 bit: bytes 0-7 become eight 16-bit lanes
+inline v128_t v128_cvtu8x16_i16x8(const v128_t& a)
+{ return wasm_unpacklo_i8x16(a, wasm_i8x16_splat(0)); } // a zero byte is paired with every source byte
+inline v128_t v128_cvti8x16_i16x8(const v128_t& a)
+{
+    // Each byte is duplicated into both bytes of a 16-bit lane; the signed shift
+    // by 8 then leaves the byte value extended with its own sign.
+    const v128_t doubled = wasm_unpacklo_i8x16(a, a);
+    return wasm_i16x8_shr(doubled, 8);
+}
+// 8 -> 32 bit: bytes 0-3 become four 32-bit lanes
+inline v128_t v128_cvtu8x16_i32x4(const v128_t& a)
+{
+    const v128_t zero = wasm_i8x16_splat(0);
+    const v128_t as16 = wasm_unpacklo_i8x16(a, zero);
+    return wasm_unpacklo_i16x8(as16, zero);
+}
+inline v128_t v128_cvti8x16_i32x4(const v128_t& a)
+{
+    const v128_t twice = wasm_unpacklo_i8x16(a, a);               // every byte appears 2x
+    const v128_t four_times = wasm_unpacklo_i8x16(twice, twice);  // every byte appears 4x
+    return wasm_i32x4_shr(four_times, 24);
+}
+// 16 -> 32 bit: 16-bit lanes 0-3 become four 32-bit lanes
+inline v128_t v128_cvtu16x8_i32x4(const v128_t& a)
+{ return wasm_unpacklo_i16x8(a, wasm_i8x16_splat(0)); }
+inline v128_t v128_cvti16x8_i32x4(const v128_t& a)
+{
+    const v128_t doubled = wasm_unpacklo_i16x8(a, a);
+    return wasm_i32x4_shr(doubled, 16);
+}
+// 32 -> 64 bit: 32-bit lanes 0-1 become two 64-bit lanes
+inline v128_t v128_cvtu32x4_i64x2(const v128_t& a)
+{ return wasm_unpacklo_i32x4(a, wasm_i8x16_splat(0)); }
+inline v128_t v128_cvti32x4_i64x2(const v128_t& a)
+{ return wasm_unpacklo_i32x4(a, wasm_i32x4_shr(a, 31)); } // upper word = lane shifted right by 31
+
+// 8 -> 16 bit, taken from the high half (bytes 8-15)
+inline v128_t v128_cvtu8x16_i16x8_high(const v128_t& a)
+{ return wasm_unpackhi_i8x16(a, wasm_i8x16_splat(0)); }
+inline v128_t v128_cvti8x16_i16x8_high(const v128_t& a)
+{
+    const v128_t doubled = wasm_unpackhi_i8x16(a, a);
+    return wasm_i16x8_shr(doubled, 8);
+}
+// 16 -> 32 bit, taken from the high half (16-bit lanes 4-7)
+inline v128_t v128_cvtu16x8_i32x4_high(const v128_t& a)
+{ return wasm_unpackhi_i16x8(a, wasm_i8x16_splat(0)); }
+inline v128_t v128_cvti16x8_i32x4_high(const v128_t& a)
+{
+    const v128_t doubled = wasm_unpackhi_i16x8(a, a);
+    return wasm_i32x4_shr(doubled, 16);
+}
+// 32 -> 64 bit, taken from the high half (32-bit lanes 2-3)
+inline v128_t v128_cvtu32x4_i64x2_high(const v128_t& a)
+{ return wasm_unpackhi_i32x4(a, wasm_i8x16_splat(0)); }
+inline v128_t v128_cvti32x4_i64x2_high(const v128_t& a)
+{
+    const v128_t signs = wasm_i32x4_shr(a, 31);
+    return wasm_unpackhi_i32x4(a, signs);
+}
+
+#define OPENCV_HAL_IMPL_WASM_INITVEC(_Tpvec, _Tp, suffix, zsuffix, _Tps) /* generates v_setzero_, v_setall_ and v_reinterpret_as_<suffix> for one vector type */ \
+inline _Tpvec v_setzero_##suffix() { return _Tpvec(wasm_##zsuffix##_splat((_Tps)0)); } /* splat of zero */ \
+inline _Tpvec v_setall_##suffix(_Tp v) { return _Tpvec(wasm_##zsuffix##_splat((_Tps)v)); } /* v is cast to _Tps before the splat */ \
+template<typename _Tpvec0> inline _Tpvec v_reinterpret_as_##suffix(const _Tpvec0& a) \
+{ return _Tpvec(a.val); } // passes a.val straight to the _Tpvec constructor
+
+OPENCV_HAL_IMPL_WASM_INITVEC(v_uint8x16, uchar, u8, i8x16, schar) // unsigned types reuse the signed splat of the same lane width
+OPENCV_HAL_IMPL_WASM_INITVEC(v_int8x16, schar, s8, i8x16, schar)
+OPENCV_HAL_IMPL_WASM_INITVEC(v_uint16x8, ushort, u16, i16x8, short)
+OPENCV_HAL_IMPL_WASM_INITVEC(v_int16x8, short, s16, i16x8, short)
+OPENCV_HAL_IMPL_WASM_INITVEC(v_uint32x4, unsigned, u32, i32x4, int)
+OPENCV_HAL_IMPL_WASM_INITVEC(v_int32x4, int, s32, i32x4, int)
+OPENCV_HAL_IMPL_WASM_INITVEC(v_float32x4, float, f32, f32x4, float)
+
+#ifdef __wasm_unimplemented_simd128__ // the 64-bit lane splats are only used when this macro is defined
+OPENCV_HAL_IMPL_WASM_INITVEC(v_uint64x2, uint64, u64, i64x2, int64)
+OPENCV_HAL_IMPL_WASM_INITVEC(v_int64x2, int64, s64, i64x2, int64)
+OPENCV_HAL_IMPL_WASM_INITVEC(v_float64x2, double, f64, f64x2, double)
+#else // otherwise the 2-lane vectors are built from two scalars
+#define OPENCV_HAL_IMPL_FALLBACK_INITVEC(_Tpvec, _Tp, suffix, _Tps) /* same three functions as OPENCV_HAL_IMPL_WASM_INITVEC, without splat intrinsics */ \
+inline _Tpvec v_setzero_##suffix() { return _Tpvec((_Tps)0, (_Tps)0); } /* two-argument constructor, both arguments zero */ \
+inline _Tpvec v_setall_##suffix(_Tp v) { return _Tpvec((_Tps)v, (_Tps)v); } /* both arguments are v cast to _Tps */ \
+template<typename _Tpvec0> inline _Tpvec v_reinterpret_as_##suffix(const _Tpvec0& a) \
+{ return _Tpvec(a.val); } // passes a.val straight to the _Tpvec constructor
+
+OPENCV_HAL_IMPL_FALLBACK_INITVEC(v_uint64x2, uint64, u64, int64)
+OPENCV_HAL_IMPL_FALLBACK_INITVEC(v_int64x2, int64, s64, int64)
+OPENCV_HAL_IMPL_FALLBACK_INITVEC(v_float64x2, double, f64, double)
+#endif
+
+//////////////// PACK ///////////////
+inline v_uint8x16 v_pack(const v_uint16x8& a, const v_uint16x8& b) // saturating ushort -> uchar; a -> lanes 0-7, b -> lanes 8-15
+{
+    const v128_t upper = wasm_i16x8_splat(255);
+    const v128_t a_sat = wasm_v128_bitselect(upper, a.val, wasm_u16x8_gt(a.val, upper)); // lanes above 255 become 255
+    const v128_t b_sat = wasm_v128_bitselect(upper, b.val, wasm_u16x8_gt(b.val, upper));
+    return v_uint8x16(wasm_v8x16_shuffle(a_sat, b_sat, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30)); // byte 0 of every 16-bit lane
+}
+inline v_int8x16 v_pack(const v_int16x8& a, const v_int16x8& b) // saturating short -> schar; a -> lanes 0-7, b -> lanes 8-15
+{
+    const v128_t upper = wasm_i16x8_splat(127);
+    const v128_t lower = wasm_i16x8_splat(-128);
+    const v128_t a_hi = wasm_v128_bitselect(upper, a.val, wasm_i16x8_gt(a.val, upper)); // clamp from above first...
+    const v128_t a_sat = wasm_v128_bitselect(lower, a_hi, wasm_i16x8_lt(a_hi, lower));  // ...then from below
+    const v128_t b_hi = wasm_v128_bitselect(upper, b.val, wasm_i16x8_gt(b.val, upper));
+    const v128_t b_sat = wasm_v128_bitselect(lower, b_hi, wasm_i16x8_lt(b_hi, lower));
+    return v_int8x16(wasm_v8x16_shuffle(a_sat, b_sat, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30)); // byte 0 of every 16-bit lane
+}
+inline v_uint16x8 v_pack(const v_uint32x4& a, const v_uint32x4& b) // saturating unsigned -> ushort; a -> lanes 0-3, b -> lanes 4-7
+{
+    const v128_t upper = wasm_i32x4_splat(65535);
+    const v128_t a_sat = wasm_v128_bitselect(upper, a.val, wasm_u32x4_gt(a.val, upper)); // lanes above 65535 become 65535
+    const v128_t b_sat = wasm_v128_bitselect(upper, b.val, wasm_u32x4_gt(b.val, upper));
+    return v_uint16x8(wasm_v8x16_shuffle(a_sat, b_sat, 0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29)); // bytes 0-1 of every 32-bit lane
+}
+inline v_int16x8 v_pack(const v_int32x4& a, const v_int32x4& b) // saturating int -> short; a -> lanes 0-3, b -> lanes 4-7
+{
+    const v128_t upper = wasm_i32x4_splat(32767);
+    const v128_t lower = wasm_i32x4_splat(-32768);
+    const v128_t a_hi = wasm_v128_bitselect(upper, a.val, wasm_i32x4_gt(a.val, upper)); // clamp from above first...
+    const v128_t a_sat = wasm_v128_bitselect(lower, a_hi, wasm_i32x4_lt(a_hi, lower));  // ...then from below
+    const v128_t b_hi = wasm_v128_bitselect(upper, b.val, wasm_i32x4_gt(b.val, upper));
+    const v128_t b_sat = wasm_v128_bitselect(lower, b_hi, wasm_i32x4_lt(b_hi, lower));
+    return v_int16x8(wasm_v8x16_shuffle(a_sat, b_sat, 0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29)); // bytes 0-1 of every 32-bit lane
+}
+inline v_uint32x4 v_pack(const v_uint64x2& a, const v_uint64x2& b) // uint64 -> unsigned by truncation (no clamping step); a -> lanes 0-1, b -> lanes 2-3
+{
+    return v_uint32x4(wasm_v8x16_shuffle(a.val, b.val, 0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27)); // bytes 0-3 of every 64-bit lane
+}
+inline v_int32x4 v_pack(const v_int64x2& a, const v_int64x2& b) // int64 -> int by truncation (no clamping step); a -> lanes 0-1, b -> lanes 2-3
+{
+    return v_int32x4(wasm_v8x16_shuffle(a.val, b.val, 0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27)); // bytes 0-3 of every 64-bit lane
+}
+inline v_uint8x16 v_pack_u(const v_int16x8& a, const v_int16x8& b) // saturating short -> uchar; negative lanes become 0
+{
+    const v128_t upper = wasm_i16x8_splat(255);
+    const v128_t lower = wasm_i16x8_splat(0);
+    const v128_t a_hi = wasm_v128_bitselect(upper, a.val, wasm_i16x8_gt(a.val, upper)); // clamp from above first...
+    const v128_t a_sat = wasm_v128_bitselect(lower, a_hi, wasm_i16x8_lt(a_hi, lower));  // ...then from below
+    const v128_t b_hi = wasm_v128_bitselect(upper, b.val, wasm_i16x8_gt(b.val, upper));
+    const v128_t b_sat = wasm_v128_bitselect(lower, b_hi, wasm_i16x8_lt(b_hi, lower));
+    return v_uint8x16(wasm_v8x16_shuffle(a_sat, b_sat, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30)); // byte 0 of every 16-bit lane
+}
+inline v_uint16x8 v_pack_u(const v_int32x4& a, const v_int32x4& b) // saturating int -> ushort; negative lanes become 0
+{
+    const v128_t upper = wasm_i32x4_splat(65535);
+    const v128_t lower = wasm_i32x4_splat(0);
+    const v128_t a_hi = wasm_v128_bitselect(upper, a.val, wasm_i32x4_gt(a.val, upper)); // clamp from above first...
+    const v128_t a_sat = wasm_v128_bitselect(lower, a_hi, wasm_i32x4_lt(a_hi, lower));  // ...then from below
+    const v128_t b_hi = wasm_v128_bitselect(upper, b.val, wasm_i32x4_gt(b.val, upper));
+    const v128_t b_sat = wasm_v128_bitselect(lower, b_hi, wasm_i32x4_lt(b_hi, lower));
+    return v_uint16x8(wasm_v8x16_shuffle(a_sat, b_sat, 0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29)); // bytes 0-1 of every 32-bit lane
+}
+
+// Rounding right shift by n, then saturating ushort -> uchar narrowing (a -> lanes 0-7, b -> lanes 8-15).
+template<int n> inline v_uint8x16 v_rshr_pack(const v_uint16x8& a, const v_uint16x8& b)
+{
+    const v128_t bias = wasm_i16x8_splat(((short)1 << (n-1)));
+    const v128_t upper = wasm_i16x8_splat(255);
+    const v128_t a_shr = wasm_u16x8_shr(wasm_i16x8_add(a.val, bias), n); // NOTE(review): add is done on 16-bit lanes; the scalar fallback adds after integer promotion - confirm for inputs near 65535
+    const v128_t b_shr = wasm_u16x8_shr(wasm_i16x8_add(b.val, bias), n);
+    const v128_t a_sat = wasm_v128_bitselect(upper, a_shr, wasm_u16x8_gt(a_shr, upper));
+    const v128_t b_sat = wasm_v128_bitselect(upper, b_shr, wasm_u16x8_gt(b_shr, upper));
+    return v_uint8x16(wasm_v8x16_shuffle(a_sat, b_sat, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30));
+}
+// Rounding right shift by n, then saturating short -> schar narrowing (a -> lanes 0-7, b -> lanes 8-15).
+template<int n> inline v_int8x16 v_rshr_pack(const v_int16x8& a, const v_int16x8& b)
+{
+    const v128_t bias = wasm_i16x8_splat(((short)1 << (n-1)));
+    const v128_t upper = wasm_i16x8_splat(127);
+    const v128_t lower = wasm_i16x8_splat(-128);
+    const v128_t a_shr = wasm_i16x8_shr(wasm_i16x8_add(a.val, bias), n); // NOTE(review): add is done on 16-bit lanes; the scalar fallback adds after integer promotion - confirm for inputs near 32767
+    const v128_t b_shr = wasm_i16x8_shr(wasm_i16x8_add(b.val, bias), n);
+    const v128_t a_hi = wasm_v128_bitselect(upper, a_shr, wasm_i16x8_gt(a_shr, upper));
+    const v128_t b_hi = wasm_v128_bitselect(upper, b_shr, wasm_i16x8_gt(b_shr, upper));
+    const v128_t a_sat = wasm_v128_bitselect(lower, a_hi, wasm_i16x8_lt(a_shr, lower)); // lower bound is tested on the unclamped value
+    const v128_t b_sat = wasm_v128_bitselect(lower, b_hi, wasm_i16x8_lt(b_shr, lower));
+    return v_int8x16(wasm_v8x16_shuffle(a_sat, b_sat, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30));
+}
+// Rounding right shift by n, then saturating unsigned -> ushort narrowing (a -> lanes 0-3, b -> lanes 4-7).
+template<int n> inline v_uint16x8 v_rshr_pack(const v_uint32x4& a, const v_uint32x4& b)
+{
+    const v128_t bias = wasm_i32x4_splat(((int)1 << (n-1)));
+    const v128_t upper = wasm_i32x4_splat(65535);
+    const v128_t a_shr = wasm_u32x4_shr(wasm_i32x4_add(a.val, bias), n);
+    const v128_t b_shr = wasm_u32x4_shr(wasm_i32x4_add(b.val, bias), n);
+    const v128_t a_sat = wasm_v128_bitselect(upper, a_shr, wasm_u32x4_gt(a_shr, upper));
+    const v128_t b_sat = wasm_v128_bitselect(upper, b_shr, wasm_u32x4_gt(b_shr, upper));
+    return v_uint16x8(wasm_v8x16_shuffle(a_sat, b_sat, 0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29));
+}
+// Rounding right shift by n, then saturating int -> short narrowing (a -> lanes 0-3, b -> lanes 4-7).
+template<int n> inline v_int16x8 v_rshr_pack(const v_int32x4& a, const v_int32x4& b)
+{
+    v128_t delta = wasm_i32x4_splat(((int)1 << (n-1)));
+    v128_t a1 = wasm_i32x4_shr(wasm_i32x4_add(a.val, delta), n);
+    v128_t b1 = wasm_i32x4_shr(wasm_i32x4_add(b.val, delta), n);
+    v128_t maxval = wasm_i32x4_splat(32767);
+    v128_t minval = wasm_i32x4_splat(-32768); // must be a 32-bit splat: a 16-bit splat yields lanes of 0x80008000, so the lower clamp never triggered
+    v128_t a2 = wasm_v128_bitselect(maxval, a1, wasm_i32x4_gt(a1, maxval));
+    v128_t b2 = wasm_v128_bitselect(maxval, b1, wasm_i32x4_gt(b1, maxval));
+    v128_t a3 = wasm_v128_bitselect(minval, a2, wasm_i32x4_lt(a1, minval)); // lower bound is tested on the unclamped value
+    v128_t b3 = wasm_v128_bitselect(minval, b2, wasm_i32x4_lt(b1, minval));
+    return v_int16x8(wasm_v8x16_shuffle(a3, b3, 0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29)); // bytes 0-1 of every 32-bit lane
+}
+// Rounding right shift by n, then uint64 -> unsigned narrowing by truncation (a -> lanes 0-1, b -> lanes 2-3).
+template<int n> inline v_uint32x4 v_rshr_pack(const v_uint64x2& a, const v_uint64x2& b)
+{
+#ifdef __wasm_unimplemented_simd128__
+    const v128_t bias = wasm_i64x2_splat(((int64)1 << (n-1)));
+    const v128_t a_shr = wasm_u64x2_shr(wasm_i64x2_add(a.val, bias), n);
+    const v128_t b_shr = wasm_u64x2_shr(wasm_i64x2_add(b.val, bias), n);
+    return v_uint32x4(wasm_v8x16_shuffle(a_shr, b_shr, 0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27));
+#else
+    fallback::v_uint64x2 fa(a), fb(b);
+    return fallback::v_rshr_pack<n>(fa, fb);
+#endif
+}
+// Rounding right shift by n, then int64 -> int narrowing by truncation (a -> lanes 0-1, b -> lanes 2-3).
+template<int n> inline v_int32x4 v_rshr_pack(const v_int64x2& a, const v_int64x2& b)
+{
+#ifdef __wasm_unimplemented_simd128__
+    const v128_t bias = wasm_i64x2_splat(((int64)1 << (n-1)));
+    const v128_t a_shr = wasm_i64x2_shr(wasm_i64x2_add(a.val, bias), n);
+    const v128_t b_shr = wasm_i64x2_shr(wasm_i64x2_add(b.val, bias), n);
+    return v_int32x4(wasm_v8x16_shuffle(a_shr, b_shr, 0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27));
+#else
+    fallback::v_int64x2 fa(a), fb(b);
+    return fallback::v_rshr_pack<n>(fa, fb);
+#endif
+}
+// Rounding right shift by n, then saturating short -> uchar narrowing (negative lanes become 0).
+template<int n> inline v_uint8x16 v_rshr_pack_u(const v_int16x8& a, const v_int16x8& b)
+{
+    const v128_t bias = wasm_i16x8_splat(((short)1 << (n-1)));
+    const v128_t upper = wasm_i16x8_splat(255);
+    const v128_t lower = wasm_i16x8_splat(0);
+    const v128_t a_shr = wasm_i16x8_shr(wasm_i16x8_add(a.val, bias), n);
+    const v128_t b_shr = wasm_i16x8_shr(wasm_i16x8_add(b.val, bias), n);
+    const v128_t a_hi = wasm_v128_bitselect(upper, a_shr, wasm_i16x8_gt(a_shr, upper));
+    const v128_t b_hi = wasm_v128_bitselect(upper, b_shr, wasm_i16x8_gt(b_shr, upper));
+    const v128_t a_sat = wasm_v128_bitselect(lower, a_hi, wasm_i16x8_lt(a_shr, lower)); // lower bound is tested on the unclamped value
+    const v128_t b_sat = wasm_v128_bitselect(lower, b_hi, wasm_i16x8_lt(b_shr, lower));
+    return v_uint8x16(wasm_v8x16_shuffle(a_sat, b_sat, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30));
+}
+// Rounding right shift by n, then saturating int -> ushort narrowing (negative lanes become 0).
+template<int n> inline v_uint16x8 v_rshr_pack_u(const v_int32x4& a, const v_int32x4& b)
+{
+    v128_t delta = wasm_i32x4_splat(((int)1 << (n-1)));
+    v128_t a1 = wasm_i32x4_shr(wasm_i32x4_add(a.val, delta), n);
+    v128_t b1 = wasm_i32x4_shr(wasm_i32x4_add(b.val, delta), n);
+    v128_t maxval = wasm_i32x4_splat(65535);
+    v128_t minval = wasm_i32x4_splat(0); // 32-bit splat to match the 32-bit compares (same bits as the former 16-bit splat of 0)
+    v128_t a2 = wasm_v128_bitselect(maxval, a1, wasm_i32x4_gt(a1, maxval));
+    v128_t b2 = wasm_v128_bitselect(maxval, b1, wasm_i32x4_gt(b1, maxval));
+    v128_t a3 = wasm_v128_bitselect(minval, a2, wasm_i32x4_lt(a1, minval)); // lower bound is tested on the unclamped value
+    v128_t b3 = wasm_v128_bitselect(minval, b2, wasm_i32x4_lt(b1, minval));
+    return v_uint16x8(wasm_v8x16_shuffle(a3, b3, 0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29)); // bytes 0-1 of every 32-bit lane
+}
+
+// Saturating ushort -> uchar narrowing of a; writes 8 elements to ptr.
+inline void v_pack_store(uchar* ptr, const v_uint16x8& a)
+{
+    const v128_t upper = wasm_i16x8_splat(255);
+    const v128_t a_sat = wasm_v128_bitselect(upper, a.val, wasm_u16x8_gt(a.val, upper));
+    const v128_t packed = wasm_v8x16_shuffle(a_sat, a_sat, 0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14);
+    uchar staged[16]; // receives the whole 128-bit store; only the first half is copied out
+    wasm_v128_store(staged, packed);
+    for (int k = 0; k < 8; ++k)
+        ptr[k] = staged[k];
+}
+// Saturating short -> schar narrowing of a; writes 8 elements to ptr.
+inline void v_pack_store(schar* ptr, const v_int16x8& a)
+{
+    const v128_t upper = wasm_i16x8_splat(127);
+    const v128_t lower = wasm_i16x8_splat(-128);
+    const v128_t a_hi = wasm_v128_bitselect(upper, a.val, wasm_i16x8_gt(a.val, upper));
+    const v128_t a_sat = wasm_v128_bitselect(lower, a_hi, wasm_i16x8_lt(a_hi, lower));
+    const v128_t packed = wasm_v8x16_shuffle(a_sat, a_sat, 0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14);
+    schar staged[16]; // receives the whole 128-bit store; only the first half is copied out
+    wasm_v128_store(staged, packed);
+    for (int k = 0; k < 8; ++k)
+        ptr[k] = staged[k];
+}
+// Saturating unsigned -> ushort narrowing of a; writes 4 elements to ptr.
+inline void v_pack_store(ushort* ptr, const v_uint32x4& a)
+{
+    const v128_t upper = wasm_i32x4_splat(65535);
+    const v128_t a_sat = wasm_v128_bitselect(upper, a.val, wasm_u32x4_gt(a.val, upper));
+    const v128_t packed = wasm_v8x16_shuffle(a_sat, a_sat, 0,1,4,5,8,9,12,13,0,1,4,5,8,9,12,13);
+    ushort staged[8]; // receives the whole 128-bit store; only the first half is copied out
+    wasm_v128_store(staged, packed);
+    for (int k = 0; k < 4; ++k)
+        ptr[k] = staged[k];
+}
+// Saturating int -> short narrowing of a; writes 4 elements to ptr.
+inline void v_pack_store(short* ptr, const v_int32x4& a)
+{
+    const v128_t upper = wasm_i32x4_splat(32767);
+    const v128_t lower = wasm_i32x4_splat(-32768);
+    const v128_t a_hi = wasm_v128_bitselect(upper, a.val, wasm_i32x4_gt(a.val, upper));
+    const v128_t a_sat = wasm_v128_bitselect(lower, a_hi, wasm_i32x4_lt(a_hi, lower));
+    const v128_t packed = wasm_v8x16_shuffle(a_sat, a_sat, 0,1,4,5,8,9,12,13,0,1,4,5,8,9,12,13);
+    short staged[8]; // receives the whole 128-bit store; only the first half is copied out
+    wasm_v128_store(staged, packed);
+    for (int k = 0; k < 4; ++k)
+        ptr[k] = staged[k];
+}
+// uint64 -> unsigned narrowing of a by truncation; writes 2 elements to ptr.
+inline void v_pack_store(unsigned* ptr, const v_uint64x2& a)
+{
+    const v128_t packed = wasm_v8x16_shuffle(a.val, a.val, 0,1,2,3,8,9,10,11,0,1,2,3,8,9,10,11);
+    unsigned staged[4]; // receives the whole 128-bit store; only the first half is copied out
+    wasm_v128_store(staged, packed);
+    for (int k = 0; k < 2; ++k)
+        ptr[k] = staged[k];
+}
+// int64 -> int narrowing of a by truncation; writes 2 elements to ptr.
+inline void v_pack_store(int* ptr, const v_int64x2& a)
+{
+    const v128_t packed = wasm_v8x16_shuffle(a.val, a.val, 0,1,2,3,8,9,10,11,0,1,2,3,8,9,10,11);
+    int staged[4]; // receives the whole 128-bit store; only the first half is copied out
+    wasm_v128_store(staged, packed);
+    for (int k = 0; k < 2; ++k)
+        ptr[k] = staged[k];
+}
+// Saturating short -> uchar narrowing of a (negative lanes become 0); writes 8 elements to ptr.
+inline void v_pack_u_store(uchar* ptr, const v_int16x8& a)
+{
+    const v128_t upper = wasm_i16x8_splat(255);
+    const v128_t lower = wasm_i16x8_splat(0);
+    const v128_t a_hi = wasm_v128_bitselect(upper, a.val, wasm_i16x8_gt(a.val, upper));
+    const v128_t a_sat = wasm_v128_bitselect(lower, a_hi, wasm_i16x8_lt(a_hi, lower));
+    const v128_t packed = wasm_v8x16_shuffle(a_sat, a_sat, 0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14);
+    uchar staged[16]; // receives the whole 128-bit store; only the first half is copied out
+    wasm_v128_store(staged, packed);
+    for (int k = 0; k < 8; ++k)
+        ptr[k] = staged[k];
+}
+// Saturating int -> ushort narrowing of a (negative lanes become 0); writes 4 elements to ptr.
+inline void v_pack_u_store(ushort* ptr, const v_int32x4& a)
+{
+    const v128_t upper = wasm_i32x4_splat(65535);
+    const v128_t lower = wasm_i32x4_splat(0);
+    const v128_t a_hi = wasm_v128_bitselect(upper, a.val, wasm_i32x4_gt(a.val, upper));
+    const v128_t a_sat = wasm_v128_bitselect(lower, a_hi, wasm_i32x4_lt(a_hi, lower));
+    const v128_t packed = wasm_v8x16_shuffle(a_sat, a_sat, 0,1,4,5,8,9,12,13,0,1,4,5,8,9,12,13);
+    ushort staged[8]; // receives the whole 128-bit store; only the first half is copied out
+    wasm_v128_store(staged, packed);
+    for (int k = 0; k < 4; ++k)
+        ptr[k] = staged[k];
+}
+
+// Rounding right shift by n, then saturating ushort -> uchar narrowing of a;
+// writes 8 elements to ptr.
+template<int n> inline void v_rshr_pack_store(uchar* ptr, const v_uint16x8& a)
+{
+    const v128_t bias = wasm_i16x8_splat((short)(1 << (n-1)));
+    const v128_t upper = wasm_i16x8_splat(255);
+    const v128_t a_shr = wasm_u16x8_shr(wasm_i16x8_add(a.val, bias), n);
+    const v128_t a_sat = wasm_v128_bitselect(upper, a_shr, wasm_u16x8_gt(a_shr, upper));
+    const v128_t packed = wasm_v8x16_shuffle(a_sat, a_sat, 0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14);
+    uchar staged[16]; // receives the whole 128-bit store; only the first half is copied out
+    wasm_v128_store(staged, packed);
+    for (int k = 0; k < 8; ++k)
+        ptr[k] = staged[k];
+}
+// Rounding right shift by n, then saturating short -> schar narrowing of a;
+// writes 8 elements to ptr.
+template<int n> inline void v_rshr_pack_store(schar* ptr, const v_int16x8& a)
+{
+    const v128_t bias = wasm_i16x8_splat(((short)1 << (n-1)));
+    const v128_t upper = wasm_i16x8_splat(127);
+    const v128_t lower = wasm_i16x8_splat(-128);
+    const v128_t a_shr = wasm_i16x8_shr(wasm_i16x8_add(a.val, bias), n);
+    const v128_t a_hi = wasm_v128_bitselect(upper, a_shr, wasm_i16x8_gt(a_shr, upper));
+    const v128_t a_sat = wasm_v128_bitselect(lower, a_hi, wasm_i16x8_lt(a_shr, lower)); // lower bound is tested on the unclamped value
+    const v128_t packed = wasm_v8x16_shuffle(a_sat, a_sat, 0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14);
+    schar staged[16]; // receives the whole 128-bit store; only the first half is copied out
+    wasm_v128_store(staged, packed);
+    for (int k = 0; k < 8; ++k)
+        ptr[k] = staged[k];
+}
+// Rounding right shift by n, then saturating unsigned -> ushort narrowing of a;
+// writes 4 elements to ptr.
+template<int n> inline void v_rshr_pack_store(ushort* ptr, const v_uint32x4& a)
+{
+    const v128_t bias = wasm_i32x4_splat(((int)1 << (n-1)));
+    const v128_t upper = wasm_i32x4_splat(65535);
+    const v128_t a_shr = wasm_u32x4_shr(wasm_i32x4_add(a.val, bias), n);
+    const v128_t a_sat = wasm_v128_bitselect(upper, a_shr, wasm_u32x4_gt(a_shr, upper));
+    const v128_t packed = wasm_v8x16_shuffle(a_sat, a_sat, 0,1,4,5,8,9,12,13,0,1,4,5,8,9,12,13);
+    ushort staged[8]; // receives the whole 128-bit store; only the first half is copied out
+    wasm_v128_store(staged, packed);
+    for (int k = 0; k < 4; ++k)
+        ptr[k] = staged[k];
+}
+// Rounding right shift by n, then saturating int -> short narrowing of a;
+// writes 4 elements to ptr.
+template<int n> inline void v_rshr_pack_store(short* ptr, const v_int32x4& a)
+{
+    const v128_t bias = wasm_i32x4_splat(((int)1 << (n-1)));
+    const v128_t upper = wasm_i32x4_splat(32767);
+    const v128_t lower = wasm_i32x4_splat(-32768);
+    const v128_t a_shr = wasm_i32x4_shr(wasm_i32x4_add(a.val, bias), n);
+    const v128_t a_hi = wasm_v128_bitselect(upper, a_shr, wasm_i32x4_gt(a_shr, upper));
+    const v128_t a_sat = wasm_v128_bitselect(lower, a_hi, wasm_i32x4_lt(a_shr, lower)); // lower bound is tested on the unclamped value
+    const v128_t packed = wasm_v8x16_shuffle(a_sat, a_sat, 0,1,4,5,8,9,12,13,0,1,4,5,8,9,12,13);
+    short staged[8]; // receives the whole 128-bit store; only the first half is copied out
+    wasm_v128_store(staged, packed);
+    for (int k = 0; k < 4; ++k)
+        ptr[k] = staged[k];
+}
+// Rounding right shift by n, then uint64 -> unsigned narrowing of a by truncation;
+// writes 2 elements to ptr.
+template<int n> inline void v_rshr_pack_store(unsigned* ptr, const v_uint64x2& a)
+{
+#ifdef __wasm_unimplemented_simd128__
+    const v128_t bias = wasm_i64x2_splat(((int64)1 << (n-1)));
+    const v128_t a_shr = wasm_u64x2_shr(wasm_i64x2_add(a.val, bias), n);
+    const v128_t packed = wasm_v8x16_shuffle(a_shr, a_shr, 0,1,2,3,8,9,10,11,0,1,2,3,8,9,10,11);
+    unsigned staged[4]; // receives the whole 128-bit store; only the first half is copied out
+    wasm_v128_store(staged, packed);
+    for (int k = 0; k < 2; ++k)
+        ptr[k] = staged[k];
+#else
+    fallback::v_uint64x2 fa(a);
+    fallback::v_rshr_pack_store<n>(ptr, fa);
+#endif
+}
+// Rounding right shift by n, then int64 -> int narrowing of a by truncation;
+// writes 2 elements to ptr.
+template<int n> inline void v_rshr_pack_store(int* ptr, const v_int64x2& a)
+{
+#ifdef __wasm_unimplemented_simd128__
+    const v128_t bias = wasm_i64x2_splat(((int64)1 << (n-1)));
+    const v128_t a_shr = wasm_i64x2_shr(wasm_i64x2_add(a.val, bias), n);
+    const v128_t packed = wasm_v8x16_shuffle(a_shr, a_shr, 0,1,2,3,8,9,10,11,0,1,2,3,8,9,10,11);
+    int staged[4]; // receives the whole 128-bit store; only the first half is copied out
+    wasm_v128_store(staged, packed);
+    for (int k = 0; k < 2; ++k)
+        ptr[k] = staged[k];
+#else
+    fallback::v_int64x2 fa(a);
+    fallback::v_rshr_pack_store<n>(ptr, fa);
+#endif
+}
+// Rounding right shift by n, then saturating short -> uchar narrowing of a
+// (negative lanes become 0); writes 8 elements to ptr.
+template<int n> inline void v_rshr_pack_u_store(uchar* ptr, const v_int16x8& a)
+{
+    const v128_t bias = wasm_i16x8_splat(((short)1 << (n-1)));
+    const v128_t upper = wasm_i16x8_splat(255);
+    const v128_t lower = wasm_i16x8_splat(0);
+    const v128_t a_shr = wasm_i16x8_shr(wasm_i16x8_add(a.val, bias), n);
+    const v128_t a_hi = wasm_v128_bitselect(upper, a_shr, wasm_i16x8_gt(a_shr, upper));
+    const v128_t a_sat = wasm_v128_bitselect(lower, a_hi, wasm_i16x8_lt(a_shr, lower)); // lower bound is tested on the unclamped value
+    const v128_t packed = wasm_v8x16_shuffle(a_sat, a_sat, 0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14);
+    uchar staged[16]; // receives the whole 128-bit store; only the first half is copied out
+    wasm_v128_store(staged, packed);
+    for (int k = 0; k < 8; ++k)
+        ptr[k] = staged[k];
+}
+// Rounding right shift by n, then saturating int -> ushort narrowing of a
+// (negative lanes become 0); writes 4 elements to ptr.
+template<int n> inline void v_rshr_pack_u_store(ushort* ptr, const v_int32x4& a)
+{
+    const v128_t bias = wasm_i32x4_splat(((int)1 << (n-1)));
+    const v128_t upper = wasm_i32x4_splat(65535);
+    const v128_t lower = wasm_i32x4_splat(0);
+    const v128_t a_shr = wasm_i32x4_shr(wasm_i32x4_add(a.val, bias), n);
+    const v128_t a_hi = wasm_v128_bitselect(upper, a_shr, wasm_i32x4_gt(a_shr, upper));
+    const v128_t a_sat = wasm_v128_bitselect(lower, a_hi, wasm_i32x4_lt(a_shr, lower)); // lower bound is tested on the unclamped value
+    const v128_t packed = wasm_v8x16_shuffle(a_sat, a_sat, 0,1,4,5,8,9,12,13,0,1,4,5,8,9,12,13);
+    ushort staged[8]; // receives the whole 128-bit store; only the first half is copied out
+    wasm_v128_store(staged, packed);
+    for (int k = 0; k < 4; ++k)
+        ptr[k] = staged[k];
+}
+
+inline v_uint8x16 v_pack_b(const v_uint16x8& a, const v_uint16x8& b) // two 16-bit vectors -> one 8-bit vector, lanes clamped to 255
+{
+    const v128_t upper = wasm_i16x8_splat(255);
+    const v128_t a_sat = wasm_v128_bitselect(upper, a.val, wasm_u16x8_gt(a.val, upper));
+    const v128_t b_sat = wasm_v128_bitselect(upper, b.val, wasm_u16x8_gt(b.val, upper));
+    return v_uint8x16(wasm_v8x16_shuffle(a_sat, b_sat, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30)); // byte 0 of every 16-bit lane
+}
+
+inline v_uint8x16 v_pack_b(const v_uint32x4& a, const v_uint32x4& b, // four 32-bit vectors -> one 8-bit vector, lanes clamped to 255
+                           const v_uint32x4& c, const v_uint32x4& d)
+{
+    const v128_t upper = wasm_i32x4_splat(255);
+    const v128_t a_sat = wasm_v128_bitselect(upper, a.val, wasm_u32x4_gt(a.val, upper));
+    const v128_t b_sat = wasm_v128_bitselect(upper, b.val, wasm_u32x4_gt(b.val, upper));
+    const v128_t c_sat = wasm_v128_bitselect(upper, c.val, wasm_u32x4_gt(c.val, upper));
+    const v128_t d_sat = wasm_v128_bitselect(upper, d.val, wasm_u32x4_gt(d.val, upper));
+    const v128_t ab_bytes = wasm_v8x16_shuffle(a_sat, b_sat, 0,4,8,12,16,20,24,28,0,4,8,12,16,20,24,28); // byte 0 of each lane of a then b, repeated twice
+    const v128_t cd_bytes = wasm_v8x16_shuffle(c_sat, d_sat, 0,4,8,12,16,20,24,28,0,4,8,12,16,20,24,28);
+    return v_uint8x16(wasm_v8x16_shuffle(ab_bytes, cd_bytes, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23)); // first 8 bytes of each half-result
+}
+
+inline v_uint8x16 v_pack_b(const v_uint64x2& a, const v_uint64x2& b, const v_uint64x2& c,
+                           const v_uint64x2& d, const v_uint64x2& e, const v_uint64x2& f,
+                           const v_uint64x2& g, const v_uint64x2& h)
+{   // Packs the sixteen 64-bit lanes of a..h (in argument order) into one 8-bit vector.
+#ifdef __wasm_unimplemented_simd128__
+    v128_t maxval = wasm_i64x2_splat(255);  // 64-bit splat: each lane is compared against 255, matching the 16/32-bit overloads
+    v128_t a1 = wasm_v128_bitselect(maxval, a.val, ((__u64x2)(a.val) > (__u64x2)maxval));
+    v128_t b1 = wasm_v128_bitselect(maxval, b.val, ((__u64x2)(b.val) > (__u64x2)maxval));
+    v128_t c1 = wasm_v128_bitselect(maxval, c.val, ((__u64x2)(c.val) > (__u64x2)maxval));
+    v128_t d1 = wasm_v128_bitselect(maxval, d.val, ((__u64x2)(d.val) > (__u64x2)maxval));
+    v128_t e1 = wasm_v128_bitselect(maxval, e.val, ((__u64x2)(e.val) > (__u64x2)maxval));
+    v128_t f1 = wasm_v128_bitselect(maxval, f.val, ((__u64x2)(f.val) > (__u64x2)maxval));
+    v128_t g1 = wasm_v128_bitselect(maxval, g.val, ((__u64x2)(g.val) > (__u64x2)maxval));
+    v128_t h1 = wasm_v128_bitselect(maxval, h.val, ((__u64x2)(h.val) > (__u64x2)maxval));
+    v128_t ab = wasm_v8x16_shuffle(a1, b1, 0,8,16,24,0,8,16,24,0,8,16,24,0,8,16,24);  // byte 0 of each 64-bit lane: a, a, b, b (pattern repeated)
+    v128_t cd = wasm_v8x16_shuffle(c1, d1, 0,8,16,24,0,8,16,24,0,8,16,24,0,8,16,24);
+    v128_t ef = wasm_v8x16_shuffle(e1, f1, 0,8,16,24,0,8,16,24,0,8,16,24,0,8,16,24);
+    v128_t gh = wasm_v8x16_shuffle(g1, h1, 0,8,16,24,0,8,16,24,0,8,16,24,0,8,16,24);
+    v128_t abcd = wasm_v8x16_shuffle(ab, cd, 0,1,2,3,16,17,18,19,0,1,2,3,16,17,18,19);  // 4 bytes from ab followed by 4 bytes from cd
+    v128_t efgh = wasm_v8x16_shuffle(ef, gh, 0,1,2,3,16,17,18,19,0,1,2,3,16,17,18,19);
+    return v_uint8x16(wasm_v8x16_shuffle(abcd, efgh, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23));  // low 8 bytes of each half
+#else
+    fallback::v_uint64x2 a_(a), b_(b), c_(c), d_(d), e_(e), f_(f), g_(g), h_(h);  // intrinsic path disabled: delegate to the fallback implementation
+    return fallback::v_pack_b(a_, b_, c_, d_, e_, f_, g_, h_);
+#endif
+}
+
+inline v_float32x4 v_matmul(const v_float32x4& v, const v_float32x4& m0,
+                            const v_float32x4& m1, const v_float32x4& m2,
+                            const v_float32x4& m3)
+{
+    // Broadcast each lane of v and scale the matching matrix vector with it.
+    const v128_t term0 = wasm_f32x4_mul(wasm_f32x4_splat(wasm_f32x4_extract_lane(v.val, 0)), m0.val);
+    const v128_t term1 = wasm_f32x4_mul(wasm_f32x4_splat(wasm_f32x4_extract_lane(v.val, 1)), m1.val);
+    const v128_t term2 = wasm_f32x4_mul(wasm_f32x4_splat(wasm_f32x4_extract_lane(v.val, 2)), m2.val);
+    const v128_t term3 = wasm_f32x4_mul(wasm_f32x4_splat(wasm_f32x4_extract_lane(v.val, 3)), m3.val);
+
+    // Pairwise summation: (term0 + term1) + (term2 + term3).
+    const v128_t sum01 = wasm_f32x4_add(term0, term1);
+    const v128_t sum23 = wasm_f32x4_add(term2, term3);
+    return v_float32x4(wasm_f32x4_add(sum01, sum23));
+}
+
+inline v_float32x4 v_matmuladd(const v_float32x4& v, const v_float32x4& m0,
+                               const v_float32x4& m1, const v_float32x4& m2,
+                               const v_float32x4& a)
+{
+    // Only lanes 0..2 of v are used; a is added unscaled in place of a fourth product.
+    const v128_t term0 = wasm_f32x4_mul(wasm_f32x4_splat(wasm_f32x4_extract_lane(v.val, 0)), m0.val);
+    const v128_t term1 = wasm_f32x4_mul(wasm_f32x4_splat(wasm_f32x4_extract_lane(v.val, 1)), m1.val);
+    const v128_t term2 = wasm_f32x4_mul(wasm_f32x4_splat(wasm_f32x4_extract_lane(v.val, 2)), m2.val);
+
+    // Pairwise summation: (term0 + term1) + (term2 + a).
+    const v128_t sum01 = wasm_f32x4_add(term0, term1);
+    return v_float32x4(wasm_f32x4_add(sum01, wasm_f32x4_add(term2, a.val)));
+}
+
+#define OPENCV_HAL_IMPL_WASM_BIN_OP(bin_op, _Tpvec, intrin) /* defines `bin_op` and `bin_op=` for _Tpvec by applying `intrin` to the .val members */ \
+inline _Tpvec operator bin_op (const _Tpvec& a, const _Tpvec& b) \
+{ \
+    return _Tpvec(intrin(a.val, b.val)); \
+} \
+inline _Tpvec& operator bin_op##= (_Tpvec& a, const _Tpvec& b) \
+{ \
+    a.val = intrin(a.val, b.val); /* result is stored back into the left operand */ \
+    return a; \
+}
+
+OPENCV_HAL_IMPL_WASM_BIN_OP(+, v_uint8x16, wasm_u8x16_add_saturate) // 8- and 16-bit +/- map to the *_saturate intrinsics
+OPENCV_HAL_IMPL_WASM_BIN_OP(-, v_uint8x16, wasm_u8x16_sub_saturate)
+OPENCV_HAL_IMPL_WASM_BIN_OP(+, v_int8x16, wasm_i8x16_add_saturate)
+OPENCV_HAL_IMPL_WASM_BIN_OP(-, v_int8x16, wasm_i8x16_sub_saturate)
+OPENCV_HAL_IMPL_WASM_BIN_OP(+, v_uint16x8, wasm_u16x8_add_saturate)
+OPENCV_HAL_IMPL_WASM_BIN_OP(-, v_uint16x8, wasm_u16x8_sub_saturate)
+OPENCV_HAL_IMPL_WASM_BIN_OP(+, v_int16x8, wasm_i16x8_add_saturate)
+OPENCV_HAL_IMPL_WASM_BIN_OP(-, v_int16x8, wasm_i16x8_sub_saturate)
+OPENCV_HAL_IMPL_WASM_BIN_OP(+, v_uint32x4, wasm_i32x4_add) // 32-bit lanes use the plain i32x4 add/sub/mul for both signednesses
+OPENCV_HAL_IMPL_WASM_BIN_OP(-, v_uint32x4, wasm_i32x4_sub)
+OPENCV_HAL_IMPL_WASM_BIN_OP(*, v_uint32x4, wasm_i32x4_mul)
+OPENCV_HAL_IMPL_WASM_BIN_OP(+, v_int32x4, wasm_i32x4_add)
+OPENCV_HAL_IMPL_WASM_BIN_OP(-, v_int32x4, wasm_i32x4_sub)
+OPENCV_HAL_IMPL_WASM_BIN_OP(*, v_int32x4, wasm_i32x4_mul)
+OPENCV_HAL_IMPL_WASM_BIN_OP(+, v_float32x4, wasm_f32x4_add) // float32 gets all four arithmetic operators
+OPENCV_HAL_IMPL_WASM_BIN_OP(-, v_float32x4, wasm_f32x4_sub)
+OPENCV_HAL_IMPL_WASM_BIN_OP(*, v_float32x4, wasm_f32x4_mul)
+OPENCV_HAL_IMPL_WASM_BIN_OP(/, v_float32x4, wasm_f32x4_div)
+
+#ifdef __wasm_unimplemented_simd128__ // 64-bit integer and float64 operators use intrinsics only when this macro is defined
+OPENCV_HAL_IMPL_WASM_BIN_OP(+, v_uint64x2, wasm_i64x2_add)
+OPENCV_HAL_IMPL_WASM_BIN_OP(-, v_uint64x2, wasm_i64x2_sub)
+OPENCV_HAL_IMPL_WASM_BIN_OP(+, v_int64x2, wasm_i64x2_add)
+OPENCV_HAL_IMPL_WASM_BIN_OP(-, v_int64x2, wasm_i64x2_sub)
+OPENCV_HAL_IMPL_WASM_BIN_OP(+, v_float64x2, wasm_f64x2_add)
+OPENCV_HAL_IMPL_WASM_BIN_OP(-, v_float64x2, wasm_f64x2_sub)
+OPENCV_HAL_IMPL_WASM_BIN_OP(*, v_float64x2, wasm_f64x2_mul)
+OPENCV_HAL_IMPL_WASM_BIN_OP(/, v_float64x2, wasm_f64x2_div)
+#else // otherwise the same operators are routed through the fallback:: implementation
+#define OPENCV_HAL_IMPL_FALLBACK_BIN_OP(bin_op, _Tpvec) /* defines `bin_op` and `bin_op=` for _Tpvec via fallback::_Tpvec */ \
+inline _Tpvec operator bin_op (const _Tpvec& a, const _Tpvec& b) \
+{ \
+    fallback::_Tpvec a_(a), b_(b); /* convert both operands to the fallback type */ \
+    return _Tpvec((a_) bin_op (b_)); \
+} \
+inline _Tpvec& operator bin_op##= (_Tpvec& a, const _Tpvec& b) \
+{ \
+    fallback::_Tpvec a_(a), b_(b); \
+    a_ bin_op##= b_; \
+    a = _Tpvec(a_); /* convert the fallback result back and store it in the left operand */ \
+    return a; \
+}
+
+OPENCV_HAL_IMPL_FALLBACK_BIN_OP(+, v_uint64x2)
+OPENCV_HAL_IMPL_FALLBACK_BIN_OP(-, v_uint64x2)
+OPENCV_HAL_IMPL_FALLBACK_BIN_OP(+, v_int64x2)
+OPENCV_HAL_IMPL_FALLBACK_BIN_OP(-, v_int64x2)
+OPENCV_HAL_IMPL_FALLBACK_BIN_OP(+, v_float64x2)
+OPENCV_HAL_IMPL_FALLBACK_BIN_OP(-, v_float64x2)
+OPENCV_HAL_IMPL_FALLBACK_BIN_OP(*, v_float64x2)
+OPENCV_HAL_IMPL_FALLBACK_BIN_OP(/, v_float64x2)
+#endif
+
+// Saturating multiply for 8-bit and 16-bit lanes: widen with v_mul_expand, then narrow with v_pack.
+#define OPENCV_HAL_IMPL_WASM_MUL_SAT(_Tpvec, _Tpwvec)        \
+inline _Tpvec operator * (const _Tpvec& a, const _Tpvec& b)  \
+{                                                            \
+    _Tpwvec c, d; /* the two widened product vectors */      \
+    v_mul_expand(a, b, c, d);                                \
+    return v_pack(c, d);                                     \
+}                                                            \
+inline _Tpvec& operator *= (_Tpvec& a, const _Tpvec& b)      \
+{ a = a * b; return a; }
+
+OPENCV_HAL_IMPL_WASM_MUL_SAT(v_uint8x16, v_uint16x8) // second argument is the wider vector type holding the products
+OPENCV_HAL_IMPL_WASM_MUL_SAT(v_int8x16,  v_int16x8)
+OPENCV_HAL_IMPL_WASM_MUL_SAT(v_uint16x8, v_uint32x4)
+OPENCV_HAL_IMPL_WASM_MUL_SAT(v_int16x8,  v_int32x4)
+
+//  Multiply and expand
+inline void v_mul_expand(const v_uint8x16& a, const v_uint8x16& b,
+                         v_uint16x8& c, v_uint16x8& d)
+{
+    v_uint16x8 lhs0, lhs1, rhs0, rhs1;  // 16-bit widened halves of a and b, as produced by v_expand
+    v_expand(a, lhs0, lhs1);
+    v_expand(b, rhs0, rhs1);
+    c = v_mul_wrap(lhs0, rhs0);         // products of the first halves
+    d = v_mul_wrap(lhs1, rhs1);         // products of the second halves
+}
+
+inline void v_mul_expand(const v_int8x16& a, const v_int8x16& b,
+                         v_int16x8& c, v_int16x8& d)
+{
+    v_int16x8 lhs0, lhs1, rhs0, rhs1;  // 16-bit widened halves of a and b, as produced by v_expand
+    v_expand(a, lhs0, lhs1);
+    v_expand(b, rhs0, rhs1);
+    c = v_mul_wrap(lhs0, rhs0);        // products of the first halves
+    d = v_mul_wrap(lhs1, rhs1);        // products of the second halves
+}
+
+inline void v_mul_expand(const v_int16x8& a, const v_int16x8& b,
+                         v_int32x4& c, v_int32x4& d)
+{
+    v_int32x4 lhs0, lhs1, rhs0, rhs1;  // 32-bit widened halves of a and b, as produced by v_expand
+    v_expand(a, lhs0, lhs1);
+    v_expand(b, rhs0, rhs1);
+    c.val = wasm_i32x4_mul(lhs0.val, rhs0.val);  // products of the first halves
+    d.val = wasm_i32x4_mul(lhs1.val, rhs1.val);  // products of the second halves
+}
+
+inline void v_mul_expand(const v_uint16x8& a, const v_uint16x8& b,
+                         v_uint32x4& c, v_uint32x4& d)
+{
+    v_uint32x4 lhs0, lhs1, rhs0, rhs1;  // 32-bit widened halves of a and b, as produced by v_expand
+    v_expand(a, lhs0, lhs1);
+    v_expand(b, rhs0, rhs1);
+    c.val = wasm_i32x4_mul(lhs0.val, rhs0.val);  // products of the first halves
+    d.val = wasm_i32x4_mul(lhs1.val, rhs1.val);  // products of the second halves
+}
+
+inline void v_mul_expand(const v_uint32x4& a, const v_uint32x4& b,
+                         v_uint64x2& c, v_uint64x2& d)
+{
+#ifdef __wasm_unimplemented_simd128__
+    v_uint64x2 lhs0, lhs1, rhs0, rhs1;  // 64-bit widened halves of a and b, as produced by v_expand
+    v_expand(a, lhs0, lhs1);
+    v_expand(b, rhs0, rhs1);
+    c.val = ((__u64x2)(lhs0.val) * (__u64x2)(rhs0.val));  // multiply through the compiler's vector-type operator
+    d.val = ((__u64x2)(lhs1.val) * (__u64x2)(rhs1.val));
+#else
+    fallback::v_uint32x4 fb_a(a), fb_b(b);
+    fallback::v_uint64x2 fb_c, fb_d;
+    fallback::v_mul_expand(fb_a, fb_b, fb_c, fb_d);  // delegate to the fallback implementation
+    c = v_uint64x2(fb_c);
+    d = v_uint64x2(fb_d);
+#endif
+}
+
+inline v_int16x8 v_mul_hi(const v_int16x8& a, const v_int16x8& b)
+{
+    v_int32x4 lhs0, lhs1, rhs0, rhs1;  // 32-bit widened halves of a and b
+    v_expand(a, lhs0, lhs1);
+    v_expand(b, rhs0, rhs1);
+    const v128_t prod0 = wasm_i32x4_mul(lhs0.val, rhs0.val);
+    const v128_t prod1 = wasm_i32x4_mul(lhs1.val, rhs1.val);
+    return v_int16x8(wasm_v8x16_shuffle(prod0, prod1, 2,3,6,7,10,11,14,15,18,19,22,23,26,27,30,31));  // bytes 2..3 of every 32-bit product
+}
+inline v_uint16x8 v_mul_hi(const v_uint16x8& a, const v_uint16x8& b)
+{
+    v_uint32x4 lhs0, lhs1, rhs0, rhs1;  // 32-bit widened halves of a and b
+    v_expand(a, lhs0, lhs1);
+    v_expand(b, rhs0, rhs1);
+    const v128_t prod0 = wasm_i32x4_mul(lhs0.val, rhs0.val);
+    const v128_t prod1 = wasm_i32x4_mul(lhs1.val, rhs1.val);
+    return v_uint16x8(wasm_v8x16_shuffle(prod0, prod1, 2,3,6,7,10,11,14,15,18,19,22,23,26,27,30,31));  // bytes 2..3 of every 32-bit product
+}
+
+//////// Dot Product ////////
+
+inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b)
+{
+    const v128_t a_even = wasm_i32x4_shr(wasm_i32x4_shl(a.val, 16), 16);  // low 16 bits of each 32-bit lane, moved up and shifted back
+    const v128_t a_odd = wasm_i32x4_shr(a.val, 16);                      // high 16 bits of each 32-bit lane
+    const v128_t b_even = wasm_i32x4_shr(wasm_i32x4_shl(b.val, 16), 16);
+    const v128_t b_odd = wasm_i32x4_shr(b.val, 16);
+    const v128_t even_prod = wasm_i32x4_mul(a_even, b_even);
+    const v128_t odd_prod = wasm_i32x4_mul(a_odd, b_odd);
+    return v_int32x4(wasm_i32x4_add(even_prod, odd_prod));  // per 32-bit lane: sum of the two adjacent products
+}
+
+inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b, const v_int32x4& c)
+{ const v_int32x4 dot = v_dotprod(a, b); return dot + c; }
+
+inline v_int64x2 v_dotprod(const v_int32x4& a, const v_int32x4& b)
+{
+#ifdef __wasm_unimplemented_simd128__
+    const v128_t a_even = wasm_i64x2_shr(wasm_i64x2_shl(a.val, 32), 32);  // low 32 bits of each 64-bit lane, moved up and shifted back
+    const v128_t a_odd = wasm_i64x2_shr(a.val, 32);                      // high 32 bits of each 64-bit lane
+    const v128_t b_even = wasm_i64x2_shr(wasm_i64x2_shl(b.val, 32), 32);
+    const v128_t b_odd = wasm_i64x2_shr(b.val, 32);
+    const v128_t even_prod = (v128_t)((__i64x2)a_even * (__i64x2)b_even);
+    const v128_t odd_prod = (v128_t)((__i64x2)a_odd * (__i64x2)b_odd);
+    return v_int64x2(wasm_i64x2_add(even_prod, odd_prod));
+#else
+    fallback::v_int32x4 fb_a(a);
+    fallback::v_int32x4 fb_b(b);
+    return fallback::v_dotprod(fb_a, fb_b);  // delegate to the fallback implementation
+#endif
+}
+inline v_int64x2 v_dotprod(const v_int32x4& a, const v_int32x4& b, const v_int64x2& c)
+{
+#ifdef __wasm_unimplemented_simd128__
+    return v_dotprod(a, b) + c;  // accumulate onto the two-argument overload
+#else
+    fallback::v_int32x4 fb_a(a);
+    fallback::v_int32x4 fb_b(b);
+    fallback::v_int64x2 fb_c(c);
+    return fallback::v_dotprod(fb_a, fb_b, fb_c);
+#endif
+}
+
+// 8 >> 32
+inline v_uint32x4 v_dotprod_expand(const v_uint8x16& a, const v_uint8x16& b)
+{
+    const v128_t a_even = wasm_u16x8_shr(wasm_i16x8_shl(a.val, 8), 8);  // low byte of each 16-bit lane
+    const v128_t a_odd = wasm_u16x8_shr(a.val, 8);                      // high byte of each 16-bit lane
+    const v128_t b_even = wasm_u16x8_shr(wasm_i16x8_shl(b.val, 8), 8);
+    const v128_t b_odd = wasm_u16x8_shr(b.val, 8);
+    const v_int32x4 even_sum = v_dotprod(v_int16x8(a_even), v_int16x8(b_even));
+    const v_int32x4 odd_sum = v_dotprod(v_int16x8(a_odd), v_int16x8(b_odd));
+    // Both partial results are 32-bit lanes; their sum is re-wrapped as unsigned.
+    return v_uint32x4((even_sum + odd_sum).val);
+}
+inline v_uint32x4 v_dotprod_expand(const v_uint8x16& a, const v_uint8x16& b, const v_uint32x4& c)
+{ const v_uint32x4 dot = v_dotprod_expand(a, b); return dot + c; }
+
+inline v_int32x4 v_dotprod_expand(const v_int8x16& a, const v_int8x16& b)
+{
+    const v128_t a_even = wasm_i16x8_shr(wasm_i16x8_shl(a.val, 8), 8);  // low byte of each 16-bit lane
+    const v128_t a_odd = wasm_i16x8_shr(a.val, 8);                      // high byte of each 16-bit lane
+    const v128_t b_even = wasm_i16x8_shr(wasm_i16x8_shl(b.val, 8), 8);
+    const v128_t b_odd = wasm_i16x8_shr(b.val, 8);
+    const v_int32x4 even_sum = v_dotprod(v_int16x8(a_even), v_int16x8(b_even));
+    const v_int32x4 odd_sum = v_dotprod(v_int16x8(a_odd), v_int16x8(b_odd));
+    // Combine the two 16 >> 32 dot products into the final 32-bit lanes.
+    return v_int32x4(even_sum + odd_sum);
+}
+inline v_int32x4 v_dotprod_expand(const v_int8x16& a, const v_int8x16& b, const v_int32x4& c)
+{ const v_int32x4 dot = v_dotprod_expand(a, b); return dot + c; }
+
+// 16 >> 64 (always delegated to the fallback implementation)
+inline v_uint64x2 v_dotprod_expand(const v_uint16x8& a, const v_uint16x8& b)
+{
+    fallback::v_uint16x8 fb_a(a);
+    fallback::v_uint16x8 fb_b(b);
+    return fallback::v_dotprod_expand(fb_a, fb_b);
+}
+inline v_uint64x2 v_dotprod_expand(const v_uint16x8& a, const v_uint16x8& b, const v_uint64x2& c)
+{
+    fallback::v_uint16x8 fb_a(a);
+    fallback::v_uint16x8 fb_b(b);
+    fallback::v_uint64x2 fb_acc(c);  // accumulator, also converted to the fallback type
+    return fallback::v_dotprod_expand(fb_a, fb_b, fb_acc);
+}
+
+inline v_int64x2 v_dotprod_expand(const v_int16x8& a, const v_int16x8& b)
+{
+    fallback::v_int16x8 fb_a(a);
+    fallback::v_int16x8 fb_b(b);
+    return fallback::v_dotprod_expand(fb_a, fb_b);
+}
+
+inline v_int64x2 v_dotprod_expand(const v_int16x8& a, const v_int16x8& b, const v_int64x2& c)
+{
+    fallback::v_int16x8 fb_a(a);
+    fallback::v_int16x8 fb_b(b);
+    fallback::v_int64x2 fb_acc(c);  // accumulator, also converted to the fallback type
+    return fallback::v_dotprod_expand(fb_a, fb_b, fb_acc);
+}
+
+// 32 >> 64f: the 64-bit integer dot product is converted to float64 with v_cvt_f64
+inline v_float64x2 v_dotprod_expand(const v_int32x4& a, const v_int32x4& b)
+{ return v_cvt_f64(v_dotprod(a, b)); }
+inline v_float64x2 v_dotprod_expand(const v_int32x4& a, const v_int32x4& b, const v_float64x2& c)
+{ return v_dotprod_expand(a, b) + c; } // accumulation happens in float64, after the conversion
+
+//////// Fast Dot Product //////// (every *_fast overload below forwards to its regular counterpart)
+
+// 16 >> 32
+inline v_int32x4 v_dotprod_fast(const v_int16x8& a, const v_int16x8& b)
+{ return v_dotprod(a, b); }
+inline v_int32x4 v_dotprod_fast(const v_int16x8& a, const v_int16x8& b, const v_int32x4& c)
+{ return v_dotprod(a, b, c); }
+
+// 32 >> 64
+inline v_int64x2 v_dotprod_fast(const v_int32x4& a, const v_int32x4& b)
+{ return v_dotprod(a, b); }
+inline v_int64x2 v_dotprod_fast(const v_int32x4& a, const v_int32x4& b, const v_int64x2& c)
+{ return v_dotprod(a, b, c); }
+
+// 8 >> 32
+inline v_uint32x4 v_dotprod_expand_fast(const v_uint8x16& a, const v_uint8x16& b)
+{ return v_dotprod_expand(a, b); }
+inline v_uint32x4 v_dotprod_expand_fast(const v_uint8x16& a, const v_uint8x16& b, const v_uint32x4& c)
+{ return v_dotprod_expand(a, b, c); }
+inline v_int32x4 v_dotprod_expand_fast(const v_int8x16& a, const v_int8x16& b)
+{ return v_dotprod_expand(a, b); }
+inline v_int32x4 v_dotprod_expand_fast(const v_int8x16& a, const v_int8x16& b, const v_int32x4& c)
+{ return v_dotprod_expand(a, b, c); }
+
+// 16 >> 64
+inline v_uint64x2 v_dotprod_expand_fast(const v_uint16x8& a, const v_uint16x8& b)
+{ return v_dotprod_expand(a, b); }
+inline v_uint64x2 v_dotprod_expand_fast(const v_uint16x8& a, const v_uint16x8& b, const v_uint64x2& c)
+{ return v_dotprod_expand(a, b, c); }
+inline v_int64x2 v_dotprod_expand_fast(const v_int16x8& a, const v_int16x8& b)
+{ return v_dotprod_expand(a, b); }
+inline v_int64x2 v_dotprod_expand_fast(const v_int16x8& a, const v_int16x8& b, const v_int64x2& c)
+{ return v_dotprod_expand(a, b, c); }
+
+// 32 >> 64f
+inline v_float64x2 v_dotprod_expand_fast(const v_int32x4& a, const v_int32x4& b)
+{ return v_dotprod_expand(a, b); }
+inline v_float64x2 v_dotprod_expand_fast(const v_int32x4& a, const v_int32x4& b, const v_float64x2& c)
+{ return v_dotprod_expand(a, b, c); }
+
+#define OPENCV_HAL_IMPL_WASM_LOGIC_OP(_Tpvec) /* defines &, |, ^ (with their compound forms) and unary ~ on the whole 128-bit value */ \
+OPENCV_HAL_IMPL_WASM_BIN_OP(&, _Tpvec, wasm_v128_and) \
+OPENCV_HAL_IMPL_WASM_BIN_OP(|, _Tpvec, wasm_v128_or) \
+OPENCV_HAL_IMPL_WASM_BIN_OP(^, _Tpvec, wasm_v128_xor) \
+inline _Tpvec operator ~ (const _Tpvec& a) \
+{ \
+    return _Tpvec(wasm_v128_not(a.val)); \
+}
+
+OPENCV_HAL_IMPL_WASM_LOGIC_OP(v_uint8x16) // instantiated for every vector type, including the float ones
+OPENCV_HAL_IMPL_WASM_LOGIC_OP(v_int8x16)
+OPENCV_HAL_IMPL_WASM_LOGIC_OP(v_uint16x8)
+OPENCV_HAL_IMPL_WASM_LOGIC_OP(v_int16x8)
+OPENCV_HAL_IMPL_WASM_LOGIC_OP(v_uint32x4)
+OPENCV_HAL_IMPL_WASM_LOGIC_OP(v_int32x4)
+OPENCV_HAL_IMPL_WASM_LOGIC_OP(v_uint64x2)
+OPENCV_HAL_IMPL_WASM_LOGIC_OP(v_int64x2)
+OPENCV_HAL_IMPL_WASM_LOGIC_OP(v_float32x4)
+OPENCV_HAL_IMPL_WASM_LOGIC_OP(v_float64x2)
+
+inline v_float32x4 v_sqrt(const v_float32x4& x)
+{   // Square root of each float32 lane.
+#ifdef __wasm_unimplemented_simd128__
+    return v_float32x4(wasm_f32x4_sqrt(x.val));
+#else
+    fallback::v_float32x4 fb_x(x);  // intrinsic path disabled: delegate to the fallback implementation
+    return fallback::v_sqrt(fb_x);
+#endif
+}
+
+inline v_float32x4 v_invsqrt(const v_float32x4& x)
+{   // 1 / sqrt(x) for each float32 lane.
+#ifdef __wasm_unimplemented_simd128__
+    const v128_t ones = wasm_f32x4_splat(1.0);
+    return v_float32x4(wasm_f32x4_div(ones, wasm_f32x4_sqrt(x.val)));  // full division by the square root
+#else
+    fallback::v_float32x4 fb_x(x);
+    return fallback::v_invsqrt(fb_x);
+#endif
+}
+
+inline v_float64x2 v_sqrt(const v_float64x2& x)
+{   // Square root of each float64 lane.
+#ifdef __wasm_unimplemented_simd128__
+    return v_float64x2(wasm_f64x2_sqrt(x.val));
+#else
+    fallback::v_float64x2 fb_x(x);  // intrinsic path disabled: delegate to the fallback implementation
+    return fallback::v_sqrt(fb_x);
+#endif
+}
+
+inline v_float64x2 v_invsqrt(const v_float64x2& x)
+{   // 1 / sqrt(x) for each float64 lane.
+#ifdef __wasm_unimplemented_simd128__
+    const v128_t ones = wasm_f64x2_splat(1.0);
+    return v_float64x2(wasm_f64x2_div(ones, wasm_f64x2_sqrt(x.val)));  // full division by the square root
+#else
+    fallback::v_float64x2 fb_x(x);
+    return fallback::v_invsqrt(fb_x);
+#endif
+}
+
+#define OPENCV_HAL_IMPL_WASM_ABS_INT_FUNC(_Tpuvec, _Tpsvec, suffix, zsuffix, shiftWidth) /* v_abs: signed input, unsigned result; computed as (x ^ f) + s */ \
+inline _Tpuvec v_abs(const _Tpsvec& x) \
+{ \
+    v128_t s = wasm_##suffix##_shr(x.val, shiftWidth); /* shift by (lane bits - 1) using the unsigned-suffix intrinsic */ \
+    v128_t f = wasm_##zsuffix##_shr(x.val, shiftWidth); /* same shift using the signed-suffix intrinsic */ \
+    return _Tpuvec(wasm_##zsuffix##_add(wasm_v128_xor(x.val, f), s)); \
+}
+
+OPENCV_HAL_IMPL_WASM_ABS_INT_FUNC(v_uint8x16, v_int8x16, u8x16, i8x16, 7) // last argument is the lane width in bits minus one
+OPENCV_HAL_IMPL_WASM_ABS_INT_FUNC(v_uint16x8, v_int16x8, u16x8, i16x8, 15)
+OPENCV_HAL_IMPL_WASM_ABS_INT_FUNC(v_uint32x4, v_int32x4, u32x4, i32x4, 31)
+
+inline v_float32x4 v_abs(const v_float32x4& x)
+{ const v128_t magnitude = wasm_f32x4_abs(x.val); return v_float32x4(magnitude); }
+inline v_float64x2 v_abs(const v_float64x2& x)
+{   // Absolute value of each float64 lane.
+#ifdef __wasm_unimplemented_simd128__
+    return v_float64x2(wasm_f64x2_abs(x.val));
+#else
+    fallback::v_float64x2 fb_x(x);  // intrinsic path disabled: delegate to the fallback implementation
+    return fallback::v_abs(fb_x);
+#endif
+}
+
+// TODO: exp, log, sin, cos
+
+#define OPENCV_HAL_IMPL_WASM_BIN_FUNC(_Tpvec, func, intrin) /* defines a named two-argument function `func` that applies `intrin` to the .val members */ \
+inline _Tpvec func(const _Tpvec& a, const _Tpvec& b) \
+{ \
+    return _Tpvec(intrin(a.val, b.val)); \
+}
+
+OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_float32x4, v_min, wasm_f32x4_min)
+OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_float32x4, v_max, wasm_f32x4_max)
+
+#ifdef __wasm_unimplemented_simd128__ // float64 min/max use intrinsics only when this macro is defined
+OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_float64x2, v_min, wasm_f64x2_min)
+OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_float64x2, v_max, wasm_f64x2_max)
+#else
+#define OPENCV_HAL_IMPL_WASM_MINMAX_64f_FUNC(func) /* float64 `func` routed through the fallback:: function of the same name */ \
+inline v_float64x2 func(const v_float64x2& a, const v_float64x2& b) \
+{ \
+    fallback::v_float64x2 a_(a), b_(b); \
+    return fallback::func(a_, b_); \
+}
+
+OPENCV_HAL_IMPL_WASM_MINMAX_64f_FUNC(v_min)
+OPENCV_HAL_IMPL_WASM_MINMAX_64f_FUNC(v_max)
+#endif
+
+#define OPENCV_HAL_IMPL_WASM_MINMAX_S_INIT_FUNC(_Tpvec, suffix) /* signed integer v_min/v_max built from a greater-than mask and bitselect */ \
+inline _Tpvec v_min(const _Tpvec& a, const _Tpvec& b) \
+{ \
+    return _Tpvec(wasm_v128_bitselect(b.val, a.val, wasm_##suffix##_gt(a.val, b.val))); /* where a > b take b, else a */ \
+} \
+inline _Tpvec v_max(const _Tpvec& a, const _Tpvec& b) \
+{ \
+    return _Tpvec(wasm_v128_bitselect(a.val, b.val, wasm_##suffix##_gt(a.val, b.val))); /* where a > b take a, else b */ \
+}
+
+OPENCV_HAL_IMPL_WASM_MINMAX_S_INIT_FUNC(v_int8x16, i8x16)
+OPENCV_HAL_IMPL_WASM_MINMAX_S_INIT_FUNC(v_int16x8, i16x8)
+OPENCV_HAL_IMPL_WASM_MINMAX_S_INIT_FUNC(v_int32x4, i32x4)
+
+#define OPENCV_HAL_IMPL_WASM_MINMAX_U_INIT_FUNC(_Tpvec, suffix, deltaNum) /* unsigned v_min/v_max: XOR both operands with deltaNum, then compare with the signed intrinsic */ \
+inline _Tpvec v_min(const _Tpvec& a, const _Tpvec& b) \
+{ \
+    v128_t delta = wasm_##suffix##_splat(deltaNum); \
+    v128_t mask = wasm_##suffix##_gt(wasm_v128_xor(a.val, delta), wasm_v128_xor(b.val, delta)); \
+    return _Tpvec(wasm_v128_bitselect(b.val, a.val, mask)); /* where the mask is set take b, else a */ \
+} \
+inline _Tpvec v_max(const _Tpvec& a, const _Tpvec& b) \
+{ \
+    v128_t delta = wasm_##suffix##_splat(deltaNum); \
+    v128_t mask = wasm_##suffix##_gt(wasm_v128_xor(a.val, delta), wasm_v128_xor(b.val, delta)); \
+    return _Tpvec(wasm_v128_bitselect(a.val, b.val, mask)); /* where the mask is set take a, else b */ \
+}
+
+OPENCV_HAL_IMPL_WASM_MINMAX_U_INIT_FUNC(v_uint8x16, i8x16, (schar)0x80) // deltaNum is the top (sign) bit of the lane
+OPENCV_HAL_IMPL_WASM_MINMAX_U_INIT_FUNC(v_uint16x8, i16x8, (short)0x8000)
+OPENCV_HAL_IMPL_WASM_MINMAX_U_INIT_FUNC(v_uint32x4, i32x4, (int)0x80000000)
+
+#define OPENCV_HAL_IMPL_WASM_INIT_CMP_OP(_Tpvec, suffix, esuffix) /* six comparison operators; ==/!= use `esuffix`, the ordered ones use `suffix` */ \
+inline _Tpvec operator == (const _Tpvec& a, const _Tpvec& b) \
+{ return _Tpvec(wasm_##esuffix##_eq(a.val, b.val)); } \
+inline _Tpvec operator != (const _Tpvec& a, const _Tpvec& b) \
+{ return _Tpvec(wasm_##esuffix##_ne(a.val, b.val)); } \
+inline _Tpvec operator < (const _Tpvec& a, const _Tpvec& b) \
+{ return _Tpvec(wasm_##suffix##_lt(a.val, b.val)); } \
+inline _Tpvec operator > (const _Tpvec& a, const _Tpvec& b) \
+{ return _Tpvec(wasm_##suffix##_gt(a.val, b.val)); } \
+inline _Tpvec operator <= (const _Tpvec& a, const _Tpvec& b) \
+{ return _Tpvec(wasm_##suffix##_le(a.val, b.val)); } \
+inline _Tpvec operator >= (const _Tpvec& a, const _Tpvec& b) \
+{ return _Tpvec(wasm_##suffix##_ge(a.val, b.val)); }
+
+OPENCV_HAL_IMPL_WASM_INIT_CMP_OP(v_uint8x16, u8x16, i8x16) // unsigned types: u* suffix for ordering, i* suffix for equality
+OPENCV_HAL_IMPL_WASM_INIT_CMP_OP(v_int8x16, i8x16, i8x16)
+OPENCV_HAL_IMPL_WASM_INIT_CMP_OP(v_uint16x8, u16x8, i16x8)
+OPENCV_HAL_IMPL_WASM_INIT_CMP_OP(v_int16x8, i16x8, i16x8)
+OPENCV_HAL_IMPL_WASM_INIT_CMP_OP(v_uint32x4, u32x4, i32x4)
+OPENCV_HAL_IMPL_WASM_INIT_CMP_OP(v_int32x4, i32x4, i32x4)
+OPENCV_HAL_IMPL_WASM_INIT_CMP_OP(v_float32x4, f32x4, f32x4)
+
+#ifdef __wasm_unimplemented_simd128__ // float64 comparisons use intrinsics only when this macro is defined
+OPENCV_HAL_IMPL_WASM_INIT_CMP_OP(v_float64x2, f64x2, f64x2)
+#else
+#define OPENCV_HAL_IMPL_INIT_FALLBACK_CMP_OP(_Tpvec, bin_op) /* one comparison operator routed through fallback::_Tpvec */ \
+inline _Tpvec operator bin_op (const _Tpvec& a, const _Tpvec& b) \
+{ \
+    fallback::_Tpvec a_(a), b_(b); \
+    return _Tpvec((a_) bin_op (b_));\
+} \
+
+OPENCV_HAL_IMPL_INIT_FALLBACK_CMP_OP(v_float64x2, ==)
+OPENCV_HAL_IMPL_INIT_FALLBACK_CMP_OP(v_float64x2, !=)
+OPENCV_HAL_IMPL_INIT_FALLBACK_CMP_OP(v_float64x2, <)
+OPENCV_HAL_IMPL_INIT_FALLBACK_CMP_OP(v_float64x2, >)
+OPENCV_HAL_IMPL_INIT_FALLBACK_CMP_OP(v_float64x2, <=)
+OPENCV_HAL_IMPL_INIT_FALLBACK_CMP_OP(v_float64x2, >=)
+#endif
+
+#define OPENCV_HAL_IMPL_WASM_64BIT_CMP_OP(_Tpvec, cast) /* bitwise ==/!= for 64-bit integer lanes; `cast` is unused and kept only so existing invocations compile */ \
+inline _Tpvec operator == (const _Tpvec& a, const _Tpvec& b) /* a 64-bit lane is equal only if both of its 32-bit halves are equal */ \
+{ v128_t e = wasm_i32x4_eq(a.val, b.val); return _Tpvec(wasm_v128_and(e, wasm_v8x16_shuffle(e, e, 4,5,6,7,0,1,2,3,12,13,14,15,8,9,10,11))); } \
+inline _Tpvec operator != (const _Tpvec& a, const _Tpvec& b) /* a 64-bit lane differs if either of its 32-bit halves differs */ \
+{ v128_t d = wasm_i32x4_ne(a.val, b.val); return _Tpvec(wasm_v128_or(d, wasm_v8x16_shuffle(d, d, 4,5,6,7,0,1,2,3,12,13,14,15,8,9,10,11))); }
+
+OPENCV_HAL_IMPL_WASM_64BIT_CMP_OP(v_uint64x2, v_reinterpret_as_u64) // integer compare: NaN-like and signed-zero bit patterns are handled exactly
+OPENCV_HAL_IMPL_WASM_64BIT_CMP_OP(v_int64x2, v_reinterpret_as_s64)
+
+inline v_float32x4 v_not_nan(const v_float32x4& a)
+{   // Lane mask: all ones where a is not NaN (finite or +/-infinity), zero where a is NaN.
+    v128_t z = wasm_i32x4_splat(0x7fffffff);  // mask that clears the sign bit
+    v128_t t = wasm_i32x4_splat(0x7f800000);  // bit pattern of +infinity
+    return v_float32x4(wasm_u32x4_le(wasm_v128_and(a.val, z), t));  // <= keeps infinity as "not NaN"; only NaN patterns exceed t
+}
+inline v_float64x2 v_not_nan(const v_float64x2& a)
+{   // Lane mask: all ones where a is not NaN (finite or +/-infinity), zero where a is NaN.
+#ifdef __wasm_unimplemented_simd128__
+    v128_t z = wasm_i64x2_splat(0x7fffffffffffffff);  // mask that clears the sign bit
+    v128_t t = wasm_i64x2_splat(0x7ff0000000000000);  // bit pattern of +infinity
+    return v_float64x2((__u64x2)(wasm_v128_and(a.val, z)) <= (__u64x2)t);  // <= keeps infinity as "not NaN"; only NaN patterns exceed t
+#else
+    fallback::v_float64x2 a_(a);  // intrinsic path disabled: delegate to the fallback implementation
+    return fallback::v_not_nan(a_);
+#endif
+}
+
+OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_uint8x16, v_add_wrap, wasm_i8x16_add) // *_wrap functions use the plain (non-*_saturate) intrinsics
+OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_int8x16, v_add_wrap, wasm_i8x16_add)
+OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_uint16x8, v_add_wrap, wasm_i16x8_add)
+OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_int16x8, v_add_wrap, wasm_i16x8_add)
+OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_uint8x16, v_sub_wrap, wasm_i8x16_sub)
+OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_int8x16, v_sub_wrap, wasm_i8x16_sub)
+OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_uint16x8, v_sub_wrap, wasm_i16x8_sub)
+OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_int16x8, v_sub_wrap, wasm_i16x8_sub)
+OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_uint8x16, v_mul_wrap, wasm_i8x16_mul)
+OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_int8x16, v_mul_wrap, wasm_i8x16_mul)
+OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_uint16x8, v_mul_wrap, wasm_i16x8_mul)
+OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_int16x8, v_mul_wrap, wasm_i16x8_mul)
+
+
+/** Absolute difference (unsigned inputs) **/
+
+inline v_uint8x16 v_absdiff(const v_uint8x16& a, const v_uint8x16& b)
+{ const v_uint8x16 a_minus_b = a - b, b_minus_a = b - a; return v_add_wrap(a_minus_b, b_minus_a); }
+inline v_uint16x8 v_absdiff(const v_uint16x8& a, const v_uint16x8& b)
+{ const v_uint16x8 a_minus_b = a - b, b_minus_a = b - a; return v_add_wrap(a_minus_b, b_minus_a); }
+inline v_uint32x4 v_absdiff(const v_uint32x4& a, const v_uint32x4& b)
+{ const v_uint32x4 larger = v_max(a, b), smaller = v_min(a, b); return larger - smaller; }
+
+inline v_uint8x16 v_absdiff(const v_int8x16& a, const v_int8x16& b)
+{
+    const v_int8x16 is_less = a < b;          // comparison mask for lanes where a < b
+    const v_int8x16 diff = v_sub_wrap(a, b);
+    return v_reinterpret_as_u8(v_sub_wrap(diff ^ is_less, is_less));  // (diff ^ mask) - mask
+}
+inline v_uint16x8 v_absdiff(const v_int16x8& a, const v_int16x8& b)
+{   // max - min with wrap-around subtraction, reinterpreted as unsigned
+    return v_reinterpret_as_u16(v_sub_wrap(v_max(a, b), v_min(a, b)));
+}
+inline v_uint32x4 v_absdiff(const v_int32x4& a, const v_int32x4& b)
+{
+    const v_int32x4 is_less = a < b;          // comparison mask for lanes where a < b
+    const v_int32x4 diff = a - b;
+    return v_reinterpret_as_u32((diff ^ is_less) - is_less);  // (diff ^ mask) - mask
+}
+
+/** Saturating absolute difference **/
+inline v_int8x16 v_absdiffs(const v_int8x16& a, const v_int8x16& b)
+{
+    const v_int8x16 is_less = a < b;  // comparison mask for lanes where a < b
+    const v_int8x16 diff = a - b;     // operator- on this type uses the saturating intrinsic
+    return (diff ^ is_less) - is_less;
+}
+inline v_int16x8 v_absdiffs(const v_int16x8& a, const v_int16x8& b)
+{ const v_int16x8 larger = v_max(a, b), smaller = v_min(a, b); return larger - smaller; }
+
+
+inline v_int32x4 v_fma(const v_int32x4& a, const v_int32x4& b, const v_int32x4& c)
+{
+    return a * b + c; // separate multiply and add through the operators defined for this type
+}
+
+inline v_int32x4 v_muladd(const v_int32x4& a, const v_int32x4& b, const v_int32x4& c)
+{
+    return v_fma(a, b, c); // alias for v_fma
+}
+
+inline v_float32x4 v_fma(const v_float32x4& a, const v_float32x4& b, const v_float32x4& c)
+{
+    return a * b + c; // written as a separate multiply and add; no fused intrinsic is called here
+}
+
+inline v_float64x2 v_fma(const v_float64x2& a, const v_float64x2& b, const v_float64x2& c)
+{
+    return a * b + c; // written as a separate multiply and add; no fused intrinsic is called here
+}
+
+inline v_float32x4 v_absdiff(const v_float32x4& a, const v_float32x4& b)
+{
+    const v128_t diff = wasm_f32x4_sub(a.val, b.val);
+    return v_float32x4(wasm_v128_and(diff, wasm_i32x4_splat(0x7fffffff)));  // clear the sign bit of every lane
+}
+inline v_float64x2 v_absdiff(const v_float64x2& a, const v_float64x2& b)
+{
+#ifdef __wasm_unimplemented_simd128__
+    const v128_t sign_cleared = wasm_u64x2_shr(wasm_i32x4_splat(-1), 1);  // all ones shifted right by one in each 64-bit lane
+    return v_float64x2(wasm_v128_and(wasm_f64x2_sub(a.val, b.val), sign_cleared));
+#else
+    fallback::v_float64x2 fb_a(a), fb_b(b);
+    return fallback::v_absdiff(fb_a, fb_b);  // delegate to the fallback implementation
+#endif
+}
+
+#define OPENCV_HAL_IMPL_WASM_MISC_FLT_OP(_Tpvec) /* v_magnitude, v_sqr_magnitude and v_muladd for a floating-point vector type */ \
+inline _Tpvec v_magnitude(const _Tpvec& a, const _Tpvec& b) \
+{ \
+    fallback::_Tpvec a_(a), b_(b); /* always delegated to the fallback implementation */ \
+    return fallback::v_magnitude(a_, b_); \
+} \
+inline _Tpvec v_sqr_magnitude(const _Tpvec& a, const _Tpvec& b) \
+{ \
+    return v_fma(a, a, b*b); /* a*a + b*b */ \
+} \
+inline _Tpvec v_muladd(const _Tpvec& a, const _Tpvec& b, const _Tpvec& c) \
+{ \
+    return v_fma(a, b, c); /* alias for v_fma */ \
+}
+
+OPENCV_HAL_IMPL_WASM_MISC_FLT_OP(v_float32x4)
+OPENCV_HAL_IMPL_WASM_MISC_FLT_OP(v_float64x2)
+
+#define OPENCV_HAL_IMPL_WASM_SHIFT_OP(_Tpuvec, _Tpsvec, suffix, ssuffix) /* shifts for an unsigned/signed pair; `suffix` is the i* family, `ssuffix` the u* family */ \
+inline _Tpuvec operator << (const _Tpuvec& a, int imm) \
+{ \
+    return _Tpuvec(wasm_##suffix##_shl(a.val, imm)); \
+} \
+inline _Tpsvec operator << (const _Tpsvec& a, int imm) \
+{ \
+    return _Tpsvec(wasm_##suffix##_shl(a.val, imm)); \
+} \
+inline _Tpuvec operator >> (const _Tpuvec& a, int imm) \
+{ \
+    return _Tpuvec(wasm_##ssuffix##_shr(a.val, imm)); /* unsigned vectors shift right with the u* intrinsic */ \
+} \
+inline _Tpsvec operator >> (const _Tpsvec& a, int imm) \
+{ \
+    return _Tpsvec(wasm_##suffix##_shr(a.val, imm)); /* signed vectors shift right with the i* intrinsic */ \
+} \
+template<int imm> \
+inline _Tpuvec v_shl(const _Tpuvec& a) /* template forms take the shift count as a compile-time constant */ \
+{ \
+    return _Tpuvec(wasm_##suffix##_shl(a.val, imm)); \
+} \
+template<int imm> \
+inline _Tpsvec v_shl(const _Tpsvec& a) \
+{ \
+    return _Tpsvec(wasm_##suffix##_shl(a.val, imm)); \
+} \
+template<int imm> \
+inline _Tpuvec v_shr(const _Tpuvec& a) \
+{ \
+    return _Tpuvec(wasm_##ssuffix##_shr(a.val, imm)); \
+} \
+template<int imm> \
+inline _Tpsvec v_shr(const _Tpsvec& a) \
+{ \
+    return _Tpsvec(wasm_##suffix##_shr(a.val, imm)); \
+}
+
+OPENCV_HAL_IMPL_WASM_SHIFT_OP(v_uint8x16, v_int8x16, i8x16, u8x16)
+OPENCV_HAL_IMPL_WASM_SHIFT_OP(v_uint16x8, v_int16x8, i16x8, u16x8)
+OPENCV_HAL_IMPL_WASM_SHIFT_OP(v_uint32x4, v_int32x4, i32x4, u32x4)
+
+#ifdef __wasm_unimplemented_simd128__ // 64-bit shifts use intrinsics only when this macro is defined
+OPENCV_HAL_IMPL_WASM_SHIFT_OP(v_uint64x2, v_int64x2, i64x2, u64x2)
+#else
+#define OPENCV_HAL_IMPL_FALLBACK_SHIFT_OP(_Tpvec) /* <<, >>, v_shl and v_shr routed through fallback::_Tpvec */ \
+inline _Tpvec operator << (const _Tpvec& a, int imm) \
+{ \
+    fallback::_Tpvec a_(a); \
+    return a_ << imm; \
+} \
+inline _Tpvec operator >> (const _Tpvec& a, int imm) \
+{ \
+    fallback::_Tpvec a_(a); \
+    return a_ >> imm; \
+} \
+template<int imm> \
+inline _Tpvec v_shl(const _Tpvec& a) \
+{ \
+    fallback::_Tpvec a_(a); \
+    return fallback::v_shl<imm>(a_); \
+} \
+template<int imm> \
+inline _Tpvec v_shr(const _Tpvec& a) \
+{ \
+    fallback::_Tpvec a_(a); \
+    return fallback::v_shr<imm>(a_); \
+} \
+
+OPENCV_HAL_IMPL_FALLBACK_SHIFT_OP(v_uint64x2)
+OPENCV_HAL_IMPL_FALLBACK_SHIFT_OP(v_int64x2)
+#endif
+
+namespace hal_wasm_internal
+{   // Compile-time dispatch that returns 16 consecutive bytes, starting at byte imm, of the 32-byte sequence a followed by b.
+    template <int imm,
+        bool is_invalid = ((imm < 0) || (imm > 16)),
+        bool is_first = (imm == 0),
+        bool is_second = (imm == 16),
+        bool is_other = (((imm > 0) && (imm < 16)))>
+    class v_wasm_palignr_u8_class;  // primary template: the defaulted bool flags pick one specialization
+
+    template <int imm>
+    class v_wasm_palignr_u8_class<imm, true, false, false, false>;  // out-of-range imm: declared but never defined
+
+    template <int imm>
+    class v_wasm_palignr_u8_class<imm, false, true, false, false>  // imm == 0: result is a unchanged
+    {
+    public:
+        inline v128_t operator()(const v128_t& a, const v128_t&) const
+        {
+            return a;
+        }
+    };
+
+    template <int imm>
+    class v_wasm_palignr_u8_class<imm, false, false, true, false>  // imm == 16: result is b unchanged
+    {
+    public:
+        inline v128_t operator()(const v128_t&, const v128_t& b) const
+        {
+            return b;
+        }
+    };
+
+    template <int imm>
+    class v_wasm_palignr_u8_class<imm, false, false, false, true>  // 0 < imm < 16: window straddles a and b
+    {
+    public:
+        inline v128_t operator()(const v128_t& a, const v128_t& b) const
+        {
+            // Shuffle indices 0..15 select bytes of a and 16..31 bytes of b, so imm..imm+15 is the shifted window.
+            return wasm_v8x16_shuffle(a, b,
+                                      imm, imm+1, imm+2, imm+3,
+                                      imm+4, imm+5, imm+6, imm+7,
+                                      imm+8, imm+9, imm+10, imm+11,
+                                      imm+12, imm+13, imm+14, imm+15);
+        }
+    };
+
+    template <int imm>
+    inline v128_t v_wasm_palignr_u8(const v128_t& a, const v128_t& b)  // entry point used by the v_rotate_* functions
+    {
+        CV_StaticAssert((imm >= 0) && (imm <= 16), "Invalid imm for v_wasm_palignr_u8.");
+        return v_wasm_palignr_u8_class<imm>()(a, b);
+    }
+}
+
+template<int imm, typename _Tpvec>
+inline _Tpvec v_rotate_right(const _Tpvec &a)
+{
+    // Convert the lane count into a byte offset; vacated positions are filled from a zero vector.
+    enum { byte_shift = (imm * sizeof(typename _Tpvec::lane_type)) };
+    const v128_t zeros = wasm_i8x16_splat(0);
+    return _Tpvec(hal_wasm_internal::v_wasm_palignr_u8<byte_shift>(a.val, zeros));
+}
+
+template<int imm, typename _Tpvec>
+inline _Tpvec v_rotate_left(const _Tpvec &a)
+{
+    // Expressed as a byte alignment by the remaining (nlanes - imm) lanes with zeros as the first operand.
+    enum { byte_shift = ((_Tpvec::nlanes - imm) * sizeof(typename _Tpvec::lane_type)) };
+    const v128_t zeros = wasm_i8x16_splat(0);
+    return _Tpvec(hal_wasm_internal::v_wasm_palignr_u8<byte_shift>(zeros, a.val));
+}
+
+template<int imm, typename _Tpvec>
+inline _Tpvec v_rotate_right(const _Tpvec &a, const _Tpvec &b)
+{
+    // Two-vector form: positions vacated in a are filled with bytes taken from b.
+    enum { byte_shift = (imm * sizeof(typename _Tpvec::lane_type)) };
+    return _Tpvec(hal_wasm_internal::v_wasm_palignr_u8<byte_shift>(a.val, b.val));
+}
+
+template<int imm, typename _Tpvec>
+inline _Tpvec v_rotate_left(const _Tpvec &a, const _Tpvec &b)
+{
+    // Two-vector form: b is the first operand of the byte alignment, a the second.
+    enum { byte_shift = ((_Tpvec::nlanes - imm) * sizeof(typename _Tpvec::lane_type)) };
+    return _Tpvec(hal_wasm_internal::v_wasm_palignr_u8<byte_shift>(b.val, a.val));
+}
+
+#define OPENCV_HAL_IMPL_WASM_LOADSTORE_INT_OP(_Tpvec, _Tp) /* load/store family for vector type _Tpvec with element type _Tp */ \
+inline _Tpvec v_load(const _Tp* ptr) \
+{ return _Tpvec(wasm_v128_load(ptr)); } \
+inline _Tpvec v_load_aligned(const _Tp* ptr) /* same intrinsic as v_load */ \
+{ return _Tpvec(wasm_v128_load(ptr)); } \
+inline _Tpvec v_load_low(const _Tp* ptr) \
+{ \
+    _Tp tmp[_Tpvec::nlanes] = {0}; /* zero-initialised, so the upper half of the result is zero */ \
+    for (int i=0; i<_Tpvec::nlanes/2; ++i) { \
+        tmp[i] = ptr[i]; /* only nlanes/2 elements are read from ptr */ \
+    } \
+    return _Tpvec(wasm_v128_load(tmp)); \
+} \
+inline _Tpvec v_load_halves(const _Tp* ptr0, const _Tp* ptr1) \
+{ \
+    _Tp tmp[_Tpvec::nlanes]; \
+    for (int i=0; i<_Tpvec::nlanes/2; ++i) { \
+        tmp[i] = ptr0[i]; /* lower half comes from ptr0 */ \
+        tmp[i+_Tpvec::nlanes/2] = ptr1[i]; /* upper half comes from ptr1 */ \
+    } \
+    return _Tpvec(wasm_v128_load(tmp)); \
+} \
+inline void v_store(_Tp* ptr, const _Tpvec& a) \
+{ wasm_v128_store(ptr, a.val); } \
+inline void v_store_aligned(_Tp* ptr, const _Tpvec& a) /* all store variants below use the same intrinsic */ \
+{ wasm_v128_store(ptr, a.val); } \
+inline void v_store_aligned_nocache(_Tp* ptr, const _Tpvec& a) \
+{ wasm_v128_store(ptr, a.val); } \
+inline void v_store(_Tp* ptr, const _Tpvec& a, hal::StoreMode /*mode*/) /* the store mode argument is ignored */ \
+{ \
+    wasm_v128_store(ptr, a.val); \
+} \
+inline void v_store_low(_Tp* ptr, const _Tpvec& a) \
+{ \
+    fallback::_Tpvec a_(a); /* partial stores are delegated to the fallback implementation */ \
+    fallback::v_store_low(ptr, a_); \
+} \
+inline void v_store_high(_Tp* ptr, const _Tpvec& a) \
+{ \
+    fallback::_Tpvec a_(a); \
+    fallback::v_store_high(ptr, a_); \
+}
+
+OPENCV_HAL_IMPL_WASM_LOADSTORE_INT_OP(v_uint8x16, uchar) // despite the _INT_ in the name, the float types are instantiated too
+OPENCV_HAL_IMPL_WASM_LOADSTORE_INT_OP(v_int8x16, schar)
+OPENCV_HAL_IMPL_WASM_LOADSTORE_INT_OP(v_uint16x8, ushort)
+OPENCV_HAL_IMPL_WASM_LOADSTORE_INT_OP(v_int16x8, short)
+OPENCV_HAL_IMPL_WASM_LOADSTORE_INT_OP(v_uint32x4, unsigned)
+OPENCV_HAL_IMPL_WASM_LOADSTORE_INT_OP(v_int32x4, int)
+OPENCV_HAL_IMPL_WASM_LOADSTORE_INT_OP(v_uint64x2, uint64)
+OPENCV_HAL_IMPL_WASM_LOADSTORE_INT_OP(v_int64x2, int64)
+OPENCV_HAL_IMPL_WASM_LOADSTORE_INT_OP(v_float32x4, float)
+OPENCV_HAL_IMPL_WASM_LOADSTORE_INT_OP(v_float64x2, double)
+
+
+/** Reverse **/
+// v_reverse returns a vector whose lanes appear in the opposite order.
+// The unsigned overloads use a byte shuffle of `a` with itself; each index
+// list reverses the lane positions while keeping the bytes inside a lane in
+// their original order. The signed and floating-point overloads reinterpret to
+// the unsigned type of the same lane width, reverse, and reinterpret back.
+inline v_uint8x16 v_reverse(const v_uint8x16 &a)
+{ return v_uint8x16(wasm_v8x16_shuffle(a.val, a.val, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)); }
+
+inline v_int8x16 v_reverse(const v_int8x16 &a)
+{ return v_reinterpret_as_s8(v_reverse(v_reinterpret_as_u8(a))); }
+
+// 16-bit lanes: byte pairs (14,15), (12,13), ... (0,1).
+inline v_uint16x8 v_reverse(const v_uint16x8 &a)
+{ return v_uint16x8(wasm_v8x16_shuffle(a.val, a.val, 14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1)); }
+
+inline v_int16x8 v_reverse(const v_int16x8 &a)
+{ return v_reinterpret_as_s16(v_reverse(v_reinterpret_as_u16(a))); }
+
+// 32-bit lanes: byte groups 12..15, 8..11, 4..7, 0..3.
+inline v_uint32x4 v_reverse(const v_uint32x4 &a)
+{ return v_uint32x4(wasm_v8x16_shuffle(a.val, a.val, 12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3)); }
+
+inline v_int32x4 v_reverse(const v_int32x4 &a)
+{ return v_reinterpret_as_s32(v_reverse(v_reinterpret_as_u32(a))); }
+
+inline v_float32x4 v_reverse(const v_float32x4 &a)
+{ return v_reinterpret_as_f32(v_reverse(v_reinterpret_as_u32(a))); }
+
+// 64-bit lanes: the two 8-byte halves are swapped.
+inline v_uint64x2 v_reverse(const v_uint64x2 &a)
+{ return v_uint64x2(wasm_v8x16_shuffle(a.val, a.val, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7)); }
+
+inline v_int64x2 v_reverse(const v_int64x2 &a)
+{ return v_reinterpret_as_s64(v_reverse(v_reinterpret_as_u64(a))); }
+
+inline v_float64x2 v_reverse(const v_float64x2 &a)
+{ return v_reinterpret_as_f64(v_reverse(v_reinterpret_as_u64(a))); }
+
+
+// Horizontal sum of a vector with four 32-bit lanes.
+// Step 1 adds the vector to a copy with its two 8-byte halves swapped, giving
+// [v0+v2, v1+v3, v0+v2, v1+v3]. Step 2 adds a copy rotated by one 32-bit lane
+// (bytes 4..15 followed by 0..3), so lane 0 ends up holding the sum of all
+// four lanes, which is extracted and cast to scalartype.
+//   suffix  - wasm op family used for the additions (i32x4 or f32x4)
+//   esuffix - wasm op family used for the lane extraction
+#define OPENCV_HAL_IMPL_WASM_REDUCE_OP_4_SUM(_Tpvec, scalartype, regtype, suffix, esuffix) \
+inline scalartype v_reduce_sum(const _Tpvec& a) \
+{ \
+    regtype val = a.val; \
+    val = wasm_##suffix##_add(val, wasm_v8x16_shuffle(val, val, 8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7)); \
+    val = wasm_##suffix##_add(val, wasm_v8x16_shuffle(val, val, 4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3)); \
+    return (scalartype)wasm_##esuffix##_extract_lane(val, 0); \
+}
+
+OPENCV_HAL_IMPL_WASM_REDUCE_OP_4_SUM(v_uint32x4, unsigned, v128_t, i32x4, i32x4)
+OPENCV_HAL_IMPL_WASM_REDUCE_OP_4_SUM(v_int32x4, int, v128_t, i32x4, i32x4)
+OPENCV_HAL_IMPL_WASM_REDUCE_OP_4_SUM(v_float32x4, float, v128_t, f32x4, f32x4)
+
+// TODO: implement these v_reduce_sum overloads with wasm intrinsics.
+//       For now they are routed through the fallback implementation because
+//       the wasm intrinsics provide no widening operation.
+
+// Generates v_reduce_sum for one vector type by converting the argument to
+// its fallback counterpart and calling fallback::v_reduce_sum on it.
+#define OPENCV_HAL_IMPL_FALLBACK_REDUCE_OP_SUM(_Tpvec, scalartype) \
+inline scalartype v_reduce_sum(const _Tpvec& a) \
+{ \
+    fallback::_Tpvec fallback_vec(a); \
+    return fallback::v_reduce_sum(fallback_vec); \
+}
+
+OPENCV_HAL_IMPL_FALLBACK_REDUCE_OP_SUM(v_uint8x16, unsigned)
+OPENCV_HAL_IMPL_FALLBACK_REDUCE_OP_SUM(v_int8x16, int)
+OPENCV_HAL_IMPL_FALLBACK_REDUCE_OP_SUM(v_uint16x8, unsigned)
+OPENCV_HAL_IMPL_FALLBACK_REDUCE_OP_SUM(v_int16x8, int)
+OPENCV_HAL_IMPL_FALLBACK_REDUCE_OP_SUM(v_uint64x2, uint64)
+OPENCV_HAL_IMPL_FALLBACK_REDUCE_OP_SUM(v_int64x2, int64)
+OPENCV_HAL_IMPL_FALLBACK_REDUCE_OP_SUM(v_float64x2, double)
+
+// Reduces four float vectors at once through three rounds of
+// "interleave halves, then add".
+// NOTE(review): assuming wasm_unpacklo/unpackhi_i32x4 interleave the low/high
+// 32-bit lanes of their operands, lane i of the result is the sum of all
+// lanes of the i-th argument - confirm against the helper definitions.
+inline v_float32x4 v_reduce_sum4(const v_float32x4& a, const v_float32x4& b,
+                                 const v_float32x4& c, const v_float32x4& d)
+{
+    const v128_t ac_lo = wasm_unpacklo_i32x4(a.val, c.val);
+    const v128_t ac_hi = wasm_unpackhi_i32x4(a.val, c.val);
+    const v128_t ac = wasm_f32x4_add(ac_lo, ac_hi);
+
+    const v128_t bd_lo = wasm_unpacklo_i32x4(b.val, d.val);
+    const v128_t bd_hi = wasm_unpackhi_i32x4(b.val, d.val);
+    const v128_t bd = wasm_f32x4_add(bd_lo, bd_hi);
+
+    const v128_t sum_lo = wasm_unpacklo_i32x4(ac, bd);
+    const v128_t sum_hi = wasm_unpackhi_i32x4(ac, bd);
+    return v_float32x4(wasm_f32x4_add(sum_lo, sum_hi));
+}
+
+// Generates v_reduce_<func> for one vector type: the lanes are stored to a
+// scalar array and folded left to right with scalar_func, starting from
+// lane 0.
+#define OPENCV_HAL_IMPL_WASM_REDUCE_OP(_Tpvec, scalartype, func, scalar_func) \
+inline scalartype v_reduce_##func(const _Tpvec& a) \
+{ \
+    scalartype lanes[_Tpvec::nlanes]; \
+    v_store(lanes, a); \
+    scalartype acc = lanes[0]; \
+    for (int k = 1; k < _Tpvec::nlanes; ++k) \
+        acc = scalar_func(acc, lanes[k]); \
+    return acc; \
+}
+
+OPENCV_HAL_IMPL_WASM_REDUCE_OP(v_uint8x16, uchar, max, std::max)
+OPENCV_HAL_IMPL_WASM_REDUCE_OP(v_uint8x16, uchar, min, std::min)
+OPENCV_HAL_IMPL_WASM_REDUCE_OP(v_int8x16, schar, max, std::max)
+OPENCV_HAL_IMPL_WASM_REDUCE_OP(v_int8x16, schar, min, std::min)
+OPENCV_HAL_IMPL_WASM_REDUCE_OP(v_uint16x8, ushort, max, std::max)
+OPENCV_HAL_IMPL_WASM_REDUCE_OP(v_uint16x8, ushort, min, std::min)
+OPENCV_HAL_IMPL_WASM_REDUCE_OP(v_int16x8, short, max, std::max)
+OPENCV_HAL_IMPL_WASM_REDUCE_OP(v_int16x8, short, min, std::min)
+OPENCV_HAL_IMPL_WASM_REDUCE_OP(v_uint32x4, unsigned, max, std::max)
+OPENCV_HAL_IMPL_WASM_REDUCE_OP(v_uint32x4, unsigned, min, std::min)
+OPENCV_HAL_IMPL_WASM_REDUCE_OP(v_int32x4, int, max, std::max)
+OPENCV_HAL_IMPL_WASM_REDUCE_OP(v_int32x4, int, min, std::min)
+OPENCV_HAL_IMPL_WASM_REDUCE_OP(v_float32x4, float, max, std::max)
+OPENCV_HAL_IMPL_WASM_REDUCE_OP(v_float32x4, float, min, std::min)
+
+// v_reduce_sad: sum of absolute differences over all lanes.
+// The 8-bit and 16-bit overloads widen the v_absdiff result to 32-bit lanes
+// with v_expand before summing; the 32-bit and float overloads sum the
+// v_absdiff result directly.
+inline unsigned v_reduce_sad(const v_uint8x16& a, const v_uint8x16& b)
+{
+    v_uint16x8 lo16, hi16;
+    v_expand(v_absdiff(a, b), lo16, hi16);
+
+    v_uint32x4 q0, q1, q2, q3;
+    v_expand(lo16, q0, q1);
+    v_expand(hi16, q2, q3);
+    return v_reduce_sum(q0 + q1 + q2 + q3);
+}
+inline unsigned v_reduce_sad(const v_int8x16& a, const v_int8x16& b)
+{
+    v_uint16x8 lo16, hi16;
+    v_expand(v_absdiff(a, b), lo16, hi16);
+
+    v_uint32x4 q0, q1, q2, q3;
+    v_expand(lo16, q0, q1);
+    v_expand(hi16, q2, q3);
+    return v_reduce_sum(q0 + q1 + q2 + q3);
+}
+inline unsigned v_reduce_sad(const v_uint16x8& a, const v_uint16x8& b)
+{
+    v_uint32x4 lo32, hi32;
+    v_expand(v_absdiff(a, b), lo32, hi32);
+    return v_reduce_sum(lo32 + hi32);
+}
+inline unsigned v_reduce_sad(const v_int16x8& a, const v_int16x8& b)
+{
+    v_uint32x4 lo32, hi32;
+    v_expand(v_absdiff(a, b), lo32, hi32);
+    return v_reduce_sum(lo32 + hi32);
+}
+inline unsigned v_reduce_sad(const v_uint32x4& a, const v_uint32x4& b)
+{ return v_reduce_sum(v_absdiff(a, b)); }
+inline unsigned v_reduce_sad(const v_int32x4& a, const v_int32x4& b)
+{ return v_reduce_sum(v_absdiff(a, b)); }
+inline float v_reduce_sad(const v_float32x4& a, const v_float32x4& b)
+{ return v_reduce_sum(v_absdiff(a, b)); }
+
+// Per-byte population count using parallel bit summing: adjacent 1-bit,
+// 2-bit and then 4-bit fields are added pairwise.
+// The shifts and adds are issued on 32-bit lanes, but each mask discards the
+// bits a shift moved in from a neighbouring byte, and no per-byte sum exceeds
+// 8, so nothing carries across a byte boundary.
+inline v_uint8x16 v_popcount(const v_uint8x16& a)
+{
+    v128_t m1 = wasm_i32x4_splat(0x55555555); // every other bit
+    v128_t m2 = wasm_i32x4_splat(0x33333333); // low 2 bits of each 4-bit field
+    v128_t m4 = wasm_i32x4_splat(0x0f0f0f0f); // low nibble of each byte
+    v128_t p = a.val;
+    p = wasm_i32x4_add(wasm_v128_and(wasm_u32x4_shr(p, 1), m1), wasm_v128_and(p, m1));
+    p = wasm_i32x4_add(wasm_v128_and(wasm_u32x4_shr(p, 2), m2), wasm_v128_and(p, m2));
+    p = wasm_i32x4_add(wasm_v128_and(wasm_u32x4_shr(p, 4), m4), wasm_v128_and(p, m4));
+    return v_uint8x16(p);
+}
+// 16-bit lanes: start from the per-byte counts, add each byte's neighbour via
+// a one-byte rotate, and keep only the low byte of every lane.
+inline v_uint16x8 v_popcount(const v_uint16x8& a)
+{
+    v_uint8x16 counts = v_popcount(v_reinterpret_as_u8(a));
+    counts += v_rotate_right<1>(counts);
+    const v_uint16x8 low_byte = v_setall_u16(0x00ff);
+    return v_reinterpret_as_u16(counts) & low_byte;
+}
+// 32-bit lanes: same scheme with two accumulation steps (rotate by 1 byte,
+// then by 2 bytes) before masking to the low byte of every lane.
+inline v_uint32x4 v_popcount(const v_uint32x4& a)
+{
+    v_uint8x16 counts = v_popcount(v_reinterpret_as_u8(a));
+    counts += v_rotate_right<1>(counts);
+    counts += v_rotate_right<2>(counts);
+    const v_uint32x4 low_byte = v_setall_u32(0x000000ff);
+    return v_reinterpret_as_u32(counts) & low_byte;
+}
+// 64-bit lanes are delegated to the fallback implementation.
+inline v_uint64x2 v_popcount(const v_uint64x2& a)
+{
+    fallback::v_uint64x2 fallback_vec(a);
+    return fallback::v_popcount(fallback_vec);
+}
+// Signed overloads reinterpret to the unsigned type of the same width.
+inline v_uint8x16 v_popcount(const v_int8x16& a)
+{
+    return v_popcount(v_reinterpret_as_u8(a));
+}
+inline v_uint16x8 v_popcount(const v_int16x8& a)
+{
+    return v_popcount(v_reinterpret_as_u16(a));
+}
+inline v_uint32x4 v_popcount(const v_int32x4& a)
+{
+    return v_popcount(v_reinterpret_as_u32(a));
+}
+inline v_uint64x2 v_popcount(const v_int64x2& a)
+{
+    return v_popcount(v_reinterpret_as_u64(a));
+}
+
+// Generates the sign-inspection helpers for one vector type.
+//  - v_signmask is delegated to the fallback implementation.
+//  - v_check_all / v_check_any compare every lane against zero with the signed
+//    "less than" of the given op family and test whether all / any bytes of
+//    the comparison result are non-zero.
+// The scalarType parameter is not used by the generated code.
+#define OPENCV_HAL_IMPL_WASM_CHECK_SIGNS(_Tpvec, suffix, scalarType) \
+inline int v_signmask(const _Tpvec& a) \
+{ \
+    fallback::_Tpvec fallback_vec(a); \
+    return fallback::v_signmask(fallback_vec); \
+} \
+inline bool v_check_all(const _Tpvec& a) \
+{ \
+    const v128_t negative = wasm_##suffix##_lt(a.val, wasm_##suffix##_splat(0)); \
+    return wasm_i8x16_all_true(negative); \
+} \
+inline bool v_check_any(const _Tpvec& a) \
+{ \
+    const v128_t negative = wasm_##suffix##_lt(a.val, wasm_##suffix##_splat(0)); \
+    return wasm_i8x16_any_true(negative); \
+}
+
+OPENCV_HAL_IMPL_WASM_CHECK_SIGNS(v_uint8x16, i8x16, schar)
+OPENCV_HAL_IMPL_WASM_CHECK_SIGNS(v_int8x16, i8x16, schar)
+OPENCV_HAL_IMPL_WASM_CHECK_SIGNS(v_uint16x8, i16x8, short)
+OPENCV_HAL_IMPL_WASM_CHECK_SIGNS(v_int16x8, i16x8, short)
+OPENCV_HAL_IMPL_WASM_CHECK_SIGNS(v_uint32x4, i32x4, int)
+OPENCV_HAL_IMPL_WASM_CHECK_SIGNS(v_int32x4, i32x4, int)
+OPENCV_HAL_IMPL_WASM_CHECK_SIGNS(v_float32x4, i32x4, float)
+
+// Sign mask of a double vector, delegated to the fallback implementation.
+inline int v_signmask(const v_float64x2& a)
+{
+    fallback::v_float64x2 fallback_vec(a);
+    return fallback::v_signmask(fallback_vec);
+}
+// True when every lane of the double vector compares below zero as a signed
+// 64-bit integer. Uses a 64-bit vector comparison when
+// __wasm_unimplemented_simd128__ is defined, otherwise the fallback code.
+inline bool v_check_all(const v_float64x2& a)
+{
+#ifdef __wasm_unimplemented_simd128__
+    return wasm_i8x16_all_true((__i64x2)(a.val) < (__i64x2)(wasm_i64x2_splat(0)));
+#else
+    fallback::v_float64x2 fallback_vec(a);
+    return fallback::v_check_all(fallback_vec);
+#endif
+}
+// True when at least one lane of the double vector compares below zero as a
+// signed 64-bit integer. Uses a 64-bit vector comparison when
+// __wasm_unimplemented_simd128__ is defined, otherwise the fallback code.
+inline bool v_check_any(const v_float64x2& a)
+{
+#ifdef __wasm_unimplemented_simd128__
+    return wasm_i8x16_any_true((__i64x2)(a.val) < (__i64x2)(wasm_i64x2_splat(0)));
+#else
+    fallback::v_float64x2 fallback_vec(a);
+    return fallback::v_check_any(fallback_vec);
+#endif
+}
+
+// v_scan_forward: takes the byte-level sign mask of the vector, counts its
+// trailing zero bits, and divides that byte position by the lane size in
+// bytes to obtain a lane index.
+inline int v_scan_forward(const v_int8x16& a)
+{
+    const int byte_mask = v_signmask(v_reinterpret_as_s8(a));
+    return trailingZeros32(byte_mask);
+}
+inline int v_scan_forward(const v_uint8x16& a)
+{
+    const int byte_mask = v_signmask(v_reinterpret_as_s8(a));
+    return trailingZeros32(byte_mask);
+}
+inline int v_scan_forward(const v_int16x8& a)
+{
+    const int byte_mask = v_signmask(v_reinterpret_as_s8(a));
+    return trailingZeros32(byte_mask) / 2;
+}
+inline int v_scan_forward(const v_uint16x8& a)
+{
+    const int byte_mask = v_signmask(v_reinterpret_as_s8(a));
+    return trailingZeros32(byte_mask) / 2;
+}
+inline int v_scan_forward(const v_int32x4& a)
+{
+    const int byte_mask = v_signmask(v_reinterpret_as_s8(a));
+    return trailingZeros32(byte_mask) / 4;
+}
+inline int v_scan_forward(const v_uint32x4& a)
+{
+    const int byte_mask = v_signmask(v_reinterpret_as_s8(a));
+    return trailingZeros32(byte_mask) / 4;
+}
+inline int v_scan_forward(const v_float32x4& a)
+{
+    const int byte_mask = v_signmask(v_reinterpret_as_s8(a));
+    return trailingZeros32(byte_mask) / 4;
+}
+inline int v_scan_forward(const v_int64x2& a)
+{
+    const int byte_mask = v_signmask(v_reinterpret_as_s8(a));
+    return trailingZeros32(byte_mask) / 8;
+}
+inline int v_scan_forward(const v_uint64x2& a)
+{
+    const int byte_mask = v_signmask(v_reinterpret_as_s8(a));
+    return trailingZeros32(byte_mask) / 8;
+}
+inline int v_scan_forward(const v_float64x2& a)
+{
+    const int byte_mask = v_signmask(v_reinterpret_as_s8(a));
+    return trailingZeros32(byte_mask) / 8;
+}
+
+// Generates v_select for one vector type: a bitwise blend that takes the bits
+// of `a` where `mask` has a 1 bit and the bits of `b` elsewhere.
+#define OPENCV_HAL_IMPL_WASM_SELECT(_Tpvec) \
+inline _Tpvec v_select(const _Tpvec& mask, const _Tpvec& a, const _Tpvec& b) \
+{ \
+    const v128_t blended = wasm_v128_bitselect(a.val, b.val, mask.val); \
+    return _Tpvec(blended); \
+}
+
+OPENCV_HAL_IMPL_WASM_SELECT(v_uint8x16)
+OPENCV_HAL_IMPL_WASM_SELECT(v_int8x16)
+OPENCV_HAL_IMPL_WASM_SELECT(v_uint16x8)
+OPENCV_HAL_IMPL_WASM_SELECT(v_int16x8)
+OPENCV_HAL_IMPL_WASM_SELECT(v_uint32x4)
+OPENCV_HAL_IMPL_WASM_SELECT(v_int32x4)
+// OPENCV_HAL_IMPL_WASM_SELECT(v_uint64x2)
+// OPENCV_HAL_IMPL_WASM_SELECT(v_int64x2)
+OPENCV_HAL_IMPL_WASM_SELECT(v_float32x4)
+OPENCV_HAL_IMPL_WASM_SELECT(v_float64x2)
+
+// Generates the widening helpers for one (narrow, wide) vector pair.
+//  - v_expand:      applies `intrin` to `a` for b0 and `intrin##_high` for b1.
+//  - v_expand_low / v_expand_high: return one of those two results.
+//  - v_load_expand: loads _Tpvec::nlanes / 2 elements from ptr and widens them
+//                   with `intrin`.
+// v_load_expand goes through v_load_low so that only the elements it widens
+// are read from memory. A full 16-byte wasm_v128_load would read past the end
+// of a buffer that holds exactly half a register of source data.
+#define OPENCV_HAL_IMPL_WASM_EXPAND(_Tpvec, _Tpwvec, _Tp, intrin)    \
+inline void v_expand(const _Tpvec& a, _Tpwvec& b0, _Tpwvec& b1)      \
+{                                                                    \
+    b0.val = intrin(a.val);                                          \
+    b1.val = __CV_CAT(intrin, _high)(a.val);                         \
+}                                                                    \
+inline _Tpwvec v_expand_low(const _Tpvec& a)                         \
+{ return _Tpwvec(intrin(a.val)); }                                   \
+inline _Tpwvec v_expand_high(const _Tpvec& a)                        \
+{ return _Tpwvec(__CV_CAT(intrin, _high)(a.val)); }                  \
+inline _Tpwvec v_load_expand(const _Tp* ptr)                         \
+{                                                                    \
+    v128_t a = v_load_low(ptr).val;                                  \
+    return _Tpwvec(intrin(a));                                       \
+}
+
+OPENCV_HAL_IMPL_WASM_EXPAND(v_uint8x16, v_uint16x8, uchar, v128_cvtu8x16_i16x8)
+OPENCV_HAL_IMPL_WASM_EXPAND(v_int8x16,  v_int16x8,  schar, v128_cvti8x16_i16x8)
+OPENCV_HAL_IMPL_WASM_EXPAND(v_uint16x8, v_uint32x4, ushort, v128_cvtu16x8_i32x4)
+OPENCV_HAL_IMPL_WASM_EXPAND(v_int16x8,  v_int32x4,  short, v128_cvti16x8_i32x4)
+OPENCV_HAL_IMPL_WASM_EXPAND(v_uint32x4, v_uint64x2, unsigned, v128_cvtu32x4_i64x2)
+OPENCV_HAL_IMPL_WASM_EXPAND(v_int32x4,  v_int64x2,  int, v128_cvti32x4_i64x2)
+
+// Generates v_load_expand_q: loads _Tpvec::nlanes narrow elements from ptr and
+// widens them to the lanes of _Tpvec with `intrin`.
+// Only the elements that are widened are read: they are staged in a
+// zero-initialized, register-sized array first, because loading 16 bytes
+// straight from ptr would read past the end of a buffer that holds exactly
+// _Tpvec::nlanes elements.
+// NOTE(review): relies on `intrin` consuming the lowest elements of its
+// operand - confirm against the v128_cvt* helper definitions.
+#define OPENCV_HAL_IMPL_WASM_EXPAND_Q(_Tpvec, _Tp, intrin)  \
+inline _Tpvec v_load_expand_q(const _Tp* ptr)               \
+{                                                           \
+    _Tp staged[sizeof(v128_t) / sizeof(_Tp)] = {0};         \
+    for (int i = 0; i < _Tpvec::nlanes; ++i) {              \
+        staged[i] = ptr[i];                                 \
+    }                                                       \
+    v128_t a = wasm_v128_load(staged);                      \
+    return _Tpvec(intrin(a));                               \
+}
+
+OPENCV_HAL_IMPL_WASM_EXPAND_Q(v_uint32x4, uchar, v128_cvtu8x16_i32x4)
+OPENCV_HAL_IMPL_WASM_EXPAND_Q(v_int32x4, schar, v128_cvti8x16_i32x4)
+
+// Generates the interleave/combine helpers for one vector type.
+//  - v_zip:          b0 / b1 receive wasm_unpacklo / wasm_unpackhi of (a0, a1)
+//                    at the lane width named by `suffix`.
+//  - v_combine_low:  64-bit unpacklo of (a, b).
+//  - v_combine_high: 64-bit unpackhi of (a, b).
+//  - v_recombine:    writes the 64-bit unpacklo to c and the unpackhi to d.
+// The combine/recombine helpers always use the i64x2 unpack regardless of
+// `suffix`.
+// NOTE(review): presumably unpacklo/unpackhi interleave the low/high halves of
+// their operands like the SSE instructions of the same name - the helpers are
+// defined outside this section.
+#define OPENCV_HAL_IMPL_WASM_UNPACKS(_Tpvec, suffix) \
+inline void v_zip(const _Tpvec& a0, const _Tpvec& a1, _Tpvec& b0, _Tpvec& b1) \
+{ \
+    b0.val = wasm_unpacklo_##suffix(a0.val, a1.val); \
+    b1.val = wasm_unpackhi_##suffix(a0.val, a1.val); \
+} \
+inline _Tpvec v_combine_low(const _Tpvec& a, const _Tpvec& b) \
+{ \
+    return _Tpvec(wasm_unpacklo_i64x2(a.val, b.val)); \
+} \
+inline _Tpvec v_combine_high(const _Tpvec& a, const _Tpvec& b) \
+{ \
+    return _Tpvec(wasm_unpackhi_i64x2(a.val, b.val)); \
+} \
+inline void v_recombine(const _Tpvec& a, const _Tpvec& b, _Tpvec& c, _Tpvec& d) \
+{ \
+    c.val = wasm_unpacklo_i64x2(a.val, b.val); \
+    d.val = wasm_unpackhi_i64x2(a.val, b.val); \
+}
+
+OPENCV_HAL_IMPL_WASM_UNPACKS(v_uint8x16, i8x16)
+OPENCV_HAL_IMPL_WASM_UNPACKS(v_int8x16, i8x16)
+OPENCV_HAL_IMPL_WASM_UNPACKS(v_uint16x8, i16x8)
+OPENCV_HAL_IMPL_WASM_UNPACKS(v_int16x8, i16x8)
+OPENCV_HAL_IMPL_WASM_UNPACKS(v_uint32x4, i32x4)
+OPENCV_HAL_IMPL_WASM_UNPACKS(v_int32x4, i32x4)
+OPENCV_HAL_IMPL_WASM_UNPACKS(v_float32x4, i32x4)
+OPENCV_HAL_IMPL_WASM_UNPACKS(v_float64x2, i64x2)
+
+// v_extract<s>(a, b) is an alias for the two-operand v_rotate_right<s>(a, b).
+template<int s, typename _Tpvec>
+inline _Tpvec v_extract(const _Tpvec& a, const _Tpvec& b)
+{
+    return v_rotate_right<s, _Tpvec>(a, b);
+}
+
+// Rounds each float lane to the nearest integer; halfway cases are rounded
+// away from zero.
+// The 0.5 bias takes the sign of the corresponding input lane before the
+// truncating conversion. Adding +0.5 unconditionally would pull negative
+// inputs toward zero (for example -1.2 + 0.5 = -0.7, which truncates to 0
+// instead of -1). Results for non-negative inputs are unchanged.
+inline v_int32x4 v_round(const v_float32x4& a)
+{
+    // -0.0f has only the sign bit set, so the AND isolates each lane's sign.
+    v128_t sign = wasm_v128_and(a.val, wasm_f32x4_splat(-0.0f));
+    v128_t h = wasm_v128_or(wasm_f32x4_splat(0.5f), sign);
+    return v_int32x4(wasm_i32x4_trunc_saturate_f32x4(wasm_f32x4_add(a.val, h)));
+}
+
+// Floor of each float lane: truncate toward zero, then subtract one in the
+// lanes where the truncated value is larger than the input.
+inline v_int32x4 v_floor(const v_float32x4& a)
+{
+    const v128_t truncated = wasm_i32x4_trunc_saturate_f32x4(a.val);
+    const v128_t truncated_f = wasm_f32x4_convert_i32x4(truncated);
+    // A true comparison lane is all ones, i.e. -1 as an integer, so adding the
+    // mask subtracts one exactly where a correction is needed.
+    const v128_t correction = wasm_f32x4_lt(a.val, truncated_f);
+    return v_int32x4(wasm_i32x4_add(truncated, correction));
+}
+
+// Ceiling of each float lane: truncate toward zero, then add one in the lanes
+// where the truncated value is smaller than the input.
+inline v_int32x4 v_ceil(const v_float32x4& a)
+{
+    const v128_t truncated = wasm_i32x4_trunc_saturate_f32x4(a.val);
+    const v128_t truncated_f = wasm_f32x4_convert_i32x4(truncated);
+    // A true comparison lane is all ones, i.e. -1 as an integer, so
+    // subtracting the mask adds one exactly where a correction is needed.
+    const v128_t correction = wasm_f32x4_gt(a.val, truncated_f);
+    return v_int32x4(wasm_i32x4_sub(truncated, correction));
+}
+
+// Converts each float lane to an integer with the saturating truncation
+// intrinsic.
+inline v_int32x4 v_trunc(const v_float32x4& a)
+{
+    const v128_t truncated = wasm_i32x4_trunc_saturate_f32x4(a.val);
+    return v_int32x4(truncated);
+}
+
+// Generates a rounding-style conversion for one vector type by converting the
+// argument to its fallback counterpart and calling the fallback function of
+// the same name. The cfunc, _Tp and _Tpn parameters are not used by the
+// generated code.
+#define OPENCV_HAL_IMPL_WASM_MATH_FUNC(func, cfunc, _Tpvec, _Tpnvec, _Tp, _Tpn) \
+inline _Tpnvec func(const _Tpvec& a) \
+{ \
+    fallback::_Tpvec fallback_vec(a); \
+    return fallback::func(fallback_vec); \
+}
+
+OPENCV_HAL_IMPL_WASM_MATH_FUNC(v_round, cvRound, v_float64x2, v_int32x4, double, int)
+OPENCV_HAL_IMPL_WASM_MATH_FUNC(v_floor, cvFloor, v_float64x2, v_int32x4, double, int)
+OPENCV_HAL_IMPL_WASM_MATH_FUNC(v_ceil, cvCeil, v_float64x2, v_int32x4, double, int)
+OPENCV_HAL_IMPL_WASM_MATH_FUNC(v_trunc, int, v_float64x2, v_int32x4, double, int)
+
+// Rounds two double vectors into one int vector, delegated to the fallback
+// implementation.
+inline v_int32x4 v_round(const v_float64x2& a, const v_float64x2& b)
+{
+    fallback::v_float64x2 fallback_a(a);
+    fallback::v_float64x2 fallback_b(b);
+    return fallback::v_round(fallback_a, fallback_b);
+}
+
+// Generates v_transpose4x4 for a vector type with four 32-bit lanes.
+// The inputs a0..a3 are treated as the rows of a 4x4 matrix and b0..b3
+// receive its columns. Stage 1 interleaves rows pairwise at 32-bit
+// granularity (t0/t2 from a0,a1; t1/t3 from a2,a3); stage 2 interleaves those
+// results at 64-bit granularity.
+// NOTE(review): assumes wasm_unpacklo/unpackhi interleave the low/high halves
+// of their operands - the helpers are defined outside this section.
+#define OPENCV_HAL_IMPL_WASM_TRANSPOSE4x4(_Tpvec, suffix) \
+inline void v_transpose4x4(const _Tpvec& a0, const _Tpvec& a1, \
+                           const _Tpvec& a2, const _Tpvec& a3, \
+                           _Tpvec& b0, _Tpvec& b1, \
+                           _Tpvec& b2, _Tpvec& b3) \
+{ \
+    v128_t t0 = wasm_unpacklo_##suffix(a0.val, a1.val); \
+    v128_t t1 = wasm_unpacklo_##suffix(a2.val, a3.val); \
+    v128_t t2 = wasm_unpackhi_##suffix(a0.val, a1.val); \
+    v128_t t3 = wasm_unpackhi_##suffix(a2.val, a3.val); \
+\
+    b0.val = wasm_unpacklo_i64x2(t0, t1); \
+    b1.val = wasm_unpackhi_i64x2(t0, t1); \
+    b2.val = wasm_unpacklo_i64x2(t2, t3); \
+    b3.val = wasm_unpackhi_i64x2(t2, t3); \
+}
+
+OPENCV_HAL_IMPL_WASM_TRANSPOSE4x4(v_uint32x4, i32x4)
+OPENCV_HAL_IMPL_WASM_TRANSPOSE4x4(v_int32x4, i32x4)
+OPENCV_HAL_IMPL_WASM_TRANSPOSE4x4(v_float32x4, i32x4)
+
+// load deinterleave
+// Loads 32 interleaved bytes (a0 b0 a1 b1 ...) and splits them into two
+// vectors: even-indexed bytes go to `a`, odd-indexed bytes go to `b`.
+inline void v_load_deinterleave(const uchar* ptr, v_uint8x16& a, v_uint8x16& b)
+{
+    const v128_t first = wasm_v128_load(ptr);
+    const v128_t second = wasm_v128_load(ptr + 16);
+
+    a.val = wasm_v8x16_shuffle(first, second, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30);
+    b.val = wasm_v8x16_shuffle(first, second, 1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31);
+}
+
+// Loads 48 interleaved bytes (a0 b0 c0 a1 b1 c1 ...) and splits them into
+// three vectors of 16 bytes each.
+inline void v_load_deinterleave(const uchar* ptr, v_uint8x16& a, v_uint8x16& b, v_uint8x16& c)
+{
+    v128_t t00 = wasm_v128_load(ptr);
+    v128_t t01 = wasm_v128_load(ptr + 16);
+    v128_t t02 = wasm_v128_load(ptr + 32);
+
+    // Gather every third byte of the first 32 bytes into the low part of each
+    // temporary; the trailing indices are placeholders that get overwritten
+    // by the second round of shuffles.
+    v128_t t10 = wasm_v8x16_shuffle(t00, t01, 0,3,6,9,12,15,18,21,24,27,30,1,2,4,5,7);   // a0..a10
+    v128_t t11 = wasm_v8x16_shuffle(t00, t01, 1,4,7,10,13,16,19,22,25,28,31,0,2,3,5,6);  // b0..b10
+    v128_t t12 = wasm_v8x16_shuffle(t00, t01, 2,5,8,11,14,17,20,23,26,29,0,1,3,4,6,7);   // c0..c9
+
+    // Keep the gathered prefix and append the remaining elements from t02.
+    a.val = wasm_v8x16_shuffle(t10, t02, 0,1,2,3,4,5,6,7,8,9,10,17,20,23,26,29);
+    b.val = wasm_v8x16_shuffle(t11, t02, 0,1,2,3,4,5,6,7,8,9,10,18,21,24,27,30);
+    c.val = wasm_v8x16_shuffle(t12, t02, 0,1,2,3,4,5,6,7,8,9,16,19,22,25,28,31);
+}
+
+// Loads 64 interleaved bytes (a0 b0 c0 d0 a1 b1 c1 d1 ...) and splits them
+// into four vectors of 16 bytes each.
+inline void v_load_deinterleave(const uchar* ptr, v_uint8x16& a, v_uint8x16& b, v_uint8x16& c, v_uint8x16& d)
+{
+    v128_t u0 = wasm_v128_load(ptr); // a0 b0 c0 d0 a1 b1 c1 d1 ...
+    v128_t u1 = wasm_v128_load(ptr + 16); // a4 b4 c4 d4 ...
+    v128_t u2 = wasm_v128_load(ptr + 32); // a8 b8 c8 d8 ...
+    v128_t u3 = wasm_v128_load(ptr + 48); // a12 b12 c12 d12 ...
+
+    // Each temporary holds eight elements of one channel followed by eight
+    // elements of the next channel.
+    v128_t v0 = wasm_v8x16_shuffle(u0, u1, 0,4,8,12,16,20,24,28,1,5,9,13,17,21,25,29); // a0..a7 b0..b7
+    v128_t v1 = wasm_v8x16_shuffle(u2, u3, 0,4,8,12,16,20,24,28,1,5,9,13,17,21,25,29); // a8..a15 b8..b15
+    v128_t v2 = wasm_v8x16_shuffle(u0, u1, 2,6,10,14,18,22,26,30,3,7,11,15,19,23,27,31); // c0..c7 d0..d7
+    v128_t v3 = wasm_v8x16_shuffle(u2, u3, 2,6,10,14,18,22,26,30,3,7,11,15,19,23,27,31); // c8..c15 d8..d15
+
+    // Join the matching 8-byte halves of the temporaries.
+    a.val = wasm_v8x16_shuffle(v0, v1, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23);
+    b.val = wasm_v8x16_shuffle(v0, v1, 8,9,10,11,12,13,14,15,24,25,26,27,28,29,30,31);
+    c.val = wasm_v8x16_shuffle(v2, v3, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23);
+    d.val = wasm_v8x16_shuffle(v2, v3, 8,9,10,11,12,13,14,15,24,25,26,27,28,29,30,31);
+}
+
+// Loads 16 interleaved 16-bit elements (a0 b0 a1 b1 ...) and splits them into
+// two vectors: even-indexed elements go to `a`, odd-indexed ones to `b`.
+inline void v_load_deinterleave(const ushort* ptr, v_uint16x8& a, v_uint16x8& b)
+{
+    const v128_t first = wasm_v128_load(ptr);      // a0 b0 a1 b1 a2 b2 a3 b3
+    const v128_t second = wasm_v128_load(ptr + 8); // a4 b4 a5 b5 a6 b6 a7 b7
+
+    a.val = wasm_v8x16_shuffle(first, second, 0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29); // a0 a1 a2 a3 a4 a5 a6 a7
+    b.val = wasm_v8x16_shuffle(first, second, 2,3,6,7,10,11,14,15,18,19,22,23,26,27,30,31); // b0 b1 b2 b3 b4 b5 b6 b7
+}
+
+// Loads 24 interleaved 16-bit elements (a0 b0 c0 a1 b1 c1 ...) and splits
+// them into three vectors of 8 elements each.
+inline void v_load_deinterleave(const ushort* ptr, v_uint16x8& a, v_uint16x8& b, v_uint16x8& c)
+{
+    v128_t t00 = wasm_v128_load(ptr);        // a0 b0 c0 a1 b1 c1 a2 b2
+    v128_t t01 = wasm_v128_load(ptr + 8);    // c2 a3 b3 c3 a4 b4 c4 a5
+    v128_t t02 = wasm_v128_load(ptr + 16);  // b5 c5 a6 b6 c6 a7 b7 c7
+
+    // Gather one channel from the first two registers into the low part of
+    // each temporary; the trailing indices are placeholders that get
+    // overwritten by the second round of shuffles.
+    v128_t t10 = wasm_v8x16_shuffle(t00, t01, 0,1,6,7,12,13,18,19,24,25,30,31,2,3,4,5);  // a0..a5
+    v128_t t11 = wasm_v8x16_shuffle(t00, t01, 2,3,8,9,14,15,20,21,26,27,0,1,4,5,6,7);    // b0..b4
+    v128_t t12 = wasm_v8x16_shuffle(t00, t01, 4,5,10,11,16,17,22,23,28,29,0,1,2,3,6,7);  // c0..c4
+
+    // Keep the gathered prefix and append the remaining elements from t02.
+    a.val = wasm_v8x16_shuffle(t10, t02, 0,1,2,3,4,5,6,7,8,9,10,11,20,21,26,27);
+    b.val = wasm_v8x16_shuffle(t11, t02, 0,1,2,3,4,5,6,7,8,9,16,17,22,23,28,29);
+    c.val = wasm_v8x16_shuffle(t12, t02, 0,1,2,3,4,5,6,7,8,9,18,19,24,25,30,31);
+}
+
+// Loads 32 interleaved 16-bit elements (a0 b0 c0 d0 a1 b1 c1 d1 ...) and
+// splits them into four vectors of 8 elements each.
+inline void v_load_deinterleave(const ushort* ptr, v_uint16x8& a, v_uint16x8& b, v_uint16x8& c, v_uint16x8& d)
+{
+    v128_t u0 = wasm_v128_load(ptr); // a0 b0 c0 d0 a1 b1 c1 d1
+    v128_t u1 = wasm_v128_load(ptr + 8); // a2 b2 c2 d2 ...
+    v128_t u2 = wasm_v128_load(ptr + 16); // a4 b4 c4 d4 ...
+    v128_t u3 = wasm_v128_load(ptr + 24); // a6 b6 c6 d6 ...
+
+    // Each temporary holds four elements of one channel followed by four
+    // elements of the next channel.
+    v128_t v0 = wasm_v8x16_shuffle(u0, u1, 0,1,8,9,16,17,24,25,2,3,10,11,18,19,26,27); // a0 a1 a2 a3 b0 b1 b2 b3
+    v128_t v1 = wasm_v8x16_shuffle(u2, u3, 0,1,8,9,16,17,24,25,2,3,10,11,18,19,26,27); // a4 a5 a6 a7 b4 b5 b6 b7
+    v128_t v2 = wasm_v8x16_shuffle(u0, u1, 4,5,12,13,20,21,28,29,6,7,14,15,22,23,30,31); // c0 c1 c2 c3 d0 d1 d2 d3
+    v128_t v3 = wasm_v8x16_shuffle(u2, u3, 4,5,12,13,20,21,28,29,6,7,14,15,22,23,30,31); // c4 c5 c6 c7 d4 d5 d6 d7
+
+    // Join the matching 8-byte halves of the temporaries.
+    a.val = wasm_v8x16_shuffle(v0, v1, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23);
+    b.val = wasm_v8x16_shuffle(v0, v1, 8,9,10,11,12,13,14,15,24,25,26,27,28,29,30,31);
+    c.val = wasm_v8x16_shuffle(v2, v3, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23);
+    d.val = wasm_v8x16_shuffle(v2, v3, 8,9,10,11,12,13,14,15,24,25,26,27,28,29,30,31);
+}
+
+// Loads 8 interleaved 32-bit elements (a0 b0 a1 b1 ...) and splits them into
+// two vectors: even-indexed elements go to `a`, odd-indexed ones to `b`.
+inline void v_load_deinterleave(const unsigned* ptr, v_uint32x4& a, v_uint32x4& b)
+{
+    const v128_t first = wasm_v128_load(ptr);      // a0 b0 a1 b1
+    const v128_t second = wasm_v128_load(ptr + 4); // a2 b2 a3 b3
+
+    a.val = wasm_v8x16_shuffle(first, second, 0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27); // a0 a1 a2 a3
+    b.val = wasm_v8x16_shuffle(first, second, 4,5,6,7,12,13,14,15,20,21,22,23,28,29,30,31); // b0 b1 b2 b3
+}
+
+// Loads 12 interleaved 32-bit elements (a0 b0 c0 a1 b1 c1 ...) and splits
+// them into three vectors of 4 elements each.
+inline void v_load_deinterleave(const unsigned* ptr, v_uint32x4& a, v_uint32x4& b, v_uint32x4& c)
+{
+    v128_t t00 = wasm_v128_load(ptr);        // a0 b0 c0 a1
+    v128_t t01 = wasm_v128_load(ptr + 4);     // b1 c1 a2 b2
+    v128_t t02 = wasm_v128_load(ptr + 8);    // c2 a3 b3 c3
+
+    // Gather one channel from the first two registers into the low part of
+    // each temporary; the trailing indices are placeholders that get
+    // overwritten by the second round of shuffles.
+    v128_t t10 = wasm_v8x16_shuffle(t00, t01, 0,1,2,3,12,13,14,15,24,25,26,27,4,5,6,7);   // a0 a1 a2 -
+    v128_t t11 = wasm_v8x16_shuffle(t00, t01, 4,5,6,7,16,17,18,19,28,29,30,31,0,1,2,3);   // b0 b1 b2 -
+    v128_t t12 = wasm_v8x16_shuffle(t00, t01, 8,9,10,11,20,21,22,23,0,1,2,3,4,5,6,7);     // c0 c1 - -
+
+    // Keep the gathered prefix and append the remaining elements from t02.
+    a.val = wasm_v8x16_shuffle(t10, t02, 0,1,2,3,4,5,6,7,8,9,10,11,20,21,22,23);
+    b.val = wasm_v8x16_shuffle(t11, t02, 0,1,2,3,4,5,6,7,8,9,10,11,24,25,26,27);
+    c.val = wasm_v8x16_shuffle(t12, t02, 0,1,2,3,4,5,6,7,16,17,18,19,28,29,30,31);
+}
+
+// Loads 16 interleaved 32-bit elements (a0 b0 c0 d0 a1 ...) as four rows and
+// obtains the four channels with a 4x4 transpose.
+inline void v_load_deinterleave(const unsigned* ptr, v_uint32x4& a, v_uint32x4& b, v_uint32x4& c, v_uint32x4& d)
+{
+    const v_uint32x4 row0(wasm_v128_load(ptr));      // a0 b0 c0 d0
+    const v_uint32x4 row1(wasm_v128_load(ptr + 4));  // a1 b1 c1 d1
+    const v_uint32x4 row2(wasm_v128_load(ptr + 8));  // a2 b2 c2 d2
+    const v_uint32x4 row3(wasm_v128_load(ptr + 12)); // a3 b3 c3 d3
+
+    v_transpose4x4(row0, row1, row2, row3, a, b, c, d);
+}
+
+// Loads 8 interleaved floats (a0 b0 a1 b1 ...) and splits them into two
+// vectors: even-indexed elements go to `a`, odd-indexed ones to `b`.
+inline void v_load_deinterleave(const float* ptr, v_float32x4& a, v_float32x4& b)
+{
+    const v128_t first = wasm_v128_load(ptr);        // a0 b0 a1 b1
+    const v128_t second = wasm_v128_load((ptr + 4)); // a2 b2 a3 b3
+
+    a.val = wasm_v8x16_shuffle(first, second, 0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27); // a0 a1 a2 a3
+    b.val = wasm_v8x16_shuffle(first, second, 4,5,6,7,12,13,14,15,20,21,22,23,28,29,30,31); // b0 b1 b2 b3
+}
+
+// Loads 12 interleaved floats (a0 b0 c0 a1 b1 c1 ...) and splits them into
+// three vectors of 4 elements each.
+inline void v_load_deinterleave(const float* ptr, v_float32x4& a, v_float32x4& b, v_float32x4& c)
+{
+    v128_t t00 = wasm_v128_load(ptr);        // a0 b0 c0 a1
+    v128_t t01 = wasm_v128_load(ptr + 4);     // b1 c1 a2 b2
+    v128_t t02 = wasm_v128_load(ptr + 8);    // c2 a3 b3 c3
+
+    // Gather one channel from the first two registers into the low part of
+    // each temporary; the trailing indices are placeholders that get
+    // overwritten by the second round of shuffles.
+    v128_t t10 = wasm_v8x16_shuffle(t00, t01, 0,1,2,3,12,13,14,15,24,25,26,27,4,5,6,7);   // a0 a1 a2 -
+    v128_t t11 = wasm_v8x16_shuffle(t00, t01, 4,5,6,7,16,17,18,19,28,29,30,31,0,1,2,3);   // b0 b1 b2 -
+    v128_t t12 = wasm_v8x16_shuffle(t00, t01, 8,9,10,11,20,21,22,23,0,1,2,3,4,5,6,7);     // c0 c1 - -
+
+    // Keep the gathered prefix and append the remaining elements from t02.
+    a.val = wasm_v8x16_shuffle(t10, t02, 0,1,2,3,4,5,6,7,8,9,10,11,20,21,22,23);
+    b.val = wasm_v8x16_shuffle(t11, t02, 0,1,2,3,4,5,6,7,8,9,10,11,24,25,26,27);
+    c.val = wasm_v8x16_shuffle(t12, t02, 0,1,2,3,4,5,6,7,16,17,18,19,28,29,30,31);
+}
+
+// Loads 16 interleaved floats (a0 b0 c0 d0 a1 ...) as four rows and obtains
+// the four channels with a 4x4 transpose.
+inline void v_load_deinterleave(const float* ptr, v_float32x4& a, v_float32x4& b, v_float32x4& c, v_float32x4& d)
+{
+    const v_float32x4 row0(wasm_v128_load(ptr));      // a0 b0 c0 d0
+    const v_float32x4 row1(wasm_v128_load(ptr + 4));  // a1 b1 c1 d1
+    const v_float32x4 row2(wasm_v128_load(ptr + 8));  // a2 b2 c2 d2
+    const v_float32x4 row3(wasm_v128_load(ptr + 12)); // a3 b3 c3 d3
+
+    v_transpose4x4(row0, row1, row2, row3, a, b, c, d);
+}
+
+// Loads 4 interleaved 64-bit elements (a0 b0 a1 b1) and splits them into two
+// vectors with the 64-bit unpack helpers.
+inline void v_load_deinterleave(const uint64 *ptr, v_uint64x2& a, v_uint64x2& b)
+{
+    const v128_t first = wasm_v128_load(ptr);       // a0 b0
+    const v128_t second = wasm_v128_load(ptr + 2);  // a1 b1
+
+    a.val = wasm_unpacklo_i64x2(first, second);
+    b.val = wasm_unpackhi_i64x2(first, second);
+}
+
+// Loads 6 interleaved 64-bit elements (a0 b0 c0 a1 b1 c1) and splits them
+// into three vectors of 2 elements each.
+inline void v_load_deinterleave(const uint64 *ptr, v_uint64x2& a, v_uint64x2& b, v_uint64x2& c)
+{
+    v128_t t0 = wasm_v128_load(ptr);     // a0, b0
+    v128_t t1 = wasm_v128_load(ptr + 2); // c0, a1
+    v128_t t2 = wasm_v128_load(ptr + 4); // b1, c1
+
+    a.val = wasm_v8x16_shuffle(t0, t1, 0,1,2,3,4,5,6,7,24,25,26,27,28,29,30,31);     // low of t0, high of t1
+    b.val = wasm_v8x16_shuffle(t0, t2, 8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23); // high of t0, low of t2
+    c.val = wasm_v8x16_shuffle(t1, t2, 0,1,2,3,4,5,6,7,24,25,26,27,28,29,30,31);     // low of t1, high of t2
+}
+
+// Loads 8 interleaved 64-bit elements (a0 b0 c0 d0 a1 b1 c1 d1) and splits
+// them into four vectors with the 64-bit unpack helpers.
+inline void v_load_deinterleave(const uint64 *ptr, v_uint64x2& a,
+                                v_uint64x2& b, v_uint64x2& c, v_uint64x2& d)
+{
+    const v128_t ab0 = wasm_v128_load(ptr);     // a0 b0
+    const v128_t cd0 = wasm_v128_load(ptr + 2); // c0 d0
+    const v128_t ab1 = wasm_v128_load(ptr + 4); // a1 b1
+    const v128_t cd1 = wasm_v128_load(ptr + 6); // c1 d1
+
+    a.val = wasm_unpacklo_i64x2(ab0, ab1);
+    b.val = wasm_unpackhi_i64x2(ab0, ab1);
+    c.val = wasm_unpacklo_i64x2(cd0, cd1);
+    d.val = wasm_unpackhi_i64x2(cd0, cd1);
+}
+
+// store interleave
+
+// Interleaves two byte vectors with the 8-bit unpack helpers and stores the
+// resulting 32 bytes. The store mode argument is ignored.
+inline void v_store_interleave( uchar* ptr, const v_uint8x16& a, const v_uint8x16& b,
+                                hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED)
+{
+    const v128_t low_pairs = wasm_unpacklo_i8x16(a.val, b.val);
+    const v128_t high_pairs = wasm_unpackhi_i8x16(a.val, b.val);
+
+    wasm_v128_store(ptr, low_pairs);
+    wasm_v128_store(ptr + 16, high_pairs);
+}
+
+// Interleaves three byte vectors as a0 b0 c0 a1 b1 c1 ... and stores the
+// resulting 48 bytes. The store mode argument is ignored.
+inline void v_store_interleave( uchar* ptr, const v_uint8x16& a, const v_uint8x16& b,
+                                const v_uint8x16& c, hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED)
+{
+    // Round 1: place the a/b elements at their final positions; every third
+    // slot is filled with a placeholder (index 0) that round 2 overwrites.
+    v128_t t00 = wasm_v8x16_shuffle(a.val, b.val, 0,16,0,1,17,0,2,18,0,3,19,0,4,20,0,5);
+    v128_t t01 = wasm_v8x16_shuffle(a.val, b.val, 21,0,6,22,0,7,23,0,8,24,0,9,25,0,10,26);
+    v128_t t02 = wasm_v8x16_shuffle(a.val, b.val, 0,11,27,0,12,28,0,13,29,0,14,30,0,15,31,0);
+
+    // Round 2: drop the c elements (indices 16..31) into the placeholder slots.
+    v128_t t10 = wasm_v8x16_shuffle(t00, c.val, 0,1,16,3,4,17,6,7,18,9,10,19,12,13,20,15);
+    v128_t t11 = wasm_v8x16_shuffle(t01, c.val, 0,21,2,3,22,5,6,23,8,9,24,11,12,25,14,15);
+    v128_t t12 = wasm_v8x16_shuffle(t02, c.val, 26,1,2,27,4,5,28,7,8,29,10,11,30,13,14,31);
+
+    wasm_v128_store(ptr, t10);
+    wasm_v128_store(ptr + 16, t11);
+    wasm_v128_store(ptr + 32, t12);
+}
+
+// Interleaves four byte vectors as a0 b0 c0 d0 a1 b1 c1 d1 ... with two
+// rounds of 8-bit unpacks and stores the resulting 64 bytes. The store mode
+// argument is ignored.
+inline void v_store_interleave( uchar* ptr, const v_uint8x16& a, const v_uint8x16& b,
+                                const v_uint8x16& c, const v_uint8x16& d,
+                                hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED)
+{
+    // Round 1: pair channel a with c and channel b with d.
+    const v128_t ac_lo = wasm_unpacklo_i8x16(a.val, c.val); // a0 c0 a1 c1 ...
+    const v128_t ac_hi = wasm_unpackhi_i8x16(a.val, c.val); // a8 c8 a9 c9 ...
+    const v128_t bd_lo = wasm_unpacklo_i8x16(b.val, d.val); // b0 d0 b1 d1 ...
+    const v128_t bd_hi = wasm_unpackhi_i8x16(b.val, d.val); // b8 d8 b9 d9 ...
+
+    // Round 2: interleave the pairs to obtain the final a b c d order.
+    const v128_t q0 = wasm_unpacklo_i8x16(ac_lo, bd_lo); // a0 b0 c0 d0 ...
+    const v128_t q1 = wasm_unpackhi_i8x16(ac_lo, bd_lo); // a4 b4 c4 d4 ...
+    const v128_t q2 = wasm_unpacklo_i8x16(ac_hi, bd_hi); // a8 b8 c8 d8 ...
+    const v128_t q3 = wasm_unpackhi_i8x16(ac_hi, bd_hi); // a12 b12 c12 d12 ...
+
+    wasm_v128_store(ptr, q0);
+    wasm_v128_store(ptr + 16, q1);
+    wasm_v128_store(ptr + 32, q2);
+    wasm_v128_store(ptr + 48, q3);
+}
+
+// Interleaves two 16-bit vectors with the 16-bit unpack helpers and stores
+// the resulting 16 elements. The store mode argument is ignored.
+inline void v_store_interleave( ushort* ptr, const v_uint16x8& a, const v_uint16x8& b,
+                                hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED)
+{
+    const v128_t low_pairs = wasm_unpacklo_i16x8(a.val, b.val);
+    const v128_t high_pairs = wasm_unpackhi_i16x8(a.val, b.val);
+
+    wasm_v128_store(ptr, low_pairs);
+    wasm_v128_store(ptr + 8, high_pairs);
+}
+
+// Interleaves three 16-bit vectors as a0 b0 c0 a1 b1 c1 ... and stores the
+// resulting 24 elements. The store mode argument is ignored.
+inline void v_store_interleave( ushort* ptr, const v_uint16x8& a,
+                                const v_uint16x8& b, const v_uint16x8& c,
+                                hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED)
+{
+    // Round 1: place the a/b elements at their final positions; every third
+    // element slot is filled with placeholder bytes (0,0) that round 2
+    // overwrites.
+    v128_t t00 = wasm_v8x16_shuffle(a.val, b.val, 0,1,16,17,0,0,2,3,18,19,0,0,4,5,20,21);
+    v128_t t01 = wasm_v8x16_shuffle(a.val, b.val, 0,0,6,7,22,23,0,0,8,9,24,25,0,0,10,11);
+    v128_t t02 = wasm_v8x16_shuffle(a.val, b.val, 26,27,0,0,12,13,28,29,0,0,14,15,30,31,0,0);
+
+    // Round 2: drop the c elements (indices 16..31) into the placeholder slots.
+    v128_t t10 = wasm_v8x16_shuffle(t00, c.val, 0,1,2,3,16,17,6,7,8,9,18,19,12,13,14,15);
+    v128_t t11 = wasm_v8x16_shuffle(t01, c.val, 20,21,2,3,4,5,22,23,8,9,10,11,24,25,14,15);
+    v128_t t12 = wasm_v8x16_shuffle(t02, c.val, 0,1,26,27,4,5,6,7,28,29,10,11,12,13,30,31);
+
+    wasm_v128_store(ptr, t10);
+    wasm_v128_store(ptr + 8, t11);
+    wasm_v128_store(ptr + 16, t12);
+}
+
+// Interleaves four 16-bit vectors as a0 b0 c0 d0 a1 b1 c1 d1 ... with two
+// rounds of 16-bit unpacks and stores the resulting 32 elements. The store
+// mode argument is ignored.
+inline void v_store_interleave( ushort* ptr, const v_uint16x8& a, const v_uint16x8& b,
+                                const v_uint16x8& c, const v_uint16x8& d,
+                                hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED)
+{
+    // Round 1: pair channel a with c and channel b with d.
+    const v128_t ac_lo = wasm_unpacklo_i16x8(a.val, c.val); // a0 c0 a1 c1 ...
+    const v128_t ac_hi = wasm_unpackhi_i16x8(a.val, c.val); // a4 c4 a5 c5 ...
+    const v128_t bd_lo = wasm_unpacklo_i16x8(b.val, d.val); // b0 d0 b1 d1 ...
+    const v128_t bd_hi = wasm_unpackhi_i16x8(b.val, d.val); // b4 d4 b5 d5 ...
+
+    // Round 2: interleave the pairs to obtain the final a b c d order.
+    const v128_t q0 = wasm_unpacklo_i16x8(ac_lo, bd_lo); // a0 b0 c0 d0 ...
+    const v128_t q1 = wasm_unpackhi_i16x8(ac_lo, bd_lo); // a2 b2 c2 d2 ...
+    const v128_t q2 = wasm_unpacklo_i16x8(ac_hi, bd_hi); // a4 b4 c4 d4 ...
+    const v128_t q3 = wasm_unpackhi_i16x8(ac_hi, bd_hi); // a6 b6 c6 d6 ...
+
+    wasm_v128_store(ptr, q0);
+    wasm_v128_store(ptr + 8, q1);
+    wasm_v128_store(ptr + 16, q2);
+    wasm_v128_store(ptr + 24, q3);
+}
+
+inline void v_store_interleave( unsigned* ptr, const v_uint32x4& a, const v_uint32x4& b,
+                                hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED)
+{
+    v128_t ab_lo = wasm_unpacklo_i32x4(a.val, b.val); // a0 b0 a1 b1
+    v128_t ab_hi = wasm_unpackhi_i32x4(a.val, b.val); // a2 b2 a3 b3
+    // Two 128-bit stores cover ptr[0..7]; the mode argument is ignored.
+    wasm_v128_store(ptr, ab_lo);
+    wasm_v128_store(ptr + 4, ab_hi);
+}
+
+inline void v_store_interleave( unsigned* ptr, const v_uint32x4& a, const v_uint32x4& b,
+                                const v_uint32x4& c, hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED)
+{
+    v128_t t00 = wasm_v8x16_shuffle(a.val, b.val, 0,1,2,3,16,17,18,19,0,0,0,0,4,5,6,7); // a0 b0 -- a1  (-- = slot filled from c below)
+    v128_t t01 = wasm_v8x16_shuffle(a.val, b.val, 20,21,22,23,0,0,0,0,8,9,10,11,24,25,26,27); // b1 -- a2 b2
+    v128_t t02 = wasm_v8x16_shuffle(a.val, b.val, 0,0,0,0,12,13,14,15,28,29,30,31,0,0,0,0); // -- a3 b3 --
+
+    v128_t t10 = wasm_v8x16_shuffle(t00, c.val, 0,1,2,3,4,5,6,7,16,17,18,19,12,13,14,15); // a0 b0 c0 a1
+    v128_t t11 = wasm_v8x16_shuffle(t01, c.val, 0,1,2,3,20,21,22,23,8,9,10,11,12,13,14,15); // b1 c1 a2 b2
+    v128_t t12 = wasm_v8x16_shuffle(t02, c.val, 24,25,26,27,4,5,6,7,8,9,10,11,28,29,30,31); // c2 a3 b3 c3
+
+    wasm_v128_store(ptr, t10);      // the three stores cover ptr[0..11]; the mode argument is ignored
+    wasm_v128_store(ptr + 4, t11);
+    wasm_v128_store(ptr + 8, t12);
+}
+
+inline void v_store_interleave(unsigned* ptr, const v_uint32x4& a, const v_uint32x4& b,
+                               const v_uint32x4& c, const v_uint32x4& d,
+                               hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED)
+{
+    v_uint32x4 row0, row1, row2, row3;
+    v_transpose4x4(a, b, c, d, row0, row1, row2, row3);
+    // The transposed rows are written back to back to ptr[0..15]; the mode argument is ignored.
+    wasm_v128_store(ptr, row0.val);
+    wasm_v128_store(ptr + 4, row1.val);
+    wasm_v128_store(ptr + 8, row2.val);
+    wasm_v128_store(ptr + 12, row3.val);
+}
+
+// 2-channel, float only
+inline void v_store_interleave(float* ptr, const v_float32x4& a, const v_float32x4& b,
+                               hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED)
+{
+    v128_t ab_lo = wasm_unpacklo_i32x4(a.val, b.val); // a0 b0 a1 b1
+    v128_t ab_hi = wasm_unpackhi_i32x4(a.val, b.val); // a2 b2 a3 b3
+    // The 32-bit integer unpack only moves lanes, so it is reused for float data.
+    wasm_v128_store(ptr, ab_lo);
+    wasm_v128_store(ptr + 4, ab_hi);
+}
+
+inline void v_store_interleave(float* ptr, const v_float32x4& a, const v_float32x4& b,
+                               const v_float32x4& c, hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED)
+{
+    v128_t t00 = wasm_v8x16_shuffle(a.val, b.val, 0,1,2,3,16,17,18,19,0,0,0,0,4,5,6,7); // a0 b0 -- a1  (-- = slot filled from c below)
+    v128_t t01 = wasm_v8x16_shuffle(a.val, b.val, 20,21,22,23,0,0,0,0,8,9,10,11,24,25,26,27); // b1 -- a2 b2
+    v128_t t02 = wasm_v8x16_shuffle(a.val, b.val, 0,0,0,0,12,13,14,15,28,29,30,31,0,0,0,0); // -- a3 b3 --
+
+    v128_t t10 = wasm_v8x16_shuffle(t00, c.val, 0,1,2,3,4,5,6,7,16,17,18,19,12,13,14,15); // a0 b0 c0 a1
+    v128_t t11 = wasm_v8x16_shuffle(t01, c.val, 0,1,2,3,20,21,22,23,8,9,10,11,12,13,14,15); // b1 c1 a2 b2
+    v128_t t12 = wasm_v8x16_shuffle(t02, c.val, 24,25,26,27,4,5,6,7,8,9,10,11,28,29,30,31); // c2 a3 b3 c3
+
+    wasm_v128_store(ptr, t10);      // the three stores cover ptr[0..11]; the mode argument is ignored
+    wasm_v128_store(ptr + 4, t11);
+    wasm_v128_store(ptr + 8, t12);
+}
+
+inline void v_store_interleave(float* ptr, const v_float32x4& a, const v_float32x4& b,
+                               const v_float32x4& c, const v_float32x4& d,
+                               hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED)
+{
+    v_float32x4 row0, row1, row2, row3;
+    v_transpose4x4(a, b, c, d, row0, row1, row2, row3);
+    // The transposed rows are written back to back to ptr[0..15]; the mode argument is ignored.
+    wasm_v128_store(ptr, row0.val);
+    wasm_v128_store(ptr + 4, row1.val);
+    wasm_v128_store(ptr + 8, row2.val);
+    wasm_v128_store(ptr + 12, row3.val);
+}
+
+inline void v_store_interleave(uint64 *ptr, const v_uint64x2& a, const v_uint64x2& b,
+                               hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED)
+{
+    v128_t ab_lo = wasm_unpacklo_i64x2(a.val, b.val); // a0 b0
+    v128_t ab_hi = wasm_unpackhi_i64x2(a.val, b.val); // a1 b1
+    // Two 128-bit stores cover ptr[0..3]; the mode argument is ignored.
+    wasm_v128_store(ptr, ab_lo);
+    wasm_v128_store(ptr + 2, ab_hi);
+}
+
+inline void v_store_interleave(uint64 *ptr, const v_uint64x2& a, const v_uint64x2& b,
+                               const v_uint64x2& c, hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED)
+{
+    v128_t v0 = wasm_v8x16_shuffle(a.val, b.val, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23); // a0 b0
+    v128_t v1 = wasm_v8x16_shuffle(a.val, c.val, 16,17,18,19,20,21,22,23,8,9,10,11,12,13,14,15); // c0 a1
+    v128_t v2 = wasm_v8x16_shuffle(b.val, c.val, 8,9,10,11,12,13,14,15,24,25,26,27,28,29,30,31); // b1 c1
+
+    wasm_v128_store(ptr, v0);      // the three stores cover ptr[0..5]; the mode argument is ignored
+    wasm_v128_store(ptr + 2, v1);
+    wasm_v128_store(ptr + 4, v2);
+}
+
+inline void v_store_interleave(uint64 *ptr, const v_uint64x2& a, const v_uint64x2& b,
+                               const v_uint64x2& c, const v_uint64x2& d,
+                               hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED)
+{
+    v128_t ab_lo = wasm_unpacklo_i64x2(a.val, b.val); // a0 b0
+    v128_t cd_lo = wasm_unpacklo_i64x2(c.val, d.val); // c0 d0
+    v128_t ab_hi = wasm_unpackhi_i64x2(a.val, b.val); // a1 b1
+    v128_t cd_hi = wasm_unpackhi_i64x2(c.val, d.val); // c1 d1
+    // Memory order is a0 b0 c0 d0 a1 b1 c1 d1 over ptr[0..7]; the mode argument is ignored.
+    wasm_v128_store(ptr, ab_lo);
+    wasm_v128_store(ptr + 2, cd_lo);
+    wasm_v128_store(ptr + 4, ab_hi);
+    wasm_v128_store(ptr + 6, cd_hi);
+}
+
+#define OPENCV_HAL_IMPL_WASM_LOADSTORE_INTERLEAVE(_Tpvec0, _Tp0, suffix0, _Tpvec1, _Tp1, suffix1) /* defines _Tpvec0/_Tp0 overloads that forward to the _Tpvec1/_Tp1 ones */ \
+inline void v_load_deinterleave( const _Tp0* ptr, _Tpvec0& a0, _Tpvec0& b0 ) \
+{ \
+    _Tpvec1 a1, b1; \
+    v_load_deinterleave((const _Tp1*)ptr, a1, b1); /* load through the _Tp1 overload */ \
+    a0 = v_reinterpret_as_##suffix0(a1); /* then reinterpret each result as _Tpvec0 */ \
+    b0 = v_reinterpret_as_##suffix0(b1); \
+} \
+inline void v_load_deinterleave( const _Tp0* ptr, _Tpvec0& a0, _Tpvec0& b0, _Tpvec0& c0 ) \
+{ \
+    _Tpvec1 a1, b1, c1; \
+    v_load_deinterleave((const _Tp1*)ptr, a1, b1, c1); \
+    a0 = v_reinterpret_as_##suffix0(a1); \
+    b0 = v_reinterpret_as_##suffix0(b1); \
+    c0 = v_reinterpret_as_##suffix0(c1); \
+} \
+inline void v_load_deinterleave( const _Tp0* ptr, _Tpvec0& a0, _Tpvec0& b0, _Tpvec0& c0, _Tpvec0& d0 ) \
+{ \
+    _Tpvec1 a1, b1, c1, d1; \
+    v_load_deinterleave((const _Tp1*)ptr, a1, b1, c1, d1); \
+    a0 = v_reinterpret_as_##suffix0(a1); \
+    b0 = v_reinterpret_as_##suffix0(b1); \
+    c0 = v_reinterpret_as_##suffix0(c1); \
+    d0 = v_reinterpret_as_##suffix0(d1); \
+} \
+inline void v_store_interleave( _Tp0* ptr, const _Tpvec0& a0, const _Tpvec0& b0, \
+                                hal::StoreMode mode = hal::STORE_UNALIGNED ) \
+{ \
+    _Tpvec1 a1 = v_reinterpret_as_##suffix1(a0); /* reinterpret inputs as _Tpvec1 first */ \
+    _Tpvec1 b1 = v_reinterpret_as_##suffix1(b0); \
+    v_store_interleave((_Tp1*)ptr, a1, b1, mode);      /* mode is passed through unchanged */ \
+} \
+inline void v_store_interleave( _Tp0* ptr, const _Tpvec0& a0, const _Tpvec0& b0, \
+                                const _Tpvec0& c0, hal::StoreMode mode = hal::STORE_UNALIGNED ) \
+{ \
+    _Tpvec1 a1 = v_reinterpret_as_##suffix1(a0); \
+    _Tpvec1 b1 = v_reinterpret_as_##suffix1(b0); \
+    _Tpvec1 c1 = v_reinterpret_as_##suffix1(c0); \
+    v_store_interleave((_Tp1*)ptr, a1, b1, c1, mode);  \
+} \
+inline void v_store_interleave( _Tp0* ptr, const _Tpvec0& a0, const _Tpvec0& b0, \
+                                const _Tpvec0& c0, const _Tpvec0& d0, \
+                                hal::StoreMode mode = hal::STORE_UNALIGNED ) \
+{ \
+    _Tpvec1 a1 = v_reinterpret_as_##suffix1(a0); \
+    _Tpvec1 b1 = v_reinterpret_as_##suffix1(b0); \
+    _Tpvec1 c1 = v_reinterpret_as_##suffix1(c0); \
+    _Tpvec1 d1 = v_reinterpret_as_##suffix1(d0); \
+    v_store_interleave((_Tp1*)ptr, a1, b1, c1, d1, mode); \
+}
+
+OPENCV_HAL_IMPL_WASM_LOADSTORE_INTERLEAVE(v_int8x16, schar, s8, v_uint8x16, uchar, u8) // schar overloads forward to the uchar ones
+OPENCV_HAL_IMPL_WASM_LOADSTORE_INTERLEAVE(v_int16x8, short, s16, v_uint16x8, ushort, u16) // short -> ushort
+OPENCV_HAL_IMPL_WASM_LOADSTORE_INTERLEAVE(v_int32x4, int, s32, v_uint32x4, unsigned, u32) // int -> unsigned
+OPENCV_HAL_IMPL_WASM_LOADSTORE_INTERLEAVE(v_int64x2, int64, s64, v_uint64x2, uint64, u64) // int64 -> uint64
+OPENCV_HAL_IMPL_WASM_LOADSTORE_INTERLEAVE(v_float64x2, double, f64, v_uint64x2, uint64, u64) // double -> uint64 (bit reinterpretation only)
+
+inline v_float32x4 v_cvt_f32(const v_int32x4& a)
+{   // int32 -> float32 has a native WASM conversion
+    return v_float32x4(wasm_f32x4_convert_i32x4(a.val));
+}
+
+inline v_float32x4 v_cvt_f32(const v_float64x2& a)
+{   // float64 -> float32 is delegated to the fallback implementation
+    fallback::v_float64x2 src(a);
+    return fallback::v_cvt_f32(src);
+}
+
+inline v_float32x4 v_cvt_f32(const v_float64x2& a, const v_float64x2& b)
+{   // two float64 vectors -> one float32 vector, also through the fallback
+    fallback::v_float64x2 src_a(a), src_b(b);
+    return fallback::v_cvt_f32(src_a, src_b);
+}
+
+inline v_float64x2 v_cvt_f64(const v_int32x4& a)
+{   // native path only when __wasm_unimplemented_simd128__ is defined
+#ifdef __wasm_unimplemented_simd128__
+    v128_t widened = v128_cvti32x4_i64x2(a.val);
+    return v_float64x2(wasm_f64x2_convert_i64x2(widened));
+#else
+    fallback::v_int32x4 src(a);
+    return fallback::v_cvt_f64(src);
+#endif
+}
+
+inline v_float64x2 v_cvt_f64_high(const v_int32x4& a)
+{   // same structure as v_cvt_f64, using the *_high widening helper / fallback
+#ifdef __wasm_unimplemented_simd128__
+    v128_t widened = v128_cvti32x4_i64x2_high(a.val);
+    return v_float64x2(wasm_f64x2_convert_i64x2(widened));
+#else
+    fallback::v_int32x4 src(a);
+    return fallback::v_cvt_f64_high(src);
+#endif
+}
+
+inline v_float64x2 v_cvt_f64(const v_float32x4& a)
+{   // float32 -> float64 always goes through the fallback implementation
+    fallback::v_float32x4 src(a);
+    return fallback::v_cvt_f64(src);
+}
+
+inline v_float64x2 v_cvt_f64_high(const v_float32x4& a)
+{   // fallback again, using its *_high variant
+    fallback::v_float32x4 src(a);
+    return fallback::v_cvt_f64_high(src);
+}
+
+inline v_float64x2 v_cvt_f64(const v_int64x2& a)
+{   // int64 -> float64: native conversion if available, fallback otherwise
+#ifdef __wasm_unimplemented_simd128__
+    return v_float64x2(wasm_f64x2_convert_i64x2(a.val));
+#else
+    fallback::v_int64x2 src(a);
+    return fallback::v_cvt_f64(src);
+#endif
+}
+
+////////////// Lookup table access ////////////////////
+
+inline v_int8x16 v_lut(const schar* tab, const int* idx)
+{   // scalar gather: lane k = tab[idx[k]] for k = 0..15
+    return v_int8x16(tab[idx[0]], tab[idx[1]], tab[idx[ 2]], tab[idx[ 3]], tab[idx[ 4]], tab[idx[ 5]], tab[idx[ 6]], tab[idx[ 7]],
+                     tab[idx[8]], tab[idx[9]], tab[idx[10]], tab[idx[11]], tab[idx[12]], tab[idx[13]], tab[idx[14]], tab[idx[15]]);
+}
+inline v_int8x16 v_lut_pairs(const schar* tab, const int* idx)
+{   // reads idx[0..7]; each index contributes tab[i] and tab[i+1]
+    return v_int8x16(tab[idx[0]], tab[idx[0]+1], tab[idx[1]], tab[idx[1]+1], tab[idx[2]], tab[idx[2]+1], tab[idx[3]], tab[idx[3]+1],
+                     tab[idx[4]], tab[idx[4]+1], tab[idx[5]], tab[idx[5]+1], tab[idx[6]], tab[idx[6]+1], tab[idx[7]], tab[idx[7]+1]);
+}
+inline v_int8x16 v_lut_quads(const schar* tab, const int* idx)
+{   // reads idx[0..3]; each index contributes tab[i] .. tab[i+3]
+    return v_int8x16(tab[idx[0]], tab[idx[0]+1], tab[idx[0]+2], tab[idx[0]+3], tab[idx[1]], tab[idx[1]+1], tab[idx[1]+2], tab[idx[1]+3],
+                     tab[idx[2]], tab[idx[2]+1], tab[idx[2]+2], tab[idx[2]+3], tab[idx[3]], tab[idx[3]+1], tab[idx[3]+2], tab[idx[3]+3]);
+}
+inline v_uint8x16 v_lut(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut((const schar *)tab, idx)); } // unsigned variants reuse the schar gathers
+inline v_uint8x16 v_lut_pairs(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut_pairs((const schar *)tab, idx)); }
+inline v_uint8x16 v_lut_quads(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut_quads((const schar *)tab, idx)); }
+
+inline v_int16x8 v_lut(const short* tab, const int* idx)
+{   // scalar gather: lane k = tab[idx[k]] for k = 0..7
+    return v_int16x8(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]],
+                     tab[idx[4]], tab[idx[5]], tab[idx[6]], tab[idx[7]]);
+}
+inline v_int16x8 v_lut_pairs(const short* tab, const int* idx)
+{   // reads idx[0..3]; each index contributes tab[i] and tab[i+1]
+    return v_int16x8(tab[idx[0]], tab[idx[0]+1], tab[idx[1]], tab[idx[1]+1],
+                     tab[idx[2]], tab[idx[2]+1], tab[idx[3]], tab[idx[3]+1]);
+}
+inline v_int16x8 v_lut_quads(const short* tab, const int* idx)
+{   // reads idx[0..1]; each index contributes tab[i] .. tab[i+3]
+    return v_int16x8(tab[idx[0]], tab[idx[0]+1], tab[idx[0]+2], tab[idx[0]+3],
+                     tab[idx[1]], tab[idx[1]+1], tab[idx[1]+2], tab[idx[1]+3]);
+}
+inline v_uint16x8 v_lut(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut((const short *)tab, idx)); } // unsigned variants reuse the short gathers
+inline v_uint16x8 v_lut_pairs(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut_pairs((const short *)tab, idx)); }
+inline v_uint16x8 v_lut_quads(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut_quads((const short *)tab, idx)); }
+
+inline v_int32x4 v_lut(const int* tab, const int* idx)
+{   // scalar gather: lane k = tab[idx[k]] for k = 0..3
+    return v_int32x4(tab[idx[0]], tab[idx[1]],
+                     tab[idx[2]], tab[idx[3]]);
+}
+inline v_int32x4 v_lut_pairs(const int* tab, const int* idx)
+{   // reads idx[0..1]; each index contributes tab[i] and tab[i+1]
+    return v_int32x4(tab[idx[0]], tab[idx[0]+1],
+                     tab[idx[1]], tab[idx[1]+1]);
+}
+inline v_int32x4 v_lut_quads(const int* tab, const int* idx)
+{   // only idx[0] is read: one 128-bit load of tab[idx[0]] .. tab[idx[0]+3]
+    return v_int32x4(wasm_v128_load(tab + idx[0]));
+}
+inline v_uint32x4 v_lut(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut((const int *)tab, idx)); } // unsigned variants reuse the int gathers
+inline v_uint32x4 v_lut_pairs(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut_pairs((const int *)tab, idx)); }
+inline v_uint32x4 v_lut_quads(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut_quads((const int *)tab, idx)); }
+
+inline v_int64x2 v_lut(const int64_t* tab, const int* idx)
+{   // scalar gather of tab[idx[0]] and tab[idx[1]]
+    return v_int64x2(tab[idx[0]], tab[idx[1]]);
+}
+inline v_int64x2 v_lut_pairs(const int64_t* tab, const int* idx)
+{   // only idx[0] is read: one 128-bit load of tab[idx[0]] and tab[idx[0]+1]
+    return v_int64x2(wasm_v128_load(tab + idx[0]));
+}
+inline v_uint64x2 v_lut(const uint64_t* tab, const int* idx) { return v_reinterpret_as_u64(v_lut((const int64_t *)tab, idx)); } // unsigned variants reuse the int64_t gathers
+inline v_uint64x2 v_lut_pairs(const uint64_t* tab, const int* idx) { return v_reinterpret_as_u64(v_lut_pairs((const int64_t *)tab, idx)); }
+
+inline v_float32x4 v_lut(const float* tab, const int* idx)
+{   // scalar gather: lane k = tab[idx[k]] for k = 0..3
+    return v_float32x4(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]);
+}
+inline v_float32x4 v_lut_pairs(const float* tab, const int* idx) { return v_reinterpret_as_f32(v_lut_pairs((const int *)tab, idx)); } // pairs/quads read the table through the int overloads and reinterpret the result
+inline v_float32x4 v_lut_quads(const float* tab, const int* idx) { return v_reinterpret_as_f32(v_lut_quads((const int *)tab, idx)); }
+
+inline v_float64x2 v_lut(const double* tab, const int* idx)
+{   // scalar gather of tab[idx[0]] and tab[idx[1]]
+    return v_float64x2(tab[idx[0]], tab[idx[1]]);
+}
+inline v_float64x2 v_lut_pairs(const double* tab, const int* idx)
+{   // only idx[0] is read: one 128-bit load of tab[idx[0]] and tab[idx[0]+1]
+    return v_float64x2(wasm_v128_load(tab + idx[0]));
+}
+
+inline v_int32x4 v_lut(const int* tab, const v_int32x4& idxvec)
+{
+    // Gather with indices held in a vector: lane k of the result is tab[lane k of idxvec].
+    const int i0 = wasm_i32x4_extract_lane(idxvec.val, 0), i1 = wasm_i32x4_extract_lane(idxvec.val, 1);
+    const int i2 = wasm_i32x4_extract_lane(idxvec.val, 2), i3 = wasm_i32x4_extract_lane(idxvec.val, 3);
+    return v_int32x4(tab[i0], tab[i1], tab[i2], tab[i3]);
+}
+
+inline v_uint32x4 v_lut(const unsigned* tab, const v_int32x4& idxvec)
+{   // reuses the int gather on the same memory, then reinterprets the lanes as unsigned
+    return v_reinterpret_as_u32(v_lut((const int *)tab, idxvec));
+}
+
+inline v_float32x4 v_lut(const float* tab, const v_int32x4& idxvec)
+{
+    // Gather with indices held in a vector: lane k of the result is tab[lane k of idxvec].
+    const int i0 = wasm_i32x4_extract_lane(idxvec.val, 0), i1 = wasm_i32x4_extract_lane(idxvec.val, 1);
+    const int i2 = wasm_i32x4_extract_lane(idxvec.val, 2), i3 = wasm_i32x4_extract_lane(idxvec.val, 3);
+    return v_float32x4(tab[i0], tab[i1], tab[i2], tab[i3]);
+}
+
+inline v_float64x2 v_lut(const double* tab, const v_int32x4& idxvec)
+{
+    const int i0 = wasm_i32x4_extract_lane(idxvec.val, 0), i1 = wasm_i32x4_extract_lane(idxvec.val, 1);
+    return v_float64x2(tab[i0], tab[i1]); // only index lanes 0 and 1 are used
+}
+
+// loads pairs from the table and deinterleaves them, e.g. returns:
+//   x = (tab[idxvec[0]], tab[idxvec[1]], tab[idxvec[2]], tab[idxvec[3]]),
+//   y = (tab[idxvec[0]+1], tab[idxvec[1]+1], tab[idxvec[2]+1], tab[idxvec[3]+1])
+// note that the indices are float's indices, not the float-pair indices.
+// in theory, this function can be used to implement bilinear interpolation,
+// when idxvec are the offsets within the image.
+inline void v_lut_deinterleave(const float* tab, const v_int32x4& idxvec, v_float32x4& x, v_float32x4& y)
+{
+    // Pull the four indices out once; x takes tab[i], y takes the element right after it.
+    const int i0 = wasm_i32x4_extract_lane(idxvec.val, 0);
+    const int i1 = wasm_i32x4_extract_lane(idxvec.val, 1);
+    const int i2 = wasm_i32x4_extract_lane(idxvec.val, 2);
+    const int i3 = wasm_i32x4_extract_lane(idxvec.val, 3);
+    x = v_float32x4(tab[i0], tab[i1], tab[i2], tab[i3]);
+    y = v_float32x4(tab[i0+1], tab[i1+1], tab[i2+1], tab[i3+1]);
+    // (the indices address individual floats, not float pairs)
+}
+
+inline void v_lut_deinterleave(const double* tab, const v_int32x4& idxvec, v_float64x2& x, v_float64x2& y)
+{   // double variant: x = (tab[i0], tab[i1]), y = (tab[i0+1], tab[i1+1]) with i0, i1 = index lanes 0 and 1
+    v128_t xy0 = wasm_v128_load(tab + wasm_i32x4_extract_lane(idxvec.val, 0)); // (tab[i0], tab[i0+1])
+    v128_t xy1 = wasm_v128_load(tab + wasm_i32x4_extract_lane(idxvec.val, 1)); // (tab[i1], tab[i1+1])
+    x.val = wasm_unpacklo_i64x2(xy0, xy1); // low halves of both loads
+    y.val = wasm_unpackhi_i64x2(xy0, xy1); // high halves; the previous unpacklo here made y a copy of x
+}
+
+inline v_int8x16 v_interleave_pairs(const v_int8x16& vec)
+{   // within every 4 lanes: (x0 x1 x2 x3) -> (x0 x2 x1 x3)
+    return v_int8x16(wasm_v8x16_shuffle(vec.val, vec.val, 0,2,1,3,4,6,5,7,8,10,9,11,12,14,13,15));
+}
+inline v_uint8x16 v_interleave_pairs(const v_uint8x16& vec) { return v_reinterpret_as_u8(v_interleave_pairs(v_reinterpret_as_s8(vec))); } // unsigned: via the signed overload
+inline v_int8x16 v_interleave_quads(const v_int8x16& vec)
+{   // within every 8 lanes: (x0 .. x7) -> (x0 x4 x1 x5 x2 x6 x3 x7)
+    return v_int8x16(wasm_v8x16_shuffle(vec.val, vec.val, 0,4,1,5,2,6,3,7,8,12,9,13,10,14,11,15));
+}
+inline v_uint8x16 v_interleave_quads(const v_uint8x16& vec) { return v_reinterpret_as_u8(v_interleave_quads(v_reinterpret_as_s8(vec))); } // unsigned: via the signed overload
+
+inline v_int16x8 v_interleave_pairs(const v_int16x8& vec)
+{   // 16-bit lanes, byte indices move in pairs: lanes (0 1 2 3 4 5 6 7) -> (0 2 1 3 4 6 5 7)
+    return v_int16x8(wasm_v8x16_shuffle(vec.val, vec.val, 0,1,4,5,2,3,6,7,8,9,12,13,10,11,14,15));
+}
+inline v_uint16x8 v_interleave_pairs(const v_uint16x8& vec) { return v_reinterpret_as_u16(v_interleave_pairs(v_reinterpret_as_s16(vec))); } // unsigned: via the signed overload
+inline v_int16x8 v_interleave_quads(const v_int16x8& vec)
+{   // lanes (0 1 2 3 4 5 6 7) -> (0 4 1 5 2 6 3 7)
+    return v_int16x8(wasm_v8x16_shuffle(vec.val, vec.val, 0,1,8,9,2,3,10,11,4,5,12,13,6,7,14,15));
+}
+inline v_uint16x8 v_interleave_quads(const v_uint16x8& vec) { return v_reinterpret_as_u16(v_interleave_quads(v_reinterpret_as_s16(vec))); } // unsigned: via the signed overload
+
+inline v_int32x4 v_interleave_pairs(const v_int32x4& vec)
+{   // 32-bit lanes (0 1 2 3) -> (0 2 1 3)
+    return v_int32x4(wasm_v8x16_shuffle(vec.val, vec.val, 0,1,2,3,8,9,10,11,4,5,6,7,12,13,14,15));
+}
+inline v_uint32x4 v_interleave_pairs(const v_uint32x4& vec) { return v_reinterpret_as_u32(v_interleave_pairs(v_reinterpret_as_s32(vec))); } // unsigned: via the signed overload
+inline v_float32x4 v_interleave_pairs(const v_float32x4& vec)
+{   // same byte permutation as the int32 overload
+    return v_float32x4(wasm_v8x16_shuffle(vec.val, vec.val, 0,1,2,3,8,9,10,11,4,5,6,7,12,13,14,15));
+}
+
+inline v_int8x16 v_pack_triplets(const v_int8x16& vec)
+{   // keeps lanes 0-2, 4-6, 8-10, 12-14 in the first 12 slots; the last 4 slots repeat lane 0 (index 16 = byte 0 of the second operand, also vec)
+    return v_int8x16(wasm_v8x16_shuffle(vec.val, vec.val, 0,1,2,4,5,6,8,9,10,12,13,14,16,16,16,16));
+}
+inline v_uint8x16 v_pack_triplets(const v_uint8x16& vec) { return v_reinterpret_as_u8(v_pack_triplets(v_reinterpret_as_s8(vec))); } // unsigned: via the signed overload
+
+inline v_int16x8 v_pack_triplets(const v_int16x8& vec)
+{   // 16-bit lanes (0 1 2 3 4 5 6 7) -> (0 1 2 4 5 6 7 3)
+    return v_int16x8(wasm_v8x16_shuffle(vec.val, vec.val, 0,1,2,3,4,5,8,9,10,11,12,13,14,15,6,7));
+}
+inline v_uint16x8 v_pack_triplets(const v_uint16x8& vec) { return v_reinterpret_as_u16(v_pack_triplets(v_reinterpret_as_s16(vec))); } // unsigned: via the signed overload
+
+inline v_int32x4 v_pack_triplets(const v_int32x4& vec) { return vec; } // 32-bit overloads return the input unchanged
+inline v_uint32x4 v_pack_triplets(const v_uint32x4& vec) { return vec; }
+inline v_float32x4 v_pack_triplets(const v_float32x4& vec) { return vec; }
+
+template<int i, typename _Tp>
+inline typename _Tp::lane_type v_extract_n(const _Tp& a)
+{   // returns get0() of the vector produced by v_rotate_right<i>(a)
+    return v_rotate_right<i>(a).get0();
+}
+
+template<int i>
+inline v_uint32x4 v_broadcast_element(const v_uint32x4& a)
+{   // extracts element i with v_extract_n, then fills a vector with it via v_setall_u32
+    return v_setall_u32(v_extract_n<i>(a));
+}
+template<int i>
+inline v_int32x4 v_broadcast_element(const v_int32x4& a)
+{   // same pattern with v_setall_s32
+    return v_setall_s32(v_extract_n<i>(a));
+}
+template<int i>
+inline v_float32x4 v_broadcast_element(const v_float32x4& a)
+{   // same pattern with v_setall_f32
+    return v_setall_f32(v_extract_n<i>(a));
+}
+
+
+////////////// FP16 support ///////////////////////////
+
+inline v_float32x4 v_load_expand(const float16_t* ptr)
+{   // FP16 loads are delegated entirely to the fallback implementation
+    return fallback::v_load_expand(ptr);
+}
+
+inline void v_pack_store(float16_t* ptr, const v_float32x4& v)
+{   // convert to the fallback vector type, then let the fallback do the FP16 store
+    fallback::v_float32x4 src(v);
+    fallback::v_pack_store(ptr, src);
+}
+
+inline void v_cleanup() {} // intentionally empty: a no-op in this backend
+
+CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
+
+//! @endcond
+
+}
+
+#endif
diff --git a/IPL/include/opencv/opencv2/core/hal/msa_macros.h b/IPL/include/opencv/opencv2/core/hal/msa_macros.h
new file mode 100644
index 0000000..bd6ddb1
--- /dev/null
+++ b/IPL/include/opencv/opencv2/core/hal/msa_macros.h
@@ -0,0 +1,1558 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_CORE_HAL_MSA_MACROS_H
+#define OPENCV_CORE_HAL_MSA_MACROS_H
+
+#ifdef __mips_msa
+#include "msa.h"
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Define 64 bits vector types */
+typedef signed char v8i8 __attribute__ ((vector_size(8), aligned(8))); /* every type below is 8 bytes wide and 8-byte aligned */
+typedef unsigned char v8u8 __attribute__ ((vector_size(8), aligned(8)));
+typedef short v4i16 __attribute__ ((vector_size(8), aligned(8)));
+typedef unsigned short v4u16 __attribute__ ((vector_size(8), aligned(8)));
+typedef int v2i32 __attribute__ ((vector_size(8), aligned(8)));
+typedef unsigned int v2u32 __attribute__ ((vector_size(8), aligned(8)));
+typedef long long v1i64 __attribute__ ((vector_size(8), aligned(8)));
+typedef unsigned long long v1u64 __attribute__ ((vector_size(8), aligned(8)));
+typedef float v2f32 __attribute__ ((vector_size(8), aligned(8)));
+typedef double v1f64 __attribute__ ((vector_size(8), aligned(8)));
+
+
+/* Load values from the given memory address into a 64-bit vector. */
+#define msa_ld1_s8(__a)  (*((v8i8*)(__a))) /* plain dereference through a cast pointer; the 64-bit vector types are declared aligned(8) */
+#define msa_ld1_s16(__a) (*((v4i16*)(__a)))
+#define msa_ld1_s32(__a) (*((v2i32*)(__a)))
+#define msa_ld1_s64(__a) (*((v1i64*)(__a)))
+#define msa_ld1_u8(__a)  (*((v8u8*)(__a)))
+#define msa_ld1_u16(__a) (*((v4u16*)(__a)))
+#define msa_ld1_u32(__a) (*((v2u32*)(__a)))
+#define msa_ld1_u64(__a) (*((v1u64*)(__a)))
+#define msa_ld1_f32(__a) (*((v2f32*)(__a)))
+#define msa_ld1_f64(__a) (*((v1f64*)(__a)))
+
+/* Load values from the given memory address to a 128-bit vector */
+#define msa_ld1q_s8(__a)  ((v16i8)__builtin_msa_ld_b(__a, 0)) /* all variants load at offset 0 and cast the result to the requested type */
+#define msa_ld1q_s16(__a) ((v8i16)__builtin_msa_ld_h(__a, 0))
+#define msa_ld1q_s32(__a) ((v4i32)__builtin_msa_ld_w(__a, 0))
+#define msa_ld1q_s64(__a) ((v2i64)__builtin_msa_ld_d(__a, 0))
+#define msa_ld1q_u8(__a)  ((v16u8)__builtin_msa_ld_b(__a, 0)) /* unsigned and float variants reuse the same-width ld builtin */
+#define msa_ld1q_u16(__a) ((v8u16)__builtin_msa_ld_h(__a, 0))
+#define msa_ld1q_u32(__a) ((v4u32)__builtin_msa_ld_w(__a, 0))
+#define msa_ld1q_u64(__a) ((v2u64)__builtin_msa_ld_d(__a, 0))
+#define msa_ld1q_f32(__a) ((v4f32)__builtin_msa_ld_w(__a, 0))
+#define msa_ld1q_f64(__a) ((v2f64)__builtin_msa_ld_d(__a, 0))
+
+/* Store 64bits vector elements values to the given memory address. */
+#define msa_st1_s8(__a, __b)  (*((v8i8*)(__a)) = __b) /* __a: destination address, __b: 64-bit vector to store */
+#define msa_st1_s16(__a, __b) (*((v4i16*)(__a)) = __b)
+#define msa_st1_s32(__a, __b) (*((v2i32*)(__a)) = __b)
+#define msa_st1_s64(__a, __b) (*((v1i64*)(__a)) = __b)
+#define msa_st1_u8(__a, __b)  (*((v8u8*)(__a)) = __b)
+#define msa_st1_u16(__a, __b) (*((v4u16*)(__a)) = __b)
+#define msa_st1_u32(__a, __b) (*((v2u32*)(__a)) = __b)
+#define msa_st1_u64(__a, __b) (*((v1u64*)(__a)) = __b)
+#define msa_st1_f32(__a, __b) (*((v2f32*)(__a)) = __b)
+#define msa_st1_f64(__a, __b) (*((v1f64*)(__a)) = __b)
+
+/* Store the values of the elements in the 128-bit vector __b to the given memory address __a. */
+#define msa_st1q_s8(__a, __b)  (__builtin_msa_st_b((v16i8)(__b), __a, 0)) /* __a: destination address, __b: vector; stored at offset 0 */
+#define msa_st1q_s16(__a, __b) (__builtin_msa_st_h((v8i16)(__b), __a, 0))
+#define msa_st1q_s32(__a, __b) (__builtin_msa_st_w((v4i32)(__b), __a, 0))
+#define msa_st1q_s64(__a, __b) (__builtin_msa_st_d((v2i64)(__b), __a, 0))
+#define msa_st1q_u8(__a, __b)  (__builtin_msa_st_b((v16i8)(__b), __a, 0)) /* unsigned and float values are cast to the signed vector type of the same width */
+#define msa_st1q_u16(__a, __b) (__builtin_msa_st_h((v8i16)(__b), __a, 0))
+#define msa_st1q_u32(__a, __b) (__builtin_msa_st_w((v4i32)(__b), __a, 0))
+#define msa_st1q_u64(__a, __b) (__builtin_msa_st_d((v2i64)(__b), __a, 0))
+#define msa_st1q_f32(__a, __b) (__builtin_msa_st_w((v4i32)(__b), __a, 0))
+#define msa_st1q_f64(__a, __b) (__builtin_msa_st_d((v2i64)(__b), __a, 0))
+
+/* Store the value of the element with the index __c in vector __b to the given memory address __a. */
+#define msa_st1_lane_s8(__a, __b, __c)   (*((int8_t*)(__a)) = (__b)[__c]) /* __b is parenthesized so a cast or arithmetic argument is subscripted as a whole */
+#define msa_st1_lane_s16(__a, __b, __c)  (*((int16_t*)(__a)) = (__b)[__c])
+#define msa_st1_lane_s32(__a, __b, __c)  (*((int32_t*)(__a)) = (__b)[__c])
+#define msa_st1_lane_s64(__a, __b, __c)  (*((int64_t*)(__a)) = (__b)[__c])
+#define msa_st1_lane_u8(__a, __b, __c)   (*((uint8_t*)(__a)) = (__b)[__c])
+#define msa_st1_lane_u16(__a, __b, __c)  (*((uint16_t*)(__a)) = (__b)[__c])
+#define msa_st1_lane_u32(__a, __b, __c)  (*((uint32_t*)(__a)) = (__b)[__c])
+#define msa_st1_lane_u64(__a, __b, __c)  (*((uint64_t*)(__a)) = (__b)[__c])
+#define msa_st1_lane_f32(__a, __b, __c)  (*((float*)(__a)) = (__b)[__c])
+#define msa_st1_lane_f64(__a, __b, __c)  (*((double*)(__a)) = (__b)[__c])
+#define msa_st1q_lane_s8(__a, __b, __c)  (*((int8_t*)(__a)) = (int8_t)__builtin_msa_copy_s_b(__b, __c)) /* 128-bit integer variants read the lane with the copy builtins */
+#define msa_st1q_lane_s16(__a, __b, __c) (*((int16_t*)(__a)) = (int16_t)__builtin_msa_copy_s_h(__b, __c))
+#define msa_st1q_lane_s32(__a, __b, __c) (*((int32_t*)(__a)) = __builtin_msa_copy_s_w(__b, __c))
+#define msa_st1q_lane_s64(__a, __b, __c) (*((int64_t*)(__a)) = __builtin_msa_copy_s_d(__b, __c))
+#define msa_st1q_lane_u8(__a, __b, __c)  (*((uint8_t*)(__a)) = (uint8_t)__builtin_msa_copy_u_b((v16i8)(__b), __c))
+#define msa_st1q_lane_u16(__a, __b, __c) (*((uint16_t*)(__a)) = (uint16_t)__builtin_msa_copy_u_h((v8i16)(__b), __c))
+#define msa_st1q_lane_u32(__a, __b, __c) (*((uint32_t*)(__a)) = __builtin_msa_copy_u_w((v4i32)(__b), __c))
+#define msa_st1q_lane_u64(__a, __b, __c) (*((uint64_t*)(__a)) = __builtin_msa_copy_u_d((v2i64)(__b), __c))
+#define msa_st1q_lane_f32(__a, __b, __c) (*((float*)(__a)) = (__b)[__c]) /* float variants subscript the vector directly */
+#define msa_st1q_lane_f64(__a, __b, __c) (*((double*)(__a)) = (__b)[__c])
+
+/* Duplicate elements for 64-bit doubleword vectors */
+#define msa_dup_n_s8(__a)  ((v8i8)__builtin_msa_copy_s_d((v2i64)__builtin_msa_fill_b((int32_t)(__a)), 0)) /* fill a 128-bit vector, then take doubleword 0 */
+#define msa_dup_n_s16(__a) ((v4i16)__builtin_msa_copy_s_d((v2i64)__builtin_msa_fill_h((int32_t)(__a)), 0))
+#define msa_dup_n_s32(__a) ((v2i32){__a, __a}) /* NOTE: the brace forms expand __a once per lane, so an argument with side effects is evaluated more than once */
+#define msa_dup_n_s64(__a) ((v1i64){__a})
+#define msa_dup_n_u8(__a)  ((v8u8)__builtin_msa_copy_u_d((v2i64)__builtin_msa_fill_b((int32_t)(__a)), 0))
+#define msa_dup_n_u16(__a) ((v4u16)__builtin_msa_copy_u_d((v2i64)__builtin_msa_fill_h((int32_t)(__a)), 0))
+#define msa_dup_n_u32(__a) ((v2u32){__a, __a})
+#define msa_dup_n_u64(__a) ((v1u64){__a})
+#define msa_dup_n_f32(__a) ((v2f32){__a, __a})
+#define msa_dup_n_f64(__a) ((v1f64){__a})
+
+/* Duplicate elements for 128-bit quadword vectors */
+#define msa_dupq_n_s8(__a)  (__builtin_msa_fill_b((int32_t)(__a))) /* _n_ forms: broadcast a scalar with the fill builtins */
+#define msa_dupq_n_s16(__a) (__builtin_msa_fill_h((int32_t)(__a)))
+#define msa_dupq_n_s32(__a) (__builtin_msa_fill_w((int32_t)(__a)))
+#define msa_dupq_n_s64(__a) (__builtin_msa_fill_d((int64_t)(__a)))
+#define msa_dupq_n_u8(__a)  ((v16u8)__builtin_msa_fill_b((int32_t)(__a)))
+#define msa_dupq_n_u16(__a) ((v8u16)__builtin_msa_fill_h((int32_t)(__a)))
+#define msa_dupq_n_u32(__a) ((v4u32)__builtin_msa_fill_w((int32_t)(__a)))
+#define msa_dupq_n_u64(__a) ((v2u64)__builtin_msa_fill_d((int64_t)(__a)))
+#define msa_dupq_n_f32(__a) ((v4f32){__a, __a, __a, __a}) /* float forms use brace initializers; __a is expanded once per lane */
+#define msa_dupq_n_f64(__a) ((v2f64){__a, __a})
+#define msa_dupq_lane_s8(__a, __b)  (__builtin_msa_splat_b(__a, __b)) /* _lane_ forms: __a is the source vector, __b the lane index passed to splat */
+#define msa_dupq_lane_s16(__a, __b) (__builtin_msa_splat_h(__a, __b))
+#define msa_dupq_lane_s32(__a, __b) (__builtin_msa_splat_w(__a, __b))
+#define msa_dupq_lane_s64(__a, __b) (__builtin_msa_splat_d(__a, __b))
+#define msa_dupq_lane_u8(__a, __b)  ((v16u8)__builtin_msa_splat_b((v16i8)(__a), __b))
+#define msa_dupq_lane_u16(__a, __b) ((v8u16)__builtin_msa_splat_h((v8i16)(__a), __b))
+#define msa_dupq_lane_u32(__a, __b) ((v4u32)__builtin_msa_splat_w((v4i32)(__a), __b))
+#define msa_dupq_lane_u64(__a, __b) ((v2u64)__builtin_msa_splat_d((v2i64)(__a), __b))
+
+/* Create a 64 bits vector */
+#define msa_create_s8(__a)  ((v8i8)((uint64_t)(__a))) /* every variant converts __a to uint64_t first, then casts that to the 64-bit vector type */
+#define msa_create_s16(__a) ((v4i16)((uint64_t)(__a)))
+#define msa_create_s32(__a) ((v2i32)((uint64_t)(__a)))
+#define msa_create_s64(__a) ((v1i64)((uint64_t)(__a)))
+#define msa_create_u8(__a)  ((v8u8)((uint64_t)(__a)))
+#define msa_create_u16(__a) ((v4u16)((uint64_t)(__a)))
+#define msa_create_u32(__a) ((v2u32)((uint64_t)(__a)))
+#define msa_create_u64(__a) ((v1u64)((uint64_t)(__a)))
+#define msa_create_f32(__a) ((v2f32)((uint64_t)(__a)))
+#define msa_create_f64(__a) ((v1f64)((uint64_t)(__a)))
+
+/* Sign extends or zero extends each element in a 64 bits vector to twice its original length, and places the results in a 128 bits vector. */
+/*Transform v8i8 to v8i16*/
+#define msa_movl_s8(__a) \
+((v8i16){(__a)[0], (__a)[1], (__a)[2], (__a)[3], \
+         (__a)[4], (__a)[5], (__a)[6], (__a)[7]})
+
+/*Transform v8u8 to v8u16*/
+#define msa_movl_u8(__a) \
+((v8u16){(__a)[0], (__a)[1], (__a)[2], (__a)[3], \
+         (__a)[4], (__a)[5], (__a)[6], (__a)[7]})
+
+/*Transform v4i16 to v4i32*/
+#define msa_movl_s16(__a) ((v4i32){(__a)[0], (__a)[1], (__a)[2], (__a)[3]})
+
+/*Transform v2i32 to v2i64*/
+#define msa_movl_s32(__a) ((v2i64){(__a)[0], (__a)[1]})
+
+/*Transform v4u16 to v4u32*/
+#define msa_movl_u16(__a) ((v4u32){(__a)[0], (__a)[1], (__a)[2], (__a)[3]})
+
+/*Transform v2u32 to v2u64*/
+#define msa_movl_u32(__a) ((v2u64){(__a)[0], (__a)[1]})
+
+/* Copies the least significant half of each element of a 128 bits vector into the corresponding elements of a 64 bits vector. */
+#define msa_movn_s16(__a) \
+({ \
+  v16i8 __d = __builtin_msa_pckev_b(__builtin_msa_fill_b(0), (v16i8)(__a)); /* pckev of a zero vector with __a */ \
+  (v8i8)__builtin_msa_copy_s_d((v2i64)__d, 0); /* value of the macro: doubleword 0 of __d as a 64-bit vector */ \
+})
+
+#define msa_movn_s32(__a) \
+({ \
+  v8i16 __d = __builtin_msa_pckev_h(__builtin_msa_fill_h(0), (v8i16)(__a)); \
+  (v4i16)__builtin_msa_copy_s_d((v2i64)__d, 0); \
+})
+
+#define msa_movn_s64(__a) \
+({ \
+  v4i32 __d = __builtin_msa_pckev_w(__builtin_msa_fill_w(0), (v4i32)(__a)); \
+  (v2i32)__builtin_msa_copy_s_d((v2i64)__d, 0); \
+})
+
+#define msa_movn_u16(__a) \
+({ \
+  v16i8 __d = __builtin_msa_pckev_b(__builtin_msa_fill_b(0), (v16i8)(__a)); /* unsigned variants differ only in copy_u_d and the result type */ \
+  (v8u8)__builtin_msa_copy_u_d((v2i64)__d, 0); \
+})
+
+#define msa_movn_u32(__a) \
+({ \
+  v8i16 __d = __builtin_msa_pckev_h(__builtin_msa_fill_h(0), (v8i16)(__a)); \
+  (v4u16)__builtin_msa_copy_u_d((v2i64)__d, 0); \
+})
+
+#define msa_movn_u64(__a) \
+({ \
+  v4i32 __d = __builtin_msa_pckev_w(__builtin_msa_fill_w(0), (v4i32)(__a)); \
+  (v2u32)__builtin_msa_copy_u_d((v2i64)__d, 0); \
+})
+
+/* qmovn */
+#define msa_qmovn_s16(__a) \
+({ \
+  v16i8 __d = __builtin_msa_pckev_b(__builtin_msa_fill_b(0), (v16i8)__builtin_msa_sat_s_h((v8i16)(__a), 7)); /* same as msa_movn_s16 but with a sat_s_h(…, 7) step applied to __a first */ \
+  (v8i8)__builtin_msa_copy_s_d((v2i64)__d, 0); \
+})
+
+#define msa_qmovn_s32(__a) \
+({ \
+  v8i16 __d = __builtin_msa_pckev_h(__builtin_msa_fill_h(0), (v8i16)__builtin_msa_sat_s_w((v4i32)(__a), 15)); \
+  (v4i16)__builtin_msa_copy_s_d((v2i64)__d, 0); \
+})
+
+#define msa_qmovn_s64(__a) \
+({ \
+  v4i32 __d = __builtin_msa_pckev_w(__builtin_msa_fill_w(0), (v4i32)__builtin_msa_sat_s_d((v2i64)(__a), 31)); \
+  (v2i32)__builtin_msa_copy_s_d((v2i64)__d, 0); \
+})
+
+#define msa_qmovn_u16(__a) \
+({ \
+  v16i8 __d = __builtin_msa_pckev_b(__builtin_msa_fill_b(0), (v16i8)__builtin_msa_sat_u_h((v8u16)(__a), 7)); /* unsigned variants use sat_u_* and copy_u_d */ \
+  (v8u8)__builtin_msa_copy_u_d((v2i64)__d, 0); \
+})
+
+#define msa_qmovn_u32(__a) \
+({ \
+  v8i16 __d = __builtin_msa_pckev_h(__builtin_msa_fill_h(0), (v8i16)__builtin_msa_sat_u_w((v4u32)(__a), 15)); \
+  (v4u16)__builtin_msa_copy_u_d((v2i64)__d, 0); \
+})
+
+#define msa_qmovn_u64(__a) \
+({ \
+  v4i32 __d = __builtin_msa_pckev_w(__builtin_msa_fill_w(0), (v4i32)__builtin_msa_sat_u_d((v2u64)(__a), 31)); \
+  (v2u32)__builtin_msa_copy_u_d((v2i64)__d, 0); \
+})
+
+/* qmovun */
+#define msa_qmovun_s16(__a) \
+({ \
+  v8i16 __d = __builtin_msa_max_s_h(__builtin_msa_fill_h(0), (v8i16)(__a)); /* signed max against zero removes negative lanes */ \
+  v16i8 __e = __builtin_msa_pckev_b(__builtin_msa_fill_b(0), (v16i8)__builtin_msa_sat_u_h((v8u16)__d, 7)); /* then the unsigned saturate + pckev steps, as in msa_qmovn_u16 */ \
+  (v8u8)__builtin_msa_copy_u_d((v2i64)__e, 0); \
+})
+
+#define msa_qmovun_s32(__a) \
+({ \
+  v4i32 __d = __builtin_msa_max_s_w(__builtin_msa_fill_w(0), (v4i32)(__a)); \
+  v8i16 __e = __builtin_msa_pckev_h(__builtin_msa_fill_h(0), (v8i16)__builtin_msa_sat_u_w((v4u32)__d, 15)); \
+  (v4u16)__builtin_msa_copy_u_d((v2i64)__e, 0); \
+})
+
+#define msa_qmovun_s64(__a) \
+({ \
+  v2i64 __d = __builtin_msa_max_s_d(__builtin_msa_fill_d(0), (v2i64)(__a)); \
+  v4i32 __e = __builtin_msa_pckev_w(__builtin_msa_fill_w(0), (v4i32)__builtin_msa_sat_u_d((v2u64)__d, 31)); \
+  (v2u32)__builtin_msa_copy_u_d((v2i64)__e, 0); \
+})
+
+/* shrn_n: shift each element of the 128-bit vector __a right by the immediate __b (srai for signed, srli for unsigned), pack the even narrow elements with pckev and return 64-bit element 0. */
+#define msa_shrn_n_s16(__a, __b) \
+({ /* v8i16 -> v8i8 */ \
+  v16i8 __pk = __builtin_msa_pckev_b(__builtin_msa_fill_b(0), (v16i8)__builtin_msa_srai_h((v8i16)(__a), (int)(__b))); \
+  (v8i8)__builtin_msa_copy_s_d((v2i64)__pk, 0); \
+})
+
+#define msa_shrn_n_s32(__a, __b) \
+({ /* v4i32 -> v4i16 */ \
+  v8i16 __pk = __builtin_msa_pckev_h(__builtin_msa_fill_h(0), (v8i16)__builtin_msa_srai_w((v4i32)(__a), (int)(__b))); \
+  (v4i16)__builtin_msa_copy_s_d((v2i64)__pk, 0); \
+})
+
+#define msa_shrn_n_s64(__a, __b) \
+({ /* v2i64 -> v2i32 */ \
+  v4i32 __pk = __builtin_msa_pckev_w(__builtin_msa_fill_w(0), (v4i32)__builtin_msa_srai_d((v2i64)(__a), (int)(__b))); \
+  (v2i32)__builtin_msa_copy_s_d((v2i64)__pk, 0); \
+})
+
+#define msa_shrn_n_u16(__a, __b) \
+({ /* v8u16 -> v8u8 */ \
+  v16i8 __pk = __builtin_msa_pckev_b(__builtin_msa_fill_b(0), (v16i8)__builtin_msa_srli_h((v8i16)(__a), (int)(__b))); \
+  (v8u8)__builtin_msa_copy_u_d((v2i64)__pk, 0); \
+})
+
+#define msa_shrn_n_u32(__a, __b) \
+({ /* v4u32 -> v4u16 */ \
+  v8i16 __pk = __builtin_msa_pckev_h(__builtin_msa_fill_h(0), (v8i16)__builtin_msa_srli_w((v4i32)(__a), (int)(__b))); \
+  (v4u16)__builtin_msa_copy_u_d((v2i64)__pk, 0); \
+})
+
+#define msa_shrn_n_u64(__a, __b) \
+({ /* v2u64 -> v2u32 */ \
+  v4i32 __pk = __builtin_msa_pckev_w(__builtin_msa_fill_w(0), (v4i32)__builtin_msa_srli_d((v2i64)(__a), (int)(__b))); \
+  (v2u32)__builtin_msa_copy_u_d((v2i64)__pk, 0); \
+})
+
+/* rshrn_n: rounding right shift (srari for signed, srlri for unsigned) of each element of the 128-bit vector __a by the immediate __b; the even narrow elements are packed with pckev and 64-bit element 0 is returned. __b is parenthesized before the (int) cast so expression arguments are cast as a whole. */
+#define msa_rshrn_n_s16(__a, __b) \
+({ \
+  v16i8 __d = __builtin_msa_pckev_b(__builtin_msa_fill_b(0), (v16i8)__builtin_msa_srari_h((v8i16)(__a), (int)(__b))); \
+  (v8i8)__builtin_msa_copy_s_d((v2i64)__d, 0); \
+})
+
+#define msa_rshrn_n_s32(__a, __b) \
+({ \
+  v8i16 __d = __builtin_msa_pckev_h(__builtin_msa_fill_h(0), (v8i16)__builtin_msa_srari_w((v4i32)(__a), (int)(__b))); \
+  (v4i16)__builtin_msa_copy_s_d((v2i64)__d, 0); \
+})
+
+#define msa_rshrn_n_s64(__a, __b) \
+({ \
+  v4i32 __d = __builtin_msa_pckev_w(__builtin_msa_fill_w(0), (v4i32)__builtin_msa_srari_d((v2i64)(__a), (int)(__b))); \
+  (v2i32)__builtin_msa_copy_s_d((v2i64)__d, 0); \
+})
+
+#define msa_rshrn_n_u16(__a, __b) \
+({ \
+  v16i8 __d = __builtin_msa_pckev_b(__builtin_msa_fill_b(0), (v16i8)__builtin_msa_srlri_h((v8i16)(__a), (int)(__b))); \
+  (v8u8)__builtin_msa_copy_u_d((v2i64)__d, 0); \
+})
+
+#define msa_rshrn_n_u32(__a, __b) \
+({ \
+  v8i16 __d = __builtin_msa_pckev_h(__builtin_msa_fill_h(0), (v8i16)__builtin_msa_srlri_w((v4i32)(__a), (int)(__b))); \
+  (v4u16)__builtin_msa_copy_u_d((v2i64)__d, 0); \
+})
+
+#define msa_rshrn_n_u64(__a, __b) \
+({ \
+  v4i32 __d = __builtin_msa_pckev_w(__builtin_msa_fill_w(0), (v4i32)__builtin_msa_srlri_d((v2i64)(__a), (int)(__b))); \
+  (v2u32)__builtin_msa_copy_u_d((v2i64)__d, 0); \
+})
+
+/* qrshrn_n: shift each element of the 128-bit vector __a right by the immediate __b with srari/srlri, saturate the results with sat_s/sat_u, and place them in a 64-bit vector (pckev, then 64-bit element 0). */
+#define msa_qrshrn_n_s16(__a, __b) \
+({ /* v8i16 -> v8i8 */ \
+  v8i16 __sat = __builtin_msa_sat_s_h(__builtin_msa_srari_h((v8i16)(__a), (int)(__b)), 7); \
+  v16i8 __pk = __builtin_msa_pckev_b(__builtin_msa_fill_b(0), (v16i8)__sat); \
+  (v8i8)__builtin_msa_copy_s_d((v2i64)__pk, 0); \
+})
+
+#define msa_qrshrn_n_s32(__a, __b) \
+({ /* v4i32 -> v4i16 */ \
+  v4i32 __sat = __builtin_msa_sat_s_w(__builtin_msa_srari_w((v4i32)(__a), (int)(__b)), 15); \
+  v8i16 __pk = __builtin_msa_pckev_h(__builtin_msa_fill_h(0), (v8i16)__sat); \
+  (v4i16)__builtin_msa_copy_s_d((v2i64)__pk, 0); \
+})
+
+#define msa_qrshrn_n_s64(__a, __b) \
+({ /* v2i64 -> v2i32 */ \
+  v2i64 __sat = __builtin_msa_sat_s_d(__builtin_msa_srari_d((v2i64)(__a), (int)(__b)), 31); \
+  v4i32 __pk = __builtin_msa_pckev_w(__builtin_msa_fill_w(0), (v4i32)__sat); \
+  (v2i32)__builtin_msa_copy_s_d((v2i64)__pk, 0); \
+})
+
+#define msa_qrshrn_n_u16(__a, __b) \
+({ /* v8u16 -> v8u8 */ \
+  v8u16 __sat = __builtin_msa_sat_u_h((v8u16)__builtin_msa_srlri_h((v8i16)(__a), (int)(__b)), 7); \
+  v16i8 __pk = __builtin_msa_pckev_b(__builtin_msa_fill_b(0), (v16i8)__sat); \
+  (v8u8)__builtin_msa_copy_u_d((v2i64)__pk, 0); \
+})
+
+#define msa_qrshrn_n_u32(__a, __b) \
+({ /* v4u32 -> v4u16 */ \
+  v4u32 __sat = __builtin_msa_sat_u_w((v4u32)__builtin_msa_srlri_w((v4i32)(__a), (int)(__b)), 15); \
+  v8i16 __pk = __builtin_msa_pckev_h(__builtin_msa_fill_h(0), (v8i16)__sat); \
+  (v4u16)__builtin_msa_copy_u_d((v2i64)__pk, 0); \
+})
+
+#define msa_qrshrn_n_u64(__a, __b) \
+({ /* v2u64 -> v2u32 */ \
+  v2u64 __sat = __builtin_msa_sat_u_d((v2u64)__builtin_msa_srlri_d((v2i64)(__a), (int)(__b)), 31); \
+  v4i32 __pk = __builtin_msa_pckev_w(__builtin_msa_fill_w(0), (v4i32)__sat); \
+  (v2u32)__builtin_msa_copy_u_d((v2i64)__pk, 0); \
+})
+
+/* qrshrun_n: signed input, unsigned output. Elements of __a are raised to at least 0 (max_s), shifted right by the immediate
+   __b with srlri, saturated with sat_u, packed with pckev, and 64-bit element 0 is returned as a 64-bit vector. */
+#define msa_qrshrun_n_s16(__a, __b) \
+({ /* v8i16 -> v8u8 */ \
+  v8i16 __shr = __builtin_msa_srlri_h(__builtin_msa_max_s_h(__builtin_msa_fill_h(0), (v8i16)(__a)), (int)(__b)); \
+  v16i8 __pk = __builtin_msa_pckev_b(__builtin_msa_fill_b(0), (v16i8)__builtin_msa_sat_u_h((v8u16)__shr, 7)); \
+  (v8u8)__builtin_msa_copy_u_d((v2i64)__pk, 0); \
+})
+
+#define msa_qrshrun_n_s32(__a, __b) \
+({ /* v4i32 -> v4u16 */ \
+  v4i32 __shr = __builtin_msa_srlri_w(__builtin_msa_max_s_w(__builtin_msa_fill_w(0), (v4i32)(__a)), (int)(__b)); \
+  v8i16 __pk = __builtin_msa_pckev_h(__builtin_msa_fill_h(0), (v8i16)__builtin_msa_sat_u_w((v4u32)__shr, 15)); \
+  (v4u16)__builtin_msa_copy_u_d((v2i64)__pk, 0); \
+})
+
+#define msa_qrshrun_n_s64(__a, __b) \
+({ /* v2i64 -> v2u32 */ \
+  v2i64 __shr = __builtin_msa_srlri_d(__builtin_msa_max_s_d(__builtin_msa_fill_d(0), (v2i64)(__a)), (int)(__b)); \
+  v4i32 __pk = __builtin_msa_pckev_w(__builtin_msa_fill_w(0), (v4i32)__builtin_msa_sat_u_d((v2u64)__shr, 31)); \
+  (v2u32)__builtin_msa_copy_u_d((v2i64)__pk, 0); \
+})
+
+/* pack: narrow two 128-bit vectors into one with pckev, without saturation - the even narrow elements of __b go to the left half of the result and those of __a to the right half. */
+#define msa_pack_s16(__a, __b) (__builtin_msa_pckev_b((v16i8)(__b), (v16i8)(__a)))
+#define msa_pack_s32(__a, __b) (__builtin_msa_pckev_h((v8i16)(__b), (v8i16)(__a)))
+#define msa_pack_s64(__a, __b) (__builtin_msa_pckev_w((v4i32)(__b), (v4i32)(__a)))
+#define msa_pack_u16(__a, __b) ((v16u8)__builtin_msa_pckev_b((v16i8)(__b), (v16i8)(__a)))
+#define msa_pack_u32(__a, __b) ((v8u16)__builtin_msa_pckev_h((v8i16)(__b), (v8i16)(__a)))
+#define msa_pack_u64(__a, __b) ((v4u32)__builtin_msa_pckev_w((v4i32)(__b), (v4i32)(__a)))
+
+/* qpack: saturating pack - both inputs are saturated with sat_s/sat_u (immediate 7/15/31) and then packed with pckev; __b supplies the left half of the result, __a the right half. */
+#define msa_qpack_s16(__a, __b) \
+(__builtin_msa_pckev_b((v16i8)__builtin_msa_sat_s_h((v8i16)(__b), 7), (v16i8)__builtin_msa_sat_s_h((v8i16)(__a), 7)))
+#define msa_qpack_s32(__a, __b) \
+(__builtin_msa_pckev_h((v8i16)__builtin_msa_sat_s_w((v4i32)(__b), 15), (v8i16)__builtin_msa_sat_s_w((v4i32)(__a), 15)))
+#define msa_qpack_s64(__a, __b) \
+(__builtin_msa_pckev_w((v4i32)__builtin_msa_sat_s_d((v2i64)(__b), 31), (v4i32)__builtin_msa_sat_s_d((v2i64)(__a), 31)))
+#define msa_qpack_u16(__a, __b) \
+((v16u8)__builtin_msa_pckev_b((v16i8)__builtin_msa_sat_u_h((v8u16)(__b), 7), (v16i8)__builtin_msa_sat_u_h((v8u16)(__a), 7)))
+#define msa_qpack_u32(__a, __b) \
+((v8u16)__builtin_msa_pckev_h((v8i16)__builtin_msa_sat_u_w((v4u32)(__b), 15), (v8i16)__builtin_msa_sat_u_w((v4u32)(__a), 15)))
+#define msa_qpack_u64(__a, __b) \
+((v4u32)__builtin_msa_pckev_w((v4i32)__builtin_msa_sat_u_d((v2u64)(__b), 31), (v4i32)__builtin_msa_sat_u_d((v2u64)(__a), 31)))
+
+/* qpacku: signed inputs, unsigned saturating pack - each input is raised to at least 0 with max_s, saturated with sat_u, then packed with pckev (__b -> left half, __a -> right half). */
+#define msa_qpacku_s16(__a, __b) \
+((v16u8)__builtin_msa_pckev_b((v16i8)__builtin_msa_sat_u_h((v8u16)(__builtin_msa_max_s_h(__builtin_msa_fill_h(0), (v8i16)(__b))), 7), \
+                              (v16i8)__builtin_msa_sat_u_h((v8u16)(__builtin_msa_max_s_h(__builtin_msa_fill_h(0), (v8i16)(__a))), 7)))
+#define msa_qpacku_s32(__a, __b) \
+((v8u16)__builtin_msa_pckev_h((v8i16)__builtin_msa_sat_u_w((v4u32)(__builtin_msa_max_s_w(__builtin_msa_fill_w(0), (v4i32)(__b))), 15), \
+                              (v8i16)__builtin_msa_sat_u_w((v4u32)(__builtin_msa_max_s_w(__builtin_msa_fill_w(0), (v4i32)(__a))), 15)))
+#define msa_qpacku_s64(__a, __b) \
+((v4u32)__builtin_msa_pckev_w((v4i32)__builtin_msa_sat_u_d((v2u64)(__builtin_msa_max_s_d(__builtin_msa_fill_d(0), (v2i64)(__b))), 31), \
+                              (v4i32)__builtin_msa_sat_u_d((v2u64)(__builtin_msa_max_s_d(__builtin_msa_fill_d(0), (v2i64)(__a))), 31)))
+
+/* packr: shift both inputs right by the immediate __c (srai for signed, srli for unsigned), then pack with pckev without saturation (__b -> left half, __a -> right half). */
+#define msa_packr_s16(__a, __b, __c) \
+(__builtin_msa_pckev_b((v16i8)__builtin_msa_srai_h((v8i16)(__b), (int)(__c)), (v16i8)__builtin_msa_srai_h((v8i16)(__a), (int)(__c))))
+#define msa_packr_s32(__a, __b, __c) \
+(__builtin_msa_pckev_h((v8i16)__builtin_msa_srai_w((v4i32)(__b), (int)(__c)), (v8i16)__builtin_msa_srai_w((v4i32)(__a), (int)(__c))))
+#define msa_packr_s64(__a, __b, __c) \
+(__builtin_msa_pckev_w((v4i32)__builtin_msa_srai_d((v2i64)(__b), (int)(__c)), (v4i32)__builtin_msa_srai_d((v2i64)(__a), (int)(__c))))
+#define msa_packr_u16(__a, __b, __c) \
+((v16u8)__builtin_msa_pckev_b((v16i8)__builtin_msa_srli_h((v8i16)(__b), (int)(__c)), (v16i8)__builtin_msa_srli_h((v8i16)(__a), (int)(__c))))
+#define msa_packr_u32(__a, __b, __c) \
+((v8u16)__builtin_msa_pckev_h((v8i16)__builtin_msa_srli_w((v4i32)(__b), (int)(__c)), (v8i16)__builtin_msa_srli_w((v4i32)(__a), (int)(__c))))
+#define msa_packr_u64(__a, __b, __c) \
+((v4u32)__builtin_msa_pckev_w((v4i32)__builtin_msa_srli_d((v2i64)(__b), (int)(__c)), (v4i32)__builtin_msa_srli_d((v2i64)(__a), (int)(__c))))
+
+/* rpackr: like a shift-then-pack, but using the rounding shifts srari (signed) / srlri (unsigned) by the immediate __c before pckev (__b -> left half, __a -> right half); no saturation. */
+#define msa_rpackr_s16(__a, __b, __c) \
+(__builtin_msa_pckev_b((v16i8)__builtin_msa_srari_h((v8i16)(__b), (int)(__c)), (v16i8)__builtin_msa_srari_h((v8i16)(__a), (int)(__c))))
+#define msa_rpackr_s32(__a, __b, __c) \
+(__builtin_msa_pckev_h((v8i16)__builtin_msa_srari_w((v4i32)(__b), (int)(__c)), (v8i16)__builtin_msa_srari_w((v4i32)(__a), (int)(__c))))
+#define msa_rpackr_s64(__a, __b, __c) \
+(__builtin_msa_pckev_w((v4i32)__builtin_msa_srari_d((v2i64)(__b), (int)(__c)), (v4i32)__builtin_msa_srari_d((v2i64)(__a), (int)(__c))))
+#define msa_rpackr_u16(__a, __b, __c) \
+((v16u8)__builtin_msa_pckev_b((v16i8)__builtin_msa_srlri_h((v8i16)(__b), (int)(__c)), (v16i8)__builtin_msa_srlri_h((v8i16)(__a), (int)(__c))))
+#define msa_rpackr_u32(__a, __b, __c) \
+((v8u16)__builtin_msa_pckev_h((v8i16)__builtin_msa_srlri_w((v4i32)(__b), (int)(__c)), (v8i16)__builtin_msa_srlri_w((v4i32)(__a), (int)(__c))))
+#define msa_rpackr_u64(__a, __b, __c) \
+((v4u32)__builtin_msa_pckev_w((v4i32)__builtin_msa_srlri_d((v2i64)(__b), (int)(__c)), (v4i32)__builtin_msa_srlri_d((v2i64)(__a), (int)(__c))))
+
+/* qrpackr: rounding shift right by the immediate __c (srari/srlri), saturate with sat_s/sat_u (immediate 7/15/31), then pack with pckev (__b -> left half, __a -> right half). */
+#define msa_qrpackr_s16(__a, __b, __c) \
+(__builtin_msa_pckev_b((v16i8)__builtin_msa_sat_s_h(__builtin_msa_srari_h((v8i16)(__b), (int)(__c)), 7), \
+                       (v16i8)__builtin_msa_sat_s_h(__builtin_msa_srari_h((v8i16)(__a), (int)(__c)), 7)))
+#define msa_qrpackr_s32(__a, __b, __c) \
+(__builtin_msa_pckev_h((v8i16)__builtin_msa_sat_s_w(__builtin_msa_srari_w((v4i32)(__b), (int)(__c)), 15), \
+                       (v8i16)__builtin_msa_sat_s_w(__builtin_msa_srari_w((v4i32)(__a), (int)(__c)), 15)))
+#define msa_qrpackr_s64(__a, __b, __c) \
+(__builtin_msa_pckev_w((v4i32)__builtin_msa_sat_s_d(__builtin_msa_srari_d((v2i64)(__b), (int)(__c)), 31), \
+                       (v4i32)__builtin_msa_sat_s_d(__builtin_msa_srari_d((v2i64)(__a), (int)(__c)), 31)))
+#define msa_qrpackr_u16(__a, __b, __c) \
+((v16u8)__builtin_msa_pckev_b((v16i8)__builtin_msa_sat_u_h((v8u16)__builtin_msa_srlri_h((v8i16)(__b), (int)(__c)), 7), \
+                              (v16i8)__builtin_msa_sat_u_h((v8u16)__builtin_msa_srlri_h((v8i16)(__a), (int)(__c)), 7)))
+#define msa_qrpackr_u32(__a, __b, __c) \
+((v8u16)__builtin_msa_pckev_h((v8i16)__builtin_msa_sat_u_w((v4u32)__builtin_msa_srlri_w((v4i32)(__b), (int)(__c)), 15), \
+                              (v8i16)__builtin_msa_sat_u_w((v4u32)__builtin_msa_srlri_w((v4i32)(__a), (int)(__c)), 15)))
+#define msa_qrpackr_u64(__a, __b, __c) \
+((v4u32)__builtin_msa_pckev_w((v4i32)__builtin_msa_sat_u_d((v2u64)__builtin_msa_srlri_d((v2i64)(__b), (int)(__c)), 31), \
+                              (v4i32)__builtin_msa_sat_u_d((v2u64)__builtin_msa_srlri_d((v2i64)(__a), (int)(__c)), 31)))
+
+/* qrpackru: signed inputs, unsigned output. Both inputs are raised to at least 0 (max_s), shifted right by the immediate __c with srlri, saturated with sat_u and packed with pckev (__b -> left half, __a -> right half). */
+#define msa_qrpackru_s16(__a, __b, __c) \
+({ /* 2 x v8i16 -> v16u8 */ \
+  v8i16 __ra = __builtin_msa_srlri_h(__builtin_msa_max_s_h(__builtin_msa_fill_h(0), (v8i16)(__a)), (int)(__c)); \
+  v8i16 __rb = __builtin_msa_srlri_h(__builtin_msa_max_s_h(__builtin_msa_fill_h(0), (v8i16)(__b)), (int)(__c)); \
+  (v16u8)__builtin_msa_pckev_b((v16i8)__builtin_msa_sat_u_h((v8u16)__rb, 7), (v16i8)__builtin_msa_sat_u_h((v8u16)__ra, 7)); \
+})
+
+#define msa_qrpackru_s32(__a, __b, __c) \
+({ /* 2 x v4i32 -> v8u16 */ \
+  v4i32 __ra = __builtin_msa_srlri_w(__builtin_msa_max_s_w(__builtin_msa_fill_w(0), (v4i32)(__a)), (int)(__c)); \
+  v4i32 __rb = __builtin_msa_srlri_w(__builtin_msa_max_s_w(__builtin_msa_fill_w(0), (v4i32)(__b)), (int)(__c)); \
+  (v8u16)__builtin_msa_pckev_h((v8i16)__builtin_msa_sat_u_w((v4u32)__rb, 15), (v8i16)__builtin_msa_sat_u_w((v4u32)__ra, 15)); \
+})
+
+#define msa_qrpackru_s64(__a, __b, __c) \
+({ /* 2 x v2i64 -> v4u32 */ \
+  v2i64 __ra = __builtin_msa_srlri_d(__builtin_msa_max_s_d(__builtin_msa_fill_d(0), (v2i64)(__a)), (int)(__c)); \
+  v2i64 __rb = __builtin_msa_srlri_d(__builtin_msa_max_s_d(__builtin_msa_fill_d(0), (v2i64)(__b)), (int)(__c)); \
+  (v4u32)__builtin_msa_pckev_w((v4i32)__builtin_msa_sat_u_d((v2u64)__rb, 31), (v4i32)__builtin_msa_sat_u_d((v2u64)__ra, 31)); \
+})
+
+/* Element-wise minimum of __a and __b. Arguments are forwarded to the builtin without casts, so they must already have the operand types the builtin expects; unsigned results are cast to the unsigned vector type. */
+#define msa_minq_s8(__a, __b)  (__builtin_msa_min_s_b(__a, __b))
+#define msa_minq_s16(__a, __b) (__builtin_msa_min_s_h(__a, __b))
+#define msa_minq_s32(__a, __b) (__builtin_msa_min_s_w(__a, __b))
+#define msa_minq_s64(__a, __b) (__builtin_msa_min_s_d(__a, __b))
+#define msa_minq_u8(__a, __b)  ((v16u8)__builtin_msa_min_u_b(__a, __b))
+#define msa_minq_u16(__a, __b) ((v8u16)__builtin_msa_min_u_h(__a, __b))
+#define msa_minq_u32(__a, __b) ((v4u32)__builtin_msa_min_u_w(__a, __b))
+#define msa_minq_u64(__a, __b) ((v2u64)__builtin_msa_min_u_d(__a, __b))
+#define msa_minq_f32(__a, __b) (__builtin_msa_fmin_w(__a, __b))
+#define msa_minq_f64(__a, __b) (__builtin_msa_fmin_d(__a, __b))
+
+/* Element-wise maximum of __a and __b. Arguments are forwarded to the builtin without casts, so they must already have the operand types the builtin expects; unsigned results are cast to the unsigned vector type. */
+#define msa_maxq_s8(__a, __b)  (__builtin_msa_max_s_b(__a, __b))
+#define msa_maxq_s16(__a, __b) (__builtin_msa_max_s_h(__a, __b))
+#define msa_maxq_s32(__a, __b) (__builtin_msa_max_s_w(__a, __b))
+#define msa_maxq_s64(__a, __b) (__builtin_msa_max_s_d(__a, __b))
+#define msa_maxq_u8(__a, __b)  ((v16u8)__builtin_msa_max_u_b(__a, __b))
+#define msa_maxq_u16(__a, __b) ((v8u16)__builtin_msa_max_u_h(__a, __b))
+#define msa_maxq_u32(__a, __b) ((v4u32)__builtin_msa_max_u_w(__a, __b))
+#define msa_maxq_u64(__a, __b) ((v2u64)__builtin_msa_max_u_d(__a, __b))
+#define msa_maxq_f32(__a, __b) (__builtin_msa_fmax_w(__a, __b))
+#define msa_maxq_f64(__a, __b) (__builtin_msa_fmax_d(__a, __b))
+
+/* Vector type reinterpretation: casts Vec to the vector type _Tpv with a plain C-style cast. */
+#define MSA_TPV_REINTERPRET(_Tpv, Vec) ((_Tpv)(Vec))
+
+/* Add the odd elements of vector __a to the even elements of vector __b, producing double-width elements in the returned vector. */
+/* v8i16 msa_hadd_s16(v16i8 __a, v16i8 __b) */
+#define msa_hadd_s16(__a, __b) (__builtin_msa_hadd_s_h((v16i8)(__a), (v16i8)(__b)))
+/* v4i32 msa_hadd_s32(v8i16 __a, v8i16 __b) */
+#define msa_hadd_s32(__a, __b) (__builtin_msa_hadd_s_w((v8i16)(__a), (v8i16)(__b)))
+/* v2i64 msa_hadd_s64(v4i32 __a, v4i32 __b) */
+#define msa_hadd_s64(__a, __b) (__builtin_msa_hadd_s_d((v4i32)(__a), (v4i32)(__b)))
+
+/* pckev: copy the even elements of __a to the left half and the even elements of __b to the right half of the returned vector. */
+#define msa_pckev_s8(__a, __b)  (__builtin_msa_pckev_b((v16i8)(__a), (v16i8)(__b)))
+#define msa_pckev_s16(__a, __b) (__builtin_msa_pckev_h((v8i16)(__a), (v8i16)(__b)))
+#define msa_pckev_s32(__a, __b) (__builtin_msa_pckev_w((v4i32)(__a), (v4i32)(__b)))
+#define msa_pckev_s64(__a, __b) (__builtin_msa_pckev_d((v2i64)(__a), (v2i64)(__b)))
+
+/* pckod: copy the odd elements of __a to the left half and the odd elements of __b to the right half of the returned vector. */
+#define msa_pckod_s8(__a, __b)  (__builtin_msa_pckod_b((v16i8)(__a), (v16i8)(__b)))
+#define msa_pckod_s16(__a, __b) (__builtin_msa_pckod_h((v8i16)(__a), (v8i16)(__b)))
+#define msa_pckod_s32(__a, __b) (__builtin_msa_pckod_w((v4i32)(__a), (v4i32)(__b)))
+#define msa_pckod_s64(__a, __b) (__builtin_msa_pckod_d((v2i64)(__a), (v2i64)(__b)))
+/* Lane-index helpers: mask x into the range 0..N; when _MIPSEB is defined the masked index is mirrored (N - index). */
+#ifdef _MIPSEB
+#define LANE_IMM0_1(x)  (0x1 - ((x) & 0x1))
+#define LANE_IMM0_3(x)  (0x3 - ((x) & 0x3))
+#define LANE_IMM0_7(x)  (0x7 - ((x) & 0x7))
+#define LANE_IMM0_15(x) (0xF - ((x) & 0xF))
+#else  /* !_MIPSEB */
+#define LANE_IMM0_1(x)  ((x) & 0x1)
+#define LANE_IMM0_3(x)  ((x) & 0x3)
+#define LANE_IMM0_7(x)  ((x) & 0x7)
+#define LANE_IMM0_15(x) ((x) & 0xF)
+#endif /* _MIPSEB */
+/* Lane extraction. msa_get_lane_* subscripts __a with the index passed through LANE_IMM0_*; msa_getq_lane_* use the copy_s/copy_u builtins (64-bit lanes only when __mips == 64, otherwise subscripting). NOTE(review): msa_get_lane_f32 masks with LANE_IMM0_3 while msa_get_lane_u32/s32 use LANE_IMM0_1 - confirm the intended lane count. */
+#define msa_get_lane_u8(__a, __b)        ((uint8_t)(__a)[LANE_IMM0_7(__b)])
+#define msa_get_lane_s8(__a, __b)        ((int8_t)(__a)[LANE_IMM0_7(__b)])
+#define msa_get_lane_u16(__a, __b)       ((uint16_t)(__a)[LANE_IMM0_3(__b)])
+#define msa_get_lane_s16(__a, __b)       ((int16_t)(__a)[LANE_IMM0_3(__b)])
+#define msa_get_lane_u32(__a, __b)       ((uint32_t)(__a)[LANE_IMM0_1(__b)])
+#define msa_get_lane_s32(__a, __b)       ((int32_t)(__a)[LANE_IMM0_1(__b)])
+#define msa_get_lane_f32(__a, __b)       ((float)(__a)[LANE_IMM0_3(__b)])
+#define msa_get_lane_s64(__a, __b)       ((int64_t)(__a)[LANE_IMM0_1(__b)])
+#define msa_get_lane_u64(__a, __b)       ((uint64_t)(__a)[LANE_IMM0_1(__b)])
+#define msa_get_lane_f64(__a, __b)       ((double)(__a)[LANE_IMM0_1(__b)])
+#define msa_getq_lane_u8(__a, imm0_15)   ((uint8_t)__builtin_msa_copy_u_b((v16i8)(__a), imm0_15))
+#define msa_getq_lane_s8(__a, imm0_15)   ((int8_t)__builtin_msa_copy_s_b(__a, imm0_15))
+#define msa_getq_lane_u16(__a, imm0_7)   ((uint16_t)__builtin_msa_copy_u_h((v8i16)(__a), imm0_7))
+#define msa_getq_lane_s16(__a, imm0_7)   ((int16_t)__builtin_msa_copy_s_h(__a, imm0_7))
+#define msa_getq_lane_u32(__a, imm0_3)   __builtin_msa_copy_u_w((v4i32)(__a), imm0_3)
+#define msa_getq_lane_s32                __builtin_msa_copy_s_w
+#define msa_getq_lane_f32(__a, __b)      ((float)(__a)[LANE_IMM0_3(__b)])
+#define msa_getq_lane_f64(__a, __b)      ((double)(__a)[LANE_IMM0_1(__b)])
+#if (__mips == 64)
+#define msa_getq_lane_u64(__a, imm0_1)   __builtin_msa_copy_u_d((v2i64)(__a), imm0_1)
+#define msa_getq_lane_s64                __builtin_msa_copy_s_d
+#else
+#define msa_getq_lane_u64(__a, imm0_1)   ((uint64_t)(__a)[LANE_IMM0_1(imm0_1)])
+#define msa_getq_lane_s64(__a, imm0_1)   ((int64_t)(__a)[LANE_IMM0_1(imm0_1)])
+#endif
+
+/* combine: build a 128-bit vector of type __TYPE from two 64-bit vectors - a supplies the first 64 bits of the initializer, b the second. With __mips == 64 this uses two 64-bit elements, otherwise four 32-bit elements. */
+#if (__mips == 64)
+#define __COMBINE_64_64(__TYPE, a, b)    ((__TYPE)((v2u64){((v1u64)(a))[0], ((v1u64)(b))[0]}))
+#else
+#define __COMBINE_64_64(__TYPE, a, b)    ((__TYPE)((v4u32){((v2u32)(a))[0], ((v2u32)(a))[1],  \
+                                                           ((v2u32)(b))[0], ((v2u32)(b))[1]}))
+#endif
+
+/* v16i8 msa_combine_s8(v8i8 __a, v8i8 __b): __a then __b, via __COMBINE_64_64 */
+#define msa_combine_s8(__a, __b)  __COMBINE_64_64(v16i8, __a, __b)
+
+/* v8i16 msa_combine_s16(v4i16 __a, v4i16 __b): __a then __b, via __COMBINE_64_64 */
+#define msa_combine_s16(__a, __b)  __COMBINE_64_64(v8i16, __a, __b)
+
+/* v4i32 msa_combine_s32(v2i32 __a, v2i32 __b): __a then __b, via __COMBINE_64_64 */
+#define msa_combine_s32(__a, __b)  __COMBINE_64_64(v4i32, __a, __b)
+
+/* v2i64 msa_combine_s64(v1i64 __a, v1i64 __b): __a then __b, via __COMBINE_64_64 */
+#define msa_combine_s64(__a, __b)  __COMBINE_64_64(v2i64, __a, __b)
+
+/* v4f32 msa_combine_f32(v2f32 __a, v2f32 __b): __a then __b, via __COMBINE_64_64 */
+#define msa_combine_f32(__a, __b)  __COMBINE_64_64(v4f32, __a, __b)
+
+/* v16u8 msa_combine_u8(v8u8 __a, v8u8 __b): __a then __b, via __COMBINE_64_64 */
+#define msa_combine_u8(__a, __b)  __COMBINE_64_64(v16u8, __a, __b)
+
+/* v8u16 msa_combine_u16(v4u16 __a, v4u16 __b): __a then __b, via __COMBINE_64_64 */
+#define msa_combine_u16(__a, __b)  __COMBINE_64_64(v8u16, __a, __b)
+
+/* v4u32 msa_combine_u32(v2u32 __a, v2u32 __b): __a then __b, via __COMBINE_64_64 */
+#define msa_combine_u32(__a, __b)  __COMBINE_64_64(v4u32, __a, __b)
+
+/* v2u64 msa_combine_u64(v1u64 __a, v1u64 __b): __a then __b, via __COMBINE_64_64 */
+#define msa_combine_u64(__a, __b)  __COMBINE_64_64(v2u64, __a, __b)
+
+/* v2f64 msa_combine_f64(v1f64 __a, v1f64 __b): __a then __b, via __COMBINE_64_64 */
+#define msa_combine_f64(__a, __b)  __COMBINE_64_64(v2f64, __a, __b)
+
+/* __GET_LOW / __GET_HIGH: return 64-bit element 0 / 1 of the 128-bit vector a, cast to __TYPE. With __mips == 64 the element is read with copy_u_d; otherwise by subscripting a v2u64 view of a. */
+#if (__mips == 64)
+#define __GET_LOW(__TYPE, a)   ((__TYPE)((v1u64)(__builtin_msa_copy_u_d((v2i64)(a), 0))))
+#define __GET_HIGH(__TYPE, a)  ((__TYPE)((v1u64)(__builtin_msa_copy_u_d((v2i64)(a), 1))))
+#else
+#define __GET_LOW(__TYPE, a)   ((__TYPE)(((v2u64)(a))[0]))
+#define __GET_HIGH(__TYPE, a)  ((__TYPE)(((v2u64)(a))[1]))
+#endif
+
+/* v8i8 msa_get_low_s8(v16i8 __a): 64-bit element 0 of __a as v8i8 */
+#define msa_get_low_s8(__a)  __GET_LOW(v8i8, __a)
+
+/* v4i16 msa_get_low_s16(v8i16 __a): 64-bit element 0 of __a as v4i16 */
+#define msa_get_low_s16(__a)  __GET_LOW(v4i16, __a)
+
+/* v2i32 msa_get_low_s32(v4i32 __a): 64-bit element 0 of __a as v2i32 */
+#define msa_get_low_s32(__a)  __GET_LOW(v2i32, __a)
+
+/* v1i64 msa_get_low_s64(v2i64 __a): 64-bit element 0 of __a as v1i64 */
+#define msa_get_low_s64(__a)  __GET_LOW(v1i64, __a)
+
+/* v8u8 msa_get_low_u8(v16u8 __a): 64-bit element 0 of __a as v8u8 */
+#define msa_get_low_u8(__a)  __GET_LOW(v8u8, __a)
+
+/* v4u16 msa_get_low_u16(v8u16 __a): 64-bit element 0 of __a as v4u16 */
+#define msa_get_low_u16(__a)  __GET_LOW(v4u16, __a)
+
+/* v2u32 msa_get_low_u32(v4u32 __a): 64-bit element 0 of __a as v2u32 */
+#define msa_get_low_u32(__a)  __GET_LOW(v2u32, __a)
+
+/* v1u64 msa_get_low_u64(v2u64 __a): 64-bit element 0 of __a as v1u64 */
+#define msa_get_low_u64(__a)  __GET_LOW(v1u64, __a)
+
+/* v2f32 msa_get_low_f32(v4f32 __a): 64-bit element 0 of __a as v2f32 */
+#define msa_get_low_f32(__a)  __GET_LOW(v2f32, __a)
+
+/* v1f64 msa_get_low_f64(v2f64 __a): 64-bit element 0 of __a as v1f64 */
+#define msa_get_low_f64(__a)  __GET_LOW(v1f64, __a)
+
+/* v8i8 msa_get_high_s8(v16i8 __a): 64-bit element 1 of __a as v8i8 */
+#define msa_get_high_s8(__a)  __GET_HIGH(v8i8, __a)
+
+/* v4i16 msa_get_high_s16(v8i16 __a): 64-bit element 1 of __a as v4i16 */
+#define msa_get_high_s16(__a)  __GET_HIGH(v4i16, __a)
+
+/* v2i32 msa_get_high_s32(v4i32 __a): 64-bit element 1 of __a as v2i32 */
+#define msa_get_high_s32(__a)  __GET_HIGH(v2i32, __a)
+
+/* v1i64 msa_get_high_s64(v2i64 __a): 64-bit element 1 of __a as v1i64 */
+#define msa_get_high_s64(__a)  __GET_HIGH(v1i64, __a)
+
+/* v8u8 msa_get_high_u8(v16u8 __a): 64-bit element 1 of __a as v8u8 */
+#define msa_get_high_u8(__a)  __GET_HIGH(v8u8, __a)
+
+/* v4u16 msa_get_high_u16(v8u16 __a): 64-bit element 1 of __a as v4u16 */
+#define msa_get_high_u16(__a)  __GET_HIGH(v4u16, __a)
+
+/* v2u32 msa_get_high_u32(v4u32 __a): 64-bit element 1 of __a as v2u32 */
+#define msa_get_high_u32(__a)  __GET_HIGH(v2u32, __a)
+
+/* v1u64 msa_get_high_u64(v2u64 __a): 64-bit element 1 of __a as v1u64 */
+#define msa_get_high_u64(__a)  __GET_HIGH(v1u64, __a)
+
+/* v2f32 msa_get_high_f32(v4f32 __a): 64-bit element 1 of __a as v2f32 */
+#define msa_get_high_f32(__a)  __GET_HIGH(v2f32, __a)
+
+/* v1f64 msa_get_high_f64(v2f64 __a): 64-bit element 1 of __a as v1f64 */
+#define msa_get_high_f64(__a)  __GET_HIGH(v1f64, __a)
+
+/* ri = ai * b[lane]: every element of __a is multiplied by the scalar read from lane __lane of __b (msa_getq_lane_f32). */
+/* v4f32 msa_mulq_lane_f32(v4f32 __a, v4f32 __b, const int __lane) */
+#define msa_mulq_lane_f32(__a, __b, __lane)  ((__a) * msa_getq_lane_f32(__b, __lane))
+
+/* ri = ai + bi * c[lane]: __b is scaled by the scalar read from lane __lane of __c, then added to __a. */
+/* v4f32 msa_mlaq_lane_f32(v4f32 __a, v4f32 __b, v4f32 __c, const int __lane) */
+#define msa_mlaq_lane_f32(__a, __b, __c, __lane)  ((__a) + ((__b) * msa_getq_lane_f32(__c, __lane)))
+
+/* uint16_t msa_sum_u16(v8u16 __a): two widening hadd_u steps, then both 64-bit partial sums are added and cast to uint16_t */
+#define msa_sum_u16(__a)                                 \
+({                                                       \
+  v4u32 _quad32;                                         \
+  v2u64 _pair64;                                         \
+  _quad32 = __builtin_msa_hadd_u_w(__a, __a);            \
+  _pair64 = __builtin_msa_hadd_u_d(_quad32, _quad32);    \
+  (uint16_t)(_pair64[0] + _pair64[1]);                   \
+})
+
+/* int16_t msa_sum_s16(v8i16 __a): two widening hadd_s steps, then both 64-bit partial sums are added and cast to int16_t */
+#define msa_sum_s16(__a)                                 \
+({                                                       \
+  v4i32 _quad32;                                         \
+  v2i64 _pair64;                                         \
+  _quad32 = __builtin_msa_hadd_s_w(__a, __a);            \
+  _pair64 = __builtin_msa_hadd_s_d(_quad32, _quad32);    \
+  (int16_t)(_pair64[0] + _pair64[1]);                    \
+})
+
+
+/* uint32_t msa_sum_u32(v4u32 __a): one widening hadd_u step, then both 64-bit partial sums are added and cast to uint32_t */
+#define msa_sum_u32(__a)                                 \
+({                                                       \
+  v2u64 _pair64;                                         \
+  _pair64 = __builtin_msa_hadd_u_d(__a, __a);            \
+  (uint32_t)(_pair64[0] + _pair64[1]);                   \
+})
+
+/* int32_t msa_sum_s32(v4i32 __a): one widening hadd_s step, then both 64-bit partial sums are added and cast to int32_t */
+#define msa_sum_s32(__a)                                 \
+({                                                       \
+  v2i64 _pair64;                                         \
+  _pair64 = __builtin_msa_hadd_s_d(__a, __a);            \
+  (int32_t)(_pair64[0] + _pair64[1]);                    \
+})
+
+/* uint8_t msa_sum_u8(v16u8 __a): widen twice with hadd_u, finish with msa_sum_u32 and cast the result to uint8_t */
+#define msa_sum_u8(__a)                                  \
+({                                                       \
+  v8u16 _oct16;                                          \
+  v4u32 _quad32;                                         \
+  _oct16 = __builtin_msa_hadd_u_h(__a, __a);             \
+  _quad32 = __builtin_msa_hadd_u_w(_oct16, _oct16);      \
+  (uint8_t)msa_sum_u32(_quad32);                         \
+})
+
+/* int8_t msa_sum_s8(v16i8 __a): widen twice with hadd_s, finish with msa_sum_s32 and cast the result to int8_t */
+#define msa_sum_s8(__a)                                  \
+({                                                       \
+  v8i16 _oct16;                                          \
+  v4i32 _quad32;                                         \
+  _oct16 = __builtin_msa_hadd_s_h(__a, __a);             \
+  _quad32 = __builtin_msa_hadd_s_w(_oct16, _oct16);      \
+  (int8_t)msa_sum_s32(_quad32);                          \
+})
+
+/* float msa_sum_f32(v4f32 __a): sum of lanes 0..3 of __a, added left to right starting from lane 0 */
+#define msa_sum_f32(__a)  ((__a)[0] + (__a)[1] + (__a)[2] + (__a)[3])
+
+/* v8u16 msa_paddlq_u8(v16u8 __a): hadd of __a with itself, i.e. odd + even elements summed into double-width elements */
+#define msa_paddlq_u8(__a)  (__builtin_msa_hadd_u_h(__a, __a))
+
+/* v8i16 msa_paddlq_s8(v16i8 __a): signed hadd of __a with itself */
+#define msa_paddlq_s8(__a)  (__builtin_msa_hadd_s_h(__a, __a))
+
+/* v4u32 msa_paddlq_u16(v8u16 __a): unsigned hadd of __a with itself */
+#define msa_paddlq_u16(__a)  (__builtin_msa_hadd_u_w(__a, __a))
+
+/* v4i32 msa_paddlq_s16(v8i16 __a): signed hadd of __a with itself */
+#define msa_paddlq_s16(__a)  (__builtin_msa_hadd_s_w(__a, __a))
+
+/* v2u64 msa_paddlq_u32(v4u32 __a): unsigned hadd of __a with itself */
+#define msa_paddlq_u32(__a)  (__builtin_msa_hadd_u_d(__a, __a))
+
+/* v2i64 msa_paddlq_s32(v4i32 __a): signed hadd of __a with itself */
+#define msa_paddlq_s32(__a)  (__builtin_msa_hadd_s_d(__a, __a))
+/* Lane-by-lane widening initializer lists. x is parenthesized so any subscriptable expression may be passed; note that x is evaluated once per lane. */
+#define V8U8_2_V8U16(x)   {(uint16_t)(x)[0], (uint16_t)(x)[1], (uint16_t)(x)[2], (uint16_t)(x)[3], \
+                           (uint16_t)(x)[4], (uint16_t)(x)[5], (uint16_t)(x)[6], (uint16_t)(x)[7]}
+#define V8U8_2_V8I16(x)   {(int16_t)(x)[0], (int16_t)(x)[1], (int16_t)(x)[2], (int16_t)(x)[3], \
+                           (int16_t)(x)[4], (int16_t)(x)[5], (int16_t)(x)[6], (int16_t)(x)[7]}
+#define V8I8_2_V8I16(x)   {(int16_t)(x)[0], (int16_t)(x)[1], (int16_t)(x)[2], (int16_t)(x)[3], \
+                           (int16_t)(x)[4], (int16_t)(x)[5], (int16_t)(x)[6], (int16_t)(x)[7]}
+#define V4U16_2_V4U32(x)  {(uint32_t)(x)[0], (uint32_t)(x)[1], (uint32_t)(x)[2], (uint32_t)(x)[3]}
+#define V4U16_2_V4I32(x)  {(int32_t)(x)[0], (int32_t)(x)[1], (int32_t)(x)[2], (int32_t)(x)[3]}
+#define V4I16_2_V4I32(x)  {(int32_t)(x)[0], (int32_t)(x)[1], (int32_t)(x)[2], (int32_t)(x)[3]}
+#define V2U32_2_V2U64(x)  {(uint64_t)(x)[0], (uint64_t)(x)[1]}
+#define V2U32_2_V2I64(x)  {(int64_t)(x)[0], (int64_t)(x)[1]}
+
+/* v8u16 msa_mull_u8(v8u8 __a, v8u8 __b): each lane is widened to 16 bits (V8U8_2_V8I16), then multiplied with mulv_h */
+#define msa_mull_u8(__a, __b)  ((v8u16)__builtin_msa_mulv_h((v8i16)V8U8_2_V8I16(__a), (v8i16)V8U8_2_V8I16(__b)))
+
+/* v8i16 msa_mull_s8(v8i8 __a, v8i8 __b): each lane is widened to 16 bits (V8I8_2_V8I16), then multiplied with mulv_h */
+#define msa_mull_s8(__a, __b)  (__builtin_msa_mulv_h((v8i16)V8I8_2_V8I16(__a), (v8i16)V8I8_2_V8I16(__b)))
+
+/* v4u32 msa_mull_u16(v4u16 __a, v4u16 __b): each lane is widened to 32 bits (V4U16_2_V4I32), then multiplied with mulv_w */
+#define msa_mull_u16(__a, __b)  ((v4u32)__builtin_msa_mulv_w((v4i32)V4U16_2_V4I32(__a), (v4i32)V4U16_2_V4I32(__b)))
+
+/* v4i32 msa_mull_s16(v4i16 __a, v4i16 __b): each lane is widened to 32 bits (V4I16_2_V4I32), then multiplied with mulv_w */
+#define msa_mull_s16(__a, __b)  (__builtin_msa_mulv_w((v4i32)V4I16_2_V4I32(__a), (v4i32)V4I16_2_V4I32(__b)))
+
+/* v2u64 msa_mull_u32(v2u32 __a, v2u32 __b): each lane is widened to 64 bits (V2U32_2_V2I64), then multiplied with mulv_d */
+#define msa_mull_u32(__a, __b)  ((v2u64)__builtin_msa_mulv_d((v2i64)V2U32_2_V2I64(__a), (v2i64)V2U32_2_V2I64(__b)))
+
+/* Bitwise AND: both operands are viewed as v16u8 for __builtin_msa_and_v and the result is cast to the vector type named by the suffix. */
+#define msa_andq_u8(__a, __b)  ((v16u8)__builtin_msa_and_v((v16u8)(__a), (v16u8)(__b)))
+#define msa_andq_s8(__a, __b)  ((v16i8)__builtin_msa_and_v((v16u8)(__a), (v16u8)(__b)))
+#define msa_andq_u16(__a, __b) ((v8u16)__builtin_msa_and_v((v16u8)(__a), (v16u8)(__b)))
+#define msa_andq_s16(__a, __b) ((v8i16)__builtin_msa_and_v((v16u8)(__a), (v16u8)(__b)))
+#define msa_andq_u32(__a, __b) ((v4u32)__builtin_msa_and_v((v16u8)(__a), (v16u8)(__b)))
+#define msa_andq_s32(__a, __b) ((v4i32)__builtin_msa_and_v((v16u8)(__a), (v16u8)(__b)))
+#define msa_andq_u64(__a, __b) ((v2u64)__builtin_msa_and_v((v16u8)(__a), (v16u8)(__b)))
+#define msa_andq_s64(__a, __b) ((v2i64)__builtin_msa_and_v((v16u8)(__a), (v16u8)(__b)))
+
+/* Bitwise OR: both operands are viewed as v16u8 for __builtin_msa_or_v and the result is cast to the vector type named by the suffix. */
+#define msa_orrq_u8(__a, __b)  ((v16u8)__builtin_msa_or_v((v16u8)(__a), (v16u8)(__b)))
+#define msa_orrq_s8(__a, __b)  ((v16i8)__builtin_msa_or_v((v16u8)(__a), (v16u8)(__b)))
+#define msa_orrq_u16(__a, __b) ((v8u16)__builtin_msa_or_v((v16u8)(__a), (v16u8)(__b)))
+#define msa_orrq_s16(__a, __b) ((v8i16)__builtin_msa_or_v((v16u8)(__a), (v16u8)(__b)))
+#define msa_orrq_u32(__a, __b) ((v4u32)__builtin_msa_or_v((v16u8)(__a), (v16u8)(__b)))
+#define msa_orrq_s32(__a, __b) ((v4i32)__builtin_msa_or_v((v16u8)(__a), (v16u8)(__b)))
+#define msa_orrq_u64(__a, __b) ((v2u64)__builtin_msa_or_v((v16u8)(__a), (v16u8)(__b)))
+#define msa_orrq_s64(__a, __b) ((v2i64)__builtin_msa_or_v((v16u8)(__a), (v16u8)(__b)))
+
+/* Bitwise XOR: both operands are viewed as v16u8 for __builtin_msa_xor_v and the result is cast to the vector type named by the suffix. */
+#define msa_eorq_u8(__a, __b)  ((v16u8)__builtin_msa_xor_v((v16u8)(__a), (v16u8)(__b)))
+#define msa_eorq_s8(__a, __b)  ((v16i8)__builtin_msa_xor_v((v16u8)(__a), (v16u8)(__b)))
+#define msa_eorq_u16(__a, __b) ((v8u16)__builtin_msa_xor_v((v16u8)(__a), (v16u8)(__b)))
+#define msa_eorq_s16(__a, __b) ((v8i16)__builtin_msa_xor_v((v16u8)(__a), (v16u8)(__b)))
+#define msa_eorq_u32(__a, __b) ((v4u32)__builtin_msa_xor_v((v16u8)(__a), (v16u8)(__b)))
+#define msa_eorq_s32(__a, __b) ((v4i32)__builtin_msa_xor_v((v16u8)(__a), (v16u8)(__b)))
+#define msa_eorq_u64(__a, __b) ((v2u64)__builtin_msa_xor_v((v16u8)(__a), (v16u8)(__b)))
+#define msa_eorq_s64(__a, __b) ((v2i64)__builtin_msa_xor_v((v16u8)(__a), (v16u8)(__b)))
+
+/* Bitwise NOT: every byte of __a (viewed as v16u8) is XORed with 0xFF via __builtin_msa_xori_b; the result is cast to the vector type named by the suffix. */
+#define msa_mvnq_u8(__a)  ((v16u8)__builtin_msa_xori_b((v16u8)(__a), 0xFF))
+#define msa_mvnq_s8(__a)  ((v16i8)__builtin_msa_xori_b((v16u8)(__a), 0xFF))
+#define msa_mvnq_u16(__a) ((v8u16)__builtin_msa_xori_b((v16u8)(__a), 0xFF))
+#define msa_mvnq_s16(__a) ((v8i16)__builtin_msa_xori_b((v16u8)(__a), 0xFF))
+#define msa_mvnq_u32(__a) ((v4u32)__builtin_msa_xori_b((v16u8)(__a), 0xFF))
+#define msa_mvnq_s32(__a) ((v4i32)__builtin_msa_xori_b((v16u8)(__a), 0xFF))
+#define msa_mvnq_u64(__a) ((v2u64)__builtin_msa_xori_b((v16u8)(__a), 0xFF))
+#define msa_mvnq_s64(__a) ((v2i64)__builtin_msa_xori_b((v16u8)(__a), 0xFF))
+
+/* Compare equal: ceq -> ri = ai == bi ? 1...1 : 0...0. The mask is always returned as an unsigned vector of the same element width, for signed and float inputs too. */
+#define msa_ceqq_u8(__a, __b)  ((v16u8)__builtin_msa_ceq_b((v16i8)(__a), (v16i8)(__b)))
+#define msa_ceqq_s8(__a, __b)  ((v16u8)__builtin_msa_ceq_b((v16i8)(__a), (v16i8)(__b)))
+#define msa_ceqq_u16(__a, __b) ((v8u16)__builtin_msa_ceq_h((v8i16)(__a), (v8i16)(__b)))
+#define msa_ceqq_s16(__a, __b) ((v8u16)__builtin_msa_ceq_h((v8i16)(__a), (v8i16)(__b)))
+#define msa_ceqq_u32(__a, __b) ((v4u32)__builtin_msa_ceq_w((v4i32)(__a), (v4i32)(__b)))
+#define msa_ceqq_s32(__a, __b) ((v4u32)__builtin_msa_ceq_w((v4i32)(__a), (v4i32)(__b)))
+#define msa_ceqq_f32(__a, __b) ((v4u32)__builtin_msa_fceq_w((v4f32)(__a), (v4f32)(__b)))
+#define msa_ceqq_u64(__a, __b) ((v2u64)__builtin_msa_ceq_d((v2i64)(__a), (v2i64)(__b)))
+#define msa_ceqq_s64(__a, __b) ((v2u64)__builtin_msa_ceq_d((v2i64)(__a), (v2i64)(__b)))
+#define msa_ceqq_f64(__a, __b) ((v2u64)__builtin_msa_fceq_d((v2f64)(__a), (v2f64)(__b)))
+
+/* Compare less-than (clt): ri = (ai < bi) ? all-ones : all-zeros; signed/unsigned/float builtin chosen per suffix. */
+#define msa_cltq_u8(a, b)  ((v16u8)__builtin_msa_clt_u_b((v16u8)(a), (v16u8)(b)))
+#define msa_cltq_s8(a, b)  ((v16u8)__builtin_msa_clt_s_b((v16i8)(a), (v16i8)(b)))
+#define msa_cltq_u16(a, b) ((v8u16)__builtin_msa_clt_u_h((v8u16)(a), (v8u16)(b)))
+#define msa_cltq_s16(a, b) ((v8u16)__builtin_msa_clt_s_h((v8i16)(a), (v8i16)(b)))
+#define msa_cltq_u32(a, b) ((v4u32)__builtin_msa_clt_u_w((v4u32)(a), (v4u32)(b)))
+#define msa_cltq_s32(a, b) ((v4u32)__builtin_msa_clt_s_w((v4i32)(a), (v4i32)(b)))
+#define msa_cltq_f32(a, b) ((v4u32)__builtin_msa_fclt_w((v4f32)(a), (v4f32)(b)))
+#define msa_cltq_u64(a, b) ((v2u64)__builtin_msa_clt_u_d((v2u64)(a), (v2u64)(b)))
+#define msa_cltq_s64(a, b) ((v2u64)__builtin_msa_clt_s_d((v2i64)(a), (v2i64)(b)))
+#define msa_cltq_f64(a, b) ((v2u64)__builtin_msa_fclt_d((v2f64)(a), (v2f64)(b)))
+
+/* Compare greater-than (cgt): ri = (ai > bi) ? all-ones : all-zeros; implemented as clt with the operands swapped. */
+#define msa_cgtq_u8(a, b)  ((v16u8)__builtin_msa_clt_u_b((v16u8)(b), (v16u8)(a)))
+#define msa_cgtq_s8(a, b)  ((v16u8)__builtin_msa_clt_s_b((v16i8)(b), (v16i8)(a)))
+#define msa_cgtq_u16(a, b) ((v8u16)__builtin_msa_clt_u_h((v8u16)(b), (v8u16)(a)))
+#define msa_cgtq_s16(a, b) ((v8u16)__builtin_msa_clt_s_h((v8i16)(b), (v8i16)(a)))
+#define msa_cgtq_u32(a, b) ((v4u32)__builtin_msa_clt_u_w((v4u32)(b), (v4u32)(a)))
+#define msa_cgtq_s32(a, b) ((v4u32)__builtin_msa_clt_s_w((v4i32)(b), (v4i32)(a)))
+#define msa_cgtq_f32(a, b) ((v4u32)__builtin_msa_fclt_w((v4f32)(b), (v4f32)(a)))
+#define msa_cgtq_u64(a, b) ((v2u64)__builtin_msa_clt_u_d((v2u64)(b), (v2u64)(a)))
+#define msa_cgtq_s64(a, b) ((v2u64)__builtin_msa_clt_s_d((v2i64)(b), (v2i64)(a)))
+#define msa_cgtq_f64(a, b) ((v2u64)__builtin_msa_fclt_d((v2f64)(b), (v2f64)(a)))
+
+/* Compare less-or-equal (cle): ri = (ai <= bi) ? all-ones : all-zeros; signed/unsigned/float builtin chosen per suffix. */
+#define msa_cleq_u8(a, b)  ((v16u8)__builtin_msa_cle_u_b((v16u8)(a), (v16u8)(b)))
+#define msa_cleq_s8(a, b)  ((v16u8)__builtin_msa_cle_s_b((v16i8)(a), (v16i8)(b)))
+#define msa_cleq_u16(a, b) ((v8u16)__builtin_msa_cle_u_h((v8u16)(a), (v8u16)(b)))
+#define msa_cleq_s16(a, b) ((v8u16)__builtin_msa_cle_s_h((v8i16)(a), (v8i16)(b)))
+#define msa_cleq_u32(a, b) ((v4u32)__builtin_msa_cle_u_w((v4u32)(a), (v4u32)(b)))
+#define msa_cleq_s32(a, b) ((v4u32)__builtin_msa_cle_s_w((v4i32)(a), (v4i32)(b)))
+#define msa_cleq_f32(a, b) ((v4u32)__builtin_msa_fcle_w((v4f32)(a), (v4f32)(b)))
+#define msa_cleq_u64(a, b) ((v2u64)__builtin_msa_cle_u_d((v2u64)(a), (v2u64)(b)))
+#define msa_cleq_s64(a, b) ((v2u64)__builtin_msa_cle_s_d((v2i64)(a), (v2i64)(b)))
+#define msa_cleq_f64(a, b) ((v2u64)__builtin_msa_fcle_d((v2f64)(a), (v2f64)(b)))
+
+/* Compare greater-or-equal (cge): ri = (ai >= bi) ? all-ones : all-zeros; implemented as cle with the operands swapped. */
+#define msa_cgeq_u8(a, b)  ((v16u8)__builtin_msa_cle_u_b((v16u8)(b), (v16u8)(a)))
+#define msa_cgeq_s8(a, b)  ((v16u8)__builtin_msa_cle_s_b((v16i8)(b), (v16i8)(a)))
+#define msa_cgeq_u16(a, b) ((v8u16)__builtin_msa_cle_u_h((v8u16)(b), (v8u16)(a)))
+#define msa_cgeq_s16(a, b) ((v8u16)__builtin_msa_cle_s_h((v8i16)(b), (v8i16)(a)))
+#define msa_cgeq_u32(a, b) ((v4u32)__builtin_msa_cle_u_w((v4u32)(b), (v4u32)(a)))
+#define msa_cgeq_s32(a, b) ((v4u32)__builtin_msa_cle_s_w((v4i32)(b), (v4i32)(a)))
+#define msa_cgeq_f32(a, b) ((v4u32)__builtin_msa_fcle_w((v4f32)(b), (v4f32)(a)))
+#define msa_cgeq_u64(a, b) ((v2u64)__builtin_msa_cle_u_d((v2u64)(b), (v2u64)(a)))
+#define msa_cgeq_s64(a, b) ((v2u64)__builtin_msa_cle_s_d((v2i64)(b), (v2i64)(a)))
+#define msa_cgeq_f64(a, b) ((v2u64)__builtin_msa_fcle_d((v2f64)(b), (v2f64)(a)))
+
+/* Shift left logical by a per-lane amount (shl): ri = ai << bi; the same sll builtin serves signed and unsigned lanes. */
+#define msa_shlq_u8(a, b)  ((v16u8)__builtin_msa_sll_b((v16i8)(a), (v16i8)(b)))
+#define msa_shlq_s8(a, b)  ((v16i8)__builtin_msa_sll_b((v16i8)(a), (v16i8)(b)))
+#define msa_shlq_u16(a, b) ((v8u16)__builtin_msa_sll_h((v8i16)(a), (v8i16)(b)))
+#define msa_shlq_s16(a, b) ((v8i16)__builtin_msa_sll_h((v8i16)(a), (v8i16)(b)))
+#define msa_shlq_u32(a, b) ((v4u32)__builtin_msa_sll_w((v4i32)(a), (v4i32)(b)))
+#define msa_shlq_s32(a, b) ((v4i32)__builtin_msa_sll_w((v4i32)(a), (v4i32)(b)))
+#define msa_shlq_u64(a, b) ((v2u64)__builtin_msa_sll_d((v2i64)(a), (v2i64)(b)))
+#define msa_shlq_s64(a, b) ((v2i64)__builtin_msa_sll_d((v2i64)(a), (v2i64)(b)))
+
+/* Shift left logical by an immediate (shl_n): ri = ai << imm; imm is passed straight to the slli builtin. */
+#define msa_shlq_n_u8(a, imm)  ((v16u8)__builtin_msa_slli_b((v16i8)(a), imm))
+#define msa_shlq_n_s8(a, imm)  ((v16i8)__builtin_msa_slli_b((v16i8)(a), imm))
+#define msa_shlq_n_u16(a, imm) ((v8u16)__builtin_msa_slli_h((v8i16)(a), imm))
+#define msa_shlq_n_s16(a, imm) ((v8i16)__builtin_msa_slli_h((v8i16)(a), imm))
+#define msa_shlq_n_u32(a, imm) ((v4u32)__builtin_msa_slli_w((v4i32)(a), imm))
+#define msa_shlq_n_s32(a, imm) ((v4i32)__builtin_msa_slli_w((v4i32)(a), imm))
+#define msa_shlq_n_u64(a, imm) ((v2u64)__builtin_msa_slli_d((v2i64)(a), imm))
+#define msa_shlq_n_s64(a, imm) ((v2i64)__builtin_msa_slli_d((v2i64)(a), imm))
+
+/* Shift right by a per-lane amount (shr): ri = ai >> bi; logical (srl) for unsigned lanes, arithmetic (sra) for signed lanes. */
+#define msa_shrq_u8(a, b)  ((v16u8)__builtin_msa_srl_b((v16i8)(a), (v16i8)(b)))
+#define msa_shrq_s8(a, b)  ((v16i8)__builtin_msa_sra_b((v16i8)(a), (v16i8)(b)))
+#define msa_shrq_u16(a, b) ((v8u16)__builtin_msa_srl_h((v8i16)(a), (v8i16)(b)))
+#define msa_shrq_s16(a, b) ((v8i16)__builtin_msa_sra_h((v8i16)(a), (v8i16)(b)))
+#define msa_shrq_u32(a, b) ((v4u32)__builtin_msa_srl_w((v4i32)(a), (v4i32)(b)))
+#define msa_shrq_s32(a, b) ((v4i32)__builtin_msa_sra_w((v4i32)(a), (v4i32)(b)))
+#define msa_shrq_u64(a, b) ((v2u64)__builtin_msa_srl_d((v2i64)(a), (v2i64)(b)))
+#define msa_shrq_s64(a, b) ((v2i64)__builtin_msa_sra_d((v2i64)(a), (v2i64)(b)))
+
+/* Shift right by an immediate (shr_n): ri = ai >> imm; logical (srli) for unsigned lanes, arithmetic (srai) for signed lanes. */
+#define msa_shrq_n_u8(a, imm)  ((v16u8)__builtin_msa_srli_b((v16i8)(a), imm))
+#define msa_shrq_n_s8(a, imm)  ((v16i8)__builtin_msa_srai_b((v16i8)(a), imm))
+#define msa_shrq_n_u16(a, imm) ((v8u16)__builtin_msa_srli_h((v8i16)(a), imm))
+#define msa_shrq_n_s16(a, imm) ((v8i16)__builtin_msa_srai_h((v8i16)(a), imm))
+#define msa_shrq_n_u32(a, imm) ((v4u32)__builtin_msa_srli_w((v4i32)(a), imm))
+#define msa_shrq_n_s32(a, imm) ((v4i32)__builtin_msa_srai_w((v4i32)(a), imm))
+#define msa_shrq_n_u64(a, imm) ((v2u64)__builtin_msa_srli_d((v2i64)(a), imm))
+#define msa_shrq_n_s64(a, imm) ((v2i64)__builtin_msa_srai_d((v2i64)(a), imm))
+
+/* Rounding shift right by an immediate (rshr_n): ri = ai >> imm with rounding; srlri for unsigned lanes, srari for signed lanes. */
+#define msa_rshrq_n_u8(a, imm)  ((v16u8)__builtin_msa_srlri_b((v16i8)(a), imm))
+#define msa_rshrq_n_s8(a, imm)  ((v16i8)__builtin_msa_srari_b((v16i8)(a), imm))
+#define msa_rshrq_n_u16(a, imm) ((v8u16)__builtin_msa_srlri_h((v8i16)(a), imm))
+#define msa_rshrq_n_s16(a, imm) ((v8i16)__builtin_msa_srari_h((v8i16)(a), imm))
+#define msa_rshrq_n_u32(a, imm) ((v4u32)__builtin_msa_srlri_w((v4i32)(a), imm))
+#define msa_rshrq_n_s32(a, imm) ((v4i32)__builtin_msa_srari_w((v4i32)(a), imm))
+#define msa_rshrq_n_u64(a, imm) ((v2u64)__builtin_msa_srlri_d((v2i64)(a), imm))
+#define msa_rshrq_n_s64(a, imm) ((v2i64)__builtin_msa_srari_d((v2i64)(a), imm))
+
+/* Vector rounding shift right (arithmetic) by a per-lane amount: ri = ai >> bi with rounding (srar); no saturation is applied. */
+#define msa_qrshrq_s32(a, b)  ((v4i32)__builtin_msa_srar_w((v4i32)(a), (v4i32)(b)))
+
+/* Aliases that rename the MSA builtins to the msa_* naming style used by intrin_msa.hpp. */
+#define msa_qaddq_u8          __builtin_msa_adds_u_b /* qaddq: saturating add (adds) */
+#define msa_qaddq_s8          __builtin_msa_adds_s_b
+#define msa_qaddq_u16         __builtin_msa_adds_u_h
+#define msa_qaddq_s16         __builtin_msa_adds_s_h
+#define msa_qaddq_u32         __builtin_msa_adds_u_w
+#define msa_qaddq_s32         __builtin_msa_adds_s_w
+#define msa_qaddq_u64         __builtin_msa_adds_u_d
+#define msa_qaddq_s64         __builtin_msa_adds_s_d
+#define msa_addq_u8(a, b)     ((v16u8)__builtin_msa_addv_b((v16i8)(a), (v16i8)(b))) /* addq: unsigned variants cast through the signed addv builtin */
+#define msa_addq_s8           __builtin_msa_addv_b
+#define msa_addq_u16(a, b)    ((v8u16)__builtin_msa_addv_h((v8i16)(a), (v8i16)(b)))
+#define msa_addq_s16          __builtin_msa_addv_h
+#define msa_addq_u32(a, b)    ((v4u32)__builtin_msa_addv_w((v4i32)(a), (v4i32)(b)))
+#define msa_addq_s32          __builtin_msa_addv_w
+#define msa_addq_f32          __builtin_msa_fadd_w
+#define msa_addq_u64(a, b)    ((v2u64)__builtin_msa_addv_d((v2i64)(a), (v2i64)(b)))
+#define msa_addq_s64          __builtin_msa_addv_d
+#define msa_addq_f64          __builtin_msa_fadd_d
+#define msa_qsubq_u8          __builtin_msa_subs_u_b /* qsubq: saturating subtract (subs) */
+#define msa_qsubq_s8          __builtin_msa_subs_s_b
+#define msa_qsubq_u16         __builtin_msa_subs_u_h
+#define msa_qsubq_s16         __builtin_msa_subs_s_h
+#define msa_subq_u8(a, b)     ((v16u8)__builtin_msa_subv_b((v16i8)(a), (v16i8)(b))) /* subq: unsigned variants cast through the signed subv builtin */
+#define msa_subq_s8           __builtin_msa_subv_b
+#define msa_subq_u16(a, b)    ((v8u16)__builtin_msa_subv_h((v8i16)(a), (v8i16)(b)))
+#define msa_subq_s16          __builtin_msa_subv_h
+#define msa_subq_u32(a, b)    ((v4u32)__builtin_msa_subv_w((v4i32)(a), (v4i32)(b)))
+#define msa_subq_s32          __builtin_msa_subv_w
+#define msa_subq_f32          __builtin_msa_fsub_w
+#define msa_subq_u64(a, b)    ((v2u64)__builtin_msa_subv_d((v2i64)(a), (v2i64)(b)))
+#define msa_subq_s64          __builtin_msa_subv_d
+#define msa_subq_f64          __builtin_msa_fsub_d
+#define msa_mulq_u8(a, b)     ((v16u8)__builtin_msa_mulv_b((v16i8)(a), (v16i8)(b))) /* mulq: every integer variant goes through the signed mulv builtin */
+#define msa_mulq_s8(a, b)     ((v16i8)__builtin_msa_mulv_b((v16i8)(a), (v16i8)(b)))
+#define msa_mulq_u16(a, b)    ((v8u16)__builtin_msa_mulv_h((v8i16)(a), (v8i16)(b)))
+#define msa_mulq_s16(a, b)    ((v8i16)__builtin_msa_mulv_h((v8i16)(a), (v8i16)(b)))
+#define msa_mulq_u32(a, b)    ((v4u32)__builtin_msa_mulv_w((v4i32)(a), (v4i32)(b)))
+#define msa_mulq_s32(a, b)    ((v4i32)__builtin_msa_mulv_w((v4i32)(a), (v4i32)(b)))
+#define msa_mulq_u64(a, b)    ((v2u64)__builtin_msa_mulv_d((v2i64)(a), (v2i64)(b)))
+#define msa_mulq_s64(a, b)    ((v2i64)__builtin_msa_mulv_d((v2i64)(a), (v2i64)(b)))
+#define msa_mulq_f32          __builtin_msa_fmul_w
+#define msa_mulq_f64          __builtin_msa_fmul_d
+#define msa_divq_f32          __builtin_msa_fdiv_w
+#define msa_divq_f64          __builtin_msa_fdiv_d
+#define msa_dotp_s_h          __builtin_msa_dotp_s_h /* dotp / dpadd: names kept identical to the builtin suffixes */
+#define msa_dotp_s_w          __builtin_msa_dotp_s_w
+#define msa_dotp_s_d          __builtin_msa_dotp_s_d
+#define msa_dotp_u_h          __builtin_msa_dotp_u_h
+#define msa_dotp_u_w          __builtin_msa_dotp_u_w
+#define msa_dotp_u_d          __builtin_msa_dotp_u_d
+#define msa_dpadd_s_h         __builtin_msa_dpadd_s_h
+#define msa_dpadd_s_w         __builtin_msa_dpadd_s_w
+#define msa_dpadd_s_d         __builtin_msa_dpadd_s_d
+#define msa_dpadd_u_h         __builtin_msa_dpadd_u_h
+#define msa_dpadd_u_w         __builtin_msa_dpadd_u_w
+#define msa_dpadd_u_d         __builtin_msa_dpadd_u_d
+
+#define ILVRL_B2(RTYPE, src0, src1, lo, hi) do { /* lo <- ilvr_b(src0, src1), hi <- ilvl_b(src0, src1), both cast to RTYPE */ \
+      lo = (RTYPE)__builtin_msa_ilvr_b((v16i8)(src0), (v16i8)(src1));  \
+      hi = (RTYPE)__builtin_msa_ilvl_b((v16i8)(src0), (v16i8)(src1));  \
+    } while (0)
+#define ILVRL_B2_UB(...) ILVRL_B2(v16u8, __VA_ARGS__)
+#define ILVRL_B2_SB(...) ILVRL_B2(v16i8, __VA_ARGS__)
+#define ILVRL_B2_UH(...) ILVRL_B2(v8u16, __VA_ARGS__)
+#define ILVRL_B2_SH(...) ILVRL_B2(v8i16, __VA_ARGS__)
+#define ILVRL_B2_SW(...) ILVRL_B2(v4i32, __VA_ARGS__)
+
+#define ILVRL_H2(RTYPE, src0, src1, lo, hi) do { /* lo <- ilvr_h(src0, src1), hi <- ilvl_h(src0, src1), both cast to RTYPE */ \
+      lo = (RTYPE)__builtin_msa_ilvr_h((v8i16)(src0), (v8i16)(src1));  \
+      hi = (RTYPE)__builtin_msa_ilvl_h((v8i16)(src0), (v8i16)(src1));  \
+    } while (0)
+#define ILVRL_H2_UB(...) ILVRL_H2(v16u8, __VA_ARGS__)
+#define ILVRL_H2_SB(...) ILVRL_H2(v16i8, __VA_ARGS__)
+#define ILVRL_H2_UH(...) ILVRL_H2(v8u16, __VA_ARGS__)
+#define ILVRL_H2_SH(...) ILVRL_H2(v8i16, __VA_ARGS__)
+#define ILVRL_H2_SW(...) ILVRL_H2(v4i32, __VA_ARGS__)
+#define ILVRL_H2_UW(...) ILVRL_H2(v4u32, __VA_ARGS__)
+
+#define ILVRL_W2(RTYPE, src0, src1, lo, hi) do { /* lo <- ilvr_w(src0, src1), hi <- ilvl_w(src0, src1), both cast to RTYPE */ \
+      lo = (RTYPE)__builtin_msa_ilvr_w((v4i32)(src0), (v4i32)(src1));  \
+      hi = (RTYPE)__builtin_msa_ilvl_w((v4i32)(src0), (v4i32)(src1));  \
+    } while (0)
+#define ILVRL_W2_UB(...) ILVRL_W2(v16u8, __VA_ARGS__)
+#define ILVRL_W2_SH(...) ILVRL_W2(v8i16, __VA_ARGS__)
+#define ILVRL_W2_SW(...) ILVRL_W2(v4i32, __VA_ARGS__)
+#define ILVRL_W2_UW(...) ILVRL_W2(v4u32, __VA_ARGS__)
+
+/* absq: r = |a| (integer lanes: add_a with a zero vector; float lanes: clear the sign bit); qabsq: same, via the saturating adds_a. */
+#define msa_absq_s8(a)        __builtin_msa_add_a_b(a, __builtin_msa_fill_b(0))
+#define msa_absq_s16(a)       __builtin_msa_add_a_h(a, __builtin_msa_fill_h(0))
+#define msa_absq_s32(a)       __builtin_msa_add_a_w(a, __builtin_msa_fill_w(0))
+#define msa_absq_s64(a)       __builtin_msa_add_a_d(a, __builtin_msa_fill_d(0))
+#define msa_absq_f32(a)       ((v4f32)__builtin_msa_bclri_w((v4u32)(a), 31)) /* bit 31 is the sign bit of a 32-bit float lane */
+#define msa_absq_f64(a)       ((v2f64)__builtin_msa_bclri_d((v2u64)(a), 63)) /* bit 63 is the sign bit of a 64-bit float lane */
+#define msa_qabsq_s8(a)       __builtin_msa_adds_a_b(a, __builtin_msa_fill_b(0))
+#define msa_qabsq_s16(a)      __builtin_msa_adds_a_h(a, __builtin_msa_fill_h(0))
+#define msa_qabsq_s32(a)      __builtin_msa_adds_a_w(a, __builtin_msa_fill_w(0))
+#define msa_qabsq_s64(a)      __builtin_msa_adds_a_d(a, __builtin_msa_fill_d(0))
+
+/* abdq: absolute difference, r = |a - b|; qabdq: saturating subtract followed by saturating absolute value. */
+#define msa_abdq_u8           __builtin_msa_asub_u_b
+#define msa_abdq_s8           __builtin_msa_asub_s_b
+#define msa_abdq_u16          __builtin_msa_asub_u_h
+#define msa_abdq_s16          __builtin_msa_asub_s_h
+#define msa_abdq_u32          __builtin_msa_asub_u_w
+#define msa_abdq_s32          __builtin_msa_asub_s_w
+#define msa_abdq_u64          __builtin_msa_asub_u_d
+#define msa_abdq_s64          __builtin_msa_asub_s_d
+#define msa_abdq_f32(a, b)    msa_absq_f32(__builtin_msa_fsub_w(a, b)) /* float: subtract, then clear the sign bit */
+#define msa_abdq_f64(a, b)    msa_absq_f64(__builtin_msa_fsub_d(a, b))
+#define msa_qabdq_s8(a, b)    msa_qabsq_s8(__builtin_msa_subs_s_b(a, b))
+#define msa_qabdq_s16(a, b)   msa_qabsq_s16(__builtin_msa_subs_s_h(a, b))
+#define msa_qabdq_s32(a, b)   msa_qabsq_s32(__builtin_msa_subs_s_w(a, b))
+#define msa_qabdq_s64(a, b)   msa_qabsq_s64(__builtin_msa_subs_s_d(a, b))
+
+/* sqrtq: aliases of the fsqrt builtins (square root); rsqrtq: aliases of the frsqrt builtins (reciprocal square root). */
+#define msa_sqrtq_f32         __builtin_msa_fsqrt_w
+#define msa_sqrtq_f64         __builtin_msa_fsqrt_d
+#define msa_rsqrtq_f32        __builtin_msa_frsqrt_w
+#define msa_rsqrtq_f64        __builtin_msa_frsqrt_d
+
+
+/* mlaq: multiply-accumulate, r = a + b * c per lane; returns the updated accumulator. */
+__extension__ extern __inline v4i32
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+msa_mlaq_s32(v4i32 __a, v4i32 __b, v4i32 __c)
+{
+  __asm__("maddv.w %w[__a], %w[__b], %w[__c]\n"
+          // __a is accumulator input and result ("+f"); not volatile, the asm is a pure computation
+          : [__a] "+f"(__a)
+          // multiplicands, read-only
+          : [__b] "f"(__b), [__c] "f"(__c));
+  return __a;
+}
+
+__extension__ extern __inline v2i64
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+msa_mlaq_s64(v2i64 __a, v2i64 __b, v2i64 __c) /* r = a + b * c per 64-bit integer lane (maddv.d) */
+{
+  __asm__("maddv.d %w[__a], %w[__b], %w[__c]\n"
+          // __a is accumulator input and result ("+f"); not volatile, the asm is a pure computation
+          : [__a] "+f"(__a)
+          // multiplicands, read-only
+          : [__b] "f"(__b), [__c] "f"(__c));
+  return __a;
+}
+
+__extension__ extern __inline v4f32
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+msa_mlaq_f32(v4f32 __a, v4f32 __b, v4f32 __c) /* r = a + b * c per 32-bit float lane (fmadd.w) */
+{
+  __asm__("fmadd.w %w[__a], %w[__b], %w[__c]\n"
+          // __a is accumulator input and result ("+f"); not volatile, the asm is a pure computation
+          : [__a] "+f"(__a)
+          // multiplicands, read-only
+          : [__b] "f"(__b), [__c] "f"(__c));
+  return __a;
+}
+
+__extension__ extern __inline v2f64
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+msa_mlaq_f64(v2f64 __a, v2f64 __b, v2f64 __c) /* r = a + b * c per 64-bit float lane (fmadd.d) */
+{
+  __asm__("fmadd.d %w[__a], %w[__b], %w[__c]\n"
+          // __a is accumulator input and result ("+f"); not volatile, the asm is a pure computation
+          : [__a] "+f"(__a)
+          // multiplicands, read-only
+          : [__b] "f"(__b), [__c] "f"(__c));
+  return __a;
+}
+
+/* cntq: aliases of the pcnt (population count) builtins, one per lane width. */
+#define msa_cntq_s8           __builtin_msa_pcnt_b
+#define msa_cntq_s16          __builtin_msa_pcnt_h
+#define msa_cntq_s32          __builtin_msa_pcnt_w
+#define msa_cntq_s64          __builtin_msa_pcnt_d
+
+/* bslq: bitwise select with mask a; a result bit is taken from b where the mask bit is 0 and from c where it is 1. */
+#define msa_bslq_u8           __builtin_msa_bsel_v
+
+/* ilvrq, ilvlq: interleave the lanes of a and b (lane order shown for little-endian only; ilvrq: b0, a0, b1, a1; ilvlq: b2, a2, b3, a3). */
+#define msa_ilvrq_s8          __builtin_msa_ilvr_b
+#define msa_ilvrq_s16         __builtin_msa_ilvr_h
+#define msa_ilvrq_s32         __builtin_msa_ilvr_w
+#define msa_ilvrq_s64         __builtin_msa_ilvr_d
+#define msa_ilvlq_s8          __builtin_msa_ilvl_b
+#define msa_ilvlq_s16         __builtin_msa_ilvl_h
+#define msa_ilvlq_s32         __builtin_msa_ilvl_w
+#define msa_ilvlq_s64         __builtin_msa_ilvl_d
+
+/* ilvevq, ilvodq: interleave the even / odd lanes of a and b (ilvevq: b0, a0, b2, a2; ilvodq: b1, a1, b3, a3). */
+#define msa_ilvevq_s8         __builtin_msa_ilvev_b
+#define msa_ilvevq_s16        __builtin_msa_ilvev_h
+#define msa_ilvevq_s32        __builtin_msa_ilvev_w
+#define msa_ilvevq_s64        __builtin_msa_ilvev_d
+#define msa_ilvodq_s8         __builtin_msa_ilvod_b
+#define msa_ilvodq_s16        __builtin_msa_ilvod_h
+#define msa_ilvodq_s32        __builtin_msa_ilvod_w
+#define msa_ilvodq_s64        __builtin_msa_ilvod_d
+
+/* extq: treat a and b as one concatenated vector and extract one full vector of elements starting at index c. */
+#ifdef _MIPSEB /* big-endian: the vshf index table is built by subtracting c from a constant table */
+#define msa_extq_s8(a, b, c)  \
+(__builtin_msa_vshf_b(__builtin_msa_subv_b((v16i8)((v2i64){0x1716151413121110, 0x1F1E1D1C1B1A1918}), __builtin_msa_fill_b(c)), a, b))
+#define msa_extq_s16(a, b, c) \
+(__builtin_msa_vshf_h(__builtin_msa_subv_h((v8i16)((v2i64){0x000B000A00090008, 0x000F000E000D000C}), __builtin_msa_fill_h(c)), a, b))
+#define msa_extq_s32(a, b, c) \
+(__builtin_msa_vshf_w(__builtin_msa_subv_w((v4i32)((v2i64){0x0000000500000004, 0x0000000700000006}), __builtin_msa_fill_w(c)), a, b))
+#define msa_extq_s64(a, b, c) \
+(__builtin_msa_vshf_d(__builtin_msa_subv_d((v2i64){0x0000000000000002, 0x0000000000000003}, __builtin_msa_fill_d(c)), a, b))
+#else /* little-endian: the vshf index table is the identity table (0, 1, 2, ...) plus c */
+#define msa_extq_s8(a, b, c)  \
+(__builtin_msa_vshf_b(__builtin_msa_addv_b((v16i8)((v2i64){0x0706050403020100, 0x0F0E0D0C0B0A0908}), __builtin_msa_fill_b(c)), b, a))
+#define msa_extq_s16(a, b, c) \
+(__builtin_msa_vshf_h(__builtin_msa_addv_h((v8i16)((v2i64){0x0003000200010000, 0x0007000600050004}), __builtin_msa_fill_h(c)), b, a))
+#define msa_extq_s32(a, b, c) \
+(__builtin_msa_vshf_w(__builtin_msa_addv_w((v4i32)((v2i64){0x0000000100000000, 0x0000000300000002}), __builtin_msa_fill_w(c)), b, a))
+#define msa_extq_s64(a, b, c) \
+(__builtin_msa_vshf_d(__builtin_msa_addv_d((v2i64){0x0000000000000000, 0x0000000000000001}, __builtin_msa_fill_d(c)), b, a))
+#endif /* _MIPSEB */
+
+/* Float conversions: cvttruncq = float -> integer via ftrunc; cvttintq = float -> integer via ftint; cvtrintq = float -> float via frint. */
+#define msa_cvttruncq_u32_f32 __builtin_msa_ftrunc_u_w
+#define msa_cvttruncq_s32_f32 __builtin_msa_ftrunc_s_w
+#define msa_cvttruncq_u64_f64 __builtin_msa_ftrunc_u_d
+#define msa_cvttruncq_s64_f64 __builtin_msa_ftrunc_s_d
+#define msa_cvttintq_u32_f32  __builtin_msa_ftint_u_w
+#define msa_cvttintq_s32_f32  __builtin_msa_ftint_s_w
+#define msa_cvttintq_u64_f64  __builtin_msa_ftint_u_d
+#define msa_cvttintq_s64_f64  __builtin_msa_ftint_s_d
+#define msa_cvtrintq_f32      __builtin_msa_frint_w
+#define msa_cvtrintq_f64      __builtin_msa_frint_d
+
+/* cvtfintq: integer -> float (ffint); cvtfq: f64 -> f32 (fexdo); cvtflq / cvtfhq: f32 -> f64 via fexupr / fexupl. */
+#define msa_cvtfintq_f32_u32  __builtin_msa_ffint_u_w
+#define msa_cvtfintq_f32_s32  __builtin_msa_ffint_s_w
+#define msa_cvtfintq_f64_u64  __builtin_msa_ffint_u_d
+#define msa_cvtfintq_f64_s64  __builtin_msa_ffint_s_d
+#define msa_cvtfq_f32_f64     __builtin_msa_fexdo_w
+#define msa_cvtflq_f64_f32    __builtin_msa_fexupr_d
+#define msa_cvtfhq_f64_f32    __builtin_msa_fexupl_d
+
+#define msa_addl_u8(a, b)     ((v8u16)__builtin_msa_addv_h((v8i16)V8U8_2_V8I16(a), (v8i16)V8U8_2_V8I16(b))) /* addl/subl: operands pass through the V*_2_V* helpers (defined outside this chunk, presumably widening) before the add/sub */
+#define msa_addl_s8(a, b)     (__builtin_msa_addv_h((v8i16)V8I8_2_V8I16(a), (v8i16)V8I8_2_V8I16(b)))
+#define msa_addl_u16(a, b)    ((v4u32)__builtin_msa_addv_w((v4i32)V4U16_2_V4I32(a), (v4i32)V4U16_2_V4I32(b)))
+#define msa_addl_s16(a, b)    (__builtin_msa_addv_w((v4i32)V4I16_2_V4I32(a), (v4i32)V4I16_2_V4I32(b)))
+#define msa_subl_s16(a, b)    (__builtin_msa_subv_w((v4i32)V4I16_2_V4I32(a), (v4i32)V4I16_2_V4I32(b)))
+#define msa_recpeq_f32        __builtin_msa_frcp_w /* recpeq: alias of the frcp (reciprocal) builtin */
+#define msa_recpsq_f32(a, b)  (__builtin_msa_fsub_w(msa_dupq_n_f32(2.0f), __builtin_msa_fmul_w(a, b))) /* recpsq: 2.0 - a * b, assuming msa_dupq_n_f32 broadcasts its argument */
+
+#define MSA_INTERLEAVED_IMPL_LOAD2_STORE2(_Tp, _Tpv, _Tpvs, suffix, df, nlanes) \
+__extension__ extern __inline void \
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \
+msa_ld2q_##suffix(const _Tp* ptr, _Tpv* a, _Tpv* b) \
+{ /* loads 2 * nlanes elements; even-indexed ones go to *a (pckev), odd-indexed ones to *b (pckod) */ \
+  const _Tpvs first_vec  = (_Tpvs)msa_ld1q_##suffix(ptr); \
+  const _Tpvs second_vec = (_Tpvs)msa_ld1q_##suffix(ptr + nlanes); \
+  *a = (_Tpv)__builtin_msa_pckev_##df(second_vec, first_vec); \
+  *b = (_Tpv)__builtin_msa_pckod_##df(second_vec, first_vec); \
+} \
+__extension__ extern __inline void \
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \
+msa_st2q_##suffix(_Tp* ptr, const _Tpv a, const _Tpv b) \
+{ /* stores 2 * nlanes elements: ilvr(b, a) at ptr, ilvl(b, a) at ptr + nlanes */ \
+  msa_st1q_##suffix(ptr,          (_Tpv)__builtin_msa_ilvr_##df((_Tpvs)b, (_Tpvs)a)); \
+  msa_st1q_##suffix(ptr + nlanes, (_Tpv)__builtin_msa_ilvl_##df((_Tpvs)b, (_Tpvs)a)); \
+}
+
+MSA_INTERLEAVED_IMPL_LOAD2_STORE2(uint8_t, v16u8, v16i8, u8, b, 16)
+MSA_INTERLEAVED_IMPL_LOAD2_STORE2(int8_t, v16i8, v16i8, s8, b, 16)
+MSA_INTERLEAVED_IMPL_LOAD2_STORE2(uint16_t, v8u16, v8i16, u16, h, 8)
+MSA_INTERLEAVED_IMPL_LOAD2_STORE2(int16_t, v8i16, v8i16, s16, h, 8)
+MSA_INTERLEAVED_IMPL_LOAD2_STORE2(uint32_t, v4u32, v4i32, u32, w, 4)
+MSA_INTERLEAVED_IMPL_LOAD2_STORE2(int32_t, v4i32, v4i32, s32, w, 4)
+MSA_INTERLEAVED_IMPL_LOAD2_STORE2(float, v4f32, v4i32, f32, w, 4)
+MSA_INTERLEAVED_IMPL_LOAD2_STORE2(uint64_t, v2u64, v2i64, u64, d, 2)
+MSA_INTERLEAVED_IMPL_LOAD2_STORE2(int64_t, v2i64, v2i64, s64, d, 2)
+MSA_INTERLEAVED_IMPL_LOAD2_STORE2(double, v2f64, v2i64, f64, d, 2)
+
+#ifdef _MIPSEB /* msa_ld3q_{u8,s8}: big-endian shuffle tables (NOTE(review): not re-derived in review; presumably the same contract as the #else branch) */
+#define MSA_INTERLEAVED_IMPL_LOAD3_8(_Tp, _Tpv, _Tpvs, suffix) \
+__extension__ extern __inline void \
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \
+msa_ld3q_##suffix(const _Tp* ptr, _Tpv* a, _Tpv* b, _Tpv* c) \
+{ \
+  _Tpv v0 = msa_ld1q_##suffix(ptr); \
+  _Tpv v1 = msa_ld1q_##suffix(ptr + 16); \
+  _Tpv v2 = msa_ld1q_##suffix(ptr + 32); \
+  _Tpvs v3 = __builtin_msa_vshf_b((_Tpvs)((v2i64){0x0704011F1F1F1F1F, 0x1F1C191613100D0A}), (_Tpvs)v0, (_Tpvs)v1); \
+  *a = (_Tpv)__builtin_msa_vshf_b((_Tpvs)((v2i64){0x1716150E0B080502, 0x1F1E1D1C1B1A1918}), v3, (_Tpvs)v2); \
+  v3 = __builtin_msa_vshf_b((_Tpvs)((v2i64){0x0603001F1F1F1F1F, 0x1E1B1815120F0C09}), (_Tpvs)v0, (_Tpvs)v1); \
+  *b = (_Tpv)__builtin_msa_vshf_b((_Tpvs)((v2i64){0x1716150D0A070401, 0x1F1E1D1C1B1A1918}), v3, (_Tpvs)v2); \
+  v3 = __builtin_msa_vshf_b((_Tpvs)((v2i64){0x05021F1F1F1F1F1F, 0x1D1A1714110E0B08}), (_Tpvs)v0, (_Tpvs)v1); \
+  *c = (_Tpv)__builtin_msa_vshf_b((_Tpvs)((v2i64){0x17160F0C09060300, 0x1F1E1D1C1B1A1918}), v3, (_Tpvs)v2); \
+}
+#else /* little-endian shuffle tables */
+#define MSA_INTERLEAVED_IMPL_LOAD3_8(_Tp, _Tpv, _Tpvs, suffix) \
+__extension__ extern __inline void \
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \
+msa_ld3q_##suffix(const _Tp* ptr, _Tpv* a, _Tpv* b, _Tpv* c) \
+{ /* loads 48 bytes; bytes at offsets 0,3,6,... go to *a, 1,4,7,... to *b, 2,5,8,... to *c */ \
+  _Tpv v0 = msa_ld1q_##suffix(ptr); \
+  _Tpv v1 = msa_ld1q_##suffix(ptr + 16); \
+  _Tpv v2 = msa_ld1q_##suffix(ptr + 32); \
+  _Tpvs v3 = __builtin_msa_vshf_b((_Tpvs)((v2i64){0x15120F0C09060300, 0x00000000001E1B18}), (_Tpvs)v1, (_Tpvs)v0); /* gather the wanted bytes of v0/v1 into the low lanes */ \
+  *a = (_Tpv)__builtin_msa_vshf_b((_Tpvs)((v2i64){0x0706050403020100, 0x1D1A1714110A0908}), (_Tpvs)v2, v3); /* keep those lanes, fill the rest from v2 */ \
+  v3 = __builtin_msa_vshf_b((_Tpvs)((v2i64){0x1613100D0A070401, 0x00000000001F1C19}), (_Tpvs)v1, (_Tpvs)v0); \
+  *b = (_Tpv)__builtin_msa_vshf_b((_Tpvs)((v2i64){0x0706050403020100, 0x1E1B1815120A0908}), (_Tpvs)v2, v3); \
+  v3 = __builtin_msa_vshf_b((_Tpvs)((v2i64){0x1714110E0B080502, 0x0000000000001D1A}), (_Tpvs)v1, (_Tpvs)v0); \
+  *c = (_Tpv)__builtin_msa_vshf_b((_Tpvs)((v2i64){0x0706050403020100, 0x1F1C191613100908}), (_Tpvs)v2, v3); \
+}
+#endif /* _MIPSEB */
+
+MSA_INTERLEAVED_IMPL_LOAD3_8(uint8_t, v16u8, v16i8, u8)
+MSA_INTERLEAVED_IMPL_LOAD3_8(int8_t, v16i8, v16i8, s8)
+
+#ifdef _MIPSEB /* msa_ld3q_{u16,s16}: big-endian shuffle tables (NOTE(review): not re-derived in review; presumably the same contract as the #else branch) */
+#define MSA_INTERLEAVED_IMPL_LOAD3_16(_Tp, _Tpv, _Tpvs, suffix) \
+__extension__ extern __inline void \
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \
+msa_ld3q_##suffix(const _Tp* ptr, _Tpv* a, _Tpv* b, _Tpv* c) \
+{ \
+  _Tpv v0 = msa_ld1q_##suffix(ptr); \
+  _Tpv v1 = msa_ld1q_##suffix(ptr + 8); \
+  _Tpv v2 = msa_ld1q_##suffix(ptr + 16); \
+  _Tpvs v3 = __builtin_msa_vshf_h((_Tpvs)((v2i64){0x00030000000F000F, 0x000F000C00090006}), (_Tpvs)v1, (_Tpvs)v0); \
+  *a = (_Tpv)__builtin_msa_vshf_h((_Tpvs)((v2i64){0x000B000A00050002, 0x000F000E000D000C}), (_Tpvs)v2, v3); \
+  v3 = __builtin_msa_vshf_h((_Tpvs)((v2i64){0x0002000F000F000F, 0x000E000B00080005}), (_Tpvs)v1, (_Tpvs)v0); \
+  *b = (_Tpv)__builtin_msa_vshf_h((_Tpvs)((v2i64){0x000B000700040001, 0x000F000E000D000C}), (_Tpvs)v2, v3); \
+  v3 = __builtin_msa_vshf_h((_Tpvs)((v2i64){0x0001000F000F000F, 0x000D000A00070004}), (_Tpvs)v1, (_Tpvs)v0); \
+  *c = (_Tpv)__builtin_msa_vshf_h((_Tpvs)((v2i64){0x000B000600030000, 0x000F000E000D000C}), (_Tpvs)v2, v3); \
+}
+#else /* little-endian shuffle tables */
+#define MSA_INTERLEAVED_IMPL_LOAD3_16(_Tp, _Tpv, _Tpvs, suffix) \
+__extension__ extern __inline void \
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \
+msa_ld3q_##suffix(const _Tp* ptr, _Tpv* a, _Tpv* b, _Tpv* c) \
+{ /* loads 24 halfwords; elements 0,3,6,... go to *a, 1,4,7,... to *b, 2,5,8,... to *c */ \
+  _Tpv v0 = msa_ld1q_##suffix(ptr); \
+  _Tpv v1 = msa_ld1q_##suffix(ptr + 8); \
+  _Tpv v2 = msa_ld1q_##suffix(ptr + 16); \
+  _Tpvs v3 = __builtin_msa_vshf_h((_Tpvs)((v2i64){0x0009000600030000, 0x00000000000F000C}), (_Tpvs)v1, (_Tpvs)v0); /* gather the wanted halfwords of v0/v1 into the low lanes */ \
+  *a = (_Tpv)__builtin_msa_vshf_h((_Tpvs)((v2i64){0x0003000200010000, 0x000D000A00050004}), (_Tpvs)v2, v3); /* keep those lanes, fill the rest from v2 */ \
+  v3 = __builtin_msa_vshf_h((_Tpvs)((v2i64){0x000A000700040001, 0x000000000000000D}), (_Tpvs)v1, (_Tpvs)v0); \
+  *b = (_Tpv)__builtin_msa_vshf_h((_Tpvs)((v2i64){0x0003000200010000, 0x000E000B00080004}), (_Tpvs)v2, v3); \
+  v3 = __builtin_msa_vshf_h((_Tpvs)((v2i64){0x000B000800050002, 0x000000000000000E}), (_Tpvs)v1, (_Tpvs)v0); \
+  *c = (_Tpv)__builtin_msa_vshf_h((_Tpvs)((v2i64){0x0003000200010000, 0x000F000C00090004}), (_Tpvs)v2, v3); \
+}
+#endif /* _MIPSEB */
+
+MSA_INTERLEAVED_IMPL_LOAD3_16(uint16_t, v8u16, v8i16, u16)
+MSA_INTERLEAVED_IMPL_LOAD3_16(int16_t, v8i16, v8i16, s16)
+
+#define MSA_INTERLEAVED_IMPL_LOAD3_32(_Tp, _Tpv, _Tpvs, suffix) \
+__extension__ extern __inline void \
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \
+msa_ld3q_##suffix(const _Tp* ptr, _Tpv* a, _Tpv* b, _Tpv* c) \
+{ /* loads 12 consecutive 32-bit elements and redistributes them into *a, *b, *c with two rounds of ilvr_w / ilvl_d; no endian-specific tables here */ \
+  _Tpv v00 = msa_ld1q_##suffix(ptr); \
+  _Tpv v01 = msa_ld1q_##suffix(ptr + 4); \
+  _Tpv v02 = msa_ld1q_##suffix(ptr + 8); \
+  _Tpvs v10 = __builtin_msa_ilvr_w((_Tpvs)__builtin_msa_ilvl_d((v2i64)v01, (v2i64)v01), (_Tpvs)v00); /* ilvl_d(x, x) copies the upper 64 bits of x into both halves */ \
+  _Tpvs v11 = __builtin_msa_ilvr_w((_Tpvs)v02, (_Tpvs)__builtin_msa_ilvl_d((v2i64)v00, (v2i64)v00)); \
+  _Tpvs v12 = __builtin_msa_ilvr_w((_Tpvs)__builtin_msa_ilvl_d((v2i64)v02, (v2i64)v02), (_Tpvs)v01); \
+  *a = (_Tpv)__builtin_msa_ilvr_w((_Tpvs)__builtin_msa_ilvl_d((v2i64)v11, (v2i64)v11), v10); \
+  *b = (_Tpv)__builtin_msa_ilvr_w(v12, (_Tpvs)__builtin_msa_ilvl_d((v2i64)v10, (v2i64)v10)); \
+  *c = (_Tpv)__builtin_msa_ilvr_w((_Tpvs)__builtin_msa_ilvl_d((v2i64)v12, (v2i64)v12), v11); \
+}
+
+MSA_INTERLEAVED_IMPL_LOAD3_32(uint32_t, v4u32, v4i32, u32)
+MSA_INTERLEAVED_IMPL_LOAD3_32(int32_t, v4i32, v4i32, s32)
+MSA_INTERLEAVED_IMPL_LOAD3_32(float, v4f32, v4i32, f32)
+
+#define MSA_INTERLEAVED_IMPL_LOAD3_64(_Tp, _Tpv, suffix) \
+__extension__ extern __inline void \
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \
+msa_ld3q_##suffix(const _Tp* ptr, _Tpv* a, _Tpv* b, _Tpv* c) \
+{ /* scalar copy of six elements: ptr[0], ptr[3] -> *a; ptr[1], ptr[4] -> *b; ptr[2], ptr[5] -> *c */ \
+  ((_Tp*)a)[0] = ptr[0]; ((_Tp*)b)[0] = ptr[1]; ((_Tp*)c)[0] = ptr[2]; \
+  ((_Tp*)a)[1] = ptr[3]; ((_Tp*)b)[1] = ptr[4]; ((_Tp*)c)[1] = ptr[5]; \
+}
+
+MSA_INTERLEAVED_IMPL_LOAD3_64(uint64_t, v2u64, u64)
+MSA_INTERLEAVED_IMPL_LOAD3_64(int64_t, v2i64, s64)
+MSA_INTERLEAVED_IMPL_LOAD3_64(double, v2f64, f64)
+
+#ifdef _MIPSEB /* msa_st3q_{u8,s8}: big-endian shuffle tables (NOTE(review): not re-derived in review; presumably the same contract as the #else branch) */
+#define MSA_INTERLEAVED_IMPL_STORE3_8(_Tp, _Tpv, _Tpvs, suffix) \
+__extension__ extern __inline void \
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \
+msa_st3q_##suffix(_Tp* ptr, const _Tpv a, const _Tpv b, const _Tpv c) \
+{ \
+  _Tpvs v0 = __builtin_msa_vshf_b((_Tpvs)((v2i64){0x0F0E0D0C0B1F1F1F, 0x1F1E1D1C1B1A1F1F}), (_Tpvs)b, (_Tpvs)a); \
+  _Tpvs v1 = __builtin_msa_vshf_b((_Tpvs)((v2i64){0x0D1C140C1B130B1A, 0x1F170F1E160E1D15}), (_Tpvs)c, (_Tpvs)v0); \
+  msa_st1q_##suffix(ptr, (_Tpv)v1); \
+  v0 = __builtin_msa_vshf_b((_Tpvs)((v2i64){0x0A09080706051F1F, 0x19181716151F1F1F}), (_Tpvs)b, (_Tpvs)a); \
+  v1 = __builtin_msa_vshf_b((_Tpvs)((v2i64){0x1D14071C13061B12, 0x170A1F16091E1508}), (_Tpvs)c, (_Tpvs)v0); \
+  msa_st1q_##suffix(ptr + 16, (_Tpv)v1); \
+  v0 = __builtin_msa_vshf_b((_Tpvs)((v2i64){0x04030201001F1F1F, 0x14131211101F1F1F}), (_Tpvs)b, (_Tpvs)a); \
+  v1 = __builtin_msa_vshf_b((_Tpvs)((v2i64){0x15021C14011B1300, 0x051F17041E16031D}), (_Tpvs)c, (_Tpvs)v0); \
+  msa_st1q_##suffix(ptr + 32, (_Tpv)v1); \
+}
+#else /* little-endian shuffle tables */
+#define MSA_INTERLEAVED_IMPL_STORE3_8(_Tp, _Tpv, _Tpvs, suffix) \
+__extension__ extern __inline void \
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \
+msa_st3q_##suffix(_Tp* ptr, const _Tpv a, const _Tpv b, const _Tpv c) \
+{ /* stores 48 bytes in the order a0, b0, c0, a1, b1, c1, ... as three 16-byte stores */ \
+  _Tpvs v0 = __builtin_msa_vshf_b((_Tpvs)((v2i64){0x0000050403020100, 0x0000001413121110}), (_Tpvs)b, (_Tpvs)a); /* pack the needed bytes of a (low half) and b (high half) */ \
+  _Tpvs v1 = __builtin_msa_vshf_b((_Tpvs)((v2i64){0x0A02110901100800, 0x05140C04130B0312}), (_Tpvs)c, (_Tpvs)v0); /* weave in the bytes of c */ \
+  msa_st1q_##suffix(ptr, (_Tpv)v1); \
+  v0 = __builtin_msa_vshf_b((_Tpvs)((v2i64){0x0000000A09080706, 0x00001A1918171615}), (_Tpvs)b, (_Tpvs)a); \
+  v1 = __builtin_msa_vshf_b((_Tpvs)((v2i64){0x170A011609001508, 0x0D04190C03180B02}), (_Tpvs)c, (_Tpvs)v0); \
+  msa_st1q_##suffix(ptr + 16, (_Tpv)v1); \
+  v0 = __builtin_msa_vshf_b((_Tpvs)((v2i64){0x0000000F0E0D0C0B, 0x0000001F1E1D1C1B}), (_Tpvs)b, (_Tpvs)a); \
+  v1 = __builtin_msa_vshf_b((_Tpvs)((v2i64){0x021C09011B08001A, 0x1F0C041E0B031D0A}), (_Tpvs)c, (_Tpvs)v0); \
+  msa_st1q_##suffix(ptr + 32, (_Tpv)v1); \
+}
+#endif /* _MIPSEB */
+
+MSA_INTERLEAVED_IMPL_STORE3_8(uint8_t, v16u8, v16i8, u8)
+MSA_INTERLEAVED_IMPL_STORE3_8(int8_t, v16i8, v16i8, s8)
+
+#ifdef _MIPSEB /* msa_st3q_{u16,s16}: big-endian shuffle tables (NOTE(review): not re-derived in review; presumably the same contract as the #else branch) */
+#define MSA_INTERLEAVED_IMPL_STORE3_16(_Tp, _Tpv, _Tpvs, suffix) \
+__extension__ extern __inline void \
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \
+msa_st3q_##suffix(_Tp* ptr, const _Tpv a, const _Tpv b, const _Tpv c) \
+{ \
+  _Tpvs v0 = __builtin_msa_vshf_h((_Tpvs)((v2i64){0x000700060005000F, 0x000F000E000D000F}), (_Tpvs)b, (_Tpvs)a); \
+  _Tpvs v1 = __builtin_msa_vshf_h((_Tpvs)((v2i64){0x000A0006000D0009, 0x000F000B0007000E}), (_Tpvs)c, (_Tpvs)v0); \
+  msa_st1q_##suffix(ptr, (_Tpv)v1); \
+  v0 = __builtin_msa_vshf_h((_Tpvs)((v2i64){0x00040003000F000F, 0x000C000B000A000F}), (_Tpvs)b, (_Tpvs)a); \
+  v1 = __builtin_msa_vshf_h((_Tpvs)((v2i64){0x000E000A0003000D, 0x0005000F000B0004}), (_Tpvs)c, (_Tpvs)v0); \
+  msa_st1q_##suffix(ptr + 8, (_Tpv)v1); \
+  v0 = __builtin_msa_vshf_h((_Tpvs)((v2i64){0x000200010000000F, 0x00090008000F000F}), (_Tpvs)b, (_Tpvs)a); \
+  v1 = __builtin_msa_vshf_h((_Tpvs)((v2i64){0x0001000E00090000, 0x000B0002000F000A}), (_Tpvs)c, (_Tpvs)v0); \
+  msa_st1q_##suffix(ptr + 16, (_Tpv)v1); \
+}
+#else /* little-endian shuffle tables */
+#define MSA_INTERLEAVED_IMPL_STORE3_16(_Tp, _Tpv, _Tpvs, suffix) \
+__extension__ extern __inline void \
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \
+msa_st3q_##suffix(_Tp* ptr, const _Tpv a, const _Tpv b, const _Tpv c) \
+{ /* stores 24 halfwords in the order a0, b0, c0, a1, b1, c1, ... as three 8-element stores */ \
+  _Tpvs v0 = __builtin_msa_vshf_h((_Tpvs)((v2i64){0x0000000200010000, 0x0000000A00090008}), (_Tpvs)b, (_Tpvs)a); /* pack the needed halfwords of a (low half) and b (high half) */ \
+  _Tpvs v1 = __builtin_msa_vshf_h((_Tpvs)((v2i64){0x0001000800040000, 0x0006000200090005}), (_Tpvs)c, (_Tpvs)v0); /* weave in the halfwords of c */ \
+  msa_st1q_##suffix(ptr, (_Tpv)v1); \
+  v0 = __builtin_msa_vshf_h((_Tpvs)((v2i64){0x0000000500040003, 0x00000000000C000B}), (_Tpvs)b, (_Tpvs)a); \
+  v1 = __builtin_msa_vshf_h((_Tpvs)((v2i64){0x000B00040000000A, 0x0002000C00050001}), (_Tpvs)c, (_Tpvs)v0); \
+  msa_st1q_##suffix(ptr + 8, (_Tpv)v1); \
+  v0 = __builtin_msa_vshf_h((_Tpvs)((v2i64){0x0000000000070006, 0x0000000F000E000D}), (_Tpvs)b, (_Tpvs)a); \
+  v1 = __builtin_msa_vshf_h((_Tpvs)((v2i64){0x00050000000D0004, 0x000F00060001000E}), (_Tpvs)c, (_Tpvs)v0); \
+  msa_st1q_##suffix(ptr + 16, (_Tpv)v1); \
+}
+#endif /* _MIPSEB */
+
+MSA_INTERLEAVED_IMPL_STORE3_16(uint16_t, v8u16, v8i16, u16)
+MSA_INTERLEAVED_IMPL_STORE3_16(int16_t, v8i16, v8i16, s16)
+
+#ifdef _MIPSEB /* big-endian MIPS build: same routine as the #else branch, with different shuffle index tables */
+#define MSA_INTERLEAVED_IMPL_STORE3_32(_Tp, _Tpv, _Tpvs, suffix) \
+__extension__ extern __inline void \
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \
+msa_st3q_##suffix(_Tp* ptr, const _Tpv a, const _Tpv b, const _Tpv c) \
+{ /* Three stores (ptr, ptr + 4, ptr + 8); each vector is built by shuffling b with a, then c with that intermediate. NOTE(review): index tables not re-derived for this branch; presumably the same memory layout as the little-endian variant - confirm. */ \
+  _Tpvs v0 = __builtin_msa_vshf_w((_Tpvs)((v2i64){0x0000000300000007, 0x0000000700000006}), (_Tpvs)b, (_Tpvs)a); \
+  _Tpvs v1 = __builtin_msa_vshf_w((_Tpvs)((v2i64){0x0000000300000006, 0x0000000700000005}), (_Tpvs)c, (_Tpvs)v0); \
+  msa_st1q_##suffix(ptr, (_Tpv)v1); \
+  v0 = __builtin_msa_vshf_w((_Tpvs)((v2i64){0x0000000200000001, 0x0000000500000007}), (_Tpvs)b, (_Tpvs)a); \
+  v1 = __builtin_msa_vshf_w((_Tpvs)((v2i64){0x0000000700000004, 0x0000000500000002}), (_Tpvs)c, (_Tpvs)v0); \
+  msa_st1q_##suffix(ptr + 4, (_Tpv)v1); \
+  v0 = __builtin_msa_vshf_w((_Tpvs)((v2i64){0x0000000000000007, 0x0000000400000007}), (_Tpvs)b, (_Tpvs)a); \
+  v1 = __builtin_msa_vshf_w((_Tpvs)((v2i64){0x0000000500000000, 0x0000000100000007}), (_Tpvs)c, (_Tpvs)v0); \
+  msa_st1q_##suffix(ptr + 8, (_Tpv)v1); \
+}
+#else /* !_MIPSEB */
+#define MSA_INTERLEAVED_IMPL_STORE3_32(_Tp, _Tpv, _Tpvs, suffix) \
+__extension__ extern __inline void \
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \
+msa_st3q_##suffix(_Tp* ptr, const _Tpv a, const _Tpv b, const _Tpv c) \
+{ /* Three stores (ptr, ptr + 4, ptr + 8), two shuffles per store. Tracing the index tables gives a0 b0 c0 a1 | b1 c1 a2 b2 | c2 a3 b3 c3, assuming VSHF indices 0-3 pick from the last operand and 4-7 from the middle one - confirm against the MSA spec. */ \
+  _Tpvs v0 = __builtin_msa_vshf_w((_Tpvs)((v2i64){0x0000000100000000, 0x0000000000000004}), (_Tpvs)b, (_Tpvs)a); \
+  _Tpvs v1 = __builtin_msa_vshf_w((_Tpvs)((v2i64){0x0000000200000000, 0x0000000100000004}), (_Tpvs)c, (_Tpvs)v0); \
+  msa_st1q_##suffix(ptr, (_Tpv)v1); \
+  v0 = __builtin_msa_vshf_w((_Tpvs)((v2i64){0x0000000000000002, 0x0000000600000005}), (_Tpvs)b, (_Tpvs)a); \
+  v1 = __builtin_msa_vshf_w((_Tpvs)((v2i64){0x0000000500000002, 0x0000000300000000}), (_Tpvs)c, (_Tpvs)v0); \
+  msa_st1q_##suffix(ptr + 4, (_Tpv)v1); \
+  v0 = __builtin_msa_vshf_w((_Tpvs)((v2i64){0x0000000000000003, 0x0000000000000007}), (_Tpvs)b, (_Tpvs)a); \
+  v1 = __builtin_msa_vshf_w((_Tpvs)((v2i64){0x0000000000000006, 0x0000000700000002}), (_Tpvs)c, (_Tpvs)v0); \
+  msa_st1q_##suffix(ptr + 8, (_Tpv)v1); \
+}
+#endif /* _MIPSEB */
+
+MSA_INTERLEAVED_IMPL_STORE3_32(uint32_t, v4u32, v4i32, u32) /* defines msa_st3q_u32 */
+MSA_INTERLEAVED_IMPL_STORE3_32(int32_t, v4i32, v4i32, s32) /* defines msa_st3q_s32 */
+MSA_INTERLEAVED_IMPL_STORE3_32(float, v4f32, v4i32, f32) /* defines msa_st3q_f32; shuffles run on the v4i32 view */
+
+#define MSA_INTERLEAVED_IMPL_STORE3_64(_Tp, _Tpv, suffix) \
+__extension__ extern __inline void \
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \
+msa_st3q_##suffix(_Tp* ptr, const _Tpv a, const _Tpv b, const _Tpv c) \
+{ /* No shuffle needed: lanes 0 and 1 of a, b, c are written one by one as a0 b0 c0 a1 b1 c1 into ptr[0..5]. */ \
+  ptr[0] = a[0]; ptr[1] = b[0]; ptr[2] = c[0]; \
+  ptr[3] = a[1]; ptr[4] = b[1]; ptr[5] = c[1]; \
+}
+
+MSA_INTERLEAVED_IMPL_STORE3_64(uint64_t, v2u64, u64) /* defines msa_st3q_u64 */
+MSA_INTERLEAVED_IMPL_STORE3_64(int64_t, v2i64, s64) /* defines msa_st3q_s64 */
+MSA_INTERLEAVED_IMPL_STORE3_64(double, v2f64, f64) /* defines msa_st3q_f64 */
+
+#define MSA_INTERLEAVED_IMPL_LOAD4_STORE4(_Tp, _Tpv, _Tpvs, suffix, df, nlanes) /* defines msa_ld4q_<suffix> and msa_st4q_<suffix>; df is the MSA element-size letter, nlanes the elements per vector */ \
+__extension__ extern __inline void \
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \
+msa_ld4q_##suffix(const _Tp* ptr, _Tpv* a, _Tpv* b, _Tpv* c, _Tpv* d) \
+{ /* Reads 4 * nlanes elements from ptr and splits them into four vectors via two rounds of pack-even / pack-odd; presumably *a gets elements 0,4,8,..., *b 1,5,9,..., *c 2,6,..., *d 3,7,... - confirm against PCKEV/PCKOD operand order. */ \
+  _Tpv v0 = msa_ld1q_##suffix(ptr); \
+  _Tpv v1 = msa_ld1q_##suffix(ptr + nlanes); \
+  _Tpv v2 = msa_ld1q_##suffix(ptr + nlanes * 2); \
+  _Tpv v3 = msa_ld1q_##suffix(ptr + nlanes * 3); \
+  _Tpvs t0 = __builtin_msa_pckev_##df((_Tpvs)v1, (_Tpvs)v0); /* round 1: even-indexed elements of v0/v1 */ \
+  _Tpvs t1 = __builtin_msa_pckev_##df((_Tpvs)v3, (_Tpvs)v2); \
+  _Tpvs t2 = __builtin_msa_pckod_##df((_Tpvs)v1, (_Tpvs)v0); /* round 1: odd-indexed elements of v0/v1 */ \
+  _Tpvs t3 = __builtin_msa_pckod_##df((_Tpvs)v3, (_Tpvs)v2); \
+  *a = (_Tpv)__builtin_msa_pckev_##df(t1, t0); /* round 2 separates the two channels still mixed in each t */ \
+  *b = (_Tpv)__builtin_msa_pckev_##df(t3, t2); \
+  *c = (_Tpv)__builtin_msa_pckod_##df(t1, t0); \
+  *d = (_Tpv)__builtin_msa_pckod_##df(t3, t2); \
+} \
+__extension__ extern __inline void \
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \
+msa_st4q_##suffix(_Tp* ptr, const _Tpv a, const _Tpv b, const _Tpv c, const _Tpv d) \
+{ /* Writes 4 * nlanes elements to ptr using two rounds of interleave-right / interleave-left; presumably the order is a0 b0 c0 d0 a1 b1 ... - confirm against ILVR/ILVL operand order. */ \
+  _Tpvs v0 = __builtin_msa_ilvr_##df((_Tpvs)c, (_Tpvs)a); /* round 1 pairs a with c and b with d */ \
+  _Tpvs v1 = __builtin_msa_ilvr_##df((_Tpvs)d, (_Tpvs)b); \
+  _Tpvs v2 = __builtin_msa_ilvl_##df((_Tpvs)c, (_Tpvs)a); \
+  _Tpvs v3 = __builtin_msa_ilvl_##df((_Tpvs)d, (_Tpvs)b); \
+  msa_st1q_##suffix(ptr, (_Tpv)__builtin_msa_ilvr_##df(v1, v0)); /* round 2 merges the pairs and stores four vectors */ \
+  msa_st1q_##suffix(ptr + nlanes, (_Tpv)__builtin_msa_ilvl_##df(v1, v0)); \
+  msa_st1q_##suffix(ptr + 2 * nlanes, (_Tpv)__builtin_msa_ilvr_##df(v3, v2)); \
+  msa_st1q_##suffix(ptr + 3 * nlanes, (_Tpv)__builtin_msa_ilvl_##df(v3, v2)); \
+}
+
+MSA_INTERLEAVED_IMPL_LOAD4_STORE4(uint8_t, v16u8, v16i8, u8, b, 16) /* msa_ld4q_u8 / msa_st4q_u8 */
+MSA_INTERLEAVED_IMPL_LOAD4_STORE4(int8_t, v16i8, v16i8, s8, b, 16) /* msa_ld4q_s8 / msa_st4q_s8 */
+MSA_INTERLEAVED_IMPL_LOAD4_STORE4(uint16_t, v8u16, v8i16, u16, h, 8) /* msa_ld4q_u16 / msa_st4q_u16 */
+MSA_INTERLEAVED_IMPL_LOAD4_STORE4(int16_t, v8i16, v8i16, s16, h, 8) /* msa_ld4q_s16 / msa_st4q_s16 */
+MSA_INTERLEAVED_IMPL_LOAD4_STORE4(uint32_t, v4u32, v4i32, u32, w, 4) /* msa_ld4q_u32 / msa_st4q_u32 */
+MSA_INTERLEAVED_IMPL_LOAD4_STORE4(int32_t, v4i32, v4i32, s32, w, 4) /* msa_ld4q_s32 / msa_st4q_s32 */
+MSA_INTERLEAVED_IMPL_LOAD4_STORE4(float, v4f32, v4i32, f32, w, 4) /* msa_ld4q_f32 / msa_st4q_f32; packing runs on the v4i32 view */
+
+#define MSA_INTERLEAVED_IMPL_LOAD4_STORE4_64(_Tp, _Tpv, _Tpvs, suffix) /* 64-bit variant: two elements per vector, so a single interleave step is enough */ \
+__extension__ extern __inline void \
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \
+msa_ld4q_##suffix(const _Tp* ptr, _Tpv* a, _Tpv* b, _Tpv* c, _Tpv* d) \
+{ /* Reads ptr[0..7] as four vectors and recombines them; presumably *a = {p0,p4}, *b = {p1,p5}, *c = {p2,p6}, *d = {p3,p7} - confirm against ILVR.D/ILVL.D operand order. */ \
+  _Tpv v0 = msa_ld1q_##suffix(ptr); \
+  _Tpv v1 = msa_ld1q_##suffix(ptr + 2); \
+  _Tpv v2 = msa_ld1q_##suffix(ptr + 4); \
+  _Tpv v3 = msa_ld1q_##suffix(ptr + 6); \
+  *a = (_Tpv)__builtin_msa_ilvr_d((_Tpvs)v2, (_Tpvs)v0); \
+  *b = (_Tpv)__builtin_msa_ilvl_d((_Tpvs)v2, (_Tpvs)v0); \
+  *c = (_Tpv)__builtin_msa_ilvr_d((_Tpvs)v3, (_Tpvs)v1); \
+  *d = (_Tpv)__builtin_msa_ilvl_d((_Tpvs)v3, (_Tpvs)v1); \
+} \
+__extension__ extern __inline void \
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \
+msa_st4q_##suffix(_Tp* ptr, const _Tpv a, const _Tpv b, const _Tpv c, const _Tpv d) \
+{ /* Writes four vectors to ptr[0..7]; presumably in the order a0 b0 c0 d0 a1 b1 c1 d1 - confirm against ILVR.D/ILVL.D operand order. */ \
+  msa_st1q_##suffix(ptr, (_Tpv)__builtin_msa_ilvr_d((_Tpvs)b, (_Tpvs)a)); \
+  msa_st1q_##suffix(ptr + 2, (_Tpv)__builtin_msa_ilvr_d((_Tpvs)d, (_Tpvs)c)); \
+  msa_st1q_##suffix(ptr + 4, (_Tpv)__builtin_msa_ilvl_d((_Tpvs)b, (_Tpvs)a)); \
+  msa_st1q_##suffix(ptr + 6, (_Tpv)__builtin_msa_ilvl_d((_Tpvs)d, (_Tpvs)c)); \
+}
+
+MSA_INTERLEAVED_IMPL_LOAD4_STORE4_64(uint64_t, v2u64, v2i64, u64) /* msa_ld4q_u64 / msa_st4q_u64 */
+MSA_INTERLEAVED_IMPL_LOAD4_STORE4_64(int64_t, v2i64, v2i64, s64) /* msa_ld4q_s64 / msa_st4q_s64 */
+MSA_INTERLEAVED_IMPL_LOAD4_STORE4_64(double, v2f64, v2i64, f64) /* msa_ld4q_f64 / msa_st4q_f64; interleave runs on the v2i64 view */
+
+__extension__ extern __inline v8i16
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+msa_qdmulhq_n_s16(v8i16 a, int16_t b) /* multiplies each lane of a by scalar b in 32-bit, doubles it (<< 1), then packs back to 16-bit lanes with a shift argument of 16 */
+{
+  v8i16 a_lo, a_hi;
+  ILVRL_H2_SH(a, msa_dupq_n_s16(0), a_lo, a_hi); /* combines a with an all-zero vector into a_lo / a_hi; presumably a right/left interleave - macro defined elsewhere, confirm */
+  return msa_packr_s32(msa_shlq_n_s32(msa_mulq_s32(msa_paddlq_s16(a_lo), msa_dupq_n_s32(b)), 1), /* a_lo part: pairwise-add to 32-bit lanes, times broadcast b, shifted left by 1 */
+                       msa_shlq_n_s32(msa_mulq_s32(msa_paddlq_s16(a_hi), msa_dupq_n_s32(b)), 1), 16); /* same for a_hi; NOTE(review): no explicit saturation step is visible in this function */
+}
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif /*__mips_msa*/
+#endif /* OPENCV_CORE_MSA_MACROS_H */
diff --git a/IPL/include/opencv/opencv2/core/hal/simd_utils.impl.hpp b/IPL/include/opencv/opencv2/core/hal/simd_utils.impl.hpp
new file mode 100644
index 0000000..fff8f94
--- /dev/null
+++ b/IPL/include/opencv/opencv2/core/hal/simd_utils.impl.hpp
@@ -0,0 +1,146 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html
+
+// This header is not standalone. Don't include directly, use "intrin.hpp" instead.
+#ifdef OPENCV_HAL_INTRIN_HPP  // defined in intrin.hpp
+
+
+#if CV_SIMD128 || CV_SIMD128_CPP  // 128-bit helpers: scalar type -> vector type traits, plus a type-generic v_setall
+
+template<typename _T> struct Type2Vec128_Traits;  // declared only; usable solely through the specializations generated below
+#define CV_INTRIN_DEF_TYPE2VEC128_TRAITS(type_, vec_type_) /* specializes the traits so that type_ maps to vec_type_ */ \
+    template<> struct Type2Vec128_Traits<type_> \
+    { \
+        typedef vec_type_ vec_type; \
+    }
+
+CV_INTRIN_DEF_TYPE2VEC128_TRAITS(uchar, v_uint8x16);
+CV_INTRIN_DEF_TYPE2VEC128_TRAITS(schar, v_int8x16);
+CV_INTRIN_DEF_TYPE2VEC128_TRAITS(ushort, v_uint16x8);
+CV_INTRIN_DEF_TYPE2VEC128_TRAITS(short, v_int16x8);
+CV_INTRIN_DEF_TYPE2VEC128_TRAITS(unsigned, v_uint32x4);
+CV_INTRIN_DEF_TYPE2VEC128_TRAITS(int, v_int32x4);
+CV_INTRIN_DEF_TYPE2VEC128_TRAITS(float, v_float32x4);
+CV_INTRIN_DEF_TYPE2VEC128_TRAITS(uint64, v_uint64x2);
+CV_INTRIN_DEF_TYPE2VEC128_TRAITS(int64, v_int64x2);
+#if CV_SIMD128_64F  // the double mapping exists only when CV_SIMD128_64F is non-zero
+CV_INTRIN_DEF_TYPE2VEC128_TRAITS(double, v_float64x2);
+#endif
+
+template<typename _T> static inline  // generic entry point; no generic body, each supported type is specialized below
+typename Type2Vec128_Traits<_T>::vec_type v_setall(const _T& a);
+
+template<> inline Type2Vec128_Traits< uchar>::vec_type v_setall< uchar>(const  uchar& a) { return v_setall_u8(a); }
+template<> inline Type2Vec128_Traits< schar>::vec_type v_setall< schar>(const  schar& a) { return v_setall_s8(a); }
+template<> inline Type2Vec128_Traits<ushort>::vec_type v_setall<ushort>(const ushort& a) { return v_setall_u16(a); }
+template<> inline Type2Vec128_Traits< short>::vec_type v_setall< short>(const  short& a) { return v_setall_s16(a); }
+template<> inline Type2Vec128_Traits<  uint>::vec_type v_setall<  uint>(const   uint& a) { return v_setall_u32(a); }  // uint: presumably a typedef of unsigned, matching the traits entry above - confirm
+template<> inline Type2Vec128_Traits<   int>::vec_type v_setall<   int>(const    int& a) { return v_setall_s32(a); }
+template<> inline Type2Vec128_Traits<uint64>::vec_type v_setall<uint64>(const uint64& a) { return v_setall_u64(a); }
+template<> inline Type2Vec128_Traits< int64>::vec_type v_setall< int64>(const  int64& a) { return v_setall_s64(a); }
+template<> inline Type2Vec128_Traits< float>::vec_type v_setall< float>(const  float& a) { return v_setall_f32(a); }
+#if CV_SIMD128_64F
+template<> inline Type2Vec128_Traits<double>::vec_type v_setall<double>(const double& a) { return v_setall_f64(a); }
+#endif
+
+#endif  // CV_SIMD128 || CV_SIMD128_CPP
+
+
+#if CV_SIMD256  // 256-bit helpers: scalar type -> vector type traits, plus a type-generic v256_setall
+
+template<typename _T> struct Type2Vec256_Traits;  // declared only; usable solely through the specializations generated below
+#define CV_INTRIN_DEF_TYPE2VEC256_TRAITS(type_, vec_type_) /* specializes the traits so that type_ maps to vec_type_ */ \
+    template<> struct Type2Vec256_Traits<type_> \
+    { \
+        typedef vec_type_ vec_type; \
+    }
+
+CV_INTRIN_DEF_TYPE2VEC256_TRAITS(uchar, v_uint8x32);
+CV_INTRIN_DEF_TYPE2VEC256_TRAITS(schar, v_int8x32);
+CV_INTRIN_DEF_TYPE2VEC256_TRAITS(ushort, v_uint16x16);
+CV_INTRIN_DEF_TYPE2VEC256_TRAITS(short, v_int16x16);
+CV_INTRIN_DEF_TYPE2VEC256_TRAITS(unsigned, v_uint32x8);
+CV_INTRIN_DEF_TYPE2VEC256_TRAITS(int, v_int32x8);
+CV_INTRIN_DEF_TYPE2VEC256_TRAITS(float, v_float32x8);
+CV_INTRIN_DEF_TYPE2VEC256_TRAITS(uint64, v_uint64x4);
+CV_INTRIN_DEF_TYPE2VEC256_TRAITS(int64, v_int64x4);
+#if CV_SIMD256_64F  // the double mapping exists only when CV_SIMD256_64F is non-zero
+CV_INTRIN_DEF_TYPE2VEC256_TRAITS(double, v_float64x4);
+#endif
+
+template<typename _T> static inline  // generic entry point; no generic body, each supported type is specialized below
+typename Type2Vec256_Traits<_T>::vec_type v256_setall(const _T& a);
+
+template<> inline Type2Vec256_Traits< uchar>::vec_type v256_setall< uchar>(const  uchar& a) { return v256_setall_u8(a); }
+template<> inline Type2Vec256_Traits< schar>::vec_type v256_setall< schar>(const  schar& a) { return v256_setall_s8(a); }
+template<> inline Type2Vec256_Traits<ushort>::vec_type v256_setall<ushort>(const ushort& a) { return v256_setall_u16(a); }
+template<> inline Type2Vec256_Traits< short>::vec_type v256_setall< short>(const  short& a) { return v256_setall_s16(a); }
+template<> inline Type2Vec256_Traits<  uint>::vec_type v256_setall<  uint>(const   uint& a) { return v256_setall_u32(a); }  // uint: presumably a typedef of unsigned, matching the traits entry above - confirm
+template<> inline Type2Vec256_Traits<   int>::vec_type v256_setall<   int>(const    int& a) { return v256_setall_s32(a); }
+template<> inline Type2Vec256_Traits<uint64>::vec_type v256_setall<uint64>(const uint64& a) { return v256_setall_u64(a); }
+template<> inline Type2Vec256_Traits< int64>::vec_type v256_setall< int64>(const  int64& a) { return v256_setall_s64(a); }
+template<> inline Type2Vec256_Traits< float>::vec_type v256_setall< float>(const  float& a) { return v256_setall_f32(a); }
+#if CV_SIMD256_64F
+template<> inline Type2Vec256_Traits<double>::vec_type v256_setall<double>(const double& a) { return v256_setall_f64(a); }
+#endif
+
+#endif  // CV_SIMD256
+
+
+#if CV_SIMD512  // 512-bit helpers: scalar type -> vector type traits, plus a type-generic v512_setall
+
+template<typename _T> struct Type2Vec512_Traits;  // declared only; usable solely through the specializations generated below
+#define CV_INTRIN_DEF_TYPE2VEC512_TRAITS(type_, vec_type_) /* specializes the traits so that type_ maps to vec_type_ */ \
+    template<> struct Type2Vec512_Traits<type_> \
+    { \
+        typedef vec_type_ vec_type; \
+    }
+
+CV_INTRIN_DEF_TYPE2VEC512_TRAITS(uchar, v_uint8x64);
+CV_INTRIN_DEF_TYPE2VEC512_TRAITS(schar, v_int8x64);
+CV_INTRIN_DEF_TYPE2VEC512_TRAITS(ushort, v_uint16x32);
+CV_INTRIN_DEF_TYPE2VEC512_TRAITS(short, v_int16x32);
+CV_INTRIN_DEF_TYPE2VEC512_TRAITS(unsigned, v_uint32x16);
+CV_INTRIN_DEF_TYPE2VEC512_TRAITS(int, v_int32x16);
+CV_INTRIN_DEF_TYPE2VEC512_TRAITS(float, v_float32x16);
+CV_INTRIN_DEF_TYPE2VEC512_TRAITS(uint64, v_uint64x8);
+CV_INTRIN_DEF_TYPE2VEC512_TRAITS(int64, v_int64x8);
+#if CV_SIMD512_64F  // the double mapping exists only when CV_SIMD512_64F is non-zero
+CV_INTRIN_DEF_TYPE2VEC512_TRAITS(double, v_float64x8);
+#endif
+
+template<typename _T> static inline  // generic entry point; no generic body, each supported type is specialized below
+typename Type2Vec512_Traits<_T>::vec_type v512_setall(const _T& a);
+
+template<> inline Type2Vec512_Traits< uchar>::vec_type v512_setall< uchar>(const  uchar& a) { return v512_setall_u8(a); }
+template<> inline Type2Vec512_Traits< schar>::vec_type v512_setall< schar>(const  schar& a) { return v512_setall_s8(a); }
+template<> inline Type2Vec512_Traits<ushort>::vec_type v512_setall<ushort>(const ushort& a) { return v512_setall_u16(a); }
+template<> inline Type2Vec512_Traits< short>::vec_type v512_setall< short>(const  short& a) { return v512_setall_s16(a); }
+template<> inline Type2Vec512_Traits<  uint>::vec_type v512_setall<  uint>(const   uint& a) { return v512_setall_u32(a); }  // uint: presumably a typedef of unsigned, matching the traits entry above - confirm
+template<> inline Type2Vec512_Traits<   int>::vec_type v512_setall<   int>(const    int& a) { return v512_setall_s32(a); }
+template<> inline Type2Vec512_Traits<uint64>::vec_type v512_setall<uint64>(const uint64& a) { return v512_setall_u64(a); }
+template<> inline Type2Vec512_Traits< int64>::vec_type v512_setall< int64>(const  int64& a) { return v512_setall_s64(a); }
+template<> inline Type2Vec512_Traits< float>::vec_type v512_setall< float>(const  float& a) { return v512_setall_f32(a); }
+#if CV_SIMD512_64F
+template<> inline Type2Vec512_Traits<double>::vec_type v512_setall<double>(const double& a) { return v512_setall_f64(a); }
+#endif
+
+#endif  // CV_SIMD512
+
+
+#if CV_SIMD_WIDTH == 16  // vx_setall picks one width-specific helper at preprocessing time; here it forwards to v_setall
+template<typename _T> static inline
+typename Type2Vec128_Traits<_T>::vec_type vx_setall(const _T& a) { return v_setall(a); }
+#elif CV_SIMD_WIDTH == 32  // forwards to v256_setall
+template<typename _T> static inline
+typename Type2Vec256_Traits<_T>::vec_type vx_setall(const _T& a) { return v256_setall(a); }
+#elif CV_SIMD_WIDTH == 64  // forwards to v512_setall
+template<typename _T> static inline
+typename Type2Vec512_Traits<_T>::vec_type vx_setall(const _T& a) { return v512_setall(a); }
+#else  // any other CV_SIMD_WIDTH value stops the build
+#error "Build configuration error, unsupported CV_SIMD_WIDTH"
+#endif  // CV_SIMD_WIDTH
+
+
+#endif  // OPENCV_HAL_INTRIN_HPP
diff --git a/IPL/include/opencv/opencv2/core/ippasync.hpp b/IPL/include/opencv/opencv2/core/ippasync.hpp
deleted file mode 100644
index 4de8611..0000000
--- a/IPL/include/opencv/opencv2/core/ippasync.hpp
+++ /dev/null
@@ -1,195 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                          License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2015, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
-// Copyright (C) 2015, Itseez Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifndef __OPENCV_CORE_IPPASYNC_HPP__
-#define __OPENCV_CORE_IPPASYNC_HPP__
-
-#ifdef HAVE_IPP_A
-
-#include "opencv2/core.hpp"
-#include <ipp_async_op.h>
-#include <ipp_async_accel.h>
-
-namespace cv
-{
-
-namespace hpp
-{
-
-/** @addtogroup core_ipp
-This section describes conversion between OpenCV and [Intel&reg; IPP Asynchronous
-C/C++](http://software.intel.com/en-us/intel-ipp-preview) library. [Getting Started
-Guide](http://registrationcenter.intel.com/irc_nas/3727/ipp_async_get_started.htm) help you to
-install the library, configure header and library build paths.
- */
-//! @{
-
-    //! convert OpenCV data type to hppDataType
-    inline int toHppType(const int cvType)
-    {
-        int depth = CV_MAT_DEPTH(cvType);
-        int hppType = depth == CV_8U ? HPP_DATA_TYPE_8U :
-                     depth == CV_16U ? HPP_DATA_TYPE_16U :
-                     depth == CV_16S ? HPP_DATA_TYPE_16S :
-                     depth == CV_32S ? HPP_DATA_TYPE_32S :
-                     depth == CV_32F ? HPP_DATA_TYPE_32F :
-                     depth == CV_64F ? HPP_DATA_TYPE_64F : -1;
-        CV_Assert( hppType >= 0 );
-        return hppType;
-    }
-
-    //! convert hppDataType to OpenCV data type
-    inline int toCvType(const int hppType)
-    {
-        int cvType = hppType == HPP_DATA_TYPE_8U ? CV_8U :
-                    hppType == HPP_DATA_TYPE_16U ? CV_16U :
-                    hppType == HPP_DATA_TYPE_16S ? CV_16S :
-                    hppType == HPP_DATA_TYPE_32S ? CV_32S :
-                    hppType == HPP_DATA_TYPE_32F ? CV_32F :
-                    hppType == HPP_DATA_TYPE_64F ? CV_64F : -1;
-        CV_Assert( cvType >= 0 );
-        return cvType;
-    }
-
-    /** @brief Convert hppiMatrix to Mat.
-
-    This function allocates and initializes new matrix (if needed) that has the same size and type as
-    input matrix. Supports CV_8U, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F.
-    @param src input hppiMatrix.
-    @param dst output matrix.
-    @param accel accelerator instance (see hpp::getHpp for the list of acceleration framework types).
-    @param cn number of channels.
-     */
-    inline void copyHppToMat(hppiMatrix* src, Mat& dst, hppAccel accel, int cn)
-    {
-        hppDataType type;
-        hpp32u width, height;
-        hppStatus sts;
-
-        if (src == NULL)
-            return dst.release();
-
-        sts = hppiInquireMatrix(src, &type, &width, &height);
-
-        CV_Assert( sts == HPP_STATUS_NO_ERROR);
-
-        int matType = CV_MAKETYPE(toCvType(type), cn);
-
-        CV_Assert(width%cn == 0);
-
-        width /= cn;
-
-        dst.create((int)height, (int)width, (int)matType);
-
-        size_t newSize = (size_t)(height*(hpp32u)(dst.step));
-
-        sts = hppiGetMatrixData(accel,src,(hpp32u)(dst.step),dst.data,&newSize);
-
-        CV_Assert( sts == HPP_STATUS_NO_ERROR);
-    }
-
-    /** @brief Create Mat from hppiMatrix.
-
-    This function allocates and initializes the Mat that has the same size and type as input matrix.
-    Supports CV_8U, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F.
-    @param src input hppiMatrix.
-    @param accel accelerator instance (see hpp::getHpp for the list of acceleration framework types).
-    @param cn number of channels.
-    @sa howToUseIPPAconversion, hpp::copyHppToMat, hpp::getHpp.
-     */
-    inline Mat getMat(hppiMatrix* src, hppAccel accel, int cn)
-    {
-        Mat dst;
-        copyHppToMat(src, dst, accel, cn);
-        return dst;
-    }
-
-    /** @brief Create hppiMatrix from Mat.
-
-    This function allocates and initializes the hppiMatrix that has the same size and type as input
-    matrix, returns the hppiMatrix*.
-
-    If you want to use zero-copy for GPU you should to have 4KB aligned matrix data. See details
-    [hppiCreateSharedMatrix](http://software.intel.com/ru-ru/node/501697).
-
-    Supports CV_8U, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F.
-
-    @note The hppiMatrix pointer to the image buffer in system memory refers to the src.data. Control
-    the lifetime of the matrix and don't change its data, if there is no special need.
-    @param src input matrix.
-    @param accel accelerator instance. Supports type:
-    -   **HPP_ACCEL_TYPE_CPU** - accelerated by optimized CPU instructions.
-    -   **HPP_ACCEL_TYPE_GPU** - accelerated by GPU programmable units or fixed-function
-        accelerators.
-    -   **HPP_ACCEL_TYPE_ANY** - any acceleration or no acceleration available.
-    @sa howToUseIPPAconversion, hpp::getMat
-     */
-    inline hppiMatrix* getHpp(const Mat& src, hppAccel accel)
-    {
-        int htype = toHppType(src.type());
-        int cn = src.channels();
-
-        CV_Assert(src.data);
-        hppAccelType accelType = hppQueryAccelType(accel);
-
-        if (accelType!=HPP_ACCEL_TYPE_CPU)
-        {
-            hpp32u pitch, size;
-            hppQueryMatrixAllocParams(accel, src.cols*cn, src.rows, htype, &pitch, &size);
-            if (pitch!=0 && size!=0)
-                if ((int)(src.data)%4096==0 && pitch==(hpp32u)(src.step))
-                {
-                    return hppiCreateSharedMatrix(htype, src.cols*cn, src.rows, src.data, pitch, size);
-                }
-        }
-
-        return hppiCreateMatrix(htype, src.cols*cn, src.rows, src.data, (hpp32s)(src.step));;
-    }
-
-//! @}
-}}
-
-#endif
-
-#endif
diff --git a/IPL/include/opencv/opencv2/core/mat.hpp b/IPL/include/opencv/opencv2/core/mat.hpp
index d554663..adabeca 100644
--- a/IPL/include/opencv/opencv2/core/mat.hpp
+++ b/IPL/include/opencv/opencv2/core/mat.hpp
@@ -41,8 +41,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_CORE_MAT_HPP__
-#define __OPENCV_CORE_MAT_HPP__
+#ifndef OPENCV_CORE_MAT_HPP
+#define OPENCV_CORE_MAT_HPP
 
 #ifndef __cplusplus
 #  error mat.hpp header must be compiled as C++
@@ -53,14 +53,20 @@
 
 #include "opencv2/core/bufferpool.hpp"
 
+#include <type_traits>
+
 namespace cv
 {
 
 //! @addtogroup core_basic
 //! @{
 
-enum { ACCESS_READ=1<<24, ACCESS_WRITE=1<<25,
+enum AccessFlag { ACCESS_READ=1<<24, ACCESS_WRITE=1<<25,
     ACCESS_RW=3<<24, ACCESS_MASK=ACCESS_RW, ACCESS_FAST=1<<26 };
+CV_ENUM_FLAGS(AccessFlag)
+__CV_ENUM_FLAGS_BITWISE_AND(AccessFlag, int, AccessFlag)
+
+CV__DEBUG_NS_BEGIN
 
 class CV_EXPORTS _OutputArray;
 
@@ -73,8 +79,8 @@ It is defined as:
     typedef const _InputArray& InputArray;
 @endcode
 where _InputArray is a class that can be constructed from `Mat`, `Mat_<T>`, `Matx<T, m, n>`,
-`std::vector<T>`, `std::vector<std::vector<T> >` or `std::vector<Mat>`. It can also be constructed
-from a matrix expression.
+`std::vector<T>`, `std::vector<std::vector<T> >`, `std::vector<Mat>`, `std::vector<Mat_<T> >`,
+`UMat`, `std::vector<UMat>` or `double`. It can also be constructed from a matrix expression.
 
 Since this is mostly implementation-level class, and its interface may change in future versions, we
 do not describe it in details. There are a few key things, though, that should be kept in mind:
@@ -142,11 +148,17 @@ synonym is needed to generate Python/Java etc. wrappers properly. At the functio
 level their use is similar, but _InputArray::getMat(idx) should be used to get header for the
 idx-th component of the outer vector and _InputArray::size().area() should be used to find the
 number of components (vectors/matrices) of the outer vector.
+
+In general, type support is limited to cv::Mat types. Other types are forbidden.
+But in some cases we need to support passing of custom non-general Mat types, like arrays of cv::KeyPoint, cv::DMatch, etc.
+This data is not intended to be interpreted as an image data, or processed somehow like regular cv::Mat.
+To pass such custom type use rawIn() / rawOut() / rawInOut() wrappers.
+Custom type is wrapped as Mat-compatible `CV_8UC<N>` values (N = sizeof(T), N <= CV_CN_MAX).
  */
 class CV_EXPORTS _InputArray
 {
 public:
-    enum {
+    enum KindFlag {
         KIND_SHIFT = 16,
         FIXED_TYPE = 0x8000 << KIND_SHIFT,
         FIXED_SIZE = 0x4000 << KIND_SHIFT,
@@ -165,7 +177,9 @@ class CV_EXPORTS _InputArray
         UMAT              =10 << KIND_SHIFT,
         STD_VECTOR_UMAT   =11 << KIND_SHIFT,
         STD_BOOL_VECTOR   =12 << KIND_SHIFT,
-        STD_VECTOR_CUDA_GPU_MAT = 13 << KIND_SHIFT
+        STD_VECTOR_CUDA_GPU_MAT = 13 << KIND_SHIFT,
+        STD_ARRAY         =14 << KIND_SHIFT,
+        STD_ARRAY_MAT     =15 << KIND_SHIFT
     };
 
     _InputArray();
@@ -177,6 +191,7 @@ class CV_EXPORTS _InputArray
     template<typename _Tp> _InputArray(const std::vector<_Tp>& vec);
     _InputArray(const std::vector<bool>& vec);
     template<typename _Tp> _InputArray(const std::vector<std::vector<_Tp> >& vec);
+    _InputArray(const std::vector<std::vector<bool> >&) = delete;  // not supported
     template<typename _Tp> _InputArray(const std::vector<Mat_<_Tp> >& vec);
     template<typename _Tp> _InputArray(const _Tp* vec, int n);
     template<typename _Tp, int m, int n> _InputArray(const Matx<_Tp, m, n>& matx);
@@ -189,6 +204,12 @@ class CV_EXPORTS _InputArray
     _InputArray(const UMat& um);
     _InputArray(const std::vector<UMat>& umv);
 
+    template<typename _Tp, std::size_t _Nm> _InputArray(const std::array<_Tp, _Nm>& arr);
+    template<std::size_t _Nm> _InputArray(const std::array<Mat, _Nm>& arr);
+
+    template<typename _Tp> static _InputArray rawIn(const std::vector<_Tp>& vec);
+    template<typename _Tp, std::size_t _Nm> static _InputArray rawIn(const std::array<_Tp, _Nm>& arr);
+
     Mat getMat(int idx=-1) const;
     Mat getMat_(int idx=-1) const;
     UMat getUMat(int idx=-1) const;
@@ -202,7 +223,7 @@ class CV_EXPORTS _InputArray
     void* getObj() const;
     Size getSz() const;
 
-    int kind() const;
+    _InputArray::KindFlag kind() const;
     int dims(int i=-1) const;
     int cols(int i=-1) const;
     int rows(int i=-1) const;
@@ -226,6 +247,7 @@ class CV_EXPORTS _InputArray
     bool isUMatVector() const;
     bool isMatx() const;
     bool isVector() const;
+    bool isGpuMat() const;
     bool isGpuMatVector() const;
     ~_InputArray();
 
@@ -237,7 +259,8 @@ class CV_EXPORTS _InputArray
     void init(int _flags, const void* _obj);
     void init(int _flags, const void* _obj, Size _sz);
 };
-
+CV_ENUM_FLAGS(_InputArray::KindFlag)
+__CV_ENUM_FLAGS_BITWISE_AND(_InputArray::KindFlag, int, _InputArray::KindFlag)
 
 /** @brief This type is very similar to InputArray except that it is used for input/output and output function
 parameters.
@@ -267,7 +290,7 @@ There are several synonyms for OutputArray that are used to assist automatic Pyt
 class CV_EXPORTS _OutputArray : public _InputArray
 {
 public:
-    enum
+    enum DepthMask
     {
         DEPTH_MASK_8U = 1 << CV_8U,
         DEPTH_MASK_8S = 1 << CV_8S,
@@ -276,8 +299,10 @@ class CV_EXPORTS _OutputArray : public _InputArray
         DEPTH_MASK_32S = 1 << CV_32S,
         DEPTH_MASK_32F = 1 << CV_32F,
         DEPTH_MASK_64F = 1 << CV_64F,
+        DEPTH_MASK_16F = 1 << CV_16F,
         DEPTH_MASK_ALL = (DEPTH_MASK_64F<<1)-1,
         DEPTH_MASK_ALL_BUT_8S = DEPTH_MASK_ALL & ~DEPTH_MASK_8S,
+        DEPTH_MASK_ALL_16F = (DEPTH_MASK_16F<<1)-1,
         DEPTH_MASK_FLT = DEPTH_MASK_32F + DEPTH_MASK_64F
     };
 
@@ -291,8 +316,9 @@ class CV_EXPORTS _OutputArray : public _InputArray
     _OutputArray(cuda::HostMem& cuda_mem);
     template<typename _Tp> _OutputArray(cudev::GpuMat_<_Tp>& m);
     template<typename _Tp> _OutputArray(std::vector<_Tp>& vec);
-    _OutputArray(std::vector<bool>& vec);
+    _OutputArray(std::vector<bool>& vec) = delete;  // not supported
     template<typename _Tp> _OutputArray(std::vector<std::vector<_Tp> >& vec);
+    _OutputArray(std::vector<std::vector<bool> >&) = delete;  // not supported
     template<typename _Tp> _OutputArray(std::vector<Mat_<_Tp> >& vec);
     template<typename _Tp> _OutputArray(Mat_<_Tp>& m);
     template<typename _Tp> _OutputArray(_Tp* vec, int n);
@@ -316,6 +342,14 @@ class CV_EXPORTS _OutputArray : public _InputArray
     _OutputArray(const UMat& m);
     _OutputArray(const std::vector<UMat>& vec);
 
+    template<typename _Tp, std::size_t _Nm> _OutputArray(std::array<_Tp, _Nm>& arr);
+    template<typename _Tp, std::size_t _Nm> _OutputArray(const std::array<_Tp, _Nm>& arr);
+    template<std::size_t _Nm> _OutputArray(std::array<Mat, _Nm>& arr);
+    template<std::size_t _Nm> _OutputArray(const std::array<Mat, _Nm>& arr);
+
+    template<typename _Tp> static _OutputArray rawOut(std::vector<_Tp>& vec);
+    template<typename _Tp, std::size_t _Nm> static _OutputArray rawOut(std::array<_Tp, _Nm>& arr);
+
     bool fixedSize() const;
     bool fixedType() const;
     bool needed() const;
@@ -325,9 +359,9 @@ class CV_EXPORTS _OutputArray : public _InputArray
     std::vector<cuda::GpuMat>& getGpuMatVecRef() const;
     ogl::Buffer& getOGlBufferRef() const;
     cuda::HostMem& getHostMemRef() const;
-    void create(Size sz, int type, int i=-1, bool allowTransposed=false, int fixedDepthMask=0) const;
-    void create(int rows, int cols, int type, int i=-1, bool allowTransposed=false, int fixedDepthMask=0) const;
-    void create(int dims, const int* size, int type, int i=-1, bool allowTransposed=false, int fixedDepthMask=0) const;
+    void create(Size sz, int type, int i=-1, bool allowTransposed=false, _OutputArray::DepthMask fixedDepthMask=static_cast<_OutputArray::DepthMask>(0)) const;
+    void create(int rows, int cols, int type, int i=-1, bool allowTransposed=false, _OutputArray::DepthMask fixedDepthMask=static_cast<_OutputArray::DepthMask>(0)) const;
+    void create(int dims, const int* size, int type, int i=-1, bool allowTransposed=false, _OutputArray::DepthMask fixedDepthMask=static_cast<_OutputArray::DepthMask>(0)) const;
     void createSameSize(const _InputArray& arr, int mtype) const;
     void release() const;
     void clear() const;
@@ -335,6 +369,12 @@ class CV_EXPORTS _OutputArray : public _InputArray
 
     void assign(const UMat& u) const;
     void assign(const Mat& m) const;
+
+    void assign(const std::vector<UMat>& v) const;
+    void assign(const std::vector<Mat>& v) const;
+
+    void move(UMat& u) const;
+    void move(Mat& m) const;
 };
 
 
@@ -350,7 +390,7 @@ class CV_EXPORTS _InputOutputArray : public _OutputArray
     _InputOutputArray(cuda::HostMem& cuda_mem);
     template<typename _Tp> _InputOutputArray(cudev::GpuMat_<_Tp>& m);
     template<typename _Tp> _InputOutputArray(std::vector<_Tp>& vec);
-    _InputOutputArray(std::vector<bool>& vec);
+    _InputOutputArray(std::vector<bool>& vec) = delete;  // not supported
     template<typename _Tp> _InputOutputArray(std::vector<std::vector<_Tp> >& vec);
     template<typename _Tp> _InputOutputArray(std::vector<Mat_<_Tp> >& vec);
     template<typename _Tp> _InputOutputArray(Mat_<_Tp>& m);
@@ -374,8 +414,26 @@ class CV_EXPORTS _InputOutputArray : public _OutputArray
     template<typename _Tp, int m, int n> _InputOutputArray(const Matx<_Tp, m, n>& matx);
     _InputOutputArray(const UMat& m);
     _InputOutputArray(const std::vector<UMat>& vec);
+
+    template<typename _Tp, std::size_t _Nm> _InputOutputArray(std::array<_Tp, _Nm>& arr);
+    template<typename _Tp, std::size_t _Nm> _InputOutputArray(const std::array<_Tp, _Nm>& arr);
+    template<std::size_t _Nm> _InputOutputArray(std::array<Mat, _Nm>& arr);
+    template<std::size_t _Nm> _InputOutputArray(const std::array<Mat, _Nm>& arr);
+
+    template<typename _Tp> static _InputOutputArray rawInOut(std::vector<_Tp>& vec);
+    template<typename _Tp, std::size_t _Nm> static _InputOutputArray rawInOut(std::array<_Tp, _Nm>& arr);  // static factory, like the std::vector overload
+
 };
 
+/** Helper to wrap custom types. @see InputArray */
+template<typename _Tp> static inline _InputArray rawIn(_Tp& v);
+/** Helper to wrap custom types. @see InputArray */
+template<typename _Tp> static inline _OutputArray rawOut(_Tp& v);
+/** Helper to wrap custom types. @see InputArray */
+template<typename _Tp> static inline _InputOutputArray rawInOut(_Tp& v);
+
+CV__DEBUG_NS_END
+
 typedef const _InputArray& InputArray;
 typedef InputArray InputArrayOfArrays;
 typedef const _OutputArray& OutputArray;
@@ -415,10 +473,10 @@ class CV_EXPORTS MatAllocator
     //                      uchar*& datastart, uchar*& data, size_t* step) = 0;
     //virtual void deallocate(int* refcount, uchar* datastart, uchar* data) = 0;
     virtual UMatData* allocate(int dims, const int* sizes, int type,
-                               void* data, size_t* step, int flags, UMatUsageFlags usageFlags) const = 0;
-    virtual bool allocate(UMatData* data, int accessflags, UMatUsageFlags usageFlags) const = 0;
+                               void* data, size_t* step, AccessFlag flags, UMatUsageFlags usageFlags) const = 0;
+    virtual bool allocate(UMatData* data, AccessFlag accessflags, UMatUsageFlags usageFlags) const = 0;
     virtual void deallocate(UMatData* data) const = 0;
-    virtual void map(UMatData* data, int accessflags) const;
+    virtual void map(UMatData* data, AccessFlag accessflags) const;
     virtual void unmap(UMatData* data) const;
     virtual void download(UMatData* data, void* dst, int dims, const size_t sz[],
                           const size_t srcofs[], const size_t srcstep[],
@@ -471,9 +529,11 @@ template<typename _Tp> class MatCommaInitializer_
 // it should be explicitly initialized using init().
 struct CV_EXPORTS UMatData
 {
-    enum { COPY_ON_MAP=1, HOST_COPY_OBSOLETE=2,
+    enum MemoryFlag { COPY_ON_MAP=1, HOST_COPY_OBSOLETE=2,
         DEVICE_COPY_OBSOLETE=4, TEMP_UMAT=8, TEMP_COPIED_UMAT=24,
-        USER_ALLOCATED=32, DEVICE_MEM_MAPPED=64};
+        USER_ALLOCATED=32, DEVICE_MEM_MAPPED=64,
+        ASYNC_CLEANUP=128
+    };
     UMatData(const MatAllocator* allocator);
     ~UMatData();
 
@@ -499,30 +559,24 @@ struct CV_EXPORTS UMatData
     uchar* origdata;
     size_t size;
 
-    int flags;
+    UMatData::MemoryFlag flags;
     void* handle;
     void* userdata;
     int allocatorFlags_;
     int mapcount;
     UMatData* originalUMatData;
 };
-
-
-struct CV_EXPORTS UMatDataAutoLock
-{
-    explicit UMatDataAutoLock(UMatData* u);
-    ~UMatDataAutoLock();
-    UMatData* u;
-};
+CV_ENUM_FLAGS(UMatData::MemoryFlag)
 
 
 struct CV_EXPORTS MatSize
 {
     explicit MatSize(int* _p);
+    int dims() const;
     Size operator()() const;
     const int& operator[](int i) const;
     int& operator[](int i);
-    operator const int*() const;
+    operator const int*() const;  // TODO OpenCV 4.0: drop this
     bool operator == (const MatSize& sz) const;
     bool operator != (const MatSize& sz) const;
 
@@ -544,11 +598,11 @@ struct CV_EXPORTS MatStep
     MatStep& operator = (const MatStep&);
 };
 
-/** @example cout_mat.cpp
+/** @example samples/cpp/cout_mat.cpp
 An example demonstrating the serial out capabilities of cv::Mat
 */
 
- /** @brief n-dimensional dense array class
+ /** @brief n-dimensional dense array class \anchor CVMat_Details
 
 The class Mat represents an n-dimensional dense numerical single-channel or multi-channel array. It
 can be used to store real or complex-valued vectors and matrices, grayscale or color images, voxel
@@ -563,12 +617,11 @@ Note that `M.step[i] >= M.step[i+1]` (in fact, `M.step[i] >= M.step[i+1]*M.size[
 that 2-dimensional matrices are stored row-by-row, 3-dimensional matrices are stored plane-by-plane,
 and so on. M.step[M.dims-1] is minimal and always equal to the element size M.elemSize() .
 
-So, the data layout in Mat is fully compatible with CvMat, IplImage, and CvMatND types from OpenCV
-1.x. It is also compatible with the majority of dense array types from the standard toolkits and
-SDKs, such as Numpy (ndarray), Win32 (independent device bitmaps), and others, that is, with any
-array that uses *steps* (or *strides*) to compute the position of a pixel. Due to this
-compatibility, it is possible to make a Mat header for user-allocated data and process it in-place
-using OpenCV functions.
+So, the data layout in Mat is compatible with the majority of dense array types from the standard
+toolkits and SDKs, such as Numpy (ndarray), Win32 (independent device bitmaps), and others,
+that is, with any array that uses *steps* (or *strides*) to compute the position of a pixel.
+Due to this compatibility, it is possible to make a Mat header for user-allocated data and process
+it in-place using OpenCV functions.
 
 There are many different ways to create a Mat object. The most popular options are listed below:
 
@@ -653,14 +706,10 @@ sub-matrices.
         Mat M = Mat(3, 3, CV_64F, m).inv();
     @endcode
     .
-    Partial yet very common cases of this *user-allocated data* case are conversions from CvMat and
-    IplImage to Mat. For this purpose, there is function cv::cvarrToMat taking pointers to CvMat or
-    IplImage and the optional flag indicating whether to copy the data or not.
-    @snippet samples/cpp/image.cpp iplimage
 
 - Use MATLAB-style array initializers, zeros(), ones(), eye(), for example:
 @code
-    // create a double-precision identity martix and add it to M.
+    // create a double-precision identity matrix and add it to M.
     M += Mat::eye(M.rows, M.cols, CV_64F);
 @endcode
 
@@ -693,7 +742,7 @@ If you need to process a whole row of a 2D array, the most efficient way is to g
 the row first, and then just use the plain C operator [] :
 @code
     // compute sum of positive matrix elements
-    // (assuming that M isa double-precision matrix)
+    // (assuming that M is a double-precision matrix)
     double sum=0;
     for(int i = 0; i < M.rows; i++)
     {
@@ -736,6 +785,8 @@ Finally, there are STL-style iterators that are smart enough to skip gaps betwee
 @endcode
 The matrix iterators are random-access iterators, so they can be passed to any STL algorithm,
 including std::sort().
+
+@note For matrix expressions and arithmetic, see MatExpr.
 */
 class CV_EXPORTS Mat
 {
@@ -794,6 +845,13 @@ class CV_EXPORTS Mat
     */
     Mat(int ndims, const int* sizes, int type);
 
+    /** @overload
+    @param sizes Array of integers specifying an n-dimensional array shape.
+    @param type Array type. Use CV_8UC1, ..., CV_64FC4 to create 1-4 channel matrices, or
+    CV_8UC(n), ..., CV_64FC(n) to create multi-channel (up to CV_CN_MAX channels) matrices.
+    */
+    Mat(const std::vector<int>& sizes, int type);
+
     /** @overload
     @param ndims Array dimensionality.
     @param sizes Array of integers specifying an n-dimensional array shape.
@@ -805,6 +863,17 @@ class CV_EXPORTS Mat
     */
     Mat(int ndims, const int* sizes, int type, const Scalar& s);
 
+    /** @overload
+    @param sizes Array of integers specifying an n-dimensional array shape.
+    @param type Array type. Use CV_8UC1, ..., CV_64FC4 to create 1-4 channel matrices, or
+    CV_8UC(n), ..., CV_64FC(n) to create multi-channel (up to CV_CN_MAX channels) matrices.
+    @param s An optional value to initialize each matrix element with. To set all the matrix elements to
+    the particular value after the construction, use the assignment operator
+    Mat::operator=(const Scalar& value) .
+    */
+    Mat(const std::vector<int>& sizes, int type, const Scalar& s);
+
+
     /** @overload
     @param m Array that (as a whole or partly) is assigned to the constructed matrix. No data is copied
     by these constructors. Instead, the header pointing to m data or its sub-array is constructed and
@@ -861,6 +930,20 @@ class CV_EXPORTS Mat
     */
     Mat(int ndims, const int* sizes, int type, void* data, const size_t* steps=0);
 
+    /** @overload
+    @param sizes Array of integers specifying an n-dimensional array shape.
+    @param type Array type. Use CV_8UC1, ..., CV_64FC4 to create 1-4 channel matrices, or
+    CV_8UC(n), ..., CV_64FC(n) to create multi-channel (up to CV_CN_MAX channels) matrices.
+    @param data Pointer to the user data. Matrix constructors that take data and step parameters do not
+    allocate matrix data. Instead, they just initialize the matrix header that points to the specified
+    data, which means that no data is copied. This operation is very efficient and can be used to
+    process external data using OpenCV functions. The external data is not automatically deallocated, so
+    you should take care of it.
+    @param steps Array of ndims-1 steps in case of a multi-dimensional array (the last step is always
+    set to the element size). If not specified, the matrix is assumed to be continuous.
+    */
+    Mat(const std::vector<int>& sizes, int type, void* data, const size_t* steps=0);
+
     /** @overload
     @param m Array that (as a whole or partly) is assigned to the constructed matrix. No data is copied
     by these constructors. Instead, the header pointing to m data or its sub-array is constructed and
@@ -893,6 +976,16 @@ class CV_EXPORTS Mat
     */
     Mat(const Mat& m, const Range* ranges);
 
+    /** @overload
+    @param m Array that (as a whole or partly) is assigned to the constructed matrix. No data is copied
+    by these constructors. Instead, the header pointing to m data or its sub-array is constructed and
+    associated with it. The reference counter, if any, is incremented. So, when you modify the matrix
+    formed using such a constructor, you also modify the corresponding elements of m . If you want to
+    have an independent copy of the sub-array, use Mat::clone() .
+    @param ranges Array of selected ranges of m along each dimensionality.
+    */
+    Mat(const Mat& m, const std::vector<Range>& ranges);
+
     /** @overload
     @param vec STL vector whose elements form the matrix. The matrix has a single column and the number
     of rows equal to the number of vector elements. Type of the matrix matches the type of vector
@@ -911,6 +1004,19 @@ class CV_EXPORTS Mat
     */
     template<typename _Tp> explicit Mat(const std::vector<_Tp>& vec, bool copyData=false);
 
+    /** @overload
+    */
+    template<typename _Tp, typename = typename std::enable_if<std::is_arithmetic<_Tp>::value>::type>
+    explicit Mat(const std::initializer_list<_Tp> list);
+
+    /** @overload
+    */
+    template<typename _Tp> explicit Mat(const std::initializer_list<int> sizes, const std::initializer_list<_Tp> list);
+
+    /** @overload
+    */
+    template<typename _Tp, size_t _Nm> explicit Mat(const std::array<_Tp, _Nm>& arr, bool copyData=false);
+
     /** @overload
     */
     template<typename _Tp, int n> explicit Mat(const Vec<_Tp, n>& vec, bool copyData=true);
@@ -957,7 +1063,7 @@ class CV_EXPORTS Mat
     Mat& operator = (const MatExpr& expr);
 
     //! retrieve UMat from Mat
-    UMat getUMat(int accessFlags, UMatUsageFlags usageFlags = USAGE_DEFAULT) const;
+    UMat getUMat(AccessFlag accessFlags, UMatUsageFlags usageFlags = USAGE_DEFAULT) const;
 
     /** @brief Creates a matrix header for the specified matrix row.
 
@@ -1037,18 +1143,40 @@ class CV_EXPORTS Mat
     single-column matrix. Similarly to Mat::row and Mat::col, this is an O(1) operation.
     @param d index of the diagonal, with the following values:
     - `d=0` is the main diagonal.
-    - `d>0` is a diagonal from the lower half. For example, d=1 means the diagonal is set
+    - `d<0` is a diagonal from the lower half. For example, d=-1 means the diagonal is set
       immediately below the main one.
-    - `d<0` is a diagonal from the upper half. For example, d=-1 means the diagonal is set
+    - `d>0` is a diagonal from the upper half. For example, d=1 means the diagonal is set
       immediately above the main one.
+    For example:
+    @code
+        Mat m = (Mat_<int>(3,3) <<
+                    1,2,3,
+                    4,5,6,
+                    7,8,9);
+        Mat d0 = m.diag(0);
+        Mat d1 = m.diag(1);
+        Mat d_1 = m.diag(-1);
+    @endcode
+    The resulting matrices are
+    @code
+     d0 =
+       [1;
+        5;
+        9]
+     d1 =
+       [2;
+        6]
+     d_1 =
+       [4;
+        8]
+    @endcode
      */
     Mat diag(int d=0) const;
 
     /** @brief creates a diagonal matrix
 
-    The method makes a new header for the specified matrix diagonal. The new matrix is represented as a
-    single-column matrix. Similarly to Mat::row and Mat::col, this is an O(1) operation.
-    @param d Single-column matrix that forms a diagonal matrix
+    The method creates a square diagonal matrix from the specified main diagonal.
+    @param d One-dimensional matrix that represents the main diagonal.
      */
     static Mat diag(const Mat& d);
 
@@ -1057,7 +1185,7 @@ class CV_EXPORTS Mat
     The method creates a full copy of the array. The original step[] is not taken into account. So, the
     array copy is a continuous array occupying total()*elemSize() bytes.
      */
-    Mat clone() const;
+    Mat clone() const CV_NODISCARD;
 
     /** @brief Copies the matrix to another one.
 
@@ -1079,8 +1207,8 @@ class CV_EXPORTS Mat
     /** @overload
     @param m Destination matrix. If it does not have a proper size or type before the operation, it is
     reallocated.
-    @param mask Operation mask. Its non-zero elements indicate which matrix elements need to be copied.
-    The mask has to be of type CV_8U and can have 1 or multiple channels.
+    @param mask Operation mask of the same size as \*this. Its non-zero elements indicate which matrix
+    elements need to be copied. The mask has to be of type CV_8U and can have 1 or multiple channels.
     */
     void copyTo( OutputArray m, InputArray mask ) const;
 
@@ -1116,7 +1244,8 @@ class CV_EXPORTS Mat
 
     This is an advanced variant of the Mat::operator=(const Scalar& s) operator.
     @param value Assigned scalar converted to the actual array type.
-    @param mask Operation mask of the same size as \*this.
+    @param mask Operation mask of the same size as \*this. Its non-zero elements indicate which matrix
+    elements need to be set. The mask has to be of type CV_8U and can have 1 or multiple channels.
      */
     Mat& setTo(InputArray value, InputArray mask=noArray());
 
@@ -1149,6 +1278,9 @@ class CV_EXPORTS Mat
     /** @overload */
     Mat reshape(int cn, int newndims, const int* newsz) const;
 
+    /** @overload */
+    Mat reshape(int cn, const std::vector<int>& newshape) const;
+
     /** @brief Transposes a matrix.
 
     The method performs matrix transposition by means of matrix expressions. It does not perform the
@@ -1206,7 +1338,7 @@ class CV_EXPORTS Mat
     /** @brief Returns a zero array of the specified size and type.
 
     The method returns a Matlab-style zero array initializer. It can be used to quickly form a constant
-    array as a function parameter, part of a matrix expression, or as a matrix initializer. :
+    array as a function parameter, part of a matrix expression, or as a matrix initializer:
     @code
         Mat A;
         A = Mat::zeros(3, 3, CV_32F);
@@ -1242,6 +1374,8 @@ class CV_EXPORTS Mat
     The above operation does not form a 100x100 matrix of 1's and then multiply it by 3. Instead, it
     just remembers the scale factor (3 in this case) and use it when actually invoking the matrix
     initializer.
+    @note In case of a multi-channel type, only the first channel will be initialized with 1's, the
+    others will be set to 0's.
     @param rows Number of rows.
     @param cols Number of columns.
     @param type Created matrix type.
@@ -1269,6 +1403,8 @@ class CV_EXPORTS Mat
         // make a 4x4 diagonal matrix with 0.1's on the diagonal.
         Mat A = Mat::eye(4, 4, CV_32F)*0.1;
     @endcode
+    @note In case of a multi-channel type, the identity matrix will be initialized only for the first
+    channel; the others will be set to 0's.
     @param rows Number of rows.
     @param cols Number of columns.
     @param type Created matrix type.
@@ -1329,6 +1465,12 @@ class CV_EXPORTS Mat
     */
     void create(int ndims, const int* sizes, int type);
 
+    /** @overload
+    @param sizes Array of integers specifying a new array shape.
+    @param type New matrix type.
+    */
+    void create(const std::vector<int>& sizes, int type);
+
     /** @brief Increments the reference counter.
 
     The method increments the reference counter associated with the matrix data. If the matrix header
@@ -1355,7 +1497,7 @@ class CV_EXPORTS Mat
      */
     void release();
 
-    //! deallocates the matrix data
+    //! internal use function, consider using the 'release' method instead; deallocates the matrix data
     void deallocate();
     //! internal use function; properly re-allocates _size, _step arrays
     void copySize(const Mat& m);
@@ -1369,6 +1511,14 @@ class CV_EXPORTS Mat
      */
     void reserve(size_t sz);
 
+    /** @brief Reserves space for a certain number of bytes.
+
+    The method reserves space for sz bytes. If the matrix already has enough space to store sz bytes,
+    nothing happens. If the matrix has to be reallocated, its previous content could be lost.
+    @param sz Number of bytes.
+    */
+    void reserveBuffer(size_t sz);
+
     /** @brief Changes the number of matrix rows.
 
     The methods change the number of matrix rows. If the matrix is reallocated, the first
@@ -1401,6 +1551,11 @@ class CV_EXPORTS Mat
     */
     template<typename _Tp> void push_back(const Mat_<_Tp>& elem);
 
+    /** @overload
+    @param elem Added element(s).
+    */
+    template<typename _Tp> void push_back(const std::vector<_Tp>& elem);
+
     /** @overload
     @param m Added line(s).
     */
@@ -1479,17 +1634,17 @@ class CV_EXPORTS Mat
     */
     Mat operator()( const Range* ranges ) const;
 
-    // //! converts header to CvMat; no data is copied
-    // operator CvMat() const;
-    // //! converts header to CvMatND; no data is copied
-    // operator CvMatND() const;
-    // //! converts header to IplImage; no data is copied
-    // operator IplImage() const;
+    /** @overload
+    @param ranges Array of selected ranges along each array dimension.
+    */
+    Mat operator()(const std::vector<Range>& ranges) const;
 
     template<typename _Tp> operator std::vector<_Tp>() const;
     template<typename _Tp, int n> operator Vec<_Tp, n>() const;
     template<typename _Tp, int m, int n> operator Matx<_Tp, m, n>() const;
 
+    template<typename _Tp, std::size_t _Nm> operator std::array<_Tp, _Nm>() const;
+
     /** @brief Reports whether the matrix is continuous or not.
 
     The method returns true if the matrix elements are stored continuously without gaps at the end of
@@ -1522,7 +1677,7 @@ class CV_EXPORTS Mat
                         inv_scale = 1.f/alpha_scale;
 
             CV_Assert( src1.type() == src2.type() &&
-                       src1.type() == CV_MAKETYPE(DataType<T>::depth, 4) &&
+                       src1.type() == CV_MAKETYPE(traits::Depth<T>::value, 4) &&
                        src1.size() == src2.size());
             Size size = src1.size();
             dst.create(size, src1.type());
@@ -1632,7 +1787,33 @@ class CV_EXPORTS Mat
      */
     size_t total() const;
 
-    //! returns N if the matrix is 1-channel (N x ptdim) or ptdim-channel (1 x N) or (N x 1); negative number otherwise
+    /** @brief Returns the total number of array elements.
+
+     The method returns the number of elements within a certain sub-array slice with startDim <= dim < endDim
+     */
+    size_t total(int startDim, int endDim=INT_MAX) const;
+
+    /**
+     * @param elemChannels Number of channels or number of columns the matrix should have.
+     *                     For a 2-D matrix, when the matrix has only 1 column, then it should have
+     *                     elemChannels channels; When the matrix has only 1 channel,
+     *                     then it should have elemChannels columns.
+     *                     For a 3-D matrix, it should have only one channel. Furthermore,
+     *                     if the number of planes is not one, then the number of rows
+     *                     within every plane has to be 1; if the number of rows within
+     *                     every plane is not 1, then the number of planes has to be 1.
+     * @param depth The depth the matrix should have. Set it to -1 when any depth is fine.
+     * @param requireContinuous Set it to true to require the matrix to be continuous
+     * @return -1 if the requirement is not satisfied.
+     *         Otherwise, it returns the number of elements in the matrix. Note
+     *         that an element may have multiple channels.
+     *
+     * The following code demonstrates its usage for a 2-d matrix:
+     * @snippet snippets/core_mat_checkVector.cpp example-2d
+     *
+     * The following code demonstrates its usage for a 3-d matrix:
+     * @snippet snippets/core_mat_checkVector.cpp example-3d
+     */
     int checkVector(int elemChannels, int depth=-1, bool requireContinuous=true) const;
 
     /** @brief Returns a pointer to the specified matrix row.
@@ -1645,10 +1826,16 @@ class CV_EXPORTS Mat
     /** @overload */
     const uchar* ptr(int i0=0) const;
 
-    /** @overload */
-    uchar* ptr(int i0, int i1);
-    /** @overload */
-    const uchar* ptr(int i0, int i1) const;
+    /** @overload
+    @param row Index along the dimension 0
+    @param col Index along the dimension 1
+    */
+    uchar* ptr(int row, int col);
+    /** @overload
+    @param row Index along the dimension 0
+    @param col Index along the dimension 1
+    */
+    const uchar* ptr(int row, int col) const;
 
     /** @overload */
     uchar* ptr(int i0, int i1, int i2);
@@ -1668,10 +1855,16 @@ class CV_EXPORTS Mat
     template<typename _Tp> _Tp* ptr(int i0=0);
     /** @overload */
     template<typename _Tp> const _Tp* ptr(int i0=0) const;
-    /** @overload */
-    template<typename _Tp> _Tp* ptr(int i0, int i1);
-    /** @overload */
-    template<typename _Tp> const _Tp* ptr(int i0, int i1) const;
+    /** @overload
+    @param row Index along the dimension 0
+    @param col Index along the dimension 1
+    */
+    template<typename _Tp> _Tp* ptr(int row, int col);
+    /** @overload
+    @param row Index along the dimension 0
+    @param col Index along the dimension 1
+    */
+    template<typename _Tp> const _Tp* ptr(int row, int col) const;
     /** @overload */
     template<typename _Tp> _Tp* ptr(int i0, int i1, int i2);
     /** @overload */
@@ -1721,15 +1914,15 @@ class CV_EXPORTS Mat
     */
     template<typename _Tp> const _Tp& at(int i0=0) const;
     /** @overload
-    @param i0 Index along the dimension 0
-    @param i1 Index along the dimension 1
+    @param row Index along the dimension 0
+    @param col Index along the dimension 1
     */
-    template<typename _Tp> _Tp& at(int i0, int i1);
+    template<typename _Tp> _Tp& at(int row, int col);
     /** @overload
-    @param i0 Index along the dimension 0
-    @param i1 Index along the dimension 1
+    @param row Index along the dimension 0
+    @param col Index along the dimension 1
     */
-    template<typename _Tp> const _Tp& at(int i0, int i1) const;
+    template<typename _Tp> const _Tp& at(int row, int col) const;
 
     /** @overload
     @param i0 Index along the dimension 0
@@ -1784,7 +1977,7 @@ class CV_EXPORTS Mat
                         inv_scale = 1.f/alpha_scale;
 
             CV_Assert( src1.type() == src2.type() &&
-                       src1.type() == DataType<VT>::type &&
+                       src1.type() == traits::Type<VT>::value &&
                        src1.size() == src2.size());
             Size size = src1.size();
             dst.create(size, src1.type());
@@ -1816,19 +2009,18 @@ class CV_EXPORTS Mat
     template<typename _Tp> MatIterator_<_Tp> end();
     template<typename _Tp> MatConstIterator_<_Tp> end() const;
 
-    /** @brief Invoke with arguments functor, and runs the functor over all matrix element.
+    /** @brief Runs the given functor over all matrix elements in parallel.
 
-    The methods runs operation in parallel. Operation is passed by arguments. Operation have to be a
-    function pointer, a function object or a lambda(C++11).
+    The operation passed as argument has to be a function pointer, a function object or a lambda(C++11).
 
-    All of below operation is equal. Put 0xFF to first channel of all matrix elements:
+    Example 1. All of the operations below put 0xFF into the first channel of all matrix elements:
     @code
         Mat image(1920, 1080, CV_8UC3);
         typedef cv::Point3_<uint8_t> Pixel;
 
         // first. raw pointer access.
         for (int r = 0; r < image.rows; ++r) {
-            Pixel* ptr = image.ptr<Pixel>(0, r);
+            Pixel* ptr = image.ptr<Pixel>(r, 0);
             const Pixel* ptr_end = ptr + image.cols;
             for (; ptr != ptr_end; ++ptr) {
                 ptr->x = 255;
@@ -1853,18 +2045,18 @@ class CV_EXPORTS Mat
             p.x = 255;
         });
     @endcode
-    position parameter is index of current pixel:
+    Example 2. Using the pixel's position:
     @code
-        // Creating 3D matrix (255 x 255 x 255) typed uint8_t,
-        //  and initialize all elements by the value which equals elements position.
-        //  i.e. pixels (x,y,z) = (1,2,3) is (b,g,r) = (1,2,3).
+        // Create a 3D matrix (255 x 255 x 255) of uint8_t elements
+        // and initialize each element with the value equal to its position,
+        // i.e. the pixel at (x,y,z) = (1,2,3) gets (b,g,r) = (1,2,3).
 
         int sizes[] = { 255, 255, 255 };
         typedef cv::Point3_<uint8_t> Pixel;
 
         Mat_<Pixel> image = Mat::zeros(3, sizes, CV_8UC3);
 
-        image.forEachWithPosition([&](Pixel& pixel, const int position[]) -> void{
+        image.forEach<Pixel>([&](Pixel& pixel, const int position[]) -> void {
             pixel.x = position[0];
             pixel.y = position[1];
             pixel.z = position[2];
@@ -1875,10 +2067,8 @@ class CV_EXPORTS Mat
     /** @overload */
     template<typename _Tp, typename Functor> void forEach(const Functor& operation) const;
 
-#ifdef CV_CXX_MOVE_SEMANTICS
     Mat(Mat&& m);
     Mat& operator = (Mat&& m);
-#endif
 
     enum { MAGIC_VAL  = 0x42FF0000, AUTO_STEP = 0, CONTINUOUS_FLAG = CV_MAT_CONT_FLAG, SUBMATRIX_FLAG = CV_SUBMAT_FLAG };
     enum { MAGIC_MASK = 0xFFFF0000, TYPE_MASK = 0x00000FFF, DEPTH_MASK = 7 };
@@ -1909,6 +2099,9 @@ class CV_EXPORTS Mat
     static MatAllocator* getDefaultAllocator();
     static void setDefaultAllocator(MatAllocator* allocator);
 
+    //! internal use method: updates the continuity flag
+    void updateContinuityFlag();
+
     //! interaction with UMat
     UMatData* u;
 
@@ -1924,7 +2117,7 @@ class CV_EXPORTS Mat
 
 /** @brief Template matrix class derived from Mat
 
-@code
+@code{.cpp}
     template<typename _Tp> class Mat_ : public Mat
     {
     public:
@@ -1936,7 +2129,7 @@ class CV_EXPORTS Mat
 The class `Mat_<_Tp>` is a *thin* template wrapper on top of the Mat class. It does not have any
 extra data fields. Nor this class nor Mat has any virtual methods. Thus, references or pointers to
 these two classes can be freely but carefully converted one to another. For example:
-@code
+@code{.cpp}
     // create a 100x100 8-bit matrix
     Mat M(100,100,CV_8U);
     // this will be compiled fine. no any data conversion will be done.
@@ -1948,7 +2141,7 @@ While Mat is sufficient in most cases, Mat_ can be more convenient if you use a
 access operations and if you know matrix type at the compilation time. Note that
 `Mat::at(int y,int x)` and `Mat_::operator()(int y,int x)` do absolutely the same
 and run at the same speed, but the latter is certainly shorter:
-@code
+@code{.cpp}
     Mat_<double> M(20,20);
     for(int i = 0; i < M.rows; i++)
         for(int j = 0; j < M.cols; j++)
@@ -1958,7 +2151,7 @@ and run at the same speed, but the latter is certainly shorter:
     cout << E.at<double>(0,0)/E.at<double>(M.rows-1,0);
 @endcode
 To use Mat_ for multi-channel images/matrices, pass Vec as a Mat_ parameter:
-@code
+@code{.cpp}
     // allocate a 320x240 color image and fill it with green (in RGB space)
     Mat_<Vec3b> img(240, 320, Vec3b(0,255,0));
     // now draw a diagonal white line
@@ -1968,6 +2161,17 @@ To use Mat_ for multi-channel images/matrices, pass Vec as a Mat_ parameter:
     for(int i = 0; i < img.rows; i++)
         for(int j = 0; j < img.cols; j++)
             img(i,j)[2] ^= (uchar)(i ^ j);
+@endcode
+Mat_ is fully compatible with the C++11 range-based for loop. For example, such a loop
+can be used to safely apply a look-up table:
+@code{.cpp}
+void applyTable(Mat_<uchar>& I, const uchar* const table)
+{
+    for(auto& pixel : I)
+    {
+        pixel = table[pixel];
+    }
+}
 @endcode
  */
 template<typename _Tp> class Mat_ : public Mat
@@ -1992,7 +2196,7 @@ template<typename _Tp> class Mat_ : public Mat
     Mat_(int _ndims, const int* _sizes);
     //! n-dim array constructor that sets each matrix element to specified value
     Mat_(int _ndims, const int* _sizes, const _Tp& value);
-    //! copy/conversion contructor. If m is of different type, it's converted
+    //! copy/conversion constructor. If m is of different type, it's converted
     Mat_(const Mat& m);
     //! copy constructor
     Mat_(const Mat_& m);
@@ -2006,6 +2210,8 @@ template<typename _Tp> class Mat_ : public Mat
     Mat_(const Mat_& m, const Rect& roi);
     //! selects a submatrix, n-dim version
     Mat_(const Mat_& m, const Range* ranges);
+    //! selects a submatrix, n-dim version
+    Mat_(const Mat_& m, const std::vector<Range>& ranges);
     //! from a matrix expression
     explicit Mat_(const MatExpr& e);
     //! makes a matrix out of Vec, std::vector, Point_ or Point3_. The matrix will have a single column
@@ -2016,6 +2222,11 @@ template<typename _Tp> class Mat_ : public Mat
     explicit Mat_(const Point3_<typename DataType<_Tp>::channel_type>& pt, bool copyData=true);
     explicit Mat_(const MatCommaInitializer_<_Tp>& commaInitializer);
 
+    Mat_(std::initializer_list<_Tp> values);
+    explicit Mat_(const std::initializer_list<int> sizes, const std::initializer_list<_Tp> values);
+
+    template <std::size_t _Nm> explicit Mat_(const std::array<_Tp, _Nm>& arr, bool copyData=false);
+
     Mat_& operator = (const Mat& m);
     Mat_& operator = (const Mat_& m);
     //! set all the elements to s.
@@ -2040,6 +2251,8 @@ template<typename _Tp> class Mat_ : public Mat
     void create(Size _size);
     //! equivalent to Mat::create(_ndims, _sizes, DatType<_Tp>::type)
     void create(int _ndims, const int* _sizes);
+    //! equivalent to Mat::release()
+    void release();
     //! cross-product
     Mat_ cross(const Mat_& m) const;
     //! data type conversion
@@ -2048,7 +2261,7 @@ template<typename _Tp> class Mat_ : public Mat
     Mat_ row(int y) const;
     Mat_ col(int x) const;
     Mat_ diag(int d=0) const;
-    Mat_ clone() const;
+    Mat_ clone() const CV_NODISCARD;
 
     //! overridden forms of Mat::elemSize() etc.
     size_t elemSize() const;
@@ -2070,11 +2283,12 @@ template<typename _Tp> class Mat_ : public Mat
     static MatExpr eye(int rows, int cols);
     static MatExpr eye(Size size);
 
-    //! some more overriden methods
+    //! some more overridden methods
     Mat_& adjustROI( int dtop, int dbottom, int dleft, int dright );
     Mat_ operator()( const Range& rowRange, const Range& colRange ) const;
     Mat_ operator()( const Rect& roi ) const;
     Mat_ operator()( const Range* ranges ) const;
+    Mat_ operator()(const std::vector<Range>& ranges) const;
 
     //! more convenient forms of row and element access operators
     _Tp* operator [](int y);
@@ -2095,9 +2309,9 @@ template<typename _Tp> class Mat_ : public Mat
     //! returns read-only reference to the specified element (1D case)
     const _Tp& operator ()(int idx0) const;
     //! returns reference to the specified element (2D case)
-    _Tp& operator ()(int idx0, int idx1);
+    _Tp& operator ()(int row, int col);
     //! returns read-only reference to the specified element (2D case)
-    const _Tp& operator ()(int idx0, int idx1) const;
+    const _Tp& operator ()(int row, int col) const;
     //! returns reference to the specified element (3D case)
     _Tp& operator ()(int idx0, int idx1, int idx2);
     //! returns read-only reference to the specified element (3D case)
@@ -2108,12 +2322,15 @@ template<typename _Tp> class Mat_ : public Mat
 
     //! conversion to vector.
     operator std::vector<_Tp>() const;
+
+    //! conversion to array.
+    template<std::size_t _Nm> operator std::array<_Tp, _Nm>() const;
+
     //! conversion to Vec
     template<int n> operator Vec<typename DataType<_Tp>::channel_type, n>() const;
     //! conversion to Matx
     template<int m, int n> operator Matx<typename DataType<_Tp>::channel_type, m, n>() const;
 
-#ifdef CV_CXX_MOVE_SEMANTICS
     Mat_(Mat_&& m);
     Mat_& operator = (Mat_&& m);
 
@@ -2121,7 +2338,6 @@ template<typename _Tp> class Mat_ : public Mat
     Mat_& operator = (Mat&& m);
 
     Mat_(MatExpr&& e);
-#endif
 };
 
 typedef Mat_<uchar> Mat1b;
@@ -2164,7 +2380,7 @@ class CV_EXPORTS UMat
     // (_type is CV_8UC1, CV_64FC3, CV_32SC(12) etc.)
     UMat(int rows, int cols, int type, UMatUsageFlags usageFlags = USAGE_DEFAULT);
     UMat(Size size, int type, UMatUsageFlags usageFlags = USAGE_DEFAULT);
-    //! constucts 2D matrix and fills it with the specified value _s.
+    //! constructs 2D matrix and fills it with the specified value _s.
     UMat(int rows, int cols, int type, const Scalar& s, UMatUsageFlags usageFlags = USAGE_DEFAULT);
     UMat(Size size, int type, const Scalar& s, UMatUsageFlags usageFlags = USAGE_DEFAULT);
 
@@ -2179,8 +2395,10 @@ class CV_EXPORTS UMat
     UMat(const UMat& m, const Range& rowRange, const Range& colRange=Range::all());
     UMat(const UMat& m, const Rect& roi);
     UMat(const UMat& m, const Range* ranges);
+    UMat(const UMat& m, const std::vector<Range>& ranges);
     //! builds matrix from std::vector with or without copying the data
     template<typename _Tp> explicit UMat(const std::vector<_Tp>& vec, bool copyData=false);
+
     //! builds matrix from cv::Vec; the data is copied by default
     template<typename _Tp, int n> explicit UMat(const Vec<_Tp, n>& vec, bool copyData=true);
     //! builds matrix from cv::Matx; the data is copied by default
@@ -2197,7 +2415,7 @@ class CV_EXPORTS UMat
     //! assignment operators
     UMat& operator = (const UMat& m);
 
-    Mat getMat(int flags) const;
+    Mat getMat(AccessFlag flags) const;
 
     //! returns a new matrix header for the specified row
     UMat row(int y) const;
@@ -2210,21 +2428,21 @@ class CV_EXPORTS UMat
     UMat colRange(int startcol, int endcol) const;
     UMat colRange(const Range& r) const;
     //! ... for the specified diagonal
-    // (d=0 - the main diagonal,
-    //  >0 - a diagonal from the lower half,
-    //  <0 - a diagonal from the upper half)
+    //! (d=0 - the main diagonal,
+    //!  >0 - a diagonal from the upper half,
+    //!  <0 - a diagonal from the lower half)
     UMat diag(int d=0) const;
     //! constructs a square diagonal matrix which main diagonal is vector "d"
     static UMat diag(const UMat& d);
 
     //! returns deep copy of the matrix, i.e. the data is copied
-    UMat clone() const;
+    UMat clone() const CV_NODISCARD;
     //! copies the matrix content to "m".
     // It calls m.create(this->size(), this->type()).
     void copyTo( OutputArray m ) const;
     //! copies those matrix elements to "m" that are marked with non-zero mask elements.
     void copyTo( OutputArray m, InputArray mask ) const;
-    //! converts matrix to another datatype with optional scalng. See cvConvertScale.
+    //! converts matrix to another datatype with optional scaling. See cvConvertScale.
     void convertTo( OutputArray m, int rtype, double alpha=1, double beta=0 ) const;
 
     void assignTo( UMat& m, int type=-1 ) const;
@@ -2263,6 +2481,7 @@ class CV_EXPORTS UMat
     void create(int rows, int cols, int type, UMatUsageFlags usageFlags = USAGE_DEFAULT);
     void create(Size size, int type, UMatUsageFlags usageFlags = USAGE_DEFAULT);
     void create(int ndims, const int* sizes, int type, UMatUsageFlags usageFlags = USAGE_DEFAULT);
+    void create(const std::vector<int>& sizes, int type, UMatUsageFlags usageFlags = USAGE_DEFAULT);
 
     //! increases the reference counter; use with care to avoid memleaks
     void addref();
@@ -2284,6 +2503,7 @@ class CV_EXPORTS UMat
     UMat operator()( Range rowRange, Range colRange ) const;
     UMat operator()( const Rect& roi ) const;
     UMat operator()( const Range* ranges ) const;
+    UMat operator()(const std::vector<Range>& ranges) const;
 
     //! returns true iff the matrix data is continuous
     // (i.e. when there are no gaps between successive rows).
@@ -2314,12 +2534,14 @@ class CV_EXPORTS UMat
     //! returns N if the matrix is 1-channel (N x ptdim) or ptdim-channel (1 x N) or (N x 1); negative number otherwise
     int checkVector(int elemChannels, int depth=-1, bool requireContinuous=true) const;
 
-#ifdef CV_CXX_MOVE_SEMANTICS
     UMat(UMat&& m);
     UMat& operator = (UMat&& m);
-#endif
 
-    void* handle(int accessFlags) const;
+    /*! Returns the OpenCL buffer handle on which the UMat operates.
+        The UMat instance should be kept alive while the handle is in use to prevent the buffer from
+        being returned to the OpenCV buffer pool.
+     */
+    void* handle(AccessFlag accessFlags) const;
     void ndoffset(size_t* ofs) const;
 
     enum { MAGIC_VAL  = 0x42FF0000, AUTO_STEP = 0, CONTINUOUS_FLAG = CV_MAT_CONT_FLAG, SUBMATRIX_FLAG = CV_SUBMAT_FLAG };
@@ -2343,6 +2565,9 @@ class CV_EXPORTS UMat
     //! and the standard allocator
     static MatAllocator* getStdAllocator();
 
+    //! internal use method: updates the continuity flag
+    void updateContinuityFlag();
+
     // black-box container of UMat data
     UMatData* u;
 
@@ -2370,15 +2595,16 @@ Elements can be accessed using the following methods:
     SparseMat::find), for example:
     @code
         const int dims = 5;
-        int size[] = {10, 10, 10, 10, 10};
+        int size[5] = {10, 10, 10, 10, 10};
         SparseMat sparse_mat(dims, size, CV_32F);
         for(int i = 0; i < 1000; i++)
         {
             int idx[dims];
             for(int k = 0; k < dims; k++)
-                idx[k] = rand()
+                idx[k] = rand() % size[k];
             sparse_mat.ref<float>(idx) += 1.f;
         }
+        cout << "nnz = " << sparse_mat.nzcount() << endl;
     @endcode
 -   Sparse matrix iterators. They are similar to MatIterator but different from NAryMatIterator.
     That is, the iteration loop is familiar to STL users:
@@ -2503,7 +2729,7 @@ class CV_EXPORTS SparseMat
     SparseMat& operator = (const Mat& m);
 
     //! creates full copy of the matrix
-    SparseMat clone() const;
+    SparseMat clone() const CV_NODISCARD;
 
     //! copies all the data to the destination matrix. All the previous content of m is erased
     void copyTo( SparseMat& m ) const;
@@ -2515,11 +2741,11 @@ class CV_EXPORTS SparseMat
     /*!
         @param [out] m - output matrix; if it does not have a proper size or type before the operation,
             it is reallocated
-        @param [in] rtype – desired output matrix type or, rather, the depth since the number of channels
+        @param [in] rtype - desired output matrix type or, rather, the depth since the number of channels
             are the same as the input has; if rtype is negative, the output matrix will have the
             same type as the input.
-        @param [in] alpha – optional scale factor
-        @param [in] beta – optional delta added to the scaled values
+        @param [in] alpha - optional scale factor
+        @param [in] beta - optional delta added to the scaled values
     */
     void convertTo( Mat& m, int rtype, double alpha=1, double beta=0 ) const;
 
@@ -2599,7 +2825,7 @@ class CV_EXPORTS SparseMat
 
      `ref<_Tp>(i0,...[,hashval])` is equivalent to `*(_Tp*)ptr(i0,...,true[,hashval])`.
      The methods always return a valid reference.
-     If the element did not exist, it is created and initialiazed with 0.
+     If the element did not exist, it is created and initialized with 0.
     */
     //! returns reference to the specified element (1D case)
     template<typename _Tp> _Tp& ref(int i0, size_t* hashval=0);
@@ -2722,7 +2948,7 @@ template<typename _Tp> class SparseMat_ : public SparseMat
 
     //! the default constructor
     SparseMat_();
-    //! the full constructor equivelent to SparseMat(dims, _sizes, DataType<_Tp>::type)
+    //! the full constructor equivalent to SparseMat(dims, _sizes, DataType<_Tp>::type)
     SparseMat_(int dims, const int* _sizes);
     //! the copy constructor. If DataType<_Tp>.type != m.type(), the m elements are converted
     SparseMat_(const SparseMat& m);
@@ -2740,7 +2966,7 @@ template<typename _Tp> class SparseMat_ : public SparseMat
     SparseMat_& operator = (const Mat& m);
 
     //! makes full copy of the matrix. All the elements are duplicated
-    SparseMat_ clone() const;
+    SparseMat_ clone() const CV_NODISCARD;
     //! equivalent to cv::SparseMat::create(dims, _sizes, DataType<_Tp>::type)
     void create(int dims, const int* _sizes);
     //! converts sparse matrix to the old-style CvSparseMat. All the elements are copied
@@ -2793,9 +3019,7 @@ class CV_EXPORTS MatConstIterator
     typedef const uchar** pointer;
     typedef uchar* reference;
 
-#ifndef OPENCV_NOSTL
     typedef std::random_access_iterator_tag iterator_category;
-#endif
 
     //! default constructor
     MatConstIterator();
@@ -2860,9 +3084,7 @@ class MatConstIterator_ : public MatConstIterator
     typedef const _Tp* pointer;
     typedef const _Tp& reference;
 
-#ifndef OPENCV_NOSTL
     typedef std::random_access_iterator_tag iterator_category;
-#endif
 
     //! default constructor
     MatConstIterator_();
@@ -2880,9 +3102,9 @@ class MatConstIterator_ : public MatConstIterator
     //! copy operator
     MatConstIterator_& operator = (const MatConstIterator_& it);
     //! returns the current matrix element
-    _Tp operator *() const;
+    const _Tp& operator *() const;
     //! returns the i-th matrix element, relative to the current
-    _Tp operator [](ptrdiff_t i) const;
+    const _Tp& operator [](ptrdiff_t i) const;
 
     //! shifts the iterator forward by the specified number of elements
     MatConstIterator_& operator += (ptrdiff_t ofs);
@@ -2913,9 +3135,7 @@ class MatIterator_ : public MatConstIterator_<_Tp>
     typedef _Tp* pointer;
     typedef _Tp& reference;
 
-#ifndef OPENCV_NOSTL
     typedef std::random_access_iterator_tag iterator_category;
-#endif
 
     //! the default constructor
     MatIterator_();
@@ -3049,9 +3269,7 @@ template<typename _Tp> class SparseMatConstIterator_ : public SparseMatConstIter
 {
 public:
 
-#ifndef OPENCV_NOSTL
     typedef std::forward_iterator_tag iterator_category;
-#endif
 
     //! the default constructor
     SparseMatConstIterator_();
@@ -3085,9 +3303,7 @@ template<typename _Tp> class SparseMatIterator_ : public SparseMatConstIterator_
 {
 public:
 
-#ifndef OPENCV_NOSTL
     typedef std::forward_iterator_tag iterator_category;
-#endif
 
     //! the default constructor
     SparseMatIterator_();
@@ -3146,21 +3362,29 @@ The example below illustrates how you can compute a normalized and threshold 3D
         }
 
         minProb *= image.rows*image.cols;
-        Mat plane;
-        NAryMatIterator it(&hist, &plane, 1);
+
+        // initialize iterator (the style is different from STL).
+        // after initialization the iterator will contain
+        // the number of slices or planes the iterator will go through.
+        // it simultaneously increments iterators for several matrices
+        // supplied as a null-terminated list of pointers
+        const Mat* arrays[] = {&hist, 0};
+        Mat planes[1];
+        NAryMatIterator itNAry(arrays, planes, 1);
         double s = 0;
         // iterate through the matrix. on each iteration
-        // it.planes[*] (of type Mat) will be set to the current plane.
-        for(int p = 0; p < it.nplanes; p++, ++it)
+        // itNAry.planes[i] (of type Mat) will be set to the current plane
+        // of the i-th n-dim matrix passed to the iterator constructor.
+        for(int p = 0; p < itNAry.nplanes; p++, ++itNAry)
         {
-            threshold(it.planes[0], it.planes[0], minProb, 0, THRESH_TOZERO);
-            s += sum(it.planes[0])[0];
+            threshold(itNAry.planes[0], itNAry.planes[0], minProb, 0, THRESH_TOZERO);
+            s += sum(itNAry.planes[0])[0];
         }
 
         s = 1./s;
-        it = NAryMatIterator(&hist, &plane, 1);
-        for(int p = 0; p < it.nplanes; p++, ++it)
-            it.planes[0] *= s;
+        itNAry = NAryMatIterator(arrays, planes, 1);
+        for(int p = 0; p < itNAry.nplanes; p++, ++itNAry)
+            itNAry.planes[0] *= s;
     }
 @endcode
  */
@@ -3338,6 +3562,10 @@ CV_EXPORTS MatExpr operator + (const Mat& m, const MatExpr& e);
 CV_EXPORTS MatExpr operator + (const MatExpr& e, const Scalar& s);
 CV_EXPORTS MatExpr operator + (const Scalar& s, const MatExpr& e);
 CV_EXPORTS MatExpr operator + (const MatExpr& e1, const MatExpr& e2);
+template<typename _Tp, int m, int n> static inline
+MatExpr operator + (const Mat& a, const Matx<_Tp, m, n>& b) { return a + Mat(b); }
+template<typename _Tp, int m, int n> static inline
+MatExpr operator + (const Matx<_Tp, m, n>& a, const Mat& b) { return Mat(a) + b; }
 
 CV_EXPORTS MatExpr operator - (const Mat& a, const Mat& b);
 CV_EXPORTS MatExpr operator - (const Mat& a, const Scalar& s);
@@ -3347,6 +3575,10 @@ CV_EXPORTS MatExpr operator - (const Mat& m, const MatExpr& e);
 CV_EXPORTS MatExpr operator - (const MatExpr& e, const Scalar& s);
 CV_EXPORTS MatExpr operator - (const Scalar& s, const MatExpr& e);
 CV_EXPORTS MatExpr operator - (const MatExpr& e1, const MatExpr& e2);
+template<typename _Tp, int m, int n> static inline
+MatExpr operator - (const Mat& a, const Matx<_Tp, m, n>& b) { return a - Mat(b); }
+template<typename _Tp, int m, int n> static inline
+MatExpr operator - (const Matx<_Tp, m, n>& a, const Mat& b) { return Mat(a) - b; }
 
 CV_EXPORTS MatExpr operator - (const Mat& m);
 CV_EXPORTS MatExpr operator - (const MatExpr& e);
@@ -3359,6 +3591,10 @@ CV_EXPORTS MatExpr operator * (const Mat& m, const MatExpr& e);
 CV_EXPORTS MatExpr operator * (const MatExpr& e, double s);
 CV_EXPORTS MatExpr operator * (double s, const MatExpr& e);
 CV_EXPORTS MatExpr operator * (const MatExpr& e1, const MatExpr& e2);
+template<typename _Tp, int m, int n> static inline
+MatExpr operator * (const Mat& a, const Matx<_Tp, m, n>& b) { return a * Mat(b); }
+template<typename _Tp, int m, int n> static inline
+MatExpr operator * (const Matx<_Tp, m, n>& a, const Mat& b) { return Mat(a) * b; }
 
 CV_EXPORTS MatExpr operator / (const Mat& a, const Mat& b);
 CV_EXPORTS MatExpr operator / (const Mat& a, double s);
@@ -3368,52 +3604,100 @@ CV_EXPORTS MatExpr operator / (const Mat& m, const MatExpr& e);
 CV_EXPORTS MatExpr operator / (const MatExpr& e, double s);
 CV_EXPORTS MatExpr operator / (double s, const MatExpr& e);
 CV_EXPORTS MatExpr operator / (const MatExpr& e1, const MatExpr& e2);
+template<typename _Tp, int m, int n> static inline
+MatExpr operator / (const Mat& a, const Matx<_Tp, m, n>& b) { return a / Mat(b); }
+template<typename _Tp, int m, int n> static inline
+MatExpr operator / (const Matx<_Tp, m, n>& a, const Mat& b) { return Mat(a) / b; }
 
 CV_EXPORTS MatExpr operator < (const Mat& a, const Mat& b);
 CV_EXPORTS MatExpr operator < (const Mat& a, double s);
 CV_EXPORTS MatExpr operator < (double s, const Mat& a);
+template<typename _Tp, int m, int n> static inline
+MatExpr operator < (const Mat& a, const Matx<_Tp, m, n>& b) { return a < Mat(b); }
+template<typename _Tp, int m, int n> static inline
+MatExpr operator < (const Matx<_Tp, m, n>& a, const Mat& b) { return Mat(a) < b; }
 
 CV_EXPORTS MatExpr operator <= (const Mat& a, const Mat& b);
 CV_EXPORTS MatExpr operator <= (const Mat& a, double s);
 CV_EXPORTS MatExpr operator <= (double s, const Mat& a);
+template<typename _Tp, int m, int n> static inline
+MatExpr operator <= (const Mat& a, const Matx<_Tp, m, n>& b) { return a <= Mat(b); }
+template<typename _Tp, int m, int n> static inline
+MatExpr operator <= (const Matx<_Tp, m, n>& a, const Mat& b) { return Mat(a) <= b; }
 
 CV_EXPORTS MatExpr operator == (const Mat& a, const Mat& b);
 CV_EXPORTS MatExpr operator == (const Mat& a, double s);
 CV_EXPORTS MatExpr operator == (double s, const Mat& a);
+template<typename _Tp, int m, int n> static inline
+MatExpr operator == (const Mat& a, const Matx<_Tp, m, n>& b) { return a == Mat(b); }
+template<typename _Tp, int m, int n> static inline
+MatExpr operator == (const Matx<_Tp, m, n>& a, const Mat& b) { return Mat(a) == b; }
 
 CV_EXPORTS MatExpr operator != (const Mat& a, const Mat& b);
 CV_EXPORTS MatExpr operator != (const Mat& a, double s);
 CV_EXPORTS MatExpr operator != (double s, const Mat& a);
+template<typename _Tp, int m, int n> static inline
+MatExpr operator != (const Mat& a, const Matx<_Tp, m, n>& b) { return a != Mat(b); }
+template<typename _Tp, int m, int n> static inline
+MatExpr operator != (const Matx<_Tp, m, n>& a, const Mat& b) { return Mat(a) != b; }
 
 CV_EXPORTS MatExpr operator >= (const Mat& a, const Mat& b);
 CV_EXPORTS MatExpr operator >= (const Mat& a, double s);
 CV_EXPORTS MatExpr operator >= (double s, const Mat& a);
+template<typename _Tp, int m, int n> static inline
+MatExpr operator >= (const Mat& a, const Matx<_Tp, m, n>& b) { return a >= Mat(b); }
+template<typename _Tp, int m, int n> static inline
+MatExpr operator >= (const Matx<_Tp, m, n>& a, const Mat& b) { return Mat(a) >= b; }
 
 CV_EXPORTS MatExpr operator > (const Mat& a, const Mat& b);
 CV_EXPORTS MatExpr operator > (const Mat& a, double s);
 CV_EXPORTS MatExpr operator > (double s, const Mat& a);
+template<typename _Tp, int m, int n> static inline
+MatExpr operator > (const Mat& a, const Matx<_Tp, m, n>& b) { return a > Mat(b); }
+template<typename _Tp, int m, int n> static inline
+MatExpr operator > (const Matx<_Tp, m, n>& a, const Mat& b) { return Mat(a) > b; }
 
 CV_EXPORTS MatExpr operator & (const Mat& a, const Mat& b);
 CV_EXPORTS MatExpr operator & (const Mat& a, const Scalar& s);
 CV_EXPORTS MatExpr operator & (const Scalar& s, const Mat& a);
+template<typename _Tp, int m, int n> static inline
+MatExpr operator & (const Mat& a, const Matx<_Tp, m, n>& b) { return a & Mat(b); }
+template<typename _Tp, int m, int n> static inline
+MatExpr operator & (const Matx<_Tp, m, n>& a, const Mat& b) { return Mat(a) & b; }
 
 CV_EXPORTS MatExpr operator | (const Mat& a, const Mat& b);
 CV_EXPORTS MatExpr operator | (const Mat& a, const Scalar& s);
 CV_EXPORTS MatExpr operator | (const Scalar& s, const Mat& a);
+template<typename _Tp, int m, int n> static inline
+MatExpr operator | (const Mat& a, const Matx<_Tp, m, n>& b) { return a | Mat(b); }
+template<typename _Tp, int m, int n> static inline
+MatExpr operator | (const Matx<_Tp, m, n>& a, const Mat& b) { return Mat(a) | b; }
 
 CV_EXPORTS MatExpr operator ^ (const Mat& a, const Mat& b);
 CV_EXPORTS MatExpr operator ^ (const Mat& a, const Scalar& s);
 CV_EXPORTS MatExpr operator ^ (const Scalar& s, const Mat& a);
+template<typename _Tp, int m, int n> static inline
+MatExpr operator ^ (const Mat& a, const Matx<_Tp, m, n>& b) { return a ^ Mat(b); }
+template<typename _Tp, int m, int n> static inline
+MatExpr operator ^ (const Matx<_Tp, m, n>& a, const Mat& b) { return Mat(a) ^ b; }
 
 CV_EXPORTS MatExpr operator ~(const Mat& m);
 
 CV_EXPORTS MatExpr min(const Mat& a, const Mat& b);
 CV_EXPORTS MatExpr min(const Mat& a, double s);
 CV_EXPORTS MatExpr min(double s, const Mat& a);
+template<typename _Tp, int m, int n> static inline
+MatExpr min (const Mat& a, const Matx<_Tp, m, n>& b) { return min(a, Mat(b)); }
+template<typename _Tp, int m, int n> static inline
+MatExpr min (const Matx<_Tp, m, n>& a, const Mat& b) { return min(Mat(a), b); }
 
 CV_EXPORTS MatExpr max(const Mat& a, const Mat& b);
 CV_EXPORTS MatExpr max(const Mat& a, double s);
 CV_EXPORTS MatExpr max(double s, const Mat& a);
+template<typename _Tp, int m, int n> static inline
+MatExpr max (const Mat& a, const Matx<_Tp, m, n>& b) { return max(a, Mat(b)); }
+template<typename _Tp, int m, int n> static inline
+MatExpr max (const Matx<_Tp, m, n>& a, const Mat& b) { return max(Mat(a), b); }
 
 /** @brief Calculates an absolute value of each matrix element.
 
@@ -3439,4 +3723,4 @@ CV_EXPORTS MatExpr abs(const MatExpr& e);
 
 #include "opencv2/core/mat.inl.hpp"
 
-#endif // __OPENCV_CORE_MAT_HPP__
+#endif // OPENCV_CORE_MAT_HPP
diff --git a/IPL/include/opencv/opencv2/core/mat.inl.hpp b/IPL/include/opencv/opencv2/core/mat.inl.hpp
index b4b1418..5066fb1 100644
--- a/IPL/include/opencv/opencv2/core/mat.inl.hpp
+++ b/IPL/include/opencv/opencv2/core/mat.inl.hpp
@@ -42,18 +42,50 @@
 //
 //M*/
 
-#ifndef __OPENCV_CORE_MATRIX_OPERATIONS_HPP__
-#define __OPENCV_CORE_MATRIX_OPERATIONS_HPP__
+#ifndef OPENCV_CORE_MATRIX_OPERATIONS_HPP
+#define OPENCV_CORE_MATRIX_OPERATIONS_HPP
 
 #ifndef __cplusplus
 #  error mat.inl.hpp header must be compiled as C++
 #endif
 
+#ifdef _MSC_VER
+#pragma warning( push )
+#pragma warning( disable: 4127 )
+#endif
+
+#if defined(CV_SKIP_DISABLE_CLANG_ENUM_WARNINGS)
+  // nothing
+#elif defined(CV_FORCE_DISABLE_CLANG_ENUM_WARNINGS)
+  #define CV_DISABLE_CLANG_ENUM_WARNINGS
+#elif defined(__clang__) && defined(__has_warning)
+  #if __has_warning("-Wdeprecated-enum-enum-conversion") && __has_warning("-Wdeprecated-anon-enum-enum-conversion")
+    #define CV_DISABLE_CLANG_ENUM_WARNINGS
+  #endif
+#endif
+#ifdef CV_DISABLE_CLANG_ENUM_WARNINGS
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wdeprecated-enum-enum-conversion"
+#pragma clang diagnostic ignored "-Wdeprecated-anon-enum-enum-conversion"
+#endif
+
 namespace cv
 {
+CV__DEBUG_NS_BEGIN
+
 
 //! @cond IGNORED
 
+////////////////////////// Custom (raw) type wrapper //////////////////////////
+
+template<typename _Tp> static inline
+int rawType()
+{
+    CV_StaticAssert(sizeof(_Tp) <= CV_CN_MAX, "sizeof(_Tp) is too large");
+    const int elemSize = sizeof(_Tp);
+    return (int)CV_MAKETYPE(CV_8U, elemSize);
+}
+
 //////////////////////// Input/Output Arrays ////////////////////////
 
 inline void _InputArray::init(int _flags, const void* _obj)
@@ -66,7 +98,7 @@ inline void* _InputArray::getObj() const { return obj; }
 inline int _InputArray::getFlags() const { return flags; }
 inline Size _InputArray::getSz() const { return sz; }
 
-inline _InputArray::_InputArray() { init(NONE, 0); }
+inline _InputArray::_InputArray() { init(0 + NONE, 0); }
 inline _InputArray::_InputArray(int _flags, void* _obj) { init(_flags, _obj); }
 inline _InputArray::_InputArray(const Mat& m) { init(MAT+ACCESS_READ, &m); }
 inline _InputArray::_InputArray(const std::vector<Mat>& vec) { init(STD_VECTOR_MAT+ACCESS_READ, &vec); }
@@ -75,31 +107,39 @@ inline _InputArray::_InputArray(const std::vector<UMat>& vec) { init(STD_VECTOR_
 
 template<typename _Tp> inline
 _InputArray::_InputArray(const std::vector<_Tp>& vec)
-{ init(FIXED_TYPE + STD_VECTOR + DataType<_Tp>::type + ACCESS_READ, &vec); }
+{ init(FIXED_TYPE + STD_VECTOR + traits::Type<_Tp>::value + ACCESS_READ, &vec); }
+
+template<typename _Tp, std::size_t _Nm> inline
+_InputArray::_InputArray(const std::array<_Tp, _Nm>& arr)
+{ init(FIXED_TYPE + FIXED_SIZE + STD_ARRAY + traits::Type<_Tp>::value + ACCESS_READ, arr.data(), Size(1, _Nm)); }
+
+template<std::size_t _Nm> inline
+_InputArray::_InputArray(const std::array<Mat, _Nm>& arr)
+{ init(STD_ARRAY_MAT + ACCESS_READ, arr.data(), Size(1, _Nm)); }
 
 inline
 _InputArray::_InputArray(const std::vector<bool>& vec)
-{ init(FIXED_TYPE + STD_BOOL_VECTOR + DataType<bool>::type + ACCESS_READ, &vec); }
+{ init(FIXED_TYPE + STD_BOOL_VECTOR + traits::Type<bool>::value + ACCESS_READ, &vec); }
 
 template<typename _Tp> inline
 _InputArray::_InputArray(const std::vector<std::vector<_Tp> >& vec)
-{ init(FIXED_TYPE + STD_VECTOR_VECTOR + DataType<_Tp>::type + ACCESS_READ, &vec); }
+{ init(FIXED_TYPE + STD_VECTOR_VECTOR + traits::Type<_Tp>::value + ACCESS_READ, &vec); }
 
 template<typename _Tp> inline
 _InputArray::_InputArray(const std::vector<Mat_<_Tp> >& vec)
-{ init(FIXED_TYPE + STD_VECTOR_MAT + DataType<_Tp>::type + ACCESS_READ, &vec); }
+{ init(FIXED_TYPE + STD_VECTOR_MAT + traits::Type<_Tp>::value + ACCESS_READ, &vec); }
 
 template<typename _Tp, int m, int n> inline
 _InputArray::_InputArray(const Matx<_Tp, m, n>& mtx)
-{ init(FIXED_TYPE + FIXED_SIZE + MATX + DataType<_Tp>::type + ACCESS_READ, &mtx, Size(n, m)); }
+{ init(FIXED_TYPE + FIXED_SIZE + MATX + traits::Type<_Tp>::value + ACCESS_READ, &mtx, Size(n, m)); }
 
 template<typename _Tp> inline
 _InputArray::_InputArray(const _Tp* vec, int n)
-{ init(FIXED_TYPE + FIXED_SIZE + MATX + DataType<_Tp>::type + ACCESS_READ, vec, Size(n, 1)); }
+{ init(FIXED_TYPE + FIXED_SIZE + MATX + traits::Type<_Tp>::value + ACCESS_READ, vec, Size(n, 1)); }
 
 template<typename _Tp> inline
 _InputArray::_InputArray(const Mat_<_Tp>& m)
-{ init(FIXED_TYPE + MAT + DataType<_Tp>::type + ACCESS_READ, &m); }
+{ init(FIXED_TYPE + MAT + traits::Type<_Tp>::value + ACCESS_READ, &m); }
 
 inline _InputArray::_InputArray(const double& val)
 { init(FIXED_TYPE + FIXED_SIZE + MATX + CV_64F + ACCESS_READ, &val, Size(1,1)); }
@@ -119,6 +159,25 @@ inline _InputArray::_InputArray(const ogl::Buffer& buf)
 inline _InputArray::_InputArray(const cuda::HostMem& cuda_mem)
 { init(CUDA_HOST_MEM + ACCESS_READ, &cuda_mem); }
 
+template<typename _Tp> inline
+_InputArray _InputArray::rawIn(const std::vector<_Tp>& vec)
+{
+    _InputArray v;
+    v.flags = _InputArray::FIXED_TYPE + _InputArray::STD_VECTOR + rawType<_Tp>() + ACCESS_READ;
+    v.obj = (void*)&vec;
+    return v;
+}
+
+template<typename _Tp, std::size_t _Nm> inline
+_InputArray _InputArray::rawIn(const std::array<_Tp, _Nm>& arr)
+{
+    _InputArray v;
+    v.flags = FIXED_TYPE + FIXED_SIZE + STD_ARRAY + traits::Type<_Tp>::value + ACCESS_READ;
+    v.obj = (void*)arr.data();
+    v.sz = Size(1, _Nm);
+    return v;
+}
+
 inline _InputArray::~_InputArray() {}
 
 inline Mat _InputArray::getMat(int i) const
@@ -133,69 +192,84 @@ inline bool _InputArray::isUMat() const  { return kind() == _InputArray::UMAT; }
 inline bool _InputArray::isMatVector() const { return kind() == _InputArray::STD_VECTOR_MAT; }
 inline bool _InputArray::isUMatVector() const  { return kind() == _InputArray::STD_VECTOR_UMAT; }
 inline bool _InputArray::isMatx() const { return kind() == _InputArray::MATX; }
-inline bool _InputArray::isVector() const { return kind() == _InputArray::STD_VECTOR || kind() == _InputArray::STD_BOOL_VECTOR; }
+inline bool _InputArray::isVector() const { return kind() == _InputArray::STD_VECTOR ||
+                                                   kind() == _InputArray::STD_BOOL_VECTOR ||
+                                                   kind() == _InputArray::STD_ARRAY; }
+inline bool _InputArray::isGpuMat() const { return kind() == _InputArray::CUDA_GPU_MAT; }
 inline bool _InputArray::isGpuMatVector() const { return kind() == _InputArray::STD_VECTOR_CUDA_GPU_MAT; }
 
 ////////////////////////////////////////////////////////////////////////////////////////
 
-inline _OutputArray::_OutputArray() { init(ACCESS_WRITE, 0); }
-inline _OutputArray::_OutputArray(int _flags, void* _obj) { init(_flags|ACCESS_WRITE, _obj); }
+inline _OutputArray::_OutputArray() { init(NONE + ACCESS_WRITE, 0); }
+inline _OutputArray::_OutputArray(int _flags, void* _obj) { init(_flags + ACCESS_WRITE, _obj); }
 inline _OutputArray::_OutputArray(Mat& m) { init(MAT+ACCESS_WRITE, &m); }
-inline _OutputArray::_OutputArray(std::vector<Mat>& vec) { init(STD_VECTOR_MAT+ACCESS_WRITE, &vec); }
-inline _OutputArray::_OutputArray(UMat& m) { init(UMAT+ACCESS_WRITE, &m); }
-inline _OutputArray::_OutputArray(std::vector<UMat>& vec) { init(STD_VECTOR_UMAT+ACCESS_WRITE, &vec); }
+inline _OutputArray::_OutputArray(std::vector<Mat>& vec) { init(STD_VECTOR_MAT + ACCESS_WRITE, &vec); }
+inline _OutputArray::_OutputArray(UMat& m) { init(UMAT + ACCESS_WRITE, &m); }
+inline _OutputArray::_OutputArray(std::vector<UMat>& vec) { init(STD_VECTOR_UMAT + ACCESS_WRITE, &vec); }
 
 template<typename _Tp> inline
 _OutputArray::_OutputArray(std::vector<_Tp>& vec)
-{ init(FIXED_TYPE + STD_VECTOR + DataType<_Tp>::type + ACCESS_WRITE, &vec); }
+{ init(FIXED_TYPE + STD_VECTOR + traits::Type<_Tp>::value + ACCESS_WRITE, &vec); }
 
-inline
-_OutputArray::_OutputArray(std::vector<bool>&)
-{ CV_Error(Error::StsUnsupportedFormat, "std::vector<bool> cannot be an output array\n"); }
+template<typename _Tp, std::size_t _Nm> inline
+_OutputArray::_OutputArray(std::array<_Tp, _Nm>& arr)
+{ init(FIXED_TYPE + FIXED_SIZE + STD_ARRAY + traits::Type<_Tp>::value + ACCESS_WRITE, arr.data(), Size(1, _Nm)); }
+
+template<std::size_t _Nm> inline
+_OutputArray::_OutputArray(std::array<Mat, _Nm>& arr)
+{ init(STD_ARRAY_MAT + ACCESS_WRITE, arr.data(), Size(1, _Nm)); }
 
 template<typename _Tp> inline
 _OutputArray::_OutputArray(std::vector<std::vector<_Tp> >& vec)
-{ init(FIXED_TYPE + STD_VECTOR_VECTOR + DataType<_Tp>::type + ACCESS_WRITE, &vec); }
+{ init(FIXED_TYPE + STD_VECTOR_VECTOR + traits::Type<_Tp>::value + ACCESS_WRITE, &vec); }
 
 template<typename _Tp> inline
 _OutputArray::_OutputArray(std::vector<Mat_<_Tp> >& vec)
-{ init(FIXED_TYPE + STD_VECTOR_MAT + DataType<_Tp>::type + ACCESS_WRITE, &vec); }
+{ init(FIXED_TYPE + STD_VECTOR_MAT + traits::Type<_Tp>::value + ACCESS_WRITE, &vec); }
 
 template<typename _Tp> inline
 _OutputArray::_OutputArray(Mat_<_Tp>& m)
-{ init(FIXED_TYPE + MAT + DataType<_Tp>::type + ACCESS_WRITE, &m); }
+{ init(FIXED_TYPE + MAT + traits::Type<_Tp>::value + ACCESS_WRITE, &m); }
 
 template<typename _Tp, int m, int n> inline
 _OutputArray::_OutputArray(Matx<_Tp, m, n>& mtx)
-{ init(FIXED_TYPE + FIXED_SIZE + MATX + DataType<_Tp>::type + ACCESS_WRITE, &mtx, Size(n, m)); }
+{ init(FIXED_TYPE + FIXED_SIZE + MATX + traits::Type<_Tp>::value + ACCESS_WRITE, &mtx, Size(n, m)); }
 
 template<typename _Tp> inline
 _OutputArray::_OutputArray(_Tp* vec, int n)
-{ init(FIXED_TYPE + FIXED_SIZE + MATX + DataType<_Tp>::type + ACCESS_WRITE, vec, Size(n, 1)); }
+{ init(FIXED_TYPE + FIXED_SIZE + MATX + traits::Type<_Tp>::value + ACCESS_WRITE, vec, Size(n, 1)); }
 
 template<typename _Tp> inline
 _OutputArray::_OutputArray(const std::vector<_Tp>& vec)
-{ init(FIXED_TYPE + FIXED_SIZE + STD_VECTOR + DataType<_Tp>::type + ACCESS_WRITE, &vec); }
+{ init(FIXED_TYPE + FIXED_SIZE + STD_VECTOR + traits::Type<_Tp>::value + ACCESS_WRITE, &vec); }
+
+template<typename _Tp, std::size_t _Nm> inline
+_OutputArray::_OutputArray(const std::array<_Tp, _Nm>& arr)
+{ init(FIXED_TYPE + FIXED_SIZE + STD_ARRAY + traits::Type<_Tp>::value + ACCESS_WRITE, arr.data(), Size(1, _Nm)); }
+
+template<std::size_t _Nm> inline
+_OutputArray::_OutputArray(const std::array<Mat, _Nm>& arr)
+{ init(FIXED_SIZE + STD_ARRAY_MAT + ACCESS_WRITE, arr.data(), Size(1, _Nm)); }
 
 template<typename _Tp> inline
 _OutputArray::_OutputArray(const std::vector<std::vector<_Tp> >& vec)
-{ init(FIXED_TYPE + FIXED_SIZE + STD_VECTOR_VECTOR + DataType<_Tp>::type + ACCESS_WRITE, &vec); }
+{ init(FIXED_TYPE + FIXED_SIZE + STD_VECTOR_VECTOR + traits::Type<_Tp>::value + ACCESS_WRITE, &vec); }
 
 template<typename _Tp> inline
 _OutputArray::_OutputArray(const std::vector<Mat_<_Tp> >& vec)
-{ init(FIXED_TYPE + FIXED_SIZE + STD_VECTOR_MAT + DataType<_Tp>::type + ACCESS_WRITE, &vec); }
+{ init(FIXED_TYPE + FIXED_SIZE + STD_VECTOR_MAT + traits::Type<_Tp>::value + ACCESS_WRITE, &vec); }
 
 template<typename _Tp> inline
 _OutputArray::_OutputArray(const Mat_<_Tp>& m)
-{ init(FIXED_TYPE + FIXED_SIZE + MAT + DataType<_Tp>::type + ACCESS_WRITE, &m); }
+{ init(FIXED_TYPE + FIXED_SIZE + MAT + traits::Type<_Tp>::value + ACCESS_WRITE, &m); }
 
 template<typename _Tp, int m, int n> inline
 _OutputArray::_OutputArray(const Matx<_Tp, m, n>& mtx)
-{ init(FIXED_TYPE + FIXED_SIZE + MATX + DataType<_Tp>::type + ACCESS_WRITE, &mtx, Size(n, m)); }
+{ init(FIXED_TYPE + FIXED_SIZE + MATX + traits::Type<_Tp>::value + ACCESS_WRITE, &mtx, Size(n, m)); }
 
 template<typename _Tp> inline
 _OutputArray::_OutputArray(const _Tp* vec, int n)
-{ init(FIXED_TYPE + FIXED_SIZE + MATX + DataType<_Tp>::type + ACCESS_WRITE, vec, Size(n, 1)); }
+{ init(FIXED_TYPE + FIXED_SIZE + MATX + traits::Type<_Tp>::value + ACCESS_WRITE, vec, Size(n, 1)); }
 
 inline _OutputArray::_OutputArray(cuda::GpuMat& d_mat)
 { init(CUDA_GPU_MAT + ACCESS_WRITE, &d_mat); }
@@ -231,10 +305,29 @@ inline _OutputArray::_OutputArray(const ogl::Buffer& buf)
 inline _OutputArray::_OutputArray(const cuda::HostMem& cuda_mem)
 { init(FIXED_TYPE + FIXED_SIZE + CUDA_HOST_MEM + ACCESS_WRITE, &cuda_mem); }
 
+template<typename _Tp> inline
+_OutputArray _OutputArray::rawOut(std::vector<_Tp>& vec)
+{
+    _OutputArray v;
+    v.flags = _InputArray::FIXED_TYPE + _InputArray::STD_VECTOR + rawType<_Tp>() + ACCESS_WRITE;
+    v.obj = (void*)&vec;
+    return v;
+}
+
+template<typename _Tp, std::size_t _Nm> inline
+_OutputArray _OutputArray::rawOut(std::array<_Tp, _Nm>& arr)
+{
+    _OutputArray v;
+    v.flags = FIXED_TYPE + FIXED_SIZE + STD_ARRAY + traits::Type<_Tp>::value + ACCESS_WRITE;
+    v.obj = (void*)arr.data();
+    v.sz = Size(1, _Nm);
+    return v;
+}
+
 ///////////////////////////////////////////////////////////////////////////////////////////
 
-inline _InputOutputArray::_InputOutputArray() { init(ACCESS_RW, 0); }
-inline _InputOutputArray::_InputOutputArray(int _flags, void* _obj) { init(_flags|ACCESS_RW, _obj); }
+inline _InputOutputArray::_InputOutputArray() { init(0+ACCESS_RW, 0); }
+inline _InputOutputArray::_InputOutputArray(int _flags, void* _obj) { init(_flags+ACCESS_RW, _obj); }
 inline _InputOutputArray::_InputOutputArray(Mat& m) { init(MAT+ACCESS_RW, &m); }
 inline _InputOutputArray::_InputOutputArray(std::vector<Mat>& vec) { init(STD_VECTOR_MAT+ACCESS_RW, &vec); }
 inline _InputOutputArray::_InputOutputArray(UMat& m) { init(UMAT+ACCESS_RW, &m); }
@@ -242,54 +335,67 @@ inline _InputOutputArray::_InputOutputArray(std::vector<UMat>& vec) { init(STD_V
 
 template<typename _Tp> inline
 _InputOutputArray::_InputOutputArray(std::vector<_Tp>& vec)
-{ init(FIXED_TYPE + STD_VECTOR + DataType<_Tp>::type + ACCESS_RW, &vec); }
+{ init(FIXED_TYPE + STD_VECTOR + traits::Type<_Tp>::value + ACCESS_RW, &vec); }
+
+template<typename _Tp, std::size_t _Nm> inline
+_InputOutputArray::_InputOutputArray(std::array<_Tp, _Nm>& arr)
+{ init(FIXED_TYPE + FIXED_SIZE + STD_ARRAY + traits::Type<_Tp>::value + ACCESS_RW, arr.data(), Size(1, _Nm)); }
 
-inline _InputOutputArray::_InputOutputArray(std::vector<bool>&)
-{ CV_Error(Error::StsUnsupportedFormat, "std::vector<bool> cannot be an input/output array\n"); }
+template<std::size_t _Nm> inline
+_InputOutputArray::_InputOutputArray(std::array<Mat, _Nm>& arr)
+{ init(STD_ARRAY_MAT + ACCESS_RW, arr.data(), Size(1, _Nm)); }
 
 template<typename _Tp> inline
 _InputOutputArray::_InputOutputArray(std::vector<std::vector<_Tp> >& vec)
-{ init(FIXED_TYPE + STD_VECTOR_VECTOR + DataType<_Tp>::type + ACCESS_RW, &vec); }
+{ init(FIXED_TYPE + STD_VECTOR_VECTOR + traits::Type<_Tp>::value + ACCESS_RW, &vec); }
 
 template<typename _Tp> inline
 _InputOutputArray::_InputOutputArray(std::vector<Mat_<_Tp> >& vec)
-{ init(FIXED_TYPE + STD_VECTOR_MAT + DataType<_Tp>::type + ACCESS_RW, &vec); }
+{ init(FIXED_TYPE + STD_VECTOR_MAT + traits::Type<_Tp>::value + ACCESS_RW, &vec); }
 
 template<typename _Tp> inline
 _InputOutputArray::_InputOutputArray(Mat_<_Tp>& m)
-{ init(FIXED_TYPE + MAT + DataType<_Tp>::type + ACCESS_RW, &m); }
+{ init(FIXED_TYPE + MAT + traits::Type<_Tp>::value + ACCESS_RW, &m); }
 
 template<typename _Tp, int m, int n> inline
 _InputOutputArray::_InputOutputArray(Matx<_Tp, m, n>& mtx)
-{ init(FIXED_TYPE + FIXED_SIZE + MATX + DataType<_Tp>::type + ACCESS_RW, &mtx, Size(n, m)); }
+{ init(FIXED_TYPE + FIXED_SIZE + MATX + traits::Type<_Tp>::value + ACCESS_RW, &mtx, Size(n, m)); }
 
 template<typename _Tp> inline
 _InputOutputArray::_InputOutputArray(_Tp* vec, int n)
-{ init(FIXED_TYPE + FIXED_SIZE + MATX + DataType<_Tp>::type + ACCESS_RW, vec, Size(n, 1)); }
+{ init(FIXED_TYPE + FIXED_SIZE + MATX + traits::Type<_Tp>::value + ACCESS_RW, vec, Size(n, 1)); }
 
 template<typename _Tp> inline
 _InputOutputArray::_InputOutputArray(const std::vector<_Tp>& vec)
-{ init(FIXED_TYPE + FIXED_SIZE + STD_VECTOR + DataType<_Tp>::type + ACCESS_RW, &vec); }
+{ init(FIXED_TYPE + FIXED_SIZE + STD_VECTOR + traits::Type<_Tp>::value + ACCESS_RW, &vec); }
+
+template<typename _Tp, std::size_t _Nm> inline
+_InputOutputArray::_InputOutputArray(const std::array<_Tp, _Nm>& arr)
+{ init(FIXED_TYPE + FIXED_SIZE + STD_ARRAY + traits::Type<_Tp>::value + ACCESS_RW, arr.data(), Size(1, _Nm)); }
+
+template<std::size_t _Nm> inline
+_InputOutputArray::_InputOutputArray(const std::array<Mat, _Nm>& arr)
+{ init(FIXED_SIZE + STD_ARRAY_MAT + ACCESS_RW, arr.data(), Size(1, _Nm)); }
 
 template<typename _Tp> inline
 _InputOutputArray::_InputOutputArray(const std::vector<std::vector<_Tp> >& vec)
-{ init(FIXED_TYPE + FIXED_SIZE + STD_VECTOR_VECTOR + DataType<_Tp>::type + ACCESS_RW, &vec); }
+{ init(FIXED_TYPE + FIXED_SIZE + STD_VECTOR_VECTOR + traits::Type<_Tp>::value + ACCESS_RW, &vec); }
 
 template<typename _Tp> inline
 _InputOutputArray::_InputOutputArray(const std::vector<Mat_<_Tp> >& vec)
-{ init(FIXED_TYPE + FIXED_SIZE + STD_VECTOR_MAT + DataType<_Tp>::type + ACCESS_RW, &vec); }
+{ init(FIXED_TYPE + FIXED_SIZE + STD_VECTOR_MAT + traits::Type<_Tp>::value + ACCESS_RW, &vec); }
 
 template<typename _Tp> inline
 _InputOutputArray::_InputOutputArray(const Mat_<_Tp>& m)
-{ init(FIXED_TYPE + FIXED_SIZE + MAT + DataType<_Tp>::type + ACCESS_RW, &m); }
+{ init(FIXED_TYPE + FIXED_SIZE + MAT + traits::Type<_Tp>::value + ACCESS_RW, &m); }
 
 template<typename _Tp, int m, int n> inline
 _InputOutputArray::_InputOutputArray(const Matx<_Tp, m, n>& mtx)
-{ init(FIXED_TYPE + FIXED_SIZE + MATX + DataType<_Tp>::type + ACCESS_RW, &mtx, Size(n, m)); }
+{ init(FIXED_TYPE + FIXED_SIZE + MATX + traits::Type<_Tp>::value + ACCESS_RW, &mtx, Size(n, m)); }
 
 template<typename _Tp> inline
 _InputOutputArray::_InputOutputArray(const _Tp* vec, int n)
-{ init(FIXED_TYPE + FIXED_SIZE + MATX + DataType<_Tp>::type + ACCESS_RW, vec, Size(n, 1)); }
+{ init(FIXED_TYPE + FIXED_SIZE + MATX + traits::Type<_Tp>::value + ACCESS_RW, vec, Size(n, 1)); }
 
 inline _InputOutputArray::_InputOutputArray(cuda::GpuMat& d_mat)
 { init(CUDA_GPU_MAT + ACCESS_RW, &d_mat); }
@@ -314,8 +420,12 @@ inline _InputOutputArray::_InputOutputArray(const std::vector<UMat>& vec)
 
 inline _InputOutputArray::_InputOutputArray(const cuda::GpuMat& d_mat)
 { init(FIXED_TYPE + FIXED_SIZE + CUDA_GPU_MAT + ACCESS_RW, &d_mat); }
+
 inline _InputOutputArray::_InputOutputArray(const std::vector<cuda::GpuMat>& d_mat)
-{	init(FIXED_TYPE + FIXED_SIZE + STD_VECTOR_CUDA_GPU_MAT + ACCESS_RW, &d_mat);}
+{ init(FIXED_TYPE + FIXED_SIZE + STD_VECTOR_CUDA_GPU_MAT + ACCESS_RW, &d_mat);}
+
+template<> inline _InputOutputArray::_InputOutputArray(std::vector<cuda::GpuMat>& d_mat)
+{ init(FIXED_TYPE + FIXED_SIZE + STD_VECTOR_CUDA_GPU_MAT + ACCESS_RW, &d_mat);}
 
 inline _InputOutputArray::_InputOutputArray(const ogl::Buffer& buf)
 { init(FIXED_TYPE + FIXED_SIZE + OPENGL_BUFFER + ACCESS_RW, &buf); }
@@ -323,18 +433,44 @@ inline _InputOutputArray::_InputOutputArray(const ogl::Buffer& buf)
 inline _InputOutputArray::_InputOutputArray(const cuda::HostMem& cuda_mem)
 { init(FIXED_TYPE + FIXED_SIZE + CUDA_HOST_MEM + ACCESS_RW, &cuda_mem); }
 
+template<typename _Tp> inline
+_InputOutputArray _InputOutputArray::rawInOut(std::vector<_Tp>& vec)
+{
+    _InputOutputArray v;
+    v.flags = _InputArray::FIXED_TYPE + _InputArray::STD_VECTOR + rawType<_Tp>() + ACCESS_RW;
+    v.obj = (void*)&vec;
+    return v;
+}
+
+template<typename _Tp, std::size_t _Nm> inline
+_InputOutputArray _InputOutputArray::rawInOut(std::array<_Tp, _Nm>& arr)
+{
+    _InputOutputArray v;
+    v.flags = FIXED_TYPE + FIXED_SIZE + STD_ARRAY + traits::Type<_Tp>::value + ACCESS_RW;
+    v.obj = (void*)arr.data();
+    v.sz = Size(1, _Nm);
+    return v;
+}
+
+
+template<typename _Tp> static inline _InputArray rawIn(_Tp& v) { return _InputArray::rawIn(v); }
+template<typename _Tp> static inline _OutputArray rawOut(_Tp& v) { return _OutputArray::rawOut(v); }
+template<typename _Tp> static inline _InputOutputArray rawInOut(_Tp& v) { return _InputOutputArray::rawInOut(v); }
+
+CV__DEBUG_NS_END
+
 //////////////////////////////////////////// Mat //////////////////////////////////////////
 
 inline
 Mat::Mat()
     : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0),
-      datalimit(0), allocator(0), u(0), size(&rows)
+      datalimit(0), allocator(0), u(0), size(&rows), step(0)
 {}
 
 inline
 Mat::Mat(int _rows, int _cols, int _type)
     : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0),
-      datalimit(0), allocator(0), u(0), size(&rows)
+      datalimit(0), allocator(0), u(0), size(&rows), step(0)
 {
     create(_rows, _cols, _type);
 }
@@ -342,7 +478,7 @@ Mat::Mat(int _rows, int _cols, int _type)
 inline
 Mat::Mat(int _rows, int _cols, int _type, const Scalar& _s)
     : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0),
-      datalimit(0), allocator(0), u(0), size(&rows)
+      datalimit(0), allocator(0), u(0), size(&rows), step(0)
 {
     create(_rows, _cols, _type);
     *this = _s;
@@ -351,7 +487,7 @@ Mat::Mat(int _rows, int _cols, int _type, const Scalar& _s)
 inline
 Mat::Mat(Size _sz, int _type)
     : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0),
-      datalimit(0), allocator(0), u(0), size(&rows)
+      datalimit(0), allocator(0), u(0), size(&rows), step(0)
 {
     create( _sz.height, _sz.width, _type );
 }
@@ -359,7 +495,7 @@ Mat::Mat(Size _sz, int _type)
 inline
 Mat::Mat(Size _sz, int _type, const Scalar& _s)
     : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0),
-      datalimit(0), allocator(0), u(0), size(&rows)
+      datalimit(0), allocator(0), u(0), size(&rows), step(0)
 {
     create(_sz.height, _sz.width, _type);
     *this = _s;
@@ -368,7 +504,7 @@ Mat::Mat(Size _sz, int _type, const Scalar& _s)
 inline
 Mat::Mat(int _dims, const int* _sz, int _type)
     : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0),
-      datalimit(0), allocator(0), u(0), size(&rows)
+      datalimit(0), allocator(0), u(0), size(&rows), step(0)
 {
     create(_dims, _sz, _type);
 }
@@ -376,17 +512,34 @@ Mat::Mat(int _dims, const int* _sz, int _type)
 inline
 Mat::Mat(int _dims, const int* _sz, int _type, const Scalar& _s)
     : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0),
-      datalimit(0), allocator(0), u(0), size(&rows)
+      datalimit(0), allocator(0), u(0), size(&rows), step(0)
 {
     create(_dims, _sz, _type);
     *this = _s;
 }
 
+inline
+Mat::Mat(const std::vector<int>& _sz, int _type)
+    : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0),
+      datalimit(0), allocator(0), u(0), size(&rows), step(0)
+{
+    create(_sz, _type);
+}
+
+inline
+Mat::Mat(const std::vector<int>& _sz, int _type, const Scalar& _s)
+    : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0),
+      datalimit(0), allocator(0), u(0), size(&rows), step(0)
+{
+    create(_sz, _type);
+    *this = _s;
+}
+
 inline
 Mat::Mat(const Mat& m)
     : flags(m.flags), dims(m.dims), rows(m.rows), cols(m.cols), data(m.data),
       datastart(m.datastart), dataend(m.dataend), datalimit(m.datalimit), allocator(m.allocator),
-      u(m.u), size(&rows)
+      u(m.u), size(&rows), step(0)
 {
     if( u )
         CV_XADD(&u->refcount, 1);
@@ -414,24 +567,20 @@ Mat::Mat(int _rows, int _cols, int _type, void* _data, size_t _step)
     if( _step == AUTO_STEP )
     {
         _step = minstep;
-        flags |= CONTINUOUS_FLAG;
     }
     else
     {
-        if( rows == 1 ) _step = minstep;
         CV_DbgAssert( _step >= minstep );
-
         if (_step % esz1 != 0)
         {
             CV_Error(Error::BadStep, "Step must be a multiple of esz1");
         }
-
-        flags |= _step == minstep ? CONTINUOUS_FLAG : 0;
     }
     step[0] = _step;
     step[1] = esz;
     datalimit = datastart + _step * rows;
     dataend = datalimit - _step + minstep;
+    updateContinuityFlag();
 }
 
 inline
@@ -447,30 +596,27 @@ Mat::Mat(Size _sz, int _type, void* _data, size_t _step)
     if( _step == AUTO_STEP )
     {
         _step = minstep;
-        flags |= CONTINUOUS_FLAG;
     }
     else
     {
-        if( rows == 1 ) _step = minstep;
         CV_DbgAssert( _step >= minstep );
 
         if (_step % esz1 != 0)
         {
             CV_Error(Error::BadStep, "Step must be a multiple of esz1");
         }
-
-        flags |= _step == minstep ? CONTINUOUS_FLAG : 0;
     }
     step[0] = _step;
     step[1] = esz;
     datalimit = datastart + _step*rows;
     dataend = datalimit - _step + minstep;
+    updateContinuityFlag();
 }
 
 template<typename _Tp> inline
 Mat::Mat(const std::vector<_Tp>& vec, bool copyData)
-    : flags(MAGIC_VAL | DataType<_Tp>::type | CV_MAT_CONT_FLAG), dims(2), rows((int)vec.size()),
-      cols(1), data(0), datastart(0), dataend(0), allocator(0), u(0), size(&rows)
+    : flags(MAGIC_VAL + traits::Type<_Tp>::value + CV_MAT_CONT_FLAG), dims(2), rows((int)vec.size()),
+      cols(1), data(0), datastart(0), dataend(0), datalimit(0), allocator(0), u(0), size(&rows), step(0)
 {
     if(vec.empty())
         return;
@@ -481,13 +627,50 @@ Mat::Mat(const std::vector<_Tp>& vec, bool copyData)
         datalimit = dataend = datastart + rows * step[0];
     }
     else
-        Mat((int)vec.size(), 1, DataType<_Tp>::type, (uchar*)&vec[0]).copyTo(*this);
+        Mat((int)vec.size(), 1, traits::Type<_Tp>::value, (uchar*)&vec[0]).copyTo(*this);
+}
+
+template<typename _Tp, typename> inline
+Mat::Mat(const std::initializer_list<_Tp> list)
+    : Mat()
+{
+    CV_Assert(list.size() != 0);
+    Mat((int)list.size(), 1, traits::Type<_Tp>::value, (uchar*)list.begin()).copyTo(*this);
+}
+
+template<typename _Tp> inline
+Mat::Mat(const std::initializer_list<int> sizes, const std::initializer_list<_Tp> list)
+    : Mat()
+{
+    size_t size_total = 1;
+    for(auto s : sizes)
+        size_total *= s;
+    CV_Assert(list.size() != 0);
+    CV_Assert(size_total == list.size());
+    Mat((int)sizes.size(), (int*)sizes.begin(), traits::Type<_Tp>::value, (uchar*)list.begin()).copyTo(*this);
+}
+
+template<typename _Tp, std::size_t _Nm> inline
+Mat::Mat(const std::array<_Tp, _Nm>& arr, bool copyData)
+    : flags(MAGIC_VAL + traits::Type<_Tp>::value + CV_MAT_CONT_FLAG), dims(2), rows((int)arr.size()),
+      cols(1), data(0), datastart(0), dataend(0), datalimit(0), allocator(0), u(0), size(&rows), step(0)
+{
+    if(arr.empty())
+        return;
+    if( !copyData )
+    {
+        step[0] = step[1] = sizeof(_Tp);
+        datastart = data = (uchar*)arr.data();
+        datalimit = dataend = datastart + rows * step[0];
+    }
+    else
+        Mat((int)arr.size(), 1, traits::Type<_Tp>::value, (uchar*)arr.data()).copyTo(*this);
 }
 
 template<typename _Tp, int n> inline
 Mat::Mat(const Vec<_Tp, n>& vec, bool copyData)
-    : flags(MAGIC_VAL | DataType<_Tp>::type | CV_MAT_CONT_FLAG), dims(2), rows(n), cols(1), data(0),
-      datastart(0), dataend(0), allocator(0), u(0), size(&rows)
+    : flags(MAGIC_VAL + traits::Type<_Tp>::value + CV_MAT_CONT_FLAG), dims(2), rows(n), cols(1), data(0),
+      datastart(0), dataend(0), datalimit(0), allocator(0), u(0), size(&rows), step(0)
 {
     if( !copyData )
     {
@@ -496,14 +679,14 @@ Mat::Mat(const Vec<_Tp, n>& vec, bool copyData)
         datalimit = dataend = datastart + rows * step[0];
     }
     else
-        Mat(n, 1, DataType<_Tp>::type, (void*)vec.val).copyTo(*this);
+        Mat(n, 1, traits::Type<_Tp>::value, (void*)vec.val).copyTo(*this);
 }
 
 
 template<typename _Tp, int m, int n> inline
 Mat::Mat(const Matx<_Tp,m,n>& M, bool copyData)
-    : flags(MAGIC_VAL | DataType<_Tp>::type | CV_MAT_CONT_FLAG), dims(2), rows(m), cols(n), data(0),
-      datastart(0), dataend(0), allocator(0), u(0), size(&rows)
+    : flags(MAGIC_VAL + traits::Type<_Tp>::value + CV_MAT_CONT_FLAG), dims(2), rows(m), cols(n), data(0),
+      datastart(0), dataend(0), datalimit(0), allocator(0), u(0), size(&rows), step(0)
 {
     if( !copyData )
     {
@@ -513,13 +696,13 @@ Mat::Mat(const Matx<_Tp,m,n>& M, bool copyData)
         datalimit = dataend = datastart + rows * step[0];
     }
     else
-        Mat(m, n, DataType<_Tp>::type, (uchar*)M.val).copyTo(*this);
+        Mat(m, n, traits::Type<_Tp>::value, (uchar*)M.val).copyTo(*this);
 }
 
 template<typename _Tp> inline
 Mat::Mat(const Point_<_Tp>& pt, bool copyData)
-    : flags(MAGIC_VAL | DataType<_Tp>::type | CV_MAT_CONT_FLAG), dims(2), rows(2), cols(1), data(0),
-      datastart(0), dataend(0), allocator(0), u(0), size(&rows)
+    : flags(MAGIC_VAL + traits::Type<_Tp>::value + CV_MAT_CONT_FLAG), dims(2), rows(2), cols(1), data(0),
+      datastart(0), dataend(0), datalimit(0), allocator(0), u(0), size(&rows), step(0)
 {
     if( !copyData )
     {
@@ -529,7 +712,7 @@ Mat::Mat(const Point_<_Tp>& pt, bool copyData)
     }
     else
     {
-        create(2, 1, DataType<_Tp>::type);
+        create(2, 1, traits::Type<_Tp>::value);
         ((_Tp*)data)[0] = pt.x;
         ((_Tp*)data)[1] = pt.y;
     }
@@ -537,8 +720,8 @@ Mat::Mat(const Point_<_Tp>& pt, bool copyData)
 
 template<typename _Tp> inline
 Mat::Mat(const Point3_<_Tp>& pt, bool copyData)
-    : flags(MAGIC_VAL | DataType<_Tp>::type | CV_MAT_CONT_FLAG), dims(2), rows(3), cols(1), data(0),
-      datastart(0), dataend(0), allocator(0), u(0), size(&rows)
+    : flags(MAGIC_VAL + traits::Type<_Tp>::value + CV_MAT_CONT_FLAG), dims(2), rows(3), cols(1), data(0),
+      datastart(0), dataend(0), datalimit(0), allocator(0), u(0), size(&rows), step(0)
 {
     if( !copyData )
     {
@@ -548,7 +731,7 @@ Mat::Mat(const Point3_<_Tp>& pt, bool copyData)
     }
     else
     {
-        create(3, 1, DataType<_Tp>::type);
+        create(3, 1, traits::Type<_Tp>::value);
         ((_Tp*)data)[0] = pt.x;
         ((_Tp*)data)[1] = pt.y;
         ((_Tp*)data)[2] = pt.z;
@@ -557,7 +740,7 @@ Mat::Mat(const Point3_<_Tp>& pt, bool copyData)
 
 template<typename _Tp> inline
 Mat::Mat(const MatCommaInitializer_<_Tp>& commaInitializer)
-    : flags(MAGIC_VAL | DataType<_Tp>::type | CV_MAT_CONT_FLAG), dims(0), rows(0), cols(0), data(0),
+    : flags(MAGIC_VAL + traits::Type<_Tp>::value + CV_MAT_CONT_FLAG), dims(0), rows(0), cols(0), data(0),
       datastart(0), dataend(0), allocator(0), u(0), size(&rows)
 {
     *this = commaInitializer.operator Mat_<_Tp>();
@@ -676,7 +859,8 @@ void Mat::addref()
         CV_XADD(&u->refcount, 1);
 }
 
-inline void Mat::release()
+inline
+void Mat::release()
 {
     if( u && CV_XADD(&u->refcount, -1) == 1 )
         deallocate();
@@ -684,6 +868,16 @@ inline void Mat::release()
     datastart = dataend = datalimit = data = 0;
     for(int i = 0; i < dims; i++)
         size.p[i] = 0;
+#ifdef _DEBUG
+    flags = MAGIC_VAL;
+    dims = rows = cols = 0;
+    if(step.p != step.buf)
+    {
+        fastFree(step.p);
+        step.p = step.buf;
+        size.p = &rows;
+    }
+#endif
 }
 
 inline
@@ -704,6 +898,12 @@ Mat Mat::operator()(const Range* ranges) const
     return Mat(*this, ranges);
 }
 
+inline
+Mat Mat::operator()(const std::vector<Range>& ranges) const
+{
+    return Mat(*this, ranges);
+}
+
 inline
 bool Mat::isContinuous() const
 {
@@ -719,7 +919,9 @@ bool Mat::isSubmatrix() const
 inline
 size_t Mat::elemSize() const
 {
-    return dims > 0 ? step.p[dims - 1] : 0;
+    size_t res = dims > 0 ? step.p[dims - 1] : 0;
+    CV_DbgAssert(res != 0);
+    return res;
 }
 
 inline
@@ -755,7 +957,7 @@ size_t Mat::step1(int i) const
 inline
 bool Mat::empty() const
 {
-    return data == 0 || total() == 0;
+    return data == 0 || total() == 0 || dims == 0;
 }
 
 inline
@@ -769,6 +971,17 @@ size_t Mat::total() const
     return p;
 }
 
+inline
+size_t Mat::total(int startDim, int endDim) const
+{
+    CV_Assert( 0 <= startDim && startDim <= endDim);
+    size_t p = 1;
+    int endDim_ = endDim <= dims ? endDim : dims;
+    for( int i = startDim; i < endDim_; i++ )
+        p *= size[i];
+    return p;
+}
+
 inline
 uchar* Mat::ptr(int y)
 {
@@ -793,7 +1006,7 @@ _Tp* Mat::ptr(int y)
 template<typename _Tp> inline
 const _Tp* Mat::ptr(int y) const
 {
-    CV_DbgAssert( y == 0 || (data && dims >= 1 && data && (unsigned)y < (unsigned)size.p[0]) );
+    CV_DbgAssert( y == 0 || (data && dims >= 1 && (unsigned)y < (unsigned)size.p[0]) );
     return (const _Tp*)(data + step.p[0] * y);
 }
 
@@ -909,6 +1122,34 @@ const uchar* Mat::ptr(const int* idx) const
     return p;
 }
 
+template<typename _Tp> inline
+_Tp* Mat::ptr(const int* idx)
+{
+    int i, d = dims;
+    uchar* p = data;
+    CV_DbgAssert( d >= 1 && p );
+    for( i = 0; i < d; i++ )
+    {
+        CV_DbgAssert( (unsigned)idx[i] < (unsigned)size.p[i] );
+        p += idx[i] * step.p[i];
+    }
+    return (_Tp*)p;
+}
+
+template<typename _Tp> inline
+const _Tp* Mat::ptr(const int* idx) const
+{
+    int i, d = dims;
+    uchar* p = data;
+    CV_DbgAssert( d >= 1 && p );
+    for( i = 0; i < d; i++ )
+    {
+        CV_DbgAssert( (unsigned)idx[i] < (unsigned)size.p[i] );
+        p += idx[i] * step.p[i];
+    }
+    return (const _Tp*)p;
+}
+
 template<typename _Tp> inline
 _Tp& Mat::at(int i0, int i1)
 {
@@ -916,7 +1157,7 @@ _Tp& Mat::at(int i0, int i1)
     CV_DbgAssert(data);
     CV_DbgAssert((unsigned)i0 < (unsigned)size.p[0]);
     CV_DbgAssert((unsigned)(i1 * DataType<_Tp>::channels) < (unsigned)(size.p[1] * channels()));
-    CV_DbgAssert(CV_ELEM_SIZE1(DataType<_Tp>::depth) == elemSize1());
+    CV_DbgAssert(CV_ELEM_SIZE1(traits::Depth<_Tp>::value) == elemSize1());
     return ((_Tp*)(data + step.p[0] * i0))[i1];
 }
 
@@ -927,7 +1168,7 @@ const _Tp& Mat::at(int i0, int i1) const
     CV_DbgAssert(data);
     CV_DbgAssert((unsigned)i0 < (unsigned)size.p[0]);
     CV_DbgAssert((unsigned)(i1 * DataType<_Tp>::channels) < (unsigned)(size.p[1] * channels()));
-    CV_DbgAssert(CV_ELEM_SIZE1(DataType<_Tp>::depth) == elemSize1());
+    CV_DbgAssert(CV_ELEM_SIZE1(traits::Depth<_Tp>::value) == elemSize1());
     return ((const _Tp*)(data + step.p[0] * i0))[i1];
 }
 
@@ -938,7 +1179,7 @@ _Tp& Mat::at(Point pt)
     CV_DbgAssert(data);
     CV_DbgAssert((unsigned)pt.y < (unsigned)size.p[0]);
     CV_DbgAssert((unsigned)(pt.x * DataType<_Tp>::channels) < (unsigned)(size.p[1] * channels()));
-    CV_DbgAssert(CV_ELEM_SIZE1(DataType<_Tp>::depth) == elemSize1());
+    CV_DbgAssert(CV_ELEM_SIZE1(traits::Depth<_Tp>::value) == elemSize1());
     return ((_Tp*)(data + step.p[0] * pt.y))[pt.x];
 }
 
@@ -949,7 +1190,7 @@ const _Tp& Mat::at(Point pt) const
     CV_DbgAssert(data);
     CV_DbgAssert((unsigned)pt.y < (unsigned)size.p[0]);
     CV_DbgAssert((unsigned)(pt.x * DataType<_Tp>::channels) < (unsigned)(size.p[1] * channels()));
-    CV_DbgAssert(CV_ELEM_SIZE1(DataType<_Tp>::depth) == elemSize1());
+    CV_DbgAssert(CV_ELEM_SIZE1(traits::Depth<_Tp>::value) == elemSize1());
     return ((const _Tp*)(data + step.p[0] * pt.y))[pt.x];
 }
 
@@ -959,7 +1200,7 @@ _Tp& Mat::at(int i0)
     CV_DbgAssert(dims <= 2);
     CV_DbgAssert(data);
     CV_DbgAssert((unsigned)i0 < (unsigned)(size.p[0] * size.p[1]));
-    CV_DbgAssert(elemSize() == CV_ELEM_SIZE(DataType<_Tp>::type));
+    CV_DbgAssert(elemSize() == sizeof(_Tp));
     if( isContinuous() || size.p[0] == 1 )
         return ((_Tp*)data)[i0];
     if( size.p[1] == 1 )
@@ -974,7 +1215,7 @@ const _Tp& Mat::at(int i0) const
     CV_DbgAssert(dims <= 2);
     CV_DbgAssert(data);
     CV_DbgAssert((unsigned)i0 < (unsigned)(size.p[0] * size.p[1]));
-    CV_DbgAssert(elemSize() == CV_ELEM_SIZE(DataType<_Tp>::type));
+    CV_DbgAssert(elemSize() == sizeof(_Tp));
     if( isContinuous() || size.p[0] == 1 )
         return ((const _Tp*)data)[i0];
     if( size.p[1] == 1 )
@@ -986,48 +1227,50 @@ const _Tp& Mat::at(int i0) const
 template<typename _Tp> inline
 _Tp& Mat::at(int i0, int i1, int i2)
 {
-    CV_DbgAssert( elemSize() == CV_ELEM_SIZE(DataType<_Tp>::type) );
+    CV_DbgAssert( elemSize() == sizeof(_Tp) );
     return *(_Tp*)ptr(i0, i1, i2);
 }
 
 template<typename _Tp> inline
 const _Tp& Mat::at(int i0, int i1, int i2) const
 {
-    CV_DbgAssert( elemSize() == CV_ELEM_SIZE(DataType<_Tp>::type) );
+    CV_DbgAssert( elemSize() == sizeof(_Tp) );
     return *(const _Tp*)ptr(i0, i1, i2);
 }
 
 template<typename _Tp> inline
 _Tp& Mat::at(const int* idx)
 {
-    CV_DbgAssert( elemSize() == CV_ELEM_SIZE(DataType<_Tp>::type) );
+    CV_DbgAssert( elemSize() == sizeof(_Tp) );
     return *(_Tp*)ptr(idx);
 }
 
 template<typename _Tp> inline
 const _Tp& Mat::at(const int* idx) const
 {
-    CV_DbgAssert( elemSize() == CV_ELEM_SIZE(DataType<_Tp>::type) );
+    CV_DbgAssert( elemSize() == sizeof(_Tp) );
     return *(const _Tp*)ptr(idx);
 }
 
 template<typename _Tp, int n> inline
 _Tp& Mat::at(const Vec<int, n>& idx)
 {
-    CV_DbgAssert( elemSize() == CV_ELEM_SIZE(DataType<_Tp>::type) );
+    CV_DbgAssert( elemSize() == sizeof(_Tp) );
     return *(_Tp*)ptr(idx.val);
 }
 
 template<typename _Tp, int n> inline
 const _Tp& Mat::at(const Vec<int, n>& idx) const
 {
-    CV_DbgAssert( elemSize() == CV_ELEM_SIZE(DataType<_Tp>::type) );
+    CV_DbgAssert( elemSize() == sizeof(_Tp) );
     return *(const _Tp*)ptr(idx.val);
 }
 
 template<typename _Tp> inline
 MatConstIterator_<_Tp> Mat::begin() const
 {
+    if (empty())
+        return MatConstIterator_<_Tp>();
     CV_DbgAssert( elemSize() == sizeof(_Tp) );
     return MatConstIterator_<_Tp>((const Mat_<_Tp>*)this);
 }
@@ -1035,6 +1278,8 @@ MatConstIterator_<_Tp> Mat::begin() const
 template<typename _Tp> inline
 MatConstIterator_<_Tp> Mat::end() const
 {
+    if (empty())
+        return MatConstIterator_<_Tp>();
     CV_DbgAssert( elemSize() == sizeof(_Tp) );
     MatConstIterator_<_Tp> it((const Mat_<_Tp>*)this);
     it += total();
@@ -1044,6 +1289,8 @@ MatConstIterator_<_Tp> Mat::end() const
 template<typename _Tp> inline
 MatIterator_<_Tp> Mat::begin()
 {
+    if (empty())
+        return MatIterator_<_Tp>();
     CV_DbgAssert( elemSize() == sizeof(_Tp) );
     return MatIterator_<_Tp>((Mat_<_Tp>*)this);
 }
@@ -1051,6 +1298,8 @@ MatIterator_<_Tp> Mat::begin()
 template<typename _Tp> inline
 MatIterator_<_Tp> Mat::end()
 {
+    if (empty())
+        return MatIterator_<_Tp>();
     CV_DbgAssert( elemSize() == sizeof(_Tp) );
     MatIterator_<_Tp> it((Mat_<_Tp>*)this);
     it += total();
@@ -1065,7 +1314,7 @@ void Mat::forEach(const Functor& operation) {
 template<typename _Tp, typename Functor> inline
 void Mat::forEach(const Functor& operation) const {
     // call as not const
-    (const_cast<Mat*>(this))->forEach<const _Tp>(operation);
+    (const_cast<Mat*>(this))->forEach<_Tp>(operation);
 }
 
 template<typename _Tp> inline
@@ -1076,16 +1325,24 @@ Mat::operator std::vector<_Tp>() const
     return v;
 }
 
+template<typename _Tp, std::size_t _Nm> inline
+Mat::operator std::array<_Tp, _Nm>() const
+{
+    std::array<_Tp, _Nm> v;
+    copyTo(v);
+    return v;
+}
+
 template<typename _Tp, int n> inline
 Mat::operator Vec<_Tp, n>() const
 {
     CV_Assert( data && dims <= 2 && (rows == 1 || cols == 1) &&
                rows + cols - 1 == n && channels() == 1 );
 
-    if( isContinuous() && type() == DataType<_Tp>::type )
+    if( isContinuous() && type() == traits::Type<_Tp>::value )
         return Vec<_Tp, n>((_Tp*)data);
     Vec<_Tp, n> v;
-    Mat tmp(rows, cols, DataType<_Tp>::type, v.val);
+    Mat tmp(rows, cols, traits::Type<_Tp>::value, v.val);
     convertTo(tmp, tmp.type());
     return v;
 }
@@ -1095,10 +1352,10 @@ Mat::operator Matx<_Tp, m, n>() const
 {
     CV_Assert( data && dims <= 2 && rows == m && cols == n && channels() == 1 );
 
-    if( isContinuous() && type() == DataType<_Tp>::type )
+    if( isContinuous() && type() == traits::Type<_Tp>::value )
         return Matx<_Tp, m, n>((_Tp*)data);
     Matx<_Tp, m, n> mtx;
-    Mat tmp(rows, cols, DataType<_Tp>::type, mtx.val);
+    Mat tmp(rows, cols, traits::Type<_Tp>::value, mtx.val);
     convertTo(tmp, tmp.type());
     return mtx;
 }
@@ -1108,10 +1365,10 @@ void Mat::push_back(const _Tp& elem)
 {
     if( !data )
     {
-        *this = Mat(1, 1, DataType<_Tp>::type, (void*)&elem).clone();
+        *this = Mat(1, 1, traits::Type<_Tp>::value, (void*)&elem).clone();
         return;
     }
-    CV_Assert(DataType<_Tp>::type == type() && cols == 1
+    CV_Assert(traits::Type<_Tp>::value == type() && cols == 1
               /* && dims == 2 (cols == 1 implies dims == 2) */);
     const uchar* tmp = dataend + step[0];
     if( !isSubmatrix() && isContinuous() && tmp <= datalimit )
@@ -1135,7 +1392,12 @@ void Mat::push_back(const MatExpr& expr)
     push_back(static_cast<Mat>(expr));
 }
 
-#ifdef CV_CXX_MOVE_SEMANTICS
+
+template<typename _Tp> inline
+void Mat::push_back(const std::vector<_Tp>& v)
+{
+    push_back(Mat(v));
+}
 
 inline
 Mat::Mat(Mat&& m)
@@ -1165,6 +1427,9 @@ Mat::Mat(Mat&& m)
 inline
 Mat& Mat::operator = (Mat&& m)
 {
+    if (this == &m)
+      return *this;
+
     release();
     flags = m.flags; dims = m.dims; rows = m.rows; cols = m.cols; data = m.data;
     datastart = m.datastart; dataend = m.dataend; datalimit = m.datalimit; allocator = m.allocator;
@@ -1195,8 +1460,6 @@ Mat& Mat::operator = (Mat&& m)
     return *this;
 }
 
-#endif
-
 
 ///////////////////////////// MatSize ////////////////////////////
 
@@ -1204,22 +1467,36 @@ inline
 MatSize::MatSize(int* _p)
     : p(_p) {}
 
+inline
+int MatSize::dims() const
+{
+    return (p - 1)[0];
+}
+
 inline
 Size MatSize::operator()() const
 {
-    CV_DbgAssert(p[-1] <= 2);
+    CV_DbgAssert(dims() <= 2);
     return Size(p[1], p[0]);
 }
 
 inline
 const int& MatSize::operator[](int i) const
 {
+    CV_DbgAssert(i < dims());
+#ifdef __OPENCV_BUILD
+    CV_DbgAssert(i >= 0);
+#endif
     return p[i];
 }
 
 inline
 int& MatSize::operator[](int i)
 {
+    CV_DbgAssert(i < dims());
+#ifdef __OPENCV_BUILD
+    CV_DbgAssert(i >= 0);
+#endif
     return p[i];
 }
 
@@ -1232,8 +1509,8 @@ MatSize::operator const int*() const
 inline
 bool MatSize::operator == (const MatSize& sz) const
 {
-    int d = p[-1];
-    int dsz = sz.p[-1];
+    int d = dims();
+    int dsz = sz.dims();
     if( d != dsz )
         return false;
     if( d == 2 )
@@ -1300,47 +1577,47 @@ template<typename _Tp> inline
 Mat_<_Tp>::Mat_()
     : Mat()
 {
-    flags = (flags & ~CV_MAT_TYPE_MASK) | DataType<_Tp>::type;
+    flags = (flags & ~CV_MAT_TYPE_MASK) + traits::Type<_Tp>::value;
 }
 
 template<typename _Tp> inline
 Mat_<_Tp>::Mat_(int _rows, int _cols)
-    : Mat(_rows, _cols, DataType<_Tp>::type)
+    : Mat(_rows, _cols, traits::Type<_Tp>::value)
 {
 }
 
 template<typename _Tp> inline
 Mat_<_Tp>::Mat_(int _rows, int _cols, const _Tp& value)
-    : Mat(_rows, _cols, DataType<_Tp>::type)
+    : Mat(_rows, _cols, traits::Type<_Tp>::value)
 {
     *this = value;
 }
 
 template<typename _Tp> inline
 Mat_<_Tp>::Mat_(Size _sz)
-    : Mat(_sz.height, _sz.width, DataType<_Tp>::type)
+    : Mat(_sz.height, _sz.width, traits::Type<_Tp>::value)
 {}
 
 template<typename _Tp> inline
 Mat_<_Tp>::Mat_(Size _sz, const _Tp& value)
-    : Mat(_sz.height, _sz.width, DataType<_Tp>::type)
+    : Mat(_sz.height, _sz.width, traits::Type<_Tp>::value)
 {
     *this = value;
 }
 
 template<typename _Tp> inline
 Mat_<_Tp>::Mat_(int _dims, const int* _sz)
-    : Mat(_dims, _sz, DataType<_Tp>::type)
+    : Mat(_dims, _sz, traits::Type<_Tp>::value)
 {}
 
 template<typename _Tp> inline
 Mat_<_Tp>::Mat_(int _dims, const int* _sz, const _Tp& _s)
-    : Mat(_dims, _sz, DataType<_Tp>::type, Scalar(_s))
+    : Mat(_dims, _sz, traits::Type<_Tp>::value, Scalar(_s))
 {}
 
 template<typename _Tp> inline
 Mat_<_Tp>::Mat_(int _dims, const int* _sz, _Tp* _data, const size_t* _steps)
-    : Mat(_dims, _sz, DataType<_Tp>::type, _data, _steps)
+    : Mat(_dims, _sz, traits::Type<_Tp>::value, _data, _steps)
 {}
 
 template<typename _Tp> inline
@@ -1348,11 +1625,16 @@ Mat_<_Tp>::Mat_(const Mat_<_Tp>& m, const Range* ranges)
     : Mat(m, ranges)
 {}
 
+template<typename _Tp> inline
+Mat_<_Tp>::Mat_(const Mat_<_Tp>& m, const std::vector<Range>& ranges)
+    : Mat(m, ranges)
+{}
+
 template<typename _Tp> inline
 Mat_<_Tp>::Mat_(const Mat& m)
     : Mat()
 {
-    flags = (flags & ~CV_MAT_TYPE_MASK) | DataType<_Tp>::type;
+    flags = (flags & ~CV_MAT_TYPE_MASK) + traits::Type<_Tp>::value;
     *this = m;
 }
 
@@ -1363,7 +1645,7 @@ Mat_<_Tp>::Mat_(const Mat_& m)
 
 template<typename _Tp> inline
 Mat_<_Tp>::Mat_(int _rows, int _cols, _Tp* _data, size_t steps)
-    : Mat(_rows, _cols, DataType<_Tp>::type, _data, steps)
+    : Mat(_rows, _cols, traits::Type<_Tp>::value, _data, steps)
 {}
 
 template<typename _Tp> inline
@@ -1378,7 +1660,7 @@ Mat_<_Tp>::Mat_(const Mat_& m, const Rect& roi)
 
 template<typename _Tp> template<int n> inline
 Mat_<_Tp>::Mat_(const Vec<typename DataType<_Tp>::channel_type, n>& vec, bool copyData)
-    : Mat(n / DataType<_Tp>::channels, 1, DataType<_Tp>::type, (void*)&vec)
+    : Mat(n / DataType<_Tp>::channels, 1, traits::Type<_Tp>::value, (void*)&vec)
 {
     CV_Assert(n%DataType<_Tp>::channels == 0);
     if( copyData )
@@ -1387,7 +1669,7 @@ Mat_<_Tp>::Mat_(const Vec<typename DataType<_Tp>::channel_type, n>& vec, bool co
 
 template<typename _Tp> template<int m, int n> inline
 Mat_<_Tp>::Mat_(const Matx<typename DataType<_Tp>::channel_type, m, n>& M, bool copyData)
-    : Mat(m, n / DataType<_Tp>::channels, DataType<_Tp>::type, (void*)&M)
+    : Mat(m, n / DataType<_Tp>::channels, traits::Type<_Tp>::value, (void*)&M)
 {
     CV_Assert(n % DataType<_Tp>::channels == 0);
     if( copyData )
@@ -1396,7 +1678,7 @@ Mat_<_Tp>::Mat_(const Matx<typename DataType<_Tp>::channel_type, m, n>& M, bool
 
 template<typename _Tp> inline
 Mat_<_Tp>::Mat_(const Point_<typename DataType<_Tp>::channel_type>& pt, bool copyData)
-    : Mat(2 / DataType<_Tp>::channels, 1, DataType<_Tp>::type, (void*)&pt)
+    : Mat(2 / DataType<_Tp>::channels, 1, traits::Type<_Tp>::value, (void*)&pt)
 {
     CV_Assert(2 % DataType<_Tp>::channels == 0);
     if( copyData )
@@ -1405,7 +1687,7 @@ Mat_<_Tp>::Mat_(const Point_<typename DataType<_Tp>::channel_type>& pt, bool cop
 
 template<typename _Tp> inline
 Mat_<_Tp>::Mat_(const Point3_<typename DataType<_Tp>::channel_type>& pt, bool copyData)
-    : Mat(3 / DataType<_Tp>::channels, 1, DataType<_Tp>::type, (void*)&pt)
+    : Mat(3 / DataType<_Tp>::channels, 1, traits::Type<_Tp>::value, (void*)&pt)
 {
     CV_Assert(3 % DataType<_Tp>::channels == 0);
     if( copyData )
@@ -1422,19 +1704,39 @@ Mat_<_Tp>::Mat_(const std::vector<_Tp>& vec, bool copyData)
     : Mat(vec, copyData)
 {}
 
+template<typename _Tp> inline
+Mat_<_Tp>::Mat_(std::initializer_list<_Tp> list)
+    : Mat(list)
+{}
+
+template<typename _Tp> inline
+Mat_<_Tp>::Mat_(const std::initializer_list<int> sizes, std::initializer_list<_Tp> list)
+    : Mat(sizes, list)
+{}
+
+template<typename _Tp> template<std::size_t _Nm> inline
+Mat_<_Tp>::Mat_(const std::array<_Tp, _Nm>& arr, bool copyData)
+    : Mat(arr, copyData)
+{}
+
 template<typename _Tp> inline
 Mat_<_Tp>& Mat_<_Tp>::operator = (const Mat& m)
 {
-    if( DataType<_Tp>::type == m.type() )
+    if (m.empty())
+    {
+        release();
+        return *this;
+    }
+    if( traits::Type<_Tp>::value == m.type() )
     {
         Mat::operator = (m);
         return *this;
     }
-    if( DataType<_Tp>::depth == m.depth() )
+    if( traits::Depth<_Tp>::value == m.depth() )
     {
         return (*this = m.reshape(DataType<_Tp>::channels, m.dims, 0));
     }
-    CV_DbgAssert(DataType<_Tp>::channels == m.channels());
+    CV_Assert(DataType<_Tp>::channels == m.channels() || m.empty());
     m.convertTo(*this, type());
     return *this;
 }
@@ -1457,19 +1759,28 @@ Mat_<_Tp>& Mat_<_Tp>::operator = (const _Tp& s)
 template<typename _Tp> inline
 void Mat_<_Tp>::create(int _rows, int _cols)
 {
-    Mat::create(_rows, _cols, DataType<_Tp>::type);
+    Mat::create(_rows, _cols, traits::Type<_Tp>::value);
 }
 
 template<typename _Tp> inline
 void Mat_<_Tp>::create(Size _sz)
 {
-    Mat::create(_sz, DataType<_Tp>::type);
+    Mat::create(_sz, traits::Type<_Tp>::value);
 }
 
 template<typename _Tp> inline
 void Mat_<_Tp>::create(int _dims, const int* _sz)
 {
-    Mat::create(_dims, _sz, DataType<_Tp>::type);
+    Mat::create(_dims, _sz, traits::Type<_Tp>::value);
+}
+
+template<typename _Tp> inline
+void Mat_<_Tp>::release()
+{
+    Mat::release();
+#ifdef _DEBUG
+    flags = (flags & ~CV_MAT_TYPE_MASK) + traits::Type<_Tp>::value;
+#endif
 }
 
 template<typename _Tp> inline
@@ -1481,7 +1792,7 @@ Mat_<_Tp> Mat_<_Tp>::cross(const Mat_& m) const
 template<typename _Tp> template<typename T2> inline
 Mat_<_Tp>::operator Mat_<T2>() const
 {
-    return Mat_<T2>(*this);
+    return Mat_<T2>(static_cast<const Mat&>(*this));
 }
 
 template<typename _Tp> inline
@@ -1525,15 +1836,15 @@ size_t Mat_<_Tp>::elemSize1() const
 template<typename _Tp> inline
 int Mat_<_Tp>::type() const
 {
-    CV_DbgAssert( Mat::type() == DataType<_Tp>::type );
-    return DataType<_Tp>::type;
+    CV_DbgAssert( Mat::type() == traits::Type<_Tp>::value );
+    return traits::Type<_Tp>::value;
 }
 
 template<typename _Tp> inline
 int Mat_<_Tp>::depth() const
 {
-    CV_DbgAssert( Mat::depth() == DataType<_Tp>::depth );
-    return DataType<_Tp>::depth;
+    CV_DbgAssert( Mat::depth() == traits::Depth<_Tp>::value );
+    return traits::Depth<_Tp>::value;
 }
 
 template<typename _Tp> inline
@@ -1579,17 +1890,23 @@ Mat_<_Tp> Mat_<_Tp>::operator()( const Range* ranges ) const
     return Mat_<_Tp>(*this, ranges);
 }
 
+template<typename _Tp> inline
+Mat_<_Tp> Mat_<_Tp>::operator()(const std::vector<Range>& ranges) const
+{
+    return Mat_<_Tp>(*this, ranges);
+}
+
 template<typename _Tp> inline
 _Tp* Mat_<_Tp>::operator [](int y)
 {
-    CV_DbgAssert( 0 <= y && y < rows );
+    CV_DbgAssert( 0 <= y && y < size.p[0] );
     return (_Tp*)(data + y*step.p[0]);
 }
 
 template<typename _Tp> inline
 const _Tp* Mat_<_Tp>::operator [](int y) const
 {
-    CV_DbgAssert( 0 <= y && y < rows );
+    CV_DbgAssert( 0 <= y && y < size.p[0] );
     return (const _Tp*)(data + y*step.p[0]);
 }
 
@@ -1600,7 +1917,7 @@ _Tp& Mat_<_Tp>::operator ()(int i0, int i1)
     CV_DbgAssert(data);
     CV_DbgAssert((unsigned)i0 < (unsigned)size.p[0]);
     CV_DbgAssert((unsigned)i1 < (unsigned)size.p[1]);
-    CV_DbgAssert(type() == DataType<_Tp>::type);
+    CV_DbgAssert(type() == traits::Type<_Tp>::value);
     return ((_Tp*)(data + step.p[0] * i0))[i1];
 }
 
@@ -1611,7 +1928,7 @@ const _Tp& Mat_<_Tp>::operator ()(int i0, int i1) const
     CV_DbgAssert(data);
     CV_DbgAssert((unsigned)i0 < (unsigned)size.p[0]);
     CV_DbgAssert((unsigned)i1 < (unsigned)size.p[1]);
-    CV_DbgAssert(type() == DataType<_Tp>::type);
+    CV_DbgAssert(type() == traits::Type<_Tp>::value);
     return ((const _Tp*)(data + step.p[0] * i0))[i1];
 }
 
@@ -1622,7 +1939,7 @@ _Tp& Mat_<_Tp>::operator ()(Point pt)
     CV_DbgAssert(data);
     CV_DbgAssert((unsigned)pt.y < (unsigned)size.p[0]);
     CV_DbgAssert((unsigned)pt.x < (unsigned)size.p[1]);
-    CV_DbgAssert(type() == DataType<_Tp>::type);
+    CV_DbgAssert(type() == traits::Type<_Tp>::value);
     return ((_Tp*)(data + step.p[0] * pt.y))[pt.x];
 }
 
@@ -1633,7 +1950,7 @@ const _Tp& Mat_<_Tp>::operator ()(Point pt) const
     CV_DbgAssert(data);
     CV_DbgAssert((unsigned)pt.y < (unsigned)size.p[0]);
     CV_DbgAssert((unsigned)pt.x < (unsigned)size.p[1]);
-    CV_DbgAssert(type() == DataType<_Tp>::type);
+    CV_DbgAssert(type() == traits::Type<_Tp>::value);
     return ((const _Tp*)(data + step.p[0] * pt.y))[pt.x];
 }
 
@@ -1693,6 +2010,14 @@ Mat_<_Tp>::operator std::vector<_Tp>() const
     return v;
 }
 
+template<typename _Tp> template<std::size_t _Nm> inline
+Mat_<_Tp>::operator std::array<_Tp, _Nm>() const
+{
+    std::array<_Tp, _Nm> a;
+    copyTo(a);
+    return a;
+}
+
 template<typename _Tp> template<int n> inline
 Mat_<_Tp>::operator Vec<typename DataType<_Tp>::channel_type, n>() const
 {
@@ -1755,18 +2080,16 @@ void Mat_<_Tp>::forEach(const Functor& operation) const {
     Mat::forEach<_Tp, Functor>(operation);
 }
 
-#ifdef CV_CXX_MOVE_SEMANTICS
-
 template<typename _Tp> inline
 Mat_<_Tp>::Mat_(Mat_&& m)
-    : Mat(m)
+    : Mat(std::move(m))
 {
 }
 
 template<typename _Tp> inline
 Mat_<_Tp>& Mat_<_Tp>::operator = (Mat_&& m)
 {
-    Mat::operator = (m);
+    Mat::operator = (std::move(m));
     return *this;
 }
 
@@ -1774,19 +2097,24 @@ template<typename _Tp> inline
 Mat_<_Tp>::Mat_(Mat&& m)
     : Mat()
 {
-    flags = (flags & ~CV_MAT_TYPE_MASK) | DataType<_Tp>::type;
-    *this = m;
+    flags = (flags & ~CV_MAT_TYPE_MASK) + traits::Type<_Tp>::value;
+    *this = std::move(m);
 }
 
 template<typename _Tp> inline
 Mat_<_Tp>& Mat_<_Tp>::operator = (Mat&& m)
 {
-    if( DataType<_Tp>::type == m.type() )
+    if (m.empty())
+    {
+        release();
+        return *this;
+    }
+    if( traits::Type<_Tp>::value == m.type() )
     {
         Mat::operator = ((Mat&&)m);
         return *this;
     }
-    if( DataType<_Tp>::depth == m.depth() )
+    if( traits::Depth<_Tp>::value == m.depth() )
     {
         Mat::operator = ((Mat&&)m.reshape(DataType<_Tp>::channels, m.dims, 0));
         return *this;
@@ -1800,11 +2128,10 @@ template<typename _Tp> inline
 Mat_<_Tp>::Mat_(MatExpr&& e)
     : Mat()
 {
-    flags = (flags & ~CV_MAT_TYPE_MASK) | DataType<_Tp>::type;
+    flags = (flags & ~CV_MAT_TYPE_MASK) + traits::Type<_Tp>::value;
     *this = Mat(e);
 }
 
-#endif
 
 ///////////////////////////// SparseMat /////////////////////////////
 
@@ -2137,21 +2464,21 @@ SparseMatConstIterator_<_Tp> SparseMat::end() const
 template<typename _Tp> inline
 SparseMat_<_Tp>::SparseMat_()
 {
-    flags = MAGIC_VAL | DataType<_Tp>::type;
+    flags = MAGIC_VAL + traits::Type<_Tp>::value;
 }
 
 template<typename _Tp> inline
 SparseMat_<_Tp>::SparseMat_(int _dims, const int* _sizes)
-    : SparseMat(_dims, _sizes, DataType<_Tp>::type)
+    : SparseMat(_dims, _sizes, traits::Type<_Tp>::value)
 {}
 
 template<typename _Tp> inline
 SparseMat_<_Tp>::SparseMat_(const SparseMat& m)
 {
-    if( m.type() == DataType<_Tp>::type )
+    if( m.type() == traits::Type<_Tp>::value )
         *this = (const SparseMat_<_Tp>&)m;
     else
-        m.convertTo(*this, DataType<_Tp>::type);
+        m.convertTo(*this, traits::Type<_Tp>::value);
 }
 
 template<typename _Tp> inline
@@ -2186,9 +2513,9 @@ SparseMat_<_Tp>& SparseMat_<_Tp>::operator = (const SparseMat_<_Tp>& m)
 template<typename _Tp> inline
 SparseMat_<_Tp>& SparseMat_<_Tp>::operator = (const SparseMat& m)
 {
-    if( m.type() == DataType<_Tp>::type )
+    if( m.type() == traits::Type<_Tp>::value )
         return (*this = (const SparseMat_<_Tp>&)m);
-    m.convertTo(*this, DataType<_Tp>::type);
+    m.convertTo(*this, traits::Type<_Tp>::value);
     return *this;
 }
 
@@ -2209,19 +2536,19 @@ SparseMat_<_Tp> SparseMat_<_Tp>::clone() const
 template<typename _Tp> inline
 void SparseMat_<_Tp>::create(int _dims, const int* _sizes)
 {
-    SparseMat::create(_dims, _sizes, DataType<_Tp>::type);
+    SparseMat::create(_dims, _sizes, traits::Type<_Tp>::value);
 }
 
 template<typename _Tp> inline
 int SparseMat_<_Tp>::type() const
 {
-    return DataType<_Tp>::type;
+    return traits::Type<_Tp>::value;
 }
 
 template<typename _Tp> inline
 int SparseMat_<_Tp>::depth() const
 {
-    return DataType<_Tp>::depth;
+    return traits::Depth<_Tp>::value;
 }
 
 template<typename _Tp> inline
@@ -2321,6 +2648,7 @@ MatConstIterator::MatConstIterator(const Mat* _m)
 {
     if( m && m->isContinuous() )
     {
+        CV_Assert(!m->empty());
         sliceStart = m->ptr();
         sliceEnd = sliceStart + m->total()*elemSize;
     }
@@ -2334,6 +2662,7 @@ MatConstIterator::MatConstIterator(const Mat* _m, int _row, int _col)
     CV_Assert(m && m->dims <= 2);
     if( m->isContinuous() )
     {
+        CV_Assert(!m->empty());
         sliceStart = m->ptr();
         sliceEnd = sliceStart + m->total()*elemSize;
     }
@@ -2348,6 +2677,7 @@ MatConstIterator::MatConstIterator(const Mat* _m, Point _pt)
     CV_Assert(m && m->dims <= 2);
     if( m->isContinuous() )
     {
+        CV_Assert(!m->empty());
         sliceStart = m->ptr();
         sliceEnd = sliceStart + m->total()*elemSize;
     }
@@ -2474,7 +2804,7 @@ ptrdiff_t operator - (const MatConstIterator& b, const MatConstIterator& a)
     if( a.m != b.m )
         return ((size_t)(-1) >> 1);
     if( a.sliceEnd == b.sliceEnd )
-        return (b.ptr - a.ptr)/b.elemSize;
+        return (b.ptr - a.ptr)/static_cast<ptrdiff_t>(b.elemSize);
 
     return b.lpos() - a.lpos();
 }
@@ -2543,7 +2873,7 @@ MatConstIterator_<_Tp>& MatConstIterator_<_Tp>::operator = (const MatConstIterat
 }
 
 template<typename _Tp> inline
-_Tp MatConstIterator_<_Tp>::operator *() const
+const _Tp& MatConstIterator_<_Tp>::operator *() const
 {
     return *(_Tp*)(this->ptr);
 }
@@ -2649,7 +2979,7 @@ MatConstIterator_<_Tp> operator - (const MatConstIterator_<_Tp>& a, ptrdiff_t of
 }
 
 template<typename _Tp> inline
-_Tp MatConstIterator_<_Tp>::operator [](ptrdiff_t i) const
+const _Tp& MatConstIterator_<_Tp>::operator [](ptrdiff_t i) const
 {
     return *(_Tp*)MatConstIterator::operator [](i);
 }
@@ -2922,7 +3252,7 @@ template<typename _Tp> inline
 SparseMatConstIterator_<_Tp>::SparseMatConstIterator_(const SparseMat* _m)
     : SparseMatConstIterator(_m)
 {
-    CV_Assert( _m->type() == DataType<_Tp>::type );
+    CV_Assert( _m->type() == traits::Type<_Tp>::value );
 }
 
 template<typename _Tp> inline
@@ -3058,50 +3388,50 @@ Mat& Mat::operator = (const MatExpr& e)
 template<typename _Tp> inline
 Mat_<_Tp>::Mat_(const MatExpr& e)
 {
-    e.op->assign(e, *this, DataType<_Tp>::type);
+    e.op->assign(e, *this, traits::Type<_Tp>::value);
 }
 
 template<typename _Tp> inline
 Mat_<_Tp>& Mat_<_Tp>::operator = (const MatExpr& e)
 {
-    e.op->assign(e, *this, DataType<_Tp>::type);
+    e.op->assign(e, *this, traits::Type<_Tp>::value);
     return *this;
 }
 
 template<typename _Tp> inline
 MatExpr Mat_<_Tp>::zeros(int rows, int cols)
 {
-    return Mat::zeros(rows, cols, DataType<_Tp>::type);
+    return Mat::zeros(rows, cols, traits::Type<_Tp>::value);
 }
 
 template<typename _Tp> inline
 MatExpr Mat_<_Tp>::zeros(Size sz)
 {
-    return Mat::zeros(sz, DataType<_Tp>::type);
+    return Mat::zeros(sz, traits::Type<_Tp>::value);
 }
 
 template<typename _Tp> inline
 MatExpr Mat_<_Tp>::ones(int rows, int cols)
 {
-    return Mat::ones(rows, cols, DataType<_Tp>::type);
+    return Mat::ones(rows, cols, traits::Type<_Tp>::value);
 }
 
 template<typename _Tp> inline
 MatExpr Mat_<_Tp>::ones(Size sz)
 {
-    return Mat::ones(sz, DataType<_Tp>::type);
+    return Mat::ones(sz, traits::Type<_Tp>::value);
 }
 
 template<typename _Tp> inline
 MatExpr Mat_<_Tp>::eye(int rows, int cols)
 {
-    return Mat::eye(rows, cols, DataType<_Tp>::type);
+    return Mat::eye(rows, cols, traits::Type<_Tp>::value);
 }
 
 template<typename _Tp> inline
 MatExpr Mat_<_Tp>::eye(Size sz)
 {
-    return Mat::eye(sz, DataType<_Tp>::type);
+    return Mat::eye(sz, traits::Type<_Tp>::value);
 }
 
 inline
@@ -3127,7 +3457,7 @@ template<typename _Tp> inline
 MatExpr::operator Mat_<_Tp>() const
 {
     Mat_<_Tp> m;
-    op->assign(*this, m, DataType<_Tp>::type);
+    op->assign(*this, m, traits::Type<_Tp>::value);
     return m;
 }
 
@@ -3360,7 +3690,7 @@ UMat::UMat(const UMat& m)
 
 template<typename _Tp> inline
 UMat::UMat(const std::vector<_Tp>& vec, bool copyData)
-: flags(MAGIC_VAL | DataType<_Tp>::type | CV_MAT_CONT_FLAG), dims(2), rows((int)vec.size()),
+: flags(MAGIC_VAL + traits::Type<_Tp>::value + CV_MAT_CONT_FLAG), dims(2), rows((int)vec.size()),
 cols(1), allocator(0), usageFlags(USAGE_DEFAULT), u(0), offset(0), size(&rows)
 {
     if(vec.empty())
@@ -3371,10 +3701,9 @@ cols(1), allocator(0), usageFlags(USAGE_DEFAULT), u(0), offset(0), size(&rows)
         CV_Error(Error::StsNotImplemented, "");
     }
     else
-        Mat((int)vec.size(), 1, DataType<_Tp>::type, (uchar*)&vec[0]).copyTo(*this);
+        Mat((int)vec.size(), 1, traits::Type<_Tp>::value, (uchar*)&vec[0]).copyTo(*this);
 }
 
-
 inline
 UMat& UMat::operator = (const UMat& m)
 {
@@ -3505,6 +3834,12 @@ UMat UMat::operator()(const Range* ranges) const
     return UMat(*this, ranges);
 }
 
+inline
+UMat UMat::operator()(const std::vector<Range>& ranges) const
+{
+    return UMat(*this, ranges);
+}
+
 inline
 bool UMat::isContinuous() const
 {
@@ -3520,7 +3855,9 @@ bool UMat::isSubmatrix() const
 inline
 size_t UMat::elemSize() const
 {
-    return dims > 0 ? step.p[dims - 1] : 0;
+    size_t res = dims > 0 ? step.p[dims - 1] : 0;
+    CV_DbgAssert(res != 0);
+    return res;
 }
 
 inline
@@ -3556,7 +3893,7 @@ size_t UMat::step1(int i) const
 inline
 bool UMat::empty() const
 {
-    return u == 0 || total() == 0;
+    return u == 0 || total() == 0 || dims == 0;
 }
 
 inline
@@ -3570,8 +3907,6 @@ size_t UMat::total() const
     return p;
 }
 
-#ifdef CV_CXX_MOVE_SEMANTICS
-
 inline
 UMat::UMat(UMat&& m)
 : flags(m.flags), dims(m.dims), rows(m.rows), cols(m.cols), allocator(m.allocator),
@@ -3599,6 +3934,8 @@ UMat::UMat(UMat&& m)
 inline
 UMat& UMat::operator = (UMat&& m)
 {
+    if (this == &m)
+      return *this;
     release();
     flags = m.flags; dims = m.dims; rows = m.rows; cols = m.cols;
     allocator = m.allocator; usageFlags = m.usageFlags;
@@ -3630,8 +3967,6 @@ UMat& UMat::operator = (UMat&& m)
     return *this;
 }
 
-#endif
-
 
 inline bool UMatData::hostCopyObsolete() const { return (flags & HOST_COPY_OBSOLETE) != 0; }
 inline bool UMatData::deviceCopyObsolete() const { return (flags & DEVICE_COPY_OBSOLETE) != 0; }
@@ -3663,11 +3998,17 @@ inline void UMatData::markDeviceCopyObsolete(bool flag)
         flags &= ~DEVICE_COPY_OBSOLETE;
 }
 
-inline UMatDataAutoLock::UMatDataAutoLock(UMatData* _u) : u(_u) { u->lock(); }
-inline UMatDataAutoLock::~UMatDataAutoLock() { u->unlock(); }
-
 //! @endcond
 
 } //cv
 
+#ifdef _MSC_VER
+#pragma warning( pop )
+#endif
+
+#ifdef CV_DISABLE_CLANG_ENUM_WARNINGS
+#undef CV_DISABLE_CLANG_ENUM_WARNINGS
+#pragma clang diagnostic pop
+#endif
+
 #endif
diff --git a/IPL/include/opencv/opencv2/core/matx.hpp b/IPL/include/opencv/opencv2/core/matx.hpp
index e4d72f7..6a22b17 100644
--- a/IPL/include/opencv/opencv2/core/matx.hpp
+++ b/IPL/include/opencv/opencv2/core/matx.hpp
@@ -41,8 +41,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_CORE_MATX_HPP__
-#define __OPENCV_CORE_MATX_HPP__
+#ifndef OPENCV_CORE_MATX_HPP
+#define OPENCV_CORE_MATX_HPP
 
 #ifndef __cplusplus
 #  error matx.hpp header must be compiled as C++
@@ -53,6 +53,8 @@
 #include "opencv2/core/traits.hpp"
 #include "opencv2/core/saturate.hpp"
 
+#include <initializer_list>
+
 namespace cv
 {
 
@@ -62,13 +64,14 @@ namespace cv
 ////////////////////////////// Small Matrix ///////////////////////////
 
 //! @cond IGNORED
-struct CV_EXPORTS Matx_AddOp {};
-struct CV_EXPORTS Matx_SubOp {};
-struct CV_EXPORTS Matx_ScaleOp {};
-struct CV_EXPORTS Matx_MulOp {};
-struct CV_EXPORTS Matx_DivOp {};
-struct CV_EXPORTS Matx_MatMulOp {};
-struct CV_EXPORTS Matx_TOp {};
+// FIXIT Remove this (especially CV_EXPORTS modifier)
+struct CV_EXPORTS Matx_AddOp { Matx_AddOp() {} Matx_AddOp(const Matx_AddOp&) {} };
+struct CV_EXPORTS Matx_SubOp { Matx_SubOp() {} Matx_SubOp(const Matx_SubOp&) {} };
+struct CV_EXPORTS Matx_ScaleOp { Matx_ScaleOp() {} Matx_ScaleOp(const Matx_ScaleOp&) {} };
+struct CV_EXPORTS Matx_MulOp { Matx_MulOp() {} Matx_MulOp(const Matx_MulOp&) {} };
+struct CV_EXPORTS Matx_DivOp { Matx_DivOp() {} Matx_DivOp(const Matx_DivOp&) {} };
+struct CV_EXPORTS Matx_MatMulOp { Matx_MatMulOp() {} Matx_MatMulOp(const Matx_MatMulOp&) {} };
+struct CV_EXPORTS Matx_TOp { Matx_TOp() {} Matx_TOp(const Matx_TOp&) {} };
 //! @endcond
 
 /** @brief Template class for small matrices whose type and size are known at compilation time
@@ -77,21 +80,33 @@ If you need a more flexible type, use Mat . The elements of the matrix M are acc
 M(i,j) notation. Most of the common matrix operations (see also @ref MatrixExpressions ) are
 available. To do an operation on Matx that is not implemented, you can easily convert the matrix to
 Mat and backwards:
-@code
+@code{.cpp}
     Matx33f m(1, 2, 3,
               4, 5, 6,
               7, 8, 9);
     cout << sum(Mat(m*m.t())) << endl;
- @endcode
+@endcode
+Besides the plain constructor, which takes a list of elements, Matx can be initialized from a C-array:
+@code{.cpp}
+    float values[] = { 1, 2, 3};
+    Matx31f m(values);
+@endcode
+If C++11 features are available, std::initializer_list can also be used to initialize Matx:
+@code{.cpp}
+    Matx31f m = { 1, 2, 3};
+@endcode
  */
 template<typename _Tp, int m, int n> class Matx
 {
 public:
-    enum { depth    = DataType<_Tp>::depth,
+    enum {
            rows     = m,
            cols     = n,
            channels = rows*cols,
+#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED
+           depth    = traits::Type<_Tp>::value,
            type     = CV_MAKETYPE(depth, channels),
+#endif
            shortdim = (m < n ? m : n)
          };
 
@@ -102,7 +117,7 @@ template<typename _Tp, int m, int n> class Matx
     //! default constructor
     Matx();
 
-    Matx(_Tp v0); //!< 1x1 matrix
+    explicit Matx(_Tp v0); //!< 1x1 matrix
     Matx(_Tp v0, _Tp v1); //!< 1x2 or 2x1 matrix
     Matx(_Tp v0, _Tp v1, _Tp v2); //!< 1x3 or 3x1 matrix
     Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3); //!< 1x4, 2x2 or 4x1 matrix
@@ -125,6 +140,8 @@ template<typename _Tp, int m, int n> class Matx
          _Tp v12, _Tp v13, _Tp v14, _Tp v15); //!< 1x16, 4x4 or 16x1 matrix
     explicit Matx(const _Tp* vals); //!< initialize from a plain array
 
+    Matx(std::initializer_list<_Tp>); //!< initialize from an initializer list
+
     static Matx all(_Tp alpha);
     static Matx zeros();
     static Matx ones();
@@ -146,7 +163,7 @@ template<typename _Tp, int m, int n> class Matx
     template<int m1, int n1> Matx<_Tp, m1, n1> reshape() const;
 
     //! extract part of the matrix
-    template<int m1, int n1> Matx<_Tp, m1, n1> get_minor(int i, int j) const;
+    template<int m1, int n1> Matx<_Tp, m1, n1> get_minor(int base_row, int base_col) const;
 
     //! extract the matrix row
     Matx<_Tp, 1, n> row(int i) const;
@@ -174,8 +191,8 @@ template<typename _Tp, int m, int n> class Matx
     Matx<_Tp, m, n> div(const Matx<_Tp, m, n>& a) const;
 
     //! element access
-    const _Tp& operator ()(int i, int j) const;
-    _Tp& operator ()(int i, int j);
+    const _Tp& operator ()(int row, int col) const;
+    _Tp& operator ()(int row, int col);
 
     //! 1D element access
     const _Tp& operator ()(int i) const;
@@ -242,13 +259,23 @@ template<typename _Tp, int m, int n> class DataType< Matx<_Tp, m, n> >
     typedef value_type                                    vec_type;
 
     enum { generic_type = 0,
-           depth        = DataType<channel_type>::depth,
            channels     = m * n,
-           fmt          = DataType<channel_type>::fmt + ((channels - 1) << 8),
-           type         = CV_MAKETYPE(depth, channels)
+           fmt          = traits::SafeFmt<channel_type>::fmt + ((channels - 1) << 8)
+#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED
+           ,depth        = DataType<channel_type>::depth
+           ,type         = CV_MAKETYPE(depth, channels)
+#endif
          };
 };
 
+namespace traits {
+template<typename _Tp, int m, int n>
+struct Depth< Matx<_Tp, m, n> > { enum { value = Depth<_Tp>::value }; };
+template<typename _Tp, int m, int n>
+struct Type< Matx<_Tp, m, n> > { enum { value = CV_MAKETYPE(Depth<_Tp>::value, n*m) }; };
+} // namespace traits
+
+
 /** @brief  Comma-separated Matrix Initializer
 */
 template<typename _Tp, int m, int n> class MatxCommaInitializer
@@ -306,9 +333,13 @@ template<typename _Tp, int cn> class Vec : public Matx<_Tp, cn, 1>
 {
 public:
     typedef _Tp value_type;
-    enum { depth    = Matx<_Tp, cn, 1>::depth,
+    enum {
            channels = cn,
-           type     = CV_MAKETYPE(depth, channels)
+#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED
+           depth    = Matx<_Tp, cn, 1>::depth,
+           type     = CV_MAKETYPE(depth, channels),
+#endif
+           _dummy_enum_finalizer = 0
          };
 
     //! default constructor
@@ -327,6 +358,8 @@ template<typename _Tp, int cn> class Vec : public Matx<_Tp, cn, 1>
     Vec(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp v7, _Tp v8, _Tp v9, _Tp v10, _Tp v11, _Tp v12, _Tp v13); //!< 14-element vector constructor
     explicit Vec(const _Tp* values);
 
+    Vec(std::initializer_list<_Tp>);
+
     Vec(const Vec<_Tp, cn>& v);
 
     static Vec all(_Tp alpha);
@@ -352,6 +385,10 @@ template<typename _Tp, int cn> class Vec : public Matx<_Tp, cn, 1>
     const _Tp& operator ()(int i) const;
     _Tp& operator ()(int i);
 
+#ifdef CV_CXX11
+    Vec<_Tp, cn>& operator=(const Vec<_Tp, cn>& rhs) = default;
+#endif
+
     Vec(const Matx<_Tp, cn, 1>& a, const Matx<_Tp, cn, 1>& b, Matx_AddOp);
     Vec(const Matx<_Tp, cn, 1>& a, const Matx<_Tp, cn, 1>& b, Matx_SubOp);
     template<typename _T2> Vec(const Matx<_Tp, cn, 1>& a, _T2 alpha, Matx_ScaleOp);
@@ -401,13 +438,24 @@ template<typename _Tp, int cn> class DataType< Vec<_Tp, cn> >
     typedef value_type                                 vec_type;
 
     enum { generic_type = 0,
-           depth        = DataType<channel_type>::depth,
            channels     = cn,
            fmt          = DataType<channel_type>::fmt + ((channels - 1) << 8),
-           type         = CV_MAKETYPE(depth, channels)
+#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED
+           depth        = DataType<channel_type>::depth,
+           type         = CV_MAKETYPE(depth, channels),
+#endif
+           _dummy_enum_finalizer = 0
          };
 };
 
+namespace traits { // Depth/Type trait specializations for Vec
+template<typename _Tp, int cn> // Depth of a Vec is the Depth of its element type _Tp
+struct Depth< Vec<_Tp, cn> > { enum { value = Depth<_Tp>::value }; };
+template<typename _Tp, int cn> // Type packs the element Depth with cn as the channel count
+struct Type< Vec<_Tp, cn> > { enum { value = CV_MAKETYPE(Depth<_Tp>::value, cn) }; };
+} // namespace traits
+
+
 /** @brief  Comma-separated Vec Initializer
 */
 template<typename _Tp, int m> class VecCommaInitializer : public MatxCommaInitializer<_Tp, m, 1>
@@ -438,7 +486,7 @@ template<typename _Tp, int m> struct Matx_DetOp
             return p;
         for( int i = 0; i < m; i++ )
             p *= temp(i, i);
-        return 1./p;
+        return p;
     }
 };
 
@@ -590,11 +638,12 @@ Matx<_Tp,m,n>::Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp
 template<typename _Tp, int m, int n> inline
 Matx<_Tp,m,n>::Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp v7, _Tp v8, _Tp v9, _Tp v10, _Tp v11, _Tp v12, _Tp v13)
 {
-    CV_StaticAssert(channels == 14, "Matx should have at least 14 elements.");
+    CV_StaticAssert(channels >= 14, "Matx should have at least 14 elements.");
     val[0] = v0; val[1] = v1; val[2] = v2; val[3] = v3;
     val[4] = v4; val[5] = v5; val[6] = v6; val[7] = v7;
     val[8] = v8; val[9] = v9; val[10] = v10; val[11] = v11;
     val[12] = v12; val[13] = v13;
+    for (int i = 14; i < channels; i++) val[i] = _Tp(0);
 }
 
 
@@ -615,6 +664,17 @@ Matx<_Tp, m, n>::Matx(const _Tp* values)
     for( int i = 0; i < channels; i++ ) val[i] = values[i];
 }
 
+template<typename _Tp, int m, int n> inline
+Matx<_Tp, m, n>::Matx(std::initializer_list<_Tp> list) // copies the list into val in iteration order
+{
+    CV_DbgAssert(list.size() == channels); // the only size check here: list must hold exactly `channels` values
+    int i = 0; // index of the next slot of val to write
+    for(const auto& elem : list)
+    {
+        val[i++] = elem; // NOTE(review): i is not bounded by the loop; if CV_DbgAssert is compiled out, a longer list writes past val - confirm
+    }
+}
+
 template<typename _Tp, int m, int n> inline
 Matx<_Tp, m, n> Matx<_Tp, m, n>::all(_Tp alpha)
 {
@@ -686,13 +746,13 @@ Matx<_Tp, m1, n1> Matx<_Tp, m, n>::reshape() const
 
 template<typename _Tp, int m, int n>
 template<int m1, int n1> inline
-Matx<_Tp, m1, n1> Matx<_Tp, m, n>::get_minor(int i, int j) const
+Matx<_Tp, m1, n1> Matx<_Tp, m, n>::get_minor(int base_row, int base_col) const
 {
-    CV_DbgAssert(0 <= i && i+m1 <= m && 0 <= j && j+n1 <= n);
+    CV_DbgAssert(0 <= base_row && base_row+m1 <= m && 0 <= base_col && base_col+n1 <= n);
     Matx<_Tp, m1, n1> s;
     for( int di = 0; di < m1; di++ )
         for( int dj = 0; dj < n1; dj++ )
-            s(di, dj) = (*this)(i+di, j+dj);
+            s(di, dj) = (*this)(base_row+di, base_col+dj);
     return s;
 }
 
@@ -723,17 +783,17 @@ typename Matx<_Tp, m, n>::diag_type Matx<_Tp, m, n>::diag() const
 }
 
 template<typename _Tp, int m, int n> inline
-const _Tp& Matx<_Tp, m, n>::operator()(int i, int j) const
+const _Tp& Matx<_Tp, m, n>::operator()(int row_idx, int col_idx) const
 {
-    CV_DbgAssert( (unsigned)i < (unsigned)m && (unsigned)j < (unsigned)n );
-    return this->val[i*n + j];
+    CV_DbgAssert( (unsigned)row_idx < (unsigned)m && (unsigned)col_idx < (unsigned)n );
+    return this->val[row_idx*n + col_idx];
 }
 
 template<typename _Tp, int m, int n> inline
-_Tp& Matx<_Tp, m, n>::operator ()(int i, int j)
+_Tp& Matx<_Tp, m, n>::operator ()(int row_idx, int col_idx)
 {
-    CV_DbgAssert( (unsigned)i < (unsigned)m && (unsigned)j < (unsigned)n );
-    return val[i*n + j];
+    CV_DbgAssert( (unsigned)row_idx < (unsigned)m && (unsigned)col_idx < (unsigned)n );
+    return val[row_idx*n + col_idx];
 }
 
 template<typename _Tp, int m, int n> inline
@@ -956,6 +1016,10 @@ template<typename _Tp, int cn> inline
 Vec<_Tp, cn>::Vec(const _Tp* values)
     : Matx<_Tp, cn, 1>(values) {}
 
+template<typename _Tp, int cn> inline
+Vec<_Tp, cn>::Vec(std::initializer_list<_Tp> list) // forwards the list unchanged to the Matx<_Tp, cn, 1> base constructor
+    : Matx<_Tp, cn, 1>(list) {}
+
 template<typename _Tp, int cn> inline
 Vec<_Tp, cn>::Vec(const Vec<_Tp, cn>& m)
     : Matx<_Tp, cn, 1>(m.val) {}
@@ -1080,7 +1144,7 @@ Vec<_Tp, cn> normalize(const Vec<_Tp, cn>& v)
 
 
 
-//////////////////////////////// matx comma initializer //////////////////////////////////
+//////////////////////////////// vec comma initializer //////////////////////////////////
 
 
 template<typename _Tp, typename _T2, int cn> static inline
@@ -1205,6 +1269,34 @@ Matx<_Tp, m, n> operator * (double alpha, const Matx<_Tp, m, n>& a)
     return Matx<_Tp, m, n>(a, alpha, Matx_ScaleOp());
 }
 
+template<typename _Tp, int m, int n> static inline
+Matx<_Tp, m, n>& operator /= (Matx<_Tp, m, n>& a, float alpha) // divides all m*n elements of a by alpha in place and returns a
+{
+    for( int i = 0; i < m*n; i++ )
+        a.val[i] = a.val[i] / alpha; // alpha is not checked for zero; the quotient is converted back by the assignment
+    return a;
+}
+
+template<typename _Tp, int m, int n> static inline
+Matx<_Tp, m, n>& operator /= (Matx<_Tp, m, n>& a, double alpha) // double overload: divides all m*n elements of a by alpha in place and returns a
+{
+    for( int i = 0; i < m*n; i++ )
+        a.val[i] = a.val[i] / alpha; // alpha is not checked for zero; the quotient is converted back by the assignment
+    return a;
+}
+
+template<typename _Tp, int m, int n> static inline
+Matx<_Tp, m, n> operator / (const Matx<_Tp, m, n>& a, float alpha) // returns a new Matx; a itself is not modified
+{
+    return Matx<_Tp, m, n>(a, 1.f/alpha, Matx_ScaleOp()); // hands the reciprocal of alpha to the Matx_ScaleOp constructor (presumably element-wise scaling - confirm)
+}
+
+template<typename _Tp, int m, int n> static inline
+Matx<_Tp, m, n> operator / (const Matx<_Tp, m, n>& a, double alpha) // double overload: returns a new Matx; a itself is not modified
+{
+    return Matx<_Tp, m, n>(a, 1./alpha, Matx_ScaleOp()); // hands the reciprocal of alpha to the Matx_ScaleOp constructor (presumably element-wise scaling - confirm)
+}
+
 template<typename _Tp, int m, int n> static inline
 Matx<_Tp, m, n> operator - (const Matx<_Tp, m, n>& a)
 {
@@ -1404,4 +1496,4 @@ template<typename _Tp> inline Vec<_Tp, 4>& operator *= (Vec<_Tp, 4>& v1, const V
 
 } // cv
 
-#endif // __OPENCV_CORE_MATX_HPP__
+#endif // OPENCV_CORE_MATX_HPP
diff --git a/IPL/include/opencv/opencv2/core/neon_utils.hpp b/IPL/include/opencv/opencv2/core/neon_utils.hpp
index adb750f..573ba99 100644
--- a/IPL/include/opencv/opencv2/core/neon_utils.hpp
+++ b/IPL/include/opencv/opencv2/core/neon_utils.hpp
@@ -39,8 +39,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_HAL_NEON_UTILS_HPP__
-#define __OPENCV_HAL_NEON_UTILS_HPP__
+#ifndef OPENCV_HAL_NEON_UTILS_HPP
+#define OPENCV_HAL_NEON_UTILS_HPP
 
 #include "opencv2/core/cvdef.h"
 
@@ -125,4 +125,4 @@ inline float32x2_t cv_vsqrt_f32(float32x2_t val)
 
 //! @}
 
-#endif // __OPENCV_HAL_NEON_UTILS_HPP__
+#endif // OPENCV_HAL_NEON_UTILS_HPP
diff --git a/IPL/include/opencv/opencv2/core/ocl.hpp b/IPL/include/opencv/opencv2/core/ocl.hpp
index bc989a3..115f5d1 100644
--- a/IPL/include/opencv/opencv2/core/ocl.hpp
+++ b/IPL/include/opencv/opencv2/core/ocl.hpp
@@ -39,8 +39,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_OPENCL_HPP__
-#define __OPENCV_OPENCL_HPP__
+#ifndef OPENCV_OPENCL_HPP
+#define OPENCV_OPENCL_HPP
 
 #include "opencv2/core.hpp"
 
@@ -59,7 +59,7 @@ CV_EXPORTS_W void finish();
 CV_EXPORTS bool haveSVM();
 
 class CV_EXPORTS Context;
-class CV_EXPORTS Device;
+class CV_EXPORTS_W_SIMPLE Device;
 class CV_EXPORTS Kernel;
 class CV_EXPORTS Program;
 class CV_EXPORTS ProgramSource;
@@ -67,14 +67,14 @@ class CV_EXPORTS Queue;
 class CV_EXPORTS PlatformInfo;
 class CV_EXPORTS Image2D;
 
-class CV_EXPORTS Device
+class CV_EXPORTS_W_SIMPLE Device
 {
 public:
-    Device();
+    CV_WRAP Device();
     explicit Device(void* d);
     Device(const Device& d);
     Device& operator = (const Device& d);
-    ~Device();
+    CV_WRAP ~Device();
 
     void set(void* d);
 
@@ -89,23 +89,24 @@ class CV_EXPORTS Device
         TYPE_ALL         = 0xFFFFFFFF
     };
 
-    String name() const;
-    String extensions() const;
-    String version() const;
-    String vendorName() const;
-    String OpenCL_C_Version() const;
-    String OpenCLVersion() const;
-    int deviceVersionMajor() const;
-    int deviceVersionMinor() const;
-    String driverVersion() const;
+    CV_WRAP String name() const;
+    CV_WRAP String extensions() const;
+    CV_WRAP bool isExtensionSupported(const String& extensionName) const;
+    CV_WRAP String version() const;
+    CV_WRAP String vendorName() const;
+    CV_WRAP String OpenCL_C_Version() const;
+    CV_WRAP String OpenCLVersion() const;
+    CV_WRAP int deviceVersionMajor() const;
+    CV_WRAP int deviceVersionMinor() const;
+    CV_WRAP String driverVersion() const;
     void* ptr() const;
 
-    int type() const;
+    CV_WRAP int type() const;
 
-    int addressBits() const;
-    bool available() const;
-    bool compilerAvailable() const;
-    bool linkerAvailable() const;
+    CV_WRAP int addressBits() const;
+    CV_WRAP bool available() const;
+    CV_WRAP bool compilerAvailable() const;
+    CV_WRAP bool linkerAvailable() const;
 
     enum
     {
@@ -118,21 +119,21 @@ class CV_EXPORTS Device
         FP_SOFT_FLOAT=(1 << 6),
         FP_CORRECTLY_ROUNDED_DIVIDE_SQRT=(1 << 7)
     };
-    int doubleFPConfig() const;
-    int singleFPConfig() const;
-    int halfFPConfig() const;
+    CV_WRAP int doubleFPConfig() const;
+    CV_WRAP int singleFPConfig() const;
+    CV_WRAP int halfFPConfig() const;
 
-    bool endianLittle() const;
-    bool errorCorrectionSupport() const;
+    CV_WRAP bool endianLittle() const;
+    CV_WRAP bool errorCorrectionSupport() const;
 
     enum
     {
         EXEC_KERNEL=(1 << 0),
         EXEC_NATIVE_KERNEL=(1 << 1)
     };
-    int executionCapabilities() const;
+    CV_WRAP int executionCapabilities() const;
 
-    size_t globalMemCacheSize() const;
+    CV_WRAP size_t globalMemCacheSize() const;
 
     enum
     {
@@ -140,35 +141,38 @@ class CV_EXPORTS Device
         READ_ONLY_CACHE=1,
         READ_WRITE_CACHE=2
     };
-    int globalMemCacheType() const;
-    int globalMemCacheLineSize() const;
-    size_t globalMemSize() const;
+    CV_WRAP int globalMemCacheType() const;
+    CV_WRAP int globalMemCacheLineSize() const;
+    CV_WRAP size_t globalMemSize() const;
 
-    size_t localMemSize() const;
+    CV_WRAP size_t localMemSize() const;
     enum
     {
         NO_LOCAL_MEM=0,
         LOCAL_IS_LOCAL=1,
         LOCAL_IS_GLOBAL=2
     };
-    int localMemType() const;
-    bool hostUnifiedMemory() const;
+    CV_WRAP int localMemType() const;
+    CV_WRAP bool hostUnifiedMemory() const;
 
-    bool imageSupport() const;
+    CV_WRAP bool imageSupport() const;
 
-    bool imageFromBufferSupport() const;
+    CV_WRAP bool imageFromBufferSupport() const;
     uint imagePitchAlignment() const;
     uint imageBaseAddressAlignment() const;
 
-    size_t image2DMaxWidth() const;
-    size_t image2DMaxHeight() const;
+    /// deprecated, use isExtensionSupported() method (probably with "cl_khr_subgroups" value)
+    CV_WRAP bool intelSubgroupsSupport() const;
+
+    CV_WRAP size_t image2DMaxWidth() const;
+    CV_WRAP size_t image2DMaxHeight() const;
 
-    size_t image3DMaxWidth() const;
-    size_t image3DMaxHeight() const;
-    size_t image3DMaxDepth() const;
+    CV_WRAP size_t image3DMaxWidth() const;
+    CV_WRAP size_t image3DMaxHeight() const;
+    CV_WRAP size_t image3DMaxDepth() const;
 
-    size_t imageMaxBufferSize() const;
-    size_t imageMaxArraySize() const;
+    CV_WRAP size_t imageMaxBufferSize() const;
+    CV_WRAP size_t imageMaxArraySize() const;
 
     enum
     {
@@ -177,53 +181,53 @@ class CV_EXPORTS Device
         VENDOR_INTEL=2,
         VENDOR_NVIDIA=3
     };
-    int vendorID() const;
+    CV_WRAP int vendorID() const;
     // FIXIT
     // dev.isAMD() doesn't work for OpenCL CPU devices from AMD OpenCL platform.
     // This method should use platform name instead of vendor name.
     // After fix restore code in arithm.cpp: ocl_compare()
-    inline bool isAMD() const { return vendorID() == VENDOR_AMD; }
-    inline bool isIntel() const { return vendorID() == VENDOR_INTEL; }
-    inline bool isNVidia() const { return vendorID() == VENDOR_NVIDIA; }
+    CV_WRAP inline bool isAMD() const { return vendorID() == VENDOR_AMD; }
+    CV_WRAP inline bool isIntel() const { return vendorID() == VENDOR_INTEL; }
+    CV_WRAP inline bool isNVidia() const { return vendorID() == VENDOR_NVIDIA; }
 
-    int maxClockFrequency() const;
-    int maxComputeUnits() const;
-    int maxConstantArgs() const;
-    size_t maxConstantBufferSize() const;
+    CV_WRAP int maxClockFrequency() const;
+    CV_WRAP int maxComputeUnits() const;
+    CV_WRAP int maxConstantArgs() const;
+    CV_WRAP size_t maxConstantBufferSize() const;
 
-    size_t maxMemAllocSize() const;
-    size_t maxParameterSize() const;
+    CV_WRAP size_t maxMemAllocSize() const;
+    CV_WRAP size_t maxParameterSize() const;
 
-    int maxReadImageArgs() const;
-    int maxWriteImageArgs() const;
-    int maxSamplers() const;
+    CV_WRAP int maxReadImageArgs() const;
+    CV_WRAP int maxWriteImageArgs() const;
+    CV_WRAP int maxSamplers() const;
 
-    size_t maxWorkGroupSize() const;
-    int maxWorkItemDims() const;
+    CV_WRAP size_t maxWorkGroupSize() const;
+    CV_WRAP int maxWorkItemDims() const;
     void maxWorkItemSizes(size_t*) const;
 
-    int memBaseAddrAlign() const;
+    CV_WRAP int memBaseAddrAlign() const;
 
-    int nativeVectorWidthChar() const;
-    int nativeVectorWidthShort() const;
-    int nativeVectorWidthInt() const;
-    int nativeVectorWidthLong() const;
-    int nativeVectorWidthFloat() const;
-    int nativeVectorWidthDouble() const;
-    int nativeVectorWidthHalf() const;
+    CV_WRAP int nativeVectorWidthChar() const;
+    CV_WRAP int nativeVectorWidthShort() const;
+    CV_WRAP int nativeVectorWidthInt() const;
+    CV_WRAP int nativeVectorWidthLong() const;
+    CV_WRAP int nativeVectorWidthFloat() const;
+    CV_WRAP int nativeVectorWidthDouble() const;
+    CV_WRAP int nativeVectorWidthHalf() const;
 
-    int preferredVectorWidthChar() const;
-    int preferredVectorWidthShort() const;
-    int preferredVectorWidthInt() const;
-    int preferredVectorWidthLong() const;
-    int preferredVectorWidthFloat() const;
-    int preferredVectorWidthDouble() const;
-    int preferredVectorWidthHalf() const;
+    CV_WRAP int preferredVectorWidthChar() const;
+    CV_WRAP int preferredVectorWidthShort() const;
+    CV_WRAP int preferredVectorWidthInt() const;
+    CV_WRAP int preferredVectorWidthLong() const;
+    CV_WRAP int preferredVectorWidthFloat() const;
+    CV_WRAP int preferredVectorWidthDouble() const;
+    CV_WRAP int preferredVectorWidthHalf() const;
 
-    size_t printfBufferSize() const;
-    size_t profilingTimerResolution() const;
+    CV_WRAP size_t printfBufferSize() const;
+    CV_WRAP size_t profilingTimerResolution() const;
 
-    static const Device& getDefault();
+    CV_WRAP static const Device& getDefault();
 
 protected:
     struct Impl;
@@ -246,6 +250,7 @@ class CV_EXPORTS Context
     const Device& device(size_t idx) const;
     Program getProg(const ProgramSource& prog,
                     const String& buildopt, String& errmsg);
+    void unloadProg(Program& prog);
 
     static Context& getDefault(bool initialize = true);
     void* ptr() const;
@@ -256,6 +261,8 @@ class CV_EXPORTS Context
     void setUseSVM(bool enabled);
 
     struct Impl;
+    inline Impl* getImpl() const { return (Impl*)p; } // returns the implementation pointer p (declared below as Impl*, so the cast does not change its type)
+//protected:
     Impl* p;
 };
 
@@ -276,55 +283,38 @@ class CV_EXPORTS Platform
     Impl* p;
 };
 
-/*
-//! @brief Attaches OpenCL context to OpenCV
-//
-//! @note Note:
-//    OpenCV will check if available OpenCL platform has platformName name,
-//    then assign context to OpenCV and call clRetainContext function.
-//    The deviceID device will be used as target device and new command queue
-//    will be created.
-//
-// Params:
-//! @param platformName - name of OpenCL platform to attach,
-//!                       this string is used to check if platform is available
-//!                       to OpenCV at runtime
-//! @param platfromID   - ID of platform attached context was created for
-//! @param context      - OpenCL context to be attached to OpenCV
-//! @param deviceID     - ID of device, must be created from attached context
+/** @brief Attaches OpenCL context to OpenCV
+@note
+  OpenCV will check if available OpenCL platform has platformName name, then assign context to
+  OpenCV and call `clRetainContext` function. The deviceID device will be used as target device and
+  new command queue will be created.
+@param platformName name of OpenCL platform to attach, this string is used to check if platform is available to OpenCV at runtime
+@param platformID ID of platform attached context was created for
+@param context OpenCL context to be attached to OpenCV
+@param deviceID ID of device, must be created from attached context
 */
 CV_EXPORTS void attachContext(const String& platformName, void* platformID, void* context, void* deviceID);
 
-/*
-//! @brief Convert OpenCL buffer to UMat
-//
-//! @note Note:
-//   OpenCL buffer (cl_mem_buffer) should contain 2D image data, compatible with OpenCV.
-//   Memory content is not copied from clBuffer to UMat. Instead, buffer handle assigned
-//   to UMat and clRetainMemObject is called.
-//
-// Params:
-//! @param  cl_mem_buffer - source clBuffer handle
-//! @param  step          - num of bytes in single row
-//! @param  rows          - number of rows
-//! @param  cols          - number of cols
-//! @param  type          - OpenCV type of image
-//! @param  dst           - destination UMat
+/** @brief Convert OpenCL buffer to UMat
+@note
+  OpenCL buffer (cl_mem_buffer) should contain 2D image data, compatible with OpenCV. Memory
+  content is not copied from `clBuffer` to UMat. Instead, the buffer handle is assigned to UMat and
+  `clRetainMemObject` is called.
+@param cl_mem_buffer source clBuffer handle
+@param step num of bytes in single row
+@param rows number of rows
+@param cols number of cols
+@param type OpenCV type of image
+@param dst destination UMat
 */
 CV_EXPORTS void convertFromBuffer(void* cl_mem_buffer, size_t step, int rows, int cols, int type, UMat& dst);
 
-/*
-//! @brief Convert OpenCL image2d_t to UMat
-//
-//! @note Note:
-//   OpenCL image2d_t (cl_mem_image), should be compatible with OpenCV
-//   UMat formats.
-//   Memory content is copied from image to UMat with
-//   clEnqueueCopyImageToBuffer function.
-//
-// Params:
-//! @param  cl_mem_image - source image2d_t handle
-//! @param  dst          - destination UMat
+/** @brief Convert OpenCL image2d_t to UMat
+@note
+  OpenCL `image2d_t` (cl_mem_image), should be compatible with OpenCV UMat formats. Memory content
+  is copied from image to UMat with `clEnqueueCopyImageToBuffer` function.
+@param cl_mem_image source image2d_t handle
+@param dst destination UMat
 */
 CV_EXPORTS void convertFromImage(void* cl_mem_image, UMat& dst);
 
@@ -345,8 +335,12 @@ class CV_EXPORTS Queue
     void* ptr() const;
     static Queue& getDefault();
 
+    /// @brief Returns an OpenCL command queue with profiling mode enabled
+    const Queue& getProfilingQueue() const;
+
+    struct Impl; friend struct Impl;
+    inline Impl* getImpl() const { return p; } // returns the implementation pointer p as-is
 protected:
-    struct Impl;
     Impl* p;
 };
 
@@ -358,7 +352,8 @@ class CV_EXPORTS KernelArg
     KernelArg(int _flags, UMat* _m, int wscale=1, int iwscale=1, const void* _obj=0, size_t _sz=0);
     KernelArg();
 
-    static KernelArg Local() { return KernelArg(LOCAL, 0); }
+    static KernelArg Local(size_t localMemSize) // builds a LOCAL-flagged argument with no UMat attached
+    { return KernelArg(LOCAL, 0, 1, 1, 0, localMemSize); } // wscale=1, iwscale=1, _obj=0; localMemSize is passed as _sz
     static KernelArg PtrWriteOnly(const UMat& m)
     { return KernelArg(PTR_ONLY+WRITE_ONLY, (UMat*)&m); }
     static KernelArg PtrReadOnly(const UMat& m)
@@ -412,166 +407,48 @@ class CV_EXPORTS Kernel
     template<typename _Tp> int set(int i, const _Tp& value)
     { return set(i, &value, sizeof(value)); }
 
-    template<typename _Tp0>
-    Kernel& args(const _Tp0& a0)
-    {
-        set(0, a0); return *this;
-    }
-
-    template<typename _Tp0, typename _Tp1>
-    Kernel& args(const _Tp0& a0, const _Tp1& a1)
-    {
-        int i = set(0, a0); set(i, a1); return *this;
-    }
-
-    template<typename _Tp0, typename _Tp1, typename _Tp2>
-    Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2)
-    {
-        int i = set(0, a0); i = set(i, a1); set(i, a2); return *this;
-    }
-
-    template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3>
-    Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3)
-    {
-        int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); return *this;
-    }
-
-    template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3, typename _Tp4>
-    Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2,
-                 const _Tp3& a3, const _Tp4& a4)
-    {
-        int i = set(0, a0); i = set(i, a1); i = set(i, a2);
-        i = set(i, a3); set(i, a4); return *this;
-    }
-
-    template<typename _Tp0, typename _Tp1, typename _Tp2,
-             typename _Tp3, typename _Tp4, typename _Tp5>
-    Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2,
-                 const _Tp3& a3, const _Tp4& a4, const _Tp5& a5)
-    {
-        int i = set(0, a0); i = set(i, a1); i = set(i, a2);
-        i = set(i, a3); i = set(i, a4); set(i, a5); return *this;
-    }
-
-    template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
-             typename _Tp4, typename _Tp5, typename _Tp6>
-    Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
-                 const _Tp4& a4, const _Tp5& a5, const _Tp6& a6)
-    {
-        int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3);
-        i = set(i, a4); i = set(i, a5); set(i, a6); return *this;
-    }
-
-    template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
-             typename _Tp4, typename _Tp5, typename _Tp6, typename _Tp7>
-    Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
-                 const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7)
-    {
-        int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3);
-        i = set(i, a4); i = set(i, a5); i = set(i, a6); set(i, a7); return *this;
-    }
-
-    template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3, typename _Tp4,
-             typename _Tp5, typename _Tp6, typename _Tp7, typename _Tp8>
-    Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
-                 const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7,
-                 const _Tp8& a8)
-    {
-        int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4);
-        i = set(i, a5); i = set(i, a6); i = set(i, a7); set(i, a8); return *this;
-    }
-
-    template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3, typename _Tp4,
-             typename _Tp5, typename _Tp6, typename _Tp7, typename _Tp8, typename _Tp9>
-    Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
-                 const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7,
-                 const _Tp8& a8, const _Tp9& a9)
-    {
-        int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4); i = set(i, a5);
-        i = set(i, a6); i = set(i, a7); i = set(i, a8); set(i, a9); return *this;
-    }
-
-    template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
-             typename _Tp4, typename _Tp5, typename _Tp6, typename _Tp7,
-             typename _Tp8, typename _Tp9, typename _Tp10>
-    Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
-                 const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7,
-                 const _Tp8& a8, const _Tp9& a9, const _Tp10& a10)
-    {
-        int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4); i = set(i, a5);
-        i = set(i, a6); i = set(i, a7); i = set(i, a8); i = set(i, a9); set(i, a10); return *this;
-    }
-
-    template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
-             typename _Tp4, typename _Tp5, typename _Tp6, typename _Tp7,
-             typename _Tp8, typename _Tp9, typename _Tp10, typename _Tp11>
-    Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
-                 const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7,
-                 const _Tp8& a8, const _Tp9& a9, const _Tp10& a10, const _Tp11& a11)
-    {
-        int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4); i = set(i, a5);
-        i = set(i, a6); i = set(i, a7); i = set(i, a8); i = set(i, a9); i = set(i, a10); set(i, a11); return *this;
-    }
-
-    template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
-             typename _Tp4, typename _Tp5, typename _Tp6, typename _Tp7,
-             typename _Tp8, typename _Tp9, typename _Tp10, typename _Tp11, typename _Tp12>
-    Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
-                 const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7,
-                 const _Tp8& a8, const _Tp9& a9, const _Tp10& a10, const _Tp11& a11,
-                 const _Tp12& a12)
-    {
-        int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4); i = set(i, a5);
-        i = set(i, a6); i = set(i, a7); i = set(i, a8); i = set(i, a9); i = set(i, a10); i = set(i, a11);
-        set(i, a12); return *this;
-    }
-
-    template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
-             typename _Tp4, typename _Tp5, typename _Tp6, typename _Tp7,
-             typename _Tp8, typename _Tp9, typename _Tp10, typename _Tp11, typename _Tp12,
-             typename _Tp13>
-    Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
-                 const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7,
-                 const _Tp8& a8, const _Tp9& a9, const _Tp10& a10, const _Tp11& a11,
-                 const _Tp12& a12, const _Tp13& a13)
-    {
-        int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4); i = set(i, a5);
-        i = set(i, a6); i = set(i, a7); i = set(i, a8); i = set(i, a9); i = set(i, a10); i = set(i, a11);
-        i = set(i, a12); set(i, a13); return *this;
-    }
-
-    template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
-             typename _Tp4, typename _Tp5, typename _Tp6, typename _Tp7,
-             typename _Tp8, typename _Tp9, typename _Tp10, typename _Tp11, typename _Tp12,
-             typename _Tp13, typename _Tp14>
-    Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
-                 const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7,
-                 const _Tp8& a8, const _Tp9& a9, const _Tp10& a10, const _Tp11& a11,
-                 const _Tp12& a12, const _Tp13& a13, const _Tp14& a14)
-    {
-        int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4); i = set(i, a5);
-        i = set(i, a6); i = set(i, a7); i = set(i, a8); i = set(i, a9); i = set(i, a10); i = set(i, a11);
-        i = set(i, a12); i = set(i, a13); set(i, a14); return *this;
-    }
-
-    template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
-             typename _Tp4, typename _Tp5, typename _Tp6, typename _Tp7,
-             typename _Tp8, typename _Tp9, typename _Tp10, typename _Tp11, typename _Tp12,
-             typename _Tp13, typename _Tp14, typename _Tp15>
-    Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
-                 const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7,
-                 const _Tp8& a8, const _Tp9& a9, const _Tp10& a10, const _Tp11& a11,
-                 const _Tp12& a12, const _Tp13& a13, const _Tp14& a14, const _Tp15& a15)
-    {
-        int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4); i = set(i, a5);
-        i = set(i, a6); i = set(i, a7); i = set(i, a8); i = set(i, a9); i = set(i, a10); i = set(i, a11);
-        i = set(i, a12); i = set(i, a13); i = set(i, a14); set(i, a15); return *this;
-    }
 
+protected:
+    template<typename _Tp0> inline // single remaining argument: set it at index i and return what set() returns
+    int set_args_(int i, const _Tp0& a0) { return set(i, a0); }
+    template<typename _Tp0, typename... _Tps> inline // several arguments: set a0, then recurse using the index set() returned
+    int set_args_(int i, const _Tp0& a0, const _Tps&... rest_args) { i = set(i, a0); return set_args_(i, rest_args...); }
+public:
+    /** @brief Setup OpenCL Kernel arguments.
+    Avoid calling the set(i, ...) methods directly.
+    @code
+    bool ok = kernel
+        .args(
+            srcUMat, dstUMat,
+            (float)some_float_param
+        ).run(ndims, globalSize, localSize);
+    if (!ok) return false;
+    @endcode
+    */
+    template<typename... _Tps> inline // sets all given arguments starting at index 0 via set_args_
+    Kernel& args(const _Tps&... kernel_args) { set_args_(0, kernel_args...); return *this; } // returns *this so a call such as run() can be chained
+
+
+    /** @brief Run the OpenCL kernel.
+    @param dims the work problem dimensions. It is the length of globalsize and localsize. It can be either 1, 2 or 3.
+    @param globalsize work items for each dimension. It is not the final globalsize passed to
+      OpenCL. Each dimension will be adjusted to the nearest integer divisible by the corresponding
+      value in localsize. If localsize is NULL, it will still be adjusted depending on dims. The
+      adjusted values are greater than or equal to the original values.
+    @param localsize work-group size for each dimension.
+    @param sync specify whether to wait for OpenCL computation to finish before return.
+    @param q command queue
+    */
     bool run(int dims, size_t globalsize[],
              size_t localsize[], bool sync, const Queue& q=Queue());
     bool runTask(bool sync, const Queue& q=Queue());
 
+    /** @brief Similar to a synchronized run() call, but returns the kernel execution time
+     * A separate OpenCL command queue may be used (with CL_QUEUE_PROFILING_ENABLE)
+     * @return Execution time in nanoseconds or negative number on error
+     */
+    int64 runProfiling(int dims, size_t globalsize[], size_t localsize[], const Queue& q=Queue());
+
     size_t workGroupSize() const;
     size_t preferedWorkGroupSizeMultiple() const;
     bool compileWorkGroupSize(size_t wsz[]) const;
@@ -590,7 +467,6 @@ class CV_EXPORTS Program
     Program();
     Program(const ProgramSource& src,
             const String& buildflags, String& errmsg);
-    explicit Program(const String& buf);
     Program(const Program& prog);
 
     Program& operator = (const Program& prog);
@@ -598,38 +474,104 @@ class CV_EXPORTS Program
 
     bool create(const ProgramSource& src,
                 const String& buildflags, String& errmsg);
-    bool read(const String& buf, const String& buildflags);
-    bool write(String& buf) const;
 
-    const ProgramSource& source() const;
     void* ptr() const;
 
-    String getPrefix() const;
-    static String getPrefix(const String& buildflags);
-
+    /**
+     * @brief Query device-specific program binary.
+     *
+     * Returns RAW OpenCL executable binary without additional attachments.
+     *
+     * @sa ProgramSource::fromBinary
+     *
+     * @param[out] binary output buffer
+     */
+    void getBinary(std::vector<char>& binary) const;
+
+    struct Impl; friend struct Impl;
+    inline Impl* getImpl() const { return (Impl*)p; }
 protected:
-    struct Impl;
     Impl* p;
+public:
+#ifndef OPENCV_REMOVE_DEPRECATED_API
+    // TODO Remove this
+    CV_DEPRECATED bool read(const String& buf, const String& buildflags); // removed, use ProgramSource instead
+    CV_DEPRECATED bool write(String& buf) const; // removed, use getBinary() method instead (RAW OpenCL binary)
+    CV_DEPRECATED const ProgramSource& source() const; // implementation removed
+    CV_DEPRECATED String getPrefix() const; // deprecated, implementation replaced
+    CV_DEPRECATED static String getPrefix(const String& buildflags); // deprecated, implementation replaced
+#endif
 };
 
 
 class CV_EXPORTS ProgramSource
 {
 public:
-    typedef uint64 hash_t;
+    typedef uint64 hash_t; // deprecated
 
     ProgramSource();
-    explicit ProgramSource(const String& prog);
-    explicit ProgramSource(const char* prog);
+    explicit ProgramSource(const String& module, const String& name, const String& codeStr, const String& codeHash);
+    explicit ProgramSource(const String& prog); // deprecated
+    explicit ProgramSource(const char* prog); // deprecated
     ~ProgramSource();
     ProgramSource(const ProgramSource& prog);
     ProgramSource& operator = (const ProgramSource& prog);
 
-    const String& source() const;
-    hash_t hash() const;
-
+    const String& source() const; // deprecated
+    hash_t hash() const; // deprecated
+
+
+    /** @brief Describe OpenCL program binary.
+     * Do not call clCreateProgramWithBinary() and/or clBuildProgram().
+     *
+     * Caller should guarantee binary buffer lifetime greater than ProgramSource object (and any of its copies).
+     *
+     * This kind of binary is not portable between platforms in general - it is specific to OpenCL vendor / device / driver version.
+     *
+     * @param module name of program owner module
+     * @param name unique name of program (module+name is used as key for OpenCL program caching)
+     * @param binary buffer address. See buffer lifetime requirement in description.
+     * @param size buffer size
+     * @param buildOptions additional program-related build options passed to clBuildProgram()
+     * @return created ProgramSource object
+     */
+    static ProgramSource fromBinary(const String& module, const String& name,
+            const unsigned char* binary, const size_t size,
+            const cv::String& buildOptions = cv::String());
+
+    /** @brief Describe OpenCL program in SPIR format.
+     * Do not call clCreateProgramWithBinary() and/or clBuildProgram().
+     *
+     * Supports SPIR 1.2 by default (pass '-spir-std=X.Y' in buildOptions to override this behavior)
+     *
+     * Caller should guarantee binary buffer lifetime greater than ProgramSource object (and any of its copies).
+     *
+     * Programs in this format are portable between OpenCL implementations with 'khr_spir' extension:
+     * https://www.khronos.org/registry/OpenCL/sdk/2.0/docs/man/xhtml/cl_khr_spir.html
+     * (but they are not portable between different platforms: 32-bit / 64-bit)
+     *
+     * Note: these programs can't support vendor specific extensions, like 'cl_intel_subgroups'.
+     *
+     * @param module name of program owner module
+     * @param name unique name of program (module+name is used as key for OpenCL program caching)
+     * @param binary buffer address. See buffer lifetime requirement in description.
+     * @param size buffer size
+     * @param buildOptions additional program-related build options passed to clBuildProgram()
+     *        (these options are added automatically: '-x spir' and '-spir-std=1.2')
+     * @return created ProgramSource object.
+     */
+    static ProgramSource fromSPIR(const String& module, const String& name,
+            const unsigned char* binary, const size_t size,
+            const cv::String& buildOptions = cv::String());
+
+    //OpenCL 2.1+ only
+    //static Program fromSPIRV(const String& module, const String& name,
+    //        const unsigned char* binary, const size_t size,
+    //        const cv::String& buildOptions = cv::String());
+
+    struct Impl; friend struct Impl;
+    inline Impl* getImpl() const { return (Impl*)p; }
 protected:
-    struct Impl;
     Impl* p;
 };
 
@@ -658,6 +600,7 @@ CV_EXPORTS const char* convertTypeStr(int sdepth, int ddepth, int cn, char* buf)
 CV_EXPORTS const char* typeToStr(int t);
 CV_EXPORTS const char* memopTypeToStr(int t);
 CV_EXPORTS const char* vecopTypeToStr(int t);
+CV_EXPORTS const char* getOpenCLErrorString(int errorCode);
 CV_EXPORTS String kernelToStr(InputArray _kernel, int ddepth = -1, const char * name = NULL);
 CV_EXPORTS void getPlatfomsInfo(std::vector<PlatformInfo>& platform_info);
 
@@ -697,22 +640,25 @@ class CV_EXPORTS Image2D
 public:
     Image2D();
 
-    // src:     The UMat from which to get image properties and data
-    // norm:    Flag to enable the use of normalized channel data types
-    // alias:   Flag indicating that the image should alias the src UMat.
-    //          If true, changes to the image or src will be reflected in
-    //          both objects.
+    /**
+    @param src UMat object from which to get image properties and data
+    @param norm flag to enable the use of normalized channel data types
+    @param alias flag indicating that the image should alias the src UMat. If true, changes to the
+        image or src will be reflected in both objects.
+    */
     explicit Image2D(const UMat &src, bool norm = false, bool alias = false);
     Image2D(const Image2D & i);
     ~Image2D();
 
     Image2D & operator = (const Image2D & i);
 
-    // Indicates if creating an aliased image should succeed.  Depends on the
-    // underlying platform and the dimensions of the UMat.
+    /** Indicates if creating an aliased image should succeed.
+    Depends on the underlying platform and the dimensions of the UMat.
+    */
     static bool canCreateAlias(const UMat &u);
 
-    // Indicates if the image format is supported.
+    /** Indicates if the image format is supported.
+    */
     static bool isFormatSupported(int depth, int cn, bool norm);
 
     void* ptr() const;
@@ -721,6 +667,24 @@ class CV_EXPORTS Image2D
     Impl* p;
 };
 
+/** Measures a duration in nanoseconds; constructed from a Queue; copy operations are declared private (disabled). */
+class CV_EXPORTS Timer
+{
+public:
+    Timer(const Queue& q); // NOTE(review): presumably the queue whose work is timed - confirm in the implementation
+    ~Timer();
+    void start();
+    void stop();
+    uint64 durationNS() const; //!< duration in nanoseconds
+
+protected:
+    struct Impl;
+    Impl* const p; // pointer to the opaque Impl; const, so it cannot be reseated after construction
+
+private:
+    Timer(const Timer&); // disabled
+    Timer& operator=(const Timer&); // disabled
+};
 
 CV_EXPORTS MatAllocator* getOpenCLAllocator();
 
@@ -728,6 +692,9 @@ CV_EXPORTS MatAllocator* getOpenCLAllocator();
 #ifdef __OPENCV_BUILD
 namespace internal {
 
+CV_EXPORTS bool isOpenCLForced();
+#define OCL_FORCE_CHECK(condition) (cv::ocl::internal::isOpenCLForced() || (condition))
+
 CV_EXPORTS bool isPerformanceCheckBypassed();
 #define OCL_PERFORMANCE_CHECK(condition) (cv::ocl::internal::isPerformanceCheckBypassed() || (condition))
 
diff --git a/IPL/include/opencv/opencv2/core/ocl_genbase.hpp b/IPL/include/opencv/opencv2/core/ocl_genbase.hpp
index d53bc1a..5334cf1 100644
--- a/IPL/include/opencv/opencv2/core/ocl_genbase.hpp
+++ b/IPL/include/opencv/opencv2/core/ocl_genbase.hpp
@@ -39,26 +39,31 @@
 //
 //M*/
 
-#ifndef __OPENCV_OPENCL_GENBASE_HPP__
-#define __OPENCV_OPENCL_GENBASE_HPP__
-
-namespace cv
-{
-namespace ocl
-{
+#ifndef OPENCV_OPENCL_GENBASE_HPP
+#define OPENCV_OPENCL_GENBASE_HPP
 
 //! @cond IGNORED
 
-struct ProgramEntry
+namespace cv {
+namespace ocl {
+
+class ProgramSource;
+
+namespace internal {
+
+// Plain record holding a program's module, name, code and hash strings plus a ProgramSource pointer.
+struct CV_EXPORTS ProgramEntry
 {
+    const char* module;
     const char* name;
-    const char* programStr;
+    const char* programCode;
     const char* programHash;
+    ProgramSource* pProgramSource;
+    operator ProgramSource& () const; // implicit conversion, only declared here (presumably backed by pProgramSource - TODO confirm)
 };
 
-//! @endcond
+} } } // namespace
 
-}
-}
+//! @endcond
 
 #endif
diff --git a/IPL/include/opencv/opencv2/core/opencl/ocl_defs.hpp b/IPL/include/opencv/opencv2/core/opencl/ocl_defs.hpp
new file mode 100644
index 0000000..14df750
--- /dev/null
+++ b/IPL/include/opencv/opencv2/core/opencl/ocl_defs.hpp
@@ -0,0 +1,82 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+// Copyright (C) 2014, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+
+#ifndef OPENCV_CORE_OPENCL_DEFS_HPP
+#define OPENCV_CORE_OPENCL_DEFS_HPP
+
+#include "opencv2/core/utility.hpp"
+#include "cvconfig.h"
+
+namespace cv { namespace ocl {
+#ifdef HAVE_OPENCL
+/// Call is similar to useOpenCL() but doesn't try to load OpenCL runtime or create OpenCL context
+CV_EXPORTS bool isOpenCLActivated();
+#else
+static inline bool isOpenCLActivated() { return false; } // built without HAVE_OPENCL: always reports false
+#endif
+}} // namespace
+
+
+//#define CV_OPENCL_RUN_ASSERT
+// CV_OCL_RUN_(condition, func, ...): if OpenCL is activated, (condition) holds and func evaluates to true, return __VA_ARGS__ from the enclosing function.
+#ifdef HAVE_OPENCL
+// Variants: CV_OPENCL_RUN_VERBOSE prints which path runs; CV_OPENCL_RUN_ASSERT raises CV_Error when func is false; the default catches cv::Exception and falls through.
+#ifdef CV_OPENCL_RUN_VERBOSE
+#define CV_OCL_RUN_(condition, func, ...)                                   \
+    {                                                                       \
+        if (cv::ocl::isOpenCLActivated() && (condition) && func)            \
+        {                                                                   \
+            printf("%s: OpenCL implementation is running\n", CV_Func);      \
+            fflush(stdout);                                                 \
+            CV_IMPL_ADD(CV_IMPL_OCL);                                       \
+            return __VA_ARGS__;                                             \
+        }                                                                   \
+        else                                                                \
+        {                                                                   \
+            printf("%s: Plain implementation is running\n", CV_Func);       \
+            fflush(stdout);                                                 \
+        }                                                                   \
+    }
+#elif defined CV_OPENCL_RUN_ASSERT
+#define CV_OCL_RUN_(condition, func, ...)                                   \
+    {                                                                       \
+        if (cv::ocl::isOpenCLActivated() && (condition))                    \
+        {                                                                   \
+            if(func)                                                        \
+            {                                                               \
+                CV_IMPL_ADD(CV_IMPL_OCL);                                   \
+            }                                                               \
+            else                                                            \
+            {                                                               \
+                CV_Error(cv::Error::StsAssert, #func);                      \
+            }                                                               \
+            return __VA_ARGS__;                                             \
+        }                                                                   \
+    }
+#else
+#define CV_OCL_RUN_(condition, func, ...)                                   \
+try \
+{ \
+    if (cv::ocl::isOpenCLActivated() && (condition) && func)                \
+    {                                                                       \
+        CV_IMPL_ADD(CV_IMPL_OCL);                                           \
+        return __VA_ARGS__;                                                 \
+    } \
+} \
+catch (const cv::Exception& e) \
+{ \
+    CV_UNUSED(e); /* TODO: Add some logging here */ \
+}
+#endif
+// Without HAVE_OPENCL the macro expands to nothing.
+#else
+#define CV_OCL_RUN_(condition, func, ...)
+#endif
+
+#define CV_OCL_RUN(condition, func) CV_OCL_RUN_(condition, func)
+
+#endif // OPENCV_CORE_OPENCL_DEFS_HPP
diff --git a/IPL/include/opencv/opencv2/core/opencl/opencl_info.hpp b/IPL/include/opencv/opencv2/core/opencl/opencl_info.hpp
new file mode 100644
index 0000000..5e5c846
--- /dev/null
+++ b/IPL/include/opencv/opencv2/core/opencl/opencl_info.hpp
@@ -0,0 +1,205 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#include <iostream>
+
+#include <opencv2/core.hpp>
+#include <opencv2/core/ocl.hpp>
+
+#ifndef DUMP_CONFIG_PROPERTY
+#define DUMP_CONFIG_PROPERTY(...)
+#endif
+
+#ifndef DUMP_MESSAGE_STDOUT
+#define DUMP_MESSAGE_STDOUT(...) do { std::cout << __VA_ARGS__ << std::endl; } while (false)
+#endif
+
+namespace cv {
+
+namespace {
+/** @brief Format a byte count as "<g> GB <m> MB <k> KB <b> B", omitting zero components.
+ * A zero count yields "0 B": previously the result string was empty and
+ * s[s.size() - 1] indexed out of range (undefined behaviour).
+ * @param value size in bytes
+ * @return formatted size without a trailing space
+ */
+static std::string bytesToStringRepr(size_t value)
+{
+    if (value == 0)
+        return "0 B";
+
+    const size_t b = value % 1024;
+    value /= 1024;
+    const size_t kb = value % 1024;
+    value /= 1024;
+    const size_t mb = value % 1024;
+    const size_t gb = value / 1024;
+
+    std::ostringstream stream;
+    if (gb > 0) stream << gb << " GB ";
+    if (mb > 0) stream << mb << " MB ";
+    if (kb > 0) stream << kb << " KB ";
+    if (b > 0) stream << b << " B";
+
+    std::string s = stream.str();
+    if (!s.empty() && s[s.size() - 1] == ' ')  // trailing space remains when b == 0
+        s.erase(s.size() - 1);
+    return s;
+}
+
+/** @brief Short label for an OpenCL device.
+ * "CPU" for TYPE_CPU; for TYPE_GPU "iGPU" when the device reports host-unified memory and
+ * "dGPU" otherwise; "unknown" for any other device type.
+ * @param device device to classify
+ */
+static String getDeviceTypeString(const cv::ocl::Device& device)
+{
+    // The iGPU/dGPU split is decided solely by the host-unified-memory flag.
+    if (device.type() == cv::ocl::Device::TYPE_GPU)
+        return device.hostUnifiedMemory() ? "iGPU" : "dGPU";
+
+    if (device.type() == cv::ocl::Device::TYPE_CPU)
+        return "CPU";
+
+    return "unknown";
+}
+} // namespace
+
+/** Print the OpenCL platforms/devices and the default device's properties through DUMP_MESSAGE_STDOUT,
+ *  passing the same values to DUMP_CONFIG_PROPERTY; any exception is caught and reported as "not available". */
+static void dumpOpenCLInformation()
+{
+    using namespace cv::ocl;
+    try
+    {
+        if (!haveOpenCL() || !useOpenCL())
+        {
+            DUMP_MESSAGE_STDOUT("OpenCL is disabled");
+            DUMP_CONFIG_PROPERTY("cv_ocl", "disabled");
+            return;
+        }
+
+        std::vector<PlatformInfo> platforms;
+        cv::ocl::getPlatfomsInfo(platforms);
+        if (platforms.empty())
+        {
+            DUMP_MESSAGE_STDOUT("OpenCL is not available");
+            DUMP_CONFIG_PROPERTY("cv_ocl", "not available");
+            return;
+        }
+        // List every platform followed by each of its devices.
+        DUMP_MESSAGE_STDOUT("OpenCL Platforms: ");
+        for (size_t i = 0; i < platforms.size(); i++)
+        {
+            const PlatformInfo* platform = &platforms[i];
+            DUMP_MESSAGE_STDOUT("    " << platform->name());
+            Device current_device;
+            for (int j = 0; j < platform->deviceNumber(); j++)
+            {
+                platform->getDevice(current_device, j);
+                String deviceTypeStr = getDeviceTypeString(current_device);
+                DUMP_MESSAGE_STDOUT( "        " << deviceTypeStr << ": " << current_device.name() << " (" << current_device.version() << ")");
+                DUMP_CONFIG_PROPERTY( cv::format("cv_ocl_platform_%d_device_%d", (int)i, j ),
+                    cv::format("(Platform=%s)(Type=%s)(Name=%s)(Version=%s)",
+                    platform->name().c_str(), deviceTypeStr.c_str(), current_device.name().c_str(), current_device.version().c_str()) );
+            }
+        }
+        const Device& device = Device::getDefault();
+        if (!device.available())
+            CV_Error(Error::OpenCLInitError, "OpenCL device is not available");
+        // Everything below describes the default device only.
+        DUMP_MESSAGE_STDOUT("Current OpenCL device: ");
+
+        String deviceTypeStr = getDeviceTypeString(device);
+        DUMP_MESSAGE_STDOUT("    Type = " << deviceTypeStr);
+        DUMP_CONFIG_PROPERTY("cv_ocl_current_deviceType", deviceTypeStr);
+
+        DUMP_MESSAGE_STDOUT("    Name = " << device.name());
+        DUMP_CONFIG_PROPERTY("cv_ocl_current_deviceName", device.name());
+
+        DUMP_MESSAGE_STDOUT("    Version = " << device.version());
+        DUMP_CONFIG_PROPERTY("cv_ocl_current_deviceVersion", device.version());
+
+        DUMP_MESSAGE_STDOUT("    Driver version = " << device.driverVersion());
+        DUMP_CONFIG_PROPERTY("cv_ocl_current_driverVersion", device.driverVersion());
+
+        DUMP_MESSAGE_STDOUT("    Address bits = " << device.addressBits());
+        DUMP_CONFIG_PROPERTY("cv_ocl_current_addressBits", device.addressBits());
+
+        DUMP_MESSAGE_STDOUT("    Compute units = " << device.maxComputeUnits());
+        DUMP_CONFIG_PROPERTY("cv_ocl_current_maxComputeUnits", device.maxComputeUnits());
+
+        DUMP_MESSAGE_STDOUT("    Max work group size = " << device.maxWorkGroupSize());
+        DUMP_CONFIG_PROPERTY("cv_ocl_current_maxWorkGroupSize", device.maxWorkGroupSize());
+
+        std::string localMemorySizeStr = bytesToStringRepr(device.localMemSize());
+        DUMP_MESSAGE_STDOUT("    Local memory size = " << localMemorySizeStr);
+        DUMP_CONFIG_PROPERTY("cv_ocl_current_localMemSize", device.localMemSize());
+
+        std::string maxMemAllocSizeStr = bytesToStringRepr(device.maxMemAllocSize());
+        DUMP_MESSAGE_STDOUT("    Max memory allocation size = " << maxMemAllocSizeStr);
+        DUMP_CONFIG_PROPERTY("cv_ocl_current_maxMemAllocSize", device.maxMemAllocSize());
+
+        const char* doubleSupportStr = device.doubleFPConfig() > 0 ? "Yes" : "No";
+        DUMP_MESSAGE_STDOUT("    Double support = " << doubleSupportStr);
+        DUMP_CONFIG_PROPERTY("cv_ocl_current_haveDoubleSupport", device.doubleFPConfig() > 0);
+
+        const char* isUnifiedMemoryStr = device.hostUnifiedMemory() ? "Yes" : "No";
+        DUMP_MESSAGE_STDOUT("    Host unified memory = " << isUnifiedMemoryStr);
+        DUMP_CONFIG_PROPERTY("cv_ocl_current_hostUnifiedMemory", device.hostUnifiedMemory());
+        // Extensions come as one space-separated string: print one name per line, skipping empty tokens.
+        DUMP_MESSAGE_STDOUT("    Device extensions:");
+        String extensionsStr = device.extensions();
+        size_t pos = 0;
+        while (pos < extensionsStr.size())
+        {
+            size_t pos2 = extensionsStr.find(' ', pos);
+            if (pos2 == String::npos)
+                pos2 = extensionsStr.size();
+            if (pos2 > pos)
+            {
+                String extensionName = extensionsStr.substr(pos, pos2 - pos);
+                DUMP_MESSAGE_STDOUT("        " << extensionName);
+            }
+            pos = pos2 + 1;
+        }
+        DUMP_CONFIG_PROPERTY("cv_ocl_current_extensions", extensionsStr);
+
+        const char* haveAmdBlasStr = haveAmdBlas() ? "Yes" : "No";
+        DUMP_MESSAGE_STDOUT("    Has AMD Blas = " << haveAmdBlasStr);
+        DUMP_CONFIG_PROPERTY("cv_ocl_current_AmdBlas", haveAmdBlas());
+
+        const char* haveAmdFftStr = haveAmdFft() ? "Yes" : "No";
+        DUMP_MESSAGE_STDOUT("    Has AMD Fft = " << haveAmdFftStr);
+        DUMP_CONFIG_PROPERTY("cv_ocl_current_AmdFft", haveAmdFft());
+
+        DUMP_MESSAGE_STDOUT("    Preferred vector width char = " << device.preferredVectorWidthChar());
+        DUMP_CONFIG_PROPERTY("cv_ocl_current_preferredVectorWidthChar", device.preferredVectorWidthChar());
+
+        DUMP_MESSAGE_STDOUT("    Preferred vector width short = " << device.preferredVectorWidthShort());
+        DUMP_CONFIG_PROPERTY("cv_ocl_current_preferredVectorWidthShort", device.preferredVectorWidthShort());
+
+        DUMP_MESSAGE_STDOUT("    Preferred vector width int = " << device.preferredVectorWidthInt());
+        DUMP_CONFIG_PROPERTY("cv_ocl_current_preferredVectorWidthInt", device.preferredVectorWidthInt());
+
+        DUMP_MESSAGE_STDOUT("    Preferred vector width long = " << device.preferredVectorWidthLong());
+        DUMP_CONFIG_PROPERTY("cv_ocl_current_preferredVectorWidthLong", device.preferredVectorWidthLong());
+
+        DUMP_MESSAGE_STDOUT("    Preferred vector width float = " << device.preferredVectorWidthFloat());
+        DUMP_CONFIG_PROPERTY("cv_ocl_current_preferredVectorWidthFloat", device.preferredVectorWidthFloat());
+
+        DUMP_MESSAGE_STDOUT("    Preferred vector width double = " << device.preferredVectorWidthDouble());
+        DUMP_CONFIG_PROPERTY("cv_ocl_current_preferredVectorWidthDouble", device.preferredVectorWidthDouble());
+    }
+    catch (...)
+    {
+        DUMP_MESSAGE_STDOUT("Exception. Can't dump OpenCL info");
+        DUMP_MESSAGE_STDOUT("OpenCL device not available");
+        DUMP_CONFIG_PROPERTY("cv_ocl", "not available");
+    }
+}
+#undef DUMP_MESSAGE_STDOUT
+#undef DUMP_CONFIG_PROPERTY
+
+} // namespace
diff --git a/IPL/include/opencv/opencv2/core/opencl/opencl_svm.hpp b/IPL/include/opencv/opencv2/core/opencl/opencl_svm.hpp
new file mode 100644
index 0000000..7453082
--- /dev/null
+++ b/IPL/include/opencv/opencv2/core/opencl/opencl_svm.hpp
@@ -0,0 +1,81 @@
+/* See LICENSE file in the root OpenCV directory */
+
+#ifndef OPENCV_CORE_OPENCL_SVM_HPP
+#define OPENCV_CORE_OPENCL_SVM_HPP
+
+//
+// Internal usage only (binary compatibility is not guaranteed)
+//
+#ifndef __OPENCV_BUILD
+#error Internal header file
+#endif
+
+#if defined(HAVE_OPENCL) && defined(HAVE_OPENCL_SVM)
+#include "runtime/opencl_core.hpp"
+#include "runtime/opencl_svm_20.hpp"
+#include "runtime/opencl_svm_hsa_extension.hpp"
+
+namespace cv { namespace ocl { namespace svm {
+
+/** Bit set of SVM capability flags (see Value); an empty set means no SVM support. */
+struct SVMCapabilities
+{
+    enum Value
+    {
+        SVM_COARSE_GRAIN_BUFFER = 0x1,
+        SVM_FINE_GRAIN_BUFFER = 0x2,
+        SVM_FINE_GRAIN_SYSTEM = 0x4,
+        SVM_ATOMICS = 0x8,
+    };
+    int value_; // OR-combination of Value flags
+    SVMCapabilities(int capabilities = 0) : value_(capabilities) { }
+    operator int() const { return value_; }
+
+    inline bool isNoSVMSupport() const { return !value_; }
+    inline bool isSupportCoarseGrainBuffer() const { return (value_ & SVM_COARSE_GRAIN_BUFFER) == SVM_COARSE_GRAIN_BUFFER; }
+    inline bool isSupportFineGrainBuffer() const { return (value_ & SVM_FINE_GRAIN_BUFFER) == SVM_FINE_GRAIN_BUFFER; }
+    inline bool isSupportFineGrainSystem() const { return (value_ & SVM_FINE_GRAIN_SYSTEM) == SVM_FINE_GRAIN_SYSTEM; }
+    inline bool isSupportAtomics() const { return (value_ & SVM_ATOMICS) == SVM_ATOMICS; }
+};
+
+CV_EXPORTS const SVMCapabilities getSVMCapabilitites(const ocl::Context& context);
+
+/** Table of SVM entry-point pointers (typed with the *AMD_fn pointer types); every pointer starts out NULL. */
+struct SVMFunctions
+{
+    clSVMAllocAMD_fn fn_clSVMAlloc;
+    clSVMFreeAMD_fn fn_clSVMFree;
+    clSetKernelArgSVMPointerAMD_fn fn_clSetKernelArgSVMPointer;
+    //clSetKernelExecInfoAMD_fn fn_clSetKernelExecInfo;
+    //clEnqueueSVMFreeAMD_fn fn_clEnqueueSVMFree;
+    clEnqueueSVMMemcpyAMD_fn fn_clEnqueueSVMMemcpy;
+    clEnqueueSVMMemFillAMD_fn fn_clEnqueueSVMMemFill;
+    clEnqueueSVMMapAMD_fn fn_clEnqueueSVMMap;
+    clEnqueueSVMUnmapAMD_fn fn_clEnqueueSVMUnmap;
+
+    inline SVMFunctions()
+        : fn_clSVMAlloc(NULL), fn_clSVMFree(NULL), fn_clSetKernelArgSVMPointer(NULL),
+          fn_clEnqueueSVMMemcpy(NULL), fn_clEnqueueSVMMemFill(NULL),
+          fn_clEnqueueSVMMap(NULL), fn_clEnqueueSVMUnmap(NULL)
+    { }
+
+    /** @return true only when every declared pointer is non-NULL (the commented-out entries are not checked). */
+    inline bool isValid() const
+    {
+        if (!fn_clSVMAlloc || !fn_clSVMFree || !fn_clSetKernelArgSVMPointer)
+            return false;
+        return fn_clEnqueueSVMMemcpy && fn_clEnqueueSVMMemFill &&
+               fn_clEnqueueSVMMap && fn_clEnqueueSVMUnmap;
+    }
+};
+
+// We should guarantee that SVMFunctions lifetime is not less than context's lifetime
+CV_EXPORTS const SVMFunctions* getSVMFunctions(const ocl::Context& context);
+
+CV_EXPORTS bool useSVM(UMatUsageFlags usageFlags);
+
+}}} //namespace cv::ocl::svm
+#endif
+
+#endif // OPENCV_CORE_OPENCL_SVM_HPP
+/* End of file. */
diff --git a/IPL/include/opencv/opencv2/core/opencl/runtime/autogenerated/opencl_clamdblas.hpp b/IPL/include/opencv/opencv2/core/opencl/runtime/autogenerated/opencl_clamdblas.hpp
new file mode 100644
index 0000000..65c8493
--- /dev/null
+++ b/IPL/include/opencv/opencv2/core/opencl/runtime/autogenerated/opencl_clamdblas.hpp
@@ -0,0 +1,714 @@
+//
+// AUTOGENERATED, DO NOT EDIT
+//
+#ifndef OPENCV_CORE_OCL_RUNTIME_CLAMDBLAS_HPP
+#error "Invalid usage"
+#endif
+
+// generated by parser_clamdblas.py
+#define clAmdBlasAddScratchImage clAmdBlasAddScratchImage_
+#define clAmdBlasCaxpy clAmdBlasCaxpy_
+#define clAmdBlasCcopy clAmdBlasCcopy_
+#define clAmdBlasCdotc clAmdBlasCdotc_
+#define clAmdBlasCdotu clAmdBlasCdotu_
+#define clAmdBlasCgbmv clAmdBlasCgbmv_
+#define clAmdBlasCgemm clAmdBlasCgemm_
+#define clAmdBlasCgemmEx clAmdBlasCgemmEx_
+#define clAmdBlasCgemv clAmdBlasCgemv_
+#define clAmdBlasCgemvEx clAmdBlasCgemvEx_
+#define clAmdBlasCgerc clAmdBlasCgerc_
+#define clAmdBlasCgeru clAmdBlasCgeru_
+#define clAmdBlasChbmv clAmdBlasChbmv_
+#define clAmdBlasChemm clAmdBlasChemm_
+#define clAmdBlasChemv clAmdBlasChemv_
+#define clAmdBlasCher clAmdBlasCher_
+#define clAmdBlasCher2 clAmdBlasCher2_
+#define clAmdBlasCher2k clAmdBlasCher2k_
+#define clAmdBlasCherk clAmdBlasCherk_
+#define clAmdBlasChpmv clAmdBlasChpmv_
+#define clAmdBlasChpr clAmdBlasChpr_
+#define clAmdBlasChpr2 clAmdBlasChpr2_
+#define clAmdBlasCrotg clAmdBlasCrotg_
+#define clAmdBlasCscal clAmdBlasCscal_
+#define clAmdBlasCsrot clAmdBlasCsrot_
+#define clAmdBlasCsscal clAmdBlasCsscal_
+#define clAmdBlasCswap clAmdBlasCswap_
+#define clAmdBlasCsymm clAmdBlasCsymm_
+#define clAmdBlasCsyr2k clAmdBlasCsyr2k_
+#define clAmdBlasCsyr2kEx clAmdBlasCsyr2kEx_
+#define clAmdBlasCsyrk clAmdBlasCsyrk_
+#define clAmdBlasCsyrkEx clAmdBlasCsyrkEx_
+#define clAmdBlasCtbmv clAmdBlasCtbmv_
+#define clAmdBlasCtbsv clAmdBlasCtbsv_
+#define clAmdBlasCtpmv clAmdBlasCtpmv_
+#define clAmdBlasCtpsv clAmdBlasCtpsv_
+#define clAmdBlasCtrmm clAmdBlasCtrmm_
+#define clAmdBlasCtrmmEx clAmdBlasCtrmmEx_
+#define clAmdBlasCtrmv clAmdBlasCtrmv_
+#define clAmdBlasCtrsm clAmdBlasCtrsm_
+#define clAmdBlasCtrsmEx clAmdBlasCtrsmEx_
+#define clAmdBlasCtrsv clAmdBlasCtrsv_
+#define clAmdBlasDasum clAmdBlasDasum_
+#define clAmdBlasDaxpy clAmdBlasDaxpy_
+#define clAmdBlasDcopy clAmdBlasDcopy_
+#define clAmdBlasDdot clAmdBlasDdot_
+#define clAmdBlasDgbmv clAmdBlasDgbmv_
+#define clAmdBlasDgemm clAmdBlasDgemm_
+#define clAmdBlasDgemmEx clAmdBlasDgemmEx_
+#define clAmdBlasDgemv clAmdBlasDgemv_
+#define clAmdBlasDgemvEx clAmdBlasDgemvEx_
+#define clAmdBlasDger clAmdBlasDger_
+#define clAmdBlasDnrm2 clAmdBlasDnrm2_
+#define clAmdBlasDrot clAmdBlasDrot_
+#define clAmdBlasDrotg clAmdBlasDrotg_
+#define clAmdBlasDrotm clAmdBlasDrotm_
+#define clAmdBlasDrotmg clAmdBlasDrotmg_
+#define clAmdBlasDsbmv clAmdBlasDsbmv_
+#define clAmdBlasDscal clAmdBlasDscal_
+#define clAmdBlasDspmv clAmdBlasDspmv_
+#define clAmdBlasDspr clAmdBlasDspr_
+#define clAmdBlasDspr2 clAmdBlasDspr2_
+#define clAmdBlasDswap clAmdBlasDswap_
+#define clAmdBlasDsymm clAmdBlasDsymm_
+#define clAmdBlasDsymv clAmdBlasDsymv_
+#define clAmdBlasDsymvEx clAmdBlasDsymvEx_
+#define clAmdBlasDsyr clAmdBlasDsyr_
+#define clAmdBlasDsyr2 clAmdBlasDsyr2_
+#define clAmdBlasDsyr2k clAmdBlasDsyr2k_
+#define clAmdBlasDsyr2kEx clAmdBlasDsyr2kEx_
+#define clAmdBlasDsyrk clAmdBlasDsyrk_
+#define clAmdBlasDsyrkEx clAmdBlasDsyrkEx_
+#define clAmdBlasDtbmv clAmdBlasDtbmv_
+#define clAmdBlasDtbsv clAmdBlasDtbsv_
+#define clAmdBlasDtpmv clAmdBlasDtpmv_
+#define clAmdBlasDtpsv clAmdBlasDtpsv_
+#define clAmdBlasDtrmm clAmdBlasDtrmm_
+#define clAmdBlasDtrmmEx clAmdBlasDtrmmEx_
+#define clAmdBlasDtrmv clAmdBlasDtrmv_
+#define clAmdBlasDtrsm clAmdBlasDtrsm_
+#define clAmdBlasDtrsmEx clAmdBlasDtrsmEx_
+#define clAmdBlasDtrsv clAmdBlasDtrsv_
+#define clAmdBlasDzasum clAmdBlasDzasum_
+#define clAmdBlasDznrm2 clAmdBlasDznrm2_
+#define clAmdBlasGetVersion clAmdBlasGetVersion_
+#define clAmdBlasRemoveScratchImage clAmdBlasRemoveScratchImage_
+#define clAmdBlasSasum clAmdBlasSasum_
+#define clAmdBlasSaxpy clAmdBlasSaxpy_
+#define clAmdBlasScasum clAmdBlasScasum_
+#define clAmdBlasScnrm2 clAmdBlasScnrm2_
+#define clAmdBlasScopy clAmdBlasScopy_
+#define clAmdBlasSdot clAmdBlasSdot_
+#define clAmdBlasSetup clAmdBlasSetup_
+#define clAmdBlasSgbmv clAmdBlasSgbmv_
+#define clAmdBlasSgemm clAmdBlasSgemm_
+#define clAmdBlasSgemmEx clAmdBlasSgemmEx_
+#define clAmdBlasSgemv clAmdBlasSgemv_
+#define clAmdBlasSgemvEx clAmdBlasSgemvEx_
+#define clAmdBlasSger clAmdBlasSger_
+#define clAmdBlasSnrm2 clAmdBlasSnrm2_
+#define clAmdBlasSrot clAmdBlasSrot_
+#define clAmdBlasSrotg clAmdBlasSrotg_
+#define clAmdBlasSrotm clAmdBlasSrotm_
+#define clAmdBlasSrotmg clAmdBlasSrotmg_
+#define clAmdBlasSsbmv clAmdBlasSsbmv_
+#define clAmdBlasSscal clAmdBlasSscal_
+#define clAmdBlasSspmv clAmdBlasSspmv_
+#define clAmdBlasSspr clAmdBlasSspr_
+#define clAmdBlasSspr2 clAmdBlasSspr2_
+#define clAmdBlasSswap clAmdBlasSswap_
+#define clAmdBlasSsymm clAmdBlasSsymm_
+#define clAmdBlasSsymv clAmdBlasSsymv_
+#define clAmdBlasSsymvEx clAmdBlasSsymvEx_
+#define clAmdBlasSsyr clAmdBlasSsyr_
+#define clAmdBlasSsyr2 clAmdBlasSsyr2_
+#define clAmdBlasSsyr2k clAmdBlasSsyr2k_
+#define clAmdBlasSsyr2kEx clAmdBlasSsyr2kEx_
+#define clAmdBlasSsyrk clAmdBlasSsyrk_
+#define clAmdBlasSsyrkEx clAmdBlasSsyrkEx_
+#define clAmdBlasStbmv clAmdBlasStbmv_
+#define clAmdBlasStbsv clAmdBlasStbsv_
+#define clAmdBlasStpmv clAmdBlasStpmv_
+#define clAmdBlasStpsv clAmdBlasStpsv_
+#define clAmdBlasStrmm clAmdBlasStrmm_
+#define clAmdBlasStrmmEx clAmdBlasStrmmEx_
+#define clAmdBlasStrmv clAmdBlasStrmv_
+#define clAmdBlasStrsm clAmdBlasStrsm_
+#define clAmdBlasStrsmEx clAmdBlasStrsmEx_
+#define clAmdBlasStrsv clAmdBlasStrsv_
+#define clAmdBlasTeardown clAmdBlasTeardown_
+#define clAmdBlasZaxpy clAmdBlasZaxpy_
+#define clAmdBlasZcopy clAmdBlasZcopy_
+#define clAmdBlasZdotc clAmdBlasZdotc_
+#define clAmdBlasZdotu clAmdBlasZdotu_
+#define clAmdBlasZdrot clAmdBlasZdrot_
+#define clAmdBlasZdscal clAmdBlasZdscal_
+#define clAmdBlasZgbmv clAmdBlasZgbmv_
+#define clAmdBlasZgemm clAmdBlasZgemm_
+#define clAmdBlasZgemmEx clAmdBlasZgemmEx_
+#define clAmdBlasZgemv clAmdBlasZgemv_
+#define clAmdBlasZgemvEx clAmdBlasZgemvEx_
+#define clAmdBlasZgerc clAmdBlasZgerc_
+#define clAmdBlasZgeru clAmdBlasZgeru_
+#define clAmdBlasZhbmv clAmdBlasZhbmv_
+#define clAmdBlasZhemm clAmdBlasZhemm_
+#define clAmdBlasZhemv clAmdBlasZhemv_
+#define clAmdBlasZher clAmdBlasZher_
+#define clAmdBlasZher2 clAmdBlasZher2_
+#define clAmdBlasZher2k clAmdBlasZher2k_
+#define clAmdBlasZherk clAmdBlasZherk_
+#define clAmdBlasZhpmv clAmdBlasZhpmv_
+#define clAmdBlasZhpr clAmdBlasZhpr_
+#define clAmdBlasZhpr2 clAmdBlasZhpr2_
+#define clAmdBlasZrotg clAmdBlasZrotg_
+#define clAmdBlasZscal clAmdBlasZscal_
+#define clAmdBlasZswap clAmdBlasZswap_
+#define clAmdBlasZsymm clAmdBlasZsymm_
+#define clAmdBlasZsyr2k clAmdBlasZsyr2k_
+#define clAmdBlasZsyr2kEx clAmdBlasZsyr2kEx_
+#define clAmdBlasZsyrk clAmdBlasZsyrk_
+#define clAmdBlasZsyrkEx clAmdBlasZsyrkEx_
+#define clAmdBlasZtbmv clAmdBlasZtbmv_
+#define clAmdBlasZtbsv clAmdBlasZtbsv_
+#define clAmdBlasZtpmv clAmdBlasZtpmv_
+#define clAmdBlasZtpsv clAmdBlasZtpsv_
+#define clAmdBlasZtrmm clAmdBlasZtrmm_
+#define clAmdBlasZtrmmEx clAmdBlasZtrmmEx_
+#define clAmdBlasZtrmv clAmdBlasZtrmv_
+#define clAmdBlasZtrsm clAmdBlasZtrsm_
+#define clAmdBlasZtrsmEx clAmdBlasZtrsmEx_
+#define clAmdBlasZtrsv clAmdBlasZtrsv_
+#define clAmdBlasiCamax clAmdBlasiCamax_
+#define clAmdBlasiDamax clAmdBlasiDamax_
+#define clAmdBlasiSamax clAmdBlasiSamax_
+#define clAmdBlasiZamax clAmdBlasiZamax_
+
+#include <clAmdBlas.h>
+
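+// generated by parser_clamdblas.py: undo the temporary NAME -> NAME_ renames applied while including <clAmdBlas.h>; only the active #define lines remap a name to its NAME_pfn function pointer, the rest stay commented out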
+#undef clAmdBlasAddScratchImage
+//#define clAmdBlasAddScratchImage clAmdBlasAddScratchImage_pfn
+#undef clAmdBlasCaxpy
+//#define clAmdBlasCaxpy clAmdBlasCaxpy_pfn
+#undef clAmdBlasCcopy
+//#define clAmdBlasCcopy clAmdBlasCcopy_pfn
+#undef clAmdBlasCdotc
+//#define clAmdBlasCdotc clAmdBlasCdotc_pfn
+#undef clAmdBlasCdotu
+//#define clAmdBlasCdotu clAmdBlasCdotu_pfn
+#undef clAmdBlasCgbmv
+//#define clAmdBlasCgbmv clAmdBlasCgbmv_pfn
+#undef clAmdBlasCgemm
+//#define clAmdBlasCgemm clAmdBlasCgemm_pfn
+#undef clAmdBlasCgemmEx
+#define clAmdBlasCgemmEx clAmdBlasCgemmEx_pfn
+#undef clAmdBlasCgemv
+//#define clAmdBlasCgemv clAmdBlasCgemv_pfn
+#undef clAmdBlasCgemvEx
+//#define clAmdBlasCgemvEx clAmdBlasCgemvEx_pfn
+#undef clAmdBlasCgerc
+//#define clAmdBlasCgerc clAmdBlasCgerc_pfn
+#undef clAmdBlasCgeru
+//#define clAmdBlasCgeru clAmdBlasCgeru_pfn
+#undef clAmdBlasChbmv
+//#define clAmdBlasChbmv clAmdBlasChbmv_pfn
+#undef clAmdBlasChemm
+//#define clAmdBlasChemm clAmdBlasChemm_pfn
+#undef clAmdBlasChemv
+//#define clAmdBlasChemv clAmdBlasChemv_pfn
+#undef clAmdBlasCher
+//#define clAmdBlasCher clAmdBlasCher_pfn
+#undef clAmdBlasCher2
+//#define clAmdBlasCher2 clAmdBlasCher2_pfn
+#undef clAmdBlasCher2k
+//#define clAmdBlasCher2k clAmdBlasCher2k_pfn
+#undef clAmdBlasCherk
+//#define clAmdBlasCherk clAmdBlasCherk_pfn
+#undef clAmdBlasChpmv
+//#define clAmdBlasChpmv clAmdBlasChpmv_pfn
+#undef clAmdBlasChpr
+//#define clAmdBlasChpr clAmdBlasChpr_pfn
+#undef clAmdBlasChpr2
+//#define clAmdBlasChpr2 clAmdBlasChpr2_pfn
+#undef clAmdBlasCrotg
+//#define clAmdBlasCrotg clAmdBlasCrotg_pfn
+#undef clAmdBlasCscal
+//#define clAmdBlasCscal clAmdBlasCscal_pfn
+#undef clAmdBlasCsrot
+//#define clAmdBlasCsrot clAmdBlasCsrot_pfn
+#undef clAmdBlasCsscal
+//#define clAmdBlasCsscal clAmdBlasCsscal_pfn
+#undef clAmdBlasCswap
+//#define clAmdBlasCswap clAmdBlasCswap_pfn
+#undef clAmdBlasCsymm
+//#define clAmdBlasCsymm clAmdBlasCsymm_pfn
+#undef clAmdBlasCsyr2k
+//#define clAmdBlasCsyr2k clAmdBlasCsyr2k_pfn
+#undef clAmdBlasCsyr2kEx
+//#define clAmdBlasCsyr2kEx clAmdBlasCsyr2kEx_pfn
+#undef clAmdBlasCsyrk
+//#define clAmdBlasCsyrk clAmdBlasCsyrk_pfn
+#undef clAmdBlasCsyrkEx
+//#define clAmdBlasCsyrkEx clAmdBlasCsyrkEx_pfn
+#undef clAmdBlasCtbmv
+//#define clAmdBlasCtbmv clAmdBlasCtbmv_pfn
+#undef clAmdBlasCtbsv
+//#define clAmdBlasCtbsv clAmdBlasCtbsv_pfn
+#undef clAmdBlasCtpmv
+//#define clAmdBlasCtpmv clAmdBlasCtpmv_pfn
+#undef clAmdBlasCtpsv
+//#define clAmdBlasCtpsv clAmdBlasCtpsv_pfn
+#undef clAmdBlasCtrmm
+//#define clAmdBlasCtrmm clAmdBlasCtrmm_pfn
+#undef clAmdBlasCtrmmEx
+//#define clAmdBlasCtrmmEx clAmdBlasCtrmmEx_pfn
+#undef clAmdBlasCtrmv
+//#define clAmdBlasCtrmv clAmdBlasCtrmv_pfn
+#undef clAmdBlasCtrsm
+//#define clAmdBlasCtrsm clAmdBlasCtrsm_pfn
+#undef clAmdBlasCtrsmEx
+//#define clAmdBlasCtrsmEx clAmdBlasCtrsmEx_pfn
+#undef clAmdBlasCtrsv
+//#define clAmdBlasCtrsv clAmdBlasCtrsv_pfn
+#undef clAmdBlasDasum
+//#define clAmdBlasDasum clAmdBlasDasum_pfn
+#undef clAmdBlasDaxpy
+//#define clAmdBlasDaxpy clAmdBlasDaxpy_pfn
+#undef clAmdBlasDcopy
+//#define clAmdBlasDcopy clAmdBlasDcopy_pfn
+#undef clAmdBlasDdot
+//#define clAmdBlasDdot clAmdBlasDdot_pfn
+#undef clAmdBlasDgbmv
+//#define clAmdBlasDgbmv clAmdBlasDgbmv_pfn
+#undef clAmdBlasDgemm
+//#define clAmdBlasDgemm clAmdBlasDgemm_pfn
+#undef clAmdBlasDgemmEx
+#define clAmdBlasDgemmEx clAmdBlasDgemmEx_pfn
+#undef clAmdBlasDgemv
+//#define clAmdBlasDgemv clAmdBlasDgemv_pfn
+#undef clAmdBlasDgemvEx
+//#define clAmdBlasDgemvEx clAmdBlasDgemvEx_pfn
+#undef clAmdBlasDger
+//#define clAmdBlasDger clAmdBlasDger_pfn
+#undef clAmdBlasDnrm2
+//#define clAmdBlasDnrm2 clAmdBlasDnrm2_pfn
+#undef clAmdBlasDrot
+//#define clAmdBlasDrot clAmdBlasDrot_pfn
+#undef clAmdBlasDrotg
+//#define clAmdBlasDrotg clAmdBlasDrotg_pfn
+#undef clAmdBlasDrotm
+//#define clAmdBlasDrotm clAmdBlasDrotm_pfn
+#undef clAmdBlasDrotmg
+//#define clAmdBlasDrotmg clAmdBlasDrotmg_pfn
+#undef clAmdBlasDsbmv
+//#define clAmdBlasDsbmv clAmdBlasDsbmv_pfn
+#undef clAmdBlasDscal
+//#define clAmdBlasDscal clAmdBlasDscal_pfn
+#undef clAmdBlasDspmv
+//#define clAmdBlasDspmv clAmdBlasDspmv_pfn
+#undef clAmdBlasDspr
+//#define clAmdBlasDspr clAmdBlasDspr_pfn
+#undef clAmdBlasDspr2
+//#define clAmdBlasDspr2 clAmdBlasDspr2_pfn
+#undef clAmdBlasDswap
+//#define clAmdBlasDswap clAmdBlasDswap_pfn
+#undef clAmdBlasDsymm
+//#define clAmdBlasDsymm clAmdBlasDsymm_pfn
+#undef clAmdBlasDsymv
+//#define clAmdBlasDsymv clAmdBlasDsymv_pfn
+#undef clAmdBlasDsymvEx
+//#define clAmdBlasDsymvEx clAmdBlasDsymvEx_pfn
+#undef clAmdBlasDsyr
+//#define clAmdBlasDsyr clAmdBlasDsyr_pfn
+#undef clAmdBlasDsyr2
+//#define clAmdBlasDsyr2 clAmdBlasDsyr2_pfn
+#undef clAmdBlasDsyr2k
+//#define clAmdBlasDsyr2k clAmdBlasDsyr2k_pfn
+#undef clAmdBlasDsyr2kEx
+//#define clAmdBlasDsyr2kEx clAmdBlasDsyr2kEx_pfn
+#undef clAmdBlasDsyrk
+//#define clAmdBlasDsyrk clAmdBlasDsyrk_pfn
+#undef clAmdBlasDsyrkEx
+//#define clAmdBlasDsyrkEx clAmdBlasDsyrkEx_pfn
+#undef clAmdBlasDtbmv
+//#define clAmdBlasDtbmv clAmdBlasDtbmv_pfn
+#undef clAmdBlasDtbsv
+//#define clAmdBlasDtbsv clAmdBlasDtbsv_pfn
+#undef clAmdBlasDtpmv
+//#define clAmdBlasDtpmv clAmdBlasDtpmv_pfn
+#undef clAmdBlasDtpsv
+//#define clAmdBlasDtpsv clAmdBlasDtpsv_pfn
+#undef clAmdBlasDtrmm
+//#define clAmdBlasDtrmm clAmdBlasDtrmm_pfn
+#undef clAmdBlasDtrmmEx
+//#define clAmdBlasDtrmmEx clAmdBlasDtrmmEx_pfn
+#undef clAmdBlasDtrmv
+//#define clAmdBlasDtrmv clAmdBlasDtrmv_pfn
+#undef clAmdBlasDtrsm
+//#define clAmdBlasDtrsm clAmdBlasDtrsm_pfn
+#undef clAmdBlasDtrsmEx
+//#define clAmdBlasDtrsmEx clAmdBlasDtrsmEx_pfn
+#undef clAmdBlasDtrsv
+//#define clAmdBlasDtrsv clAmdBlasDtrsv_pfn
+#undef clAmdBlasDzasum
+//#define clAmdBlasDzasum clAmdBlasDzasum_pfn
+#undef clAmdBlasDznrm2
+//#define clAmdBlasDznrm2 clAmdBlasDznrm2_pfn
+#undef clAmdBlasGetVersion
+//#define clAmdBlasGetVersion clAmdBlasGetVersion_pfn
+#undef clAmdBlasRemoveScratchImage
+//#define clAmdBlasRemoveScratchImage clAmdBlasRemoveScratchImage_pfn
+#undef clAmdBlasSasum
+//#define clAmdBlasSasum clAmdBlasSasum_pfn
+#undef clAmdBlasSaxpy
+//#define clAmdBlasSaxpy clAmdBlasSaxpy_pfn
+#undef clAmdBlasScasum
+//#define clAmdBlasScasum clAmdBlasScasum_pfn
+#undef clAmdBlasScnrm2
+//#define clAmdBlasScnrm2 clAmdBlasScnrm2_pfn
+#undef clAmdBlasScopy
+//#define clAmdBlasScopy clAmdBlasScopy_pfn
+#undef clAmdBlasSdot
+//#define clAmdBlasSdot clAmdBlasSdot_pfn
+#undef clAmdBlasSetup
+#define clAmdBlasSetup clAmdBlasSetup_pfn
+#undef clAmdBlasSgbmv
+//#define clAmdBlasSgbmv clAmdBlasSgbmv_pfn
+#undef clAmdBlasSgemm
+//#define clAmdBlasSgemm clAmdBlasSgemm_pfn
+#undef clAmdBlasSgemmEx
+#define clAmdBlasSgemmEx clAmdBlasSgemmEx_pfn
+#undef clAmdBlasSgemv
+//#define clAmdBlasSgemv clAmdBlasSgemv_pfn
+#undef clAmdBlasSgemvEx
+//#define clAmdBlasSgemvEx clAmdBlasSgemvEx_pfn
+#undef clAmdBlasSger
+//#define clAmdBlasSger clAmdBlasSger_pfn
+#undef clAmdBlasSnrm2
+//#define clAmdBlasSnrm2 clAmdBlasSnrm2_pfn
+#undef clAmdBlasSrot
+//#define clAmdBlasSrot clAmdBlasSrot_pfn
+#undef clAmdBlasSrotg
+//#define clAmdBlasSrotg clAmdBlasSrotg_pfn
+#undef clAmdBlasSrotm
+//#define clAmdBlasSrotm clAmdBlasSrotm_pfn
+#undef clAmdBlasSrotmg
+//#define clAmdBlasSrotmg clAmdBlasSrotmg_pfn
+#undef clAmdBlasSsbmv
+//#define clAmdBlasSsbmv clAmdBlasSsbmv_pfn
+#undef clAmdBlasSscal
+//#define clAmdBlasSscal clAmdBlasSscal_pfn
+#undef clAmdBlasSspmv
+//#define clAmdBlasSspmv clAmdBlasSspmv_pfn
+#undef clAmdBlasSspr
+//#define clAmdBlasSspr clAmdBlasSspr_pfn
+#undef clAmdBlasSspr2
+//#define clAmdBlasSspr2 clAmdBlasSspr2_pfn
+#undef clAmdBlasSswap
+//#define clAmdBlasSswap clAmdBlasSswap_pfn
+#undef clAmdBlasSsymm
+//#define clAmdBlasSsymm clAmdBlasSsymm_pfn
+#undef clAmdBlasSsymv
+//#define clAmdBlasSsymv clAmdBlasSsymv_pfn
+#undef clAmdBlasSsymvEx
+//#define clAmdBlasSsymvEx clAmdBlasSsymvEx_pfn
+#undef clAmdBlasSsyr
+//#define clAmdBlasSsyr clAmdBlasSsyr_pfn
+#undef clAmdBlasSsyr2
+//#define clAmdBlasSsyr2 clAmdBlasSsyr2_pfn
+#undef clAmdBlasSsyr2k
+//#define clAmdBlasSsyr2k clAmdBlasSsyr2k_pfn
+#undef clAmdBlasSsyr2kEx
+//#define clAmdBlasSsyr2kEx clAmdBlasSsyr2kEx_pfn
+#undef clAmdBlasSsyrk
+//#define clAmdBlasSsyrk clAmdBlasSsyrk_pfn
+#undef clAmdBlasSsyrkEx
+//#define clAmdBlasSsyrkEx clAmdBlasSsyrkEx_pfn
+#undef clAmdBlasStbmv
+//#define clAmdBlasStbmv clAmdBlasStbmv_pfn
+#undef clAmdBlasStbsv
+//#define clAmdBlasStbsv clAmdBlasStbsv_pfn
+#undef clAmdBlasStpmv
+//#define clAmdBlasStpmv clAmdBlasStpmv_pfn
+#undef clAmdBlasStpsv
+//#define clAmdBlasStpsv clAmdBlasStpsv_pfn
+#undef clAmdBlasStrmm
+//#define clAmdBlasStrmm clAmdBlasStrmm_pfn
+#undef clAmdBlasStrmmEx
+//#define clAmdBlasStrmmEx clAmdBlasStrmmEx_pfn
+#undef clAmdBlasStrmv
+//#define clAmdBlasStrmv clAmdBlasStrmv_pfn
+#undef clAmdBlasStrsm
+//#define clAmdBlasStrsm clAmdBlasStrsm_pfn
+#undef clAmdBlasStrsmEx
+//#define clAmdBlasStrsmEx clAmdBlasStrsmEx_pfn
+#undef clAmdBlasStrsv
+//#define clAmdBlasStrsv clAmdBlasStrsv_pfn
+#undef clAmdBlasTeardown
+#define clAmdBlasTeardown clAmdBlasTeardown_pfn
+#undef clAmdBlasZaxpy
+//#define clAmdBlasZaxpy clAmdBlasZaxpy_pfn
+#undef clAmdBlasZcopy
+//#define clAmdBlasZcopy clAmdBlasZcopy_pfn
+#undef clAmdBlasZdotc
+//#define clAmdBlasZdotc clAmdBlasZdotc_pfn
+#undef clAmdBlasZdotu
+//#define clAmdBlasZdotu clAmdBlasZdotu_pfn
+#undef clAmdBlasZdrot
+//#define clAmdBlasZdrot clAmdBlasZdrot_pfn
+#undef clAmdBlasZdscal
+//#define clAmdBlasZdscal clAmdBlasZdscal_pfn
+#undef clAmdBlasZgbmv
+//#define clAmdBlasZgbmv clAmdBlasZgbmv_pfn
+#undef clAmdBlasZgemm
+//#define clAmdBlasZgemm clAmdBlasZgemm_pfn
+#undef clAmdBlasZgemmEx
+#define clAmdBlasZgemmEx clAmdBlasZgemmEx_pfn
+#undef clAmdBlasZgemv
+//#define clAmdBlasZgemv clAmdBlasZgemv_pfn
+#undef clAmdBlasZgemvEx
+//#define clAmdBlasZgemvEx clAmdBlasZgemvEx_pfn
+#undef clAmdBlasZgerc
+//#define clAmdBlasZgerc clAmdBlasZgerc_pfn
+#undef clAmdBlasZgeru
+//#define clAmdBlasZgeru clAmdBlasZgeru_pfn
+#undef clAmdBlasZhbmv
+//#define clAmdBlasZhbmv clAmdBlasZhbmv_pfn
+#undef clAmdBlasZhemm
+//#define clAmdBlasZhemm clAmdBlasZhemm_pfn
+#undef clAmdBlasZhemv
+//#define clAmdBlasZhemv clAmdBlasZhemv_pfn
+#undef clAmdBlasZher
+//#define clAmdBlasZher clAmdBlasZher_pfn
+#undef clAmdBlasZher2
+//#define clAmdBlasZher2 clAmdBlasZher2_pfn
+#undef clAmdBlasZher2k
+//#define clAmdBlasZher2k clAmdBlasZher2k_pfn
+#undef clAmdBlasZherk
+//#define clAmdBlasZherk clAmdBlasZherk_pfn
+#undef clAmdBlasZhpmv
+//#define clAmdBlasZhpmv clAmdBlasZhpmv_pfn
+#undef clAmdBlasZhpr
+//#define clAmdBlasZhpr clAmdBlasZhpr_pfn
+#undef clAmdBlasZhpr2
+//#define clAmdBlasZhpr2 clAmdBlasZhpr2_pfn
+#undef clAmdBlasZrotg
+//#define clAmdBlasZrotg clAmdBlasZrotg_pfn
+#undef clAmdBlasZscal
+//#define clAmdBlasZscal clAmdBlasZscal_pfn
+#undef clAmdBlasZswap
+//#define clAmdBlasZswap clAmdBlasZswap_pfn
+#undef clAmdBlasZsymm
+//#define clAmdBlasZsymm clAmdBlasZsymm_pfn
+#undef clAmdBlasZsyr2k
+//#define clAmdBlasZsyr2k clAmdBlasZsyr2k_pfn
+#undef clAmdBlasZsyr2kEx
+//#define clAmdBlasZsyr2kEx clAmdBlasZsyr2kEx_pfn
+#undef clAmdBlasZsyrk
+//#define clAmdBlasZsyrk clAmdBlasZsyrk_pfn
+#undef clAmdBlasZsyrkEx
+//#define clAmdBlasZsyrkEx clAmdBlasZsyrkEx_pfn
+#undef clAmdBlasZtbmv
+//#define clAmdBlasZtbmv clAmdBlasZtbmv_pfn
+#undef clAmdBlasZtbsv
+//#define clAmdBlasZtbsv clAmdBlasZtbsv_pfn
+#undef clAmdBlasZtpmv
+//#define clAmdBlasZtpmv clAmdBlasZtpmv_pfn
+#undef clAmdBlasZtpsv
+//#define clAmdBlasZtpsv clAmdBlasZtpsv_pfn
+#undef clAmdBlasZtrmm
+//#define clAmdBlasZtrmm clAmdBlasZtrmm_pfn
+#undef clAmdBlasZtrmmEx
+//#define clAmdBlasZtrmmEx clAmdBlasZtrmmEx_pfn
+#undef clAmdBlasZtrmv
+//#define clAmdBlasZtrmv clAmdBlasZtrmv_pfn
+#undef clAmdBlasZtrsm
+//#define clAmdBlasZtrsm clAmdBlasZtrsm_pfn
+#undef clAmdBlasZtrsmEx
+//#define clAmdBlasZtrsmEx clAmdBlasZtrsmEx_pfn
+#undef clAmdBlasZtrsv
+//#define clAmdBlasZtrsv clAmdBlasZtrsv_pfn
+#undef clAmdBlasiCamax
+//#define clAmdBlasiCamax clAmdBlasiCamax_pfn
+#undef clAmdBlasiDamax
+//#define clAmdBlasiDamax clAmdBlasiDamax_pfn
+#undef clAmdBlasiSamax
+//#define clAmdBlasiSamax clAmdBlasiSamax_pfn
+#undef clAmdBlasiZamax
+//#define clAmdBlasiZamax clAmdBlasiZamax_pfn
+
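+// generated by parser_clamdblas.py: function-pointer declarations for the clAmdBlas entry points (remapped names expand to NAME_pfn via the macros above); declarations for entry points that are not remapped are left commented out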
+//extern CL_RUNTIME_EXPORT cl_ulong (*clAmdBlasAddScratchImage)(cl_context context, size_t width, size_t height, clAmdBlasStatus* status);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCaxpy)(size_t N, cl_float2 alpha, const cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCcopy)(size_t N, const cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCdotc)(size_t N, cl_mem dotProduct, size_t offDP, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCdotu)(size_t N, cl_mem dotProduct, size_t offDP, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCgbmv)(clAmdBlasOrder order, clAmdBlasTranspose trans, size_t M, size_t N, size_t KL, size_t KU, cl_float2 alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, cl_float2 beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCgemm)(clAmdBlasOrder order, clAmdBlasTranspose transA, clAmdBlasTranspose transB, size_t M, size_t N, size_t K, FloatComplex alpha, const cl_mem A, size_t lda, const cl_mem B, size_t ldb, FloatComplex beta, cl_mem C, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCgemmEx)(clAmdBlasOrder order, clAmdBlasTranspose transA, clAmdBlasTranspose transB, size_t M, size_t N, size_t K, FloatComplex alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem B, size_t offB, size_t ldb, FloatComplex beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCgemv)(clAmdBlasOrder order, clAmdBlasTranspose transA, size_t M, size_t N, FloatComplex alpha, const cl_mem A, size_t lda, const cl_mem x, size_t offx, int incx, FloatComplex beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCgemvEx)(clAmdBlasOrder order, clAmdBlasTranspose transA, size_t M, size_t N, FloatComplex alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem x, size_t offx, int incx, FloatComplex beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCgerc)(clAmdBlasOrder order, size_t M, size_t N, cl_float2 alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCgeru)(clAmdBlasOrder order, size_t M, size_t N, cl_float2 alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasChbmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, size_t K, cl_float2 alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, cl_float2 beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasChemm)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, size_t M, size_t N, cl_float2 alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem B, size_t offb, size_t ldb, cl_float2 beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasChemv)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, FloatComplex alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, FloatComplex beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCher)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_float alpha, const cl_mem X, size_t offx, int incx, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCher2)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_float2 alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCher2k)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, size_t N, size_t K, FloatComplex alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem B, size_t offb, size_t ldb, cl_float beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCherk)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transA, size_t N, size_t K, float alpha, const cl_mem A, size_t offa, size_t lda, float beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasChpmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_float2 alpha, const cl_mem AP, size_t offa, const cl_mem X, size_t offx, int incx, cl_float2 beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasChpr)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_float alpha, const cl_mem X, size_t offx, int incx, cl_mem AP, size_t offa, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasChpr2)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_float2 alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem AP, size_t offa, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCrotg)(cl_mem CA, size_t offCA, cl_mem CB, size_t offCB, cl_mem C, size_t offC, cl_mem S, size_t offS, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCscal)(size_t N, cl_float2 alpha, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCsrot)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_float C, cl_float S, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCsscal)(size_t N, cl_float alpha, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCswap)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCsymm)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, size_t M, size_t N, cl_float2 alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem B, size_t offb, size_t ldb, cl_float2 beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCsyr2k)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transAB, size_t N, size_t K, FloatComplex alpha, const cl_mem A, size_t lda, const cl_mem B, size_t ldb, FloatComplex beta, cl_mem C, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCsyr2kEx)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transAB, size_t N, size_t K, FloatComplex alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem B, size_t offB, size_t ldb, FloatComplex beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCsyrk)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transA, size_t N, size_t K, FloatComplex alpha, const cl_mem A, size_t lda, FloatComplex beta, cl_mem C, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCsyrkEx)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transA, size_t N, size_t K, FloatComplex alpha, const cl_mem A, size_t offA, size_t lda, FloatComplex beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCtbmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, size_t K, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCtbsv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, size_t K, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCtpmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem AP, size_t offa, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCtpsv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem A, size_t offa, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCtrmm)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, FloatComplex alpha, const cl_mem A, size_t lda, cl_mem B, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCtrmmEx)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, FloatComplex alpha, const cl_mem A, size_t offA, size_t lda, cl_mem B, size_t offB, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCtrmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCtrsm)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, FloatComplex alpha, const cl_mem A, size_t lda, cl_mem B, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCtrsmEx)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, FloatComplex alpha, const cl_mem A, size_t offA, size_t lda, cl_mem B, size_t offB, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCtrsv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDasum)(size_t N, cl_mem asum, size_t offAsum, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDaxpy)(size_t N, cl_double alpha, const cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDcopy)(size_t N, const cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDdot)(size_t N, cl_mem dotProduct, size_t offDP, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDgbmv)(clAmdBlasOrder order, clAmdBlasTranspose trans, size_t M, size_t N, size_t KL, size_t KU, cl_double alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, cl_double beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDgemm)(clAmdBlasOrder order, clAmdBlasTranspose transA, clAmdBlasTranspose transB, size_t M, size_t N, size_t K, cl_double alpha, const cl_mem A, size_t lda, const cl_mem B, size_t ldb, cl_double beta, cl_mem C, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDgemmEx)(clAmdBlasOrder order, clAmdBlasTranspose transA, clAmdBlasTranspose transB, size_t M, size_t N, size_t K, cl_double alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem B, size_t offB, size_t ldb, cl_double beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDgemv)(clAmdBlasOrder order, clAmdBlasTranspose transA, size_t M, size_t N, cl_double alpha, const cl_mem A, size_t lda, const cl_mem x, size_t offx, int incx, cl_double beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDgemvEx)(clAmdBlasOrder order, clAmdBlasTranspose transA, size_t M, size_t N, cl_double alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem x, size_t offx, int incx, cl_double beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDger)(clAmdBlasOrder order, size_t M, size_t N, cl_double alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDnrm2)(size_t N, cl_mem NRM2, size_t offNRM2, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDrot)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_double C, cl_double S, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDrotg)(cl_mem DA, size_t offDA, cl_mem DB, size_t offDB, cl_mem C, size_t offC, cl_mem S, size_t offS, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDrotm)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, const cl_mem DPARAM, size_t offDparam, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDrotmg)(cl_mem DD1, size_t offDD1, cl_mem DD2, size_t offDD2, cl_mem DX1, size_t offDX1, const cl_mem DY1, size_t offDY1, cl_mem DPARAM, size_t offDparam, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDsbmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, size_t K, cl_double alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, cl_double beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDscal)(size_t N, cl_double alpha, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDspmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_double alpha, const cl_mem AP, size_t offa, const cl_mem X, size_t offx, int incx, cl_double beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDspr)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_double alpha, const cl_mem X, size_t offx, int incx, cl_mem AP, size_t offa, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDspr2)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_double alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem AP, size_t offa, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDswap)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDsymm)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, size_t M, size_t N, cl_double alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem B, size_t offb, size_t ldb, cl_double beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDsymv)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_double alpha, const cl_mem A, size_t lda, const cl_mem x, size_t offx, int incx, cl_double beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDsymvEx)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_double alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem x, size_t offx, int incx, cl_double beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDsyr)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_double alpha, const cl_mem X, size_t offx, int incx, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDsyr2)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_double alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDsyr2k)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transAB, size_t N, size_t K, cl_double alpha, const cl_mem A, size_t lda, const cl_mem B, size_t ldb, cl_double beta, cl_mem C, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDsyr2kEx)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transAB, size_t N, size_t K, cl_double alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem B, size_t offB, size_t ldb, cl_double beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDsyrk)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transA, size_t N, size_t K, cl_double alpha, const cl_mem A, size_t lda, cl_double beta, cl_mem C, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDsyrkEx)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transA, size_t N, size_t K, cl_double alpha, const cl_mem A, size_t offA, size_t lda, cl_double beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDtbmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, size_t K, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDtbsv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, size_t K, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDtpmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem AP, size_t offa, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDtpsv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem A, size_t offa, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDtrmm)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, cl_double alpha, const cl_mem A, size_t lda, cl_mem B, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDtrmmEx)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, cl_double alpha, const cl_mem A, size_t offA, size_t lda, cl_mem B, size_t offB, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDtrmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDtrsm)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, cl_double alpha, const cl_mem A, size_t lda, cl_mem B, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDtrsmEx)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, cl_double alpha, const cl_mem A, size_t offA, size_t lda, cl_mem B, size_t offB, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDtrsv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDzasum)(size_t N, cl_mem asum, size_t offAsum, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDznrm2)(size_t N, cl_mem NRM2, size_t offNRM2, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasGetVersion)(cl_uint* major, cl_uint* minor, cl_uint* patch);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasRemoveScratchImage)(cl_ulong imageID);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSasum)(size_t N, cl_mem asum, size_t offAsum, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSaxpy)(size_t N, cl_float alpha, const cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasScasum)(size_t N, cl_mem asum, size_t offAsum, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasScnrm2)(size_t N, cl_mem NRM2, size_t offNRM2, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasScopy)(size_t N, const cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSdot)(size_t N, cl_mem dotProduct, size_t offDP, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSetup)(); // Declaration only (extern): pointer to a no-argument function returning clAmdBlasStatus. NOTE(review): presumably the library initialization entry point that must run before the other calls - confirm against the clAmdBlas documentation.
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSgbmv)(clAmdBlasOrder order, clAmdBlasTranspose trans, size_t M, size_t N, size_t KL, size_t KU, cl_float alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, cl_float beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSgemm)(clAmdBlasOrder order, clAmdBlasTranspose transA, clAmdBlasTranspose transB, size_t M, size_t N, size_t K, cl_float alpha, const cl_mem A, size_t lda, const cl_mem B, size_t ldb, cl_float beta, cl_mem C, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSgemmEx)(clAmdBlasOrder order, clAmdBlasTranspose transA, clAmdBlasTranspose transB, size_t M, size_t N, size_t K, cl_float alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem B, size_t offB, size_t ldb, cl_float beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); // Declaration only (extern): same parameter list as clAmdBlasDgemmEx except alpha/beta are cl_float. NOTE(review): the pointer is defined elsewhere and presumably bound at runtime - confirm against the loader.
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSgemv)(clAmdBlasOrder order, clAmdBlasTranspose transA, size_t M, size_t N, cl_float alpha, const cl_mem A, size_t lda, const cl_mem x, size_t offx, int incx, cl_float beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSgemvEx)(clAmdBlasOrder order, clAmdBlasTranspose transA, size_t M, size_t N, cl_float alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem x, size_t offx, int incx, cl_float beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSger)(clAmdBlasOrder order, size_t M, size_t N, cl_float alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSnrm2)(size_t N, cl_mem NRM2, size_t offNRM2, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSrot)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_float C, cl_float S, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSrotg)(cl_mem SA, size_t offSA, cl_mem SB, size_t offSB, cl_mem C, size_t offC, cl_mem S, size_t offS, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSrotm)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, const cl_mem SPARAM, size_t offSparam, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSrotmg)(cl_mem SD1, size_t offSD1, cl_mem SD2, size_t offSD2, cl_mem SX1, size_t offSX1, const cl_mem SY1, size_t offSY1, cl_mem SPARAM, size_t offSparam, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSsbmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, size_t K, cl_float alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, cl_float beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSscal)(size_t N, cl_float alpha, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSspmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_float alpha, const cl_mem AP, size_t offa, const cl_mem X, size_t offx, int incx, cl_float beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSspr)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_float alpha, const cl_mem X, size_t offx, int incx, cl_mem AP, size_t offa, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSspr2)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_float alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem AP, size_t offa, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSswap)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSsymm)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, size_t M, size_t N, cl_float alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem B, size_t offb, size_t ldb, cl_float beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSsymv)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_float alpha, const cl_mem A, size_t lda, const cl_mem x, size_t offx, int incx, cl_float beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSsymvEx)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_float alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem x, size_t offx, int incx, cl_float beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSsyr)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_float alpha, const cl_mem X, size_t offx, int incx, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSsyr2)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_float alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSsyr2k)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transAB, size_t N, size_t K, cl_float alpha, const cl_mem A, size_t lda, const cl_mem B, size_t ldb, cl_float beta, cl_mem C, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSsyr2kEx)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transAB, size_t N, size_t K, cl_float alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem B, size_t offB, size_t ldb, cl_float beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSsyrk)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transA, size_t N, size_t K, cl_float alpha, const cl_mem A, size_t lda, cl_float beta, cl_mem C, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSsyrkEx)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transA, size_t N, size_t K, cl_float alpha, const cl_mem A, size_t offA, size_t lda, cl_float beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasStbmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, size_t K, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasStbsv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, size_t K, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasStpmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem AP, size_t offa, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasStpsv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem A, size_t offa, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasStrmm)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, cl_float alpha, const cl_mem A, size_t lda, cl_mem B, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasStrmmEx)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, cl_float alpha, const cl_mem A, size_t offA, size_t lda, cl_mem B, size_t offB, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasStrmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasStrsm)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, cl_float alpha, const cl_mem A, size_t lda, cl_mem B, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasStrsmEx)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, cl_float alpha, const cl_mem A, size_t offA, size_t lda, cl_mem B, size_t offB, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasStrsv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+extern CL_RUNTIME_EXPORT void (*clAmdBlasTeardown)(); // Declaration only (extern): pointer to a no-argument function returning void, so there is no status value to check. NOTE(review): presumably the counterpart of clAmdBlasSetup - confirm against the clAmdBlas documentation.
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZaxpy)(size_t N, cl_double2 alpha, const cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZcopy)(size_t N, const cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZdotc)(size_t N, cl_mem dotProduct, size_t offDP, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZdotu)(size_t N, cl_mem dotProduct, size_t offDP, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZdrot)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_double C, cl_double S, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZdscal)(size_t N, cl_double alpha, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZgbmv)(clAmdBlasOrder order, clAmdBlasTranspose trans, size_t M, size_t N, size_t KL, size_t KU, cl_double2 alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, cl_double2 beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZgemm)(clAmdBlasOrder order, clAmdBlasTranspose transA, clAmdBlasTranspose transB, size_t M, size_t N, size_t K, DoubleComplex alpha, const cl_mem A, size_t lda, const cl_mem B, size_t ldb, DoubleComplex beta, cl_mem C, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZgemmEx)(clAmdBlasOrder order, clAmdBlasTranspose transA, clAmdBlasTranspose transB, size_t M, size_t N, size_t K, DoubleComplex alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem B, size_t offB, size_t ldb, DoubleComplex beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events); // Declaration only (extern): same parameter list as clAmdBlasDgemmEx except alpha/beta are DoubleComplex (type declared outside this view). NOTE(review): the pointer is defined elsewhere and presumably bound at runtime - confirm against the loader.
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZgemv)(clAmdBlasOrder order, clAmdBlasTranspose transA, size_t M, size_t N, DoubleComplex alpha, const cl_mem A, size_t lda, const cl_mem x, size_t offx, int incx, DoubleComplex beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZgemvEx)(clAmdBlasOrder order, clAmdBlasTranspose transA, size_t M, size_t N, DoubleComplex alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem x, size_t offx, int incx, DoubleComplex beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZgerc)(clAmdBlasOrder order, size_t M, size_t N, cl_double2 alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZgeru)(clAmdBlasOrder order, size_t M, size_t N, cl_double2 alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZhbmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, size_t K, cl_double2 alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, cl_double2 beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZhemm)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, size_t M, size_t N, cl_double2 alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem B, size_t offb, size_t ldb, cl_double2 beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZhemv)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, DoubleComplex alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, DoubleComplex beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZher)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_double alpha, const cl_mem X, size_t offx, int incx, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZher2)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_double2 alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZher2k)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, size_t N, size_t K, DoubleComplex alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem B, size_t offb, size_t ldb, cl_double beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZherk)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transA, size_t N, size_t K, double alpha, const cl_mem A, size_t offa, size_t lda, double beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZhpmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_double2 alpha, const cl_mem AP, size_t offa, const cl_mem X, size_t offx, int incx, cl_double2 beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZhpr)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_double alpha, const cl_mem X, size_t offx, int incx, cl_mem AP, size_t offa, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZhpr2)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_double2 alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem AP, size_t offa, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZrotg)(cl_mem CA, size_t offCA, cl_mem CB, size_t offCB, cl_mem C, size_t offC, cl_mem S, size_t offS, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZscal)(size_t N, cl_double2 alpha, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZswap)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZsymm)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, size_t M, size_t N, cl_double2 alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem B, size_t offb, size_t ldb, cl_double2 beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZsyr2k)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transAB, size_t N, size_t K, DoubleComplex alpha, const cl_mem A, size_t lda, const cl_mem B, size_t ldb, DoubleComplex beta, cl_mem C, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZsyr2kEx)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transAB, size_t N, size_t K, DoubleComplex alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem B, size_t offB, size_t ldb, DoubleComplex beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZsyrk)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transA, size_t N, size_t K, DoubleComplex alpha, const cl_mem A, size_t lda, DoubleComplex beta, cl_mem C, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZsyrkEx)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transA, size_t N, size_t K, DoubleComplex alpha, const cl_mem A, size_t offA, size_t lda, DoubleComplex beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZtbmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, size_t K, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZtbsv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, size_t K, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZtpmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem AP, size_t offa, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZtpsv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem A, size_t offa, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZtrmm)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, DoubleComplex alpha, const cl_mem A, size_t lda, cl_mem B, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZtrmmEx)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, DoubleComplex alpha, const cl_mem A, size_t offA, size_t lda, cl_mem B, size_t offB, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZtrmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZtrsm)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, DoubleComplex alpha, const cl_mem A, size_t lda, cl_mem B, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZtrsmEx)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, DoubleComplex alpha, const cl_mem A, size_t offA, size_t lda, cl_mem B, size_t offB, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZtrsv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasiCamax)(size_t N, cl_mem iMax, size_t offiMax, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasiDamax)(size_t N, cl_mem iMax, size_t offiMax, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasiSamax)(size_t N, cl_mem iMax, size_t offiMax, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
+//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasiZamax)(size_t N, cl_mem iMax, size_t offiMax, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
diff --git a/IPL/include/opencv/opencv2/core/opencl/runtime/autogenerated/opencl_clamdfft.hpp b/IPL/include/opencv/opencv2/core/opencl/runtime/autogenerated/opencl_clamdfft.hpp
new file mode 100644
index 0000000..1457d7e
--- /dev/null
+++ b/IPL/include/opencv/opencv2/core/opencl/runtime/autogenerated/opencl_clamdfft.hpp
@@ -0,0 +1,142 @@
+// clAmdFft entry points declared as extern function pointers (alias macros, *_pfn redirects, pointer declarations).
+// AUTOGENERATED, DO NOT EDIT
+// Not standalone: compilation stops with "Invalid usage" unless OPENCV_CORE_OCL_RUNTIME_CLAMDFFT_HPP is already defined.
+#ifndef OPENCV_CORE_OCL_RUNTIME_CLAMDFFT_HPP
+#error "Invalid usage"
+#endif
+
+// generated by parser_clamdfft.py: alias each clAmdFft name to a trailing-underscore spelling so that every occurrence inside <clAmdFft.h> (included next) is renamed
+#define clAmdFftBakePlan clAmdFftBakePlan_
+#define clAmdFftCopyPlan clAmdFftCopyPlan_
+#define clAmdFftCreateDefaultPlan clAmdFftCreateDefaultPlan_
+#define clAmdFftDestroyPlan clAmdFftDestroyPlan_
+#define clAmdFftEnqueueTransform clAmdFftEnqueueTransform_
+#define clAmdFftGetLayout clAmdFftGetLayout_
+#define clAmdFftGetPlanBatchSize clAmdFftGetPlanBatchSize_
+#define clAmdFftGetPlanContext clAmdFftGetPlanContext_
+#define clAmdFftGetPlanDim clAmdFftGetPlanDim_
+#define clAmdFftGetPlanDistance clAmdFftGetPlanDistance_
+#define clAmdFftGetPlanInStride clAmdFftGetPlanInStride_
+#define clAmdFftGetPlanLength clAmdFftGetPlanLength_
+#define clAmdFftGetPlanOutStride clAmdFftGetPlanOutStride_
+#define clAmdFftGetPlanPrecision clAmdFftGetPlanPrecision_
+#define clAmdFftGetPlanScale clAmdFftGetPlanScale_
+#define clAmdFftGetPlanTransposeResult clAmdFftGetPlanTransposeResult_
+#define clAmdFftGetResultLocation clAmdFftGetResultLocation_
+#define clAmdFftGetTmpBufSize clAmdFftGetTmpBufSize_
+#define clAmdFftGetVersion clAmdFftGetVersion_
+#define clAmdFftSetLayout clAmdFftSetLayout_
+#define clAmdFftSetPlanBatchSize clAmdFftSetPlanBatchSize_
+#define clAmdFftSetPlanDim clAmdFftSetPlanDim_
+#define clAmdFftSetPlanDistance clAmdFftSetPlanDistance_
+#define clAmdFftSetPlanInStride clAmdFftSetPlanInStride_
+#define clAmdFftSetPlanLength clAmdFftSetPlanLength_
+#define clAmdFftSetPlanOutStride clAmdFftSetPlanOutStride_
+#define clAmdFftSetPlanPrecision clAmdFftSetPlanPrecision_
+#define clAmdFftSetPlanScale clAmdFftSetPlanScale_
+#define clAmdFftSetPlanTransposeResult clAmdFftSetPlanTransposeResult_
+#define clAmdFftSetResultLocation clAmdFftSetResultLocation_
+#define clAmdFftSetup clAmdFftSetup_
+#define clAmdFftTeardown clAmdFftTeardown_
+
+#include <clAmdFft.h>
+
+// generated by parser_clamdfft.py: drop each trailing-underscore alias, then redirect the name to its *_pfn symbol; where the redirect is commented out the name is left without a macro
+#undef clAmdFftBakePlan
+#define clAmdFftBakePlan clAmdFftBakePlan_pfn
+#undef clAmdFftCopyPlan
+//#define clAmdFftCopyPlan clAmdFftCopyPlan_pfn
+#undef clAmdFftCreateDefaultPlan
+#define clAmdFftCreateDefaultPlan clAmdFftCreateDefaultPlan_pfn
+#undef clAmdFftDestroyPlan
+#define clAmdFftDestroyPlan clAmdFftDestroyPlan_pfn
+#undef clAmdFftEnqueueTransform
+#define clAmdFftEnqueueTransform clAmdFftEnqueueTransform_pfn
+#undef clAmdFftGetLayout
+//#define clAmdFftGetLayout clAmdFftGetLayout_pfn
+#undef clAmdFftGetPlanBatchSize
+//#define clAmdFftGetPlanBatchSize clAmdFftGetPlanBatchSize_pfn
+#undef clAmdFftGetPlanContext
+//#define clAmdFftGetPlanContext clAmdFftGetPlanContext_pfn
+#undef clAmdFftGetPlanDim
+//#define clAmdFftGetPlanDim clAmdFftGetPlanDim_pfn
+#undef clAmdFftGetPlanDistance
+//#define clAmdFftGetPlanDistance clAmdFftGetPlanDistance_pfn
+#undef clAmdFftGetPlanInStride
+//#define clAmdFftGetPlanInStride clAmdFftGetPlanInStride_pfn
+#undef clAmdFftGetPlanLength
+//#define clAmdFftGetPlanLength clAmdFftGetPlanLength_pfn
+#undef clAmdFftGetPlanOutStride
+//#define clAmdFftGetPlanOutStride clAmdFftGetPlanOutStride_pfn
+#undef clAmdFftGetPlanPrecision
+//#define clAmdFftGetPlanPrecision clAmdFftGetPlanPrecision_pfn
+#undef clAmdFftGetPlanScale
+//#define clAmdFftGetPlanScale clAmdFftGetPlanScale_pfn
+#undef clAmdFftGetPlanTransposeResult
+//#define clAmdFftGetPlanTransposeResult clAmdFftGetPlanTransposeResult_pfn
+#undef clAmdFftGetResultLocation
+//#define clAmdFftGetResultLocation clAmdFftGetResultLocation_pfn
+#undef clAmdFftGetTmpBufSize
+#define clAmdFftGetTmpBufSize clAmdFftGetTmpBufSize_pfn
+#undef clAmdFftGetVersion
+#define clAmdFftGetVersion clAmdFftGetVersion_pfn
+#undef clAmdFftSetLayout
+#define clAmdFftSetLayout clAmdFftSetLayout_pfn
+#undef clAmdFftSetPlanBatchSize
+#define clAmdFftSetPlanBatchSize clAmdFftSetPlanBatchSize_pfn
+#undef clAmdFftSetPlanDim
+//#define clAmdFftSetPlanDim clAmdFftSetPlanDim_pfn
+#undef clAmdFftSetPlanDistance
+#define clAmdFftSetPlanDistance clAmdFftSetPlanDistance_pfn
+#undef clAmdFftSetPlanInStride
+#define clAmdFftSetPlanInStride clAmdFftSetPlanInStride_pfn
+#undef clAmdFftSetPlanLength
+//#define clAmdFftSetPlanLength clAmdFftSetPlanLength_pfn
+#undef clAmdFftSetPlanOutStride
+#define clAmdFftSetPlanOutStride clAmdFftSetPlanOutStride_pfn
+#undef clAmdFftSetPlanPrecision
+#define clAmdFftSetPlanPrecision clAmdFftSetPlanPrecision_pfn
+#undef clAmdFftSetPlanScale
+#define clAmdFftSetPlanScale clAmdFftSetPlanScale_pfn
+#undef clAmdFftSetPlanTransposeResult
+//#define clAmdFftSetPlanTransposeResult clAmdFftSetPlanTransposeResult_pfn
+#undef clAmdFftSetResultLocation
+#define clAmdFftSetResultLocation clAmdFftSetResultLocation_pfn
+#undef clAmdFftSetup
+#define clAmdFftSetup clAmdFftSetup_pfn
+#undef clAmdFftTeardown
+#define clAmdFftTeardown clAmdFftTeardown_pfn
+
+// generated by parser_clamdfft.py: extern function-pointer declarations; each active name is a macro for its *_pfn spelling, and the commented-out entries match the commented-out redirects
+extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftBakePlan)(clAmdFftPlanHandle plHandle, cl_uint numQueues, cl_command_queue* commQueueFFT, void (CL_CALLBACK* pfn_notify) (clAmdFftPlanHandle plHandle, void* user_data), void* user_data);
+//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftCopyPlan)(clAmdFftPlanHandle* out_plHandle, cl_context new_context, clAmdFftPlanHandle in_plHandle);
+extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftCreateDefaultPlan)(clAmdFftPlanHandle* plHandle, cl_context context, const clAmdFftDim dim, const size_t* clLengths);
+extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftDestroyPlan)(clAmdFftPlanHandle* plHandle);
+extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftEnqueueTransform)(clAmdFftPlanHandle plHandle, clAmdFftDirection dir, cl_uint numQueuesAndEvents, cl_command_queue* commQueues, cl_uint numWaitEvents, const cl_event* waitEvents, cl_event* outEvents, cl_mem* inputBuffers, cl_mem* outputBuffers, cl_mem tmpBuffer);
+//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetLayout)(const clAmdFftPlanHandle plHandle, clAmdFftLayout* iLayout, clAmdFftLayout* oLayout);
+//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetPlanBatchSize)(const clAmdFftPlanHandle plHandle, size_t* batchSize);
+//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetPlanContext)(const clAmdFftPlanHandle plHandle, cl_context* context);
+//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetPlanDim)(const clAmdFftPlanHandle plHandle, clAmdFftDim* dim, cl_uint* size);
+//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetPlanDistance)(const clAmdFftPlanHandle plHandle, size_t* iDist, size_t* oDist);
+//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetPlanInStride)(const clAmdFftPlanHandle plHandle, const clAmdFftDim dim, size_t* clStrides);
+//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetPlanLength)(const clAmdFftPlanHandle plHandle, const clAmdFftDim dim, size_t* clLengths);
+//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetPlanOutStride)(const clAmdFftPlanHandle plHandle, const clAmdFftDim dim, size_t* clStrides);
+//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetPlanPrecision)(const clAmdFftPlanHandle plHandle, clAmdFftPrecision* precision);
+//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetPlanScale)(const clAmdFftPlanHandle plHandle, clAmdFftDirection dir, cl_float* scale);
+//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetPlanTransposeResult)(const clAmdFftPlanHandle plHandle, clAmdFftResultTransposed* transposed);
+//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetResultLocation)(const clAmdFftPlanHandle plHandle, clAmdFftResultLocation* placeness);
+extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetTmpBufSize)(const clAmdFftPlanHandle plHandle, size_t* buffersize);
+extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetVersion)(cl_uint* major, cl_uint* minor, cl_uint* patch);
+extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetLayout)(clAmdFftPlanHandle plHandle, clAmdFftLayout iLayout, clAmdFftLayout oLayout);
+extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanBatchSize)(clAmdFftPlanHandle plHandle, size_t batchSize);
+//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanDim)(clAmdFftPlanHandle plHandle, const clAmdFftDim dim);
+extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanDistance)(clAmdFftPlanHandle plHandle, size_t iDist, size_t oDist);
+extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanInStride)(clAmdFftPlanHandle plHandle, const clAmdFftDim dim, size_t* clStrides);
+//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanLength)(clAmdFftPlanHandle plHandle, const clAmdFftDim dim, const size_t* clLengths);
+extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanOutStride)(clAmdFftPlanHandle plHandle, const clAmdFftDim dim, size_t* clStrides);
+extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanPrecision)(clAmdFftPlanHandle plHandle, clAmdFftPrecision precision);
+extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanScale)(clAmdFftPlanHandle plHandle, clAmdFftDirection dir, cl_float scale);
+//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanTransposeResult)(clAmdFftPlanHandle plHandle, clAmdFftResultTransposed transposed);
+extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetResultLocation)(clAmdFftPlanHandle plHandle, clAmdFftResultLocation placeness);
+extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetup)(const clAmdFftSetupData* setupData);
+extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftTeardown)();
diff --git a/IPL/include/opencv/opencv2/core/opencl/runtime/autogenerated/opencl_core.hpp b/IPL/include/opencv/opencv2/core/opencl/runtime/autogenerated/opencl_core.hpp
new file mode 100644
index 0000000..28618a1
--- /dev/null
+++ b/IPL/include/opencv/opencv2/core/opencl/runtime/autogenerated/opencl_core.hpp
@@ -0,0 +1,371 @@
+// OpenCL core entry points declared as extern function pointers (alias macros, *_pfn redirects, pointer declarations).
+// AUTOGENERATED, DO NOT EDIT
+// Not standalone: compilation stops with "Invalid usage" unless OPENCV_CORE_OCL_RUNTIME_OPENCL_CORE_HPP is already defined.
+#ifndef OPENCV_CORE_OCL_RUNTIME_OPENCL_CORE_HPP
+#error "Invalid usage"
+#endif
+
+// generated by parser_cl.py: alias each OpenCL entry point to a trailing-underscore spelling so that every occurrence inside the OpenCL header included next is renamed
+#define clBuildProgram clBuildProgram_
+#define clCompileProgram clCompileProgram_
+#define clCreateBuffer clCreateBuffer_
+#define clCreateCommandQueue clCreateCommandQueue_
+#define clCreateContext clCreateContext_
+#define clCreateContextFromType clCreateContextFromType_
+#define clCreateImage clCreateImage_
+#define clCreateImage2D clCreateImage2D_
+#define clCreateImage3D clCreateImage3D_
+#define clCreateKernel clCreateKernel_
+#define clCreateKernelsInProgram clCreateKernelsInProgram_
+#define clCreateProgramWithBinary clCreateProgramWithBinary_
+#define clCreateProgramWithBuiltInKernels clCreateProgramWithBuiltInKernels_
+#define clCreateProgramWithSource clCreateProgramWithSource_
+#define clCreateSampler clCreateSampler_
+#define clCreateSubBuffer clCreateSubBuffer_
+#define clCreateSubDevices clCreateSubDevices_
+#define clCreateUserEvent clCreateUserEvent_
+#define clEnqueueBarrier clEnqueueBarrier_
+#define clEnqueueBarrierWithWaitList clEnqueueBarrierWithWaitList_
+#define clEnqueueCopyBuffer clEnqueueCopyBuffer_
+#define clEnqueueCopyBufferRect clEnqueueCopyBufferRect_
+#define clEnqueueCopyBufferToImage clEnqueueCopyBufferToImage_
+#define clEnqueueCopyImage clEnqueueCopyImage_
+#define clEnqueueCopyImageToBuffer clEnqueueCopyImageToBuffer_
+#define clEnqueueFillBuffer clEnqueueFillBuffer_
+#define clEnqueueFillImage clEnqueueFillImage_
+#define clEnqueueMapBuffer clEnqueueMapBuffer_
+#define clEnqueueMapImage clEnqueueMapImage_
+#define clEnqueueMarker clEnqueueMarker_
+#define clEnqueueMarkerWithWaitList clEnqueueMarkerWithWaitList_
+#define clEnqueueMigrateMemObjects clEnqueueMigrateMemObjects_
+#define clEnqueueNDRangeKernel clEnqueueNDRangeKernel_
+#define clEnqueueNativeKernel clEnqueueNativeKernel_
+#define clEnqueueReadBuffer clEnqueueReadBuffer_
+#define clEnqueueReadBufferRect clEnqueueReadBufferRect_
+#define clEnqueueReadImage clEnqueueReadImage_
+#define clEnqueueTask clEnqueueTask_
+#define clEnqueueUnmapMemObject clEnqueueUnmapMemObject_
+#define clEnqueueWaitForEvents clEnqueueWaitForEvents_
+#define clEnqueueWriteBuffer clEnqueueWriteBuffer_
+#define clEnqueueWriteBufferRect clEnqueueWriteBufferRect_
+#define clEnqueueWriteImage clEnqueueWriteImage_
+#define clFinish clFinish_
+#define clFlush clFlush_
+#define clGetCommandQueueInfo clGetCommandQueueInfo_
+#define clGetContextInfo clGetContextInfo_
+#define clGetDeviceIDs clGetDeviceIDs_
+#define clGetDeviceInfo clGetDeviceInfo_
+#define clGetEventInfo clGetEventInfo_
+#define clGetEventProfilingInfo clGetEventProfilingInfo_
+#define clGetExtensionFunctionAddress clGetExtensionFunctionAddress_
+#define clGetExtensionFunctionAddressForPlatform clGetExtensionFunctionAddressForPlatform_
+#define clGetImageInfo clGetImageInfo_
+#define clGetKernelArgInfo clGetKernelArgInfo_
+#define clGetKernelInfo clGetKernelInfo_
+#define clGetKernelWorkGroupInfo clGetKernelWorkGroupInfo_
+#define clGetMemObjectInfo clGetMemObjectInfo_
+#define clGetPlatformIDs clGetPlatformIDs_
+#define clGetPlatformInfo clGetPlatformInfo_
+#define clGetProgramBuildInfo clGetProgramBuildInfo_
+#define clGetProgramInfo clGetProgramInfo_
+#define clGetSamplerInfo clGetSamplerInfo_
+#define clGetSupportedImageFormats clGetSupportedImageFormats_
+#define clLinkProgram clLinkProgram_
+#define clReleaseCommandQueue clReleaseCommandQueue_
+#define clReleaseContext clReleaseContext_
+#define clReleaseDevice clReleaseDevice_
+#define clReleaseEvent clReleaseEvent_
+#define clReleaseKernel clReleaseKernel_
+#define clReleaseMemObject clReleaseMemObject_
+#define clReleaseProgram clReleaseProgram_
+#define clReleaseSampler clReleaseSampler_
+#define clRetainCommandQueue clRetainCommandQueue_
+#define clRetainContext clRetainContext_
+#define clRetainDevice clRetainDevice_
+#define clRetainEvent clRetainEvent_
+#define clRetainKernel clRetainKernel_
+#define clRetainMemObject clRetainMemObject_
+#define clRetainProgram clRetainProgram_
+#define clRetainSampler clRetainSampler_
+#define clSetEventCallback clSetEventCallback_
+#define clSetKernelArg clSetKernelArg_
+#define clSetMemObjectDestructorCallback clSetMemObjectDestructorCallback_
+#define clSetUserEventStatus clSetUserEventStatus_
+#define clUnloadCompiler clUnloadCompiler_
+#define clUnloadPlatformCompiler clUnloadPlatformCompiler_
+#define clWaitForEvents clWaitForEvents_
+
+#if defined __APPLE__
+#define CL_SILENCE_DEPRECATION
+#include <OpenCL/cl.h>
+#else
+#include <CL/cl.h>
+#endif
+
+// generated by parser_cl.py: drop each trailing-underscore alias, then redirect the name to its *_pfn symbol
+#undef clBuildProgram
+#define clBuildProgram clBuildProgram_pfn
+#undef clCompileProgram
+#define clCompileProgram clCompileProgram_pfn
+#undef clCreateBuffer
+#define clCreateBuffer clCreateBuffer_pfn
+#undef clCreateCommandQueue
+#define clCreateCommandQueue clCreateCommandQueue_pfn
+#undef clCreateContext
+#define clCreateContext clCreateContext_pfn
+#undef clCreateContextFromType
+#define clCreateContextFromType clCreateContextFromType_pfn
+#undef clCreateImage
+#define clCreateImage clCreateImage_pfn
+#undef clCreateImage2D
+#define clCreateImage2D clCreateImage2D_pfn
+#undef clCreateImage3D
+#define clCreateImage3D clCreateImage3D_pfn
+#undef clCreateKernel
+#define clCreateKernel clCreateKernel_pfn
+#undef clCreateKernelsInProgram
+#define clCreateKernelsInProgram clCreateKernelsInProgram_pfn
+#undef clCreateProgramWithBinary
+#define clCreateProgramWithBinary clCreateProgramWithBinary_pfn
+#undef clCreateProgramWithBuiltInKernels
+#define clCreateProgramWithBuiltInKernels clCreateProgramWithBuiltInKernels_pfn
+#undef clCreateProgramWithSource
+#define clCreateProgramWithSource clCreateProgramWithSource_pfn
+#undef clCreateSampler
+#define clCreateSampler clCreateSampler_pfn
+#undef clCreateSubBuffer
+#define clCreateSubBuffer clCreateSubBuffer_pfn
+#undef clCreateSubDevices
+#define clCreateSubDevices clCreateSubDevices_pfn
+#undef clCreateUserEvent
+#define clCreateUserEvent clCreateUserEvent_pfn
+#undef clEnqueueBarrier
+#define clEnqueueBarrier clEnqueueBarrier_pfn
+#undef clEnqueueBarrierWithWaitList
+#define clEnqueueBarrierWithWaitList clEnqueueBarrierWithWaitList_pfn
+#undef clEnqueueCopyBuffer
+#define clEnqueueCopyBuffer clEnqueueCopyBuffer_pfn
+#undef clEnqueueCopyBufferRect
+#define clEnqueueCopyBufferRect clEnqueueCopyBufferRect_pfn
+#undef clEnqueueCopyBufferToImage
+#define clEnqueueCopyBufferToImage clEnqueueCopyBufferToImage_pfn
+#undef clEnqueueCopyImage
+#define clEnqueueCopyImage clEnqueueCopyImage_pfn
+#undef clEnqueueCopyImageToBuffer
+#define clEnqueueCopyImageToBuffer clEnqueueCopyImageToBuffer_pfn
+#undef clEnqueueFillBuffer
+#define clEnqueueFillBuffer clEnqueueFillBuffer_pfn
+#undef clEnqueueFillImage
+#define clEnqueueFillImage clEnqueueFillImage_pfn
+#undef clEnqueueMapBuffer
+#define clEnqueueMapBuffer clEnqueueMapBuffer_pfn
+#undef clEnqueueMapImage
+#define clEnqueueMapImage clEnqueueMapImage_pfn
+#undef clEnqueueMarker
+#define clEnqueueMarker clEnqueueMarker_pfn
+#undef clEnqueueMarkerWithWaitList
+#define clEnqueueMarkerWithWaitList clEnqueueMarkerWithWaitList_pfn
+#undef clEnqueueMigrateMemObjects
+#define clEnqueueMigrateMemObjects clEnqueueMigrateMemObjects_pfn
+#undef clEnqueueNDRangeKernel
+#define clEnqueueNDRangeKernel clEnqueueNDRangeKernel_pfn
+#undef clEnqueueNativeKernel
+#define clEnqueueNativeKernel clEnqueueNativeKernel_pfn
+#undef clEnqueueReadBuffer
+#define clEnqueueReadBuffer clEnqueueReadBuffer_pfn
+#undef clEnqueueReadBufferRect
+#define clEnqueueReadBufferRect clEnqueueReadBufferRect_pfn
+#undef clEnqueueReadImage
+#define clEnqueueReadImage clEnqueueReadImage_pfn
+#undef clEnqueueTask
+#define clEnqueueTask clEnqueueTask_pfn
+#undef clEnqueueUnmapMemObject
+#define clEnqueueUnmapMemObject clEnqueueUnmapMemObject_pfn
+#undef clEnqueueWaitForEvents
+#define clEnqueueWaitForEvents clEnqueueWaitForEvents_pfn
+#undef clEnqueueWriteBuffer
+#define clEnqueueWriteBuffer clEnqueueWriteBuffer_pfn
+#undef clEnqueueWriteBufferRect
+#define clEnqueueWriteBufferRect clEnqueueWriteBufferRect_pfn
+#undef clEnqueueWriteImage
+#define clEnqueueWriteImage clEnqueueWriteImage_pfn
+#undef clFinish
+#define clFinish clFinish_pfn
+#undef clFlush
+#define clFlush clFlush_pfn
+#undef clGetCommandQueueInfo
+#define clGetCommandQueueInfo clGetCommandQueueInfo_pfn
+#undef clGetContextInfo
+#define clGetContextInfo clGetContextInfo_pfn
+#undef clGetDeviceIDs
+#define clGetDeviceIDs clGetDeviceIDs_pfn
+#undef clGetDeviceInfo
+#define clGetDeviceInfo clGetDeviceInfo_pfn
+#undef clGetEventInfo
+#define clGetEventInfo clGetEventInfo_pfn
+#undef clGetEventProfilingInfo
+#define clGetEventProfilingInfo clGetEventProfilingInfo_pfn
+#undef clGetExtensionFunctionAddress
+#define clGetExtensionFunctionAddress clGetExtensionFunctionAddress_pfn
+#undef clGetExtensionFunctionAddressForPlatform
+#define clGetExtensionFunctionAddressForPlatform clGetExtensionFunctionAddressForPlatform_pfn
+#undef clGetImageInfo
+#define clGetImageInfo clGetImageInfo_pfn
+#undef clGetKernelArgInfo
+#define clGetKernelArgInfo clGetKernelArgInfo_pfn
+#undef clGetKernelInfo
+#define clGetKernelInfo clGetKernelInfo_pfn
+#undef clGetKernelWorkGroupInfo
+#define clGetKernelWorkGroupInfo clGetKernelWorkGroupInfo_pfn
+#undef clGetMemObjectInfo
+#define clGetMemObjectInfo clGetMemObjectInfo_pfn
+#undef clGetPlatformIDs
+#define clGetPlatformIDs clGetPlatformIDs_pfn
+#undef clGetPlatformInfo
+#define clGetPlatformInfo clGetPlatformInfo_pfn
+#undef clGetProgramBuildInfo
+#define clGetProgramBuildInfo clGetProgramBuildInfo_pfn
+#undef clGetProgramInfo
+#define clGetProgramInfo clGetProgramInfo_pfn
+#undef clGetSamplerInfo
+#define clGetSamplerInfo clGetSamplerInfo_pfn
+#undef clGetSupportedImageFormats
+#define clGetSupportedImageFormats clGetSupportedImageFormats_pfn
+#undef clLinkProgram
+#define clLinkProgram clLinkProgram_pfn
+#undef clReleaseCommandQueue
+#define clReleaseCommandQueue clReleaseCommandQueue_pfn
+#undef clReleaseContext
+#define clReleaseContext clReleaseContext_pfn
+#undef clReleaseDevice
+#define clReleaseDevice clReleaseDevice_pfn
+#undef clReleaseEvent
+#define clReleaseEvent clReleaseEvent_pfn
+#undef clReleaseKernel
+#define clReleaseKernel clReleaseKernel_pfn
+#undef clReleaseMemObject
+#define clReleaseMemObject clReleaseMemObject_pfn
+#undef clReleaseProgram
+#define clReleaseProgram clReleaseProgram_pfn
+#undef clReleaseSampler
+#define clReleaseSampler clReleaseSampler_pfn
+#undef clRetainCommandQueue
+#define clRetainCommandQueue clRetainCommandQueue_pfn
+#undef clRetainContext
+#define clRetainContext clRetainContext_pfn
+#undef clRetainDevice
+#define clRetainDevice clRetainDevice_pfn
+#undef clRetainEvent
+#define clRetainEvent clRetainEvent_pfn
+#undef clRetainKernel
+#define clRetainKernel clRetainKernel_pfn
+#undef clRetainMemObject
+#define clRetainMemObject clRetainMemObject_pfn
+#undef clRetainProgram
+#define clRetainProgram clRetainProgram_pfn
+#undef clRetainSampler
+#define clRetainSampler clRetainSampler_pfn
+#undef clSetEventCallback
+#define clSetEventCallback clSetEventCallback_pfn
+#undef clSetKernelArg
+#define clSetKernelArg clSetKernelArg_pfn
+#undef clSetMemObjectDestructorCallback
+#define clSetMemObjectDestructorCallback clSetMemObjectDestructorCallback_pfn
+#undef clSetUserEventStatus
+#define clSetUserEventStatus clSetUserEventStatus_pfn
+#undef clUnloadCompiler
+#define clUnloadCompiler clUnloadCompiler_pfn
+#undef clUnloadPlatformCompiler
+#define clUnloadPlatformCompiler clUnloadPlatformCompiler_pfn
+#undef clWaitForEvents
+#define clWaitForEvents clWaitForEvents_pfn
+
+// generated by parser_cl.py -- extern declarations only; because of the #define block above, each clXxx name on these lines expands to clXxx_pfn, i.e. a CL_RUNTIME_EXPORT function pointer with that entry point's signature
+extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clBuildProgram)(cl_program, cl_uint, const cl_device_id*, const char*, void (CL_CALLBACK*) (cl_program, void*), void*);
+extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clCompileProgram)(cl_program, cl_uint, const cl_device_id*, const char*, cl_uint, const cl_program*, const char**, void (CL_CALLBACK*) (cl_program, void*), void*);
+extern CL_RUNTIME_EXPORT cl_mem (CL_API_CALL*clCreateBuffer)(cl_context, cl_mem_flags, size_t, void*, cl_int*);
+extern CL_RUNTIME_EXPORT cl_command_queue (CL_API_CALL*clCreateCommandQueue)(cl_context, cl_device_id, cl_command_queue_properties, cl_int*);
+extern CL_RUNTIME_EXPORT cl_context (CL_API_CALL*clCreateContext)(const cl_context_properties*, cl_uint, const cl_device_id*, void (CL_CALLBACK*) (const char*, const void*, size_t, void*), void*, cl_int*);
+extern CL_RUNTIME_EXPORT cl_context (CL_API_CALL*clCreateContextFromType)(const cl_context_properties*, cl_device_type, void (CL_CALLBACK*) (const char*, const void*, size_t, void*), void*, cl_int*);
+extern CL_RUNTIME_EXPORT cl_mem (CL_API_CALL*clCreateImage)(cl_context, cl_mem_flags, const cl_image_format*, const cl_image_desc*, void*, cl_int*);
+extern CL_RUNTIME_EXPORT cl_mem (CL_API_CALL*clCreateImage2D)(cl_context, cl_mem_flags, const cl_image_format*, size_t, size_t, size_t, void*, cl_int*);
+extern CL_RUNTIME_EXPORT cl_mem (CL_API_CALL*clCreateImage3D)(cl_context, cl_mem_flags, const cl_image_format*, size_t, size_t, size_t, size_t, size_t, void*, cl_int*);
+extern CL_RUNTIME_EXPORT cl_kernel (CL_API_CALL*clCreateKernel)(cl_program, const char*, cl_int*);
+extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clCreateKernelsInProgram)(cl_program, cl_uint, cl_kernel*, cl_uint*);
+extern CL_RUNTIME_EXPORT cl_program (CL_API_CALL*clCreateProgramWithBinary)(cl_context, cl_uint, const cl_device_id*, const size_t*, const unsigned char**, cl_int*, cl_int*);
+extern CL_RUNTIME_EXPORT cl_program (CL_API_CALL*clCreateProgramWithBuiltInKernels)(cl_context, cl_uint, const cl_device_id*, const char*, cl_int*);
+extern CL_RUNTIME_EXPORT cl_program (CL_API_CALL*clCreateProgramWithSource)(cl_context, cl_uint, const char**, const size_t*, cl_int*);
+extern CL_RUNTIME_EXPORT cl_sampler (CL_API_CALL*clCreateSampler)(cl_context, cl_bool, cl_addressing_mode, cl_filter_mode, cl_int*);
+extern CL_RUNTIME_EXPORT cl_mem (CL_API_CALL*clCreateSubBuffer)(cl_mem, cl_mem_flags, cl_buffer_create_type, const void*, cl_int*);
+extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clCreateSubDevices)(cl_device_id, const cl_device_partition_property*, cl_uint, cl_device_id*, cl_uint*);
+extern CL_RUNTIME_EXPORT cl_event (CL_API_CALL*clCreateUserEvent)(cl_context, cl_int*);
+extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueBarrier)(cl_command_queue);
+extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueBarrierWithWaitList)(cl_command_queue, cl_uint, const cl_event*, cl_event*);
+extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueCopyBuffer)(cl_command_queue, cl_mem, cl_mem, size_t, size_t, size_t, cl_uint, const cl_event*, cl_event*);
+extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueCopyBufferRect)(cl_command_queue, cl_mem, cl_mem, const size_t*, const size_t*, const size_t*, size_t, size_t, size_t, size_t, cl_uint, const cl_event*, cl_event*);
+extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueCopyBufferToImage)(cl_command_queue, cl_mem, cl_mem, size_t, const size_t*, const size_t*, cl_uint, const cl_event*, cl_event*);
+extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueCopyImage)(cl_command_queue, cl_mem, cl_mem, const size_t*, const size_t*, const size_t*, cl_uint, const cl_event*, cl_event*);
+extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueCopyImageToBuffer)(cl_command_queue, cl_mem, cl_mem, const size_t*, const size_t*, size_t, cl_uint, const cl_event*, cl_event*);
+extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueFillBuffer)(cl_command_queue, cl_mem, const void*, size_t, size_t, size_t, cl_uint, const cl_event*, cl_event*);
+extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueFillImage)(cl_command_queue, cl_mem, const void*, const size_t*, const size_t*, cl_uint, const cl_event*, cl_event*);
+extern CL_RUNTIME_EXPORT void* (CL_API_CALL*clEnqueueMapBuffer)(cl_command_queue, cl_mem, cl_bool, cl_map_flags, size_t, size_t, cl_uint, const cl_event*, cl_event*, cl_int*);
+extern CL_RUNTIME_EXPORT void* (CL_API_CALL*clEnqueueMapImage)(cl_command_queue, cl_mem, cl_bool, cl_map_flags, const size_t*, const size_t*, size_t*, size_t*, cl_uint, const cl_event*, cl_event*, cl_int*);
+extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueMarker)(cl_command_queue, cl_event*);
+extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueMarkerWithWaitList)(cl_command_queue, cl_uint, const cl_event*, cl_event*);
+extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueMigrateMemObjects)(cl_command_queue, cl_uint, const cl_mem*, cl_mem_migration_flags, cl_uint, const cl_event*, cl_event*);
+extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueNDRangeKernel)(cl_command_queue, cl_kernel, cl_uint, const size_t*, const size_t*, const size_t*, cl_uint, const cl_event*, cl_event*);
+extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueNativeKernel)(cl_command_queue, void (CL_CALLBACK*) (void*), void*, size_t, cl_uint, const cl_mem*, const void**, cl_uint, const cl_event*, cl_event*);
+extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueReadBuffer)(cl_command_queue, cl_mem, cl_bool, size_t, size_t, void*, cl_uint, const cl_event*, cl_event*);
+extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueReadBufferRect)(cl_command_queue, cl_mem, cl_bool, const size_t*, const size_t*, const size_t*, size_t, size_t, size_t, size_t, void*, cl_uint, const cl_event*, cl_event*);
+extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueReadImage)(cl_command_queue, cl_mem, cl_bool, const size_t*, const size_t*, size_t, size_t, void*, cl_uint, const cl_event*, cl_event*);
+extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueTask)(cl_command_queue, cl_kernel, cl_uint, const cl_event*, cl_event*);
+extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueUnmapMemObject)(cl_command_queue, cl_mem, void*, cl_uint, const cl_event*, cl_event*);
+extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueWaitForEvents)(cl_command_queue, cl_uint, const cl_event*);
+extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueWriteBuffer)(cl_command_queue, cl_mem, cl_bool, size_t, size_t, const void*, cl_uint, const cl_event*, cl_event*);
+extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueWriteBufferRect)(cl_command_queue, cl_mem, cl_bool, const size_t*, const size_t*, const size_t*, size_t, size_t, size_t, size_t, const void*, cl_uint, const cl_event*, cl_event*);
+extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueWriteImage)(cl_command_queue, cl_mem, cl_bool, const size_t*, const size_t*, size_t, size_t, const void*, cl_uint, const cl_event*, cl_event*);
+extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clFinish)(cl_command_queue);
+extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clFlush)(cl_command_queue);
+extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetCommandQueueInfo)(cl_command_queue, cl_command_queue_info, size_t, void*, size_t*);
+extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetContextInfo)(cl_context, cl_context_info, size_t, void*, size_t*);
+extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetDeviceIDs)(cl_platform_id, cl_device_type, cl_uint, cl_device_id*, cl_uint*);
+extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetDeviceInfo)(cl_device_id, cl_device_info, size_t, void*, size_t*);
+extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetEventInfo)(cl_event, cl_event_info, size_t, void*, size_t*);
+extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetEventProfilingInfo)(cl_event, cl_profiling_info, size_t, void*, size_t*);
+extern CL_RUNTIME_EXPORT void* (CL_API_CALL*clGetExtensionFunctionAddress)(const char*);
+extern CL_RUNTIME_EXPORT void* (CL_API_CALL*clGetExtensionFunctionAddressForPlatform)(cl_platform_id, const char*);
+extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetImageInfo)(cl_mem, cl_image_info, size_t, void*, size_t*);
+extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetKernelArgInfo)(cl_kernel, cl_uint, cl_kernel_arg_info, size_t, void*, size_t*);
+extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetKernelInfo)(cl_kernel, cl_kernel_info, size_t, void*, size_t*);
+extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetKernelWorkGroupInfo)(cl_kernel, cl_device_id, cl_kernel_work_group_info, size_t, void*, size_t*);
+extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetMemObjectInfo)(cl_mem, cl_mem_info, size_t, void*, size_t*);
+extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetPlatformIDs)(cl_uint, cl_platform_id*, cl_uint*);
+extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetPlatformInfo)(cl_platform_id, cl_platform_info, size_t, void*, size_t*);
+extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetProgramBuildInfo)(cl_program, cl_device_id, cl_program_build_info, size_t, void*, size_t*);
+extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetProgramInfo)(cl_program, cl_program_info, size_t, void*, size_t*);
+extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetSamplerInfo)(cl_sampler, cl_sampler_info, size_t, void*, size_t*);
+extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetSupportedImageFormats)(cl_context, cl_mem_flags, cl_mem_object_type, cl_uint, cl_image_format*, cl_uint*);
+extern CL_RUNTIME_EXPORT cl_program (CL_API_CALL*clLinkProgram)(cl_context, cl_uint, const cl_device_id*, const char*, cl_uint, const cl_program*, void (CL_CALLBACK*) (cl_program, void*), void*, cl_int*);
+extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clReleaseCommandQueue)(cl_command_queue);
+extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clReleaseContext)(cl_context);
+extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clReleaseDevice)(cl_device_id);
+extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clReleaseEvent)(cl_event);
+extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clReleaseKernel)(cl_kernel);
+extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clReleaseMemObject)(cl_mem);
+extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clReleaseProgram)(cl_program);
+extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clReleaseSampler)(cl_sampler);
+extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clRetainCommandQueue)(cl_command_queue);
+extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clRetainContext)(cl_context);
+extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clRetainDevice)(cl_device_id);
+extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clRetainEvent)(cl_event);
+extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clRetainKernel)(cl_kernel);
+extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clRetainMemObject)(cl_mem);
+extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clRetainProgram)(cl_program);
+extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clRetainSampler)(cl_sampler);
+extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clSetEventCallback)(cl_event, cl_int, void (CL_CALLBACK*) (cl_event, cl_int, void*), void*);
+extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clSetKernelArg)(cl_kernel, cl_uint, size_t, const void*);
+extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clSetMemObjectDestructorCallback)(cl_mem, void (CL_CALLBACK*) (cl_mem, void*), void*);
+extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clSetUserEventStatus)(cl_event, cl_int);
+extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clUnloadCompiler)();
+extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clUnloadPlatformCompiler)(cl_platform_id);
+extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clWaitForEvents)(cl_uint, const cl_event*);
diff --git a/IPL/include/opencv/opencv2/core/opencl/runtime/autogenerated/opencl_core_wrappers.hpp b/IPL/include/opencv/opencv2/core/opencl/runtime/autogenerated/opencl_core_wrappers.hpp
new file mode 100644
index 0000000..216b22b
--- /dev/null
+++ b/IPL/include/opencv/opencv2/core/opencl/runtime/autogenerated/opencl_core_wrappers.hpp
@@ -0,0 +1,272 @@
+// Inline forwarding wrappers for the core OpenCL entry points (one clXxx_fn per clXxx_pfn).
+// AUTOGENERATED, DO NOT EDIT
+// Guarded below: including this file without OPENCV_CORE_OCL_RUNTIME_OPENCL_WRAPPERS_HPP defined is a hard #error.
+#ifndef OPENCV_CORE_OCL_RUNTIME_OPENCL_WRAPPERS_HPP
+#error "Invalid usage"
+#endif
+
+// generated by parser_cl.py -- each triple drops any earlier clXxx macro, maps clXxx to clXxx_fn, then defines that name as an inline function passing p0..pN straight through to clXxx_pfn and returning its result
+#undef clBuildProgram
+#define clBuildProgram clBuildProgram_fn
+inline cl_int clBuildProgram(cl_program p0, cl_uint p1, const cl_device_id* p2, const char* p3, void (CL_CALLBACK*p4) (cl_program, void*), void* p5) { return clBuildProgram_pfn(p0, p1, p2, p3, p4, p5); }
+#undef clCompileProgram
+#define clCompileProgram clCompileProgram_fn
+inline cl_int clCompileProgram(cl_program p0, cl_uint p1, const cl_device_id* p2, const char* p3, cl_uint p4, const cl_program* p5, const char** p6, void (CL_CALLBACK*p7) (cl_program, void*), void* p8) { return clCompileProgram_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); }
+#undef clCreateBuffer
+#define clCreateBuffer clCreateBuffer_fn
+inline cl_mem clCreateBuffer(cl_context p0, cl_mem_flags p1, size_t p2, void* p3, cl_int* p4) { return clCreateBuffer_pfn(p0, p1, p2, p3, p4); }
+#undef clCreateCommandQueue
+#define clCreateCommandQueue clCreateCommandQueue_fn
+inline cl_command_queue clCreateCommandQueue(cl_context p0, cl_device_id p1, cl_command_queue_properties p2, cl_int* p3) { return clCreateCommandQueue_pfn(p0, p1, p2, p3); }
+#undef clCreateContext
+#define clCreateContext clCreateContext_fn
+inline cl_context clCreateContext(const cl_context_properties* p0, cl_uint p1, const cl_device_id* p2, void (CL_CALLBACK*p3) (const char*, const void*, size_t, void*), void* p4, cl_int* p5) { return clCreateContext_pfn(p0, p1, p2, p3, p4, p5); }
+#undef clCreateContextFromType
+#define clCreateContextFromType clCreateContextFromType_fn
+inline cl_context clCreateContextFromType(const cl_context_properties* p0, cl_device_type p1, void (CL_CALLBACK*p2) (const char*, const void*, size_t, void*), void* p3, cl_int* p4) { return clCreateContextFromType_pfn(p0, p1, p2, p3, p4); }
+#undef clCreateImage
+#define clCreateImage clCreateImage_fn
+inline cl_mem clCreateImage(cl_context p0, cl_mem_flags p1, const cl_image_format* p2, const cl_image_desc* p3, void* p4, cl_int* p5) { return clCreateImage_pfn(p0, p1, p2, p3, p4, p5); }
+#undef clCreateImage2D
+#define clCreateImage2D clCreateImage2D_fn
+inline cl_mem clCreateImage2D(cl_context p0, cl_mem_flags p1, const cl_image_format* p2, size_t p3, size_t p4, size_t p5, void* p6, cl_int* p7) { return clCreateImage2D_pfn(p0, p1, p2, p3, p4, p5, p6, p7); }
+#undef clCreateImage3D
+#define clCreateImage3D clCreateImage3D_fn
+inline cl_mem clCreateImage3D(cl_context p0, cl_mem_flags p1, const cl_image_format* p2, size_t p3, size_t p4, size_t p5, size_t p6, size_t p7, void* p8, cl_int* p9) { return clCreateImage3D_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9); }
+#undef clCreateKernel
+#define clCreateKernel clCreateKernel_fn
+inline cl_kernel clCreateKernel(cl_program p0, const char* p1, cl_int* p2) { return clCreateKernel_pfn(p0, p1, p2); }
+#undef clCreateKernelsInProgram
+#define clCreateKernelsInProgram clCreateKernelsInProgram_fn
+inline cl_int clCreateKernelsInProgram(cl_program p0, cl_uint p1, cl_kernel* p2, cl_uint* p3) { return clCreateKernelsInProgram_pfn(p0, p1, p2, p3); }
+#undef clCreateProgramWithBinary
+#define clCreateProgramWithBinary clCreateProgramWithBinary_fn
+inline cl_program clCreateProgramWithBinary(cl_context p0, cl_uint p1, const cl_device_id* p2, const size_t* p3, const unsigned char** p4, cl_int* p5, cl_int* p6) { return clCreateProgramWithBinary_pfn(p0, p1, p2, p3, p4, p5, p6); }
+#undef clCreateProgramWithBuiltInKernels
+#define clCreateProgramWithBuiltInKernels clCreateProgramWithBuiltInKernels_fn
+inline cl_program clCreateProgramWithBuiltInKernels(cl_context p0, cl_uint p1, const cl_device_id* p2, const char* p3, cl_int* p4) { return clCreateProgramWithBuiltInKernels_pfn(p0, p1, p2, p3, p4); }
+#undef clCreateProgramWithSource
+#define clCreateProgramWithSource clCreateProgramWithSource_fn
+inline cl_program clCreateProgramWithSource(cl_context p0, cl_uint p1, const char** p2, const size_t* p3, cl_int* p4) { return clCreateProgramWithSource_pfn(p0, p1, p2, p3, p4); }
+#undef clCreateSampler
+#define clCreateSampler clCreateSampler_fn
+inline cl_sampler clCreateSampler(cl_context p0, cl_bool p1, cl_addressing_mode p2, cl_filter_mode p3, cl_int* p4) { return clCreateSampler_pfn(p0, p1, p2, p3, p4); }
+#undef clCreateSubBuffer
+#define clCreateSubBuffer clCreateSubBuffer_fn
+inline cl_mem clCreateSubBuffer(cl_mem p0, cl_mem_flags p1, cl_buffer_create_type p2, const void* p3, cl_int* p4) { return clCreateSubBuffer_pfn(p0, p1, p2, p3, p4); }
+#undef clCreateSubDevices
+#define clCreateSubDevices clCreateSubDevices_fn
+inline cl_int clCreateSubDevices(cl_device_id p0, const cl_device_partition_property* p1, cl_uint p2, cl_device_id* p3, cl_uint* p4) { return clCreateSubDevices_pfn(p0, p1, p2, p3, p4); }
+#undef clCreateUserEvent
+#define clCreateUserEvent clCreateUserEvent_fn
+inline cl_event clCreateUserEvent(cl_context p0, cl_int* p1) { return clCreateUserEvent_pfn(p0, p1); }
+#undef clEnqueueBarrier
+#define clEnqueueBarrier clEnqueueBarrier_fn
+inline cl_int clEnqueueBarrier(cl_command_queue p0) { return clEnqueueBarrier_pfn(p0); }
+#undef clEnqueueBarrierWithWaitList
+#define clEnqueueBarrierWithWaitList clEnqueueBarrierWithWaitList_fn
+inline cl_int clEnqueueBarrierWithWaitList(cl_command_queue p0, cl_uint p1, const cl_event* p2, cl_event* p3) { return clEnqueueBarrierWithWaitList_pfn(p0, p1, p2, p3); }
+#undef clEnqueueCopyBuffer
+#define clEnqueueCopyBuffer clEnqueueCopyBuffer_fn
+inline cl_int clEnqueueCopyBuffer(cl_command_queue p0, cl_mem p1, cl_mem p2, size_t p3, size_t p4, size_t p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return clEnqueueCopyBuffer_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); }
+#undef clEnqueueCopyBufferRect
+#define clEnqueueCopyBufferRect clEnqueueCopyBufferRect_fn
+inline cl_int clEnqueueCopyBufferRect(cl_command_queue p0, cl_mem p1, cl_mem p2, const size_t* p3, const size_t* p4, const size_t* p5, size_t p6, size_t p7, size_t p8, size_t p9, cl_uint p10, const cl_event* p11, cl_event* p12) { return clEnqueueCopyBufferRect_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12); }
+#undef clEnqueueCopyBufferToImage
+#define clEnqueueCopyBufferToImage clEnqueueCopyBufferToImage_fn
+inline cl_int clEnqueueCopyBufferToImage(cl_command_queue p0, cl_mem p1, cl_mem p2, size_t p3, const size_t* p4, const size_t* p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return clEnqueueCopyBufferToImage_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); }
+#undef clEnqueueCopyImage
+#define clEnqueueCopyImage clEnqueueCopyImage_fn
+inline cl_int clEnqueueCopyImage(cl_command_queue p0, cl_mem p1, cl_mem p2, const size_t* p3, const size_t* p4, const size_t* p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return clEnqueueCopyImage_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); }
+#undef clEnqueueCopyImageToBuffer
+#define clEnqueueCopyImageToBuffer clEnqueueCopyImageToBuffer_fn
+inline cl_int clEnqueueCopyImageToBuffer(cl_command_queue p0, cl_mem p1, cl_mem p2, const size_t* p3, const size_t* p4, size_t p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return clEnqueueCopyImageToBuffer_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); }
+#undef clEnqueueFillBuffer
+#define clEnqueueFillBuffer clEnqueueFillBuffer_fn
+inline cl_int clEnqueueFillBuffer(cl_command_queue p0, cl_mem p1, const void* p2, size_t p3, size_t p4, size_t p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return clEnqueueFillBuffer_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); }
+#undef clEnqueueFillImage
+#define clEnqueueFillImage clEnqueueFillImage_fn
+inline cl_int clEnqueueFillImage(cl_command_queue p0, cl_mem p1, const void* p2, const size_t* p3, const size_t* p4, cl_uint p5, const cl_event* p6, cl_event* p7) { return clEnqueueFillImage_pfn(p0, p1, p2, p3, p4, p5, p6, p7); }
+#undef clEnqueueMapBuffer
+#define clEnqueueMapBuffer clEnqueueMapBuffer_fn
+inline void* clEnqueueMapBuffer(cl_command_queue p0, cl_mem p1, cl_bool p2, cl_map_flags p3, size_t p4, size_t p5, cl_uint p6, const cl_event* p7, cl_event* p8, cl_int* p9) { return clEnqueueMapBuffer_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9); }
+#undef clEnqueueMapImage
+#define clEnqueueMapImage clEnqueueMapImage_fn
+inline void* clEnqueueMapImage(cl_command_queue p0, cl_mem p1, cl_bool p2, cl_map_flags p3, const size_t* p4, const size_t* p5, size_t* p6, size_t* p7, cl_uint p8, const cl_event* p9, cl_event* p10, cl_int* p11) { return clEnqueueMapImage_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11); }
+#undef clEnqueueMarker
+#define clEnqueueMarker clEnqueueMarker_fn
+inline cl_int clEnqueueMarker(cl_command_queue p0, cl_event* p1) { return clEnqueueMarker_pfn(p0, p1); }
+#undef clEnqueueMarkerWithWaitList
+#define clEnqueueMarkerWithWaitList clEnqueueMarkerWithWaitList_fn
+inline cl_int clEnqueueMarkerWithWaitList(cl_command_queue p0, cl_uint p1, const cl_event* p2, cl_event* p3) { return clEnqueueMarkerWithWaitList_pfn(p0, p1, p2, p3); }
+#undef clEnqueueMigrateMemObjects
+#define clEnqueueMigrateMemObjects clEnqueueMigrateMemObjects_fn
+inline cl_int clEnqueueMigrateMemObjects(cl_command_queue p0, cl_uint p1, const cl_mem* p2, cl_mem_migration_flags p3, cl_uint p4, const cl_event* p5, cl_event* p6) { return clEnqueueMigrateMemObjects_pfn(p0, p1, p2, p3, p4, p5, p6); }
+#undef clEnqueueNDRangeKernel
+#define clEnqueueNDRangeKernel clEnqueueNDRangeKernel_fn
+inline cl_int clEnqueueNDRangeKernel(cl_command_queue p0, cl_kernel p1, cl_uint p2, const size_t* p3, const size_t* p4, const size_t* p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return clEnqueueNDRangeKernel_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); }
+#undef clEnqueueNativeKernel
+#define clEnqueueNativeKernel clEnqueueNativeKernel_fn
+inline cl_int clEnqueueNativeKernel(cl_command_queue p0, void (CL_CALLBACK*p1) (void*), void* p2, size_t p3, cl_uint p4, const cl_mem* p5, const void** p6, cl_uint p7, const cl_event* p8, cl_event* p9) { return clEnqueueNativeKernel_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9); }
+#undef clEnqueueReadBuffer
+#define clEnqueueReadBuffer clEnqueueReadBuffer_fn
+inline cl_int clEnqueueReadBuffer(cl_command_queue p0, cl_mem p1, cl_bool p2, size_t p3, size_t p4, void* p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return clEnqueueReadBuffer_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); }
+#undef clEnqueueReadBufferRect
+#define clEnqueueReadBufferRect clEnqueueReadBufferRect_fn
+inline cl_int clEnqueueReadBufferRect(cl_command_queue p0, cl_mem p1, cl_bool p2, const size_t* p3, const size_t* p4, const size_t* p5, size_t p6, size_t p7, size_t p8, size_t p9, void* p10, cl_uint p11, const cl_event* p12, cl_event* p13) { return clEnqueueReadBufferRect_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13); }
+#undef clEnqueueReadImage
+#define clEnqueueReadImage clEnqueueReadImage_fn
+inline cl_int clEnqueueReadImage(cl_command_queue p0, cl_mem p1, cl_bool p2, const size_t* p3, const size_t* p4, size_t p5, size_t p6, void* p7, cl_uint p8, const cl_event* p9, cl_event* p10) { return clEnqueueReadImage_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10); }
+#undef clEnqueueTask
+#define clEnqueueTask clEnqueueTask_fn
+inline cl_int clEnqueueTask(cl_command_queue p0, cl_kernel p1, cl_uint p2, const cl_event* p3, cl_event* p4) { return clEnqueueTask_pfn(p0, p1, p2, p3, p4); }
+#undef clEnqueueUnmapMemObject
+#define clEnqueueUnmapMemObject clEnqueueUnmapMemObject_fn
+inline cl_int clEnqueueUnmapMemObject(cl_command_queue p0, cl_mem p1, void* p2, cl_uint p3, const cl_event* p4, cl_event* p5) { return clEnqueueUnmapMemObject_pfn(p0, p1, p2, p3, p4, p5); }
+#undef clEnqueueWaitForEvents
+#define clEnqueueWaitForEvents clEnqueueWaitForEvents_fn
+inline cl_int clEnqueueWaitForEvents(cl_command_queue p0, cl_uint p1, const cl_event* p2) { return clEnqueueWaitForEvents_pfn(p0, p1, p2); }
+#undef clEnqueueWriteBuffer
+#define clEnqueueWriteBuffer clEnqueueWriteBuffer_fn
+inline cl_int clEnqueueWriteBuffer(cl_command_queue p0, cl_mem p1, cl_bool p2, size_t p3, size_t p4, const void* p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return clEnqueueWriteBuffer_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); }
+#undef clEnqueueWriteBufferRect
+#define clEnqueueWriteBufferRect clEnqueueWriteBufferRect_fn
+inline cl_int clEnqueueWriteBufferRect(cl_command_queue p0, cl_mem p1, cl_bool p2, const size_t* p3, const size_t* p4, const size_t* p5, size_t p6, size_t p7, size_t p8, size_t p9, const void* p10, cl_uint p11, const cl_event* p12, cl_event* p13) { return clEnqueueWriteBufferRect_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13); }
+#undef clEnqueueWriteImage
+#define clEnqueueWriteImage clEnqueueWriteImage_fn
+inline cl_int clEnqueueWriteImage(cl_command_queue p0, cl_mem p1, cl_bool p2, const size_t* p3, const size_t* p4, size_t p5, size_t p6, const void* p7, cl_uint p8, const cl_event* p9, cl_event* p10) { return clEnqueueWriteImage_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10); }
+#undef clFinish
+#define clFinish clFinish_fn
+inline cl_int clFinish(cl_command_queue p0) { return clFinish_pfn(p0); }
+#undef clFlush
+#define clFlush clFlush_fn
+inline cl_int clFlush(cl_command_queue p0) { return clFlush_pfn(p0); }
+#undef clGetCommandQueueInfo
+#define clGetCommandQueueInfo clGetCommandQueueInfo_fn
+inline cl_int clGetCommandQueueInfo(cl_command_queue p0, cl_command_queue_info p1, size_t p2, void* p3, size_t* p4) { return clGetCommandQueueInfo_pfn(p0, p1, p2, p3, p4); }
+#undef clGetContextInfo
+#define clGetContextInfo clGetContextInfo_fn
+inline cl_int clGetContextInfo(cl_context p0, cl_context_info p1, size_t p2, void* p3, size_t* p4) { return clGetContextInfo_pfn(p0, p1, p2, p3, p4); }
+#undef clGetDeviceIDs
+#define clGetDeviceIDs clGetDeviceIDs_fn
+inline cl_int clGetDeviceIDs(cl_platform_id p0, cl_device_type p1, cl_uint p2, cl_device_id* p3, cl_uint* p4) { return clGetDeviceIDs_pfn(p0, p1, p2, p3, p4); }
+#undef clGetDeviceInfo
+#define clGetDeviceInfo clGetDeviceInfo_fn
+inline cl_int clGetDeviceInfo(cl_device_id p0, cl_device_info p1, size_t p2, void* p3, size_t* p4) { return clGetDeviceInfo_pfn(p0, p1, p2, p3, p4); }
+#undef clGetEventInfo
+#define clGetEventInfo clGetEventInfo_fn
+inline cl_int clGetEventInfo(cl_event p0, cl_event_info p1, size_t p2, void* p3, size_t* p4) { return clGetEventInfo_pfn(p0, p1, p2, p3, p4); }
+#undef clGetEventProfilingInfo
+#define clGetEventProfilingInfo clGetEventProfilingInfo_fn
+inline cl_int clGetEventProfilingInfo(cl_event p0, cl_profiling_info p1, size_t p2, void* p3, size_t* p4) { return clGetEventProfilingInfo_pfn(p0, p1, p2, p3, p4); }
+#undef clGetExtensionFunctionAddress
+#define clGetExtensionFunctionAddress clGetExtensionFunctionAddress_fn
+inline void* clGetExtensionFunctionAddress(const char* p0) { return clGetExtensionFunctionAddress_pfn(p0); }
+#undef clGetExtensionFunctionAddressForPlatform
+#define clGetExtensionFunctionAddressForPlatform clGetExtensionFunctionAddressForPlatform_fn
+inline void* clGetExtensionFunctionAddressForPlatform(cl_platform_id p0, const char* p1) { return clGetExtensionFunctionAddressForPlatform_pfn(p0, p1); }
+#undef clGetImageInfo
+#define clGetImageInfo clGetImageInfo_fn
+inline cl_int clGetImageInfo(cl_mem p0, cl_image_info p1, size_t p2, void* p3, size_t* p4) { return clGetImageInfo_pfn(p0, p1, p2, p3, p4); }
+#undef clGetKernelArgInfo
+#define clGetKernelArgInfo clGetKernelArgInfo_fn
+inline cl_int clGetKernelArgInfo(cl_kernel p0, cl_uint p1, cl_kernel_arg_info p2, size_t p3, void* p4, size_t* p5) { return clGetKernelArgInfo_pfn(p0, p1, p2, p3, p4, p5); }
+#undef clGetKernelInfo
+#define clGetKernelInfo clGetKernelInfo_fn
+inline cl_int clGetKernelInfo(cl_kernel p0, cl_kernel_info p1, size_t p2, void* p3, size_t* p4) { return clGetKernelInfo_pfn(p0, p1, p2, p3, p4); }
+#undef clGetKernelWorkGroupInfo
+#define clGetKernelWorkGroupInfo clGetKernelWorkGroupInfo_fn
+inline cl_int clGetKernelWorkGroupInfo(cl_kernel p0, cl_device_id p1, cl_kernel_work_group_info p2, size_t p3, void* p4, size_t* p5) { return clGetKernelWorkGroupInfo_pfn(p0, p1, p2, p3, p4, p5); }
+#undef clGetMemObjectInfo
+#define clGetMemObjectInfo clGetMemObjectInfo_fn
+inline cl_int clGetMemObjectInfo(cl_mem p0, cl_mem_info p1, size_t p2, void* p3, size_t* p4) { return clGetMemObjectInfo_pfn(p0, p1, p2, p3, p4); }
+#undef clGetPlatformIDs
+#define clGetPlatformIDs clGetPlatformIDs_fn
+inline cl_int clGetPlatformIDs(cl_uint p0, cl_platform_id* p1, cl_uint* p2) { return clGetPlatformIDs_pfn(p0, p1, p2); }
+#undef clGetPlatformInfo
+#define clGetPlatformInfo clGetPlatformInfo_fn
+inline cl_int clGetPlatformInfo(cl_platform_id p0, cl_platform_info p1, size_t p2, void* p3, size_t* p4) { return clGetPlatformInfo_pfn(p0, p1, p2, p3, p4); }
+#undef clGetProgramBuildInfo
+#define clGetProgramBuildInfo clGetProgramBuildInfo_fn
+inline cl_int clGetProgramBuildInfo(cl_program p0, cl_device_id p1, cl_program_build_info p2, size_t p3, void* p4, size_t* p5) { return clGetProgramBuildInfo_pfn(p0, p1, p2, p3, p4, p5); }
+#undef clGetProgramInfo
+#define clGetProgramInfo clGetProgramInfo_fn
+inline cl_int clGetProgramInfo(cl_program p0, cl_program_info p1, size_t p2, void* p3, size_t* p4) { return clGetProgramInfo_pfn(p0, p1, p2, p3, p4); }
+#undef clGetSamplerInfo
+#define clGetSamplerInfo clGetSamplerInfo_fn
+inline cl_int clGetSamplerInfo(cl_sampler p0, cl_sampler_info p1, size_t p2, void* p3, size_t* p4) { return clGetSamplerInfo_pfn(p0, p1, p2, p3, p4); }
+#undef clGetSupportedImageFormats
+#define clGetSupportedImageFormats clGetSupportedImageFormats_fn
+inline cl_int clGetSupportedImageFormats(cl_context p0, cl_mem_flags p1, cl_mem_object_type p2, cl_uint p3, cl_image_format* p4, cl_uint* p5) { return clGetSupportedImageFormats_pfn(p0, p1, p2, p3, p4, p5); }
+#undef clLinkProgram
+#define clLinkProgram clLinkProgram_fn
+inline cl_program clLinkProgram(cl_context p0, cl_uint p1, const cl_device_id* p2, const char* p3, cl_uint p4, const cl_program* p5, void (CL_CALLBACK*p6) (cl_program, void*), void* p7, cl_int* p8) { return clLinkProgram_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); }
+#undef clReleaseCommandQueue
+#define clReleaseCommandQueue clReleaseCommandQueue_fn
+inline cl_int clReleaseCommandQueue(cl_command_queue p0) { return clReleaseCommandQueue_pfn(p0); }
+#undef clReleaseContext
+#define clReleaseContext clReleaseContext_fn
+inline cl_int clReleaseContext(cl_context p0) { return clReleaseContext_pfn(p0); }
+#undef clReleaseDevice
+#define clReleaseDevice clReleaseDevice_fn
+inline cl_int clReleaseDevice(cl_device_id p0) { return clReleaseDevice_pfn(p0); }
+#undef clReleaseEvent
+#define clReleaseEvent clReleaseEvent_fn
+inline cl_int clReleaseEvent(cl_event p0) { return clReleaseEvent_pfn(p0); }
+#undef clReleaseKernel
+#define clReleaseKernel clReleaseKernel_fn
+inline cl_int clReleaseKernel(cl_kernel p0) { return clReleaseKernel_pfn(p0); }
+#undef clReleaseMemObject
+#define clReleaseMemObject clReleaseMemObject_fn
+inline cl_int clReleaseMemObject(cl_mem p0) { return clReleaseMemObject_pfn(p0); }
+#undef clReleaseProgram
+#define clReleaseProgram clReleaseProgram_fn
+inline cl_int clReleaseProgram(cl_program p0) { return clReleaseProgram_pfn(p0); }
+#undef clReleaseSampler
+#define clReleaseSampler clReleaseSampler_fn
+inline cl_int clReleaseSampler(cl_sampler p0) { return clReleaseSampler_pfn(p0); }
+#undef clRetainCommandQueue
+#define clRetainCommandQueue clRetainCommandQueue_fn
+inline cl_int clRetainCommandQueue(cl_command_queue p0) { return clRetainCommandQueue_pfn(p0); }
+#undef clRetainContext
+#define clRetainContext clRetainContext_fn
+inline cl_int clRetainContext(cl_context p0) { return clRetainContext_pfn(p0); }
+#undef clRetainDevice
+#define clRetainDevice clRetainDevice_fn
+inline cl_int clRetainDevice(cl_device_id p0) { return clRetainDevice_pfn(p0); }
+#undef clRetainEvent
+#define clRetainEvent clRetainEvent_fn
+inline cl_int clRetainEvent(cl_event p0) { return clRetainEvent_pfn(p0); }
+#undef clRetainKernel
+#define clRetainKernel clRetainKernel_fn
+inline cl_int clRetainKernel(cl_kernel p0) { return clRetainKernel_pfn(p0); }
+#undef clRetainMemObject
+#define clRetainMemObject clRetainMemObject_fn
+inline cl_int clRetainMemObject(cl_mem p0) { return clRetainMemObject_pfn(p0); }
+#undef clRetainProgram
+#define clRetainProgram clRetainProgram_fn
+inline cl_int clRetainProgram(cl_program p0) { return clRetainProgram_pfn(p0); }
+#undef clRetainSampler
+#define clRetainSampler clRetainSampler_fn
+inline cl_int clRetainSampler(cl_sampler p0) { return clRetainSampler_pfn(p0); }
+#undef clSetEventCallback
+#define clSetEventCallback clSetEventCallback_fn
+inline cl_int clSetEventCallback(cl_event p0, cl_int p1, void (CL_CALLBACK*p2) (cl_event, cl_int, void*), void* p3) { return clSetEventCallback_pfn(p0, p1, p2, p3); }
+#undef clSetKernelArg
+#define clSetKernelArg clSetKernelArg_fn
+inline cl_int clSetKernelArg(cl_kernel p0, cl_uint p1, size_t p2, const void* p3) { return clSetKernelArg_pfn(p0, p1, p2, p3); }
+#undef clSetMemObjectDestructorCallback
+#define clSetMemObjectDestructorCallback clSetMemObjectDestructorCallback_fn
+inline cl_int clSetMemObjectDestructorCallback(cl_mem p0, void (CL_CALLBACK*p1) (cl_mem, void*), void* p2) { return clSetMemObjectDestructorCallback_pfn(p0, p1, p2); }
+#undef clSetUserEventStatus
+#define clSetUserEventStatus clSetUserEventStatus_fn
+inline cl_int clSetUserEventStatus(cl_event p0, cl_int p1) { return clSetUserEventStatus_pfn(p0, p1); }
+#undef clUnloadCompiler
+#define clUnloadCompiler clUnloadCompiler_fn
+inline cl_int clUnloadCompiler() { return clUnloadCompiler_pfn(); }
+#undef clUnloadPlatformCompiler
+#define clUnloadPlatformCompiler clUnloadPlatformCompiler_fn
+inline cl_int clUnloadPlatformCompiler(cl_platform_id p0) { return clUnloadPlatformCompiler_pfn(p0); }
+#undef clWaitForEvents
+#define clWaitForEvents clWaitForEvents_fn
+inline cl_int clWaitForEvents(cl_uint p0, const cl_event* p1) { return clWaitForEvents_pfn(p0, p1); }
diff --git a/IPL/include/opencv/opencv2/core/opencl/runtime/autogenerated/opencl_gl.hpp b/IPL/include/opencv/opencv2/core/opencl/runtime/autogenerated/opencl_gl.hpp
new file mode 100644
index 0000000..0b12aed
--- /dev/null
+++ b/IPL/include/opencv/opencv2/core/opencl/runtime/autogenerated/opencl_gl.hpp
@@ -0,0 +1,62 @@
+//
+// AUTOGENERATED, DO NOT EDIT
+//
+#ifndef OPENCV_CORE_OCL_RUNTIME_OPENCL_GL_HPP
+#error "Invalid usage"
+#endif
+
+// generated by parser_cl.py
+#define clCreateFromGLBuffer clCreateFromGLBuffer_
+#define clCreateFromGLRenderbuffer clCreateFromGLRenderbuffer_
+#define clCreateFromGLTexture clCreateFromGLTexture_
+#define clCreateFromGLTexture2D clCreateFromGLTexture2D_
+#define clCreateFromGLTexture3D clCreateFromGLTexture3D_
+#define clEnqueueAcquireGLObjects clEnqueueAcquireGLObjects_
+#define clEnqueueReleaseGLObjects clEnqueueReleaseGLObjects_
+#define clGetGLContextInfoKHR clGetGLContextInfoKHR_
+#define clGetGLObjectInfo clGetGLObjectInfo_
+#define clGetGLTextureInfo clGetGLTextureInfo_
+
+#if defined __APPLE__
+#include <OpenCL/cl_gl.h>
+#else
+#include <CL/cl_gl.h>
+#endif
+
+// generated by parser_cl.py
+#undef clCreateFromGLBuffer
+#define clCreateFromGLBuffer clCreateFromGLBuffer_pfn
+#undef clCreateFromGLRenderbuffer
+#define clCreateFromGLRenderbuffer clCreateFromGLRenderbuffer_pfn
+#undef clCreateFromGLTexture
+#define clCreateFromGLTexture clCreateFromGLTexture_pfn
+#undef clCreateFromGLTexture2D
+#define clCreateFromGLTexture2D clCreateFromGLTexture2D_pfn
+#undef clCreateFromGLTexture3D
+#define clCreateFromGLTexture3D clCreateFromGLTexture3D_pfn
+#undef clEnqueueAcquireGLObjects
+#define clEnqueueAcquireGLObjects clEnqueueAcquireGLObjects_pfn
+#undef clEnqueueReleaseGLObjects
+#define clEnqueueReleaseGLObjects clEnqueueReleaseGLObjects_pfn
+#undef clGetGLContextInfoKHR
+#define clGetGLContextInfoKHR clGetGLContextInfoKHR_pfn
+#undef clGetGLObjectInfo
+#define clGetGLObjectInfo clGetGLObjectInfo_pfn
+#undef clGetGLTextureInfo
+#define clGetGLTextureInfo clGetGLTextureInfo_pfn
+
+#ifdef cl_khr_gl_sharing
+
+// generated by parser_cl.py
+extern CL_RUNTIME_EXPORT cl_mem (CL_API_CALL*clCreateFromGLBuffer)(cl_context, cl_mem_flags, cl_GLuint, int*);
+extern CL_RUNTIME_EXPORT cl_mem (CL_API_CALL*clCreateFromGLRenderbuffer)(cl_context, cl_mem_flags, cl_GLuint, cl_int*);
+extern CL_RUNTIME_EXPORT cl_mem (CL_API_CALL*clCreateFromGLTexture)(cl_context, cl_mem_flags, cl_GLenum, cl_GLint, cl_GLuint, cl_int*);
+extern CL_RUNTIME_EXPORT cl_mem (CL_API_CALL*clCreateFromGLTexture2D)(cl_context, cl_mem_flags, cl_GLenum, cl_GLint, cl_GLuint, cl_int*);
+extern CL_RUNTIME_EXPORT cl_mem (CL_API_CALL*clCreateFromGLTexture3D)(cl_context, cl_mem_flags, cl_GLenum, cl_GLint, cl_GLuint, cl_int*);
+extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueAcquireGLObjects)(cl_command_queue, cl_uint, const cl_mem*, cl_uint, const cl_event*, cl_event*);
+extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueReleaseGLObjects)(cl_command_queue, cl_uint, const cl_mem*, cl_uint, const cl_event*, cl_event*);
+extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetGLContextInfoKHR)(const cl_context_properties*, cl_gl_context_info, size_t, void*, size_t*);
+extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetGLObjectInfo)(cl_mem, cl_gl_object_type*, cl_GLuint*);
+extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetGLTextureInfo)(cl_mem, cl_gl_texture_info, size_t, void*, size_t*);
+
+#endif // cl_khr_gl_sharing
diff --git a/IPL/include/opencv/opencv2/core/opencl/runtime/autogenerated/opencl_gl_wrappers.hpp b/IPL/include/opencv/opencv2/core/opencl/runtime/autogenerated/opencl_gl_wrappers.hpp
new file mode 100644
index 0000000..12f342b
--- /dev/null
+++ b/IPL/include/opencv/opencv2/core/opencl/runtime/autogenerated/opencl_gl_wrappers.hpp
@@ -0,0 +1,42 @@
+//
+// AUTOGENERATED, DO NOT EDIT
+//
+#ifndef OPENCV_CORE_OCL_RUNTIME_OPENCL_GL_WRAPPERS_HPP
+#error "Invalid usage"
+#endif
+
+#ifdef cl_khr_gl_sharing
+
+// generated by parser_cl.py
+#undef clCreateFromGLBuffer
+#define clCreateFromGLBuffer clCreateFromGLBuffer_fn
+inline cl_mem clCreateFromGLBuffer(cl_context p0, cl_mem_flags p1, cl_GLuint p2, int* p3) { return clCreateFromGLBuffer_pfn(p0, p1, p2, p3); }
+#undef clCreateFromGLRenderbuffer
+#define clCreateFromGLRenderbuffer clCreateFromGLRenderbuffer_fn
+inline cl_mem clCreateFromGLRenderbuffer(cl_context p0, cl_mem_flags p1, cl_GLuint p2, cl_int* p3) { return clCreateFromGLRenderbuffer_pfn(p0, p1, p2, p3); }
+#undef clCreateFromGLTexture
+#define clCreateFromGLTexture clCreateFromGLTexture_fn
+inline cl_mem clCreateFromGLTexture(cl_context p0, cl_mem_flags p1, cl_GLenum p2, cl_GLint p3, cl_GLuint p4, cl_int* p5) { return clCreateFromGLTexture_pfn(p0, p1, p2, p3, p4, p5); }
+#undef clCreateFromGLTexture2D
+#define clCreateFromGLTexture2D clCreateFromGLTexture2D_fn
+inline cl_mem clCreateFromGLTexture2D(cl_context p0, cl_mem_flags p1, cl_GLenum p2, cl_GLint p3, cl_GLuint p4, cl_int* p5) { return clCreateFromGLTexture2D_pfn(p0, p1, p2, p3, p4, p5); }
+#undef clCreateFromGLTexture3D
+#define clCreateFromGLTexture3D clCreateFromGLTexture3D_fn
+inline cl_mem clCreateFromGLTexture3D(cl_context p0, cl_mem_flags p1, cl_GLenum p2, cl_GLint p3, cl_GLuint p4, cl_int* p5) { return clCreateFromGLTexture3D_pfn(p0, p1, p2, p3, p4, p5); }
+#undef clEnqueueAcquireGLObjects
+#define clEnqueueAcquireGLObjects clEnqueueAcquireGLObjects_fn
+inline cl_int clEnqueueAcquireGLObjects(cl_command_queue p0, cl_uint p1, const cl_mem* p2, cl_uint p3, const cl_event* p4, cl_event* p5) { return clEnqueueAcquireGLObjects_pfn(p0, p1, p2, p3, p4, p5); }
+#undef clEnqueueReleaseGLObjects
+#define clEnqueueReleaseGLObjects clEnqueueReleaseGLObjects_fn
+inline cl_int clEnqueueReleaseGLObjects(cl_command_queue p0, cl_uint p1, const cl_mem* p2, cl_uint p3, const cl_event* p4, cl_event* p5) { return clEnqueueReleaseGLObjects_pfn(p0, p1, p2, p3, p4, p5); }
+#undef clGetGLContextInfoKHR
+#define clGetGLContextInfoKHR clGetGLContextInfoKHR_fn
+inline cl_int clGetGLContextInfoKHR(const cl_context_properties* p0, cl_gl_context_info p1, size_t p2, void* p3, size_t* p4) { return clGetGLContextInfoKHR_pfn(p0, p1, p2, p3, p4); }
+#undef clGetGLObjectInfo
+#define clGetGLObjectInfo clGetGLObjectInfo_fn
+inline cl_int clGetGLObjectInfo(cl_mem p0, cl_gl_object_type* p1, cl_GLuint* p2) { return clGetGLObjectInfo_pfn(p0, p1, p2); }
+#undef clGetGLTextureInfo
+#define clGetGLTextureInfo clGetGLTextureInfo_fn
+inline cl_int clGetGLTextureInfo(cl_mem p0, cl_gl_texture_info p1, size_t p2, void* p3, size_t* p4) { return clGetGLTextureInfo_pfn(p0, p1, p2, p3, p4); }
+
+#endif // cl_khr_gl_sharing
diff --git a/IPL/include/opencv/opencv/cxcore.h b/IPL/include/opencv/opencv2/core/opencl/runtime/opencl_clamdblas.hpp
similarity index 80%
rename from IPL/include/opencv/opencv/cxcore.h
rename to IPL/include/opencv/opencv2/core/opencl/runtime/opencl_clamdblas.hpp
index 0982bd7..2ad8ac0 100644
--- a/IPL/include/opencv/opencv/cxcore.h
+++ b/IPL/include/opencv/opencv2/core/opencl/runtime/opencl_clamdblas.hpp
@@ -10,8 +10,7 @@
 //                           License Agreement
 //                For Open Source Computer Vision Library
 //
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved.
 // Third party copyrights are property of their respective owners.
 //
 // Redistribution and use in source and binary forms, with or without modification,
@@ -30,7 +29,7 @@
 // This software is provided by the copyright holders and contributors "as is" and
 // any express or implied warranties, including, but not limited to, the implied
 // warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
+// In no event shall the OpenCV Foundation or contributors be liable for any direct,
 // indirect, incidental, special, exemplary, or consequential damages
 // (including, but not limited to, procurement of substitute goods or services;
 // loss of use, data, or profits; or business interruption) however caused
@@ -40,13 +39,15 @@
 //
 //M*/
 
-#ifndef __OPENCV_OLD_CXCORE_H__
-#define __OPENCV_OLD_CXCORE_H__
+#ifndef OPENCV_CORE_OCL_RUNTIME_CLAMDBLAS_HPP
+#define OPENCV_CORE_OCL_RUNTIME_CLAMDBLAS_HPP
 
-//#if defined(__GNUC__)
-//#warning "This is a deprecated opencv header provided for compatibility. Please include a header from a corresponding opencv module"
-//#endif
+#ifdef HAVE_CLAMDBLAS
 
-#include "opencv2/core/core_c.h"
+#include "opencl_core.hpp"
 
-#endif
+#include "autogenerated/opencl_clamdblas.hpp"
+
+#endif // HAVE_CLAMDBLAS
+
+#endif // OPENCV_CORE_OCL_RUNTIME_CLAMDBLAS_HPP
diff --git a/IPL/include/opencv/opencv/cxcore.hpp b/IPL/include/opencv/opencv2/core/opencl/runtime/opencl_clamdfft.hpp
similarity index 80%
rename from IPL/include/opencv/opencv/cxcore.hpp
rename to IPL/include/opencv/opencv2/core/opencl/runtime/opencl_clamdfft.hpp
index 9af4ac7..a328f72 100644
--- a/IPL/include/opencv/opencv/cxcore.hpp
+++ b/IPL/include/opencv/opencv2/core/opencl/runtime/opencl_clamdfft.hpp
@@ -10,8 +10,7 @@
 //                           License Agreement
 //                For Open Source Computer Vision Library
 //
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved.
 // Third party copyrights are property of their respective owners.
 //
 // Redistribution and use in source and binary forms, with or without modification,
@@ -30,7 +29,7 @@
 // This software is provided by the copyright holders and contributors "as is" and
 // any express or implied warranties, including, but not limited to, the implied
 // warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
+// In no event shall the OpenCV Foundation or contributors be liable for any direct,
 // indirect, incidental, special, exemplary, or consequential damages
 // (including, but not limited to, procurement of substitute goods or services;
 // loss of use, data, or profits; or business interruption) however caused
@@ -40,14 +39,15 @@
 //
 //M*/
 
-#ifndef __OPENCV_OLD_CXCORE_HPP__
-#define __OPENCV_OLD_CXCORE_HPP__
+#ifndef OPENCV_CORE_OCL_RUNTIME_CLAMDFFT_HPP
+#define OPENCV_CORE_OCL_RUNTIME_CLAMDFFT_HPP
 
-//#if defined(__GNUC__)
-//#warning "This is a deprecated opencv header provided for compatibility. Please include a header from a corresponding opencv module"
-//#endif
+#ifdef HAVE_CLAMDFFT
 
-#include "cxcore.h"
-#include "opencv2/core.hpp"
+#include "opencl_core.hpp"
 
-#endif
+#include "autogenerated/opencl_clamdfft.hpp"
+
+#endif // HAVE_CLAMDFFT
+
+#endif // OPENCV_CORE_OCL_RUNTIME_CLAMDFFT_HPP
diff --git a/IPL/include/opencv/opencv2/photo/photo_c.h b/IPL/include/opencv/opencv2/core/opencl/runtime/opencl_core.hpp
similarity index 57%
rename from IPL/include/opencv/opencv2/photo/photo_c.h
rename to IPL/include/opencv/opencv2/core/opencl/runtime/opencl_core.hpp
index 07ca9b3..0404b31 100644
--- a/IPL/include/opencv/opencv2/photo/photo_c.h
+++ b/IPL/include/opencv/opencv2/core/opencl/runtime/opencl_core.hpp
@@ -10,8 +10,7 @@
 //                           License Agreement
 //                For Open Source Computer Vision Library
 //
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2008-2012, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved.
 // Third party copyrights are property of their respective owners.
 //
 // Redistribution and use in source and binary forms, with or without modification,
@@ -30,7 +29,7 @@
 // This software is provided by the copyright holders and contributors "as is" and
 // any express or implied warranties, including, but not limited to, the implied
 // warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
+// In no event shall the OpenCV Foundation or contributors be liable for any direct,
 // indirect, incidental, special, exemplary, or consequential damages
 // (including, but not limited to, procurement of substitute goods or services;
 // loss of use, data, or profits; or business interruption) however caused
@@ -40,35 +39,46 @@
 //
 //M*/
 
-#ifndef __OPENCV_PHOTO_C_H__
-#define __OPENCV_PHOTO_C_H__
+#ifndef OPENCV_CORE_OCL_RUNTIME_OPENCL_CORE_HPP
+#define OPENCV_CORE_OCL_RUNTIME_OPENCL_CORE_HPP
 
-#include "opencv2/core/core_c.h"
+#ifdef HAVE_OPENCL
 
-#ifdef __cplusplus
-extern "C" {
+#ifndef CL_RUNTIME_EXPORT
+#if (defined(BUILD_SHARED_LIBS) || defined(OPENCV_CORE_SHARED)) && (defined _WIN32 || defined WINCE) && \
+    !(defined(__OPENCV_BUILD) && defined(OPENCV_MODULE_IS_PART_OF_WORLD))
+#define CL_RUNTIME_EXPORT __declspec(dllimport)
+#else
+#define CL_RUNTIME_EXPORT
+#endif
 #endif
 
-/** @addtogroup photo_c
-  @{
-  */
-
-/* Inpainting algorithms */
-enum InpaintingModes
-{
-    CV_INPAINT_NS      =0,
-    CV_INPAINT_TELEA   =1
-};
+#ifdef HAVE_OPENCL_SVM
+#define clSVMAlloc clSVMAlloc_
+#define clSVMFree clSVMFree_
+#define clSetKernelArgSVMPointer clSetKernelArgSVMPointer_
+#define clSetKernelExecInfo clSetKernelExecInfo_
+#define clEnqueueSVMFree clEnqueueSVMFree_
+#define clEnqueueSVMMemcpy clEnqueueSVMMemcpy_
+#define clEnqueueSVMMemFill clEnqueueSVMMemFill_
+#define clEnqueueSVMMap clEnqueueSVMMap_
+#define clEnqueueSVMUnmap clEnqueueSVMUnmap_
+#endif
 
+#include "autogenerated/opencl_core.hpp"
 
-/* Inpaints the selected region in the image */
-CVAPI(void) cvInpaint( const CvArr* src, const CvArr* inpaint_mask,
-                       CvArr* dst, double inpaintRange, int flags );
+#ifndef CL_DEVICE_DOUBLE_FP_CONFIG
+#define CL_DEVICE_DOUBLE_FP_CONFIG 0x1032
+#endif
 
-/** @} */
+#ifndef CL_DEVICE_HALF_FP_CONFIG
+#define CL_DEVICE_HALF_FP_CONFIG 0x1033
+#endif
 
-#ifdef __cplusplus
-} //extern "C"
+#ifndef CL_VERSION_1_2
+#define CV_REQUIRE_OPENCL_1_2_ERROR CV_Error(cv::Error::OpenCLApiCallError, "OpenCV compiled without OpenCL v1.2 support, so we can't use functionality from OpenCL v1.2")
 #endif
 
-#endif //__OPENCV_PHOTO_C_H__
+#endif // HAVE_OPENCL
+
+#endif // OPENCV_CORE_OCL_RUNTIME_OPENCL_CORE_HPP
diff --git a/IPL/include/opencv/opencv/cxeigen.hpp b/IPL/include/opencv/opencv2/core/opencl/runtime/opencl_core_wrappers.hpp
similarity index 82%
rename from IPL/include/opencv/opencv/cxeigen.hpp
rename to IPL/include/opencv/opencv2/core/opencl/runtime/opencl_core_wrappers.hpp
index 1f04d1a..38fcae9 100644
--- a/IPL/include/opencv/opencv/cxeigen.hpp
+++ b/IPL/include/opencv/opencv2/core/opencl/runtime/opencl_core_wrappers.hpp
@@ -7,11 +7,10 @@
 //  copy or use the software.
 //
 //
-//                          License Agreement
+//                           License Agreement
 //                For Open Source Computer Vision Library
 //
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved.
 // Third party copyrights are property of their respective owners.
 //
 // Redistribution and use in source and binary forms, with or without modification,
@@ -30,7 +29,7 @@
 // This software is provided by the copyright holders and contributors "as is" and
 // any express or implied warranties, including, but not limited to, the implied
 // warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
+// In no event shall the OpenCV Foundation or contributors be liable for any direct,
 // indirect, incidental, special, exemplary, or consequential damages
 // (including, but not limited to, procurement of substitute goods or services;
 // loss of use, data, or profits; or business interruption) however caused
@@ -40,9 +39,9 @@
 //
 //M*/
 
-#ifndef __OPENCV_OLD_EIGEN_HPP__
-#define __OPENCV_OLD_EIGEN_HPP__
+#ifndef OPENCV_CORE_OCL_RUNTIME_OPENCL_WRAPPERS_HPP
+#define OPENCV_CORE_OCL_RUNTIME_OPENCL_WRAPPERS_HPP
 
-#include "opencv2/core/eigen.hpp"
+#include "autogenerated/opencl_core_wrappers.hpp"
 
-#endif
+#endif // OPENCV_CORE_OCL_RUNTIME_OPENCL_WRAPPERS_HPP
diff --git a/IPL/include/opencv/opencv/cvaux.hpp b/IPL/include/opencv/opencv2/core/opencl/runtime/opencl_gl.hpp
similarity index 75%
rename from IPL/include/opencv/opencv/cvaux.hpp
rename to IPL/include/opencv/opencv2/core/opencl/runtime/opencl_gl.hpp
index b0e60a3..659c7d8 100644
--- a/IPL/include/opencv/opencv/cvaux.hpp
+++ b/IPL/include/opencv/opencv2/core/opencl/runtime/opencl_gl.hpp
@@ -7,10 +7,10 @@
 //  copy or use the software.
 //
 //
-//                        Intel License Agreement
+//                           License Agreement
 //                For Open Source Computer Vision Library
 //
-// Copyright (C) 2000, Intel Corporation, all rights reserved.
+// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved.
 // Third party copyrights are property of their respective owners.
 //
 // Redistribution and use in source and binary forms, with or without modification,
@@ -23,13 +23,13 @@
 //     this list of conditions and the following disclaimer in the documentation
 //     and/or other materials provided with the distribution.
 //
-//   * The name of Intel Corporation may not be used to endorse or promote products
+//   * The name of the copyright holders may not be used to endorse or promote products
 //     derived from this software without specific prior written permission.
 //
 // This software is provided by the copyright holders and contributors "as is" and
 // any express or implied warranties, including, but not limited to, the implied
 // warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
+// In no event shall the OpenCV Foundation or contributors be liable for any direct,
 // indirect, incidental, special, exemplary, or consequential damages
 // (including, but not limited to, procurement of substitute goods or services;
 // loss of use, data, or profits; or business interruption) however caused
@@ -39,14 +39,15 @@
 //
 //M*/
 
-#ifndef __OPENCV_OLD_AUX_HPP__
-#define __OPENCV_OLD_AUX_HPP__
+#ifndef OPENCV_CORE_OCL_RUNTIME_OPENCL_GL_HPP
+#define OPENCV_CORE_OCL_RUNTIME_OPENCL_GL_HPP
 
-//#if defined(__GNUC__)
-//#warning "This is a deprecated opencv header provided for compatibility. Please include a header from a corresponding opencv module"
-//#endif
+#if defined HAVE_OPENCL && defined HAVE_OPENGL
 
-#include "cvaux.h"
-#include "opencv2/core/utility.hpp"
+#include "opencl_core.hpp"
 
-#endif
+#include "autogenerated/opencl_gl.hpp"
+
+#endif // defined HAVE_OPENCL && defined HAVE_OPENGL
+
+#endif // OPENCV_CORE_OCL_RUNTIME_OPENCL_GL_HPP
diff --git a/IPL/include/opencv/opencv/highgui.h b/IPL/include/opencv/opencv2/core/opencl/runtime/opencl_gl_wrappers.hpp
similarity index 78%
rename from IPL/include/opencv/opencv/highgui.h
rename to IPL/include/opencv/opencv2/core/opencl/runtime/opencl_gl_wrappers.hpp
index 0261029..9700004 100644
--- a/IPL/include/opencv/opencv/highgui.h
+++ b/IPL/include/opencv/opencv2/core/opencl/runtime/opencl_gl_wrappers.hpp
@@ -7,10 +7,10 @@
 //  copy or use the software.
 //
 //
-//                        Intel License Agreement
+//                           License Agreement
 //                For Open Source Computer Vision Library
 //
-// Copyright (C) 2000, Intel Corporation, all rights reserved.
+// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved.
 // Third party copyrights are property of their respective owners.
 //
 // Redistribution and use in source and binary forms, with or without modification,
@@ -23,13 +23,13 @@
 //     this list of conditions and the following disclaimer in the documentation
 //     and/or other materials provided with the distribution.
 //
-//   * The name of Intel Corporation may not be used to endorse or promote products
+//   * The name of the copyright holders may not be used to endorse or promote products
 //     derived from this software without specific prior written permission.
 //
 // This software is provided by the copyright holders and contributors "as is" and
 // any express or implied warranties, including, but not limited to, the implied
 // warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
+// In no event shall the OpenCV Foundation or contributors be liable for any direct,
 // indirect, incidental, special, exemplary, or consequential damages
 // (including, but not limited to, procurement of substitute goods or services;
 // loss of use, data, or profits; or business interruption) however caused
@@ -39,10 +39,9 @@
 //
 //M*/
 
-#ifndef __OPENCV_OLD_HIGHGUI_H__
-#define __OPENCV_OLD_HIGHGUI_H__
+#ifndef OPENCV_CORE_OCL_RUNTIME_OPENCL_GL_WRAPPERS_HPP
+#define OPENCV_CORE_OCL_RUNTIME_OPENCL_GL_WRAPPERS_HPP
 
-#include "opencv2/core/core_c.h"
-#include "opencv2/highgui/highgui_c.h"
+#include "autogenerated/opencl_gl_wrappers.hpp"
 
-#endif
+#endif // OPENCV_CORE_OCL_RUNTIME_OPENCL_GL_WRAPPERS_HPP
diff --git a/IPL/include/opencv/opencv2/core/opencl/runtime/opencl_svm_20.hpp b/IPL/include/opencv/opencv2/core/opencl/runtime/opencl_svm_20.hpp
new file mode 100644
index 0000000..9636b19
--- /dev/null
+++ b/IPL/include/opencv/opencv2/core/opencl/runtime/opencl_svm_20.hpp
@@ -0,0 +1,48 @@
+/* See LICENSE file in the root OpenCV directory */
+
+#ifndef OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_2_0_HPP
+#define OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_2_0_HPP
+
+#if defined(HAVE_OPENCL_SVM)
+#include "opencl_core.hpp"
+
+#include "opencl_svm_definitions.hpp"
+
+#undef clSVMAlloc
+#define clSVMAlloc clSVMAlloc_pfn
+#undef clSVMFree
+#define clSVMFree clSVMFree_pfn
+#undef clSetKernelArgSVMPointer
+#define clSetKernelArgSVMPointer clSetKernelArgSVMPointer_pfn
+#undef clSetKernelExecInfo
+//#define clSetKernelExecInfo clSetKernelExecInfo_pfn
+#undef clEnqueueSVMFree
+//#define clEnqueueSVMFree clEnqueueSVMFree_pfn
+#undef clEnqueueSVMMemcpy
+#define clEnqueueSVMMemcpy clEnqueueSVMMemcpy_pfn
+#undef clEnqueueSVMMemFill
+#define clEnqueueSVMMemFill clEnqueueSVMMemFill_pfn
+#undef clEnqueueSVMMap
+#define clEnqueueSVMMap clEnqueueSVMMap_pfn
+#undef clEnqueueSVMUnmap
+#define clEnqueueSVMUnmap clEnqueueSVMUnmap_pfn
+
+extern CL_RUNTIME_EXPORT void* (CL_API_CALL *clSVMAlloc)(cl_context context, cl_svm_mem_flags flags, size_t size, unsigned int alignment);
+extern CL_RUNTIME_EXPORT void (CL_API_CALL *clSVMFree)(cl_context context, void* svm_pointer);
+extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL *clSetKernelArgSVMPointer)(cl_kernel kernel, cl_uint arg_index, const void* arg_value);
+//extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL *clSetKernelExecInfo)(cl_kernel kernel, cl_kernel_exec_info param_name, size_t param_value_size, const void* param_value);
+//extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL *clEnqueueSVMFree)(cl_command_queue command_queue, cl_uint num_svm_pointers, void* svm_pointers[],
+//        void (CL_CALLBACK *pfn_free_func)(cl_command_queue queue, cl_uint num_svm_pointers, void* svm_pointers[], void* user_data), void* user_data,
+//        cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event);
+extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL *clEnqueueSVMMemcpy)(cl_command_queue command_queue, cl_bool blocking_copy, void* dst_ptr, const void* src_ptr, size_t size,
+        cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event);
+extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL *clEnqueueSVMMemFill)(cl_command_queue command_queue, void* svm_ptr, const void* pattern, size_t pattern_size, size_t size,
+        cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event);
+extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL *clEnqueueSVMMap)(cl_command_queue command_queue, cl_bool blocking_map, cl_map_flags map_flags, void* svm_ptr, size_t size,
+        cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event);
+extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL *clEnqueueSVMUnmap)(cl_command_queue command_queue, void* svm_ptr,
+        cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event);
+
+#endif // HAVE_OPENCL_SVM
+
+#endif // OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_2_0_HPP
diff --git a/IPL/include/opencv/opencv2/core/opencl/runtime/opencl_svm_definitions.hpp b/IPL/include/opencv/opencv2/core/opencl/runtime/opencl_svm_definitions.hpp
new file mode 100644
index 0000000..97c927b
--- /dev/null
+++ b/IPL/include/opencv/opencv2/core/opencl/runtime/opencl_svm_definitions.hpp
@@ -0,0 +1,42 @@
+/* See LICENSE file in the root OpenCV directory */
+
+#ifndef OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_DEFINITIONS_HPP
+#define OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_DEFINITIONS_HPP
+
+#if defined(HAVE_OPENCL_SVM)
+#if defined(CL_VERSION_2_0)
+
+// OpenCL 2.0 contains SVM definitions
+
+#else
+
+typedef cl_bitfield cl_device_svm_capabilities;
+typedef cl_bitfield cl_svm_mem_flags;
+typedef cl_uint     cl_kernel_exec_info;
+
+//
+// TODO Add real values after OpenCL 2.0 release
+//
+
+#ifndef CL_DEVICE_SVM_CAPABILITIES
+#define CL_DEVICE_SVM_CAPABILITIES 0x1053
+
+#define CL_DEVICE_SVM_COARSE_GRAIN_BUFFER             (1 << 0)
+#define CL_DEVICE_SVM_FINE_GRAIN_BUFFER               (1 << 1)
+#define CL_DEVICE_SVM_FINE_GRAIN_SYSTEM               (1 << 2)
+#define CL_DEVICE_SVM_ATOMICS                         (1 << 3)
+#endif
+
+#ifndef CL_MEM_SVM_FINE_GRAIN_BUFFER
+#define CL_MEM_SVM_FINE_GRAIN_BUFFER (1 << 10)
+#endif
+
+#ifndef CL_MEM_SVM_ATOMICS
+#define CL_MEM_SVM_ATOMICS (1 << 11)
+#endif
+
+
+#endif // CL_VERSION_2_0
+#endif // HAVE_OPENCL_SVM
+
+#endif // OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_DEFINITIONS_HPP
diff --git a/IPL/include/opencv/opencv2/core/opencl/runtime/opencl_svm_hsa_extension.hpp b/IPL/include/opencv/opencv2/core/opencl/runtime/opencl_svm_hsa_extension.hpp
new file mode 100644
index 0000000..497bc3d
--- /dev/null
+++ b/IPL/include/opencv/opencv2/core/opencl/runtime/opencl_svm_hsa_extension.hpp
@@ -0,0 +1,166 @@
+/* See LICENSE file in the root OpenCV directory */
+
+#ifndef OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_HSA_EXTENSION_HPP
+#define OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_HSA_EXTENSION_HPP
+
+#if defined(HAVE_OPENCL_SVM)
+#include "opencl_core.hpp"
+
+#ifndef CL_DEVICE_SVM_CAPABILITIES_AMD
+//
+//  Part of the file is an extract from the cl_ext.h file from AMD APP SDK package.
+//  Below is the original copyright.
+//
+/*******************************************************************************
+ * Copyright (c) 2008-2013 The Khronos Group Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and/or associated documentation files (the
+ * "Materials"), to deal in the Materials without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Materials, and to
+ * permit persons to whom the Materials are furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Materials.
+ *
+ * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
+ ******************************************************************************/
+
+/*******************************************
+ * Shared Virtual Memory (SVM) extension
+ *******************************************/
+typedef cl_bitfield                      cl_device_svm_capabilities_amd;
+typedef cl_bitfield                      cl_svm_mem_flags_amd;
+typedef cl_uint                          cl_kernel_exec_info_amd;
+
+/* cl_device_info */
+#define CL_DEVICE_SVM_CAPABILITIES_AMD                     0x1053
+#define CL_DEVICE_PREFERRED_PLATFORM_ATOMIC_ALIGNMENT_AMD  0x1054
+
+/* cl_device_svm_capabilities_amd */
+#define CL_DEVICE_SVM_COARSE_GRAIN_BUFFER_AMD             (1 << 0)
+#define CL_DEVICE_SVM_FINE_GRAIN_BUFFER_AMD               (1 << 1)
+#define CL_DEVICE_SVM_FINE_GRAIN_SYSTEM_AMD               (1 << 2)
+#define CL_DEVICE_SVM_ATOMICS_AMD                         (1 << 3)
+
+/* cl_svm_mem_flags_amd */
+#define CL_MEM_SVM_FINE_GRAIN_BUFFER_AMD                  (1 << 10)
+#define CL_MEM_SVM_ATOMICS_AMD                            (1 << 11)
+
+/* cl_mem_info */
+#define CL_MEM_USES_SVM_POINTER_AMD                       0x1109
+
+/* cl_kernel_exec_info_amd */
+#define CL_KERNEL_EXEC_INFO_SVM_PTRS_AMD                  0x11B6
+#define CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM_AMD     0x11B7
+
+/* cl_command_type */
+#define CL_COMMAND_SVM_FREE_AMD                           0x1209
+#define CL_COMMAND_SVM_MEMCPY_AMD                         0x120A
+#define CL_COMMAND_SVM_MEMFILL_AMD                        0x120B
+#define CL_COMMAND_SVM_MAP_AMD                            0x120C
+#define CL_COMMAND_SVM_UNMAP_AMD                          0x120D
+
+typedef CL_API_ENTRY void*
+(CL_API_CALL * clSVMAllocAMD_fn)(
+    cl_context            /* context */,
+    cl_svm_mem_flags_amd  /* flags */,
+    size_t                /* size */,
+    unsigned int          /* alignment */
+) CL_EXT_SUFFIX__VERSION_1_2;
+
+typedef CL_API_ENTRY void
+(CL_API_CALL * clSVMFreeAMD_fn)(
+    cl_context  /* context */,
+    void*       /* svm_pointer */
+) CL_EXT_SUFFIX__VERSION_1_2;
+
+typedef CL_API_ENTRY cl_int
+(CL_API_CALL * clEnqueueSVMFreeAMD_fn)(
+    cl_command_queue /* command_queue */,
+    cl_uint          /* num_svm_pointers */,
+    void**           /* svm_pointers */,
+    void (CL_CALLBACK *)( /*pfn_free_func*/
+        cl_command_queue /* queue */,
+        cl_uint          /* num_svm_pointers */,
+        void**           /* svm_pointers */,
+        void*            /* user_data */),
+    void*             /* user_data */,
+    cl_uint           /* num_events_in_wait_list */,
+    const cl_event*   /* event_wait_list */,
+    cl_event*         /* event */
+) CL_EXT_SUFFIX__VERSION_1_2;
+
+typedef CL_API_ENTRY cl_int
+(CL_API_CALL * clEnqueueSVMMemcpyAMD_fn)(
+    cl_command_queue /* command_queue */,
+    cl_bool          /* blocking_copy */,
+    void*            /* dst_ptr */,
+    const void*      /* src_ptr */,
+    size_t           /* size */,
+    cl_uint          /* num_events_in_wait_list */,
+    const cl_event*  /* event_wait_list */,
+    cl_event*        /* event */
+) CL_EXT_SUFFIX__VERSION_1_2;
+
+typedef CL_API_ENTRY cl_int
+(CL_API_CALL * clEnqueueSVMMemFillAMD_fn)(
+    cl_command_queue /* command_queue */,
+    void*            /* svm_ptr */,
+    const void*      /* pattern */,
+    size_t           /* pattern_size */,
+    size_t           /* size */,
+    cl_uint          /* num_events_in_wait_list */,
+    const cl_event*  /* event_wait_list */,
+    cl_event*        /* event */
+) CL_EXT_SUFFIX__VERSION_1_2;
+
+typedef CL_API_ENTRY cl_int
+(CL_API_CALL * clEnqueueSVMMapAMD_fn)(
+    cl_command_queue /* command_queue */,
+    cl_bool          /* blocking_map */,
+    cl_map_flags     /* map_flags */,
+    void*            /* svm_ptr */,
+    size_t           /* size */,
+    cl_uint          /* num_events_in_wait_list */,
+    const cl_event*  /* event_wait_list */,
+    cl_event*        /* event */
+) CL_EXT_SUFFIX__VERSION_1_2;
+
+typedef CL_API_ENTRY cl_int
+(CL_API_CALL * clEnqueueSVMUnmapAMD_fn)(
+    cl_command_queue /* command_queue */,
+    void*            /* svm_ptr */,
+    cl_uint          /* num_events_in_wait_list */,
+    const cl_event*  /* event_wait_list */,
+    cl_event*        /* event */
+) CL_EXT_SUFFIX__VERSION_1_2;
+
+typedef CL_API_ENTRY cl_int
+(CL_API_CALL * clSetKernelArgSVMPointerAMD_fn)(
+    cl_kernel     /* kernel */,
+    cl_uint       /* arg_index */,
+    const void *  /* arg_value */
+) CL_EXT_SUFFIX__VERSION_1_2;
+
+typedef CL_API_ENTRY cl_int
+(CL_API_CALL * clSetKernelExecInfoAMD_fn)(
+     cl_kernel                /* kernel */,
+     cl_kernel_exec_info_amd  /* param_name */,
+     size_t                   /* param_value_size */,
+     const void *             /* param_value */
+) CL_EXT_SUFFIX__VERSION_1_2;
+
+#endif
+
+#endif // HAVE_OPENCL_SVM
+
+#endif // OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_HSA_EXTENSION_HPP
diff --git a/IPL/include/opencv/opencv2/core/opengl.hpp b/IPL/include/opencv/opencv2/core/opengl.hpp
index fd47c52..a311ce2 100644
--- a/IPL/include/opencv/opencv2/core/opengl.hpp
+++ b/IPL/include/opencv/opencv2/core/opengl.hpp
@@ -40,8 +40,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_CORE_OPENGL_HPP__
-#define __OPENCV_CORE_OPENGL_HPP__
+#ifndef OPENCV_CORE_OPENGL_HPP
+#define OPENCV_CORE_OPENGL_HPP
 
 #ifndef __cplusplus
 #  error opengl.hpp header must be compiled as C++
@@ -245,7 +245,7 @@ class CV_EXPORTS Buffer
 
     /** @brief Maps OpenGL buffer to CUDA device memory.
 
-    This operatation doesn't copy data. Several buffer objects can be mapped to CUDA memory at a time.
+    This operation doesn't copy data. Several buffer objects can be mapped to CUDA memory at a time.
 
     A mapped data store must be unmapped with ogl::Buffer::unmapDevice before its buffer object is used.
      */
@@ -548,7 +548,7 @@ calling unmapGLBuffer() function.
 @param accessFlags - data access flags (ACCESS_READ|ACCESS_WRITE).
 @return Returns UMat object
  */
-CV_EXPORTS UMat mapGLBuffer(const Buffer& buffer, int accessFlags = ACCESS_READ|ACCESS_WRITE);
+CV_EXPORTS UMat mapGLBuffer(const Buffer& buffer, AccessFlag accessFlags = ACCESS_READ | ACCESS_WRITE);
 
 /** @brief Unmaps Buffer object (releases UMat, previously mapped from Buffer).
 
@@ -558,13 +558,11 @@ by the call to mapGLBuffer() function.
  */
 CV_EXPORTS void unmapGLBuffer(UMat& u);
 
+//! @}
 }} // namespace cv::ogl
 
 namespace cv { namespace cuda {
 
-//! @addtogroup cuda
-//! @{
-
 /** @brief Sets a CUDA device and initializes it for the current thread with OpenGL interoperability.
 
 This function should be explicitly called after OpenGL context creation and before any CUDA calls.
@@ -573,8 +571,6 @@ This function should be explicitly called after OpenGL context creation and befo
  */
 CV_EXPORTS void setGlDevice(int device = 0);
 
-//! @}
-
 }}
 
 //! @cond IGNORED
@@ -726,4 +722,4 @@ bool cv::ogl::Arrays::empty() const
 
 //! @endcond
 
-#endif /* __OPENCV_CORE_OPENGL_HPP__ */
+#endif /* OPENCV_CORE_OPENGL_HPP */
diff --git a/IPL/include/opencv/opencv2/core/operations.hpp b/IPL/include/opencv/opencv2/core/operations.hpp
index bced1a7..bde28c4 100644
--- a/IPL/include/opencv/opencv2/core/operations.hpp
+++ b/IPL/include/opencv/opencv2/core/operations.hpp
@@ -42,8 +42,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_CORE_OPERATIONS_HPP__
-#define __OPENCV_CORE_OPERATIONS_HPP__
+#ifndef OPENCV_CORE_OPERATIONS_HPP
+#define OPENCV_CORE_OPERATIONS_HPP
 
 #ifndef __cplusplus
 #  error operations.hpp header must be compiled as C++
@@ -51,6 +51,16 @@
 
 #include <cstdio>
 
+#if defined(__GNUC__) || defined(__clang__) // at least GCC 3.1+, clang 3.5+
+#  if defined(__MINGW_PRINTF_FORMAT)  // https://sourceforge.net/p/mingw-w64/wiki2/gnu%20printf/.
+#    define CV_FORMAT_PRINTF(string_idx, first_to_check) __attribute__ ((format (__MINGW_PRINTF_FORMAT, string_idx, first_to_check)))
+#  else
+#    define CV_FORMAT_PRINTF(string_idx, first_to_check) __attribute__ ((format (printf, string_idx, first_to_check)))
+#  endif
+#else
+#  define CV_FORMAT_PRINTF(A, B)
+#endif
+
 //! @cond IGNORED
 
 namespace cv
@@ -61,29 +71,44 @@ namespace cv
 namespace internal
 {
 
-template<typename _Tp, int m> struct Matx_FastInvOp
+template<typename _Tp, int m, int n> struct Matx_FastInvOp
+{
+    bool operator()(const Matx<_Tp, m, n>& a, Matx<_Tp, n, m>& b, int method) const
+    {
+        return invert(a, b, method) != 0;
+    }
+};
+
+template<typename _Tp, int m> struct Matx_FastInvOp<_Tp, m, m>
 {
     bool operator()(const Matx<_Tp, m, m>& a, Matx<_Tp, m, m>& b, int method) const
     {
-        Matx<_Tp, m, m> temp = a;
+        if (method == DECOMP_LU || method == DECOMP_CHOLESKY)
+        {
+            Matx<_Tp, m, m> temp = a;
 
-        // assume that b is all 0's on input => make it a unity matrix
-        for( int i = 0; i < m; i++ )
-            b(i, i) = (_Tp)1;
+            // assume that b is all 0's on input => make it a unity matrix
+            for (int i = 0; i < m; i++)
+                b(i, i) = (_Tp)1;
 
-        if( method == DECOMP_CHOLESKY )
-            return Cholesky(temp.val, m*sizeof(_Tp), m, b.val, m*sizeof(_Tp), m);
+            if (method == DECOMP_CHOLESKY)
+                return Cholesky(temp.val, m*sizeof(_Tp), m, b.val, m*sizeof(_Tp), m);
 
-        return LU(temp.val, m*sizeof(_Tp), m, b.val, m*sizeof(_Tp), m) != 0;
+            return LU(temp.val, m*sizeof(_Tp), m, b.val, m*sizeof(_Tp), m) != 0;
+        }
+        else
+        {
+            return invert(a, b, method) != 0;
+        }
     }
 };
 
-template<typename _Tp> struct Matx_FastInvOp<_Tp, 2>
+template<typename _Tp> struct Matx_FastInvOp<_Tp, 2, 2>
 {
-    bool operator()(const Matx<_Tp, 2, 2>& a, Matx<_Tp, 2, 2>& b, int) const
+    bool operator()(const Matx<_Tp, 2, 2>& a, Matx<_Tp, 2, 2>& b, int /*method*/) const
     {
-        _Tp d = determinant(a);
-        if( d == 0 )
+        _Tp d = (_Tp)determinant(a);
+        if (d == 0)
             return false;
         d = 1/d;
         b(1,1) = a(0,0)*d;
@@ -94,12 +119,12 @@ template<typename _Tp> struct Matx_FastInvOp<_Tp, 2>
     }
 };
 
-template<typename _Tp> struct Matx_FastInvOp<_Tp, 3>
+template<typename _Tp> struct Matx_FastInvOp<_Tp, 3, 3>
 {
-    bool operator()(const Matx<_Tp, 3, 3>& a, Matx<_Tp, 3, 3>& b, int) const
+    bool operator()(const Matx<_Tp, 3, 3>& a, Matx<_Tp, 3, 3>& b, int /*method*/) const
     {
         _Tp d = (_Tp)determinant(a);
-        if( d == 0 )
+        if (d == 0)
             return false;
         d = 1/d;
         b(0,0) = (a(1,1) * a(2,2) - a(1,2) * a(2,1)) * d;
@@ -118,27 +143,43 @@ template<typename _Tp> struct Matx_FastInvOp<_Tp, 3>
 };
 
 
-template<typename _Tp, int m, int n> struct Matx_FastSolveOp
+template<typename _Tp, int m, int l, int n> struct Matx_FastSolveOp
+{
+    bool operator()(const Matx<_Tp, m, l>& a, const Matx<_Tp, m, n>& b,
+                    Matx<_Tp, l, n>& x, int method) const
+    {
+        return cv::solve(a, b, x, method);
+    }
+};
+
+template<typename _Tp, int m, int n> struct Matx_FastSolveOp<_Tp, m, m, n>
 {
     bool operator()(const Matx<_Tp, m, m>& a, const Matx<_Tp, m, n>& b,
                     Matx<_Tp, m, n>& x, int method) const
     {
-        Matx<_Tp, m, m> temp = a;
-        x = b;
-        if( method == DECOMP_CHOLESKY )
-            return Cholesky(temp.val, m*sizeof(_Tp), m, x.val, n*sizeof(_Tp), n);
+        if (method == DECOMP_LU || method == DECOMP_CHOLESKY)
+        {
+            Matx<_Tp, m, m> temp = a;
+            x = b;
+            if( method == DECOMP_CHOLESKY )
+                return Cholesky(temp.val, m*sizeof(_Tp), m, x.val, n*sizeof(_Tp), n);
 
-        return LU(temp.val, m*sizeof(_Tp), m, x.val, n*sizeof(_Tp), n) != 0;
+            return LU(temp.val, m*sizeof(_Tp), m, x.val, n*sizeof(_Tp), n) != 0;
+        }
+        else
+        {
+            return cv::solve(a, b, x, method);
+        }
     }
 };
 
-template<typename _Tp> struct Matx_FastSolveOp<_Tp, 2, 1>
+template<typename _Tp> struct Matx_FastSolveOp<_Tp, 2, 2, 1>
 {
     bool operator()(const Matx<_Tp, 2, 2>& a, const Matx<_Tp, 2, 1>& b,
                     Matx<_Tp, 2, 1>& x, int) const
     {
-        _Tp d = determinant(a);
-        if( d == 0 )
+        _Tp d = (_Tp)determinant(a);
+        if (d == 0)
             return false;
         d = 1/d;
         x(0) = (b(0)*a(1,1) - b(1)*a(0,1))*d;
@@ -147,13 +188,13 @@ template<typename _Tp> struct Matx_FastSolveOp<_Tp, 2, 1>
     }
 };
 
-template<typename _Tp> struct Matx_FastSolveOp<_Tp, 3, 1>
+template<typename _Tp> struct Matx_FastSolveOp<_Tp, 3, 3, 1>
 {
     bool operator()(const Matx<_Tp, 3, 3>& a, const Matx<_Tp, 3, 1>& b,
                     Matx<_Tp, 3, 1>& x, int) const
     {
         _Tp d = (_Tp)determinant(a);
-        if( d == 0 )
+        if (d == 0)
             return false;
         d = 1/d;
         x(0) = d*(b(0)*(a(1,1)*a(2,2) - a(1,2)*a(2,1)) -
@@ -193,15 +234,8 @@ template<typename _Tp, int m, int n> inline
 Matx<_Tp, n, m> Matx<_Tp, m, n>::inv(int method, bool *p_is_ok /*= NULL*/) const
 {
     Matx<_Tp, n, m> b;
-    bool ok;
-    if( method == DECOMP_LU || method == DECOMP_CHOLESKY )
-        ok = cv::internal::Matx_FastInvOp<_Tp, m>()(*this, b, method);
-    else
-    {
-        Mat A(*this, false), B(b, false);
-        ok = (invert(A, B, method) != 0);
-    }
-    if( NULL != p_is_ok ) { *p_is_ok = ok; }
+    bool ok = cv::internal::Matx_FastInvOp<_Tp, m, n>()(*this, b, method);
+    if (p_is_ok) *p_is_ok = ok;
     return ok ? b : Matx<_Tp, n, m>::zeros();
 }
 
@@ -209,15 +243,7 @@ template<typename _Tp, int m, int n> template<int l> inline
 Matx<_Tp, n, l> Matx<_Tp, m, n>::solve(const Matx<_Tp, m, l>& rhs, int method) const
 {
     Matx<_Tp, n, l> x;
-    bool ok;
-    if( method == DECOMP_LU || method == DECOMP_CHOLESKY )
-        ok = cv::internal::Matx_FastSolveOp<_Tp, m, l>()(*this, rhs, x, method);
-    else
-    {
-        Mat A(*this, false), B(rhs, false), X(x, false);
-        ok = cv::solve(A, B, X, method);
-    }
-
+    bool ok = cv::internal::Matx_FastSolveOp<_Tp, m, n, l>()(*this, rhs, x, method);
     return ok ? x : Matx<_Tp, n, l>::zeros();
 }
 
@@ -236,48 +262,67 @@ Matx<_Tp, n, l> Matx<_Tp, m, n>::solve(const Matx<_Tp, m, l>& rhs, int method) c
     template<typename _Tp> CV_MAT_AUG_OPERATOR1(op, cvop, A, B) \
     template<typename _Tp> CV_MAT_AUG_OPERATOR1(op, cvop, const A, B)
 
-CV_MAT_AUG_OPERATOR  (+=, cv::add(a,b,a), Mat, Mat)
-CV_MAT_AUG_OPERATOR  (+=, cv::add(a,b,a), Mat, Scalar)
-CV_MAT_AUG_OPERATOR_T(+=, cv::add(a,b,a), Mat_<_Tp>, Mat)
-CV_MAT_AUG_OPERATOR_T(+=, cv::add(a,b,a), Mat_<_Tp>, Scalar)
-CV_MAT_AUG_OPERATOR_T(+=, cv::add(a,b,a), Mat_<_Tp>, Mat_<_Tp>)
-
-CV_MAT_AUG_OPERATOR  (-=, cv::subtract(a,b,a), Mat, Mat)
-CV_MAT_AUG_OPERATOR  (-=, cv::subtract(a,b,a), Mat, Scalar)
-CV_MAT_AUG_OPERATOR_T(-=, cv::subtract(a,b,a), Mat_<_Tp>, Mat)
-CV_MAT_AUG_OPERATOR_T(-=, cv::subtract(a,b,a), Mat_<_Tp>, Scalar)
-CV_MAT_AUG_OPERATOR_T(-=, cv::subtract(a,b,a), Mat_<_Tp>, Mat_<_Tp>)
+#define CV_MAT_AUG_OPERATOR_TN(op, cvop, A)                                \
+    template<typename _Tp, int m, int n> static inline A& operator op (A& a, const Matx<_Tp,m,n>& b) { cvop; return a; } \
+    template<typename _Tp, int m, int n> static inline const A& operator op (const A& a, const Matx<_Tp,m,n>& b) { cvop; return a; }
+
+CV_MAT_AUG_OPERATOR  (+=, cv::add(a, b, (const Mat&)a), Mat, Mat)
+CV_MAT_AUG_OPERATOR  (+=, cv::add(a, b, (const Mat&)a), Mat, Scalar)
+CV_MAT_AUG_OPERATOR_T(+=, cv::add(a, b, (const Mat&)a), Mat_<_Tp>, Mat)
+CV_MAT_AUG_OPERATOR_T(+=, cv::add(a, b, (const Mat&)a), Mat_<_Tp>, Scalar)
+CV_MAT_AUG_OPERATOR_T(+=, cv::add(a, b, (const Mat&)a), Mat_<_Tp>, Mat_<_Tp>)
+CV_MAT_AUG_OPERATOR_TN(+=, cv::add(a, Mat(b), (const Mat&)a), Mat)
+CV_MAT_AUG_OPERATOR_TN(+=, cv::add(a, Mat(b), (const Mat&)a), Mat_<_Tp>)
+
+CV_MAT_AUG_OPERATOR  (-=, cv::subtract(a, b, (const Mat&)a), Mat, Mat)
+CV_MAT_AUG_OPERATOR  (-=, cv::subtract(a, b, (const Mat&)a), Mat, Scalar)
+CV_MAT_AUG_OPERATOR_T(-=, cv::subtract(a, b, (const Mat&)a), Mat_<_Tp>, Mat)
+CV_MAT_AUG_OPERATOR_T(-=, cv::subtract(a, b, (const Mat&)a), Mat_<_Tp>, Scalar)
+CV_MAT_AUG_OPERATOR_T(-=, cv::subtract(a, b, (const Mat&)a), Mat_<_Tp>, Mat_<_Tp>)
+CV_MAT_AUG_OPERATOR_TN(-=, cv::subtract(a, Mat(b), (const Mat&)a), Mat)
+CV_MAT_AUG_OPERATOR_TN(-=, cv::subtract(a, Mat(b), (const Mat&)a), Mat_<_Tp>)
 
 CV_MAT_AUG_OPERATOR  (*=, cv::gemm(a, b, 1, Mat(), 0, a, 0), Mat, Mat)
 CV_MAT_AUG_OPERATOR_T(*=, cv::gemm(a, b, 1, Mat(), 0, a, 0), Mat_<_Tp>, Mat)
 CV_MAT_AUG_OPERATOR_T(*=, cv::gemm(a, b, 1, Mat(), 0, a, 0), Mat_<_Tp>, Mat_<_Tp>)
 CV_MAT_AUG_OPERATOR  (*=, a.convertTo(a, -1, b), Mat, double)
 CV_MAT_AUG_OPERATOR_T(*=, a.convertTo(a, -1, b), Mat_<_Tp>, double)
+CV_MAT_AUG_OPERATOR_TN(*=, cv::gemm(a, Mat(b), 1, Mat(), 0, a, 0), Mat)
+CV_MAT_AUG_OPERATOR_TN(*=, cv::gemm(a, Mat(b), 1, Mat(), 0, a, 0), Mat_<_Tp>)
 
-CV_MAT_AUG_OPERATOR  (/=, cv::divide(a,b,a), Mat, Mat)
-CV_MAT_AUG_OPERATOR_T(/=, cv::divide(a,b,a), Mat_<_Tp>, Mat)
-CV_MAT_AUG_OPERATOR_T(/=, cv::divide(a,b,a), Mat_<_Tp>, Mat_<_Tp>)
+CV_MAT_AUG_OPERATOR  (/=, cv::divide(a, b, (const Mat&)a), Mat, Mat)
+CV_MAT_AUG_OPERATOR_T(/=, cv::divide(a, b, (const Mat&)a), Mat_<_Tp>, Mat)
+CV_MAT_AUG_OPERATOR_T(/=, cv::divide(a, b, (const Mat&)a), Mat_<_Tp>, Mat_<_Tp>)
 CV_MAT_AUG_OPERATOR  (/=, a.convertTo((Mat&)a, -1, 1./b), Mat, double)
 CV_MAT_AUG_OPERATOR_T(/=, a.convertTo((Mat&)a, -1, 1./b), Mat_<_Tp>, double)
-
-CV_MAT_AUG_OPERATOR  (&=, cv::bitwise_and(a,b,a), Mat, Mat)
-CV_MAT_AUG_OPERATOR  (&=, cv::bitwise_and(a,b,a), Mat, Scalar)
-CV_MAT_AUG_OPERATOR_T(&=, cv::bitwise_and(a,b,a), Mat_<_Tp>, Mat)
-CV_MAT_AUG_OPERATOR_T(&=, cv::bitwise_and(a,b,a), Mat_<_Tp>, Scalar)
-CV_MAT_AUG_OPERATOR_T(&=, cv::bitwise_and(a,b,a), Mat_<_Tp>, Mat_<_Tp>)
-
-CV_MAT_AUG_OPERATOR  (|=, cv::bitwise_or(a,b,a), Mat, Mat)
-CV_MAT_AUG_OPERATOR  (|=, cv::bitwise_or(a,b,a), Mat, Scalar)
-CV_MAT_AUG_OPERATOR_T(|=, cv::bitwise_or(a,b,a), Mat_<_Tp>, Mat)
-CV_MAT_AUG_OPERATOR_T(|=, cv::bitwise_or(a,b,a), Mat_<_Tp>, Scalar)
-CV_MAT_AUG_OPERATOR_T(|=, cv::bitwise_or(a,b,a), Mat_<_Tp>, Mat_<_Tp>)
-
-CV_MAT_AUG_OPERATOR  (^=, cv::bitwise_xor(a,b,a), Mat, Mat)
-CV_MAT_AUG_OPERATOR  (^=, cv::bitwise_xor(a,b,a), Mat, Scalar)
-CV_MAT_AUG_OPERATOR_T(^=, cv::bitwise_xor(a,b,a), Mat_<_Tp>, Mat)
-CV_MAT_AUG_OPERATOR_T(^=, cv::bitwise_xor(a,b,a), Mat_<_Tp>, Scalar)
-CV_MAT_AUG_OPERATOR_T(^=, cv::bitwise_xor(a,b,a), Mat_<_Tp>, Mat_<_Tp>)
-
+CV_MAT_AUG_OPERATOR_TN(/=, cv::divide(a, Mat(b), (const Mat&)a), Mat)
+CV_MAT_AUG_OPERATOR_TN(/=, cv::divide(a, Mat(b), (const Mat&)a), Mat_<_Tp>)
+
+CV_MAT_AUG_OPERATOR  (&=, cv::bitwise_and(a, b, (const Mat&)a), Mat, Mat)
+CV_MAT_AUG_OPERATOR  (&=, cv::bitwise_and(a, b, (const Mat&)a), Mat, Scalar)
+CV_MAT_AUG_OPERATOR_T(&=, cv::bitwise_and(a, b, (const Mat&)a), Mat_<_Tp>, Mat)
+CV_MAT_AUG_OPERATOR_T(&=, cv::bitwise_and(a, b, (const Mat&)a), Mat_<_Tp>, Scalar)
+CV_MAT_AUG_OPERATOR_T(&=, cv::bitwise_and(a, b, (const Mat&)a), Mat_<_Tp>, Mat_<_Tp>)
+CV_MAT_AUG_OPERATOR_TN(&=, cv::bitwise_and(a, Mat(b), (const Mat&)a), Mat)
+CV_MAT_AUG_OPERATOR_TN(&=, cv::bitwise_and(a, Mat(b), (const Mat&)a), Mat_<_Tp>)
+
+CV_MAT_AUG_OPERATOR  (|=, cv::bitwise_or(a, b, (const Mat&)a), Mat, Mat)
+CV_MAT_AUG_OPERATOR  (|=, cv::bitwise_or(a, b, (const Mat&)a), Mat, Scalar)
+CV_MAT_AUG_OPERATOR_T(|=, cv::bitwise_or(a, b, (const Mat&)a), Mat_<_Tp>, Mat)
+CV_MAT_AUG_OPERATOR_T(|=, cv::bitwise_or(a, b, (const Mat&)a), Mat_<_Tp>, Scalar)
+CV_MAT_AUG_OPERATOR_T(|=, cv::bitwise_or(a, b, (const Mat&)a), Mat_<_Tp>, Mat_<_Tp>)
+CV_MAT_AUG_OPERATOR_TN(|=, cv::bitwise_or(a, Mat(b), (const Mat&)a), Mat)
+CV_MAT_AUG_OPERATOR_TN(|=, cv::bitwise_or(a, Mat(b), (const Mat&)a), Mat_<_Tp>)
+
+CV_MAT_AUG_OPERATOR  (^=, cv::bitwise_xor(a, b, (const Mat&)a), Mat, Mat)
+CV_MAT_AUG_OPERATOR  (^=, cv::bitwise_xor(a, b, (const Mat&)a), Mat, Scalar)
+CV_MAT_AUG_OPERATOR_T(^=, cv::bitwise_xor(a, b, (const Mat&)a), Mat_<_Tp>, Mat)
+CV_MAT_AUG_OPERATOR_T(^=, cv::bitwise_xor(a, b, (const Mat&)a), Mat_<_Tp>, Scalar)
+CV_MAT_AUG_OPERATOR_T(^=, cv::bitwise_xor(a, b, (const Mat&)a), Mat_<_Tp>, Mat_<_Tp>)
+CV_MAT_AUG_OPERATOR_TN(^=, cv::bitwise_xor(a, Mat(b), (const Mat&)a), Mat)
+CV_MAT_AUG_OPERATOR_TN(^=, cv::bitwise_xor(a, Mat(b), (const Mat&)a), Mat_<_Tp>)
+
+#undef CV_MAT_AUG_OPERATOR_TN
 #undef CV_MAT_AUG_OPERATOR_T
 #undef CV_MAT_AUG_OPERATOR
 #undef CV_MAT_AUG_OPERATOR1
@@ -349,13 +394,15 @@ inline int    RNG::uniform(int a, int b)       { return a == b ? a : (int)(next(
 inline float  RNG::uniform(float a, float b)   { return ((float)*this)*(b - a) + a; }
 inline double RNG::uniform(double a, double b) { return ((double)*this)*(b - a) + a; }
 
+inline bool RNG::operator ==(const RNG& other) const { return state == other.state; }
+
 inline unsigned RNG::next()
 {
     state = (uint64)(unsigned)state* /*CV_RNG_COEFF*/ 4164903690U + (unsigned)(state >> 32);
     return (unsigned)state;
 }
 
-//! returns the next unifomly-distributed random number of the specified type
+//! returns the next uniformly-distributed random number of the specified type
 template<typename _Tp> static inline _Tp randu()
 {
   return (_Tp)theRNG();
@@ -363,12 +410,29 @@ template<typename _Tp> static inline _Tp randu()
 
 ///////////////////////////////// Formatted string generation /////////////////////////////////
 
-CV_EXPORTS String format( const char* fmt, ... );
+/** @brief Returns a text string formatted using the printf-like expression.
+
+The function acts like sprintf but forms and returns an STL string. It can be used to form an error
+message in the Exception constructor.
+@param fmt printf-compatible formatting specifiers.
+
+**Note**:
+|Type|Specifier|
+|-|-|
+|`const char*`|`%s`|
+|`char`|`%c`|
+|`float` / `double`|`%f`,`%g`|
+|`int`, `long`, `long long`|`%d`, `%ld`, `%lld`|
+|`unsigned`, `unsigned long`, `unsigned long long`|`%u`, `%lu`, `%llu`|
+|`uint64` -> `uintmax_t`, `int64` -> `intmax_t`|`%ju`, `%jd`|
+|`size_t`|`%zu`|
+ */
+CV_EXPORTS String format( const char* fmt, ... ) CV_FORMAT_PRINTF(1, 2);
 
 ///////////////////////////////// Formatted output of cv::Mat /////////////////////////////////
 
 static inline
-Ptr<Formatted> format(InputArray mtx, int fmt)
+Ptr<Formatted> format(InputArray mtx, Formatter::FormatType fmt)
 {
     return Formatter::get(fmt)->format(mtx.getMat());
 }
diff --git a/IPL/include/opencv/opencv2/core/optim.hpp b/IPL/include/opencv/opencv2/core/optim.hpp
index 23e2155..f61a2b9 100644
--- a/IPL/include/opencv/opencv2/core/optim.hpp
+++ b/IPL/include/opencv/opencv2/core/optim.hpp
@@ -39,8 +39,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_OPTIM_HPP__
-#define __OPENCV_OPTIM_HPP__
+#ifndef OPENCV_OPTIM_HPP
+#define OPENCV_OPTIM_HPP
 
 #include "opencv2/core.hpp"
 
@@ -73,7 +73,7 @@ class CV_EXPORTS MinProblemSolver : public Algorithm
     /** @brief Getter for the optimized function.
 
     The optimized function is represented by Function interface, which requires derivatives to
-    implement the sole method calc(double*) to evaluate the function.
+    implement the calc(double*) and getDim() methods to evaluate the function.
 
     @return Smart-pointer to an object that implements Function interface - it represents the
     function that is being optimized. It can be empty, if no function was given so far.
@@ -115,7 +115,7 @@ class CV_EXPORTS MinProblemSolver : public Algorithm
     always sensible) will be used.
 
     @param x The initial point, that will become a centroid of an initial simplex. After the algorithm
-    will terminate, it will be setted to the point where the algorithm stops, the point of possible
+    will terminate, it will be set to the point where the algorithm stops, the point of possible
     minimum.
     @return The value of a function at the point found.
      */
@@ -165,7 +165,7 @@ class CV_EXPORTS DownhillSolver : public MinProblemSolver
 
     /** @brief Sets the initial step that will be used in downhill simplex algorithm.
 
-    Step, together with initial point (givin in DownhillSolver::minimize) are two `n`-dimensional
+    Step, together with initial point (given in DownhillSolver::minimize) are two `n`-dimensional
     vectors that are used to determine the shape of initial simplex. Roughly said, initial point
     determines the position of a simplex (it will become simplex's centroid), while step determines the
     spread (size in each dimension) of a simplex. To be more precise, if \f$s,x_0\in\mathbb{R}^n\f$ are
@@ -219,10 +219,10 @@ converge to it. Another obvious restriction is that it should be possible to com
 a function at any point, thus it is preferable to have analytic expression for gradient and
 computational burden should be born by the user.
 
-The latter responsibility is accompilished via the getGradient method of a
+The latter responsibility is accomplished via the getGradient method of a
 MinProblemSolver::Function interface (which represents function being optimized). This method takes
 point a point in *n*-dimensional space (first argument represents the array of coordinates of that
-point) and comput its gradient (it should be stored in the second argument as an array).
+point) and computes its gradient (it should be stored in the second argument as an array).
 
 @note class ConjGradSolver thus does not add any new methods to the basic MinProblemSolver interface.
 
@@ -288,12 +288,12 @@ Bland's rule <http://en.wikipedia.org/wiki/Bland%27s_rule> is used to prevent cy
 contain 32- or 64-bit floating point numbers. As a convenience, column-vector may be also submitted,
 in the latter case it is understood to correspond to \f$c^T\f$.
 @param Constr `m`-by-`n+1` matrix, whose rightmost column corresponds to \f$b\f$ in formulation above
-and the remaining to \f$A\f$. It should containt 32- or 64-bit floating point numbers.
+and the remaining to \f$A\f$. It should contain 32- or 64-bit floating point numbers.
 @param z The solution will be returned here as a column-vector - it corresponds to \f$c\f$ in the
 formulation above. It will contain 64-bit floating point numbers.
 @return One of cv::SolveLPResult
  */
-CV_EXPORTS_W int solveLP(const Mat& Func, const Mat& Constr, Mat& z);
+CV_EXPORTS_W int solveLP(InputArray Func, InputArray Constr, OutputArray z);
 
 //! @}
 
diff --git a/IPL/include/opencv/opencv2/core/ovx.hpp b/IPL/include/opencv/opencv2/core/ovx.hpp
new file mode 100644
index 0000000..8bb7d54
--- /dev/null
+++ b/IPL/include/opencv/opencv2/core/ovx.hpp
@@ -0,0 +1,28 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+// Copyright (C) 2016, Intel Corporation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+
+// OpenVX related definitions and declarations
+
+#pragma once
+#ifndef OPENCV_OVX_HPP
+#define OPENCV_OVX_HPP
+
+#include "cvdef.h"
+
+namespace cv
+{
+/// Check if use of OpenVX is possible
+CV_EXPORTS_W bool haveOpenVX();
+
+/// Check if use of OpenVX is enabled
+CV_EXPORTS_W bool useOpenVX();
+
+/// Enable/disable use of OpenVX
+CV_EXPORTS_W void setUseOpenVX(bool flag);
+} // namespace cv
+
+#endif // OPENCV_OVX_HPP
diff --git a/IPL/include/opencv/opencv2/core/persistence.hpp b/IPL/include/opencv/opencv2/core/persistence.hpp
index 17686dd..0ddd3c0 100644
--- a/IPL/include/opencv/opencv2/core/persistence.hpp
+++ b/IPL/include/opencv/opencv2/core/persistence.hpp
@@ -41,8 +41,13 @@
 //
 //M*/
 
-#ifndef __OPENCV_CORE_PERSISTENCE_HPP__
-#define __OPENCV_CORE_PERSISTENCE_HPP__
+#ifndef OPENCV_CORE_PERSISTENCE_HPP
+#define OPENCV_CORE_PERSISTENCE_HPP
+
+#ifndef CV_DOXYGEN
+/// Define to support persistence legacy formats
+#define CV__LEGACY_PERSISTENCE
+#endif
 
 #ifndef __cplusplus
 #  error persistence.hpp header must be compiled as C++
@@ -57,8 +62,9 @@ Several functions that are described below take CvFileStorage\* as inputs and al
 save or to load hierarchical collections that consist of scalar values, standard CXCore objects
 (such as matrices, sequences, graphs), and user-defined objects.
 
-OpenCV can read and write data in XML (<http://www.w3c.org/XML>) or YAML (<http://www.yaml.org>)
-formats. Below is an example of 3x3 floating-point identity matrix A, stored in XML and YAML files
+OpenCV can read and write data in XML (<http://www.w3c.org/XML>), YAML (<http://www.yaml.org>) or
+JSON (<http://www.json.org/>) formats. Below is an example of 3x3 floating-point identity matrix A,
+stored in XML and YAML files
 using CXCore functions:
 XML:
 @code{.xml}
@@ -85,10 +91,9 @@ As it can be seen from the examples, XML uses nested tags to represent hierarchy
 indentation for that purpose (similar to the Python programming language).
 
 The same functions can read and write data in both formats; the particular format is determined by
-the extension of the opened file, ".xml" for XML files and ".yml" or ".yaml" for YAML.
+the extension of the opened file, ".xml" for XML files, ".yml" or ".yaml" for YAML and ".json" for
+JSON.
  */
-typedef struct CvFileStorage CvFileStorage;
-typedef struct CvFileNode CvFileNode;
 
 //! @} core_c
 
@@ -99,27 +104,27 @@ namespace cv {
 
 /** @addtogroup core_xml
 
-XML/YAML file storages.     {#xml_storage}
+XML/YAML/JSON file storages.     {#xml_storage}
 =======================
 Writing to a file storage.
 --------------------------
-You can store and then restore various OpenCV data structures to/from XML (<http://www.w3c.org/XML>)
-or YAML (<http://www.yaml.org>) formats. Also, it is possible store and load arbitrarily complex
-data structures, which include OpenCV data structures, as well as primitive data types (integer and
-floating-point numbers and text strings) as their elements.
+You can store and then restore various OpenCV data structures to/from XML (<http://www.w3c.org/XML>),
+YAML (<http://www.yaml.org>) or JSON (<http://www.json.org/>) formats. Also, it is possible to store
+and load arbitrarily complex data structures, which include OpenCV data structures, as well as
+primitive data types (integer and floating-point numbers and text strings) as their elements.
 
-Use the following procedure to write something to XML or YAML:
+Use the following procedure to write something to XML, YAML or JSON:
 -# Create new FileStorage and open it for writing. It can be done with a single call to
 FileStorage::FileStorage constructor that takes a filename, or you can use the default constructor
-and then call FileStorage::open. Format of the file (XML or YAML) is determined from the filename
-extension (".xml" and ".yml"/".yaml", respectively)
+and then call FileStorage::open. Format of the file (XML, YAML or JSON) is determined from the filename
+extension (".xml", ".yml"/".yaml" and ".json", respectively)
 -# Write all the data you want using the streaming operator `<<`, just like in the case of STL
 streams.
 -# Close the file using FileStorage::release. FileStorage destructor also closes the file.
 
 Here is an example:
 @code
-    #include "opencv2/opencv.hpp"
+    #include "opencv2/core.hpp"
     #include <time.h>
 
     using namespace cv;
@@ -151,7 +156,7 @@ Here is an example:
         return 0;
     }
 @endcode
-The sample above stores to XML and integer, text string (calibration date), 2 matrices, and a custom
+The sample above stores to YAML an integer, a text string (calibration date), 2 matrices, and a custom
 structure "feature", which includes feature coordinates and LBP (local binary pattern) value. Here
 is output of the sample:
 @code{.yaml}
@@ -175,19 +180,19 @@ distCoeffs: !!opencv-matrix
    - { x:344, y:158, lbp:[ 1, 1, 0, 0, 0, 0, 1, 0 ] }
 @endcode
 
-As an exercise, you can replace ".yml" with ".xml" in the sample above and see, how the
+As an exercise, you can replace ".yml" with ".xml" or ".json" in the sample above and see, how the
 corresponding XML file will look like.
 
 Several things can be noted by looking at the sample code and the output:
 
--   The produced YAML (and XML) consists of heterogeneous collections that can be nested. There are 2
-    types of collections: named collections (mappings) and unnamed collections (sequences). In mappings
+-   The produced YAML (and XML/JSON) consists of heterogeneous collections that can be nested. There are
+    2 types of collections: named collections (mappings) and unnamed collections (sequences). In mappings
     each element has a name and is accessed by name. This is similar to structures and std::map in
     C/C++ and dictionaries in Python. In sequences elements do not have names, they are accessed by
     indices. This is similar to arrays and std::vector in C/C++ and lists, tuples in Python.
     "Heterogeneous" means that elements of each single collection can have different types.
 
-    Top-level collection in YAML/XML is a mapping. Each matrix is stored as a mapping, and the matrix
+    Top-level collection in YAML/XML/JSON is a mapping. Each matrix is stored as a mapping, and the matrix
     elements are stored as a sequence. Then, there is a sequence of features, where each feature is
     represented a mapping, and lbp value in a nested sequence.
 
@@ -203,7 +208,7 @@ Several things can be noted by looking at the sample code and the output:
 -   To write a sequence, you first write the special string `[`, then write the elements, then
     write the closing `]`.
 
--   In YAML (but not XML), mappings and sequences can be written in a compact Python-like inline
+-   In YAML/JSON (but not XML), mappings and sequences can be written in a compact Python-like inline
     form. In the sample above matrix elements, as well as each feature, including its lbp value, is
     stored in such inline form. To store a mapping/sequence in a compact form, put `:` after the
     opening character, e.g. use `{:` instead of `{` and `[:` instead of `[`. When the
@@ -211,7 +216,7 @@ Several things can be noted by looking at the sample code and the output:
 
 Reading data from a file storage.
 ---------------------------------
-To read the previously written XML or YAML file, do the following:
+To read the previously written XML, YAML or JSON file, do the following:
 -#  Open the file storage using FileStorage::FileStorage constructor or FileStorage::open method.
     In the current implementation the whole file is parsed and the whole representation of file
     storage is built in memory as a hierarchy of file nodes (see FileNode)
@@ -278,12 +283,12 @@ element is a structure of 2 integers, followed by a single-precision floating-po
 equivalent notations of the above specification are `iif`, `2i1f` and so forth. Other examples: `u`
 means that the array consists of bytes, and `2d` means the array consists of pairs of doubles.
 
-@see @ref filestorage.cpp
+@see @ref samples/cpp/filestorage.cpp
 */
 
 //! @{
 
-/** @example filestorage.cpp
+/** @example samples/cpp/filestorage.cpp
 A complete example using the FileStorage interface
 */
 
@@ -292,8 +297,8 @@ A complete example using the FileStorage interface
 class CV_EXPORTS FileNode;
 class CV_EXPORTS FileNodeIterator;
 
-/** @brief XML/YAML file storage class that encapsulates all the information necessary for writing or reading
-data to/from a file.
+/** @brief XML/YAML/JSON file storage class that encapsulates all the information necessary for writing or
+reading data to/from a file.
  */
 class CV_EXPORTS_W FileStorage
 {
@@ -305,13 +310,17 @@ class CV_EXPORTS_W FileStorage
         WRITE       = 1, //!< value, open the file for writing
         APPEND      = 2, //!< value, open the file for appending
         MEMORY      = 4, //!< flag, read data from source or write data to the internal buffer (which is
-                         //!< returned by FileStorage::release)
+        //!< returned by FileStorage::release)
         FORMAT_MASK = (7<<3), //!< mask for format flags
         FORMAT_AUTO = 0,      //!< flag, auto format
         FORMAT_XML  = (1<<3), //!< flag, XML format
-        FORMAT_YAML = (2<<3)  //!< flag, YAML format
+        FORMAT_YAML = (2<<3), //!< flag, YAML format
+        FORMAT_JSON = (3<<3), //!< flag, JSON format
+
+        BASE64      = 64,     //!< flag, write rawdata in Base64 by default. (consider using WRITE_BASE64)
+        WRITE_BASE64 = BASE64 | WRITE, //!< flag, enable both WRITE and BASE64
     };
-    enum
+    enum State
     {
         UNDEFINED      = 0,
         VALUE_EXPECTED = 1,
@@ -321,129 +330,138 @@ class CV_EXPORTS_W FileStorage
 
     /** @brief The constructors.
 
-    The full constructor opens the file. Alternatively you can use the default constructor and then
-    call FileStorage::open.
+     The full constructor opens the file. Alternatively you can use the default constructor and then
+     call FileStorage::open.
      */
     CV_WRAP FileStorage();
 
     /** @overload
-    @param source Name of the file to open or the text string to read the data from. Extension of the
-    file (.xml or .yml/.yaml) determines its format (XML or YAML respectively). Also you can append .gz
-    to work with compressed files, for example myHugeMatrix.xml.gz. If both FileStorage::WRITE and
-    FileStorage::MEMORY flags are specified, source is used just to specify the output file format (e.g.
-    mydata.xml, .yml etc.).
-    @param flags Mode of operation. See  FileStorage::Mode
-    @param encoding Encoding of the file. Note that UTF-16 XML encoding is not supported currently and
-    you should use 8-bit encoding instead of it.
-    */
-    CV_WRAP FileStorage(const String& source, int flags, const String& encoding=String());
-
-    /** @overload */
-    FileStorage(CvFileStorage* fs, bool owning=true);
+     @copydoc open()
+     */
+    CV_WRAP FileStorage(const String& filename, int flags, const String& encoding=String());
 
     //! the destructor. calls release()
     virtual ~FileStorage();
 
     /** @brief Opens a file.
 
-    See description of parameters in FileStorage::FileStorage. The method calls FileStorage::release
-    before opening the file.
-    @param filename Name of the file to open or the text string to read the data from.
-       Extension of the file (.xml or .yml/.yaml) determines its format (XML or YAML respectively).
-        Also you can append .gz to work with compressed files, for example myHugeMatrix.xml.gz. If both
-        FileStorage::WRITE and FileStorage::MEMORY flags are specified, source is used just to specify
-        the output file format (e.g. mydata.xml, .yml etc.).
-    @param flags Mode of operation. One of FileStorage::Mode
-    @param encoding Encoding of the file. Note that UTF-16 XML encoding is not supported currently and
-    you should use 8-bit encoding instead of it.
+     The method calls FileStorage::release before opening the file; the parameters are described
+     below.
+     @param filename Name of the file to open or the text string to read the data from.
+     Extension of the file (.xml, .yml/.yaml or .json) determines its format (XML, YAML or JSON
+     respectively). Also you can append .gz to work with compressed files, for example myHugeMatrix.xml.gz. If both
+     FileStorage::WRITE and FileStorage::MEMORY flags are specified, filename is used just to specify
+     the output file format (e.g. mydata.xml, .yml etc.). A file name can also contain parameters.
+     You can use this format, "*?base64" (e.g. "file.json?base64" (case sensitive)), as an alternative to
+     FileStorage::BASE64 flag.
+     @param flags Mode of operation. One of FileStorage::Mode
+     @param encoding Encoding of the file. Note that UTF-16 XML encoding is not supported currently and
+     you should use 8-bit encoding instead of it.
      */
     CV_WRAP virtual bool open(const String& filename, int flags, const String& encoding=String());
 
     /** @brief Checks whether the file is opened.
 
-    @returns true if the object is associated with the current file and false otherwise. It is a
-    good practice to call this method after you tried to open a file.
+     @returns true if the object is associated with the current file and false otherwise. It is a
+     good practice to call this method after you tried to open a file.
      */
     CV_WRAP virtual bool isOpened() const;
 
     /** @brief Closes the file and releases all the memory buffers.
 
-    Call this method after all I/O operations with the storage are finished.
+     Call this method after all I/O operations with the storage are finished.
      */
     CV_WRAP virtual void release();
 
     /** @brief Closes the file and releases all the memory buffers.
 
-    Call this method after all I/O operations with the storage are finished. If the storage was
-    opened for writing data and FileStorage::WRITE was specified
+     Call this method after all I/O operations with the storage are finished. If the storage was
+     opened for writing to the internal buffer (FileStorage::WRITE with FileStorage::MEMORY), the buffer is returned.
      */
     CV_WRAP virtual String releaseAndGetString();
 
     /** @brief Returns the first element of the top-level mapping.
-    @returns The first element of the top-level mapping.
+     @returns The first element of the top-level mapping.
      */
     CV_WRAP FileNode getFirstTopLevelNode() const;
 
     /** @brief Returns the top-level mapping
-    @param streamidx Zero-based index of the stream. In most cases there is only one stream in the file.
-    However, YAML supports multiple streams and so there can be several.
-    @returns The top-level mapping.
+     @param streamidx Zero-based index of the stream. In most cases there is only one stream in the file.
+     However, YAML supports multiple streams and so there can be several.
+     @returns The top-level mapping.
      */
     CV_WRAP FileNode root(int streamidx=0) const;
 
     /** @brief Returns the specified element of the top-level mapping.
-    @param nodename Name of the file node.
-    @returns Node with the given name.
+     @param nodename Name of the file node.
+     @returns Node with the given name.
      */
     FileNode operator[](const String& nodename) const;
 
     /** @overload */
-    CV_WRAP FileNode operator[](const char* nodename) const;
+    CV_WRAP_AS(getNode) FileNode operator[](const char* nodename) const;
 
-    /** @brief Returns the obsolete C FileStorage structure.
-    @returns Pointer to the underlying C FileStorage structure
+    /**
+     * @brief Simplified writing API to use with bindings.
+     * @param name Name of the written object
+     * @param val Value of the written object
      */
-    CvFileStorage* operator *() { return fs.get(); }
-
-    /** @overload */
-    const CvFileStorage* operator *() const { return fs.get(); }
+    CV_WRAP void write(const String& name, int val);
+    /// @overload
+    CV_WRAP void write(const String& name, double val);
+    /// @overload
+    CV_WRAP void write(const String& name, const String& val);
+    /// @overload
+    CV_WRAP void write(const String& name, const Mat& val);
+    /// @overload
+    CV_WRAP void write(const String& name, const std::vector<String>& val);
 
     /** @brief Writes multiple numbers.
 
-    Writes one or more numbers of the specified format to the currently written structure. Usually it is
-    more convenient to use operator `<<` instead of this method.
-    @param fmt Specification of each array element, see @ref format_spec "format specification"
-    @param vec Pointer to the written array.
-    @param len Number of the uchar elements to write.
+     Writes one or more numbers of the specified format to the currently written structure. Usually it is
+     more convenient to use operator `<<` instead of this method.
+     @param fmt Specification of each array element, see @ref format_spec "format specification"
+     @param vec Pointer to the written array.
+     @param len Number of the uchar elements to write.
      */
-    void writeRaw( const String& fmt, const uchar* vec, size_t len );
+    void writeRaw( const String& fmt, const void* vec, size_t len );
+
+    /** @brief Writes a comment.
 
-    /** @brief Writes the registered C structure (CvMat, CvMatND, CvSeq).
-    @param name Name of the written object.
-    @param obj Pointer to the object.
-    @see ocvWrite for details.
+     The function writes a comment into file storage. The comments are skipped when the storage is read.
+     @param comment The written comment, single-line or multi-line
+     @param append If true, the function tries to put the comment at the end of current line.
+     Else if the comment is multi-line, or if it does not fit at the end of the current
+     line, the comment starts a new line.
      */
-    void writeObj( const String& name, const void* obj );
+    CV_WRAP void writeComment(const String& comment, bool append = false);
+
+    void startWriteStruct(const String& name, int flags, const String& typeName);
+    void endWriteStruct();
 
     /** @brief Returns the normalized object name for the specified name of a file.
-    @param filename Name of a file
-    @returns The normalized object name.
+     @param filename Name of a file
+     @returns The normalized object name.
      */
     static String getDefaultObjectName(const String& filename);
 
-    Ptr<CvFileStorage> fs; //!< the underlying C FileStorage structure
-    String elname; //!< the currently written element
-    std::vector<char> structs; //!< the stack of written structures
-    int state; //!< the writer state
-};
+    /** @brief Returns the current format.
+     * @returns The current format, see FileStorage::Mode
+     */
+    CV_WRAP int getFormat() const;
+
+    int state;
+    std::string elname;
 
-template<> CV_EXPORTS void DefaultDeleter<CvFileStorage>::operator ()(CvFileStorage* obj) const;
+    class Impl;
+    Ptr<Impl> p;
+};
 
 /** @brief File Storage Node class.
 
 The node is used to store each and every element of the file storage opened for reading. When
 XML/YAML file is read, it is first parsed and stored in the memory as a hierarchical collection of
-nodes. Each node can be a “leaf” that is contain a single number or a string, or be a collection of
+nodes. Each node can be a "leaf" that contains a single number or a string, or be a collection of
 other nodes. There can be named collections (mappings) where each element has a name and it is
 accessed by a name, and ordered collections (sequences) where elements do not have names but rather
 accessed by index. Type of the file node can be determined using FileNode::type method.
@@ -455,7 +473,7 @@ class CV_EXPORTS_W_SIMPLE FileNode
 {
 public:
     //! type of the file storage node
-    enum Type
+    enum
     {
         NONE      = 0, //!< empty node
         INT       = 1, //!< an integer
@@ -463,51 +481,60 @@ class CV_EXPORTS_W_SIMPLE FileNode
         FLOAT     = REAL, //!< synonym or REAL
         STR       = 3, //!< text string in UTF-8 encoding
         STRING    = STR, //!< synonym for STR
-        REF       = 4, //!< integer of size size_t. Typically used for storing complex dynamic structures where some elements reference the others
-        SEQ       = 5, //!< sequence
-        MAP       = 6, //!< mapping
+        SEQ       = 4, //!< sequence
+        MAP       = 5, //!< mapping
         TYPE_MASK = 7,
+
         FLOW      = 8,  //!< compact representation of a sequence or mapping. Used only by YAML writer
-        USER      = 16, //!< a registered object (e.g. a matrix)
-        EMPTY     = 32, //!< empty structure (sequence or mapping)
-        NAMED     = 64  //!< the node has a name (i.e. it is element of a mapping)
+        UNIFORM   = 8,  //!< if set, means that all the collection elements are numbers of the same type (real's or int's).
+        //!< UNIFORM is used only when reading FileStorage; FLOW is used only when writing. So they share the same bit
+        EMPTY     = 16, //!< empty structure (sequence or mapping)
+        NAMED     = 32  //!< the node has a name (i.e. it is element of a mapping).
     };
     /** @brief The constructors.
 
-    These constructors are used to create a default file node, construct it from obsolete structures or
-    from the another file node.
+     These constructors are used to create a default file node, construct it from a location in a
+     file storage (block index and offset) or from another file node.
      */
     CV_WRAP FileNode();
 
     /** @overload
-    @param fs Pointer to the obsolete file storage structure.
-    @param node File node to be used as initialization for the created file node.
-    */
-    FileNode(const CvFileStorage* fs, const CvFileNode* node);
+     @param fs Pointer to the file storage structure.
+     @param blockIdx Index of the memory block where the file node is stored
+     @param ofs Offset in bytes from the beginning of the serialized storage
+     */
+    FileNode(const FileStorage* fs, size_t blockIdx, size_t ofs);
 
     /** @overload
-    @param node File node to be used as initialization for the created file node.
-    */
+     @param node File node to be used as initialization for the created file node.
+     */
     FileNode(const FileNode& node);
 
+    FileNode& operator=(const FileNode& node);
+
     /** @brief Returns element of a mapping node or a sequence node.
-    @param nodename Name of an element in the mapping node.
-    @returns Returns the element with the given identifier.
+     @param nodename Name of an element in the mapping node.
+     @returns Returns the element with the given identifier.
      */
     FileNode operator[](const String& nodename) const;
 
     /** @overload
-    @param nodename Name of an element in the mapping node.
-    */
-    CV_WRAP FileNode operator[](const char* nodename) const;
+     @param nodename Name of an element in the mapping node.
+     */
+    CV_WRAP_AS(getNode) FileNode operator[](const char* nodename) const;
 
     /** @overload
-    @param i Index of an element in the sequence node.
-    */
-    CV_WRAP FileNode operator[](int i) const;
+     @param i Index of an element in the sequence node.
+     */
+    CV_WRAP_AS(at) FileNode operator[](int i) const;
+
+    /** @brief Returns keys of a mapping node.
+     @returns Keys of a mapping node.
+     */
+    CV_WRAP std::vector<String> keys() const;
 
     /** @brief Returns type of the node.
-    @returns Type of the node. See FileNode::Type
+     @returns Type of the node. See the FileNode type constants (FileNode::NONE, FileNode::INT, ...).
      */
     CV_WRAP int type() const;
 
@@ -528,9 +555,11 @@ class CV_EXPORTS_W_SIMPLE FileNode
     //! returns true if the node has a name
     CV_WRAP bool isNamed() const;
     //! returns the node name or an empty string if the node is nameless
-    CV_WRAP String name() const;
+    CV_WRAP std::string name() const;
     //! returns the number of elements in the node, if it is a sequence or mapping, or 1 otherwise.
     CV_WRAP size_t size() const;
+    //! returns raw size of the FileNode in bytes
+    CV_WRAP size_t rawSize() const;
     //! returns the node content as an integer. If the node stores floating-point number, it is rounded.
     operator int() const;
     //! returns the node content as float
@@ -538,15 +567,16 @@ class CV_EXPORTS_W_SIMPLE FileNode
     //! returns the node content as double
     operator double() const;
     //! returns the node content as text string
-    operator String() const;
-#ifndef OPENCV_NOSTL
-    operator std::string() const;
-#endif
+    inline operator std::string() const { return this->string(); }
 
-    //! returns pointer to the underlying file node
-    CvFileNode* operator *();
-    //! returns pointer to the underlying file node
-    const CvFileNode* operator* () const;
+    static bool isMap(int flags);
+    static bool isSeq(int flags);
+    static bool isCollection(int flags);
+    static bool isEmptyCollection(int flags);
+    static bool isFlow(int flags);
+
+    uchar* ptr();
+    const uchar* ptr() const;
 
     //! returns iterator pointing to the first node element
     FileNodeIterator begin() const;
@@ -558,92 +588,95 @@ class CV_EXPORTS_W_SIMPLE FileNode
     Usually it is more convenient to use operator `>>` instead of this method.
     @param fmt Specification of each array element. See @ref format_spec "format specification"
     @param vec Pointer to the destination array.
-    @param len Number of elements to read. If it is greater than number of remaining elements then all
-    of them will be read.
+    @param len Number of bytes to read (buffer size limit). If it is greater than number of
+               remaining elements then all of them will be read.
      */
-    void readRaw( const String& fmt, uchar* vec, size_t len ) const;
-
-    //! reads the registered object and returns pointer to it
-    void* readObj() const;
+    void readRaw( const String& fmt, void* vec, size_t len ) const;
 
-    // do not use wrapper pointer classes for better efficiency
-    const CvFileStorage* fs;
-    const CvFileNode* node;
+    /** Internal method used when reading FileStorage.
+     Sets the type (int, real or string) and value of the previously created node.
+     */
+    void setValue( int type, const void* value, int len=-1 );
+
+    //! Simplified reading API to use with bindings.
+    CV_WRAP double real() const;
+    //! Simplified reading API to use with bindings.
+    CV_WRAP std::string string() const;
+    //! Simplified reading API to use with bindings.
+    CV_WRAP Mat mat() const;
+
+    //protected:
+    const FileStorage* fs;
+    size_t blockIdx;
+    size_t ofs;
 };
 
 
 /** @brief used to iterate through sequences and mappings.
 
-A standard STL notation, with node.begin(), node.end() denoting the beginning and the end of a
-sequence, stored in node. See the data reading sample in the beginning of the section.
+ A standard STL notation, with node.begin(), node.end() denoting the beginning and the end of a
+ sequence, stored in node. See the data reading sample in the beginning of the section.
  */
 class CV_EXPORTS FileNodeIterator
 {
 public:
     /** @brief The constructors.
 
-    These constructors are used to create a default iterator, set it to specific element in a file node
-    or construct it from another iterator.
+     These constructors are used to create a default iterator, set it to specific element in a file node
+     or construct it from another iterator.
      */
     FileNodeIterator();
 
     /** @overload
-    @param fs File storage for the iterator.
-    @param node File node for the iterator.
-    @param ofs Index of the element in the node. The created iterator will point to this element.
-    */
-    FileNodeIterator(const CvFileStorage* fs, const CvFileNode* node, size_t ofs=0);
+     @param node File node - the collection to iterate over;
+        it can be a scalar (equivalent to 1-element collection) or "none" (equivalent to empty collection).
+     @param seekEnd - true if iterator needs to be set after the last element of the node;
+        that is:
+            * node.begin() => FileNodeIterator(node, false)
+            * node.end() => FileNodeIterator(node, true)
+     */
+    FileNodeIterator(const FileNode& node, bool seekEnd);
 
     /** @overload
-    @param it Iterator to be used as initialization for the created iterator.
-    */
+     @param it Iterator to be used as initialization for the created iterator.
+     */
     FileNodeIterator(const FileNodeIterator& it);
 
+    FileNodeIterator& operator=(const FileNodeIterator& it);
+
     //! returns the currently observed element
     FileNode operator *() const;
-    //! accesses the currently observed element methods
-    FileNode operator ->() const;
 
     //! moves iterator to the next node
     FileNodeIterator& operator ++ ();
     //! moves iterator to the next node
     FileNodeIterator operator ++ (int);
-    //! moves iterator to the previous node
-    FileNodeIterator& operator -- ();
-    //! moves iterator to the previous node
-    FileNodeIterator operator -- (int);
     //! moves iterator forward by the specified offset (possibly negative)
     FileNodeIterator& operator += (int ofs);
-    //! moves iterator backward by the specified offset (possibly negative)
-    FileNodeIterator& operator -= (int ofs);
 
     /** @brief Reads node elements to the buffer with the specified format.
 
     Usually it is more convenient to use operator `>>` instead of this method.
     @param fmt Specification of each array element. See @ref format_spec "format specification"
     @param vec Pointer to the destination array.
-    @param maxCount Number of elements to read. If it is greater than number of remaining elements then
-    all of them will be read.
+    @param len Number of bytes to read (buffer size limit). If it is greater than number of
+               remaining elements then all of them will be read.
      */
-    FileNodeIterator& readRaw( const String& fmt, uchar* vec,
-                               size_t maxCount=(size_t)INT_MAX );
+    FileNodeIterator& readRaw( const String& fmt, void* vec,
+                               size_t len=(size_t)INT_MAX );
 
-    struct SeqReader
-    {
-      int          header_size;
-      void*        seq;        /* sequence, beign read; CvSeq      */
-      void*        block;      /* current block;        CvSeqBlock */
-      schar*       ptr;        /* pointer to element be read next */
-      schar*       block_min;  /* pointer to the beginning of block */
-      schar*       block_max;  /* pointer to the end of block */
-      int          delta_index;/* = seq->first->start_index   */
-      schar*       prev_elem;  /* pointer to previous element */
-    };
+    //! returns the number of remaining (not read yet) elements
+    size_t remaining() const;
+
+    bool equalTo(const FileNodeIterator& it) const;
 
-    const CvFileStorage* fs;
-    const CvFileNode* container;
-    SeqReader reader;
-    size_t remaining;
+protected:
+    const FileStorage* fs;
+    size_t blockIdx;
+    size_t ofs;
+    size_t blockSize;
+    size_t nodeNElems;
+    size_t idx;
 };
 
 //! @} core_xml
@@ -659,8 +692,10 @@ CV_EXPORTS void write( FileStorage& fs, const String& name, double value );
 CV_EXPORTS void write( FileStorage& fs, const String& name, const String& value );
 CV_EXPORTS void write( FileStorage& fs, const String& name, const Mat& value );
 CV_EXPORTS void write( FileStorage& fs, const String& name, const SparseMat& value );
+#ifdef CV__LEGACY_PERSISTENCE
 CV_EXPORTS void write( FileStorage& fs, const String& name, const std::vector<KeyPoint>& value);
 CV_EXPORTS void write( FileStorage& fs, const String& name, const std::vector<DMatch>& value);
+#endif
 
 CV_EXPORTS void writeScalar( FileStorage& fs, int value );
 CV_EXPORTS void writeScalar( FileStorage& fs, float value );
@@ -675,11 +710,15 @@ CV_EXPORTS void writeScalar( FileStorage& fs, const String& value );
 CV_EXPORTS void read(const FileNode& node, int& value, int default_value);
 CV_EXPORTS void read(const FileNode& node, float& value, float default_value);
 CV_EXPORTS void read(const FileNode& node, double& value, double default_value);
-CV_EXPORTS void read(const FileNode& node, String& value, const String& default_value);
+CV_EXPORTS void read(const FileNode& node, std::string& value, const std::string& default_value);
 CV_EXPORTS void read(const FileNode& node, Mat& mat, const Mat& default_mat = Mat() );
 CV_EXPORTS void read(const FileNode& node, SparseMat& mat, const SparseMat& default_mat = SparseMat() );
+#ifdef CV__LEGACY_PERSISTENCE
 CV_EXPORTS void read(const FileNode& node, std::vector<KeyPoint>& keypoints);
 CV_EXPORTS void read(const FileNode& node, std::vector<DMatch>& matches);
+#endif
+CV_EXPORTS void read(const FileNode& node, KeyPoint& value, const KeyPoint& default_value);
+CV_EXPORTS void read(const FileNode& node, DMatch& value, const DMatch& default_value);
 
 template<typename _Tp> static inline void read(const FileNode& node, Point_<_Tp>& value, const Point_<_Tp>& default_value)
 {
@@ -719,6 +758,17 @@ template<typename _Tp, int cn> static inline void read(const FileNode& node, Vec
     value = temp.size() != cn ? default_value : Vec<_Tp, cn>(&temp[0]);
 }
 
+template<typename _Tp, int m, int n> static inline void read(const FileNode& node, Matx<_Tp, m, n>& value, const Matx<_Tp, m, n>& default_matx = Matx<_Tp, m, n>())
+{
+    Mat temp;
+    read(node, temp); // read as a Mat class
+
+    if (temp.empty())
+        value = default_matx;
+    else
+        value = Matx<_Tp, m, n>(temp);
+}
+
 template<typename _Tp> static inline void read(const FileNode& node, Scalar_<_Tp>& value, const Scalar_<_Tp>& default_value)
 {
     std::vector<_Tp> temp; FileNodeIterator it = node.begin(); it >> temp;
@@ -773,7 +823,7 @@ namespace internal
         VecWriterProxy( FileStorage* _fs ) : fs(_fs) {}
         void operator()(const std::vector<_Tp>& vec) const
         {
-            int _fmt = DataType<_Tp>::fmt;
+            int _fmt = traits::SafeFmt<_Tp>::fmt;
             char fmt[] = { (char)((_fmt >> 8) + '1'), (char)_fmt, '\0' };
             fs->writeRaw(fmt, !vec.empty() ? (uchar*)&vec[0] : 0, vec.size() * sizeof(_Tp));
         }
@@ -787,7 +837,7 @@ namespace internal
         VecReaderProxy( FileNodeIterator* _it ) : it(_it) {}
         void operator()(std::vector<_Tp>& vec, size_t count) const
         {
-            count = std::min(count, it->remaining);
+            count = std::min(count, it->remaining());
             vec.resize(count);
             for (size_t i = 0; i < count; i++, ++(*it))
                 read(**it, vec[i], _Tp());
@@ -802,12 +852,14 @@ namespace internal
         VecReaderProxy( FileNodeIterator* _it ) : it(_it) {}
         void operator()(std::vector<_Tp>& vec, size_t count) const
         {
-            size_t remaining = it->remaining;
+            size_t remaining = it->remaining();
             size_t cn = DataType<_Tp>::channels;
-            int _fmt = DataType<_Tp>::fmt;
+            int _fmt = traits::SafeFmt<_Tp>::fmt;
+            CV_Assert((_fmt >> 8) < 9);
             char fmt[] = { (char)((_fmt >> 8)+'1'), (char)_fmt, '\0' };
+            CV_Assert((remaining % cn) == 0);
             size_t remaining1 = remaining / cn;
-            count = count < remaining1 ? count : remaining1;
+            count = count > remaining1 ? remaining1 : count;
             vec.resize(count);
             it->readRaw(fmt, !vec.empty() ? (uchar*)&vec[0] : 0, count*sizeof(_Tp));
         }
@@ -897,6 +949,12 @@ void write(FileStorage& fs, const Vec<_Tp, cn>& v )
         write(fs, v.val[i]);
 }
 
+template<typename _Tp, int m, int n> static inline
+void write(FileStorage& fs, const Matx<_Tp, m, n>& x )
+{
+    write(fs, Mat(x)); // write as a Mat class
+}
+
 template<typename _Tp> static inline
 void write(FileStorage& fs, const Scalar_<_Tp>& s )
 {
@@ -916,11 +974,10 @@ void write(FileStorage& fs, const Range& r )
 template<typename _Tp> static inline
 void write( FileStorage& fs, const std::vector<_Tp>& vec )
 {
-    cv::internal::VecWriterProxy<_Tp, DataType<_Tp>::fmt != 0> w(&fs);
+    cv::internal::VecWriterProxy<_Tp, traits::SafeFmt<_Tp>::fmt != 0> w(&fs);
     w(vec);
 }
 
-
 template<typename _Tp> static inline
 void write(FileStorage& fs, const String& name, const Point_<_Tp>& pt )
 {
@@ -963,6 +1020,12 @@ void write(FileStorage& fs, const String& name, const Vec<_Tp, cn>& v )
     write(fs, v);
 }
 
+template<typename _Tp, int m, int n> static inline
+void write(FileStorage& fs, const String& name, const Matx<_Tp, m, n>& x )
+{
+    write(fs, name, Mat(x)); // write as a Mat class
+}
+
 template<typename _Tp> static inline
 void write(FileStorage& fs, const String& name, const Scalar_<_Tp>& s )
 {
@@ -977,13 +1040,71 @@ void write(FileStorage& fs, const String& name, const Range& r )
     write(fs, r);
 }
 
+static inline
+void write(FileStorage& fs, const String& name, const KeyPoint& kpt)
+{
+    cv::internal::WriteStructContext ws(fs, name, FileNode::SEQ+FileNode::FLOW);
+    write(fs, kpt.pt.x);
+    write(fs, kpt.pt.y);
+    write(fs, kpt.size);
+    write(fs, kpt.angle);
+    write(fs, kpt.response);
+    write(fs, kpt.octave);
+    write(fs, kpt.class_id);
+}
+
+static inline
+void write(FileStorage& fs, const String& name, const DMatch& m)
+{
+    cv::internal::WriteStructContext ws(fs, name, FileNode::SEQ+FileNode::FLOW);
+    write(fs, m.queryIdx);
+    write(fs, m.trainIdx);
+    write(fs, m.imgIdx);
+    write(fs, m.distance);
+}
+
+template<typename _Tp, typename std::enable_if< std::is_enum<_Tp>::value >::type* = nullptr>
+static inline void write( FileStorage& fs, const String& name, const _Tp& val )
+{
+    write(fs, name, static_cast<int>(val));
+}
+
 template<typename _Tp> static inline
 void write( FileStorage& fs, const String& name, const std::vector<_Tp>& vec )
 {
-    cv::internal::WriteStructContext ws(fs, name, FileNode::SEQ+(DataType<_Tp>::fmt != 0 ? FileNode::FLOW : 0));
+    cv::internal::WriteStructContext ws(fs, name, FileNode::SEQ+(traits::SafeFmt<_Tp>::fmt != 0 ? FileNode::FLOW : 0));
     write(fs, vec);
 }
 
+template<typename _Tp> static inline
+void write( FileStorage& fs, const String& name, const std::vector< std::vector<_Tp> >& vec )
+{
+    cv::internal::WriteStructContext ws(fs, name, FileNode::SEQ);
+    for(size_t i = 0; i < vec.size(); i++)
+    {
+        cv::internal::WriteStructContext ws_(fs, name, FileNode::SEQ+(traits::SafeFmt<_Tp>::fmt != 0 ? FileNode::FLOW : 0));
+        write(fs, vec[i]);
+    }
+}
+
+#ifdef CV__LEGACY_PERSISTENCE
+// This code is not needed anymore, but it is preserved here to keep source compatibility
+// Implementation is similar to the template instantiations
+static inline void write(FileStorage& fs, const KeyPoint& kpt) { write(fs, String(), kpt); }
+static inline void write(FileStorage& fs, const DMatch& m) { write(fs, String(), m); }
+static inline void write(FileStorage& fs, const std::vector<KeyPoint>& vec)
+{
+    cv::internal::VecWriterProxy<KeyPoint, 0> w(&fs);
+    w(vec);
+}
+static inline void write(FileStorage& fs, const std::vector<DMatch>& vec)
+{
+    cv::internal::VecWriterProxy<DMatch, 0> w(&fs);
+    w(vec);
+
+}
+#endif
+
 //! @} FileStorage
 
 //! @relates cv::FileNode
@@ -1032,14 +1153,22 @@ void read(const FileNode& node, short& value, short default_value)
 template<typename _Tp> static inline
 void read( FileNodeIterator& it, std::vector<_Tp>& vec, size_t maxCount = (size_t)INT_MAX )
 {
-    cv::internal::VecReaderProxy<_Tp, DataType<_Tp>::fmt != 0> r(&it);
+    cv::internal::VecReaderProxy<_Tp, traits::SafeFmt<_Tp>::fmt != 0> r(&it);
     r(vec, maxCount);
 }
 
+template<typename _Tp, typename std::enable_if< std::is_enum<_Tp>::value >::type* = nullptr>
+static inline void read(const FileNode& node, _Tp& value, const _Tp& default_value = static_cast<_Tp>(0))
+{
+    int temp;
+    read(node, temp, static_cast<int>(default_value));
+    value = static_cast<_Tp>(temp);
+}
+
 template<typename _Tp> static inline
 void read( const FileNode& node, std::vector<_Tp>& vec, const std::vector<_Tp>& default_value = std::vector<_Tp>() )
 {
-    if(!node.node)
+    if(node.empty())
         vec = default_value;
     else
     {
@@ -1048,6 +1177,24 @@ void read( const FileNode& node, std::vector<_Tp>& vec, const std::vector<_Tp>&
     }
 }
 
+static inline
+void read( const FileNode& node, std::vector<KeyPoint>& vec, const std::vector<KeyPoint>& default_value )
+{
+    if(node.empty())
+        vec = default_value;
+    else
+        read(node, vec);
+}
+
+static inline
+void read( const FileNode& node, std::vector<DMatch>& vec, const std::vector<DMatch>& default_value )
+{
+    if(node.empty())
+        vec = default_value;
+    else
+        read(node, vec);
+}
+
 //! @} FileNode
 
 //! @relates cv::FileStorage
@@ -1103,7 +1250,7 @@ FileNodeIterator& operator >> (FileNodeIterator& it, _Tp& value)
 template<typename _Tp> static inline
 FileNodeIterator& operator >> (FileNodeIterator& it, std::vector<_Tp>& vec)
 {
-    cv::internal::VecReaderProxy<_Tp, DataType<_Tp>::fmt != 0> r(&it);
+    cv::internal::VecReaderProxy<_Tp, traits::SafeFmt<_Tp>::fmt != 0> r(&it);
     r(vec, (size_t)INT_MAX);
     return it;
 }
@@ -1130,66 +1277,61 @@ void operator >> (const FileNode& n, std::vector<_Tp>& vec)
     it >> vec;
 }
 
-//! @} FileNode
-
-//! @relates cv::FileNodeIterator
-//! @{
+/** @brief Reads KeyPoint from a file storage.
+*/
+//It needs special handling because it contains two types of fields, int & float.
+static inline
+void operator >> (const FileNode& n, KeyPoint& kpt)
+{
+    FileNodeIterator it = n.begin();
+    it >> kpt.pt.x >> kpt.pt.y >> kpt.size >> kpt.angle >> kpt.response >> kpt.octave >> kpt.class_id;
+}
 
+#ifdef CV__LEGACY_PERSISTENCE
+static inline
+void operator >> (const FileNode& n, std::vector<KeyPoint>& vec)
+{
+    read(n, vec);
+}
 static inline
-bool operator == (const FileNodeIterator& it1, const FileNodeIterator& it2)
+void operator >> (const FileNode& n, std::vector<DMatch>& vec)
 {
-    return it1.fs == it2.fs && it1.container == it2.container &&
-        it1.reader.ptr == it2.reader.ptr && it1.remaining == it2.remaining;
+    read(n, vec);
 }
+#endif
 
+/** @brief Reads DMatch from a file storage.
+*/
+//It needs special handling because it contains two types of fields, int & float.
 static inline
-bool operator != (const FileNodeIterator& it1, const FileNodeIterator& it2)
+void operator >> (const FileNode& n, DMatch& m)
 {
-    return !(it1 == it2);
+    FileNodeIterator it = n.begin();
+    it >> m.queryIdx >> m.trainIdx >> m.imgIdx >> m.distance;
 }
 
+//! @} FileNode
+
+//! @relates cv::FileNodeIterator
+//! @{
+
+CV_EXPORTS bool operator == (const FileNodeIterator& it1, const FileNodeIterator& it2);
+CV_EXPORTS bool operator != (const FileNodeIterator& it1, const FileNodeIterator& it2);
+
 static inline
 ptrdiff_t operator - (const FileNodeIterator& it1, const FileNodeIterator& it2)
 {
-    return it2.remaining - it1.remaining;
+    return it2.remaining() - it1.remaining();
 }
 
 static inline
 bool operator < (const FileNodeIterator& it1, const FileNodeIterator& it2)
 {
-    return it1.remaining > it2.remaining;
+    return it1.remaining() > it2.remaining();
 }
 
 //! @} FileNodeIterator
 
-//! @cond IGNORED
-
-inline FileNode FileStorage::getFirstTopLevelNode() const { FileNode r = root(); FileNodeIterator it = r.begin(); return it != r.end() ? *it : FileNode(); }
-inline FileNode::FileNode() : fs(0), node(0) {}
-inline FileNode::FileNode(const CvFileStorage* _fs, const CvFileNode* _node) : fs(_fs), node(_node) {}
-inline FileNode::FileNode(const FileNode& _node) : fs(_node.fs), node(_node.node) {}
-inline bool FileNode::empty() const    { return node   == 0;    }
-inline bool FileNode::isNone() const   { return type() == NONE; }
-inline bool FileNode::isSeq() const    { return type() == SEQ;  }
-inline bool FileNode::isMap() const    { return type() == MAP;  }
-inline bool FileNode::isInt() const    { return type() == INT;  }
-inline bool FileNode::isReal() const   { return type() == REAL; }
-inline bool FileNode::isString() const { return type() == STR;  }
-inline CvFileNode* FileNode::operator *() { return (CvFileNode*)node; }
-inline const CvFileNode* FileNode::operator* () const { return node; }
-inline FileNode::operator int() const    { int value;    read(*this, value, 0);     return value; }
-inline FileNode::operator float() const  { float value;  read(*this, value, 0.f);   return value; }
-inline FileNode::operator double() const { double value; read(*this, value, 0.);    return value; }
-inline FileNode::operator String() const { String value; read(*this, value, value); return value; }
-inline FileNodeIterator FileNode::begin() const { return FileNodeIterator(fs, node); }
-inline FileNodeIterator FileNode::end() const   { return FileNodeIterator(fs, node, size()); }
-inline void FileNode::readRaw( const String& fmt, uchar* vec, size_t len ) const { begin().readRaw( fmt, vec, len ); }
-inline FileNode FileNodeIterator::operator *() const  { return FileNode(fs, (const CvFileNode*)(const void*)reader.ptr); }
-inline FileNode FileNodeIterator::operator ->() const { return FileNode(fs, (const CvFileNode*)(const void*)reader.ptr); }
-inline String::String(const FileNode& fn): cstr_(0), len_(0) { read(fn, *this, *this); }
-
-//! @endcond
-
 } // cv
 
-#endif // __OPENCV_CORE_PERSISTENCE_HPP__
+#endif // OPENCV_CORE_PERSISTENCE_HPP
diff --git a/IPL/include/opencv/opencv2/core/private.cuda.hpp b/IPL/include/opencv/opencv2/core/private.cuda.hpp
deleted file mode 100644
index d676ce8..0000000
--- a/IPL/include/opencv/opencv2/core/private.cuda.hpp
+++ /dev/null
@@ -1,172 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                          License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifndef __OPENCV_CORE_PRIVATE_CUDA_HPP__
-#define __OPENCV_CORE_PRIVATE_CUDA_HPP__
-
-#ifndef __OPENCV_BUILD
-#  error this is a private header which should not be used from outside of the OpenCV library
-#endif
-
-#include "cvconfig.h"
-
-#include "opencv2/core/cvdef.h"
-#include "opencv2/core/base.hpp"
-
-#include "opencv2/core/cuda.hpp"
-
-#ifdef HAVE_CUDA
-#  include <cuda.h>
-#  include <cuda_runtime.h>
-#  include <npp.h>
-#  include "opencv2/core/cuda_stream_accessor.hpp"
-#  include "opencv2/core/cuda/common.hpp"
-
-#  define NPP_VERSION (NPP_VERSION_MAJOR * 1000 + NPP_VERSION_MINOR * 100 + NPP_VERSION_BUILD)
-
-#  define CUDART_MINIMUM_REQUIRED_VERSION 4020
-
-#  if (CUDART_VERSION < CUDART_MINIMUM_REQUIRED_VERSION)
-#    error "Insufficient Cuda Runtime library version, please update it."
-#  endif
-
-#  if defined(CUDA_ARCH_BIN_OR_PTX_10)
-#    error "OpenCV CUDA module doesn't support NVIDIA compute capability 1.0"
-#  endif
-#endif
-
-//! @cond IGNORED
-
-namespace cv { namespace cuda {
-    CV_EXPORTS cv::String getNppErrorMessage(int code);
-    CV_EXPORTS cv::String getCudaDriverApiErrorMessage(int code);
-
-    CV_EXPORTS GpuMat getInputMat(InputArray _src, Stream& stream);
-
-    CV_EXPORTS GpuMat getOutputMat(OutputArray _dst, int rows, int cols, int type, Stream& stream);
-    static inline GpuMat getOutputMat(OutputArray _dst, Size size, int type, Stream& stream)
-    {
-        return getOutputMat(_dst, size.height, size.width, type, stream);
-    }
-
-    CV_EXPORTS void syncOutput(const GpuMat& dst, OutputArray _dst, Stream& stream);
-}}
-
-#ifndef HAVE_CUDA
-
-static inline void throw_no_cuda() { CV_Error(cv::Error::GpuNotSupported, "The library is compiled without CUDA support"); }
-
-#else // HAVE_CUDA
-
-static inline void throw_no_cuda() { CV_Error(cv::Error::StsNotImplemented, "The called functionality is disabled for current build or platform"); }
-
-namespace cv { namespace cuda
-{
-    class CV_EXPORTS BufferPool
-    {
-    public:
-        explicit BufferPool(Stream& stream);
-
-        GpuMat getBuffer(int rows, int cols, int type);
-        GpuMat getBuffer(Size size, int type) { return getBuffer(size.height, size.width, type); }
-
-        GpuMat::Allocator* getAllocator() const { return allocator_; }
-
-    private:
-        GpuMat::Allocator* allocator_;
-    };
-
-    static inline void checkNppError(int code, const char* file, const int line, const char* func)
-    {
-        if (code < 0)
-            cv::error(cv::Error::GpuApiCallError, getNppErrorMessage(code), func, file, line);
-    }
-
-    static inline void checkCudaDriverApiError(int code, const char* file, const int line, const char* func)
-    {
-        if (code != CUDA_SUCCESS)
-            cv::error(cv::Error::GpuApiCallError, getCudaDriverApiErrorMessage(code), func, file, line);
-    }
-
-    template<int n> struct NPPTypeTraits;
-    template<> struct NPPTypeTraits<CV_8U>  { typedef Npp8u npp_type; };
-    template<> struct NPPTypeTraits<CV_8S>  { typedef Npp8s npp_type; };
-    template<> struct NPPTypeTraits<CV_16U> { typedef Npp16u npp_type; };
-    template<> struct NPPTypeTraits<CV_16S> { typedef Npp16s npp_type; };
-    template<> struct NPPTypeTraits<CV_32S> { typedef Npp32s npp_type; };
-    template<> struct NPPTypeTraits<CV_32F> { typedef Npp32f npp_type; };
-    template<> struct NPPTypeTraits<CV_64F> { typedef Npp64f npp_type; };
-
-    class NppStreamHandler
-    {
-    public:
-        inline explicit NppStreamHandler(Stream& newStream)
-        {
-            oldStream = nppGetStream();
-            nppSetStream(StreamAccessor::getStream(newStream));
-        }
-
-        inline explicit NppStreamHandler(cudaStream_t newStream)
-        {
-            oldStream = nppGetStream();
-            nppSetStream(newStream);
-        }
-
-        inline ~NppStreamHandler()
-        {
-            nppSetStream(oldStream);
-        }
-
-    private:
-        cudaStream_t oldStream;
-    };
-}}
-
-#define nppSafeCall(expr)  cv::cuda::checkNppError(expr, __FILE__, __LINE__, CV_Func)
-#define cuSafeCall(expr)  cv::cuda::checkCudaDriverApiError(expr, __FILE__, __LINE__, CV_Func)
-
-#endif // HAVE_CUDA
-
-//! @endcond
-
-#endif // __OPENCV_CORE_CUDA_PRIVATE_HPP__
diff --git a/IPL/include/opencv/opencv2/core/private.hpp b/IPL/include/opencv/opencv2/core/private.hpp
deleted file mode 100644
index c71ec62..0000000
--- a/IPL/include/opencv/opencv2/core/private.hpp
+++ /dev/null
@@ -1,425 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                          License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifndef __OPENCV_CORE_PRIVATE_HPP__
-#define __OPENCV_CORE_PRIVATE_HPP__
-
-#ifndef __OPENCV_BUILD
-#  error this is a private header which should not be used from outside of the OpenCV library
-#endif
-
-#include "opencv2/core.hpp"
-#include "cvconfig.h"
-
-#ifdef HAVE_EIGEN
-#  if defined __GNUC__ && defined __APPLE__
-#    pragma GCC diagnostic ignored "-Wshadow"
-#  endif
-#  include <Eigen/Core>
-#  include "opencv2/core/eigen.hpp"
-#endif
-
-#ifdef HAVE_TBB
-#  include "tbb/tbb_stddef.h"
-#  if TBB_VERSION_MAJOR*100 + TBB_VERSION_MINOR >= 202
-#    include "tbb/tbb.h"
-#    include "tbb/task.h"
-#    undef min
-#    undef max
-#  else
-#    undef HAVE_TBB
-#  endif
-#endif
-
-//! @cond IGNORED
-
-namespace cv
-{
-#ifdef HAVE_TBB
-
-    typedef tbb::blocked_range<int> BlockedRange;
-
-    template<typename Body> static inline
-    void parallel_for( const BlockedRange& range, const Body& body )
-    {
-        tbb::parallel_for(range, body);
-    }
-
-    typedef tbb::split Split;
-
-    template<typename Body> static inline
-    void parallel_reduce( const BlockedRange& range, Body& body )
-    {
-        tbb::parallel_reduce(range, body);
-    }
-
-    typedef tbb::concurrent_vector<Rect> ConcurrentRectVector;
-#else
-    class BlockedRange
-    {
-    public:
-        BlockedRange() : _begin(0), _end(0), _grainsize(0) {}
-        BlockedRange(int b, int e, int g=1) : _begin(b), _end(e), _grainsize(g) {}
-        int begin() const { return _begin; }
-        int end() const { return _end; }
-        int grainsize() const { return _grainsize; }
-
-    protected:
-        int _begin, _end, _grainsize;
-    };
-
-    template<typename Body> static inline
-    void parallel_for( const BlockedRange& range, const Body& body )
-    {
-        body(range);
-    }
-    typedef std::vector<Rect> ConcurrentRectVector;
-
-    class Split {};
-
-    template<typename Body> static inline
-    void parallel_reduce( const BlockedRange& range, Body& body )
-    {
-        body(range);
-    }
-#endif
-
-    // Returns a static string if there is a parallel framework,
-    // NULL otherwise.
-    CV_EXPORTS const char* currentParallelFramework();
-} //namespace cv
-
-/****************************************************************************************\
-*                                  Common declarations                                   *
-\****************************************************************************************/
-
-/* the alignment of all the allocated buffers */
-#define  CV_MALLOC_ALIGN    16
-
-/* IEEE754 constants and macros */
-#define  CV_TOGGLE_FLT(x) ((x)^((int)(x) < 0 ? 0x7fffffff : 0))
-#define  CV_TOGGLE_DBL(x) ((x)^((int64)(x) < 0 ? CV_BIG_INT(0x7fffffffffffffff) : 0))
-
-static inline void* cvAlignPtr( const void* ptr, int align = 32 )
-{
-    CV_DbgAssert ( (align & (align-1)) == 0 );
-    return (void*)( ((size_t)ptr + align - 1) & ~(size_t)(align-1) );
-}
-
-static inline int cvAlign( int size, int align )
-{
-    CV_DbgAssert( (align & (align-1)) == 0 && size < INT_MAX );
-    return (size + align - 1) & -align;
-}
-
-#ifdef IPL_DEPTH_8U
-static inline cv::Size cvGetMatSize( const CvMat* mat )
-{
-    return cv::Size(mat->cols, mat->rows);
-}
-#endif
-
-namespace cv
-{
-CV_EXPORTS void scalarToRawData(const cv::Scalar& s, void* buf, int type, int unroll_to = 0);
-}
-
-// property implementation macros
-
-#define CV_IMPL_PROPERTY_RO(type, name, member) \
-    inline type get##name() const { return member; }
-
-#define CV_HELP_IMPL_PROPERTY(r_type, w_type, name, member) \
-    CV_IMPL_PROPERTY_RO(r_type, name, member) \
-    inline void set##name(w_type val) { member = val; }
-
-#define CV_HELP_WRAP_PROPERTY(r_type, w_type, name, internal_name, internal_obj) \
-    r_type get##name() const { return internal_obj.get##internal_name(); } \
-    void set##name(w_type val) { internal_obj.set##internal_name(val); }
-
-#define CV_IMPL_PROPERTY(type, name, member) CV_HELP_IMPL_PROPERTY(type, type, name, member)
-#define CV_IMPL_PROPERTY_S(type, name, member) CV_HELP_IMPL_PROPERTY(type, const type &, name, member)
-
-#define CV_WRAP_PROPERTY(type, name, internal_name, internal_obj)  CV_HELP_WRAP_PROPERTY(type, type, name, internal_name, internal_obj)
-#define CV_WRAP_PROPERTY_S(type, name, internal_name, internal_obj) CV_HELP_WRAP_PROPERTY(type, const type &, name, internal_name, internal_obj)
-
-#define CV_WRAP_SAME_PROPERTY(type, name, internal_obj) CV_WRAP_PROPERTY(type, name, name, internal_obj)
-#define CV_WRAP_SAME_PROPERTY_S(type, name, internal_obj) CV_WRAP_PROPERTY_S(type, name, name, internal_obj)
-
-/****************************************************************************************\
-*                     Structures and macros for integration with IPP                     *
-\****************************************************************************************/
-
-#ifdef HAVE_IPP
-#include "ipp.h"
-
-#ifndef IPP_VERSION_UPDATE // prior to 7.1
-#define IPP_VERSION_UPDATE 0
-#endif
-
-#define IPP_VERSION_X100 (IPP_VERSION_MAJOR * 100 + IPP_VERSION_MINOR*10 + IPP_VERSION_UPDATE)
-
-// General define for ipp function disabling
-#define IPP_DISABLE_BLOCK 0
-
-#ifdef CV_MALLOC_ALIGN
-#undef CV_MALLOC_ALIGN
-#endif
-#define CV_MALLOC_ALIGN 32 // required for AVX optimization
-
-#define setIppErrorStatus() cv::ipp::setIppStatus(-1, CV_Func, __FILE__, __LINE__)
-
-static inline IppiSize ippiSize(int width, int height)
-{
-    IppiSize size = { width, height };
-    return size;
-}
-
-static inline IppiSize ippiSize(const cv::Size & _size)
-{
-    IppiSize size = { _size.width, _size.height };
-    return size;
-}
-
-static inline IppiBorderType ippiGetBorderType(int borderTypeNI)
-{
-    return borderTypeNI == cv::BORDER_CONSTANT ? ippBorderConst :
-        borderTypeNI == cv::BORDER_WRAP ? ippBorderWrap :
-        borderTypeNI == cv::BORDER_REPLICATE ? ippBorderRepl :
-        borderTypeNI == cv::BORDER_REFLECT_101 ? ippBorderMirror :
-        borderTypeNI == cv::BORDER_REFLECT ? ippBorderMirrorR : (IppiBorderType)-1;
-}
-
-static inline IppDataType ippiGetDataType(int depth)
-{
-    return depth == CV_8U ? ipp8u :
-        depth == CV_8S ? ipp8s :
-        depth == CV_16U ? ipp16u :
-        depth == CV_16S ? ipp16s :
-        depth == CV_32S ? ipp32s :
-        depth == CV_32F ? ipp32f :
-        depth == CV_64F ? ipp64f : (IppDataType)-1;
-}
-
-// IPP temporary buffer hepler
-template<typename T>
-class IppAutoBuffer
-{
-public:
-    IppAutoBuffer() { m_pBuffer = NULL; }
-    IppAutoBuffer(int size) { Alloc(size); }
-    ~IppAutoBuffer() { Release(); }
-    T* Alloc(int size) { m_pBuffer = (T*)ippMalloc(size); return m_pBuffer; }
-    void Release() { if(m_pBuffer) ippFree(m_pBuffer); }
-    inline operator T* () { return (T*)m_pBuffer;}
-    inline operator const T* () const { return (const T*)m_pBuffer;}
-private:
-    // Disable copy operations
-    IppAutoBuffer(IppAutoBuffer &) {};
-    IppAutoBuffer& operator =(const IppAutoBuffer &) {return *this;};
-
-    T* m_pBuffer;
-};
-
-#else
-#define IPP_VERSION_X100 0
-#endif
-
-// There shoud be no API difference in OpenCV between ICV and IPP since 9.0
-#if (defined HAVE_IPP_ICV_ONLY) && IPP_VERSION_X100 >= 900
-#undef HAVE_IPP_ICV_ONLY
-#endif
-
-#ifdef HAVE_IPP_ICV_ONLY
-#define HAVE_ICV 1
-#else
-#define HAVE_ICV 0
-#endif
-
-#if defined HAVE_IPP
-#if IPP_VERSION_X100 >= 900
-#define IPP_INITIALIZER(FEAT)                           \
-{                                                       \
-    if(FEAT)                                            \
-        ippSetCpuFeatures(FEAT);                        \
-    else                                                \
-        ippInit();                                      \
-}
-#elif IPP_VERSION_X100 >= 800
-#define IPP_INITIALIZER(FEAT)                           \
-{                                                       \
-    ippInit();                                          \
-}
-#else
-#define IPP_INITIALIZER(FEAT)                           \
-{                                                       \
-    ippStaticInit();                                    \
-}
-#endif
-
-#ifdef CVAPI_EXPORTS
-#define IPP_INITIALIZER_AUTO                            \
-struct __IppInitializer__                               \
-{                                                       \
-    __IppInitializer__()                                \
-    {IPP_INITIALIZER(cv::ipp::getIppFeatures())}        \
-};                                                      \
-static struct __IppInitializer__ __ipp_initializer__;
-#else
-#define IPP_INITIALIZER_AUTO
-#endif
-#else
-#define IPP_INITIALIZER
-#define IPP_INITIALIZER_AUTO
-#endif
-
-#define CV_IPP_CHECK_COND (cv::ipp::useIPP())
-#define CV_IPP_CHECK() if(CV_IPP_CHECK_COND)
-
-#ifdef HAVE_IPP
-
-#ifdef CV_IPP_RUN_VERBOSE
-#define CV_IPP_RUN_(condition, func, ...)                                   \
-    {                                                                       \
-        if (cv::ipp::useIPP() && (condition) && func)                       \
-        {                                                                   \
-            printf("%s: IPP implementation is running\n", CV_Func);         \
-            fflush(stdout);                                                 \
-            CV_IMPL_ADD(CV_IMPL_IPP);                                       \
-            return __VA_ARGS__;                                             \
-        }                                                                   \
-        else                                                                \
-        {                                                                   \
-            printf("%s: Plain implementation is running\n", CV_Func);       \
-            fflush(stdout);                                                 \
-        }                                                                   \
-    }
-#elif defined CV_IPP_RUN_ASSERT
-#define CV_IPP_RUN_(condition, func, ...)                                   \
-    {                                                                       \
-        if (cv::ipp::useIPP() && (condition))                               \
-        {                                                                   \
-            if(func)                                                        \
-            {                                                               \
-                CV_IMPL_ADD(CV_IMPL_IPP);                                   \
-            }                                                               \
-            else                                                            \
-            {                                                               \
-                setIppErrorStatus();                                        \
-                CV_Error(cv::Error::StsAssert, #func);                      \
-            }                                                               \
-            return __VA_ARGS__;                                             \
-        }                                                                   \
-    }
-#else
-#define CV_IPP_RUN_(condition, func, ...)                                   \
-    if (cv::ipp::useIPP() && (condition) && func)                           \
-    {                                                                       \
-        CV_IMPL_ADD(CV_IMPL_IPP);                                           \
-        return __VA_ARGS__;                                                 \
-    }
-#endif
-
-#else
-#define CV_IPP_RUN_(condition, func, ...)
-#endif
-
-#define CV_IPP_RUN(condition, func, ...) CV_IPP_RUN_(condition, func, __VA_ARGS__)
-
-
-#ifndef IPPI_CALL
-#  define IPPI_CALL(func) CV_Assert((func) >= 0)
-#endif
-
-/* IPP-compatible return codes */
-typedef enum CvStatus
-{
-    CV_BADMEMBLOCK_ERR          = -113,
-    CV_INPLACE_NOT_SUPPORTED_ERR= -112,
-    CV_UNMATCHED_ROI_ERR        = -111,
-    CV_NOTFOUND_ERR             = -110,
-    CV_BADCONVERGENCE_ERR       = -109,
-
-    CV_BADDEPTH_ERR             = -107,
-    CV_BADROI_ERR               = -106,
-    CV_BADHEADER_ERR            = -105,
-    CV_UNMATCHED_FORMATS_ERR    = -104,
-    CV_UNSUPPORTED_COI_ERR      = -103,
-    CV_UNSUPPORTED_CHANNELS_ERR = -102,
-    CV_UNSUPPORTED_DEPTH_ERR    = -101,
-    CV_UNSUPPORTED_FORMAT_ERR   = -100,
-
-    CV_BADARG_ERR               = -49,  //ipp comp
-    CV_NOTDEFINED_ERR           = -48,  //ipp comp
-
-    CV_BADCHANNELS_ERR          = -47,  //ipp comp
-    CV_BADRANGE_ERR             = -44,  //ipp comp
-    CV_BADSTEP_ERR              = -29,  //ipp comp
-
-    CV_BADFLAG_ERR              =  -12,
-    CV_DIV_BY_ZERO_ERR          =  -11, //ipp comp
-    CV_BADCOEF_ERR              =  -10,
-
-    CV_BADFACTOR_ERR            =  -7,
-    CV_BADPOINT_ERR             =  -6,
-    CV_BADSCALE_ERR             =  -4,
-    CV_OUTOFMEM_ERR             =  -3,
-    CV_NULLPTR_ERR              =  -2,
-    CV_BADSIZE_ERR              =  -1,
-    CV_NO_ERR                   =   0,
-    CV_OK                       =   CV_NO_ERR
-}
-CvStatus;
-
-#ifdef HAVE_TEGRA_OPTIMIZATION
-namespace tegra {
-
-CV_EXPORTS bool useTegra();
-CV_EXPORTS void setUseTegra(bool flag);
-
-}
-#endif
-
-//! @endcond
-
-#endif // __OPENCV_CORE_PRIVATE_HPP__
diff --git a/IPL/include/opencv/opencv2/core/ptr.inl.hpp b/IPL/include/opencv/opencv2/core/ptr.inl.hpp
deleted file mode 100644
index 3f6f214..0000000
--- a/IPL/include/opencv/opencv2/core/ptr.inl.hpp
+++ /dev/null
@@ -1,365 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2013, NVIDIA Corporation, all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the copyright holders or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifndef __OPENCV_CORE_PTR_INL_HPP__
-#define __OPENCV_CORE_PTR_INL_HPP__
-
-#include <algorithm>
-
-//! @cond IGNORED
-
-namespace cv {
-
-template<typename Y>
-void DefaultDeleter<Y>::operator () (Y* p) const
-{
-    delete p;
-}
-
-namespace detail
-{
-
-struct PtrOwner
-{
-    PtrOwner() : refCount(1)
-    {}
-
-    void incRef()
-    {
-        CV_XADD(&refCount, 1);
-    }
-
-    void decRef()
-    {
-        if (CV_XADD(&refCount, -1) == 1) deleteSelf();
-    }
-
-protected:
-    /* This doesn't really need to be virtual, since PtrOwner is never deleted
-       directly, but it doesn't hurt and it helps avoid warnings. */
-    virtual ~PtrOwner()
-    {}
-
-    virtual void deleteSelf() = 0;
-
-private:
-    unsigned int refCount;
-
-    // noncopyable
-    PtrOwner(const PtrOwner&);
-    PtrOwner& operator = (const PtrOwner&);
-};
-
-template<typename Y, typename D>
-struct PtrOwnerImpl : PtrOwner
-{
-    PtrOwnerImpl(Y* p, D d) : owned(p), deleter(d)
-    {}
-
-    void deleteSelf()
-    {
-        deleter(owned);
-        delete this;
-    }
-
-private:
-    Y* owned;
-    D deleter;
-};
-
-
-}
-
-template<typename T>
-Ptr<T>::Ptr() : owner(NULL), stored(NULL)
-{}
-
-template<typename T>
-template<typename Y>
-Ptr<T>::Ptr(Y* p)
-  : owner(p
-      ? new detail::PtrOwnerImpl<Y, DefaultDeleter<Y> >(p, DefaultDeleter<Y>())
-      : NULL),
-    stored(p)
-{}
-
-template<typename T>
-template<typename Y, typename D>
-Ptr<T>::Ptr(Y* p, D d)
-  : owner(p
-      ? new detail::PtrOwnerImpl<Y, D>(p, d)
-      : NULL),
-    stored(p)
-{}
-
-template<typename T>
-Ptr<T>::Ptr(const Ptr& o) : owner(o.owner), stored(o.stored)
-{
-    if (owner) owner->incRef();
-}
-
-template<typename T>
-template<typename Y>
-Ptr<T>::Ptr(const Ptr<Y>& o) : owner(o.owner), stored(o.stored)
-{
-    if (owner) owner->incRef();
-}
-
-template<typename T>
-template<typename Y>
-Ptr<T>::Ptr(const Ptr<Y>& o, T* p) : owner(o.owner), stored(p)
-{
-    if (owner) owner->incRef();
-}
-
-template<typename T>
-Ptr<T>::~Ptr()
-{
-    release();
-}
-
-template<typename T>
-Ptr<T>& Ptr<T>::operator = (const Ptr<T>& o)
-{
-    Ptr(o).swap(*this);
-    return *this;
-}
-
-template<typename T>
-template<typename Y>
-Ptr<T>& Ptr<T>::operator = (const Ptr<Y>& o)
-{
-    Ptr(o).swap(*this);
-    return *this;
-}
-
-template<typename T>
-void Ptr<T>::release()
-{
-    if (owner) owner->decRef();
-    owner = NULL;
-    stored = NULL;
-}
-
-template<typename T>
-template<typename Y>
-void Ptr<T>::reset(Y* p)
-{
-    Ptr(p).swap(*this);
-}
-
-template<typename T>
-template<typename Y, typename D>
-void Ptr<T>::reset(Y* p, D d)
-{
-    Ptr(p, d).swap(*this);
-}
-
-template<typename T>
-void Ptr<T>::swap(Ptr<T>& o)
-{
-    std::swap(owner, o.owner);
-    std::swap(stored, o.stored);
-}
-
-template<typename T>
-T* Ptr<T>::get() const
-{
-    return stored;
-}
-
-template<typename T>
-typename detail::RefOrVoid<T>::type Ptr<T>::operator * () const
-{
-    return *stored;
-}
-
-template<typename T>
-T* Ptr<T>::operator -> () const
-{
-    return stored;
-}
-
-template<typename T>
-Ptr<T>::operator T* () const
-{
-    return stored;
-}
-
-
-template<typename T>
-bool Ptr<T>::empty() const
-{
-    return !stored;
-}
-
-template<typename T>
-template<typename Y>
-Ptr<Y> Ptr<T>::staticCast() const
-{
-    return Ptr<Y>(*this, static_cast<Y*>(stored));
-}
-
-template<typename T>
-template<typename Y>
-Ptr<Y> Ptr<T>::constCast() const
-{
-    return Ptr<Y>(*this, const_cast<Y*>(stored));
-}
-
-template<typename T>
-template<typename Y>
-Ptr<Y> Ptr<T>::dynamicCast() const
-{
-    return Ptr<Y>(*this, dynamic_cast<Y*>(stored));
-}
-
-#ifdef CV_CXX_MOVE_SEMANTICS
-
-template<typename T>
-Ptr<T>::Ptr(Ptr&& o) : owner(o.owner), stored(o.stored)
-{
-    o.owner = NULL;
-    o.stored = NULL;
-}
-
-template<typename T>
-Ptr<T>& Ptr<T>::operator = (Ptr<T>&& o)
-{
-    release();
-    owner = o.owner;
-    stored = o.stored;
-    o.owner = NULL;
-    o.stored = NULL;
-    return *this;
-}
-
-#endif
-
-
-template<typename T>
-void swap(Ptr<T>& ptr1, Ptr<T>& ptr2){
-    ptr1.swap(ptr2);
-}
-
-template<typename T>
-bool operator == (const Ptr<T>& ptr1, const Ptr<T>& ptr2)
-{
-    return ptr1.get() == ptr2.get();
-}
-
-template<typename T>
-bool operator != (const Ptr<T>& ptr1, const Ptr<T>& ptr2)
-{
-    return ptr1.get() != ptr2.get();
-}
-
-template<typename T>
-Ptr<T> makePtr()
-{
-    return Ptr<T>(new T());
-}
-
-template<typename T, typename A1>
-Ptr<T> makePtr(const A1& a1)
-{
-    return Ptr<T>(new T(a1));
-}
-
-template<typename T, typename A1, typename A2>
-Ptr<T> makePtr(const A1& a1, const A2& a2)
-{
-    return Ptr<T>(new T(a1, a2));
-}
-
-template<typename T, typename A1, typename A2, typename A3>
-Ptr<T> makePtr(const A1& a1, const A2& a2, const A3& a3)
-{
-    return Ptr<T>(new T(a1, a2, a3));
-}
-
-template<typename T, typename A1, typename A2, typename A3, typename A4>
-Ptr<T> makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4)
-{
-    return Ptr<T>(new T(a1, a2, a3, a4));
-}
-
-template<typename T, typename A1, typename A2, typename A3, typename A4, typename A5>
-Ptr<T> makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5)
-{
-    return Ptr<T>(new T(a1, a2, a3, a4, a5));
-}
-
-template<typename T, typename A1, typename A2, typename A3, typename A4, typename A5, typename A6>
-Ptr<T> makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const A6& a6)
-{
-    return Ptr<T>(new T(a1, a2, a3, a4, a5, a6));
-}
-
-template<typename T, typename A1, typename A2, typename A3, typename A4, typename A5, typename A6, typename A7>
-Ptr<T> makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const A6& a6, const A7& a7)
-{
-    return Ptr<T>(new T(a1, a2, a3, a4, a5, a6, a7));
-}
-
-template<typename T, typename A1, typename A2, typename A3, typename A4, typename A5, typename A6, typename A7, typename A8>
-Ptr<T> makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const A6& a6, const A7& a7, const A8& a8)
-{
-    return Ptr<T>(new T(a1, a2, a3, a4, a5, a6, a7, a8));
-}
-
-template<typename T, typename A1, typename A2, typename A3, typename A4, typename A5, typename A6, typename A7, typename A8, typename A9>
-Ptr<T> makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const A6& a6, const A7& a7, const A8& a8, const A9& a9)
-{
-    return Ptr<T>(new T(a1, a2, a3, a4, a5, a6, a7, a8, a9));
-}
-
-template<typename T, typename A1, typename A2, typename A3, typename A4, typename A5, typename A6, typename A7, typename A8, typename A9, typename A10>
-Ptr<T> makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const A6& a6, const A7& a7, const A8& a8, const A9& a9, const A10& a10)
-{
-    return Ptr<T>(new T(a1, a2, a3, a4, a5, a6, a7, a8, a9, a10));
-}
-
-} // namespace cv
-
-//! @endcond
-
-#endif // __OPENCV_CORE_PTR_INL_HPP__
diff --git a/IPL/include/opencv/opencv2/core/saturate.hpp b/IPL/include/opencv/opencv2/core/saturate.hpp
index 1442eab..8127e3d 100644
--- a/IPL/include/opencv/opencv2/core/saturate.hpp
+++ b/IPL/include/opencv/opencv2/core/saturate.hpp
@@ -42,8 +42,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_CORE_SATURATE_HPP__
-#define __OPENCV_CORE_SATURATE_HPP__
+#ifndef OPENCV_CORE_SATURATE_HPP
+#define OPENCV_CORE_SATURATE_HPP
 
 #include "opencv2/core/cvdef.h"
 #include "opencv2/core/fast_math.hpp"
@@ -58,8 +58,8 @@ namespace cv
 
 /** @brief Template function for accurate conversion from one primitive type to another.
 
- The functions saturate_cast resemble the standard C++ cast operations, such as static_cast\<T\>()
- and others. They perform an efficient and accurate conversion from one primitive type to another
+ The function saturate_cast resembles the standard C++ cast operations, such as static_cast\<T\>()
+ and others. It performs an efficient and accurate conversion from one primitive type to another
  (see the introduction chapter). saturate in the name means that when the input value v is out of the
  range of the target type, the result is not formed just by taking low bits of the input, but instead
  the value is clipped. For example:
@@ -74,8 +74,6 @@ namespace cv
  the floating-point value is first rounded to the nearest integer and then clipped if needed (when
  the target type is 8- or 16-bit).
 
- This operation is used in the simplest or most complex image processing functions in OpenCV.
-
  @param v Function parameter.
  @sa add, subtract, multiply, divide, Mat::convertTo
  */
@@ -136,15 +134,46 @@ template<> inline short saturate_cast<short>(double v)       { int iv = cvRound(
 template<> inline short saturate_cast<short>(int64 v)        { return (short)((uint64)((int64)v - SHRT_MIN) <= (uint64)USHRT_MAX ? v : v > 0 ? SHRT_MAX : SHRT_MIN); }
 template<> inline short saturate_cast<short>(uint64 v)       { return (short)std::min(v, (uint64)SHRT_MAX); }
 
+template<> inline int saturate_cast<int>(unsigned v)         { return (int)std::min(v, (unsigned)INT_MAX); }
+template<> inline int saturate_cast<int>(int64 v)            { return (int)((uint64)(v - INT_MIN) <= (uint64)UINT_MAX ? v : v > 0 ? INT_MAX : INT_MIN); }
+template<> inline int saturate_cast<int>(uint64 v)           { return (int)std::min(v, (uint64)INT_MAX); }
 template<> inline int saturate_cast<int>(float v)            { return cvRound(v); }
 template<> inline int saturate_cast<int>(double v)           { return cvRound(v); }
 
+template<> inline unsigned saturate_cast<unsigned>(schar v)  { return (unsigned)std::max(v, (schar)0); }
+template<> inline unsigned saturate_cast<unsigned>(short v)  { return (unsigned)std::max(v, (short)0); }
+template<> inline unsigned saturate_cast<unsigned>(int v)    { return (unsigned)std::max(v, (int)0); }
+template<> inline unsigned saturate_cast<unsigned>(int64 v)  { return (unsigned)((uint64)v <= (uint64)UINT_MAX ? v : v > 0 ? UINT_MAX : 0); }
+template<> inline unsigned saturate_cast<unsigned>(uint64 v) { return (unsigned)std::min(v, (uint64)UINT_MAX); }
 // we intentionally do not clip negative numbers, to make -1 become 0xffffffff etc.
-template<> inline unsigned saturate_cast<unsigned>(float v)  { return cvRound(v); }
-template<> inline unsigned saturate_cast<unsigned>(double v) { return cvRound(v); }
+template<> inline unsigned saturate_cast<unsigned>(float v)  { return static_cast<unsigned>(cvRound(v)); }
+template<> inline unsigned saturate_cast<unsigned>(double v) { return static_cast<unsigned>(cvRound(v)); }
+
+template<> inline uint64 saturate_cast<uint64>(schar v)      { return (uint64)std::max(v, (schar)0); }
+template<> inline uint64 saturate_cast<uint64>(short v)      { return (uint64)std::max(v, (short)0); }
+template<> inline uint64 saturate_cast<uint64>(int v)        { return (uint64)std::max(v, (int)0); }
+template<> inline uint64 saturate_cast<uint64>(int64 v)      { return (uint64)std::max(v, (int64)0); }
+
+template<> inline int64 saturate_cast<int64>(uint64 v)       { return (int64)std::min(v, (uint64)LLONG_MAX); }
+
+/** @overload */
+template<typename _Tp> static inline _Tp saturate_cast(float16_t v) { return saturate_cast<_Tp>((float)v); }
+
+// in theory, we could use a LUT for 8u/8s->16f conversion,
+// but with hardware support for FP32->FP16 conversion the current approach is preferable
+template<> inline float16_t saturate_cast<float16_t>(uchar v)   { return float16_t((float)v); }
+template<> inline float16_t saturate_cast<float16_t>(schar v)   { return float16_t((float)v); }
+template<> inline float16_t saturate_cast<float16_t>(ushort v)  { return float16_t((float)v); }
+template<> inline float16_t saturate_cast<float16_t>(short v)   { return float16_t((float)v); }
+template<> inline float16_t saturate_cast<float16_t>(unsigned v){ return float16_t((float)v); }
+template<> inline float16_t saturate_cast<float16_t>(int v)     { return float16_t((float)v); }
+template<> inline float16_t saturate_cast<float16_t>(uint64 v)  { return float16_t((float)v); }
+template<> inline float16_t saturate_cast<float16_t>(int64 v)   { return float16_t((float)v); }
+template<> inline float16_t saturate_cast<float16_t>(float v)   { return float16_t(v); }
+template<> inline float16_t saturate_cast<float16_t>(double v)  { return float16_t((float)v); }
 
 //! @}
 
 } // cv
 
-#endif // __OPENCV_CORE_SATURATE_HPP__
+#endif // OPENCV_CORE_SATURATE_HPP
diff --git a/IPL/include/opencv/opencv2/core/simd_intrinsics.hpp b/IPL/include/opencv/opencv2/core/simd_intrinsics.hpp
new file mode 100644
index 0000000..c50923f
--- /dev/null
+++ b/IPL/include/opencv/opencv2/core/simd_intrinsics.hpp
@@ -0,0 +1,88 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_CORE_SIMD_INTRINSICS_HPP
+#define OPENCV_CORE_SIMD_INTRINSICS_HPP
+
+/**
+Helper header to support SIMD intrinsics (universal intrinsics) in user code.
+Intrinsics documentation: https://docs.opencv.org/master/df/d91/group__core__hal__intrin.html
+
+
+Checks of target CPU instruction set based on compiler definitions don't work well enough.
+More reliable solutions require utilization of configuration systems (like CMake).
+
+So, probably you need to specify your own configuration.
+
+You can do that via CMake in this way:
+    add_definitions(/DOPENCV_SIMD_CONFIG_HEADER=opencv_simd_config_custom.hpp)
+or
+    add_definitions(/DOPENCV_SIMD_CONFIG_INCLUDE_DIR=1)
+
+Additionally you may need to add include directory to your files:
+    include_directories("${CMAKE_CURRENT_LIST_DIR}/opencv_config_${MYTARGET}")
+
+These files can be pre-generated for target configurations of your application
+or generated by CMake on the fly (use CMAKE_BINARY_DIR for that).
+
+Notes:
+- H/W capability checks are still the responsibility of your application
+- runtime dispatching is not covered by this helper header
+*/
+
+#ifdef __OPENCV_BUILD
+#error "Use core/hal/intrin.hpp during OpenCV build"
+#endif
+
+#ifdef OPENCV_HAL_INTRIN_HPP
+#error "core/simd_intrinsics.hpp must be included before core/hal/intrin.hpp"
+#endif
+
+#include "opencv2/core/cvdef.h"
+#include "opencv2/core/version.hpp"
+
+#ifdef OPENCV_SIMD_CONFIG_HEADER
+#include CVAUX_STR(OPENCV_SIMD_CONFIG_HEADER)
+#elif defined(OPENCV_SIMD_CONFIG_INCLUDE_DIR)
+#include "opencv_simd_config.hpp"  // corresponding directory should be added via -I compiler parameter
+#else  // custom config headers
+
+#if (!defined(CV_AVX_512F) || !CV_AVX_512F) && (defined(__AVX512__) || defined(__AVX512F__))
+#  include <immintrin.h>
+#  undef CV_AVX_512F
+#  define CV_AVX_512F 1
+#  ifndef OPENCV_SIMD_DONT_ASSUME_SKX  // Skylake-X with AVX-512F/CD/BW/DQ/VL
+#    undef CV_AVX512_SKX
+#    define CV_AVX512_SKX 1
+#    undef CV_AVX_512CD
+#    define CV_AVX_512CD 1
+#    undef CV_AVX_512BW
+#    define CV_AVX_512BW 1
+#    undef CV_AVX_512DQ
+#    define CV_AVX_512DQ 1
+#    undef CV_AVX_512VL
+#    define CV_AVX_512VL 1
+#  endif
+#endif // AVX512
+
+// GCC/Clang: -mavx2
+// MSVC: /arch:AVX2
+#if defined __AVX2__
+#  include <immintrin.h>
+#  undef CV_AVX2
+#  define CV_AVX2 1
+#  if defined __F16C__
+#    undef CV_FP16
+#    define CV_FP16 1
+#  endif
+#endif
+
+#endif
+
+// SSE / NEON / VSX is handled by cv_cpu_dispatch.h compatibility block
+#include "cv_cpu_dispatch.h"
+
+#include "hal/intrin.hpp"
+
+#endif // OPENCV_CORE_SIMD_INTRINSICS_HPP
diff --git a/IPL/include/opencv/opencv2/core/softfloat.hpp b/IPL/include/opencv/opencv2/core/softfloat.hpp
new file mode 100644
index 0000000..485e15c
--- /dev/null
+++ b/IPL/include/opencv/opencv2/core/softfloat.hpp
@@ -0,0 +1,514 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html
+
+// This file is based on files from package issued with the following license:
+
+/*============================================================================
+
+This C header file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3c, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the
+University of California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#pragma once
+#ifndef softfloat_h
+#define softfloat_h 1
+
+#include "cvdef.h"
+
+namespace cv
+{
+
+/** @addtogroup core_utils_softfloat
+
+  [SoftFloat](http://www.jhauser.us/arithmetic/SoftFloat.html) is a software implementation
+  of floating-point calculations according to IEEE 754 standard.
+  All calculations are done in integers, that's why they are machine-independent and bit-exact.
+  This library can be useful in accuracy-critical parts like look-up tables generation, tests, etc.
+  OpenCV contains a subset of SoftFloat partially rewritten to C++.
+
+  ### Types
+
+  There are two basic types: @ref softfloat and @ref softdouble.
+  These types are binary compatible with float and double types respectively
+  and support conversions to/from them.
+  Other types from original SoftFloat library like fp16 or fp128 were thrown away
+  as well as quiet/signaling NaN support, on-the-fly rounding mode switch
+  and exception flags (though exceptions can be implemented in the future).
+
+  ### Operations
+
+  Both types support the following:
+  - Construction from signed and unsigned 32-bit and 64-bit integers,
+  float/double or raw binary representation
+  - Conversions between each other, to float or double and to int
+  using @ref cvRound, @ref cvTrunc, @ref cvFloor, @ref cvCeil or a bunch of
+  saturate_cast functions
+  - Add, subtract, multiply, divide, remainder, square root, FMA with absolute precision
+  - Comparison operations
+  - Explicit sign, exponent and significand manipulation through get/set methods,
+  number state indicators (isInf, isNaN, isSubnormal)
+  - Type-specific constants like eps, minimum/maximum value, best pi approximation, etc.
+  - min(), max(), abs(), exp(), log() and pow() functions
+
+*/
+//! @{
+
+struct softfloat;
+struct softdouble;
+
+struct CV_EXPORTS softfloat
+{
+public:
+    /** @brief Default constructor */
+    softfloat() { v = 0; }
+    /** @brief Copy constructor */
+    softfloat( const softfloat& c) { v = c.v; }
+    /** @brief Copy assignment operator */
+    softfloat& operator=( const softfloat& c )
+    {
+        if(&c != this) v = c.v;
+        return *this;
+    }
+    /** @brief Construct from raw
+
+    Builds new value from raw binary representation
+    */
+    static const softfloat fromRaw( const uint32_t a ) { softfloat x; x.v = a; return x; }
+
+    /** @brief Construct from integer */
+    explicit softfloat( const uint32_t );
+    explicit softfloat( const uint64_t );
+    explicit softfloat( const int32_t );
+    explicit softfloat( const int64_t );
+
+#ifdef CV_INT32_T_IS_LONG_INT
+    // for platforms with int32_t = long int
+    explicit softfloat( const int a ) { *this = softfloat(static_cast<int32_t>(a)); }
+#endif
+
+    /** @brief Construct from float */
+    explicit softfloat( const float a ) { Cv32suf s; s.f = a; v = s.u; }
+
+    /** @brief Type casts  */
+    operator softdouble() const;
+    operator float() const { Cv32suf s; s.u = v; return s.f; }
+
+    /** @brief Basic arithmetics */
+    softfloat operator + (const softfloat&) const;
+    softfloat operator - (const softfloat&) const;
+    softfloat operator * (const softfloat&) const;
+    softfloat operator / (const softfloat&) const;
+    softfloat operator - () const { softfloat x; x.v = v ^ (1U << 31); return x; }
+
+    /** @brief Remainder operator
+
+    A quote from original SoftFloat manual:
+
+    > The IEEE Standard remainder operation computes the value
+    > a - n * b, where n is the integer closest to a / b.
+    > If a / b is exactly halfway between two integers, n is the even integer
+    > closest to a / b. The IEEE Standard’s remainder operation is always exact and so requires no rounding.
+    > Depending on the relative magnitudes of the operands, the remainder functions
+    > can take considerably longer to execute than the other SoftFloat functions.
+    > This is an inherent characteristic of the remainder operation itself and is not a flaw
+    > in the SoftFloat implementation.
+    */
+    softfloat operator % (const softfloat&) const;
+
+    softfloat& operator += (const softfloat& a) { *this = *this + a; return *this; }
+    softfloat& operator -= (const softfloat& a) { *this = *this - a; return *this; }
+    softfloat& operator *= (const softfloat& a) { *this = *this * a; return *this; }
+    softfloat& operator /= (const softfloat& a) { *this = *this / a; return *this; }
+    softfloat& operator %= (const softfloat& a) { *this = *this % a; return *this; }
+
+    /** @brief Comparison operations
+
+     - Any operation with NaN produces false
+       + The only exception is when x is NaN: x != y for any y.
+     - Positive and negative zeros are equal
+    */
+    bool operator == ( const softfloat& ) const;
+    bool operator != ( const softfloat& ) const;
+    bool operator >  ( const softfloat& ) const;
+    bool operator >= ( const softfloat& ) const;
+    bool operator <  ( const softfloat& ) const;
+    bool operator <= ( const softfloat& ) const;
+
+    /** @brief NaN state indicator */
+    inline bool isNaN() const { return (v & 0x7fffffff)  > 0x7f800000; }
+    /** @brief Inf state indicator */
+    inline bool isInf() const { return (v & 0x7fffffff) == 0x7f800000; }
+    /** @brief Subnormal number indicator (tests for a zero exponent field, so it is also true for +/-0) */
+    inline bool isSubnormal() const { return ((v >> 23) & 0xFF) == 0; }
+
+    /** @brief Get sign bit */
+    inline bool getSign() const { return (v >> 31) != 0; }
+    /** @brief Construct a copy with new sign bit */
+    inline softfloat setSign(bool sign) const { softfloat x; x.v = (v & ((1U << 31) - 1)) | ((uint32_t)sign << 31); return x; }
+    /** @brief Get 0-based exponent */
+    inline int getExp() const { return ((v >> 23) & 0xFF) - 127; }
+    /** @brief Construct a copy with new 0-based exponent */
+    inline softfloat setExp(int e) const { softfloat x; x.v = (v & 0x807fffff) | (((e + 127) & 0xFF) << 23 ); return x; }
+
+    /** @brief Get a fraction part
+
+    Returns a number 1 <= x < 2 with the same significand
+    */
+    inline softfloat getFrac() const
+    {
+        uint_fast32_t vv = (v & 0x007fffff) | (127 << 23);
+        return softfloat::fromRaw(vv);
+    }
+    /** @brief Construct a copy with provided significand
+
+    Constructs a copy of a number with significand taken from parameter
+    */
+    inline softfloat setFrac(const softfloat& s) const
+    {
+        softfloat x;
+        x.v = (v & 0xff800000) | (s.v & 0x007fffff);
+        return x;
+    }
+
+    /** @brief Zero constant */
+    static softfloat zero() { return softfloat::fromRaw( 0 ); }
+    /** @brief Positive infinity constant */
+    static softfloat  inf() { return softfloat::fromRaw( 0xFF << 23 ); }
+    /** @brief Default NaN constant */
+    static softfloat  nan() { return softfloat::fromRaw( 0x7fffffff ); }
+    /** @brief One constant */
+    static softfloat  one() { return softfloat::fromRaw(  127 << 23 ); }
+    /** @brief Smallest normalized value */
+    static softfloat  min() { return softfloat::fromRaw( 0x01 << 23 ); }
+    /** @brief Difference between 1 and next representable value */
+    static softfloat  eps() { return softfloat::fromRaw( (127 - 23) << 23 ); }
+    /** @brief Biggest finite value */
+    static softfloat  max() { return softfloat::fromRaw( (0xFF << 23) - 1 ); }
+    /** @brief Correct pi approximation */
+    static softfloat   pi() { return softfloat::fromRaw( 0x40490fdb ); }
+
+    uint32_t v;
+};
+
+/*----------------------------------------------------------------------------
+*----------------------------------------------------------------------------*/
+
+struct CV_EXPORTS softdouble
+{
+public:
+    /** @brief Default constructor */
+    softdouble() : v(0) { }
+    /** @brief Copy constructor */
+    softdouble( const softdouble& c) { v = c.v; }
+    /** @brief Copy assignment operator */
+    softdouble& operator=( const softdouble& c )
+    {
+        if(&c != this) v = c.v;
+        return *this;
+    }
+    /** @brief Construct from raw
+
+    Builds new value from raw binary representation
+    */
+    static softdouble fromRaw( const uint64_t a ) { softdouble x; x.v = a; return x; }
+
+    /** @brief Construct from integer */
+    explicit softdouble( const uint32_t );
+    explicit softdouble( const uint64_t );
+    explicit softdouble( const  int32_t );
+    explicit softdouble( const  int64_t );
+
+#ifdef CV_INT32_T_IS_LONG_INT
+    // for platforms with int32_t = long int
+    explicit softdouble( const int a ) { *this = softdouble(static_cast<int32_t>(a)); }
+#endif
+
+    /** @brief Construct from double */
+    explicit softdouble( const double a ) { Cv64suf s; s.f = a; v = s.u; }
+
+    /** @brief Type casts  */
+    operator softfloat() const;
+    operator double() const { Cv64suf s; s.u = v; return s.f; }
+
+    /** @brief Basic arithmetics */
+    softdouble operator + (const softdouble&) const;
+    softdouble operator - (const softdouble&) const;
+    softdouble operator * (const softdouble&) const;
+    softdouble operator / (const softdouble&) const;
+    softdouble operator - () const { softdouble x; x.v = v ^ (1ULL << 63); return x; }
+
+    /** @brief Remainder operator
+
+    A quote from original SoftFloat manual:
+
+    > The IEEE Standard remainder operation computes the value
+    > a - n * b, where n is the integer closest to a / b.
+    > If a / b is exactly halfway between two integers, n is the even integer
+    > closest to a / b. The IEEE Standard’s remainder operation is always exact and so requires no rounding.
+    > Depending on the relative magnitudes of the operands, the remainder functions
+    > can take considerably longer to execute than the other SoftFloat functions.
+    > This is an inherent characteristic of the remainder operation itself and is not a flaw
+    > in the SoftFloat implementation.
+    */
+    softdouble operator % (const softdouble&) const;
+
+    softdouble& operator += (const softdouble& a) { *this = *this + a; return *this; }
+    softdouble& operator -= (const softdouble& a) { *this = *this - a; return *this; }
+    softdouble& operator *= (const softdouble& a) { *this = *this * a; return *this; }
+    softdouble& operator /= (const softdouble& a) { *this = *this / a; return *this; }
+    softdouble& operator %= (const softdouble& a) { *this = *this % a; return *this; }
+
+    /** @brief Comparison operations
+
+     - Any operation with NaN produces false
+       + The only exception is when x is NaN: x != y for any y.
+     - Positive and negative zeros are equal
+    */
+    bool operator == ( const softdouble& ) const;
+    bool operator != ( const softdouble& ) const;
+    bool operator >  ( const softdouble& ) const;
+    bool operator >= ( const softdouble& ) const;
+    bool operator <  ( const softdouble& ) const;
+    bool operator <= ( const softdouble& ) const;
+
+    /** @brief NaN state indicator */
+    inline bool isNaN() const { return (v & 0x7fffffffffffffff)  > 0x7ff0000000000000; }
+    /** @brief Inf state indicator */
+    inline bool isInf() const { return (v & 0x7fffffffffffffff) == 0x7ff0000000000000; }
+    /** @brief Subnormal number indicator (tests for a zero exponent field, so it is also true for +/-0) */
+    inline bool isSubnormal() const { return ((v >> 52) & 0x7FF) == 0; }
+
+    /** @brief Get sign bit */
+    inline bool getSign() const { return (v >> 63) != 0; }
+    /** @brief Construct a copy with new sign bit */
+    softdouble setSign(bool sign) const { softdouble x; x.v = (v & ((1ULL << 63) - 1)) | ((uint_fast64_t)(sign) << 63); return x; }
+    /** @brief Get 0-based exponent */
+    inline int getExp() const { return ((v >> 52) & 0x7FF) - 1023; }
+    /** @brief Construct a copy with new 0-based exponent */
+    inline softdouble setExp(int e) const
+    {
+        softdouble x;
+        x.v = (v & 0x800FFFFFFFFFFFFF) | ((uint_fast64_t)((e + 1023) & 0x7FF) << 52);
+        return x;
+    }
+
+    /** @brief Get a fraction part
+
+    Returns a number 1 <= x < 2 with the same significand
+    */
+    inline softdouble getFrac() const
+    {
+        uint_fast64_t vv = (v & 0x000FFFFFFFFFFFFF) | ((uint_fast64_t)(1023) << 52);
+        return softdouble::fromRaw(vv);
+    }
+    /** @brief Construct a copy with provided significand
+
+    Constructs a copy of a number with significand taken from parameter
+    */
+    inline softdouble setFrac(const softdouble& s) const
+    {
+        softdouble x;
+        x.v = (v & 0xFFF0000000000000) | (s.v & 0x000FFFFFFFFFFFFF);
+        return x;
+    }
+
+    /** @brief Zero constant */
+    static softdouble zero() { return softdouble::fromRaw( 0 ); }
+    /** @brief Positive infinity constant */
+    static softdouble  inf() { return softdouble::fromRaw( (uint_fast64_t)(0x7FF) << 52 ); }
+    /** @brief Default NaN constant */
+    static softdouble  nan() { return softdouble::fromRaw( CV_BIG_INT(0x7FFFFFFFFFFFFFFF) ); }
+    /** @brief One constant */
+    static softdouble  one() { return softdouble::fromRaw( (uint_fast64_t)( 1023) << 52 ); }
+    /** @brief Smallest normalized value */
+    static softdouble  min() { return softdouble::fromRaw( (uint_fast64_t)( 0x01) << 52 ); }
+    /** @brief Difference between 1 and next representable value */
+    static softdouble  eps() { return softdouble::fromRaw( (uint_fast64_t)( 1023 - 52 ) << 52 ); }
+    /** @brief Biggest finite value */
+    static softdouble  max() { return softdouble::fromRaw( ((uint_fast64_t)(0x7FF) << 52) - 1 ); }
+    /** @brief Correct pi approximation */
+    static softdouble   pi() { return softdouble::fromRaw( CV_BIG_INT(0x400921FB54442D18) ); }
+
+    uint64_t v;
+};
+
+/*----------------------------------------------------------------------------
+*----------------------------------------------------------------------------*/
+
+/** @brief Fused Multiplication and Addition
+
+Computes (a*b)+c with single rounding
+*/
+CV_EXPORTS softfloat  mulAdd( const softfloat&  a, const softfloat&  b, const softfloat & c);
+CV_EXPORTS softdouble mulAdd( const softdouble& a, const softdouble& b, const softdouble& c);
+
+/** @brief Square root */
+CV_EXPORTS softfloat  sqrt( const softfloat&  a );
+CV_EXPORTS softdouble sqrt( const softdouble& a );
+}
+
+/*----------------------------------------------------------------------------
+| Ported from OpenCV and added for usability
+*----------------------------------------------------------------------------*/
+
+/** @brief Truncates number to integer with minimum magnitude */
+CV_EXPORTS int cvTrunc(const cv::softfloat&  a);
+CV_EXPORTS int cvTrunc(const cv::softdouble& a);
+
+/** @brief Rounds a number to nearest even integer */
+CV_EXPORTS int cvRound(const cv::softfloat&  a);
+CV_EXPORTS int cvRound(const cv::softdouble& a);
+
+/** @brief Rounds a number to nearest even long long integer */
+CV_EXPORTS int64_t cvRound64(const cv::softdouble& a);
+
+/** @brief Rounds a number down to integer */
+CV_EXPORTS int cvFloor(const cv::softfloat&  a);
+CV_EXPORTS int cvFloor(const cv::softdouble& a);
+
+/** @brief Rounds number up to integer */
+CV_EXPORTS int  cvCeil(const cv::softfloat&  a);
+CV_EXPORTS int  cvCeil(const cv::softdouble& a);
+
+namespace cv
+{
+/** @brief Saturate casts */
+template<typename _Tp> static inline _Tp saturate_cast(softfloat  a) { return _Tp(a); }
+template<typename _Tp> static inline _Tp saturate_cast(softdouble a) { return _Tp(a); }
+
+template<> inline uchar saturate_cast<uchar>(softfloat  a) { return (uchar)std::max(std::min(cvRound(a), (int)UCHAR_MAX), 0); }
+template<> inline uchar saturate_cast<uchar>(softdouble a) { return (uchar)std::max(std::min(cvRound(a), (int)UCHAR_MAX), 0); }
+
+template<> inline schar saturate_cast<schar>(softfloat  a) { return (schar)std::min(std::max(cvRound(a), (int)SCHAR_MIN), (int)SCHAR_MAX); }
+template<> inline schar saturate_cast<schar>(softdouble a) { return (schar)std::min(std::max(cvRound(a), (int)SCHAR_MIN), (int)SCHAR_MAX); }
+
+template<> inline ushort saturate_cast<ushort>(softfloat  a) { return (ushort)std::max(std::min(cvRound(a), (int)USHRT_MAX), 0); }
+template<> inline ushort saturate_cast<ushort>(softdouble a) { return (ushort)std::max(std::min(cvRound(a), (int)USHRT_MAX), 0); }
+
+template<> inline short saturate_cast<short>(softfloat  a) { return (short)std::min(std::max(cvRound(a), (int)SHRT_MIN), (int)SHRT_MAX); }
+template<> inline short saturate_cast<short>(softdouble a) { return (short)std::min(std::max(cvRound(a), (int)SHRT_MIN), (int)SHRT_MAX); }
+
+template<> inline int saturate_cast<int>(softfloat  a) { return cvRound(a); }
+template<> inline int saturate_cast<int>(softdouble a) { return cvRound(a); }
+
+template<> inline int64_t saturate_cast<int64_t>(softfloat  a) { return cvRound(a); }
+template<> inline int64_t saturate_cast<int64_t>(softdouble a) { return cvRound64(a); }
+
+/** @brief Saturate cast to unsigned integer and unsigned long long integer
+We intentionally do not clip negative numbers, to make -1 become 0xffffffff etc.
+*/
+template<> inline unsigned saturate_cast<unsigned>(softfloat  a) { return cvRound(a); }
+template<> inline unsigned saturate_cast<unsigned>(softdouble a) { return cvRound(a); }
+
+template<> inline uint64_t saturate_cast<uint64_t>(softfloat  a) { return cvRound(a); }
+template<> inline uint64_t saturate_cast<uint64_t>(softdouble a) { return cvRound64(a); }
+
+/** @brief Min and Max functions */
+inline softfloat  min(const softfloat&  a, const softfloat&  b) { return (a > b) ? b : a; }
+inline softdouble min(const softdouble& a, const softdouble& b) { return (a > b) ? b : a; }
+
+inline softfloat  max(const softfloat&  a, const softfloat&  b) { return (a > b) ? a : b; }
+inline softdouble max(const softdouble& a, const softdouble& b) { return (a > b) ? a : b; }
+
+/** @brief Absolute value */
+inline softfloat  abs( softfloat  a) { softfloat  x; x.v = a.v & ((1U   << 31) - 1); return x; }
+inline softdouble abs( softdouble a) { softdouble x; x.v = a.v & ((1ULL << 63) - 1); return x; }
+
+/** @brief Exponent
+
+Special cases:
+- exp(NaN) is NaN
+- exp(-Inf) == 0
+- exp(+Inf) == +Inf
+*/
+CV_EXPORTS softfloat  exp( const softfloat&  a);
+CV_EXPORTS softdouble exp( const softdouble& a);
+
+/** @brief Natural logarithm
+
+Special cases:
+- log(NaN), log(x < 0) are NaN
+- log(0) == -Inf
+*/
+CV_EXPORTS softfloat  log( const softfloat&  a );
+CV_EXPORTS softdouble log( const softdouble& a );
+
+/** @brief Raising to the power
+
+Special cases:
+- x**NaN is NaN for any x
+- ( |x| == 1 )**Inf is NaN
+- ( |x|  > 1 )**+Inf or ( |x| < 1 )**-Inf is +Inf
+- ( |x|  > 1 )**-Inf or ( |x| < 1 )**+Inf is 0
+- x ** 0 == 1 for any x
+- x ** 1 == x for any x
+- NaN ** y is NaN for any other y
+- Inf**(y < 0) == 0
+- Inf ** y is +Inf for any other y
+- (x < 0)**y is NaN for any other y if x can't be correctly rounded to integer
+- 0 ** 0 == 1
+- 0 ** (y < 0) is +Inf
+- 0 ** (y > 0) is 0
+*/
+CV_EXPORTS softfloat  pow( const softfloat&  a, const softfloat&  b);
+CV_EXPORTS softdouble pow( const softdouble& a, const softdouble& b);
+
+/** @brief Cube root
+
+Special cases:
+- cbrt(NaN) is NaN
+- cbrt(+/-Inf) is +/-Inf
+*/
+CV_EXPORTS softfloat cbrt( const softfloat& a );
+
+/** @brief Sine
+
+Special cases:
+- sin(Inf) or sin(NaN) is NaN
+- sin(x) == x when sin(x) is close to zero
+*/
+CV_EXPORTS softdouble sin( const softdouble& a );
+
+/** @brief Cosine
+ *
+Special cases:
+- cos(Inf) or cos(NaN) is NaN
+- cos(x) == +/- 1 when cos(x) is close to +/- 1
+*/
+CV_EXPORTS softdouble cos( const softdouble& a );
+
+//! @} core_utils_softfloat
+
+} // cv::
+
+#endif
diff --git a/IPL/include/opencv/opencv2/core/sse_utils.hpp b/IPL/include/opencv/opencv2/core/sse_utils.hpp
index c87b029..0906583 100644
--- a/IPL/include/opencv/opencv2/core/sse_utils.hpp
+++ b/IPL/include/opencv/opencv2/core/sse_utils.hpp
@@ -39,8 +39,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_CORE_SSE_UTILS_HPP__
-#define __OPENCV_CORE_SSE_UTILS_HPP__
+#ifndef OPENCV_CORE_SSE_UTILS_HPP
+#define OPENCV_CORE_SSE_UTILS_HPP
 
 #ifndef __cplusplus
 #  error sse_utils.hpp header must be compiled as C++
@@ -567,7 +567,7 @@ inline void _mm_deinterleave_ps(__m128 & v_r0, __m128 & v_r1, __m128 & v_g0, __m
 
 inline void _mm_interleave_ps(__m128 & v_r0, __m128 & v_r1, __m128 & v_g0, __m128 & v_g1)
 {
-    const int mask_lo = _MM_SHUFFLE(2, 0, 2, 0), mask_hi = _MM_SHUFFLE(3, 1, 3, 1);
+    enum { mask_lo = _MM_SHUFFLE(2, 0, 2, 0), mask_hi = _MM_SHUFFLE(3, 1, 3, 1) };
 
     __m128 layer2_chunk0 = _mm_shuffle_ps(v_r0, v_r1, mask_lo);
     __m128 layer2_chunk2 = _mm_shuffle_ps(v_r0, v_r1, mask_hi);
@@ -588,7 +588,7 @@ inline void _mm_interleave_ps(__m128 & v_r0, __m128 & v_r1, __m128 & v_g0, __m12
 inline void _mm_interleave_ps(__m128 & v_r0, __m128 & v_r1, __m128 & v_g0,
                               __m128 & v_g1, __m128 & v_b0, __m128 & v_b1)
 {
-    const int mask_lo = _MM_SHUFFLE(2, 0, 2, 0), mask_hi = _MM_SHUFFLE(3, 1, 3, 1);
+    enum { mask_lo = _MM_SHUFFLE(2, 0, 2, 0), mask_hi = _MM_SHUFFLE(3, 1, 3, 1) };
 
     __m128 layer2_chunk0 = _mm_shuffle_ps(v_r0, v_r1, mask_lo);
     __m128 layer2_chunk3 = _mm_shuffle_ps(v_r0, v_r1, mask_hi);
@@ -615,7 +615,7 @@ inline void _mm_interleave_ps(__m128 & v_r0, __m128 & v_r1, __m128 & v_g0,
 inline void _mm_interleave_ps(__m128 & v_r0, __m128 & v_r1, __m128 & v_g0, __m128 & v_g1,
                               __m128 & v_b0, __m128 & v_b1, __m128 & v_a0, __m128 & v_a1)
 {
-    const int mask_lo = _MM_SHUFFLE(2, 0, 2, 0), mask_hi = _MM_SHUFFLE(3, 1, 3, 1);
+    enum { mask_lo = _MM_SHUFFLE(2, 0, 2, 0), mask_hi = _MM_SHUFFLE(3, 1, 3, 1) };
 
     __m128 layer2_chunk0 = _mm_shuffle_ps(v_r0, v_r1, mask_lo);
     __m128 layer2_chunk4 = _mm_shuffle_ps(v_r0, v_r1, mask_hi);
@@ -649,4 +649,4 @@ inline void _mm_interleave_ps(__m128 & v_r0, __m128 & v_r1, __m128 & v_g0, __m12
 
 //! @}
 
-#endif //__OPENCV_CORE_SSE_UTILS_HPP__
+#endif //OPENCV_CORE_SSE_UTILS_HPP
diff --git a/IPL/include/opencv/opencv2/core/traits.hpp b/IPL/include/opencv/opencv2/core/traits.hpp
index 49bc844..52ab083 100644
--- a/IPL/include/opencv/opencv2/core/traits.hpp
+++ b/IPL/include/opencv/opencv2/core/traits.hpp
@@ -41,19 +41,23 @@
 //
 //M*/
 
-#ifndef __OPENCV_CORE_TRAITS_HPP__
-#define __OPENCV_CORE_TRAITS_HPP__
+#ifndef OPENCV_CORE_TRAITS_HPP
+#define OPENCV_CORE_TRAITS_HPP
 
 #include "opencv2/core/cvdef.h"
 
 namespace cv
 {
 
+//#define OPENCV_TRAITS_ENABLE_DEPRECATED
+
 //! @addtogroup core_basic
 //! @{
 
 /** @brief Template "trait" class for OpenCV primitive data types.
 
+@note Deprecated. This is replaced by "single purpose" traits: traits::Type and traits::Depth
+
 A primitive OpenCV data type is one of unsigned char, bool, signed char, unsigned short, signed
 short, int, float, double, or a tuple of values of one of these types, where all the values in the
 tuple have the same type. Any primitive type from the list can be defined by an identifier in the
@@ -102,10 +106,13 @@ So, such traits are used to tell OpenCV which data type you are working with, ev
 not native to OpenCV. For example, the matrix B initialization above is compiled because OpenCV
 defines the proper specialized template class DataType\<complex\<_Tp\> \> . This mechanism is also
 useful (and used in OpenCV this way) for generic algorithms implementations.
+
+@note Default values were dropped to avoid confusing developers about the use of unsupported types (see #7599)
 */
 template<typename _Tp> class DataType
 {
 public:
+#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED
     typedef _Tp         value_type;
     typedef value_type  work_type;
     typedef value_type  channel_type;
@@ -116,6 +123,7 @@ template<typename _Tp> class DataType
            fmt          = 0,
            type = CV_MAKETYPE(depth, channels)
          };
+#endif
 };
 
 template<> class DataType<bool>
@@ -253,6 +261,20 @@ template<> class DataType<double>
          };
 };
 
+template<> class DataType<float16_t>
+{
+public:
+    typedef float16_t   value_type;
+    typedef float       work_type;
+    typedef value_type  channel_type;
+    typedef value_type  vec_type;
+    enum { generic_type = 0,
+           depth        = CV_16F,
+           channels     = 1,
+           fmt          = (int)'h',
+           type         = CV_MAKETYPE(depth, channels)
+         };
+};
 
 /** @brief A helper class for cv::DataType
 
@@ -270,11 +292,14 @@ template<typename _Tp> class DataDepth
 };
 
 
+#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED
 
 template<int _depth> class TypeDepth
 {
+#ifdef OPENCV_TRAITS_ENABLE_LEGACY_DEFAULTS
     enum { depth = CV_USRTYPE1 };
     typedef void value_type;
+#endif
 };
 
 template<> class TypeDepth<CV_8U>
@@ -319,8 +344,74 @@ template<> class TypeDepth<CV_64F>
     typedef double value_type;
 };
 
+template<> class TypeDepth<CV_16F>
+{
+    enum { depth = CV_16F };
+    typedef float16_t value_type;
+};
+
+#endif
+
 //! @}
 
+namespace traits {
+
+namespace internal {
+#define CV_CREATE_MEMBER_CHECK(X) \
+template<typename T> class CheckMember_##X { \
+    struct Fallback { int X; }; \
+    struct Derived : T, Fallback { }; \
+    template<typename U, U> struct Check; \
+    typedef char CV_NO[1]; \
+    typedef char CV_YES[2]; \
+    template<typename U> static CV_NO & func(Check<int Fallback::*, &U::X> *); \
+    template<typename U> static CV_YES & func(...); \
+public: \
+    typedef CheckMember_##X type; \
+    enum { value = sizeof(func<Derived>(0)) == sizeof(CV_YES) }; \
+};
+
+CV_CREATE_MEMBER_CHECK(fmt)
+CV_CREATE_MEMBER_CHECK(type)
+
+} // namespace internal
+
+
+template<typename T>
+struct Depth
+{ enum { value = DataType<T>::depth }; };
+
+template<typename T>
+struct Type
+{ enum { value = DataType<T>::type }; };
+
+/** Similar to traits::Type<T> but has value = -1 in case of unknown type (instead of compiler error) */
+template<typename T, bool available = internal::CheckMember_type< DataType<T> >::value >
+struct SafeType {};
+
+template<typename T>
+struct SafeType<T, false>
+{ enum { value = -1 }; };
+
+template<typename T>
+struct SafeType<T, true>
+{ enum { value = Type<T>::value }; };
+
+
+template<typename T, bool available = internal::CheckMember_fmt< DataType<T> >::value >
+struct SafeFmt {};
+
+template<typename T>
+struct SafeFmt<T, false>
+{ enum { fmt = 0 }; };
+
+template<typename T>
+struct SafeFmt<T, true>
+{ enum { fmt = DataType<T>::fmt }; };
+
+
+} // namespace
+
 } // cv
 
-#endif // __OPENCV_CORE_TRAITS_HPP__
+#endif // OPENCV_CORE_TRAITS_HPP
diff --git a/IPL/include/opencv/opencv2/core/types.hpp b/IPL/include/opencv/opencv2/core/types.hpp
index e166556..819fd52 100644
--- a/IPL/include/opencv/opencv2/core/types.hpp
+++ b/IPL/include/opencv/opencv2/core/types.hpp
@@ -41,8 +41,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_CORE_TYPES_HPP__
-#define __OPENCV_CORE_TYPES_HPP__
+#ifndef OPENCV_CORE_TYPES_HPP
+#define OPENCV_CORE_TYPES_HPP
 
 #ifndef __cplusplus
 #  error types.hpp header must be compiled as C++
@@ -51,6 +51,7 @@
 #include <climits>
 #include <cfloat>
 #include <vector>
+#include <limits>
 
 #include "opencv2/core/cvdef.h"
 #include "opencv2/core/cvstd.hpp"
@@ -74,7 +75,7 @@ template<typename _Tp> class Complex
 {
 public:
 
-    //! constructors
+    //! default constructor
     Complex();
     Complex( _Tp _re, _Tp _im = 0 );
 
@@ -97,14 +98,23 @@ template<typename _Tp> class DataType< Complex<_Tp> >
     typedef _Tp          channel_type;
 
     enum { generic_type = 0,
-           depth        = DataType<channel_type>::depth,
            channels     = 2,
-           fmt          = DataType<channel_type>::fmt + ((channels - 1) << 8),
-           type         = CV_MAKETYPE(depth, channels) };
+           fmt          = DataType<channel_type>::fmt + ((channels - 1) << 8)
+#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED
+           ,depth        = DataType<channel_type>::depth
+           ,type         = CV_MAKETYPE(depth, channels)
+#endif
+    };
 
     typedef Vec<channel_type, channels> vec_type;
 };
 
+namespace traits {
+template<typename _Tp>
+struct Depth< Complex<_Tp> > { enum { value = Depth<_Tp>::value }; };
+template<typename _Tp>
+struct Type< Complex<_Tp> > { enum { value = CV_MAKETYPE(Depth<_Tp>::value, 2) }; };
+} // namespace
 
 
 //////////////////////////////// Point_ ////////////////////////////////
@@ -149,14 +159,16 @@ template<typename _Tp> class Point_
 public:
     typedef _Tp value_type;
 
-    // various constructors
+    //! default constructor
     Point_();
     Point_(_Tp _x, _Tp _y);
     Point_(const Point_& pt);
+    Point_(Point_&& pt) CV_NOEXCEPT;
     Point_(const Size_<_Tp>& sz);
     Point_(const Vec<_Tp, 2>& v);
 
     Point_& operator = (const Point_& pt);
+    Point_& operator = (Point_&& pt) CV_NOEXCEPT;
     //! conversion to another data type
     template<typename _Tp2> operator Point_<_Tp2>() const;
 
@@ -171,11 +183,12 @@ template<typename _Tp> class Point_
     double cross(const Point_& pt) const;
     //! checks whether the point is inside the specified rectangle
     bool inside(const Rect_<_Tp>& r) const;
-
-    _Tp x, y; //< the point coordinates
+    _Tp x; //!< x coordinate of the point
+    _Tp y; //!< y coordinate of the point
 };
 
 typedef Point_<int> Point2i;
+typedef Point_<int64> Point2l;
 typedef Point_<float> Point2f;
 typedef Point_<double> Point2d;
 typedef Point2i Point;
@@ -188,15 +201,23 @@ template<typename _Tp> class DataType< Point_<_Tp> >
     typedef _Tp                                       channel_type;
 
     enum { generic_type = 0,
-           depth        = DataType<channel_type>::depth,
            channels     = 2,
-           fmt          = DataType<channel_type>::fmt + ((channels - 1) << 8),
-           type         = CV_MAKETYPE(depth, channels)
+           fmt          = traits::SafeFmt<channel_type>::fmt + ((channels - 1) << 8)
+#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED
+           ,depth        = DataType<channel_type>::depth
+           ,type         = CV_MAKETYPE(depth, channels)
+#endif
          };
 
     typedef Vec<channel_type, channels> vec_type;
 };
 
+namespace traits {
+template<typename _Tp>
+struct Depth< Point_<_Tp> > { enum { value = Depth<_Tp>::value }; };
+template<typename _Tp>
+struct Type< Point_<_Tp> > { enum { value = CV_MAKETYPE(Depth<_Tp>::value, 2) }; };
+} // namespace
 
 
 //////////////////////////////// Point3_ ////////////////////////////////
@@ -220,14 +241,16 @@ template<typename _Tp> class Point3_
 public:
     typedef _Tp value_type;
 
-    // various constructors
+    //! default constructor
     Point3_();
     Point3_(_Tp _x, _Tp _y, _Tp _z);
     Point3_(const Point3_& pt);
+    Point3_(Point3_&& pt) CV_NOEXCEPT;
     explicit Point3_(const Point_<_Tp>& pt);
     Point3_(const Vec<_Tp, 3>& v);
 
     Point3_& operator = (const Point3_& pt);
+    Point3_& operator = (Point3_&& pt) CV_NOEXCEPT;
     //! conversion to another data type
     template<typename _Tp2> operator Point3_<_Tp2>() const;
     //! conversion to cv::Vec<>
@@ -239,8 +262,9 @@ template<typename _Tp> class Point3_
     double ddot(const Point3_& pt) const;
     //! cross product of the 2 3D points
     Point3_ cross(const Point3_& pt) const;
-
-    _Tp x, y, z; //< the point coordinates
+    _Tp x; //!< x coordinate of the 3D point
+    _Tp y; //!< y coordinate of the 3D point
+    _Tp z; //!< z coordinate of the 3D point
 };
 
 typedef Point3_<int> Point3i;
@@ -255,16 +279,23 @@ template<typename _Tp> class DataType< Point3_<_Tp> >
     typedef _Tp                                        channel_type;
 
     enum { generic_type = 0,
-           depth        = DataType<channel_type>::depth,
            channels     = 3,
-           fmt          = DataType<channel_type>::fmt + ((channels - 1) << 8),
-           type         = CV_MAKETYPE(depth, channels)
+           fmt          = traits::SafeFmt<channel_type>::fmt + ((channels - 1) << 8)
+#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED
+           ,depth        = DataType<channel_type>::depth
+           ,type         = CV_MAKETYPE(depth, channels)
+#endif
          };
 
     typedef Vec<channel_type, channels> vec_type;
 };
 
-
+namespace traits {
+template<typename _Tp>
+struct Depth< Point3_<_Tp> > { enum { value = Depth<_Tp>::value }; };
+template<typename _Tp>
+struct Type< Point3_<_Tp> > { enum { value = CV_MAKETYPE(Depth<_Tp>::value, 3) }; };
+} // namespace
 
 //////////////////////////////// Size_ ////////////////////////////////
 
@@ -286,23 +317,31 @@ template<typename _Tp> class Size_
 public:
     typedef _Tp value_type;
 
-    //! various constructors
+    //! default constructor
     Size_();
     Size_(_Tp _width, _Tp _height);
     Size_(const Size_& sz);
+    Size_(Size_&& sz) CV_NOEXCEPT;
     Size_(const Point_<_Tp>& pt);
 
     Size_& operator = (const Size_& sz);
+    Size_& operator = (Size_&& sz) CV_NOEXCEPT;
     //! the area (width*height)
     _Tp area() const;
+    //! aspect ratio (width/height)
+    double aspectRatio() const;
+    //! true if empty
+    bool empty() const;
 
     //! conversion of another data type.
     template<typename _Tp2> operator Size_<_Tp2>() const;
 
-    _Tp width, height; // the width and the height
+    _Tp width; //!< the width
+    _Tp height; //!< the height
 };
 
 typedef Size_<int> Size2i;
+typedef Size_<int64> Size2l;
 typedef Size_<float> Size2f;
 typedef Size_<double> Size2d;
 typedef Size2i Size;
@@ -315,16 +354,23 @@ template<typename _Tp> class DataType< Size_<_Tp> >
     typedef _Tp                                      channel_type;
 
     enum { generic_type = 0,
-           depth        = DataType<channel_type>::depth,
            channels     = 2,
-           fmt          = DataType<channel_type>::fmt + ((channels - 1) << 8),
-           type         = CV_MAKETYPE(depth, channels)
+           fmt          = DataType<channel_type>::fmt + ((channels - 1) << 8)
+#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED
+           ,depth        = DataType<channel_type>::depth
+           ,type         = CV_MAKETYPE(depth, channels)
+#endif
          };
 
     typedef Vec<channel_type, channels> vec_type;
 };
 
-
+namespace traits {
+template<typename _Tp>
+struct Depth< Size_<_Tp> > { enum { value = Depth<_Tp>::value }; };
+template<typename _Tp>
+struct Type< Size_<_Tp> > { enum { value = CV_MAKETYPE(Depth<_Tp>::value, 2) }; };
+} // namespace
 
 //////////////////////////////// Rect_ ////////////////////////////////
 
@@ -376,14 +422,16 @@ template<typename _Tp> class Rect_
 public:
     typedef _Tp value_type;
 
-    //! various constructors
+    //! default constructor
     Rect_();
     Rect_(_Tp _x, _Tp _y, _Tp _width, _Tp _height);
     Rect_(const Rect_& r);
+    Rect_(Rect_&& r) CV_NOEXCEPT;
     Rect_(const Point_<_Tp>& org, const Size_<_Tp>& sz);
     Rect_(const Point_<_Tp>& pt1, const Point_<_Tp>& pt2);
 
     Rect_& operator = ( const Rect_& r );
+    Rect_& operator = ( Rect_&& r ) CV_NOEXCEPT;
     //! the top-left corner
     Point_<_Tp> tl() const;
     //! the bottom-right corner
@@ -393,6 +441,8 @@ template<typename _Tp> class Rect_
     Size_<_Tp> size() const;
     //! area (width*height) of the rectangle
     _Tp area() const;
+    //! true if empty
+    bool empty() const;
 
     //! conversion to another data type
     template<typename _Tp2> operator Rect_<_Tp2>() const;
@@ -400,7 +450,10 @@ template<typename _Tp> class Rect_
     //! checks whether the rectangle contains the point
     bool contains(const Point_<_Tp>& pt) const;
 
-    _Tp x, y, width, height; //< the top-left corner, as well as width and height of the rectangle
+    _Tp x; //!< x coordinate of the top-left corner
+    _Tp y; //!< y coordinate of the top-left corner
+    _Tp width; //!< width of the rectangle
+    _Tp height; //!< height of the rectangle
 };
 
 typedef Rect_<int> Rect2i;
@@ -416,40 +469,33 @@ template<typename _Tp> class DataType< Rect_<_Tp> >
     typedef _Tp                                      channel_type;
 
     enum { generic_type = 0,
-           depth        = DataType<channel_type>::depth,
            channels     = 4,
-           fmt          = DataType<channel_type>::fmt + ((channels - 1) << 8),
-           type         = CV_MAKETYPE(depth, channels)
+           fmt          = traits::SafeFmt<channel_type>::fmt + ((channels - 1) << 8)
+#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED
+           ,depth        = DataType<channel_type>::depth
+           ,type         = CV_MAKETYPE(depth, channels)
+#endif
          };
 
     typedef Vec<channel_type, channels> vec_type;
 };
 
-
+namespace traits {
+template<typename _Tp>
+struct Depth< Rect_<_Tp> > { enum { value = Depth<_Tp>::value }; };
+template<typename _Tp>
+struct Type< Rect_<_Tp> > { enum { value = CV_MAKETYPE(Depth<_Tp>::value, 4) }; };
+} // namespace
 
 ///////////////////////////// RotatedRect /////////////////////////////
 
 /** @brief The class represents rotated (i.e. not up-right) rectangles on a plane.
 
 Each rectangle is specified by the center point (mass center), length of each side (represented by
-cv::Size2f structure) and the rotation angle in degrees.
+#Size2f structure) and the rotation angle in degrees.
 
 The sample below demonstrates how to use RotatedRect:
-@code
-    Mat image(200, 200, CV_8UC3, Scalar(0));
-    RotatedRect rRect = RotatedRect(Point2f(100,100), Size2f(100,50), 30);
-
-    Point2f vertices[4];
-    rRect.points(vertices);
-    for (int i = 0; i < 4; i++)
-        line(image, vertices[i], vertices[(i+1)%4], Scalar(0,255,0));
-
-    Rect brect = rRect.boundingRect();
-    rectangle(image, brect, Scalar(255,0,0));
-
-    imshow("rectangles", image);
-    waitKey(0);
-@endcode
+@snippet snippets/core_various.cpp RotatedRect_demo
 ![image](pics/rotatedrect.png)
 
 @sa CamShift, fitEllipse, minAreaRect, CvBox2D
@@ -457,9 +503,9 @@ The sample below demonstrates how to use RotatedRect:
 class CV_EXPORTS RotatedRect
 {
 public:
-    //! various constructors
+    //! default constructor
     RotatedRect();
-    /**
+    /** full constructor
     @param center The rectangle mass center.
     @param size Width and height of the rectangle.
     @param angle The rotation angle in a clockwise direction. When the angle is 0, 90, 180, 270 etc.,
@@ -473,15 +519,19 @@ class CV_EXPORTS RotatedRect
     RotatedRect(const Point2f& point1, const Point2f& point2, const Point2f& point3);
 
     /** returns 4 vertices of the rectangle
-    @param pts The points array for storing rectangle vertices.
+    @param pts The points array for storing rectangle vertices. The order is bottomLeft, topLeft, topRight, bottomRight.
     */
     void points(Point2f pts[]) const;
-    //! returns the minimal up-right rectangle containing the rotated rectangle
+    //! returns the minimal up-right integer rectangle containing the rotated rectangle
     Rect boundingRect() const;
-
-    Point2f center; //< the rectangle mass center
-    Size2f size;    //< width and height of the rectangle
-    float angle;    //< the rotation angle. When the angle is 0, 90, 180, 270 etc., the rectangle becomes an up-right rectangle.
+    //! returns the minimal (exact) floating point rectangle containing the rotated rectangle, not intended for use with images
+    Rect_<float> boundingRect2f() const;
+    //! the rectangle mass center
+    Point2f center;
+    //! width and height of the rectangle
+    Size2f size;
+    //! the rotation angle. When the angle is 0, 90, 180, 270 etc., the rectangle becomes an up-right rectangle.
+    float angle;
 };
 
 template<> class DataType< RotatedRect >
@@ -492,15 +542,23 @@ template<> class DataType< RotatedRect >
     typedef float        channel_type;
 
     enum { generic_type = 0,
-           depth        = DataType<channel_type>::depth,
            channels     = (int)sizeof(value_type)/sizeof(channel_type), // 5
-           fmt          = DataType<channel_type>::fmt + ((channels - 1) << 8),
-           type         = CV_MAKETYPE(depth, channels)
+           fmt          = traits::SafeFmt<channel_type>::fmt + ((channels - 1) << 8)
+#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED
+           ,depth        = DataType<channel_type>::depth
+           ,type         = CV_MAKETYPE(depth, channels)
+#endif
          };
 
     typedef Vec<channel_type, channels> vec_type;
 };
 
+namespace traits {
+template<>
+struct Depth< RotatedRect > { enum { value = Depth<float>::value }; };
+template<>
+struct Type< RotatedRect > { enum { value = CV_MAKETYPE(Depth<float>::value, (int)sizeof(RotatedRect)/sizeof(float)) }; };
+} // namespace
 
 
 //////////////////////////////// Range /////////////////////////////////
@@ -548,33 +606,47 @@ template<> class DataType<Range>
     typedef int        channel_type;
 
     enum { generic_type = 0,
-           depth        = DataType<channel_type>::depth,
            channels     = 2,
-           fmt          = DataType<channel_type>::fmt + ((channels - 1) << 8),
-           type         = CV_MAKETYPE(depth, channels)
+           fmt          = traits::SafeFmt<channel_type>::fmt + ((channels - 1) << 8)
+#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED
+           ,depth        = DataType<channel_type>::depth
+           ,type         = CV_MAKETYPE(depth, channels)
+#endif
          };
 
     typedef Vec<channel_type, channels> vec_type;
 };
 
+namespace traits {
+template<>
+struct Depth< Range > { enum { value = Depth<int>::value }; };
+template<>
+struct Type< Range > { enum { value = CV_MAKETYPE(Depth<int>::value, 2) }; };
+} // namespace
 
 
 //////////////////////////////// Scalar_ ///////////////////////////////
 
 /** @brief Template class for a 4-element vector derived from Vec.
 
-Being derived from Vec\<_Tp, 4\> , Scalar_ and Scalar can be used just as typical 4-element
+Being derived from Vec\<_Tp, 4\> , Scalar\_ and Scalar can be used just as typical 4-element
 vectors. In addition, they can be converted to/from CvScalar . The type Scalar is widely used in
 OpenCV to pass pixel values.
 */
 template<typename _Tp> class Scalar_ : public Vec<_Tp, 4>
 {
 public:
-    //! various constructors
+    //! default constructor
     Scalar_();
     Scalar_(_Tp v0, _Tp v1, _Tp v2=0, _Tp v3=0);
     Scalar_(_Tp v0);
 
+    Scalar_(const Scalar_& s);
+    Scalar_(Scalar_&& s) CV_NOEXCEPT;
+
+    Scalar_& operator=(const Scalar_& s);
+    Scalar_& operator=(Scalar_&& s) CV_NOEXCEPT;
+
     template<typename _Tp2, int cn>
     Scalar_(const Vec<_Tp2, cn>& v);
 
@@ -587,10 +659,10 @@ template<typename _Tp> class Scalar_ : public Vec<_Tp, 4>
     //! per-element product
     Scalar_<_Tp> mul(const Scalar_<_Tp>& a, double scale=1 ) const;
 
-    // returns (v0, -v1, -v2, -v3)
+    //! returns (v0, -v1, -v2, -v3)
     Scalar_<_Tp> conj() const;
 
-    // returns true iff v1 == v2 == v3 == 0
+    //! returns true iff v1 == v2 == v3 == 0
     bool isReal() const;
 };
 
@@ -604,15 +676,23 @@ template<typename _Tp> class DataType< Scalar_<_Tp> >
     typedef _Tp                                        channel_type;
 
     enum { generic_type = 0,
-           depth        = DataType<channel_type>::depth,
            channels     = 4,
-           fmt          = DataType<channel_type>::fmt + ((channels - 1) << 8),
-           type         = CV_MAKETYPE(depth, channels)
+           fmt          = traits::SafeFmt<channel_type>::fmt + ((channels - 1) << 8)
+#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED
+           ,depth        = DataType<channel_type>::depth
+           ,type         = CV_MAKETYPE(depth, channels)
+#endif
          };
 
     typedef Vec<channel_type, channels> vec_type;
 };
 
+namespace traits {
+template<typename _Tp>
+struct Depth< Scalar_<_Tp> > { enum { value = Depth<_Tp>::value }; };
+template<typename _Tp>
+struct Type< Scalar_<_Tp> > { enum { value = CV_MAKETYPE(Depth<_Tp>::value, 4) }; };
+} // namespace
 
 
 /////////////////////////////// KeyPoint ////////////////////////////////
@@ -620,14 +700,13 @@ template<typename _Tp> class DataType< Scalar_<_Tp> >
 /** @brief Data structure for salient point detectors.
 
 The class instance stores a keypoint, i.e. a point feature found by one of many available keypoint
-detectors, such as Harris corner detector, cv::FAST, cv::StarDetector, cv::SURF, cv::SIFT,
-cv::LDetector etc.
+detectors, such as Harris corner detector, #FAST, %StarDetector, %SURF, %SIFT etc.
 
 The keypoint is characterized by the 2D position, scale (proportional to the diameter of the
 neighborhood that needs to be taken into account), orientation and some other parameters. The
 keypoint neighborhood is then analyzed by another algorithm that builds a descriptor (usually
 represented as a feature vector). The keypoints representing the same object in different images
-can then be matched using cv::KDTree or another method.
+can then be matched using %KDTree or another method.
 */
 class CV_EXPORTS_W_SIMPLE KeyPoint
 {
@@ -699,6 +778,7 @@ class CV_EXPORTS_W_SIMPLE KeyPoint
     CV_PROP_RW int class_id; //!< object class (if the keypoints need to be clustered by an object they belong to)
 };
 
+#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED
 template<> class DataType<KeyPoint>
 {
 public:
@@ -715,7 +795,7 @@ template<> class DataType<KeyPoint>
 
     typedef Vec<channel_type, channels> vec_type;
 };
-
+#endif
 
 
 //////////////////////////////// DMatch /////////////////////////////////
@@ -732,9 +812,9 @@ class CV_EXPORTS_W_SIMPLE DMatch
     CV_WRAP DMatch(int _queryIdx, int _trainIdx, float _distance);
     CV_WRAP DMatch(int _queryIdx, int _trainIdx, int _imgIdx, float _distance);
 
-    CV_PROP_RW int queryIdx; // query descriptor index
-    CV_PROP_RW int trainIdx; // train descriptor index
-    CV_PROP_RW int imgIdx;   // train image index
+    CV_PROP_RW int queryIdx; //!< query descriptor index
+    CV_PROP_RW int trainIdx; //!< train descriptor index
+    CV_PROP_RW int imgIdx;   //!< train image index
 
     CV_PROP_RW float distance;
 
@@ -742,6 +822,7 @@ class CV_EXPORTS_W_SIMPLE DMatch
     bool operator<(const DMatch &m) const;
 };
 
+#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED
 template<> class DataType<DMatch>
 {
 public:
@@ -758,7 +839,7 @@ template<> class DataType<DMatch>
 
     typedef Vec<channel_type, channels> vec_type;
 };
-
+#endif
 
 
 ///////////////////////////// TermCriteria //////////////////////////////
@@ -790,9 +871,16 @@ class CV_EXPORTS TermCriteria
     */
     TermCriteria(int type, int maxCount, double epsilon);
 
+    inline bool isValid() const
+    {
+        const bool isCount = (type & COUNT) && maxCount > 0;
+        const bool isEps = (type & EPS) && !cvIsNaN(epsilon);
+        return isCount || isEps;
+    }
+
     int type; //!< the type of termination criteria: COUNT, EPS or COUNT + EPS
-    int maxCount; // the maximum number of iterations/elements
-    double epsilon; // the desired accuracy
+    int maxCount; //!< the maximum number of iterations/elements
+    double epsilon; //!< the desired accuracy
 };
 
 
@@ -872,15 +960,24 @@ template<> class DataType<Moments>
     typedef double      channel_type;
 
     enum { generic_type = 0,
-           depth        = DataType<channel_type>::depth,
            channels     = (int)(sizeof(value_type)/sizeof(channel_type)), // 24
-           fmt          = DataType<channel_type>::fmt + ((channels - 1) << 8),
-           type         = CV_MAKETYPE(depth, channels)
+           fmt          = DataType<channel_type>::fmt + ((channels - 1) << 8)
+#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED
+           ,depth        = DataType<channel_type>::depth
+           ,type         = CV_MAKETYPE(depth, channels)
+#endif
          };
 
     typedef Vec<channel_type, channels> vec_type;
 };
 
+namespace traits {
+template<>
+struct Depth< Moments > { enum { value = Depth<double>::value }; };
+template<>
+struct Type< Moments > { enum { value = CV_MAKETYPE(Depth<double>::value, (int)(sizeof(Moments)/sizeof(double))) }; };
+} // namespace
+
 //! @} imgproc_shape
 
 //! @cond IGNORED
@@ -1031,7 +1128,8 @@ Complex<_Tp> operator / (const Complex<_Tp>& a, const Complex<_Tp>& b)
 template<typename _Tp> static inline
 Complex<_Tp>& operator /= (Complex<_Tp>& a, const Complex<_Tp>& b)
 {
-    return (a = a / b);
+    a = a / b;
+    return a;
 }
 
 template<typename _Tp> static inline
@@ -1070,6 +1168,10 @@ template<typename _Tp> inline
 Point_<_Tp>::Point_(const Point_& pt)
     : x(pt.x), y(pt.y) {}
 
+template<typename _Tp> inline
+Point_<_Tp>::Point_(Point_&& pt) CV_NOEXCEPT
+    : x(std::move(pt.x)), y(std::move(pt.y)) {}
+
 template<typename _Tp> inline
 Point_<_Tp>::Point_(const Size_<_Tp>& sz)
     : x(sz.width), y(sz.height) {}
@@ -1085,6 +1187,13 @@ Point_<_Tp>& Point_<_Tp>::operator = (const Point_& pt)
     return *this;
 }
 
+template<typename _Tp> inline
+Point_<_Tp>& Point_<_Tp>::operator = (Point_&& pt) CV_NOEXCEPT
+{
+    x = std::move(pt.x); y = std::move(pt.y);
+    return *this;
+}
+
 template<typename _Tp> template<typename _Tp2> inline
 Point_<_Tp>::operator Point_<_Tp2>() const
 {
@@ -1106,7 +1215,7 @@ _Tp Point_<_Tp>::dot(const Point_& pt) const
 template<typename _Tp> inline
 double Point_<_Tp>::ddot(const Point_& pt) const
 {
-    return (double)x*pt.x + (double)y*pt.y;
+    return (double)x*(double)(pt.x) + (double)y*(double)(pt.y);
 }
 
 template<typename _Tp> inline
@@ -1297,6 +1406,20 @@ Point_<_Tp> operator / (const Point_<_Tp>& a, double b)
 }
 
 
+template<typename _AccTp> static inline _AccTp normL2Sqr(const Point_<int>& pt);
+template<typename _AccTp> static inline _AccTp normL2Sqr(const Point_<int64>& pt);
+template<typename _AccTp> static inline _AccTp normL2Sqr(const Point_<float>& pt);
+template<typename _AccTp> static inline _AccTp normL2Sqr(const Point_<double>& pt);
+
+template<> inline int normL2Sqr<int>(const Point_<int>& pt) { return pt.dot(pt); }
+template<> inline int64 normL2Sqr<int64>(const Point_<int64>& pt) { return pt.dot(pt); }
+template<> inline float normL2Sqr<float>(const Point_<float>& pt) { return pt.dot(pt); }
+template<> inline double normL2Sqr<double>(const Point_<int>& pt) { return pt.ddot(pt); } // accumulate in double: dot() would form the sum in int and can overflow before widening
+
+template<> inline double normL2Sqr<double>(const Point_<float>& pt) { return pt.ddot(pt); }
+template<> inline double normL2Sqr<double>(const Point_<double>& pt) { return pt.ddot(pt); }
+
+
 
 //////////////////////////////// 3D Point ///////////////////////////////
 
@@ -1312,6 +1435,10 @@ template<typename _Tp> inline
 Point3_<_Tp>::Point3_(const Point3_& pt)
     : x(pt.x), y(pt.y), z(pt.z) {}
 
+template<typename _Tp> inline
+Point3_<_Tp>::Point3_(Point3_&& pt) CV_NOEXCEPT
+    : x(std::move(pt.x)), y(std::move(pt.y)), z(std::move(pt.z)) {}
+
 template<typename _Tp> inline
 Point3_<_Tp>::Point3_(const Point_<_Tp>& pt)
     : x(pt.x), y(pt.y), z(_Tp()) {}
@@ -1339,6 +1466,13 @@ Point3_<_Tp>& Point3_<_Tp>::operator = (const Point3_& pt)
     return *this;
 }
 
+template<typename _Tp> inline
+Point3_<_Tp>& Point3_<_Tp>::operator = (Point3_&& pt) CV_NOEXCEPT
+{
+    x = std::move(pt.x); y = std::move(pt.y); z = std::move(pt.z);
+    return *this;
+}
+
 template<typename _Tp> inline
 _Tp Point3_<_Tp>::dot(const Point3_& pt) const
 {
@@ -1555,6 +1689,10 @@ template<typename _Tp> inline
 Size_<_Tp>::Size_(const Size_& sz)
     : width(sz.width), height(sz.height) {}
 
+template<typename _Tp> inline
+Size_<_Tp>::Size_(Size_&& sz) CV_NOEXCEPT
+    : width(std::move(sz.width)), height(std::move(sz.height)) {}
+
 template<typename _Tp> inline
 Size_<_Tp>::Size_(const Point_<_Tp>& pt)
     : width(pt.x), height(pt.y) {}
@@ -1572,12 +1710,35 @@ Size_<_Tp>& Size_<_Tp>::operator = (const Size_<_Tp>& sz)
     return *this;
 }
 
+template<typename _Tp> inline
+Size_<_Tp>& Size_<_Tp>::operator = (Size_<_Tp>&& sz) CV_NOEXCEPT // move assignment
+{
+    width = std::move(sz.width); height = std::move(sz.height); // member-wise move of both dimensions
+    return *this; // returns the assigned-to size
+}
+
 template<typename _Tp> inline
 _Tp Size_<_Tp>::area() const
 {
-    return width * height;
+    const _Tp result = width * height; // the product is computed and stored in _Tp
+    CV_DbgAssert(!std::numeric_limits<_Tp>::is_integer // the check below is only applied to integer types
+        || width == 0 || result / width == height); // make sure the result fits in the return value
+    return result;
+}
+
+template<typename _Tp> inline
+double Size_<_Tp>::aspectRatio() const // ratio of width to height, as a double
+{
+    return width / static_cast<double>(height); // the cast forces a floating-point division; height is not checked for zero here
+}
+
+template<typename _Tp> inline
+bool Size_<_Tp>::empty() const // true when either dimension is zero or negative
+{
+    return width <= 0 || height <= 0;
 }
 
+
 template<typename _Tp> static inline
 Size_<_Tp>& operator *= (Size_<_Tp>& a, _Tp b)
 {
@@ -1670,6 +1831,10 @@ template<typename _Tp> inline
 Rect_<_Tp>::Rect_(const Rect_<_Tp>& r)
     : x(r.x), y(r.y), width(r.width), height(r.height) {}
 
+template<typename _Tp> inline
+Rect_<_Tp>::Rect_(Rect_<_Tp>&& r) CV_NOEXCEPT // move constructor
+    : x(std::move(r.x)), y(std::move(r.y)), width(std::move(r.width)), height(std::move(r.height)) {} // all four members are moved individually from r
+
 template<typename _Tp> inline
 Rect_<_Tp>::Rect_(const Point_<_Tp>& org, const Size_<_Tp>& sz)
     : x(org.x), y(org.y), width(sz.width), height(sz.height) {}
@@ -1693,6 +1858,16 @@ Rect_<_Tp>& Rect_<_Tp>::operator = ( const Rect_<_Tp>& r )
     return *this;
 }
 
+template<typename _Tp> inline
+Rect_<_Tp>& Rect_<_Tp>::operator = ( Rect_<_Tp>&& r ) CV_NOEXCEPT // move assignment
+{
+    x = std::move(r.x); // member-wise move: position first ...
+    y = std::move(r.y);
+    width = std::move(r.width); // ... then size
+    height = std::move(r.height);
+    return *this; // returns the assigned-to rectangle
+}
+
 template<typename _Tp> inline
 Point_<_Tp> Rect_<_Tp>::tl() const
 {
@@ -1714,7 +1889,16 @@ Size_<_Tp> Rect_<_Tp>::size() const
 template<typename _Tp> inline
 _Tp Rect_<_Tp>::area() const
 {
-    return width * height;
+    const _Tp result = width * height; // the product is computed and stored in _Tp
+    CV_DbgAssert(!std::numeric_limits<_Tp>::is_integer // the check below is only applied to integer types
+        || width == 0 || result / width == height); // make sure the result fits in the return value
+    return result;
+}
+
+template<typename _Tp> inline
+bool Rect_<_Tp>::empty() const // true when width or height is zero or negative; x and y are not consulted
+{
+    return width <= 0 || height <= 0;
 }
 
 template<typename _Tp> template<typename _Tp2> inline
@@ -1757,8 +1941,11 @@ Rect_<_Tp>& operator += ( Rect_<_Tp>& a, const Size_<_Tp>& b )
 template<typename _Tp> static inline
 Rect_<_Tp>& operator -= ( Rect_<_Tp>& a, const Size_<_Tp>& b )
 {
-    a.width -= b.width;
-    a.height -= b.height;
+    const _Tp width = a.width - b.width; // compute the reduced size into locals first, so a is untouched until the check has run
+    const _Tp height = a.height - b.height;
+    CV_DbgAssert(width >= 0 && height >= 0); // the reduced size must not be negative
+    a.width = width; // only the size of a changes; x and y are left as they are
+    a.height = height;
     return a;
 }
 
@@ -1779,12 +1966,17 @@ Rect_<_Tp>& operator &= ( Rect_<_Tp>& a, const Rect_<_Tp>& b )
 template<typename _Tp> static inline
 Rect_<_Tp>& operator |= ( Rect_<_Tp>& a, const Rect_<_Tp>& b )
 {
-    _Tp x1 = std::min(a.x, b.x);
-    _Tp y1 = std::min(a.y, b.y);
-    a.width = std::max(a.x + a.width, b.x + b.width) - x1;
-    a.height = std::max(a.y + a.height, b.y + b.height) - y1;
-    a.x = x1;
-    a.y = y1;
+    if (a.empty()) { // an empty a contributes nothing: the result is simply b
+        a = b;
+    }
+    else if (!b.empty()) { // both non-empty: grow a to cover both; if b is empty, a is left unchanged
+        _Tp x1 = std::min(a.x, b.x); // smallest x of the two rectangles
+        _Tp y1 = std::min(a.y, b.y); // smallest y of the two rectangles
+        a.width = std::max(a.x + a.width, b.x + b.width) - x1; // largest right edge minus the new x (uses a.x before it is overwritten)
+        a.height = std::max(a.y + a.height, b.y + b.height) - y1; // largest bottom edge minus the new y (uses a.y before it is overwritten)
+        a.x = x1; // position is updated last because the two lines above still read the old a.x / a.y
+        a.y = y1;
+    }
     return a;
 }
 
@@ -1818,6 +2010,15 @@ Rect_<_Tp> operator + (const Rect_<_Tp>& a, const Size_<_Tp>& b)
     return Rect_<_Tp>( a.x, a.y, a.width + b.width, a.height + b.height );
 }
 
+template<typename _Tp> static inline
+Rect_<_Tp> operator - (const Rect_<_Tp>& a, const Size_<_Tp>& b) // returns a copy of a whose size is reduced by b
+{
+    const _Tp width = a.width - b.width; // reduced width
+    const _Tp height = a.height - b.height; // reduced height
+    CV_DbgAssert(width >= 0 && height >= 0); // the reduced size must not be negative
+    return Rect_<_Tp>( a.x, a.y, width, height ); // x and y are taken from a unchanged
+}
+
 template<typename _Tp> static inline
 Rect_<_Tp> operator & (const Rect_<_Tp>& a, const Rect_<_Tp>& b)
 {
@@ -1832,7 +2033,26 @@ Rect_<_Tp> operator | (const Rect_<_Tp>& a, const Rect_<_Tp>& b)
     return c |= b;
 }
 
+/**
+ * @brief measure dissimilarity between two sample sets
+ *
+ * computes the complement of the Jaccard Index as described in <https://en.wikipedia.org/wiki/Jaccard_index>.
+ * For rectangles this reduces to computing the intersection over the union.
+ */
+template<typename _Tp> static inline
+double jaccardDistance(const Rect_<_Tp>& a, const Rect_<_Tp>& b) {
+    _Tp Aa = a.area(); // area of a
+    _Tp Ab = b.area(); // area of b
+
+    if ((Aa + Ab) <= std::numeric_limits<_Tp>::epsilon()) { // combined area not above epsilon(); note epsilon() is 0 for integer _Tp
+        // jaccard_index = 1 -> distance = 0
+        return 0.0;
+    }
 
+    double Aab = (a & b).area(); // area of the rectangle produced by a & b, held as double
+    // distance = 1 - jaccard_index
+    return 1.0 - Aab / (Aa + Ab - Aab); // denominator: sum of both areas minus the shared part
+}
 
 ////////////////////////////// RotatedRect //////////////////////////////
 
@@ -1844,8 +2064,6 @@ inline
 RotatedRect::RotatedRect(const Point2f& _center, const Size2f& _size, float _angle)
     : center(_center), size(_size), angle(_angle) {}
 
-
-
 ///////////////////////////////// Range /////////////////////////////////
 
 inline
@@ -1945,6 +2163,36 @@ Scalar_<_Tp>::Scalar_(_Tp v0, _Tp v1, _Tp v2, _Tp v3)
     this->val[3] = v3;
 }
 
+template<typename _Tp> inline
+Scalar_<_Tp>::Scalar_(const Scalar_<_Tp>& s) : Vec<_Tp, 4>(s) { // copy constructor: the Vec<_Tp, 4> base is constructed from s
+}
+
+template<typename _Tp> inline
+Scalar_<_Tp>::Scalar_(Scalar_<_Tp>&& s) CV_NOEXCEPT { // move constructor: no base initializer is given; elements are assigned in the body
+    this->val[0] = std::move(s.val[0]); // element-wise move of the four components
+    this->val[1] = std::move(s.val[1]);
+    this->val[2] = std::move(s.val[2]);
+    this->val[3] = std::move(s.val[3]);
+}
+
+template<typename _Tp> inline
+Scalar_<_Tp>& Scalar_<_Tp>::operator=(const Scalar_<_Tp>& s) { // copy assignment
+    this->val[0] = s.val[0]; // element-wise copy of the four components
+    this->val[1] = s.val[1];
+    this->val[2] = s.val[2];
+    this->val[3] = s.val[3];
+    return *this; // returns the assigned-to scalar
+}
+
+template<typename _Tp> inline
+Scalar_<_Tp>& Scalar_<_Tp>::operator=(Scalar_<_Tp>&& s) CV_NOEXCEPT { // move assignment
+    this->val[0] = std::move(s.val[0]); // element-wise move of the four components
+    this->val[1] = std::move(s.val[1]);
+    this->val[2] = std::move(s.val[2]);
+    this->val[3] = std::move(s.val[3]);
+    return *this; // returns the assigned-to scalar
+}
+
 template<typename _Tp> template<typename _Tp2, int cn> inline
 Scalar_<_Tp>::Scalar_(const Vec<_Tp2, cn>& v)
 {
@@ -2225,4 +2473,4 @@ TermCriteria::TermCriteria(int _type, int _maxCount, double _epsilon)
 
 } // cv
 
-#endif //__OPENCV_CORE_TYPES_HPP__
+#endif //OPENCV_CORE_TYPES_HPP
diff --git a/IPL/include/opencv/opencv2/core/types_c.h b/IPL/include/opencv/opencv2/core/types_c.h
index cb39587..e453626 100644
--- a/IPL/include/opencv/opencv2/core/types_c.h
+++ b/IPL/include/opencv/opencv2/core/types_c.h
@@ -41,12 +41,37 @@
 //
 //M*/
 
-#ifndef __OPENCV_CORE_TYPES_H__
-#define __OPENCV_CORE_TYPES_H__
+#ifndef OPENCV_CORE_TYPES_H
+#define OPENCV_CORE_TYPES_H
+
+#ifdef CV__ENABLE_C_API_CTORS  // invalid C API ctors (must be removed)
+#if defined(_WIN32) && !defined(CV__SKIP_MESSAGE_MALFORMED_C_API_CTORS)
+#error "C API ctors don't work on Win32: https://github.com/opencv/opencv/issues/15990"
+#endif
+#endif
+
+//#define CV__VALIDATE_UNUNITIALIZED_VARS 1  // C++11 & GCC only
+
+#ifdef __cplusplus  // CV_STRUCT_INITIALIZER is the brace initializer used by the factory functions in this header
+
+#ifdef CV__VALIDATE_UNUNITIALIZED_VARS  // validation mode: see the commented-out define above (C++11 & GCC only)
+#pragma GCC diagnostic ignored "-Wmissing-field-initializers"
+#define CV_STRUCT_INITIALIZER {0,}  // validation mode uses a one-element list
+#else
+#if defined(__GNUC__) && __GNUC__ == 4  // GCC 4.x warns on "= {}" initialization, fixed in GCC 5.0
+#pragma GCC diagnostic ignored "-Wmissing-field-initializers"
+#endif
+#define CV_STRUCT_INITIALIZER {}  // regular C++ builds use an empty brace list
+#endif
+
+#else  // plain C
+#define CV_STRUCT_INITIALIZER {0}  // C builds use {0}
+#endif
+
 
 #ifdef HAVE_IPL
 #  ifndef __IPL_H__
-#    if defined WIN32 || defined _WIN32
+#    if defined _WIN32
 #      include <ipl.h>
 #    else
 #      include <ipl/ipl.h>
@@ -65,7 +90,7 @@
 #include <float.h>
 #endif // SKIP_INCLUDES
 
-#if defined WIN32 || defined _WIN32
+#if defined _WIN32
 #  define CV_CDECL __cdecl
 #  define CV_STDCALL __stdcall
 #else
@@ -130,24 +155,24 @@ enum {
  CV_BadImageSize=              -10,  /**< image size is invalid           */
  CV_BadOffset=                 -11,  /**< offset is invalid               */
  CV_BadDataPtr=                -12,  /**/
- CV_BadStep=                   -13,  /**/
+ CV_BadStep=                   -13,  /**< image step is wrong, this may happen for a non-continuous matrix */
  CV_BadModelOrChSeq=           -14,  /**/
- CV_BadNumChannels=            -15,  /**/
+ CV_BadNumChannels=            -15,  /**< bad number of channels, for example, some functions accept only single channel matrices */
  CV_BadNumChannel1U=           -16,  /**/
- CV_BadDepth=                  -17,  /**/
+ CV_BadDepth=                  -17,  /**< input image depth is not supported by the function */
  CV_BadAlphaChannel=           -18,  /**/
- CV_BadOrder=                  -19,  /**/
- CV_BadOrigin=                 -20,  /**/
- CV_BadAlign=                  -21,  /**/
+ CV_BadOrder=                  -19,  /**< number of dimensions is out of range */
+ CV_BadOrigin=                 -20,  /**< incorrect input origin               */
+ CV_BadAlign=                  -21,  /**< incorrect input align                */
  CV_BadCallBack=               -22,  /**/
  CV_BadTileSize=               -23,  /**/
- CV_BadCOI=                    -24,  /**/
- CV_BadROISize=                -25,  /**/
+ CV_BadCOI=                    -24,  /**< input COI is not supported           */
+ CV_BadROISize=                -25,  /**< incorrect input roi                  */
  CV_MaskIsTiled=               -26,  /**/
  CV_StsNullPtr=                -27,  /**< null pointer */
  CV_StsVecLengthErr=           -28,  /**< incorrect vector length */
- CV_StsFilterStructContentErr= -29,  /**< incorr. filter structure content */
- CV_StsKernelStructContentErr= -30,  /**< incorr. transform kernel content */
+ CV_StsFilterStructContentErr= -29,  /**< incorrect filter structure content */
+ CV_StsKernelStructContentErr= -30,  /**< incorrect transform kernel content */
  CV_StsFilterOffsetErr=        -31,  /**< incorrect filter offset value */
  CV_StsBadSize=                -201, /**< the input/output structure size is incorrect  */
  CV_StsDivByZero=              -202, /**< division by zero */
@@ -163,14 +188,14 @@ enum {
  CV_StsParseError=             -212, /**< invalid syntax/structure of the parsed file */
  CV_StsNotImplemented=         -213, /**< the requested function/feature is not implemented */
  CV_StsBadMemBlock=            -214, /**< an allocated block has been corrupted */
- CV_StsAssert=                 -215, /**< assertion failed */
- CV_GpuNotSupported=           -216,
- CV_GpuApiCallError=           -217,
- CV_OpenGlNotSupported=        -218,
- CV_OpenGlApiCallError=        -219,
- CV_OpenCLApiCallError=        -220,
+ CV_StsAssert=                 -215, /**< assertion failed   */
+ CV_GpuNotSupported=           -216, /**< no CUDA support    */
+ CV_GpuApiCallError=           -217, /**< GPU API call error */
+ CV_OpenGlNotSupported=        -218, /**< no OpenGL support  */
+ CV_OpenGlApiCallError=        -219, /**< OpenGL API call error */
+ CV_OpenCLApiCallError=        -220, /**< OpenCL API call error */
  CV_OpenCLDoubleNotSupported=  -221,
- CV_OpenCLInitError=           -222,
+ CV_OpenCLInitError=           -222, /**< OpenCL initialization error */
  CV_OpenCLNoAMDBlasFft=        -223
 };
 
@@ -285,6 +310,11 @@ CV_INLINE double cvRandReal( CvRNG* rng )
 #define IPL_BORDER_REFLECT    2
 #define IPL_BORDER_WRAP       3
 
+#ifdef __cplusplus
+typedef struct _IplImage IplImage;
+CV_EXPORTS _IplImage cvIplImage(const cv::Mat& m);
+#endif
+
 /** The IplImage is taken from the Intel Image Processing Library, in which the format is native. OpenCV
 only supports a subset of possible IplImage formats, as outlined in the parameter list above.
 
@@ -294,9 +324,6 @@ hand, the Intel Image Processing Library processes the area of intersection betw
 destination images (or ROIs), allowing them to vary independently.
 */
 typedef struct
-#ifdef __cplusplus
-  CV_EXPORTS
-#endif
 _IplImage
 {
     int  nSize;             /**< sizeof(IplImage) */
@@ -330,13 +357,22 @@ _IplImage
                                (not necessarily aligned) -
                                needed for correct deallocation */
 
-#ifdef __cplusplus
+#if defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)
     _IplImage() {}
-    _IplImage(const cv::Mat& m);
+    _IplImage(const cv::Mat& m) { *this = cvIplImage(m); }
 #endif
 }
 IplImage;
 
+CV_INLINE IplImage cvIplImage() /* returns a default IplImage value */
+{
+#if !(defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)) /* no C++ constructors compiled in: build the value by hand */
+    IplImage self = CV_STRUCT_INITIALIZER; self.nSize = sizeof(IplImage); return self; /* brace-initialized struct with nSize set to the struct size */
+#else /* C API constructors enabled in a C++ build */
+    return _IplImage(); /* uses the default constructor, whose body is empty */
+#endif
+}
+
 typedef struct _IplTileInfo IplTileInfo;
 
 typedef struct _IplROI
@@ -409,6 +445,11 @@ IplConvKernelFP;
 #define CV_MAT_MAGIC_VAL    0x42420000
 #define CV_TYPE_NAME_MAT    "opencv-matrix"
 
+#ifdef __cplusplus
+typedef struct CvMat CvMat;
+CV_INLINE CvMat cvMat(const cv::Mat& m);
+#endif
+
 /** Matrix elements are stored row by row. Element (i, j) (i - 0-based row index, j - 0-based column
 index) of a matrix can be retrieved or modified using CV_MAT_ELEM macro:
 
@@ -455,13 +496,10 @@ typedef struct CvMat
     int cols;
 #endif
 
-
-#ifdef __cplusplus
+#if defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)
     CvMat() {}
-    CvMat(const CvMat& m) { memcpy(this, &m, sizeof(CvMat));}
-    CvMat(const cv::Mat& m);
+    CvMat(const cv::Mat& m) { *this = cvMat(m); }
 #endif
-
 }
 CvMat;
 
@@ -524,14 +562,34 @@ CV_INLINE CvMat cvMat( int rows, int cols, int type, void* data CV_DEFAULT(NULL)
 }
 
 #ifdef __cplusplus
-inline CvMat::CvMat(const cv::Mat& m)
+
+CV_INLINE CvMat cvMat(const cv::Mat& m) /* builds a CvMat value from a cv::Mat that has at most 2 dimensions */
 {
+    CvMat self;
     CV_DbgAssert(m.dims <= 2);
-    *this = cvMat(m.rows, m.dims == 1 ? 1 : m.cols, m.type(), m.data);
-    step = (int)m.step[0];
-    type = (type & ~cv::Mat::CONTINUOUS_FLAG) | (m.flags & cv::Mat::CONTINUOUS_FLAG);
+    self = cvMat(m.rows, m.dims == 1 ? 1 : m.cols, m.type(), m.data); /* a 1-dimensional Mat gets a column count of 1; m.data is passed as the data pointer */
+    self.step = (int)m.step[0]; /* step is overwritten with m.step[0], narrowed to int */
+    self.type = (self.type & ~cv::Mat::CONTINUOUS_FLAG) | (m.flags & cv::Mat::CONTINUOUS_FLAG); /* clear the continuity bit, then copy it from m.flags */
+    return self;
+}
+CV_INLINE CvMat cvMat() /* returns a default CvMat value (this overload sits inside the __cplusplus section) */
+{
+#if !defined(CV__ENABLE_C_API_CTORS) /* no C++ constructors compiled in */
+    CvMat self = CV_STRUCT_INITIALIZER; return self; /* brace-initialized struct */
+#else
+    return CvMat(); /* uses the default constructor, whose body is empty */
+#endif
 }
+CV_INLINE CvMat cvMat(const CvMat& m) /* returns a copy of m */
+{
+#if !defined(CV__ENABLE_C_API_CTORS) /* no C++ constructors compiled in */
+    CvMat self = CV_STRUCT_INITIALIZER; memcpy(&self, &m, sizeof(self)); return self; /* initialize, then byte-copy the whole struct from m */
+#else
+    return CvMat(m); /* copy-constructs from m */
 #endif
+}
+
+#endif // __cplusplus
 
 
 #define CV_MAT_ELEM_PTR_FAST( mat, row, col, pix_size )  \
@@ -614,15 +672,16 @@ CV_INLINE int cvIplDepth( int type )
 #define CV_TYPE_NAME_MATND    "opencv-nd-matrix"
 
 #define CV_MAX_DIM            32
-#define CV_MAX_DIM_HEAP       1024
+
+#ifdef __cplusplus
+typedef struct CvMatND CvMatND;
+CV_EXPORTS CvMatND cvMatND(const cv::Mat& m);
+#endif
 
 /**
   @deprecated consider using cv::Mat instead
   */
 typedef struct
-#ifdef __cplusplus
-  CV_EXPORTS
-#endif
 CvMatND
 {
     int type;
@@ -647,13 +706,23 @@ CvMatND
     }
     dim[CV_MAX_DIM];
 
-#ifdef __cplusplus
+#if defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)
     CvMatND() {}
-    CvMatND(const cv::Mat& m);
+    CvMatND(const cv::Mat& m) { *this = cvMatND(m); }
 #endif
 }
 CvMatND;
 
+
+CV_INLINE CvMatND cvMatND() /* returns a default CvMatND value */
+{
+#if !(defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)) /* no C++ constructors compiled in: brace-initialize */
+    CvMatND self = CV_STRUCT_INITIALIZER; return self;
+#else /* C API constructors enabled in a C++ build */
+    return CvMatND(); /* uses the default constructor, whose body is empty */
+#endif
+}
+
 #define CV_IS_MATND_HDR(mat) \
     ((mat) != NULL && (((const CvMatND*)(mat))->type & CV_MAGIC_MASK) == CV_MATND_MAGIC_VAL)
 
@@ -670,11 +739,7 @@ CvMatND;
 
 struct CvSet;
 
-typedef struct
-#ifdef __cplusplus
-  CV_EXPORTS
-#endif
-CvSparseMat
+typedef struct CvSparseMat
 {
     int type;
     int dims;
@@ -689,13 +754,13 @@ CvSparseMat
     int size[CV_MAX_DIM];
 
 #ifdef __cplusplus
-    void copyToSparseMat(cv::SparseMat& m) const;
+    CV_EXPORTS void copyToSparseMat(cv::SparseMat& m) const;
 #endif
 }
 CvSparseMat;
 
 #ifdef __cplusplus
-    CV_EXPORTS CvSparseMat* cvCreateSparseMat(const cv::SparseMat& m);
+CV_EXPORTS CvSparseMat* cvCreateSparseMat(const cv::SparseMat& m);
 #endif
 
 #define CV_IS_SPARSE_MAT_HDR(mat) \
@@ -782,10 +847,23 @@ typedef struct CvRect
     int width;
     int height;
 
-#ifdef __cplusplus
+#ifdef CV__VALIDATE_UNUNITIALIZED_VARS
+    CvRect() __attribute__(( warning("Non-initialized variable") )) {};
+    template<typename _Tp> CvRect(const std::initializer_list<_Tp> list)
+    {
+        CV_Assert(list.size() == 0 || list.size() == 4);
+        x = y = width = height = 0;
+        if (list.size() == 4)
+        {
+            x = list.begin()[0]; y = list.begin()[1]; width = list.begin()[2]; height = list.begin()[3];
+        }
+    };
+#elif defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)
     CvRect(int _x = 0, int _y = 0, int w = 0, int h = 0): x(_x), y(_y), width(w), height(h) {}
     template<typename _Tp>
     CvRect(const cv::Rect_<_Tp>& r): x(cv::saturate_cast<int>(r.x)), y(cv::saturate_cast<int>(r.y)), width(cv::saturate_cast<int>(r.width)), height(cv::saturate_cast<int>(r.height)) {}
+#endif
+#ifdef __cplusplus
     template<typename _Tp>
     operator cv::Rect_<_Tp>() const { return cv::Rect_<_Tp>((_Tp)x, (_Tp)y, (_Tp)width, (_Tp)height); }
 #endif
@@ -795,16 +873,16 @@ CvRect;
 /** constructs CvRect structure. */
 CV_INLINE  CvRect  cvRect( int x, int y, int width, int height )
 {
-    CvRect r;
-
-    r.x = x;
-    r.y = y;
-    r.width = width;
-    r.height = height;
-
+#if !(defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)) /* no C++ constructors compiled in: brace initialization in member order x, y, width, height */
+    CvRect r = {x, y, width, height};
+#else /* C API constructors enabled in a C++ build: use the 4-argument constructor */
+    CvRect r(x, y , width, height);
+#endif
     return r;
 }
-
+#ifdef __cplusplus /* C++-only overload */
+CV_INLINE CvRect cvRect(const cv::Rect& rc) { return cvRect(rc.x, rc.y, rc.width, rc.height); } /* converts a cv::Rect by forwarding its four fields to the int overload */
+#endif
 
 CV_INLINE  IplROI  cvRectToROI( CvRect rect, int coi )
 {
@@ -826,39 +904,41 @@ CV_INLINE  CvRect  cvROIToRect( IplROI roi )
 
 /*********************************** CvTermCriteria *************************************/
 
-#define CV_TERMCRIT_ITER    1
-#define CV_TERMCRIT_NUMBER  CV_TERMCRIT_ITER
-#define CV_TERMCRIT_EPS     2
+#define CV_TERMCRIT_ITER    1  /**< flag for CvTermCriteria::type; C++ counterpart: cv::TermCriteria::MAX_ITER */
+#define CV_TERMCRIT_NUMBER  CV_TERMCRIT_ITER  /**< alias of CV_TERMCRIT_ITER */
+#define CV_TERMCRIT_EPS     2  /**< flag for CvTermCriteria::type; C++ counterpart: cv::TermCriteria::EPS */
 
 /** @sa TermCriteria
  */
 typedef struct CvTermCriteria
 {
     int    type;  /**< may be combination of
-                     CV_TERMCRIT_ITER
-                     CV_TERMCRIT_EPS */
+                     CV_TERMCRIT_ITER
+                     CV_TERMCRIT_EPS */
     int    max_iter;
     double epsilon;
-
-#ifdef __cplusplus
+#if defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus) /* constructors exist only when the C API ctors are explicitly enabled in C++ */
     CvTermCriteria(int _type = 0, int _iter = 0, double _eps = 0) : type(_type), max_iter(_iter), epsilon(_eps)  {}
     CvTermCriteria(const cv::TermCriteria& t) : type(t.type), max_iter(t.maxCount), epsilon(t.epsilon)  {}
+#endif
+#ifdef __cplusplus /* the conversion to cv::TermCriteria is available in every C++ build */
     operator cv::TermCriteria() const { return cv::TermCriteria(type, max_iter, epsilon); }
 #endif
-
 }
 CvTermCriteria;
 
 CV_INLINE  CvTermCriteria  cvTermCriteria( int type, int max_iter, double epsilon )
 {
-    CvTermCriteria t;
-
-    t.type = type;
-    t.max_iter = max_iter;
-    t.epsilon = (float)epsilon;
-
+#if !(defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)) /* no C++ constructors compiled in: brace initialization in member order */
+    CvTermCriteria t = { type, max_iter, (float)epsilon}; /* NOTE(review): epsilon is narrowed to float here (as in the removed code) although the field is double */
+#else /* C API constructors enabled in a C++ build */
+    CvTermCriteria t(type, max_iter, epsilon); /* this branch passes epsilon through without the float cast */
+#endif
     return t;
 }
+#ifdef __cplusplus /* C++-only overload */
+CV_INLINE CvTermCriteria cvTermCriteria(const cv::TermCriteria& t) { return cvTermCriteria(t.type, t.maxCount, t.epsilon); } /* converts a cv::TermCriteria; maxCount maps to max_iter */
+#endif
 
 
 /******************************* CvPoint and variants ***********************************/
@@ -868,10 +948,23 @@ typedef struct CvPoint
     int x;
     int y;
 
-#ifdef __cplusplus
+#ifdef CV__VALIDATE_UNUNITIALIZED_VARS
+    CvPoint() __attribute__(( warning("Non-initialized variable") )) {}
+    template<typename _Tp> CvPoint(const std::initializer_list<_Tp> list)
+    {
+        CV_Assert(list.size() == 0 || list.size() == 2);
+        x = y = 0;
+        if (list.size() == 2)
+        {
+            x = list.begin()[0]; y = list.begin()[1];
+        }
+    };
+#elif defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)
     CvPoint(int _x = 0, int _y = 0): x(_x), y(_y) {}
     template<typename _Tp>
     CvPoint(const cv::Point_<_Tp>& pt): x((int)pt.x), y((int)pt.y) {}
+#endif
+#ifdef __cplusplus
     template<typename _Tp>
     operator cv::Point_<_Tp>() const { return cv::Point_<_Tp>(cv::saturate_cast<_Tp>(x), cv::saturate_cast<_Tp>(y)); }
 #endif
@@ -881,24 +974,39 @@ CvPoint;
 /** constructs CvPoint structure. */
 CV_INLINE  CvPoint  cvPoint( int x, int y )
 {
-    CvPoint p;
-
-    p.x = x;
-    p.y = y;
-
+#if !(defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)) /* no C++ constructors compiled in: brace initialization in member order x, y */
+    CvPoint p = {x, y};
+#else /* C API constructors enabled in a C++ build: use the 2-argument constructor */
+    CvPoint p(x, y);
+#endif
     return p;
 }
-
+#ifdef __cplusplus /* C++-only overload */
+CV_INLINE CvPoint cvPoint(const cv::Point& pt) { return cvPoint(pt.x, pt.y); } /* converts a cv::Point by forwarding x and y to the int overload */
+#endif
 
 typedef struct CvPoint2D32f
 {
     float x;
     float y;
 
-#ifdef __cplusplus
+#ifdef CV__VALIDATE_UNUNITIALIZED_VARS
+    CvPoint2D32f() __attribute__(( warning("Non-initialized variable") )) {}
+    template<typename _Tp> CvPoint2D32f(const std::initializer_list<_Tp> list)
+    {
+        CV_Assert(list.size() == 0 || list.size() == 2);
+        x = y = 0;
+        if (list.size() == 2)
+        {
+            x = list.begin()[0]; y = list.begin()[1];
+        }
+    };
+#elif defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)
     CvPoint2D32f(float _x = 0, float _y = 0): x(_x), y(_y) {}
     template<typename _Tp>
     CvPoint2D32f(const cv::Point_<_Tp>& pt): x((float)pt.x), y((float)pt.y) {}
+#endif
+#ifdef __cplusplus
     template<typename _Tp>
     operator cv::Point_<_Tp>() const { return cv::Point_<_Tp>(cv::saturate_cast<_Tp>(x), cv::saturate_cast<_Tp>(y)); }
 #endif
@@ -908,13 +1016,26 @@ CvPoint2D32f;
 /** constructs CvPoint2D32f structure. */
 CV_INLINE  CvPoint2D32f  cvPoint2D32f( double x, double y )
 {
-    CvPoint2D32f p;
-
-    p.x = (float)x;
-    p.y = (float)y;
+#if !(defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)) /* no C++ constructors compiled in: brace initialization */
+    CvPoint2D32f p = { (float)x, (float)y }; /* the double arguments are narrowed to float */
+#else /* C API constructors enabled in a C++ build */
+    CvPoint2D32f p((float)x, (float)y); /* same narrowing, via the constructor */
+#endif
+    return p;
+}
 
+#ifdef __cplusplus /* C++-only overload */
+template<typename _Tp>
+CvPoint2D32f cvPoint2D32f(const cv::Point_<_Tp>& pt) /* converts a cv::Point_ of any element type; note this overload is not marked CV_INLINE */
+{
+#if !(defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)) /* no C++ constructors compiled in: brace initialization */
+    CvPoint2D32f p = { (float)pt.x, (float)pt.y }; /* coordinates are cast to float */
+#else /* C API constructors enabled */
+    CvPoint2D32f p((float)pt.x, (float)pt.y); /* same casts, via the constructor */
+#endif
     return p;
 }
+#endif
 
 /** converts CvPoint to CvPoint2D32f. */
 CV_INLINE  CvPoint2D32f  cvPointTo32f( CvPoint point )
@@ -925,10 +1046,11 @@ CV_INLINE  CvPoint2D32f  cvPointTo32f( CvPoint point )
 /** converts CvPoint2D32f to CvPoint. */
 CV_INLINE  CvPoint  cvPointFrom32f( CvPoint2D32f point )
 {
-    CvPoint ipt;
-    ipt.x = cvRound(point.x);
-    ipt.y = cvRound(point.y);
-
+#if !(defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)) /* no C++ constructors compiled in: brace initialization */
+    CvPoint ipt = { cvRound(point.x), cvRound(point.y) }; /* each float coordinate goes through cvRound() */
+#else /* C API constructors enabled in a C++ build */
+    CvPoint ipt(cvRound(point.x), cvRound(point.y)); /* same values, via the constructor */
+#endif
     return ipt;
 }
 
@@ -939,10 +1061,23 @@ typedef struct CvPoint3D32f
     float y;
     float z;
 
-#ifdef __cplusplus
+#ifdef CV__VALIDATE_UNUNITIALIZED_VARS
+    CvPoint3D32f() __attribute__(( warning("Non-initialized variable") )) {}
+    template<typename _Tp> CvPoint3D32f(const std::initializer_list<_Tp> list)
+    {
+        CV_Assert(list.size() == 0 || list.size() == 3);
+        x = y = z = 0;
+        if (list.size() == 3)
+        {
+            x = list.begin()[0]; y = list.begin()[1]; z = list.begin()[2];
+        }
+    };
+#elif defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)
     CvPoint3D32f(float _x = 0, float _y = 0, float _z = 0): x(_x), y(_y), z(_z) {}
     template<typename _Tp>
     CvPoint3D32f(const cv::Point3_<_Tp>& pt): x((float)pt.x), y((float)pt.y), z((float)pt.z) {}
+#endif
+#ifdef __cplusplus
     template<typename _Tp>
     operator cv::Point3_<_Tp>() const { return cv::Point3_<_Tp>(cv::saturate_cast<_Tp>(x), cv::saturate_cast<_Tp>(y), cv::saturate_cast<_Tp>(z)); }
 #endif
@@ -952,31 +1087,51 @@ CvPoint3D32f;
 /** constructs CvPoint3D32f structure. */
 CV_INLINE  CvPoint3D32f  cvPoint3D32f( double x, double y, double z )
 {
-    CvPoint3D32f p;
-
-    p.x = (float)x;
-    p.y = (float)y;
-    p.z = (float)z;
+#if !(defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)) /* no C++ constructors compiled in: brace initialization */
+    CvPoint3D32f p = { (float)x, (float)y, (float)z }; /* the double arguments are narrowed to float */
+#else /* C API constructors enabled in a C++ build */
+    CvPoint3D32f p((float)x, (float)y, (float)z); /* same narrowing, via the constructor */
+#endif
+    return p;
+}
 
+#ifdef __cplusplus /* C++-only overload */
+template<typename _Tp>
+CvPoint3D32f cvPoint3D32f(const cv::Point3_<_Tp>& pt) /* converts a cv::Point3_ of any element type; note this overload is not marked CV_INLINE */
+{
+#if !(defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)) /* no C++ constructors compiled in: brace initialization */
+    CvPoint3D32f p  = { (float)pt.x, (float)pt.y, (float)pt.z }; /* coordinates are cast to float */
+#else /* C API constructors enabled */
+    CvPoint3D32f p((float)pt.x, (float)pt.y, (float)pt.z); /* same casts, via the constructor */
+#endif
     return p;
 }
+#endif
 
 
 typedef struct CvPoint2D64f
 {
     double x;
     double y;
+#ifdef CV__VALIDATE_UNUNITIALIZED_VARS /* validation-only constructors; absent in normal builds */
+    CvPoint2D64f() __attribute__(( warning("Non-initialized variable") )) {} /* default construction is tagged with a warning attribute; body leaves x and y unset */
+    template<typename _Tp> CvPoint2D64f(const std::initializer_list<_Tp> list) /* accepts either an empty list or exactly two values */
+    {
+        CV_Assert(list.size() == 0 || list.size() == 2);
+        x = y = 0; /* an empty list yields (0, 0) */
+        if (list.size() == 2)
+        {
+            x = list.begin()[0]; y = list.begin()[1]; /* two values: x first, then y */
+        }
+    };
+#endif
 }
 CvPoint2D64f;
 
 /** constructs CvPoint2D64f structure.*/
 CV_INLINE  CvPoint2D64f  cvPoint2D64f( double x, double y )
 {
-    CvPoint2D64f p;
-
-    p.x = x;
-    p.y = y;
-
+    CvPoint2D64f p = { x, y }; /* brace initialization in member order x, y; no constructor branch is used for this type */
     return p;
 }
 
@@ -986,18 +1141,25 @@ typedef struct CvPoint3D64f
     double x;
     double y;
     double z;
+#ifdef CV__VALIDATE_UNUNITIALIZED_VARS
+    CvPoint3D64f() __attribute__(( warning("Non-initialized variable") )) {}
+    template<typename _Tp> CvPoint3D64f(const std::initializer_list<_Tp> list)
+    {
+        CV_Assert(list.size() == 0 || list.size() == 3);
+        x = y = z = 0;
+        if (list.size() == 3)
+        {
+            x = list.begin()[0]; y = list.begin()[1]; z = list.begin()[2];
+        }
+    };
+#endif
 }
 CvPoint3D64f;
 
 /** constructs CvPoint3D64f structure. */
 CV_INLINE  CvPoint3D64f  cvPoint3D64f( double x, double y, double z )
 {
-    CvPoint3D64f p;
-
-    p.x = x;
-    p.y = y;
-    p.z = z;
-
+    CvPoint3D64f p = { x, y, z }; /* brace initialization in member order x, y, z; no constructor branch is used for this type */
     return p;
 }
 
@@ -1009,10 +1171,23 @@ typedef struct CvSize
     int width;
     int height;
 
-#ifdef __cplusplus
+#ifdef CV__VALIDATE_UNUNITIALIZED_VARS
+    CvSize() __attribute__(( warning("Non-initialized variable") )) {}
+    template<typename _Tp> CvSize(const std::initializer_list<_Tp> list)
+    {
+        CV_Assert(list.size() == 0 || list.size() == 2);
+        width = 0; height = 0;
+        if (list.size() == 2)
+        {
+            width = list.begin()[0]; height = list.begin()[1];
+        }
+    };
+#elif defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)
     CvSize(int w = 0, int h = 0): width(w), height(h) {}
     template<typename _Tp>
     CvSize(const cv::Size_<_Tp>& sz): width(cv::saturate_cast<int>(sz.width)), height(cv::saturate_cast<int>(sz.height)) {}
+#endif
+#ifdef __cplusplus
     template<typename _Tp>
     operator cv::Size_<_Tp>() const { return cv::Size_<_Tp>(cv::saturate_cast<_Tp>(width), cv::saturate_cast<_Tp>(height)); }
 #endif
@@ -1022,23 +1197,48 @@ CvSize;
 /** constructs CvSize structure. */
 CV_INLINE  CvSize  cvSize( int width, int height )
 {
-    CvSize s;
-
-    s.width = width;
-    s.height = height;
+#if !(defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)) /* no C++ constructors compiled in: brace initialization in member order width, height */
+    CvSize s = { width, height };
+#else /* C API constructors enabled in a C++ build: use the 2-argument constructor */
+    CvSize s(width, height);
+#endif
+    return s;
+}
 
+#ifdef __cplusplus /* C++-only overload */
+CV_INLINE CvSize cvSize(const cv::Size& sz) /* converts a cv::Size field by field */
+{
+#if !(defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)) /* no C++ constructors compiled in: brace initialization */
+    CvSize s = { sz.width, sz.height };
+#else /* C API constructors enabled */
+    CvSize s(sz.width, sz.height);
+#endif
     return s;
 }
+#endif
 
 typedef struct CvSize2D32f
 {
     float width;
     float height;
 
-#ifdef __cplusplus
+#ifdef CV__VALIDATE_UNUNITIALIZED_VARS
+    CvSize2D32f() __attribute__(( warning("Non-initialized variable") )) {}
+    template<typename _Tp> CvSize2D32f(const std::initializer_list<_Tp> list)
+    {
+        CV_Assert(list.size() == 0 || list.size() == 2);
+        width = 0; height = 0;
+        if (list.size() == 2)
+        {
+            width = list.begin()[0]; height = list.begin()[1];
+        }
+    };
+#elif defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)
     CvSize2D32f(float w = 0, float h = 0): width(w), height(h) {}
     template<typename _Tp>
     CvSize2D32f(const cv::Size_<_Tp>& sz): width(cv::saturate_cast<float>(sz.width)), height(cv::saturate_cast<float>(sz.height)) {}
+#endif
+#ifdef __cplusplus
     template<typename _Tp>
     operator cv::Size_<_Tp>() const { return cv::Size_<_Tp>(cv::saturate_cast<_Tp>(width), cv::saturate_cast<_Tp>(height)); }
 #endif
@@ -1048,13 +1248,25 @@ CvSize2D32f;
 /** constructs CvSize2D32f structure. */
 CV_INLINE  CvSize2D32f  cvSize2D32f( double width, double height )
 {
-    CvSize2D32f s;
-
-    s.width = (float)width;
-    s.height = (float)height;
-
+#if !(defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)) /* no C++ constructors compiled in: brace initialization */
+    CvSize2D32f s = { (float)width, (float)height }; /* the double arguments are narrowed to float */
+#else /* C API constructors enabled in a C++ build */
+    CvSize2D32f s((float)width, (float)height); /* same narrowing, via the constructor */
+#endif
+    return s;
+}
+#ifdef __cplusplus /* C++-only overload */
+template<typename _Tp>
+CvSize2D32f cvSize2D32f(const cv::Size_<_Tp>& sz) /* converts a cv::Size_ of any element type; note this overload is not marked CV_INLINE */
+{
+#if !(defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)) /* no C++ constructors compiled in: brace initialization */
+    CvSize2D32f s = { (float)sz.width, (float)sz.height }; /* dimensions are cast to float */
+#else /* C API constructors enabled */
+    CvSize2D32f s((float)sz.width, (float)sz.height); /* same casts, via the constructor */
+#endif
     return s;
 }
+#endif
 
 /** @sa RotatedRect
  */
@@ -1065,15 +1277,37 @@ typedef struct CvBox2D
     float angle;          /**< Angle between the horizontal axis           */
                           /**< and the first side (i.e. length) in degrees */
 
-#ifdef __cplusplus
+#if defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)
     CvBox2D(CvPoint2D32f c = CvPoint2D32f(), CvSize2D32f s = CvSize2D32f(), float a = 0) : center(c), size(s), angle(a) {}
     CvBox2D(const cv::RotatedRect& rr) : center(rr.center), size(rr.size), angle(rr.angle) {}
+#endif
+#ifdef __cplusplus
     operator cv::RotatedRect() const { return cv::RotatedRect(center, size, angle); }
 #endif
 }
 CvBox2D;
 
 
+#ifdef __cplusplus /* both cvBox2D factories are C++-only */
+CV_INLINE CvBox2D cvBox2D(CvPoint2D32f c = CvPoint2D32f(), CvSize2D32f s = CvSize2D32f(), float a = 0) /* builds a CvBox2D from center, size and angle; every argument has a default */
+{
+    CvBox2D self; /* declared without an initializer; all three fields are assigned below */
+    self.center = c;
+    self.size = s;
+    self.angle = a;
+    return self;
+}
+CV_INLINE CvBox2D cvBox2D(const cv::RotatedRect& rr) /* converts a cv::RotatedRect field by field */
+{
+    CvBox2D self;
+    self.center = cvPoint2D32f(rr.center); /* center goes through the cvPoint2D32f() conversion */
+    self.size = cvSize2D32f(rr.size); /* size goes through the cvSize2D32f() conversion */
+    self.angle = rr.angle; /* angle is copied as is */
+    return self;
+}
+#endif
+
+
 /** Line iterator state: */
 typedef struct CvLineIterator
 {
@@ -1099,7 +1333,19 @@ typedef struct CvSlice
 {
     int  start_index, end_index;
 
-#if defined(__cplusplus) && !defined(__CUDACC__)
+#ifdef CV__VALIDATE_UNUNITIALIZED_VARS
+    CvSlice() __attribute__(( warning("Non-initialized variable") )) {}
+    template<typename _Tp> CvSlice(const std::initializer_list<_Tp> list)
+    {
+        CV_Assert(list.size() == 0 || list.size() == 2);
+        start_index = end_index = 0;
+        if (list.size() == 2)
+        {
+            start_index = list.begin()[0]; end_index = list.begin()[1];
+        }
+    };
+#endif
+#if defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus) && !defined(__CUDACC__)
     CvSlice(int start = 0, int end = 0) : start_index(start), end_index(end) {}
     CvSlice(const cv::Range& r) { *this = (r.start != INT_MIN && r.end != INT_MAX) ? CvSlice(r.start, r.end) : CvSlice(0, CV_WHOLE_SEQ_END_INDEX); }
     operator cv::Range() const { return (start_index == 0 && end_index == CV_WHOLE_SEQ_END_INDEX ) ? cv::Range::all() : cv::Range(start_index, end_index); }
@@ -1109,13 +1355,21 @@ CvSlice;
 
 CV_INLINE  CvSlice  cvSlice( int start, int end )
 {
-    CvSlice slice;
-    slice.start_index = start;
-    slice.end_index = end;
-
+#if !(defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus) && !defined(__CUDACC__))
+    CvSlice slice = { start, end };
+#else
+    CvSlice slice(start, end);
+#endif
     return slice;
 }
 
+#if defined(__cplusplus)
+CV_INLINE  CvSlice  cvSlice(const cv::Range& r)
+{
+    const bool bounded = (r.start != INT_MIN && r.end != INT_MAX);  /* otherwise fall back to (0, CV_WHOLE_SEQ_END_INDEX) */
+    return bounded ? cvSlice(r.start, r.end) : cvSlice(0, CV_WHOLE_SEQ_END_INDEX);
+}
+#endif
 
 
 /************************************* CvScalar *****************************************/
@@ -1125,13 +1379,22 @@ typedef struct CvScalar
 {
     double val[4];
 
-#ifdef __cplusplus
+#ifdef CV__VALIDATE_UNUNITIALIZED_VARS
+    CvScalar() __attribute__(( warning("Non-initialized variable") )) {}
+    CvScalar(const std::initializer_list<double> list)
+    {
+        CV_Assert(list.size() == 0 || list.size() == 4);
+        val[0] = val[1] = val[2] = val[3] = 0;
+        if (list.size() == 4)
+        {
+            val[0] = list.begin()[0]; val[1] = list.begin()[1]; val[2] = list.begin()[2]; val[3] = list.begin()[3];
+        }
+    };
+#elif defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)
     CvScalar() {}
     CvScalar(double d0, double d1 = 0, double d2 = 0, double d3 = 0) { val[0] = d0; val[1] = d1; val[2] = d2; val[3] = d3; }
     template<typename _Tp>
     CvScalar(const cv::Scalar_<_Tp>& s) { val[0] = s.val[0]; val[1] = s.val[1]; val[2] = s.val[2]; val[3] = s.val[3]; }
-    template<typename _Tp>
-    operator cv::Scalar_<_Tp>() const { return cv::Scalar_<_Tp>(cv::saturate_cast<_Tp>(val[0]), cv::saturate_cast<_Tp>(val[1]), cv::saturate_cast<_Tp>(val[2]), cv::saturate_cast<_Tp>(val[3])); }
     template<typename _Tp, int cn>
     CvScalar(const cv::Vec<_Tp, cn>& v)
     {
@@ -1140,22 +1403,59 @@ typedef struct CvScalar
         for( ; i < 4; i++ ) val[i] = 0;
     }
 #endif
+#ifdef __cplusplus
+    template<typename _Tp>
+    operator cv::Scalar_<_Tp>() const { return cv::Scalar_<_Tp>(cv::saturate_cast<_Tp>(val[0]), cv::saturate_cast<_Tp>(val[1]), cv::saturate_cast<_Tp>(val[2]), cv::saturate_cast<_Tp>(val[3])); }
+#endif
 }
 CvScalar;
 
 CV_INLINE  CvScalar  cvScalar( double val0, double val1 CV_DEFAULT(0),
                                double val2 CV_DEFAULT(0), double val3 CV_DEFAULT(0))
 {
+#if !(defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus))
+    CvScalar scalar = CV_STRUCT_INITIALIZER;
+#else
     CvScalar scalar;
+#endif
     scalar.val[0] = val0; scalar.val[1] = val1;
     scalar.val[2] = val2; scalar.val[3] = val3;
     return scalar;
 }
 
+#ifdef __cplusplus
+CV_INLINE CvScalar cvScalar()
+{
+#if defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)
+    CvScalar zero;
+#else
+    CvScalar zero = CV_STRUCT_INITIALIZER;
+#endif
+    for (int ch = 0; ch < 4; ++ch) zero.val[ch] = 0;  /* all four channels are cleared */
+    return zero;
+}
+CV_INLINE CvScalar cvScalar(const cv::Scalar& s)
+{
+#if defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)
+    CvScalar out;
+#else
+    CvScalar out = CV_STRUCT_INITIALIZER;
+#endif
+    for (int ch = 0; ch < 4; ++ch)  /* channel-by-channel copy of s.val[0..3] */
+    {
+        out.val[ch] = s.val[ch];
+    }
+    return out;
+}
+#endif
 
 CV_INLINE  CvScalar  cvRealScalar( double val0 )
 {
+#if !(defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus))
+    CvScalar scalar = CV_STRUCT_INITIALIZER;
+#else
     CvScalar scalar;
+#endif
     scalar.val[0] = val0;
     scalar.val[1] = scalar.val[2] = scalar.val[3] = 0;
     return scalar;
@@ -1163,7 +1463,11 @@ CV_INLINE  CvScalar  cvRealScalar( double val0 )
 
 CV_INLINE  CvScalar  cvScalarAll( double val0123 )
 {
+#if !(defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus))
+    CvScalar scalar = CV_STRUCT_INITIALIZER;
+#else
     CvScalar scalar;
+#endif
     scalar.val[0] = val0123;
     scalar.val[1] = val0123;
     scalar.val[2] = val0123;
@@ -1216,7 +1520,7 @@ typedef struct CvSeqBlock
 {
     struct CvSeqBlock*  prev; /**< Previous sequence block.                   */
     struct CvSeqBlock*  next; /**< Next sequence block.                       */
-  int    start_index;         /**< Index of the first element in the block +  */
+    int    start_index;       /**< Index of the first element in the block +  */
                               /**< sequence->first->start_index.              */
     int    count;             /**< Number of elements in the block.           */
     schar* data;              /**< Pointer to the first element of the block. */
@@ -1361,7 +1665,7 @@ CvGraph;
 
 /** @} */
 
-/*********************************** Chain/Countour *************************************/
+/*********************************** Chain/Contour *************************************/
 
 typedef struct CvChain
 {
@@ -1403,7 +1707,7 @@ typedef CvContour CvPoint2DSeq;
 #define CV_SEQ_ELTYPE_POINT          CV_32SC2  /**< (x,y) */
 #define CV_SEQ_ELTYPE_CODE           CV_8UC1   /**< freeman code: 0..7 */
 #define CV_SEQ_ELTYPE_GENERIC        0
-#define CV_SEQ_ELTYPE_PTR            CV_USRTYPE1
+#define CV_SEQ_ELTYPE_PTR            CV_MAKE_TYPE(CV_8U, 8 /*sizeof(void*)*/)
 #define CV_SEQ_ELTYPE_PPOINT         CV_SEQ_ELTYPE_PTR  /**< &(x,y) */
 #define CV_SEQ_ELTYPE_INDEX          CV_32SC1  /**< #(x,y) */
 #define CV_SEQ_ELTYPE_GRAPH_EDGE     0  /**< &next_o, &next_d, &vtx_o, &vtx_d */
@@ -1655,6 +1959,8 @@ CvSeqReader;
 *             Data structures for persistence (a.k.a serialization) functionality        *
 \****************************************************************************************/
 
+#if 0
+
 /** "black box" file storage */
 typedef struct CvFileStorage CvFileStorage;
 
@@ -1669,6 +1975,9 @@ typedef struct CvFileStorage CvFileStorage;
 #define CV_STORAGE_FORMAT_AUTO   0
 #define CV_STORAGE_FORMAT_XML    8
 #define CV_STORAGE_FORMAT_YAML  16
+#define CV_STORAGE_FORMAT_JSON  24
+#define CV_STORAGE_BASE64       64
+#define CV_STORAGE_WRITE_BASE64  (CV_STORAGE_BASE64 | CV_STORAGE_WRITE)
 
 /** @brief List of attributes. :
 
@@ -1738,7 +2047,7 @@ typedef struct CvString
 }
 CvString;
 
-/** All the keys (names) of elements in the readed file storage
+/** All the keys (names) of elements in the read file storage
    are stored in the hash to speed up the lookup operations: */
 typedef struct CvStringHashNode
 {
@@ -1804,31 +2113,10 @@ typedef struct CvTypeInfo
     CvCloneFunc clone; /**< creates a copy of the object */
 }
 CvTypeInfo;
-
-
-/**** System data types ******/
-
-typedef struct CvPluginFuncInfo
-{
-    void** func_addr;
-    void* default_func_addr;
-    const char* func_names;
-    int search_modules;
-    int loaded_from;
-}
-CvPluginFuncInfo;
-
-typedef struct CvModuleInfo
-{
-    struct CvModuleInfo* next;
-    const char* name;
-    const char* version;
-    CvPluginFuncInfo* func_tab;
-}
-CvModuleInfo;
+#endif
 
 /** @} */
 
-#endif /*__OPENCV_CORE_TYPES_H__*/
+#endif /*OPENCV_CORE_TYPES_H*/
 
 /* End of file. */
diff --git a/IPL/include/opencv/opencv2/core/utility.hpp b/IPL/include/opencv/opencv2/core/utility.hpp
index 1e6249d..3d2a035 100644
--- a/IPL/include/opencv/opencv2/core/utility.hpp
+++ b/IPL/include/opencv/opencv2/core/utility.hpp
@@ -42,8 +42,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_CORE_UTILITY_H__
-#define __OPENCV_CORE_UTILITY_H__
+#ifndef OPENCV_CORE_UTILITY_H
+#define OPENCV_CORE_UTILITY_H
 
 #ifndef __cplusplus
 #  error utility.hpp header must be compiled as C++
@@ -54,34 +54,17 @@
 #endif
 
 #include "opencv2/core.hpp"
+#include <ostream>
 
-namespace cv
-{
+#include <functional>
 
-#ifdef CV_COLLECT_IMPL_DATA
-CV_EXPORTS void setImpl(int flags); // set implementation flags and reset storage arrays
-CV_EXPORTS void addImpl(int flag, const char* func = 0); // add implementation and function name to storage arrays
-// Get stored implementation flags and fucntions names arrays
-// Each implementation entry correspond to function name entry, so you can find which implementation was executed in which fucntion
-CV_EXPORTS int getImpl(std::vector<int> &impl, std::vector<String> &funName);
-
-CV_EXPORTS bool useCollection(); // return implementation collection state
-CV_EXPORTS void setUseCollection(bool flag); // set implementation collection state
-
-#define CV_IMPL_PLAIN  0x01 // native CPU OpenCV implementation
-#define CV_IMPL_OCL    0x02 // OpenCL implementation
-#define CV_IMPL_IPP    0x04 // IPP implementation
-#define CV_IMPL_MT     0x10 // multithreaded implementation
-
-#define CV_IMPL_ADD(impl)                                                   \
-    if(cv::useCollection())                                                 \
-    {                                                                       \
-        cv::addImpl(impl, CV_Func);                                         \
-    }
-#else
-#define CV_IMPL_ADD(impl)
+#if !defined(_M_CEE)
+#include <mutex>  // std::mutex, std::lock_guard
 #endif
 
+namespace cv
+{
+
 //! @addtogroup core_utils
 //! @{
 
@@ -102,7 +85,7 @@ CV_EXPORTS void setUseCollection(bool flag); // set implementation collection st
  \code
  void my_func(const cv::Mat& m)
  {
-    cv::AutoBuffer<float> buf; // create automatic buffer containing 1000 floats
+    cv::AutoBuffer<float> buf(1000); // create automatic buffer containing 1000 floats
 
     buf.allocate(m.rows); // if m.rows <= 1000, the pre-allocated buffer is used,
                           // otherwise the buffer of "m.rows" floats will be allocated
@@ -111,7 +94,11 @@ CV_EXPORTS void setUseCollection(bool flag); // set implementation collection st
  }
  \endcode
 */
+#ifdef OPENCV_ENABLE_MEMORY_SANITIZER
+template<typename _Tp, size_t fixed_size = 0> class AutoBuffer
+#else
 template<typename _Tp, size_t fixed_size = 1024/sizeof(_Tp)+8> class AutoBuffer
+#endif
 {
 public:
     typedef _Tp value_type;
@@ -119,7 +106,7 @@ template<typename _Tp, size_t fixed_size = 1024/sizeof(_Tp)+8> class AutoBuffer
     //! the default constructor
     AutoBuffer();
     //! constructor taking the real buffer size
-    AutoBuffer(size_t _size);
+    explicit AutoBuffer(size_t _size);
 
     //! the copy constructor
     AutoBuffer(const AutoBuffer<_Tp, fixed_size>& buf);
@@ -137,17 +124,29 @@ template<typename _Tp, size_t fixed_size = 1024/sizeof(_Tp)+8> class AutoBuffer
     void resize(size_t _size);
     //! returns the current buffer size
     size_t size() const;
-    //! returns pointer to the real buffer, stack-allocated or head-allocated
-    operator _Tp* ();
-    //! returns read-only pointer to the real buffer, stack-allocated or head-allocated
-    operator const _Tp* () const;
+    //! returns pointer to the real buffer, stack-allocated or heap-allocated
+    inline _Tp* data() { return ptr; }
+    //! returns read-only pointer to the real buffer, stack-allocated or heap-allocated
+    inline const _Tp* data() const { return ptr; }
+
+#if !defined(OPENCV_DISABLE_DEPRECATED_COMPATIBILITY) // use .data() calls instead
+    //! returns pointer to the real buffer, stack-allocated or heap-allocated
+    operator _Tp* () { return ptr; }
+    //! returns read-only pointer to the real buffer, stack-allocated or heap-allocated
+    operator const _Tp* () const { return ptr; }
+#else
+    //! returns a reference to the element at specified location. No bounds checking is performed in Release builds.
+    inline _Tp& operator[] (size_t i) { CV_DbgCheckLT(i, sz, "out of range"); return ptr[i]; }
+    //! returns a reference to the element at specified location. No bounds checking is performed in Release builds.
+    inline const _Tp& operator[] (size_t i) const { CV_DbgCheckLT(i, sz, "out of range"); return ptr[i]; }
+#endif
 
 protected:
     //! pointer to the real buffer, can point to buf if the buffer is small enough
     _Tp* ptr;
     //! size of the real buffer
     size_t sz;
-    //! pre-allocated buffer. At least 1 element to confirm C++ standard reqirements
+    //! pre-allocated buffer. At least 1 element to confirm C++ standard requirements
     _Tp buf[(fixed_size > 0) ? fixed_size : 1];
 };
 
@@ -177,13 +176,6 @@ extern "C" typedef int (*ErrorCallback)( int status, const char* func_name,
 */
 CV_EXPORTS ErrorCallback redirectError( ErrorCallback errCallback, void* userdata=0, void** prevUserdata=0);
 
-/** @brief Returns a text string formatted using the printf-like expression.
-
-The function acts like sprintf but forms and returns an STL string. It can be used to form an error
-message in the Exception constructor.
-@param fmt printf-compatible formatting specifiers.
- */
-CV_EXPORTS String format( const char* fmt, ... );
 CV_EXPORTS String tempfile( const char* suffix = 0);
 CV_EXPORTS void glob(String pattern, std::vector<String>& result, bool recursive = false);
 
@@ -193,15 +185,15 @@ If threads == 0, OpenCV will disable threading optimizations and run all it's fu
 sequentially. Passing threads \< 0 will reset threads number to system default. This function must
 be called outside of parallel region.
 
-OpenCV will try to run it's functions with specified threads number, but some behaviour differs from
+OpenCV will try to run its functions with specified threads number, but some behaviour differs from
 framework:
--   `TBB` – User-defined parallel constructions will run with the same threads number, if
-    another does not specified. If late on user creates own scheduler, OpenCV will be use it.
--   `OpenMP` – No special defined behaviour.
--   `Concurrency` – If threads == 1, OpenCV will disable threading optimizations and run it's
+-   `TBB` - User-defined parallel constructions will run with the same threads number, if
+    another is not specified. If later on user creates his own scheduler, OpenCV will use it.
+-   `OpenMP` - No special defined behaviour.
+-   `Concurrency` - If threads == 1, OpenCV will disable threading optimizations and run its
     functions sequentially.
--   `GCD` – Supports only values \<= 0.
--   `C=` – No special defined behaviour.
+-   `GCD` - Supports only values \<= 0.
+-   `C=` - No special defined behaviour.
 @param nthreads Number of threads used by OpenCV.
 @sa getNumThreads, getThreadNum
  */
@@ -212,13 +204,13 @@ CV_EXPORTS_W void setNumThreads(int nthreads);
 Always returns 1 if OpenCV is built without threading support.
 
 The exact meaning of return value depends on the threading framework used by OpenCV library:
-- `TBB` – The number of threads, that OpenCV will try to use for parallel regions. If there is
+- `TBB` - The number of threads, that OpenCV will try to use for parallel regions. If there is
   any tbb::thread_scheduler_init in user code conflicting with OpenCV, then function returns
   default number of threads used by TBB library.
-- `OpenMP` – An upper bound on the number of threads that could be used to form a new team.
-- `Concurrency` – The number of threads, that OpenCV will try to use for parallel regions.
-- `GCD` – Unsupported; returns the GCD thread pool limit (512) for compatibility.
-- `C=` – The number of threads, that OpenCV will try to use for parallel regions, if before
+- `OpenMP` - An upper bound on the number of threads that could be used to form a new team.
+- `Concurrency` - The number of threads, that OpenCV will try to use for parallel regions.
+- `GCD` - Unsupported; returns the GCD thread pool limit (512) for compatibility.
+- `C=` - The number of threads, that OpenCV will try to use for parallel regions, if before
   called setNumThreads with threads \> 0, otherwise returns the number of logical CPUs,
   available for the process.
 @sa setNumThreads, getThreadNum
@@ -228,13 +220,15 @@ CV_EXPORTS_W int getNumThreads();
 /** @brief Returns the index of the currently executed thread within the current parallel region. Always
 returns 0 if called outside of parallel region.
 
-The exact meaning of return value depends on the threading framework used by OpenCV library:
-- `TBB` – Unsupported with current 4.1 TBB release. May be will be supported in future.
-- `OpenMP` – The thread number, within the current team, of the calling thread.
-- `Concurrency` – An ID for the virtual processor that the current context is executing on (0
+@deprecated Current implementation doesn't correspond to this documentation.
+
+The exact meaning of the return value depends on the threading framework used by OpenCV library:
+- `TBB` - Unsupported with current 4.1 TBB release. May be supported in the future.
+- `OpenMP` - The thread number, within the current team, of the calling thread.
+- `Concurrency` - An ID for the virtual processor that the current context is executing on (0
   for master thread and unique number for others, but not necessary 1,2,3,...).
-- `GCD` – System calling thread's ID. Never returns 0 inside parallel region.
-- `C=` – The index of the current parallel task.
+- `GCD` - System calling thread's ID. Never returns 0 inside parallel region.
+- `C=` - The index of the current parallel task.
 @sa setNumThreads, getNumThreads
  */
 CV_EXPORTS_W int getThreadNum();
@@ -247,11 +241,29 @@ architecture.
  */
 CV_EXPORTS_W const String& getBuildInformation();
 
+/** @brief Returns library version string
+
+For example "3.4.1-dev".
+
+@sa getVersionMajor, getVersionMinor, getVersionRevision
+*/
+CV_EXPORTS_W String getVersionString();
+
+/** @brief Returns major library version */
+CV_EXPORTS_W int getVersionMajor();
+
+/** @brief Returns minor library version */
+CV_EXPORTS_W int getVersionMinor();
+
+/** @brief Returns revision field of the library version */
+CV_EXPORTS_W int getVersionRevision();
+
 /** @brief Returns the number of ticks.
 
 The function returns the number of ticks after the certain event (for example, when the machine was
 turned on). It can be used to initialize RNG or to measure a function execution time by reading the
-tick count before and after the function call. See also the tick frequency.
+tick count before and after the function call.
+@sa getTickFrequency, TickMeter
  */
 CV_EXPORTS_W int64 getTickCount();
 
@@ -264,9 +276,139 @@ execution time in seconds:
     // do something ...
     t = ((double)getTickCount() - t)/getTickFrequency();
 @endcode
+@sa getTickCount, TickMeter
  */
 CV_EXPORTS_W double getTickFrequency();
 
+/** @brief A class to measure elapsed time.
+
+The class accumulates the number of ticks elapsed between start() and stop() calls and converts it to time using the
+tick frequency. That is, the following code computes the execution time in seconds:
+@code
+TickMeter tm;
+tm.start();
+// do something ...
+tm.stop();
+std::cout << tm.getTimeSec();
+@endcode
+
+It is also possible to compute the average time over multiple runs:
+@code
+TickMeter tm;
+for (int i = 0; i < 100; i++)
+{
+    tm.start();
+    // do something ...
+    tm.stop();
+}
+double average_time = tm.getTimeSec() / tm.getCounter();
+std::cout << "Average time in second per iteration is: " << average_time << std::endl;
+@endcode
+@sa getTickCount, getTickFrequency
+*/
+
+class CV_EXPORTS_W TickMeter
+{
+public:
+    //! creates a meter with every internal value cleared (see reset())
+    CV_WRAP TickMeter()
+    {
+        reset();
+    }
+
+    /**
+    remembers the current tick count as the beginning of a measurement.
+    */
+    CV_WRAP void start()
+    {
+        startTime = cv::getTickCount();
+    }
+
+    /**
+    ends the current measurement: adds the elapsed ticks to the total and increments the counter.
+    */
+    CV_WRAP void stop()
+    {
+        const int64 now = cv::getTickCount();
+        if (startTime == 0)
+            return;  // no measurement in progress (startTime is cleared by stop() and reset())
+        sumTime += now - startTime;
+        ++counter;
+        startTime = 0;
+    }
+
+    /**
+    returns the accumulated number of ticks.
+    */
+    CV_WRAP int64 getTimeTicks() const
+    {
+        return sumTime;
+    }
+
+    /**
+    returns the accumulated time in microseconds.
+    */
+    CV_WRAP double getTimeMicro() const
+    {
+        return 1e3 * getTimeMilli();
+    }
+
+    /**
+    returns the accumulated time in milliseconds.
+    */
+    CV_WRAP double getTimeMilli() const
+    {
+        return 1e3 * getTimeSec();
+    }
+
+    /**
+    returns the accumulated time in seconds.
+    */
+    CV_WRAP double getTimeSec()   const
+    {
+        return static_cast<double>(sumTime) / cv::getTickFrequency();
+    }
+
+    /**
+    returns how many measurements have been completed by stop().
+    */
+    CV_WRAP int64 getCounter() const
+    {
+        return counter;
+    }
+
+    /**
+    clears the counter, the accumulated ticks and any measurement in progress.
+    */
+    CV_WRAP void reset()
+    {
+        counter = 0;
+        sumTime = 0;
+        startTime = 0;
+    }
+
+private:
+    int64 counter;    // number of completed start()/stop() measurements
+    int64 sumTime;    // ticks accumulated over all completed measurements
+    int64 startTime;  // tick count stored by start(); 0 when no measurement is in progress
+};
+
+/** @brief output operator
+@code
+TickMeter tm;
+tm.start();
+// do something ...
+tm.stop();
+std::cout << tm;
+@endcode
+*/
+
+static inline std::ostream& operator << (std::ostream& out, const TickMeter& tm)
+{
+    out << tm.getTimeSec() << "sec";  // accumulated seconds immediately followed by the unit suffix
+    return out;
+}
+
 /** @brief Returns the number of CPU ticks.
 
 The function returns the current number of CPU ticks on some architectures (such as x86, x64,
@@ -291,6 +433,24 @@ in OpenCV.
  */
 CV_EXPORTS_W bool checkHardwareSupport(int feature);
 
+/** @brief Returns feature name by ID
+
+Returns empty string if feature is not defined
+*/
+CV_EXPORTS_W String getHardwareFeatureName(int feature);
+
+/** @brief Returns list of CPU features enabled during compilation.
+
+Returned value is a string containing space separated list of CPU features with following markers:
+
+- no markers - baseline features
+- prefix `*` - features enabled in dispatcher
+- suffix `?` - features enabled but not available in HW
+
+Example: `SSE SSE2 SSE3 *SSE4.1 *SSE4.2 *FP16 *AVX *AVX2 *AVX512-SKX?`
+*/
+CV_EXPORTS_W std::string getCPUFeaturesLine();
+
 /** @brief Returns the number of logical CPUs available for the process.
  */
 CV_EXPORTS_W int getNumberOfCPUs();
@@ -305,12 +465,13 @@ The function returns the aligned pointer of the same type as the input pointer:
  */
 template<typename _Tp> static inline _Tp* alignPtr(_Tp* ptr, int n=(int)sizeof(_Tp))
 {
+    CV_DbgAssert((n & (n - 1)) == 0); // n is a power of 2
     return (_Tp*)(((size_t)ptr + n-1) & -n);
 }
 
 /** @brief Aligns a buffer size to the specified number of bytes.
 
-The function returns the minimum number that is greater or equal to sz and is divisible by n :
+The function returns the minimum number that is greater than or equal to sz and is divisible by n :
 \f[\texttt{(sz + n-1) & -n}\f]
 @param sz Buffer size to align.
 @param n Alignment size that must be a power of two.
@@ -321,9 +482,80 @@ static inline size_t alignSize(size_t sz, int n)
     return (sz + n-1) & -n;
 }
 
+/** @brief Integer division with the result rounded up.
+
+Use this function instead of `ceil((float)a / b)` expressions.
+
+@sa alignSize
+*/
+static inline int divUp(int a, unsigned int b)
+{
+    CV_DbgAssert(a >= 0); // the dividend must be non-negative
+    return (a + b - 1) / b; // adding b-1 before dividing rounds the quotient up; b is unsigned, so the sum is evaluated as unsigned
+}
+/** @overload */
+static inline size_t divUp(size_t a, unsigned int b)
+{
+    return (a + b - 1) / b; // same ceiling division for size_t dividends
+}
+
+/** @brief Round first value up to the nearest multiple of second value.
+
+Use this function instead of `ceil((float)a / b) * b` expressions.
+
+@sa divUp
+*/
+static inline int roundUp(int a, unsigned int b)
+{
+    CV_DbgAssert(a >= 0); // the value to round must be non-negative
+    return a + b - 1 - (a + b -1) % b; // (a + b - 1) minus its remainder modulo b, i.e. a multiple of b
+}
+/** @overload */
+static inline size_t roundUp(size_t a, unsigned int b)
+{
+    return a + b - 1 - (a + b - 1) % b; // same computation for size_t values
+}
+
+/** @brief Alignment check of passed values
+
+Usage: `isAligned<sizeof(int)>(...)`
+
+@note Alignment(N) must be a power of 2 (2**k, 2^k)
+*/
+template<int N, typename T> static inline
+bool isAligned(const T& data)
+{
+    CV_StaticAssert((N & (N - 1)) == 0, "");  // N must be a power of 2
+    return (((size_t)data) & (N - 1)) == 0; // aligned when none of the bits selected by the mask N-1 are set
+}
+/** @overload */
+template<int N> static inline
+bool isAligned(const void* p1)
+{
+    return isAligned<N>((size_t)p1); // test the address value itself
+}
+/** @overload */
+template<int N> static inline
+bool isAligned(const void* p1, const void* p2)
+{
+    return isAligned<N>(((size_t)p1)|((size_t)p2)); // a low bit set in either address survives the OR, so one test covers both
+}
+/** @overload */
+template<int N> static inline
+bool isAligned(const void* p1, const void* p2, const void* p3)
+{
+    return isAligned<N>(((size_t)p1)|((size_t)p2)|((size_t)p3)); // OR of the three addresses, tested once
+}
+/** @overload */
+template<int N> static inline
+bool isAligned(const void* p1, const void* p2, const void* p3, const void* p4)
+{
+    return isAligned<N>(((size_t)p1)|((size_t)p2)|((size_t)p3)|((size_t)p4)); // OR of the four addresses, tested once
+}
+
 /** @brief Enables or disables the optimized code.
 
-The function can be used to dynamically turn on and off optimized code (code that uses SSE2, AVX,
+The function can be used to dynamically turn on and off optimized dispatched code (code that uses SSE4.2, AVX/AVX2,
 and other instructions on the platforms that support it). It sets a global flag that is further
 checked by OpenCV functions. Since the flag is not checked in the inner OpenCV loops, it is only
 safe to call the function on the very top level in your application where you can be sure that no
@@ -342,7 +574,7 @@ The function returns true if the optimized code is enabled. Otherwise, it return
  */
 CV_EXPORTS_W bool useOptimized();
 
-static inline size_t getElemSize(int type) { return CV_ELEM_SIZE(type); }
+static inline size_t getElemSize(int type) { return (size_t)CV_ELEM_SIZE(type); }
 
 /////////////////////////////// Parallel Primitives //////////////////////////////////
 
@@ -359,17 +591,38 @@ class CV_EXPORTS ParallelLoopBody
 */
 CV_EXPORTS void parallel_for_(const Range& range, const ParallelLoopBody& body, double nstripes=-1.);
 
+//! Adapter that lets a std::function (e.g. a lambda) be passed where a ParallelLoopBody is expected.
+class ParallelLoopBodyLambdaWrapper : public ParallelLoopBody
+{
+public:
+    ParallelLoopBodyLambdaWrapper(std::function<void(const Range&)> functor) : m_functor(functor) { }
+
+    //! forwards the given sub-range to the stored callable
+    virtual void operator() (const cv::Range& range) const CV_OVERRIDE
+    {
+        m_functor(range);
+    }
+private:
+    std::function<void(const Range&)> m_functor;
+};
+
+inline void parallel_for_(const Range& range, std::function<void(const Range&)> functor, double nstripes=-1.)
+{
+    parallel_for_(range, ParallelLoopBodyLambdaWrapper(functor), nstripes); // wrap the callable and delegate to the ParallelLoopBody overload
+}
+
 /////////////////////////////// forEach method of cv::Mat ////////////////////////////
 template<typename _Tp, typename Functor> inline
 void Mat::forEach_impl(const Functor& operation) {
     if (false) {
-        operation(*reinterpret_cast<_Tp*>(0), reinterpret_cast<int*>(NULL));
-        // If your compiler fail in this line.
+        operation(*reinterpret_cast<_Tp*>(0), reinterpret_cast<int*>(0));
+        // If your compiler fails in this line.
         // Please check that your functor signature is
-        //     (_Tp&, const int*)   <- multidimential
-        //  or (_Tp&, void*)        <- in case of you don't need current idx.
+        //     (_Tp&, const int*)   <- multi-dimensional
+        //  or (_Tp&, void*)        <- in case you don't need current idx.
     }
 
+    CV_Assert(!empty());
     CV_Assert(this->total() / this->size[this->dims - 1] <= INT_MAX);
     const int LINES = static_cast<int>(this->total() / this->size[this->dims - 1]);
 
@@ -377,11 +630,12 @@ void Mat::forEach_impl(const Functor& operation) {
     {
     public:
         PixelOperationWrapper(Mat_<_Tp>* const frame, const Functor& _operation)
-            : mat(frame), op(_operation) {};
-        virtual ~PixelOperationWrapper(){};
+            : mat(frame), op(_operation) {}
+        virtual ~PixelOperationWrapper(){}
         // ! Overloaded virtual operator
         // convert range call to row call.
-        virtual void operator()(const Range &range) const {
+        virtual void operator()(const Range &range) const CV_OVERRIDE
+        {
             const int DIMS = mat->dims;
             const int COLS = mat->size[DIMS - 1];
             if (DIMS <= 2) {
@@ -389,7 +643,7 @@ void Mat::forEach_impl(const Functor& operation) {
                     this->rowCall2(row, COLS);
                 }
             } else {
-                std::vector<int> idx(COLS); /// idx is modified in this->rowCall
+                std::vector<int> idx(DIMS); /// idx is modified in this->rowCall
                 idx[DIMS - 2] = range.start - 1;
 
                 for (int line_num = range.start; line_num < range.end; ++line_num) {
@@ -407,7 +661,7 @@ void Mat::forEach_impl(const Functor& operation) {
                     this->rowCall(&idx[0], COLS, DIMS);
                 }
             }
-        };
+        }
     private:
         Mat_<_Tp>* const mat;
         const Functor op;
@@ -444,12 +698,12 @@ void Mat::forEach_impl(const Functor& operation) {
                 op(*pixel++, static_cast<const int*>(idx));
                 idx[1]++;
             }
-        };
+        }
         PixelOperationWrapper& operator=(const PixelOperationWrapper &) {
             CV_Assert(false);
             // We can not remove this implementation because Visual Studio warning C4822.
             return *this;
-        };
+        }
     };
 
     parallel_for_(cv::Range(0, LINES), PixelOperationWrapper(reinterpret_cast<Mat_<_Tp>*>(this), operation));
@@ -457,84 +711,11 @@ void Mat::forEach_impl(const Functor& operation) {
 
 /////////////////////////// Synchronization Primitives ///////////////////////////////
 
-class CV_EXPORTS Mutex
-{
-public:
-    Mutex();
-    ~Mutex();
-    Mutex(const Mutex& m);
-    Mutex& operator = (const Mutex& m);
-
-    void lock();
-    bool trylock();
-    void unlock();
-
-    struct Impl;
-protected:
-    Impl* impl;
-};
-
-class CV_EXPORTS AutoLock
-{
-public:
-    AutoLock(Mutex& m) : mutex(&m) { mutex->lock(); }
-    ~AutoLock() { mutex->unlock(); }
-protected:
-    Mutex* mutex;
-private:
-    AutoLock(const AutoLock&);
-    AutoLock& operator = (const AutoLock&);
-};
-
-// TLS interface
-class CV_EXPORTS TLSDataContainer
-{
-protected:
-    TLSDataContainer();
-    virtual ~TLSDataContainer();
-
-    void  gatherData(std::vector<void*> &data) const;
-#if OPENCV_ABI_COMPATIBILITY > 300
-    void* getData() const;
-    void  release();
-
-private:
-#else
-    void  release();
-
-public:
-    void* getData() const;
+#if !defined(_M_CEE)
+typedef std::recursive_mutex Mutex;
+typedef std::lock_guard<cv::Mutex> AutoLock;
 #endif
-    virtual void* createDataInstance() const = 0;
-    virtual void  deleteDataInstance(void* pData) const = 0;
 
-    int key_;
-};
-
-// Main TLS data class
-template <typename T>
-class TLSData : protected TLSDataContainer
-{
-public:
-    inline TLSData()        {}
-    inline ~TLSData()       { release();            } // Release key and delete associated data
-    inline T* get() const   { return (T*)getData(); } // Get data assosiated with key
-
-     // Get data from all threads
-    inline void gather(std::vector<T*> &data) const
-    {
-        std::vector<void*> &dataVoid = reinterpret_cast<std::vector<void*>&>(data);
-        gatherData(dataVoid);
-    }
-
-private:
-    virtual void* createDataInstance() const {return new T;}                // Wrapper to allocate data by template
-    virtual void  deleteDataInstance(void* pData) const {delete (T*)pData;} // Wrapper to release data by template
-
-    // Disable TLS copy operations
-    TLSData(TLSData &) {};
-    TLSData& operator =(const TLSData &) {return *this;};
-};
 
 /** @brief Designed for command line parsing
 
@@ -569,7 +750,7 @@ The sample below demonstrates how to use CommandLineParser:
 
 ### Keys syntax
 
-The keys parameter is a string containing several blocks, each one is enclosed in curley braces and
+The keys parameter is a string containing several blocks, each one is enclosed in curly braces and
 describes one argument. Each argument contains three parts separated by the `|` symbol:
 
 -# argument names is a space-separated list of option synonyms (to mark argument as positional, prefix it with the `@` symbol)
@@ -640,7 +821,7 @@ class CV_EXPORTS CommandLineParser
 
     This method returns the path to the executable from the command line (`argv[0]`).
 
-    For example, if the application has been started with such command:
+    For example, if the application has been started with such a command:
     @code{.sh}
     $ ./bin/my-executable
     @endcode
@@ -727,7 +908,7 @@ class CV_EXPORTS CommandLineParser
 
     /** @brief Check for parsing errors
 
-    Returns true if error occured while accessing the parameters (bad conversion, missing arguments,
+    Returns false if error occurred while accessing the parameters (bad conversion, missing arguments,
     etc.). Call @ref printErrors to print error messages list.
      */
     bool check() const;
@@ -746,15 +927,15 @@ class CV_EXPORTS CommandLineParser
     */
     void printMessage() const;
 
-    /** @brief Print list of errors occured
+    /** @brief Print list of errors occurred
 
     @sa check
     */
     void printErrors() const;
 
 protected:
-    void getByName(const String& name, bool space_delete, int type, void* dst) const;
-    void getByIndex(int index, bool space_delete, int type, void* dst) const;
+    void getByName(const String& name, bool space_delete, Param type, void* dst) const;
+    void getByIndex(int index, bool space_delete, Param type, void* dst) const;
 
     struct Impl;
     Impl* impl;
@@ -817,10 +998,10 @@ AutoBuffer<_Tp, fixed_size>::allocate(size_t _size)
         return;
     }
     deallocate();
+    sz = _size;
     if(_size > fixed_size)
     {
         ptr = new _Tp[_size];
-        sz = _size;
     }
 }
 
@@ -863,31 +1044,166 @@ template<typename _Tp, size_t fixed_size> inline size_t
 AutoBuffer<_Tp, fixed_size>::size() const
 { return sz; }
 
-template<typename _Tp, size_t fixed_size> inline
-AutoBuffer<_Tp, fixed_size>::operator _Tp* ()
-{ return ptr; }
+//! @endcond
 
-template<typename _Tp, size_t fixed_size> inline
-AutoBuffer<_Tp, fixed_size>::operator const _Tp* () const
-{ return ptr; }
 
-#ifndef OPENCV_NOSTL
-template<> inline std::string CommandLineParser::get<std::string>(int index, bool space_delete) const
+// Basic Node class for tree building
+template<class OBJECT>
+class CV_EXPORTS Node
 {
-    return get<String>(index, space_delete);
-}
-template<> inline std::string CommandLineParser::get<std::string>(const String& name, bool space_delete) const
+public:
+    Node()
+    {
+        m_pParent  = 0;
+    }
+    Node(OBJECT& payload) : m_payload(payload)
+    {
+        m_pParent  = 0;
+    }
+    ~Node()
+    {
+        removeChilds();
+        if (m_pParent)
+        {
+            int idx = m_pParent->findChild(this);
+            if (idx >= 0)
+                m_pParent->m_childs.erase(m_pParent->m_childs.begin() + idx);
+        }
+    }
+
+    Node<OBJECT>* findChild(OBJECT& payload) const
+    {
+        for(size_t i = 0; i < this->m_childs.size(); i++)
+        {
+            if(this->m_childs[i]->m_payload == payload)
+                return this->m_childs[i];
+        }
+        return NULL;
+    }
+
+    int findChild(Node<OBJECT> *pNode) const
+    {
+        for (size_t i = 0; i < this->m_childs.size(); i++)
+        {
+            if(this->m_childs[i] == pNode)
+                return (int)i;
+        }
+        return -1;
+    }
+
+    void addChild(Node<OBJECT> *pNode)
+    {
+        if(!pNode)
+            return;
+
+        CV_Assert(pNode->m_pParent == 0);
+        pNode->m_pParent = this;
+        this->m_childs.push_back(pNode);
+    }
+
+    void removeChilds()
+    {
+        for(size_t i = 0; i < m_childs.size(); i++)
+        {
+            m_childs[i]->m_pParent = 0; // avoid excessive parent vector trimming
+            delete m_childs[i];
+        }
+        m_childs.clear();
+    }
+
+    int getDepth()
+    {
+        int   count   = 0;
+        Node *pParent = m_pParent;
+        while(pParent) count++, pParent = pParent->m_pParent;
+        return count;
+    }
+
+public:
+    OBJECT                     m_payload;
+    Node<OBJECT>*              m_pParent;
+    std::vector<Node<OBJECT>*> m_childs;
+};
+
+
+namespace samples {
+
+//! @addtogroup core_utils_samples
+// This section describes utility functions for OpenCV samples.
+//
+// @note Implementation of these utilities is not thread-safe.
+//
+//! @{
+
+/** @brief Try to find requested data file
+
+Search directories:
+
+1. Directories passed via `addSamplesDataSearchPath()`
+2. OPENCV_SAMPLES_DATA_PATH_HINT environment variable
+3. OPENCV_SAMPLES_DATA_PATH environment variable
+   If parameter value is not empty and nothing is found then stop searching.
+4. Detects build/install path based on:
+   a. current working directory (CWD)
+   b. and/or binary module location (opencv_core/opencv_world, doesn't work with static linkage)
+5. Scan `<source>/{,data,samples/data}` directories if build directory is detected or the current directory is in source tree.
+6. Scan `<install>/share/OpenCV` directory if install directory is detected.
+
+@see cv::utils::findDataFile
+
+@param relative_path Relative path to data file
+@param required Specify "file not found" handling.
+       If true, function prints information message and raises cv::Exception.
+       If false, function returns empty result
+@param silentMode Disables messages
+@return Returns path (absolute or relative to the current directory) or empty string if file is not found
+*/
+CV_EXPORTS_W cv::String findFile(const cv::String& relative_path, bool required = true, bool silentMode = false);
+
+CV_EXPORTS_W cv::String findFileOrKeep(const cv::String& relative_path, bool silentMode = false);
+
+inline cv::String findFileOrKeep(const cv::String& relative_path, bool silentMode)
 {
-    return get<String>(name, space_delete);
+    cv::String res = findFile(relative_path, false, silentMode);
+    if (res.empty())
+        return relative_path;
+    return res;
 }
-#endif // OPENCV_NOSTL
 
-//! @endcond
+/** @brief Override search data path by adding new search location
+
+Use this only to override default behavior
+Passed paths are used in LIFO order.
+
+@param path Path to used samples data
+*/
+CV_EXPORTS_W void addSamplesDataSearchPath(const cv::String& path);
+
+/** @brief Append samples search data sub directory
+
+General usage is to add OpenCV modules name (`<opencv_contrib>/modules/<name>/samples/data` -> `<name>/samples/data` + `modules/<name>/samples/data`).
+Passed subdirectories are used in LIFO order.
+
+@param subdir samples data sub directory
+*/
+CV_EXPORTS_W void addSamplesDataSearchSubDirectory(const cv::String& subdir);
+
+//! @}
+} // namespace samples
+
+namespace utils {
+
+CV_EXPORTS int getThreadID();
+
+} // namespace
 
 } //namespace cv
 
-#ifndef DISABLE_OPENCV_24_COMPATIBILITY
-#include "opencv2/core/core_c.h"
+#ifdef CV_COLLECT_IMPL_DATA
+#include "opencv2/core/utils/instrumentation.hpp"
+#else
+/// Collect implementation data on OpenCV function call. Requires ENABLE_IMPL_COLLECTION build option.
+#define CV_IMPL_ADD(impl)
 #endif
 
-#endif //__OPENCV_CORE_UTILITY_H__
+#endif //OPENCV_CORE_UTILITY_H
diff --git a/IPL/include/opencv/opencv2/core/utils/allocator_stats.hpp b/IPL/include/opencv/opencv2/core/utils/allocator_stats.hpp
new file mode 100644
index 0000000..79e9338
--- /dev/null
+++ b/IPL/include/opencv/opencv2/core/utils/allocator_stats.hpp
@@ -0,0 +1,29 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_CORE_ALLOCATOR_STATS_HPP
+#define OPENCV_CORE_ALLOCATOR_STATS_HPP
+
+#include "../cvdef.h"
+
+namespace cv { namespace utils {
+
+class AllocatorStatisticsInterface
+{
+protected:
+    AllocatorStatisticsInterface() {}
+    virtual ~AllocatorStatisticsInterface() {}
+public:
+    virtual uint64_t getCurrentUsage() const = 0;
+    virtual uint64_t getTotalUsage() const = 0;
+    virtual uint64_t getNumberOfAllocations() const = 0;
+    virtual uint64_t getPeakUsage() const = 0;
+
+    /** set peak usage = current usage */
+    virtual void resetPeakUsage() = 0;
+};
+
+}} // namespace
+
+#endif // OPENCV_CORE_ALLOCATOR_STATS_HPP
diff --git a/IPL/include/opencv/opencv2/core/utils/allocator_stats.impl.hpp b/IPL/include/opencv/opencv2/core/utils/allocator_stats.impl.hpp
new file mode 100644
index 0000000..61fcf15
--- /dev/null
+++ b/IPL/include/opencv/opencv2/core/utils/allocator_stats.impl.hpp
@@ -0,0 +1,150 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_CORE_ALLOCATOR_STATS_IMPL_HPP
+#define OPENCV_CORE_ALLOCATOR_STATS_IMPL_HPP
+
+#include "./allocator_stats.hpp"
+
+#ifdef CV_CXX11
+#include <atomic>
+#endif
+
+//#define OPENCV_DISABLE_ALLOCATOR_STATS
+
+namespace cv { namespace utils {
+
+#ifndef OPENCV_ALLOCATOR_STATS_COUNTER_TYPE
+#if defined(__GNUC__) && (\
+        (defined(__SIZEOF_POINTER__) && __SIZEOF_POINTER__ == 4) || \
+        (defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4) && !defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8)) \
+    )
+#define OPENCV_ALLOCATOR_STATS_COUNTER_TYPE int
+#endif
+#endif
+
+#ifndef OPENCV_ALLOCATOR_STATS_COUNTER_TYPE
+#define OPENCV_ALLOCATOR_STATS_COUNTER_TYPE long long
+#endif
+
+#ifdef CV__ALLOCATOR_STATS_LOG
+namespace {
+#endif
+
+class AllocatorStatistics : public AllocatorStatisticsInterface
+{
+#ifdef OPENCV_DISABLE_ALLOCATOR_STATS
+
+public:
+    AllocatorStatistics() {}
+    ~AllocatorStatistics() CV_OVERRIDE {}
+
+    uint64_t getCurrentUsage() const CV_OVERRIDE { return 0; }
+    uint64_t getTotalUsage() const CV_OVERRIDE { return 0; }
+    uint64_t getNumberOfAllocations() const CV_OVERRIDE { return 0; }
+    uint64_t getPeakUsage() const CV_OVERRIDE { return 0; }
+
+    /** set peak usage = current usage */
+    void resetPeakUsage() CV_OVERRIDE {};
+
+    void onAllocate(size_t /*sz*/) {}
+    void onFree(size_t /*sz*/) {}
+
+#elif defined(CV_CXX11)
+
+protected:
+    typedef OPENCV_ALLOCATOR_STATS_COUNTER_TYPE counter_t;
+    std::atomic<counter_t> curr, total, total_allocs, peak;
+public:
+    AllocatorStatistics() {}
+    ~AllocatorStatistics() CV_OVERRIDE {}
+
+    uint64_t getCurrentUsage() const CV_OVERRIDE { return (uint64_t)curr.load(); }
+    uint64_t getTotalUsage() const CV_OVERRIDE { return (uint64_t)total.load(); }
+    uint64_t getNumberOfAllocations() const CV_OVERRIDE { return (uint64_t)total_allocs.load(); }
+    uint64_t getPeakUsage() const CV_OVERRIDE { return (uint64_t)peak.load(); }
+
+    /** set peak usage = current usage */
+    void resetPeakUsage() CV_OVERRIDE { peak.store(curr.load()); }
+
+    // Controller interface
+    void onAllocate(size_t sz)
+    {
+#ifdef CV__ALLOCATOR_STATS_LOG
+        CV__ALLOCATOR_STATS_LOG(cv::format("allocate: %lld (curr=%lld)", (long long int)sz, (long long int)curr.load()));
+#endif
+
+        counter_t new_curr = curr.fetch_add((counter_t)sz) + (counter_t)sz;
+
+        // peak = std::max((uint64_t)peak, new_curr);
+        auto prev_peak = peak.load();
+        while (prev_peak < new_curr)
+        {
+            if (peak.compare_exchange_weak(prev_peak, new_curr))
+                break;
+        }
+        // end of peak = max(...)
+
+        total += (counter_t)sz;
+        total_allocs++;
+    }
+    void onFree(size_t sz)
+    {
+#ifdef CV__ALLOCATOR_STATS_LOG
+        CV__ALLOCATOR_STATS_LOG(cv::format("free: %lld (curr=%lld)", (long long int)sz, (long long int)curr.load()));
+#endif
+        curr -= (counter_t)sz;
+    }
+
+#else  // non C++11
+
+protected:
+    typedef OPENCV_ALLOCATOR_STATS_COUNTER_TYPE counter_t;
+    volatile counter_t curr, total, total_allocs, peak;  // overflow is possible, CV_XADD operates with 'int' only
+public:
+    AllocatorStatistics()
+        : curr(0), total(0), total_allocs(0), peak(0)
+    {}
+    ~AllocatorStatistics() CV_OVERRIDE {}
+
+    uint64_t getCurrentUsage() const CV_OVERRIDE { return (uint64_t)curr; }
+    uint64_t getTotalUsage() const CV_OVERRIDE { return (uint64_t)total; }
+    uint64_t getNumberOfAllocations() const CV_OVERRIDE { return (uint64_t)total_allocs; }
+    uint64_t getPeakUsage() const CV_OVERRIDE { return (uint64_t)peak; }
+
+    void resetPeakUsage() CV_OVERRIDE { peak = curr; }
+
+    // Controller interface
+    void onAllocate(size_t sz)
+    {
+#ifdef CV__ALLOCATOR_STATS_LOG
+        CV__ALLOCATOR_STATS_LOG(cv::format("allocate: %lld (curr=%lld)", (long long int)sz, (long long int)curr));
+#endif
+
+        counter_t new_curr = (counter_t)CV_XADD(&curr, (counter_t)sz) + (counter_t)sz;
+
+        peak = std::max((counter_t)peak, new_curr);  // non-thread safe
+
+        //CV_XADD(&total, (uint64_t)sz);  // overflow with int, non-reliable...
+        total += sz;
+
+        CV_XADD(&total_allocs, (counter_t)1);
+    }
+    void onFree(size_t sz)
+    {
+#ifdef CV__ALLOCATOR_STATS_LOG
+        CV__ALLOCATOR_STATS_LOG(cv::format("free: %lld (curr=%lld)", (long long int)sz, (long long int)curr));
+#endif
+        CV_XADD(&curr, (counter_t)-sz);
+    }
+#endif
+};
+
+#ifdef CV__ALLOCATOR_STATS_LOG
+} // namespace
+#endif
+
+}} // namespace
+
+#endif // OPENCV_CORE_ALLOCATOR_STATS_IMPL_HPP
diff --git a/IPL/include/opencv/opencv2/core/utils/filesystem.hpp b/IPL/include/opencv/opencv2/core/utils/filesystem.hpp
new file mode 100644
index 0000000..a98d220
--- /dev/null
+++ b/IPL/include/opencv/opencv2/core/utils/filesystem.hpp
@@ -0,0 +1,82 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_UTILS_FILESYSTEM_HPP
+#define OPENCV_UTILS_FILESYSTEM_HPP
+
+namespace cv { namespace utils { namespace fs {
+
+
+CV_EXPORTS bool exists(const cv::String& path);
+CV_EXPORTS bool isDirectory(const cv::String& path);
+
+CV_EXPORTS void remove_all(const cv::String& path);
+
+
+CV_EXPORTS cv::String getcwd();
+
+/** @brief Converts the given path to a canonical absolute path
+ * Symlinks are processed if there is support for them on running platform.
+ *
+ * @param path input path. Target file/directory should exist.
+ */
+CV_EXPORTS cv::String canonical(const cv::String& path);
+
+/** Join path components */
+CV_EXPORTS cv::String join(const cv::String& base, const cv::String& path);
+
+/** Get parent directory */
+CV_EXPORTS cv::String getParent(const cv::String &path);
+CV_EXPORTS std::wstring getParent(const std::wstring& path);
+
+/**
+ * Generate a list of all files that match the globbing pattern.
+ *
+ * Result entries are prefixed by base directory path.
+ *
+ * @param directory base directory
+ * @param pattern filter pattern (based on '*'/'?' symbols). Use empty string to disable filtering and return all results
+ * @param[out] result result of globbing.
+ * @param recursive scan nested directories too
+ * @param includeDirectories include directories into results list
+ */
+CV_EXPORTS void glob(const cv::String& directory, const cv::String& pattern,
+        CV_OUT std::vector<cv::String>& result,
+        bool recursive = false, bool includeDirectories = false);
+
+/**
+ * Generate a list of all files that match the globbing pattern.
+ *
+ * @param directory base directory
+ * @param pattern filter pattern (based on '*'/'?' symbols). Use empty string to disable filtering and return all results
+ * @param[out] result globbing result with relative paths from base directory
+ * @param recursive scan nested directories too
+ * @param includeDirectories include directories into results list
+ */
+CV_EXPORTS void glob_relative(const cv::String& directory, const cv::String& pattern,
+        CV_OUT std::vector<cv::String>& result,
+        bool recursive = false, bool includeDirectories = false);
+
+
+CV_EXPORTS bool createDirectory(const cv::String& path);
+CV_EXPORTS bool createDirectories(const cv::String& path);
+
+#ifdef __OPENCV_BUILD
+// TODO
+//CV_EXPORTS cv::String getTempDirectory();
+
+/**
+ * @brief Returns directory to store OpenCV cache files
+ * Create sub-directory in common OpenCV cache directory if it doesn't exist.
+ * @param sub_directory_name name of sub-directory. NULL or "" value asks to return root cache directory.
+ * @param configuration_name optional name of a configuration parameter which overrides default behavior.
+ * @return Path to cache directory. Returns empty string if cache directories support is not available. Returns "disabled" if cache disabled by user.
+ */
+CV_EXPORTS cv::String getCacheDirectory(const char* sub_directory_name, const char* configuration_name = NULL);
+
+#endif
+
+}}} // namespace
+
+#endif // OPENCV_UTILS_FILESYSTEM_HPP
diff --git a/IPL/include/opencv/opencv2/core/utils/instrumentation.hpp b/IPL/include/opencv/opencv2/core/utils/instrumentation.hpp
new file mode 100644
index 0000000..3639867
--- /dev/null
+++ b/IPL/include/opencv/opencv2/core/utils/instrumentation.hpp
@@ -0,0 +1,125 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_UTILS_INSTR_HPP
+#define OPENCV_UTILS_INSTR_HPP
+
+#include <opencv2/core/utility.hpp>
+#include <opencv2/core/utils/tls.hpp>
+
+namespace cv {
+
+//! @addtogroup core_utils
+//! @{
+
+#ifdef CV_COLLECT_IMPL_DATA
+CV_EXPORTS void setImpl(int flags); // set implementation flags and reset storage arrays
+CV_EXPORTS void addImpl(int flag, const char* func = 0); // add implementation and function name to storage arrays
+// Get stored implementation flags and functions names arrays
+// Each implementation entry corresponds to a function name entry, so you can find which implementation was executed in which function
+CV_EXPORTS int getImpl(std::vector<int> &impl, std::vector<String> &funName);
+
+CV_EXPORTS bool useCollection(); // return implementation collection state
+CV_EXPORTS void setUseCollection(bool flag); // set implementation collection state
+
+#define CV_IMPL_PLAIN  0x01 // native CPU OpenCV implementation
+#define CV_IMPL_OCL    0x02 // OpenCL implementation
+#define CV_IMPL_IPP    0x04 // IPP implementation
+#define CV_IMPL_MT     0x10 // multithreaded implementation
+
+#undef CV_IMPL_ADD
+#define CV_IMPL_ADD(impl)                                                   \
+    if(cv::useCollection())                                                 \
+    {                                                                       \
+        cv::addImpl(impl, CV_Func);                                         \
+    }
+#endif
+
+// Instrumentation external interface
+namespace instr
+{
+
+#if !defined OPENCV_ABI_CHECK
+
+enum TYPE
+{
+    TYPE_GENERAL = 0,   // OpenCV API function, e.g. exported function
+    TYPE_MARKER,        // Information marker
+    TYPE_WRAPPER,       // Wrapper function for implementation
+    TYPE_FUN,           // Simple function call
+};
+
+enum IMPL
+{
+    IMPL_PLAIN = 0,
+    IMPL_IPP,
+    IMPL_OPENCL,
+};
+
+struct NodeDataTls
+{
+    NodeDataTls()
+    {
+        m_ticksTotal = 0;
+    }
+    uint64      m_ticksTotal;
+};
+
+class CV_EXPORTS NodeData
+{
+public:
+    NodeData(const char* funName = 0, const char* fileName = NULL, int lineNum = 0, void* retAddress = NULL, bool alwaysExpand = false, cv::instr::TYPE instrType = TYPE_GENERAL, cv::instr::IMPL implType = IMPL_PLAIN);
+    NodeData(NodeData &ref);
+    ~NodeData();
+    NodeData& operator=(const NodeData&);
+
+    cv::String          m_funName;
+    cv::instr::TYPE     m_instrType;
+    cv::instr::IMPL     m_implType;
+    const char*         m_fileName;
+    int                 m_lineNum;
+    void*               m_retAddress;
+    bool                m_alwaysExpand;
+    bool                m_funError;
+
+    volatile int         m_counter;
+    volatile uint64      m_ticksTotal;
+    TLSDataAccumulator<NodeDataTls> m_tls;
+    int                  m_threads;
+
+    // No synchronization
+    double getTotalMs()   const { return ((double)m_ticksTotal / cv::getTickFrequency()) * 1000; }
+    double getMeanMs()    const { return (((double)m_ticksTotal/m_counter) / cv::getTickFrequency()) * 1000; }
+};
+bool operator==(const NodeData& lhs, const NodeData& rhs);
+
+typedef Node<NodeData> InstrNode;
+
+CV_EXPORTS InstrNode* getTrace();
+
+#endif // !defined OPENCV_ABI_CHECK
+
+
+CV_EXPORTS bool       useInstrumentation();
+CV_EXPORTS void       setUseInstrumentation(bool flag);
+CV_EXPORTS void       resetTrace();
+
+enum FLAGS
+{
+    FLAGS_NONE              = 0,
+    FLAGS_MAPPING           = 0x01,
+    FLAGS_EXPAND_SAME_NAMES = 0x02,
+};
+
+CV_EXPORTS void       setFlags(FLAGS modeFlags);
+static inline void    setFlags(int modeFlags) { setFlags((FLAGS)modeFlags); }
+CV_EXPORTS FLAGS      getFlags();
+
+} // namespace instr
+
+//! @}
+
+} // namespace
+
+#endif // OPENCV_UTILS_INSTR_HPP
diff --git a/IPL/include/opencv/opencv2/core/utils/logger.defines.hpp b/IPL/include/opencv/opencv2/core/utils/logger.defines.hpp
new file mode 100644
index 0000000..7d73f02
--- /dev/null
+++ b/IPL/include/opencv/opencv2/core/utils/logger.defines.hpp
@@ -0,0 +1,42 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_LOGGER_DEFINES_HPP
+#define OPENCV_LOGGER_DEFINES_HPP
+
+//! @addtogroup core_logging
+//! @{
+
+// Supported logging levels and their semantics
+#define CV_LOG_LEVEL_SILENT 0          //!< for using in setLogLevel() call
+#define CV_LOG_LEVEL_FATAL 1           //!< Fatal (critical) error (unrecoverable internal error)
+#define CV_LOG_LEVEL_ERROR 2           //!< Error message
+#define CV_LOG_LEVEL_WARN 3            //!< Warning message
+#define CV_LOG_LEVEL_INFO 4            //!< Info message
+#define CV_LOG_LEVEL_DEBUG 5           //!< Debug message. Disabled in the "Release" build.
+#define CV_LOG_LEVEL_VERBOSE 6         //!< Verbose (trace) messages. Requires verbosity level. Disabled in the "Release" build.
+
+namespace cv {
+namespace utils {
+namespace logging {
+
+//! Supported logging levels and their semantics
+enum LogLevel {
+    LOG_LEVEL_SILENT = 0,              //!< for using in setLogLevel() call
+    LOG_LEVEL_FATAL = 1,               //!< Fatal (critical) error (unrecoverable internal error)
+    LOG_LEVEL_ERROR = 2,               //!< Error message
+    LOG_LEVEL_WARNING = 3,             //!< Warning message
+    LOG_LEVEL_INFO = 4,                //!< Info message
+    LOG_LEVEL_DEBUG = 5,               //!< Debug message. Disabled in the "Release" build.
+    LOG_LEVEL_VERBOSE = 6,             //!< Verbose (trace) messages. Requires verbosity level. Disabled in the "Release" build.
+#ifndef CV_DOXYGEN
+    ENUM_LOG_LEVEL_FORCE_INT = INT_MAX
+#endif
+};
+
+}}} // namespace
+
+//! @}
+
+#endif // OPENCV_LOGGER_DEFINES_HPP
diff --git a/IPL/include/opencv/opencv2/core/utils/logger.hpp b/IPL/include/opencv/opencv2/core/utils/logger.hpp
new file mode 100644
index 0000000..accb860
--- /dev/null
+++ b/IPL/include/opencv/opencv2/core/utils/logger.hpp
@@ -0,0 +1,218 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_LOGGER_HPP
+#define OPENCV_LOGGER_HPP
+
+#include <iostream>
+#include <sstream>
+#include <limits.h> // INT_MAX
+
+#include "logger.defines.hpp"
+#include "logtag.hpp"
+
+namespace cv {
+namespace utils {
+namespace logging {
+
+//! @addtogroup core_logging
+//! @{
+
+/** Set global logging level
+@return previous logging level
+*/
+CV_EXPORTS LogLevel setLogLevel(LogLevel logLevel);
+/** Get global logging level */
+CV_EXPORTS LogLevel getLogLevel();
+
+CV_EXPORTS void registerLogTag(cv::utils::logging::LogTag* plogtag);
+
+CV_EXPORTS void setLogTagLevel(const char* tag, cv::utils::logging::LogLevel level);
+
+CV_EXPORTS cv::utils::logging::LogLevel getLogTagLevel(const char* tag);
+
+namespace internal {
+
+/** Get global log tag */
+CV_EXPORTS cv::utils::logging::LogTag* getGlobalLogTag();
+
+/** Write log message */
+CV_EXPORTS void writeLogMessage(LogLevel logLevel, const char* message);
+
+/** Write log message */
+CV_EXPORTS void writeLogMessageEx(LogLevel logLevel, const char* tag, const char* file, int line, const char* func, const char* message);
+
+} // namespace
+
+struct LogTagAuto
+    : public LogTag
+{
+    inline LogTagAuto(const char* _name, LogLevel _level)
+        : LogTag(_name, _level)
+    {
+        registerLogTag(this);
+    }
+};
+
+/**
+ * \def CV_LOG_STRIP_LEVEL
+ *
+ * Define CV_LOG_STRIP_LEVEL=CV_LOG_LEVEL_[DEBUG|INFO|WARN|ERROR|FATAL|SILENT] to compile out anything at that logging level and at all more verbose (numerically higher) levels
+ */
+#ifndef CV_LOG_STRIP_LEVEL
+# if defined NDEBUG
+#   define CV_LOG_STRIP_LEVEL CV_LOG_LEVEL_DEBUG
+# else
+#   define CV_LOG_STRIP_LEVEL CV_LOG_LEVEL_VERBOSE
+# endif
+#endif
+
+#define CV_LOGTAG_PTR_CAST(expr) static_cast<const cv::utils::logging::LogTag*>(expr)
+
+// CV_LOGTAG_EXPAND_NAME is intended to be re-defined (undef and then define again)
+// to allow logging users to use a shorter name argument when calling
+// CV_LOG_WITH_TAG or its related macros such as CV_LOG_INFO.
+//
+// This macro is intended to modify the tag argument as a string (token), via
+// preprocessor token pasting or metaprogramming techniques. A typical usage
+// is to apply a prefix, such as
+// ...... #define CV_LOGTAG_EXPAND_NAME(tag) cv_logtag_##tag
+//
+// It is permitted to re-define to a hard-coded expression, ignoring the tag.
+// This would work identically like the CV_LOGTAG_FALLBACK macro.
+//
+// Important: When the logging macro is called with tag being NULL, a user-defined
+// CV_LOGTAG_EXPAND_NAME may expand it into cv_logtag_0, cv_logtag_NULL, or
+// cv_logtag_nullptr. Use with care. Also be mindful of C++ symbol redefinitions.
+//
+// If there is a significant amount of logging code with tag being NULL, it is
+// recommended to use (re-define) CV_LOGTAG_FALLBACK to inject locally a default
+// tag at the beginning of a compilation unit, to minimize lines of code changes.
+//
+#define CV_LOGTAG_EXPAND_NAME(tag) tag
+
+// CV_LOGTAG_FALLBACK is intended to be re-defined (undef and then define again)
+// by any other compilation units to provide a log tag when the logging statement
+// does not specify one. The macro needs to expand into a C++ expression that can
+// be static_cast into (cv::utils::logging::LogTag*). Null (nullptr) is permitted.
+#define CV_LOGTAG_FALLBACK nullptr
+
+// CV_LOGTAG_GLOBAL is the tag used when a log tag is not specified in the logging
+// statement nor the compilation unit. The macro needs to expand into a C++
+// expression that can be static_cast into (cv::utils::logging::LogTag*). Must be
+// non-null. Do not re-define.
+#define CV_LOGTAG_GLOBAL cv::utils::logging::internal::getGlobalLogTag()
+
+#define CV_LOG_WITH_TAG(tag, msgLevel, extra_check0, extra_check1, ...) \
+    for(;;) { \
+        extra_check0; \
+        const auto cv_temp_msglevel = (cv::utils::logging::LogLevel)(msgLevel); \
+        if (cv_temp_msglevel >= (CV_LOG_STRIP_LEVEL)) break; \
+        auto cv_temp_logtagptr = CV_LOGTAG_PTR_CAST(CV_LOGTAG_EXPAND_NAME(tag)); \
+        if (!cv_temp_logtagptr) cv_temp_logtagptr = CV_LOGTAG_PTR_CAST(CV_LOGTAG_FALLBACK); \
+        if (!cv_temp_logtagptr) cv_temp_logtagptr = CV_LOGTAG_PTR_CAST(CV_LOGTAG_GLOBAL); \
+        if (cv_temp_logtagptr && (cv_temp_msglevel > cv_temp_logtagptr->level)) break; \
+        extra_check1; \
+        std::stringstream cv_temp_logstream; \
+        cv_temp_logstream << __VA_ARGS__; \
+        cv::utils::logging::internal::writeLogMessageEx( \
+            cv_temp_msglevel, \
+            (cv_temp_logtagptr ? cv_temp_logtagptr->name : nullptr), \
+            __FILE__, \
+            __LINE__, \
+            CV_Func, \
+            cv_temp_logstream.str().c_str()); \
+        break; \
+    }
+
+#define CV_LOG_FATAL(tag, ...)   CV_LOG_WITH_TAG(tag, cv::utils::logging::LOG_LEVEL_FATAL, , , __VA_ARGS__)
+#define CV_LOG_ERROR(tag, ...)   CV_LOG_WITH_TAG(tag, cv::utils::logging::LOG_LEVEL_ERROR, , , __VA_ARGS__)
+#define CV_LOG_WARNING(tag, ...) CV_LOG_WITH_TAG(tag, cv::utils::logging::LOG_LEVEL_WARNING, , , __VA_ARGS__)
+#define CV_LOG_INFO(tag, ...) CV_LOG_WITH_TAG(tag, cv::utils::logging::LOG_LEVEL_INFO, , , __VA_ARGS__)
+#define CV_LOG_DEBUG(tag, ...) CV_LOG_WITH_TAG(tag, cv::utils::logging::LOG_LEVEL_DEBUG, , , __VA_ARGS__)
+#define CV_LOG_VERBOSE(tag, v, ...) CV_LOG_WITH_TAG(tag, (cv::utils::logging::LOG_LEVEL_VERBOSE + (int)(v)), , , __VA_ARGS__)
+
+#if CV_LOG_STRIP_LEVEL <= CV_LOG_LEVEL_INFO
+#undef CV_LOG_INFO
+#define CV_LOG_INFO(tag, ...)
+#endif
+
+#if CV_LOG_STRIP_LEVEL <= CV_LOG_LEVEL_DEBUG
+#undef CV_LOG_DEBUG
+#define CV_LOG_DEBUG(tag, ...)
+#endif
+
+#if CV_LOG_STRIP_LEVEL <= CV_LOG_LEVEL_VERBOSE
+#undef CV_LOG_VERBOSE
+#define CV_LOG_VERBOSE(tag, v, ...)
+#endif
+
+//! @cond IGNORED
+#define CV__LOG_ONCE_CHECK_PRE \
+    static bool _cv_log_once_ ## __LINE__ = false; \
+    if (_cv_log_once_ ## __LINE__) break;
+
+#define CV__LOG_ONCE_CHECK_POST \
+    _cv_log_once_ ## __LINE__ = true;
+
+#define CV__LOG_IF_CHECK(logging_cond) \
+    if (!(logging_cond)) break;
+
+//! @endcond
+
+
+// CV_LOG_ONCE_XXX macros
+
+#define CV_LOG_ONCE_ERROR(tag, ...) CV_LOG_WITH_TAG(tag, cv::utils::logging::LOG_LEVEL_ERROR, CV__LOG_ONCE_CHECK_PRE, CV__LOG_ONCE_CHECK_POST, __VA_ARGS__)
+#define CV_LOG_ONCE_WARNING(tag, ...) CV_LOG_WITH_TAG(tag, cv::utils::logging::LOG_LEVEL_WARNING, CV__LOG_ONCE_CHECK_PRE, CV__LOG_ONCE_CHECK_POST, __VA_ARGS__)
+#define CV_LOG_ONCE_INFO(tag, ...) CV_LOG_WITH_TAG(tag, cv::utils::logging::LOG_LEVEL_INFO, CV__LOG_ONCE_CHECK_PRE, CV__LOG_ONCE_CHECK_POST, __VA_ARGS__)
+#define CV_LOG_ONCE_DEBUG(tag, ...) CV_LOG_WITH_TAG(tag, cv::utils::logging::LOG_LEVEL_DEBUG, CV__LOG_ONCE_CHECK_PRE, CV__LOG_ONCE_CHECK_POST, __VA_ARGS__)
+#define CV_LOG_ONCE_VERBOSE(tag, v, ...) CV_LOG_WITH_TAG(tag, (cv::utils::logging::LOG_LEVEL_VERBOSE + (int)(v)), CV__LOG_ONCE_CHECK_PRE, CV__LOG_ONCE_CHECK_POST, __VA_ARGS__)
+
+#if CV_LOG_STRIP_LEVEL <= CV_LOG_LEVEL_INFO
+#undef CV_LOG_ONCE_INFO
+#define CV_LOG_ONCE_INFO(tag, ...)
+#endif
+
+#if CV_LOG_STRIP_LEVEL <= CV_LOG_LEVEL_DEBUG
+#undef CV_LOG_ONCE_DEBUG
+#define CV_LOG_ONCE_DEBUG(tag, ...)
+#endif
+
+#if CV_LOG_STRIP_LEVEL <= CV_LOG_LEVEL_VERBOSE
+#undef CV_LOG_ONCE_VERBOSE
+#define CV_LOG_ONCE_VERBOSE(tag, v, ...)
+#endif
+
+
+// CV_LOG_IF_XXX macros
+
+#define CV_LOG_IF_FATAL(tag, logging_cond, ...) CV_LOG_WITH_TAG(tag, cv::utils::logging::LOG_LEVEL_FATAL, , CV__LOG_IF_CHECK(logging_cond), __VA_ARGS__)
+#define CV_LOG_IF_ERROR(tag, logging_cond, ...) CV_LOG_WITH_TAG(tag, cv::utils::logging::LOG_LEVEL_ERROR, , CV__LOG_IF_CHECK(logging_cond), __VA_ARGS__)
+#define CV_LOG_IF_WARNING(tag, logging_cond, ...) CV_LOG_WITH_TAG(tag, cv::utils::logging::LOG_LEVEL_WARNING, , CV__LOG_IF_CHECK(logging_cond), __VA_ARGS__)
+#define CV_LOG_IF_INFO(tag, logging_cond, ...) CV_LOG_WITH_TAG(tag, cv::utils::logging::LOG_LEVEL_INFO, , CV__LOG_IF_CHECK(logging_cond), __VA_ARGS__)
+#define CV_LOG_IF_DEBUG(tag, logging_cond, ...) CV_LOG_WITH_TAG(tag, cv::utils::logging::LOG_LEVEL_DEBUG, , CV__LOG_IF_CHECK(logging_cond), __VA_ARGS__)
+#define CV_LOG_IF_VERBOSE(tag, v, logging_cond, ...) CV_LOG_WITH_TAG(tag, (cv::utils::logging::LOG_LEVEL_VERBOSE + (int)(v)), , CV__LOG_IF_CHECK(logging_cond), __VA_ARGS__)
+
+#if CV_LOG_STRIP_LEVEL <= CV_LOG_LEVEL_INFO
+#undef CV_LOG_IF_INFO
+#define CV_LOG_IF_INFO(tag, logging_cond, ...)
+#endif
+
+#if CV_LOG_STRIP_LEVEL <= CV_LOG_LEVEL_DEBUG
+#undef CV_LOG_IF_DEBUG
+#define CV_LOG_IF_DEBUG(tag, logging_cond, ...)
+#endif
+
+#if CV_LOG_STRIP_LEVEL <= CV_LOG_LEVEL_VERBOSE
+#undef CV_LOG_IF_VERBOSE
+#define CV_LOG_IF_VERBOSE(tag, v, logging_cond, ...)
+#endif
+
+
+//! @}
+
+}}} // namespace
+
+#endif // OPENCV_LOGGER_HPP
diff --git a/IPL/include/opencv/opencv2/core/utils/logtag.hpp b/IPL/include/opencv/opencv2/core/utils/logtag.hpp
new file mode 100644
index 0000000..4089720
--- /dev/null
+++ b/IPL/include/opencv/opencv2/core/utils/logtag.hpp
@@ -0,0 +1,28 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_CORE_LOGTAG_HPP
+#define OPENCV_CORE_LOGTAG_HPP
+
+#include "opencv2/core/cvstd.hpp"
+#include "logger.defines.hpp"
+
+namespace cv {
+namespace utils {
+namespace logging {
+
+struct LogTag  // pairs a tag name with a LogLevel value
+{
+    const char* name;  // tag name; only the pointer is stored, the string itself is not copied
+    LogLevel level;    // level value stored for this tag
+    // Initializes both fields from the arguments; _name must stay valid for as long as `name` is read.
+    inline LogTag(const char* _name, LogLevel _level)
+        : name(_name)
+        , level(_level)
+    {}
+};
+
+}}}
+
+#endif
diff --git a/IPL/include/opencv/opencv2/core/utils/tls.hpp b/IPL/include/opencv/opencv2/core/utils/tls.hpp
new file mode 100644
index 0000000..697a7b0
--- /dev/null
+++ b/IPL/include/opencv/opencv2/core/utils/tls.hpp
@@ -0,0 +1,233 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_UTILS_TLS_HPP
+#define OPENCV_UTILS_TLS_HPP
+
+#include <opencv2/core/utility.hpp>
+
+namespace cv {
+
+//! @addtogroup core_utils
+//! @{
+
+namespace details { class TlsStorage; }
+
+/** TLS container base implementation
+ *
+ * Don't use directly: construction, destruction and the data accessors are protected.
+ *
+ * @sa TLSData, TLSDataAccumulator templates
+ */
+class CV_EXPORTS TLSDataContainer
+{
+protected:
+    TLSDataContainer();
+    virtual ~TLSDataContainer();
+
+    /// @deprecated use detachData() instead
+    void  gatherData(std::vector<void*> &data) const;
+    /// get TLS data and detach all data from threads (similar to cleanup() call)
+    void  detachData(std::vector<void*>& data);
+
+    void* getData() const;  //!< untyped data pointer; TLSData<T>::get() casts the result to T*
+    void  release();        //!< counterpart of cleanup() that does not keep the container valid (see cleanup())
+
+protected:
+    virtual void* createDataInstance() const = 0;              //!< allocate one data instance (implemented by the templates)
+    virtual void  deleteDataInstance(void* pData) const = 0;   //!< take back an instance produced by createDataInstance()
+
+private:
+    int key_;  // TLS key; NOTE(review): presumably assigned by TlsStorage - confirm in core/src/system.cpp
+
+    friend class cv::details::TlsStorage;  // core/src/system.cpp
+
+public:
+    void cleanup(); //!< Release created TLS data container objects. It is similar to release() call, but it keeps TLS container valid.
+
+private:
+    // Disable copy/assign (noncopyable pattern)
+    TLSDataContainer(TLSDataContainer &) = delete;  // NOTE(review): non-const reference parameter; a const& is the conventional form
+    TLSDataContainer& operator =(const TLSDataContainer &) = delete;
+};
+
+
+/** @brief Simple TLS data class: typed wrapper over TLSDataContainer that creates instances with `new T`
+ *
+ * @sa TLSDataAccumulator
+ */
+template <typename T>
+class TLSData : protected TLSDataContainer
+{
+public:
+    inline TLSData() {}
+    inline ~TLSData() { release(); }  // calls TLSDataContainer::release()
+
+    inline T* get() const   { return (T*)getData(); }  //!< Get pointer to data associated with key
+    inline T& getRef() const { T* ptr = (T*)getData(); CV_DbgAssert(ptr); return *ptr; }  //!< Get reference to data associated with key (pointer is checked with CV_DbgAssert)
+
+    /// Release associated thread data (forwards to TLSDataContainer::cleanup())
+    inline void cleanup()
+    {
+        TLSDataContainer::cleanup();
+    }
+
+protected:
+    /// Wrapper to allocate data by template: default-constructs a T on the heap
+    virtual void* createDataInstance() const CV_OVERRIDE { return new T; }
+    /// Wrapper to release data by template: deletes the pointer as a T
+    virtual void  deleteDataInstance(void* pData) const CV_OVERRIDE { delete (T*)pData; }
+};
+
+
+/// TLS data accumulator with gathering methods; instances handed back outside cleanup are kept (see deleteDataInstance())
+template <typename T>
+class TLSDataAccumulator : public TLSData<T>
+{
+    mutable cv::Mutex mutex;  // locked around accesses to the two vectors below inside this class
+    mutable std::vector<T*> dataFromTerminatedThreads;  // instances passed to deleteDataInstance() while cleanupMode was false
+    std::vector<T*> detachedData;  // instances collected by detachData(); freed by _cleanupDetachedData()
+    bool cleanupMode;  // while true, deleteDataInstance() deletes instances instead of stashing them
+public:
+    TLSDataAccumulator() : cleanupMode(false) {}
+    ~TLSDataAccumulator()
+    {
+        release();
+    }
+
+    /** @brief Get data from all threads
+     * @deprecated replaced by detachData()
+     *
+     * Lifetime of vector data is valid until next detachData()/cleanup()/release() calls
+     *
+     * @param[out] data result buffer (should be empty)
+     */
+    void gather(std::vector<T*> &data) const
+    {
+        CV_Assert(cleanupMode == false);  // state is not valid
+        CV_Assert(data.empty());
+        {
+            // view the std::vector<T*> as std::vector<void*> so the base class fills it in place
+            std::vector<void*> &dataVoid = reinterpret_cast<std::vector<void*>&>(data);
+            TLSDataContainer::gatherData(dataVoid);
+        }
+        {
+            // append the stashed instances as well; they stay owned by this object
+            AutoLock lock(mutex);
+            data.reserve(data.size() + dataFromTerminatedThreads.size());
+            for (typename std::vector<T*>::const_iterator i = dataFromTerminatedThreads.begin(); i != dataFromTerminatedThreads.end(); ++i)
+            {
+                data.push_back((T*)*i);
+            }
+        }
+    }
+
+    /** @brief Get and detach data from all threads
+     *
+     * Call cleanupDetachedData() when returned vector is not needed anymore.
+     *
+     * @return Vector with associated data. Content is preserved (including lifetime of attached data pointers) until next detachData()/cleanupDetachedData()/cleanup()/release() calls
+     */
+    std::vector<T*>& detachData()
+    {
+        CV_Assert(cleanupMode == false);  // state is not valid
+        std::vector<void*> dataVoid;
+        {
+            TLSDataContainer::detachData(dataVoid);
+        }
+        {
+            AutoLock lock(mutex);
+            detachedData.reserve(dataVoid.size() + dataFromTerminatedThreads.size());
+            // stashed instances go first and are moved out of dataFromTerminatedThreads
+            for (typename std::vector<T*>::const_iterator i = dataFromTerminatedThreads.begin(); i != dataFromTerminatedThreads.end(); ++i)
+            {
+                detachedData.push_back((T*)*i);
+            }
+            dataFromTerminatedThreads.clear();
+            // then the instances just detached by the base class
+            for (typename std::vector<void*>::const_iterator i = dataVoid.begin(); i != dataVoid.end(); ++i)
+            {
+                detachedData.push_back((T*)(void*)*i);
+            }
+        }
+        dataVoid.clear();
+        return detachedData;  // reference to the member: entries accumulate until _cleanupDetachedData() runs
+    }
+
+    /// Release associated thread data returned by detachData() call
+    void cleanupDetachedData()
+    {
+        AutoLock lock(mutex);
+        cleanupMode = true;  // makes deleteDataInstance() delete instead of stash
+        _cleanupDetachedData();
+        cleanupMode = false;
+    }
+
+    /// Release associated thread data
+    void cleanup()
+    {
+        cleanupMode = true;  // set before the base-class call and before the lock is taken
+        TLSDataContainer::cleanup();
+
+        AutoLock lock(mutex);
+        _cleanupDetachedData();
+        _cleanupTerminatedData();
+        cleanupMode = false;
+    }
+
+    /// Release associated thread data and free TLS key
+    void release()
+    {
+        cleanupMode = true;  // not reset afterwards (unlike cleanup())
+        TLSDataContainer::release();
+        {
+            AutoLock lock(mutex);
+            _cleanupDetachedData();
+            _cleanupTerminatedData();
+        }
+    }
+
+protected:
+    // synchronized: every caller in this class holds `mutex` and has set cleanupMode
+    void _cleanupDetachedData()
+    {
+        for (typename std::vector<T*>::iterator i = detachedData.begin(); i != detachedData.end(); ++i)
+        {
+            deleteDataInstance((T*)*i);
+        }
+        detachedData.clear();
+    }
+
+    // synchronized: every caller in this class holds `mutex` and has set cleanupMode
+    void _cleanupTerminatedData()
+    {
+        for (typename std::vector<T*>::iterator i = dataFromTerminatedThreads.begin(); i != dataFromTerminatedThreads.end(); ++i)
+        {
+            deleteDataInstance((T*)*i);
+        }
+        dataFromTerminatedThreads.clear();
+    }
+
+protected:
+    virtual void* createDataInstance() const CV_OVERRIDE
+    {
+        // Note: we can collect all allocated data here, but this would require raced mutex locks
+        return new T;
+    }
+    virtual void  deleteDataInstance(void* pData) const CV_OVERRIDE
+    {
+        if (cleanupMode)
+        {
+            delete (T*)pData;  // cleanup in progress: really free the instance
+        }
+        else
+        {
+            // outside cleanup: keep the instance so gather()/detachData() can still return it
+            AutoLock lock(mutex);
+            dataFromTerminatedThreads.push_back((T*)pData);
+        }
+    }
+};
+
+
+//! @}
+
+} // namespace
+
+#endif // OPENCV_UTILS_TLS_HPP
diff --git a/IPL/include/opencv/opencv2/core/utils/trace.hpp b/IPL/include/opencv/opencv2/core/utils/trace.hpp
new file mode 100644
index 0000000..ef5d35b
--- /dev/null
+++ b/IPL/include/opencv/opencv2/core/utils/trace.hpp
@@ -0,0 +1,252 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_TRACE_HPP
+#define OPENCV_TRACE_HPP
+
+#include <opencv2/core/cvdef.h>
+
+namespace cv {
+namespace utils {
+namespace trace {
+
+//! @addtogroup core_logging
+//! @{
+
+//! Macro to trace function (empty stub here; redefined later in this header unless OPENCV_DISABLE_TRACE is defined)
+#define CV_TRACE_FUNCTION()
+//! Macro to trace function with nested regions skipped (see REGION_FLAG_SKIP_NESTED)
+#define CV_TRACE_FUNCTION_SKIP_NESTED()
+
+//! Trace code scope.
+//! @note Dynamic names are not supported in this macro (on stack or heap). Use string literals here only, like "initialize".
+#define CV_TRACE_REGION(name_as_static_string_literal)
+//! Mark the currently opened region as completed and create a new one
+//! @note Dynamic names are not supported in this macro (on stack or heap). Use string literals here only, like "step1".
+#define CV_TRACE_REGION_NEXT(name_as_static_string_literal)
+
+//! Macro to trace argument value
+#define CV_TRACE_ARG(arg_id)
+
+//! Macro to trace argument value (expanded version)
+#define CV_TRACE_ARG_VALUE(arg_id, arg_name, value)
+
+//! @cond IGNORED
+#define CV_TRACE_NS cv::utils::trace
+
+#if !defined(OPENCV_DISABLE_TRACE) && defined(__EMSCRIPTEN__)  // Emscripten builds disable tracing unless already configured
+#define OPENCV_DISABLE_TRACE 1
+#endif
+
+namespace details {
+
+#ifndef __OPENCV_TRACE  // 1 = OpenCV library code, 0 = tests/apps/user code (selects the CV_TRACE_FUNCTION flavour later in this header)
+# if defined __OPENCV_BUILD && !defined __OPENCV_TESTS && !defined __OPENCV_APPS
+#   define __OPENCV_TRACE 1
+# else
+#   define __OPENCV_TRACE 0
+# endif
+#endif // __OPENCV_TRACE
+
+#ifndef CV_TRACE_FILENAME  // file name recorded in each trace location; overridable before including this header
+# define CV_TRACE_FILENAME __FILE__
+#endif
+
+#ifndef CV__TRACE_FUNCTION  // compiler-specific spelling of the current function's signature, with a literal fallback
+# if defined _MSC_VER
+#   define CV__TRACE_FUNCTION __FUNCSIG__
+# elif defined __GNUC__
+#   define CV__TRACE_FUNCTION __PRETTY_FUNCTION__
+# else
+#   define CV__TRACE_FUNCTION "<unknown>"
+# endif
+#endif // CV__TRACE_FUNCTION
+
+//! Thread-local instance (usually allocated on stack)
+class CV_EXPORTS Region
+{
+public:
+    struct LocationExtraData;
+    struct LocationStaticStorage
+    {
+        LocationExtraData** ppExtra;   //!< implementation specific data
+        const char* name;              //!< region name (function name or other custom name)
+        const char* filename;          //!< source code filename
+        int line;                      //!< source code line
+        int flags;                     //!< flags (implementation code path: Plain, IPP, OpenCL)
+    };
+
+    Region(const LocationStaticStorage& location);
+    inline ~Region()
+    {
+        if (implFlags != 0)
+            destroy();  // expected to reset implFlags and pImpl (checked by the debug asserts below)
+        CV_DbgAssert(implFlags == 0);
+        CV_DbgAssert(pImpl == NULL);
+    }
+
+    class Impl;
+    Impl* pImpl; // NULL if current region is not active
+    int implFlags; // see RegionFlag, 0 if region is ignored
+
+    bool isActive() const { return pImpl != NULL; }  //!< true when an implementation object is attached
+
+    void destroy();
+private:
+    Region(const Region&); // disabled (declared private, not defined in this header)
+    Region& operator= (const Region&); // disabled (declared private, not defined in this header)
+};
+
+//! Specify region flags
+enum RegionLocationFlag {
+    REGION_FLAG_FUNCTION = (1 << 0),             //!< region is function (=1) / nested named region (=0)
+    REGION_FLAG_APP_CODE = (1 << 1),             //!< region is Application code (=1) / OpenCV library code (=0)
+    REGION_FLAG_SKIP_NESTED = (1 << 2),          //!< avoid processing of nested regions
+
+    REGION_FLAG_IMPL_IPP = (1 << 16),            //!< region is part of IPP code path
+    REGION_FLAG_IMPL_OPENCL = (2 << 16),         //!< region is part of OpenCL code path
+    REGION_FLAG_IMPL_OPENVX = (3 << 16),         //!< region is part of OpenVX code path
+
+    REGION_FLAG_IMPL_MASK = (15 << 16),          //!< mask of the implementation code-path field (bits 16..19); the IMPL values above are field values, not single bits
+
+    REGION_FLAG_REGION_FORCE = (1 << 30),        // NOTE(review): meaning is not documented in this header - see the trace implementation
+    REGION_FLAG_REGION_NEXT = (1 << 31),         //!< close previous region (see #CV_TRACE_REGION_NEXT macro)
+
+    ENUM_REGION_FLAG_FORCE_INT = INT_MAX         // NOTE(review): presumably pins the enum's value range to int - confirm
+};
+
+struct CV_EXPORTS TraceArg {  // static description of one traced argument (instances are defined by CV__TRACE_DEFINE_ARG_)
+public:
+    struct ExtraData;
+    ExtraData** ppExtra;  // address of the per-argument ExtraData pointer
+    const char* name;     // argument name
+    int flags;            // CV__TRACE_ARG_VALUE passes 0 here
+};
+/** @brief Add meta information to current region (function)
+ * See CV_TRACE_ARG macro
+ * @param arg argument information structure (global static cache)
+ * @param value argument value (can be a dynamic string in case of string, static allocation is not required)
+ */
+CV_EXPORTS void traceArg(const TraceArg& arg, const char* value);
+//! @overload
+CV_EXPORTS void traceArg(const TraceArg& arg, int value);
+//! @overload
+CV_EXPORTS void traceArg(const TraceArg& arg, int64 value);
+//! @overload
+CV_EXPORTS void traceArg(const TraceArg& arg, double value);
+
+#define CV__TRACE_LOCATION_VARNAME(loc_id) CVAUX_CONCAT(CVAUX_CONCAT(__cv_trace_location_, loc_id), __LINE__)
+#define CV__TRACE_LOCATION_EXTRA_VARNAME(loc_id) CVAUX_CONCAT(CVAUX_CONCAT(__cv_trace_location_extra_, loc_id) , __LINE__)
+// Defines two statics: the extra-data pointer (initially 0) and the LocationStaticStorage record that points to it.
+#define CV__TRACE_DEFINE_LOCATION_(loc_id, name, flags) \
+    static CV_TRACE_NS::details::Region::LocationExtraData* CV__TRACE_LOCATION_EXTRA_VARNAME(loc_id) = 0; \
+    static const CV_TRACE_NS::details::Region::LocationStaticStorage \
+        CV__TRACE_LOCATION_VARNAME(loc_id) = { &(CV__TRACE_LOCATION_EXTRA_VARNAME(loc_id)), name, CV_TRACE_FILENAME, __LINE__, flags};
+
+#define CV__TRACE_DEFINE_LOCATION_FN(name, flags) CV__TRACE_DEFINE_LOCATION_(fn, name, ((flags) | CV_TRACE_NS::details::REGION_FLAG_FUNCTION))
+
+// Function-level tracing: define the location record, then a Region object named __region_fn in the current scope.
+#define CV__TRACE_OPENCV_FUNCTION() \
+    CV__TRACE_DEFINE_LOCATION_FN(CV__TRACE_FUNCTION, 0); \
+    const CV_TRACE_NS::details::Region __region_fn(CV__TRACE_LOCATION_VARNAME(fn));
+
+#define CV__TRACE_OPENCV_FUNCTION_NAME(name) \
+    CV__TRACE_DEFINE_LOCATION_FN(name, 0); \
+    const CV_TRACE_NS::details::Region __region_fn(CV__TRACE_LOCATION_VARNAME(fn));
+
+#define CV__TRACE_APP_FUNCTION() \
+    CV__TRACE_DEFINE_LOCATION_FN(CV__TRACE_FUNCTION, CV_TRACE_NS::details::REGION_FLAG_APP_CODE); \
+    const CV_TRACE_NS::details::Region __region_fn(CV__TRACE_LOCATION_VARNAME(fn));
+
+#define CV__TRACE_APP_FUNCTION_NAME(name) \
+    CV__TRACE_DEFINE_LOCATION_FN(name, CV_TRACE_NS::details::REGION_FLAG_APP_CODE); \
+    const CV_TRACE_NS::details::Region __region_fn(CV__TRACE_LOCATION_VARNAME(fn));
+
+// The *_SKIP_NESTED variants add REGION_FLAG_SKIP_NESTED to the location flags.
+#define CV__TRACE_OPENCV_FUNCTION_SKIP_NESTED() \
+    CV__TRACE_DEFINE_LOCATION_FN(CV__TRACE_FUNCTION, CV_TRACE_NS::details::REGION_FLAG_SKIP_NESTED); \
+    const CV_TRACE_NS::details::Region __region_fn(CV__TRACE_LOCATION_VARNAME(fn));
+
+#define CV__TRACE_OPENCV_FUNCTION_NAME_SKIP_NESTED(name) \
+    CV__TRACE_DEFINE_LOCATION_FN(name, CV_TRACE_NS::details::REGION_FLAG_SKIP_NESTED); \
+    const CV_TRACE_NS::details::Region __region_fn(CV__TRACE_LOCATION_VARNAME(fn));
+
+#define CV__TRACE_APP_FUNCTION_SKIP_NESTED() \
+    CV__TRACE_DEFINE_LOCATION_FN(CV__TRACE_FUNCTION, CV_TRACE_NS::details::REGION_FLAG_SKIP_NESTED | CV_TRACE_NS::details::REGION_FLAG_APP_CODE); \
+    const CV_TRACE_NS::details::Region __region_fn(CV__TRACE_LOCATION_VARNAME(fn));
+
+// Named region: same scheme, but the Region variable is __region_ followed by __LINE__ and REGION_FLAG_FUNCTION is not added.
+#define CV__TRACE_REGION_(name_as_static_string_literal, flags) \
+    CV__TRACE_DEFINE_LOCATION_(region, name_as_static_string_literal, flags); \
+    CV_TRACE_NS::details::Region CVAUX_CONCAT(__region_, __LINE__)(CV__TRACE_LOCATION_VARNAME(region));
+
+#define CV__TRACE_REGION(name_as_static_string_literal) CV__TRACE_REGION_(name_as_static_string_literal, 0)
+#define CV__TRACE_REGION_NEXT(name_as_static_string_literal) CV__TRACE_REGION_(name_as_static_string_literal, CV_TRACE_NS::details::REGION_FLAG_REGION_NEXT)
+// Names of the per-argument statics: arg_id is token-pasted into the prefix, then __LINE__ is appended.
+#define CV__TRACE_ARG_VARNAME(arg_id) CVAUX_CONCAT(__cv_trace_arg_ ## arg_id, __LINE__)
+#define CV__TRACE_ARG_EXTRA_VARNAME(arg_id) CVAUX_CONCAT(__cv_trace_arg_extra_ ## arg_id, __LINE__)
+
+#define CV__TRACE_DEFINE_ARG_(arg_id, name, flags) \
+    static CV_TRACE_NS::details::TraceArg::ExtraData* CV__TRACE_ARG_EXTRA_VARNAME(arg_id) = 0; \
+    static const CV_TRACE_NS::details::TraceArg \
+        CV__TRACE_ARG_VARNAME(arg_id) = { &(CV__TRACE_ARG_EXTRA_VARNAME(arg_id)), name, flags };
+// Defines the TraceArg statics for arg_id (flags = 0) and reports `value` through traceArg().
+#define CV__TRACE_ARG_VALUE(arg_id, arg_name, value) \
+        CV__TRACE_DEFINE_ARG_(arg_id, arg_name, 0); \
+        CV_TRACE_NS::details::traceArg((CV__TRACE_ARG_VARNAME(arg_id)), value);
+// NOTE: expands to the public CV_TRACE_ARG_VALUE (single underscore), whichever definition is active at the point of use.
+#define CV__TRACE_ARG(arg_id) CV_TRACE_ARG_VALUE(arg_id, #arg_id, (arg_id))
+
+} // namespace
+
+#ifndef OPENCV_DISABLE_TRACE  // tracing enabled: replace the empty public stubs with the CV__TRACE_* implementations
+#undef CV_TRACE_FUNCTION
+#undef CV_TRACE_FUNCTION_SKIP_NESTED
+#if __OPENCV_TRACE  // library code: locations are recorded without REGION_FLAG_APP_CODE
+#define CV_TRACE_FUNCTION CV__TRACE_OPENCV_FUNCTION
+#define CV_TRACE_FUNCTION_SKIP_NESTED CV__TRACE_OPENCV_FUNCTION_SKIP_NESTED
+#else  // application/test code: locations carry REGION_FLAG_APP_CODE
+#define CV_TRACE_FUNCTION CV__TRACE_APP_FUNCTION
+#define CV_TRACE_FUNCTION_SKIP_NESTED CV__TRACE_APP_FUNCTION_SKIP_NESTED
+#endif // __OPENCV_TRACE
+
+#undef CV_TRACE_REGION
+#define CV_TRACE_REGION CV__TRACE_REGION
+
+#undef CV_TRACE_REGION_NEXT
+#define CV_TRACE_REGION_NEXT CV__TRACE_REGION_NEXT
+// Argument values are reported only while __region_fn (created by CV_TRACE_FUNCTION in the same scope) is active.
+#undef CV_TRACE_ARG_VALUE
+#define CV_TRACE_ARG_VALUE(arg_id, arg_name, value) \
+        if (__region_fn.isActive()) \
+        { \
+            CV__TRACE_ARG_VALUE(arg_id, arg_name, value); \
+        }
+
+#undef CV_TRACE_ARG
+#define CV_TRACE_ARG CV__TRACE_ARG
+
+#endif // OPENCV_DISABLE_TRACE
+
+#ifdef OPENCV_TRACE_VERBOSE  // *_VERBOSE macros alias the regular ones only when OPENCV_TRACE_VERBOSE is defined
+#define CV_TRACE_FUNCTION_VERBOSE CV_TRACE_FUNCTION
+#define CV_TRACE_REGION_VERBOSE CV_TRACE_REGION
+#define CV_TRACE_REGION_NEXT_VERBOSE CV_TRACE_REGION_NEXT
+#define CV_TRACE_ARG_VALUE_VERBOSE CV_TRACE_ARG_VALUE
+#define CV_TRACE_ARG_VERBOSE CV_TRACE_ARG
+#else  // otherwise they accept any arguments and expand to nothing
+#define CV_TRACE_FUNCTION_VERBOSE(...)
+#define CV_TRACE_REGION_VERBOSE(...)
+#define CV_TRACE_REGION_NEXT_VERBOSE(...)
+#define CV_TRACE_ARG_VALUE_VERBOSE(...)
+#define CV_TRACE_ARG_VERBOSE(...)
+#endif // OPENCV_TRACE_VERBOSE
+
+//! @endcond
+
+//! @}
+
+}}} // namespace
+
+#endif // OPENCV_TRACE_HPP
diff --git a/IPL/include/opencv/opencv2/core/va_intel.hpp b/IPL/include/opencv/opencv2/core/va_intel.hpp
index f4bb8a6..f665470 100644
--- a/IPL/include/opencv/opencv2/core/va_intel.hpp
+++ b/IPL/include/opencv/opencv2/core/va_intel.hpp
@@ -5,8 +5,8 @@
 // Copyright (C) 2015, Itseez, Inc., all rights reserved.
 // Third party copyrights are property of their respective owners.
 
-#ifndef __OPENCV_CORE_VA_INTEL_HPP__
-#define __OPENCV_CORE_VA_INTEL_HPP__
+#ifndef OPENCV_CORE_VA_INTEL_HPP
+#define OPENCV_CORE_VA_INTEL_HPP
 
 #ifndef __cplusplus
 #  error va_intel.hpp header must be compiled as C++
@@ -31,8 +31,9 @@ This section describes Intel VA-API/OpenCL (CL-VA) interoperability.
 
 To enable CL-VA interoperability support, configure OpenCV using CMake with WITH_VA_INTEL=ON . Currently VA-API is
 supported on Linux only. You should also install Intel Media Server Studio (MSS) to use this feature. You may
-have to specify the path(s) to MSS components for cmake in environment variables: VA_INTEL_MSDK_ROOT for Media SDK
-(default is "/opt/intel/mediasdk"), and VA_INTEL_IOCL_ROOT for Intel OpenCL (default is "/opt/intel/opencl").
+have to specify the path(s) to MSS components for cmake in environment variables:
+
+- VA_INTEL_IOCL_ROOT for Intel OpenCL (default is "/opt/intel/opencl").
 
 To use CL-VA interoperability you should first create VADisplay (libva), and then call initializeContextFromVA()
 function to create OpenCL context and set up interoperability.
@@ -74,4 +75,4 @@ CV_EXPORTS void convertFromVASurface(VADisplay display, VASurfaceID surface, Siz
 
 }} // namespace cv::va_intel
 
-#endif /* __OPENCV_CORE_VA_INTEL_HPP__ */
+#endif /* OPENCV_CORE_VA_INTEL_HPP */
diff --git a/IPL/include/opencv/opencv2/core/version.hpp b/IPL/include/opencv/opencv2/core/version.hpp
index a69d42f..0ef883e 100644
--- a/IPL/include/opencv/opencv2/core/version.hpp
+++ b/IPL/include/opencv/opencv2/core/version.hpp
@@ -1,64 +1,19 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                        Intel License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright( C) 2000-2015, Intel Corporation, all rights reserved.
-// Copyright (C) 2011-2013, NVIDIA Corporation, all rights reserved.
-// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
-// Copyright (C) 2015, Itseez Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of Intel Corporation may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-//(including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort(including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
 
-/*
-  definition of the current version of OpenCV
-  Usefull to test in user programs
-*/
+#ifndef OPENCV_VERSION_HPP
+#define OPENCV_VERSION_HPP
 
-#ifndef __OPENCV_VERSION_HPP__
-#define __OPENCV_VERSION_HPP__
-
-#define CV_VERSION_MAJOR    3
-#define CV_VERSION_MINOR    1
+#define CV_VERSION_MAJOR    4
+#define CV_VERSION_MINOR    3
 #define CV_VERSION_REVISION 0
 #define CV_VERSION_STATUS   "-dev"
 
 #define CVAUX_STR_EXP(__A)  #__A
 #define CVAUX_STR(__A)      CVAUX_STR_EXP(__A)
 
-#define CVAUX_STRW_EXP(__A)  L#__A
+#define CVAUX_STRW_EXP(__A)  L ## #__A
 #define CVAUX_STRW(__A)      CVAUX_STRW_EXP(__A)
 
 #define CV_VERSION          CVAUX_STR(CV_VERSION_MAJOR) "." CVAUX_STR(CV_VERSION_MINOR) "." CVAUX_STR(CV_VERSION_REVISION) CV_VERSION_STATUS
@@ -68,4 +23,4 @@
 #define CV_MINOR_VERSION    CV_VERSION_MINOR
 #define CV_SUBMINOR_VERSION CV_VERSION_REVISION
 
-#endif
+#endif // OPENCV_VERSION_HPP
diff --git a/IPL/include/opencv/opencv2/core/vsx_utils.hpp b/IPL/include/opencv/opencv2/core/vsx_utils.hpp
new file mode 100644
index 0000000..08ae890
--- /dev/null
+++ b/IPL/include/opencv/opencv2/core/vsx_utils.hpp
@@ -0,0 +1,1040 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html
+
+#ifndef OPENCV_HAL_VSX_UTILS_HPP
+#define OPENCV_HAL_VSX_UTILS_HPP
+
+#include "opencv2/core/cvdef.h"
+
+#ifndef SKIP_INCLUDES
+#   include <assert.h>
+#endif
+
+//! @addtogroup core_utils_vsx
+//! @{
+#if CV_VSX
+
+#define __VSX_S16__(c, v) (c){v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v}
+#define __VSX_S8__(c, v)  (c){v, v, v, v, v, v, v, v}
+#define __VSX_S4__(c, v)  (c){v, v, v, v}
+#define __VSX_S2__(c, v)  (c){v, v}
+// Per vector type NAME: NAME_set(...) lists the lanes explicitly, NAME_sp(c) repeats c in every lane, NAME_c(v) casts v to NAME, NAME_z is NAME_sp(0).
+typedef __vector unsigned char vec_uchar16;
+#define vec_uchar16_set(...) (vec_uchar16){__VA_ARGS__}
+#define vec_uchar16_sp(c)    (__VSX_S16__(vec_uchar16, (unsigned char)c))
+#define vec_uchar16_c(v)     ((vec_uchar16)(v))
+#define vec_uchar16_z        vec_uchar16_sp(0)
+
+typedef __vector signed char vec_char16;
+#define vec_char16_set(...) (vec_char16){__VA_ARGS__}
+#define vec_char16_sp(c)    (__VSX_S16__(vec_char16, (signed char)c))
+#define vec_char16_c(v)     ((vec_char16)(v))
+#define vec_char16_z        vec_char16_sp(0)
+
+typedef __vector unsigned short vec_ushort8;
+#define vec_ushort8_set(...) (vec_ushort8){__VA_ARGS__}
+#define vec_ushort8_sp(c)    (__VSX_S8__(vec_ushort8, (unsigned short)c))
+#define vec_ushort8_c(v)     ((vec_ushort8)(v))
+#define vec_ushort8_z        vec_ushort8_sp(0)
+
+typedef __vector signed short vec_short8;
+#define vec_short8_set(...) (vec_short8){__VA_ARGS__}
+#define vec_short8_sp(c)    (__VSX_S8__(vec_short8, (signed short)c))
+#define vec_short8_c(v)     ((vec_short8)(v))
+#define vec_short8_z        vec_short8_sp(0)
+
+typedef __vector unsigned int vec_uint4;
+#define vec_uint4_set(...) (vec_uint4){__VA_ARGS__}
+#define vec_uint4_sp(c)    (__VSX_S4__(vec_uint4, (unsigned int)c))
+#define vec_uint4_c(v)     ((vec_uint4)(v))
+#define vec_uint4_z        vec_uint4_sp(0)
+
+typedef __vector signed int vec_int4;
+#define vec_int4_set(...)  (vec_int4){__VA_ARGS__}
+#define vec_int4_sp(c)     (__VSX_S4__(vec_int4, (signed int)c))
+#define vec_int4_c(v)      ((vec_int4)(v))
+#define vec_int4_z         vec_int4_sp(0)
+
+typedef __vector float vec_float4;
+#define vec_float4_set(...)  (vec_float4){__VA_ARGS__}
+#define vec_float4_sp(c)     (__VSX_S4__(vec_float4, c))
+#define vec_float4_c(v)      ((vec_float4)(v))
+#define vec_float4_z         vec_float4_sp(0)
+
+typedef __vector unsigned long long vec_udword2;
+#define vec_udword2_set(...) (vec_udword2){__VA_ARGS__}
+#define vec_udword2_sp(c)    (__VSX_S2__(vec_udword2, (unsigned long long)c))
+#define vec_udword2_c(v)     ((vec_udword2)(v))
+#define vec_udword2_z        vec_udword2_sp(0)
+
+typedef __vector signed long long vec_dword2;
+#define vec_dword2_set(...) (vec_dword2){__VA_ARGS__}
+#define vec_dword2_sp(c)    (__VSX_S2__(vec_dword2, (signed long long)c))
+#define vec_dword2_c(v)     ((vec_dword2)(v))
+#define vec_dword2_z        vec_dword2_sp(0)
+
+typedef  __vector double vec_double2;
+#define vec_double2_set(...) (vec_double2){__VA_ARGS__}
+#define vec_double2_c(v)     ((vec_double2)(v))
+#define vec_double2_sp(c)    (__VSX_S2__(vec_double2, c))
+#define vec_double2_z        vec_double2_sp(0)
+// Boolean vector types are macros rather than typedefs and only get the _set / _c helpers.
+#define vec_bchar16           __vector __bool char
+#define vec_bchar16_set(...) (vec_bchar16){__VA_ARGS__}
+#define vec_bchar16_c(v)     ((vec_bchar16)(v))
+
+#define vec_bshort8           __vector __bool short
+#define vec_bshort8_set(...) (vec_bshort8){__VA_ARGS__}
+#define vec_bshort8_c(v)     ((vec_bshort8)(v))
+
+#define vec_bint4             __vector __bool int
+#define vec_bint4_set(...)   (vec_bint4){__VA_ARGS__}
+#define vec_bint4_c(v)       ((vec_bint4)(v))
+
+#define vec_bdword2            __vector __bool long long
+#define vec_bdword2_set(...)  (vec_bdword2){__VA_ARGS__}
+#define vec_bdword2_c(v)      ((vec_bdword2)(v))
+
+#define VSX_FINLINE(tp) extern inline tp __attribute__((always_inline))
+// Defines `rt fnm(const rg& a)` that simply returns fn2(a).
+#define VSX_REDIRECT_1RG(rt, rg, fnm, fn2)   \
+VSX_FINLINE(rt) fnm(const rg& a) { return fn2(a); }
+// Defines `rt fnm(const rg& a, const rg& b)` that simply returns fn2(a, b).
+#define VSX_REDIRECT_2RG(rt, rg, fnm, fn2)   \
+VSX_FINLINE(rt) fnm(const rg& a, const rg& b) { return fn2(a, b); }
+
+/*
+ * GCC VSX compatibility
+**/
+#if defined(__GNUG__) && !defined(__clang__)
+
+// inline asm helpers: each defines a forced-inline function `fnm` whose body is a single asm statement built from `opc`
+#define VSX_IMPL_1RG(rt, rg, opc, fnm) \
+VSX_FINLINE(rt) fnm(const rg& a)       \
+{ rt rs; __asm__ __volatile__(#opc" %x0,%x1" : "=wa" (rs) : "wa" (a)); return rs; }
+// one-operand form using the "v" constraint (GCC PowerPC: AltiVec register; "wa" above is any VSX register)
+#define VSX_IMPL_1VRG(rt, rg, opc, fnm) \
+VSX_FINLINE(rt) fnm(const rg& a)        \
+{ rt rs; __asm__ __volatile__(#opc" %0,%1" : "=v" (rs) : "v" (a)); return rs; }
+// two-operand form taking the complete asm template string (lets callers reorder operands or emit several instructions)
+#define VSX_IMPL_2VRG_F(rt, rg, fopc, fnm)     \
+VSX_FINLINE(rt) fnm(const rg& a, const rg& b)  \
+{ rt rs; __asm__ __volatile__(fopc : "=v" (rs) : "v" (a), "v" (b)); return rs; }
+// two-operand form with the default operand order "opc %0,%1,%2"
+#define VSX_IMPL_2VRG(rt, rg, opc, fnm) VSX_IMPL_2VRG_F(rt, rg, #opc" %0,%1,%2", fnm)
+
+#if __GNUG__ < 8
+
+    // Support for int4 -> dword2 expanding multiply was added in GCC 8.
+    #ifdef vec_mule
+        #undef vec_mule
+    #endif
+    #ifdef vec_mulo
+        #undef vec_mulo
+    #endif
+    // 8-bit and 16-bit inputs: forward to the compiler builtins
+    VSX_REDIRECT_2RG(vec_ushort8,  vec_uchar16,  vec_mule, __builtin_vec_mule)
+    VSX_REDIRECT_2RG(vec_short8,  vec_char16,  vec_mule, __builtin_vec_mule)
+    VSX_REDIRECT_2RG(vec_int4,  vec_short8,  vec_mule, __builtin_vec_mule)
+    VSX_REDIRECT_2RG(vec_uint4,  vec_ushort8,  vec_mule, __builtin_vec_mule)
+    VSX_REDIRECT_2RG(vec_ushort8,  vec_uchar16,  vec_mulo, __builtin_vec_mulo)
+    VSX_REDIRECT_2RG(vec_short8,  vec_char16,  vec_mulo, __builtin_vec_mulo)
+    VSX_REDIRECT_2RG(vec_int4,  vec_short8,  vec_mulo, __builtin_vec_mulo)
+    VSX_REDIRECT_2RG(vec_uint4,  vec_ushort8,  vec_mulo, __builtin_vec_mulo)
+    // NOTE(review): vec_mule maps to the vmulo* opcodes and vec_mulo to vmule* - presumably little-endian lane order; confirm
+    // dword2 support arrived in ISA 2.07 and GCC 8+
+    VSX_IMPL_2VRG(vec_dword2,  vec_int4,  vmulosw, vec_mule)
+    VSX_IMPL_2VRG(vec_udword2, vec_uint4, vmulouw, vec_mule)
+    VSX_IMPL_2VRG(vec_dword2,  vec_int4,  vmulesw, vec_mulo)
+    VSX_IMPL_2VRG(vec_udword2, vec_uint4, vmuleuw, vec_mulo)
+
+#endif // __GNUG__ < 8
+
+#if __GNUG__ < 7
+// up to GCC 6, vec_mul only supports floating-point (single/double precision) and long long vectors
+#   ifdef vec_mul
+#       undef vec_mul
+#   endif
+/*
+ * There is no direct instruction for 8-bit or 16-bit multiplication in ISA 2.07;
+ * XLC implements it with the "multiply even", "multiply odd" and "permute" instructions.
+**/
+#   define VSX_IMPL_MULH(Tvec, cperm)                                        \
+    VSX_FINLINE(Tvec) vec_mul(const Tvec& a, const Tvec& b)                  \
+    {                                                                        \
+        static const vec_uchar16 ev_od = {cperm};                            \
+        return vec_perm((Tvec)vec_mule(a, b), (Tvec)vec_mulo(a, b), ev_od);  \
+    }
+    #define VSX_IMPL_MULH_P16 0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30
+    VSX_IMPL_MULH(vec_char16,  VSX_IMPL_MULH_P16)
+    VSX_IMPL_MULH(vec_uchar16, VSX_IMPL_MULH_P16)
+    #define VSX_IMPL_MULH_P8 0, 1, 16, 17, 4, 5, 20, 21, 8, 9, 24, 25, 12, 13, 28, 29
+    VSX_IMPL_MULH(vec_short8,  VSX_IMPL_MULH_P8)
+    VSX_IMPL_MULH(vec_ushort8, VSX_IMPL_MULH_P8)
+    // vmuluwm is used for both signed and unsigned 32-bit lanes (reportedly valid for either)
+    VSX_IMPL_2VRG(vec_int4,  vec_int4,  vmuluwm, vec_mul)
+    VSX_IMPL_2VRG(vec_uint4, vec_uint4, vmuluwm, vec_mul)
+    // redirect to GCC builtin vec_mul, since it already supports floating-point and long long vectors
+    VSX_REDIRECT_2RG(vec_float4,  vec_float4,  vec_mul, __builtin_vec_mul)
+    VSX_REDIRECT_2RG(vec_double2, vec_double2, vec_mul, __builtin_vec_mul)
+    VSX_REDIRECT_2RG(vec_dword2,  vec_dword2,  vec_mul, __builtin_vec_mul)
+    VSX_REDIRECT_2RG(vec_udword2, vec_udword2, vec_mul, __builtin_vec_mul)
+#endif // __GNUG__ < 7
+
+#if __GNUG__ < 6
+/*
+ * The "compare greater than or equal" instruction in ISA 2.07 only supports single
+ * and double precision.
+ * XLC and newer versions of GCC implement the integer variants with "greater than" and NOR.
+**/
+#   ifdef vec_cmpge
+#       undef vec_cmpge
+#   endif
+#   ifdef vec_cmple
+#       undef vec_cmple
+#   endif
+#   define vec_cmple(a, b) vec_cmpge(b, a)
+#   define VSX_IMPL_CMPGE(rt, rg, opc, fnm) \
+    VSX_IMPL_2VRG_F(rt, rg, #opc" %0,%2,%1\n\t xxlnor %x0,%x0,%x0", fnm)
+    // the template runs the "greater than" opcode with operands %2,%1, then xxlnor inverts the result
+    VSX_IMPL_CMPGE(vec_bchar16, vec_char16,  vcmpgtsb, vec_cmpge)
+    VSX_IMPL_CMPGE(vec_bchar16, vec_uchar16, vcmpgtub, vec_cmpge)
+    VSX_IMPL_CMPGE(vec_bshort8, vec_short8,  vcmpgtsh, vec_cmpge)
+    VSX_IMPL_CMPGE(vec_bshort8, vec_ushort8, vcmpgtuh, vec_cmpge)
+    VSX_IMPL_CMPGE(vec_bint4,   vec_int4,    vcmpgtsw, vec_cmpge)
+    VSX_IMPL_CMPGE(vec_bint4,   vec_uint4,   vcmpgtuw, vec_cmpge)
+    VSX_IMPL_CMPGE(vec_bdword2, vec_dword2,  vcmpgtsd, vec_cmpge)
+    VSX_IMPL_CMPGE(vec_bdword2, vec_udword2, vcmpgtud, vec_cmpge)
+
+// redirect to the GCC builtin cmpge for float and double, which it already supports
+    VSX_REDIRECT_2RG(vec_bint4,   vec_float4,  vec_cmpge, __builtin_vec_cmpge)
+    VSX_REDIRECT_2RG(vec_bdword2, vec_double2, vec_cmpge, __builtin_vec_cmpge)
+
+// up to gcc5 vec_nor doesn't support bool long long; every other type is forwarded to the builtin
+#   undef vec_nor
+    template<typename T>
+    VSX_REDIRECT_2RG(T, T, vec_nor, __builtin_vec_nor)
+    // bool long long overload: NOR through vec_dword2 casts, then cast the result back
+    VSX_FINLINE(vec_bdword2) vec_nor(const vec_bdword2& a, const vec_bdword2& b)
+    { return vec_bdword2_c(__builtin_vec_nor(vec_dword2_c(a), vec_dword2_c(b))); }
+
+// vec_packs doesn't support doublewords in gcc4 and old versions of gcc5
+#   undef vec_packs
+    VSX_REDIRECT_2RG(vec_char16,  vec_short8,  vec_packs, __builtin_vec_packs)
+    VSX_REDIRECT_2RG(vec_uchar16, vec_ushort8, vec_packs, __builtin_vec_packs)
+    VSX_REDIRECT_2RG(vec_short8,  vec_int4,    vec_packs, __builtin_vec_packs)
+    VSX_REDIRECT_2RG(vec_ushort8, vec_uint4,   vec_packs, __builtin_vec_packs)
+    // doubleword overloads: emitted through VSX_IMPL_2VRG_F with the vpksdss / vpkudus templates
+    VSX_IMPL_2VRG_F(vec_int4,  vec_dword2,  "vpksdss %0,%2,%1", vec_packs)
+    VSX_IMPL_2VRG_F(vec_uint4, vec_udword2, "vpkudus %0,%2,%1", vec_packs)
+#endif // __GNUG__ < 6
+
+#if __GNUG__ < 5
+// vec_xxpermdi in gcc4 lacks little-endian support (like clang): swap a/b and remap c (0->3, 1->1, 2->2, 3->0)
+#   define vec_permi(a, b, c) vec_xxpermdi(b, a, (3 ^ (((c) & 1) << 1 | (c) >> 1)))
+// same situation as vec_xxpermdi: drop any existing vec_vbpermq and emit vbpermq directly
+#   undef vec_vbpermq
+    VSX_IMPL_2VRG(vec_udword2, vec_uchar16, vbpermq, vec_vbpermq)
+    VSX_IMPL_2VRG(vec_dword2,  vec_char16, vbpermq, vec_vbpermq)
+#else
+#   define vec_permi vec_xxpermdi
+#endif // __GNUG__ < 5
+
+// shift left double by word immediate (falls back to the xxsldwi builtin when vec_sldw is not defined)
+#ifndef vec_sldw
+#   define vec_sldw __builtin_vsx_xxsldwi
+#endif
+
+// vector population count; vec_popcntu returns the unsigned vector type for signed and unsigned inputs alike
+VSX_IMPL_1VRG(vec_uchar16, vec_uchar16, vpopcntb, vec_popcntu)
+VSX_IMPL_1VRG(vec_uchar16, vec_char16,  vpopcntb, vec_popcntu)
+VSX_IMPL_1VRG(vec_ushort8, vec_ushort8, vpopcnth, vec_popcntu)
+VSX_IMPL_1VRG(vec_ushort8, vec_short8,  vpopcnth, vec_popcntu)
+VSX_IMPL_1VRG(vec_uint4,   vec_uint4,   vpopcntw, vec_popcntu)
+VSX_IMPL_1VRG(vec_uint4,   vec_int4,    vpopcntw, vec_popcntu)
+VSX_IMPL_1VRG(vec_udword2, vec_udword2, vpopcntd, vec_popcntu)
+VSX_IMPL_1VRG(vec_udword2, vec_dword2,  vpopcntd, vec_popcntu)
+
+// converts between single and double precision (vec_cvfo in both directions)
+VSX_REDIRECT_1RG(vec_float4,  vec_double2, vec_cvfo, __builtin_vsx_xvcvdpsp)
+VSX_REDIRECT_1RG(vec_double2, vec_float4,  vec_cvfo, __builtin_vsx_xvcvspdp)
+
+// converts word (vec_ctdo) and doubleword (vec_ctd) to double precision
+#undef vec_ctd
+VSX_IMPL_1RG(vec_double2, vec_int4,    xvcvsxwdp, vec_ctdo)
+VSX_IMPL_1RG(vec_double2, vec_uint4,   xvcvuxwdp, vec_ctdo)
+VSX_IMPL_1RG(vec_double2, vec_dword2,  xvcvsxddp, vec_ctd)
+VSX_IMPL_1RG(vec_double2, vec_udword2, xvcvuxddp, vec_ctd)
+
+// converts word (vec_ctf) and doubleword (vec_ctfo) to single precision
+#undef vec_ctf
+VSX_IMPL_1RG(vec_float4, vec_int4,    xvcvsxwsp, vec_ctf)
+VSX_IMPL_1RG(vec_float4, vec_uint4,   xvcvuxwsp, vec_ctf)
+VSX_IMPL_1RG(vec_float4, vec_dword2,  xvcvsxdsp, vec_ctfo)
+VSX_IMPL_1RG(vec_float4, vec_udword2, xvcvuxdsp, vec_ctfo)
+
+// converts single (vec_cts) and double (vec_ctso) precision to signed word
+#undef vec_cts
+VSX_IMPL_1RG(vec_int4,  vec_double2, xvcvdpsxws, vec_ctso)
+VSX_IMPL_1RG(vec_int4,  vec_float4,  xvcvspsxws, vec_cts)
+
+// converts single (vec_ctu) and double (vec_ctuo) precision to unsigned word
+#undef vec_ctu
+VSX_IMPL_1RG(vec_uint4, vec_double2, xvcvdpuxws, vec_ctuo)
+VSX_IMPL_1RG(vec_uint4, vec_float4,  xvcvspuxws, vec_ctu)
+
+// converts double (vec_ctsl) and single (vec_ctslo) precision to signed doubleword
+#undef vec_ctsl
+VSX_IMPL_1RG(vec_dword2, vec_double2, xvcvdpsxds, vec_ctsl)
+VSX_IMPL_1RG(vec_dword2, vec_float4,  xvcvspsxds, vec_ctslo)
+
+// converts double (vec_ctul) and single (vec_ctulo) precision to unsigned doubleword
+#undef vec_ctul
+VSX_IMPL_1RG(vec_udword2, vec_double2, xvcvdpuxds, vec_ctul)
+VSX_IMPL_1RG(vec_udword2, vec_float4,  xvcvspuxds, vec_ctulo)
+
+// fall back to vec_vsx_ld / vec_vsx_st in case GCC doesn't define vec_xl (vec_xst is defined along with it)
+#ifndef vec_xl
+#   define vec_xl vec_vsx_ld
+#   define vec_xst vec_vsx_st
+#endif
+
+#endif // GCC VSX compatibility
+
+/*
+ * CLANG VSX compatibility
+**/
+#if defined(__clang__) && !defined(__IBMCPP__)
+
+/*
+ * CLANG doesn't support %x<n> in the inline asm template, which fixes the register number
+ * when using any of the register constraints wa, wd, wf
+ *
+ * For more explanation check out "PowerPC and IBM RS6000" in https://gcc.gnu.org/onlinedocs/gcc/Machine-Constraints.html
+ * Also there's already an open bug https://bugs.llvm.org/show_bug.cgi?id=31837
+ *
+ * So we're not able to use inline asm and only use built-in functions that CLANG supports,
+ * and use __builtin_convertvector if clang is missing any of the vector conversion built-in functions
+ *
+ * todo: clang asm template bug is fixed, need to reconsider the current workarounds.
+*/
+
+// conversion helper: defines fnm(const rg&) returning rt via __builtin_convertvector
+#define VSX_IMPL_CONVERT(rt, rg, fnm) \
+VSX_FINLINE(rt) fnm(const rg& a) { return __builtin_convertvector(a, rt); }
+
+#if __clang_major__ < 5
+// emulate vec_permi with merges, switching on the selector c (anything other than 0, 1, 2 acts as 3)
+#   define VSX_IMPL_CLANG_4_PERMI(Tvec)                                                 \
+    VSX_FINLINE(Tvec) vec_permi(const Tvec& a, const Tvec& b, unsigned const char c)    \
+    {                                                                                   \
+        switch (c)                                                                      \
+        {                                                                               \
+        case 0:                                                                         \
+            return vec_mergeh(a, b);                                                    \
+        case 1:                                                                         \
+            return vec_mergel(vec_mergeh(a, a), b);                                     \
+        case 2:                                                                         \
+            return vec_mergeh(vec_mergel(a, a), b);                                     \
+        default:                                                                        \
+            return vec_mergel(a, b);                                                    \
+        }                                                                               \
+    }
+    VSX_IMPL_CLANG_4_PERMI(vec_udword2)
+    VSX_IMPL_CLANG_4_PERMI(vec_dword2)
+    VSX_IMPL_CLANG_4_PERMI(vec_double2)
+
+// vec_xxsldwi is missing in clang 4: emulate it with vec_sld and a byte shift of c * 4
+#   define vec_xxsldwi(a, b, c) vec_sld(a, b, (c) * 4)
+#else
+// this branch is clang >= 5: apply the same vec_xxpermdi little-endian workaround as for gcc4
+#   define vec_permi(a, b, c) vec_xxpermdi(b, a, (3 ^ (((c) & 1) << 1 | (c) >> 1)))
+#endif // __clang_major__ < 5
+
+// shift left double by word immediate (falls back to vec_xxsldwi when vec_sldw is not defined)
+#ifndef vec_sldw
+#   define vec_sldw vec_xxsldwi
+#endif
+
+// Implement vec_rsqrt as vec_div(1, vec_sqrt(a)), since clang only supports vec_rsqrte
+#ifndef vec_rsqrt
+    VSX_FINLINE(vec_float4) vec_rsqrt(const vec_float4& a)
+    { return vec_div(vec_float4_sp(1), vec_sqrt(a)); }
+
+    VSX_FINLINE(vec_double2) vec_rsqrt(const vec_double2& a)
+    { return vec_div(vec_double2_sp(1), vec_sqrt(a)); }
+#endif
+
+// vec_promote is missing doubleword support: start from the *_z vector and set lane (b & 1) to a
+VSX_FINLINE(vec_dword2) vec_promote(long long a, int b)
+{
+    vec_dword2 ret = vec_dword2_z;
+    ret[b & 1] = a;
+    return ret;
+}
+
+VSX_FINLINE(vec_udword2) vec_promote(unsigned long long a, int b)
+{
+    vec_udword2 ret = vec_udword2_z;
+    ret[b & 1] = a;
+    return ret;
+}
+
+// vec_popcntu must return an unsigned vector; clang's vec_popcnt apparently keeps the input type (cf. gcc vec_vpopcnt)
+#define VSX_IMPL_POPCNTU(Tvec, Tvec2, ucast)   \
+VSX_FINLINE(Tvec) vec_popcntu(const Tvec2& a)  \
+{ return ucast(vec_popcnt(a)); }
+VSX_IMPL_POPCNTU(vec_uchar16, vec_char16, vec_uchar16_c)
+VSX_IMPL_POPCNTU(vec_ushort8, vec_short8, vec_ushort8_c)
+VSX_IMPL_POPCNTU(vec_uint4,   vec_int4,   vec_uint4_c)
+VSX_IMPL_POPCNTU(vec_udword2, vec_dword2, vec_udword2_c)
+// unsigned inputs need no cast: redirect straight to vec_popcnt
+VSX_REDIRECT_1RG(vec_uchar16, vec_uchar16, vec_popcntu, vec_popcnt)
+VSX_REDIRECT_1RG(vec_ushort8, vec_ushort8, vec_popcntu, vec_popcnt)
+VSX_REDIRECT_1RG(vec_uint4,   vec_uint4,   vec_popcntu, vec_popcnt)
+VSX_REDIRECT_1RG(vec_udword2, vec_udword2, vec_popcntu, vec_popcnt)
+
+// converts between single and double precision (vec_cvfo in both directions)
+VSX_REDIRECT_1RG(vec_float4,  vec_double2, vec_cvfo, __builtin_vsx_xvcvdpsp)
+VSX_REDIRECT_1RG(vec_double2, vec_float4,  vec_cvfo, __builtin_vsx_xvcvspdp)
+
+// converts word (vec_ctdo) and doubleword (vec_ctd) to double precision
+#ifdef vec_ctd
+#   undef vec_ctd
+#endif
+VSX_REDIRECT_1RG(vec_double2, vec_int4,  vec_ctdo, __builtin_vsx_xvcvsxwdp)
+VSX_REDIRECT_1RG(vec_double2, vec_uint4, vec_ctdo, __builtin_vsx_xvcvuxwdp)
+
+VSX_IMPL_CONVERT(vec_double2, vec_dword2,  vec_ctd)
+VSX_IMPL_CONVERT(vec_double2, vec_udword2, vec_ctd)
+
+// converts word (vec_ctf) and doubleword (vec_ctfo) to single precision
+#if __clang_major__ > 4
+#   undef vec_ctf
+#endif
+VSX_IMPL_CONVERT(vec_float4, vec_int4,    vec_ctf)
+VSX_IMPL_CONVERT(vec_float4, vec_uint4,   vec_ctf)
+VSX_REDIRECT_1RG(vec_float4, vec_dword2,  vec_ctfo, __builtin_vsx_xvcvsxdsp)
+VSX_REDIRECT_1RG(vec_float4, vec_udword2, vec_ctfo, __builtin_vsx_xvcvuxdsp)
+
+// converts single (vec_cts) and double (vec_ctso) precision to signed word
+#if __clang_major__ > 4
+#   undef vec_cts
+#endif
+VSX_REDIRECT_1RG(vec_int4,  vec_double2, vec_ctso, __builtin_vsx_xvcvdpsxws)
+VSX_IMPL_CONVERT(vec_int4,  vec_float4,  vec_cts)
+
+// converts single (vec_ctu) and double (vec_ctuo) precision to unsigned word
+#if __clang_major__ > 4
+#   undef vec_ctu
+#endif
+VSX_REDIRECT_1RG(vec_uint4, vec_double2, vec_ctuo, __builtin_vsx_xvcvdpuxws)
+VSX_IMPL_CONVERT(vec_uint4, vec_float4,  vec_ctu)
+
+// converts double (vec_ctsl) and single (vec_ctslo) precision to signed doubleword
+#ifdef vec_ctsl
+#   undef vec_ctsl
+#endif
+VSX_IMPL_CONVERT(vec_dword2, vec_double2, vec_ctsl)
+// __builtin_convertvector can't do this one (xvcvspsxds is missing), so widen with vec_cvfo first
+VSX_FINLINE(vec_dword2) vec_ctslo(const vec_float4& a)
+{ return vec_ctsl(vec_cvfo(a)); }
+
+// converts double (vec_ctul) and single (vec_ctulo) precision to unsigned doubleword
+#ifdef vec_ctul
+#   undef vec_ctul
+#endif
+VSX_IMPL_CONVERT(vec_udword2, vec_double2, vec_ctul)
+// __builtin_convertvector can't do this one (xvcvspuxds is missing), so widen with vec_cvfo first
+VSX_FINLINE(vec_udword2) vec_ctulo(const vec_float4& a)
+{ return vec_ctul(vec_cvfo(a)); }
+
+#endif // CLANG VSX compatibility
+
+/*
+ * Common GCC, CLANG compatibility
+**/
+#if defined(__GNUG__) && !defined(__IBMCPP__)
+
+#ifdef vec_cvf
+#   undef vec_cvf
+#endif
+// 4 -> 2 lanes: fnm rotates the words of a by one (vec_sldw(a, a, 1)) and then applies fn2
+#define VSX_IMPL_CONV_EVEN_4_2(rt, rg, fnm, fn2) \
+VSX_FINLINE(rt) fnm(const rg& a)                 \
+{ return fn2(vec_sldw(a, a, 1)); }
+
+VSX_IMPL_CONV_EVEN_4_2(vec_double2, vec_float4, vec_cvf,  vec_cvfo)
+VSX_IMPL_CONV_EVEN_4_2(vec_double2, vec_int4,   vec_ctd,  vec_ctdo)
+VSX_IMPL_CONV_EVEN_4_2(vec_double2, vec_uint4,  vec_ctd,  vec_ctdo)
+
+VSX_IMPL_CONV_EVEN_4_2(vec_dword2,  vec_float4, vec_ctsl, vec_ctslo)
+VSX_IMPL_CONV_EVEN_4_2(vec_udword2, vec_float4, vec_ctul, vec_ctulo)
+// 2 -> 4 lanes: fnm applies fn2 first and then rotates the result (vec_sldw(v4, v4, 3))
+#define VSX_IMPL_CONV_EVEN_2_4(rt, rg, fnm, fn2) \
+VSX_FINLINE(rt) fnm(const rg& a)                 \
+{                                                \
+    rt v4 = fn2(a);                              \
+    return vec_sldw(v4, v4, 3);                  \
+}
+
+VSX_IMPL_CONV_EVEN_2_4(vec_float4, vec_double2, vec_cvf, vec_cvfo)
+VSX_IMPL_CONV_EVEN_2_4(vec_float4, vec_dword2,  vec_ctf, vec_ctfo)
+VSX_IMPL_CONV_EVEN_2_4(vec_float4, vec_udword2, vec_ctf, vec_ctfo)
+
+VSX_IMPL_CONV_EVEN_2_4(vec_int4,   vec_double2, vec_cts, vec_ctso)
+VSX_IMPL_CONV_EVEN_2_4(vec_uint4,  vec_double2, vec_ctu, vec_ctuo)
+
+// Only for Eigen!
+/*
+ * changing the behavior of the conversion intrinsics for gcc affects Eigen,
+ * so we define the old two-argument form again, only on gcc and clang > 4
+*/
+#if !defined(__clang__) || __clang_major__ > 4
+    // the second arg must be 0 (asserted) and is otherwise ignored, since Eigen only truncates toward zero
+#   define VSX_IMPL_CONV_2VARIANT(rt, rg, fnm, fn2)     \
+    VSX_FINLINE(rt) fnm(const rg& a, int only_truncate) \
+    {                                                   \
+        assert(only_truncate == 0);                     \
+        CV_UNUSED(only_truncate);                            \
+        return fn2(a);                                  \
+    }
+    VSX_IMPL_CONV_2VARIANT(vec_int4,   vec_float4,  vec_cts, vec_cts)
+    VSX_IMPL_CONV_2VARIANT(vec_float4, vec_int4,    vec_ctf, vec_ctf)
+    // define vec_cts for converting double precision to signed doubleword,
+    // which isn't compatible with xlc, but that's okay since Eigen only uses it for gcc
+    VSX_IMPL_CONV_2VARIANT(vec_dword2, vec_double2, vec_cts, vec_ctsl)
+#endif // Eigen
+
+#endif // Common GCC, CLANG compatibility
+
+/*
+ * XLC VSX compatibility
+**/
+#if defined(__IBMCPP__)
+
+// vector population count: on XLC vec_popcntu is simply an alias of vec_popcnt
+#define vec_popcntu vec_popcnt
+
+// add one-argument overloads that redirect to the two-argument form with the second arg set to zero,
+// since we only support conversions without the second arg
+#define VSX_IMPL_OVERLOAD_Z2(rt, rg, fnm) \
+VSX_FINLINE(rt) fnm(const rg& a) { return fnm(a, 0); }
+
+VSX_IMPL_OVERLOAD_Z2(vec_double2, vec_int4,    vec_ctd)
+VSX_IMPL_OVERLOAD_Z2(vec_double2, vec_uint4,   vec_ctd)
+VSX_IMPL_OVERLOAD_Z2(vec_double2, vec_dword2,  vec_ctd)
+VSX_IMPL_OVERLOAD_Z2(vec_double2, vec_udword2, vec_ctd)
+
+VSX_IMPL_OVERLOAD_Z2(vec_float4,  vec_int4,    vec_ctf)
+VSX_IMPL_OVERLOAD_Z2(vec_float4,  vec_uint4,   vec_ctf)
+VSX_IMPL_OVERLOAD_Z2(vec_float4,  vec_dword2,  vec_ctf)
+VSX_IMPL_OVERLOAD_Z2(vec_float4,  vec_udword2, vec_ctf)
+
+VSX_IMPL_OVERLOAD_Z2(vec_int4,    vec_double2, vec_cts)
+VSX_IMPL_OVERLOAD_Z2(vec_int4,    vec_float4,  vec_cts)
+
+VSX_IMPL_OVERLOAD_Z2(vec_uint4,   vec_double2, vec_ctu)
+VSX_IMPL_OVERLOAD_Z2(vec_uint4,   vec_float4,  vec_ctu)
+
+VSX_IMPL_OVERLOAD_Z2(vec_dword2,  vec_double2, vec_ctsl)
+VSX_IMPL_OVERLOAD_Z2(vec_dword2,  vec_float4,  vec_ctsl)
+
+VSX_IMPL_OVERLOAD_Z2(vec_udword2, vec_double2, vec_ctul)
+VSX_IMPL_OVERLOAD_Z2(vec_udword2, vec_float4,  vec_ctul)
+
+// fixme: the conversions of odd-numbered elements are implemented in a dirty way (rotate, then convert),
+// since xlc doesn't support VSX register operands in inline asm.
+#define VSX_IMPL_CONV_ODD_4_2(rt, rg, fnm, fn2) \
+VSX_FINLINE(rt) fnm(const rg& a) { return fn2(vec_sldw(a, a, 3)); }
+
+VSX_IMPL_CONV_ODD_4_2(vec_double2, vec_float4, vec_cvfo,  vec_cvf)
+VSX_IMPL_CONV_ODD_4_2(vec_double2, vec_int4,   vec_ctdo,  vec_ctd)
+VSX_IMPL_CONV_ODD_4_2(vec_double2, vec_uint4,  vec_ctdo,  vec_ctd)
+
+VSX_IMPL_CONV_ODD_4_2(vec_dword2,  vec_float4, vec_ctslo, vec_ctsl)
+VSX_IMPL_CONV_ODD_4_2(vec_udword2, vec_float4, vec_ctulo, vec_ctul)
+// 2 -> 4 lanes: fnm applies fn2 first and then rotates the result (vec_sldw(v4, v4, 1))
+#define VSX_IMPL_CONV_ODD_2_4(rt, rg, fnm, fn2)  \
+VSX_FINLINE(rt) fnm(const rg& a)                 \
+{                                                \
+    rt v4 = fn2(a);                              \
+    return vec_sldw(v4, v4, 1);                  \
+}
+
+VSX_IMPL_CONV_ODD_2_4(vec_float4, vec_double2, vec_cvfo, vec_cvf)
+VSX_IMPL_CONV_ODD_2_4(vec_float4, vec_dword2,  vec_ctfo, vec_ctf)
+VSX_IMPL_CONV_ODD_2_4(vec_float4, vec_udword2, vec_ctfo, vec_ctf)
+
+VSX_IMPL_CONV_ODD_2_4(vec_int4,   vec_double2, vec_ctso, vec_cts)
+VSX_IMPL_CONV_ODD_2_4(vec_uint4,  vec_double2, vec_ctuo, vec_ctu)
+
+#endif // XLC VSX compatibility
+
+// silence the GCC warning caused by -Wunused-but-set-variable in rare cases
+#if defined(__GNUG__) && !defined(__clang__)
+#   define VSX_UNUSED(Tvec) Tvec __attribute__((__unused__))
+#else // CLANG, XLC
+#   define VSX_UNUSED(Tvec) Tvec
+#endif
+
+// gcc resolves the long int (uint64 / int64) overloads by itself, but they are ambiguous for XLC and CLANG
+#if defined(__clang__) || defined(__IBMCPP__)
+    VSX_FINLINE(vec_udword2) vec_splats(uint64 v)
+    { return vec_splats((unsigned long long) v); }
+
+    VSX_FINLINE(vec_dword2) vec_splats(int64 v)
+    { return vec_splats((long long) v); }
+
+    VSX_FINLINE(vec_udword2) vec_promote(uint64 a, int b)
+    { return vec_promote((unsigned long long) a, b); }
+
+    VSX_FINLINE(vec_dword2) vec_promote(int64 a, int b)
+    { return vec_promote((long long) a, b); }
+#endif
+
+/*
+ * implement vsx_ld(offset, pointer), vsx_st(vector, offset, pointer)
+ * load and store using an offset counted in elements of the pointer type
+ *
+ * implement vsx_ldf(offset, pointer), vsx_stf(vector, offset, pointer)
+ * load and store using an offset counted in bytes
+ *
+ * Note: In clang vec_xl and vec_xst fail to load unaligned addresses,
+ * so we are using vec_vsx_ld, vec_vsx_st instead
+*/
+
+#if defined(__clang__) && !defined(__IBMCPP__)
+#   define vsx_ldf  vec_vsx_ld
+#   define vsx_stf  vec_vsx_st
+#else // GCC , XLC
+#   define vsx_ldf  vec_xl
+#   define vsx_stf  vec_xst
+#endif
+// VSX_OFFSET scales the element offset o by sizeof(*p) into a byte offset
+#define VSX_OFFSET(o, p) ((o) * sizeof(*(p)))
+#define vsx_ld(o, p) vsx_ldf(VSX_OFFSET(o, p), p)
+#define vsx_st(v, o, p) vsx_stf(v, VSX_OFFSET(o, p), p)
+
+/*
+ * implement vsx_ld2(offset, pointer), vsx_st2(vector, offset, pointer) to load and store doublewords
+ * In GCC vec_xl and vec_xst map to vec_vsx_ld, vec_vsx_st, which don't support long long,
+ * and in CLANG we are using vec_vsx_ld, vec_vsx_st because vec_xl, vec_xst fail to load unaligned addresses
+ *
+ * In XLC vec_xl and vec_xst fail to cast int64 (long int) to long long
+*/
+#if (defined(__GNUG__) || defined(__clang__)) && !defined(__IBMCPP__)
+    VSX_FINLINE(vec_udword2) vsx_ld2(long o, const uint64* p)
+    { return vec_udword2_c(vsx_ldf(VSX_OFFSET(o, p), (unsigned int*)p)); }
+
+    VSX_FINLINE(vec_dword2) vsx_ld2(long o, const int64* p)
+    { return vec_dword2_c(vsx_ldf(VSX_OFFSET(o, p), (int*)p)); }
+
+    VSX_FINLINE(void) vsx_st2(const vec_udword2& vec, long o, uint64* p)
+    { vsx_stf(vec_uint4_c(vec), VSX_OFFSET(o, p), (unsigned int*)p); }
+
+    VSX_FINLINE(void) vsx_st2(const vec_dword2& vec, long o, int64* p)
+    { vsx_stf(vec_int4_c(vec), VSX_OFFSET(o, p), (int*)p); }
+#else // XLC
+    VSX_FINLINE(vec_udword2) vsx_ld2(long o, const uint64* p)
+    { return vsx_ldf(VSX_OFFSET(o, p), (unsigned long long*)p); }
+
+    VSX_FINLINE(vec_dword2) vsx_ld2(long o, const int64* p)
+    { return vsx_ldf(VSX_OFFSET(o, p), (long long*)p); }
+
+    VSX_FINLINE(void) vsx_st2(const vec_udword2& vec, long o, uint64* p)
+    { vsx_stf(vec, VSX_OFFSET(o, p), (unsigned long long*)p); }
+
+    VSX_FINLINE(void) vsx_st2(const vec_dword2& vec, long o, int64* p)
+    { vsx_stf(vec, VSX_OFFSET(o, p), (long long*)p); }
+#endif
+
+// Store the lower 8 bytes of v (doubleword element 0) to p
+#define vec_st_l8(v, p) *((uint64*)(p)) = vec_extract(vec_udword2_c(v), 0)
+
+// Store the higher 8 bytes of v (doubleword element 1) to p
+#define vec_st_h8(v, p) *((uint64*)(p)) = vec_extract(vec_udword2_c(v), 1)
+
+// Load 64 bits of data from p into doubleword element 0 (vec_promote(..., 0)), cast to the requested vector type
+#define VSX_IMPL_LOAD_L8(Tvec, Tp)                  \
+VSX_FINLINE(Tvec) vec_ld_l8(const Tp *p)            \
+{ return ((Tvec)vec_promote(*((uint64*)p), 0)); }
+
+VSX_IMPL_LOAD_L8(vec_uchar16, uchar)
+VSX_IMPL_LOAD_L8(vec_char16,  schar)
+VSX_IMPL_LOAD_L8(vec_ushort8, ushort)
+VSX_IMPL_LOAD_L8(vec_short8,  short)
+VSX_IMPL_LOAD_L8(vec_uint4,   uint)
+VSX_IMPL_LOAD_L8(vec_int4,    int)
+VSX_IMPL_LOAD_L8(vec_float4,  float)
+VSX_IMPL_LOAD_L8(vec_udword2, uint64)
+VSX_IMPL_LOAD_L8(vec_dword2,  int64)
+VSX_IMPL_LOAD_L8(vec_double2, double)
+
+// logical not, expressed as NOR of a with itself
+#define vec_not(a) vec_nor(a, a)
+
+// POWER9 intrinsics, emulated when the compiler doesn't already define them
+// not equal
+#ifndef vec_cmpne
+#   define vec_cmpne(a, b) vec_not(vec_cmpeq(a, b))
+#endif
+
+// absolute difference, computed as max(a, b) - min(a, b)
+#ifndef vec_absd
+#   define vec_absd(a, b) vec_sub(vec_max(a, b), vec_min(a, b))
+#endif
+
+/*
+ * Implement vec_unpacklu and vec_unpackhu by merging with a zero vector,
+ * since vec_unpackl, vec_unpackh only support signed integers
+**/
+#define VSX_IMPL_UNPACKU(rt, rg, zero)      \
+VSX_FINLINE(rt) vec_unpacklu(const rg& a)   \
+{ return (rt)(vec_mergel(a, zero)); }       \
+VSX_FINLINE(rt) vec_unpackhu(const rg& a)   \
+{ return (rt)(vec_mergeh(a, zero));  }
+
+VSX_IMPL_UNPACKU(vec_ushort8, vec_uchar16, vec_uchar16_z)
+VSX_IMPL_UNPACKU(vec_uint4,   vec_ushort8, vec_ushort8_z)
+VSX_IMPL_UNPACKU(vec_udword2, vec_uint4,   vec_uint4_z)
+
+/*
+ * Implement vec_mergesqe and vec_mergesqo
+ * Gather the even-indexed (sqe) or odd-indexed (sqo) elements of a, followed by those of b
+*/
+#define VSX_IMPL_PERM(rt, fnm, ...)            \
+VSX_FINLINE(rt) fnm(const rt& a, const rt& b)  \
+{ static const vec_uchar16 perm = {__VA_ARGS__}; return vec_perm(a, b, perm); }
+
+// 16 lanes (byte indices for vec_perm)
+#define perm16_mergesqe 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
+#define perm16_mergesqo 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31
+VSX_IMPL_PERM(vec_uchar16, vec_mergesqe, perm16_mergesqe)
+VSX_IMPL_PERM(vec_uchar16, vec_mergesqo, perm16_mergesqo)
+VSX_IMPL_PERM(vec_char16,  vec_mergesqe, perm16_mergesqe)
+VSX_IMPL_PERM(vec_char16,  vec_mergesqo, perm16_mergesqo)
+// 8 lanes (two byte indices per element)
+#define perm8_mergesqe 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29
+#define perm8_mergesqo 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31
+VSX_IMPL_PERM(vec_ushort8, vec_mergesqe, perm8_mergesqe)
+VSX_IMPL_PERM(vec_ushort8, vec_mergesqo, perm8_mergesqo)
+VSX_IMPL_PERM(vec_short8,  vec_mergesqe, perm8_mergesqe)
+VSX_IMPL_PERM(vec_short8,  vec_mergesqo, perm8_mergesqo)
+// 4 lanes (four byte indices per element)
+#define perm4_mergesqe 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27
+#define perm4_mergesqo 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31
+VSX_IMPL_PERM(vec_uint4,  vec_mergesqe, perm4_mergesqe)
+VSX_IMPL_PERM(vec_uint4,  vec_mergesqo, perm4_mergesqo)
+VSX_IMPL_PERM(vec_int4,   vec_mergesqe, perm4_mergesqe)
+VSX_IMPL_PERM(vec_int4,   vec_mergesqo, perm4_mergesqo)
+VSX_IMPL_PERM(vec_float4, vec_mergesqe, perm4_mergesqe)
+VSX_IMPL_PERM(vec_float4, vec_mergesqo, perm4_mergesqo)
+// 2 lanes: redirected to vec_mergeh / vec_mergel instead of a permute
+VSX_REDIRECT_2RG(vec_double2, vec_double2, vec_mergesqe, vec_mergeh)
+VSX_REDIRECT_2RG(vec_double2, vec_double2, vec_mergesqo, vec_mergel)
+VSX_REDIRECT_2RG(vec_dword2,  vec_dword2,  vec_mergesqe, vec_mergeh)
+VSX_REDIRECT_2RG(vec_dword2,  vec_dword2,  vec_mergesqo, vec_mergel)
+VSX_REDIRECT_2RG(vec_udword2, vec_udword2, vec_mergesqe, vec_mergeh)
+VSX_REDIRECT_2RG(vec_udword2, vec_udword2, vec_mergesqo, vec_mergel)
+
+/*
+ * Implement vec_mergesqh and vec_mergesql
+ * Merge the high (sqh) or low (sql) doubleword halves of two vectors, via vec_mergeh / vec_mergel on vec_udword2
+*/
+#define VSX_IMPL_MERGESQHL(Tvec)                                    \
+VSX_FINLINE(Tvec) vec_mergesqh(const Tvec& a, const Tvec& b)        \
+{ return (Tvec)vec_mergeh(vec_udword2_c(a), vec_udword2_c(b)); }    \
+VSX_FINLINE(Tvec) vec_mergesql(const Tvec& a, const Tvec& b)        \
+{ return (Tvec)vec_mergel(vec_udword2_c(a), vec_udword2_c(b)); }
+VSX_IMPL_MERGESQHL(vec_uchar16)
+VSX_IMPL_MERGESQHL(vec_char16)
+VSX_IMPL_MERGESQHL(vec_ushort8)
+VSX_IMPL_MERGESQHL(vec_short8)
+VSX_IMPL_MERGESQHL(vec_uint4)
+VSX_IMPL_MERGESQHL(vec_int4)
+VSX_IMPL_MERGESQHL(vec_float4)
+VSX_REDIRECT_2RG(vec_udword2, vec_udword2, vec_mergesqh, vec_mergeh)
+VSX_REDIRECT_2RG(vec_udword2, vec_udword2, vec_mergesql, vec_mergel)
+VSX_REDIRECT_2RG(vec_dword2,  vec_dword2,  vec_mergesqh, vec_mergeh)
+VSX_REDIRECT_2RG(vec_dword2,  vec_dword2,  vec_mergesql, vec_mergel)
+VSX_REDIRECT_2RG(vec_double2, vec_double2, vec_mergesqh, vec_mergeh)
+VSX_REDIRECT_2RG(vec_double2, vec_double2, vec_mergesql, vec_mergel)
+
+
+// 2 and 4 channels interleaved store for all types except the 2-lane ones (vsx_stf offsets are in bytes)
+#define VSX_IMPL_ST_INTERLEAVE(Tp, Tvec)                                    \
+VSX_FINLINE(void) vec_st_interleave(const Tvec& a, const Tvec& b, Tp* ptr)  \
+{                                                                           \
+    vsx_stf(vec_mergeh(a, b), 0, ptr);                                      \
+    vsx_stf(vec_mergel(a, b), 16, ptr);                                     \
+}                                                                           \
+VSX_FINLINE(void) vec_st_interleave(const Tvec& a, const Tvec& b,           \
+                                     const Tvec& c, const Tvec& d, Tp* ptr) \
+{                                                                           \
+    Tvec ac = vec_mergeh(a, c);                                             \
+    Tvec bd = vec_mergeh(b, d);                                             \
+    vsx_stf(vec_mergeh(ac, bd), 0, ptr);                                    \
+    vsx_stf(vec_mergel(ac, bd), 16, ptr);                                   \
+    ac = vec_mergel(a, c);                                                  \
+    bd = vec_mergel(b, d);                                                  \
+    vsx_stf(vec_mergeh(ac, bd), 32, ptr);                                   \
+    vsx_stf(vec_mergel(ac, bd), 48, ptr);                                   \
+}
+VSX_IMPL_ST_INTERLEAVE(uchar,  vec_uchar16)
+VSX_IMPL_ST_INTERLEAVE(schar,  vec_char16)
+VSX_IMPL_ST_INTERLEAVE(ushort, vec_ushort8)
+VSX_IMPL_ST_INTERLEAVE(short,  vec_short8)
+VSX_IMPL_ST_INTERLEAVE(uint,   vec_uint4)
+VSX_IMPL_ST_INTERLEAVE(int,    vec_int4)
+VSX_IMPL_ST_INTERLEAVE(float,  vec_float4)
+
+// 2 and 4 channels deinterleaving load for 16 lanes (uchar / schar); vsx_ld offsets are in elements
+#define VSX_IMPL_ST_DINTERLEAVE_8(Tp, Tvec)                                 \
+VSX_FINLINE(void) vec_ld_deinterleave(const Tp* ptr, Tvec& a, Tvec& b)      \
+{                                                                           \
+    Tvec v0 = vsx_ld(0, ptr);                                               \
+    Tvec v1 = vsx_ld(16, ptr);                                              \
+    a = vec_mergesqe(v0, v1);                                               \
+    b = vec_mergesqo(v0, v1);                                               \
+}                                                                           \
+VSX_FINLINE(void) vec_ld_deinterleave(const Tp* ptr, Tvec& a, Tvec& b,      \
+                                       Tvec& c, Tvec& d)                    \
+{                                                                           \
+    Tvec v0 = vsx_ld(0, ptr);                                               \
+    Tvec v1 = vsx_ld(16, ptr);                                              \
+    Tvec v2 = vsx_ld(32, ptr);                                              \
+    Tvec v3 = vsx_ld(48, ptr);                                              \
+    Tvec m0 = vec_mergesqe(v0, v1);                                         \
+    Tvec m1 = vec_mergesqe(v2, v3);                                         \
+    a = vec_mergesqe(m0, m1);                                               \
+    c = vec_mergesqo(m0, m1);                                               \
+    m0 = vec_mergesqo(v0, v1);                                              \
+    m1 = vec_mergesqo(v2, v3);                                              \
+    b = vec_mergesqe(m0, m1);                                               \
+    d = vec_mergesqo(m0, m1);                                               \
+}
+VSX_IMPL_ST_DINTERLEAVE_8(uchar, vec_uchar16)
+VSX_IMPL_ST_DINTERLEAVE_8(schar, vec_char16)
+
+// 2 and 4 channels deinterleaving load for 8 lanes (ushort / short); vsx_ld offsets are in elements
+#define VSX_IMPL_ST_DINTERLEAVE_16(Tp, Tvec)                                \
+VSX_FINLINE(void) vec_ld_deinterleave(const Tp* ptr, Tvec& a, Tvec& b)      \
+{                                                                           \
+    Tvec v0 = vsx_ld(0, ptr);                                               \
+    Tvec v1 = vsx_ld(8, ptr);                                               \
+    a = vec_mergesqe(v0, v1);                                               \
+    b = vec_mergesqo(v0, v1);                                               \
+}                                                                           \
+VSX_FINLINE(void) vec_ld_deinterleave(const Tp* ptr, Tvec& a, Tvec& b,      \
+                                       Tvec& c, Tvec& d)                    \
+{                                                                           \
+    Tvec v0 = vsx_ld(0, ptr);                                               \
+    Tvec v1 = vsx_ld(8, ptr);                                               \
+    Tvec m0 = vec_mergeh(v0, v1);                                           \
+    Tvec m1 = vec_mergel(v0, v1);                                           \
+    Tvec ab0 = vec_mergeh(m0, m1);                                          \
+    Tvec cd0 = vec_mergel(m0, m1);                                          \
+    v0 = vsx_ld(16, ptr);                                                   \
+    v1 = vsx_ld(24, ptr);                                                   \
+    m0 = vec_mergeh(v0, v1);                                                \
+    m1 = vec_mergel(v0, v1);                                                \
+    Tvec ab1 = vec_mergeh(m0, m1);                                          \
+    Tvec cd1 = vec_mergel(m0, m1);                                          \
+    a = vec_mergesqh(ab0, ab1);                                             \
+    b = vec_mergesql(ab0, ab1);                                             \
+    c = vec_mergesqh(cd0, cd1);                                             \
+    d = vec_mergesql(cd0, cd1);                                             \
+}
+VSX_IMPL_ST_DINTERLEAVE_16(ushort, vec_ushort8)
+VSX_IMPL_ST_DINTERLEAVE_16(short,  vec_short8)
+
+// 2 and 4 channels deinterleaving load for 4 lanes (uint / int / float); vsx_ld offsets are in elements
+#define VSX_IMPL_ST_DINTERLEAVE_32(Tp, Tvec)                                \
+VSX_FINLINE(void) vec_ld_deinterleave(const Tp* ptr, Tvec& a, Tvec& b)      \
+{                                                                           \
+    a = vsx_ld(0, ptr);                                                     \
+    b = vsx_ld(4, ptr);                                                     \
+    Tvec m0 = vec_mergeh(a, b);                                             \
+    Tvec m1 = vec_mergel(a, b);                                             \
+    a = vec_mergeh(m0, m1);                                                 \
+    b = vec_mergel(m0, m1);                                                 \
+}                                                                           \
+VSX_FINLINE(void) vec_ld_deinterleave(const Tp* ptr, Tvec& a, Tvec& b,      \
+                                       Tvec& c, Tvec& d)                    \
+{                                                                           \
+    Tvec v0 = vsx_ld(0, ptr);                                               \
+    Tvec v1 = vsx_ld(4, ptr);                                               \
+    Tvec v2 = vsx_ld(8, ptr);                                               \
+    Tvec v3 = vsx_ld(12, ptr);                                              \
+    Tvec m0 = vec_mergeh(v0, v2);                                           \
+    Tvec m1 = vec_mergeh(v1, v3);                                           \
+    a = vec_mergeh(m0, m1);                                                 \
+    b = vec_mergel(m0, m1);                                                 \
+    m0 = vec_mergel(v0, v2);                                                \
+    m1 = vec_mergel(v1, v3);                                                \
+    c = vec_mergeh(m0, m1);                                                 \
+    d = vec_mergel(m0, m1);                                                 \
+}
+VSX_IMPL_ST_DINTERLEAVE_32(uint,  vec_uint4)
+VSX_IMPL_ST_DINTERLEAVE_32(int,   vec_int4)
+VSX_IMPL_ST_DINTERLEAVE_32(float, vec_float4)
+
+// 2 and 4 channels interleave and deinterleave for 2 lanes; ld_func / st_func take offsets in elements
+#define VSX_IMPL_ST_D_INTERLEAVE_64(Tp, Tvec, ld_func, st_func)             \
+VSX_FINLINE(void) vec_st_interleave(const Tvec& a, const Tvec& b, Tp* ptr)  \
+{                                                                           \
+    st_func(vec_mergeh(a, b), 0, ptr);                                      \
+    st_func(vec_mergel(a, b), 2, ptr);                                      \
+}                                                                           \
+VSX_FINLINE(void) vec_st_interleave(const Tvec& a, const Tvec& b,           \
+                                     const Tvec& c, const Tvec& d, Tp* ptr) \
+{                                                                           \
+    st_func(vec_mergeh(a, b), 0, ptr);                                      \
+    st_func(vec_mergeh(c, d), 2, ptr);                                      \
+    st_func(vec_mergel(a, b), 4, ptr);                                      \
+    st_func(vec_mergel(c, d), 6, ptr);                                      \
+}                                                                           \
+VSX_FINLINE(void) vec_ld_deinterleave(const Tp* ptr, Tvec& a, Tvec& b)      \
+{                                                                           \
+    Tvec m0 = ld_func(0, ptr);                                              \
+    Tvec m1 = ld_func(2, ptr);                                              \
+    a = vec_mergeh(m0, m1);                                                 \
+    b = vec_mergel(m0, m1);                                                 \
+}                                                                           \
+VSX_FINLINE(void) vec_ld_deinterleave(const Tp* ptr, Tvec& a, Tvec& b,      \
+                                       Tvec& c, Tvec& d)                    \
+{                                                                           \
+    Tvec v0 = ld_func(0, ptr);                                              \
+    Tvec v1 = ld_func(2, ptr);                                              \
+    Tvec v2 = ld_func(4, ptr);                                              \
+    Tvec v3 = ld_func(6, ptr);                                              \
+    a = vec_mergeh(v0, v2);                                                 \
+    b = vec_mergel(v0, v2);                                                 \
+    c = vec_mergeh(v1, v3);                                                 \
+    d = vec_mergel(v1, v3);                                                 \
+}
+VSX_IMPL_ST_D_INTERLEAVE_64(int64,  vec_dword2,  vsx_ld2, vsx_st2)
+VSX_IMPL_ST_D_INTERLEAVE_64(uint64, vec_udword2, vsx_ld2, vsx_st2)
+VSX_IMPL_ST_D_INTERLEAVE_64(double, vec_double2, vsx_ld,  vsx_st)
+
+/* 3 channels */
+#define VSX_IMPL_ST_INTERLEAVE_3CH_16(Tp, Tvec)                                                   \
+VSX_FINLINE(void) vec_st_interleave(const Tvec& a, const Tvec& b,                                 \
+                                     const Tvec& c, Tp* ptr)                                      \
+{                                                                                                 \
+    static const vec_uchar16 a12 = {0, 16, 0, 1, 17, 0, 2, 18, 0, 3, 19, 0, 4, 20, 0, 5};         \
+    static const vec_uchar16 a123 = {0, 1, 16, 3, 4, 17, 6, 7, 18, 9, 10, 19, 12, 13, 20, 15};    \
+    vsx_st(vec_perm(vec_perm(a, b, a12), c, a123), 0, ptr);                                       \
+    static const vec_uchar16 b12 = {21, 0, 6, 22, 0, 7, 23, 0, 8, 24, 0, 9, 25, 0, 10, 26};       \
+    static const vec_uchar16 b123 = {0, 21, 2, 3, 22, 5, 6, 23, 8, 9, 24, 11, 12, 25, 14, 15};    \
+    vsx_st(vec_perm(vec_perm(a, b, b12), c, b123), 16, ptr);                                      \
+    static const vec_uchar16 c12 = {0, 11, 27, 0, 12, 28, 0, 13, 29, 0, 14, 30, 0, 15, 31, 0};    \
+    static const vec_uchar16 c123 = {26, 1, 2, 27, 4, 5, 28, 7, 8, 29, 10, 11, 30, 13, 14, 31};   \
+    vsx_st(vec_perm(vec_perm(a, b, c12), c, c123), 32, ptr);                                      \
+}                                                                                                 \
+VSX_FINLINE(void) vec_ld_deinterleave(const Tp* ptr, Tvec& a, Tvec& b, Tvec& c)                   \
+{                                                                                                 \
+    Tvec v1 = vsx_ld(0, ptr);                                                                     \
+    Tvec v2 = vsx_ld(16, ptr);                                                                    \
+    Tvec v3 = vsx_ld(32, ptr);                                                                    \
+    static const vec_uchar16 a12_perm = {0, 3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 0, 0, 0, 0, 0};  \
+    static const vec_uchar16 a123_perm = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 17, 20, 23, 26, 29};  \
+    a = vec_perm(vec_perm(v1, v2, a12_perm), v3, a123_perm);                                      \
+    static const vec_uchar16 b12_perm = {1, 4, 7, 10, 13, 16, 19, 22, 25, 28, 31, 0, 0, 0, 0, 0}; \
+    static const vec_uchar16 b123_perm = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 18, 21, 24, 27, 30};  \
+    b = vec_perm(vec_perm(v1, v2, b12_perm), v3, b123_perm);                                      \
+    static const vec_uchar16 c12_perm = {2, 5, 8, 11, 14, 17, 20, 23, 26, 29, 0, 0, 0, 0, 0, 0};  \
+    static const vec_uchar16 c123_perm = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16, 19, 22, 25, 28, 31};  \
+    c = vec_perm(vec_perm(v1, v2, c12_perm), v3, c123_perm);                                      \
+}
+VSX_IMPL_ST_INTERLEAVE_3CH_16(uchar, vec_uchar16)
+VSX_IMPL_ST_INTERLEAVE_3CH_16(schar, vec_char16)
+
+#define VSX_IMPL_ST_INTERLEAVE_3CH_8(Tp, Tvec)                                                    \
+VSX_FINLINE(void) vec_st_interleave(const Tvec& a, const Tvec& b,                                 \
+                                     const Tvec& c, Tp* ptr)                                      \
+{                                                                                                 \
+    static const vec_uchar16 a12 = {0, 1, 16, 17, 0, 0, 2, 3, 18, 19, 0, 0, 4, 5, 20, 21};        \
+    static const vec_uchar16 a123 = {0, 1, 2, 3, 16, 17, 6, 7, 8, 9, 18, 19, 12, 13, 14, 15};     \
+    vsx_st(vec_perm(vec_perm(a, b, a12), c, a123), 0, ptr);                                       \
+    static const vec_uchar16 b12 = {0, 0, 6, 7, 22, 23, 0, 0, 8, 9, 24, 25, 0, 0, 10, 11};        \
+    static const vec_uchar16 b123 = {20, 21, 2, 3, 4, 5, 22, 23, 8, 9, 10, 11, 24, 25, 14, 15};   \
+    vsx_st(vec_perm(vec_perm(a, b, b12), c, b123), 8, ptr);                                       \
+    static const vec_uchar16 c12 = {26, 27, 0, 0, 12, 13, 28, 29, 0, 0, 14, 15, 30, 31, 0, 0};    \
+    static const vec_uchar16 c123 = {0, 1, 26, 27, 4, 5, 6, 7, 28, 29, 10, 11, 12, 13, 30, 31};   \
+    vsx_st(vec_perm(vec_perm(a, b, c12), c, c123), 16, ptr);                                      \
+}                                                                                                 \
+VSX_FINLINE(void) vec_ld_deinterleave(const Tp* ptr, Tvec& a, Tvec& b, Tvec& c)                   \
+{                                                                                                 \
+    Tvec v1 = vsx_ld(0, ptr);                                                                     \
+    Tvec v2 = vsx_ld(8, ptr);                                                                     \
+    Tvec v3 = vsx_ld(16, ptr);                                                                    \
+    static const vec_uchar16 a12_perm = {0, 1, 6, 7, 12, 13, 18, 19, 24, 25, 30, 31, 0, 0, 0, 0}; \
+    static const vec_uchar16 a123_perm = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 20, 21, 26, 27};  \
+    a = vec_perm(vec_perm(v1, v2, a12_perm), v3, a123_perm);                                      \
+    static const vec_uchar16 b12_perm = {2, 3, 8, 9, 14, 15, 20, 21, 26, 27, 0, 0, 0, 0, 0, 0};   \
+    static const vec_uchar16 b123_perm = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16, 17, 22, 23, 28, 29};  \
+    b = vec_perm(vec_perm(v1, v2, b12_perm), v3, b123_perm);                                      \
+    static const vec_uchar16 c12_perm = {4, 5, 10, 11, 16, 17, 22, 23, 28, 29, 0, 0, 0, 0, 0, 0}; \
+    static const vec_uchar16 c123_perm = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 18, 19, 24, 25, 30, 31};  \
+    c = vec_perm(vec_perm(v1, v2, c12_perm), v3, c123_perm);                                      \
+}
+VSX_IMPL_ST_INTERLEAVE_3CH_8(ushort, vec_ushort8)
+VSX_IMPL_ST_INTERLEAVE_3CH_8(short,  vec_short8)
+
+#define VSX_IMPL_ST_INTERLEAVE_3CH_4(Tp, Tvec)                                                     \
+VSX_FINLINE(void) vec_st_interleave(const Tvec& a, const Tvec& b,                                  \
+                                     const Tvec& c, Tp* ptr)                                       \
+{                                                                                                  \
+    Tvec hbc = vec_mergeh(b, c);                                                                   \
+    static const vec_uchar16 ahbc = {0, 1, 2, 3, 16, 17, 18, 19, 20, 21, 22, 23, 4, 5, 6, 7};      \
+    vsx_st(vec_perm(a, hbc, ahbc), 0, ptr);                                                        \
+    Tvec lab = vec_mergel(a, b);                                                                   \
+    vsx_st(vec_sld(lab, hbc, 8), 4, ptr);                                                          \
+    static const vec_uchar16 clab = {8, 9, 10, 11, 24, 25, 26, 27, 28, 29, 30, 31, 12, 13, 14, 15};\
+    vsx_st(vec_perm(c, lab, clab), 8, ptr);                                                        \
+}                                                                                                  \
+VSX_FINLINE(void) vec_ld_deinterleave(const Tp* ptr, Tvec& a, Tvec& b, Tvec& c)                    \
+{                                                                                                  \
+    Tvec v1 = vsx_ld(0, ptr);                                                                      \
+    Tvec v2 = vsx_ld(4, ptr);                                                                      \
+    Tvec v3 = vsx_ld(8, ptr);                                                                      \
+    static const vec_uchar16 flp = {0, 1, 2, 3, 12, 13, 14, 15, 16, 17, 18, 19, 28, 29, 30, 31};   \
+    a = vec_perm(v1, vec_sld(v3, v2, 8), flp);                                                     \
+    static const vec_uchar16 flp2 = {28, 29, 30, 31, 0, 1, 2, 3, 12, 13, 14, 15, 16, 17, 18, 19};  \
+    b = vec_perm(v2, vec_sld(v1, v3, 8), flp2);                                                    \
+    c = vec_perm(vec_sld(v2, v1, 8), v3, flp);                                                     \
+}
+VSX_IMPL_ST_INTERLEAVE_3CH_4(uint,  vec_uint4)
+VSX_IMPL_ST_INTERLEAVE_3CH_4(int,   vec_int4)
+VSX_IMPL_ST_INTERLEAVE_3CH_4(float, vec_float4)
+
+#define VSX_IMPL_ST_INTERLEAVE_3CH_2(Tp, Tvec, ld_func, st_func)     \
+VSX_FINLINE(void) vec_st_interleave(const Tvec& a, const Tvec& b,    \
+                                     const Tvec& c, Tp* ptr)         \
+{                                                                    \
+    st_func(vec_mergeh(a, b), 0, ptr);                               \
+    st_func(vec_permi(c, a, 1), 2, ptr);                             \
+    st_func(vec_mergel(b, c), 4, ptr);                               \
+}                                                                    \
+VSX_FINLINE(void) vec_ld_deinterleave(const Tp* ptr, Tvec& a,        \
+                                       Tvec& b, Tvec& c)             \
+{                                                                    \
+    Tvec v1 = ld_func(0, ptr);                                       \
+    Tvec v2 = ld_func(2, ptr);                                       \
+    Tvec v3 = ld_func(4, ptr);                                       \
+    a = vec_permi(v1, v2, 1);                                        \
+    b = vec_permi(v1, v3, 2);                                        \
+    c = vec_permi(v2, v3, 1);                                        \
+}
+VSX_IMPL_ST_INTERLEAVE_3CH_2(int64,  vec_dword2,  vsx_ld2, vsx_st2)
+VSX_IMPL_ST_INTERLEAVE_3CH_2(uint64, vec_udword2, vsx_ld2, vsx_st2)
+VSX_IMPL_ST_INTERLEAVE_3CH_2(double, vec_double2, vsx_ld,  vsx_st)
+
+#endif // CV_VSX
+
+//! @}
+
+#endif // OPENCV_HAL_VSX_UTILS_HPP
diff --git a/IPL/include/opencv/opencv2/core/wimage.hpp b/IPL/include/opencv/opencv2/core/wimage.hpp
deleted file mode 100644
index ef9d398..0000000
--- a/IPL/include/opencv/opencv2/core/wimage.hpp
+++ /dev/null
@@ -1,603 +0,0 @@
-/*M//////////////////////////////////////////////////////////////////////////////
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to
-//  this license.  If you do not agree to this license, do not download,
-//  install, copy or use the software.
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2008, Google, all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-//  * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//  * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//  * The name of Intel Corporation or contributors may not be used to endorse
-//     or promote products derived from this software without specific
-//     prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is"
-// and any express or implied warranties, including, but not limited to, the
-// implied warranties of merchantability and fitness for a particular purpose
-// are disclaimed. In no event shall the Intel Corporation or contributors be
-// liable for any direct, indirect, incidental, special, exemplary, or
-// consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-/////////////////////////////////////////////////////////////////////////////////
-//M*/
-
-#ifndef __OPENCV_CORE_WIMAGE_HPP__
-#define __OPENCV_CORE_WIMAGE_HPP__
-
-#include "opencv2/core/core_c.h"
-
-#ifdef __cplusplus
-
-namespace cv {
-
-//! @addtogroup core
-//! @{
-
-template <typename T> class WImage;
-template <typename T> class WImageBuffer;
-template <typename T> class WImageView;
-
-template<typename T, int C> class WImageC;
-template<typename T, int C> class WImageBufferC;
-template<typename T, int C> class WImageViewC;
-
-// Commonly used typedefs.
-typedef WImage<uchar>            WImage_b;
-typedef WImageView<uchar>        WImageView_b;
-typedef WImageBuffer<uchar>      WImageBuffer_b;
-
-typedef WImageC<uchar, 1>        WImage1_b;
-typedef WImageViewC<uchar, 1>    WImageView1_b;
-typedef WImageBufferC<uchar, 1>  WImageBuffer1_b;
-
-typedef WImageC<uchar, 3>        WImage3_b;
-typedef WImageViewC<uchar, 3>    WImageView3_b;
-typedef WImageBufferC<uchar, 3>  WImageBuffer3_b;
-
-typedef WImage<float>            WImage_f;
-typedef WImageView<float>        WImageView_f;
-typedef WImageBuffer<float>      WImageBuffer_f;
-
-typedef WImageC<float, 1>        WImage1_f;
-typedef WImageViewC<float, 1>    WImageView1_f;
-typedef WImageBufferC<float, 1>  WImageBuffer1_f;
-
-typedef WImageC<float, 3>        WImage3_f;
-typedef WImageViewC<float, 3>    WImageView3_f;
-typedef WImageBufferC<float, 3>  WImageBuffer3_f;
-
-// There isn't a standard for signed and unsigned short so be more
-// explicit in the typename for these cases.
-typedef WImage<short>            WImage_16s;
-typedef WImageView<short>        WImageView_16s;
-typedef WImageBuffer<short>      WImageBuffer_16s;
-
-typedef WImageC<short, 1>        WImage1_16s;
-typedef WImageViewC<short, 1>    WImageView1_16s;
-typedef WImageBufferC<short, 1>  WImageBuffer1_16s;
-
-typedef WImageC<short, 3>        WImage3_16s;
-typedef WImageViewC<short, 3>    WImageView3_16s;
-typedef WImageBufferC<short, 3>  WImageBuffer3_16s;
-
-typedef WImage<ushort>            WImage_16u;
-typedef WImageView<ushort>        WImageView_16u;
-typedef WImageBuffer<ushort>      WImageBuffer_16u;
-
-typedef WImageC<ushort, 1>        WImage1_16u;
-typedef WImageViewC<ushort, 1>    WImageView1_16u;
-typedef WImageBufferC<ushort, 1>  WImageBuffer1_16u;
-
-typedef WImageC<ushort, 3>        WImage3_16u;
-typedef WImageViewC<ushort, 3>    WImageView3_16u;
-typedef WImageBufferC<ushort, 3>  WImageBuffer3_16u;
-
-/** @brief Image class which provides a thin layer around an IplImage.
-
-The goals of the class design are:
-
-    -# All the data has explicit ownership to avoid memory leaks
-    -# No hidden allocations or copies for performance.
-    -# Easy access to OpenCV methods (which will access IPP if available)
-    -# Can easily treat external data as an image
-    -# Easy to create images which are subsets of other images
-    -# Fast pixel access which can take advantage of number of channels if known at compile time.
-
-The WImage class is the image class which provides the data accessors. The 'W' comes from the fact
-that it is also a wrapper around the popular but inconvenient IplImage class. A WImage can be
-constructed either using a WImageBuffer class which allocates and frees the data, or using a
-WImageView class which constructs a subimage or a view into external data. The view class does no
-memory management. Each class actually has two versions, one when the number of channels is known
-at compile time and one when it isn't. Using the one with the number of channels specified can
-provide some compile time optimizations by using the fact that the number of channels is a
-constant.
-
-We use the convention (c,r) to refer to column c and row r with (0,0) being the upper left corner.
-This is similar to standard Euclidean coordinates with the first coordinate varying in the
-horizontal direction and the second coordinate varying in the vertical direction. Thus (c,r) is
-usually in the domain [0, width) X [0, height)
-
-Example usage:
-@code
-WImageBuffer3_b  im(5,7);  // Make a 5X7 3 channel image of type uchar
-WImageView3_b  sub_im(im, 2,2, 3,3); // 3X3 submatrix
-vector<float> vec(10, 3.0f);
-WImageView1_f user_im(&vec[0], 2, 5);  // 2X5 image w/ supplied data
-
-im.SetZero();  // same as cvSetZero(im.Ipl())
-*im(2, 3) = 15;  // Modify the element at column 2, row 3
-MySetRand(&sub_im);
-
-// Copy the second row into the first.  This can be done with no memory
-// allocation and will use SSE if IPP is available.
-int w = im.Width();
-im.View(0,0, w,1).CopyFrom(im.View(0,1, w,1));
-
-// Doesn't care about source of data since using WImage
-void MySetRand(WImage_b* im) { // Works with any number of channels
-for (int r = 0; r < im->Height(); ++r) {
- float* row = im->Row(r);
- for (int c = 0; c < im->Width(); ++c) {
-    for (int ch = 0; ch < im->Channels(); ++ch, ++row) {
-      *row = uchar(rand() & 255);
-    }
- }
-}
-}
-@endcode
-
-Functions that are not part of the basic image allocation, viewing, and access should come from
-OpenCV, except some useful functions that are not part of OpenCV can be found in wimage_util.h
-*/
-template<typename T>
-class WImage
-{
-public:
-    typedef T BaseType;
-
-    // WImage is an abstract class with no other virtual methods so make the
-    // destructor virtual.
-    virtual ~WImage() = 0;
-
-    // Accessors
-    IplImage* Ipl() {return image_; }
-    const IplImage* Ipl() const {return image_; }
-    T* ImageData() { return reinterpret_cast<T*>(image_->imageData); }
-    const T* ImageData() const {
-        return reinterpret_cast<const T*>(image_->imageData);
-    }
-
-    int Width() const {return image_->width; }
-    int Height() const {return image_->height; }
-
-    // WidthStep is the number of bytes to go to the pixel with the next y coord
-    int WidthStep() const {return image_->widthStep; }
-
-    int Channels() const {return image_->nChannels; }
-    int ChannelSize() const {return sizeof(T); }  // number of bytes per channel
-
-    // Number of bytes per pixel
-    int PixelSize() const {return Channels() * ChannelSize(); }
-
-    // Return depth type (e.g. IPL_DEPTH_8U, IPL_DEPTH_32F) which is the number
-    // of bits per channel and with the signed bit set.
-    // This is known at compile time using specializations.
-    int Depth() const;
-
-    inline const T* Row(int r) const {
-        return reinterpret_cast<T*>(image_->imageData + r*image_->widthStep);
-    }
-
-    inline T* Row(int r) {
-        return reinterpret_cast<T*>(image_->imageData + r*image_->widthStep);
-    }
-
-    // Pixel accessors which returns a pointer to the start of the channel
-    inline T* operator() (int c, int r)  {
-        return reinterpret_cast<T*>(image_->imageData + r*image_->widthStep) +
-            c*Channels();
-    }
-
-    inline const T* operator() (int c, int r) const  {
-        return reinterpret_cast<T*>(image_->imageData + r*image_->widthStep) +
-            c*Channels();
-    }
-
-    // Copy the contents from another image which is just a convenience to cvCopy
-    void CopyFrom(const WImage<T>& src) { cvCopy(src.Ipl(), image_); }
-
-    // Set contents to zero which is just a convenient to cvSetZero
-    void SetZero() { cvSetZero(image_); }
-
-    // Construct a view into a region of this image
-    WImageView<T> View(int c, int r, int width, int height);
-
-protected:
-    // Disallow copy and assignment
-    WImage(const WImage&);
-    void operator=(const WImage&);
-
-    explicit WImage(IplImage* img) : image_(img) {
-        assert(!img || img->depth == Depth());
-    }
-
-    void SetIpl(IplImage* image) {
-        assert(!image || image->depth == Depth());
-        image_ = image;
-    }
-
-    IplImage* image_;
-};
-
-
-/** Image class when both the pixel type and number of channels
-are known at compile time.  This wrapper will speed up some of the operations
-like accessing individual pixels using the () operator.
-*/
-template<typename T, int C>
-class WImageC : public WImage<T>
-{
-public:
-    typedef typename WImage<T>::BaseType BaseType;
-    enum { kChannels = C };
-
-    explicit WImageC(IplImage* img) : WImage<T>(img) {
-        assert(!img || img->nChannels == Channels());
-    }
-
-    // Construct a view into a region of this image
-    WImageViewC<T, C> View(int c, int r, int width, int height);
-
-    // Copy the contents from another image which is just a convenience to cvCopy
-    void CopyFrom(const WImageC<T, C>& src) {
-        cvCopy(src.Ipl(), WImage<T>::image_);
-    }
-
-    // WImageC is an abstract class with no other virtual methods so make the
-    // destructor virtual.
-    virtual ~WImageC() = 0;
-
-    int Channels() const {return C; }
-
-protected:
-    // Disallow copy and assignment
-    WImageC(const WImageC&);
-    void operator=(const WImageC&);
-
-    void SetIpl(IplImage* image) {
-        assert(!image || image->depth == WImage<T>::Depth());
-        WImage<T>::SetIpl(image);
-    }
-};
-
-/** Image class which owns the data, so it can be allocated and is always
-freed.  It cannot be copied but can be explicity cloned.
-*/
-template<typename T>
-class WImageBuffer : public WImage<T>
-{
-public:
-    typedef typename WImage<T>::BaseType BaseType;
-
-    // Default constructor which creates an object that can be
-    WImageBuffer() : WImage<T>(0) {}
-
-    WImageBuffer(int width, int height, int nchannels) : WImage<T>(0) {
-        Allocate(width, height, nchannels);
-    }
-
-    // Constructor which takes ownership of a given IplImage so releases
-    // the image on destruction.
-    explicit WImageBuffer(IplImage* img) : WImage<T>(img) {}
-
-    // Allocate an image.  Does nothing if current size is the same as
-    // the new size.
-    void Allocate(int width, int height, int nchannels);
-
-    // Set the data to point to an image, releasing the old data
-    void SetIpl(IplImage* img) {
-        ReleaseImage();
-        WImage<T>::SetIpl(img);
-    }
-
-    // Clone an image which reallocates the image if of a different dimension.
-    void CloneFrom(const WImage<T>& src) {
-        Allocate(src.Width(), src.Height(), src.Channels());
-        CopyFrom(src);
-    }
-
-    ~WImageBuffer() {
-        ReleaseImage();
-    }
-
-    // Release the image if it isn't null.
-    void ReleaseImage() {
-        if (WImage<T>::image_) {
-            IplImage* image = WImage<T>::image_;
-            cvReleaseImage(&image);
-            WImage<T>::SetIpl(0);
-        }
-    }
-
-    bool IsNull() const {return WImage<T>::image_ == NULL; }
-
-private:
-    // Disallow copy and assignment
-    WImageBuffer(const WImageBuffer&);
-    void operator=(const WImageBuffer&);
-};
-
-/** Like a WImageBuffer class but when the number of channels is known at compile time.
-*/
-template<typename T, int C>
-class WImageBufferC : public WImageC<T, C>
-{
-public:
-    typedef typename WImage<T>::BaseType BaseType;
-    enum { kChannels = C };
-
-    // Default constructor which creates an object that can be
-    WImageBufferC() : WImageC<T, C>(0) {}
-
-    WImageBufferC(int width, int height) : WImageC<T, C>(0) {
-        Allocate(width, height);
-    }
-
-    // Constructor which takes ownership of a given IplImage so releases
-    // the image on destruction.
-    explicit WImageBufferC(IplImage* img) : WImageC<T, C>(img) {}
-
-    // Allocate an image.  Does nothing if current size is the same as
-    // the new size.
-    void Allocate(int width, int height);
-
-    // Set the data to point to an image, releasing the old data
-    void SetIpl(IplImage* img) {
-        ReleaseImage();
-        WImageC<T, C>::SetIpl(img);
-    }
-
-    // Clone an image which reallocates the image if of a different dimension.
-    void CloneFrom(const WImageC<T, C>& src) {
-        Allocate(src.Width(), src.Height());
-        CopyFrom(src);
-    }
-
-    ~WImageBufferC() {
-        ReleaseImage();
-    }
-
-    // Release the image if it isn't null.
-    void ReleaseImage() {
-        if (WImage<T>::image_) {
-            IplImage* image = WImage<T>::image_;
-            cvReleaseImage(&image);
-            WImageC<T, C>::SetIpl(0);
-        }
-    }
-
-    bool IsNull() const {return WImage<T>::image_ == NULL; }
-
-private:
-    // Disallow copy and assignment
-    WImageBufferC(const WImageBufferC&);
-    void operator=(const WImageBufferC&);
-};
-
-/** View into an image class which allows treating a subimage as an image or treating external data
-as an image
-*/
-template<typename T> class WImageView : public WImage<T>
-{
-public:
-    typedef typename WImage<T>::BaseType BaseType;
-
-    // Construct a subimage.  No checks are done that the subimage lies
-    // completely inside the original image.
-    WImageView(WImage<T>* img, int c, int r, int width, int height);
-
-    // Refer to external data.
-    // If not given width_step assumed to be same as width.
-    WImageView(T* data, int width, int height, int channels, int width_step = -1);
-
-    // Refer to external data.  This does NOT take ownership
-    // of the supplied IplImage.
-    WImageView(IplImage* img) : WImage<T>(img) {}
-
-    // Copy constructor
-    WImageView(const WImage<T>& img) : WImage<T>(0) {
-        header_ = *(img.Ipl());
-        WImage<T>::SetIpl(&header_);
-    }
-
-    WImageView& operator=(const WImage<T>& img) {
-        header_ = *(img.Ipl());
-        WImage<T>::SetIpl(&header_);
-        return *this;
-    }
-
-protected:
-    IplImage header_;
-};
-
-
-template<typename T, int C>
-class WImageViewC : public WImageC<T, C>
-{
-public:
-    typedef typename WImage<T>::BaseType BaseType;
-    enum { kChannels = C };
-
-    // Default constructor needed for vectors of views.
-    WImageViewC();
-
-    virtual ~WImageViewC() {}
-
-    // Construct a subimage.  No checks are done that the subimage lies
-    // completely inside the original image.
-    WImageViewC(WImageC<T, C>* img,
-        int c, int r, int width, int height);
-
-    // Refer to external data
-    WImageViewC(T* data, int width, int height, int width_step = -1);
-
-    // Refer to external data.  This does NOT take ownership
-    // of the supplied IplImage.
-    WImageViewC(IplImage* img) : WImageC<T, C>(img) {}
-
-    // Copy constructor which does a shallow copy to allow multiple views
-    // of same data.  gcc-4.1.1 gets confused if both versions of
-    // the constructor and assignment operator are not provided.
-    WImageViewC(const WImageC<T, C>& img) : WImageC<T, C>(0) {
-        header_ = *(img.Ipl());
-        WImageC<T, C>::SetIpl(&header_);
-    }
-    WImageViewC(const WImageViewC<T, C>& img) : WImageC<T, C>(0) {
-        header_ = *(img.Ipl());
-        WImageC<T, C>::SetIpl(&header_);
-    }
-
-    WImageViewC& operator=(const WImageC<T, C>& img) {
-        header_ = *(img.Ipl());
-        WImageC<T, C>::SetIpl(&header_);
-        return *this;
-    }
-    WImageViewC& operator=(const WImageViewC<T, C>& img) {
-        header_ = *(img.Ipl());
-        WImageC<T, C>::SetIpl(&header_);
-        return *this;
-    }
-
-protected:
-    IplImage header_;
-};
-
-
-// Specializations for depth
-template<>
-inline int WImage<uchar>::Depth() const {return IPL_DEPTH_8U; }
-template<>
-inline int WImage<signed char>::Depth() const {return IPL_DEPTH_8S; }
-template<>
-inline int WImage<short>::Depth() const {return IPL_DEPTH_16S; }
-template<>
-inline int WImage<ushort>::Depth() const {return IPL_DEPTH_16U; }
-template<>
-inline int WImage<int>::Depth() const {return IPL_DEPTH_32S; }
-template<>
-inline int WImage<float>::Depth() const {return IPL_DEPTH_32F; }
-template<>
-inline int WImage<double>::Depth() const {return IPL_DEPTH_64F; }
-
-template<typename T> inline WImage<T>::~WImage() {}
-template<typename T, int C> inline WImageC<T, C>::~WImageC() {}
-
-template<typename T>
-inline void WImageBuffer<T>::Allocate(int width, int height, int nchannels)
-{
-    if (IsNull() || WImage<T>::Width() != width ||
-        WImage<T>::Height() != height || WImage<T>::Channels() != nchannels) {
-        ReleaseImage();
-        WImage<T>::image_ = cvCreateImage(cvSize(width, height),
-            WImage<T>::Depth(), nchannels);
-    }
-}
-
-template<typename T, int C>
-inline void WImageBufferC<T, C>::Allocate(int width, int height)
-{
-    if (IsNull() || WImage<T>::Width() != width || WImage<T>::Height() != height) {
-        ReleaseImage();
-        WImageC<T, C>::SetIpl(cvCreateImage(cvSize(width, height),WImage<T>::Depth(), C));
-    }
-}
-
-template<typename T>
-WImageView<T>::WImageView(WImage<T>* img, int c, int r, int width, int height)
-        : WImage<T>(0)
-{
-    header_ = *(img->Ipl());
-    header_.imageData = reinterpret_cast<char*>((*img)(c, r));
-    header_.width = width;
-    header_.height = height;
-    WImage<T>::SetIpl(&header_);
-}
-
-template<typename T>
-WImageView<T>::WImageView(T* data, int width, int height, int nchannels, int width_step)
-          : WImage<T>(0)
-{
-    cvInitImageHeader(&header_, cvSize(width, height), WImage<T>::Depth(), nchannels);
-    header_.imageData = reinterpret_cast<char*>(data);
-    if (width_step > 0) {
-        header_.widthStep = width_step;
-    }
-    WImage<T>::SetIpl(&header_);
-}
-
-template<typename T, int C>
-WImageViewC<T, C>::WImageViewC(WImageC<T, C>* img, int c, int r, int width, int height)
-        : WImageC<T, C>(0)
-{
-    header_ = *(img->Ipl());
-    header_.imageData = reinterpret_cast<char*>((*img)(c, r));
-    header_.width = width;
-    header_.height = height;
-    WImageC<T, C>::SetIpl(&header_);
-}
-
-template<typename T, int C>
-WImageViewC<T, C>::WImageViewC() : WImageC<T, C>(0) {
-    cvInitImageHeader(&header_, cvSize(0, 0), WImage<T>::Depth(), C);
-    header_.imageData = reinterpret_cast<char*>(0);
-    WImageC<T, C>::SetIpl(&header_);
-}
-
-template<typename T, int C>
-WImageViewC<T, C>::WImageViewC(T* data, int width, int height, int width_step)
-    : WImageC<T, C>(0)
-{
-    cvInitImageHeader(&header_, cvSize(width, height), WImage<T>::Depth(), C);
-    header_.imageData = reinterpret_cast<char*>(data);
-    if (width_step > 0) {
-        header_.widthStep = width_step;
-    }
-    WImageC<T, C>::SetIpl(&header_);
-}
-
-// Construct a view into a region of an image
-template<typename T>
-WImageView<T> WImage<T>::View(int c, int r, int width, int height) {
-    return WImageView<T>(this, c, r, width, height);
-}
-
-template<typename T, int C>
-WImageViewC<T, C> WImageC<T, C>::View(int c, int r, int width, int height) {
-    return WImageViewC<T, C>(this, c, r, width, height);
-}
-
-//! @} core
-
-}  // end of namespace
-
-#endif // __cplusplus
-
-#endif
diff --git a/IPL/include/opencv/opencv2/core_detect.hpp b/IPL/include/opencv/opencv2/core_detect.hpp
new file mode 100644
index 0000000..46a5017
--- /dev/null
+++ b/IPL/include/opencv/opencv2/core_detect.hpp
@@ -0,0 +1,149 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+#ifndef _OPENCV_DNN_OBJDETECT_CORE_DETECT_HPP_
+#define _OPENCV_DNN_OBJDETECT_CORE_DETECT_HPP_
+
+#include <vector>
+#include <memory>
+
+#include <opencv2/highgui.hpp>
+#include <opencv2/imgproc.hpp>
+
+/** @defgroup dnn_objdetect DNN used for object detection
+*/
+
+namespace cv
+{
+namespace dnn_objdetect
+{
+
+    //! @addtogroup dnn_objdetect
+    //! @{
+
+    /** @brief Structure to hold the details pertaining to a single bounding box
+     */
+    typedef struct
+    {
+      int xmin, xmax;
+      int ymin, ymax;
+      size_t class_idx;
+      std::string label_name;
+      double class_prob;
+    } object;
+
+
+    /** @brief A class to post process model predictions
+     */
+    class CV_EXPORTS InferBbox
+    {
+      public:
+        /** @brief Constructor
+        @param _delta_bbox Blob containing relative coordinates of bounding boxes
+        @param _class_scores Blob containing the probability values of each class
+        @param _conf_scores Blob containing the confidence scores
+         */
+        InferBbox(Mat _delta_bbox, Mat _class_scores, Mat _conf_scores);
+
+        /** @brief Filters the bounding boxes.
+         */
+        void filter(double thresh =  0.8);
+
+        /** @brief Vector which holds the final detections of the model
+         */
+        std::vector<object> detections;
+
+      protected:
+        /** @brief Transform relative coordinates from ConvDet to bounding box coordinates
+        @param bboxes Vector to hold the predicted bounding boxes
+         */
+        void transform_bboxes(std::vector<std::vector<double> > *bboxes);
+
+        /** @brief Computes final probability values of each bounding box
+        @param final_probs Vector to hold the probability values
+         */
+        void final_probability_dist(std::vector<std::vector<double> > *final_probs);
+
+        /** @brief Transform bounding boxes from [x, y, h, w] to [xmin, ymin, xmax, ymax]
+        @param pre Vector containing initial co-ordinates
+        @param post Vector containing the transformed co-ordinates
+         */
+        void transform_bboxes_inv(std::vector<std::vector<double> > *pre,
+                                  std::vector<std::vector<double> > *post);
+
+        /** @brief Ensures that the bounding box values are within image boundaries
+        @param min_max_boxes Vector containing bounding boxes of the form [xmin, ymin, xmax, ymax]
+         */
+        void assert_predictions(std::vector<std::vector<double> > *min_max_boxes);
+
+        /** @brief Filter top `n` predictions
+        @param probs Final probability values of bounding boxes
+        @param boxes Predicted bounding box co-ordinates
+        @param top_n_boxes Contains bounding box co-ordinates of top `n` boxes
+        @param top_n_idxs Contains class indices of top `n` bounding boxes
+        @param top_n_probs Contains probability values of top `n` bounding boxes
+         */
+        void filter_top_n(std::vector<std::vector<double> > *probs,
+                          std::vector<std::vector<double> > *boxes,
+                          std::vector<std::vector<double> > &top_n_boxes,
+                          std::vector<size_t> &top_n_idxs,
+                          std::vector<double> &top_n_probs);
+
+        /** @brief Wrapper to apply Non-Maximal Suppression
+        @param top_n_boxes Contains bounding box co-ordinates of top `n` boxes
+        @param top_n_idxs Contains class indices of top `n` bounding boxes
+        @param top_n_probs Contains probability values of top `n` bounding boxes
+         */
+        void nms_wrapper(std::vector<std::vector<double> > &top_n_boxes,
+                         std::vector<size_t> &top_n_idxs,
+                         std::vector<double> &top_n_probs);
+
+       /** @brief Applies Non-Maximal Suppression
+       @param boxes Bounding box co-ordinates belonging to one class
+       @param probs Probability values of boxes belonging to one class
+        */
+        std::vector<bool> non_maximal_suppression(std::vector<std::vector<double> >
+                                         *boxes, std::vector<double> *probs);
+
+       /** @brief Computes intersection over union of bounding boxes
+       @param boxes Vector of bounding box co-ordinates
+       @param base_box Base box wrt which IOU is calculated
+       @param iou Vector to store IOU values
+        */
+        void intersection_over_union(std::vector<std::vector<double> > *boxes,
+                          std::vector<double> *base_box, std::vector<double> *iou);
+
+        static inline bool comparator (std::pair<double, size_t> l1,
+            std::pair<double, size_t> l2)
+        {
+          return l1.first > l2.first;
+        }
+
+      private:
+        Mat delta_bbox;
+        Mat class_scores;
+        Mat conf_scores;
+
+        unsigned int image_width;
+        unsigned int image_height;
+
+        unsigned int W, H;
+        std::vector<std::vector<double> > anchors_values;
+        std::vector<std::pair<double, double> > anchor_center;
+        std::vector<std::pair<double, double> > anchor_shapes;
+
+        std::vector<std::string> label_map;
+
+        unsigned int num_classes;
+        unsigned int anchors_per_grid;
+        size_t anchors;
+        double intersection_thresh;
+        double nms_intersection_thresh;
+        size_t n_top_detections;
+        double epsilon;
+    };
+
+    //! @}
+} // namespace dnn_objdetect
+} // namespace cv
+#endif
diff --git a/IPL/include/opencv/opencv2/cvconfig.h b/IPL/include/opencv/opencv2/cvconfig.h
index 1a6b85e..b6a8ca6 100644
--- a/IPL/include/opencv/opencv2/cvconfig.h
+++ b/IPL/include/opencv/opencv2/cvconfig.h
@@ -1,199 +1,168 @@
-/* OpenCV compiled as static or dynamic libs */
-#define BUILD_SHARED_LIBS
-
-/* Compile for 'real' NVIDIA GPU architectures */
-#define CUDA_ARCH_BIN ""
-
-/* Create PTX or BIN for 1.0 compute capability */
-/* #undef CUDA_ARCH_BIN_OR_PTX_10 */
-
-/* NVIDIA GPU features are used */
-#define CUDA_ARCH_FEATURES ""
-
-/* Compile for 'virtual' NVIDIA PTX architectures */
-#define CUDA_ARCH_PTX ""
-
-/* AVFoundation video libraries */
-/* #undef HAVE_AVFOUNDATION */
-
-/* V4L capturing support */
-/* #undef HAVE_CAMV4L */
-
-/* V4L2 capturing support */
-/* #undef HAVE_CAMV4L2 */
-
-/* Carbon windowing environment */
-/* #undef HAVE_CARBON */
-
-/* AMD's Basic Linear Algebra Subprograms Library*/
-/* #undef HAVE_CLAMDBLAS */
-
-/* AMD's OpenCL Fast Fourier Transform Library*/
-/* #undef HAVE_CLAMDFFT */
-
-/* Clp support */
-/* #undef HAVE_CLP */
-
-/* Cocoa API */
-/* #undef HAVE_COCOA */
-
-/* C= */
-/* #undef HAVE_CSTRIPES */
-
-/* NVidia Cuda Basic Linear Algebra Subprograms (BLAS) API*/
-/* #undef HAVE_CUBLAS */
-
-/* NVidia Cuda Runtime API*/
-/* #undef HAVE_CUDA */
-
-/* NVidia Cuda Fast Fourier Transform (FFT) API*/
-/* #undef HAVE_CUFFT */
-
-/* IEEE1394 capturing support */
-/* #undef HAVE_DC1394 */
-
-/* IEEE1394 capturing support - libdc1394 v2.x */
-/* #undef HAVE_DC1394_2 */
-
-/* DirectX */
-#define HAVE_DIRECTX
-#define HAVE_DIRECTX_NV12
-#define HAVE_D3D11
-#define HAVE_D3D10
-#define HAVE_D3D9
-
-/* DirectShow Video Capture library */
-#define HAVE_DSHOW
-
-/* Eigen Matrix & Linear Algebra Library */
-/* #undef HAVE_EIGEN */
-
-/* FFMpeg video library */
-#define HAVE_FFMPEG
-
-/* ffmpeg's libswscale */
-#define HAVE_FFMPEG_SWSCALE
-
-/* ffmpeg in Gentoo */
-#define HAVE_GENTOO_FFMPEG
-
-/* Geospatial Data Abstraction Library */
-/* #undef HAVE_GDAL */
-
-/* GStreamer multimedia framework */
-/* #undef HAVE_GSTREAMER */
-
-/* GTK+ 2.0 Thread support */
-/* #undef HAVE_GTHREAD */
-
-/* GTK+ 2.x toolkit */
-/* #undef HAVE_GTK */
-
-/* Define to 1 if you have the <inttypes.h> header file. */
-/* #undef HAVE_INTTYPES_H */
-
-/* Intel Perceptual Computing SDK library */
-/* #undef HAVE_INTELPERC */
-
-/* Intel Integrated Performance Primitives */
-#define HAVE_IPP
-#define HAVE_IPP_ICV_ONLY
-
-/* Intel IPP Async */
-/* #undef HAVE_IPP_A */
-
-/* JPEG-2000 codec */
-#define HAVE_JASPER
-
-/* IJG JPEG codec */
-#define HAVE_JPEG
-
-/* libpng/png.h needs to be included */
-/* #undef HAVE_LIBPNG_PNG_H */
-
-/* V4L/V4L2 capturing support via libv4l */
-/* #undef HAVE_LIBV4L */
-
-/* Microsoft Media Foundation Capture library */
-/* #undef HAVE_MSMF */
-
-/* NVidia Video Decoding API*/
-/* #undef HAVE_NVCUVID */
-
-/* NVidia Video Encoding API*/
-/* #undef HAVE_NVCUVENC */
-
-/* OpenCL Support */
-#define HAVE_OPENCL
-/* #undef HAVE_OPENCL_STATIC */
-/* #undef HAVE_OPENCL_SVM */
-
-/* OpenEXR codec */
-#define HAVE_OPENEXR
-
-/* OpenGL support*/
-/* #undef HAVE_OPENGL */
-
-/* OpenNI library */
-/* #undef HAVE_OPENNI */
-
-/* OpenNI library */
-/* #undef HAVE_OPENNI2 */
-
-/* PNG codec */
-#define HAVE_PNG
-
-/* Posix threads (pthreads) */
-/* #undef HAVE_PTHREADS */
-
-/* parallel_for with pthreads */
-/* #undef HAVE_PTHREADS_PF */
-
-/* Qt support */
-/* #undef HAVE_QT */
-
-/* Qt OpenGL support */
-/* #undef HAVE_QT_OPENGL */
-
-/* QuickTime video libraries */
-/* #undef HAVE_QUICKTIME */
-
-/* QTKit video libraries */
-/* #undef HAVE_QTKIT */
-
-/* Intel Threading Building Blocks */
-/* #undef HAVE_TBB */
-
-/* TIFF codec */
-#define HAVE_TIFF
-
-/* Unicap video capture library */
-/* #undef HAVE_UNICAP */
-
-/* Video for Windows support */
-#define HAVE_VFW
-
-/* V4L2 capturing support in videoio.h */
-/* #undef HAVE_VIDEOIO */
-
-/* Win32 UI */
-#define HAVE_WIN32UI
-
-/* XIMEA camera support */
-/* #undef HAVE_XIMEA */
-
-/* Xine video library */
-/* #undef HAVE_XINE */
-
-/* Define if your processor stores words with the most significant byte
-   first (like Motorola and SPARC, unlike Intel and VAX). */
-/* #undef WORDS_BIGENDIAN */
-
-/* gPhoto2 library */
-/* #undef HAVE_GPHOTO2 */
-
-/* VA library (libva) */
-/* #undef HAVE_VA */
-
-/* Intel VA-API/OpenCL */
-/* #undef HAVE_VA_INTEL */
+#ifndef OPENCV_CVCONFIG_H_INCLUDED
+#define OPENCV_CVCONFIG_H_INCLUDED
+
+/* OpenCV compiled as static or dynamic libs */
+#define BUILD_SHARED_LIBS
+
+/* OpenCV intrinsics optimized code */
+#define CV_ENABLE_INTRINSICS
+
+/* OpenCV additional optimized code */
+/* #undef CV_DISABLE_OPTIMIZATION */
+
+/* Compile for 'real' NVIDIA GPU architectures */
+#define CUDA_ARCH_BIN ""
+
+/* Create PTX or BIN for 1.0 compute capability */
+/* #undef CUDA_ARCH_BIN_OR_PTX_10 */
+
+/* NVIDIA GPU features are used */
+#define CUDA_ARCH_FEATURES ""
+
+/* Compile for 'virtual' NVIDIA PTX architectures */
+#define CUDA_ARCH_PTX ""
+
+/* AMD's Basic Linear Algebra Subprograms Library*/
+/* #undef HAVE_CLAMDBLAS */
+
+/* AMD's OpenCL Fast Fourier Transform Library*/
+/* #undef HAVE_CLAMDFFT */
+
+/* Clp support */
+/* #undef HAVE_CLP */
+
+/* Cocoa API */
+/* #undef HAVE_COCOA */
+
+/* NVIDIA CUDA Runtime API*/
+/* #undef HAVE_CUDA */
+
+/* NVIDIA CUDA Basic Linear Algebra Subprograms (BLAS) API*/
+/* #undef HAVE_CUBLAS */
+
+/* NVIDIA CUDA Deep Neural Network (cuDNN) API*/
+/* #undef HAVE_CUDNN */
+
+/* NVIDIA CUDA Fast Fourier Transform (FFT) API*/
+/* #undef HAVE_CUFFT */
+
+/* DirectX */
+#define HAVE_DIRECTX
+#define HAVE_DIRECTX_NV12
+#define HAVE_D3D11
+#define HAVE_D3D10
+#define HAVE_D3D9
+
+/* Eigen Matrix & Linear Algebra Library */
+/* #undef HAVE_EIGEN */
+
+/* Geospatial Data Abstraction Library */
+/* #undef HAVE_GDAL */
+
+/* GTK+ 2.0 Thread support */
+/* #undef HAVE_GTHREAD */
+
+/* GTK+ 2.x toolkit */
+/* #undef HAVE_GTK */
+
+/* Halide support */
+/* #undef HAVE_HALIDE */
+
+/* Vulkan support */
+/* #undef HAVE_VULKAN */
+
+/* Define to 1 if you have the <inttypes.h> header file. */
+#define HAVE_INTTYPES_H 1
+
+/* Intel Integrated Performance Primitives */
+#define HAVE_IPP
+#define HAVE_IPP_ICV
+#define HAVE_IPP_IW
+#define HAVE_IPP_IW_LL
+
+/* JPEG-2000 codec */
+/* #undef HAVE_OPENJPEG */
+#define HAVE_JASPER
+
+/* IJG JPEG codec */
+#define HAVE_JPEG
+
+/* libpng/png.h needs to be included */
+/* #undef HAVE_LIBPNG_PNG_H */
+
+/* GDCM DICOM codec */
+/* #undef HAVE_GDCM */
+
+/* NVIDIA Video Decoding API*/
+/* #undef HAVE_NVCUVID */
+
+/* NVIDIA Video Encoding API*/
+/* #undef HAVE_NVCUVENC */
+
+/* OpenCL Support */
+#define HAVE_OPENCL
+/* #undef HAVE_OPENCL_STATIC */
+/* #undef HAVE_OPENCL_SVM */
+
+/* NVIDIA OpenCL D3D Extensions support */
+#define HAVE_OPENCL_D3D11_NV
+
+/* OpenEXR codec */
+#define HAVE_OPENEXR
+
+/* OpenGL support*/
+/* #undef HAVE_OPENGL */
+
+/* PNG codec */
+#define HAVE_PNG
+
+/* Posix threads (pthreads) */
+/* #undef HAVE_PTHREAD */
+
+/* parallel_for with pthreads */
+/* #undef HAVE_PTHREADS_PF */
+
+/* Qt support */
+/* #undef HAVE_QT */
+
+/* Qt OpenGL support */
+/* #undef HAVE_QT_OPENGL */
+
+/* Intel Threading Building Blocks */
+/* #undef HAVE_TBB */
+
+/* Ste||ar Group High Performance ParallelX */
+/* #undef HAVE_HPX */
+
+/* TIFF codec */
+#define HAVE_TIFF
+
+/* Win32 UI */
+#define HAVE_WIN32UI
+
+/* Define if your processor stores words with the most significant byte
+   first (like Motorola and SPARC, unlike Intel and VAX). */
+/* #undef WORDS_BIGENDIAN */
+
+/* VA library (libva) */
+/* #undef HAVE_VA */
+
+/* Intel VA-API/OpenCL */
+/* #undef HAVE_VA_INTEL */
+
+/* Lapack */
+/* #undef HAVE_LAPACK */
+
+/* Library was compiled with functions instrumentation */
+/* #undef ENABLE_INSTRUMENTATION */
+
+/* OpenVX */
+/* #undef HAVE_OPENVX */
+
+/* OpenCV trace utilities */
+#define OPENCV_TRACE
+
+/* Library QR-code decoding */
+#define HAVE_QUIRC
+
+#endif // OPENCV_CVCONFIG_H_INCLUDED
diff --git a/IPL/include/opencv/opencv2/datasets/ar_hmdb.hpp b/IPL/include/opencv/opencv2/datasets/ar_hmdb.hpp
index 8941583..fa257ad 100644
--- a/IPL/include/opencv/opencv2/datasets/ar_hmdb.hpp
+++ b/IPL/include/opencv/opencv2/datasets/ar_hmdb.hpp
@@ -67,7 +67,7 @@ struct AR_hmdbObj : public Object
 class CV_EXPORTS AR_hmdb : public Dataset
 {
 public:
-    virtual void load(const std::string &path) = 0;
+    virtual void load(const std::string &path) CV_OVERRIDE = 0;
 
     static Ptr<AR_hmdb> create();
 };
diff --git a/IPL/include/opencv/opencv2/datasets/ar_sports.hpp b/IPL/include/opencv/opencv2/datasets/ar_sports.hpp
index 7f51405..cb24e38 100644
--- a/IPL/include/opencv/opencv2/datasets/ar_sports.hpp
+++ b/IPL/include/opencv/opencv2/datasets/ar_sports.hpp
@@ -66,7 +66,7 @@ struct AR_sportsObj : public Object
 class CV_EXPORTS AR_sports : public Dataset
 {
 public:
-    virtual void load(const std::string &path) = 0;
+    virtual void load(const std::string &path) CV_OVERRIDE = 0;
 
     static Ptr<AR_sports> create();
 };
diff --git a/IPL/include/opencv/opencv2/datasets/dataset.hpp b/IPL/include/opencv/opencv2/datasets/dataset.hpp
index ccf2b66..7b05c10 100644
--- a/IPL/include/opencv/opencv2/datasets/dataset.hpp
+++ b/IPL/include/opencv/opencv2/datasets/dataset.hpp
@@ -435,6 +435,53 @@ Implements loading dataset:
 ./opencv/build/bin/example_datasets_slam_tumindoor -p=/home/user/path_to_unpacked_folders/
 ~~~
 
+@defgroup datasets_sr Super Resolution
+
+### The Berkeley Segmentation Dataset and Benchmark
+
+Implements loading dataset:
+
+"The Berkeley Segmentation Dataset and Benchmark": <https://www2.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/segbench/>
+
+Usage:
+-# From link above download `BSDS300-images.tgz`.
+-# Unpack.
+-# To load data run:
+~~~
+./opencv/build/bin/example_datasets_sr_bsds -p=/home/user/path_to_unpacked_folder/
+~~~
+
+### DIV2K dataset: DIVerse 2K
+
+Implements loading dataset:
+
+"DIV2K dataset: DIVerse 2K": <https://data.vision.ee.ethz.ch/cvl/DIV2K/>
+
+Usage:
+-# From link above download 'Train data (HR images)' or any other of the dataset files.
+-# Unpack.
+-# To load data run:
+~~~
+./opencv/build/bin/example_datasets_sr_div2k -p=/home/user/path_to_unpacked_folder/folder_containing_the_images/
+~~~
+
+### The General-100 Dataset
+
+Implements loading dataset:
+
+"General-100 dataset contains 100 bmp-format images (with no compression).
+We used this dataset in our FSRCNN ECCV 2016 paper. The size of these 100 images ranges from 710 x 704 (large) to 131 x 112 (small).
+They are all of good quality with clear edges but fewer smooth regions (e.g., sky and ocean), thus are very suitable for the super-resolution training.":
+<http://mmlab.ie.cuhk.edu.hk/projects/FSRCNN.html>
+
+Usage:
+-# From link above download `General-100.zip`.
+-# Unpack.
+-# To load data run:
+~~~
+./opencv/build/bin/example_datasets_sr_general100 -p=/home/user/path_to_unpacked_folder/
+~~~
+
 @defgroup datasets_tr Text Recognition
 
 ### The Chars74K Dataset
@@ -485,7 +532,7 @@ Implements loading dataset:
 
 "VOT 2015 dataset comprises 60 short sequences showing various objects in challenging backgrounds.
 The sequences were chosen from a large pool of sequences including the ALOV dataset, OTB2 dataset,
-non-tracking datasets, Computer Vision Online, Professor Bob Fisher’s Image Database, Videezy,
+non-tracking datasets, Computer Vision Online, Professor Bob Fisher's Image Database, Videezy,
 Center for Research in Computer Vision, University of Central Florida, USA, NYU Center for Genomics
 and Systems Biology, Data Wrangling, Open Access Directory and Learning and Recognition in Vision
 Group, INRIA, France. The VOT sequence selection protocol was applied to obtain a representative
diff --git a/IPL/include/opencv/opencv2/datasets/fr_adience.hpp b/IPL/include/opencv/opencv2/datasets/fr_adience.hpp
index c84bce1..2df5d8d 100644
--- a/IPL/include/opencv/opencv2/datasets/fr_adience.hpp
+++ b/IPL/include/opencv/opencv2/datasets/fr_adience.hpp
@@ -83,7 +83,7 @@ struct FR_adienceObj : public Object
 class CV_EXPORTS FR_adience : public Dataset
 {
 public:
-    virtual void load(const std::string &path) = 0;
+    virtual void load(const std::string &path) CV_OVERRIDE = 0;
 
     static Ptr<FR_adience> create();
 
diff --git a/IPL/include/opencv/opencv2/datasets/fr_lfw.hpp b/IPL/include/opencv/opencv2/datasets/fr_lfw.hpp
index 7065da7..16d6315 100644
--- a/IPL/include/opencv/opencv2/datasets/fr_lfw.hpp
+++ b/IPL/include/opencv/opencv2/datasets/fr_lfw.hpp
@@ -66,7 +66,7 @@ struct FR_lfwObj : public Object
 class CV_EXPORTS FR_lfw : public Dataset
 {
 public:
-    virtual void load(const std::string &path) = 0;
+    virtual void load(const std::string &path) CV_OVERRIDE = 0;
 
     static Ptr<FR_lfw> create();
 };
diff --git a/IPL/include/opencv/opencv2/datasets/gr_chalearn.hpp b/IPL/include/opencv/opencv2/datasets/gr_chalearn.hpp
index a8eaa6c..6a5c764 100644
--- a/IPL/include/opencv/opencv2/datasets/gr_chalearn.hpp
+++ b/IPL/include/opencv/opencv2/datasets/gr_chalearn.hpp
@@ -83,7 +83,7 @@ struct GR_chalearnObj : public Object
 class CV_EXPORTS GR_chalearn : public Dataset
 {
 public:
-    virtual void load(const std::string &path) = 0;
+    virtual void load(const std::string &path) CV_OVERRIDE = 0;
 
     static Ptr<GR_chalearn> create();
 };
diff --git a/IPL/include/opencv/opencv2/datasets/gr_skig.hpp b/IPL/include/opencv/opencv2/datasets/gr_skig.hpp
index 9c86224..35bc6f4 100644
--- a/IPL/include/opencv/opencv2/datasets/gr_skig.hpp
+++ b/IPL/include/opencv/opencv2/datasets/gr_skig.hpp
@@ -105,7 +105,7 @@ struct GR_skigObj : public Object
 class CV_EXPORTS GR_skig : public Dataset
 {
 public:
-    virtual void load(const std::string &path) = 0;
+    virtual void load(const std::string &path) CV_OVERRIDE = 0;
 
     static Ptr<GR_skig> create();
 };
diff --git a/IPL/include/opencv/opencv2/datasets/hpe_humaneva.hpp b/IPL/include/opencv/opencv2/datasets/hpe_humaneva.hpp
index 5366e0d..2eba8cf 100644
--- a/IPL/include/opencv/opencv2/datasets/hpe_humaneva.hpp
+++ b/IPL/include/opencv/opencv2/datasets/hpe_humaneva.hpp
@@ -77,7 +77,7 @@ enum datasetType
 class CV_EXPORTS HPE_humaneva : public Dataset
 {
 public:
-    virtual void load(const std::string &path) = 0;
+    virtual void load(const std::string &path) CV_OVERRIDE = 0;
 
     static Ptr<HPE_humaneva> create(int num=humaneva_1);
 };
diff --git a/IPL/include/opencv/opencv2/datasets/hpe_parse.hpp b/IPL/include/opencv/opencv2/datasets/hpe_parse.hpp
index 7629e2c..42294df 100644
--- a/IPL/include/opencv/opencv2/datasets/hpe_parse.hpp
+++ b/IPL/include/opencv/opencv2/datasets/hpe_parse.hpp
@@ -65,7 +65,7 @@ struct HPE_parseObj : public Object
 class CV_EXPORTS HPE_parse : public Dataset
 {
 public:
-    virtual void load(const std::string &path) = 0;
+    virtual void load(const std::string &path) CV_OVERRIDE = 0;
 
     static Ptr<HPE_parse> create();
 };
diff --git a/IPL/include/opencv/opencv2/datasets/ir_affine.hpp b/IPL/include/opencv/opencv2/datasets/ir_affine.hpp
index 3b04a4b..7cc0c09 100644
--- a/IPL/include/opencv/opencv2/datasets/ir_affine.hpp
+++ b/IPL/include/opencv/opencv2/datasets/ir_affine.hpp
@@ -67,7 +67,7 @@ struct IR_affineObj : public Object
 class CV_EXPORTS IR_affine : public Dataset
 {
 public:
-    virtual void load(const std::string &path) = 0;
+    virtual void load(const std::string &path) CV_OVERRIDE = 0;
 
     static Ptr<IR_affine> create();
 };
diff --git a/IPL/include/opencv/opencv2/datasets/ir_robot.hpp b/IPL/include/opencv/opencv2/datasets/ir_robot.hpp
index 0acfe0a..446e3be 100644
--- a/IPL/include/opencv/opencv2/datasets/ir_robot.hpp
+++ b/IPL/include/opencv/opencv2/datasets/ir_robot.hpp
@@ -76,7 +76,7 @@ struct IR_robotObj : public Object
 class CV_EXPORTS IR_robot : public Dataset
 {
 public:
-    virtual void load(const std::string &path) = 0;
+    virtual void load(const std::string &path) CV_OVERRIDE = 0;
 
     static Ptr<IR_robot> create();
 };
diff --git a/IPL/include/opencv/opencv2/datasets/is_bsds.hpp b/IPL/include/opencv/opencv2/datasets/is_bsds.hpp
index 7357a67..b4a3e65 100644
--- a/IPL/include/opencv/opencv2/datasets/is_bsds.hpp
+++ b/IPL/include/opencv/opencv2/datasets/is_bsds.hpp
@@ -65,7 +65,7 @@ struct IS_bsdsObj : public Object
 class CV_EXPORTS IS_bsds : public Dataset
 {
 public:
-    virtual void load(const std::string &path) = 0;
+    virtual void load(const std::string &path) CV_OVERRIDE = 0;
 
     static Ptr<IS_bsds> create();
 };
diff --git a/IPL/include/opencv/opencv2/datasets/is_weizmann.hpp b/IPL/include/opencv/opencv2/datasets/is_weizmann.hpp
index 5daa420..4f61578 100644
--- a/IPL/include/opencv/opencv2/datasets/is_weizmann.hpp
+++ b/IPL/include/opencv/opencv2/datasets/is_weizmann.hpp
@@ -68,7 +68,7 @@ struct IS_weizmannObj : public Object
 class CV_EXPORTS IS_weizmann : public Dataset
 {
 public:
-    virtual void load(const std::string &path) = 0;
+    virtual void load(const std::string &path) CV_OVERRIDE = 0;
 
     static Ptr<IS_weizmann> create();
 };
diff --git a/IPL/include/opencv/opencv2/datasets/msm_epfl.hpp b/IPL/include/opencv/opencv2/datasets/msm_epfl.hpp
index a08fc4b..54aebde 100644
--- a/IPL/include/opencv/opencv2/datasets/msm_epfl.hpp
+++ b/IPL/include/opencv/opencv2/datasets/msm_epfl.hpp
@@ -77,7 +77,7 @@ struct MSM_epflObj : public Object
 class CV_EXPORTS MSM_epfl : public Dataset
 {
 public:
-    virtual void load(const std::string &path) = 0;
+    virtual void load(const std::string &path) CV_OVERRIDE = 0;
 
     static Ptr<MSM_epfl> create();
 };
diff --git a/IPL/include/opencv/opencv2/datasets/msm_middlebury.hpp b/IPL/include/opencv/opencv2/datasets/msm_middlebury.hpp
index 2fd67bf..9b883e5 100644
--- a/IPL/include/opencv/opencv2/datasets/msm_middlebury.hpp
+++ b/IPL/include/opencv/opencv2/datasets/msm_middlebury.hpp
@@ -68,7 +68,7 @@ struct MSM_middleburyObj : public Object
 class CV_EXPORTS MSM_middlebury : public Dataset
 {
 public:
-    virtual void load(const std::string &path) = 0;
+    virtual void load(const std::string &path) CV_OVERRIDE = 0;
 
     static Ptr<MSM_middlebury> create();
 };
diff --git a/IPL/include/opencv/opencv2/datasets/or_imagenet.hpp b/IPL/include/opencv/opencv2/datasets/or_imagenet.hpp
index 26a8f63..079165b 100644
--- a/IPL/include/opencv/opencv2/datasets/or_imagenet.hpp
+++ b/IPL/include/opencv/opencv2/datasets/or_imagenet.hpp
@@ -66,7 +66,7 @@ struct OR_imagenetObj : public Object
 class CV_EXPORTS OR_imagenet : public Dataset
 {
 public:
-    virtual void load(const std::string &path) = 0;
+    virtual void load(const std::string &path) CV_OVERRIDE = 0;
 
     static Ptr<OR_imagenet> create();
 };
diff --git a/IPL/include/opencv/opencv2/datasets/or_mnist.hpp b/IPL/include/opencv/opencv2/datasets/or_mnist.hpp
index ff6bd60..ac4d838 100644
--- a/IPL/include/opencv/opencv2/datasets/or_mnist.hpp
+++ b/IPL/include/opencv/opencv2/datasets/or_mnist.hpp
@@ -66,7 +66,7 @@ struct OR_mnistObj : public Object
 class CV_EXPORTS OR_mnist : public Dataset
 {
 public:
-    virtual void load(const std::string &path) = 0;
+    virtual void load(const std::string &path) CV_OVERRIDE = 0;
 
     static Ptr<OR_mnist> create();
 };
diff --git a/IPL/include/opencv/opencv2/datasets/or_pascal.hpp b/IPL/include/opencv/opencv2/datasets/or_pascal.hpp
index bca8e62..c5f27af 100644
--- a/IPL/include/opencv/opencv2/datasets/or_pascal.hpp
+++ b/IPL/include/opencv/opencv2/datasets/or_pascal.hpp
@@ -89,7 +89,7 @@ struct OR_pascalObj : public Object
 class CV_EXPORTS OR_pascal : public Dataset
 {
 public:
-    virtual void load(const std::string &path) = 0;
+    virtual void load(const std::string &path) CV_OVERRIDE = 0;
 
     static Ptr<OR_pascal> create();
 };
diff --git a/IPL/include/opencv/opencv2/datasets/or_sun.hpp b/IPL/include/opencv/opencv2/datasets/or_sun.hpp
index 059c0d4..7d3d887 100644
--- a/IPL/include/opencv/opencv2/datasets/or_sun.hpp
+++ b/IPL/include/opencv/opencv2/datasets/or_sun.hpp
@@ -66,7 +66,7 @@ struct OR_sunObj : public Object
 class CV_EXPORTS OR_sun : public Dataset
 {
 public:
-    virtual void load(const std::string &path) = 0;
+    virtual void load(const std::string &path) CV_OVERRIDE = 0;
 
     static Ptr<OR_sun> create();
 
diff --git a/IPL/include/opencv/opencv2/datasets/pd_caltech.hpp b/IPL/include/opencv/opencv2/datasets/pd_caltech.hpp
index 9ff7278..c8545e2 100644
--- a/IPL/include/opencv/opencv2/datasets/pd_caltech.hpp
+++ b/IPL/include/opencv/opencv2/datasets/pd_caltech.hpp
@@ -76,7 +76,7 @@ struct PD_caltechObj : public Object
 class CV_EXPORTS PD_caltech : public Dataset
 {
 public:
-    virtual void load(const std::string &path) = 0;
+    virtual void load(const std::string &path) CV_OVERRIDE = 0;
 
     static Ptr<PD_caltech> create();
 };
diff --git a/IPL/include/opencv/opencv2/datasets/pd_inria.hpp b/IPL/include/opencv/opencv2/datasets/pd_inria.hpp
index 7586578..9e1d30b 100644
--- a/IPL/include/opencv/opencv2/datasets/pd_inria.hpp
+++ b/IPL/include/opencv/opencv2/datasets/pd_inria.hpp
@@ -57,7 +57,7 @@ namespace datasets
 //! @addtogroup datasets_pd
 //! @{
 
-enum sampleType 
+enum sampleType
 {
     POS = 0,
     NEG = 1
@@ -67,7 +67,7 @@ struct PD_inriaObj : public Object
 {
     // image file name
     std::string filename;
-    
+
     // positive or negative
     sampleType sType;
 
@@ -83,7 +83,7 @@ struct PD_inriaObj : public Object
 class CV_EXPORTS PD_inria : public Dataset
 {
 public:
-    virtual void load(const std::string &path) = 0;
+    virtual void load(const std::string &path) CV_OVERRIDE = 0;
 
     static Ptr<PD_inria> create();
 };
diff --git a/IPL/include/opencv/opencv2/datasets/slam_kitti.hpp b/IPL/include/opencv/opencv2/datasets/slam_kitti.hpp
index 1b7c408..a004b95 100644
--- a/IPL/include/opencv/opencv2/datasets/slam_kitti.hpp
+++ b/IPL/include/opencv/opencv2/datasets/slam_kitti.hpp
@@ -74,7 +74,7 @@ struct SLAM_kittiObj : public Object
 class CV_EXPORTS SLAM_kitti : public Dataset
 {
 public:
-    virtual void load(const std::string &path) = 0;
+    virtual void load(const std::string &path) CV_OVERRIDE = 0;
 
     static Ptr<SLAM_kitti> create();
 };
diff --git a/IPL/include/opencv/opencv2/datasets/slam_tumindoor.hpp b/IPL/include/opencv/opencv2/datasets/slam_tumindoor.hpp
index 758dd13..eca07ad 100644
--- a/IPL/include/opencv/opencv2/datasets/slam_tumindoor.hpp
+++ b/IPL/include/opencv/opencv2/datasets/slam_tumindoor.hpp
@@ -74,7 +74,7 @@ struct SLAM_tumindoorObj : public Object
 class CV_EXPORTS SLAM_tumindoor : public Dataset
 {
 public:
-    virtual void load(const std::string &path) = 0;
+    virtual void load(const std::string &path) CV_OVERRIDE = 0;
 
     static Ptr<SLAM_tumindoor> create();
 };
diff --git a/IPL/include/opencv/opencv2/datasets/sr_bsds.hpp b/IPL/include/opencv/opencv2/datasets/sr_bsds.hpp
new file mode 100644
index 0000000..c319323
--- /dev/null
+++ b/IPL/include/opencv/opencv2/datasets/sr_bsds.hpp
@@ -0,0 +1,41 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_DATASETS_SR_BSDS_HPP
+#define OPENCV_DATASETS_SR_BSDS_HPP
+
+#include <string>
+#include <vector>
+
+#include "opencv2/datasets/dataset.hpp"
+
+#include <opencv2/core.hpp>
+
+namespace cv
+{
+namespace datasets
+{
+
+//! @addtogroup datasets_sr
+//! @{
+
+struct SR_bsdsObj : public Object
+{
+    std::string imageName;
+};
+
+class CV_EXPORTS SR_bsds : public Dataset
+{
+public:
+    virtual void load(const std::string &path) CV_OVERRIDE = 0;
+
+    static Ptr<SR_bsds> create();
+};
+
+//! @}
+
+}
+}
+
+#endif
\ No newline at end of file
diff --git a/IPL/include/opencv/opencv2/datasets/sr_div2k.hpp b/IPL/include/opencv/opencv2/datasets/sr_div2k.hpp
new file mode 100644
index 0000000..9e2c025
--- /dev/null
+++ b/IPL/include/opencv/opencv2/datasets/sr_div2k.hpp
@@ -0,0 +1,41 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_DATASETS_SR_DIV2K_HPP
+#define OPENCV_DATASETS_SR_DIV2K_HPP
+
+#include <string>
+#include <vector>
+
+#include "opencv2/datasets/dataset.hpp"
+
+#include <opencv2/core.hpp>
+
+namespace cv
+{
+namespace datasets
+{
+
+//! @addtogroup datasets_sr
+//! @{
+
+struct SR_div2kObj : public Object
+{
+    std::string imageName;
+};
+
+class CV_EXPORTS SR_div2k : public Dataset
+{
+public:
+    virtual void load(const std::string &path) CV_OVERRIDE = 0;
+
+    static Ptr<SR_div2k> create();
+};
+
+//! @}
+
+}
+}
+
+#endif
\ No newline at end of file
diff --git a/IPL/include/opencv/opencv2/datasets/sr_general100.hpp b/IPL/include/opencv/opencv2/datasets/sr_general100.hpp
new file mode 100644
index 0000000..8b2d189
--- /dev/null
+++ b/IPL/include/opencv/opencv2/datasets/sr_general100.hpp
@@ -0,0 +1,41 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_DATASETS_SR_GENERAL100_HPP
+#define OPENCV_DATASETS_SR_GENERAL100_HPP
+
+#include <string>
+#include <vector>
+
+#include "opencv2/datasets/dataset.hpp"
+
+#include <opencv2/core.hpp>
+
+namespace cv
+{
+namespace datasets
+{
+
+//! @addtogroup datasets_sr
+//! @{
+
+struct SR_general100Obj : public Object
+{
+    std::string imageName;
+};
+
+class CV_EXPORTS SR_general100 : public Dataset
+{
+public:
+    virtual void load(const std::string &path) CV_OVERRIDE = 0;
+
+    static Ptr<SR_general100> create();
+};
+
+//! @}
+
+}
+}
+
+#endif
\ No newline at end of file
diff --git a/IPL/include/opencv/opencv2/datasets/tr_chars.hpp b/IPL/include/opencv/opencv2/datasets/tr_chars.hpp
index c213bff..69aab83 100644
--- a/IPL/include/opencv/opencv2/datasets/tr_chars.hpp
+++ b/IPL/include/opencv/opencv2/datasets/tr_chars.hpp
@@ -66,7 +66,7 @@ struct TR_charsObj : public Object
 class CV_EXPORTS TR_chars : public Dataset
 {
 public:
-    virtual void load(const std::string &path) = 0;
+    virtual void load(const std::string &path) CV_OVERRIDE = 0;
 
     static Ptr<TR_chars> create();
 };
diff --git a/IPL/include/opencv/opencv2/datasets/tr_icdar.hpp b/IPL/include/opencv/opencv2/datasets/tr_icdar.hpp
index abfd7db..3b43d69 100644
--- a/IPL/include/opencv/opencv2/datasets/tr_icdar.hpp
+++ b/IPL/include/opencv/opencv2/datasets/tr_icdar.hpp
@@ -74,7 +74,7 @@ struct TR_icdarObj : public Object
 class CV_EXPORTS TR_icdar : public Dataset
 {
 public:
-    virtual void load(const std::string &path) = 0;
+    virtual void load(const std::string &path) CV_OVERRIDE = 0;
 
     static Ptr<TR_icdar> create();
 };
diff --git a/IPL/include/opencv/opencv2/datasets/tr_svt.hpp b/IPL/include/opencv/opencv2/datasets/tr_svt.hpp
index 6c2d533..7b94180 100644
--- a/IPL/include/opencv/opencv2/datasets/tr_svt.hpp
+++ b/IPL/include/opencv/opencv2/datasets/tr_svt.hpp
@@ -73,7 +73,7 @@ struct TR_svtObj : public Object
 class CV_EXPORTS TR_svt : public Dataset
 {
 public:
-    virtual void load(const std::string &path) = 0;
+    virtual void load(const std::string &path) CV_OVERRIDE = 0;
 
     static Ptr<TR_svt> create();
 };
diff --git a/IPL/include/opencv/opencv/cv.h b/IPL/include/opencv/opencv2/datasets/track_alov.hpp
similarity index 53%
rename from IPL/include/opencv/opencv/cv.h
rename to IPL/include/opencv/opencv2/datasets/track_alov.hpp
index 0aefc6d..a3c5da0 100644
--- a/IPL/include/opencv/opencv/cv.h
+++ b/IPL/include/opencv/opencv2/datasets/track_alov.hpp
@@ -10,8 +10,7 @@
 //                           License Agreement
 //                For Open Source Computer Vision Library
 //
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2014, Itseez Inc, all rights reserved.
 // Third party copyrights are property of their respective owners.
 //
 // Redistribution and use in source and binary forms, with or without modification,
@@ -30,7 +29,7 @@
 // This software is provided by the copyright holders and contributors "as is" and
 // any express or implied warranties, including, but not limited to, the implied
 // warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
+// In no event shall the Itseez Inc or contributors be liable for any direct,
 // indirect, incidental, special, exemplary, or consequential damages
 // (including, but not limited to, procurement of substitute goods or services;
 // loss of use, data, or profits; or business interruption) however caused
@@ -40,34 +39,69 @@
 //
 //M*/
 
-#ifndef __OPENCV_OLD_CV_H__
-#define __OPENCV_OLD_CV_H__
-
-#if defined(_MSC_VER)
-    #define CV_DO_PRAGMA(x) __pragma(x)
-    #define __CVSTR2__(x) #x
-    #define __CVSTR1__(x) __CVSTR2__(x)
-    #define __CVMSVCLOC__ __FILE__ "("__CVSTR1__(__LINE__)") : "
-    #define CV_MSG_PRAGMA(_msg) CV_DO_PRAGMA(message (__CVMSVCLOC__ _msg))
-#elif defined(__GNUC__)
-    #define CV_DO_PRAGMA(x) _Pragma (#x)
-    #define CV_MSG_PRAGMA(_msg) CV_DO_PRAGMA(message (_msg))
-#else
-    #define CV_DO_PRAGMA(x)
-    #define CV_MSG_PRAGMA(_msg)
-#endif
-#define CV_WARNING(x) CV_MSG_PRAGMA("Warning: " #x)
+#ifndef OPENCV_DATASETS_TRACK_ALOV_HPP
+#define OPENCV_DATASETS_TRACK_ALOV_HPP
+
+#include <string>
+#include <vector>
+
+#include "opencv2/datasets/dataset.hpp"
+#include "opencv2/datasets/util.hpp"
+
+using namespace std;
+
+namespace cv
+{
+namespace datasets
+{
+
+//! @addtogroup datasets_track
+//! @{
+
+struct TRACK_alovObj : public Object
+{
+    int id;
+    std::string imagePath;
+    vector <Point2f> gtbb;
+};
+
+const string sectionNames[] = { "01-Light", "02-SurfaceCover", "03-Specularity", "04-Transparency", "05-Shape", "06-MotionSmoothness", "07-MotionCoherence",
+"08-Clutter", "09-Confusion", "10-LowContrast", "11-Occlusion", "12-MovingCamera", "13-ZoomingCamera", "14-LongDuration" };
+
+const int sectionSizes[] = { 33, 15, 18, 20, 24, 22, 12, 15, 37, 23, 34, 22, 29, 10 };
 
-//CV_WARNING("This is a deprecated opencv header provided for compatibility. Please include a header from a corresponding opencv module")
+class CV_EXPORTS TRACK_alov : public Dataset
+{
+public:
+    static Ptr<TRACK_alov> create();
 
-#include "opencv2/core/core_c.h"
-#include "opencv2/imgproc/imgproc_c.h"
-#include "opencv2/photo/photo_c.h"
-#include "opencv2/video/tracking_c.h"
-#include "opencv2/objdetect/objdetect_c.h"
+    virtual void load(const std::string &path) CV_OVERRIDE = 0;
 
-#if !defined(CV_IMPL)
-#define CV_IMPL extern "C"
-#endif //CV_IMPL
+    //Load only frames with annotations (~every 5-th frame)
+    virtual void loadAnnotatedOnly(const std::string &path) = 0;
 
-#endif // __OPENCV_OLD_CV_H_
+    virtual int getDatasetsNum() = 0;
+
+    virtual int getDatasetLength(int id) = 0;
+
+    virtual bool initDataset(int id) = 0;
+
+    virtual bool getNextFrame(Mat &frame) = 0;
+    virtual vector <Point2f> getNextGT() = 0;
+
+    //Get frame/GT by datasetID (1..N) frameID (1..K)
+    virtual bool getFrame(Mat &frame, int datasetID, int frameID) = 0;
+    virtual vector <Point2f> getGT(int datasetID, int frameID) = 0;
+
+protected:
+    vector <vector <Ptr<TRACK_alovObj> > > data;
+    int activeDatasetID;
+    int frameCounter;
+};
+
+//! @}
+
+}
+}
+
+#endif
diff --git a/IPL/include/opencv/opencv2/datasets/track_vot.hpp b/IPL/include/opencv/opencv2/datasets/track_vot.hpp
index 6249f02..a5a9c6f 100644
--- a/IPL/include/opencv/opencv2/datasets/track_vot.hpp
+++ b/IPL/include/opencv/opencv2/datasets/track_vot.hpp
@@ -70,7 +70,7 @@ class CV_EXPORTS TRACK_vot : public Dataset
 public:
     static Ptr<TRACK_vot> create();
 
-    virtual void load(const std::string &path) = 0;
+    virtual void load(const std::string &path) CV_OVERRIDE = 0;
 
     virtual int getDatasetsNum() = 0;
 
diff --git a/IPL/include/opencv/opencv2/dnn.hpp b/IPL/include/opencv/opencv2/dnn.hpp
index 37be989..97f2fe3 100644
--- a/IPL/include/opencv/opencv2/dnn.hpp
+++ b/IPL/include/opencv/opencv2/dnn.hpp
@@ -39,12 +39,12 @@
 //
 //M*/
 
-#ifndef __OPENCV_DNN_HPP__
-#define __OPENCV_DNN_HPP__
+#ifndef OPENCV_DNN_HPP
+#define OPENCV_DNN_HPP
 
-// This is an umbrealla header to include into you project.
+// This is an umbrella header to include into your project.
 // We are free to change headers layout in dnn subfolder, so please include
-// this header for future compartibility
+// this header for future compatibility
 
 
 /** @defgroup dnn Deep Neural Network module
@@ -52,13 +52,27 @@
     This module contains:
         - API for new layers creation, layers are building bricks of neural networks;
         - set of built-in most-useful Layers;
-        - API to constuct and modify comprehensive neural networks from layers;
-        - functionality for loading serialized networks models from differnet frameworks.
+        - API to construct and modify comprehensive neural networks from layers;
+        - functionality for loading serialized networks models from different frameworks.
 
-    Functionality of this module is designed only for forward pass computations (i. e. network testing).
+    Functionality of this module is designed only for forward pass computations (i.e. network testing).
     A network training is in principle not supported.
   @}
 */
+/** @example samples/dnn/classification.cpp
+Check @ref tutorial_dnn_googlenet "the corresponding tutorial" for more details
+*/
+/** @example samples/dnn/colorization.cpp
+*/
+/** @example samples/dnn/object_detection.cpp
+Check @ref tutorial_dnn_yolo "the corresponding tutorial" for more details
+*/
+/** @example samples/dnn/openpose.cpp
+*/
+/** @example samples/dnn/segmentation.cpp
+*/
+/** @example samples/dnn/text_detection.cpp
+*/
 #include <opencv2/dnn/dnn.hpp>
 
-#endif /* __OPENCV_DNN_HPP__ */
+#endif /* OPENCV_DNN_HPP */
diff --git a/IPL/include/opencv/opencv2/dnn/all_layers.hpp b/IPL/include/opencv/opencv2/dnn/all_layers.hpp
new file mode 100644
index 0000000..0969ba6
--- /dev/null
+++ b/IPL/include/opencv/opencv2/dnn/all_layers.hpp
@@ -0,0 +1,656 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_DNN_DNN_ALL_LAYERS_HPP
+#define OPENCV_DNN_DNN_ALL_LAYERS_HPP
+#include <opencv2/dnn.hpp>
+
+namespace cv {
+namespace dnn {
+CV__DNN_INLINE_NS_BEGIN
+//! @addtogroup dnn
+//! @{
+
+/** @defgroup dnnLayerList Partial List of Implemented Layers
+  @{
+  This subsection of dnn module contains information about built-in layers and their descriptions.
+
+  Classes listed here, in fact, provide C++ API for creating instances of built-in layers.
+  In addition to this way of instantiating layers, there is a more common factory API (see @ref dnnLayerFactory), which allows creating layers dynamically (by name) and registering new ones.
+  You can use both APIs, but the factory API is less convenient for native C++ programming and is basically designed for use inside importers (see @ref readNetFromCaffe(), @ref readNetFromTorch(), @ref readNetFromTensorflow()).
+
+  Built-in layers partially reproduce functionality of corresponding Caffe and Torch7 layers.
+  In particular, the following layers and Caffe importer were tested to reproduce <a href="http://caffe.berkeleyvision.org/tutorial/layers.html">Caffe</a> functionality:
+  - Convolution
+  - Deconvolution
+  - Pooling
+  - InnerProduct
+  - TanH, ReLU, Sigmoid, BNLL, Power, AbsVal
+  - Softmax
+  - Reshape, Flatten, Slice, Split
+  - LRN
+  - MVN
+  - Dropout (since it does nothing on forward pass -))
+*/
+
+    class CV_EXPORTS BlankLayer : public Layer
+    {
+    public:
+        static Ptr<Layer> create(const LayerParams &params);
+    };
+
+    /**
+     * Constant layer produces the same data blob on every forward pass.
+     */
+    class CV_EXPORTS ConstLayer : public Layer
+    {
+    public:
+        static Ptr<Layer> create(const LayerParams &params);
+    };
+
+    //! LSTM recurrent layer
+    class CV_EXPORTS LSTMLayer : public Layer
+    {
+    public:
+        /** Creates instance of LSTM layer */
+        static Ptr<LSTMLayer> create(const LayerParams& params);
+
+        /** @deprecated Use LayerParams::blobs instead.
+        @brief Set trained weights for LSTM layer.
+
+        LSTM behavior on each step is defined by current input, previous output, previous cell state and learned weights.
+
+        Let @f$x_t@f$ be current input, @f$h_t@f$ be current output, @f$c_t@f$ be current state.
+        Then the current output and current cell state are computed as follows:
+        @f{eqnarray*}{
+        h_t &= o_t \odot tanh(c_t),               \\
+        c_t &= f_t \odot c_{t-1} + i_t \odot g_t, \\
+        @f}
+        where @f$\odot@f$ is the per-element multiplication operation and @f$i_t, f_t, o_t, g_t@f$ are internal gates that are computed using learned weights.
+
+        Gates are computed as follows:
+        @f{eqnarray*}{
+        i_t &= sigmoid&(W_{xi} x_t + W_{hi} h_{t-1} + b_i), \\
+        f_t &= sigmoid&(W_{xf} x_t + W_{hf} h_{t-1} + b_f), \\
+        o_t &= sigmoid&(W_{xo} x_t + W_{ho} h_{t-1} + b_o), \\
+        g_t &= tanh   &(W_{xg} x_t + W_{hg} h_{t-1} + b_g), \\
+        @f}
+        where @f$W_{x?}@f$, @f$W_{h?}@f$ and @f$b_{?}@f$ are learned weights represented as matrices:
+        @f$W_{x?} \in R^{N_h \times N_x}@f$, @f$W_{h?} \in R^{N_h \times N_h}@f$, @f$b_? \in R^{N_h}@f$.
+
+        For simplicity and performance purposes we use @f$ W_x = [W_{xi}; W_{xf}; W_{xo}; W_{xg}] @f$
+        (i.e. @f$W_x@f$ is vertical concatenation of @f$ W_{x?} @f$), @f$ W_x \in R^{4N_h \times N_x} @f$.
+        The same for @f$ W_h = [W_{hi}; W_{hf}; W_{ho}; W_{hg}], W_h \in R^{4N_h \times N_h} @f$
+        and for @f$ b = [b_i; b_f; b_o; b_g]@f$, @f$b \in R^{4N_h} @f$.
+
+        @param Wh is matrix defining how previous output is transformed to internal gates (i.e. according to above mentioned notation is @f$ W_h @f$)
+        @param Wx is matrix defining how current input is transformed to internal gates (i.e. according to above mentioned notation is @f$ W_x @f$)
+        @param b  is bias vector (i.e. according to above mentioned notation is @f$ b @f$)
+        */
+        CV_DEPRECATED virtual void setWeights(const Mat &Wh, const Mat &Wx, const Mat &b) = 0;
+
+        /** @brief Specifies shape of output blob which will be [[`T`], `N`] + @p outTailShape.
+          * @details If this parameter is empty or unset then @p outTailShape = [`Wh`.size(0)] will be used,
+          * where `Wh` is parameter from setWeights().
+          */
+        virtual void setOutShape(const MatShape &outTailShape = MatShape()) = 0;
+
+        /** @deprecated Use flag `use_timestamp_dim` in LayerParams.
+          * @brief Specifies whether to interpret the first dimension of the input blob as the timestamp dimension or as the sample dimension.
+          *
+          * If flag is set to true then shape of input blob will be interpreted as [`T`, `N`, `[data dims]`] where `T` specifies number of timestamps, `N` is number of independent streams.
+          * In this case each forward() call will iterate through `T` timestamps and update layer's state `T` times.
+          *
+          * If flag is set to false then shape of input blob will be interpreted as [`N`, `[data dims]`].
+          * In this case each forward() call will make one iteration and produce one timestamp with shape [`N`, `[out dims]`].
+          */
+        CV_DEPRECATED virtual void setUseTimstampsDim(bool use = true) = 0;
+
+        /** @deprecated Use flag `produce_cell_output` in LayerParams.
+         * @brief If this flag is set to true then layer will produce @f$ c_t @f$ as second output.
+         * @details Shape of the second output is the same as first output.
+         */
+        CV_DEPRECATED virtual void setProduceCellOutput(bool produce = false) = 0;
+
+        /* In common case it use single input with @f$x_t@f$ values to compute output(s) @f$h_t@f$ (and @f$c_t@f$).
+         * @param input should contain packed values @f$x_t@f$
+         * @param output contains computed outputs: @f$h_t@f$ (and @f$c_t@f$ if setProduceCellOutput() flag was set to true).
+         *
+         * If setUseTimstampsDim() is set to true then @p input[0] should have at least two dimensions with the following shape: [`T`, `N`, `[data dims]`],
+         * where `T` specifies number of timestamps, `N` is number of independent streams (i.e. @f$ x_{t_0 + t}^{stream} @f$ is stored inside @p input[0][t, stream, ...]).
+         *
+         * If setUseTimstampsDim() is set to false then @p input[0] should contain a single timestamp; its shape should have the form [`N`, `[data dims]`] with at least one dimension.
+         * (i.e. @f$ x_{t}^{stream} @f$ is stored inside @p input[0][stream, ...]).
+        */
+
+        int inputNameToIndex(String inputName) CV_OVERRIDE;
+        int outputNameToIndex(const String& outputName) CV_OVERRIDE;
+    };
+
+    /** @brief Classical recurrent layer
+
+    Accepts two inputs @f$x_t@f$ and @f$h_{t-1}@f$ and computes two outputs @f$o_t@f$ and @f$h_t@f$.
+
+    - input: should contain packed input @f$x_t@f$.
+    - output: should contain output @f$o_t@f$ (and @f$h_t@f$ if setProduceHiddenOutput() is set to true).
+
+    input[0] should have shape [`T`, `N`, `data_dims`] where `T` and `N` are the number of timestamps and the number of independent samples of @f$x_t@f$ respectively.
+
+    output[0] will have shape [`T`, `N`, @f$N_o@f$], where @f$N_o@f$ is number of rows in @f$ W_{ho} @f$ matrix.
+
+    If setProduceHiddenOutput() is set to true then @p output[1] will contain a Mat with shape [`T`, `N`, @f$N_h@f$], where @f$N_h@f$ is number of rows in @f$ W_{hh} @f$ matrix.
+    */
+    class CV_EXPORTS RNNLayer : public Layer
+    {
+    public:
+        /** Creates instance of RNNLayer */
+        static Ptr<RNNLayer> create(const LayerParams& params);
+
+        /** Setups learned weights.
+
+        Recurrent-layer behavior on each step is defined by current input @f$ x_t @f$, previous state @f$ h_{t-1} @f$ and learned weights as follows:
+        @f{eqnarray*}{
+        h_t &= tanh&(W_{hh} h_{t-1} + W_{xh} x_t + b_h),  \\
+        o_t &= tanh&(W_{ho} h_t + b_o),
+        @f}
+
+        @param Wxh is @f$ W_{xh} @f$ matrix
+        @param bh  is @f$ b_{h}  @f$ vector
+        @param Whh is @f$ W_{hh} @f$ matrix
+        @param Who is @f$ W_{ho} @f$ matrix
+        @param bo  is @f$ b_{o}  @f$ vector
+        */
+        virtual void setWeights(const Mat &Wxh, const Mat &bh, const Mat &Whh, const Mat &Who, const Mat &bo) = 0;
+
+        /** @brief If this flag is set to true then layer will produce @f$ h_t @f$ as second output.
+         * @details Shape of the second output is the same as first output.
+         */
+        virtual void setProduceHiddenOutput(bool produce = false) = 0;
+
+    };
+
+    class CV_EXPORTS BaseConvolutionLayer : public Layer
+    {
+    public:
+        CV_DEPRECATED_EXTERNAL Size kernel, stride, pad, dilation, adjustPad;
+        std::vector<size_t> adjust_pads;
+        std::vector<size_t> kernel_size, strides, dilations;
+        std::vector<size_t> pads_begin, pads_end;
+        String padMode;
+        int numOutput;
+    };
+
+    class CV_EXPORTS ConvolutionLayer : public BaseConvolutionLayer
+    {
+    public:
+        static Ptr<BaseConvolutionLayer> create(const LayerParams& params);
+    };
+
+    class CV_EXPORTS DeconvolutionLayer : public BaseConvolutionLayer
+    {
+    public:
+        static Ptr<BaseConvolutionLayer> create(const LayerParams& params);
+    };
+
+    class CV_EXPORTS LRNLayer : public Layer
+    {
+    public:
+        int type;
+
+        int size;
+        float alpha, beta, bias;
+        bool normBySize;
+
+        static Ptr<LRNLayer> create(const LayerParams& params);
+    };
+
+    class CV_EXPORTS PoolingLayer : public Layer
+    {
+    public:
+        int type;
+        std::vector<size_t> kernel_size, strides;
+        std::vector<size_t> pads_begin, pads_end;
+        CV_DEPRECATED_EXTERNAL Size kernel, stride, pad;
+        CV_DEPRECATED_EXTERNAL int pad_l, pad_t, pad_r, pad_b;
+        bool globalPooling; //!< Flag is true if at least one of the axes is global pooled.
+        std::vector<bool> isGlobalPooling;
+        bool computeMaxIdx;
+        String padMode;
+        bool ceilMode;
+        // If true for average pooling with padding, divide every output region
+        // by the whole kernel area. Otherwise exclude zero padded values and divide
+        // by number of real values.
+        bool avePoolPaddedArea;
+        // ROIPooling parameters.
+        Size pooledSize;
+        float spatialScale;
+        // PSROIPooling parameters.
+        int psRoiOutChannels;
+
+        static Ptr<PoolingLayer> create(const LayerParams& params);
+    };
+
+    class CV_EXPORTS SoftmaxLayer : public Layer
+    {
+    public:
+        bool logSoftMax;
+
+        static Ptr<SoftmaxLayer> create(const LayerParams& params);
+    };
+
+    class CV_EXPORTS InnerProductLayer : public Layer
+    {
+    public:
+        int axis;
+        static Ptr<InnerProductLayer> create(const LayerParams& params);
+    };
+
+    class CV_EXPORTS MVNLayer : public Layer
+    {
+    public:
+        float eps;
+        bool normVariance, acrossChannels;
+
+        static Ptr<MVNLayer> create(const LayerParams& params);
+    };
+
+    /* Reshaping */
+
+    class CV_EXPORTS ReshapeLayer : public Layer
+    {
+    public:
+        MatShape newShapeDesc;
+        Range newShapeRange;
+
+        static Ptr<ReshapeLayer> create(const LayerParams& params);
+    };
+
+    class CV_EXPORTS FlattenLayer : public Layer
+    {
+    public:
+        static Ptr<FlattenLayer> create(const LayerParams &params);
+    };
+
+    class CV_EXPORTS ConcatLayer : public Layer
+    {
+    public:
+        int axis;
+        /**
+         * @brief Add zero padding in case of concatenation of blobs with different
+         * spatial sizes.
+         *
+         * Details: https://github.com/torch/nn/blob/master/doc/containers.md#depthconcat
+         */
+        bool padding;
+
+        static Ptr<ConcatLayer> create(const LayerParams &params);
+    };
+
+    class CV_EXPORTS SplitLayer : public Layer
+    {
+    public:
+        int outputsCount; //!< Number of copies that will be produced (is ignored when negative).
+
+        static Ptr<SplitLayer> create(const LayerParams &params);
+    };
+
+    /**
+     * Slice layer has several modes:
+     * 1. Caffe mode
+     * @param[in] axis Axis of split operation
+     * @param[in] slice_point Array of split points
+     *
+     * Number of output blobs equals to number of split points plus one. The
+     * first blob is a slice on input from 0 to @p slice_point[0] - 1 by @p axis,
+     * the second output blob is a slice of input from @p slice_point[0] to
+     * @p slice_point[1] - 1 by @p axis and the last output blob is a slice of
+     * input from @p slice_point[-1] up to the end of @p axis size.
+     *
+     * 2. TensorFlow mode
+     * @param begin Vector of start indices
+     * @param size Vector of sizes
+     *
+     * More convenient numpy-like slice. One and only output blob
+     * is a slice `input[begin[0]:begin[0]+size[0], begin[1]:begin[1]+size[1], ...]`
+     *
+     * 3. Torch mode
+     * @param axis Axis of split operation
+     *
+     * Split input blob on the equal parts by @p axis.
+     */
+    class CV_EXPORTS SliceLayer : public Layer
+    {
+    public:
+        /**
+         * @brief Vector of slice ranges.
+         *
+         * The first dimension equals number of output blobs.
+         * Inner vector has slice ranges for the first number of input dimensions.
+         */
+        std::vector<std::vector<Range> > sliceRanges;
+        int axis;
+        int num_split;
+
+        static Ptr<SliceLayer> create(const LayerParams &params);
+    };
+
+    class CV_EXPORTS PermuteLayer : public Layer
+    {
+    public:
+        static Ptr<PermuteLayer> create(const LayerParams& params);
+    };
+
+    /**
+     * Permute channels of 4-dimensional input blob.
+     * @param group Number of groups to split input channels and pick in turns
+     *              into output blob.
+     *
+     * \f[ groupSize = \frac{number\ of\ channels}{group} \f]
+     * \f[ output(n, c, h, w) = input(n, groupSize \times (c \% group) + \lfloor \frac{c}{group} \rfloor, h, w) \f]
+     * Read more at https://arxiv.org/pdf/1707.01083.pdf
+     */
+    class CV_EXPORTS ShuffleChannelLayer : public Layer
+    {
+    public:
+        static Ptr<Layer> create(const LayerParams& params);
+
+        int group;
+    };
+
+    /**
+     * @brief Adds extra values for specific axes.
+     * @param paddings Vector of paddings in format
+     *                 @code
+     *                 [ pad_before, pad_after,  // [0]th dimension
+     *                   pad_before, pad_after,  // [1]st dimension
+     *                   ...
+     *                   pad_before, pad_after ] // [n]th dimension
+     *                 @endcode
+     *                 that represents number of padded values at every dimension
+     *                 starting from the first one. The rest of dimensions won't
+     *                 be padded.
+     * @param value Value to be padded. Defaults to zero.
+     * @param type Padding type: 'constant', 'reflect'
+     * @param input_dims Torch's parameter. If @p input_dims is not equal to the
+     *                   actual input dimensionality then the `[0]th` dimension
+     *                   is considered as a batch dimension and @p paddings are shifted
+     *                   to a one dimension. Defaults to `-1` that means padding
+     *                   corresponding to @p paddings.
+     */
+    class CV_EXPORTS PaddingLayer : public Layer
+    {
+    public:
+        static Ptr<PaddingLayer> create(const LayerParams& params);
+    };
+
+    /* Activations */
+    class CV_EXPORTS ActivationLayer : public Layer
+    {
+    public:
+        virtual void forwardSlice(const float* src, float* dst, int len,
+                                  size_t outPlaneSize, int cn0, int cn1) const = 0;
+    };
+
+    class CV_EXPORTS ReLULayer : public ActivationLayer
+    {
+    public:
+        float negativeSlope;
+
+        static Ptr<ReLULayer> create(const LayerParams &params);
+    };
+
+    class CV_EXPORTS ReLU6Layer : public ActivationLayer
+    {
+    public:
+        float minValue, maxValue;
+
+        static Ptr<ReLU6Layer> create(const LayerParams &params);
+    };
+
+    class CV_EXPORTS ChannelsPReLULayer : public ActivationLayer
+    {
+    public:
+        static Ptr<Layer> create(const LayerParams& params);
+    };
+
+    class CV_EXPORTS ELULayer : public ActivationLayer
+    {
+    public:
+        static Ptr<ELULayer> create(const LayerParams &params);
+    };
+
+    class CV_EXPORTS TanHLayer : public ActivationLayer
+    {
+    public:
+        static Ptr<TanHLayer> create(const LayerParams &params);
+    };
+
+    class CV_EXPORTS SwishLayer : public ActivationLayer
+    {
+    public:
+        static Ptr<SwishLayer> create(const LayerParams &params);
+    };
+
+    class CV_EXPORTS MishLayer : public ActivationLayer
+    {
+    public:
+        static Ptr<MishLayer> create(const LayerParams &params);
+    };
+
+    class CV_EXPORTS SigmoidLayer : public ActivationLayer
+    {
+    public:
+        static Ptr<SigmoidLayer> create(const LayerParams &params);
+    };
+
+    class CV_EXPORTS BNLLLayer : public ActivationLayer
+    {
+    public:
+        static Ptr<BNLLLayer> create(const LayerParams &params);
+    };
+
+    class CV_EXPORTS AbsLayer : public ActivationLayer
+    {
+    public:
+        static Ptr<AbsLayer> create(const LayerParams &params);
+    };
+
+    class CV_EXPORTS PowerLayer : public ActivationLayer
+    {
+    public:
+        float power, scale, shift;
+
+        static Ptr<PowerLayer> create(const LayerParams &params);
+    };
+
+    /* Layers used in semantic segmentation */
+
+    class CV_EXPORTS CropLayer : public Layer
+    {
+    public:
+        static Ptr<Layer> create(const LayerParams &params);
+    };
+
+    /** @brief Element wise operation on inputs
+
+    Extra optional parameters:
+    - "operation" as string. Values are "sum" (default), "prod", "max", "div"
+    - "coeff" as float array. Specify weights of inputs for SUM operation
+    - "output_channels_mode" as string. Values are "same" (default, all input must have the same layout), "input_0", "input_0_truncate", "max_input_channels"
+    */
+    class CV_EXPORTS EltwiseLayer : public Layer
+    {
+    public:
+        static Ptr<EltwiseLayer> create(const LayerParams &params);
+    };
+
+    class CV_EXPORTS BatchNormLayer : public ActivationLayer
+    {
+    public:
+        bool hasWeights, hasBias;
+        float epsilon;
+
+        static Ptr<BatchNormLayer> create(const LayerParams &params);
+    };
+
+    class CV_EXPORTS MaxUnpoolLayer : public Layer
+    {
+    public:
+        Size poolKernel;
+        Size poolPad;
+        Size poolStride;
+
+        static Ptr<MaxUnpoolLayer> create(const LayerParams &params);
+    };
+
+    class CV_EXPORTS ScaleLayer : public Layer
+    {
+    public:
+        bool hasBias;
+        int axis;
+
+        static Ptr<ScaleLayer> create(const LayerParams& params);
+    };
+
+    class CV_EXPORTS ShiftLayer : public Layer
+    {
+    public:
+        static Ptr<Layer> create(const LayerParams& params);
+    };
+
+    class CV_EXPORTS PriorBoxLayer : public Layer
+    {
+    public:
+        static Ptr<PriorBoxLayer> create(const LayerParams& params);
+    };
+
+    class CV_EXPORTS ReorgLayer : public Layer
+    {
+    public:
+        static Ptr<ReorgLayer> create(const LayerParams& params);
+    };
+
+    class CV_EXPORTS RegionLayer : public Layer
+    {
+    public:
+        static Ptr<RegionLayer> create(const LayerParams& params);
+    };
+
+    class CV_EXPORTS DetectionOutputLayer : public Layer
+    {
+    public:
+        static Ptr<DetectionOutputLayer> create(const LayerParams& params);
+    };
+
+    /**
+     * @brief \f$ L_p \f$ - normalization layer.
+     * @param p Normalization factor. The most common `p = 1` for \f$ L_1 \f$ -
+     *          normalization or `p = 2` for \f$ L_2 \f$ - normalization or a custom one.
+     * @param eps Parameter \f$ \epsilon \f$ to prevent a division by zero.
+     * @param across_spatial If true, normalize an input across all non-batch dimensions.
+     *                       Otherwise normalize every channel separately.
+     *
+     * Across spatial:
+     * @f[
+     * norm = \sqrt[p]{\epsilon + \sum_{x, y, c} |src(x, y, c)|^p } \\
+     * dst(x, y, c) = \frac{ src(x, y, c) }{norm}
+     * @f]
+     *
+     * Channel wise normalization:
+     * @f[
+     * norm(c) = \sqrt[p]{\epsilon + \sum_{x, y} |src(x, y, c)|^p } \\
+     * dst(x, y, c) = \frac{ src(x, y, c) }{norm(c)}
+     * @f]
+     *
+     * Where `x, y` - spatial coordinates, `c` - channel.
+     *
+     * Every sample in the batch is normalized separately. Optionally,
+     * output is scaled by the trained parameters.
+     */
+    class CV_EXPORTS NormalizeBBoxLayer : public Layer
+    {
+    public:
+        float pnorm, epsilon;
+        CV_DEPRECATED_EXTERNAL bool acrossSpatial;
+
+        static Ptr<NormalizeBBoxLayer> create(const LayerParams& params);
+    };
+
+    /**
+     * @brief Resize input 4-dimensional blob by nearest neighbor or bilinear strategy.
+     *
+     * Layer is used to support TensorFlow's resize_nearest_neighbor and resize_bilinear ops.
+     */
+    class CV_EXPORTS ResizeLayer : public Layer
+    {
+    public:
+        static Ptr<ResizeLayer> create(const LayerParams& params);
+    };
+
+    /**
+     * @brief Bilinear resize layer from https://github.com/cdmh/deeplab-public-ver2
+     *
+     * It differs from @ref ResizeLayer in output shape and resize scales computations.
+     */
+    class CV_EXPORTS InterpLayer : public Layer
+    {
+    public:
+        static Ptr<Layer> create(const LayerParams& params);
+    };
+
+    class CV_EXPORTS ProposalLayer : public Layer
+    {
+    public:
+        static Ptr<ProposalLayer> create(const LayerParams& params);
+    };
+
+    class CV_EXPORTS CropAndResizeLayer : public Layer
+    {
+    public:
+        static Ptr<Layer> create(const LayerParams& params);
+    };
+
+//! @}
+//! @}
+CV__DNN_INLINE_NS_END
+}
+}
+#endif
diff --git a/IPL/include/opencv/opencv2/dnn/blob.hpp b/IPL/include/opencv/opencv2/dnn/blob.hpp
deleted file mode 100644
index bc582c8..0000000
--- a/IPL/include/opencv/opencv2/dnn/blob.hpp
+++ /dev/null
@@ -1,238 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifndef __OPENCV_DNN_DNN_BLOB_HPP__
-#define __OPENCV_DNN_DNN_BLOB_HPP__
-#include <opencv2/core.hpp>
-#include <vector>
-#include <ostream>
-
-namespace cv
-{
-namespace dnn
-{
-//! @addtogroup dnn
-//! @{
-
-    /** @brief Lightweight class for storing and processing a shape of blob (or anything else). */
-    struct BlobShape
-    {
-        explicit BlobShape(int ndims = 4, int fill = 1);    //!< Creates n-dim shape and fill its by @p fill
-        BlobShape(int num, int cn, int rows, int cols);     //!< Creates 4-dim shape [@p num, @p cn, @p rows, @p cols]
-        BlobShape(int ndims, const int *sizes);             //!< Creates n-dim shape from the @p sizes array
-        BlobShape(const std::vector<int> &sizes);           //!< Creates n-dim shape from the @p sizes vector
-        template<int n>
-        BlobShape(const Vec<int, n> &shape);                //!< Creates n-dim shape from @ref cv::Vec
-
-        /** @brief Returns number of dimensions. */
-        int dims() const;
-
-        /** @brief Returns reference to the size of the specified @p axis.
-         *
-         * Negative @p axis is supported, in this case a counting starts from the last axis,
-         * i. e. -1 corresponds to last axis.
-         * If non-existing axis was passed then an error will be generated.
-         */
-        int &size(int axis);
-
-        /** @brief Returns the size of the specified @p axis.
-         *  @see size()
-         */
-        int size(int axis) const;
-
-        int operator[](int axis) const; //!< Does the same thing as size(axis).
-        int &operator[](int axis);      //!< Does the same thing as size(int) const.
-
-        /** @brief Returns the size of the specified @p axis.
-         *
-         * Does the same thing as size(int) const, but if non-existing axis will be passed then 1 will be returned,
-         * therefore this function always finishes successfully.
-         */
-        int xsize(int axis) const;
-
-        /** @brief Returns the product of all sizes of axes. */
-        ptrdiff_t total();
-
-        /** @brief Returns pointer to the first element of continuous size array. */
-        const int *ptr() const;
-
-        /** @brief Checks equality of two shapes. */
-        bool equal(const BlobShape &other) const;
-
-        bool operator== (const BlobShape &r) const;
-
-    private:
-        cv::AutoBuffer<int,4> sz;
-    };
-
-
-    /** @brief This class provides methods for continuous n-dimensional CPU and GPU array processing.
-     *
-     * The class is realized as a wrapper over @ref cv::Mat and @ref cv::UMat.
-     * It will support methods for switching and logical synchronization between CPU and GPU.
-    */
-    class CV_EXPORTS Blob
-    {
-    public:
-        explicit Blob();
-
-        /** @brief Constructs blob with specified @p shape and @p type. */
-        explicit Blob(const BlobShape &shape, int type = CV_32F);
-
-        /** @brief Constucts 4-dimensional blob (so-called batch) from image or array of images.
-         * @param image 2-dimensional multi-channel or 3-dimensional single-channel image (or array of images)
-         * @param dstCn specify size of second axis of ouptut blob
-        */
-        explicit Blob(InputArray image, int dstCn = -1);
-
-        /** @brief Creates blob with specified @p shape and @p type. */
-        void create(const BlobShape &shape, int type = CV_32F);
-
-        /** @brief Creates blob from cv::Mat or cv::UMat without copying the data */
-        void fill(InputArray in);
-        /** @brief Creates blob from user data.
-         *  @details If @p deepCopy is false then CPU data will not be allocated.
-         */
-        void fill(const BlobShape &shape, int type, void *data, bool deepCopy = true);
-
-        Mat& matRef();                      //!< Returns reference to cv::Mat, containing blob data.
-        const Mat& matRefConst() const;     //!< Returns reference to cv::Mat, containing blob data, for read-only purposes.
-        UMat &umatRef();                    //!< Returns reference to cv::UMat, containing blob data (not implemented yet).
-        const UMat &umatRefConst() const;   //!< Returns reference to cv::UMat, containing blob data, for read-only purposes (not implemented yet).
-
-        /** @brief Returns number of blob dimensions. */
-        int dims() const;
-
-        /** @brief Returns the size of the specified @p axis.
-         *
-         * Negative @p axis is supported, in this case a counting starts from the last axis,
-         * i. e. -1 corresponds to last axis.
-         * If non-existing axis was passed then an error will be generated.
-         */
-        int size(int axis) const;
-
-        /** @brief Returns the size of the specified @p axis.
-         *
-         * Does the same thing as size(int) const, but if non-existing axis will be passed then 1 will be returned,
-         * therefore this function always finishes successfully.
-         */
-        int xsize(int axis) const;
-
-        /** @brief Computes the product of sizes of axes among the specified axes range [@p startAxis; @p endAxis).
-         * @param startAxis the first axis to include in the range.
-         * @param endAxis   the first axis to exclude from the range.
-         * @details Negative axis indexing can be used.
-         */
-        size_t total(int startAxis = 0, int endAxis = INT_MAX) const;
-
-        /** @brief Converts @p axis index to canonical format (where 0 <= axis < dims()). */
-        int canonicalAxis(int axis) const;
-
-        /** @brief Returns shape of the blob. */
-        BlobShape shape() const;
-
-        /** @brief Checks equality of two blobs shapes. */
-        bool equalShape(const Blob &other) const;
-
-        /** @brief Returns slice of first two dimensions.
-         *  @details The behaviour is similar to the following numpy code: blob[n, cn, ...]
-         */
-        Mat getPlane(int n, int cn);
-
-        /* Shape getters of 4-dimensional blobs. */
-        int cols() const;       //!< Returns size of the fourth axis blob.
-        int rows() const;       //!< Returns size of the thrid  axis blob.
-        int channels() const;   //!< Returns size of the second axis blob.
-        int num() const;        //!< Returns size of the first  axis blob.
-        Size size2() const;     //!< Returns cv::Size(cols(), rows())
-        Vec4i shape4() const;   //!< Returns shape of first four blob axes.
-
-        /** @brief Returns linear index of the element with specified coordinates in the blob.
-         *
-         * If @p n < dims() then unspecified coordinates will be filled by zeros.
-         * If @p n > dims() then extra coordinates will be ignored.
-         */
-        template<int n>
-        size_t offset(const Vec<int, n> &pos) const;
-        /** @overload */
-        size_t offset(int n = 0, int cn = 0, int row = 0, int col = 0) const;
-
-        /* CPU pointer getters */
-        /** @brief Returns pointer to the blob element with the specified position, stored in CPU memory.
-         *
-         * @p n correspond to the first axis, @p cn - to the second, etc.
-         * If dims() > 4 then unspecified coordinates will be filled by zeros.
-         * If dims() < 4 then extra coordinates will be ignored.
-         */
-        uchar *ptr(int n = 0, int cn = 0, int row = 0, int col = 0);
-        /** @overload */
-        template<typename TFloat>
-        TFloat *ptr(int n = 0, int cn = 0, int row = 0, int col = 0);
-        /** @overload ptr<float>() */
-        float *ptrf(int n = 0, int cn = 0, int row = 0, int col = 0);
-        //TODO: add const ptr methods
-
-        /** @brief Shares data from other @p blob.
-         * @returns *this
-         */
-        Blob &shareFrom(const Blob &blob);
-
-        /** @brief Changes shape of the blob without copying the data.
-         * @returns *this
-         */
-        Blob &reshape(const BlobShape &shape);
-
-        /** @brief Returns type of the blob. */
-        int type() const;
-
-    private:
-        const int *sizes() const;
-
-        Mat m;
-    };
-
-//! @}
-}
-}
-
-#include "blob.inl.hpp"
-
-#endif
diff --git a/IPL/include/opencv/opencv2/dnn/blob.inl.hpp b/IPL/include/opencv/opencv2/dnn/blob.inl.hpp
deleted file mode 100644
index 4a6de48..0000000
--- a/IPL/include/opencv/opencv2/dnn/blob.inl.hpp
+++ /dev/null
@@ -1,342 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifndef __OPENCV_DNN_DNN_BLOB_INL_HPP__
-#define __OPENCV_DNN_DNN_BLOB_INL_HPP__
-#include "blob.hpp"
-
-namespace cv
-{
-namespace dnn
-{
-
-inline BlobShape::BlobShape(int ndims, int fill) : sz( (size_t)std::max(ndims, 0) )
-{
-    CV_Assert(ndims >= 0);
-    for (int i = 0; i < ndims; i++)
-        sz[i] = fill;
-}
-
-inline BlobShape::BlobShape(int ndims, const int *sizes) : sz( (size_t)std::max(ndims, 0) )
-{
-    CV_Assert(ndims >= 0);
-    for (int i = 0; i < ndims; i++)
-        sz[i] = sizes[i];
-}
-
-inline BlobShape::BlobShape(int num, int cn, int rows, int cols) : sz(4)
-{
-    sz[0] = num;
-    sz[1] = cn;
-    sz[2] = rows;
-    sz[3] = cols;
-}
-
-inline BlobShape::BlobShape(const std::vector<int> &sizes) : sz( sizes.size() )
-{
-    for (int i = 0; i < (int)sizes.size(); i++)
-        sz[i] = sizes[i];
-}
-
-template<int n>
-inline BlobShape::BlobShape(const Vec<int, n> &shape) : sz(n)
-{
-    for (int i = 0; i < n; i++)
-        sz[i] = shape[i];
-}
-
-inline int BlobShape::dims() const
-{
-    return (int)sz.size();
-}
-
-inline int BlobShape::xsize(int axis) const
-{
-    if (axis < -dims() || axis >= dims())
-        return 1;
-
-    return sz[(axis < 0) ? axis + dims() : axis];
-}
-
-inline int BlobShape::size(int axis) const
-{
-    CV_Assert(-dims() <= axis && axis < dims());
-    return sz[(axis < 0) ? axis + dims() : axis];
-}
-
-inline int &BlobShape::size(int axis)
-{
-    CV_Assert(-dims() <= axis && axis < dims());
-    return sz[(axis < 0) ? axis + dims() : axis];
-}
-
-inline int BlobShape::operator[] (int axis) const
-{
-    CV_Assert(-dims() <= axis && axis < dims());
-    return sz[(axis < 0) ? axis + dims() : axis];
-}
-
-inline int &BlobShape::operator[] (int axis)
-{
-    CV_Assert(-dims() <= axis && axis < dims());
-    return sz[(axis < 0) ? axis + dims() : axis];
-}
-
-inline ptrdiff_t BlobShape::total()
-{
-    if (dims() == 0)
-        return 0;
-
-    ptrdiff_t res = 1;
-    for (int i = 0; i < dims(); i++)
-        res *= sz[i];
-    return res;
-}
-
-inline const int *BlobShape::ptr() const
-{
-    return sz;
-}
-
-inline bool BlobShape::equal(const BlobShape &other) const
-{
-    if (this->dims() != other.dims())
-        return false;
-
-    for (int i = 0; i < other.dims(); i++)
-    {
-        if (sz[i] != other.sz[i])
-            return false;
-    }
-
-    return true;
-}
-
-inline bool BlobShape::operator==(const BlobShape &r) const
-{
-    return this->equal(r);
-}
-
-CV_EXPORTS std::ostream &operator<< (std::ostream &stream, const BlobShape &shape);
-
-/////////////////////////////////////////////////////////////////////
-
-inline int Blob::canonicalAxis(int axis) const
-{
-    CV_Assert(-dims() <= axis && axis < dims());
-    return (axis < 0) ? axis + dims() : axis;
-}
-
-inline int Blob::dims() const
-{
-    return m.dims;
-}
-
-inline int Blob::xsize(int axis) const
-{
-    if (axis < -dims() || axis >= dims())
-        return 1;
-
-    return sizes()[(axis < 0) ? axis + dims() : axis];
-}
-
-inline int Blob::size(int axis) const
-{
-    CV_Assert(-dims() <= axis && axis < dims());
-    return sizes()[(axis < 0) ? axis + dims() : axis];
-}
-
-inline size_t Blob::total(int startAxis, int endAxis) const
-{
-    if (startAxis < 0)
-        startAxis += dims();
-
-    if (endAxis == INT_MAX)
-        endAxis = dims();
-    else if (endAxis < 0)
-        endAxis += dims();
-
-    CV_Assert(0 <= startAxis && startAxis <= endAxis && endAxis <= dims());
-
-    size_t size = 1; //fix: assume that slice isn't empty
-    for (int i = startAxis; i < endAxis; i++)
-        size *= (size_t)sizes()[i];
-
-    return size;
-}
-
-
-template<int n>
-inline size_t Blob::offset(const Vec<int, n> &pos) const
-{
-    size_t ofs = 0;
-    int i;
-    for (i = 0; i < std::min(n, dims()); i++)
-    {
-        CV_DbgAssert(pos[i] >= 0 && pos[i] < size(i));
-        ofs = ofs * (size_t)size(i) + pos[i];
-    }
-    for (; i < dims(); i++)
-        ofs *= (size_t)size(i);
-    return ofs;
-}
-
-inline size_t Blob::offset(int n, int cn, int row, int col) const
-{
-    return offset(Vec4i(n, cn, row, col));
-}
-
-inline float *Blob::ptrf(int n, int cn, int row, int col)
-{
-    CV_Assert(type() == CV_32F);
-    return (float*)m.data + offset(n, cn, row, col);
-}
-
-inline uchar *Blob::ptr(int n, int cn, int row, int col)
-{
-    return m.data + m.elemSize() * offset(n, cn, row, col);
-}
-
-template<typename TFloat>
-inline TFloat* Blob::ptr(int n, int cn, int row, int col)
-{
-    CV_Assert(type() == cv::DataDepth<TFloat>::value);
-    return (TFloat*) ptr(n, cn, row, col);
-}
-
-inline BlobShape Blob::shape() const
-{
-    return BlobShape(dims(), sizes());
-}
-
-inline bool Blob::equalShape(const Blob &other) const
-{
-    if (this->dims() != other.dims())
-        return false;
-
-    for (int i = 0; i < dims(); i++)
-    {
-        if (this->sizes()[i] != other.sizes()[i])
-            return false;
-    }
-    return true;
-}
-
-inline Mat& Blob::matRef()
-{
-    return m;
-}
-
-inline const Mat& Blob::matRefConst() const
-{
-    return m;
-}
-
-inline UMat &Blob::umatRef()
-{
-    CV_Error(Error::StsNotImplemented, "");
-    return *(new UMat());
-}
-
-inline const UMat &Blob::umatRefConst() const
-{
-    CV_Error(Error::StsNotImplemented, "");
-    return *(new UMat());
-}
-
-inline Mat Blob::getPlane(int n, int cn)
-{
-    CV_Assert(dims() > 2);
-    return Mat(dims() - 2, sizes() + 2, type(), ptr(n, cn));
-}
-
-inline int Blob::cols() const
-{
-    return xsize(3);
-}
-
-inline int Blob::rows() const
-{
-    return xsize(2);
-}
-
-inline int Blob::channels() const
-{
-    return xsize(1);
-}
-
-inline int Blob::num() const
-{
-    return xsize(0);
-}
-
-inline Size Blob::size2() const
-{
-    return Size(cols(), rows());
-}
-
-inline int Blob::type() const
-{
-    return m.depth();
-}
-
-inline const int * Blob::sizes() const
-{
-    return &m.size[0];
-}
-
-
-inline Blob &Blob::shareFrom(const Blob &blob)
-{
-    this->m = blob.m;
-    return *this;
-}
-
-inline Blob &Blob::reshape(const BlobShape &shape)
-{
-    m = m.reshape(1, shape.dims(), shape.ptr());
-    return *this;
-}
-
-}
-}
-
-#endif
diff --git a/IPL/include/opencv/opencv2/dnn/dict.hpp b/IPL/include/opencv/opencv2/dnn/dict.hpp
index 61db133..463d314 100644
--- a/IPL/include/opencv/opencv2/dnn/dict.hpp
+++ b/IPL/include/opencv/opencv2/dnn/dict.hpp
@@ -39,30 +39,34 @@
 //
 //M*/
 
-#ifndef __OPENCV_DNN_DNN_DICT_HPP__
-#define __OPENCV_DNN_DNN_DICT_HPP__
-
 #include <opencv2/core.hpp>
 #include <map>
 #include <ostream>
 
-namespace cv
-{
-namespace dnn
-{
+#include <opencv2/dnn/dnn.hpp>
+
+#ifndef OPENCV_DNN_DNN_DICT_HPP
+#define OPENCV_DNN_DNN_DICT_HPP
+
+namespace cv {
+namespace dnn {
+CV__DNN_INLINE_NS_BEGIN
 //! @addtogroup dnn
 //! @{
 
 /** @brief This struct stores the scalar value (or array) of one of the following type: double, cv::String or int64.
  *  @todo Maybe int64 is useless because double type exactly stores at least 2^52 integers.
  */
-struct DictValue
+struct CV_EXPORTS_W DictValue
 {
     DictValue(const DictValue &r);
-    DictValue(int p = 0)        : type(Param::INT), pi(new AutoBuffer<int64,1>) { (*pi)[0] = p; }       //!< Constructs integer scalar
+    DictValue(bool i)           : type(Param::INT), pi(new AutoBuffer<int64,1>) { (*pi)[0] = i ? 1 : 0; }       //!< Constructs integer scalar
+    DictValue(int64 i = 0)      : type(Param::INT), pi(new AutoBuffer<int64,1>) { (*pi)[0] = i; }       //!< Constructs integer scalar
+    CV_WRAP DictValue(int i)    : type(Param::INT), pi(new AutoBuffer<int64,1>) { (*pi)[0] = i; }       //!< Constructs integer scalar
     DictValue(unsigned p)       : type(Param::INT), pi(new AutoBuffer<int64,1>) { (*pi)[0] = p; }       //!< Constructs integer scalar
-    DictValue(double p)         : type(Param::REAL), pd(new AutoBuffer<double,1>) { (*pd)[0] = p; }     //!< Constructs floating point scalar
-    DictValue(const String &p)  : type(Param::STRING), ps(new AutoBuffer<String,1>) { (*ps)[0] = p; }   //!< Constructs string scalar
+    CV_WRAP DictValue(double p)         : type(Param::REAL), pd(new AutoBuffer<double,1>) { (*pd)[0] = p; }     //!< Constructs floating point scalar
+    CV_WRAP DictValue(const String &s)  : type(Param::STRING), ps(new AutoBuffer<String,1>) { (*ps)[0] = s; }   //!< Constructs string scalar
+    DictValue(const char *s)            : type(Param::STRING), ps(new AutoBuffer<String,1>) { (*ps)[0] = s; }   //!< @overload
 
     template<typename TypeIter>
     static DictValue arrayInt(TypeIter begin, int size);    //!< Constructs integer array
@@ -76,9 +80,13 @@ struct DictValue
 
     int size() const;
 
-    bool isInt() const;
-    bool isString() const;
-    bool isReal() const;
+    CV_WRAP bool isInt() const;
+    CV_WRAP bool isString() const;
+    CV_WRAP bool isReal() const;
+
+    CV_WRAP int getIntValue(int idx = -1) const;
+    CV_WRAP double getRealValue(int idx = -1) const;
+    CV_WRAP String getStringValue(int idx = -1) const;
 
     DictValue &operator=(const DictValue &r);
 
@@ -88,17 +96,17 @@ struct DictValue
 
 private:
 
-    int type;
+    Param type;
 
     union
     {
         AutoBuffer<int64, 1> *pi;
         AutoBuffer<double, 1> *pd;
         AutoBuffer<String, 1> *ps;
-        void *p;
+        void *pv;
     };
 
-    DictValue(int _type, void *_p) : type(_type), p(_p) {}
+    DictValue(Param _type, void *_p) : type(_type), pv(_p) {}
     void release();
 };
 
@@ -111,11 +119,14 @@ class CV_EXPORTS Dict
 public:
 
     //! Checks a presence of the @p key in the dictionary.
-    bool has(const String &key);
+    bool has(const String &key) const;
 
     //! If the @p key in the dictionary then returns pointer to its value, else returns NULL.
     DictValue *ptr(const String &key);
 
+    /** @overload */
+    const DictValue *ptr(const String &key) const;
+
     //! If the @p key in the dictionary then returns its value, else an error will be generated.
     const DictValue &get(const String &key) const;
 
@@ -131,10 +142,18 @@ class CV_EXPORTS Dict
     template<typename T>
     const T &set(const String &key, const T &value);
 
+    //! Erase @p key from the dictionary.
+    void erase(const String &key);
+
     friend std::ostream &operator<<(std::ostream &stream, const Dict &dict);
+
+    std::map<String, DictValue>::const_iterator begin() const;
+
+    std::map<String, DictValue>::const_iterator end() const;
 };
 
 //! @}
+CV__DNN_INLINE_NS_END
 }
 }
 
diff --git a/IPL/include/opencv/opencv2/dnn/dnn.hpp b/IPL/include/opencv/opencv2/dnn/dnn.hpp
index 1d1244d..3b12508 100644
--- a/IPL/include/opencv/opencv2/dnn/dnn.hpp
+++ b/IPL/include/opencv/opencv2/dnn/dnn.hpp
@@ -39,66 +39,214 @@
 //
 //M*/
 
-#ifndef __OPENCV_DNN_DNN_HPP__
-#define __OPENCV_DNN_DNN_HPP__
+#ifndef OPENCV_DNN_DNN_HPP
+#define OPENCV_DNN_DNN_HPP
 
 #include <vector>
 #include <opencv2/core.hpp>
+#include "opencv2/core/async.hpp"
+
+#include "../dnn/version.hpp"
+
 #include <opencv2/dnn/dict.hpp>
-#include <opencv2/dnn/blob.hpp>
 
-namespace cv
-{
-namespace dnn //! This namespace is used for dnn module functionlaity.
-{
+namespace cv {
+namespace dnn {
+CV__DNN_INLINE_NS_BEGIN
 //! @addtogroup dnn
 //! @{
 
-    /** @brief Initialize dnn module and built-in layers.
-     *
-     * This function automatically called on most of OpenCV builds,
-     * but you need to call it manually on some specific configurations (iOS for example).
+    typedef std::vector<int> MatShape;
+
+    /**
+     * @brief Enum of computation backends supported by layers.
+     * @see Net::setPreferableBackend
      */
-    CV_EXPORTS void initModule();
+    enum Backend
+    {
+        //! DNN_BACKEND_DEFAULT is equal to DNN_BACKEND_INFERENCE_ENGINE if
+        //! OpenCV is built with Intel's Inference Engine library or
+        //! DNN_BACKEND_OPENCV otherwise.
+        DNN_BACKEND_DEFAULT = 0,
+        DNN_BACKEND_HALIDE,
+        DNN_BACKEND_INFERENCE_ENGINE,            //!< Intel's Inference Engine computational backend
+                                                 //!< @sa setInferenceEngineBackendType
+        DNN_BACKEND_OPENCV,
+        DNN_BACKEND_VKCOM,
+        DNN_BACKEND_CUDA,
+#ifdef __OPENCV_BUILD
+        DNN_BACKEND_INFERENCE_ENGINE_NGRAPH = 1000000,     // internal - use DNN_BACKEND_INFERENCE_ENGINE + setInferenceEngineBackendType()
+        DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019,      // internal - use DNN_BACKEND_INFERENCE_ENGINE + setInferenceEngineBackendType()
+#endif
+    };
+
+    /**
+     * @brief Enum of target devices for computations.
+     * @see Net::setPreferableTarget
+     */
+    enum Target
+    {
+        DNN_TARGET_CPU = 0,
+        DNN_TARGET_OPENCL,
+        DNN_TARGET_OPENCL_FP16,
+        DNN_TARGET_MYRIAD,
+        DNN_TARGET_VULKAN,
+        DNN_TARGET_FPGA,  //!< FPGA device with CPU fallbacks using Inference Engine's Heterogeneous plugin.
+        DNN_TARGET_CUDA,
+        DNN_TARGET_CUDA_FP16
+    };
+
+    CV_EXPORTS std::vector< std::pair<Backend, Target> > getAvailableBackends();
+    CV_EXPORTS_W std::vector<Target> getAvailableTargets(dnn::Backend be);
 
     /** @brief This class provides all data needed to initialize layer.
      *
-     * It includes dictionary with scalar params (which can be readed by using Dict interface),
+     * It includes dictionary with scalar params (which can be read by using Dict interface),
      * blob params #blobs and optional meta information: #name and #type of layer instance.
     */
-    struct CV_EXPORTS LayerParams : public Dict
+    class CV_EXPORTS LayerParams : public Dict
     {
-        std::vector<Blob> blobs; //!< List of learned parameters stored as blobs.
+    public:
+        //TODO: Add ability to name blob params
+        std::vector<Mat> blobs; //!< List of learned parameters stored as blobs.
 
         String name; //!< Name of the layer instance (optional, can be used internal purposes).
         String type; //!< Type name which was used for creating layer by layer factory (optional).
     };
 
+   /**
+    * @brief Derivatives of this class encapsulate functions of certain backends.
+    */
+    class BackendNode
+    {
+    public:
+        BackendNode(int backendId);
+
+        virtual ~BackendNode(); //!< Virtual destructor to make polymorphism.
+
+        int backendId; //!< Backend identifier.
+    };
+
+    /**
+     * @brief Derivatives of this class wrap cv::Mat for different backends and targets.
+     */
+    class BackendWrapper
+    {
+    public:
+        BackendWrapper(int backendId, int targetId);
+
+        /**
+         * @brief Wrap cv::Mat for specific backend and target.
+         * @param[in] targetId Target identifier.
+         * @param[in] m cv::Mat for wrapping.
+         *
+         * Makes a CPU->GPU data transfer if it's required for the target.
+         */
+        BackendWrapper(int targetId, const cv::Mat& m);
+
+        /**
+         * @brief Make wrapper for reused cv::Mat.
+         * @param[in] base Wrapper of cv::Mat that will be reused.
+         * @param[in] shape Specific shape.
+         *
+         * Initialize wrapper from another one. It'll wrap the same host CPU
+         * memory and mustn't allocate memory on the device (i.e. GPU). It might
+         * have a different shape. Use it when CPU memory is reused, so that the
+         * associated memory on the device is reused too.
+         */
+        BackendWrapper(const Ptr<BackendWrapper>& base, const MatShape& shape);
+
+        virtual ~BackendWrapper(); //!< Virtual destructor to make polymorphism.
+
+        /**
+         * @brief Transfer data to CPU host memory.
+         */
+        virtual void copyToHost() = 0;
+
+        /**
+         * @brief Indicate that an actual data is on CPU.
+         */
+        virtual void setHostDirty() = 0;
+
+        int backendId;  //!< Backend identifier.
+        int targetId;   //!< Target identifier.
+    };
+
+    class CV_EXPORTS ActivationLayer;
+
     /** @brief This interface class allows to build new Layers - are building blocks of networks.
      *
      * Each class, derived from Layer, must implement allocate() methods to declare own outputs and forward() to compute outputs.
-     * Also before using the new layer into networks you must register your layer by using one of @ref LayerFactoryModule "LayerFactory" macros.
+     * Also before using the new layer into networks you must register your layer by using one of @ref dnnLayerFactory "LayerFactory" macros.
      */
-    struct CV_EXPORTS Layer
+    class CV_EXPORTS_W Layer : public Algorithm
     {
+    public:
+
         //! List of learned parameters must be stored here to allow read them by using Net::getParam().
-        std::vector<Blob> blobs;
+        CV_PROP_RW std::vector<Mat> blobs;
 
-        /** @brief Allocates internal buffers and output blobs with respect to the shape of inputs.
+        /** @brief Computes and sets internal parameters according to inputs, outputs and blobs.
+         *  @deprecated Use Layer::finalize(InputArrayOfArrays, OutputArrayOfArrays) instead
          *  @param[in]  input  vector of already allocated input blobs
-         *  @param[out] output vector of output blobs, which must be allocated
+         *  @param[out] output vector of already allocated output blobs
+         *
+         * This method is called after the network has allocated all memory for input and output blobs
+         * and before inferencing.
+         */
+        CV_DEPRECATED_EXTERNAL
+        virtual void finalize(const std::vector<Mat*> &input, std::vector<Mat> &output);
+
+        /** @brief Computes and sets internal parameters according to inputs, outputs and blobs.
+         *  @param[in]  inputs  vector of already allocated input blobs
+         *  @param[out] outputs vector of already allocated output blobs
          *
-         * This method must create each produced blob according to shape of @p input blobs and internal layer params.
-         * If this method is called first time then @p output vector consists from empty blobs and its size determined by number of output connections.
-         * This method can be called multiple times if size of any @p input blob was changed.
+         * This method is called after the network has allocated all memory for input and output blobs
+         * and before inferencing.
          */
-        virtual void allocate(const std::vector<Blob*> &input, std::vector<Blob> &output) = 0;
+        CV_WRAP virtual void finalize(InputArrayOfArrays inputs, OutputArrayOfArrays outputs);
 
         /** @brief Given the @p input blobs, computes the output @p blobs.
+         *  @deprecated Use Layer::forward(InputArrayOfArrays, OutputArrayOfArrays, OutputArrayOfArrays) instead
          *  @param[in]  input  the input blobs.
          *  @param[out] output allocated output blobs, which will store results of the computation.
+         *  @param[out] internals allocated internal blobs
+         */
+        CV_DEPRECATED_EXTERNAL
+        virtual void forward(std::vector<Mat*> &input, std::vector<Mat> &output, std::vector<Mat> &internals);
+
+        /** @brief Given the @p input blobs, computes the output @p blobs.
+         *  @param[in]  inputs  the input blobs.
+         *  @param[out] outputs allocated output blobs, which will store results of the computation.
+         *  @param[out] internals allocated internal blobs
          */
-        virtual void forward(std::vector<Blob*> &input, std::vector<Blob> &output) = 0;
+        virtual void forward(InputArrayOfArrays inputs, OutputArrayOfArrays outputs, OutputArrayOfArrays internals);
+
+        /** @brief Given the @p input blobs, computes the output @p blobs.
+         *  @param[in]  inputs  the input blobs.
+         *  @param[out] outputs allocated output blobs, which will store results of the computation.
+         *  @param[out] internals allocated internal blobs
+         */
+        void forward_fallback(InputArrayOfArrays inputs, OutputArrayOfArrays outputs, OutputArrayOfArrays internals);
+
+        /** @brief
+         * @overload
+         * @deprecated Use Layer::finalize(InputArrayOfArrays, OutputArrayOfArrays) instead
+         */
+        CV_DEPRECATED_EXTERNAL
+        void finalize(const std::vector<Mat> &inputs, CV_OUT std::vector<Mat> &outputs);
+
+        /** @brief
+         * @overload
+         * @deprecated Use Layer::finalize(InputArrayOfArrays, OutputArrayOfArrays) instead
+         */
+        CV_DEPRECATED std::vector<Mat> finalize(const std::vector<Mat> &inputs);
+
+        /** @brief Allocates layer and computes output.
+         *  @deprecated This method will be removed in the future release.
+         */
+        CV_DEPRECATED CV_WRAP void run(const std::vector<Mat> &inputs, CV_OUT std::vector<Mat> &outputs,
+                                       CV_IN_OUT std::vector<Mat> &internals);
 
         /** @brief Returns index of input blob into the input array.
          *  @param inputName label of input blob
@@ -110,13 +258,122 @@ namespace dnn //! This namespace is used for dnn module functionlaity.
         /** @brief Returns index of output blob in output array.
          *  @see inputNameToIndex()
          */
-        virtual int outputNameToIndex(String outputName);
+        CV_WRAP virtual int outputNameToIndex(const String& outputName);
+
+        /**
+         * @brief Asks the layer whether it supports a specific backend for doing computations.
+         * @param[in] backendId computation backend identifier.
+         * @see Backend
+         */
+        virtual bool supportBackend(int backendId);
+
+        /**
+         * @brief Returns Halide backend node.
+         * @param[in] inputs Input Halide buffers.
+         * @see BackendNode, BackendWrapper
+         *
+         * Input buffers should be exactly the same ones that will be used in forward invocations.
+         * Although we could use Halide::ImageParam based on the input shape only,
+         * this helps prevent some memory management issues (if something is wrong,
+         * Halide tests will fail).
+         */
+        virtual Ptr<BackendNode> initHalide(const std::vector<Ptr<BackendWrapper> > &inputs);
+
+        virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> > &inputs);
+
+        virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> > &inputs, const std::vector<Ptr<BackendNode> >& nodes);
+
+        virtual Ptr<BackendNode> initVkCom(const std::vector<Ptr<BackendWrapper> > &inputs);
+
+        /**
+         * @brief Returns a CUDA backend node
+         *
+         * @param   context  void pointer to CSLContext object
+         * @param   inputs   layer inputs
+         * @param   outputs  layer outputs
+         */
+        virtual Ptr<BackendNode> initCUDA(
+            void *context,
+            const std::vector<Ptr<BackendWrapper>>& inputs,
+            const std::vector<Ptr<BackendWrapper>>& outputs
+        );
+
+       /**
+        * @brief Automatic Halide scheduling based on layer hyper-parameters.
+        * @param[in] node Backend node with Halide functions.
+        * @param[in] inputs Blobs that will be used in forward invocations.
+        * @param[in] outputs Blobs that will be used in forward invocations.
+        * @param[in] targetId Target identifier
+        * @see BackendNode, Target
+        *
+        * A layer doesn't use its own Halide::Func members because layer fusing
+        * may have been applied. In that case the fused function should be scheduled.
+        */
+        virtual void applyHalideScheduler(Ptr<BackendNode>& node,
+                                          const std::vector<Mat*> &inputs,
+                                          const std::vector<Mat> &outputs,
+                                          int targetId) const;
+
+        /**
+         * @brief Implement layers fusing.
+         * @param[in] node Backend node of bottom layer.
+         * @see BackendNode
+         *
+         * Relevant for graph-based backends. If the layer is attached successfully,
+         * returns a non-empty cv::Ptr to a node of the same backend.
+         * Fuse only over the last function.
+         */
+        virtual Ptr<BackendNode> tryAttach(const Ptr<BackendNode>& node);
+
+        /**
+         * @brief Tries to attach to the layer the subsequent activation layer, i.e. do the layer fusion in a partial case.
+         * @param[in] layer The subsequent activation layer.
+         *
+         * Returns true if the activation layer has been attached successfully.
+         */
+        virtual bool setActivation(const Ptr<ActivationLayer>& layer);
+
+        /**
+         * @brief Try to fuse current layer with a next one
+         * @param[in] top Next layer to be fused.
+         * @returns True if fusion was performed.
+         */
+        virtual bool tryFuse(Ptr<Layer>& top);
 
-        String name; //!< Name of the layer instance, can be used for logging or other internal purposes.
-        String type; //!< Type name which was used for creating layer by layer factory.
+        /**
+         * @brief Returns parameters of layers with channel-wise multiplication and addition.
+         * @param[out] scale Channel-wise multipliers. Total number of values should
+         *                   be equal to number of channels.
+         * @param[out] shift Channel-wise offsets. Total number of values should
+         *                   be equal to number of channels.
+         *
+         * Some layers can fuse their transformations with subsequent layers,
+         * for example convolution + batch normalization. In this case the base layer
+         * uses weights from the layer after it, and the fused layer is skipped.
+         * By default, @p scale and @p shift are empty, which means the layer has no
+         * element-wise multiplications or additions.
+         */
+        virtual void getScaleShift(Mat& scale, Mat& shift) const;
+
+        /**
+         * @brief "Detaches" all the layers attached to a particular layer.
+         */
+        virtual void unsetAttached();
+
+        virtual bool getMemoryShapes(const std::vector<MatShape> &inputs,
+                                     const int requiredOutputs,
+                                     std::vector<MatShape> &outputs,
+                                     std::vector<MatShape> &internals) const;
+        virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
+                               const std::vector<MatShape> &outputs) const {CV_UNUSED(inputs); CV_UNUSED(outputs); return 0;}
+
+        CV_PROP String name; //!< Name of the layer instance, can be used for logging or other internal purposes.
+        CV_PROP String type; //!< Type name which was used for creating layer by layer factory.
+        CV_PROP int preferableTarget; //!< prefer target for layer forwarding
 
         Layer();
-        explicit Layer(const LayerParams &params); //!< Initialize only #name, #type and #blobs fields.
+        explicit Layer(const LayerParams &params);      //!< Initializes only #name, #type and #blobs fields.
+        void setParamsFrom(const LayerParams &params);  //!< Initializes only #name, #type and #blobs fields.
         virtual ~Layer();
     };
 
@@ -130,13 +387,53 @@ namespace dnn //! This namespace is used for dnn module functionlaity.
      *
      * This class supports reference counting of its instances, i. e. copies point to the same instance.
      */
-    class CV_EXPORTS Net
+    class CV_EXPORTS_W_SIMPLE Net
     {
     public:
 
-        Net();  //!< Default constructor.
-        ~Net(); //!< Destructor frees the net only if there aren't references to the net anymore.
+        CV_WRAP Net();  //!< Default constructor.
+        CV_WRAP ~Net(); //!< Destructor frees the net only if there aren't references to the net anymore.
+
+        /** @brief Create a network from Intel's Model Optimizer intermediate representation (IR).
+         *  @param[in] xml XML configuration file with network's topology.
+         *  @param[in] bin Binary file with trained weights.
+         *  Networks imported from Intel's Model Optimizer are launched in Intel's Inference Engine
+         *  backend.
+         */
+        CV_WRAP static Net readFromModelOptimizer(const String& xml, const String& bin);
 
+        /** @brief Create a network from Intel's Model Optimizer in-memory buffers with intermediate representation (IR).
+         *  @param[in] bufferModelConfig buffer with model's configuration.
+         *  @param[in] bufferWeights buffer with model's trained weights.
+         *  @returns Net object.
+         */
+        CV_WRAP static
+        Net readFromModelOptimizer(const std::vector<uchar>& bufferModelConfig, const std::vector<uchar>& bufferWeights);
+
+        /** @brief Create a network from Intel's Model Optimizer in-memory buffers with intermediate representation (IR).
+         *  @param[in] bufferModelConfigPtr buffer pointer of model's configuration.
+         *  @param[in] bufferModelConfigSize buffer size of model's configuration.
+         *  @param[in] bufferWeightsPtr buffer pointer of model's trained weights.
+         *  @param[in] bufferWeightsSize buffer size of model's trained weights.
+         *  @returns Net object.
+         */
+        static
+        Net readFromModelOptimizer(const uchar* bufferModelConfigPtr, size_t bufferModelConfigSize,
+                                            const uchar* bufferWeightsPtr, size_t bufferWeightsSize);
+
+        /** Returns true if there are no layers in the network. */
+        CV_WRAP bool empty() const;
+
+        /** @brief Dump net to String
+         *  @returns String with structure, hyperparameters, backend, target and fusion
+         *  Call method after setInput(). To see correct backend, target and fusion run after forward().
+         */
+        CV_WRAP String dump();
+        /** @brief Dump net structure, hyperparameters, backend, target and fusion to dot file
+         *  @param path   path to output file with .dot extension
+         *  @see dump()
+         */
+        CV_WRAP void dumpToFile(const String& path);
         /** @brief Adds new layer to the net.
          *  @param name   unique name of the adding layer.
          *  @param type   typename of the adding layer (type must be registered in LayerRegister).
@@ -152,20 +449,25 @@ namespace dnn //! This namespace is used for dnn module functionlaity.
         /** @brief Converts string name of the layer to the integer identifier.
          *  @returns id of the layer, or -1 if the layer wasn't found.
          */
-        int getLayerId(const String &layer);
+        CV_WRAP int getLayerId(const String &layer);
+
+        CV_WRAP std::vector<String> getLayerNames() const;
 
         /** @brief Container for strings and integers. */
         typedef DictValue LayerId;
 
-        /** @brief Delete layer for the network (not implemented yet) */
-        void deleteLayer(LayerId layer);
+        /** @brief Returns pointer to the layer with the specified id or name which the network uses. */
+        CV_WRAP Ptr<Layer> getLayer(LayerId layerId);
+
+        /** @brief Returns pointers to input layers of specific layer. */
+        std::vector<Ptr<Layer> > getLayerInputs(LayerId layerId); // FIXIT: CV_WRAP
 
         /** @brief Connects output of the first layer to input of the second layer.
          *  @param outPin descriptor of the first layer output.
          *  @param inpPin descriptor of the second layer input.
          *
          * Descriptors have the following template <DFN>&lt;layer_name&gt;[.input_number]</DFN>:
-         * - the first part of the template <DFN>layer_name</DFN> is sting name of the added layer.
+         * - the first part of the template <DFN>layer_name</DFN> is string name of the added layer.
          *   If this part is empty then the network input pseudo layer will be used;
          * - the second optional part of the template <DFN>input_number</DFN>
          *   is either number of the layer input, either label one.
@@ -173,54 +475,118 @@ namespace dnn //! This namespace is used for dnn module functionlaity.
          *
          *  @see setNetInputs(), Layer::inputNameToIndex(), Layer::outputNameToIndex()
          */
-        void connect(String outPin, String inpPin);
+        CV_WRAP void connect(String outPin, String inpPin);
+
         /** @brief Connects #@p outNum output of the first layer to #@p inNum input of the second layer.
          *  @param outLayerId identifier of the first layer
-         *  @param inpLayerId identifier of the second layer
          *  @param outNum number of the first layer output
+         *  @param inpLayerId identifier of the second layer
          *  @param inpNum number of the second layer input
          */
         void connect(int outLayerId, int outNum, int inpLayerId, int inpNum);
-        /** @brief Sets ouputs names of the network input pseudo layer.
+
+        /** @brief Sets output names of the network input pseudo layer.
          *
          * Each net always has special own the network input pseudo layer with id=0.
          * This layer stores the user blobs only and don't make any computations.
          * In fact, this layer provides the only way to pass user data into the network.
          * As any other layer, this layer can label its outputs and this function provides an easy way to do this.
          */
-        void setNetInputs(const std::vector<String> &inputBlobNames);
+        CV_WRAP void setInputsNames(const std::vector<String> &inputBlobNames);
 
-        /** @brief Runs forward pass for the whole network */
-        void forward();
-        /** @brief Runs forward pass to compute output of layer @p toLayer */
-        void forward(LayerId toLayer);
-        /** @brief Runs forward pass to compute output of layer @p toLayer, but computations start from @p startLayer */
-        void forward(LayerId startLayer, LayerId toLayer);
-        /** @overload */
-        void forward(const std::vector<LayerId> &startLayers, const std::vector<LayerId> &toLayers);
+        /** @brief Specify shape of network input.
+         */
+        CV_WRAP void setInputShape(const String &inputName, const MatShape& shape);
 
-        //TODO:
-        /** @brief Optimized forward.
-         *  @warning Not implemented yet.
-         *  @details Makes forward only those layers which weren't changed after previous forward().
+        /** @brief Runs forward pass to compute output of layer with name @p outputName.
+         *  @param outputName name of the layer whose output is needed
+         *  @return blob for first output of specified layer.
+         *  @details By default runs forward pass for the whole network.
          */
-        void forwardOpt(LayerId toLayer);
-        /** @overload */
-        void forwardOpt(const std::vector<LayerId> &toLayers);
+        CV_WRAP Mat forward(const String& outputName = String());
 
-        /** @brief Sets the new value for the layer output blob
-         *  @param outputName descriptor of the updating layer output blob.
-         *  @param blob new blob.
-         *  @see connect(String, String) to know format of the descriptor.
-         *  @note If updating blob is not empty then @p blob must have the same shape,
-         *  because network reshaping is not implemented yet.
+        /** @brief Runs forward pass to compute output of layer with name @p outputName.
+         *  @param outputName name of the layer whose output is needed
+         *  @details By default runs forward pass for the whole network.
+         *
+         *  This is an asynchronous version of forward(const String&).
+         *  dnn::DNN_BACKEND_INFERENCE_ENGINE backend is required.
+         */
+        CV_WRAP AsyncArray forwardAsync(const String& outputName = String());
+
+        /** @brief Runs forward pass to compute output of layer with name @p outputName.
+         *  @param outputBlobs contains all output blobs for specified layer.
+         *  @param outputName name of the layer whose output is needed
+         *  @details If @p outputName is empty, runs forward pass for the whole network.
+         */
+        CV_WRAP void forward(OutputArrayOfArrays outputBlobs, const String& outputName = String());
+
+        /** @brief Runs forward pass to compute outputs of layers listed in @p outBlobNames.
+         *  @param outputBlobs contains blobs for first outputs of specified layers.
+         *  @param outBlobNames names of the layers whose outputs are needed
          */
-        void setBlob(String outputName, const Blob &blob);
-        /** @brief Returns the layer output blob.
-         *  @param outputName the descriptor of the returning layer output blob.
-         *  @see connect(String, String)
+        CV_WRAP void forward(OutputArrayOfArrays outputBlobs,
+                             const std::vector<String>& outBlobNames);
+
+        /** @brief Runs forward pass to compute outputs of layers listed in @p outBlobNames.
+         *  @param outputBlobs contains all output blobs for each layer specified in @p outBlobNames.
+         *  @param outBlobNames names of the layers whose outputs are needed
+         */
+        CV_WRAP_AS(forwardAndRetrieve) void forward(CV_OUT std::vector<std::vector<Mat> >& outputBlobs,
+                                                    const std::vector<String>& outBlobNames);
+
+        /**
+         * @brief Compile Halide layers.
+         * @param[in] scheduler Path to YAML file with scheduling directives.
+         * @see setPreferableBackend
+         *
+         * Schedules layers that support the Halide backend, then compiles them for
+         * a specific target. For layers that are not represented in the scheduling file,
+         * or if no manual scheduling is used at all, automatic scheduling will be applied.
          */
-        Blob getBlob(String outputName);
+        CV_WRAP void setHalideScheduler(const String& scheduler);
+
+        /**
+         * @brief Asks the network to use a specific computation backend where it is supported.
+         * @param[in] backendId backend identifier.
+         * @see Backend
+         *
+         * If OpenCV is compiled with Intel's Inference Engine library, DNN_BACKEND_DEFAULT
+         * means DNN_BACKEND_INFERENCE_ENGINE. Otherwise it is equal to DNN_BACKEND_OPENCV.
+         */
+        CV_WRAP void setPreferableBackend(int backendId);
+
+        /**
+         * @brief Asks the network to make computations on a specific target device.
+         * @param[in] targetId target identifier.
+         * @see Target
+         *
+         * List of supported combinations backend / target:
+         * |                        | DNN_BACKEND_OPENCV | DNN_BACKEND_INFERENCE_ENGINE | DNN_BACKEND_HALIDE |  DNN_BACKEND_CUDA |
+         * |------------------------|--------------------|------------------------------|--------------------|-------------------|
+         * | DNN_TARGET_CPU         |                  + |                            + |                  + |                   |
+         * | DNN_TARGET_OPENCL      |                  + |                            + |                  + |                   |
+         * | DNN_TARGET_OPENCL_FP16 |                  + |                            + |                    |                   |
+         * | DNN_TARGET_MYRIAD      |                    |                            + |                    |                   |
+         * | DNN_TARGET_FPGA        |                    |                            + |                    |                   |
+         * | DNN_TARGET_CUDA        |                    |                              |                    |                 + |
+         * | DNN_TARGET_CUDA_FP16   |                    |                              |                    |                 + |
+         */
+        CV_WRAP void setPreferableTarget(int targetId);
+
+        /** @brief Sets the new input value for the network
+         *  @param blob        A new blob. Should have CV_32F or CV_8U depth.
+         *  @param name        A name of input layer.
+         *  @param scalefactor An optional normalization scale.
+         *  @param mean        An optional mean subtraction values.
+         *  @see connect(String, String) to know format of the descriptor.
+         *
+         *  If scale or mean values are specified, a final input blob is computed
+         *  as:
+         * \f[input(n,c,h,w) = scalefactor \times (blob(n,c,h,w) - mean_c)\f]
+         */
+        CV_WRAP void setInput(InputArray blob, const String& name = "",
+                              double scalefactor = 1.0, const Scalar& mean = Scalar());
 
         /** @brief Sets the new value for the learned param of the layer.
          *  @param layer name or id of the layer.
@@ -230,47 +596,233 @@ namespace dnn //! This namespace is used for dnn module functionlaity.
          *  @note If shape of the new blob differs from the previous shape,
          *  then the following forward pass may fail.
         */
-        void setParam(LayerId layer, int numParam, const Blob &blob);
+        CV_WRAP void setParam(LayerId layer, int numParam, const Mat &blob);
+
         /** @brief Returns parameter blob of the layer.
          *  @param layer name or id of the layer.
          *  @param numParam index of the layer parameter in the Layer::blobs array.
          *  @see Layer::blobs
          */
-        Blob getParam(LayerId layer, int numParam = 0);
+        CV_WRAP Mat getParam(LayerId layer, int numParam = 0);
 
-    private:
+        /** @brief Returns indexes of layers with unconnected outputs.
+         */
+        CV_WRAP std::vector<int> getUnconnectedOutLayers() const;
+
+        /** @brief Returns names of layers with unconnected outputs.
+         */
+        CV_WRAP std::vector<String> getUnconnectedOutLayersNames() const;
+
+        /** @brief Returns input and output shapes for all layers in loaded model;
+         *  preliminary inferencing isn't necessary.
+         *  @param netInputShapes shapes for all input blobs in net input layer.
+         *  @param layersIds output parameter for layer IDs.
+         *  @param inLayersShapes output parameter for input layers shapes;
+         * order is the same as in layersIds
+         *  @param outLayersShapes output parameter for output layers shapes;
+         * order is the same as in layersIds
+         */
+        CV_WRAP void getLayersShapes(const std::vector<MatShape>& netInputShapes,
+                                     CV_OUT std::vector<int>& layersIds,
+                                     CV_OUT std::vector<std::vector<MatShape> >& inLayersShapes,
+                                     CV_OUT std::vector<std::vector<MatShape> >& outLayersShapes) const;
+
+        /** @overload */
+        CV_WRAP void getLayersShapes(const MatShape& netInputShape,
+                                     CV_OUT std::vector<int>& layersIds,
+                                     CV_OUT std::vector<std::vector<MatShape> >& inLayersShapes,
+                                     CV_OUT std::vector<std::vector<MatShape> >& outLayersShapes) const;
+
+        /** @brief Returns input and output shapes for layer with specified
+         * id in loaded model; preliminary inferencing isn't necessary.
+         *  @param netInputShape shape input blob in net input layer.
+         *  @param layerId id for layer.
+         *  @param inLayerShapes output parameter for input layers shapes;
+         * order is the same as in layersIds
+         *  @param outLayerShapes output parameter for output layers shapes;
+         * order is the same as in layersIds
+         */
+        void getLayerShapes(const MatShape& netInputShape,
+                                    const int layerId,
+                                    CV_OUT std::vector<MatShape>& inLayerShapes,
+                                    CV_OUT std::vector<MatShape>& outLayerShapes) const; // FIXIT: CV_WRAP
+
+        /** @overload */
+        void getLayerShapes(const std::vector<MatShape>& netInputShapes,
+                                    const int layerId,
+                                    CV_OUT std::vector<MatShape>& inLayerShapes,
+                                    CV_OUT std::vector<MatShape>& outLayerShapes) const; // FIXIT: CV_WRAP
+
+        /** @brief Computes FLOP for whole loaded model with specified input shapes.
+         * @param netInputShapes vector of shapes for all net inputs.
+         * @returns computed FLOP.
+         */
+        CV_WRAP int64 getFLOPS(const std::vector<MatShape>& netInputShapes) const;
+        /** @overload */
+        CV_WRAP int64 getFLOPS(const MatShape& netInputShape) const;
+        /** @overload */
+        CV_WRAP int64 getFLOPS(const int layerId,
+                               const std::vector<MatShape>& netInputShapes) const;
+        /** @overload */
+        CV_WRAP int64 getFLOPS(const int layerId,
+                               const MatShape& netInputShape) const;
+
+        /** @brief Returns list of types for layer used in model.
+         * @param layersTypes output parameter for returning types.
+         */
+        CV_WRAP void getLayerTypes(CV_OUT std::vector<String>& layersTypes) const;
+
+        /** @brief Returns count of layers of specified type.
+         * @param layerType type.
+         * @returns count of layers
+         */
+        CV_WRAP int getLayersCount(const String& layerType) const;
+
+        /** @brief Computes bytes number which are required to store
+         * all weights and intermediate blobs for model.
+         * @param netInputShapes vector of shapes for all net inputs.
+         * @param weights output parameter to store resulting bytes for weights.
+         * @param blobs output parameter to store resulting bytes for intermediate blobs.
+         */
+        void getMemoryConsumption(const std::vector<MatShape>& netInputShapes,
+                                          CV_OUT size_t& weights, CV_OUT size_t& blobs) const; // FIXIT: CV_WRAP
+        /** @overload */
+        CV_WRAP void getMemoryConsumption(const MatShape& netInputShape,
+                                          CV_OUT size_t& weights, CV_OUT size_t& blobs) const;
+        /** @overload */
+        CV_WRAP void getMemoryConsumption(const int layerId,
+                                          const std::vector<MatShape>& netInputShapes,
+                                          CV_OUT size_t& weights, CV_OUT size_t& blobs) const;
+        /** @overload */
+        CV_WRAP void getMemoryConsumption(const int layerId,
+                                          const MatShape& netInputShape,
+                                          CV_OUT size_t& weights, CV_OUT size_t& blobs) const;
+
+        /** @brief Computes bytes number which are required to store
+         * all weights and intermediate blobs for each layer.
+         * @param netInputShapes vector of shapes for all net inputs.
+         * @param layerIds output vector to save layer IDs.
+         * @param weights output parameter to store resulting bytes for weights.
+         * @param blobs output parameter to store resulting bytes for intermediate blobs.
+         */
+        void getMemoryConsumption(const std::vector<MatShape>& netInputShapes,
+                                          CV_OUT std::vector<int>& layerIds,
+                                          CV_OUT std::vector<size_t>& weights,
+                                          CV_OUT std::vector<size_t>& blobs) const; // FIXIT: CV_WRAP
+        /** @overload */
+        void getMemoryConsumption(const MatShape& netInputShape,
+                                          CV_OUT std::vector<int>& layerIds,
+                                          CV_OUT std::vector<size_t>& weights,
+                                          CV_OUT std::vector<size_t>& blobs) const; // FIXIT: CV_WRAP
+
+        /** @brief Enables or disables layer fusion in the network.
+         * @param fusion true to enable the fusion, false to disable. The fusion is enabled by default.
+         */
+        CV_WRAP void enableFusion(bool fusion);
+
+        /** @brief Returns overall time for inference and timings (in ticks) for layers.
+         * Indexes in the returned vector correspond to layer ids. Some layers can be fused with others;
+         * in this case a zero tick count will be returned for those skipped layers.
+         * @param timings vector for tick timings for all layers.
+         * @return overall ticks for model inference.
+         */
+        CV_WRAP int64 getPerfProfile(CV_OUT std::vector<double>& timings);
 
+    private:
         struct Impl;
         Ptr<Impl> impl;
     };
 
-    /** @brief Small interface class for loading trained serialized models of different dnn-frameworks. */
-    class Importer
-    {
-    public:
-
-        /** @brief Adds loaded layers into the @p net and sets connetions between them. */
-        virtual void populateNet(Net net) = 0;
+    /** @brief Reads a network model stored in <a href="https://pjreddie.com/darknet/">Darknet</a> model files.
+    *  @param cfgFile      path to the .cfg file with text description of the network architecture.
+    *  @param darknetModel path to the .weights file with learned network.
+    *  @returns Net object that is ready to do a forward pass.
+    *  An exception is thrown in failure cases.
+    */
+    CV_EXPORTS_W Net readNetFromDarknet(const String &cfgFile, const String &darknetModel = String());
 
-        virtual ~Importer();
-    };
+    /** @brief Reads a network model stored in <a href="https://pjreddie.com/darknet/">Darknet</a> model files.
+     *  @param bufferCfg   A buffer containing the content of the .cfg file with a text description of the network architecture.
+     *  @param bufferModel A buffer containing the content of the .weights file with the learned network.
+     *  @returns Net object.
+     */
+    CV_EXPORTS_W Net readNetFromDarknet(const std::vector<uchar>& bufferCfg,
+                                        const std::vector<uchar>& bufferModel = std::vector<uchar>());
 
-    /** @brief Creates the importer of <a href="http://caffe.berkeleyvision.org">Caffe</a> framework network.
-     *  @param prototxt   path to the .prototxt file with text description of the network architecture.
-     *  @param caffeModel path to the .caffemodel file with learned network.
-     *  @returns Pointer to the created importer, NULL in failure cases.
+    /** @brief Reads a network model stored in <a href="https://pjreddie.com/darknet/">Darknet</a> model files.
+     *  @param bufferCfg   A buffer containing the content of the .cfg file with a text description of the network architecture.
+     *  @param lenCfg      Number of bytes to read from bufferCfg
+     *  @param bufferModel A buffer containing the content of the .weights file with the learned network.
+     *  @param lenModel    Number of bytes to read from bufferModel
+     *  @returns Net object.
      */
-    CV_EXPORTS Ptr<Importer> createCaffeImporter(const String &prototxt, const String &caffeModel = String());
+    CV_EXPORTS Net readNetFromDarknet(const char *bufferCfg, size_t lenCfg,
+                                      const char *bufferModel = NULL, size_t lenModel = 0);
+
+    /** @brief Reads a network model stored in <a href="http://caffe.berkeleyvision.org">Caffe</a> framework's format.
+      * @param prototxt   path to the .prototxt file with text description of the network architecture.
+      * @param caffeModel path to the .caffemodel file with learned network.
+      * @returns Net object.
+      */
+    CV_EXPORTS_W Net readNetFromCaffe(const String &prototxt, const String &caffeModel = String());
+
+    /** @brief Reads a network model stored in Caffe model in memory.
+      * @param bufferProto buffer containing the content of the .prototxt file
+      * @param bufferModel buffer containing the content of the .caffemodel file
+      * @returns Net object.
+      */
+    CV_EXPORTS_W Net readNetFromCaffe(const std::vector<uchar>& bufferProto,
+                                      const std::vector<uchar>& bufferModel = std::vector<uchar>());
+
+    /** @brief Reads a network model stored in Caffe model in memory.
+      * @details This is an overloaded member function, provided for convenience.
+      * It differs from the above function only in what argument(s) it accepts.
+      * @param bufferProto buffer containing the content of the .prototxt file
+      * @param lenProto length of bufferProto
+      * @param bufferModel buffer containing the content of the .caffemodel file
+      * @param lenModel length of bufferModel
+      * @returns Net object.
+      */
+    CV_EXPORTS Net readNetFromCaffe(const char *bufferProto, size_t lenProto,
+                                    const char *bufferModel = NULL, size_t lenModel = 0);
+
+    /** @brief Reads a network model stored in <a href="https://www.tensorflow.org/">TensorFlow</a> framework's format.
+      * @param model  path to the .pb file with binary protobuf description of the network architecture
+      * @param config path to the .pbtxt file that contains a text graph definition in protobuf format.
+      *               The resulting Net object is built from the text graph using weights from the binary one,
+      *               which makes it more flexible.
+      * @returns Net object.
+      */
+    CV_EXPORTS_W Net readNetFromTensorflow(const String &model, const String &config = String());
+
+    /** @brief Reads a network model stored in <a href="https://www.tensorflow.org/">TensorFlow</a> framework's format.
+      * @param bufferModel buffer containing the content of the pb file
+      * @param bufferConfig buffer containing the content of the pbtxt file
+      * @returns Net object.
+      */
+    CV_EXPORTS_W Net readNetFromTensorflow(const std::vector<uchar>& bufferModel,
+                                           const std::vector<uchar>& bufferConfig = std::vector<uchar>());
 
-    /** @brief Creates the importer of <a href="http://torch.ch">Torch7</a> framework network.
-     *  @param filename path to the file, dumped from Torch by using torch.save() function.
+    /** @brief Reads a network model stored in <a href="https://www.tensorflow.org/">TensorFlow</a> framework's format.
+      * @details This is an overloaded member function, provided for convenience.
+      * It differs from the above function only in what argument(s) it accepts.
+      * @param bufferModel buffer containing the content of the pb file
+      * @param lenModel length of bufferModel
+      * @param bufferConfig buffer containing the content of the pbtxt file
+      * @param lenConfig length of bufferConfig
+      */
+    CV_EXPORTS Net readNetFromTensorflow(const char *bufferModel, size_t lenModel,
+                                         const char *bufferConfig = NULL, size_t lenConfig = 0);
+
+    /**
+     *  @brief Reads a network model stored in <a href="http://torch.ch">Torch7</a> framework's format.
+     *  @param model    path to the file, dumped from Torch by using torch.save() function.
      *  @param isBinary specifies whether the network was serialized in ascii mode or binary.
-     *  @returns Pointer to the created importer, NULL in failure cases.
-     *
-     *  @warning Torch7 importer is experimental now, you need explicitly set CMake opencv_dnn_BUILD_TORCH_IMPORTER flag to compile its.
+     *  @param evaluate specifies testing phase of network. If true, it's similar to evaluate() method in Torch.
+     *  @returns Net object.
      *
-     *  @note Ascii mode of Torch serializer is more preferable, because binary mode extensively use long type of C language,
-     *  which has different bit-length on different systems.
+     *  @note Ascii mode of Torch serializer is preferable, because binary mode extensively uses the `long` type of C language,
+     *  which has different bit-lengths on different systems.
      *
      * The loading file must contain serialized <a href="https://github.com/torch/nn/blob/master/doc/module.md">nn.Module</a> object
      * with importing network. Try to eliminate a custom objects from serialazing data to avoid importing errors.
@@ -284,21 +836,460 @@ namespace dnn //! This namespace is used for dnn module functionlaity.
      * - nn.SpatialMaxPooling, nn.SpatialAveragePooling
      * - nn.ReLU, nn.TanH, nn.Sigmoid
      * - nn.Reshape
+     * - nn.SoftMax, nn.LogSoftMax
      *
      * Also some equivalents of these classes from cunn, cudnn, and fbcunn may be successfully imported.
      */
-    CV_EXPORTS Ptr<Importer> createTorchImporter(const String &filename, bool isBinary = true);
+     CV_EXPORTS_W Net readNetFromTorch(const String &model, bool isBinary = true, bool evaluate = true);
+
+     /**
+      * @brief Read deep learning network represented in one of the supported formats.
+      * @param[in] model Binary file containing the trained weights. The following file
+      *                  extensions are expected for models from different frameworks:
+      *                  * `*.caffemodel` (Caffe, http://caffe.berkeleyvision.org/)
+      *                  * `*.pb` (TensorFlow, https://www.tensorflow.org/)
+      *                  * `*.t7` | `*.net` (Torch, http://torch.ch/)
+      *                  * `*.weights` (Darknet, https://pjreddie.com/darknet/)
+      *                  * `*.bin` (DLDT, https://software.intel.com/openvino-toolkit)
+      *                  * `*.onnx` (ONNX, https://onnx.ai/)
+      * @param[in] config Text file containing the network configuration. It could be a
+      *                   file with one of the following extensions:
+      *                  * `*.prototxt` (Caffe, http://caffe.berkeleyvision.org/)
+      *                  * `*.pbtxt` (TensorFlow, https://www.tensorflow.org/)
+      *                  * `*.cfg` (Darknet, https://pjreddie.com/darknet/)
+      *                  * `*.xml` (DLDT, https://software.intel.com/openvino-toolkit)
+      * @param[in] framework Explicit framework name tag to determine a format.
+      * @returns Net object.
+      *
+      * This function automatically detects the origin framework of the trained model
+      * and calls an appropriate function such as @ref readNetFromCaffe, @ref readNetFromTensorflow,
+      * @ref readNetFromTorch or @ref readNetFromDarknet. The order of the @p model and @p config
+      * arguments does not matter.
+      */
+     CV_EXPORTS_W Net readNet(const String& model, const String& config = "", const String& framework = "");
+
+     /**
+      * @brief Read deep learning network represented in one of the supported formats.
+      * @details This is an overloaded member function, provided for convenience.
+      *          It differs from the above function only in what argument(s) it accepts.
+      * @param[in] framework    Name of origin framework.
+      * @param[in] bufferModel  A buffer with the content of a binary file with weights.
+      * @param[in] bufferConfig A buffer with the content of a text file containing the network configuration.
+      * @returns Net object.
+      */
+     CV_EXPORTS_W Net readNet(const String& framework, const std::vector<uchar>& bufferModel,
+                              const std::vector<uchar>& bufferConfig = std::vector<uchar>());
 
     /** @brief Loads blob which was serialized as torch.Tensor object of Torch7 framework.
-     *  @warning This function has the same limitations as createTorchImporter().
+     *  @warning This function has the same limitations as readNetFromTorch().
+     */
+    CV_EXPORTS_W Mat readTorchBlob(const String &filename, bool isBinary = true);
+
+    /** @brief Load a network from Intel's Model Optimizer intermediate representation.
+     *  @param[in] xml XML configuration file with network's topology.
+     *  @param[in] bin Binary file with trained weights.
+     *  @returns Net object.
+     *  Networks imported from Intel's Model Optimizer are launched in Intel's Inference Engine
+     *  backend.
+     */
+    CV_EXPORTS_W
+    Net readNetFromModelOptimizer(const String &xml, const String &bin);
+
+    /** @brief Load a network from Intel's Model Optimizer intermediate representation.
+     *  @param[in] bufferModelConfig Buffer containing the XML configuration with the network's topology.
+     *  @param[in] bufferWeights Buffer containing the binary data with the trained weights.
+     *  @returns Net object.
+     *  Networks imported from Intel's Model Optimizer are launched in Intel's Inference Engine
+     *  backend.
      */
-    CV_EXPORTS Blob readTorchBlob(const String &filename, bool isBinary = true);
+    CV_EXPORTS_W
+    Net readNetFromModelOptimizer(const std::vector<uchar>& bufferModelConfig, const std::vector<uchar>& bufferWeights);
+
+    /** @brief Load a network from Intel's Model Optimizer intermediate representation.
+     *  @param[in] bufferModelConfigPtr Pointer to buffer which contains XML configuration with network's topology.
+     *  @param[in] bufferModelConfigSize Binary size of XML configuration data.
+     *  @param[in] bufferWeightsPtr Pointer to buffer which contains binary data with trained weights.
+     *  @param[in] bufferWeightsSize Binary size of trained weights data.
+     *  @returns Net object.
+     *  Networks imported from Intel's Model Optimizer are launched in Intel's Inference Engine
+     *  backend.
+     */
+    CV_EXPORTS
+    Net readNetFromModelOptimizer(const uchar* bufferModelConfigPtr, size_t bufferModelConfigSize,
+                                           const uchar* bufferWeightsPtr, size_t bufferWeightsSize);
+
+    /** @brief Reads a network model stored in <a href="https://onnx.ai/">ONNX</a> format.
+     *  @param onnxFile path to the .onnx file with text description of the network architecture.
+     *  @returns Network object that is ready to do a forward pass; throws an exception in failure cases.
+     */
+    CV_EXPORTS_W Net readNetFromONNX(const String &onnxFile);
+
+    /** @brief Reads a network model from an <a href="https://onnx.ai/">ONNX</a>
+     *         in-memory buffer.
+     *  @param buffer memory address of the first byte of the buffer.
+     *  @param sizeBuffer size of the buffer.
+     *  @returns Network object that is ready to do a forward pass; throws an exception
+     *        in failure cases.
+     */
+    CV_EXPORTS Net readNetFromONNX(const char* buffer, size_t sizeBuffer);
+
+    /** @brief Reads a network model from an <a href="https://onnx.ai/">ONNX</a>
+     *         in-memory buffer.
+     *  @param buffer in-memory buffer that stores the ONNX model bytes.
+     *  @returns Network object that is ready to do a forward pass; throws an exception
+     *        in failure cases.
+     */
+    CV_EXPORTS_W Net readNetFromONNX(const std::vector<uchar>& buffer);
+
+    /** @brief Creates blob from .pb file.
+     *  @param path path to the .pb file with input tensor.
+     *  @returns Mat.
+     */
+    CV_EXPORTS_W Mat readTensorFromONNX(const String& path);
+
+    /** @brief Creates 4-dimensional blob from image. Optionally resizes and crops @p image from center,
+     *  subtracts @p mean values, scales values by @p scalefactor, swaps Blue and Red channels.
+     *  @param image input image (with 1, 3 or 4 channels).
+     *  @param size spatial size for output image
+     *  @param mean scalar with mean values which are subtracted from channels. Values are intended
+     *  to be in (mean-R, mean-G, mean-B) order if @p image has BGR ordering and @p swapRB is true.
+     *  @param scalefactor multiplier for @p image values.
+     *  @param swapRB flag which indicates that swapping the first and last channels
+     *  in a 3-channel image is necessary.
+     *  @param crop flag which indicates whether image will be cropped after resize or not
+     *  @param ddepth Depth of output blob. Choose CV_32F or CV_8U.
+     *  @details if @p crop is true, input image is resized so one side after resize is equal to corresponding
+     *  dimension in @p size and another one is equal or larger. Then, crop from the center is performed.
+     *  If @p crop is false, a direct resize is performed, without cropping and without preserving the aspect ratio.
+     *  @returns 4-dimensional Mat with NCHW dimensions order.
+     */
+    CV_EXPORTS_W Mat blobFromImage(InputArray image, double scalefactor=1.0, const Size& size = Size(),
+                                   const Scalar& mean = Scalar(), bool swapRB=false, bool crop=false,
+                                   int ddepth=CV_32F);
+
+    /** @brief Creates 4-dimensional blob from image.
+     *  @details This is an overloaded member function, provided for convenience.
+     *           It differs from the above function only in what argument(s) it accepts.
+     */
+    CV_EXPORTS void blobFromImage(InputArray image, OutputArray blob, double scalefactor=1.0,
+                                  const Size& size = Size(), const Scalar& mean = Scalar(),
+                                  bool swapRB=false, bool crop=false, int ddepth=CV_32F);
+
+
+    /** @brief Creates 4-dimensional blob from series of images. Optionally resizes and
+     *  crops @p images from center, subtracts @p mean values, scales values by @p scalefactor,
+     *  swaps Blue and Red channels.
+     *  @param images input images (all with 1, 3 or 4 channels).
+     *  @param size spatial size for output image
+     *  @param mean scalar with mean values which are subtracted from channels. Values are intended
+     *  to be in (mean-R, mean-G, mean-B) order if @p images have BGR ordering and @p swapRB is true.
+     *  @param scalefactor multiplier for @p images values.
+     *  @param swapRB flag which indicates that swapping the first and last channels
+     *  in a 3-channel image is necessary.
+     *  @param crop flag which indicates whether image will be cropped after resize or not
+     *  @param ddepth Depth of output blob. Choose CV_32F or CV_8U.
+     *  @details if @p crop is true, input image is resized so one side after resize is equal to corresponding
+     *  dimension in @p size and another one is equal or larger. Then, crop from the center is performed.
+     *  If @p crop is false, a direct resize is performed, without cropping and without preserving the aspect ratio.
+     *  @returns 4-dimensional Mat with NCHW dimensions order.
+     */
+    CV_EXPORTS_W Mat blobFromImages(InputArrayOfArrays images, double scalefactor=1.0,
+                                    Size size = Size(), const Scalar& mean = Scalar(), bool swapRB=false, bool crop=false,
+                                    int ddepth=CV_32F);
+
+    /** @brief Creates 4-dimensional blob from series of images.
+     *  @details This is an overloaded member function, provided for convenience.
+     *           It differs from the above function only in what argument(s) it accepts.
+     */
+    CV_EXPORTS void blobFromImages(InputArrayOfArrays images, OutputArray blob,
+                                   double scalefactor=1.0, Size size = Size(),
+                                   const Scalar& mean = Scalar(), bool swapRB=false, bool crop=false,
+                                   int ddepth=CV_32F);
+
+    /** @brief Parse a 4D blob and output the images it contains as 2D arrays through a simpler data structure
+     *  (std::vector<cv::Mat>).
+     *  @param[in] blob_ 4 dimensional array (images, channels, height, width) in floating point precision (CV_32F) from
+     *  which you would like to extract the images.
+     *  @param[out] images_ array of 2D Mat containing the images extracted from the blob in floating point precision
+     *  (CV_32F). They are neither normalized nor have the mean added back. The number of returned images equals the first dimension
+     *  of the blob (batch size). Every image has a number of channels equal to the second dimension of the blob (depth).
+     */
+    CV_EXPORTS_W void imagesFromBlob(const cv::Mat& blob_, OutputArrayOfArrays images_);
+
+    /** @brief Convert all weights of Caffe network to half precision floating point.
+     * @param src Path to the original Caffe model containing single
+     *            precision floating point weights (usually has a `.caffemodel` extension).
+     * @param dst Path to destination model with updated weights.
+     * @param layersTypes Set of layer types whose parameters will be converted.
+     *                    By default, converts only Convolutional and Fully-Connected layers'
+     *                    weights.
+     *
+     * @note The shrunk model no longer has the original float32 weights, so it can't be used
+     *       in the original Caffe framework anymore. However the structure of data
+     *       is taken from NVidia's Caffe fork: https://github.com/NVIDIA/caffe.
+     *       So the resulting model may be used there.
+     */
+    CV_EXPORTS_W void shrinkCaffeModel(const String& src, const String& dst,
+                                       const std::vector<String>& layersTypes = std::vector<String>());
+
+    /** @brief Create a text representation for a binary network stored in protocol buffer format.
+     *  @param[in] model  A path to binary network.
+     *  @param[in] output A path to output text file to be created.
+     *
+     *  @note To reduce output file size, trained weights are not included.
+     */
+    CV_EXPORTS_W void writeTextGraph(const String& model, const String& output);
+
+    /** @brief Performs non maximum suppression given boxes and corresponding scores.
+
+     * @param bboxes a set of bounding boxes to apply NMS.
+     * @param scores a set of corresponding confidences.
+     * @param score_threshold a threshold used to filter boxes by score.
+     * @param nms_threshold a threshold used in non maximum suppression.
+     * @param indices the kept indices of bboxes after NMS.
+     * @param eta a coefficient in adaptive threshold formula: \f$nms\_threshold_{i+1}=eta\cdot nms\_threshold_i\f$.
+     * @param top_k if `>0`, keep at most @p top_k picked indices.
+     */
+    CV_EXPORTS void NMSBoxes(const std::vector<Rect>& bboxes, const std::vector<float>& scores,
+                               const float score_threshold, const float nms_threshold,
+                               CV_OUT std::vector<int>& indices,
+                               const float eta = 1.f, const int top_k = 0);
+
+    CV_EXPORTS_W void NMSBoxes(const std::vector<Rect2d>& bboxes, const std::vector<float>& scores,
+                               const float score_threshold, const float nms_threshold,
+                               CV_OUT std::vector<int>& indices,
+                               const float eta = 1.f, const int top_k = 0);
+
+    CV_EXPORTS_AS(NMSBoxesRotated) void NMSBoxes(const std::vector<RotatedRect>& bboxes, const std::vector<float>& scores,
+                             const float score_threshold, const float nms_threshold,
+                             CV_OUT std::vector<int>& indices,
+                             const float eta = 1.f, const int top_k = 0);
+
+
+     /** @brief This class represents a high-level API for neural networks.
+      *
+      * Model allows setting params for preprocessing the input image.
+      * Model creates net from file with trained weights and config,
+      * sets preprocessing input and runs forward pass.
+      */
+     class CV_EXPORTS_W_SIMPLE Model : public Net
+     {
+     public:
+         /**
+          * @brief Default constructor.
+          */
+         Model();
+
+         /**
+          * @brief Create model from deep learning network represented in one of the supported formats.
+          * The order of the @p model and @p config arguments does not matter.
+          * @param[in] model Binary file containing the trained weights.
+          * @param[in] config Text file containing the network configuration.
+          */
+         CV_WRAP Model(const String& model, const String& config = "");
+
+         /**
+          * @brief Create model from deep learning network.
+          * @param[in] network Net object.
+          */
+         CV_WRAP Model(const Net& network);
+
+         /** @brief Set input size for frame.
+          *  @param[in] size New input size.
+          *  @note If the shape of the new blob is less than 0, then the frame size does not change.
+         */
+         CV_WRAP Model& setInputSize(const Size& size);
+
+         /** @brief Set input size for frame.
+         *  @param[in] width New input width.
+         *  @param[in] height New input height.
+         *  @note If the shape of the new blob is less than 0,
+         *  then the frame size does not change.
+         */
+         CV_WRAP Model& setInputSize(int width, int height);
+
+         /** @brief Set mean value for frame.
+          *  @param[in] mean Scalar with mean values which are subtracted from channels.
+         */
+         CV_WRAP Model& setInputMean(const Scalar& mean);
+
+         /** @brief Set scalefactor value for frame.
+          *  @param[in] scale Multiplier for frame values.
+         */
+         CV_WRAP Model& setInputScale(double scale);
+
+         /** @brief Set flag crop for frame.
+          *  @param[in] crop Flag which indicates whether image will be cropped after resize or not.
+         */
+         CV_WRAP Model& setInputCrop(bool crop);
+
+         /** @brief Set flag swapRB for frame.
+          *  @param[in] swapRB Flag which indicates whether to swap the first and last channels.
+         */
+         CV_WRAP Model& setInputSwapRB(bool swapRB);
+
+         /** @brief Set preprocessing parameters for frame.
+         *  @param[in] size New input size.
+         *  @param[in] mean Scalar with mean values which are subtracted from channels.
+         *  @param[in] scale Multiplier for frame values.
+         *  @param[in] swapRB Flag which indicates whether to swap the first and last channels.
+         *  @param[in] crop Flag which indicates whether image will be cropped after resize or not.
+         *  blob(n, c, y, x) = scale * ( resize( frame(y, x, c) ) - mean(c) )
+         */
+         CV_WRAP void setInputParams(double scale = 1.0, const Size& size = Size(),
+                                     const Scalar& mean = Scalar(), bool swapRB = false, bool crop = false);
+
+         /** @brief Given the input @p frame, create input blob, run net and return the output blobs in @p outs.
+          *  @param[in]  frame  The input image.
+          *  @param[out] outs Allocated output blobs, which will store results of the computation.
+          */
+         CV_WRAP void predict(InputArray frame, OutputArrayOfArrays outs);
+
+     protected:
+         struct Impl;
+         Ptr<Impl> impl;
+     };
+
+     /** @brief This class represents a high-level API for classification models.
+      *
+      * ClassificationModel allows setting params for preprocessing the input image.
+      * ClassificationModel creates net from file with trained weights and config,
+      * sets preprocessing input, runs forward pass and returns the top-1 prediction.
+      */
+     class CV_EXPORTS_W_SIMPLE ClassificationModel : public Model
+     {
+     public:
+         /**
+          * @brief Create classification model from network represented in one of the supported formats.
+          * The order of the @p model and @p config arguments does not matter.
+          * @param[in] model Binary file containing the trained weights.
+          * @param[in] config Text file containing the network configuration.
+          */
+          CV_WRAP ClassificationModel(const String& model, const String& config = "");
+
+         /**
+          * @brief Create model from deep learning network.
+          * @param[in] network Net object.
+          */
+         CV_WRAP ClassificationModel(const Net& network);
+
+         /** @brief Given the input @p frame, create input blob, run net and return the top-1 prediction.
+          *  @param[in]  frame  The input image.
+          */
+         std::pair<int, float> classify(InputArray frame);
+
+         /** @overload */
+         CV_WRAP void classify(InputArray frame, CV_OUT int& classId, CV_OUT float& conf);
+     };
+
+     /** @brief This class represents a high-level API for keypoints models.
+      *
+      * KeypointsModel allows setting params for preprocessing the input image.
+      * KeypointsModel creates net from file with trained weights and config,
+      * sets preprocessing input, runs forward pass and returns the x and y coordinates of each detected keypoint.
+      */
+     class CV_EXPORTS_W KeypointsModel: public Model
+     {
+     public:
+         /**
+          * @brief Create keypoints model from network represented in one of the supported formats.
+          * The order of the @p model and @p config arguments does not matter.
+          * @param[in] model Binary file containing the trained weights.
+          * @param[in] config Text file containing the network configuration.
+          */
+          CV_WRAP KeypointsModel(const String& model, const String& config = "");
+
+         /**
+          * @brief Create model from deep learning network.
+          * @param[in] network Net object.
+          */
+         CV_WRAP KeypointsModel(const Net& network);
+
+         /** @brief Given the input @p frame, create input blob and run net.
+          *  @param[in]  frame  The input image.
+          *  @param thresh minimum confidence threshold to select a keypoint
+          *  @returns a vector holding the x and y coordinates of each detected keypoint
+          *
+          */
+         CV_WRAP std::vector<Point2f> estimate(InputArray frame, float thresh=0.5);
+     };
+
+     /** @brief This class represents a high-level API for segmentation models.
+      *
+      * SegmentationModel allows setting params for preprocessing the input image.
+      * SegmentationModel creates net from file with trained weights and config,
+      * sets preprocessing input, runs forward pass and returns the class prediction for each pixel.
+      */
+     class CV_EXPORTS_W SegmentationModel: public Model
+     {
+     public:
+         /**
+          * @brief Create segmentation model from network represented in one of the supported formats.
+          * The order of the @p model and @p config arguments does not matter.
+          * @param[in] model Binary file containing the trained weights.
+          * @param[in] config Text file containing the network configuration.
+          */
+          CV_WRAP SegmentationModel(const String& model, const String& config = "");
+
+         /**
+          * @brief Create model from deep learning network.
+          * @param[in] network Net object.
+          */
+         CV_WRAP SegmentationModel(const Net& network);
+
+         /** @brief Given the input @p frame, create input blob and run net.
+          *  @param[in]  frame  The input image.
+          *  @param[out] mask Allocated class prediction for each pixel
+          */
+         CV_WRAP void segment(InputArray frame, OutputArray mask);
+     };
+
+     /** @brief This class represents a high-level API for object detection networks.
+      *
+      * DetectionModel allows setting params for preprocessing the input image.
+      * DetectionModel creates net from file with trained weights and config,
+      * sets preprocessing input, runs forward pass and returns the resulting detections.
+      * For DetectionModel SSD, Faster R-CNN, YOLO topologies are supported.
+      */
+     class CV_EXPORTS_W_SIMPLE DetectionModel : public Model
+     {
+     public:
+         /**
+          * @brief Create detection model from network represented in one of the supported formats.
+          * The order of the @p model and @p config arguments does not matter.
+          * @param[in] model Binary file containing the trained weights.
+          * @param[in] config Text file containing the network configuration.
+          */
+         CV_WRAP DetectionModel(const String& model, const String& config = "");
+
+         /**
+          * @brief Create model from deep learning network.
+          * @param[in] network Net object.
+          */
+         CV_WRAP DetectionModel(const Net& network);
+
+         /** @brief Given the input @p frame, create input blob, run net and return the resulting detections.
+          *  @param[in]  frame  The input image.
+          *  @param[out] classIds Class indexes in result detection.
+          *  @param[out] confidences A set of corresponding confidences.
+          *  @param[out] boxes A set of bounding boxes.
+          *  @param[in] confThreshold A threshold used to filter boxes by confidences.
+          *  @param[in] nmsThreshold A threshold used in non maximum suppression.
+          */
+         CV_WRAP void detect(InputArray frame, CV_OUT std::vector<int>& classIds,
+                             CV_OUT std::vector<float>& confidences, CV_OUT std::vector<Rect>& boxes,
+                             float confThreshold = 0.5f, float nmsThreshold = 0.0f);
+     };
 
 //! @}
+CV__DNN_INLINE_NS_END
 }
 }
 
 #include <opencv2/dnn/layer.hpp>
 #include <opencv2/dnn/dnn.inl.hpp>
 
-#endif  /* __OPENCV_DNN_DNN_HPP__ */
+/// @deprecated Include this header directly from application. Automatic inclusion will be removed
+#include <opencv2/dnn/utils/inference_engine.hpp>
+
+#endif  /* OPENCV_DNN_DNN_HPP */
diff --git a/IPL/include/opencv/opencv2/dnn/dnn.inl.hpp b/IPL/include/opencv/opencv2/dnn/dnn.inl.hpp
index 300ae58..d6809ce 100644
--- a/IPL/include/opencv/opencv2/dnn/dnn.inl.hpp
+++ b/IPL/include/opencv/opencv2/dnn/dnn.inl.hpp
@@ -39,15 +39,14 @@
 //
 //M*/
 
-#ifndef __OPENCV_DNN_DNN_INL_HPP__
-#define __OPENCV_DNN_DNN_INL_HPP__
+#ifndef OPENCV_DNN_DNN_INL_HPP
+#define OPENCV_DNN_DNN_INL_HPP
 
 #include <opencv2/dnn.hpp>
 
-namespace cv
-{
-namespace dnn
-{
+namespace cv {
+namespace dnn {
+CV__DNN_INLINE_NS_BEGIN
 
 template<typename TypeIter>
 DictValue DictValue::arrayInt(TypeIter begin, int size)
@@ -86,7 +85,7 @@ inline DictValue DictValue::get<DictValue>(int idx) const
 template<>
 inline int64 DictValue::get<int64>(int idx) const
 {
-    CV_Assert(idx == -1 && size() == 1 || idx >= 0 && idx < size());
+    CV_Assert((idx == -1 && size() == 1) || (idx >= 0 && idx < size()));
     idx = (idx == -1) ? 0 : idx;
 
     if (type == Param::INT)
@@ -103,9 +102,13 @@ inline int64 DictValue::get<int64>(int idx) const
 
         return (int64)doubleValue;
     }
+    else if (type == Param::STRING)
+    {
+        return std::atoi((*ps)[idx].c_str());
+    }
     else
     {
-        CV_Assert(isInt() || isReal());
+        CV_Assert(isInt() || isReal() || isString());
         return 0;
     }
 }
@@ -116,6 +119,11 @@ inline int DictValue::get<int>(int idx) const
     return (int)get<int64>(idx);
 }
 
+inline int DictValue::getIntValue(int idx) const  // named (non-template) accessor: get<int64>(idx) narrowed to int
+{
+    return (int)get<int64>(idx);
+}
+
 template<>
 inline unsigned DictValue::get<unsigned>(int idx) const
 {
@@ -131,7 +139,7 @@ inline bool DictValue::get<bool>(int idx) const
 template<>
 inline double DictValue::get<double>(int idx) const
 {
-    CV_Assert(idx == -1 && size() == 1 || idx >= 0 && idx < size());
+    CV_Assert((idx == -1 && size() == 1) || (idx >= 0 && idx < size()));
     idx = (idx == -1) ? 0 : idx;
 
     if (type == Param::REAL)
@@ -142,13 +150,22 @@ inline double DictValue::get<double>(int idx) const
     {
         return (double)(*pi)[idx];
     }
+    else if (type == Param::STRING)
+    {
+        return std::atof((*ps)[idx].c_str());
+    }
     else
     {
-        CV_Assert(isReal() || isInt());
+        CV_Assert(isReal() || isInt() || isString());
         return 0;
     }
 }
 
+inline double DictValue::getRealValue(int idx) const  // named (non-template) spelling of get<double>(idx)
+{
+    return get<double>(idx);
+}
+
 template<>
 inline float DictValue::get<float>(int idx) const
 {
@@ -159,10 +176,16 @@ template<>
 inline String DictValue::get<String>(int idx) const
 {
     CV_Assert(isString());
-    CV_Assert(idx == -1 && ps->size() == 1 || idx >= 0 && idx < (int)ps->size());
+    CV_Assert((idx == -1 && ps->size() == 1) || (idx >= 0 && idx < (int)ps->size()));
     return (*ps)[(idx == -1) ? 0 : idx];
 }
 
+
+inline String DictValue::getStringValue(int idx) const  // named (non-template) spelling of get<String>(idx)
+{
+    return get<String>(idx);
+}
+
 inline void DictValue::release()
 {
     switch (type)
@@ -176,6 +199,16 @@ inline void DictValue::release()
     case Param::REAL:
         delete pd;
         break;
+    case Param::BOOLEAN:
+    case Param::MAT:
+    case Param::MAT_VECTOR:
+    case Param::ALGORITHM:
+    case Param::FLOAT:
+    case Param::UNSIGNED_INT:
+    case Param::UINT64:
+    case Param::UCHAR:
+    case Param::SCALAR:
+        break; // unhandled
     }
 }
 
@@ -246,17 +279,22 @@ inline int DictValue::size() const
     {
     case Param::INT:
         return (int)pi->size();
-        break;
     case Param::STRING:
         return (int)ps->size();
-        break;
     case Param::REAL:
         return (int)pd->size();
-        break;
-    default:
-        CV_Error(Error::StsInternal, "");
-        return -1;
+    case Param::BOOLEAN:
+    case Param::MAT:
+    case Param::MAT_VECTOR:
+    case Param::ALGORITHM:
+    case Param::FLOAT:
+    case Param::UNSIGNED_INT:
+    case Param::UINT64:
+    case Param::UCHAR:
+    case Param::SCALAR:
+        break; // unhandled
     }
+    CV_Error_(Error::StsInternal, ("Unhandled type (%d)", static_cast<int>(type)));
 }
 
 inline std::ostream &operator<<(std::ostream &stream, const DictValue &dictv)
@@ -287,7 +325,7 @@ inline std::ostream &operator<<(std::ostream &stream, const DictValue &dictv)
 
 /////////////////////////////////////////////////////////////////
 
-inline bool Dict::has(const String &key)
+inline bool Dict::has(const String &key) const
 {
     return dict.count(key) != 0;
 }
@@ -298,6 +336,12 @@ inline DictValue *Dict::ptr(const String &key)
     return (i == dict.end()) ? NULL : &i->second;
 }
 
+inline const DictValue *Dict::ptr(const String &key) const  // const overload; yields NULL when key is not stored
+{
+    _Dict::const_iterator i = dict.find(key);
+    return (i == dict.end()) ? NULL : &i->second;
+}
+
 inline const DictValue &Dict::get(const String &key) const
 {
     _Dict::const_iterator i = dict.find(key);
@@ -336,6 +380,11 @@ inline const T &Dict::set(const String &key, const T &value)
     return value;
 }
 
+inline void Dict::erase(const String &key)  // forwards to the underlying container's erase(key)
+{
+    dict.erase(key);
+}
+
 inline std::ostream &operator<<(std::ostream &stream, const Dict &dict)
 {
     Dict::_Dict::const_iterator it;
@@ -345,6 +394,17 @@ inline std::ostream &operator<<(std::ostream &stream, const Dict &dict)
     return stream;
 }
 
+inline std::map<String, DictValue>::const_iterator Dict::begin() const  // read-only iterator to the first stored entry
+{
+    return dict.begin();
+}
+
+inline std::map<String, DictValue>::const_iterator Dict::end() const  // read-only past-the-end iterator of the stored entries
+{
+    return dict.end();
+}
+
+CV__DNN_INLINE_NS_END
 }
 }
 
diff --git a/IPL/include/opencv/opencv2/dnn/layer.details.hpp b/IPL/include/opencv/opencv2/dnn/layer.details.hpp
new file mode 100644
index 0000000..1133da5
--- /dev/null
+++ b/IPL/include/opencv/opencv2/dnn/layer.details.hpp
@@ -0,0 +1,78 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+#ifndef OPENCV_DNN_LAYER_DETAILS_HPP
+#define OPENCV_DNN_LAYER_DETAILS_HPP
+
+#include <opencv2/dnn/layer.hpp>
+
+namespace cv {
+namespace dnn {
+CV__DNN_INLINE_NS_BEGIN
+
+/** @brief Registers a layer constructor at runtime.
+*   @param type type name of the layer, written as a bare token (the macro stringizes it with `#type`).
+*   @param constructorFunc pointer to a function of type LayerFactory::Constructor, which creates the layer.
+*   @details This macro must be placed inside function code; its expansion already ends with a semicolon.
+*/
+#define CV_DNN_REGISTER_LAYER_FUNC(type, constructorFunc) \
+    cv::dnn::LayerFactory::registerLayer(#type, constructorFunc);
+
+/** @brief Registers a layer class at runtime.
+ *  @param type type name of the layer, written as a bare token (the macro stringizes it with `#type`).
+ *  @param class C++ class, derived from Layer; it is instantiated through _layerDynamicRegisterer<class>.
+ *  @details This macro must be placed inside function code; its expansion already ends with a semicolon.
+ */
+#define CV_DNN_REGISTER_LAYER_CLASS(type, class) \
+    cv::dnn::LayerFactory::registerLayer(#type, cv::dnn::details::_layerDynamicRegisterer<class>);
+
+/** @brief Registers a layer constructor at module load time.
+*   @param type type name of the layer, written as a bare identifier (it is stringized and pasted into the object name).
+*   @param constructorFunc pointer to a function of type LayerFactory::Constructor, which creates the layer.
+*   @details This macro must be placed outside function code; it defines a static _LayerStaticRegisterer object.
+*/
+#define CV_DNN_REGISTER_LAYER_FUNC_STATIC(type, constructorFunc) \
+static cv::dnn::details::_LayerStaticRegisterer __LayerStaticRegisterer_##type(#type, constructorFunc);
+
+/** @brief Registers a layer class at module load time.
+ *  @param type type name of the layer, written as a bare identifier (it is stringized and pasted into helper names).
+ *  @param class C++ class, derived from Layer and constructible from `LayerParams&` (the helper calls `new class(params)`).
+ *  @details This macro must be placed outside function code. Names are fully qualified so it also expands outside cv::dnn.
+ */
+#define CV_DNN_REGISTER_LAYER_CLASS_STATIC(type, class)                         \
+cv::Ptr<cv::dnn::Layer> __LayerStaticRegisterer_func_##type(cv::dnn::LayerParams &params) \
+    { return cv::Ptr<cv::dnn::Layer>(new class(params)); }                       \
+static cv::dnn::details::_LayerStaticRegisterer __LayerStaticRegisterer_##type(#type, __LayerStaticRegisterer_func_##type);
+
+namespace details {
+
+template<typename LayerClass>
+Ptr<Layer> _layerDynamicRegisterer(LayerParams &params)
+{
+    return Ptr<Layer>(LayerClass::create(params));  // requires a static LayerClass::create(LayerParams&)
+}
+
+// Registration guard: registers a layer constructor when built and unregisters it when destroyed.
+class _LayerStaticRegisterer
+{
+    String type;  // type name kept for the matching unregisterLayer() call
+public:
+
+    _LayerStaticRegisterer(const String &layerType, LayerFactory::Constructor layerConstructor)
+        : type(layerType)
+    {
+        LayerFactory::registerLayer(layerType, layerConstructor);
+    }
+
+    ~_LayerStaticRegisterer()
+    {
+        LayerFactory::unregisterLayer(type);
+    }
+};
+
+} // namespace
+CV__DNN_INLINE_NS_END
+}} // namespace
+
+#endif
diff --git a/IPL/include/opencv/opencv2/dnn/layer.hpp b/IPL/include/opencv/opencv2/dnn/layer.hpp
index b28b6ac..8500599 100644
--- a/IPL/include/opencv/opencv2/dnn/layer.hpp
+++ b/IPL/include/opencv/opencv2/dnn/layer.hpp
@@ -39,18 +39,17 @@
 //
 //M*/
 
-#ifndef __OPENCV_DNN_LAYER_HPP__
-#define __OPENCV_DNN_LAYER_HPP__
+#ifndef OPENCV_DNN_LAYER_HPP
+#define OPENCV_DNN_LAYER_HPP
 #include <opencv2/dnn.hpp>
 
-namespace cv
-{
-namespace dnn
-{
+namespace cv {
+namespace dnn {
+CV__DNN_INLINE_NS_BEGIN
 //! @addtogroup dnn
 //! @{
 //!
-//! @defgroup LayerFactoryModule Utilities for new layers registration
+//! @defgroup dnnLayerFactory Utilities for New Layers Registration
 //! @{
 
 /** @brief %Layer factory allows to create instances of registered layers. */
@@ -59,89 +58,28 @@ class CV_EXPORTS LayerFactory
 public:
 
     //! Each Layer class must provide this function to the factory
-    typedef Ptr<Layer>(*Constuctor)(LayerParams &params);
+    typedef Ptr<Layer>(*Constructor)(LayerParams &params);
 
-    //! Registers the layer class with typename @p type and specified @p constructor.
-    static void registerLayer(const String &type, Constuctor constructor);
+    //! Registers the layer class with typename @p type and specified @p constructor. Thread-safe.
+    static void registerLayer(const String &type, Constructor constructor);
 
-    //! Unregisters registered layer with specified type name.
+    //! Unregisters registered layer with specified type name. Thread-safe.
     static void unregisterLayer(const String &type);
 
     /** @brief Creates instance of registered layer.
      *  @param type type name of creating layer.
      *  @param params parameters which will be used for layer initialization.
+     *  @note Thread-safe.
      */
     static Ptr<Layer> createLayerInstance(const String &type, LayerParams& params);
 
 private:
     LayerFactory();
-
-    struct Impl;
-    static Ptr<Impl> impl();
 };
 
-/** @brief Registers layer constructor in runtime.
-*   @param type string, containing type name of the layer.
-*   @param constuctorFunc pointer to the function of type LayerRegister::Constuctor, which creates the layer.
-*   @details This macros must be placed inside the function code.
-*/
-#define REG_RUNTIME_LAYER_FUNC(type, constuctorFunc) \
-    LayerFactory::registerLayer(#type, constuctorFunc);
-
-/** @brief Registers layer class in runtime.
- *  @param type string, containing type name of the layer.
- *  @param class C++ class, derived from Layer.
- *  @details This macros must be placed inside the function code.
- */
-#define REG_RUNTIME_LAYER_CLASS(type, class) \
-    LayerFactory::registerLayer(#type, _layerDynamicRegisterer<class>);
-
-/** @brief Registers layer constructor on module load time.
-*   @param type string, containing type name of the layer.
-*   @param constuctorFunc pointer to the function of type LayerRegister::Constuctor, which creates the layer.
-*   @details This macros must be placed outside the function code.
-*/
-#define REG_STATIC_LAYER_FUNC(type, constuctorFunc) \
-static _LayerStaticRegisterer __LayerStaticRegisterer_##type(#type, constuctorFunc);
-
-/** @brief Registers layer class on module load time.
- *  @param type string, containing type name of the layer.
- *  @param class C++ class, derived from Layer.
- *  @details This macros must be placed outside the function code.
- */
-#define REG_STATIC_LAYER_CLASS(type, class)                         \
-Ptr<Layer> __LayerStaticRegisterer_func_##type(LayerParams &params) \
-    { return Ptr<Layer>(new class(params)); }                       \
-static _LayerStaticRegisterer __LayerStaticRegisterer_##type(#type, __LayerStaticRegisterer_func_##type);
-
-
 //! @}
 //! @}
-
-
-template<typename LayerClass>
-Ptr<Layer> _layerDynamicRegisterer(LayerParams &params)
-{
-    return Ptr<Layer>(new LayerClass(params));
-}
-
-//allows automatically register created layer on module load time
-struct _LayerStaticRegisterer
-{
-    String type;
-
-    _LayerStaticRegisterer(const String &type, LayerFactory::Constuctor constuctor)
-    {
-        this->type = type;
-        LayerFactory::registerLayer(type, constuctor);
-    }
-
-    ~_LayerStaticRegisterer()
-    {
-        LayerFactory::unregisterLayer(type);
-    }
-};
-
+CV__DNN_INLINE_NS_END
 }
 }
 #endif
diff --git a/IPL/include/opencv/opencv2/dnn/shape_utils.hpp b/IPL/include/opencv/opencv2/dnn/shape_utils.hpp
new file mode 100644
index 0000000..5b8d953
--- /dev/null
+++ b/IPL/include/opencv/opencv2/dnn/shape_utils.hpp
@@ -0,0 +1,229 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_DNN_DNN_SHAPE_UTILS_HPP
+#define OPENCV_DNN_DNN_SHAPE_UTILS_HPP
+
+#include <opencv2/dnn/dnn.hpp>
+#include <opencv2/core/types_c.h>  // CV_MAX_DIM
+#include <iostream>
+#include <ostream>
+#include <sstream>
+
+namespace cv {
+namespace dnn {
+CV__DNN_INLINE_NS_BEGIN
+
+//Slicing
+
+struct _Range : public cv::Range
+{
+    _Range(const Range &r) : cv::Range(r) {}  // non-explicit: a cv::Range converts implicitly (e.g. as a slice() argument)
+    _Range(int start_, int size_ = 1) : cv::Range(start_, start_ + size_) {}  // [start_, start_ + size_); a lone int selects one element
+};
+
+static inline Mat slice(const Mat &m, const _Range &r0)
+{   // Sub-array of m: axis 0 limited to r0, every later axis kept whole.
+    Range ranges[CV_MAX_DIM];
+    ranges[0] = r0;
+    for (int axis = 1; axis < m.dims; ++axis)
+        ranges[axis] = Range::all();
+    return m(&ranges[0]);
+}
+
+static inline Mat slice(const Mat &m, const _Range &r0, const _Range &r1)
+{   // Sub-array of m: axes 0 and 1 limited to r0 and r1, later axes kept whole.
+    CV_Assert(m.dims >= 2);
+    Range ranges[CV_MAX_DIM];
+    ranges[0] = r0;
+    ranges[1] = r1;
+    for (int axis = 2; axis < m.dims; ++axis)
+        ranges[axis] = Range::all();
+    return m(&ranges[0]);
+}
+
+static inline Mat slice(const Mat &m, const _Range &r0, const _Range &r1, const _Range &r2)
+{   // Sub-array of m: axes 0..2 limited to r0..r2, later axes kept whole.
+    CV_Assert(m.dims >= 3);
+    Range ranges[CV_MAX_DIM];
+    ranges[0] = r0;
+    ranges[1] = r1;
+    ranges[2] = r2;
+    for (int axis = 3; axis < m.dims; ++axis)
+        ranges[axis] = Range::all();
+    return m(&ranges[0]);
+}
+
+static inline Mat slice(const Mat &m, const _Range &r0, const _Range &r1, const _Range &r2, const _Range &r3)
+{   // Sub-array of m: axes 0..3 limited to r0..r3, later axes kept whole.
+    CV_Assert(m.dims >= 4);
+    Range ranges[CV_MAX_DIM];
+    ranges[0] = r0;
+    ranges[1] = r1;
+    ranges[2] = r2;
+    ranges[3] = r3;
+    for (int axis = 4; axis < m.dims; ++axis)
+        ranges[axis] = Range::all();
+    return m(&ranges[0]);
+}
+
+static inline Mat getPlane(const Mat &m, int n, int cn)
+{   // Mat over the trailing (dims - 2) axes of m, built on the pointer m.ptr<float>(n, cn).
+    CV_Assert(m.dims > 2);
+    const int planeDims = m.dims - 2;
+    int sz[CV_MAX_DIM];
+    for (int d = 0; d < planeDims; ++d) {
+        sz[d] = m.size.p[d + 2];
+    }
+    return Mat(planeDims, sz, m.type(), (void*)m.ptr<float>(n, cn));
+}
+
+static inline MatShape shape(const int* dims, const int n)
+{   // Collects the n extents starting at dims into a MatShape.
+    MatShape result;  // named so the local does not shadow shape()
+    result.assign(dims, dims + n);
+    return result;
+}
+
+static inline MatShape shape(const Mat& mat)
+{
+    return shape(mat.size.p, mat.dims);  // copies the mat.dims extents stored at mat.size.p
+}
+
+static inline MatShape shape(const MatSize& sz)
+{
+    return shape(sz.p, sz.dims());  // copies the sz.dims() extents stored at sz.p
+}
+
+static inline MatShape shape(const UMat& mat)
+{
+    return shape(mat.size.p, mat.dims);  // same as the Mat overload, reading the UMat's size header
+}
+
+#if 0  // issues with MatExpr wrapped into InputArray
+static inline
+MatShape shape(InputArray input)
+{
+    int sz[CV_MAX_DIM];
+    int ndims = input.sizend(sz);
+    return shape(sz, ndims);
+}
+#endif
+
+static inline bool is_neg(int i) { return i < 0; }  // predicate: true for negative values
+
+static inline MatShape shape(int a0, int a1=-1, int a2=-1, int a3=-1)
+{   // Builds a shape from up to four extents; every negative argument is dropped.
+    int extents[] = {a0, a1, a2, a3};
+    MatShape kept = shape(extents, 4);
+    kept.erase(std::remove_if(kept.begin(), kept.end(), is_neg), kept.end());
+    return kept;
+}
+
+static inline int total(const MatShape& shape, int start = -1, int end = -1)
+{
+    if (start == -1) start = 0;  // -1 means "from the first axis"
+    if (end == -1) end = (int)shape.size();  // -1 means "through the last axis"
+
+    if (shape.empty())
+        return 0;  // an empty shape yields 0 regardless of the requested range
+
+    int elems = 1;
+    // Reject negative bounds too: shape[i] is not range-checked, so start < 0 would read out of bounds.
+    CV_Assert(0 <= start && start <= end && end <= (int)shape.size());
+    for(int i = start; i < end; i++)
+    {
+        elems *= shape[i];
+    }
+    return elems;
+}
+
+static inline MatShape concat(const MatShape& a, const MatShape& b)
+{
+    MatShape joined(a);  // copy of the leading shape
+    // append b's extents after a's
+    joined.insert(joined.end(), b.begin(), b.end());
+    return joined;
+}
+
+static inline std::string toString(const MatShape& shape, const String& name = "")
+{   // Renders "<name> [ d0 d1 ... ]"; the name and its trailing space are omitted when name is empty.
+    std::ostringstream out;
+    if (!name.empty())
+        out << name << ' ';
+    out << '[';
+    for (MatShape::const_iterator dim = shape.begin(); dim != shape.end(); ++dim)
+        out << ' ' << *dim;
+    out << " ]";
+    return out.str();
+}
+static inline void print(const MatShape& shape, const String& name = "")
+{
+    std::cout << toString(shape, name) << std::endl;  // one line on stdout; std::endl also flushes
+}
+static inline std::ostream& operator<<(std::ostream &out, const MatShape& shape)
+{
+    const std::string text = toString(shape);  // unnamed form: "[ d0 d1 ... ]"
+    return out << text;
+}
+
+inline int clamp(int ax, int dims)
+{
+    return (ax >= 0) ? ax : dims + ax;  // negative axes count back from dims; the result is not range-checked
+}
+
+inline int clamp(int ax, const MatShape& shape)
+{
+    return clamp(ax, (int)shape.size());  // dims is the number of axes in shape
+}
+
+inline Range clamp(const Range& r, int axisSize)
+{   // Non-positive r.end counts back from axisSize (-1 maps to axisSize); r.start is floored at 0.
+    const int last = r.end > 0 ? std::min(r.end, axisSize) : axisSize + r.end + 1;
+    Range clamped(std::max(r.start, 0), last);
+    CV_Assert_N(clamped.start < clamped.end, clamped.end <= axisSize);
+    return clamped;
+}
+
+CV__DNN_INLINE_NS_END
+}
+}
+#endif
diff --git a/IPL/include/opencv/opencv2/dnn/utils/inference_engine.hpp b/IPL/include/opencv/opencv2/dnn/utils/inference_engine.hpp
new file mode 100644
index 0000000..7db93a9
--- /dev/null
+++ b/IPL/include/opencv/opencv2/dnn/utils/inference_engine.hpp
@@ -0,0 +1,64 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2018-2019, Intel Corporation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+
+#ifndef OPENCV_DNN_UTILS_INF_ENGINE_HPP
+#define OPENCV_DNN_UTILS_INF_ENGINE_HPP
+
+#include "../dnn.hpp"
+
+namespace cv { namespace dnn {
+CV__DNN_INLINE_NS_BEGIN
+
+
+/* Values for 'OPENCV_DNN_BACKEND_INFERENCE_ENGINE_TYPE' parameter */
+#define CV_DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_API     "NN_BUILDER"
+#define CV_DNN_BACKEND_INFERENCE_ENGINE_NGRAPH             "NGRAPH"
+
+/** @brief Returns Inference Engine internal backend API.
+ *
+ * See values of `CV_DNN_BACKEND_INFERENCE_ENGINE_*` macros.
+ *
+ * Default value is controlled through `OPENCV_DNN_BACKEND_INFERENCE_ENGINE_TYPE` runtime parameter (environment variable).
+ */
+CV_EXPORTS_W cv::String getInferenceEngineBackendType();
+
+/** @brief Specify Inference Engine internal backend API.
+ *
+ * See values of `CV_DNN_BACKEND_INFERENCE_ENGINE_*` macros.
+ *
+ * @returns previous value of internal backend API
+ */
+CV_EXPORTS_W cv::String setInferenceEngineBackendType(const cv::String& newBackendType);
+
+
+/** @brief Release a Myriad device (bound by OpenCV).
+ *
+ * A single Myriad device cannot be shared across multiple processes that use
+ * Inference Engine's Myriad plugin.
+ */
+CV_EXPORTS_W void resetMyriadDevice();
+
+
+/* Values for 'OPENCV_DNN_IE_VPU_TYPE' parameter */
+#define CV_DNN_INFERENCE_ENGINE_VPU_TYPE_UNSPECIFIED ""
+/// Intel(R) Movidius(TM) Neural Compute Stick, NCS (USB 03e7:2150), Myriad2 (https://software.intel.com/en-us/movidius-ncs)
+#define CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_2 "Myriad2"
+/// Intel(R) Neural Compute Stick 2, NCS2 (USB 03e7:2485), MyriadX (https://software.intel.com/ru-ru/neural-compute-stick)
+#define CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X "MyriadX"
+
+
+/** @brief Returns Inference Engine VPU type.
+ *
+ * See values of `CV_DNN_INFERENCE_ENGINE_VPU_TYPE_*` macros.
+ */
+CV_EXPORTS_W cv::String getInferenceEngineVPUType();
+
+
+CV__DNN_INLINE_NS_END
+}} // namespace
+
+#endif // OPENCV_DNN_UTILS_INF_ENGINE_HPP
diff --git a/IPL/include/opencv/opencv2/dnn/version.hpp b/IPL/include/opencv/opencv2/dnn/version.hpp
new file mode 100644
index 0000000..f5c7424
--- /dev/null
+++ b/IPL/include/opencv/opencv2/dnn/version.hpp
@@ -0,0 +1,21 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_DNN_VERSION_HPP
+#define OPENCV_DNN_VERSION_HPP
+
+/// Use with major OpenCV version only.
+#define OPENCV_DNN_API_VERSION 20200310
+
+#if !defined CV_DOXYGEN && !defined CV_STATIC_ANALYSIS && !defined CV_DNN_DONT_ADD_INLINE_NS
+#define CV__DNN_INLINE_NS __CV_CAT(dnn4_v, OPENCV_DNN_API_VERSION)
+#define CV__DNN_INLINE_NS_BEGIN namespace CV__DNN_INLINE_NS {
+#define CV__DNN_INLINE_NS_END }
+namespace cv { namespace dnn { namespace CV__DNN_INLINE_NS { } using namespace CV__DNN_INLINE_NS; }}
+#else
+#define CV__DNN_INLINE_NS_BEGIN
+#define CV__DNN_INLINE_NS_END
+#endif
+
+#endif  // OPENCV_DNN_VERSION_HPP
diff --git a/IPL/include/opencv/opencv2/dnn_superres.hpp b/IPL/include/opencv/opencv2/dnn_superres.hpp
new file mode 100644
index 0000000..953f90f
--- /dev/null
+++ b/IPL/include/opencv/opencv2/dnn_superres.hpp
@@ -0,0 +1,125 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef _OPENCV_DNN_SUPERRES_HPP_
+#define _OPENCV_DNN_SUPERRES_HPP_
+
+/** @defgroup dnn_superres DNN used for super resolution
+
+This module contains functionality for upscaling an image via convolutional neural networks.
+The following four models are implemented:
+
+- EDSR <https://arxiv.org/abs/1707.02921>
+- ESPCN <https://arxiv.org/abs/1609.05158>
+- FSRCNN <https://arxiv.org/abs/1608.00367>
+- LapSRN <https://arxiv.org/abs/1710.01992>
+
+*/
+
+#include "opencv2/core.hpp"
+#include "opencv2/dnn.hpp"
+
+namespace cv
+{
+namespace dnn_superres
+{
+
+//! @addtogroup dnn_superres
+//! @{
+
+/** @brief A class to upscale images via convolutional neural networks.
+The following four models are implemented:
+
+- edsr
+- espcn
+- fsrcnn
+- lapsrn
+ */
+
+class CV_EXPORTS_W DnnSuperResImpl
+{
+private:
+
+    /** @brief Net which holds the desired neural network
+     */
+    dnn::Net net;
+
+    std::string alg; //algorithm name
+
+    int sc; //scale factor
+
+    void reconstruct_YCrCb(InputArray inpImg, InputArray origImg, OutputArray outpImg, int scale);
+
+    void preprocess_YCrCb(InputArray inpImg, OutputArray outpImg);
+
+public:
+
+    /** @brief Static factory: the wrapped (CV_WRAP) counterpart of the empty constructor, for Python
+     */
+    CV_WRAP static Ptr<DnnSuperResImpl> create();
+
+    /** @brief Empty constructor
+     */
+    DnnSuperResImpl();
+
+    /** @brief Constructor which immediately sets the desired model
+    @param algo String containing one of the desired models:
+        - __edsr__
+        - __espcn__
+        - __fsrcnn__
+        - __lapsrn__
+    @param scale Integer specifying the upscale factor
+     */
+    DnnSuperResImpl(const String& algo, int scale);
+
+    /** @brief Read the model from the given path
+    @param path Path to the model file.
+    */
+    CV_WRAP void readModel(const String& path);
+
+    /** @brief Read the model from separate weights and definition files
+    @param weights Path to the model weights file.
+    @param definition Path to the model definition file.
+    */
+    void readModel(const String& weights, const String& definition);
+
+    /** @brief Set desired model
+    @param algo String containing one of the desired models:
+        - __edsr__
+        - __espcn__
+        - __fsrcnn__
+        - __lapsrn__
+    @param scale Integer specifying the upscale factor
+     */
+    CV_WRAP void setModel(const String& algo, int scale);
+
+    /** @brief Upsample via neural network
+    @param img Image to upscale
+    @param result Destination upscaled image
+     */
+    CV_WRAP void upsample(InputArray img, OutputArray result);
+
+    /** @brief Upsample via neural network of multiple outputs
+    @param img Image to upscale
+    @param imgs_new Destination upscaled images
+    @param scale_factors Scaling factors of the output nodes
+    @param node_names Names of the output nodes in the neural network
+    */
+    CV_WRAP void upsampleMultioutput(InputArray img, std::vector<Mat> &imgs_new, const std::vector<int>& scale_factors, const std::vector<String>& node_names);
+
+    /** @brief Returns the scale factor of the model.
+    @return Current scale factor.
+    */
+    CV_WRAP int getScale();
+
+    /** @brief Returns the algorithm of the model.
+    @return Current algorithm.
+    */
+    CV_WRAP String getAlgorithm();
+};
+
+//! @} dnn_superres
+
+}} // cv::dnn_superres::
+#endif
diff --git a/IPL/include/opencv/opencv2/dpm.hpp b/IPL/include/opencv/opencv2/dpm.hpp
index 387a311..ab604ab 100644
--- a/IPL/include/opencv/opencv2/dpm.hpp
+++ b/IPL/include/opencv/opencv2/dpm.hpp
@@ -96,6 +96,9 @@ namespace cv
 namespace dpm
 {
 
+//! @addtogroup dpm
+//! @{
+
 /** @brief This is a C++ abstract class, it provides external user API to work with DPM.
  */
 class CV_EXPORTS_W DPMDetector
@@ -142,6 +145,8 @@ class CV_EXPORTS_W DPMDetector
     virtual ~DPMDetector(){}
 };
 
+//! @}
+
 } // namespace dpm
 } // namespace cv
 
diff --git a/IPL/include/opencv/opencv2/face.hpp b/IPL/include/opencv/opencv2/face.hpp
index d7237bc..8c4bda3 100644
--- a/IPL/include/opencv/opencv2/face.hpp
+++ b/IPL/include/opencv/opencv2/face.hpp
@@ -40,7 +40,7 @@ the use of this software, even if advised of the possibility of such damage.
 #define __OPENCV_FACE_HPP__
 
 /**
-@defgroup face Face Recognition
+@defgroup face Face Analysis
 
 - @ref face_changelog
 - @ref tutorial_face_main
@@ -70,7 +70,7 @@ which is available since the 2.4 release. I suggest you take a look at its descr
 
 Algorithm provides the following features for all derived classes:
 
--   So called “virtual constructor”. That is, each Algorithm derivative is registered at program
+-   So called "virtual constructor". That is, each Algorithm derivative is registered at program
     start and you can get the list of registered algorithms and create instance of a particular
     algorithm by its name (see Algorithm::create). If you plan to add your own algorithms, it is
     good practice to add a unique prefix to your algorithms to distinguish them from other
@@ -112,8 +112,8 @@ Here is an example of setting a threshold for the Eigenfaces method, when creati
 int num_components = 10;
 double threshold = 10.0;
 // Then if you want to have a cv::FaceRecognizer with a confidence threshold,
-// create the concrete implementation with the appropiate parameters:
-Ptr<FaceRecognizer> model = createEigenFaceRecognizer(num_components, threshold);
+// create the concrete implementation with the appropriate parameters:
+Ptr<FaceRecognizer> model = EigenFaceRecognizer::create(num_components, threshold);
 @endcode
 
 Sometimes it's impossible to train the model, just to experiment with threshold values. Thanks to
@@ -131,7 +131,7 @@ If you've set the threshold to 0.0 as we did above, then:
 
 @code
 //
-Mat img = imread("person1/3.jpg", CV_LOAD_IMAGE_GRAYSCALE);
+Mat img = imread("person1/3.jpg", IMREAD_GRAYSCALE);
 // Get a prediction from the model. Note: We've set a threshold of 0.0 above,
 // since the distance is almost always larger than 0.0, you'll get -1 as
 // label, which indicates, this face is unknown
@@ -148,7 +148,7 @@ Since every FaceRecognizer is a Algorithm, you can use Algorithm::name to get th
 
 @code
 // Create a FaceRecognizer:
-Ptr<FaceRecognizer> model = createEigenFaceRecognizer();
+Ptr<FaceRecognizer> model = EigenFaceRecognizer::create();
 // And here's how to get its name:
 String name = model->name();
 @endcode
@@ -162,7 +162,7 @@ class CV_EXPORTS_W FaceRecognizer : public Algorithm
     @param src The training images, that means the faces you want to learn. The data has to be
     given as a vector\<Mat\>.
     @param labels The labels corresponding to the images have to be given either as a vector\<int\>
-    or a
+    or a Mat of type CV_32SC1.
 
     The following source code snippet shows you how to learn a Fisherfaces model on a given set of
     images. The images are read with imread and pushed into a std::vector\<Mat\>. The labels of each
@@ -175,14 +175,16 @@ class CV_EXPORTS_W FaceRecognizer : public Algorithm
     // holds images and labels
     vector<Mat> images;
     vector<int> labels;
+    // using Mat of type CV_32SC1
+    // Mat labels(number_of_samples, 1, CV_32SC1);
     // images for first person
-    images.push_back(imread("person0/0.jpg", CV_LOAD_IMAGE_GRAYSCALE)); labels.push_back(0);
-    images.push_back(imread("person0/1.jpg", CV_LOAD_IMAGE_GRAYSCALE)); labels.push_back(0);
-    images.push_back(imread("person0/2.jpg", CV_LOAD_IMAGE_GRAYSCALE)); labels.push_back(0);
+    images.push_back(imread("person0/0.jpg", IMREAD_GRAYSCALE)); labels.push_back(0);
+    images.push_back(imread("person0/1.jpg", IMREAD_GRAYSCALE)); labels.push_back(0);
+    images.push_back(imread("person0/2.jpg", IMREAD_GRAYSCALE)); labels.push_back(0);
     // images for second person
-    images.push_back(imread("person1/0.jpg", CV_LOAD_IMAGE_GRAYSCALE)); labels.push_back(1);
-    images.push_back(imread("person1/1.jpg", CV_LOAD_IMAGE_GRAYSCALE)); labels.push_back(1);
-    images.push_back(imread("person1/2.jpg", CV_LOAD_IMAGE_GRAYSCALE)); labels.push_back(1);
+    images.push_back(imread("person1/0.jpg", IMREAD_GRAYSCALE)); labels.push_back(1);
+    images.push_back(imread("person1/1.jpg", IMREAD_GRAYSCALE)); labels.push_back(1);
+    images.push_back(imread("person1/2.jpg", IMREAD_GRAYSCALE)); labels.push_back(1);
     @endcode
 
     Now that you have read some images, we can create a new FaceRecognizer. In this example I'll create
@@ -192,7 +194,7 @@ class CV_EXPORTS_W FaceRecognizer : public Algorithm
     // Create a new Fisherfaces model and retain all available Fisherfaces,
     // this is the most common usage of this specific FaceRecognizer:
     //
-    Ptr<FaceRecognizer> model =  createFisherFaceRecognizer();
+    Ptr<FaceRecognizer> model =  FisherFaceRecognizer::create();
     @endcode
 
     And finally train it on the given dataset (the face images and labels):
@@ -211,7 +213,7 @@ class CV_EXPORTS_W FaceRecognizer : public Algorithm
     @param src The training images, that means the faces you want to learn. The data has to be given
     as a vector\<Mat\>.
     @param labels The labels corresponding to the images have to be given either as a vector\<int\> or
-    a
+    a Mat of type CV_32SC1.
 
     This method updates a (probably trained) FaceRecognizer, but only if the algorithm supports it. The
     Local Binary Patterns Histograms (LBPH) recognizer (see createLBPHFaceRecognizer) can be updated.
@@ -223,7 +225,7 @@ class CV_EXPORTS_W FaceRecognizer : public Algorithm
     // Create a new LBPH model (it can be updated) and use the default parameters,
     // this is the most common usage of this specific FaceRecognizer:
     //
-    Ptr<FaceRecognizer> model =  createLBPHFaceRecognizer();
+    Ptr<FaceRecognizer> model =  LBPHFaceRecognizer::create();
     // This is the common interface to train all of the available cv::FaceRecognizer
     // implementations:
     //
@@ -241,7 +243,7 @@ class CV_EXPORTS_W FaceRecognizer : public Algorithm
     // with the new features extracted from newImages!
     @endcode
 
-    Calling update on an Eigenfaces model (see createEigenFaceRecognizer), which doesn't support
+    Calling update on an Eigenfaces model (see EigenFaceRecognizer::create), which doesn't support
     updating, will throw an error similar to:
 
     @code
@@ -256,7 +258,7 @@ class CV_EXPORTS_W FaceRecognizer : public Algorithm
     CV_WRAP virtual void update(InputArrayOfArrays src, InputArray labels);
 
     /** @overload */
-    CV_WRAP int predict(InputArray src) const;
+    CV_WRAP_AS(predict_label) int predict(InputArray src) const;
 
 
     /** @brief Predicts a label and associated confidence (e.g. distance) for a given input image.
@@ -275,7 +277,7 @@ class CV_EXPORTS_W FaceRecognizer : public Algorithm
     // Do your initialization here (create the cv::FaceRecognizer model) ...
     // ...
     // Read in a sample image:
-    Mat img = imread("person1/3.jpg", CV_LOAD_IMAGE_GRAYSCALE);
+    Mat img = imread("person1/3.jpg", IMREAD_GRAYSCALE);
     // And get a prediction from the cv::FaceRecognizer:
     int predicted = model->predict(img);
     @endcode
@@ -286,7 +288,7 @@ class CV_EXPORTS_W FaceRecognizer : public Algorithm
     using namespace cv;
     // Do your initialization here (create the cv::FaceRecognizer model) ...
     // ...
-    Mat img = imread("person1/3.jpg", CV_LOAD_IMAGE_GRAYSCALE);
+    Mat img = imread("person1/3.jpg", IMREAD_GRAYSCALE);
     // Some variables for the predicted label and associated confidence (e.g. distance):
     int predicted_label = -1;
     double predicted_confidence = 0.0;
@@ -300,12 +302,11 @@ class CV_EXPORTS_W FaceRecognizer : public Algorithm
     /** @brief - if implemented - send all result of prediction to collector that can be used for somehow custom result handling
     @param src Sample image to get a prediction from.
     @param collector User-defined collector object that accepts all results
-    @param state - optional user-defined state token that should be passed back from FaceRecognizer implementation
 
     To implement this method u just have to do same internal cycle as in predict(InputArray src, CV_OUT int &label, CV_OUT double &confidence) but
     not try to get "best@ result, just resend it to caller side with given collector
     */
-    CV_WRAP virtual void predict(InputArray src, Ptr<PredictCollector> collector, const int state = 0) const = 0;
+    CV_WRAP_AS(predict_collect) virtual void predict(InputArray src, Ptr<PredictCollector> collector) const = 0;
 
     /** @brief Saves a FaceRecognizer and its model state.
 
@@ -319,7 +320,7 @@ class CV_EXPORTS_W FaceRecognizer : public Algorithm
     The suffix const means that prediction does not affect the internal model state, so the method can
     be safely called from within different threads.
      */
-    CV_WRAP virtual void save(const String& filename) const;
+    CV_WRAP virtual void write(const String& filename) const;
 
     /** @brief Loads a FaceRecognizer and its model state.
 
@@ -328,16 +329,19 @@ class CV_EXPORTS_W FaceRecognizer : public Algorithm
     FaceRecognizer::load(FileStorage& fs) in turn gets called by
     FaceRecognizer::load(const String& filename), to ease saving a model.
      */
-    CV_WRAP virtual void load(const String& filename);
+    CV_WRAP virtual void read(const String& filename);
 
     /** @overload
     Saves this model to a given FileStorage.
     @param fs The FileStorage to store this FaceRecognizer to.
     */
-    virtual void save(FileStorage& fs) const = 0;
+    virtual void write(FileStorage& fs) const CV_OVERRIDE = 0;
 
     /** @overload */
-    virtual void load(const FileStorage& fs) = 0;
+    virtual void read(const FileNode& fn) CV_OVERRIDE = 0;
+
+    /** @overload */
+    virtual bool empty() const CV_OVERRIDE = 0;
 
     /** @brief Sets string info for the specified model's label.
 
@@ -358,8 +362,10 @@ class CV_EXPORTS_W FaceRecognizer : public Algorithm
     info.
      */
     CV_WRAP virtual std::vector<int> getLabelsByString(const String& str) const;
-    /** @brief threshhold parameter accessor - required for default BestMinDist collector */
+    /** @brief threshold parameter accessor - required for default BestMinDist collector */
     virtual double getThreshold() const = 0;
+    /** @brief Sets threshold of model */
+    virtual void setThreshold(double val) = 0;
 protected:
     // Stored pairs "label id - string info"
     std::map<int, String> _labelsInfo;
@@ -370,5 +376,11 @@ class CV_EXPORTS_W FaceRecognizer : public Algorithm
 }}
 
 #include "opencv2/face/facerec.hpp"
-
-#endif
+#include "opencv2/face/facemark.hpp"
+#include "opencv2/face/facemark_train.hpp"
+#include "opencv2/face/facemarkLBF.hpp"
+#include "opencv2/face/facemarkAAM.hpp"
+#include "opencv2/face/face_alignment.hpp"
+#include "opencv2/face/mace.hpp"
+
+#endif // __OPENCV_FACE_HPP__
diff --git a/IPL/include/opencv/opencv2/face/bif.hpp b/IPL/include/opencv/opencv2/face/bif.hpp
new file mode 100644
index 0000000..d4f0ec5
--- /dev/null
+++ b/IPL/include/opencv/opencv2/face/bif.hpp
@@ -0,0 +1,83 @@
+/*
+By downloading, copying, installing or using the software you agree to this license.
+If you do not agree to this license, do not download, install,
+copy or use the software.
+
+
+                          License Agreement
+               For Open Source Computer Vision Library
+                       (3-clause BSD License)
+
+Copyright (C) 2000-2015, Intel Corporation, all rights reserved.
+Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved.
+Copyright (C) 2009-2015, NVIDIA Corporation, all rights reserved.
+Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved.
+Copyright (C) 2015, OpenCV Foundation, all rights reserved.
+Copyright (C) 2015, Itseez Inc., all rights reserved.
+Third party copyrights are property of their respective owners.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+  * Redistributions of source code must retain the above copyright notice,
+    this list of conditions and the following disclaimer.
+
+  * Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+  * Neither the names of the copyright holders nor the names of the contributors
+    may be used to endorse or promote products derived from this software
+    without specific prior written permission.
+
+This software is provided by the copyright holders and contributors "as is" and
+any express or implied warranties, including, but not limited to, the implied
+warranties of merchantability and fitness for a particular purpose are disclaimed.
+In no event shall copyright holders or contributors be liable for any direct,
+indirect, incidental, special, exemplary, or consequential damages
+(including, but not limited to, procurement of substitute goods or services;
+loss of use, data, or profits; or business interruption) however caused
+and on any theory of liability, whether in contract, strict liability,
+or tort (including negligence or otherwise) arising in any way out of
+the use of this software, even if advised of the possibility of such damage.
+*/
+
+#ifndef __OPENCV_BIF_HPP__
+#define __OPENCV_BIF_HPP__
+
+#include "opencv2/core.hpp"
+
+namespace cv {
+namespace face {
+
+/** Implementation of bio-inspired features (BIF) from the paper:
+ *  Guo, Guodong, et al. "Human age estimation using bio-inspired features."
+ *  Computer Vision and Pattern Recognition, 2009. CVPR 2009.
+ */
+class CV_EXPORTS_W BIF : public Algorithm {
+public:
+    /** @returns The number of filter bands used for computing BIF. */
+    CV_WRAP virtual int getNumBands() const = 0;
+
+    /** @returns The number of image rotations. */
+    CV_WRAP virtual int getNumRotations() const = 0;
+
+    /** Computes features from the input image.
+     *  @param image Input image (CV_32FC1).
+     *  @param features Feature vector (CV_32FC1).
+     */
+    CV_WRAP virtual void compute(InputArray image,
+                                 OutputArray features) const = 0;
+
+    /**
+     * @param num_bands The number of filter bands (<=8) used for computing BIF.
+     * @param num_rotations The number of image rotations for computing BIF.
+     * @returns Object for computing BIF.
+     */
+    CV_WRAP static Ptr<BIF> create(int num_bands = 8, int num_rotations = 12);
+};
+
+}  // namespace face
+}  // namespace cv
+
+#endif  // #ifndef __OPENCV_BIF_HPP__
diff --git a/IPL/include/opencv/opencv2/face/face_alignment.hpp b/IPL/include/opencv/opencv2/face/face_alignment.hpp
new file mode 100644
index 0000000..e96081c
--- /dev/null
+++ b/IPL/include/opencv/opencv2/face/face_alignment.hpp
@@ -0,0 +1,60 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+#ifndef __OPENCV_FACE_ALIGNMENT_HPP__
+#define __OPENCV_FACE_ALIGNMENT_HPP__
+
+#include "opencv2/face/facemark_train.hpp"
+
+namespace cv{
+namespace face{
+class CV_EXPORTS_W FacemarkKazemi : public Facemark
+{
+public:
+    struct CV_EXPORTS Params
+    {
+        /**
+        * \brief Constructor
+        */
+        Params();
+        /// cascade_depth This stores the depth of cascade used for training.
+        unsigned long cascade_depth;
+        /// tree_depth This stores the max height of the regression tree built.
+        unsigned long tree_depth;
+        /// num_trees_per_cascade_level This stores number of trees fit per cascade level.
+        unsigned long num_trees_per_cascade_level;
+        /// learning_rate stores the learning rate in gradient boosting, also referred as shrinkage.
+        float learning_rate;
+        /// oversampling_amount stores number of initialisations used to create training samples.
+        unsigned long oversampling_amount;
+        /// num_test_coordinates stores number of test coordinates.
+        unsigned long num_test_coordinates;
+        /// lambda stores a value to calculate probability of closeness of two coordinates.
+        float lambda;
+        /// num_test_splits stores number of random test splits generated.
+        unsigned long num_test_splits;
+        /// configfile stores the name of the file containing the values of training parameters
+        String configfile;
+    };
+    static Ptr<FacemarkKazemi> create(const FacemarkKazemi::Params &parameters = FacemarkKazemi::Params());
+    virtual ~FacemarkKazemi();
+
+    /** @brief This function is used to train the model using gradient boosting to get a cascade of regressors
+    *which can then be used to predict shape.
+    *@param images A vector of type cv::Mat which stores the images which are used in training samples.
+    *@param landmarks A vector of vectors of type cv::Point2f which stores the landmarks detected in a particular image.
+    *@param scale A size of type cv::Size to which all images and landmarks have to be scaled to.
+    *@param configfile A variable of type std::string which stores the name of the file storing parameters for training the model.
+    *@param modelFilename A variable of type std::string which stores the name of the trained model file that has to be saved.
+    *@returns A boolean value. The function returns true if the model is trained properly or false if it is not trained.
+    */
+    virtual bool training(std::vector<Mat>& images, std::vector< std::vector<Point2f> >& landmarks,std::string configfile,Size scale,std::string modelFilename = "face_landmarks.dat")=0;
+
+    /// set the custom face detector
+    virtual bool setFaceDetector(bool(*f)(InputArray , OutputArray, void*), void* userData)=0;
+    /// get faces using the custom detector
+    virtual bool getFaces(InputArray image, OutputArray faces)=0;
+};
+
+}} // namespace
+#endif
diff --git a/IPL/include/opencv/opencv2/face/facemark.hpp b/IPL/include/opencv/opencv2/face/facemark.hpp
new file mode 100644
index 0000000..86e9384
--- /dev/null
+++ b/IPL/include/opencv/opencv2/face/facemark.hpp
@@ -0,0 +1,95 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+/*
+This file was part of GSoC Project: Facemark API for OpenCV
+Final report: https://gist.github.com/kurnianggoro/74de9121e122ad0bd825176751d47ecc
+Student: Laksono Kurnianggoro
+Mentor: Delia Passalacqua
+*/
+
+#ifndef __OPENCV_FACELANDMARK_HPP__
+#define __OPENCV_FACELANDMARK_HPP__
+
+/**
+@defgroup face Face Analysis
+- @ref tutorial_table_of_content_facemark
+- The Facemark API
+*/
+
+#include "opencv2/core.hpp"
+#include <vector>
+
+
+namespace cv {
+namespace face {
+
+
+/** @brief Abstract base class for all facemark models
+
+To utilize this API in your program, please take a look at the @ref tutorial_table_of_content_facemark
+### Description
+
+Facemark is a base class which provides universal access to any specific facemark algorithm.
+Therefore, the users should declare a desired algorithm before they can use it in their application.
+
+Here is an example on how to declare a facemark algorithm:
+@code
+// Using Facemark in your code:
+Ptr<Facemark> facemark = createFacemarkLBF();
+@endcode
+
+The typical pipeline for facemark detection is as follows:
+- Load the trained model using Facemark::loadModel.
+- Perform the fitting on an image via Facemark::fit.
+*/
+class CV_EXPORTS_W Facemark : public virtual Algorithm
+{
+public:
+
+    /** @brief A function to load the trained model before the fitting process.
+    @param model A string representing the filename of a trained model.
+
+    <B>Example of usage</B>
+    @code
+    facemark->loadModel("../data/lbf.model");
+    @endcode
+    */
+    CV_WRAP virtual void loadModel( String model ) = 0;
+    // virtual void saveModel(String fs)=0;
+
+    /** @brief Detect facial landmarks from an image.
+    @param image Input image.
+    @param faces Input regions of interest of the detected faces.
+    Each face is stored in cv::Rect container.
+    @param landmarks The detected landmark points for each face.
+
+    <B>Example of usage</B>
+    @code
+    Mat image = imread("image.jpg");
+    std::vector<Rect> faces;
+    std::vector<std::vector<Point2f> > landmarks;
+    facemark->fit(image, faces, landmarks);
+    @endcode
+    */
+    CV_WRAP virtual bool fit( InputArray image,
+                              InputArray faces,
+                              OutputArrayOfArrays landmarks) = 0;
+}; /* Facemark*/
+
+
+//! construct an AAM facemark detector
+CV_EXPORTS_W Ptr<Facemark> createFacemarkAAM();
+
+//! construct an LBF facemark detector
+CV_EXPORTS_W Ptr<Facemark> createFacemarkLBF();
+
+//! construct a Kazemi facemark detector
+CV_EXPORTS_W Ptr<Facemark> createFacemarkKazemi();
+
+
+} // face
+} // cv
+
+#endif //__OPENCV_FACELANDMARK_HPP__
diff --git a/IPL/include/opencv/opencv2/face/facemarkAAM.hpp b/IPL/include/opencv/opencv2/face/facemarkAAM.hpp
new file mode 100644
index 0000000..6f96e4a
--- /dev/null
+++ b/IPL/include/opencv/opencv2/face/facemarkAAM.hpp
@@ -0,0 +1,162 @@
+/*
+By downloading, copying, installing or using the software you agree to this
+license. If you do not agree to this license, do not download, install,
+copy or use the software.
+                          License Agreement
+               For Open Source Computer Vision Library
+                       (3-clause BSD License)
+Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+Third party copyrights are property of their respective owners.
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+  * Redistributions of source code must retain the above copyright notice,
+    this list of conditions and the following disclaimer.
+  * Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+  * Neither the names of the copyright holders nor the names of the contributors
+    may be used to endorse or promote products derived from this software
+    without specific prior written permission.
+This software is provided by the copyright holders and contributors "as is" and
+any express or implied warranties, including, but not limited to, the implied
+warranties of merchantability and fitness for a particular purpose are
+disclaimed. In no event shall copyright holders or contributors be liable for
+any direct, indirect, incidental, special, exemplary, or consequential damages
+(including, but not limited to, procurement of substitute goods or services;
+loss of use, data, or profits; or business interruption) however caused
+and on any theory of liability, whether in contract, strict liability,
+or tort (including negligence or otherwise) arising in any way out of
+the use of this software, even if advised of the possibility of such damage.
+
+This file was part of GSoC Project: Facemark API for OpenCV
+Final report: https://gist.github.com/kurnianggoro/74de9121e122ad0bd825176751d47ecc
+Student: Laksono Kurnianggoro
+Mentor: Delia Passalacqua
+*/
+
+#ifndef __OPENCV_FACEMARK_AAM_HPP__
+#define __OPENCV_FACEMARK_AAM_HPP__
+
+#include "opencv2/face/facemark_train.hpp"
+namespace cv {
+namespace face {
+
+//! @addtogroup face
+//! @{
+
+class CV_EXPORTS_W FacemarkAAM : public FacemarkTrain
+{
+public:
+    struct CV_EXPORTS Params
+    {
+        /**
+        * \brief Constructor
+        */
+        Params();
+
+        /**
+        * \brief Read parameters from file, currently unused
+        */
+        void read(const FileNode& /*fn*/);
+
+        /**
+        * \brief Write parameters to file, currently unused
+        */
+        void write(FileStorage& /*fs*/) const;
+
+        std::string model_filename;
+        int m;
+        int n;
+        int n_iter;
+        bool verbose;
+        bool save_model;
+        int max_m, max_n, texture_max_m;
+        std::vector<float>scales;
+    };
+
+    /**
+    * \brief Optional parameter for fitting process.
+    */
+    struct CV_EXPORTS Config
+    {
+        Config( Mat rot = Mat::eye(2,2,CV_32F),
+                Point2f trans = Point2f(0.0f, 0.0f),
+                float scaling = 1.0f,
+                int scale_id=0
+        );
+
+        Mat R;
+        Point2f t;
+        float scale;
+        int model_scale_idx;
+
+    };
+
+    /**
+    * \brief Data container for the facemark::getData function
+    */
+    struct CV_EXPORTS Data
+    {
+        std::vector<Point2f> s0;
+    };
+
+    /**
+    * \brief The model of AAM Algorithm
+    */
+    struct CV_EXPORTS Model
+    {
+        std::vector<float>scales;
+        //!<  defines the scales considered to build the model
+
+        /*warping*/
+        std::vector<Vec3i> triangles;
+        //!<  each element contains 3 values, represent index of facemarks that construct one triangle (obtained using delaunay triangulation)
+
+        struct Texture{
+            int max_m; //!<  unused delete
+            Rect resolution;
+            //!<  resolution of the current scale
+            Mat A;
+            //!<  gray values from all face region in the dataset, projected in PCA space
+            Mat A0;
+            //!<  average of gray values from all face region in the dataset
+            Mat AA;
+            //!<  gray values from all eroded face region in the dataset, projected in PCA space
+            Mat AA0;
+            //!<  average of gray values from all eroded face region in the dataset
+
+            std::vector<std::vector<Point> > textureIdx;
+            //!<  index for warping of each delaunay triangle region constructed by 3 facemarks
+            std::vector<Point2f> base_shape;
+            //!<  basic shape, normalized to be fit in an image with current detection resolution
+            std::vector<int> ind1;
+            //!<  index of pixels for mapping process to obtain the gray values of face region
+            std::vector<int> ind2;
+            //!<  index of pixels for mapping process to obtain the gray values of eroded face region
+        };
+        std::vector<Texture> textures;
+        //!<  a container to hold the texture data for each scale of fitting
+
+        /*shape*/
+        std::vector<Point2f> s0;
+        //!<  the basic shape obtained from training dataset
+        Mat S,Q;
+        //!<  the encoded shapes from training data
+
+    };
+
+    //! overload with additional Config structures
+    virtual bool fitConfig( InputArray image, InputArray roi, OutputArrayOfArrays _landmarks, const std::vector<Config> &runtime_params ) = 0;
+
+
+    //!  initializer
+    static Ptr<FacemarkAAM> create(const FacemarkAAM::Params &parameters = FacemarkAAM::Params() );
+    virtual ~FacemarkAAM() {}
+
+}; /* AAM */
+
+//! @}
+
+} /* namespace face */
+} /* namespace cv */
+#endif
diff --git a/IPL/include/opencv/opencv2/face/facemarkLBF.hpp b/IPL/include/opencv/opencv2/face/facemarkLBF.hpp
new file mode 100644
index 0000000..7f4cd32
--- /dev/null
+++ b/IPL/include/opencv/opencv2/face/facemarkLBF.hpp
@@ -0,0 +1,120 @@
+/*
+By downloading, copying, installing or using the software you agree to this
+license. If you do not agree to this license, do not download, install,
+copy or use the software.
+                          License Agreement
+               For Open Source Computer Vision Library
+                       (3-clause BSD License)
+Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+Third party copyrights are property of their respective owners.
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+  * Redistributions of source code must retain the above copyright notice,
+    this list of conditions and the following disclaimer.
+  * Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+  * Neither the names of the copyright holders nor the names of the contributors
+    may be used to endorse or promote products derived from this software
+    without specific prior written permission.
+This software is provided by the copyright holders and contributors "as is" and
+any express or implied warranties, including, but not limited to, the implied
+warranties of merchantability and fitness for a particular purpose are
+disclaimed. In no event shall copyright holders or contributors be liable for
+any direct, indirect, incidental, special, exemplary, or consequential damages
+(including, but not limited to, procurement of substitute goods or services;
+loss of use, data, or profits; or business interruption) however caused
+and on any theory of liability, whether in contract, strict liability,
+or tort (including negligence or otherwise) arising in any way out of
+the use of this software, even if advised of the possibility of such damage.
+
+This file was part of GSoC Project: Facemark API for OpenCV
+Final report: https://gist.github.com/kurnianggoro/74de9121e122ad0bd825176751d47ecc
+Student: Laksono Kurnianggoro
+Mentor: Delia Passalacqua
+*/
+
+#ifndef __OPENCV_FACEMARK_LBF_HPP__
+#define __OPENCV_FACEMARK_LBF_HPP__
+
+#include "opencv2/face/facemark_train.hpp"
+
+namespace cv {
+namespace face {
+
+//! @addtogroup face
+//! @{
+
+class CV_EXPORTS_W FacemarkLBF : public FacemarkTrain
+{
+public:
+    struct CV_EXPORTS Params
+    {
+        /**
+        * \brief Constructor
+        */
+        Params();
+
+        double shape_offset;
+        //!<  offset for the loaded face landmark points
+        String cascade_face;
+        //!<  filename of the face detector model
+        bool verbose;
+        //!< show the training print-out
+
+        int n_landmarks;
+        //!<  number of landmark points
+        int initShape_n;
+        //!<  multiplier for augmenting the training data
+
+        int stages_n;
+        //!<  number of refinement stages
+        int tree_n;
+        //!<  number of trees in the model for each landmark point refinement
+        int tree_depth;
+        //!<  the depth of decision tree, defines the size of feature
+        double bagging_overlap;
+        //!<  overlap ratio for training the LBF feature
+
+        std::string model_filename;
+        //!<  filename where the trained model will be saved
+        bool save_model; //!< flag to save the trained model or not
+        unsigned int seed; //!< seed for shuffling the training data
+
+        std::vector<int> feats_m;
+        std::vector<double> radius_m;
+        std::vector<int> pupils[2];
+        //!<  index of facemark points on pupils of left and right eye
+
+        Rect detectROI;
+
+        void read(const FileNode& /*fn*/);
+        void write(FileStorage& /*fs*/) const;
+
+    };
+
+    class BBox {
+    public:
+        BBox();
+        ~BBox();
+        BBox(double x, double y, double w, double h);
+
+        Mat project(const Mat &shape) const;
+        Mat reproject(const Mat &shape) const;
+
+        double x, y;
+        double x_center, y_center;
+        double x_scale, y_scale;
+        double width, height;
+    };
+
+    static Ptr<FacemarkLBF> create(const FacemarkLBF::Params &parameters = FacemarkLBF::Params() );
+    virtual ~FacemarkLBF(){};
+}; /* LBF */
+
+//! @}
+
+} /* namespace face */
+}/* namespace cv */
+
+#endif
diff --git a/IPL/include/opencv/opencv2/face/facemark_train.hpp b/IPL/include/opencv/opencv2/face/facemark_train.hpp
new file mode 100644
index 0000000..d6e27e9
--- /dev/null
+++ b/IPL/include/opencv/opencv2/face/facemark_train.hpp
@@ -0,0 +1,392 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+/*
+This file was part of GSoC Project: Facemark API for OpenCV
+Final report: https://gist.github.com/kurnianggoro/74de9121e122ad0bd825176751d47ecc
+Student: Laksono Kurnianggoro
+Mentor: Delia Passalacqua
+*/
+
+#ifndef __OPENCV_FACELANDMARKTRAIN_HPP__
+#define __OPENCV_FACELANDMARKTRAIN_HPP__
+
+/**
+@defgroup face Face Analysis
+- @ref tutorial_table_of_content_facemark
+- The Facemark API
+*/
+
+#include "opencv2/face/facemark.hpp"
+#include "opencv2/objdetect.hpp"
+#include <vector>
+#include <string>
+
+
+namespace cv {
+namespace face {
+
+//! @addtogroup face
+//! @{
+
+typedef bool(*FN_FaceDetector)(InputArray, OutputArray, void* userData);
+
+struct CParams{
+    String cascade; //!<  the face detector
+    double scaleFactor; //!< Parameter specifying how much the image size is reduced at each image scale.
+    int minNeighbors; //!< Parameter specifying how many neighbors each candidate rectangle should have to retain it.
+    Size minSize; //!< Minimum possible object size.
+    Size maxSize; //!< Maximum possible object size.
+
+    CV_EXPORTS CParams(
+        String cascade_model,
+        double sf = 1.1,
+        int minN = 3,
+        Size minSz = Size(30, 30),
+        Size maxSz = Size()
+    );
+
+    CascadeClassifier face_cascade;
+};
+
+/** @brief Default face detector
+This function is mainly utilized by the implementation of a Facemark Algorithm.
+End users are advised to use function Facemark::getFaces which can be manually defined
+and circumvented to the algorithm by Facemark::setFaceDetector.
+
+@param image The input image to be processed.
+@param faces Output of the function which represent region of interest of the detected faces.
+Each face is stored in cv::Rect container.
+@param params detector parameters
+
+<B>Example of usage</B>
+@code
+std::vector<cv::Rect> faces;
+CParams params("haarcascade_frontalface_alt.xml");
+cv::face::getFaces(frame, faces, &params);
+for(int j=0;j<faces.size();j++){
+    cv::rectangle(frame, faces[j], cv::Scalar(255,0,255));
+}
+cv::imshow("detection", frame);
+@endcode
+*/
+CV_EXPORTS bool getFaces(InputArray image, OutputArray faces, CParams* params);
+
+CV_EXPORTS_W bool getFacesHAAR(InputArray image, OutputArray faces, const String& face_cascade_name);
+
+/** @brief A utility to load list of paths to training image and annotation file.
+@param imageList The specified file contains paths to the training images.
+@param annotationList The specified file contains paths to the training annotations.
+@param images The loaded paths of training images.
+@param annotations The loaded paths of annotation files.
+
+Example of usage:
+@code
+String imageFiles = "images_path.txt";
+String ptsFiles = "annotations_path.txt";
+std::vector<String> images_train;
+std::vector<String> landmarks_train;
+loadDatasetList(imageFiles,ptsFiles,images_train,landmarks_train);
+@endcode
+*/
+CV_EXPORTS_W bool loadDatasetList(String imageList,
+                                  String annotationList,
+                                  std::vector<String> & images,
+                                  std::vector<String> & annotations);
+
+/** @brief A utility to load facial landmark dataset from a single file.
+
+@param filename The filename of a file that contains the dataset information.
+Each line contains the filename of an image followed by
+pairs of x and y values of facial landmarks points separated by a space.
+Example
+@code
+/home/user/ibug/image_003_1.jpg 336.820955 240.864510 334.238298 260.922709 335.266918 ...
+/home/user/ibug/image_005_1.jpg 376.158428 230.845712 376.736984 254.924635 383.265403 ...
+@endcode
+@param images A vector where each element represent the filename of image in the dataset.
+Images are not loaded by default to save the memory.
+@param facePoints The loaded landmark points for all training data.
+@param delim Delimiter between each element, the default value is a whitespace.
+@param offset An offset value to adjust the loaded points.
+
+<B>Example of usage</B>
+@code
+// each line of the file: an image path followed by its landmark x y pairs
+cv::String datasetFile = "../data/dataset_train.txt";
+std::vector<String> images;
+std::vector<std::vector<Point2f> > facePoints;
+loadTrainingData(datasetFile, images, facePoints, ' ', 0.0f);
+@endcode
+*/
+CV_EXPORTS_W bool loadTrainingData( String filename , std::vector<String> & images,
+                                    OutputArray facePoints,
+                                    char delim = ' ', float offset = 0.0f);
+
+/** @brief A utility to load facial landmark information from the dataset.
+
+@param imageList A file contains the list of image filenames in the training dataset.
+@param groundTruth A file contains the list of filenames
+where the landmarks points information are stored.
+The content in each file should follow the standard format (see face::loadFacePoints).
+@param images A vector where each element represent the filename of image in the dataset.
+Images are not loaded by default to save the memory.
+@param facePoints The loaded landmark points for all training data.
+@param offset An offset value to adjust the loaded points.
+
+<B>Example of usage</B>
+@code
+cv::String imageFiles = "../data/images_train.txt";
+cv::String ptsFiles = "../data/points_train.txt";
+std::vector<String> images;
+std::vector<std::vector<Point2f> > facePoints;
+loadTrainingData(imageFiles, ptsFiles, images, facePoints, 0.0f);
+@endcode
+
+example of content in the images_train.txt
+@code
+/home/user/ibug/image_003_1.jpg
+/home/user/ibug/image_004_1.jpg
+/home/user/ibug/image_005_1.jpg
+/home/user/ibug/image_006.jpg
+@endcode
+
+example of content in the points_train.txt
+@code
+/home/user/ibug/image_003_1.pts
+/home/user/ibug/image_004_1.pts
+/home/user/ibug/image_005_1.pts
+/home/user/ibug/image_006.pts
+@endcode
+*/
+CV_EXPORTS_W bool loadTrainingData( String imageList, String groundTruth,
+                                    std::vector<String> & images,
+                                    OutputArray facePoints,
+                                    float offset = 0.0f);
+
+/** @brief This function extracts the data for training from .txt files which contains the corresponding image name and landmarks.
+*The first line in each file should give the path of the image whose
+*landmarks are being described in the file. Then in the subsequent
+*lines there should be coordinates of the landmarks in the image
+*i.e. each line should be of the form x,y
+*where x represents the x coordinate of the landmark and y represents
+*the y coordinate of the landmark.
+*
+*For reference you can see the files as provided in the
+*<a href="http://www.ifp.illinois.edu/~vuongle2/helen/">HELEN dataset</a>
+*
+* @param filename A vector of type cv::String containing name of the .txt files.
+* @param trainlandmarks A vector of type cv::Point2f that would store shape or landmarks of all images.
+* @param trainimages A vector of type cv::String which stores the name of images whose landmarks are tracked
+* @returns A boolean value. It returns true when it reads the data successfully and false otherwise
+*/
+CV_EXPORTS_W bool loadTrainingData(std::vector<String> filename,std::vector< std::vector<Point2f> >
+                          &trainlandmarks,std::vector<String> & trainimages);
+
+/** @brief A utility to load facial landmark information from a given file.
+
+@param filename The filename of the file that contains the facial landmarks data.
+@param points The loaded facial landmark points.
+@param offset An offset value to adjust the loaded points.
+
+<B>Example of usage</B>
+@code
+std::vector<Point2f> points;
+face::loadFacePoints("filename.txt", points, 0.0f);
+@endcode
+
+The annotation file should follow the default format which is
+@code
+version: 1
+n_points:  68
+{
+212.716603 499.771793
+230.232816 566.290071
+...
+}
+@endcode
+where n_points is the number of points considered
+and each point is represented as its position in x and y.
+*/
+CV_EXPORTS_W bool loadFacePoints( String filename, OutputArray points,
+                                  float offset = 0.0f);
+
+/** @brief Utility to draw the detected facial landmark points
+
+@param image The input image to be processed.
+@param points Contains the data of points which will be drawn.
+@param color The color of points in BGR format represented by cv::Scalar.
+
+<B>Example of usage</B>
+@code
+std::vector<Rect> faces;
+std::vector<std::vector<Point2f> > landmarks;
+facemark->getFaces(img, faces);
+facemark->fit(img, faces, landmarks);
+for(int j=0;j<faces.size();j++){
+    face::drawFacemarks(img, landmarks[j], Scalar(0,0,255));
+}
+@endcode
+*/
+CV_EXPORTS_W void drawFacemarks( InputOutputArray image, InputArray points,
+                                 Scalar color = Scalar(255,0,0));
+
+/** @brief Abstract base class for trainable facemark models
+
+To utilize this API in your program, please take a look at the @ref tutorial_table_of_content_facemark
+### Description
+
+The AAM and LBF facemark models in OpenCV are derived from the abstract base class FacemarkTrain, which
+provides a unified access to those facemark algorithms in OpenCV.
+
+Here is an example on how to declare facemark algorithm:
+@code
+// Using Facemark in your code:
+Ptr<Facemark> facemark = FacemarkLBF::create();
+@endcode
+
+
+The typical pipeline for facemark detection is listed as follows:
+- (Non-mandatory) Set a user defined face detection using FacemarkTrain::setFaceDetector.
+  The facemark algorithms are designed to fit the facial points into a face.
+  Therefore, the face information should be provided to the facemark algorithm.
+  Some algorithms might provide a default face detection function.
+  However, the users might prefer to use their own face detector to obtain the best possible detection result.
+- (Non-mandatory) Training the model for a specific algorithm using FacemarkTrain::training.
+  In this case, the model should be automatically saved by the algorithm.
+  If the user already has a trained model, then this part can be omitted.
+- Load the trained model using Facemark::loadModel.
+- Perform the fitting via the Facemark::fit.
+*/
+class CV_EXPORTS_W FacemarkTrain : public Facemark
+{
+public:
+    /** @brief Add one training sample to the trainer.
+
+    @param image Input image.
+    @param landmarks The ground-truth facial landmark points corresponding to the image.
+
+    <B>Example of usage</B>
+    @code
+    String imageFiles = "../data/images_train.txt";
+    String ptsFiles = "../data/points_train.txt";
+    std::vector<String> images_train;
+    std::vector<String> landmarks_train;
+
+    // load the list of dataset: image paths and landmark file paths
+    loadDatasetList(imageFiles,ptsFiles,images_train,landmarks_train);
+
+    Mat image;
+    std::vector<Point2f> facial_points;
+    for(size_t i=0;i<images_train.size();i++){
+        image = imread(images_train[i].c_str());
+        loadFacePoints(landmarks_train[i],facial_points);
+        facemark->addTrainingSample(image, facial_points);
+    }
+    @endcode
+
+    The contents of the training files should follow the standard format.
+    Here are examples for the contents in these files.
+    example of content in the images_train.txt
+    @code
+    /home/user/ibug/image_003_1.jpg
+    /home/user/ibug/image_004_1.jpg
+    /home/user/ibug/image_005_1.jpg
+    /home/user/ibug/image_006.jpg
+    @endcode
+
+    example of content in the points_train.txt
+    @code
+    /home/user/ibug/image_003_1.pts
+    /home/user/ibug/image_004_1.pts
+    /home/user/ibug/image_005_1.pts
+    /home/user/ibug/image_006.pts
+    @endcode
+
+    */
+    virtual bool addTrainingSample(InputArray image, InputArray landmarks)=0;
+
+    /** @brief Trains a Facemark algorithm using the given dataset.
+    Before the training process, training samples should be added to the trainer
+    using the FacemarkTrain::addTrainingSample function.
+
+    @param parameters Optional extra parameters (algorithm dependent).
+
+    <B>Example of usage</B>
+    @code
+    FacemarkLBF::Params params;
+    params.model_filename = "ibug68.model"; // filename to save the trained model
+    Ptr<Facemark> facemark = FacemarkLBF::create(params);
+
+    // add training samples (see FacemarkTrain::addTrainingSample)
+
+    facemark->training();
+    @endcode
+    */
+
+    virtual void training(void* parameters=0)=0;
+
+    /** @brief Set a user defined face detector for the Facemark algorithm.
+    @param detector The user defined face detector function
+    @param userData Detector parameters
+
+    <B>Example of usage</B>
+    @code
+    MyDetectorParameters detectorParameters(...);
+    facemark->setFaceDetector(myDetector, &detectorParameters);
+    @endcode
+
+    Example of a user defined face detector
+    @code
+    bool myDetector( InputArray image, OutputArray faces, void* userData)
+    {
+        MyDetectorParameters* params = (MyDetectorParameters*)userData;
+        // -------- do something --------
+    }
+    @endcode
+
+    TODO Lifetime of detector parameters is uncontrolled. Rework interface design to "Ptr<FaceDetector>".
+    */
+    virtual bool setFaceDetector(FN_FaceDetector detector, void* userData = 0)=0;
+
+    /** @brief Detect faces from a given image using default or user defined face detector.
+    Some algorithms might not provide a default face detector.
+
+    @param image Input image.
+    @param faces Output of the function which represents the regions of interest of the detected faces. Each face is stored in a cv::Rect container.
+
+    <B>Example of usage</B>
+    @code
+    std::vector<cv::Rect> faces;
+    facemark->getFaces(img, faces);
+    for(int j=0;j<faces.size();j++){
+        cv::rectangle(img, faces[j], cv::Scalar(255,0,255));
+    }
+    @endcode
+    */
+    virtual bool getFaces(InputArray image, OutputArray faces)=0;
+
+    /** @brief Get data from an algorithm
+
+    @param items The obtained data, algorithm dependent.
+
+    <B>Example of usage</B>
+    @code
+    Ptr<FacemarkAAM> facemark = FacemarkAAM::create();
+    facemark->loadModel("AAM.yml");
+
+    FacemarkAAM::Data data;
+    facemark->getData(&data);
+    std::vector<Point2f> s0 = data.s0;
+
+    cout<<s0<<endl;
+    @endcode
+    */
+    virtual bool getData(void * items=0)=0; // FIXIT
+}; /* FacemarkTrain */
+
+//! @}
+} /* namespace face */
+} /* namespace cv */
+#endif //__OPENCV_FACELANDMARKTRAIN_HPP__
diff --git a/IPL/include/opencv/opencv2/face/facerec.hpp b/IPL/include/opencv/opencv2/face/facerec.hpp
index 40f62f1..83c0701 100644
--- a/IPL/include/opencv/opencv2/face/facerec.hpp
+++ b/IPL/include/opencv/opencv2/face/facerec.hpp
@@ -21,82 +21,107 @@ class CV_EXPORTS_W BasicFaceRecognizer : public FaceRecognizer
 {
 public:
     /** @see setNumComponents */
-    CV_WRAP virtual int getNumComponents() const = 0;
+    CV_WRAP int getNumComponents() const;
     /** @copybrief getNumComponents @see getNumComponents */
-    CV_WRAP virtual void setNumComponents(int val) = 0;
+    CV_WRAP void setNumComponents(int val);
     /** @see setThreshold */
-    CV_WRAP virtual double getThreshold() const = 0;
+    CV_WRAP double getThreshold() const CV_OVERRIDE;
     /** @copybrief getThreshold @see getThreshold */
-    CV_WRAP virtual void setThreshold(double val) = 0;
-    CV_WRAP virtual std::vector<cv::Mat> getProjections() const = 0;
-    CV_WRAP virtual cv::Mat getLabels() const = 0;
-    CV_WRAP virtual cv::Mat getEigenValues() const = 0;
-    CV_WRAP virtual cv::Mat getEigenVectors() const = 0;
-    CV_WRAP virtual cv::Mat getMean() const = 0;
+    CV_WRAP void setThreshold(double val) CV_OVERRIDE;
+    CV_WRAP std::vector<cv::Mat> getProjections() const;
+    CV_WRAP cv::Mat getLabels() const;
+    CV_WRAP cv::Mat getEigenValues() const;
+    CV_WRAP cv::Mat getEigenVectors() const;
+    CV_WRAP cv::Mat getMean() const;
+
+    virtual void read(const FileNode& fn) CV_OVERRIDE;
+    virtual void write(FileStorage& fs) const CV_OVERRIDE;
+    virtual bool empty() const CV_OVERRIDE;
+
+    using FaceRecognizer::read;
+    using FaceRecognizer::write;
+
+protected:
+    int _num_components;
+    double _threshold;
+    std::vector<Mat> _projections;
+    Mat _labels;
+    Mat _eigenvectors;
+    Mat _eigenvalues;
+    Mat _mean;
+};
+
+class CV_EXPORTS_W EigenFaceRecognizer : public BasicFaceRecognizer
+{
+public:
+    /**
+    @param num_components The number of components (read: Eigenfaces) kept for this Principal
+    Component Analysis. As a hint: There's no rule how many components (read: Eigenfaces) should be
+    kept for good reconstruction capabilities. It is based on your input data, so experiment with the
+    number. Keeping 80 components should almost always be sufficient.
+    @param threshold The threshold applied in the prediction.
+
+    ### Notes:
+
+    -   Training and prediction must be done on grayscale images, use cvtColor to convert between the
+        color spaces.
+    -   **THE EIGENFACES METHOD MAKES THE ASSUMPTION, THAT THE TRAINING AND TEST IMAGES ARE OF EQUAL
+        SIZE.** (caps-lock, because I got so many mails asking for this). You have to make sure your
+        input data has the correct shape, else a meaningful exception is thrown. Use resize to resize
+        the images.
+    -   This model does not support updating.
+
+    ### Model internal data:
+
+    -   num_components see EigenFaceRecognizer::create.
+    -   threshold see EigenFaceRecognizer::create.
+    -   eigenvalues The eigenvalues for this Principal Component Analysis (ordered descending).
+    -   eigenvectors The eigenvectors for this Principal Component Analysis (ordered by their
+        eigenvalue).
+    -   mean The sample mean calculated from the training data.
+    -   projections The projections of the training data.
+    -   labels The labels corresponding to the projections
+        (see also the description of labels in FisherFaceRecognizer).
+     */
+    CV_WRAP static Ptr<EigenFaceRecognizer> create(int num_components = 0, double threshold = DBL_MAX);
+};
+
+class CV_EXPORTS_W FisherFaceRecognizer : public BasicFaceRecognizer
+{
+public:
+    /**
+    @param num_components The number of components (read: Fisherfaces) kept for this Linear
+    Discriminant Analysis with the Fisherfaces criterion. It's useful to keep all components, that
+    means the number of your classes c (read: subjects, persons you want to recognize). If you leave
+    this at the default (0) or set it to a value less-equal 0 or greater (c-1), it will be set to the
+    correct number (c-1) automatically.
+    @param threshold The threshold applied in the prediction. If the distance to the nearest neighbor
+    is larger than the threshold, this method returns -1.
+
+    ### Notes:
+
+    -   Training and prediction must be done on grayscale images, use cvtColor to convert between the
+        color spaces.
+    -   **THE FISHERFACES METHOD MAKES THE ASSUMPTION, THAT THE TRAINING AND TEST IMAGES ARE OF EQUAL
+        SIZE.** (caps-lock, because I got so many mails asking for this). You have to make sure your
+        input data has the correct shape, else a meaningful exception is thrown. Use resize to resize
+        the images.
+    -   This model does not support updating.
+
+    ### Model internal data:
+
+    -   num_components see FisherFaceRecognizer::create.
+    -   threshold see FisherFaceRecognizer::create.
+    -   eigenvalues The eigenvalues for this Linear Discriminant Analysis (ordered descending).
+    -   eigenvectors The eigenvectors for this Linear Discriminant Analysis (ordered by their
+        eigenvalue).
+    -   mean The sample mean calculated from the training data.
+    -   projections The projections of the training data.
+    -   labels The labels corresponding to the projections.
+     */
+    CV_WRAP static Ptr<FisherFaceRecognizer> create(int num_components = 0, double threshold = DBL_MAX);
 };
 
-/**
-@param num_components The number of components (read: Eigenfaces) kept for this Principal
-Component Analysis. As a hint: There's no rule how many components (read: Eigenfaces) should be
-kept for good reconstruction capabilities. It is based on your input data, so experiment with the
-number. Keeping 80 components should almost always be sufficient.
-@param threshold The threshold applied in the prediction.
-
-### Notes:
-
--   Training and prediction must be done on grayscale images, use cvtColor to convert between the
-    color spaces.
--   **THE EIGENFACES METHOD MAKES THE ASSUMPTION, THAT THE TRAINING AND TEST IMAGES ARE OF EQUAL
-    SIZE.** (caps-lock, because I got so many mails asking for this). You have to make sure your
-    input data has the correct shape, else a meaningful exception is thrown. Use resize to resize
-    the images.
--   This model does not support updating.
-
-### Model internal data:
-
--   num_components see createEigenFaceRecognizer.
--   threshold see createEigenFaceRecognizer.
--   eigenvalues The eigenvalues for this Principal Component Analysis (ordered descending).
--   eigenvectors The eigenvectors for this Principal Component Analysis (ordered by their
-    eigenvalue).
--   mean The sample mean calculated from the training data.
--   projections The projections of the training data.
--   labels The threshold applied in the prediction. If the distance to the nearest neighbor is
-    larger than the threshold, this method returns -1.
- */
-CV_EXPORTS_W Ptr<BasicFaceRecognizer> createEigenFaceRecognizer(int num_components = 0, double threshold = DBL_MAX);
-
-/**
-@param num_components The number of components (read: Fisherfaces) kept for this Linear
-Discriminant Analysis with the Fisherfaces criterion. It's useful to keep all components, that
-means the number of your classes c (read: subjects, persons you want to recognize). If you leave
-this at the default (0) or set it to a value less-equal 0 or greater (c-1), it will be set to the
-correct number (c-1) automatically.
-@param threshold The threshold applied in the prediction. If the distance to the nearest neighbor
-is larger than the threshold, this method returns -1.
-
-### Notes:
-
--   Training and prediction must be done on grayscale images, use cvtColor to convert between the
-    color spaces.
--   **THE FISHERFACES METHOD MAKES THE ASSUMPTION, THAT THE TRAINING AND TEST IMAGES ARE OF EQUAL
-    SIZE.** (caps-lock, because I got so many mails asking for this). You have to make sure your
-    input data has the correct shape, else a meaningful exception is thrown. Use resize to resize
-    the images.
--   This model does not support updating.
-
-### Model internal data:
-
--   num_components see createFisherFaceRecognizer.
--   threshold see createFisherFaceRecognizer.
--   eigenvalues The eigenvalues for this Linear Discriminant Analysis (ordered descending).
--   eigenvectors The eigenvectors for this Linear Discriminant Analysis (ordered by their
-    eigenvalue).
--   mean The sample mean calculated from the training data.
--   projections The projections of the training data.
--   labels The labels corresponding to the projections.
- */
-CV_EXPORTS_W Ptr<BasicFaceRecognizer> createFisherFaceRecognizer(int num_components = 0, double threshold = DBL_MAX);
 
 class CV_EXPORTS_W LBPHFaceRecognizer : public FaceRecognizer
 {
@@ -118,46 +143,46 @@ class CV_EXPORTS_W LBPHFaceRecognizer : public FaceRecognizer
     /** @copybrief getNeighbors @see getNeighbors */
     CV_WRAP virtual void setNeighbors(int val) = 0;
     /** @see setThreshold */
-    CV_WRAP virtual double getThreshold() const = 0;
+    CV_WRAP virtual double getThreshold() const CV_OVERRIDE = 0;
     /** @copybrief getThreshold @see getThreshold */
-    CV_WRAP virtual void setThreshold(double val) = 0;
+    CV_WRAP virtual void setThreshold(double val) CV_OVERRIDE = 0;
     CV_WRAP virtual std::vector<cv::Mat> getHistograms() const = 0;
     CV_WRAP virtual cv::Mat getLabels() const = 0;
-};
 
-/**
-@param radius The radius used for building the Circular Local Binary Pattern. The greater the
-radius, the
-@param neighbors The number of sample points to build a Circular Local Binary Pattern from. An
-appropriate value is to use `8` sample points. Keep in mind: the more sample points you include,
-the higher the computational cost.
-@param grid_x The number of cells in the horizontal direction, 8 is a common value used in
-publications. The more cells, the finer the grid, the higher the dimensionality of the resulting
-feature vector.
-@param grid_y The number of cells in the vertical direction, 8 is a common value used in
-publications. The more cells, the finer the grid, the higher the dimensionality of the resulting
-feature vector.
-@param threshold The threshold applied in the prediction. If the distance to the nearest neighbor
-is larger than the threshold, this method returns -1.
-
-### Notes:
-
--   The Circular Local Binary Patterns (used in training and prediction) expect the data given as
-    grayscale images, use cvtColor to convert between the color spaces.
--   This model supports updating.
-
-### Model internal data:
-
--   radius see createLBPHFaceRecognizer.
--   neighbors see createLBPHFaceRecognizer.
--   grid_x see createLBPHFaceRecognizer.
--   grid_y see createLBPHFaceRecognizer.
--   threshold see createLBPHFaceRecognizer.
--   histograms Local Binary Patterns Histograms calculated from the given training data (empty if
-    none was given).
--   labels Labels corresponding to the calculated Local Binary Patterns Histograms.
- */
-CV_EXPORTS_W Ptr<LBPHFaceRecognizer> createLBPHFaceRecognizer(int radius=1, int neighbors=8, int grid_x=8, int grid_y=8, double threshold = DBL_MAX);
+    /**
+    @param radius The radius used for building the Circular Local Binary Pattern. The greater the
+    radius, the smoother the image but more spatial information you can get.
+    @param neighbors The number of sample points to build a Circular Local Binary Pattern from. An
+    appropriate value is to use `8` sample points. Keep in mind: the more sample points you include,
+    the higher the computational cost.
+    @param grid_x The number of cells in the horizontal direction, 8 is a common value used in
+    publications. The more cells, the finer the grid, the higher the dimensionality of the resulting
+    feature vector.
+    @param grid_y The number of cells in the vertical direction, 8 is a common value used in
+    publications. The more cells, the finer the grid, the higher the dimensionality of the resulting
+    feature vector.
+    @param threshold The threshold applied in the prediction. If the distance to the nearest neighbor
+    is larger than the threshold, this method returns -1.
+
+    ### Notes:
+
+    -   The Circular Local Binary Patterns (used in training and prediction) expect the data given as
+        grayscale images, use cvtColor to convert between the color spaces.
+    -   This model supports updating.
+
+    ### Model internal data:
+
+    -   radius see LBPHFaceRecognizer::create.
+    -   neighbors see LBPHFaceRecognizer::create.
+    -   grid_x see LBPHFaceRecognizer::create.
+    -   grid_y see LBPHFaceRecognizer::create.
+    -   threshold see LBPHFaceRecognizer::create.
+    -   histograms Local Binary Patterns Histograms calculated from the given training data (empty if
+        none was given).
+    -   labels Labels corresponding to the calculated Local Binary Patterns Histograms.
+     */
+    CV_WRAP static Ptr<LBPHFaceRecognizer> create(int radius=1, int neighbors=8, int grid_x=8, int grid_y=8, double threshold = DBL_MAX);
+};
 
 //! @}
 
diff --git a/IPL/include/opencv/opencv2/face/mace.hpp b/IPL/include/opencv/opencv2/face/mace.hpp
new file mode 100644
index 0000000..ba3ec86
--- /dev/null
+++ b/IPL/include/opencv/opencv2/face/mace.hpp
@@ -0,0 +1,114 @@
+// This file is part of the OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef __mace_h_onboard__
+#define __mace_h_onboard__
+
+#include "opencv2/core.hpp"
+
+
+namespace cv {
+namespace face {
+
+//! @addtogroup face
+//! @{
+
+
+/**
+@brief Minimum Average Correlation Energy Filter
+    useful for authentication with (cancellable) biometrical features.
+    (does not need many positives to train (10-50), and no negatives at all, also robust to noise/salting)
+
+    see also: @cite Savvides04
+
+    this implementation is largely based on: https://code.google.com/archive/p/pam-face-authentication (GSOC 2009)
+
+    use it like:
+    @code
+
+    Ptr<face::MACE> mace = face::MACE::create(64);
+
+    vector<Mat> pos_images = ...
+    mace->train(pos_images);
+
+    Mat query = ...
+    bool same = mace->same(query);
+
+    @endcode
+
+    you can also use two-factor authentication, with an additional passphrase:
+
+    @code
+    String owners_passphrase = "ilikehotdogs";
+    Ptr<face::MACE> mace = face::MACE::create(64);
+    mace->salt(owners_passphrase);
+    vector<Mat> pos_images = ...
+    mace->train(pos_images);
+
+    // now, users have to give a valid passphrase, along with the image:
+    Mat query = ...
+    cout << "enter passphrase: ";
+    string pass;
+    getline(cin, pass);
+    mace->salt(pass);
+    bool same = mace->same(query);
+    @endcode
+
+    save/load your model:
+    @code
+    Ptr<face::MACE> mace = face::MACE::create(64);
+    mace->train(pos_images);
+    mace->save("my_mace.xml");
+
+    // later:
+    Ptr<MACE> reloaded = MACE::load("my_mace.xml");
+    reloaded->same(some_image);
+    @endcode
+
+*/
+
+class CV_EXPORTS_W MACE : public cv::Algorithm
+{
+public:
+    /**
+    @brief optionally encrypt images with random convolution
+    @param passphrase a crc64 random seed will get generated from this
+    */
+    CV_WRAP virtual void salt(const cv::String &passphrase) = 0;
+
+    /**
+    @brief train it on positive features
+       compute the mace filter: `h = D(-1) * X * (X(+) * D(-1) * X)(-1) * C`
+       also calculate a minimal threshold for this class, the smallest self-similarity from the train images
+    @param images  a vector<Mat> with the train images
+    */
+    CV_WRAP virtual void train(cv::InputArrayOfArrays images) = 0;
+
+    /**
+    @brief correlate query img and threshold to min class value
+    @param query  a Mat with query image
+    */
+    CV_WRAP virtual bool same(cv::InputArray query) const = 0;
+
+
+    /**
+    @brief constructor
+    @param filename  build a new MACE instance from a pre-serialized FileStorage
+    @param objname (optional) top-level node in the FileStorage
+    */
+    CV_WRAP static cv::Ptr<MACE> load(const String &filename, const String &objname=String());
+
+    /**
+    @brief constructor
+    @param IMGSIZE  images will get resized to this (should be an even number)
+    */
+    CV_WRAP static cv::Ptr<MACE> create(int IMGSIZE=64);
+};
+
+//! @}
+
+}/* namespace face */
+}/* namespace cv */
+
+#endif // __mace_h_onboard__
diff --git a/IPL/include/opencv/opencv2/face/predict_collector.hpp b/IPL/include/opencv/opencv2/face/predict_collector.hpp
index 92de6c1..dba3c7e 100644
--- a/IPL/include/opencv/opencv2/face/predict_collector.hpp
+++ b/IPL/include/opencv/opencv2/face/predict_collector.hpp
@@ -44,262 +44,84 @@ the use of this software, even if advised of the possibility of such damage.
 
 #ifndef __OPENCV_PREDICT_COLLECTOR_HPP__
 #define __OPENCV_PREDICT_COLLECTOR_HPP__
-#include <cfloat>
-#include <list>
+
 #include <vector>
 #include <map>
-#include "opencv2/core/cvdef.h"
-#include "opencv2/core/cvstd.hpp"
-#undef emit //fix for qt
+#include <utility>
+#include <cfloat>
+
+#include "opencv2/core/base.hpp"
+
 namespace cv {
 namespace face {
 //! @addtogroup face
 //! @{
 /** @brief Abstract base class for all strategies of prediction result handling
 */
-class CV_EXPORTS_W PredictCollector {
-protected:
-    double _threshold;
-    int _size;
-    int _state;
-    int _excludeLabel;
-    double _distanceKoef;
-    double _minthreshold;
+class CV_EXPORTS_W PredictCollector
+{
 public:
-    /** @brief creates new predict collector with given threshold */
-    PredictCollector(double threshold = DBL_MAX) {
-        _threshold = threshold;
-        _excludeLabel = 0;
-        _distanceKoef = 1;
-        _minthreshold = -1;
-    }
-    CV_WRAP virtual ~PredictCollector() {}
+    virtual ~PredictCollector() {}
 
-    /** @brief called once at start of recognition
+    /** @brief Interface method called by face recognizer before results processing
     @param size total size of prediction evaluation that recognizer could perform
-    @param state user defined send-to-back optional value to allow multi-thread, multi-session or aggregation scenarios
-    */
-    CV_WRAP virtual void init(const int size, const int state = 0);
-
-    /** @brief called by recognizer prior to emit to decide if prediction require emiting
-    @param label current predicted label
-    @param dist current predicted distance
-    @param state back send state parameter of prediction  session
-    @return true if prediction is valid and required for emiting
-    @note can override given label and distance to another values
     */
-    CV_WRAP virtual bool defaultFilter(int* label, double* dist, const int state);
-
-    /** @brief extension point for filter - called if base filter executed */
-    CV_WRAP virtual bool filter(int* label, double* dist, const int state);
+    virtual void init(size_t size) { CV_UNUSED(size); }
 
-    /** @brief called with every recognition result
+    /** @brief Interface method called by face recognizer for each result
     @param label current prediction label
     @param dist current prediction distance (confidence)
-    @param state user defined send-to-back optional value to allow multi-thread, multi-session or aggregation scenarios
-    @return true if recognizer should proceed prediction , false - if recognizer should terminate prediction
-    */
-    CV_WRAP virtual bool emit(const int label, const double dist, const int state = 0); //not abstract while Python generation require non-abstract class
-
-    /** @brief outer interface method to be called from recognizer
-    @param label current prediction label
-    @param dist current prediction distance (confidence)
-    @param state user defined send-to-back optional value to allow multi-thread, multi-session or aggregation scenarios
-    @note wraps filter and emit calls, not tended to be overriden
-    */
-    CV_WRAP virtual bool collect(int label, double dist, const int state = 0);
-
-    /**
-    @brief get size of prediction
-    ### Description
-    Is set by recognizer and is amount of all available predicts
-    So we can use it to perform statistic collectors before prediction of whole set
-    */
-    CV_WRAP virtual int getSize();
-
-    /** @brief set size of prediction */
-    CV_WRAP virtual void setSize(int size);
-
-    /**
-    @brief get state of prediction
-    ### Description
-    State is a custom value assigned for prediction session, 0 if it's no-state session
-    */
-    CV_WRAP virtual int getState();
-
-    /** @brief set state of prediction */
-    CV_WRAP virtual void setState(int state);
-
-    /**
-    @brief returns currently excluded label, 0 if no set
-    ### Description
-    We require to exclude label if we want to test card in train set against others
     */
-    CV_WRAP virtual int getExcludeLabel();
-
-    /** @brief set exclude label of prediction */
-    CV_WRAP virtual void setExcludeLabel(int excludeLabel);
-
-    /**
-    @brief returns current distance koeficient (applyed to distance in filter stage)
-    ### Description
-    It's required if we want to predict with distinct algorithms in one session
-    so LBPH, Eigen and Fisher distance are different, but we can provide koef for them to translate to
-    each other (while their distribuition for same train set is close and started from 0)
-    Default 1 koef means that distance is not corrected
-    */
-    CV_WRAP virtual double getDistanceKoef();
-
-    /** @brief set exclude label of prediction */
-    CV_WRAP virtual void setDistanceKoef(double distanceKoef);
-    /**
-    @brief returns current minimal threshold
-    ### Description
-    It's required when we must exclude most closed predictions (for example we
-    search for close but not same faces - usable for mixed set where doubles exists
-    in train collection)
-    */
-    CV_WRAP virtual double getMinThreshold();
-
-    /** @brief set minimal threshold for prediction */
-    CV_WRAP virtual void setMinThreshold(double minthreshold);
-
+    virtual bool collect(int label, double dist) = 0;
 };
 
-/** @brief default predict collector that trace minimal distance with treshhold checking (that is default behavior for most predict logic)
-*/
-class CV_EXPORTS_W MinDistancePredictCollector : public PredictCollector {
-private:
-    int _label;
-    double _dist;
-public:
-    /** @brief creates new MinDistancePredictCollector with given threshold */
-    CV_WRAP MinDistancePredictCollector(double threshold = DBL_MAX) : PredictCollector(threshold) {
-        _label = -1;
-        _dist = DBL_MAX;
-    };
-    CV_WRAP bool emit(const int label, const double dist, const int state = 0);
-    CV_WRAP bool filter(int* label, double* dist, const int state);
-    /** @brief result label, -1 if not found */
-    CV_WRAP int getLabel() const;
-    /** @brief result distance (confidence) DBL_MAX if not found */
-    CV_WRAP double getDist() const;
-    /** @brief factory method to create cv-pointers to MinDistancePredictCollector */
-    CV_WRAP static Ptr<MinDistancePredictCollector> create(double threshold = DBL_MAX);
-};
+/** @brief Default predict collector
 
-/**
-@brief Collects top N most close predictions
-@note Prevent doubling of same label - if one label is occured twice - most closed distance value will be set
+Traces the minimal distance with threshold checking (that is the default behavior for most predict logic)
 */
-class CV_EXPORTS_W TopNPredictCollector : public PredictCollector {
-private:
-    size_t _size;
-    Ptr<std::list<std::pair<int, double> > > _idx;
+class CV_EXPORTS_W StandardCollector : public PredictCollector
+{
 public:
-    CV_WRAP TopNPredictCollector(size_t size = 5, double threshold = DBL_MAX) : PredictCollector(threshold) {
-        _size = size;
-        _idx = Ptr<std::list<std::pair<int, double> > >(new std::list<std::pair<int, double> >);
+    struct PredictResult
+    {
+        int label;
+        double distance;
+        PredictResult(int label_ = -1, double distance_ = DBL_MAX) : label(label_), distance(distance_) {}
     };
-    CV_WRAP bool emit(const int label, const double dist, const int state = 0);
-    CV_WRAP bool filter(int* label, double* dist, const int state);
-    Ptr<std::list<std::pair<int, double> > > getResult();
-    CV_WRAP std::vector<std::pair<int, double> > getResultVector(); // pythonable version
-    CV_WRAP static Ptr<TopNPredictCollector> create(size_t size = 5, double threshold = DBL_MAX);
-};
-
-
-/**
-@brief Collects all predict results to single vector
-@note this collector not analyze double labels in emit, it's raw copy of source prediction result,
-remember that filter is still applyed so you can use min/max threshold , distanceKoef and excludeLabel
-*/
-class CV_EXPORTS_W VectorPredictCollector : public PredictCollector {
-private:
-    Ptr<std::vector<std::pair<int, double> > > _idx;
-public:
-    CV_WRAP static const int DEFAULT_SIZE = 5; // top 5 by default
-    CV_WRAP VectorPredictCollector(double threshold = DBL_MAX) : PredictCollector(threshold) {
-        _idx = Ptr<std::vector<std::pair<int, double> > >(new std::vector<std::pair<int, double> >);
-    }
-    CV_WRAP bool emit(const int label, const double dist, const int state = 0);
-    Ptr<std::vector<std::pair<int, double> > > getResult();
-    CV_WRAP std::vector<std::pair<int, double> > getResultVector(); // pythonable version
-    CV_WRAP static Ptr<VectorPredictCollector> create(double threshold = DBL_MAX);
-};
-
-
-/**
-@brief Collects all predict results to single vector
-@note this collector not analyze double labels in emit, it's raw copy of source prediction result,
-remember that filter is still applyed so you can use min/max threshold , distanceKoef and excludeLabel
-*/
-class CV_EXPORTS_W MapPredictCollector : public PredictCollector {
-private:
-    Ptr<std::map<int, double> > _idx;
-public:
-    CV_WRAP static const int DEFAULT_SIZE = 5; // top 5 by default
-    CV_WRAP MapPredictCollector(double threshold = DBL_MAX) : PredictCollector(threshold) {
-        _idx = Ptr<std::map<int, double> >(new std::map<int, double>);
-    }
-    CV_WRAP bool emit(const int label, const double dist, const int state = 0);
-    Ptr<std::map<int, double> > getResult();
-    CV_WRAP std::vector<std::pair<int, double> >  getResultVector(); // pythonable version
-    CV_WRAP static Ptr<MapPredictCollector> create(double threshold = DBL_MAX);
-};
-
-/**
-@brief Collects basic statistic information about prediction
-@note stat predict collector is usefull for determining valid thresholds
-on given trained set, additionally it's required to
-evaluate unified koefs between algorithms
-*/
-class CV_EXPORTS_W StatPredictCollector : public PredictCollector {
-private:
-    double _min;
-    double _max;
-    int _count;
-    double _sum;
-public:
-    CV_WRAP StatPredictCollector(double threshold = DBL_MAX) : PredictCollector(threshold) {
-        _min = DBL_MAX;
-        _max = DBL_MIN;
-        _count = 0;
-        _sum = 0;
-    }
-    CV_WRAP bool emit(const int label, const double dist, const int state = 0);
-    CV_WRAP double getMin();
-    CV_WRAP double getMax();
-    CV_WRAP double getSum();
-    CV_WRAP int getCount();
-    CV_WRAP static Ptr<StatPredictCollector> create(double threshold = DBL_MAX);
-};
-
-/**
-@brief evaluates standard deviation of given prediction session over trained set
-@note in combine with StatPredictCollector can provide statistically based metrices
-for thresholds
-*/
-class CV_EXPORTS_W StdPredictCollector : public PredictCollector {
-private:
-    double _avg;
-    double _n;
-    double _s;
+protected:
+    double threshold;
+    PredictResult minRes;
+    std::vector<PredictResult> data;
 public:
-    CV_WRAP StdPredictCollector(double threshold = DBL_MAX, double avg = 0) : PredictCollector(threshold) {
-        _avg = avg;
-        _n = 0;
-        _s = 0;
-    }
-    CV_WRAP bool emit(const int label, const double dist, const int state = 0);
-    CV_WRAP double getResult();
-    CV_WRAP static Ptr<StdPredictCollector> create(double threshold = DBL_MAX, double avg = 0);
+    /** @brief Constructor
+    @param threshold_ set threshold
+    */
+    StandardCollector(double threshold_ = DBL_MAX);
+    /** @brief overridden interface method */
+    void init(size_t size) CV_OVERRIDE;
+    /** @brief overridden interface method */
+    bool collect(int label, double dist) CV_OVERRIDE;
+    /** @brief Returns label with minimal distance */
+    CV_WRAP int getMinLabel() const;
+    /** @brief Returns minimal distance value */
+    CV_WRAP double getMinDist() const;
+    /** @brief Return results as vector
+    @param sorted If set, results will be sorted by distance
+    Each value is a pair of label and distance.
+    */
+    CV_WRAP std::vector< std::pair<int, double> > getResults(bool sorted = false) const;
+    /** @brief Return results as map
+    Labels are keys, values are minimal distances
+    */
+    std::map<int, double> getResultsMap() const;
+    /** @brief Static constructor
+    @param threshold set threshold
+    */
+    CV_WRAP static Ptr<StandardCollector> create(double threshold = DBL_MAX);
 };
 
-
-
 //! @}
 }
 }
 
-#endif
\ No newline at end of file
+#endif
diff --git a/IPL/include/opencv/opencv2/features2d.hpp b/IPL/include/opencv/opencv2/features2d.hpp
index 692d3d9..24f0af5 100644
--- a/IPL/include/opencv/opencv2/features2d.hpp
+++ b/IPL/include/opencv/opencv2/features2d.hpp
@@ -40,11 +40,15 @@
 //
 //M*/
 
-#ifndef __OPENCV_FEATURES_2D_HPP__
-#define __OPENCV_FEATURES_2D_HPP__
+#ifndef OPENCV_FEATURES_2D_HPP
+#define OPENCV_FEATURES_2D_HPP
 
+#include "opencv2/opencv_modules.hpp"
 #include "opencv2/core.hpp"
+
+#ifdef HAVE_OPENCV_FLANN
 #include "opencv2/flann/miniflann.hpp"
+#endif
 
 /**
   @defgroup features2d 2D Features Framework
@@ -76,6 +80,10 @@ This section describes approaches based on local 2D features and used to categor
     -   (Python) An example using the features2D framework to perform object categorization can be
         found at opencv_source_code/samples/python/find_obj.py
 
+    @defgroup feature2d_hal Hardware Acceleration Layer
+    @{
+        @defgroup features2d_hal_interface Interface
+    @}
   @}
  */
 
@@ -117,6 +125,10 @@ class CV_EXPORTS KeyPointsFilter
      * Remove duplicated keypoints.
      */
     static void removeDuplicated( std::vector<KeyPoint>& keypoints );
+    /*
+     * Remove duplicated keypoints and sort the remaining keypoints
+     */
+    static void removeDuplicatedSorted( std::vector<KeyPoint>& keypoints );
 
     /*
      * Retain the specified number of the best keypoints (according to the response)
@@ -129,7 +141,11 @@ class CV_EXPORTS KeyPointsFilter
 
 /** @brief Abstract base class for 2D image feature detectors and descriptor extractors
 */
+#ifdef __EMSCRIPTEN__
+class CV_EXPORTS_W Feature2D : public Algorithm
+#else
 class CV_EXPORTS_W Feature2D : public virtual Algorithm
+#endif
 {
 public:
     virtual ~Feature2D();
@@ -153,8 +169,8 @@ class CV_EXPORTS_W Feature2D : public virtual Algorithm
     @param masks Masks for each input image specifying where to look for keypoints (optional).
     masks[i] is a mask for images[i].
     */
-    virtual void detect( InputArrayOfArrays images,
-                         std::vector<std::vector<KeyPoint> >& keypoints,
+    CV_WRAP virtual void detect( InputArrayOfArrays images,
+                         CV_OUT std::vector<std::vector<KeyPoint> >& keypoints,
                          InputArrayOfArrays masks=noArray() );
 
     /** @brief Computes the descriptors for a set of keypoints detected in an image (first variant) or image set
@@ -182,8 +198,8 @@ class CV_EXPORTS_W Feature2D : public virtual Algorithm
     descriptors computed for a keypoints[i]. Row j is the keypoints (or keypoints[i]) is the
     descriptor for keypoint j-th keypoint.
     */
-    virtual void compute( InputArrayOfArrays images,
-                          std::vector<std::vector<KeyPoint> >& keypoints,
+    CV_WRAP virtual void compute( InputArrayOfArrays images,
+                          CV_OUT CV_IN_OUT std::vector<std::vector<KeyPoint> >& keypoints,
                           OutputArrayOfArrays descriptors );
 
     /** Detects keypoints and computes the descriptors */
@@ -196,8 +212,21 @@ class CV_EXPORTS_W Feature2D : public virtual Algorithm
     CV_WRAP virtual int descriptorType() const;
     CV_WRAP virtual int defaultNorm() const;
 
+    CV_WRAP void write( const String& fileName ) const;
+
+    CV_WRAP void read( const String& fileName );
+
+    virtual void write( FileStorage&) const CV_OVERRIDE;
+
+    // see corresponding cv::Algorithm method
+    CV_WRAP virtual void read( const FileNode&) CV_OVERRIDE;
+
     //! Return true if detector object is empty
-    CV_WRAP virtual bool empty() const;
+    CV_WRAP virtual bool empty() const CV_OVERRIDE;
+    CV_WRAP virtual String getDefaultName() const CV_OVERRIDE;
+
+    // see corresponding cv::Algorithm method
+    CV_WRAP inline void write(const Ptr<FileStorage>& fs, const String& name = String()) const { Algorithm::write(fs, name); }
 };
 
 /** Feature detectors in OpenCV have wrappers with a common interface that enables you to easily switch
@@ -242,6 +271,36 @@ class CV_EXPORTS_W BRISK : public Feature2D
     @param indexChange index remapping of the bits. */
     CV_WRAP static Ptr<BRISK> create(const std::vector<float> &radiusList, const std::vector<int> &numberList,
         float dMax=5.85f, float dMin=8.2f, const std::vector<int>& indexChange=std::vector<int>());
+
+    /** @brief The BRISK constructor for a custom pattern, detection threshold and octaves
+
+    @param thresh AGAST detection threshold score.
+    @param octaves detection octaves. Use 0 to do single scale.
+    @param radiusList defines the radii (in pixels) where the samples around a keypoint are taken (for
+    keypoint scale 1).
+    @param numberList defines the number of sampling points on the sampling circle. Must be the same
+    size as radiusList.
+    @param dMax threshold for the short pairings used for descriptor formation (in pixels for keypoint
+    scale 1).
+    @param dMin threshold for the long pairings used for orientation determination (in pixels for
+    keypoint scale 1).
+    @param indexChange index remapping of the bits. */
+    CV_WRAP static Ptr<BRISK> create(int thresh, int octaves, const std::vector<float> &radiusList,
+        const std::vector<int> &numberList, float dMax=5.85f, float dMin=8.2f,
+        const std::vector<int>& indexChange=std::vector<int>());
+    CV_WRAP virtual String getDefaultName() const CV_OVERRIDE;
+
+    /** @brief Set detection threshold.
+    @param threshold AGAST detection threshold score.
+    */
+    CV_WRAP virtual void setThreshold(int threshold) { CV_UNUSED(threshold); return; }
+    CV_WRAP virtual int getThreshold() const { return -1; }
+
+    /** @brief Set detection octaves.
+    @param octaves detection octaves. Use 0 to do single scale.
+    */
+    CV_WRAP virtual void setOctaves(int octaves) { CV_UNUSED(octaves); return; }
+    CV_WRAP virtual int getOctaves() const { return -1; }
 };
 
 /** @brief Class implementing the ORB (*oriented BRIEF*) keypoint detector and descriptor extractor
@@ -254,7 +313,8 @@ k-tuples) are rotated according to the measured orientation).
 class CV_EXPORTS_W ORB : public Feature2D
 {
 public:
-    enum { kBytes = 32, HARRIS_SCORE=0, FAST_SCORE=1 };
+    enum ScoreType { HARRIS_SCORE=0, FAST_SCORE=1 };
+    static const int kBytes = 32;
 
     /** @brief The ORB constructor
 
@@ -265,10 +325,11 @@ class CV_EXPORTS_W ORB : public Feature2D
     will mean that to cover certain scale range you will need more pyramid levels and so the speed
     will suffer.
     @param nlevels The number of pyramid levels. The smallest level will have linear size equal to
-    input_image_linear_size/pow(scaleFactor, nlevels).
+    input_image_linear_size/pow(scaleFactor, nlevels - firstLevel).
     @param edgeThreshold This is size of the border where the features are not detected. It should
     roughly match the patchSize parameter.
-    @param firstLevel It should be 0 in the current implementation.
+    @param firstLevel The pyramid level at which the source image is placed. Previous levels are
+    filled with an upscaled source image.
     @param WTA_K The number of points that produce each element of the oriented BRIEF descriptor. The
     default value 2 means the BRIEF where we take a random point pair and compare their brightnesses,
     so we get 0/1 response. Other possible values are 3 and 4. For example, 3 means that we take 3
@@ -284,10 +345,10 @@ class CV_EXPORTS_W ORB : public Feature2D
     but it is a little faster to compute.
     @param patchSize size of the patch used by the oriented BRIEF descriptor. Of course, on smaller
     pyramid layers the perceived image area covered by a feature will be larger.
-    @param fastThreshold
+    @param fastThreshold the fast threshold
      */
     CV_WRAP static Ptr<ORB> create(int nfeatures=500, float scaleFactor=1.2f, int nlevels=8, int edgeThreshold=31,
-        int firstLevel=0, int WTA_K=2, int scoreType=ORB::HARRIS_SCORE, int patchSize=31, int fastThreshold=20);
+        int firstLevel=0, int WTA_K=2, ORB::ScoreType scoreType=ORB::HARRIS_SCORE, int patchSize=31, int fastThreshold=20);
 
     CV_WRAP virtual void setMaxFeatures(int maxFeatures) = 0;
     CV_WRAP virtual int getMaxFeatures() const = 0;
@@ -307,14 +368,15 @@ class CV_EXPORTS_W ORB : public Feature2D
     CV_WRAP virtual void setWTA_K(int wta_k) = 0;
     CV_WRAP virtual int getWTA_K() const = 0;
 
-    CV_WRAP virtual void setScoreType(int scoreType) = 0;
-    CV_WRAP virtual int getScoreType() const = 0;
+    CV_WRAP virtual void setScoreType(ORB::ScoreType scoreType) = 0;
+    CV_WRAP virtual ORB::ScoreType getScoreType() const = 0;
 
     CV_WRAP virtual void setPatchSize(int patchSize) = 0;
     CV_WRAP virtual int getPatchSize() const = 0;
 
     CV_WRAP virtual void setFastThreshold(int fastThreshold) = 0;
     CV_WRAP virtual int getFastThreshold() const = 0;
+    CV_WRAP virtual String getDefaultName() const CV_OVERRIDE;
 };
 
 /** @brief Maximally stable extremal region extractor
@@ -336,12 +398,12 @@ code which is distributed under GPL.
 class CV_EXPORTS_W MSER : public Feature2D
 {
 public:
-    /** @brief Full consturctor for %MSER detector
+    /** @brief Full constructor for %MSER detector
 
     @param _delta it compares \f$(size_{i}-size_{i-delta})/size_{i-delta}\f$
     @param _min_area prune the area which smaller than minArea
     @param _max_area prune the area which bigger than maxArea
-    @param _max_variation prune the area have simliar size to its children
+    @param _max_variation prune areas that have a similar size to their children
     @param _min_diversity for color image, trace back to cut off mser with diversity less than min_diversity
     @param _max_evolution  for color image, the evolution steps
     @param _area_threshold for color image, the area threshold to cause re-initialize
@@ -355,13 +417,13 @@ class CV_EXPORTS_W MSER : public Feature2D
 
     /** @brief Detect %MSER regions
 
-    @param image input image (8UC1, 8UC3 or 8UC4)
+    @param image input image (8UC1, 8UC3 or 8UC4, must be at least 3x3)
     @param msers resulting list of point sets
     @param bboxes resulting bounding boxes
     */
     CV_WRAP virtual void detectRegions( InputArray image,
                                         CV_OUT std::vector<std::vector<Point> >& msers,
-                                        std::vector<Rect>& bboxes ) = 0;
+                                        CV_OUT std::vector<Rect>& bboxes ) = 0;
 
     CV_WRAP virtual void setDelta(int delta) = 0;
     CV_WRAP virtual int getDelta() const = 0;
@@ -374,6 +436,42 @@ class CV_EXPORTS_W MSER : public Feature2D
 
     CV_WRAP virtual void setPass2Only(bool f) = 0;
     CV_WRAP virtual bool getPass2Only() const = 0;
+    CV_WRAP virtual String getDefaultName() const CV_OVERRIDE;
+};
+
+//! @} features2d_main
+
+//! @addtogroup features2d_main
+//! @{
+
+/** @brief Wrapping class for feature detection using the FAST method.
+ */
+class CV_EXPORTS_W FastFeatureDetector : public Feature2D
+{
+public:
+    enum DetectorType
+    {
+        TYPE_5_8 = 0, TYPE_7_12 = 1, TYPE_9_16 = 2
+    };
+    enum
+    {
+        THRESHOLD = 10000, NONMAX_SUPPRESSION=10001, FAST_N=10002
+    };
+
+
+    CV_WRAP static Ptr<FastFeatureDetector> create( int threshold=10,
+                                                    bool nonmaxSuppression=true,
+                                                    FastFeatureDetector::DetectorType type=FastFeatureDetector::TYPE_9_16 );
+
+    CV_WRAP virtual void setThreshold(int threshold) = 0;
+    CV_WRAP virtual int getThreshold() const = 0;
+
+    CV_WRAP virtual void setNonmaxSuppression(bool f) = 0;
+    CV_WRAP virtual bool getNonmaxSuppression() const = 0;
+
+    CV_WRAP virtual void setType(FastFeatureDetector::DetectorType type) = 0;
+    CV_WRAP virtual FastFeatureDetector::DetectorType getType() const = 0;
+    CV_WRAP virtual String getDefaultName() const CV_OVERRIDE;
 };
 
 /** @overload */
@@ -394,32 +492,36 @@ FastFeatureDetector::TYPE_5_8
 
 Detects corners using the FAST algorithm by @cite Rosten06 .
 
-@note In Python API, types are given as cv2.FAST_FEATURE_DETECTOR_TYPE_5_8,
-cv2.FAST_FEATURE_DETECTOR_TYPE_7_12 and cv2.FAST_FEATURE_DETECTOR_TYPE_9_16. For corner
-detection, use cv2.FAST.detect() method.
+@note In Python API, types are given as cv.FAST_FEATURE_DETECTOR_TYPE_5_8,
+cv.FAST_FEATURE_DETECTOR_TYPE_7_12 and cv.FAST_FEATURE_DETECTOR_TYPE_9_16. For corner
+detection, use cv.FAST.detect() method.
  */
 CV_EXPORTS void FAST( InputArray image, CV_OUT std::vector<KeyPoint>& keypoints,
-                      int threshold, bool nonmaxSuppression, int type );
+                      int threshold, bool nonmaxSuppression, FastFeatureDetector::DetectorType type );
 
 //! @} features2d_main
 
 //! @addtogroup features2d_main
 //! @{
 
-/** @brief Wrapping class for feature detection using the FAST method. :
+/** @brief Wrapping class for feature detection using the AGAST method.
  */
-class CV_EXPORTS_W FastFeatureDetector : public Feature2D
+class CV_EXPORTS_W AgastFeatureDetector : public Feature2D
 {
 public:
+    enum DetectorType
+    {
+        AGAST_5_8 = 0, AGAST_7_12d = 1, AGAST_7_12s = 2, OAST_9_16 = 3,
+    };
+
     enum
     {
-        TYPE_5_8 = 0, TYPE_7_12 = 1, TYPE_9_16 = 2,
-        THRESHOLD = 10000, NONMAX_SUPPRESSION=10001, FAST_N=10002,
+        THRESHOLD = 10000, NONMAX_SUPPRESSION = 10001,
     };
 
-    CV_WRAP static Ptr<FastFeatureDetector> create( int threshold=10,
-                                                    bool nonmaxSuppression=true,
-                                                    int type=FastFeatureDetector::TYPE_9_16 );
+    CV_WRAP static Ptr<AgastFeatureDetector> create( int threshold=10,
+                                                     bool nonmaxSuppression=true,
+                                                     AgastFeatureDetector::DetectorType type = AgastFeatureDetector::OAST_9_16);
 
     CV_WRAP virtual void setThreshold(int threshold) = 0;
     CV_WRAP virtual int getThreshold() const = 0;
@@ -427,8 +529,9 @@ class CV_EXPORTS_W FastFeatureDetector : public Feature2D
     CV_WRAP virtual void setNonmaxSuppression(bool f) = 0;
     CV_WRAP virtual bool getNonmaxSuppression() const = 0;
 
-    CV_WRAP virtual void setType(int type) = 0;
-    CV_WRAP virtual int getType() const = 0;
+    CV_WRAP virtual void setType(AgastFeatureDetector::DetectorType type) = 0;
+    CV_WRAP virtual AgastFeatureDetector::DetectorType getType() const = 0;
+    CV_WRAP virtual String getDefaultName() const CV_OVERRIDE;
 };
 
 /** @overload */
@@ -454,36 +557,7 @@ Detects corners using the AGAST algorithm by @cite mair2010_agast .
 
  */
 CV_EXPORTS void AGAST( InputArray image, CV_OUT std::vector<KeyPoint>& keypoints,
-                      int threshold, bool nonmaxSuppression, int type );
-//! @} features2d_main
-
-//! @addtogroup features2d_main
-//! @{
-
-/** @brief Wrapping class for feature detection using the AGAST method. :
- */
-class CV_EXPORTS_W AgastFeatureDetector : public Feature2D
-{
-public:
-    enum
-    {
-        AGAST_5_8 = 0, AGAST_7_12d = 1, AGAST_7_12s = 2, OAST_9_16 = 3,
-        THRESHOLD = 10000, NONMAX_SUPPRESSION = 10001,
-    };
-
-    CV_WRAP static Ptr<AgastFeatureDetector> create( int threshold=10,
-                                                     bool nonmaxSuppression=true,
-                                                     int type=AgastFeatureDetector::OAST_9_16 );
-
-    CV_WRAP virtual void setThreshold(int threshold) = 0;
-    CV_WRAP virtual int getThreshold() const = 0;
-
-    CV_WRAP virtual void setNonmaxSuppression(bool f) = 0;
-    CV_WRAP virtual bool getNonmaxSuppression() const = 0;
-
-    CV_WRAP virtual void setType(int type) = 0;
-    CV_WRAP virtual int getType() const = 0;
-};
+                      int threshold, bool nonmaxSuppression, AgastFeatureDetector::DetectorType type );
 
 /** @brief Wrapping class for feature detection using the goodFeaturesToTrack function. :
  */
@@ -492,6 +566,8 @@ class CV_EXPORTS_W GFTTDetector : public Feature2D
 public:
     CV_WRAP static Ptr<GFTTDetector> create( int maxCorners=1000, double qualityLevel=0.01, double minDistance=1,
                                              int blockSize=3, bool useHarrisDetector=false, double k=0.04 );
+    CV_WRAP static Ptr<GFTTDetector> create( int maxCorners, double qualityLevel, double minDistance,
+                                             int blockSize, int gradiantSize, bool useHarrisDetector=false, double k=0.04 );
     CV_WRAP virtual void setMaxFeatures(int maxFeatures) = 0;
     CV_WRAP virtual int getMaxFeatures() const = 0;
 
@@ -509,6 +585,7 @@ class CV_EXPORTS_W GFTTDetector : public Feature2D
 
     CV_WRAP virtual void setK(double k) = 0;
     CV_WRAP virtual double getK() const = 0;
+    CV_WRAP virtual String getDefaultName() const CV_OVERRIDE;
 };
 
 /** @brief Class for extracting blobs from an image. :
@@ -575,6 +652,7 @@ class CV_EXPORTS_W SimpleBlobDetector : public Feature2D
 
   CV_WRAP static Ptr<SimpleBlobDetector>
     create(const SimpleBlobDetector::Params &parameters = SimpleBlobDetector::Params());
+  CV_WRAP virtual String getDefaultName() const CV_OVERRIDE;
 };
 
 //! @} features2d_main
@@ -591,7 +669,7 @@ F. Alcantarilla, Adrien Bartoli and Andrew J. Davison. In European Conference on
 class CV_EXPORTS_W KAZE : public Feature2D
 {
 public:
-    enum
+    enum DiffusivityType
     {
         DIFF_PM_G1 = 0,
         DIFF_PM_G2 = 1,
@@ -612,7 +690,7 @@ class CV_EXPORTS_W KAZE : public Feature2D
     CV_WRAP static Ptr<KAZE> create(bool extended=false, bool upright=false,
                                     float threshold = 0.001f,
                                     int nOctaves = 4, int nOctaveLayers = 4,
-                                    int diffusivity = KAZE::DIFF_PM_G2);
+                                    KAZE::DiffusivityType diffusivity = KAZE::DIFF_PM_G2);
 
     CV_WRAP virtual void setExtended(bool extended) = 0;
     CV_WRAP virtual bool getExtended() const = 0;
@@ -629,22 +707,32 @@ class CV_EXPORTS_W KAZE : public Feature2D
     CV_WRAP virtual void setNOctaveLayers(int octaveLayers) = 0;
     CV_WRAP virtual int getNOctaveLayers() const = 0;
 
-    CV_WRAP virtual void setDiffusivity(int diff) = 0;
-    CV_WRAP virtual int getDiffusivity() const = 0;
+    CV_WRAP virtual void setDiffusivity(KAZE::DiffusivityType diff) = 0;
+    CV_WRAP virtual KAZE::DiffusivityType getDiffusivity() const = 0;
+    CV_WRAP virtual String getDefaultName() const CV_OVERRIDE;
 };
 
-/** @brief Class implementing the AKAZE keypoint detector and descriptor extractor, described in @cite ANB13 . :
+/** @brief Class implementing the AKAZE keypoint detector and descriptor extractor, described in @cite ANB13.
 
-@note AKAZE descriptors can only be used with KAZE or AKAZE keypoints. Try to avoid using *extract*
-and *detect* instead of *operator()* due to performance reasons. .. [ANB13] Fast Explicit Diffusion
-for Accelerated Features in Nonlinear Scale Spaces. Pablo F. Alcantarilla, Jesús Nuevo and Adrien
-Bartoli. In British Machine Vision Conference (BMVC), Bristol, UK, September 2013.
- */
+@details AKAZE descriptors can only be used with KAZE or AKAZE keypoints. This class is thread-safe.
+
+@note When you need descriptors, use Feature2D::detectAndCompute, which
+provides better performance. When Feature2D::detect is followed by
+Feature2D::compute, the scale space pyramid is computed twice.
+
+@note AKAZE implements T-API. When the image is passed as a UMat, some parts of the
+algorithm will use OpenCL.
+
+@note [ANB13] Fast Explicit Diffusion for Accelerated Features in Nonlinear
+Scale Spaces. Pablo F. Alcantarilla, Jesús Nuevo and Adrien Bartoli. In
+British Machine Vision Conference (BMVC), Bristol, UK, September 2013.
+
+*/
 class CV_EXPORTS_W AKAZE : public Feature2D
 {
 public:
     // AKAZE descriptor type
-    enum
+    enum DescriptorType
     {
         DESCRIPTOR_KAZE_UPRIGHT = 2, ///< Upright descriptors, not invariant to rotation
         DESCRIPTOR_KAZE = 3,
@@ -664,13 +752,13 @@ class CV_EXPORTS_W AKAZE : public Feature2D
     @param diffusivity Diffusivity type. DIFF_PM_G1, DIFF_PM_G2, DIFF_WEICKERT or
     DIFF_CHARBONNIER
      */
-    CV_WRAP static Ptr<AKAZE> create(int descriptor_type=AKAZE::DESCRIPTOR_MLDB,
+    CV_WRAP static Ptr<AKAZE> create(AKAZE::DescriptorType descriptor_type = AKAZE::DESCRIPTOR_MLDB,
                                      int descriptor_size = 0, int descriptor_channels = 3,
                                      float threshold = 0.001f, int nOctaves = 4,
-                                     int nOctaveLayers = 4, int diffusivity = KAZE::DIFF_PM_G2);
+                                     int nOctaveLayers = 4, KAZE::DiffusivityType diffusivity = KAZE::DIFF_PM_G2);
 
-    CV_WRAP virtual void setDescriptorType(int dtype) = 0;
-    CV_WRAP virtual int getDescriptorType() const = 0;
+    CV_WRAP virtual void setDescriptorType(AKAZE::DescriptorType dtype) = 0;
+    CV_WRAP virtual AKAZE::DescriptorType getDescriptorType() const = 0;
 
     CV_WRAP virtual void setDescriptorSize(int dsize) = 0;
     CV_WRAP virtual int getDescriptorSize() const = 0;
@@ -687,8 +775,9 @@ class CV_EXPORTS_W AKAZE : public Feature2D
     CV_WRAP virtual void setNOctaveLayers(int octaveLayers) = 0;
     CV_WRAP virtual int getNOctaveLayers() const = 0;
 
-    CV_WRAP virtual void setDiffusivity(int diff) = 0;
-    CV_WRAP virtual int getDiffusivity() const = 0;
+    CV_WRAP virtual void setDiffusivity(KAZE::DiffusivityType diff) = 0;
+    CV_WRAP virtual KAZE::DiffusivityType getDiffusivity() const = 0;
+    CV_WRAP virtual String getDefaultName() const CV_OVERRIDE;
 };
 
 //! @} features2d_main
@@ -714,7 +803,7 @@ template<> struct Accumulator<short>  { typedef float Type; };
 template<class T>
 struct CV_EXPORTS SL2
 {
-    enum { normType = NORM_L2SQR };
+    static const NormTypes normType = NORM_L2SQR;
     typedef T ValueType;
     typedef typename Accumulator<T>::Type ResultType;
 
@@ -728,9 +817,9 @@ struct CV_EXPORTS SL2
  * Euclidean distance functor
  */
 template<class T>
-struct CV_EXPORTS L2
+struct L2
 {
-    enum { normType = NORM_L2 };
+    static const NormTypes normType = NORM_L2;
     typedef T ValueType;
     typedef typename Accumulator<T>::Type ResultType;
 
@@ -744,9 +833,9 @@ struct CV_EXPORTS L2
  * Manhattan distance (city block distance) functor
  */
 template<class T>
-struct CV_EXPORTS L1
+struct L1
 {
-    enum { normType = NORM_L1 };
+    static const NormTypes normType = NORM_L1;
     typedef T ValueType;
     typedef typename Accumulator<T>::Type ResultType;
 
@@ -771,6 +860,16 @@ an image set.
 class CV_EXPORTS_W DescriptorMatcher : public Algorithm
 {
 public:
+   enum MatcherType
+    {
+        FLANNBASED            = 1,
+        BRUTEFORCE            = 2,
+        BRUTEFORCE_L1         = 3,
+        BRUTEFORCE_HAMMING    = 4,
+        BRUTEFORCE_HAMMINGLUT = 5,
+        BRUTEFORCE_SL2        = 6
+    };
+
     virtual ~DescriptorMatcher();
 
     /** @brief Adds descriptors to train a CPU(trainDescCollectionis) or GPU(utrainDescCollectionis) descriptor
@@ -789,11 +888,11 @@ class CV_EXPORTS_W DescriptorMatcher : public Algorithm
 
     /** @brief Clears the train descriptor collections.
      */
-    CV_WRAP virtual void clear();
+    CV_WRAP virtual void clear() CV_OVERRIDE;
 
     /** @brief Returns true if there are no train descriptors in the both collections.
      */
-    CV_WRAP virtual bool empty() const;
+    CV_WRAP virtual bool empty() const CV_OVERRIDE;
 
     /** @brief Returns true if the descriptor matcher supports masking permissible matches.
      */
@@ -868,8 +967,8 @@ class CV_EXPORTS_W DescriptorMatcher : public Algorithm
     query descriptor and the training descriptor is equal or smaller than maxDistance. Found matches are
     returned in the distance increasing order.
      */
-    void radiusMatch( InputArray queryDescriptors, InputArray trainDescriptors,
-                      std::vector<std::vector<DMatch> >& matches, float maxDistance,
+    CV_WRAP void radiusMatch( InputArray queryDescriptors, InputArray trainDescriptors,
+                      CV_OUT std::vector<std::vector<DMatch> >& matches, float maxDistance,
                       InputArray mask=noArray(), bool compactResult=false ) const;
 
     /** @overload
@@ -906,13 +1005,26 @@ class CV_EXPORTS_W DescriptorMatcher : public Algorithm
     false, the matches vector has the same size as queryDescriptors rows. If compactResult is true,
     the matches vector does not contain matches for fully masked-out query descriptors.
     */
-    void radiusMatch( InputArray queryDescriptors, std::vector<std::vector<DMatch> >& matches, float maxDistance,
+    CV_WRAP void radiusMatch( InputArray queryDescriptors, CV_OUT std::vector<std::vector<DMatch> >& matches, float maxDistance,
                       InputArrayOfArrays masks=noArray(), bool compactResult=false );
 
+
+    CV_WRAP void write( const String& fileName ) const
+    {
+        FileStorage fs(fileName, FileStorage::WRITE);
+        write(fs);
+    }
+
+    CV_WRAP void read( const String& fileName )
+    {
+        FileStorage fs(fileName, FileStorage::READ);
+        read(fs.root());
+    }
     // Reads matcher object from a file node
-    virtual void read( const FileNode& );
+    // see corresponding cv::Algorithm method
+    CV_WRAP virtual void read( const FileNode& ) CV_OVERRIDE;
     // Writes matcher object to a file storage
-    virtual void write( FileStorage& ) const;
+    virtual void write( FileStorage& ) const CV_OVERRIDE;
 
     /** @brief Clones the matcher.
 
@@ -920,7 +1032,7 @@ class CV_EXPORTS_W DescriptorMatcher : public Algorithm
     that is, copies both parameters and train data. If emptyTrainData is true, the method creates an
     object copy with the current parameters but with empty train data.
      */
-    virtual Ptr<DescriptorMatcher> clone( bool emptyTrainData=false ) const = 0;
+    CV_WRAP virtual Ptr<DescriptorMatcher> clone( bool emptyTrainData=false ) const = 0;
 
     /** @brief Creates a descriptor matcher of a given type with the default parameters (using default
     constructor).
@@ -934,6 +1046,13 @@ class CV_EXPORTS_W DescriptorMatcher : public Algorithm
     -   `FlannBased`
      */
     CV_WRAP static Ptr<DescriptorMatcher> create( const String& descriptorMatcherType );
+
+    CV_WRAP static Ptr<DescriptorMatcher> create( const DescriptorMatcher::MatcherType& matcherType );
+
+
+    // see corresponding cv::Algorithm method
+    CV_WRAP inline void write(const Ptr<FileStorage>& fs, const String& name = String()) const { Algorithm::write(fs, name); }
+
 protected:
     /**
      * Class to work with descriptors from several images as with one merged matrix.
@@ -990,8 +1109,17 @@ sets.
 class CV_EXPORTS_W BFMatcher : public DescriptorMatcher
 {
 public:
-    /** @brief Brute-force matcher constructor.
+    /** @brief Brute-force matcher constructor (obsolete). Please use BFMatcher.create()
+     *
+     *
+    */
+    CV_WRAP BFMatcher( int normType=NORM_L2, bool crossCheck=false );
+
+    virtual ~BFMatcher() {}
+
+    virtual bool isMaskSupported() const CV_OVERRIDE { return true; }
 
+    /** @brief Brute-force matcher create method.
     @param normType One of NORM_L1, NORM_L2, NORM_HAMMING, NORM_HAMMING2. L1 and L2 norms are
     preferable choices for SIFT and SURF descriptors, NORM_HAMMING should be used with ORB, BRISK and
     BRIEF, NORM_HAMMING2 should be used with ORB when WTA_K==3 or 4 (see ORB::ORB constructor
@@ -1003,26 +1131,24 @@ class CV_EXPORTS_W BFMatcher : public DescriptorMatcher
     pairs. Such technique usually produces best results with minimal number of outliers when there are
     enough matches. This is alternative to the ratio test, used by D. Lowe in SIFT paper.
      */
-    CV_WRAP BFMatcher( int normType=NORM_L2, bool crossCheck=false );
-    virtual ~BFMatcher() {}
+    CV_WRAP static Ptr<BFMatcher> create( int normType=NORM_L2, bool crossCheck=false ) ;
 
-    virtual bool isMaskSupported() const { return true; }
-
-    virtual Ptr<DescriptorMatcher> clone( bool emptyTrainData=false ) const;
+    virtual Ptr<DescriptorMatcher> clone( bool emptyTrainData=false ) const CV_OVERRIDE;
 protected:
     virtual void knnMatchImpl( InputArray queryDescriptors, std::vector<std::vector<DMatch> >& matches, int k,
-        InputArrayOfArrays masks=noArray(), bool compactResult=false );
+        InputArrayOfArrays masks=noArray(), bool compactResult=false ) CV_OVERRIDE;
     virtual void radiusMatchImpl( InputArray queryDescriptors, std::vector<std::vector<DMatch> >& matches, float maxDistance,
-        InputArrayOfArrays masks=noArray(), bool compactResult=false );
+        InputArrayOfArrays masks=noArray(), bool compactResult=false ) CV_OVERRIDE;
 
     int normType;
     bool crossCheck;
 };
 
+#if defined(HAVE_OPENCV_FLANN) || defined(CV_DOXYGEN)
 
 /** @brief Flann-based descriptor matcher.
 
-This matcher trains flann::Index_ on a train descriptor collection and calls its nearest search
+This matcher trains cv::flann::Index on a train descriptor collection and calls its nearest search
 methods to find the best matches. So, this matcher may be faster when matching a large train
 collection than the brute force matcher. FlannBasedMatcher does not support masking permissible
 matches of descriptor sets because flann::Index does not support this. :
@@ -1033,27 +1159,29 @@ class CV_EXPORTS_W FlannBasedMatcher : public DescriptorMatcher
     CV_WRAP FlannBasedMatcher( const Ptr<flann::IndexParams>& indexParams=makePtr<flann::KDTreeIndexParams>(),
                        const Ptr<flann::SearchParams>& searchParams=makePtr<flann::SearchParams>() );
 
-    virtual void add( InputArrayOfArrays descriptors );
-    virtual void clear();
+    virtual void add( InputArrayOfArrays descriptors ) CV_OVERRIDE;
+    virtual void clear() CV_OVERRIDE;
 
     // Reads matcher object from a file node
-    virtual void read( const FileNode& );
+    virtual void read( const FileNode& ) CV_OVERRIDE;
     // Writes matcher object to a file storage
-    virtual void write( FileStorage& ) const;
+    virtual void write( FileStorage& ) const CV_OVERRIDE;
+
+    virtual void train() CV_OVERRIDE;
+    virtual bool isMaskSupported() const CV_OVERRIDE;
 
-    virtual void train();
-    virtual bool isMaskSupported() const;
+    CV_WRAP static Ptr<FlannBasedMatcher> create();
 
-    virtual Ptr<DescriptorMatcher> clone( bool emptyTrainData=false ) const;
+    virtual Ptr<DescriptorMatcher> clone( bool emptyTrainData=false ) const CV_OVERRIDE;
 protected:
     static void convertToDMatches( const DescriptorCollection& descriptors,
                                    const Mat& indices, const Mat& distances,
                                    std::vector<std::vector<DMatch> >& matches );
 
     virtual void knnMatchImpl( InputArray queryDescriptors, std::vector<std::vector<DMatch> >& matches, int k,
-        InputArrayOfArrays masks=noArray(), bool compactResult=false );
+        InputArrayOfArrays masks=noArray(), bool compactResult=false ) CV_OVERRIDE;
     virtual void radiusMatchImpl( InputArray queryDescriptors, std::vector<std::vector<DMatch> >& matches, float maxDistance,
-        InputArrayOfArrays masks=noArray(), bool compactResult=false );
+        InputArrayOfArrays masks=noArray(), bool compactResult=false ) CV_OVERRIDE;
 
     Ptr<flann::IndexParams> indexParams;
     Ptr<flann::SearchParams> searchParams;
@@ -1063,6 +1191,8 @@ class CV_EXPORTS_W FlannBasedMatcher : public DescriptorMatcher
     int addedDescCount;
 };
 
+#endif
+
 //! @} features2d_match
 
 /****************************************************************************************\
@@ -1072,20 +1202,20 @@ class CV_EXPORTS_W FlannBasedMatcher : public DescriptorMatcher
 //! @addtogroup features2d_draw
 //! @{
 
-struct CV_EXPORTS DrawMatchesFlags
+enum struct DrawMatchesFlags
 {
-    enum{ DEFAULT = 0, //!< Output image matrix will be created (Mat::create),
-                       //!< i.e. existing memory of output image may be reused.
-                       //!< Two source image, matches and single keypoints will be drawn.
-                       //!< For each keypoint only the center point will be drawn (without
-                       //!< the circle around keypoint with keypoint size and orientation).
-          DRAW_OVER_OUTIMG = 1, //!< Output image matrix will not be created (Mat::create).
-                                //!< Matches will be drawn on existing content of output image.
-          NOT_DRAW_SINGLE_POINTS = 2, //!< Single keypoints will not be drawn.
-          DRAW_RICH_KEYPOINTS = 4 //!< For each keypoint the circle around keypoint with keypoint size and
-                                  //!< orientation will be drawn.
-        };
+  DEFAULT = 0, //!< Output image matrix will be created (Mat::create),
+               //!< i.e. existing memory of output image may be reused.
+               //!< Two source images, matches and single keypoints will be drawn.
+               //!< For each keypoint only the center point will be drawn (without
+               //!< the circle around keypoint with keypoint size and orientation).
+  DRAW_OVER_OUTIMG = 1, //!< Output image matrix will not be created (Mat::create).
+                        //!< Matches will be drawn on existing content of output image.
+  NOT_DRAW_SINGLE_POINTS = 2, //!< Single keypoints will not be drawn.
+  DRAW_RICH_KEYPOINTS = 4 //!< For each keypoint the circle around keypoint with keypoint size and
+                          //!< orientation will be drawn.
 };
+CV_ENUM_FLAGS(DrawMatchesFlags)
 
 /** @brief Draws keypoints.
 
@@ -1098,12 +1228,12 @@ output image. See possible flags bit values below.
 DrawMatchesFlags. See details above in drawMatches .
 
 @note
-For Python API, flags are modified as cv2.DRAW_MATCHES_FLAGS_DEFAULT,
-cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS, cv2.DRAW_MATCHES_FLAGS_DRAW_OVER_OUTIMG,
-cv2.DRAW_MATCHES_FLAGS_NOT_DRAW_SINGLE_POINTS
+For Python API, flags are modified as cv.DRAW_MATCHES_FLAGS_DEFAULT,
+cv.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS, cv.DRAW_MATCHES_FLAGS_DRAW_OVER_OUTIMG,
+cv.DRAW_MATCHES_FLAGS_NOT_DRAW_SINGLE_POINTS
  */
 CV_EXPORTS_W void drawKeypoints( InputArray image, const std::vector<KeyPoint>& keypoints, InputOutputArray outImage,
-                               const Scalar& color=Scalar::all(-1), int flags=DrawMatchesFlags::DEFAULT );
+                               const Scalar& color=Scalar::all(-1), DrawMatchesFlags flags=DrawMatchesFlags::DEFAULT );
 
 /** @brief Draws the found matches of keypoints from two images.
 
@@ -1131,14 +1261,14 @@ CV_EXPORTS_W void drawMatches( InputArray img1, const std::vector<KeyPoint>& key
                              InputArray img2, const std::vector<KeyPoint>& keypoints2,
                              const std::vector<DMatch>& matches1to2, InputOutputArray outImg,
                              const Scalar& matchColor=Scalar::all(-1), const Scalar& singlePointColor=Scalar::all(-1),
-                             const std::vector<char>& matchesMask=std::vector<char>(), int flags=DrawMatchesFlags::DEFAULT );
+                             const std::vector<char>& matchesMask=std::vector<char>(), DrawMatchesFlags flags=DrawMatchesFlags::DEFAULT );
 
 /** @overload */
 CV_EXPORTS_AS(drawMatchesKnn) void drawMatches( InputArray img1, const std::vector<KeyPoint>& keypoints1,
                              InputArray img2, const std::vector<KeyPoint>& keypoints2,
                              const std::vector<std::vector<DMatch> >& matches1to2, InputOutputArray outImg,
                              const Scalar& matchColor=Scalar::all(-1), const Scalar& singlePointColor=Scalar::all(-1),
-                             const std::vector<std::vector<char> >& matchesMask=std::vector<std::vector<char> >(), int flags=DrawMatchesFlags::DEFAULT );
+                             const std::vector<std::vector<char> >& matchesMask=std::vector<std::vector<char> >(), DrawMatchesFlags flags=DrawMatchesFlags::DEFAULT );
 
 //! @} features2d_draw
 
@@ -1228,8 +1358,8 @@ class CV_EXPORTS_W BOWKMeansTrainer : public BOWTrainer
     virtual ~BOWKMeansTrainer();
 
     // Returns trained vocabulary (i.e. cluster centers).
-    CV_WRAP virtual Mat cluster() const;
-    CV_WRAP virtual Mat cluster( const Mat& descriptors ) const;
+    CV_WRAP virtual Mat cluster() const CV_OVERRIDE;
+    CV_WRAP virtual Mat cluster( const Mat& descriptors ) const CV_OVERRIDE;
 
 protected:
 
diff --git a/IPL/include/opencv/opencv2/features2d/hal/interface.h b/IPL/include/opencv/opencv2/features2d/hal/interface.h
new file mode 100644
index 0000000..bc3b084
--- /dev/null
+++ b/IPL/include/opencv/opencv2/features2d/hal/interface.h
@@ -0,0 +1,33 @@
+#ifndef OPENCV_FEATURE2D_HAL_INTERFACE_H
+#define OPENCV_FEATURE2D_HAL_INTERFACE_H
+
+#include "opencv2/core/cvdef.h"
+//! @addtogroup features2d_hal_interface
+//! @{
+
+//! @name Fast feature detector types
+//! @sa cv::FastFeatureDetector
+//! @{
+#define CV_HAL_TYPE_5_8  0
+#define CV_HAL_TYPE_7_12 1
+#define CV_HAL_TYPE_9_16 2
+//! @}
+
+//! @name Key point
+//! @sa cv::KeyPoint
+//! @{
+struct CV_EXPORTS cvhalKeyPoint
+{
+    float x;
+    float y;
+    float size;
+    float angle;
+    float response;
+    int octave;
+    int class_id;
+};
+//! @}
+
+//! @}
+
+#endif
diff --git a/IPL/include/opencv/opencv2/flann.hpp b/IPL/include/opencv/opencv2/flann.hpp
index 4f92d57..887759e 100644
--- a/IPL/include/opencv/opencv2/flann.hpp
+++ b/IPL/include/opencv/opencv2/flann.hpp
@@ -40,8 +40,8 @@
 //
 //M*/
 
-#ifndef _OPENCV_FLANN_HPP_
-#define _OPENCV_FLANN_HPP_
+#ifndef OPENCV_FLANN_HPP
+#define OPENCV_FLANN_HPP
 
 #include "opencv2/core.hpp"
 #include "opencv2/flann/miniflann.hpp"
@@ -59,7 +59,7 @@ can be found in @cite Muja2009 .
 namespace cvflann
 {
     CV_EXPORTS flann_distance_t flann_distance_type();
-    FLANN_DEPRECATED CV_EXPORTS void set_distance_type(flann_distance_t distance_type, int order);
+    CV_DEPRECATED CV_EXPORTS void set_distance_type(flann_distance_t distance_type, int order);
 }
 
 
@@ -103,6 +103,58 @@ using ::cvflann::KL_Divergence;
 
 /** @brief The FLANN nearest neighbor index class. This class is templated with the type of elements for which
 the index is built.
+
+`Distance` functor specifies the metric to be used to calculate the distance between two points.
+There are several `Distance` functors that are readily available:
+
+cv::cvflann::L2_Simple - Squared Euclidean distance functor.
+This is the simpler, unrolled version. This is preferable for very low dimensionality data (e.g. 3D points).
+
+cv::flann::L2 - Squared Euclidean distance functor, optimized version.
+
+cv::flann::L1 - Manhattan distance functor, optimized version.
+
+cv::flann::MinkowskiDistance - The Minkowski distance functor.
+This is highly optimized with loop unrolling.
+The computation of the square root at the end is omitted for efficiency.
+
+cv::flann::MaxDistance - The max distance functor. It computes the
+maximum distance between two vectors. This distance is not a valid kdtree distance, it's not
+dimensionwise additive.
+
+cv::flann::HammingLUT -  %Hamming distance functor. It counts the bit
+differences between two strings using a lookup table implementation.
+
+cv::flann::Hamming - %Hamming distance functor. Population count is
+performed using library calls, if available. Lookup table implementation is used as a fallback.
+
+cv::flann::Hamming2 - %Hamming distance functor. Population count is
+implemented in 12 arithmetic operations (one of which is multiplication).
+
+cv::flann::HistIntersectionDistance - The histogram
+intersection distance functor.
+
+cv::flann::HellingerDistance - The Hellinger distance functor.
+
+cv::flann::ChiSquareDistance - The chi-square distance functor.
+
+cv::flann::KL_Divergence - The Kullback-Leibler divergence functor.
+
+Although the provided implementations cover a vast range of cases, it is also possible to use
+a custom implementation. The distance functor is a class whose `operator()` computes the distance
+between two features. If the distance is also a kd-tree compatible distance, it should also provide an
+`accum_dist()` method that computes the distance between individual feature dimensions.
+
+In addition to `operator()` and `accum_dist()`, a distance functor should also define the
+`ElementType` and the `ResultType` as the types of the elements it operates on and the type of the
+result it computes. If a distance functor can be used as a kd-tree distance (meaning that the full
+distance between a pair of features can be accumulated from the partial distances between the
+individual dimensions) a typedef `is_kdtree_distance` should be present inside the distance functor.
+If the distance is not a kd-tree distance, but it's a distance in a vector space (the individual
+dimensions of the elements it operates on can be accessed independently) a typedef
+`is_vector_space_distance` should be defined inside the functor. If neither typedef is defined, the
+distance is assumed to be a metric distance and will only be used with indexes operating on
+generic metric distances.
  */
 template <typename Distance>
 class GenericIndex
@@ -217,6 +269,17 @@ class GenericIndex
                        std::vector<DistanceType>& dists, int knn, const ::cvflann::SearchParams& params);
         void knnSearch(const Mat& queries, Mat& indices, Mat& dists, int knn, const ::cvflann::SearchParams& params);
 
+        /** @brief Performs a radius nearest neighbor search for a given query point using the index.
+
+        @param query The query point.
+        @param indices Vector that will contain the indices of the nearest neighbors found.
+        @param dists Vector that will contain the distances to the nearest neighbors found. It has the same
+        number of elements as indices.
+        @param radius The search radius.
+        @param params SearchParams
+
+        This function returns the number of nearest neighbors found.
+        */
         int radiusSearch(const std::vector<ElementType>& query, std::vector<int>& indices,
                          std::vector<DistanceType>& dists, DistanceType radius, const ::cvflann::SearchParams& params);
         int radiusSearch(const Mat& query, Mat& indices, Mat& dists,
@@ -226,14 +289,15 @@ class GenericIndex
 
         int veclen() const { return nnIndex->veclen(); }
 
-        int size() const { return nnIndex->size(); }
+        int size() const { return (int)nnIndex->size(); }
 
         ::cvflann::IndexParams getParameters() { return nnIndex->getParameters(); }
 
-        FLANN_DEPRECATED const ::cvflann::IndexParams* getIndexParameters() { return nnIndex->getIndexParameters(); }
+        CV_DEPRECATED const ::cvflann::IndexParams* getIndexParameters() { return nnIndex->getIndexParameters(); }
 
 private:
         ::cvflann::Index<Distance>* nnIndex;
+        Mat _dataset;
 };
 
 //! @cond IGNORED
@@ -249,10 +313,11 @@ class GenericIndex
 
 template <typename Distance>
 GenericIndex<Distance>::GenericIndex(const Mat& dataset, const ::cvflann::IndexParams& params, Distance distance)
+: _dataset(dataset)
 {
     CV_Assert(dataset.type() == CvType<ElementType>::type());
     CV_Assert(dataset.isContinuous());
-    ::cvflann::Matrix<ElementType> m_dataset((ElementType*)dataset.ptr<ElementType>(0), dataset.rows, dataset.cols);
+    ::cvflann::Matrix<ElementType> m_dataset((ElementType*)_dataset.ptr<ElementType>(0), _dataset.rows, _dataset.cols);
 
     nnIndex = new ::cvflann::Index<Distance>(m_dataset, params, distance);
 
@@ -332,168 +397,137 @@ int GenericIndex<Distance>::radiusSearch(const Mat& query, Mat& indices, Mat& di
     return nnIndex->radiusSearch(m_query,m_indices,m_dists,radius,searchParams);
 }
 
-//! @endcond
-
 /**
  * @deprecated Use GenericIndex class instead
  */
 template <typename T>
-class
-#ifndef _MSC_VER
- FLANN_DEPRECATED
-#endif
- Index_ {
+class Index_
+{
 public:
-        typedef typename L2<T>::ElementType ElementType;
-        typedef typename L2<T>::ResultType DistanceType;
-
-    Index_(const Mat& features, const ::cvflann::IndexParams& params);
-
-    ~Index_();
+    typedef typename L2<T>::ElementType ElementType;
+    typedef typename L2<T>::ResultType DistanceType;
 
-    void knnSearch(const std::vector<ElementType>& query, std::vector<int>& indices, std::vector<DistanceType>& dists, int knn, const ::cvflann::SearchParams& params);
-    void knnSearch(const Mat& queries, Mat& indices, Mat& dists, int knn, const ::cvflann::SearchParams& params);
-
-    int radiusSearch(const std::vector<ElementType>& query, std::vector<int>& indices, std::vector<DistanceType>& dists, DistanceType radius, const ::cvflann::SearchParams& params);
-    int radiusSearch(const Mat& query, Mat& indices, Mat& dists, DistanceType radius, const ::cvflann::SearchParams& params);
-
-    void save(String filename)
-        {
-            if (nnIndex_L1) nnIndex_L1->save(filename);
-            if (nnIndex_L2) nnIndex_L2->save(filename);
-        }
-
-    int veclen() const
+    CV_DEPRECATED Index_(const Mat& dataset, const ::cvflann::IndexParams& params)
     {
-            if (nnIndex_L1) return nnIndex_L1->veclen();
-            if (nnIndex_L2) return nnIndex_L2->veclen();
-        }
+        printf("[WARNING] The cv::flann::Index_<T> class is deperecated, use cv::flann::GenericIndex<Distance> instead\n");
 
-    int size() const
-    {
-            if (nnIndex_L1) return nnIndex_L1->size();
-            if (nnIndex_L2) return nnIndex_L2->size();
-        }
-
-        ::cvflann::IndexParams getParameters()
-        {
-            if (nnIndex_L1) return nnIndex_L1->getParameters();
-            if (nnIndex_L2) return nnIndex_L2->getParameters();
+        CV_Assert(dataset.type() == CvType<ElementType>::type());
+        CV_Assert(dataset.isContinuous());
+        ::cvflann::Matrix<ElementType> m_dataset((ElementType*)dataset.ptr<ElementType>(0), dataset.rows, dataset.cols);
 
+        if ( ::cvflann::flann_distance_type() == cvflann::FLANN_DIST_L2 ) {
+            nnIndex_L1 = NULL;
+            nnIndex_L2 = new ::cvflann::Index< L2<ElementType> >(m_dataset, params);
         }
-
-        FLANN_DEPRECATED const ::cvflann::IndexParams* getIndexParameters()
-        {
-            if (nnIndex_L1) return nnIndex_L1->getIndexParameters();
-            if (nnIndex_L2) return nnIndex_L2->getIndexParameters();
+        else if ( ::cvflann::flann_distance_type() == cvflann::FLANN_DIST_L1 ) {
+            nnIndex_L1 = new ::cvflann::Index< L1<ElementType> >(m_dataset, params);
+            nnIndex_L2 = NULL;
         }
+        else {
+            printf("[ERROR] cv::flann::Index_<T> only provides backwards compatibility for the L1 and L2 distances. "
+                   "For other distance types you must use cv::flann::GenericIndex<Distance>\n");
+            CV_Assert(0);
+        }
+        if (nnIndex_L1) nnIndex_L1->buildIndex();
+        if (nnIndex_L2) nnIndex_L2->buildIndex();
+    }
+    CV_DEPRECATED ~Index_()
+    {
+        if (nnIndex_L1) delete nnIndex_L1;
+        if (nnIndex_L2) delete nnIndex_L2;
+    }
 
-private:
-        // providing backwards compatibility for L2 and L1 distances (most common)
-        ::cvflann::Index< L2<ElementType> >* nnIndex_L2;
-        ::cvflann::Index< L1<ElementType> >* nnIndex_L1;
-};
-
-#ifdef _MSC_VER
-template <typename T>
-class FLANN_DEPRECATED Index_;
-#endif
+    CV_DEPRECATED void knnSearch(const std::vector<ElementType>& query, std::vector<int>& indices, std::vector<DistanceType>& dists, int knn, const ::cvflann::SearchParams& searchParams)
+    {
+        ::cvflann::Matrix<ElementType> m_query((ElementType*)&query[0], 1, query.size());
+        ::cvflann::Matrix<int> m_indices(&indices[0], 1, indices.size());
+        ::cvflann::Matrix<DistanceType> m_dists(&dists[0], 1, dists.size());
 
-//! @cond IGNORED
+        if (nnIndex_L1) nnIndex_L1->knnSearch(m_query,m_indices,m_dists,knn,searchParams);
+        if (nnIndex_L2) nnIndex_L2->knnSearch(m_query,m_indices,m_dists,knn,searchParams);
+    }
+    CV_DEPRECATED void knnSearch(const Mat& queries, Mat& indices, Mat& dists, int knn, const ::cvflann::SearchParams& searchParams)
+    {
+        CV_Assert(queries.type() == CvType<ElementType>::type());
+        CV_Assert(queries.isContinuous());
+        ::cvflann::Matrix<ElementType> m_queries((ElementType*)queries.ptr<ElementType>(0), queries.rows, queries.cols);
 
-template <typename T>
-Index_<T>::Index_(const Mat& dataset, const ::cvflann::IndexParams& params)
-{
-    printf("[WARNING] The cv::flann::Index_<T> class is deperecated, use cv::flann::GenericIndex<Distance> instead\n");
+        CV_Assert(indices.type() == CV_32S);
+        CV_Assert(indices.isContinuous());
+        ::cvflann::Matrix<int> m_indices((int*)indices.ptr<int>(0), indices.rows, indices.cols);
 
-    CV_Assert(dataset.type() == CvType<ElementType>::type());
-    CV_Assert(dataset.isContinuous());
-    ::cvflann::Matrix<ElementType> m_dataset((ElementType*)dataset.ptr<ElementType>(0), dataset.rows, dataset.cols);
+        CV_Assert(dists.type() == CvType<DistanceType>::type());
+        CV_Assert(dists.isContinuous());
+        ::cvflann::Matrix<DistanceType> m_dists((DistanceType*)dists.ptr<DistanceType>(0), dists.rows, dists.cols);
 
-    if ( ::cvflann::flann_distance_type() == cvflann::FLANN_DIST_L2 ) {
-        nnIndex_L1 = NULL;
-        nnIndex_L2 = new ::cvflann::Index< L2<ElementType> >(m_dataset, params);
+        if (nnIndex_L1) nnIndex_L1->knnSearch(m_queries,m_indices,m_dists,knn, searchParams);
+        if (nnIndex_L2) nnIndex_L2->knnSearch(m_queries,m_indices,m_dists,knn, searchParams);
     }
-    else if ( ::cvflann::flann_distance_type() == cvflann::FLANN_DIST_L1 ) {
-        nnIndex_L1 = new ::cvflann::Index< L1<ElementType> >(m_dataset, params);
-        nnIndex_L2 = NULL;
-    }
-    else {
-        printf("[ERROR] cv::flann::Index_<T> only provides backwards compatibility for the L1 and L2 distances. "
-        "For other distance types you must use cv::flann::GenericIndex<Distance>\n");
-        CV_Assert(0);
-    }
-    if (nnIndex_L1) nnIndex_L1->buildIndex();
-    if (nnIndex_L2) nnIndex_L2->buildIndex();
-}
 
-template <typename T>
-Index_<T>::~Index_()
-{
-    if (nnIndex_L1) delete nnIndex_L1;
-    if (nnIndex_L2) delete nnIndex_L2;
-}
-
-template <typename T>
-void Index_<T>::knnSearch(const std::vector<ElementType>& query, std::vector<int>& indices, std::vector<DistanceType>& dists, int knn, const ::cvflann::SearchParams& searchParams)
-{
-    ::cvflann::Matrix<ElementType> m_query((ElementType*)&query[0], 1, query.size());
-    ::cvflann::Matrix<int> m_indices(&indices[0], 1, indices.size());
-    ::cvflann::Matrix<DistanceType> m_dists(&dists[0], 1, dists.size());
+    CV_DEPRECATED int radiusSearch(const std::vector<ElementType>& query, std::vector<int>& indices, std::vector<DistanceType>& dists, DistanceType radius, const ::cvflann::SearchParams& searchParams)
+    {
+        ::cvflann::Matrix<ElementType> m_query((ElementType*)&query[0], 1, query.size());
+        ::cvflann::Matrix<int> m_indices(&indices[0], 1, indices.size());
+        ::cvflann::Matrix<DistanceType> m_dists(&dists[0], 1, dists.size());
 
-    if (nnIndex_L1) nnIndex_L1->knnSearch(m_query,m_indices,m_dists,knn,searchParams);
-    if (nnIndex_L2) nnIndex_L2->knnSearch(m_query,m_indices,m_dists,knn,searchParams);
-}
+        if (nnIndex_L1) return nnIndex_L1->radiusSearch(m_query,m_indices,m_dists,radius,searchParams);
+        if (nnIndex_L2) return nnIndex_L2->radiusSearch(m_query,m_indices,m_dists,radius,searchParams);
+    }
 
+    CV_DEPRECATED int radiusSearch(const Mat& query, Mat& indices, Mat& dists, DistanceType radius, const ::cvflann::SearchParams& searchParams)
+    {
+        CV_Assert(query.type() == CvType<ElementType>::type());
+        CV_Assert(query.isContinuous());
+        ::cvflann::Matrix<ElementType> m_query((ElementType*)query.ptr<ElementType>(0), query.rows, query.cols);
 
-template <typename T>
-void Index_<T>::knnSearch(const Mat& queries, Mat& indices, Mat& dists, int knn, const ::cvflann::SearchParams& searchParams)
-{
-    CV_Assert(queries.type() == CvType<ElementType>::type());
-    CV_Assert(queries.isContinuous());
-    ::cvflann::Matrix<ElementType> m_queries((ElementType*)queries.ptr<ElementType>(0), queries.rows, queries.cols);
+        CV_Assert(indices.type() == CV_32S);
+        CV_Assert(indices.isContinuous());
+        ::cvflann::Matrix<int> m_indices((int*)indices.ptr<int>(0), indices.rows, indices.cols);
 
-    CV_Assert(indices.type() == CV_32S);
-    CV_Assert(indices.isContinuous());
-    ::cvflann::Matrix<int> m_indices((int*)indices.ptr<int>(0), indices.rows, indices.cols);
+        CV_Assert(dists.type() == CvType<DistanceType>::type());
+        CV_Assert(dists.isContinuous());
+        ::cvflann::Matrix<DistanceType> m_dists((DistanceType*)dists.ptr<DistanceType>(0), dists.rows, dists.cols);
 
-    CV_Assert(dists.type() == CvType<DistanceType>::type());
-    CV_Assert(dists.isContinuous());
-    ::cvflann::Matrix<DistanceType> m_dists((DistanceType*)dists.ptr<DistanceType>(0), dists.rows, dists.cols);
+        if (nnIndex_L1) return nnIndex_L1->radiusSearch(m_query,m_indices,m_dists,radius,searchParams);
+        if (nnIndex_L2) return nnIndex_L2->radiusSearch(m_query,m_indices,m_dists,radius,searchParams);
+    }
 
-    if (nnIndex_L1) nnIndex_L1->knnSearch(m_queries,m_indices,m_dists,knn, searchParams);
-    if (nnIndex_L2) nnIndex_L2->knnSearch(m_queries,m_indices,m_dists,knn, searchParams);
-}
+    CV_DEPRECATED void save(String filename)
+    {
+        if (nnIndex_L1) nnIndex_L1->save(filename);
+        if (nnIndex_L2) nnIndex_L2->save(filename);
+    }
 
-template <typename T>
-int Index_<T>::radiusSearch(const std::vector<ElementType>& query, std::vector<int>& indices, std::vector<DistanceType>& dists, DistanceType radius, const ::cvflann::SearchParams& searchParams)
-{
-    ::cvflann::Matrix<ElementType> m_query((ElementType*)&query[0], 1, query.size());
-    ::cvflann::Matrix<int> m_indices(&indices[0], 1, indices.size());
-    ::cvflann::Matrix<DistanceType> m_dists(&dists[0], 1, dists.size());
+    CV_DEPRECATED int veclen() const
+    {
+        if (nnIndex_L1) return nnIndex_L1->veclen();
+        if (nnIndex_L2) return nnIndex_L2->veclen();
+    }
 
-    if (nnIndex_L1) return nnIndex_L1->radiusSearch(m_query,m_indices,m_dists,radius,searchParams);
-    if (nnIndex_L2) return nnIndex_L2->radiusSearch(m_query,m_indices,m_dists,radius,searchParams);
-}
+    CV_DEPRECATED int size() const
+    {
+        if (nnIndex_L1) return nnIndex_L1->size();
+        if (nnIndex_L2) return nnIndex_L2->size();
+    }
 
-template <typename T>
-int Index_<T>::radiusSearch(const Mat& query, Mat& indices, Mat& dists, DistanceType radius, const ::cvflann::SearchParams& searchParams)
-{
-    CV_Assert(query.type() == CvType<ElementType>::type());
-    CV_Assert(query.isContinuous());
-    ::cvflann::Matrix<ElementType> m_query((ElementType*)query.ptr<ElementType>(0), query.rows, query.cols);
+    CV_DEPRECATED ::cvflann::IndexParams getParameters()
+    {
+        if (nnIndex_L1) return nnIndex_L1->getParameters();
+        if (nnIndex_L2) return nnIndex_L2->getParameters();
 
-    CV_Assert(indices.type() == CV_32S);
-    CV_Assert(indices.isContinuous());
-    ::cvflann::Matrix<int> m_indices((int*)indices.ptr<int>(0), indices.rows, indices.cols);
+    }
 
-    CV_Assert(dists.type() == CvType<DistanceType>::type());
-    CV_Assert(dists.isContinuous());
-    ::cvflann::Matrix<DistanceType> m_dists((DistanceType*)dists.ptr<DistanceType>(0), dists.rows, dists.cols);
+    CV_DEPRECATED const ::cvflann::IndexParams* getIndexParameters()
+    {
+        if (nnIndex_L1) return nnIndex_L1->getIndexParameters();
+        if (nnIndex_L2) return nnIndex_L2->getIndexParameters();
+    }
 
-    if (nnIndex_L1) return nnIndex_L1->radiusSearch(m_query,m_indices,m_dists,radius,searchParams);
-    if (nnIndex_L2) return nnIndex_L2->radiusSearch(m_query,m_indices,m_dists,radius,searchParams);
-}
+private:
+    // providing backwards compatibility for L2 and L1 distances (most common)
+    ::cvflann::Index< L2<ElementType> >* nnIndex_L2;
+    ::cvflann::Index< L1<ElementType> >* nnIndex_L1;
+};
 
 //! @endcond
 
@@ -532,10 +566,10 @@ int hierarchicalClustering(const Mat& features, Mat& centers, const ::cvflann::K
     return ::cvflann::hierarchicalClustering<Distance>(m_features, m_centers, params, d);
 }
 
-/** @deprecated
-*/
+//! @cond IGNORED
+
 template <typename ELEM_TYPE, typename DIST_TYPE>
-FLANN_DEPRECATED int hierarchicalClustering(const Mat& features, Mat& centers, const ::cvflann::KMeansIndexParams& params)
+CV_DEPRECATED int hierarchicalClustering(const Mat& features, Mat& centers, const ::cvflann::KMeansIndexParams& params)
 {
     printf("[WARNING] cv::flann::hierarchicalClustering<ELEM_TYPE,DIST_TYPE> is deprecated, use "
         "cv::flann::hierarchicalClustering<Distance> instead\n");
@@ -554,6 +588,8 @@ FLANN_DEPRECATED int hierarchicalClustering(const Mat& features, Mat& centers, c
     }
 }
 
+//! @endcond
+
 //! @} flann
 
 } } // namespace cv::flann
diff --git a/IPL/include/opencv/opencv2/flann/all_indices.h b/IPL/include/opencv/opencv2/flann/all_indices.h
index ff53fd8..ba5a2f2 100644
--- a/IPL/include/opencv/opencv2/flann/all_indices.h
+++ b/IPL/include/opencv/opencv2/flann/all_indices.h
@@ -30,6 +30,8 @@
 #ifndef OPENCV_FLANN_ALL_INDICES_H_
 #define OPENCV_FLANN_ALL_INDICES_H_
 
+//! @cond IGNORED
+
 #include "general.h"
 
 #include "nn_index.h"
@@ -152,4 +154,6 @@ NNIndex<Distance>* create_index_by_type(const Matrix<typename Distance::ElementT
 
 }
 
+//! @endcond
+
 #endif /* OPENCV_FLANN_ALL_INDICES_H_ */
diff --git a/IPL/include/opencv/opencv2/flann/allocator.h b/IPL/include/opencv/opencv2/flann/allocator.h
index 26091d0..d5870a0 100644
--- a/IPL/include/opencv/opencv2/flann/allocator.h
+++ b/IPL/include/opencv/opencv2/flann/allocator.h
@@ -31,6 +31,8 @@
 #ifndef OPENCV_FLANN_ALLOCATOR_H_
 #define OPENCV_FLANN_ALLOCATOR_H_
 
+//! @cond IGNORED
+
 #include <stdlib.h>
 #include <stdio.h>
 
@@ -97,6 +99,7 @@ class PooledAllocator
         blocksize = blockSize;
         remaining = 0;
         base = NULL;
+        loc = NULL;
 
         usedMemory = 0;
         wastedMemory = 0;
@@ -181,8 +184,13 @@ class PooledAllocator
         return mem;
     }
 
+private:
+    PooledAllocator(const PooledAllocator &); // copy disabled
+    PooledAllocator& operator=(const PooledAllocator &); // assign disabled
 };
 
 }
 
+//! @endcond
+
 #endif //OPENCV_FLANN_ALLOCATOR_H_
diff --git a/IPL/include/opencv/opencv2/flann/any.h b/IPL/include/opencv/opencv2/flann/any.h
index bfe06c8..f5684e9 100644
--- a/IPL/include/opencv/opencv2/flann/any.h
+++ b/IPL/include/opencv/opencv2/flann/any.h
@@ -12,6 +12,8 @@
  * Adapted for FLANN by Marius Muja
  */
 
+//! @cond IGNORED
+
 #include "defines.h"
 #include <stdexcept>
 #include <ostream>
@@ -54,50 +56,50 @@ struct base_any_policy
 template<typename T>
 struct typed_base_any_policy : base_any_policy
 {
-    virtual ::size_t get_size() { return sizeof(T); }
-    virtual const std::type_info& type() { return typeid(T); }
+    virtual ::size_t get_size() CV_OVERRIDE { return sizeof(T); }
+    virtual const std::type_info& type() CV_OVERRIDE { return typeid(T); }
 
 };
 
 template<typename T>
-struct small_any_policy : typed_base_any_policy<T>
+struct small_any_policy CV_FINAL : typed_base_any_policy<T>
 {
-    virtual void static_delete(void**) { }
-    virtual void copy_from_value(void const* src, void** dest)
+    virtual void static_delete(void**) CV_OVERRIDE { }
+    virtual void copy_from_value(void const* src, void** dest) CV_OVERRIDE
     {
         new (dest) T(* reinterpret_cast<T const*>(src));
     }
-    virtual void clone(void* const* src, void** dest) { *dest = *src; }
-    virtual void move(void* const* src, void** dest) { *dest = *src; }
-    virtual void* get_value(void** src) { return reinterpret_cast<void*>(src); }
-    virtual const void* get_value(void* const * src) { return reinterpret_cast<const void*>(src); }
-    virtual void print(std::ostream& out, void* const* src) { out << *reinterpret_cast<T const*>(src); }
+    virtual void clone(void* const* src, void** dest) CV_OVERRIDE { *dest = *src; }
+    virtual void move(void* const* src, void** dest) CV_OVERRIDE { *dest = *src; }
+    virtual void* get_value(void** src) CV_OVERRIDE { return reinterpret_cast<void*>(src); }
+    virtual const void* get_value(void* const * src) CV_OVERRIDE { return reinterpret_cast<const void*>(src); }
+    virtual void print(std::ostream& out, void* const* src) CV_OVERRIDE { out << *reinterpret_cast<T const*>(src); }
 };
 
 template<typename T>
-struct big_any_policy : typed_base_any_policy<T>
+struct big_any_policy CV_FINAL : typed_base_any_policy<T>
 {
-    virtual void static_delete(void** x)
+    virtual void static_delete(void** x) CV_OVERRIDE
     {
         if (* x) delete (* reinterpret_cast<T**>(x));
         *x = NULL;
     }
-    virtual void copy_from_value(void const* src, void** dest)
+    virtual void copy_from_value(void const* src, void** dest) CV_OVERRIDE
     {
         *dest = new T(*reinterpret_cast<T const*>(src));
     }
-    virtual void clone(void* const* src, void** dest)
+    virtual void clone(void* const* src, void** dest) CV_OVERRIDE
     {
         *dest = new T(**reinterpret_cast<T* const*>(src));
     }
-    virtual void move(void* const* src, void** dest)
+    virtual void move(void* const* src, void** dest) CV_OVERRIDE
     {
         (*reinterpret_cast<T**>(dest))->~T();
         **reinterpret_cast<T**>(dest) = **reinterpret_cast<T* const*>(src);
     }
-    virtual void* get_value(void** src) { return *src; }
-    virtual const void* get_value(void* const * src) { return *src; }
-    virtual void print(std::ostream& out, void* const* src) { out << *reinterpret_cast<T const*>(*src); }
+    virtual void* get_value(void** src) CV_OVERRIDE { return *src; }
+    virtual const void* get_value(void* const * src) CV_OVERRIDE { return *src; }
+    virtual void print(std::ostream& out, void* const* src) CV_OVERRIDE { out << *reinterpret_cast<T const*>(*src); }
 };
 
 template<> inline void big_any_policy<flann_centers_init_t>::print(std::ostream& out, void* const* src)
@@ -246,6 +248,12 @@ struct any
         return assign(x);
     }
 
+    /// Assignment operator. The template-based version above doesn't work as expected, so a regular assignment operator is needed here.
+    any& operator=(const any& x)
+    {
+        return assign(x);
+    }
+
     /// Assignment operator, specialed for literal strings.
     /// They have types like const char [6] which don't work as expected.
     any& operator=(const char* x)
@@ -321,4 +329,6 @@ inline std::ostream& operator <<(std::ostream& out, const any& any_val)
 
 }
 
+//! @endcond
+
 #endif // OPENCV_FLANN_ANY_H_
diff --git a/IPL/include/opencv/opencv2/flann/autotuned_index.h b/IPL/include/opencv/opencv2/flann/autotuned_index.h
index 6ffb929..eb4554f 100644
--- a/IPL/include/opencv/opencv2/flann/autotuned_index.h
+++ b/IPL/include/opencv/opencv2/flann/autotuned_index.h
@@ -30,6 +30,10 @@
 #ifndef OPENCV_FLANN_AUTOTUNED_INDEX_H_
 #define OPENCV_FLANN_AUTOTUNED_INDEX_H_
 
+//! @cond IGNORED
+
+#include <sstream>
+
 #include "general.h"
 #include "nn_index.h"
 #include "ground_truth.h"
@@ -81,6 +85,7 @@ class AutotunedIndex : public NNIndex<Distance>
         memory_weight_ = get_param(params, "memory_weight", 0.0f);
         sample_fraction_ = get_param(params,"sample_fraction", 0.1f);
         bestIndex_ = NULL;
+        speedup_ = 0;
     }
 
     AutotunedIndex(const AutotunedIndex&);
@@ -97,7 +102,7 @@ class AutotunedIndex : public NNIndex<Distance>
     /**
      *          Method responsible with building the index.
      */
-    virtual void buildIndex()
+    virtual void buildIndex() CV_OVERRIDE
     {
         std::ostringstream stream;
         bestParams_ = estimateBuildParams();
@@ -121,7 +126,7 @@ class AutotunedIndex : public NNIndex<Distance>
     /**
      *  Saves the index to a stream
      */
-    virtual void saveIndex(FILE* stream)
+    virtual void saveIndex(FILE* stream) CV_OVERRIDE
     {
         save_value(stream, (int)bestIndex_->getType());
         bestIndex_->saveIndex(stream);
@@ -131,7 +136,7 @@ class AutotunedIndex : public NNIndex<Distance>
     /**
      *  Loads the index from a stream
      */
-    virtual void loadIndex(FILE* stream)
+    virtual void loadIndex(FILE* stream) CV_OVERRIDE
     {
         int index_type;
 
@@ -148,7 +153,7 @@ class AutotunedIndex : public NNIndex<Distance>
     /**
      *      Method that searches for nearest-neighbors
      */
-    virtual void findNeighbors(ResultSet<DistanceType>& result, const ElementType* vec, const SearchParams& searchParams)
+    virtual void findNeighbors(ResultSet<DistanceType>& result, const ElementType* vec, const SearchParams& searchParams) CV_OVERRIDE
     {
         int checks = get_param<int>(searchParams,"checks",FLANN_CHECKS_AUTOTUNED);
         if (checks == FLANN_CHECKS_AUTOTUNED) {
@@ -160,7 +165,7 @@ class AutotunedIndex : public NNIndex<Distance>
     }
 
 
-    IndexParams getParameters() const
+    IndexParams getParameters() const CV_OVERRIDE
     {
         return bestIndex_->getParameters();
     }
@@ -179,7 +184,7 @@ class AutotunedIndex : public NNIndex<Distance>
     /**
      *      Number of features in this index.
      */
-    virtual size_t size() const
+    virtual size_t size() const CV_OVERRIDE
     {
         return bestIndex_->size();
     }
@@ -187,7 +192,7 @@ class AutotunedIndex : public NNIndex<Distance>
     /**
      *  The length of each vector in this index.
      */
-    virtual size_t veclen() const
+    virtual size_t veclen() const CV_OVERRIDE
     {
         return bestIndex_->veclen();
     }
@@ -195,7 +200,7 @@ class AutotunedIndex : public NNIndex<Distance>
     /**
      * The amount of memory (in bytes) this index uses.
      */
-    virtual int usedMemory() const
+    virtual int usedMemory() const CV_OVERRIDE
     {
         return bestIndex_->usedMemory();
     }
@@ -203,7 +208,7 @@ class AutotunedIndex : public NNIndex<Distance>
     /**
      * Algorithm name
      */
-    virtual flann_algorithm_t getType() const
+    virtual flann_algorithm_t getType() const CV_OVERRIDE
     {
         return FLANN_INDEX_AUTOTUNED;
     }
@@ -585,4 +590,6 @@ class AutotunedIndex : public NNIndex<Distance>
 };
 }
 
+//! @endcond
+
 #endif /* OPENCV_FLANN_AUTOTUNED_INDEX_H_ */
diff --git a/IPL/include/opencv/opencv2/flann/composite_index.h b/IPL/include/opencv/opencv2/flann/composite_index.h
index 527ca1a..bcf0827 100644
--- a/IPL/include/opencv/opencv2/flann/composite_index.h
+++ b/IPL/include/opencv/opencv2/flann/composite_index.h
@@ -31,6 +31,8 @@
 #ifndef OPENCV_FLANN_COMPOSITE_INDEX_H_
 #define OPENCV_FLANN_COMPOSITE_INDEX_H_
 
+//! @cond IGNORED
+
 #include "general.h"
 #include "nn_index.h"
 #include "kdtree_index.h"
@@ -101,7 +103,7 @@ class CompositeIndex : public NNIndex<Distance>
     /**
      * @return The index type
      */
-    flann_algorithm_t getType() const
+    flann_algorithm_t getType() const CV_OVERRIDE
     {
         return FLANN_INDEX_COMPOSITE;
     }
@@ -109,7 +111,7 @@ class CompositeIndex : public NNIndex<Distance>
     /**
      * @return Size of the index
      */
-    size_t size() const
+    size_t size() const CV_OVERRIDE
     {
         return kdtree_index_->size();
     }
@@ -117,7 +119,7 @@ class CompositeIndex : public NNIndex<Distance>
     /**
      * \returns The dimensionality of the features in this index.
      */
-    size_t veclen() const
+    size_t veclen() const CV_OVERRIDE
     {
         return kdtree_index_->veclen();
     }
@@ -125,7 +127,7 @@ class CompositeIndex : public NNIndex<Distance>
     /**
      * \returns The amount of memory (in bytes) used by the index.
      */
-    int usedMemory() const
+    int usedMemory() const CV_OVERRIDE
     {
         return kmeans_index_->usedMemory() + kdtree_index_->usedMemory();
     }
@@ -133,7 +135,7 @@ class CompositeIndex : public NNIndex<Distance>
     /**
      * \brief Builds the index
      */
-    void buildIndex()
+    void buildIndex() CV_OVERRIDE
     {
         Logger::info("Building kmeans tree...\n");
         kmeans_index_->buildIndex();
@@ -145,7 +147,7 @@ class CompositeIndex : public NNIndex<Distance>
      * \brief Saves the index to a stream
      * \param stream The stream to save the index to
      */
-    void saveIndex(FILE* stream)
+    void saveIndex(FILE* stream) CV_OVERRIDE
     {
         kmeans_index_->saveIndex(stream);
         kdtree_index_->saveIndex(stream);
@@ -155,7 +157,7 @@ class CompositeIndex : public NNIndex<Distance>
      * \brief Loads the index from a stream
      * \param stream The stream from which the index is loaded
      */
-    void loadIndex(FILE* stream)
+    void loadIndex(FILE* stream) CV_OVERRIDE
     {
         kmeans_index_->loadIndex(stream);
         kdtree_index_->loadIndex(stream);
@@ -164,7 +166,7 @@ class CompositeIndex : public NNIndex<Distance>
     /**
      * \returns The index parameters
      */
-    IndexParams getParameters() const
+    IndexParams getParameters() const CV_OVERRIDE
     {
         return index_params_;
     }
@@ -172,7 +174,7 @@ class CompositeIndex : public NNIndex<Distance>
     /**
      * \brief Method that searches for nearest-neighbours
      */
-    void findNeighbors(ResultSet<DistanceType>& result, const ElementType* vec, const SearchParams& searchParams)
+    void findNeighbors(ResultSet<DistanceType>& result, const ElementType* vec, const SearchParams& searchParams) CV_OVERRIDE
     {
         kmeans_index_->findNeighbors(result, vec, searchParams);
         kdtree_index_->findNeighbors(result, vec, searchParams);
@@ -191,4 +193,6 @@ class CompositeIndex : public NNIndex<Distance>
 
 }
 
+//! @endcond
+
 #endif //OPENCV_FLANN_COMPOSITE_INDEX_H_
diff --git a/IPL/include/opencv/opencv2/flann/config.h b/IPL/include/opencv/opencv2/flann/config.h
index 56832fd..c9342c0 100644
--- a/IPL/include/opencv/opencv2/flann/config.h
+++ b/IPL/include/opencv/opencv2/flann/config.h
@@ -30,9 +30,13 @@
 #ifndef OPENCV_FLANN_CONFIG_H_
 #define OPENCV_FLANN_CONFIG_H_
 
+//! @cond IGNORED
+
 #ifdef FLANN_VERSION_
 #undef FLANN_VERSION_
 #endif
 #define FLANN_VERSION_ "1.6.10"
 
+//! @endcond
+
 #endif /* OPENCV_FLANN_CONFIG_H_ */
diff --git a/IPL/include/opencv/opencv2/flann/defines.h b/IPL/include/opencv/opencv2/flann/defines.h
index f0264f7..884c600 100644
--- a/IPL/include/opencv/opencv2/flann/defines.h
+++ b/IPL/include/opencv/opencv2/flann/defines.h
@@ -30,12 +30,14 @@
 #ifndef OPENCV_FLANN_DEFINES_H_
 #define OPENCV_FLANN_DEFINES_H_
 
+//! @cond IGNORED
+
 #include "config.h"
 
 #ifdef FLANN_EXPORT
 #undef FLANN_EXPORT
 #endif
-#ifdef WIN32
+#ifdef _WIN32
 /* win32 dll export/import directives */
  #ifdef FLANN_EXPORTS
   #define FLANN_EXPORT __declspec(dllexport)
@@ -50,19 +52,6 @@
 #endif
 
 
-#ifdef FLANN_DEPRECATED
-#undef FLANN_DEPRECATED
-#endif
-#ifdef __GNUC__
-#define FLANN_DEPRECATED __attribute__ ((deprecated))
-#elif defined(_MSC_VER)
-#define FLANN_DEPRECATED __declspec(deprecated)
-#else
-#pragma message("WARNING: You need to implement FLANN_DEPRECATED for this compiler")
-#define FLANN_DEPRECATED
-#endif
-
-
 #undef FLANN_PLATFORM_32_BIT
 #undef FLANN_PLATFORM_64_BIT
 #if defined __amd64__ || defined __x86_64__ || defined _WIN64 || defined _M_X64
@@ -174,4 +163,6 @@ enum
 
 }
 
+//! @endcond
+
 #endif /* OPENCV_FLANN_DEFINES_H_ */
diff --git a/IPL/include/opencv/opencv2/flann/dist.h b/IPL/include/opencv/opencv2/flann/dist.h
index 9dbe527..07a1cc2 100644
--- a/IPL/include/opencv/opencv2/flann/dist.h
+++ b/IPL/include/opencv/opencv2/flann/dist.h
@@ -31,6 +31,8 @@
 #ifndef OPENCV_FLANN_DIST_H_
 #define OPENCV_FLANN_DIST_H_
 
+//! @cond IGNORED
+
 #include <cmath>
 #include <cstdlib>
 #include <string.h>
@@ -43,11 +45,11 @@ typedef unsigned __int64 uint64_t;
 
 #include "defines.h"
 
-#if (defined WIN32 || defined _WIN32) && defined(_M_ARM)
+#if defined _WIN32 && (defined(_M_ARM) || defined(_M_ARM64))
 # include <Intrin.h>
 #endif
 
-#ifdef __ARM_NEON__
+#if defined(__ARM_NEON__) && !defined(__CUDACC__)
 # include "arm_neon.h"
 #endif
 
@@ -114,7 +116,7 @@ struct L2_Simple
         ResultType result = ResultType();
         ResultType diff;
         for(size_t i = 0; i < size; ++i ) {
-            diff = *a++ - *b++;
+            diff = (ResultType)(*a++ - *b++);
             result += diff*diff;
         }
         return result;
@@ -425,7 +427,7 @@ struct Hamming
     ResultType operator()(Iterator1 a, Iterator2 b, size_t size, ResultType /*worst_dist*/ = -1) const
     {
         ResultType result = 0;
-#ifdef __ARM_NEON__
+#if defined(__ARM_NEON__) && !defined(__CUDACC__)
         {
             uint32x4_t bits = vmovq_n_u32(0);
             for (size_t i = 0; i < size; i += 16) {
@@ -441,7 +443,7 @@ struct Hamming
             result = vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),0);
             result += vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),2);
         }
-#elif __GNUC__
+#elif defined(__GNUC__)
         {
             //for portability just use unsigned long -- and use the __builtin_popcountll (see docs for __builtin_popcountll)
             typedef unsigned long long pop_t;
@@ -462,10 +464,9 @@ struct Hamming
             }
         }
 #else // NO NEON and NOT GNUC
-        typedef unsigned long long pop_t;
         HammingLUT lut;
         result = lut(reinterpret_cast<const unsigned char*> (a),
-                     reinterpret_cast<const unsigned char*> (b), size * sizeof(pop_t));
+                     reinterpret_cast<const unsigned char*> (b), size);
 #endif
         return result;
     }
@@ -698,7 +699,7 @@ struct KL_Divergence
     typedef typename Accumulator<T>::Type ResultType;
 
     /**
-     *  Compute the Kullback–Leibler divergence
+     *  Compute the Kullback-Leibler divergence
      */
     template <typename Iterator1, typename Iterator2>
     ResultType operator()(Iterator1 a, Iterator2 b, size_t size, ResultType worst_dist = -1) const
@@ -843,7 +844,7 @@ typename Distance::ResultType ensureSquareDistance( typename Distance::ResultTyp
 
 /*
  * ...and a template to ensure the user that he will process the normal distance,
- * and not squared distance, without loosing processing time calling sqrt(ensureSquareDistance)
+ * and not squared distance, without losing processing time calling sqrt(ensureSquareDistance)
  * that will result in doing actually sqrt(dist*dist) for L1 distance for instance.
  */
 template <typename Distance, typename ElementType>
@@ -902,4 +903,6 @@ typename Distance::ResultType ensureSimpleDistance( typename Distance::ResultTyp
 
 }
 
+//! @endcond
+
 #endif //OPENCV_FLANN_DIST_H_
diff --git a/IPL/include/opencv/opencv2/flann/dummy.h b/IPL/include/opencv/opencv2/flann/dummy.h
index 26bd3fa..c176f2e 100644
--- a/IPL/include/opencv/opencv2/flann/dummy.h
+++ b/IPL/include/opencv/opencv2/flann/dummy.h
@@ -2,15 +2,15 @@
 #ifndef OPENCV_FLANN_DUMMY_H_
 #define OPENCV_FLANN_DUMMY_H_
 
+//! @cond IGNORED
+
 namespace cvflann
 {
 
-#if (defined WIN32 || defined _WIN32 || defined WINCE) && defined CVAPI_EXPORTS
-__declspec(dllexport)
-#endif
-void dummyfunc();
+CV_DEPRECATED inline void dummyfunc() {}
 
 }
 
+//! @endcond
 
 #endif  /* OPENCV_FLANN_DUMMY_H_ */
diff --git a/IPL/include/opencv/opencv2/flann/dynamic_bitset.h b/IPL/include/opencv/opencv2/flann/dynamic_bitset.h
index d795b5d..a00ce1b 100644
--- a/IPL/include/opencv/opencv2/flann/dynamic_bitset.h
+++ b/IPL/include/opencv/opencv2/flann/dynamic_bitset.h
@@ -35,6 +35,8 @@
 #ifndef OPENCV_FLANN_DYNAMIC_BITSET_H_
 #define OPENCV_FLANN_DYNAMIC_BITSET_H_
 
+//! @cond IGNORED
+
 #ifndef FLANN_USE_BOOST
 #  define FLANN_USE_BOOST 0
 #endif
@@ -59,7 +61,7 @@ class DynamicBitset
 public:
     /** default constructor
      */
-    DynamicBitset()
+    DynamicBitset() : size_(0)
     {
     }
 
@@ -156,4 +158,6 @@ class DynamicBitset
 
 #endif
 
+//! @endcond
+
 #endif // OPENCV_FLANN_DYNAMIC_BITSET_H_
diff --git a/IPL/include/opencv/opencv2/flann/flann_base.hpp b/IPL/include/opencv/opencv2/flann/flann_base.hpp
index 98c33cf..83606d2 100644
--- a/IPL/include/opencv/opencv2/flann/flann_base.hpp
+++ b/IPL/include/opencv/opencv2/flann/flann_base.hpp
@@ -31,6 +31,8 @@
 #ifndef OPENCV_FLANN_BASE_HPP_
 #define OPENCV_FLANN_BASE_HPP_
 
+//! @cond IGNORED
+
 #include <vector>
 #include <cassert>
 #include <cstdio>
@@ -80,9 +82,11 @@ NNIndex<Distance>* load_saved_index(const Matrix<typename Distance::ElementType>
     }
     IndexHeader header = load_header(fin);
     if (header.data_type != Datatype<ElementType>::type()) {
+        fclose(fin);
         throw FLANNException("Datatype of saved index is different than of the one to be created.");
     }
     if ((size_t(header.rows) != dataset.rows)||(size_t(header.cols) != dataset.cols)) {
+        fclose(fin);
         throw FLANNException("The index saved belongs to a different dataset");
     }
 
@@ -126,7 +130,7 @@ class Index : public NNIndex<Distance>
     /**
      * Builds the index.
      */
-    void buildIndex()
+    void buildIndex() CV_OVERRIDE
     {
         if (!loaded_) {
             nnIndex_->buildIndex();
@@ -148,7 +152,7 @@ class Index : public NNIndex<Distance>
      * \brief Saves the index to a stream
      * \param stream The stream to save the index to
      */
-    virtual void saveIndex(FILE* stream)
+    virtual void saveIndex(FILE* stream) CV_OVERRIDE
     {
         nnIndex_->saveIndex(stream);
     }
@@ -157,7 +161,7 @@ class Index : public NNIndex<Distance>
      * \brief Loads the index from a stream
      * \param stream The stream from which the index is loaded
      */
-    virtual void loadIndex(FILE* stream)
+    virtual void loadIndex(FILE* stream) CV_OVERRIDE
     {
         nnIndex_->loadIndex(stream);
     }
@@ -165,7 +169,7 @@ class Index : public NNIndex<Distance>
     /**
      * \returns number of features in this index.
      */
-    size_t veclen() const
+    size_t veclen() const CV_OVERRIDE
     {
         return nnIndex_->veclen();
     }
@@ -173,7 +177,7 @@ class Index : public NNIndex<Distance>
     /**
      * \returns The dimensionality of the features in this index.
      */
-    size_t size() const
+    size_t size() const CV_OVERRIDE
     {
         return nnIndex_->size();
     }
@@ -181,7 +185,7 @@ class Index : public NNIndex<Distance>
     /**
      * \returns The index type (kdtree, kmeans,...)
      */
-    flann_algorithm_t getType() const
+    flann_algorithm_t getType() const CV_OVERRIDE
     {
         return nnIndex_->getType();
     }
@@ -189,7 +193,7 @@ class Index : public NNIndex<Distance>
     /**
      * \returns The amount of memory (in bytes) used by the index.
      */
-    virtual int usedMemory() const
+    virtual int usedMemory() const CV_OVERRIDE
     {
         return nnIndex_->usedMemory();
     }
@@ -198,7 +202,7 @@ class Index : public NNIndex<Distance>
     /**
      * \returns The index parameters
      */
-    IndexParams getParameters() const
+    IndexParams getParameters() const CV_OVERRIDE
     {
         return nnIndex_->getParameters();
     }
@@ -211,7 +215,7 @@ class Index : public NNIndex<Distance>
      * \param[in] knn Number of nearest neighbors to return
      * \param[in] params Search parameters
      */
-    void knnSearch(const Matrix<ElementType>& queries, Matrix<int>& indices, Matrix<DistanceType>& dists, int knn, const SearchParams& params)
+    void knnSearch(const Matrix<ElementType>& queries, Matrix<int>& indices, Matrix<DistanceType>& dists, int knn, const SearchParams& params) CV_OVERRIDE
     {
         nnIndex_->knnSearch(queries, indices, dists, knn, params);
     }
@@ -225,7 +229,7 @@ class Index : public NNIndex<Distance>
      * \param[in] params Search parameters
      * \returns Number of neighbors found
      */
-    int radiusSearch(const Matrix<ElementType>& query, Matrix<int>& indices, Matrix<DistanceType>& dists, float radius, const SearchParams& params)
+    int radiusSearch(const Matrix<ElementType>& query, Matrix<int>& indices, Matrix<DistanceType>& dists, float radius, const SearchParams& params) CV_OVERRIDE
     {
         return nnIndex_->radiusSearch(query, indices, dists, radius, params);
     }
@@ -233,7 +237,7 @@ class Index : public NNIndex<Distance>
     /**
      * \brief Method that searches for nearest-neighbours
      */
-    void findNeighbors(ResultSet<DistanceType>& result, const ElementType* vec, const SearchParams& searchParams)
+    void findNeighbors(ResultSet<DistanceType>& result, const ElementType* vec, const SearchParams& searchParams) CV_OVERRIDE
     {
         nnIndex_->findNeighbors(result, vec, searchParams);
     }
@@ -241,7 +245,7 @@ class Index : public NNIndex<Distance>
     /**
      * \brief Returns actual index
      */
-    FLANN_DEPRECATED NNIndex<Distance>* getIndex()
+    CV_DEPRECATED NNIndex<Distance>* getIndex()
     {
         return nnIndex_;
     }
@@ -250,7 +254,7 @@ class Index : public NNIndex<Distance>
      * \brief Returns index parameters.
      * \deprecated use getParameters() instead.
      */
-    FLANN_DEPRECATED  const IndexParams* getIndexParameters()
+    CV_DEPRECATED  const IndexParams* getIndexParameters()
     {
         return &index_params_;
     }
@@ -262,6 +266,9 @@ class Index : public NNIndex<Distance>
     bool loaded_;
     /** Parameters passed to the index */
     IndexParams index_params_;
+
+    Index(const Index &); // copy disabled
+    Index& operator=(const Index &); // assign disabled
 };
 
 /**
@@ -287,4 +294,7 @@ int hierarchicalClustering(const Matrix<typename Distance::ElementType>& points,
 }
 
 }
+
+//! @endcond
+
 #endif /* OPENCV_FLANN_BASE_HPP_ */
diff --git a/IPL/include/opencv/opencv2/flann/general.h b/IPL/include/opencv/opencv2/flann/general.h
index 9d5402a..ac848d6 100644
--- a/IPL/include/opencv/opencv2/flann/general.h
+++ b/IPL/include/opencv/opencv2/flann/general.h
@@ -31,6 +31,8 @@
 #ifndef OPENCV_FLANN_GENERAL_H_
 #define OPENCV_FLANN_GENERAL_H_
 
+//! @cond IGNORED
+
 #include "opencv2/core.hpp"
 
 namespace cvflann
@@ -46,5 +48,6 @@ class FLANNException : public cv::Exception
 
 }
 
+//! @endcond
 
 #endif  /* OPENCV_FLANN_GENERAL_H_ */
diff --git a/IPL/include/opencv/opencv2/flann/ground_truth.h b/IPL/include/opencv/opencv2/flann/ground_truth.h
index fd8f3ae..17f2a8e 100644
--- a/IPL/include/opencv/opencv2/flann/ground_truth.h
+++ b/IPL/include/opencv/opencv2/flann/ground_truth.h
@@ -31,6 +31,8 @@
 #ifndef OPENCV_FLANN_GROUND_TRUTH_H_
 #define OPENCV_FLANN_GROUND_TRUTH_H_
 
+//! @cond IGNORED
+
 #include "dist.h"
 #include "matrix.h"
 
@@ -91,4 +93,6 @@ void compute_ground_truth(const Matrix<typename Distance::ElementType>& dataset,
 
 }
 
+//! @endcond
+
 #endif //OPENCV_FLANN_GROUND_TRUTH_H_
diff --git a/IPL/include/opencv/opencv2/flann/hdf5.h b/IPL/include/opencv/opencv2/flann/hdf5.h
index 80d23b9..7554384 100644
--- a/IPL/include/opencv/opencv2/flann/hdf5.h
+++ b/IPL/include/opencv/opencv2/flann/hdf5.h
@@ -30,6 +30,8 @@
 #ifndef OPENCV_FLANN_HDF5_H_
 #define OPENCV_FLANN_HDF5_H_
 
+//! @cond IGNORED
+
 #include <hdf5.h>
 
 #include "matrix.h"
@@ -228,4 +230,6 @@ void load_from_file(cvflann::Matrix<T>& dataset, const String& filename, const S
 #endif // HAVE_MPI
 } // namespace cvflann::mpi
 
+//! @endcond
+
 #endif /* OPENCV_FLANN_HDF5_H_ */
diff --git a/IPL/include/opencv/opencv2/flann/heap.h b/IPL/include/opencv/opencv2/flann/heap.h
index 92a6ea6..ee1c682 100644
--- a/IPL/include/opencv/opencv2/flann/heap.h
+++ b/IPL/include/opencv/opencv2/flann/heap.h
@@ -31,6 +31,8 @@
 #ifndef OPENCV_FLANN_HEAP_H_
 #define OPENCV_FLANN_HEAP_H_
 
+//! @cond IGNORED
+
 #include <algorithm>
 #include <vector>
 
@@ -162,4 +164,6 @@ class Heap
 
 }
 
+//! @endcond
+
 #endif //OPENCV_FLANN_HEAP_H_
diff --git a/IPL/include/opencv/opencv2/flann/hierarchical_clustering_index.h b/IPL/include/opencv/opencv2/flann/hierarchical_clustering_index.h
index 9d890d4..20304ed 100644
--- a/IPL/include/opencv/opencv2/flann/hierarchical_clustering_index.h
+++ b/IPL/include/opencv/opencv2/flann/hierarchical_clustering_index.h
@@ -31,6 +31,8 @@
 #ifndef OPENCV_FLANN_HIERARCHICAL_CLUSTERING_INDEX_H_
 #define OPENCV_FLANN_HIERARCHICAL_CLUSTERING_INDEX_H_
 
+//! @cond IGNORED
+
 #include <algorithm>
 #include <map>
 #include <cassert>
@@ -435,7 +437,7 @@ class HierarchicalClusteringIndex : public NNIndex<Distance>
     /**
      *  Returns size of index.
      */
-    size_t size() const
+    size_t size() const CV_OVERRIDE
     {
         return size_;
     }
@@ -443,7 +445,7 @@ class HierarchicalClusteringIndex : public NNIndex<Distance>
     /**
      * Returns the length of an index feature.
      */
-    size_t veclen() const
+    size_t veclen() const CV_OVERRIDE
     {
         return veclen_;
     }
@@ -453,7 +455,7 @@ class HierarchicalClusteringIndex : public NNIndex<Distance>
      * Computes the inde memory usage
      * Returns: memory used by the index
      */
-    int usedMemory() const
+    int usedMemory() const CV_OVERRIDE
     {
         return pool.usedMemory+pool.wastedMemory+memoryCounter;
     }
@@ -461,7 +463,7 @@ class HierarchicalClusteringIndex : public NNIndex<Distance>
     /**
      * Builds the index
      */
-    void buildIndex()
+    void buildIndex() CV_OVERRIDE
     {
         if (branching_<2) {
             throw FLANNException("Branching factor must be at least 2");
@@ -480,13 +482,13 @@ class HierarchicalClusteringIndex : public NNIndex<Distance>
     }
 
 
-    flann_algorithm_t getType() const
+    flann_algorithm_t getType() const CV_OVERRIDE
     {
         return FLANN_INDEX_HIERARCHICAL;
     }
 
 
-    void saveIndex(FILE* stream)
+    void saveIndex(FILE* stream) CV_OVERRIDE
     {
         save_value(stream, branching_);
         save_value(stream, trees_);
@@ -501,7 +503,7 @@ class HierarchicalClusteringIndex : public NNIndex<Distance>
     }
 
 
-    void loadIndex(FILE* stream)
+    void loadIndex(FILE* stream) CV_OVERRIDE
     {
         free_elements();
 
@@ -544,7 +546,7 @@ class HierarchicalClusteringIndex : public NNIndex<Distance>
      *     vec = the vector for which to search the nearest neighbors
      *     searchParams = parameters that influence the search algorithm (checks)
      */
-    void findNeighbors(ResultSet<DistanceType>& result, const ElementType* vec, const SearchParams& searchParams)
+    void findNeighbors(ResultSet<DistanceType>& result, const ElementType* vec, const SearchParams& searchParams) CV_OVERRIDE
     {
 
         int maxChecks = get_param(searchParams,"checks",32);
@@ -569,7 +571,7 @@ class HierarchicalClusteringIndex : public NNIndex<Distance>
 
     }
 
-    IndexParams getParameters() const
+    IndexParams getParameters() const CV_OVERRIDE
     {
         return params;
     }
@@ -578,7 +580,7 @@ class HierarchicalClusteringIndex : public NNIndex<Distance>
 private:
 
     /**
-     * Struture representing a node in the hierarchical k-means tree.
+     * Structure representing a node in the hierarchical k-means tree.
      */
     struct Node
     {
@@ -845,4 +847,6 @@ class HierarchicalClusteringIndex : public NNIndex<Distance>
 
 }
 
+//! @endcond
+
 #endif /* OPENCV_FLANN_HIERARCHICAL_CLUSTERING_INDEX_H_ */
diff --git a/IPL/include/opencv/opencv2/flann/index_testing.h b/IPL/include/opencv/opencv2/flann/index_testing.h
index d764004..47b6f0b 100644
--- a/IPL/include/opencv/opencv2/flann/index_testing.h
+++ b/IPL/include/opencv/opencv2/flann/index_testing.h
@@ -31,6 +31,8 @@
 #ifndef OPENCV_FLANN_INDEX_TESTING_H_
 #define OPENCV_FLANN_INDEX_TESTING_H_
 
+//! @cond IGNORED
+
 #include <cstring>
 #include <cassert>
 #include <cmath>
@@ -315,4 +317,6 @@ void test_index_precisions(NNIndex<Distance>& index, const Matrix<typename Dista
 
 }
 
+//! @endcond
+
 #endif //OPENCV_FLANN_INDEX_TESTING_H_
diff --git a/IPL/include/opencv/opencv2/flann/kdtree_index.h b/IPL/include/opencv/opencv2/flann/kdtree_index.h
index dc0971c..4723505 100644
--- a/IPL/include/opencv/opencv2/flann/kdtree_index.h
+++ b/IPL/include/opencv/opencv2/flann/kdtree_index.h
@@ -31,6 +31,8 @@
 #ifndef OPENCV_FLANN_KDTREE_INDEX_H_
 #define OPENCV_FLANN_KDTREE_INDEX_H_
 
+//! @cond IGNORED
+
 #include <algorithm>
 #include <map>
 #include <cassert>
@@ -120,24 +122,29 @@ class KDTreeIndex : public NNIndex<Distance>
     /**
      * Builds the index
      */
-    void buildIndex()
+    void buildIndex() CV_OVERRIDE
     {
         /* Construct the randomized trees. */
         for (int i = 0; i < trees_; i++) {
             /* Randomize the order of vectors to allow for unbiased sampling. */
+#ifndef OPENCV_FLANN_USE_STD_RAND
+            cv::randShuffle(vind_);
+#else
             std::random_shuffle(vind_.begin(), vind_.end());
+#endif
+
             tree_roots_[i] = divideTree(&vind_[0], int(size_) );
         }
     }
 
 
-    flann_algorithm_t getType() const
+    flann_algorithm_t getType() const CV_OVERRIDE
     {
         return FLANN_INDEX_KDTREE;
     }
 
 
-    void saveIndex(FILE* stream)
+    void saveIndex(FILE* stream) CV_OVERRIDE
     {
         save_value(stream, trees_);
         for (int i=0; i<trees_; ++i) {
@@ -147,7 +154,7 @@ class KDTreeIndex : public NNIndex<Distance>
 
 
 
-    void loadIndex(FILE* stream)
+    void loadIndex(FILE* stream) CV_OVERRIDE
     {
         load_value(stream, trees_);
         if (tree_roots_!=NULL) {
@@ -165,7 +172,7 @@ class KDTreeIndex : public NNIndex<Distance>
     /**
      *  Returns size of index.
      */
-    size_t size() const
+    size_t size() const CV_OVERRIDE
     {
         return size_;
     }
@@ -173,7 +180,7 @@ class KDTreeIndex : public NNIndex<Distance>
     /**
      * Returns the length of an index feature.
      */
-    size_t veclen() const
+    size_t veclen() const CV_OVERRIDE
     {
         return veclen_;
     }
@@ -182,7 +189,7 @@ class KDTreeIndex : public NNIndex<Distance>
      * Computes the inde memory usage
      * Returns: memory used by the index
      */
-    int usedMemory() const
+    int usedMemory() const CV_OVERRIDE
     {
         return int(pool_.usedMemory+pool_.wastedMemory+dataset_.rows*sizeof(int));  // pool memory and vind array memory
     }
@@ -196,7 +203,7 @@ class KDTreeIndex : public NNIndex<Distance>
      *     vec = the vector for which to search the nearest neighbors
      *     maxCheck = the maximum number of restarts (in a best-bin-first manner)
      */
-    void findNeighbors(ResultSet<DistanceType>& result, const ElementType* vec, const SearchParams& searchParams)
+    void findNeighbors(ResultSet<DistanceType>& result, const ElementType* vec, const SearchParams& searchParams) CV_OVERRIDE
     {
         int maxChecks = get_param(searchParams,"checks", 32);
         float epsError = 1+get_param(searchParams,"eps",0.0f);
@@ -209,7 +216,7 @@ class KDTreeIndex : public NNIndex<Distance>
         }
     }
 
-    IndexParams getParameters() const
+    IndexParams getParameters() const CV_OVERRIDE
     {
         return index_params_;
     }
@@ -618,4 +625,6 @@ class KDTreeIndex : public NNIndex<Distance>
 
 }
 
+//! @endcond
+
 #endif //OPENCV_FLANN_KDTREE_INDEX_H_
diff --git a/IPL/include/opencv/opencv2/flann/kdtree_single_index.h b/IPL/include/opencv/opencv2/flann/kdtree_single_index.h
index 30488ad..fa38f9f 100644
--- a/IPL/include/opencv/opencv2/flann/kdtree_single_index.h
+++ b/IPL/include/opencv/opencv2/flann/kdtree_single_index.h
@@ -31,6 +31,8 @@
 #ifndef OPENCV_FLANN_KDTREE_SINGLE_INDEX_H_
 #define OPENCV_FLANN_KDTREE_SINGLE_INDEX_H_
 
+//! @cond IGNORED
+
 #include <algorithm>
 #include <map>
 #include <cassert>
@@ -87,6 +89,7 @@ class KDTreeSingleIndex : public NNIndex<Distance>
     {
         size_ = dataset_.rows;
         dim_ = dataset_.cols;
+        root_node_ = 0;
         int dim_param = get_param(params,"dim",-1);
         if (dim_param>0) dim_ = dim_param;
         leaf_max_size_ = get_param(params,"leaf_max_size",10);
@@ -113,7 +116,7 @@ class KDTreeSingleIndex : public NNIndex<Distance>
     /**
      * Builds the index
      */
-    void buildIndex()
+    void buildIndex() CV_OVERRIDE
     {
         computeBoundingBox(root_bbox_);
         root_node_ = divideTree(0, (int)size_, root_bbox_ );   // construct the tree
@@ -132,13 +135,13 @@ class KDTreeSingleIndex : public NNIndex<Distance>
         }
     }
 
-    flann_algorithm_t getType() const
+    flann_algorithm_t getType() const CV_OVERRIDE
     {
         return FLANN_INDEX_KDTREE_SINGLE;
     }
 
 
-    void saveIndex(FILE* stream)
+    void saveIndex(FILE* stream) CV_OVERRIDE
     {
         save_value(stream, size_);
         save_value(stream, dim_);
@@ -153,7 +156,7 @@ class KDTreeSingleIndex : public NNIndex<Distance>
     }
 
 
-    void loadIndex(FILE* stream)
+    void loadIndex(FILE* stream) CV_OVERRIDE
     {
         load_value(stream, size_);
         load_value(stream, dim_);
@@ -178,7 +181,7 @@ class KDTreeSingleIndex : public NNIndex<Distance>
     /**
      *  Returns size of index.
      */
-    size_t size() const
+    size_t size() const CV_OVERRIDE
     {
         return size_;
     }
@@ -186,7 +189,7 @@ class KDTreeSingleIndex : public NNIndex<Distance>
     /**
      * Returns the length of an index feature.
      */
-    size_t veclen() const
+    size_t veclen() const CV_OVERRIDE
     {
         return dim_;
     }
@@ -195,7 +198,7 @@ class KDTreeSingleIndex : public NNIndex<Distance>
      * Computes the inde memory usage
      * Returns: memory used by the index
      */
-    int usedMemory() const
+    int usedMemory() const CV_OVERRIDE
     {
         return (int)(pool_.usedMemory+pool_.wastedMemory+dataset_.rows*sizeof(int));  // pool memory and vind array memory
     }
@@ -209,7 +212,7 @@ class KDTreeSingleIndex : public NNIndex<Distance>
      * \param[in] knn Number of nearest neighbors to return
      * \param[in] params Search parameters
      */
-    void knnSearch(const Matrix<ElementType>& queries, Matrix<int>& indices, Matrix<DistanceType>& dists, int knn, const SearchParams& params)
+    void knnSearch(const Matrix<ElementType>& queries, Matrix<int>& indices, Matrix<DistanceType>& dists, int knn, const SearchParams& params) CV_OVERRIDE
     {
         assert(queries.cols == veclen());
         assert(indices.rows >= queries.rows);
@@ -224,7 +227,7 @@ class KDTreeSingleIndex : public NNIndex<Distance>
         }
     }
 
-    IndexParams getParameters() const
+    IndexParams getParameters() const CV_OVERRIDE
     {
         return index_params_;
     }
@@ -238,7 +241,7 @@ class KDTreeSingleIndex : public NNIndex<Distance>
      *     vec = the vector for which to search the nearest neighbors
      *     maxCheck = the maximum number of restarts (in a best-bin-first manner)
      */
-    void findNeighbors(ResultSet<DistanceType>& result, const ElementType* vec, const SearchParams& searchParams)
+    void findNeighbors(ResultSet<DistanceType>& result, const ElementType* vec, const SearchParams& searchParams) CV_OVERRIDE
     {
         float epsError = 1+get_param(searchParams,"eps",0.0f);
 
@@ -631,4 +634,6 @@ class KDTreeSingleIndex : public NNIndex<Distance>
 
 }
 
+//! @endcond
+
 #endif //OPENCV_FLANN_KDTREE_SINGLE_INDEX_H_
diff --git a/IPL/include/opencv/opencv2/flann/kmeans_index.h b/IPL/include/opencv/opencv2/flann/kmeans_index.h
index 226fc71..7574e7f 100644
--- a/IPL/include/opencv/opencv2/flann/kmeans_index.h
+++ b/IPL/include/opencv/opencv2/flann/kmeans_index.h
@@ -31,6 +31,8 @@
 #ifndef OPENCV_FLANN_KMEANS_INDEX_H_
 #define OPENCV_FLANN_KMEANS_INDEX_H_
 
+//! @cond IGNORED
+
 #include <algorithm>
 #include <map>
 #include <cassert>
@@ -266,7 +268,7 @@ class KMeansIndex : public NNIndex<Distance>
 
 public:
 
-    flann_algorithm_t getType() const
+    flann_algorithm_t getType() const CV_OVERRIDE
     {
         return FLANN_INDEX_KMEANS;
     }
@@ -276,30 +278,27 @@ class KMeansIndex : public NNIndex<Distance>
     public:
         KMeansDistanceComputer(Distance _distance, const Matrix<ElementType>& _dataset,
             const int _branching, const int* _indices, const Matrix<double>& _dcenters, const size_t _veclen,
-            int* _count, int* _belongs_to, std::vector<DistanceType>& _radiuses, bool& _converged, cv::Mutex& _mtx)
+            std::vector<int> &_new_centroids, std::vector<DistanceType> &_sq_dists)
             : distance(_distance)
             , dataset(_dataset)
             , branching(_branching)
             , indices(_indices)
             , dcenters(_dcenters)
             , veclen(_veclen)
-            , count(_count)
-            , belongs_to(_belongs_to)
-            , radiuses(_radiuses)
-            , converged(_converged)
-            , mtx(_mtx)
+            , new_centroids(_new_centroids)
+            , sq_dists(_sq_dists)
         {
         }
 
-        void operator()(const cv::Range& range) const
+        void operator()(const cv::Range& range) const CV_OVERRIDE
         {
             const int begin = range.start;
             const int end = range.end;
 
             for( int i = begin; i<end; ++i)
             {
-                DistanceType sq_dist = distance(dataset[indices[i]], dcenters[0], veclen);
-                int new_centroid = 0;
+                DistanceType sq_dist(distance(dataset[indices[i]], dcenters[0], veclen));
+                int new_centroid(0);
                 for (int j=1; j<branching; ++j) {
                     DistanceType new_sq_dist = distance(dataset[indices[i]], dcenters[j], veclen);
                     if (sq_dist>new_sq_dist) {
@@ -307,17 +306,8 @@ class KMeansIndex : public NNIndex<Distance>
                         sq_dist = new_sq_dist;
                     }
                 }
-                if (sq_dist > radiuses[new_centroid]) {
-                    radiuses[new_centroid] = sq_dist;
-                }
-                if (new_centroid != belongs_to[i]) {
-                    count[belongs_to[i]]--;
-                    count[new_centroid]++;
-                    belongs_to[i] = new_centroid;
-                    mtx.lock();
-                    converged = false;
-                    mtx.unlock();
-                }
+                sq_dists[i] = sq_dist;
+                new_centroids[i] = new_centroid;
             }
         }
 
@@ -328,11 +318,8 @@ class KMeansIndex : public NNIndex<Distance>
         const int* indices;
         const Matrix<double>& dcenters;
         const size_t veclen;
-        int* count;
-        int* belongs_to;
-        std::vector<DistanceType>& radiuses;
-        bool& converged;
-        cv::Mutex& mtx;
+        std::vector<int> &new_centroids;
+        std::vector<DistanceType> &sq_dists;
         KMeansDistanceComputer& operator=( const KMeansDistanceComputer & ) { return *this; }
     };
 
@@ -398,7 +385,7 @@ class KMeansIndex : public NNIndex<Distance>
     /**
      *  Returns size of index.
      */
-    size_t size() const
+    size_t size() const CV_OVERRIDE
     {
         return size_;
     }
@@ -406,7 +393,7 @@ class KMeansIndex : public NNIndex<Distance>
     /**
      * Returns the length of an index feature.
      */
-    size_t veclen() const
+    size_t veclen() const CV_OVERRIDE
     {
         return veclen_;
     }
@@ -421,7 +408,7 @@ class KMeansIndex : public NNIndex<Distance>
      * Computes the inde memory usage
      * Returns: memory used by the index
      */
-    int usedMemory() const
+    int usedMemory() const CV_OVERRIDE
     {
         return pool_.usedMemory+pool_.wastedMemory+memoryCounter_;
     }
@@ -429,7 +416,7 @@ class KMeansIndex : public NNIndex<Distance>
     /**
      * Builds the index
      */
-    void buildIndex()
+    void buildIndex() CV_OVERRIDE
     {
         if (branching_<2) {
             throw FLANNException("Branching factor must be at least 2");
@@ -448,7 +435,7 @@ class KMeansIndex : public NNIndex<Distance>
     }
 
 
-    void saveIndex(FILE* stream)
+    void saveIndex(FILE* stream) CV_OVERRIDE
     {
         save_value(stream, branching_);
         save_value(stream, iterations_);
@@ -460,7 +447,7 @@ class KMeansIndex : public NNIndex<Distance>
     }
 
 
-    void loadIndex(FILE* stream)
+    void loadIndex(FILE* stream) CV_OVERRIDE
     {
         load_value(stream, branching_);
         load_value(stream, iterations_);
@@ -495,7 +482,7 @@ class KMeansIndex : public NNIndex<Distance>
      *     vec = the vector for which to search the nearest neighbors
      *     searchParams = parameters that influence the search algorithm (checks, cb_index)
      */
-    void findNeighbors(ResultSet<DistanceType>& result, const ElementType* vec, const SearchParams& searchParams)
+    void findNeighbors(ResultSet<DistanceType>& result, const ElementType* vec, const SearchParams& searchParams) CV_OVERRIDE
     {
 
         int maxChecks = get_param(searchParams,"checks",32);
@@ -554,7 +541,7 @@ class KMeansIndex : public NNIndex<Distance>
         return clusterCount;
     }
 
-    IndexParams getParameters() const
+    IndexParams getParameters() const CV_OVERRIDE
     {
         return index_params_;
     }
@@ -562,7 +549,7 @@ class KMeansIndex : public NNIndex<Distance>
 
 private:
     /**
-     * Struture representing a node in the hierarchical k-means tree.
+     * Structure representing a node in the hierarchical k-means tree.
      */
     struct KMeansNode
     {
@@ -726,7 +713,7 @@ class KMeansIndex : public NNIndex<Distance>
         }
 
         cv::AutoBuffer<int> centers_idx_buf(branching);
-        int* centers_idx = (int*)centers_idx_buf;
+        int* centers_idx = centers_idx_buf.data();
         int centers_length;
         (this->*chooseCenters)(branching, indices, indices_length, centers_idx, centers_length);
 
@@ -739,7 +726,7 @@ class KMeansIndex : public NNIndex<Distance>
 
 
         cv::AutoBuffer<double> dcenters_buf(branching*veclen_);
-        Matrix<double> dcenters((double*)dcenters_buf,branching,veclen_);
+        Matrix<double> dcenters(dcenters_buf.data(), branching, veclen_);
         for (int i=0; i<centers_length; ++i) {
             ElementType* vec = dataset_[centers_idx[i]];
             for (size_t k=0; k<veclen_; ++k) {
@@ -749,7 +736,7 @@ class KMeansIndex : public NNIndex<Distance>
 
         std::vector<DistanceType> radiuses(branching);
         cv::AutoBuffer<int> count_buf(branching);
-        int* count = (int*)count_buf;
+        int* count = count_buf.data();
         for (int i=0; i<branching; ++i) {
             radiuses[i] = 0;
             count[i] = 0;
@@ -757,7 +744,7 @@ class KMeansIndex : public NNIndex<Distance>
 
         //	assign points to clusters
         cv::AutoBuffer<int> belongs_to_buf(indices_length);
-        int* belongs_to = (int*)belongs_to_buf;
+        int* belongs_to = belongs_to_buf.data();
         for (int i=0; i<indices_length; ++i) {
 
             DistanceType sq_dist = distance_(dataset_[indices[i]], dcenters[0], veclen_);
@@ -800,11 +787,27 @@ class KMeansIndex : public NNIndex<Distance>
                 }
             }
 
+            std::vector<int> new_centroids(indices_length);
+            std::vector<DistanceType> sq_dists(indices_length);
+
             // reassign points to clusters
-            cv::Mutex mtx;
-            KMeansDistanceComputer invoker(distance_, dataset_, branching, indices, dcenters, veclen_, count, belongs_to, radiuses, converged, mtx);
+            KMeansDistanceComputer invoker(distance_, dataset_, branching, indices, dcenters, veclen_, new_centroids, sq_dists);
             parallel_for_(cv::Range(0, (int)indices_length), invoker);
 
+            for (int i=0; i < (int)indices_length; ++i) {
+                DistanceType sq_dist(sq_dists[i]);
+                int new_centroid(new_centroids[i]);
+                if (sq_dist > radiuses[new_centroid]) {
+                    radiuses[new_centroid] = sq_dist;
+                }
+                if (new_centroid != belongs_to[i]) {
+                    count[belongs_to[i]]--;
+                    count[new_centroid]++;
+                    belongs_to[i] = new_centroid;
+                    converged = false;
+                }
+            }
+
             for (int i=0; i<branching; ++i) {
                 // if one cluster converges to an empty cluster,
                 // move an element into that cluster
@@ -874,6 +877,8 @@ class KMeansIndex : public NNIndex<Distance>
             computeClustering(node->childs[c],indices+start, end-start, branching, level+1);
             start=end;
         }
+
+        delete[] centers;
     }
 
 
@@ -1051,7 +1056,7 @@ class KMeansIndex : public NNIndex<Distance>
 
 
     /**
-     * Helper function the descends in the hierarchical k-means tree by spliting those clusters that minimize
+     * Helper function that descends in the hierarchical k-means tree by splitting those clusters that minimize
      * the overall variance of the clustering.
      * Params:
      *     root = root node
@@ -1166,4 +1171,6 @@ class KMeansIndex : public NNIndex<Distance>
 
 }
 
+//! @endcond
+
 #endif //OPENCV_FLANN_KMEANS_INDEX_H_
diff --git a/IPL/include/opencv/opencv2/flann/linear_index.h b/IPL/include/opencv/opencv2/flann/linear_index.h
index 5aa7a5c..8a0f10f 100644
--- a/IPL/include/opencv/opencv2/flann/linear_index.h
+++ b/IPL/include/opencv/opencv2/flann/linear_index.h
@@ -31,6 +31,8 @@
 #ifndef OPENCV_FLANN_LINEAR_INDEX_H_
 #define OPENCV_FLANN_LINEAR_INDEX_H_
 
+//! @cond IGNORED
+
 #include "general.h"
 #include "nn_index.h"
 
@@ -63,47 +65,47 @@ class LinearIndex : public NNIndex<Distance>
     LinearIndex(const LinearIndex&);
     LinearIndex& operator=(const LinearIndex&);
 
-    flann_algorithm_t getType() const
+    flann_algorithm_t getType() const CV_OVERRIDE
     {
         return FLANN_INDEX_LINEAR;
     }
 
 
-    size_t size() const
+    size_t size() const CV_OVERRIDE
     {
         return dataset_.rows;
     }
 
-    size_t veclen() const
+    size_t veclen() const CV_OVERRIDE
     {
         return dataset_.cols;
     }
 
 
-    int usedMemory() const
+    int usedMemory() const CV_OVERRIDE
     {
         return 0;
     }
 
-    void buildIndex()
+    void buildIndex() CV_OVERRIDE
     {
         /* nothing to do here for linear search */
     }
 
-    void saveIndex(FILE*)
+    void saveIndex(FILE*) CV_OVERRIDE
     {
         /* nothing to do here for linear search */
     }
 
 
-    void loadIndex(FILE*)
+    void loadIndex(FILE*) CV_OVERRIDE
     {
         /* nothing to do here for linear search */
 
         index_params_["algorithm"] = getType();
     }
 
-    void findNeighbors(ResultSet<DistanceType>& resultSet, const ElementType* vec, const SearchParams& /*searchParams*/)
+    void findNeighbors(ResultSet<DistanceType>& resultSet, const ElementType* vec, const SearchParams& /*searchParams*/) CV_OVERRIDE
     {
         ElementType* data = dataset_.data;
         for (size_t i = 0; i < dataset_.rows; ++i, data += dataset_.cols) {
@@ -112,7 +114,7 @@ class LinearIndex : public NNIndex<Distance>
         }
     }
 
-    IndexParams getParameters() const
+    IndexParams getParameters() const CV_OVERRIDE
     {
         return index_params_;
     }
@@ -129,4 +131,6 @@ class LinearIndex : public NNIndex<Distance>
 
 }
 
+//! @endcond
+
 #endif // OPENCV_FLANN_LINEAR_INDEX_H_
diff --git a/IPL/include/opencv/opencv2/flann/logger.h b/IPL/include/opencv/opencv2/flann/logger.h
index 24f3fb6..8911812 100644
--- a/IPL/include/opencv/opencv2/flann/logger.h
+++ b/IPL/include/opencv/opencv2/flann/logger.h
@@ -31,6 +31,8 @@
 #ifndef OPENCV_FLANN_LOGGER_H
 #define OPENCV_FLANN_LOGGER_H
 
+//! @cond IGNORED
+
 #include <stdio.h>
 #include <stdarg.h>
 
@@ -63,7 +65,12 @@ class Logger
             stream = stdout;
         }
         else {
+#ifdef _MSC_VER
+            if (fopen_s(&stream, name, "w") != 0)
+                stream = NULL;
+#else
             stream = fopen(name,"w");
+#endif
             if (stream == NULL) {
                 stream = stdout;
             }
@@ -127,4 +134,6 @@ class Logger
 
 }
 
+//! @endcond
+
 #endif //OPENCV_FLANN_LOGGER_H
diff --git a/IPL/include/opencv/opencv2/flann/lsh_index.h b/IPL/include/opencv/opencv2/flann/lsh_index.h
index 4d4670e..ee620da 100644
--- a/IPL/include/opencv/opencv2/flann/lsh_index.h
+++ b/IPL/include/opencv/opencv2/flann/lsh_index.h
@@ -35,6 +35,8 @@
 #ifndef OPENCV_FLANN_LSH_INDEX_H_
 #define OPENCV_FLANN_LSH_INDEX_H_
 
+//! @cond IGNORED
+
 #include <algorithm>
 #include <cassert>
 #include <cstring>
@@ -107,7 +109,7 @@ class LshIndex : public NNIndex<Distance>
     /**
      * Builds the index
      */
-    void buildIndex()
+    void buildIndex() CV_OVERRIDE
     {
         tables_.resize(table_number_);
         for (unsigned int i = 0; i < table_number_; ++i) {
@@ -119,13 +121,13 @@ class LshIndex : public NNIndex<Distance>
         }
     }
 
-    flann_algorithm_t getType() const
+    flann_algorithm_t getType() const CV_OVERRIDE
     {
         return FLANN_INDEX_LSH;
     }
 
 
-    void saveIndex(FILE* stream)
+    void saveIndex(FILE* stream) CV_OVERRIDE
     {
         save_value(stream,table_number_);
         save_value(stream,key_size_);
@@ -133,7 +135,7 @@ class LshIndex : public NNIndex<Distance>
         save_value(stream, dataset_);
     }
 
-    void loadIndex(FILE* stream)
+    void loadIndex(FILE* stream) CV_OVERRIDE
     {
         load_value(stream, table_number_);
         load_value(stream, key_size_);
@@ -151,7 +153,7 @@ class LshIndex : public NNIndex<Distance>
     /**
      *  Returns size of index.
      */
-    size_t size() const
+    size_t size() const CV_OVERRIDE
     {
         return dataset_.rows;
     }
@@ -159,7 +161,7 @@ class LshIndex : public NNIndex<Distance>
     /**
      * Returns the length of an index feature.
      */
-    size_t veclen() const
+    size_t veclen() const CV_OVERRIDE
     {
         return feature_size_;
     }
@@ -168,13 +170,13 @@ class LshIndex : public NNIndex<Distance>
      * Computes the index memory usage
      * Returns: memory used by the index
      */
-    int usedMemory() const
+    int usedMemory() const CV_OVERRIDE
     {
         return (int)(dataset_.rows * sizeof(int));
     }
 
 
-    IndexParams getParameters() const
+    IndexParams getParameters() const CV_OVERRIDE
     {
         return index_params_;
     }
@@ -187,7 +189,7 @@ class LshIndex : public NNIndex<Distance>
      * \param[in] knn Number of nearest neighbors to return
      * \param[in] params Search parameters
      */
-    virtual void knnSearch(const Matrix<ElementType>& queries, Matrix<int>& indices, Matrix<DistanceType>& dists, int knn, const SearchParams& params)
+    virtual void knnSearch(const Matrix<ElementType>& queries, Matrix<int>& indices, Matrix<DistanceType>& dists, int knn, const SearchParams& params) CV_OVERRIDE
     {
         assert(queries.cols == veclen());
         assert(indices.rows >= queries.rows);
@@ -217,7 +219,7 @@ class LshIndex : public NNIndex<Distance>
      *     vec = the vector for which to search the nearest neighbors
      *     maxCheck = the maximum number of restarts (in a best-bin-first manner)
      */
-    void findNeighbors(ResultSet<DistanceType>& result, const ElementType* vec, const SearchParams& /*searchParams*/)
+    void findNeighbors(ResultSet<DistanceType>& result, const ElementType* vec, const SearchParams& /*searchParams*/) CV_OVERRIDE
     {
         getNeighbors(vec, result);
     }
@@ -389,4 +391,6 @@ class LshIndex : public NNIndex<Distance>
 };
 }
 
+//! @endcond
+
 #endif //OPENCV_FLANN_LSH_INDEX_H_
diff --git a/IPL/include/opencv/opencv2/flann/lsh_table.h b/IPL/include/opencv/opencv2/flann/lsh_table.h
index 582dcdb..db8b5af 100644
--- a/IPL/include/opencv/opencv2/flann/lsh_table.h
+++ b/IPL/include/opencv/opencv2/flann/lsh_table.h
@@ -35,6 +35,8 @@
 #ifndef OPENCV_FLANN_LSH_TABLE_H_
 #define OPENCV_FLANN_LSH_TABLE_H_
 
+//! @cond IGNORED
+
 #include <algorithm>
 #include <iostream>
 #include <iomanip>
@@ -146,6 +148,9 @@ class LshTable
      */
     LshTable()
     {
+        key_size_ = 0;
+        feature_size_ = 0;
+        speed_level_ = kArray;
     }
 
     /** Default constructor
@@ -155,8 +160,8 @@ class LshTable
      */
     LshTable(unsigned int feature_size, unsigned int key_size)
     {
-        (void)feature_size;
-        (void)key_size;
+        feature_size_ = feature_size;
+        CV_UNUSED(key_size);
         std::cerr << "LSH is not implemented for that type" << std::endl;
         assert(0);
     }
@@ -265,7 +270,7 @@ class LshTable
     {
         const size_t key_size_lower_bound = 1;
         //a value (size_t(1) << key_size) must fit the size_t type so key_size has to be strictly less than size of size_t
-        const size_t key_size_upper_bound = std::min(sizeof(BucketKey) * CHAR_BIT + 1, sizeof(size_t) * CHAR_BIT);
+        const size_t key_size_upper_bound = (std::min)(sizeof(BucketKey) * CHAR_BIT + 1, sizeof(size_t) * CHAR_BIT);
         if (key_size < key_size_lower_bound || key_size >= key_size_upper_bound)
         {
             CV_Error(cv::Error::StsBadArg, cv::format("Invalid key_size (=%d). Valid values for your system are %d <= key_size < %d.", (int)key_size, (int)key_size_lower_bound, (int)key_size_upper_bound));
@@ -330,6 +335,8 @@ class LshTable
      */
     unsigned int key_size_;
 
+    unsigned int feature_size_;
+
     // Members only used for the unsigned char specialization
     /** The mask to apply to a feature to get the hash key
      * Only used in the unsigned char case
@@ -343,14 +350,19 @@ class LshTable
 template<>
 inline LshTable<unsigned char>::LshTable(unsigned int feature_size, unsigned int subsignature_size)
 {
+    feature_size_ = feature_size;
     initialize(subsignature_size);
     // Allocate the mask
-    mask_ = std::vector<size_t>((size_t)ceil((float)(feature_size * sizeof(char)) / (float)sizeof(size_t)), 0);
+    mask_ = std::vector<size_t>((feature_size * sizeof(char) + sizeof(size_t) - 1) / sizeof(size_t), 0);
 
     // A bit brutal but fast to code
-    std::vector<size_t> indices(feature_size * CHAR_BIT);
-    for (size_t i = 0; i < feature_size * CHAR_BIT; ++i) indices[i] = i;
+    std::vector<int> indices(feature_size * CHAR_BIT);
+    for (size_t i = 0; i < feature_size * CHAR_BIT; ++i) indices[i] = (int)i;
+#ifndef OPENCV_FLANN_USE_STD_RAND
+    cv::randShuffle(indices);
+#else
     std::random_shuffle(indices.begin(), indices.end());
+#endif
 
     // Generate a random set of order of subsignature_size_ bits
     for (unsigned int i = 0; i < key_size_; ++i) {
@@ -386,6 +398,7 @@ inline size_t LshTable<unsigned char>::getKey(const unsigned char* feature) cons
 {
     // no need to check if T is dividable by sizeof(size_t) like in the Hamming
     // distance computation as we have a mask
+    // FIXIT: This is a bad assumption, because we are reading tail bytes past the end of the allocated features buffer
     const size_t* feature_block_ptr = reinterpret_cast<const size_t*> ((const void*)feature);
 
     // Figure out the subsignature of the feature
@@ -394,10 +407,20 @@ inline size_t LshTable<unsigned char>::getKey(const unsigned char* feature) cons
     size_t subsignature = 0;
     size_t bit_index = 1;
 
-    for (std::vector<size_t>::const_iterator pmask_block = mask_.begin(); pmask_block != mask_.end(); ++pmask_block) {
+    for (unsigned i = 0; i < feature_size_; i += sizeof(size_t)) {
         // get the mask and signature blocks
-        size_t feature_block = *feature_block_ptr;
-        size_t mask_block = *pmask_block;
+        size_t feature_block;
+        if (i <= feature_size_ - sizeof(size_t))
+        {
+            feature_block = *feature_block_ptr;
+        }
+        else
+        {
+            size_t tmp = 0;
+            memcpy(&tmp, feature_block_ptr, feature_size_ - i); // preserve bytes order
+            feature_block = tmp;
+        }
+        size_t mask_block = mask_[i / sizeof(size_t)];
         while (mask_block) {
             // Get the lowest set bit in the mask block
             size_t lowest_bit = mask_block & (-(ptrdiff_t)mask_block);
@@ -489,4 +512,6 @@ inline LshStats LshTable<unsigned char>::getStats() const
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
+//! @endcond
+
 #endif /* OPENCV_FLANN_LSH_TABLE_H_ */
diff --git a/IPL/include/opencv/opencv2/flann/matrix.h b/IPL/include/opencv/opencv2/flann/matrix.h
index 51b6c63..34893b7 100644
--- a/IPL/include/opencv/opencv2/flann/matrix.h
+++ b/IPL/include/opencv/opencv2/flann/matrix.h
@@ -31,6 +31,8 @@
 #ifndef OPENCV_FLANN_DATASET_H_
 #define OPENCV_FLANN_DATASET_H_
 
+//! @cond IGNORED
+
 #include <stdio.h>
 
 #include "general.h"
@@ -66,7 +68,7 @@ class Matrix
     /**
      * Convenience function for deallocating the storage data.
      */
-    FLANN_DEPRECATED void free()
+    CV_DEPRECATED void free()
     {
         fprintf(stderr, "The cvflann::Matrix<T>::free() method is deprecated "
                 "and it does not do any memory deallocation any more.  You are"
@@ -113,4 +115,6 @@ class UntypedMatrix
 
 }
 
+//! @endcond
+
 #endif //OPENCV_FLANN_DATASET_H_
diff --git a/IPL/include/opencv/opencv2/flann/miniflann.hpp b/IPL/include/opencv/opencv2/flann/miniflann.hpp
index 02fa236..2532907 100644
--- a/IPL/include/opencv/opencv2/flann/miniflann.hpp
+++ b/IPL/include/opencv/opencv2/flann/miniflann.hpp
@@ -40,8 +40,10 @@
 //
 //M*/
 
-#ifndef _OPENCV_MINIFLANN_HPP_
-#define _OPENCV_MINIFLANN_HPP_
+#ifndef OPENCV_MINIFLANN_HPP
+#define OPENCV_MINIFLANN_HPP
+
+//! @cond IGNORED
 
 #include "opencv2/core.hpp"
 #include "opencv2/flann/defines.h"
@@ -52,6 +54,20 @@ namespace cv
 namespace flann
 {
 
+enum FlannIndexType {
+    FLANN_INDEX_TYPE_8U = CV_8U,
+    FLANN_INDEX_TYPE_8S = CV_8S,
+    FLANN_INDEX_TYPE_16U = CV_16U,
+    FLANN_INDEX_TYPE_16S = CV_16S,
+    FLANN_INDEX_TYPE_32S = CV_32S,
+    FLANN_INDEX_TYPE_32F = CV_32F,
+    FLANN_INDEX_TYPE_64F = CV_64F,
+    FLANN_INDEX_TYPE_STRING,
+    FLANN_INDEX_TYPE_BOOL,
+    FLANN_INDEX_TYPE_ALGORITHM,
+    LAST_VALUE_FLANN_INDEX_TYPE = FLANN_INDEX_TYPE_ALGORITHM
+};
+
 struct CV_EXPORTS IndexParams
 {
     IndexParams();
@@ -68,12 +84,17 @@ struct CV_EXPORTS IndexParams
     void setBool(const String& key, bool value);
     void setAlgorithm(int value);
 
+    // FIXIT: replace by void write(FileStorage& fs) const + read()
     void getAll(std::vector<String>& names,
-                std::vector<int>& types,
+                std::vector<FlannIndexType>& types,
                 std::vector<String>& strValues,
                 std::vector<double>& numValues) const;
 
     void* params;
+
+private:
+    IndexParams(const IndexParams &); // copy disabled
+    IndexParams& operator=(const IndexParams &); // assign disabled
 };
 
 struct CV_EXPORTS KDTreeIndexParams : public IndexParams
@@ -155,4 +176,6 @@ class CV_EXPORTS_W Index
 
 } } // namespace cv::flann
 
+//! @endcond
+
 #endif
diff --git a/IPL/include/opencv/opencv2/flann/nn_index.h b/IPL/include/opencv/opencv2/flann/nn_index.h
index 381d4bc..00fe6ec 100644
--- a/IPL/include/opencv/opencv2/flann/nn_index.h
+++ b/IPL/include/opencv/opencv2/flann/nn_index.h
@@ -36,6 +36,8 @@
 #include "result_set.h"
 #include "params.h"
 
+//! @cond IGNORED
+
 namespace cvflann
 {
 
@@ -174,4 +176,6 @@ class NNIndex
 
 }
 
+//! @endcond
+
 #endif //OPENCV_FLANN_NNINDEX_H
diff --git a/IPL/include/opencv/opencv2/flann/object_factory.h b/IPL/include/opencv/opencv2/flann/object_factory.h
index 7f971c5..5cc45ad 100644
--- a/IPL/include/opencv/opencv2/flann/object_factory.h
+++ b/IPL/include/opencv/opencv2/flann/object_factory.h
@@ -31,6 +31,8 @@
 #ifndef OPENCV_FLANN_OBJECT_FACTORY_H_
 #define OPENCV_FLANN_OBJECT_FACTORY_H_
 
+//! @cond IGNORED
+
 #include <map>
 
 namespace cvflann
@@ -88,4 +90,6 @@ class ObjectFactory
 
 }
 
+//! @endcond
+
 #endif /* OPENCV_FLANN_OBJECT_FACTORY_H_ */
diff --git a/IPL/include/opencv/opencv2/flann/params.h b/IPL/include/opencv/opencv2/flann/params.h
index 95ef4cd..b8f7331 100644
--- a/IPL/include/opencv/opencv2/flann/params.h
+++ b/IPL/include/opencv/opencv2/flann/params.h
@@ -30,6 +30,8 @@
 #ifndef OPENCV_FLANN_PARAMS_H_
 #define OPENCV_FLANN_PARAMS_H_
 
+//! @cond IGNORED
+
 #include "any.h"
 #include "general.h"
 #include <iostream>
@@ -95,5 +97,6 @@ inline void print_params(const IndexParams& params)
 
 }
 
+//! @endcond
 
 #endif /* OPENCV_FLANN_PARAMS_H_ */
diff --git a/IPL/include/opencv/opencv2/flann/random.h b/IPL/include/opencv/opencv2/flann/random.h
index a3cf5ec..3bb48b6 100644
--- a/IPL/include/opencv/opencv2/flann/random.h
+++ b/IPL/include/opencv/opencv2/flann/random.h
@@ -31,6 +31,8 @@
 #ifndef OPENCV_FLANN_RANDOM_H
 #define OPENCV_FLANN_RANDOM_H
 
+//! @cond IGNORED
+
 #include <algorithm>
 #include <cstdlib>
 #include <vector>
@@ -40,13 +42,31 @@
 namespace cvflann
 {
 
+inline int rand()
+{
+#ifndef OPENCV_FLANN_USE_STD_RAND
+#   if INT_MAX == RAND_MAX
+    int v = cv::theRNG().next() & INT_MAX;
+#   else
+    int v = cv::theRNG().uniform(0, RAND_MAX + 1);
+#   endif
+#else
+    int v = std::rand();
+#endif // OPENCV_FLANN_USE_STD_RAND
+    return v;
+}
+
 /**
  * Seeds the random number generator
  *  @param seed Random seed
  */
 inline void seed_random(unsigned int seed)
 {
-    srand(seed);
+#ifndef OPENCV_FLANN_USE_STD_RAND
+    cv::theRNG() = cv::RNG(seed);
+#else
+    std::srand(seed);
+#endif
 }
 
 /*
@@ -60,7 +80,7 @@ inline void seed_random(unsigned int seed)
  */
 inline double rand_double(double high = 1.0, double low = 0)
 {
-    return low + ((high-low) * (std::rand() / (RAND_MAX + 1.0)));
+    return low + ((high-low) * (rand() / (RAND_MAX + 1.0)));
 }
 
 /**
@@ -71,7 +91,7 @@ inline double rand_double(double high = 1.0, double low = 0)
  */
 inline int rand_int(int high = RAND_MAX, int low = 0)
 {
-    return low + (int) ( double(high-low) * (std::rand() / (RAND_MAX + 1.0)));
+    return low + (int) ( double(high-low) * (rand() / (RAND_MAX + 1.0)));
 }
 
 /**
@@ -107,7 +127,11 @@ class UniqueRandom
         for (int i = 0; i < size_; ++i) vals_[i] = i;
 
         // shuffle the elements in the array
+#ifndef OPENCV_FLANN_USE_STD_RAND
+        cv::randShuffle(vals_);
+#else
         std::random_shuffle(vals_.begin(), vals_.end());
+#endif
 
         counter_ = 0;
     }
@@ -130,4 +154,6 @@ class UniqueRandom
 
 }
 
+//! @endcond
+
 #endif //OPENCV_FLANN_RANDOM_H
diff --git a/IPL/include/opencv/opencv2/flann/result_set.h b/IPL/include/opencv/opencv2/flann/result_set.h
index 9750019..735028f 100644
--- a/IPL/include/opencv/opencv2/flann/result_set.h
+++ b/IPL/include/opencv/opencv2/flann/result_set.h
@@ -31,6 +31,8 @@
 #ifndef OPENCV_FLANN_RESULTSET_H
 #define OPENCV_FLANN_RESULTSET_H
 
+//! @cond IGNORED
+
 #include <algorithm>
 #include <cstring>
 #include <iostream>
@@ -109,13 +111,13 @@ class KNNSimpleResultSet : public ResultSet<DistanceType>
         return count;
     }
 
-    bool full() const
+    bool full() const CV_OVERRIDE
     {
         return count == capacity;
     }
 
 
-    void addPoint(DistanceType dist, int index)
+    void addPoint(DistanceType dist, int index) CV_OVERRIDE
     {
         if (dist >= worst_distance_) return;
         int i;
@@ -139,7 +141,7 @@ class KNNSimpleResultSet : public ResultSet<DistanceType>
         worst_distance_ = dists[capacity-1];
     }
 
-    DistanceType worstDist() const
+    DistanceType worstDist() const CV_OVERRIDE
     {
         return worst_distance_;
     }
@@ -176,13 +178,13 @@ class KNNResultSet : public ResultSet<DistanceType>
         return count;
     }
 
-    bool full() const
+    bool full() const CV_OVERRIDE
     {
         return count == capacity;
     }
 
 
-    void addPoint(DistanceType dist, int index)
+    void addPoint(DistanceType dist, int index) CV_OVERRIDE
     {
         if (dist >= worst_distance_) return;
         int i;
@@ -215,7 +217,7 @@ class KNNResultSet : public ResultSet<DistanceType>
         worst_distance_ = dists[capacity-1];
     }
 
-    DistanceType worstDist() const
+    DistanceType worstDist() const CV_OVERRIDE
     {
         return worst_distance_;
     }
@@ -301,16 +303,16 @@ class UniqueResultSet : public ResultSet<DistanceType>
         unsigned int index_;
     };
 
-    /** Default cosntructor */
+    /** Default constructor */
     UniqueResultSet() :
-        worst_distance_(std::numeric_limits<DistanceType>::max())
+        is_full_(false), worst_distance_(std::numeric_limits<DistanceType>::max())
     {
     }
 
     /** Check the status of the set
      * @return true if we have k NN
      */
-    inline bool full() const
+    inline bool full() const CV_OVERRIDE
     {
         return is_full_;
     }
@@ -365,7 +367,7 @@ class UniqueResultSet : public ResultSet<DistanceType>
      * If we don't have enough neighbors, it returns the max possible value
      * @return
      */
-    inline DistanceType worstDist() const
+    inline DistanceType worstDist() const CV_OVERRIDE
     {
         return worst_distance_;
     }
@@ -402,7 +404,7 @@ class KNNUniqueResultSet : public UniqueResultSet<DistanceType>
      * @param dist distance for that neighbor
      * @param index index of that neighbor
      */
-    inline void addPoint(DistanceType dist, int index)
+    inline void addPoint(DistanceType dist, int index) CV_OVERRIDE
     {
         // Don't do anything if we are worse than the worst
         if (dist >= worst_distance_) return;
@@ -422,7 +424,7 @@ class KNNUniqueResultSet : public UniqueResultSet<DistanceType>
 
     /** Remove all elements in the set
      */
-    void clear()
+    void clear() CV_OVERRIDE
     {
         dist_indices_.clear();
         worst_distance_ = std::numeric_limits<DistanceType>::max();
@@ -461,14 +463,14 @@ class RadiusUniqueResultSet : public UniqueResultSet<DistanceType>
      * @param dist distance for that neighbor
      * @param index index of that neighbor
      */
-    void addPoint(DistanceType dist, int index)
+    void addPoint(DistanceType dist, int index) CV_OVERRIDE
     {
         if (dist <= radius_) dist_indices_.insert(DistIndex(dist, index));
     }
 
     /** Remove all elements in the set
      */
-    inline void clear()
+    inline void clear() CV_OVERRIDE
     {
         dist_indices_.clear();
     }
@@ -477,7 +479,7 @@ class RadiusUniqueResultSet : public UniqueResultSet<DistanceType>
     /** Check the status of the set
      * @return alwys false
      */
-    inline bool full() const
+    inline bool full() const CV_OVERRIDE
     {
         return true;
     }
@@ -486,7 +488,7 @@ class RadiusUniqueResultSet : public UniqueResultSet<DistanceType>
      * If we don't have enough neighbors, it returns the max possible value
      * @return
      */
-    inline DistanceType worstDist() const
+    inline DistanceType worstDist() const CV_OVERRIDE
     {
         return radius_;
     }
@@ -540,4 +542,6 @@ class KNNRadiusUniqueResultSet : public KNNUniqueResultSet<DistanceType>
 };
 }
 
+//! @endcond
+
 #endif //OPENCV_FLANN_RESULTSET_H
diff --git a/IPL/include/opencv/opencv2/flann/sampling.h b/IPL/include/opencv/opencv2/flann/sampling.h
index 396f177..4e452b9 100644
--- a/IPL/include/opencv/opencv2/flann/sampling.h
+++ b/IPL/include/opencv/opencv2/flann/sampling.h
@@ -30,6 +30,8 @@
 #ifndef OPENCV_FLANN_SAMPLING_H_
 #define OPENCV_FLANN_SAMPLING_H_
 
+//! @cond IGNORED
+
 #include "matrix.h"
 #include "random.h"
 
@@ -77,5 +79,6 @@ Matrix<T> random_sample(const Matrix<T>& srcMatrix, size_t size)
 
 } // namespace
 
+//! @endcond
 
 #endif /* OPENCV_FLANN_SAMPLING_H_ */
diff --git a/IPL/include/opencv/opencv2/flann/saving.h b/IPL/include/opencv/opencv2/flann/saving.h
index 7e3bea5..53359b4 100644
--- a/IPL/include/opencv/opencv2/flann/saving.h
+++ b/IPL/include/opencv/opencv2/flann/saving.h
@@ -29,6 +29,8 @@
 #ifndef OPENCV_FLANN_SAVING_H_
 #define OPENCV_FLANN_SAVING_H_
 
+//! @cond IGNORED
+
 #include <cstring>
 #include <vector>
 
@@ -184,4 +186,6 @@ void load_value(FILE* stream, std::vector<T>& value)
 
 }
 
+//! @endcond
+
 #endif /* OPENCV_FLANN_SAVING_H_ */
diff --git a/IPL/include/opencv/opencv2/flann/simplex_downhill.h b/IPL/include/opencv/opencv2/flann/simplex_downhill.h
index 145901a..20b7e03 100644
--- a/IPL/include/opencv/opencv2/flann/simplex_downhill.h
+++ b/IPL/include/opencv/opencv2/flann/simplex_downhill.h
@@ -31,6 +31,8 @@
 #ifndef OPENCV_FLANN_SIMPLEX_DOWNHILL_H_
 #define OPENCV_FLANN_SIMPLEX_DOWNHILL_H_
 
+//! @cond IGNORED
+
 namespace cvflann
 {
 
@@ -129,7 +131,7 @@ float optimizeSimplexDownhill(T* points, int n, F func, float* vals = NULL )
         }
 
         if (val_r<vals[0]) {
-            // value is smaller than smalest in simplex
+            // value is smaller than smallest in simplex
 
             // expand some more to see if it drops further
             for (int i=0; i<n; ++i) {
@@ -183,4 +185,6 @@ float optimizeSimplexDownhill(T* points, int n, F func, float* vals = NULL )
 
 }
 
+//! @endcond
+
 #endif //OPENCV_FLANN_SIMPLEX_DOWNHILL_H_
diff --git a/IPL/include/opencv/opencv2/flann/timer.h b/IPL/include/opencv/opencv2/flann/timer.h
index f771a34..73795aa 100644
--- a/IPL/include/opencv/opencv2/flann/timer.h
+++ b/IPL/include/opencv/opencv2/flann/timer.h
@@ -31,6 +31,8 @@
 #ifndef OPENCV_FLANN_TIMER_H
 #define OPENCV_FLANN_TIMER_H
 
+//! @cond IGNORED
+
 #include <time.h>
 #include "opencv2/core.hpp"
 #include "opencv2/core/utility.hpp"
@@ -91,4 +93,6 @@ class StartStopTimer
 
 }
 
+//! @endcond
+
 #endif // FLANN_TIMER_H
diff --git a/IPL/include/opencv/opencv2/fuzzy.hpp b/IPL/include/opencv/opencv2/fuzzy.hpp
index 8a532c0..d660cc3 100644
--- a/IPL/include/opencv/opencv2/fuzzy.hpp
+++ b/IPL/include/opencv/opencv2/fuzzy.hpp
@@ -44,21 +44,26 @@
 
 #include "opencv2/fuzzy/types.hpp"
 #include "opencv2/fuzzy/fuzzy_F0_math.hpp"
+#include "opencv2/fuzzy/fuzzy_F1_math.hpp"
 #include "opencv2/fuzzy/fuzzy_image.hpp"
 
 /**
 @defgroup fuzzy Image processing based on fuzzy mathematics
 
-Namespace for all functions is **ft**. The module brings implementation of the last image processing algorithms based on fuzzy mathematics.
+Namespace for all functions is `ft`. The module brings implementation of the latest image processing algorithms based on fuzzy mathematics. Methods are named based on the pattern `FT`_degree_dimension`_`method.
 
   @{
-    @defgroup f0_math Math with F0-transfrom support
+    @defgroup f0_math Math with F0-transform support
 
-Fuzzy transform (F-transform) of the 0th degree transform whole image to a vector of its components. These components are used in latter computation.
+Fuzzy transform (\f$F^0\f$-transform) of the 0th degree transforms the whole image to a matrix of its components. These components are used in later computation, where each of them represents the average color of a certain subarea.
+
+    @defgroup f1_math Math with F1-transform support
+
+Fuzzy transform (\f$F^1\f$-transform) of the 1st degree transforms the whole image to a matrix of its components. Each component is a polynomial of the 1st degree carrying information about the average color and average gradient of a certain subarea.
 
     @defgroup f_image Fuzzy image processing
 
-Image proceesing based on F-transform is fast to process and easy to understand.
+Image processing based on fuzzy mathematics, namely the F-transform.
    @}
 
 */
diff --git a/IPL/include/opencv/opencv2/fuzzy/fuzzy_F0_math.hpp b/IPL/include/opencv/opencv2/fuzzy/fuzzy_F0_math.hpp
index e0a2c48..7beeb9b 100644
--- a/IPL/include/opencv/opencv2/fuzzy/fuzzy_F0_math.hpp
+++ b/IPL/include/opencv/opencv2/fuzzy/fuzzy_F0_math.hpp
@@ -53,64 +53,66 @@ namespace ft
     //! @addtogroup f0_math
     //! @{
 
-    /** @brief Computes components of the array using direct F0-transform.
-    @param matrix Input 1-channel array.
-    @param kernel Kernel used for processing. Function **createKernel** can be used.
-    @param components Output 32-bit array for the components.
+    /** @brief Computes components of the array using direct \f$F^0\f$-transform.
+    @param matrix Input array.
+    @param kernel Kernel used for processing. Function `ft::createKernel` can be used.
+    @param components Output 32-bit float array for the components.
     @param mask Mask can be used for unwanted area marking.
 
     The function computes components using predefined kernel and mask.
-
-    @note
-        F-transform technique is described in paper @cite Perf:FT.
-     */
-    CV_EXPORTS void FT02D_components(InputArray matrix, InputArray kernel, OutputArray components, InputArray mask);
-
-    /** @brief Computes components of the array using direct F0-transform.
-    @param matrix Input 1-channel array.
-    @param kernel Kernel used for processing. Function **createKernel** can be used.
-    @param components Output 32-bit array for the components.
-
-    The function computes components using predefined kernel.
-
-    @note
-        F-transform technique is described in paper @cite Perf:FT.
      */
-    CV_EXPORTS void FT02D_components(InputArray matrix, InputArray kernel, OutputArray components);
+    CV_EXPORTS_W void FT02D_components(InputArray matrix, InputArray kernel, OutputArray components, InputArray mask = noArray());
 
-    /** @brief Computes inverse F0-transfrom.
-    @param components Input 32-bit array for the components.
-    @param kernel Kernel used for processing. Function **createKernel** can be used.
-    @param output Output 32-bit array.
+    /** @brief Computes inverse \f$F^0\f$-transform.
+    @param components Input 32-bit float single channel array for the components.
+    @param kernel Kernel used for processing. Function `ft::createKernel` can be used.
+    @param output Output 32-bit float array.
     @param width Width of the output array.
     @param height Height of the output array.
 
-    @note
-        F-transform technique is described in paper @cite Perf:FT.
+    Computation of inverse F-transform.
      */
-    CV_EXPORTS void FT02D_inverseFT(InputArray components, InputArray kernel, OutputArray output, int width, int height);
+    CV_EXPORTS_W void FT02D_inverseFT(InputArray components, InputArray kernel, OutputArray output, int width, int height);
 
-    /** @brief Computes F0-transfrom and inverse F0-transfrom at once.
-    @param image Input image.
-    @param kernel Kernel used for processing. Function **createKernel** can be used.
-    @param output Output 32-bit array.
+    /** @brief Computes \f$F^0\f$-transform and inverse \f$F^0\f$-transform at once.
+    @param matrix Input matrix.
+    @param kernel Kernel used for processing. Function `ft::createKernel` can be used.
+    @param output Output 32-bit float array.
     @param mask Mask used for unwanted area marking.
 
-    This function computes F-transfrom and inverse F-transfotm in one step. It is fully sufficient and optimized for **Mat**.
+    This function computes F-transform and inverse F-transform in one step. It is fully sufficient and optimized for `cv::Mat`.
     */
-    CV_EXPORTS void FT02D_process(const Mat &image, const Mat &kernel, Mat &output, const Mat &mask);
+    CV_EXPORTS_W void FT02D_process(InputArray matrix, InputArray kernel, OutputArray output, InputArray mask = noArray());
 
-    /** @brief Computes F0-transfrom and inverse F0-transfrom at once and return state.
-    @param image Input image.
-    @param kernel Kernel used for processing. Function **createKernel** can be used.
-    @param imageOutput Output 32-bit array.
+    /** @brief Computes \f$F^0\f$-transform and inverse \f$F^0\f$-transform at once and returns state.
+    @param matrix Input matrix.
+    @param kernel Kernel used for processing. Function `ft::createKernel` can be used.
+    @param output Output 32-bit float array.
     @param mask Mask used for unwanted area marking.
     @param maskOutput Mask after one iteration.
-    @param firstStop If **true** function returns -1 when first problem appears. In case of **false**, the process is completed and summation of all problems returned.
+    @param firstStop If `true`, the function returns -1 when the first problem appears. In case of `false`, the process is completed and the summation of all problems is returned.
+
+    This function computes an iteration of F-transform and inverse F-transform and handles image and mask change. The function is used in the `ft::inpaint` function.
+    */
+    CV_EXPORTS_W int FT02D_iteration(InputArray matrix, InputArray kernel, OutputArray output, InputArray mask, OutputArray maskOutput, bool firstStop);
+
+    /** @brief Slightly less accurate version of \f$F^0\f$-transform computation optimized for higher speed. The method assumes a linear basic function.
+    @param matrix Input 3 channels matrix.
+    @param radius Radius of the `ft::LINEAR` basic function.
+    @param output Output array.
+
+    This function computes F-transform and inverse F-transform using a linear basic function in one step. It is ~10 times faster than the `ft::FT02D_process` method.
+    */
+    CV_EXPORTS_W void FT02D_FL_process(InputArray matrix, const int radius, OutputArray output);
+
+    /** @brief Slightly less accurate version of \f$F^0\f$-transform computation optimized for higher speed. The method assumes a linear basic function.
+    @param matrix Input 3 channels matrix.
+    @param radius Radius of the `ft::LINEAR` basic function.
+    @param output Output array.
 
-    This function computes iteration of F-transfrom and inverse F-transfotm and handle image and mask change. The function is used in *inpaint* function.
+    This function computes F-transform and inverse F-transform using a linear basic function in one step. It is ~9 times faster than the `ft::FT02D_process` method and more accurate than the `ft::FT02D_FL_process` method.
     */
-    CV_EXPORTS int FT02D_iteration(const Mat &image, const Mat &kernel, Mat &imageOutput, const Mat &mask, Mat &maskOutput, bool firstStop = true);
+    CV_EXPORTS_W void FT02D_FL_process_float(InputArray matrix, const int radius, OutputArray output);
 
     //! @}
 }
diff --git a/IPL/include/opencv/opencv2/fuzzy/fuzzy_F1_math.hpp b/IPL/include/opencv/opencv2/fuzzy/fuzzy_F1_math.hpp
new file mode 100644
index 0000000..3320e67
--- /dev/null
+++ b/IPL/include/opencv/opencv2/fuzzy/fuzzy_F1_math.hpp
@@ -0,0 +1,124 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2015, University of Ostrava, Institute for Research and Applications of Fuzzy Modeling,
+// Pavel Vlasanek, all rights reserved. Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef __OPENCV_FUZZY_F1_MATH_H__
+#define __OPENCV_FUZZY_F1_MATH_H__
+
+#include "opencv2/fuzzy/types.hpp"
+#include "opencv2/core.hpp"
+
+namespace cv
+{
+
+namespace ft
+{
+    //! @addtogroup f1_math
+    //! @{
+
+    /** @brief Computes components of the array using direct \f$F^1\f$-transform.
+    @param matrix Input array.
+    @param kernel Kernel used for processing. Function `ft::createKernel` can be used.
+    @param components Output 32-bit float array for the components.
+
+    The function computes linear components using predefined kernel.
+    */
+    CV_EXPORTS_W void FT12D_components(InputArray matrix, InputArray kernel, OutputArray components);
+
+    /** @brief Computes elements of \f$F^1\f$-transform components.
+    @param matrix Input array.
+    @param kernel Kernel used for processing. Function `ft::createKernel` can be used.
+    @param c00 Elements represent average color.
+    @param c10 Elements represent average vertical gradient.
+    @param c01 Elements represent average horizontal gradient.
+    @param components Output 32-bit float array for the components.
+    @param mask Mask can be used for unwanted area marking.
+
+    The function computes components and its elements using predefined kernel and mask.
+    */
+    CV_EXPORTS_W void FT12D_polynomial(InputArray matrix, InputArray kernel, OutputArray c00, OutputArray c10, OutputArray c01, OutputArray components, InputArray mask = noArray());
+
+    /** @brief Creates vertical matrix for \f$F^1\f$-transform computation.
+    @param radius Radius of the basic function.
+    @param matrix The vertical matrix.
+    @param chn Number of channels.
+
+    The function creates a helper vertical matrix for \f$F^1\f$-transform processing. It is used for gradient computation.
+    */
+    CV_EXPORTS_W void FT12D_createPolynomMatrixVertical(int radius, OutputArray matrix, const int chn);
+
+    /** @brief Creates horizontal matrix for \f$F^1\f$-transform computation.
+    @param radius Radius of the basic function.
+    @param matrix The horizontal matrix.
+    @param chn Number of channels.
+
+    The function creates a helper horizontal matrix for \f$F^1\f$-transform processing. It is used for gradient computation.
+    */
+    CV_EXPORTS_W void FT12D_createPolynomMatrixHorizontal(int radius, OutputArray matrix, const int chn);
+
+    /** @brief Computes \f$F^1\f$-transform and inverse \f$F^1\f$-transform at once.
+    @param matrix Input matrix.
+    @param kernel Kernel used for processing. Function `ft::createKernel` can be used.
+    @param output Output 32-bit float array.
+    @param mask Mask used for unwanted area marking.
+
+    This function computes \f$F^1\f$-transform and inverse \f$F^1\f$-transform in one step. It is fully sufficient and optimized for `cv::Mat`.
+
+    @note
+        F-transform technique of first degree is described in paper @cite Vlas:FT.
+    */
+    CV_EXPORTS_W void FT12D_process(InputArray matrix, InputArray kernel, OutputArray output, InputArray mask = noArray());
+
+    /** @brief Computes inverse \f$F^1\f$-transform.
+    @param components Input 32-bit float single channel array for the components.
+    @param kernel Kernel used for processing. The same kernel as for components computation must be used.
+    @param output Output 32-bit float array.
+    @param width Width of the output array.
+    @param height Height of the output array.
+
+    Computation of inverse \f$F^1\f$-transform.
+    */
+    CV_EXPORTS_W void FT12D_inverseFT(InputArray components, InputArray kernel, OutputArray output, int width, int height);
+
+    //! @}
+}
+}
+
+#endif // __OPENCV_FUZZY_F1_MATH_H__
diff --git a/IPL/include/opencv/opencv2/fuzzy/fuzzy_image.hpp b/IPL/include/opencv/opencv2/fuzzy/fuzzy_image.hpp
index 00a8efa..9c9eeff 100644
--- a/IPL/include/opencv/opencv2/fuzzy/fuzzy_image.hpp
+++ b/IPL/include/opencv/opencv2/fuzzy/fuzzy_image.hpp
@@ -56,23 +56,23 @@ namespace ft
     /** @brief Creates kernel from basic functions.
     @param A Basic function used in axis **x**.
     @param B Basic function used in axis **y**.
-    @param kernel Final 32-b kernel derived from **A** and **B**.
+    @param kernel Final 32-bit kernel derived from **A** and **B**.
     @param chn Number of kernel channels.
 
     The function creates kernel usable for latter fuzzy image processing.
     */
-    CV_EXPORTS void createKernel(cv::InputArray A, cv::InputArray B, cv::OutputArray kernel, const int chn = 1);
+    CV_EXPORTS_AS(createKernel1) void createKernel(InputArray A, InputArray B, OutputArray kernel, const int chn);
 
     /** @brief Creates kernel from general functions.
     @param function Function type could be one of the following:
         -   **LINEAR** Linear basic function.
     @param radius Radius of the basic function.
-    @param kernel Final 32-b kernel.
+    @param kernel Final 32-bit kernel.
     @param chn Number of kernel channels.
 
     The function creates kernel from predefined functions.
     */
-    CV_EXPORTS void createKernel(int function, int radius, cv::OutputArray kernel, const int chn = 1);
+    CV_EXPORTS_W void createKernel(int function, int radius, OutputArray kernel, const int chn);
 
     /** @brief Image inpainting
     @param image Input image.
@@ -80,27 +80,27 @@ namespace ft
     @param output Output 32-bit image.
     @param radius Radius of the basic function.
     @param function Function type could be one of the following:
-        -   **LINEAR** Linear basic function.
+        -   `ft::LINEAR` Linear basic function.
     @param algorithm Algorithm could be one of the following:
-        -   **ONE_STEP** One step algorithm.
-        -   **MULTI_STEP** Algorithm automaticaly increasing radius of the basic function.
-        -   **ITERATIVE** Iterative algorithm running in more steps using partial computations.
+        -   `ft::ONE_STEP` One step algorithm.
+        -   `ft::MULTI_STEP` This algorithm automatically increases the radius of the basic function.
+        -   `ft::ITERATIVE` Iterative algorithm running in more steps using partial computations.
 
     This function provides inpainting technique based on the fuzzy mathematic.
 
     @note
         The algorithms are described in paper @cite Perf:rec.
     */
-    CV_EXPORTS void inpaint(const cv::Mat &image, const cv::Mat &mask, cv::Mat &output, int radius = 2, int function = ft::LINEAR, int algorithm = ft::ONE_STEP);
+    CV_EXPORTS_W void inpaint(InputArray image, InputArray mask, OutputArray output, int radius, int function, int algorithm);
 
     /** @brief Image filtering
     @param image Input image.
-    @param kernel Final 32-b kernel.
+    @param kernel Final 32-bit kernel.
     @param output Output 32-bit image.
 
     Filtering of the input image by means of F-transform.
     */
-    CV_EXPORTS void filter(const cv::Mat &image, const cv::Mat &kernel, cv::Mat &output);
+    CV_EXPORTS_W void filter(InputArray image, InputArray kernel, OutputArray output);
 
     //! @}
 }
diff --git a/IPL/include/opencv/opencv2/fuzzy/types.hpp b/IPL/include/opencv/opencv2/fuzzy/types.hpp
index ec831e6..22e0a01 100644
--- a/IPL/include/opencv/opencv2/fuzzy/types.hpp
+++ b/IPL/include/opencv/opencv2/fuzzy/types.hpp
@@ -52,15 +52,15 @@ namespace ft
 
     enum
     {
-        LINEAR = 1,
-        SINUS = 2
+        LINEAR = 1, //!< linear (triangular) shape
+        SINUS = 2 //!< sinusoidal shape
     };
 
     enum
     {
-        ONE_STEP = 1,
-        MULTI_STEP = 2,
-        ITERATIVE = 3
+        ONE_STEP = 1, //!< processing in one step
+        MULTI_STEP = 2, //!< processing in multiple steps
+        ITERATIVE = 3 //!< processing in several iterations
     };
 
     //! @}
diff --git a/IPL/include/opencv/opencv2/gapi.hpp b/IPL/include/opencv/opencv2/gapi.hpp
new file mode 100644
index 0000000..2c99c86
--- /dev/null
+++ b/IPL/include/opencv/opencv2/gapi.hpp
@@ -0,0 +1,33 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2018 Intel Corporation
+
+
+#ifndef OPENCV_GAPI_HPP
+#define OPENCV_GAPI_HPP
+
+#include <memory>
+
+/** \defgroup gapi G-API framework
+@{
+    @defgroup gapi_main_classes G-API Main Classes
+    @defgroup gapi_data_objects G-API Data Types
+    @{
+      @defgroup gapi_meta_args G-API Metadata Descriptors
+    @}
+    @defgroup gapi_std_backends G-API Standard Backends
+    @defgroup gapi_compile_args G-API Graph Compilation Arguments
+@}
+ */
+
+#include <opencv2/gapi/gmat.hpp>
+#include <opencv2/gapi/garray.hpp>
+#include <opencv2/gapi/gcomputation.hpp>
+#include <opencv2/gapi/gcompiled.hpp>
+#include <opencv2/gapi/gtyped.hpp>
+#include <opencv2/gapi/gkernel.hpp>
+#include <opencv2/gapi/operators.hpp>
+
+#endif // OPENCV_GAPI_HPP
diff --git a/IPL/include/opencv/opencv2/gapi/core.hpp b/IPL/include/opencv/opencv2/gapi/core.hpp
new file mode 100644
index 0000000..00400a8
--- /dev/null
+++ b/IPL/include/opencv/opencv2/gapi/core.hpp
@@ -0,0 +1,1726 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2018 Intel Corporation
+
+
+#ifndef OPENCV_GAPI_CORE_HPP
+#define OPENCV_GAPI_CORE_HPP
+
+#include <math.h>
+
+#include <utility> // std::tuple
+
+#include <opencv2/imgproc.hpp>
+
+#include <opencv2/gapi/gmat.hpp>
+#include <opencv2/gapi/gscalar.hpp>
+#include <opencv2/gapi/gkernel.hpp>
+
+/** \defgroup gapi_core G-API Core functionality
+@{
+    @defgroup gapi_math Graph API: Math operations
+    @defgroup gapi_pixelwise Graph API: Pixelwise operations
+    @defgroup gapi_matrixop Graph API: Operations on matrices
+    @defgroup gapi_transform Graph API: Image and channel composition functions
+@}
+ */
+namespace cv { namespace gapi {
+namespace core {
+    using GMat2 = std::tuple<GMat,GMat>;
+    using GMat3 = std::tuple<GMat,GMat,GMat>; // FIXME: how to avoid this?
+    using GMat4 = std::tuple<GMat,GMat,GMat,GMat>;
+    using GMatScalar = std::tuple<GMat, GScalar>;
+
+    G_TYPED_KERNEL(GAdd, <GMat(GMat, GMat, int)>, "org.opencv.core.math.add") {
+        static GMatDesc outMeta(GMatDesc a, GMatDesc b, int ddepth) { // derives the output descriptor from the input descriptors
+            if (ddepth == -1) // no explicit output depth was given
+            {
+                // OpenCV: When the input arrays in add/subtract/multiply/divide
+                // functions have different depths, the output array depth must be
+                // explicitly specified!
+                // See arithm_op() @ arithm.cpp
+                GAPI_Assert(a.chan == b.chan);
+                GAPI_Assert(a.depth == b.depth);
+                return a; // channels and depth agree, so a's descriptor is returned unchanged
+            }
+            return a.withDepth(ddepth); // NOTE(review): presumably a's descriptor with its depth replaced by ddepth; withDepth() is defined elsewhere
+        }
+    };
+
+    G_TYPED_KERNEL(GAddC, <GMat(GMat, GScalar, int)>, "org.opencv.core.math.addC") {
+        static GMatDesc outMeta(GMatDesc a, GScalarDesc, int ddepth) {
+            return a.withDepth(ddepth);
+        }
+    };
+
+    G_TYPED_KERNEL(GSub, <GMat(GMat, GMat, int)>, "org.opencv.core.math.sub") {
+        static GMatDesc outMeta(GMatDesc a, GMatDesc b, int ddepth) {
+            if (ddepth == -1)
+            {
+                // This macro should select a larger data depth from a and b
+                // considering the number of channels in the same
+                // FIXME!!! Clarify if it is valid for sub()
+                GAPI_Assert(a.chan == b.chan);
+                ddepth = std::max(a.depth, b.depth);
+            }
+            return a.withDepth(ddepth);
+        }
+    };
+
+    G_TYPED_KERNEL(GSubC, <GMat(GMat, GScalar, int)>, "org.opencv.core.math.subC") {
+        static GMatDesc outMeta(GMatDesc a, GScalarDesc, int ddepth) {
+            return a.withDepth(ddepth);
+        }
+    };
+
+    G_TYPED_KERNEL(GSubRC,<GMat(GScalar, GMat, int)>, "org.opencv.core.math.subRC") {
+        static GMatDesc outMeta(GScalarDesc, GMatDesc b, int ddepth) {
+            return b.withDepth(ddepth);
+        }
+    };
+
+    G_TYPED_KERNEL(GMul, <GMat(GMat, GMat, double, int)>, "org.opencv.core.math.mul") {
+        static GMatDesc outMeta(GMatDesc a, GMatDesc, double, int ddepth) {
+            return a.withDepth(ddepth);
+        }
+    };
+
+    G_TYPED_KERNEL(GMulCOld, <GMat(GMat, double, int)>, "org.opencv.core.math.mulCOld") {
+        static GMatDesc outMeta(GMatDesc a, double, int ddepth) {
+            return a.withDepth(ddepth);
+        }
+    };
+
+    G_TYPED_KERNEL(GMulC, <GMat(GMat, GScalar, int)>, "org.opencv.core.math.mulC"){
+        static GMatDesc outMeta(GMatDesc a, GScalarDesc, int ddepth) {
+            return a.withDepth(ddepth);
+        }
+    };
+
+    G_TYPED_KERNEL(GMulS, <GMat(GMat, GScalar)>, "org.opencv.core.math.muls") {
+        static GMatDesc outMeta(GMatDesc a, GScalarDesc) {
+            return a;
+        }
+    }; // FIXME: Merge with MulC
+
+    G_TYPED_KERNEL(GDiv, <GMat(GMat, GMat, double, int)>, "org.opencv.core.math.div") {
+        static GMatDesc outMeta(GMatDesc a, GMatDesc b, double, int ddepth) {
+            if (ddepth == -1)
+            {
+                GAPI_Assert(a.depth == b.depth);
+                return b;
+            }
+            return a.withDepth(ddepth);
+        }
+    };
+
+    G_TYPED_KERNEL(GDivC, <GMat(GMat, GScalar, double, int)>, "org.opencv.core.math.divC") {
+        static GMatDesc outMeta(GMatDesc a, GScalarDesc, double, int ddepth) {
+            return a.withDepth(ddepth);
+        }
+    };
+
+    G_TYPED_KERNEL(GDivRC, <GMat(GScalar, GMat, double, int)>, "org.opencv.core.math.divRC") {
+        static GMatDesc outMeta(GScalarDesc, GMatDesc b, double, int ddepth) {
+            return b.withDepth(ddepth);
+        }
+    };
+
+    G_TYPED_KERNEL(GMean, <GScalar(GMat)>, "org.opencv.core.math.mean") {
+        static GScalarDesc outMeta(GMatDesc) {
+            return empty_scalar_desc();
+        }
+    };
+
+    G_TYPED_KERNEL_M(GPolarToCart, <GMat2(GMat, GMat, bool)>, "org.opencv.core.math.polarToCart") {
+        static std::tuple<GMatDesc, GMatDesc> outMeta(GMatDesc, GMatDesc a, bool) {
+            return std::make_tuple(a, a);
+        }
+    };
+
+    G_TYPED_KERNEL_M(GCartToPolar, <GMat2(GMat, GMat, bool)>, "org.opencv.core.math.cartToPolar") {
+        static std::tuple<GMatDesc, GMatDesc> outMeta(GMatDesc x, GMatDesc, bool) {
+            return std::make_tuple(x, x);
+        }
+    };
+
+    G_TYPED_KERNEL(GPhase, <GMat(GMat, GMat, bool)>, "org.opencv.core.math.phase") {
+        static GMatDesc outMeta(const GMatDesc &inx, const GMatDesc &, bool) {
+            return inx;
+        }
+    };
+
+    G_TYPED_KERNEL(GMask, <GMat(GMat,GMat)>, "org.opencv.core.pixelwise.mask") {
+        static GMatDesc outMeta(GMatDesc in, GMatDesc) {
+            return in;
+        }
+    };
+
+    G_TYPED_KERNEL(GCmpGT, <GMat(GMat, GMat)>, "org.opencv.core.pixelwise.compare.cmpGT") {
+        static GMatDesc outMeta(GMatDesc a, GMatDesc) {
+            return a.withDepth(CV_8U);
+        }
+    };
+
+    G_TYPED_KERNEL(GCmpGE, <GMat(GMat, GMat)>, "org.opencv.core.pixelwise.compare.cmpGE") {
+        static GMatDesc outMeta(GMatDesc a, GMatDesc) {
+            return a.withDepth(CV_8U);
+        }
+    };
+
+    G_TYPED_KERNEL(GCmpLE, <GMat(GMat, GMat)>, "org.opencv.core.pixelwise.compare.cmpLE") {
+        static GMatDesc outMeta(GMatDesc a, GMatDesc) {
+            return a.withDepth(CV_8U);
+        }
+    };
+
+    G_TYPED_KERNEL(GCmpLT, <GMat(GMat, GMat)>, "org.opencv.core.pixelwise.compare.cmpLT") {
+        static GMatDesc outMeta(GMatDesc a, GMatDesc) {
+            return a.withDepth(CV_8U);
+        }
+    };
+
+    G_TYPED_KERNEL(GCmpEQ, <GMat(GMat, GMat)>, "org.opencv.core.pixelwise.compare.cmpEQ") {
+        static GMatDesc outMeta(GMatDesc a, GMatDesc) {
+            return a.withDepth(CV_8U);
+        }
+    };
+
+    G_TYPED_KERNEL(GCmpNE, <GMat(GMat, GMat)>, "org.opencv.core.pixelwise.compare.cmpNE") {
+        static GMatDesc outMeta(GMatDesc a, GMatDesc) {
+            return a.withDepth(CV_8U);
+        }
+    };
+
+    G_TYPED_KERNEL(GCmpGTScalar, <GMat(GMat, GScalar)>, "org.opencv.core.pixelwise.compare.cmpGTScalar"){
+        static GMatDesc outMeta(GMatDesc a, GScalarDesc) {
+            return a.withDepth(CV_8U);
+        }
+    };
+
+    G_TYPED_KERNEL(GCmpGEScalar, <GMat(GMat, GScalar)>, "org.opencv.core.pixelwise.compare.cmpGEScalar"){
+        static GMatDesc outMeta(GMatDesc a, GScalarDesc) {
+            return a.withDepth(CV_8U);
+        }
+    };
+
+    G_TYPED_KERNEL(GCmpLEScalar, <GMat(GMat, GScalar)>, "org.opencv.core.pixelwise.compare.cmpLEScalar"){
+        static GMatDesc outMeta(GMatDesc a, GScalarDesc) {
+            return a.withDepth(CV_8U);
+        }
+    };
+
+    G_TYPED_KERNEL(GCmpLTScalar, <GMat(GMat, GScalar)>, "org.opencv.core.pixelwise.compare.cmpLTScalar"){
+        static GMatDesc outMeta(GMatDesc a, GScalarDesc) { // indentation aligned with the sibling GCmp*Scalar kernels
+            return a.withDepth(CV_8U); // same descriptor expression as the other comparison kernels in this header
+        }
+    };
+
+    G_TYPED_KERNEL(GCmpEQScalar, <GMat(GMat, GScalar)>, "org.opencv.core.pixelwise.compare.cmpEQScalar"){
+        static GMatDesc outMeta(GMatDesc a, GScalarDesc) {
+            return a.withDepth(CV_8U);
+        }
+    };
+
+    G_TYPED_KERNEL(GCmpNEScalar, <GMat(GMat, GScalar)>, "org.opencv.core.pixelwise.compare.cmpNEScalar"){
+        static GMatDesc outMeta(GMatDesc a, GScalarDesc) {
+            return a.withDepth(CV_8U);
+        }
+    };
+
+    G_TYPED_KERNEL(GAnd, <GMat(GMat, GMat)>, "org.opencv.core.pixelwise.bitwise_and") {
+        static GMatDesc outMeta(GMatDesc a, GMatDesc) {
+            return a;
+        }
+    };
+
+    G_TYPED_KERNEL(GAndS, <GMat(GMat, GScalar)>, "org.opencv.core.pixelwise.bitwise_andS") {
+        static GMatDesc outMeta(GMatDesc a, GScalarDesc) {
+            return a;
+        }
+    };
+
+    G_TYPED_KERNEL(GOr, <GMat(GMat, GMat)>, "org.opencv.core.pixelwise.bitwise_or") {
+        static GMatDesc outMeta(GMatDesc a, GMatDesc) {
+            return a;
+        }
+    };
+
+    G_TYPED_KERNEL(GOrS, <GMat(GMat, GScalar)>, "org.opencv.core.pixelwise.bitwise_orS") {
+        static GMatDesc outMeta(GMatDesc a, GScalarDesc) {
+            return a;
+        }
+    };
+
+    G_TYPED_KERNEL(GXor, <GMat(GMat, GMat)>, "org.opencv.core.pixelwise.bitwise_xor") {
+        static GMatDesc outMeta(GMatDesc a, GMatDesc) {
+            return a;
+        }
+    };
+
+    G_TYPED_KERNEL(GXorS, <GMat(GMat, GScalar)>, "org.opencv.core.pixelwise.bitwise_xorS") {
+        static GMatDesc outMeta(GMatDesc a, GScalarDesc) {
+            return a;
+        }
+    };
+
+    G_TYPED_KERNEL(GNot, <GMat(GMat)>, "org.opencv.core.pixelwise.bitwise_not") {
+        static GMatDesc outMeta(GMatDesc a) {
+            return a;
+        }
+    };
+
+    G_TYPED_KERNEL(GSelect, <GMat(GMat, GMat, GMat)>, "org.opencv.core.pixelwise.select") {
+        static GMatDesc outMeta(GMatDesc a, GMatDesc, GMatDesc) { // only the first of the three descriptions is used
+            return a; // output description is the first input's description, unchanged
+        }
+    };
+
+    G_TYPED_KERNEL(GMin, <GMat(GMat, GMat)>, "org.opencv.core.matrixop.min") {
+        static GMatDesc outMeta(GMatDesc a, GMatDesc) { // second input's description is not consulted
+            return a; // output description is the first input's description, unchanged
+        }
+    };
+
+    G_TYPED_KERNEL(GMax, <GMat(GMat, GMat)>, "org.opencv.core.matrixop.max") {
+        static GMatDesc outMeta(GMatDesc a, GMatDesc) { // second input's description is not consulted
+            return a; // output description is the first input's description, unchanged
+        }
+    };
+
+    G_TYPED_KERNEL(GAbsDiff, <GMat(GMat, GMat)>, "org.opencv.core.matrixop.absdiff") {
+        static GMatDesc outMeta(GMatDesc a, GMatDesc) { // second input's description is not consulted
+            return a; // output description is the first input's description, unchanged
+        }
+    };
+
+    G_TYPED_KERNEL(GAbsDiffC, <GMat(GMat, GScalar)>, "org.opencv.core.matrixop.absdiffC") {
+        static GMatDesc outMeta(GMatDesc a, GScalarDesc) { // the scalar's description is not consulted
+            return a; // output description is the matrix input's description, unchanged
+        }
+    };
+
+    G_TYPED_KERNEL(GSum, <GScalar(GMat)>, "org.opencv.core.matrixop.sum") {
+        static GScalarDesc outMeta(GMatDesc) { // the input matrix description is not consulted
+            return empty_scalar_desc(); // scalar output: description comes from empty_scalar_desc()
+        }
+    };
+
+    G_TYPED_KERNEL(GAddW, <GMat(GMat, double, GMat, double, double, int)>, "org.opencv.core.matrixop.addweighted") {
+        static GMatDesc outMeta(GMatDesc a, double, GMatDesc b, double, double, int ddepth) {
+            if (ddepth != -1)
+            {
+                // An explicitly requested output depth replaces the depth of input a.
+                return a.withDepth(ddepth);
+            }
+            // ddepth == -1: output inherits a's description, so both inputs must
+            // agree on channels and depth (OpenCV requires an explicit output depth
+            // when input depths differ; see arithm_op() in arithm.cpp).
+            GAPI_Assert(a.chan == b.chan);
+            GAPI_Assert(a.depth == b.depth);
+            return a;
+        }
+    };
+
+    G_TYPED_KERNEL(GNormL1, <GScalar(GMat)>, "org.opencv.core.matrixop.norml1") {
+        static GScalarDesc outMeta(GMatDesc) { // the input matrix description is not consulted
+            return empty_scalar_desc(); // scalar output: description comes from empty_scalar_desc()
+        }
+    };
+
+    G_TYPED_KERNEL(GNormL2, <GScalar(GMat)>, "org.opencv.core.matrixop.norml2") {
+        static GScalarDesc outMeta(GMatDesc) { // the input matrix description is not consulted
+            return empty_scalar_desc(); // scalar output: description comes from empty_scalar_desc()
+        }
+    };
+
+    G_TYPED_KERNEL(GNormInf, <GScalar(GMat)>, "org.opencv.core.matrixop.norminf") {
+        static GScalarDesc outMeta(GMatDesc) { // the input matrix description is not consulted
+            return empty_scalar_desc(); // scalar output: description comes from empty_scalar_desc()
+        }
+    };
+
+    G_TYPED_KERNEL_M(GIntegral, <GMat2(GMat, int, int)>, "org.opencv.core.matrixop.integral") {
+        static std::tuple<GMatDesc, GMatDesc> outMeta(GMatDesc in, int sd, int sqd) {
+            auto grown = in.withSizeDelta(1,1); // both outputs share the same (1,1) size delta
+            return std::make_tuple(grown.withDepth(sd), grown.withDepth(sqd)); // depths: sd, then sqd
+        }
+    };
+
+    G_TYPED_KERNEL(GThreshold, <GMat(GMat, GScalar, GScalar, int)>, "org.opencv.core.matrixop.threshold") {
+        static GMatDesc outMeta(GMatDesc in, GScalarDesc, GScalarDesc, int) { // scalar descriptions and the int argument are not consulted
+            return in; // output description equals the input matrix description
+        }
+    };
+
+
+    G_TYPED_KERNEL_M(GThresholdOT, <GMatScalar(GMat, GScalar, int)>, "org.opencv.core.matrixop.thresholdOT") {
+        static std::tuple<GMatDesc,GScalarDesc> outMeta(GMatDesc in, GScalarDesc, int) { // scalar description and the int argument are not consulted
+            return std::make_tuple(in, empty_scalar_desc()); // (matrix output mirrors the input, scalar output from empty_scalar_desc())
+        }
+    };
+
+    G_TYPED_KERNEL(GInRange, <GMat(GMat, GScalar, GScalar)>, "org.opencv.core.matrixop.inrange") {
+        static GMatDesc outMeta(GMatDesc in, GScalarDesc, GScalarDesc) { // the two scalar descriptions are not consulted
+            return in.withType(CV_8U, 1); // input description retyped to CV_8U depth with 1 channel
+        }
+    };
+
+    G_TYPED_KERNEL_M(GSplit3, <GMat3(GMat)>, "org.opencv.core.transform.split3") {
+        static std::tuple<GMatDesc, GMatDesc, GMatDesc> outMeta(GMatDesc in) {
+            // Each of the three outputs keeps the input depth but is retyped to one channel,
+            // so a single description is built once and reused for every output.
+            const auto plane = in.withType(in.depth, 1);
+            return std::make_tuple(plane, plane, plane);
+        }
+    };
+
+    G_TYPED_KERNEL_M(GSplit4, <GMat4(GMat)>,"org.opencv.core.transform.split4") {
+        static std::tuple<GMatDesc, GMatDesc, GMatDesc, GMatDesc> outMeta(GMatDesc in) {
+            // Each of the four outputs keeps the input depth but is retyped to one channel,
+            // so a single description is built once and reused for every output.
+            const auto plane = in.withType(in.depth, 1);
+            return std::make_tuple(plane, plane, plane, plane);
+        }
+    };
+
+    G_TYPED_KERNEL(GResize, <GMat(GMat,Size,double,double,int)>, "org.opencv.core.transform.resize") {
+        static GMatDesc outMeta(GMatDesc in, Size sz, double fx, double fy, int) {
+            if (sz.width == 0 || sz.height == 0)
+            {
+                GAPI_Assert(fx != 0. && fy != 0.); // no complete target size: both scale factors are required
+                const int scaled_w = static_cast<int>(round(in.size.width  * fx));
+                const int scaled_h = static_cast<int>(round(in.size.height * fy));
+                return in.withSize(Size(scaled_w, scaled_h));
+            }
+            else
+            {
+                return in.withSize(sz); // a fully specified target size takes precedence over fx/fy
+            }
+        }
+    };
+
+    G_TYPED_KERNEL(GResizeP, <GMatP(GMatP,Size,int)>, "org.opencv.core.transform.resizeP") {
+        static GMatDesc outMeta(GMatDesc in, Size sz, int interp) {
+            GAPI_Assert(in.depth == CV_8U);           // only CV_8U depth is accepted
+            GAPI_Assert(in.chan == 3);                // exactly three channels are required
+            GAPI_Assert(in.planar);                   // the input description must be flagged planar
+            GAPI_Assert(interp == cv::INTER_LINEAR);  // only linear interpolation is accepted
+            return in.withSize(sz);                   // input description with its size replaced by sz
+        }
+    };
+
+    G_TYPED_KERNEL(GMerge3, <GMat(GMat,GMat,GMat)>, "org.opencv.core.transform.merge3") {
+        static GMatDesc outMeta(GMatDesc in, GMatDesc, GMatDesc) { // only the first input's description is consulted
+            // Preserve depth and add channel component
+            return in.withType(in.depth, 3); // first input's depth, 3 channels
+        }
+    };
+
+    G_TYPED_KERNEL(GMerge4, <GMat(GMat,GMat,GMat,GMat)>, "org.opencv.core.transform.merge4") {
+        static GMatDesc outMeta(GMatDesc in, GMatDesc, GMatDesc, GMatDesc) { // only the first input's description is consulted
+            // Preserve depth and add channel component
+            return in.withType(in.depth, 4); // first input's depth, 4 channels
+        }
+    };
+
+    G_TYPED_KERNEL(GRemap, <GMat(GMat, Mat, Mat, int, int, Scalar)>, "org.opencv.core.transform.remap") {
+        static GMatDesc outMeta(GMatDesc in, Mat m1, Mat, int, int, Scalar) { // only in and the first Mat argument are used
+            return in.withSize(m1.size()); // output size is taken from the first Mat argument
+        }
+    };
+
+    G_TYPED_KERNEL(GFlip, <GMat(GMat, int)>, "org.opencv.core.transform.flip") {
+        static GMatDesc outMeta(GMatDesc in, int) { // the int argument does not affect the output description
+            return in; // output description equals the input description
+        }
+    };
+
+    // TODO: eliminate the need in this kernel (streaming)
+    G_TYPED_KERNEL(GCrop, <GMat(GMat, Rect)>, "org.opencv.core.transform.crop") {
+        static GMatDesc outMeta(GMatDesc in, Rect rc) {
+            return in.withSize(Size(rc.width, rc.height)); // only the rectangle's width and height are read here
+        }
+    };
+
+    G_TYPED_KERNEL(GCopy, <GMat(GMat)>, "org.opencv.core.transform.copy") {
+        static GMatDesc outMeta(GMatDesc in) {
+            return in; // output description equals the input description
+        }
+    };
+
+    G_TYPED_KERNEL(GConcatHor, <GMat(GMat, GMat)>, "org.opencv.imgproc.transform.concatHor") {
+        static GMatDesc outMeta(GMatDesc l, GMatDesc r) { // l and r: descriptions of the two inputs, in argument order
+            return l.withSizeDelta(+r.size.width, 0); // l's description with a width delta of r's width and no height delta
+        }
+    };
+
+    G_TYPED_KERNEL(GConcatVert, <GMat(GMat, GMat)>, "org.opencv.imgproc.transform.concatVert") {
+        static GMatDesc outMeta(GMatDesc t, GMatDesc b) { // t and b: descriptions of the two inputs, in argument order
+            return t.withSizeDelta(0, +b.size.height); // t's description with a height delta of b's height and no width delta
+        }
+    };
+
+    G_TYPED_KERNEL(GLUT, <GMat(GMat, Mat)>, "org.opencv.core.transform.LUT") {
+        static GMatDesc outMeta(GMatDesc in, Mat) { // the Mat argument does not affect the output description
+            return in; // output description equals the input description
+        }
+    };
+
+    G_TYPED_KERNEL(GConvertTo, <GMat(GMat, int, double, double)>, "org.opencv.core.transform.convertTo") {
+        static GMatDesc outMeta(GMatDesc in, int rdepth, double, double) {
+            return (rdepth >= 0) ? in.withDepth(rdepth) : in; // a negative rdepth leaves the input description as is
+        }
+    };
+
+    G_TYPED_KERNEL(GSqrt, <GMat(GMat)>, "org.opencv.core.math.sqrt") {
+        static GMatDesc outMeta(GMatDesc in) {
+            return in; // output description equals the input description
+        }
+    };
+
+    G_TYPED_KERNEL(GNormalize, <GMat(GMat, double, double, int, int)>, "org.opencv.core.normalize") {
+        static GMatDesc outMeta(GMatDesc in, double, double, int, int ddepth) {
+            // Note: unlike the OpenCV counterpart, this kernel takes no mask parameter.
+            return (ddepth >= 0) ? in.withDepth(ddepth) : in; // a negative ddepth leaves the input description as is
+        }
+    };
+
+    G_TYPED_KERNEL(GWarpPerspective, <GMat(GMat, const Mat&, Size, int, int, const cv::Scalar&)>, "org.opencv.core.warpPerspective") {
+        static GMatDesc outMeta(GMatDesc in, const Mat&, Size dsize, int, int borderMode, const cv::Scalar&) {
+            GAPI_Assert((borderMode == cv::BORDER_CONSTANT || borderMode == cv::BORDER_REPLICATE) && // the string literal below only serves as the assertion text
+                        "cv::gapi::warpPerspective supports only cv::BORDER_CONSTANT and cv::BORDER_REPLICATE border modes");
+            return in.withType(in.depth, in.chan).withSize(dsize); // depth and channel count passed through; size replaced by dsize
+        }
+    };
+
+    G_TYPED_KERNEL(GWarpAffine, <GMat(GMat, const Mat&, Size, int, int, const cv::Scalar&)>, "org.opencv.core.warpAffine") {
+        static GMatDesc outMeta(GMatDesc in, const Mat&, Size dsize, int, int border_mode, const cv::Scalar&) {
+            GAPI_Assert(border_mode != cv::BORDER_TRANSPARENT && // the string literal below only serves as the assertion text
+                        "cv::BORDER_TRANSPARENT mode is not supported in cv::gapi::warpAffine");
+            return in.withType(in.depth, in.chan).withSize(dsize); // depth and channel count passed through; size replaced by dsize
+        }
+    };
+}
+
+//! @addtogroup gapi_math
+//! @{
+
+/** @brief Calculates the per-element sum of two matrices.
+
+The function add calculates sum of two matrices of the same size and the same number of channels:
+\f[\texttt{dst}(I) =  \texttt{saturate} ( \texttt{src1}(I) +  \texttt{src2}(I))\f]
+
+The function can be replaced with matrix expressions:
+    \f[\texttt{dst} =  \texttt{src1} + \texttt{src2}\f]
+
+The input matrices and the output matrix can all have the same or different depths. For example, you
+can add a 16-bit unsigned matrix to a 8-bit signed matrix and store the sum as a 32-bit
+floating-point matrix. Depth of the output matrix is determined by the ddepth parameter.
+If src1.depth() == src2.depth(), ddepth can be set to the default -1. In this case, the output matrix will have
+the same depth as the input matrices.
+
+Supported matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, @ref CV_32FC1.
+
+@note Function textual ID is "org.opencv.core.math.add"
+@param src1 first input matrix.
+@param src2 second input matrix.
+@param ddepth optional depth of the output matrix.
+@sa sub, addWeighted
+*/
+GAPI_EXPORTS GMat add(const GMat& src1, const GMat& src2, int ddepth = -1);
+
+/** @brief Calculates the per-element sum of matrix and given scalar.
+
+The function addC adds a given scalar value to each element of given matrix.
+The function can be replaced with matrix expressions:
+
+    \f[\texttt{dst} =  \texttt{src1} + \texttt{c}\f]
+
+Depth of the output matrix is determined by the ddepth parameter.
+If ddepth is set to default -1, the depth of output matrix will be the same as the depth of input matrix.
+The matrices can be single or multi channel. Output matrix must have the same size and number of channels as the input matrix.
+
+Supported matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, @ref CV_32FC1.
+
+@note Function textual ID is "org.opencv.core.math.addC"
+@param src1 first input matrix.
+@param c scalar value to be added.
+@param ddepth optional depth of the output matrix.
+@sa sub, addWeighted
+*/
+GAPI_EXPORTS GMat addC(const GMat& src1, const GScalar& c, int ddepth = -1);
+//! @overload
+GAPI_EXPORTS GMat addC(const GScalar& c, const GMat& src1, int ddepth = -1);
+
+/** @brief Calculates the per-element difference between two matrices.
+
+The function sub calculates difference between two matrices, when both matrices have the same size and the same number of
+channels:
+    \f[\texttt{dst}(I) =   \texttt{src1}(I) -  \texttt{src2}(I)\f]
+
+The function can be replaced with matrix expressions:
+\f[\texttt{dst} =   \texttt{src1} -  \texttt{src2}\f]
+
+The input matrices and the output matrix can all have the same or different depths. For example, you
+can subtract two 8-bit unsigned matrices and store the result as a 16-bit signed matrix.
+Depth of the output matrix is determined by the ddepth parameter.
+If src1.depth() == src2.depth(), ddepth can be set to the default -1. In this case, the output matrix will have
+the same depth as the input matrices. The matrices can be single or multi channel.
+
+Supported matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, @ref CV_32FC1.
+
+@note Function textual ID is "org.opencv.core.math.sub"
+@param src1 first input matrix.
+@param src2 second input matrix.
+@param ddepth optional depth of the output matrix.
+@sa  add, addC
+  */
+GAPI_EXPORTS GMat sub(const GMat& src1, const GMat& src2, int ddepth = -1);
+
+/** @brief Calculates the per-element difference between matrix and given scalar.
+
+The function can be replaced with matrix expressions:
+    \f[\texttt{dst} =  \texttt{src} - \texttt{c}\f]
+
+Depth of the output matrix is determined by the ddepth parameter.
+If ddepth is set to default -1, the depth of output matrix will be the same as the depth of input matrix.
+The matrices can be single or multi channel. Output matrix must have the same size as src.
+
+Supported matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, @ref CV_32FC1.
+
+@note Function textual ID is "org.opencv.core.math.subC"
+@param src first input matrix.
+@param c scalar value to be subtracted.
+@param ddepth optional depth of the output matrix.
+@sa  add, addC, subRC
+  */
+GAPI_EXPORTS GMat subC(const GMat& src, const GScalar& c, int ddepth = -1);
+
+/** @brief Calculates the per-element difference between given scalar and the matrix.
+
+The function can be replaced with matrix expressions:
+    \f[\texttt{dst} =  \texttt{c} - \texttt{src}\f]
+
+Depth of the output matrix is determined by the ddepth parameter.
+If ddepth is set to default -1, the depth of output matrix will be the same as the depth of input matrix.
+The matrices can be single or multi channel. Output matrix must have the same size as src.
+
+Supported matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, @ref CV_32FC1.
+
+@note Function textual ID is "org.opencv.core.math.subRC"
+@param c scalar value to subtract from.
+@param src input matrix to be subtracted.
+@param ddepth optional depth of the output matrix.
+@sa  add, addC, subC
+  */
+GAPI_EXPORTS GMat subRC(const GScalar& c, const GMat& src, int ddepth = -1);
+
+/** @brief Calculates the per-element scaled product of two matrices.
+
+The function mul calculates the per-element product of two matrices:
+
+\f[\texttt{dst} (I)= \texttt{saturate} ( \texttt{scale} \cdot \texttt{src1} (I)  \cdot \texttt{src2} (I))\f]
+
+If src1.depth() == src2.depth(), ddepth can be set to the default -1. In this case, the output matrix will have
+the same depth as the input matrices. The matrices can be single or multi channel.
+Output matrix must have the same size as input matrices.
+
+Supported matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, @ref CV_32FC1.
+
+@note Function textual ID is "org.opencv.core.math.mul"
+@param src1 first input matrix.
+@param src2 second input matrix of the same size and the same depth as src1.
+@param scale optional scale factor.
+@param ddepth optional depth of the output matrix.
+@sa add, sub, div, addWeighted
+*/
+GAPI_EXPORTS GMat mul(const GMat& src1, const GMat& src2, double scale = 1.0, int ddepth = -1);
+
+/** @brief Multiplies matrix by scalar.
+
+The function mulC multiplies each element of matrix src by given scalar value:
+
+\f[\texttt{dst} (I)= \texttt{saturate} (  \texttt{src} (I)  \cdot \texttt{multiplier} )\f]
+
+The matrices can be single or multi channel. Output matrix must have the same size as src.
+
+Supported matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, @ref CV_32FC1.
+
+@note Function textual ID is "org.opencv.core.math.mulC"
+@param src input matrix.
+@param multiplier factor to be multiplied.
+@param ddepth optional depth of the output matrix. If -1, the depth of output matrix will be the same as input matrix depth.
+@sa add, sub, div, addWeighted
+*/
+GAPI_EXPORTS GMat mulC(const GMat& src, double multiplier, int ddepth = -1);
+//! @overload
+GAPI_EXPORTS GMat mulC(const GMat& src, const GScalar& multiplier, int ddepth = -1);   // FIXME: merge with mulc
+//! @overload
+GAPI_EXPORTS GMat mulC(const GScalar& multiplier, const GMat& src, int ddepth = -1);   // FIXME: merge with mulc
+
+/** @brief Performs per-element division of two matrices.
+
+The function divides one matrix by another:
+\f[\texttt{dst(I) = saturate(src1(I)*scale/src2(I))}\f]
+
+When src2(I) is zero, dst(I) will also be zero. Different channels of
+multi-channel matrices are processed independently.
+The matrices can be single or multi channel. Output matrix must have the same size and depth as src.
+
+Supported matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, @ref CV_32FC1.
+
+@note Function textual ID is "org.opencv.core.math.div"
+@param src1 first input matrix.
+@param src2 second input matrix of the same size and depth as src1.
+@param scale scalar factor.
+@param ddepth optional depth of the output matrix; you can only pass -1 when src1.depth() == src2.depth().
+@sa  mul, add, sub
+*/
+GAPI_EXPORTS GMat div(const GMat& src1, const GMat& src2, double scale, int ddepth = -1);
+
+/** @brief Divides matrix by scalar.
+
+The function divC divides each element of matrix src by given scalar value:
+
+\f[\texttt{dst(I) = saturate(src(I)*scale/divisor)}\f]
+
+When divisor is zero, dst(I) will also be zero. Different channels of
+multi-channel matrices are processed independently.
+The matrices can be single or multi channel. Output matrix must have the same size and depth as src.
+
+Supported matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, @ref CV_32FC1.
+
+@note Function textual ID is "org.opencv.core.math.divC"
+@param src input matrix.
+@param divisor number to be divided by.
+@param ddepth optional depth of the output matrix. If -1, the depth of output matrix will be the same as input matrix depth.
+@param scale scale factor.
+@sa add, sub, div, addWeighted
+*/
+GAPI_EXPORTS GMat divC(const GMat& src, const GScalar& divisor, double scale, int ddepth = -1);
+
+/** @brief Divides scalar by matrix.
+
+The function divRC divides given scalar by each element of matrix src and keep the division result in new matrix of the same size and type as src:
+
+\f[\texttt{dst(I) = saturate(divident*scale/src(I))}\f]
+
+When src(I) is zero, dst(I) will also be zero. Different channels of
+multi-channel matrices are processed independently.
+The matrices can be single or multi channel. Output matrix must have the same size and depth as src.
+
+Supported matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, @ref CV_32FC1.
+
+@note Function textual ID is "org.opencv.core.math.divRC"
+@param src input matrix.
+@param divident number to be divided.
+@param ddepth optional depth of the output matrix. If -1, the depth of output matrix will be the same as input matrix depth.
+@param scale scale factor
+@sa add, sub, div, addWeighted
+*/
+GAPI_EXPORTS GMat divRC(const GScalar& divident, const GMat& src, double scale, int ddepth = -1);
+
+/** @brief Applies a mask to a matrix.
+
+The function mask keeps the value from the given matrix if the corresponding pixel value in the mask matrix is set to true,
+and sets the matrix value to 0 otherwise.
+
+Supported src matrix data types are @ref CV_8UC1, @ref CV_16SC1, @ref CV_16UC1. Supported mask data type is @ref CV_8UC1.
+
+@note Function textual ID is "org.opencv.core.math.mask"
+@param src input matrix.
+@param mask input mask matrix.
+*/
+GAPI_EXPORTS GMat mask(const GMat& src, const GMat& mask);
+
+/** @brief Calculates an average (mean) of matrix elements.
+
+The function mean calculates the mean value M of matrix elements,
+independently for each channel, and returns it.
+
+Supported matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, @ref CV_32FC1.
+
+@note Function textual ID is "org.opencv.core.math.mean"
+@param src input matrix.
+*/
+GAPI_EXPORTS GScalar mean(const GMat& src);
+
+/** @brief Calculates x and y coordinates of 2D vectors from their magnitude and angle.
+
+The function polarToCart calculates the Cartesian coordinates of each 2D
+vector represented by the corresponding elements of magnitude and angle:
+\f[\begin{array}{l} \texttt{x} (I) =  \texttt{magnitude} (I) \cos ( \texttt{angle} (I)) \\ \texttt{y} (I) =  \texttt{magnitude} (I) \sin ( \texttt{angle} (I)) \\ \end{array}\f]
+
+The relative accuracy of the estimated coordinates is about 1e-6.
+
+First output is a matrix of x-coordinates of 2D vectors.
+Second output is a matrix of y-coordinates of 2D vectors.
+Both outputs must have the same size and depth as input matrices.
+
+@note Function textual ID is "org.opencv.core.math.polarToCart"
+
+@param magnitude input floating-point @ref CV_32FC1 matrix (1xN) of magnitudes of 2D vectors;
+@param angle input floating-point @ref CV_32FC1 matrix (1xN) of angles of 2D vectors.
+@param angleInDegrees when true, the input angles are measured in
+degrees, otherwise, they are measured in radians.
+@sa cartToPolar, exp, log, pow, sqrt
+*/
+GAPI_EXPORTS std::tuple<GMat, GMat> polarToCart(const GMat& magnitude, const GMat& angle,
+                                              bool angleInDegrees = false);
+
+/** @brief Calculates the magnitude and angle of 2D vectors.
+
+The function cartToPolar calculates either the magnitude, angle, or both
+for every 2D vector (x(I),y(I)):
+\f[\begin{array}{l} \texttt{magnitude} (I)= \sqrt{\texttt{x}(I)^2+\texttt{y}(I)^2} , \\ \texttt{angle} (I)= \texttt{atan2} ( \texttt{y} (I), \texttt{x} (I))[ \cdot180 / \pi ] \end{array}\f]
+
+The angles are calculated with accuracy about 0.3 degrees. For the point
+(0,0), the angle is set to 0.
+
+First output is a matrix of magnitudes of the same size and depth as input x.
+Second output is a matrix of angles that has the same size and depth as
+x; the angles are measured in radians (from 0 to 2\*Pi) or in degrees (0 to 360 degrees).
+
+@note Function textual ID is "org.opencv.core.math.cartToPolar"
+
+@param x matrix of @ref CV_32FC1 x-coordinates.
+@param y array of @ref CV_32FC1 y-coordinates.
+@param angleInDegrees a flag, indicating whether the angles are measured
+in radians (which is by default), or in degrees.
+@sa polarToCart
+*/
+GAPI_EXPORTS std::tuple<GMat, GMat> cartToPolar(const GMat& x, const GMat& y,
+                                              bool angleInDegrees = false);
+
+/** @brief Calculates the rotation angle of 2D vectors.
+
+The function cv::gapi::phase calculates the rotation angle of each 2D vector that
+is formed from the corresponding elements of x and y :
+\f[\texttt{angle} (I) =  \texttt{atan2} ( \texttt{y} (I), \texttt{x} (I))\f]
+
+The angle estimation accuracy is about 0.3 degrees. When x(I)=y(I)=0 ,
+the corresponding angle(I) is set to 0.
+@param x input floating-point array of x-coordinates of 2D vectors.
+@param y input array of y-coordinates of 2D vectors; it must have the
+same size and the same type as x.
+@param angleInDegrees when true, the function calculates the angle in
+degrees, otherwise, they are measured in radians.
+@return array of vector angles; it has the same size and same type as x.
+*/
+GAPI_EXPORTS GMat phase(const GMat& x, const GMat &y, bool angleInDegrees = false);
+
+/** @brief Calculates a square root of array elements.
+
+The function cv::gapi::sqrt calculates a square root of each input array element.
+In case of multi-channel arrays, each channel is processed
+independently. The accuracy is approximately the same as of the built-in
+std::sqrt .
+@param src input floating-point array.
+@return output array of the same size and type as src.
+*/
+GAPI_EXPORTS GMat sqrt(const GMat &src);
+
+//! @} gapi_math
+//!
+//! @addtogroup gapi_pixelwise
+//! @{
+
+/** @brief Performs the per-element comparison of two matrices checking if elements from first matrix are greater than elements in second.
+
+The function compares elements of two matrices src1 and src2 of the same size:
+    \f[\texttt{dst} (I) =  \texttt{src1} (I)  > \texttt{src2} (I)\f]
+
+When the comparison result is true, the corresponding element of output
+array is set to 255. The comparison operations can be replaced with the
+equivalent matrix expressions:
+\f[\texttt{dst} =   \texttt{src1} > \texttt{src2}\f]
+
+Output matrix of depth @ref CV_8U must have the same size and the same number of channels as
+    the input matrices/matrix.
+
+Supported input matrix data types are @ref CV_8UC1, @ref CV_16UC1, @ref CV_16SC1, @ref CV_32FC1.
+
+@note Function textual ID is "org.opencv.core.pixelwise.compare.cmpGT"
+@param src1 first input matrix.
+@param src2 second input matrix/scalar of the same depth as first input matrix.
+@sa min, max, threshold, cmpLE, cmpGE, cmpLT
+*/
+GAPI_EXPORTS GMat cmpGT(const GMat& src1, const GMat& src2);
+/** @overload
+@note Function textual ID is "org.opencv.core.pixelwise.compare.cmpGTScalar"
+*/
+GAPI_EXPORTS GMat cmpGT(const GMat& src1, const GScalar& src2);
+
+/** @brief Performs the per-element comparison of two matrices checking if elements from first matrix are less than elements in second.
+
+The function compares elements of two matrices src1 and src2 of the same size:
+    \f[\texttt{dst} (I) =  \texttt{src1} (I)  < \texttt{src2} (I)\f]
+
+When the comparison result is true, the corresponding element of output
+array is set to 255. The comparison operations can be replaced with the
+equivalent matrix expressions:
+    \f[\texttt{dst} =   \texttt{src1} < \texttt{src2}\f]
+
+Output matrix of depth @ref CV_8U must have the same size and the same number of channels as
+    the input matrices/matrix.
+
+Supported input matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, @ref CV_32FC1.
+
+@note Function textual ID is "org.opencv.core.pixelwise.compare.cmpLT"
+@param src1 first input matrix.
+@param src2 second input matrix/scalar of the same depth as first input matrix.
+@sa min, max, threshold, cmpLE, cmpGE, cmpGT
+*/
+GAPI_EXPORTS GMat cmpLT(const GMat& src1, const GMat& src2);
+/** @overload
+@note Function textual ID is "org.opencv.core.pixelwise.compare.cmpLTScalar"
+*/
+GAPI_EXPORTS GMat cmpLT(const GMat& src1, const GScalar& src2);
+
+/** @brief Performs the per-element comparison of two matrices checking if elements from first matrix are greater than or equal to elements in second.
+
+The function compares elements of two matrices src1 and src2 of the same size:
+    \f[\texttt{dst} (I) =  \texttt{src1} (I)  >= \texttt{src2} (I)\f]
+
+When the comparison result is true, the corresponding element of output
+array is set to 255. The comparison operations can be replaced with the
+equivalent matrix expressions:
+    \f[\texttt{dst} =   \texttt{src1} >= \texttt{src2}\f]
+
+Output matrix of depth @ref CV_8U must have the same size and the same number of channels as
+    the input matrices.
+
+Supported input matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, @ref CV_32FC1.
+
+@note Function textual ID is "org.opencv.core.pixelwise.compare.cmpGE"
+@param src1 first input matrix.
+@param src2 second input matrix/scalar of the same depth as first input matrix.
+@sa min, max, threshold, cmpLE, cmpGT, cmpLT
+*/
+GAPI_EXPORTS GMat cmpGE(const GMat& src1, const GMat& src2);
+/** @overload
+@note Function textual ID is "org.opencv.core.pixelwise.compare.cmpGEScalar"
+*/
+GAPI_EXPORTS GMat cmpGE(const GMat& src1, const GScalar& src2);
+
+/** @brief Performs the per-element comparison of two matrices checking if elements from first matrix are less than or equal to elements in second.
+
+The function compares elements of two matrices src1 and src2 of the same size:
+    \f[\texttt{dst} (I) =  \texttt{src1} (I)  <=  \texttt{src2} (I)\f]
+
+When the comparison result is true, the corresponding element of output
+array is set to 255. The comparison operations can be replaced with the
+equivalent matrix expressions:
+    \f[\texttt{dst} =   \texttt{src1} <= \texttt{src2}\f]
+
+Output matrix of depth @ref CV_8U must have the same size and the same number of channels as
+    the input matrices.
+
+Supported input matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, @ref CV_32FC1.
+
+@note Function textual ID is "org.opencv.core.pixelwise.compare.cmpLE"
+@param src1 first input matrix.
+@param src2 second input matrix/scalar of the same depth as first input matrix.
+@sa min, max, threshold, cmpGT, cmpGE, cmpLT
+*/
+GAPI_EXPORTS GMat cmpLE(const GMat& src1, const GMat& src2);
+/** @overload
+@note Function textual ID is "org.opencv.core.pixelwise.compare.cmpLEScalar"
+*/
+GAPI_EXPORTS GMat cmpLE(const GMat& src1, const GScalar& src2);
+
+/** @brief Performs the per-element comparison of two matrices checking if elements from first matrix are equal to elements in second.
+
+The function compares elements of two matrices src1 and src2 of the same size:
+    \f[\texttt{dst} (I) =  \texttt{src1} (I)  ==  \texttt{src2} (I)\f]
+
+When the comparison result is true, the corresponding element of output
+array is set to 255. The comparison operations can be replaced with the
+equivalent matrix expressions:
+    \f[\texttt{dst} =   \texttt{src1} == \texttt{src2}\f]
+
+Output matrix of depth @ref CV_8U must have the same size and the same number of channels as
+    the input matrices.
+
+Supported input matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, @ref CV_32FC1.
+
+@note Function textual ID is "org.opencv.core.pixelwise.compare.cmpEQ"
+@param src1 first input matrix.
+@param src2 second input matrix/scalar of the same depth as first input matrix.
+@sa min, max, threshold, cmpNE
+*/
+GAPI_EXPORTS GMat cmpEQ(const GMat& src1, const GMat& src2);
+/** @overload
+@note Function textual ID is "org.opencv.core.pixelwise.compare.cmpEQScalar"
+*/
+GAPI_EXPORTS GMat cmpEQ(const GMat& src1, const GScalar& src2);
+
+/** @brief Performs the per-element comparison of two matrices checking if elements from first matrix are not equal to elements in second.
+
+The function compares elements of two matrices src1 and src2 of the same size:
+    \f[\texttt{dst} (I) =  \texttt{src1} (I)  !=  \texttt{src2} (I)\f]
+
+When the comparison result is true, the corresponding element of output
+array is set to 255. The comparison operations can be replaced with the
+equivalent matrix expressions:
+    \f[\texttt{dst} =   \texttt{src1} != \texttt{src2}\f]
+
+Output matrix of depth @ref CV_8U must have the same size and the same number of channels as
+    the input matrices.
+
+Supported input matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, @ref CV_32FC1.
+
+@note Function textual ID is "org.opencv.core.pixelwise.compare.cmpNE"
+@param src1 first input matrix.
+@param src2 second input matrix/scalar of the same depth as first input matrix.
+@sa min, max, threshold, cmpEQ
+*/
+GAPI_EXPORTS GMat cmpNE(const GMat& src1, const GMat& src2);
+/** @overload
+@note Function textual ID is "org.opencv.core.pixelwise.compare.cmpNEScalar"
+*/
+GAPI_EXPORTS GMat cmpNE(const GMat& src1, const GScalar& src2);
+
+/** @brief computes bitwise conjunction of the two matrices (src1 & src2)
+Calculates the per-element bit-wise logical conjunction of two matrices of the same size.
+
+In case of floating-point matrices, their machine-specific bit
+representations (usually IEEE754-compliant) are used for the operation.
+In case of multi-channel matrices, each channel is processed
+independently. Output matrix must have the same size and depth as the input
+matrices.
+
+Supported matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, @ref CV_32FC1.
+
+@note Function textual ID is "org.opencv.core.pixelwise.bitwise_and"
+
+@param src1 first input matrix.
+@param src2 second input matrix.
+*/
+GAPI_EXPORTS GMat bitwise_and(const GMat& src1, const GMat& src2);
+/** @overload
+@note Function textual ID is "org.opencv.core.pixelwise.compare.bitwise_andS"
+@param src1 first input matrix.
+@param src2 scalar, which will be conjuncted per-element with the elements of src1.
+*/
+GAPI_EXPORTS GMat bitwise_and(const GMat& src1, const GScalar& src2);
+
+/** @brief computes bitwise disjunction of the two matrixes (src1 | src2)
+Calculates the per-element bit-wise logical disjunction of two matrices of the same size.
+
+In case of floating-point matrices, their machine-specific bit
+representations (usually IEEE754-compliant) are used for the operation.
+In case of multi-channel matrices, each channel is processed
+independently. Output matrix must have the same size and depth as the input
+matrices.
+
+Supported matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, @ref CV_32FC1.
+
+@note Function textual ID is "org.opencv.core.pixelwise.bitwise_or"
+
+@param src1 first input matrix.
+@param src2 second input matrix.
+*/
+GAPI_EXPORTS GMat bitwise_or(const GMat& src1, const GMat& src2);
+/** @overload
+@note Function textual ID is "org.opencv.core.pixelwise.compare.bitwise_orS"
+@param src1 first input matrix.
+@param src2 scalar, which will be disjuncted per-element with the elements of src1.
+*/
+GAPI_EXPORTS GMat bitwise_or(const GMat& src1, const GScalar& src2);
+
+
+/** @brief computes bitwise logical "exclusive or" of the two matrixes (src1 ^ src2)
+Calculates the per-element bit-wise logical "exclusive or" of two matrices of the same size.
+
+In case of floating-point matrices, their machine-specific bit
+representations (usually IEEE754-compliant) are used for the operation.
+In case of multi-channel matrices, each channel is processed
+independently. Output matrix must have the same size and depth as the input
+matrices.
+
+Supported matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, @ref CV_32FC1.
+
+@note Function textual ID is "org.opencv.core.pixelwise.bitwise_xor"
+
+@param src1 first input matrix.
+@param src2 second input matrix.
+*/
+GAPI_EXPORTS GMat bitwise_xor(const GMat& src1, const GMat& src2);
+/** @overload
+@note Function textual ID is "org.opencv.core.pixelwise.compare.bitwise_xorS"
+@param src1 first input matrix.
+@param src2 scalar, for which the per-element "exclusive or" operation with the elements of src1 will be performed.
+*/
+GAPI_EXPORTS GMat bitwise_xor(const GMat& src1, const GScalar& src2);
+
+
+/** @brief Inverts every bit of an array.
+The function bitwise_not calculates per-element bit-wise inversion of the input
+matrix:
+\f[\texttt{dst} (I) =  \neg \texttt{src} (I)\f]
+
+In case of floating-point matrices, their machine-specific bit
+representations (usually IEEE754-compliant) are used for the operation.
+In case of multi-channel matrices, each channel is processed
+independently. Output matrix must have the same size and depth as the input
+matrix.
+
+Supported matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, @ref CV_32FC1.
+
+@note Function textual ID is "org.opencv.core.pixelwise.bitwise_not"
+
+@param src input matrix.
+*/
+GAPI_EXPORTS GMat bitwise_not(const GMat& src);
+
+/** @brief Select values from either first or second of input matrices by given mask.
+The function sets each element of the output matrix to either the value from the first input matrix (if the corresponding value of the mask matrix is 255),
+ or the value from the second input matrix (if the value of the mask matrix is 0).
+
+Input mask matrix must be of @ref CV_8UC1 type, two other input matrices and output matrix should be of the same type. The size should
+be the same for all input and output matrices.
+Supported input matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, @ref CV_32FC1.
+
+@note Function textual ID is "org.opencv.core.pixelwise.select"
+
+@param src1 first input matrix.
+@param src2 second input matrix.
+@param mask mask input matrix.
+*/
+GAPI_EXPORTS GMat select(const GMat& src1, const GMat& src2, const GMat& mask);
+
+//! @} gapi_pixelwise
+
+
+//! @addtogroup gapi_matrixop
+//! @{
+/** @brief Calculates per-element minimum of two matrices.
+
+The function min calculates the per-element minimum of two matrices of the same size, number of channels and depth:
+\f[\texttt{dst} (I)= \min ( \texttt{src1} (I), \texttt{src2} (I))\f]
+    where I is a multi-dimensional index of matrix elements. In case of
+    multi-channel matrices, each channel is processed independently.
+Output matrix must be of the same size and depth as src1.
+
+Supported input matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, @ref CV_32FC1.
+
+@note Function textual ID is "org.opencv.core.matrixop.min"
+@param src1 first input matrix.
+@param src2 second input matrix of the same size and depth as src1.
+@sa max, compareEqual, compareLess, compareLessEqual
+*/
+GAPI_EXPORTS GMat min(const GMat& src1, const GMat& src2);
+
+/** @brief Calculates per-element maximum of two matrices.
+
+The function max calculates the per-element maximum of two matrices of the same size, number of channels and depth:
+\f[\texttt{dst} (I)= \max ( \texttt{src1} (I), \texttt{src2} (I))\f]
+    where I is a multi-dimensional index of matrix elements. In case of
+    multi-channel matrices, each channel is processed independently.
+Output matrix must be of the same size and depth as src1.
+
+Supported matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, @ref CV_32FC1.
+
+@note Function textual ID is "org.opencv.core.matrixop.max"
+@param src1 first input matrix.
+@param src2 second input matrix of the same size and depth as src1.
+@sa min, compare, compareEqual, compareGreater, compareGreaterEqual
+*/
+GAPI_EXPORTS GMat max(const GMat& src1, const GMat& src2);
+
+/** @brief Calculates the per-element absolute difference between two matrices.
+
+The function absDiff calculates absolute difference between two matrices of the same size and depth:
+    \f[\texttt{dst}(I) =  \texttt{saturate} (| \texttt{src1}(I) -  \texttt{src2}(I)|)\f]
+    where I is a multi-dimensional index of matrix elements. In case of
+    multi-channel matrices, each channel is processed independently.
+Output matrix must have the same size and depth as input matrices.
+
+Supported matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, @ref CV_32FC1.
+
+@note Function textual ID is "org.opencv.core.matrixop.absdiff"
+@param src1 first input matrix.
+@param src2 second input matrix.
+@sa abs
+*/
+GAPI_EXPORTS GMat absDiff(const GMat& src1, const GMat& src2);
+
+/** @brief Calculates the per-element absolute difference between a matrix and a given scalar.
+
+The function absDiffC calculates absolute difference between matrix elements and given scalar value:
+    \f[\texttt{dst}(I) =  \texttt{saturate} (| \texttt{src}(I) -  \texttt{matC}(I)|)\f]
+    where matC is constructed from the given scalar c and has the same size and depth as the input matrix src.
+
+Output matrix must be of the same size and depth as src.
+
+Supported matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, @ref CV_32FC1.
+
+@note Function textual ID is "org.opencv.core.matrixop.absdiffC"
+@param src input matrix.
+@param c scalar to be subtracted.
+@sa min, max
+*/
+GAPI_EXPORTS GMat absDiffC(const GMat& src, const GScalar& c);
+
+/** @brief Calculates sum of all matrix elements.
+
+The function sum calculates sum of all matrix elements, independently for each channel.
+
+Supported matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, @ref CV_32FC1.
+
+@note Function textual ID is "org.opencv.core.matrixop.sum"
+@param src input matrix.
+@sa min, max
+*/
+GAPI_EXPORTS GScalar sum(const GMat& src);
+
+/** @brief Calculates the weighted sum of two matrices.
+
+The function addWeighted calculates the weighted sum of two matrices as follows:
+\f[\texttt{dst} (I)= \texttt{saturate} ( \texttt{src1} (I)* \texttt{alpha} +  \texttt{src2} (I)* \texttt{beta} +  \texttt{gamma} )\f]
+where I is a multi-dimensional index of array elements. In case of multi-channel matrices, each
+channel is processed independently.
+
+The function can be replaced with a matrix expression:
+    \f[\texttt{dst}(I) =  \texttt{alpha} * \texttt{src1}(I) + \texttt{beta} * \texttt{src2}(I) + \texttt{gamma} \f]
+
+Supported matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, @ref CV_32FC1.
+
+@note Function textual ID is "org.opencv.core.matrixop.addweighted"
+@param src1 first input matrix.
+@param alpha weight of the first matrix elements.
+@param src2 second input matrix of the same size and channel number as src1.
+@param beta weight of the second matrix elements.
+@param gamma scalar added to each sum.
+@param ddepth optional depth of the output matrix.
+@sa  add, sub
+*/
+GAPI_EXPORTS GMat addWeighted(const GMat& src1, double alpha, const GMat& src2, double beta, double gamma, int ddepth = -1);
+
+/** @brief Calculates the  absolute L1 norm of a matrix.
+
+This version of normL1 calculates the absolute L1 norm of src.
+
+As example for one array consider the function \f$r(x)= \begin{pmatrix} x \\ 1-x \end{pmatrix}, x \in [-1;1]\f$.
+The \f$ L_{1} \f$ norm for the sample value \f$r(-1) = \begin{pmatrix} -1 \\ 2 \end{pmatrix}\f$
+is calculated as follows
+\f{align*}
+    \| r(-1) \|_{L_1} &= |-1| + |2| = 3 \\
+\f}
+and for \f$r(0.5) = \begin{pmatrix} 0.5 \\ 0.5 \end{pmatrix}\f$ the calculation is
+\f{align*}
+    \| r(0.5) \|_{L_1} &= |0.5| + |0.5| = 1 \\
+\f}
+
+Supported matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, @ref CV_32FC1.
+
+@note Function textual ID is "org.opencv.core.matrixop.norml1"
+@param src input matrix.
+@sa normL2, normInf
+*/
+GAPI_EXPORTS GScalar normL1(const GMat& src);
+
+/** @brief Calculates the absolute L2 norm of a matrix.
+
+This version of normL2 calculates the absolute L2 norm of src.
+
+As example for one array consider the function \f$r(x)= \begin{pmatrix} x \\ 1-x \end{pmatrix}, x \in [-1;1]\f$.
+The \f$ L_{2} \f$  norm for the sample value \f$r(-1) = \begin{pmatrix} -1 \\ 2 \end{pmatrix}\f$
+is calculated as follows
+\f{align*}
+    \| r(-1) \|_{L_2} &= \sqrt{(-1)^{2} + (2)^{2}} = \sqrt{5} \\
+\f}
+and for \f$r(0.5) = \begin{pmatrix} 0.5 \\ 0.5 \end{pmatrix}\f$ the calculation is
+\f{align*}
+    \| r(0.5) \|_{L_2} &= \sqrt{(0.5)^{2} + (0.5)^{2}} = \sqrt{0.5} \\
+\f}
+
+Supported matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, @ref CV_32FC1.
+@note Function textual ID is "org.opencv.core.matrixop.norml2"
+@param src input matrix.
+@sa normL1, normInf
+*/
+GAPI_EXPORTS GScalar normL2(const GMat& src);
+
+/** @brief Calculates the absolute infinite norm of a matrix.
+
+This version of normInf calculates the absolute infinite norm of src.
+
+As example for one array consider the function \f$r(x)= \begin{pmatrix} x \\ 1-x \end{pmatrix}, x \in [-1;1]\f$.
+The \f$ L_{\infty} \f$ norm for the sample value \f$r(-1) = \begin{pmatrix} -1 \\ 2 \end{pmatrix}\f$
+is calculated as follows
+\f{align*}
+    \| r(-1) \|_{L_\infty} &= \max(|-1|,|2|) = 2
+\f}
+and for \f$r(0.5) = \begin{pmatrix} 0.5 \\ 0.5 \end{pmatrix}\f$ the calculation is
+\f{align*}
+    \| r(0.5) \|_{L_\infty} &= \max(|0.5|,|0.5|) = 0.5.
+\f}
+
+Supported matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, @ref CV_32FC1.
+
+@note Function textual ID is "org.opencv.core.matrixop.norminf"
+@param src input matrix.
+@sa normL1, normL2
+*/
+GAPI_EXPORTS GScalar normInf(const GMat& src);
+
+/** @brief Calculates the integral of an image.
+
+The function calculates one or more integral images for the source image as follows:
+
+\f[\texttt{sum} (X,Y) =  \sum _{x<X,y<Y}  \texttt{image} (x,y)\f]
+
+\f[\texttt{sqsum} (X,Y) =  \sum _{x<X,y<Y}  \texttt{image} (x,y)^2\f]
+
+The function returns the integral image as a \f$(W+1)\times (H+1)\f$, 32-bit integer or floating-point (32f or 64f) array and
+ the integral image for squared pixel values as a \f$(W+1)\times (H+1)\f$, double-precision floating-point (64f) array.
+
+@note Function textual ID is "org.opencv.core.matrixop.integral"
+
+@param src input image.
+@param sdepth desired depth of the integral and the tilted integral images, CV_32S, CV_32F, or
+CV_64F.
+@param sqdepth desired depth of the integral image of squared pixel values, CV_32F or CV_64F.
+ */
+GAPI_EXPORTS std::tuple<GMat, GMat> integral(const GMat& src, int sdepth = -1, int sqdepth = -1);
+
+/** @brief Applies a fixed-level threshold to each matrix element.
+
+The function applies fixed-level thresholding to a single- or multiple-channel matrix.
+The function is typically used to get a bi-level (binary) image out of a grayscale image ( cmp functions could be also used for
+this purpose) or for removing a noise, that is, filtering out pixels with too small or too large
+values. There are several types of thresholding supported by the function. They are determined by
+type parameter.
+
+Also, the special values cv::THRESH_OTSU or cv::THRESH_TRIANGLE may be combined with one of the
+above values. In these cases, the function determines the optimal threshold value using the Otsu's
+or Triangle algorithm and uses it instead of the specified thresh . The function returns the
+computed threshold value in addition to the thresholded matrix.
+The Otsu's and Triangle methods are implemented only for 8-bit matrices.
+
+Input image should be single channel only in case of cv::THRESH_OTSU or cv::THRESH_TRIANGLE flags.
+Output matrix must be of the same size and depth as src.
+
+@note Function textual ID is "org.opencv.core.matrixop.threshold"
+
+@param src input matrix (@ref CV_8UC1, @ref CV_8UC3, or @ref CV_32FC1).
+@param thresh threshold value.
+@param maxval maximum value to use with the cv::THRESH_BINARY and cv::THRESH_BINARY_INV thresholding
+types.
+@param type thresholding type (see the cv::ThresholdTypes).
+
+@sa min, max, cmpGT, cmpLE, cmpGE, cmpLT
+ */
+GAPI_EXPORTS GMat threshold(const GMat& src, const GScalar& thresh, const GScalar& maxval, int type);
+/** @overload
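+This overload is applicable only for the cv::THRESH_OTSU and cv::THRESH_TRIANGLE threshold types: the threshold value is computed by the function and returned together with the thresholded matrix.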
+@note Function textual ID is "org.opencv.core.matrixop.thresholdOT"
+*/
+GAPI_EXPORTS std::tuple<GMat, GScalar> threshold(const GMat& src, const GScalar& maxval, int type);
+
+/** @brief Applies a range-level threshold to each matrix element.
+
+The function applies range-level thresholding to a single- or multiple-channel matrix.
+It sets output pixel value to 0xFF if the corresponding pixel value of input matrix is in specified range, or 0 otherwise.
+
+Input and output matrices must be CV_8UC1.
+
+@note Function textual ID is "org.opencv.core.matrixop.inRange"
+
+@param src input matrix (CV_8UC1).
+@param threshLow lower boundary value.
+@param threshUp upper boundary value.
+
+@sa threshold
+ */
+GAPI_EXPORTS GMat inRange(const GMat& src, const GScalar& threshLow, const GScalar& threshUp);
+
+//! @} gapi_matrixop
+
+//! @addtogroup gapi_transform
+//! @{
+/** @brief Resizes an image.
+
+The function resizes the image src down to or up to the specified size.
+
+Output image size will have the size dsize (when dsize is non-zero) or the size computed from
+src.size(), fx, and fy; the depth of output is the same as of src.
+
+If you want to resize src so that it fits the pre-created dst,
+you may call the function as follows:
+@code
+    // explicitly specify dsize=dst.size(); fx and fy will be computed from that.
+    resize(src, dst, dst.size(), 0, 0, interpolation);
+@endcode
+If you want to decimate the image by factor of 2 in each direction, you can call the function this
+way:
+@code
+    // specify fx and fy and let the function compute the destination image size.
+    resize(src, dst, Size(), 0.5, 0.5, interpolation);
+@endcode
+To shrink an image, it will generally look best with cv::INTER_AREA interpolation, whereas to
+enlarge an image, it will generally look best with cv::INTER_CUBIC (slow) or cv::INTER_LINEAR
+(faster but still looks OK).
+
+@note Function textual ID is "org.opencv.core.transform.resize"
+
+@param src input image.
+@param dsize output image size; if it equals zero, it is computed as:
+ \f[\texttt{dsize = Size(round(fx*src.cols), round(fy*src.rows))}\f]
+ Either dsize or both fx and fy must be non-zero.
+@param fx scale factor along the horizontal axis; when it equals 0, it is computed as
+\f[\texttt{(double)dsize.width/src.cols}\f]
+@param fy scale factor along the vertical axis; when it equals 0, it is computed as
+\f[\texttt{(double)dsize.height/src.rows}\f]
+@param interpolation interpolation method, see cv::InterpolationFlags
+
+@sa  warpAffine, warpPerspective, remap, resizeP
+ */
+GAPI_EXPORTS GMat resize(const GMat& src, const Size& dsize, double fx = 0, double fy = 0, int interpolation = INTER_LINEAR);
+
+/** @brief Resizes a planar image.
+
+The function resizes the image src down to or up to the specified size.
+Planar image memory layout is three planes lying in the memory contiguously,
+so the image height should be plane_height*plane_number, image type is @ref CV_8UC1.
+
+Output image size will have the size dsize, the depth of output is the same as of src.
+
+@note Function textual ID is "org.opencv.core.transform.resizeP"
+
+@param src input image, must be of @ref CV_8UC1 type;
+@param dsize output image size;
+@param interpolation interpolation method, only cv::INTER_LINEAR is supported at the moment
+
+@sa  warpAffine, warpPerspective, remap, resize
+ */
+GAPI_EXPORTS GMatP resizeP(const GMatP& src, const Size& dsize, int interpolation = cv::INTER_LINEAR);
+
+/** @brief Creates one 3-channel (4-channel) matrix out of 3(4) single-channel ones.
+
+The function merges several matrices to make a single multi-channel matrix. That is, each
+element of the output matrix will be a concatenation of the elements of the input matrices, where
+elements of i-th input matrix are treated as mv[i].channels()-element vectors.
+Output matrix must be of @ref CV_8UC3 (@ref CV_8UC4) type.
+
+The function split3/split4 does the reverse operation.
+
+@note Function textual ID for merge3 is "org.opencv.core.transform.merge3"
+@note Function textual ID for merge4 is "org.opencv.core.transform.merge4"
+
+@param src1 first input matrix to be merged
+@param src2 second input matrix to be merged
+@param src3 third input matrix to be merged
+@param src4 fourth input matrix to be merged
+@sa  split4, split3
+*/
+GAPI_EXPORTS GMat merge4(const GMat& src1, const GMat& src2, const GMat& src3, const GMat& src4);
+GAPI_EXPORTS GMat merge3(const GMat& src1, const GMat& src2, const GMat& src3);
+
+/** @brief Divides a 3-channel (4-channel) matrix into 3(4) single-channel matrices.
+
+The function splits a 3-channel (4-channel) matrix into 3(4) single-channel matrices:
+\f[\texttt{mv} [c](I) =  \texttt{src} (I)_c\f]
+
+All output matrices must be in @ref CV_8UC1.
+
+@note Function textual ID for split3 is "org.opencv.core.transform.split3"
+@note Function textual ID for split4 is "org.opencv.core.transform.split4"
+
+@param src input @ref CV_8UC4 (@ref CV_8UC3) matrix.
+@sa merge3, merge4
+*/
+GAPI_EXPORTS std::tuple<GMat, GMat, GMat,GMat> split4(const GMat& src);
+GAPI_EXPORTS std::tuple<GMat, GMat, GMat> split3(const GMat& src);
+
+/** @brief Applies a generic geometrical transformation to an image.
+
+The function remap transforms the source image using the specified map:
+
+\f[\texttt{dst} (x,y) =  \texttt{src} (map_x(x,y),map_y(x,y))\f]
+
+where values of pixels with non-integer coordinates are computed using one of available
+interpolation methods. \f$map_x\f$ and \f$map_y\f$ can be encoded as separate floating-point maps
+in \f$map_1\f$ and \f$map_2\f$ respectively, or interleaved floating-point maps of \f$(x,y)\f$ in
+\f$map_1\f$, or fixed-point maps created by using convertMaps. The reason you might want to
+convert from floating to fixed-point representations of a map is that they can yield much faster
+(\~2x) remapping operations. In the converted case, \f$map_1\f$ contains pairs (cvFloor(x),
+cvFloor(y)) and \f$map_2\f$ contains indices in a table of interpolation coefficients.
+Output image must be of the same size and depth as input one.
+
+@note Function textual ID is "org.opencv.core.transform.remap"
+
+@param src Source image.
+@param map1 The first map of either (x,y) points or just x values having the type CV_16SC2,
+CV_32FC1, or CV_32FC2.
+@param map2 The second map of y values having the type CV_16UC1, CV_32FC1, or none (empty map
+if map1 is (x,y) points), respectively.
+@param interpolation Interpolation method (see cv::InterpolationFlags). The method INTER_AREA is
+not supported by this function.
+@param borderMode Pixel extrapolation method (see cv::BorderTypes). When
+borderMode=BORDER_TRANSPARENT, it means that the pixels in the destination image that
+corresponds to the "outliers" in the source image are not modified by the function.
+@param borderValue Value used in case of a constant border. By default, it is 0.
+@note
+Due to current implementation limitations the size of an input and output images should be less than 32767x32767.
+ */
+GAPI_EXPORTS GMat remap(const GMat& src, const Mat& map1, const Mat& map2,
+                      int interpolation, int borderMode = BORDER_CONSTANT,
+                      const Scalar& borderValue = Scalar());
+
+/** @brief Flips a 2D matrix around vertical, horizontal, or both axes.
+
+The function flips the matrix in one of three different ways (row
+and column indices are 0-based):
+\f[\texttt{dst} _{ij} =
+\left\{
+\begin{array}{l l}
+\texttt{src} _{\texttt{src.rows}-i-1,j} & if\;  \texttt{flipCode} = 0 \\
+\texttt{src} _{i, \texttt{src.cols} -j-1} & if\;  \texttt{flipCode} > 0 \\
+\texttt{src} _{ \texttt{src.rows} -i-1, \texttt{src.cols} -j-1} & if\; \texttt{flipCode} < 0 \\
+\end{array}
+\right.\f]
+The example scenarios of using the function are the following:
+*   Vertical flipping of the image (flipCode == 0) to switch between
+    top-left and bottom-left image origin. This is a typical operation
+    in video processing on Microsoft Windows\* OS.
+*   Horizontal flipping of the image with the subsequent horizontal
+    shift and absolute difference calculation to check for a
+    vertical-axis symmetry (flipCode \> 0).
+*   Simultaneous horizontal and vertical flipping of the image with
+    the subsequent shift and absolute difference calculation to check
+    for a central symmetry (flipCode \< 0).
+*   Reversing the order of point arrays (flipCode \> 0 or
+    flipCode == 0).
+Output image must be of the same depth as input one, size should be correct for given flipCode.
+
+@note Function textual ID is "org.opencv.core.transform.flip"
+
+@param src input matrix.
+@param flipCode a flag to specify how to flip the array; 0 means
+flipping around the x-axis and positive value (for example, 1) means
+flipping around y-axis. Negative value (for example, -1) means flipping
+around both axes.
+@sa remap
+*/
+GAPI_EXPORTS GMat flip(const GMat& src, int flipCode);
+
+/** @brief Crops a 2D matrix.
+
+The function crops the matrix by given cv::Rect.
+
+Output matrix must be of the same depth as input one, size is specified by given rect size.
+
+@note Function textual ID is "org.opencv.core.transform.crop"
+
+@param src input matrix.
+@param rect a rect to crop a matrix to
+@sa resize
+*/
+GAPI_EXPORTS GMat crop(const GMat& src, const Rect& rect);
+
+/** @brief Copies a matrix.
+
+Copies an input array. Works as a regular Mat::clone but happens in-graph.
+Mainly used to work around some existing limitations (e.g. to forward an input frame to outputs
+in the streaming mode). Will be deprecated and removed in the future.
+
+@note Function textual ID is "org.opencv.core.transform.copy"
+
+@param src input matrix.
+@sa crop
+*/
+GAPI_EXPORTS GMat copy(const GMat& src);
+
+/** @brief Applies horizontal concatenation to given matrices.
+
+The function horizontally concatenates two GMat matrices (with the same number of rows).
+@code{.cpp}
+    GMat A = { 1, 4,
+               2, 5,
+               3, 6 };
+    GMat B = { 7, 10,
+               8, 11,
+               9, 12 };
+
+    GMat C = gapi::concatHor(A, B);
+    //C:
+    //[1, 4, 7, 10;
+    // 2, 5, 8, 11;
+    // 3, 6, 9, 12]
+@endcode
+Output matrix must have the same number of rows and depth as the src1 and src2, and the sum of cols of the src1 and src2.
+Supported matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, @ref CV_32FC1.
+
+@note Function textual ID is "org.opencv.imgproc.transform.concatHor"
+
+@param src1 first input matrix to be considered for horizontal concatenation.
+@param src2 second input matrix to be considered for horizontal concatenation.
+@sa concatVert
+*/
+GAPI_EXPORTS GMat concatHor(const GMat& src1, const GMat& src2);
+
+/** @overload
+The function horizontally concatenates given number of GMat matrices (with the same number of rows).
+Output matrix must have the same number of rows and depth as the input matrices, and the sum of cols of input matrices.
+
+@param v vector of input matrices to be concatenated horizontally.
+*/
+GAPI_EXPORTS GMat concatHor(const std::vector<GMat> &v);
+
+/** @brief Applies vertical concatenation to given matrices.
+
+The function vertically concatenates two GMat matrices (with the same number of cols).
+ @code{.cpp}
+    GMat A = { 1, 7,
+               2, 8,
+               3, 9 };
+    GMat B = { 4, 10,
+               5, 11,
+               6, 12 };
+
+    GMat C = gapi::concatVert(A, B);
+    //C:
+    //[1, 7;
+    // 2, 8;
+    // 3, 9;
+    // 4, 10;
+    // 5, 11;
+    // 6, 12]
+ @endcode
+
+Output matrix must have the same number of cols and depth as the src1 and src2, and the sum of rows of the src1 and src2.
+Supported matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, @ref CV_32FC1.
+
+@note Function textual ID is "org.opencv.imgproc.transform.concatVert"
+
+@param src1 first input matrix to be considered for vertical concatenation.
+@param src2 second input matrix to be considered for vertical concatenation.
+@sa concatHor
+*/
+GAPI_EXPORTS GMat concatVert(const GMat& src1, const GMat& src2);
+
+/** @overload
+The function vertically concatenates given number of GMat matrices (with the same number of columns).
+Output matrix must have the same number of columns and depth as the input matrices, and the sum of rows of input matrices.
+
+@param v vector of input matrices to be concatenated vertically.
+*/
+GAPI_EXPORTS GMat concatVert(const std::vector<GMat> &v);
+
+
+/** @brief Performs a look-up table transform of a matrix.
+
+The function LUT fills the output matrix with values from the look-up table. Indices of the entries
+are taken from the input matrix. That is, the function processes each element of src as follows:
+\f[\texttt{dst} (I)  \leftarrow \texttt{lut(src(I))}\f]
+
+Supported matrix data types are @ref CV_8UC1.
+Output is a matrix of the same size and number of channels as src, and the same depth as lut.
+
+@note Function textual ID is "org.opencv.core.transform.LUT"
+
+@param src input matrix of 8-bit elements.
+@param lut look-up table of 256 elements; in case of multi-channel input array, the table should
+either have a single channel (in this case the same table is used for all channels) or the same
+number of channels as in the input matrix.
+*/
+GAPI_EXPORTS GMat LUT(const GMat& src, const Mat& lut);
+
+/** @brief Converts a matrix to another data depth with optional scaling.
+
+The method converts source pixel values to the target data depth. saturate_cast\<\> is applied at
+the end to avoid possible overflows:
+
+\f[m(x,y) = saturate \_ cast<rType>( \alpha (*this)(x,y) +  \beta )\f]
+Output matrix must be of the same size as input one.
+
+@note Function textual ID is "org.opencv.core.transform.convertTo"
+@param src input matrix to be converted from.
+@param rdepth desired output matrix depth or, rather, the depth since the number of channels are the
+same as the input has; if rdepth is negative, the output matrix will have the same depth as the input.
+@param alpha optional scale factor.
+@param beta optional delta added to the scaled values.
+ */
+GAPI_EXPORTS GMat convertTo(const GMat& src, int rdepth, double alpha=1, double beta=0);
+
+/** @brief Normalizes the norm or value range of an array.
+
+The function scales and shifts the input array elements so that
+\f[\| \texttt{dst} \| _{L_p}= \texttt{alpha}\f]
+(where p=Inf, 1 or 2) when normType=NORM_INF, NORM_L1, or NORM_L2, respectively; or so that
+\f[\min _I  \texttt{dst} (I)= \texttt{alpha} , \, \, \max _I  \texttt{dst} (I)= \texttt{beta}\f]
+when normType=NORM_MINMAX (for dense arrays only).
+
+@note Function textual ID is "org.opencv.core.normalize"
+
+@param src input array.
+@param alpha norm value to normalize to or the lower range boundary in case of the range
+normalization.
+@param beta upper range boundary in case of the range normalization; it is not used for the norm
+normalization.
+@param norm_type normalization type (see cv::NormTypes).
+@param ddepth when negative, the output array has the same type as src; otherwise, it has the same
+number of channels as src and the depth =ddepth.
+@sa norm, Mat::convertTo
+*/
+GAPI_EXPORTS GMat normalize(const GMat& src, double alpha, double beta,
+                            int norm_type, int ddepth = -1);
+
+/** @brief Applies a perspective transformation to an image.
+
+The function warpPerspective transforms the source image using the specified matrix:
+
+\f[\texttt{dst} (x,y) =  \texttt{src} \left ( \frac{M_{11} x + M_{12} y + M_{13}}{M_{31} x + M_{32} y + M_{33}} ,
+     \frac{M_{21} x + M_{22} y + M_{23}}{M_{31} x + M_{32} y + M_{33}} \right )\f]
+
+when the flag #WARP_INVERSE_MAP is set. Otherwise, the transformation is first inverted with invert
+and then put in the formula above instead of M. The function cannot operate in-place.
+
+@param src input image.
+@param M \f$3\times 3\f$ transformation matrix.
+@param dsize size of the output image.
+@param flags combination of interpolation methods (#INTER_LINEAR or #INTER_NEAREST) and the
+optional flag #WARP_INVERSE_MAP, that sets M as the inverse transformation (
+\f$\texttt{dst}\rightarrow\texttt{src}\f$ ).
+@param borderMode pixel extrapolation method (#BORDER_CONSTANT or #BORDER_REPLICATE).
+@param borderValue value used in case of a constant border; by default, it equals 0.
+
+@sa  warpAffine, resize, remap, getRectSubPix, perspectiveTransform
+ */
+GAPI_EXPORTS GMat warpPerspective(const GMat& src, const Mat& M, const Size& dsize, int flags = cv::INTER_LINEAR,
+                                  int borderMode = cv::BORDER_CONSTANT, const Scalar& borderValue = Scalar());
+
+/** @brief Applies an affine transformation to an image.
+
+The function warpAffine transforms the source image using the specified matrix:
+
+\f[\texttt{dst} (x,y) =  \texttt{src} ( \texttt{M} _{11} x +  \texttt{M} _{12} y +  \texttt{M} _{13}, \texttt{M} _{21} x +  \texttt{M} _{22} y +  \texttt{M} _{23})\f]
+
+when the flag #WARP_INVERSE_MAP is set. Otherwise, the transformation is first inverted
+with #invertAffineTransform and then put in the formula above instead of M. The function cannot
+operate in-place.
+
+@param src input image.
+@param M \f$2\times 3\f$ transformation matrix.
+@param dsize size of the output image.
+@param flags combination of interpolation methods (see #InterpolationFlags) and the optional
+flag #WARP_INVERSE_MAP that means that M is the inverse transformation (
+\f$\texttt{dst}\rightarrow\texttt{src}\f$ ).
+@param borderMode pixel extrapolation method (see #BorderTypes);
+borderMode=#BORDER_TRANSPARENT isn't supported
+@param borderValue value used in case of a constant border; by default, it is 0.
+
+@sa  warpPerspective, resize, remap, getRectSubPix, transform
+ */
+GAPI_EXPORTS GMat warpAffine(const GMat& src, const Mat& M, const Size& dsize, int flags = cv::INTER_LINEAR,
+                             int borderMode = cv::BORDER_CONSTANT, const Scalar& borderValue = Scalar());
+//! @} gapi_transform
+
+} //namespace gapi
+} //namespace cv
+
+#endif //OPENCV_GAPI_CORE_HPP
diff --git a/IPL/include/opencv/opencv2/gapi/cpu/core.hpp b/IPL/include/opencv/opencv2/gapi/cpu/core.hpp
new file mode 100644
index 0000000..ffd3596
--- /dev/null
+++ b/IPL/include/opencv/opencv2/gapi/cpu/core.hpp
@@ -0,0 +1,27 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2018 Intel Corporation
+
+
+#ifndef OPENCV_GAPI_CPU_CORE_API_HPP
+#define OPENCV_GAPI_CPU_CORE_API_HPP
+
+#include <opencv2/gapi/gkernel.hpp> // GKernelPackage
+#include <opencv2/gapi/own/exports.hpp> // GAPI_EXPORTS
+
+namespace cv {
+namespace gapi {
+namespace core {
+namespace cpu {
+
+GAPI_EXPORTS GKernelPackage kernels();
+
+} // namespace cpu
+} // namespace core
+} // namespace gapi
+} // namespace cv
+
+
+#endif // OPENCV_GAPI_CPU_CORE_API_HPP
diff --git a/IPL/include/opencv/opencv2/gapi/cpu/gcpukernel.hpp b/IPL/include/opencv/opencv2/gapi/cpu/gcpukernel.hpp
new file mode 100644
index 0000000..764e085
--- /dev/null
+++ b/IPL/include/opencv/opencv2/gapi/cpu/gcpukernel.hpp
@@ -0,0 +1,385 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2018-2019 Intel Corporation
+
+
+#ifndef OPENCV_GAPI_GCPUKERNEL_HPP
+#define OPENCV_GAPI_GCPUKERNEL_HPP
+
+#include <functional>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include <opencv2/core/mat.hpp>
+#include <opencv2/gapi/gcommon.hpp>
+#include <opencv2/gapi/gkernel.hpp>
+#include <opencv2/gapi/garg.hpp>
+#include <opencv2/gapi/own/convert.hpp> //to_ocv
+#include <opencv2/gapi/util/compiler_hints.hpp> //suppress_unused_warning
+#include <opencv2/gapi/util/util.hpp>
+
+// FIXME: namespace scheme for backends?
+namespace cv {
+
+namespace gimpl
+{
+    // Forward-declare an internal class
+    class GCPUExecutable;
+
+    namespace render
+    {
+    namespace ocv
+    {
+        class GRenderExecutable;
+    }
+    }
+} // namespace gimpl
+
+namespace gapi
+{
+namespace cpu
+{
+    /**
+     * \addtogroup gapi_std_backends
+     * @{
+     *
+     * @brief G-API backends available in this OpenCV version
+     *
+     * G-API backends play a cornerstone role in G-API execution
+     * stack. Every backend is hardware-oriented and thus can run its
+     * kernels efficiently on the target platform.
+     *
+     * Backends are usually "black boxes" for G-API users -- on the API
+     * side, all backends are represented as different objects of the
+     * same class cv::gapi::GBackend.
+     * Users can control which backends are used by specifying which kernels to use.
+     *
+     * @sa @ref gapi_hld
+     */
+
+    /**
+     * @brief Get a reference to CPU (OpenCV) backend.
+     *
+     * This is the default backend in G-API at the moment, providing
+     * broader functional coverage but losing some graph model
+     * advantages. Provided mostly for reference and prototyping
+     * purposes.
+     *
+     * @sa gapi_std_backends
+     */
+    GAPI_EXPORTS cv::gapi::GBackend backend();
+    /** @} */
+
+    class GOCVFunctor;
+
+    //! @cond IGNORED
+    template<typename K, typename Callable>
+    GOCVFunctor ocv_kernel(const Callable& c);
+
+    template<typename K, typename Callable>
+    GOCVFunctor ocv_kernel(Callable& c);
+    //! @endcond
+
+} // namespace cpu
+} // namespace gapi
+
+// Represents arguments which are passed to a wrapped CPU function
+// FIXME: put into detail?
+class GAPI_EXPORTS GCPUContext
+{
+public:
+    // Generic accessor: returns input argument number `input` as T (index is checked by at())
+    template<typename T>
+    const T& inArg(int input) { return m_args.at(input).get<T>(); }
+
+    // Syntactic sugar: typed accessors for Mat/Scalar/vector/opaque inputs and outputs
+    const cv::gapi::own::Mat&   inMat(int input);
+    cv::gapi::own::Mat&         outMatR(int output); // FIXME: Avoid cv::gapi::own::Mat m = ctx.outMatR()
+
+    const cv::Scalar& inVal(int input);
+    cv::Scalar& outValR(int output); // FIXME: Avoid cv::Scalar s = ctx.outValR()
+    template<typename T> std::vector<T>& outVecR(int output) // FIXME: the same issue
+    {
+        return outVecRef(output).wref<T>();
+    }
+    template<typename T> T& outOpaqueR(int output) // FIXME: the same issue
+    {
+        return outOpaqueRef(output).wref<T>();
+    }
+
+protected:
+    detail::VectorRef& outVecRef(int output);
+    detail::OpaqueRef& outOpaqueRef(int output);
+
+    std::vector<GArg> m_args;
+
+    //FIXME: avoid converting arguments from the internal representation to the OpenCV one on each
+    //call to an OCV kernel. (This can be achieved by two one-time conversions in GCPUExecutable::run:
+    //once on entry for input and output arguments, and once before return for output arguments only.)
+    std::unordered_map<std::size_t, GRunArgP> m_results;
+
+    friend class gimpl::GCPUExecutable;
+    friend class gimpl::render::ocv::GRenderExecutable;
+};
+
+class GAPI_EXPORTS GCPUKernel
+{
+public:
+    // This function is kernel's execution entry point (does the processing work)
+    using F = std::function<void(GCPUContext &)>;
+
+    GCPUKernel();
+    explicit GCPUKernel(const F& f);
+
+    void apply(GCPUContext &ctx);
+
+protected:
+    F m_f;
+};
+
+// FIXME: This is an ugly ad-hoc implementation. TODO: refactor
+
+namespace detail
+{
+template<class T> struct get_in;
+template<> struct get_in<cv::GMat>
+{
+    static cv::Mat    get(GCPUContext &ctx, int idx) { return to_ocv(ctx.inMat(idx)); }
+};
+template<> struct get_in<cv::GMatP>
+{
+    static cv::Mat    get(GCPUContext &ctx, int idx) { return get_in<cv::GMat>::get(ctx, idx); }
+};
+template<> struct get_in<cv::GFrame>
+{
+    static cv::Mat    get(GCPUContext &ctx, int idx) { return get_in<cv::GMat>::get(ctx, idx); }
+};
+template<> struct get_in<cv::GScalar>
+{
+    static cv::Scalar get(GCPUContext &ctx, int idx) { return ctx.inVal(idx); }
+};
+template<typename U> struct get_in<cv::GArray<U> >
+{
+    static const std::vector<U>& get(GCPUContext &ctx, int idx) { return ctx.inArg<VectorRef>(idx).rref<U>(); }
+};
+template<typename U> struct get_in<cv::GOpaque<U> >
+{
+    static const U& get(GCPUContext &ctx, int idx) { return ctx.inArg<OpaqueRef>(idx).rref<U>(); }
+};
+
+//FIXME(dm): GArray<Mat>/GArray<GMat> conversion should be done more gracefully in the system
+template<> struct get_in<cv::GArray<cv::GMat> >: public get_in<cv::GArray<cv::Mat> >
+{
+};
+
+//FIXME(dm): GArray<Scalar>/GArray<GScalar> conversion should be done more gracefully in the system
+template<> struct get_in<cv::GArray<cv::GScalar> >: public get_in<cv::GArray<cv::Scalar> >
+{
+};
+
+//FIXME(dm): GOpaque<Mat>/GOpaque<GMat> conversion should be done more gracefully in the system
+template<> struct get_in<cv::GOpaque<cv::GMat> >: public get_in<cv::GOpaque<cv::Mat> >
+{
+};
+
+//FIXME(dm): GOpaque<Scalar>/GOpaque<GScalar> conversion should be done more gracefully in the system
+template<> struct get_in<cv::GOpaque<cv::GScalar> >: public get_in<cv::GOpaque<cv::Scalar> >
+{ // NB: reuses the cv::Scalar accessor, mirroring the GArray<GScalar> specialization above
+};
+
+template<class T> struct get_in
+{
+    static T get(GCPUContext &ctx, int idx) { return ctx.inArg<T>(idx); }
+};
+
+struct tracked_cv_mat{
+    tracked_cv_mat(cv::gapi::own::Mat& m) : r{to_ocv(m)}, original_data{m.data} {}
+    cv::Mat r;
+    uchar* original_data;
+
+    operator cv::Mat& (){ return r;}
+    void validate() const{
+        if (r.data != original_data)
+        {
+            util::throw_error
+                (std::logic_error
+                 ("OpenCV kernel output parameter was reallocated. \n"
+                  "Incorrect meta data was provided ?"));
+        }
+    }
+};
+
+template<typename... Outputs>
+void postprocess(Outputs&... outs)
+{
+    struct
+    {
+        void operator()(tracked_cv_mat* bm) { bm->validate();  }
+        void operator()(...)                {                  }
+
+    } validate;
+    //dummy array to unfold parameter pack
+    int dummy[] = { 0, (validate(&outs), 0)... };
+    cv::util::suppress_unused_warning(dummy);
+}
+
+template<class T> struct get_out;
+template<> struct get_out<cv::GMat>
+{
+    static tracked_cv_mat get(GCPUContext &ctx, int idx)
+    {
+        auto& r = ctx.outMatR(idx);
+        return {r};
+    }
+};
+template<> struct get_out<cv::GMatP>
+{
+    static tracked_cv_mat get(GCPUContext &ctx, int idx)
+    {
+        return get_out<cv::GMat>::get(ctx, idx);
+    }
+};
+template<> struct get_out<cv::GScalar>
+{
+    static cv::Scalar& get(GCPUContext &ctx, int idx)
+    {
+        return ctx.outValR(idx);
+    }
+};
+template<typename U> struct get_out<cv::GArray<U>>
+{
+    static std::vector<U>& get(GCPUContext &ctx, int idx)
+    {
+        return ctx.outVecR<U>(idx);
+    }
+};
+template<typename U> struct get_out<cv::GOpaque<U>>
+{
+    static U& get(GCPUContext &ctx, int idx)
+    {
+        return ctx.outOpaqueR<U>(idx);
+    }
+};
+
+template<typename, typename, typename>
+struct OCVCallHelper;
+
+// FIXME: probably can be simplified with std::apply or analogue.
+template<typename Impl, typename... Ins, typename... Outs>
+struct OCVCallHelper<Impl, std::tuple<Ins...>, std::tuple<Outs...> >
+{
+    template<typename... Inputs>
+    struct call_and_postprocess
+    {
+        template<typename... Outputs>
+        static void call(Inputs&&... ins, Outputs&&... outs)
+        {
+            //not using a std::forward on outs is deliberate in order to
+            //cause compilation error, by trying to bind rvalue references to lvalue references
+            Impl::run(std::forward<Inputs>(ins)..., outs...);
+            postprocess(outs...);
+        }
+
+        template<typename... Outputs>
+        static void call(Impl& impl, Inputs&&... ins, Outputs&&... outs)
+        {
+            impl(std::forward<Inputs>(ins)..., outs...);
+        }
+    };
+
+    template<int... IIs, int... OIs>
+    static void call_impl(GCPUContext &ctx, detail::Seq<IIs...>, detail::Seq<OIs...>)
+    {
+        //Make sure that OpenCV kernels do not reallocate memory for output parameters
+        //by comparing its state (data ptr) before and after the call.
+        //This is done by converting each output Mat into tracked_cv_mat object, and binding
+        //them to parameters of ad-hoc function
+        //Convert own::Scalar to cv::Scalar before call kernel and run kernel
+        //convert cv::Scalar to own::Scalar after call kernel and write back results
+        call_and_postprocess<decltype(get_in<Ins>::get(ctx, IIs))...>
+                                      ::call(get_in<Ins>::get(ctx, IIs)...,
+                                             get_out<Outs>::get(ctx, OIs)...);
+    }
+
+    template<int... IIs, int... OIs>
+    static void call_impl(cv::GCPUContext &ctx, Impl& impl, detail::Seq<IIs...>, detail::Seq<OIs...>)
+    {
+        call_and_postprocess<decltype(cv::detail::get_in<Ins>::get(ctx, IIs))...>
+                                      ::call(impl, cv::detail::get_in<Ins>::get(ctx, IIs)...,
+                                                   cv::detail::get_out<Outs>::get(ctx, OIs)...);
+    }
+
+    static void call(GCPUContext &ctx)
+    {
+        call_impl(ctx,
+                  typename detail::MkSeq<sizeof...(Ins)>::type(),
+                  typename detail::MkSeq<sizeof...(Outs)>::type());
+    }
+
+    // NB: Same as call(), but invokes a stateful Impl object instead of the static Impl::run.
+    // NOTE(review): this path does not run postprocess(), so output reallocation is not
+    // validated for functor kernels -- confirm whether that is intended.
+    static void callFunctor(cv::GCPUContext &ctx, Impl& impl)
+    {
+        call_impl(ctx, impl,
+                  typename detail::MkSeq<sizeof...(Ins)>::type(),
+                  typename detail::MkSeq<sizeof...(Outs)>::type());
+    }
+};
+
+} // namespace detail
+
+template<class Impl, class K>
+class GCPUKernelImpl: public cv::detail::OCVCallHelper<Impl, typename K::InArgs, typename K::OutArgs>,
+                      public cv::detail::KernelTag
+{
+    using P = detail::OCVCallHelper<Impl, typename K::InArgs, typename K::OutArgs>;
+
+public:
+    using API = K;
+
+    static cv::gapi::GBackend backend()  { return cv::gapi::cpu::backend(); }
+    static cv::GCPUKernel     kernel()   { return GCPUKernel(&P::call);     }
+};
+
+#define GAPI_OCV_KERNEL(Name, API) struct Name: public cv::GCPUKernelImpl<Name, API>
+
+class gapi::cpu::GOCVFunctor : public gapi::GFunctor
+{
+public:
+    using Impl = std::function<void(GCPUContext &)>;
+
+    GOCVFunctor(const char* id, const Impl& impl)
+        : gapi::GFunctor(id), impl_{GCPUKernel(impl)}
+    {
+    }
+
+    GKernelImpl    impl()    const override { return impl_;                }
+    gapi::GBackend backend() const override { return gapi::cpu::backend(); }
+
+private:
+    GKernelImpl impl_;
+};
+
+//! @cond IGNORED
+template<typename K, typename Callable>
+gapi::cpu::GOCVFunctor gapi::cpu::ocv_kernel(Callable& c)
+{
+    using P = detail::OCVCallHelper<Callable, typename K::InArgs, typename K::OutArgs>;
+    return GOCVFunctor(K::id(), std::bind(&P::callFunctor, std::placeholders::_1, std::ref(c)));
+}
+
+template<typename K, typename Callable>
+gapi::cpu::GOCVFunctor gapi::cpu::ocv_kernel(const Callable& c)
+{
+    using P = detail::OCVCallHelper<Callable, typename K::InArgs, typename K::OutArgs>;
+    return GOCVFunctor(K::id(), std::bind(&P::callFunctor, std::placeholders::_1, c));
+}
+//! @endcond
+
+} // namespace cv
+
+#endif // OPENCV_GAPI_GCPUKERNEL_HPP
diff --git a/IPL/include/opencv/opencv2/gapi/cpu/imgproc.hpp b/IPL/include/opencv/opencv2/gapi/cpu/imgproc.hpp
new file mode 100644
index 0000000..0b96db0
--- /dev/null
+++ b/IPL/include/opencv/opencv2/gapi/cpu/imgproc.hpp
@@ -0,0 +1,27 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2018 Intel Corporation
+
+
+#ifndef OPENCV_GAPI_CPU_IMGPROC_API_HPP
+#define OPENCV_GAPI_CPU_IMGPROC_API_HPP
+
+#include <opencv2/core/cvdef.h>     // GAPI_EXPORTS
+#include <opencv2/gapi/gkernel.hpp> // GKernelPackage
+
+namespace cv {
+namespace gapi {
+namespace imgproc {
+namespace cpu {
+
+GAPI_EXPORTS GKernelPackage kernels();
+
+} // namespace cpu
+} // namespace imgproc
+} // namespace gapi
+} // namespace cv
+
+
+#endif // OPENCV_GAPI_CPU_IMGPROC_API_HPP
diff --git a/IPL/include/opencv/opencv2/gapi/cpu/video.hpp b/IPL/include/opencv/opencv2/gapi/cpu/video.hpp
new file mode 100644
index 0000000..d3c1f2e
--- /dev/null
+++ b/IPL/include/opencv/opencv2/gapi/cpu/video.hpp
@@ -0,0 +1,25 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2020 Intel Corporation
+
+#ifndef OPENCV_GAPI_CPU_VIDEO_API_HPP
+#define OPENCV_GAPI_CPU_VIDEO_API_HPP
+
+#include <opencv2/gapi/gkernel.hpp> // GKernelPackage
+
+namespace cv {
+namespace gapi {
+namespace video {
+namespace cpu {
+
+GAPI_EXPORTS GKernelPackage kernels();
+
+} // namespace cpu
+} // namespace video
+} // namespace gapi
+} // namespace cv
+
+
+#endif // OPENCV_GAPI_CPU_VIDEO_API_HPP
diff --git a/IPL/include/opencv/opencv2/gapi/fluid/core.hpp b/IPL/include/opencv/opencv2/gapi/fluid/core.hpp
new file mode 100644
index 0000000..8c21f57
--- /dev/null
+++ b/IPL/include/opencv/opencv2/gapi/fluid/core.hpp
@@ -0,0 +1,20 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2018 Intel Corporation
+
+
+#ifndef OPENCV_GAPI_FLUID_CORE_HPP
+#define OPENCV_GAPI_FLUID_CORE_HPP
+
+#include <opencv2/gapi/gkernel.hpp> // GKernelPackage
+#include <opencv2/gapi/own/exports.hpp> // GAPI_EXPORTS
+
+namespace cv { namespace gapi { namespace core { namespace fluid {
+
+GAPI_EXPORTS GKernelPackage kernels();
+
+}}}}
+
+#endif // OPENCV_GAPI_FLUID_CORE_HPP
diff --git a/IPL/include/opencv/opencv2/gapi/fluid/gfluidbuffer.hpp b/IPL/include/opencv/opencv2/gapi/fluid/gfluidbuffer.hpp
new file mode 100644
index 0000000..02e74f2
--- /dev/null
+++ b/IPL/include/opencv/opencv2/gapi/fluid/gfluidbuffer.hpp
@@ -0,0 +1,157 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2018 Intel Corporation
+
+
+#ifndef OPENCV_GAPI_FLUID_BUFFER_HPP
+#define OPENCV_GAPI_FLUID_BUFFER_HPP
+
+#include <list>
+#include <numeric> // accumulate
+#include <ostream> // ostream
+#include <cstdint> // uint8_t
+
+#include <opencv2/gapi/opencv_includes.hpp>
+#include <opencv2/gapi/own/mat.hpp>
+#include <opencv2/gapi/gmat.hpp>
+
+#include <opencv2/gapi/util/optional.hpp>
+#include <opencv2/gapi/own/mat.hpp>
+
+namespace cv {
+namespace gapi {
+namespace fluid {
+
+struct Border
+{
+    // This constructor is required to support existing kernels which are part of G-API
+    Border(int _type, cv::Scalar _val) : type(_type), value(_val) {};
+
+    int type;
+    cv::Scalar value;
+};
+
+using BorderOpt = util::optional<Border>;
+
+bool operator == (const Border& b1, const Border& b2);
+
+class GAPI_EXPORTS Buffer;
+
+class GAPI_EXPORTS View // Read-only, line-based view (see Buffer::mkView)
+{
+public:
+    struct Cache
+    {
+        std::vector<const uint8_t*> m_linePtrs;
+        GMatDesc m_desc;
+        int m_border_size = 0;
+
+        inline const uint8_t* linePtr(int index) const
+        {
+            // "out_of_window" check (debug builds only):
+            // user must not request the lines which are outside of specified kernel window
+            GAPI_DbgAssert(index >= -m_border_size
+                        && index <  -m_border_size + static_cast<int>(m_linePtrs.size()));
+            return m_linePtrs[index + m_border_size];
+        }
+    };
+
+    View() = default; // creates an empty view: m_priv and m_cache are both null
+
+    const inline uint8_t* InLineB(int index) const // -(w-1)/2...0...+(w-1)/2 for Filters
+    {
+        return m_cache->linePtr(index);
+    }
+
+    template<typename T> const inline T* InLine(int i) const
+    {
+        const uint8_t* ptr = this->InLineB(i);
+        return reinterpret_cast<const T*>(ptr);
+    }
+
+    inline operator bool() const { return m_priv != nullptr; }
+    bool ready() const;
+    inline int length() const { return m_cache->m_desc.size.width; }
+    int y() const;
+
+    inline const GMatDesc& meta() const { return m_cache->m_desc; }
+
+    class GAPI_EXPORTS Priv;      // internal use only
+    Priv& priv();               // internal use only
+    const Priv& priv() const;   // internal use only
+
+    View(std::unique_ptr<Priv>&& p);
+    View(View&& v);
+    View& operator=(View&& v);
+    ~View();
+
+private:
+    std::unique_ptr<Priv> m_priv;
+    const Cache* m_cache = nullptr; // was left indeterminate by the defaulted constructor
+};
+
+class GAPI_EXPORTS Buffer
+{
+public:
+    struct Cache
+    {
+        std::vector<uint8_t*> m_linePtrs;
+        GMatDesc m_desc;
+    };
+
+    // Default constructor (executable creation stage,
+    // all following initialization performed in Priv::init())
+    Buffer();
+    // Scratch constructor (user kernels)
+    Buffer(const cv::GMatDesc &desc);
+
+    // Constructor for intermediate buffers (for tests)
+    Buffer(const cv::GMatDesc &desc,
+           int max_line_consumption, int border_size,
+           int skew,
+           int wlpi,
+           BorderOpt border);
+    // Constructor for in/out buffers (for tests)
+    Buffer(const cv::gapi::own::Mat &data, bool is_input);
+    ~Buffer();
+    Buffer& operator=(Buffer&&);
+
+    inline uint8_t* OutLineB(int index = 0)
+    {
+        return m_cache->m_linePtrs[index];
+    }
+
+    template<typename T> inline T* OutLine(int index = 0)
+    {
+        uint8_t* ptr = this->OutLineB(index);
+        return reinterpret_cast<T*>(ptr);
+    }
+
+    int y() const;
+
+    int linesReady() const;
+    void debug(std::ostream &os) const;
+    inline int length() const { return m_cache->m_desc.size.width; }
+    int lpi() const;  // LPI for WRITER
+
+    inline const GMatDesc& meta() const { return m_cache->m_desc; }
+
+    View mkView(int borderSize, bool ownStorage);
+    void addView(const View* v);
+
+    class GAPI_EXPORTS Priv;      // internal use only
+    Priv& priv();               // internal use only
+    const Priv& priv() const;   // internal use only
+
+private:
+    std::unique_ptr<Priv> m_priv;
+    const Cache* m_cache;
+};
+
+} // namespace cv::gapi::fluid
+} // namespace cv::gapi
+} // namespace cv
+
+#endif // OPENCV_GAPI_FLUID_BUFFER_HPP
diff --git a/IPL/include/opencv/opencv2/gapi/fluid/gfluidkernel.hpp b/IPL/include/opencv/opencv2/gapi/fluid/gfluidkernel.hpp
new file mode 100644
index 0000000..dbb36d8
--- /dev/null
+++ b/IPL/include/opencv/opencv2/gapi/fluid/gfluidkernel.hpp
@@ -0,0 +1,439 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2018-2019 Intel Corporation
+
+
+#ifndef OPENCV_GAPI_FLUID_KERNEL_HPP
+#define OPENCV_GAPI_FLUID_KERNEL_HPP
+
+#include <vector>
+#include <functional>
+#include <map>
+#include <unordered_map>
+
+#include <opencv2/gapi/opencv_includes.hpp>
+#include <opencv2/gapi/gcommon.hpp>
+#include <opencv2/gapi/gkernel.hpp>
+#include <opencv2/gapi/garg.hpp>
+
+#include <opencv2/gapi/fluid/gfluidbuffer.hpp>
+
+// FIXME: namespace scheme for backends?
+namespace cv {
+
+namespace gapi
+{
+namespace fluid
+{
+    /**
+     * \addtogroup gapi_std_backends G-API Standard Backends
+     * @{
+     */
+    /**
+     * @brief Get a reference to Fluid backend.
+     *
+     * @sa gapi_std_backends
+     */
+    GAPI_EXPORTS cv::gapi::GBackend backend();
+    /** @} */
+} // namespace fluid
+} // namespace gapi
+
+
+class GAPI_EXPORTS GFluidKernel
+{
+public:
+    enum class Kind
+    {
+        Filter,
+        Resize,
+        YUV420toRGB //Color conversion of 4:2:0 chroma sub-sampling formats (NV12, I420 ..etc) to RGB
+    };
+
+    // This function is a generic "doWork" callback
+    using F = std::function<void(const cv::GArgs&, const std::vector<gapi::fluid::Buffer*> &)>;
+
+    // This function is a generic "initScratch" callback
+    using IS = std::function<void(const cv::GMetaArgs &, const cv::GArgs&, gapi::fluid::Buffer &)>;
+
+    // This function is a generic "resetScratch" callback
+    using RS = std::function<void(gapi::fluid::Buffer &)>;
+
+    // This function describes kernel metadata inference rule.
+    using M = std::function<GMetaArgs(const GMetaArgs &, const GArgs &)>;
+
+    // This function is a generic "getBorder" callback (extracts border-related data from kernel's input parameters)
+    using B = std::function<gapi::fluid::BorderOpt(const GMetaArgs&, const GArgs&)>;
+
+    // This function is a generic "getWindow" callback (extracts window-related data from kernel's input parameters)
+    using GW = std::function<int(const GMetaArgs&, const GArgs&)>;
+
+    // FIXME: move implementations out of header file
+    GFluidKernel() {}
+    GFluidKernel(Kind k, int l, bool scratch, const F& f, const IS &is, const RS &rs, const B& b, const GW& win)
+        : m_kind(k)
+        , m_lpi(l)
+        , m_scratch(scratch)
+        , m_f(f)
+        , m_is(is)
+        , m_rs(rs)
+        , m_b(b)
+        , m_gw(win) {}
+
+    Kind m_kind = Kind::Filter; // defaulted so GFluidKernel() never leaves the kind indeterminate
+    const int  m_lpi     = -1;
+    const bool m_scratch = false;
+
+    const F    m_f;
+    const IS   m_is;
+    const RS   m_rs;
+    const B    m_b;
+    const GW   m_gw;
+};
+
+// FIXME!!!
+// This is the temporary and experimental API
+// which should be replaced by runtime roi-based scheduling
+/** \addtogroup gapi_compile_args
+ * @{
+ */
+/**
+ * @brief This structure allows controlling the output image region
+ * which the Fluid backend will produce in the graph.
+ *
+ * This feature is useful for external tiling and parallelism, but
+ * will be deprecated in future releases.
+ */
+struct GFluidOutputRois
+{
+    std::vector<cv::Rect> rois;
+};
+
+/**
+ * @brief This structure forces Fluid backend to generate multiple
+ * parallel output regions in the graph. These regions execute in parallel.
+ *
+ * This feature may be deprecated in the future releases.
+ */
+struct GFluidParallelOutputRois
+{
+    std::vector<GFluidOutputRois> parallel_rois;
+};
+
+/**
+ * @brief This structure allows customizing the way Fluid executes
+ * parallel regions.
+ *
+ * For example, users can plug in their own threading runtime via this parameter.
+ * The `parallel_for` member functor is called by the Fluid runtime with the
+ * following arguments:
+ *
+ * @param size Size of the parallel range to process
+ * @param f A function which should be called for every integer index
+ *   in this range by the specified parallel_for implementation.
+ *
+ * This feature may be deprecated in future releases.
+ */
+struct GFluidParallelFor
+{
+    // This functor accepts:
+    // - the size of the "parallel" range as the first argument
+    // - and a function to be called on the range items, designated by item index
+    std::function<void(std::size_t size, std::function<void(std::size_t index)>)> parallel_for;
+};
+/** @} gapi_compile_args */
+
+namespace detail
+{
+template<> struct CompileArgTag<GFluidOutputRois>
+{
+    static const char* tag() { return "gapi.fluid.outputRois"; }
+};
+
+template<> struct CompileArgTag<GFluidParallelFor>
+{
+    static const char* tag() { return "gapi.fluid.parallelFor"; }
+};
+
+template<> struct CompileArgTag<GFluidParallelOutputRois>
+{
+    static const char* tag() { return "gapi.fluid.parallelOutputRois"; }
+};
+
+} // namespace detail
+
+namespace detail
+{
+template<class T> struct fluid_get_in;
+template<> struct fluid_get_in<cv::GMat>
+{
+    static const cv::gapi::fluid::View& get(const cv::GArgs &in_args, int idx)
+    {
+        return *in_args[idx].unsafe_get<cv::gapi::fluid::View*>();
+    }
+};
+
+template<> struct fluid_get_in<cv::GScalar>
+{
+    // FIXME: change to return by reference when moved to own::Scalar
+    static const cv::Scalar get(const cv::GArgs &in_args, int idx)
+    {
+        return in_args[idx].unsafe_get<cv::Scalar>();
+    }
+};
+
+template<typename U> struct fluid_get_in<cv::GArray<U>>
+{
+    static const std::vector<U>& get(const cv::GArgs &in_args, int idx)
+    {
+        return in_args.at(idx).unsafe_get<cv::detail::VectorRef>().rref<U>();
+    }
+};
+
+template<typename U> struct fluid_get_in<cv::GOpaque<U>>
+{
+    static const U& get(const cv::GArgs &in_args, int idx)
+    {
+        return in_args.at(idx).unsafe_get<cv::detail::OpaqueRef>().rref<U>();
+    }
+};
+
+template<class T> struct fluid_get_in
+{
+    static const T& get(const cv::GArgs &in_args, int idx)
+    {
+        return in_args[idx].unsafe_get<T>();
+    }
+};
+
+template<bool, typename Impl, typename... Ins>
+struct scratch_helper;
+
+template<typename Impl, typename... Ins>
+struct scratch_helper<true, Impl, Ins...>
+{
+    // Init
+    template<int... IIs>
+    static void help_init_impl(const cv::GMetaArgs &metas,
+                               const cv::GArgs     &in_args,
+                               gapi::fluid::Buffer &scratch_buf,
+                               detail::Seq<IIs...>)
+    {
+        Impl::initScratch(get_in_meta<Ins>(metas, in_args, IIs)..., scratch_buf);
+    }
+
+    static void help_init(const cv::GMetaArgs &metas,
+                          const cv::GArgs     &in_args,
+                          gapi::fluid::Buffer &b)
+    {
+        help_init_impl(metas, in_args, b, typename detail::MkSeq<sizeof...(Ins)>::type());
+    }
+
+    // Reset
+    static void help_reset(gapi::fluid::Buffer &b)
+    {
+        Impl::resetScratch(b);
+    }
+};
+
+template<typename Impl, typename... Ins>
+struct scratch_helper<false, Impl, Ins...>
+{
+    static void help_init(const cv::GMetaArgs &,
+                          const cv::GArgs     &,
+                          gapi::fluid::Buffer &)
+    {
+        GAPI_Assert(false);
+    }
+    static void help_reset(gapi::fluid::Buffer &)
+    {
+        GAPI_Assert(false);
+    }
+};
+
+template<typename T> struct is_gmat_type
+{
+    static const constexpr bool value = std::is_same<cv::GMat, T>::value;
+};
+
+template<bool CallCustomGetBorder, typename Impl, typename... Ins>
+struct get_border_helper;
+
+template<typename Impl, typename... Ins>
+struct get_border_helper<true, Impl, Ins...>
+{
+    template<int... IIs>
+    static gapi::fluid::BorderOpt get_border_impl(const GMetaArgs &metas,
+                                                  const cv::GArgs &in_args,
+                                                  cv::detail::Seq<IIs...>)
+    {
+        return util::make_optional(Impl::getBorder(cv::detail::get_in_meta<Ins>(metas, in_args, IIs)...));
+    }
+
+    static gapi::fluid::BorderOpt help(const GMetaArgs &metas,
+                                       const cv::GArgs &in_args)
+    {
+        return get_border_impl(metas, in_args, typename detail::MkSeq<sizeof...(Ins)>::type());
+    }
+};
+
+template<typename Impl, typename... Ins>
+struct get_border_helper<false, Impl, Ins...>
+{
+    static gapi::fluid::BorderOpt help(const cv::GMetaArgs &,
+                                       const cv::GArgs     &)
+    {
+        return {};
+    }
+};
+
+template<bool CallCustomGetWindow, typename, typename... Ins>
+struct get_window_helper;
+
+template<typename Impl, typename... Ins>
+struct get_window_helper<true, Impl, Ins...>
+{
+    template<int... IIs>
+    static int get_window_impl(const GMetaArgs &metas,
+                               const cv::GArgs &in_args,
+                               cv::detail::Seq<IIs...>)
+    {
+        return Impl::getWindow(cv::detail::get_in_meta<Ins>(metas, in_args, IIs)...);
+    }
+
+    static int help(const GMetaArgs &metas, const cv::GArgs &in_args)
+    {
+        return get_window_impl(metas, in_args, typename detail::MkSeq<sizeof...(Ins)>::type());
+    }
+};
+
+template<typename Impl, typename... Ins>
+struct get_window_helper<false, Impl, Ins...>
+{
+    static int help(const cv::GMetaArgs &,
+                    const cv::GArgs     &)
+    {
+        return Impl::Window;
+    }
+};
+
+template<typename C, typename T>
+struct has_Window
+{
+private:
+    template<class U>
+    static constexpr auto Check(U*) -> typename std::is_same<decltype(U::Window), T>::type;
+
+    template<typename>
+    static constexpr std::false_type Check(...);
+
+    typedef decltype(Check<C>(0)) Result;
+
+public:
+    static constexpr bool value = Result::value;
+};
+
+template<bool hasWindow, typename Impl>
+struct callCustomGetBorder;
+
+template<typename Impl>
+struct callCustomGetBorder<true, Impl>
+{
+    static constexpr bool value = (Impl::Window != 1);
+};
+
+template<typename Impl>
+struct callCustomGetBorder<false, Impl>
+{
+    static constexpr bool value = true;
+};
+
+template<typename, typename, typename, bool UseScratch>
+struct FluidCallHelper;
+
+template<typename Impl, typename... Ins, typename... Outs, bool UseScratch>
+struct FluidCallHelper<Impl, std::tuple<Ins...>, std::tuple<Outs...>, UseScratch>
+{
+    static_assert(all_satisfy<is_gmat_type, Outs...>::value, "return type must be GMat");
+    static_assert(contains<GMat, Ins...>::value, "input must contain at least one GMat");
+
+    // Execution dispatcher ////////////////////////////////////////////////////
+    template<int... IIs, int... OIs>
+    static void call_impl(const cv::GArgs &in_args,
+                          const std::vector<gapi::fluid::Buffer*> &out_bufs,
+                          detail::Seq<IIs...>,
+                          detail::Seq<OIs...>)
+    {
+        Impl::run(fluid_get_in<Ins>::get(in_args, IIs)..., *out_bufs[OIs]...);
+    }
+
+    static void call(const cv::GArgs &in_args,
+                     const std::vector<gapi::fluid::Buffer*> &out_bufs)
+    {
+        constexpr int numOuts = (sizeof...(Outs)) + (UseScratch ? 1 : 0);
+        call_impl(in_args, out_bufs,
+                  typename detail::MkSeq<sizeof...(Ins)>::type(),
+                  typename detail::MkSeq<numOuts>::type());
+    }
+
+    // Scratch buffer initialization dispatcher ////////////////////////////////
+    static void init_scratch(const GMetaArgs &metas,
+                             const cv::GArgs &in_args,
+                             gapi::fluid::Buffer &b)
+    {
+        scratch_helper<UseScratch, Impl, Ins...>::help_init(metas, in_args, b);
+    }
+
+    // Scratch buffer reset dispatcher /////////////////////////////////////////
+    static void reset_scratch(gapi::fluid::Buffer &scratch_buf)
+    {
+        scratch_helper<UseScratch, Impl, Ins...>::help_reset(scratch_buf);
+    }
+
+    static gapi::fluid::BorderOpt getBorder(const GMetaArgs &metas, const cv::GArgs &in_args)
+    {
+        constexpr bool hasWindow = has_Window<Impl, const int>::value;
+
+        // User must provide a "getBorder" callback if Window != 1 (or if Impl declares no Window)
+        // TODO: move to constexpr if when we enable C++17
+        return get_border_helper<callCustomGetBorder<hasWindow, Impl>::value, Impl, Ins...>::help(metas, in_args);
+    }
+
+    static int getWindow(const GMetaArgs &metas, const cv::GArgs &in_args)
+    {
+        constexpr bool callCustomGetWindow = !(has_Window<Impl, const int>::value);
+        return get_window_helper<callCustomGetWindow, Impl, Ins...>::help(metas, in_args);
+    }
+};
+} // namespace detail
+
+
+template<class Impl, class K, bool UseScratch>
+class GFluidKernelImpl : public cv::detail::KernelTag
+{
+    static const int LPI = 1;
+    static const auto Kind = GFluidKernel::Kind::Filter;
+    using P = detail::FluidCallHelper<Impl, typename K::InArgs, typename K::OutArgs, UseScratch>;
+
+public:
+    using API = K;
+
+    static GFluidKernel kernel()
+    {
+        // FIXME: call() and getOutMeta() needs to be renamed so it is clear these
+        // functions are internal wrappers, not user API
+        return GFluidKernel(Impl::Kind, Impl::LPI,
+                            UseScratch,
+                            &P::call, &P::init_scratch, &P::reset_scratch, &P::getBorder, &P::getWindow);
+    }
+
+    static cv::gapi::GBackend backend() { return cv::gapi::fluid::backend(); }
+};
+
+#define GAPI_FLUID_KERNEL(Name, API, Scratch) struct Name: public cv::GFluidKernelImpl<Name, API, Scratch>
+
+} // namespace cv
+
+#endif // OPENCV_GAPI_FLUID_KERNEL_HPP
diff --git a/IPL/include/opencv/opencv2/gapi/fluid/imgproc.hpp b/IPL/include/opencv/opencv2/gapi/fluid/imgproc.hpp
new file mode 100644
index 0000000..dedfa9d
--- /dev/null
+++ b/IPL/include/opencv/opencv2/gapi/fluid/imgproc.hpp
@@ -0,0 +1,20 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2018 Intel Corporation
+
+
+#ifndef OPENCV_GAPI_FLUID_IMGPROC_HPP
+#define OPENCV_GAPI_FLUID_IMGPROC_HPP
+
+#include <opencv2/gapi/gkernel.hpp> // GKernelPackage
+#include <opencv2/gapi/own/exports.hpp> // GAPI_EXPORTS
+
+namespace cv { namespace gapi { namespace imgproc { namespace fluid {
+
+GAPI_EXPORTS GKernelPackage kernels();
+
+}}}}
+
+#endif // OPENCV_GAPI_FLUID_IMGPROC_HPP
diff --git a/IPL/include/opencv/opencv2/gapi/garg.hpp b/IPL/include/opencv/opencv2/gapi/garg.hpp
new file mode 100644
index 0000000..5aaea55
--- /dev/null
+++ b/IPL/include/opencv/opencv2/gapi/garg.hpp
@@ -0,0 +1,147 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2018 Intel Corporation
+
+
+#ifndef OPENCV_GAPI_GARG_HPP
+#define OPENCV_GAPI_GARG_HPP
+
+#include <vector>
+#include <type_traits>
+
+#include <opencv2/gapi/opencv_includes.hpp>
+#include <opencv2/gapi/own/mat.hpp>
+
+#include <opencv2/gapi/util/any.hpp>
+#include <opencv2/gapi/util/variant.hpp>
+
+#include <opencv2/gapi/gmat.hpp>
+#include <opencv2/gapi/gscalar.hpp>
+#include <opencv2/gapi/garray.hpp>
+#include <opencv2/gapi/gopaque.hpp>
+#include <opencv2/gapi/gtype_traits.hpp>
+#include <opencv2/gapi/gmetaarg.hpp>
+#include <opencv2/gapi/streaming/source.hpp>
+
+namespace cv {
+
+class GArg;
+
+namespace detail {
+    template<typename T>
+    using is_garg = std::is_same<GArg, typename std::decay<T>::type>;
+}
+
+// Parameter holder class for a node
+// Depending on platform capabilities, can either support arbitrary types
+// (as `boost::any`) or a limited number of types (as `boost::variant`).
+// FIXME: put into "details" as a user shouldn't use it in his code
+class GAPI_EXPORTS GArg
+{
+public:
+    GArg() {}
+
+    template<typename T, typename std::enable_if<!detail::is_garg<T>::value, int>::type = 0>
+    explicit GArg(const T &t)
+        : kind(detail::GTypeTraits<T>::kind)
+        , value(detail::wrap_gapi_helper<T>::wrap(t))
+    {
+    }
+
+    template<typename T, typename std::enable_if<!detail::is_garg<T>::value, int>::type = 0>
+    explicit GArg(T &&t)
+        : kind(detail::GTypeTraits<typename std::decay<T>::type>::kind)
+        , value(detail::wrap_gapi_helper<T>::wrap(t))
+    {
+    }
+
+    template<typename T> inline T& get()
+    {
+        return util::any_cast<typename std::remove_reference<T>::type>(value);
+    }
+
+    template<typename T> inline const T& get() const
+    {
+        return util::any_cast<typename std::remove_reference<T>::type>(value);
+    }
+
+    template<typename T> inline T& unsafe_get()
+    {
+        return util::unsafe_any_cast<typename std::remove_reference<T>::type>(value);
+    }
+
+    template<typename T> inline const T& unsafe_get() const
+    {
+        return util::unsafe_any_cast<typename std::remove_reference<T>::type>(value);
+    }
+
+    detail::ArgKind kind = detail::ArgKind::OPAQUE_VAL;
+
+protected:
+    util::any value;
+};
+
+using GArgs = std::vector<GArg>;
+
+// FIXME: Express as M<GProtoArg...>::type
+// FIXME: Move to a separate file!
+using GRunArg  = util::variant<
+#if !defined(GAPI_STANDALONE)
+    cv::Mat,
+    cv::UMat,
+#endif // !defined(GAPI_STANDALONE)
+    cv::gapi::wip::IStreamSource::Ptr,
+    cv::gapi::own::Mat,
+    cv::Scalar,
+    cv::detail::VectorRef,
+    cv::detail::OpaqueRef
+    >;
+using GRunArgs = std::vector<GRunArg>;
+
+namespace gapi
+{
+namespace wip
+{
+/**
+ * @brief This aggregate type represents all types which G-API can handle (via variant).
+ *
+ * It only exists to overcome C++ language limitations (where a `using`-defined class can't be forward-declared).
+ */
+struct Data: public GRunArg
+{
+    using GRunArg::GRunArg;              // inherit every GRunArg constructor
+    template <typename T>                // assignment from a const value: delegates to GRunArg::operator=
+    Data& operator= (const T& t) { GRunArg::operator=(t); return *this; }
+    template <typename T>                // NOTE(review): T&& is a forwarding reference, so it also binds non-const lvalues
+    Data& operator= (T&& t) { GRunArg::operator=(std::move(t)); return *this; } // ...which std::move then casts to rvalues - confirm intended
+};
+} // namespace wip
+} // namespace gapi
+
+using GRunArgP = util::variant<
+#if !defined(GAPI_STANDALONE)
+    cv::Mat*,
+    cv::UMat*,
+#endif // !defined(GAPI_STANDALONE)
+    cv::gapi::own::Mat*,
+    cv::Scalar*,
+    cv::detail::VectorRef,
+    cv::detail::OpaqueRef
+    >;
+using GRunArgsP = std::vector<GRunArgP>;
+
+template<typename... Ts> inline GRunArgs gin(const Ts&... args)
+{
+    return GRunArgs{ GRunArg(detail::wrap_host_helper<Ts>::wrap_in(args))... };
+}
+
+template<typename... Ts> inline GRunArgsP gout(Ts&... args)
+{
+    return GRunArgsP{ GRunArgP(detail::wrap_host_helper<Ts>::wrap_out(args))... };
+}
+
+} // namespace cv
+
+#endif // OPENCV_GAPI_GARG_HPP
diff --git a/IPL/include/opencv/opencv2/gapi/garray.hpp b/IPL/include/opencv/opencv2/gapi/garray.hpp
new file mode 100644
index 0000000..35b1f00
--- /dev/null
+++ b/IPL/include/opencv/opencv2/gapi/garray.hpp
@@ -0,0 +1,324 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2018 Intel Corporation
+
+
+#ifndef OPENCV_GAPI_GARRAY_HPP
+#define OPENCV_GAPI_GARRAY_HPP
+
+#include <functional>
+#include <ostream>
+#include <vector>
+#include <memory>
+
+#include <opencv2/gapi/own/exports.hpp>
+#include <opencv2/gapi/opencv_includes.hpp>
+
+#include <opencv2/gapi/util/variant.hpp>
+#include <opencv2/gapi/util/throw.hpp>
+#include <opencv2/gapi/own/assert.hpp>
+
+#include <opencv2/gapi/gmat.hpp>    // flatten_g only!
+#include <opencv2/gapi/gscalar.hpp> // flatten_g only!
+
+namespace cv
+{
+// Forward declarations; GNode and GOrigin are internal
+// (user-inaccessible) classes.
+class GNode;
+struct GOrigin;
+template<typename T> class GArray;
+
+/**
+ * \addtogroup gapi_meta_args
+ * @{
+ */
+struct GArrayDesc
+{
+    // FIXME: Body
+    // FIXME: Also implement proper operator== then
+    bool operator== (const GArrayDesc&) const { return true; }
+};
+template<typename U> GArrayDesc descr_of(const std::vector<U> &) { return {};}
+static inline GArrayDesc empty_array_desc() {return {}; }
+/** @} */
+
+std::ostream& operator<<(std::ostream& os, const cv::GArrayDesc &desc);
+
+namespace detail
+{
+    // ConstructVec is a callback which stores information about T and is used by
+    // G-API runtime to construct arrays in host memory (T remains opaque for G-API).
+    // ConstructVec is carried into G-API internals by GArrayU.
+    // Currently it is suitable for Host (CPU) plugins only, real offload may require
+    // more information for manual memory allocation on-device.
+    class VectorRef;
+    using ConstructVec = std::function<void(VectorRef&)>;
+
+    // This is the base struct for GArrayU type holder
+    struct TypeHintBase{virtual ~TypeHintBase() = default;};
+
+    // This class holds type of initial GArray to be checked from GArrayU
+    template <typename T>
+    struct TypeHint final : public TypeHintBase{};
+
+    // This class strips type information from GArray<T> and makes it usable
+    // in the G-API graph compiler (expression unrolling, graph generation, etc).
+    // Part of GProtoArg.
+    class GAPI_EXPORTS GArrayU
+    {
+    public:
+        GArrayU(const GNode &n, std::size_t out); // Operation result constructor
+
+        template <typename T>
+        bool holds() const;                       // Check if was created from GArray<T>
+
+        GOrigin& priv();                          // Internal use only
+        const GOrigin& priv() const;              // Internal use only
+
+    protected:
+        GArrayU();                                // Default constructor
+        template<class> friend class cv::GArray;  //  (available to GArray<T> only)
+
+        void setConstructFcn(ConstructVec &&cv);  // Store T-aware constructor
+
+        template <typename T>
+        void specifyType();                       // Store type of initial GArray<T>
+
+        std::shared_ptr<GOrigin> m_priv;
+        std::shared_ptr<TypeHintBase> m_hint;
+    };
+
+    template <typename T>                       // true iff the stored type hint is TypeHint<decay<T>>
+    bool GArrayU::holds() const{
+        GAPI_Assert(m_hint != nullptr);         // asserts that a type hint has been stored
+        using U = typename std::decay<T>::type; // compare on the decayed type, as specifyType() stores it
+        return dynamic_cast<TypeHint<U>*>(m_hint.get()) != nullptr;
+    }
+
+    template <typename T>                       // records decay<T> as this array's element type hint
+    void GArrayU::specifyType(){
+        m_hint.reset(new TypeHint<typename std::decay<T>::type>);
+    }
+
+    // This class represents a typed STL vector reference.
+    // Depending on origins, this reference may be either "just a" reference to
+    // an object created externally, OR actually own the underlying object
+    // (be value holder).
+    class BasicVectorRef                           // type-erased base of VectorRefT<T>
+    {
+    public:
+        std::size_t    m_elemSize = 0ul;           // element size; VectorRefT<T>::init() sets it to sizeof(T)
+        cv::GArrayDesc m_desc;                     // descriptor of the referenced vector
+        virtual ~BasicVectorRef() {}
+
+        virtual void mov(BasicVectorRef &ref) = 0; // move the content referenced by ref into this object
+        virtual const void* ptr() const = 0;       // untyped address of the referenced vector
+    };
+
+    template<typename T> class VectorRefT final: public BasicVectorRef
+    {
+        using empty_t  = util::monostate;          // nothing referenced yet
+        using ro_ext_t = const std::vector<T> *;   // read-only pointer to an external vector
+        using rw_ext_t =       std::vector<T> *;   // writable pointer to an external vector
+        using rw_own_t =       std::vector<T>  ;   // vector stored by value (owned)
+        util::variant<empty_t, ro_ext_t, rw_ext_t, rw_own_t> m_ref;
+
+        inline bool isEmpty() const { return util::holds_alternative<empty_t>(m_ref);  }
+        inline bool isROExt() const { return util::holds_alternative<ro_ext_t>(m_ref); }
+        inline bool isRWExt() const { return util::holds_alternative<rw_ext_t>(m_ref); }
+        inline bool isRWOwn() const { return util::holds_alternative<rw_own_t>(m_ref); }
+
+        void init(const std::vector<T>* vec = nullptr) // records sizeof(T) and, if vec is given, its descriptor
+        {
+            m_elemSize = sizeof(T);
+            if (vec) m_desc = cv::descr_of(*vec);
+        }
+
+    public:
+        VectorRefT() { init(); }
+        virtual ~VectorRefT() {}
+
+        explicit VectorRefT(const std::vector<T>& vec) : m_ref(&vec)      { init(&vec); }
+        explicit VectorRefT(std::vector<T>& vec)  : m_ref(&vec)           { init(&vec); }
+        explicit VectorRefT(std::vector<T>&& vec) : m_ref(std::move(vec)) { init(&vec); } // NOTE(review): vec is already moved-from when init() reads it
+
+        // Reset a VectorRefT. Called only for objects instantiated
+        // internally in G-API (e.g. temporary GArray<T>'s within a
+        // computation).  Reset here means both initialization
+        // (creating an object) and reset (discarding its existing
+        // content before the next execution).  Must never be called
+        // for external VectorRefTs.
+        void reset()
+        {
+            if (isEmpty())
+            {
+                std::vector<T> empty_vector;
+                m_desc = cv::descr_of(empty_vector);
+                m_ref  = std::move(empty_vector);
+                GAPI_Assert(isRWOwn());
+            }
+            else if (isRWOwn())
+            {
+                util::get<rw_own_t>(m_ref).clear();
+            }
+            else GAPI_Assert(false); // shouldn't be called in *EXT modes
+        }
+
+        // Obtain a WRITE reference to the underlying object.
+        // Used by CPU kernel API wrappers when a kernel execution frame
+        // is created. Asserts unless the vector is writable (RWExt or RWOwn).
+        std::vector<T>& wref()
+        {
+            GAPI_Assert(isRWExt() || isRWOwn());
+            if (isRWExt()) return *util::get<rw_ext_t>(m_ref);
+            if (isRWOwn()) return  util::get<rw_own_t>(m_ref);
+            util::throw_error(std::logic_error("Impossible happened"));
+        }
+
+        // Obtain a READ reference to the underlying object.
+        // Used by CPU kernel API wrappers when a kernel execution frame
+        // is created. Reports an error via throw_error if nothing is referenced yet.
+        const std::vector<T>& rref() const
+        {
+            // ANY vector can be accessed for reading, even if it is declared for
+            // output. Example -- a GComputation from [in] to [out1,out2]
+            // where [out2] is a result of an operation applied to [out1]:
+            //
+            //            GComputation boundary
+            //            . . . . . . .
+            //            .           .
+            //     [in] ----> foo() ----> [out1]
+            //            .           .    :
+            //            .           . . .:. . .
+            //            .                V    .
+            //            .              bar() ---> [out2]
+            //            . . . . . . . . . . . .
+            //
+            if (isROExt()) return *util::get<ro_ext_t>(m_ref);
+            if (isRWExt()) return *util::get<rw_ext_t>(m_ref);
+            if (isRWOwn()) return  util::get<rw_own_t>(m_ref);
+            util::throw_error(std::logic_error("Impossible happened"));
+        }
+
+        virtual void mov(BasicVectorRef &v) override { // move-assigns v's vector into this one; v must be a VectorRefT<T>
+            VectorRefT<T> *tv = dynamic_cast<VectorRefT<T>*>(&v);
+            GAPI_Assert(tv != nullptr);
+            wref() = std::move(tv->wref());
+        }
+
+        virtual const void* ptr() const override { return &rref(); }
+    };
+
+    // This class strips type information from VectorRefT<> and makes it usable
+    // in the G-API executables (carrying run-time data/information to kernels).
+    // Part of GRunArg.
+    // Its methods are typed proxies to VectorRefT<T>.
+    // VectorRef maintains "reference" semantics so two copies of VectorRef refer
+    // to the same underlying object.
+    // FIXME: Put a good explanation on why cv::OutputArray doesn't fit this role
+    class VectorRef
+    {
+        std::shared_ptr<BasicVectorRef> m_ref;           // shared, type-erased VectorRefT<T>; null when default-constructed
+
+        template<typename T> inline void check() const   // checks that T matches the stored VectorRefT<T>
+        {
+            GAPI_DbgAssert(dynamic_cast<VectorRefT<T>*>(m_ref.get()) != nullptr);
+            GAPI_Assert(sizeof(T) == m_ref->m_elemSize);
+        }
+
+    public:
+        VectorRef() = default;
+        template<typename T> explicit VectorRef(const std::vector<T>& vec) : m_ref(new VectorRefT<T>(vec)) {}            // read-only reference to the caller's vector
+        template<typename T> explicit VectorRef(std::vector<T>& vec)       : m_ref(new VectorRefT<T>(vec)) {}            // writable reference to the caller's vector
+        template<typename T> explicit VectorRef(std::vector<T>&& vec)      : m_ref(new VectorRefT<T>(std::move(vec))) {} // owns the vector; a bare `vec` is an lvalue and would bind the overload that keeps a pointer to the expiring temporary
+
+        template<typename T> void reset()                // create the owned vector on first use, otherwise clear it
+        {
+            if (!m_ref) m_ref.reset(new VectorRefT<T>());
+
+            check<T>();
+            static_cast<VectorRefT<T>&>(*m_ref).reset();
+        }
+
+        template<typename T> std::vector<T>& wref()      // typed write access
+        {
+            check<T>();
+            return static_cast<VectorRefT<T>&>(*m_ref).wref();
+        }
+
+        template<typename T> const std::vector<T>& rref() const // typed read access
+        {
+            check<T>();
+            return static_cast<VectorRefT<T>&>(*m_ref).rref();
+        }
+
+        void mov(VectorRef &v)                           // moves v's content into this object
+        {
+            m_ref->mov(*v.m_ref);                        // dereferences both m_ref pointers without a null check
+        }
+
+        cv::GArrayDesc descr_of() const
+        {
+            return m_ref->m_desc;
+        }
+
+        // May be used to uniquely identify this object internally
+        const void *ptr() const { return m_ref->ptr(); }
+    };
+
+    // Helper (FIXME: work-around?)
+    // stripping G types to their host types
+    // like cv::GArray<GMat> would still map to std::vector<cv::Mat>
+    // but not to std::vector<cv::GMat>
+#if defined(GAPI_STANDALONE)
+#  define FLATTEN_NS cv::gapi::own
+#else
+#  define FLATTEN_NS cv
+#endif
+    template<class T> struct flatten_g;
+    template<> struct flatten_g<cv::GMat>    { using type = FLATTEN_NS::Mat; };
+    template<> struct flatten_g<cv::GScalar> { using type = FLATTEN_NS::Scalar; };
+    template<class T> struct flatten_g       { using type = T; };
+#undef FLATTEN_NS
+    // FIXME: the above mainly duplicates "ProtoToParam" thing from gtyped.hpp
+    // but I decided not to include gtyped here - probably worth moving that stuff
+    // to some common place? (DM)
+} // namespace detail
+
+/** \addtogroup gapi_data_objects
+ * @{
+ */
+
+template<typename T> class GArray
+{
+public:
+    GArray() { putDetails(); }                    // Empty constructor
+    explicit GArray(detail::GArrayU &&ref)        // GArrayU-based constructor
+        : m_ref(std::move(ref)) { putDetails(); } //   (used by GCall, not for users); moves instead of copying the rvalue
+
+    detail::GArrayU strip() const { return m_ref; } // returns a copy of the type-erased handle
+
+private:
+    // Host type (or Flat type) - the type this GArray is actually
+    // specified to.
+    using HT = typename detail::flatten_g<typename std::decay<T>::type>::type;
+
+    static void VCTor(detail::VectorRef& vref) {  // constructs or clears a std::vector<HT> behind vref
+        vref.reset<HT>();
+    }
+    void putDetails() {                           // stores the HT-aware constructor callback and the HT type hint
+        m_ref.setConstructFcn(&VCTor);
+        m_ref.specifyType<HT>();
+    }
+
+    detail::GArrayU m_ref;
+};
+
+/** @} */
+
+} // namespace cv
+
+#endif // OPENCV_GAPI_GARRAY_HPP
diff --git a/IPL/include/opencv/opencv2/gapi/gasync_context.hpp b/IPL/include/opencv/opencv2/gapi/gasync_context.hpp
new file mode 100644
index 0000000..69ce530
--- /dev/null
+++ b/IPL/include/opencv/opencv2/gapi/gasync_context.hpp
@@ -0,0 +1,59 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2019 Intel Corporation
+
+#ifndef OPENCV_GAPI_GASYNC_CONTEXT_HPP
+#define OPENCV_GAPI_GASYNC_CONTEXT_HPP
+
+#include <atomic>    // std::atomic
+#include <exception> // std::exception
+
+#if !defined(GAPI_STANDALONE)
+#  include <opencv2/core/cvdef.h>
+#else   // Without OpenCV
+#  include <opencv2/gapi/own/cvdefs.hpp>
+#endif // !defined(GAPI_STANDALONE)
+
+#include <opencv2/gapi/own/exports.hpp>
+
+namespace cv {
+namespace gapi{
+namespace wip {
+
+/**
+ * @brief A class to group async requests to cancel them in a single shot.
+ *
+ * GAsyncContext is passed as an argument to async() and async_apply() functions
+ */
+
+class GAPI_EXPORTS GAsyncContext{
+    std::atomic<bool> cancelation_requested = {false}; // cancellation flag, initially false
+public:
+    /**
+     * @brief Start cancellation process for an associated request.
+     *
+     * User still has to wait for each individual request (either via callback or the corresponding std::future object) to make sure it is actually canceled.
+     *
+     * @return true if it was the first request to cancel the context
+     */
+    bool cancel();
+
+    /**
+    * @brief Returns true if cancellation was requested for this context.
+    *
+    * @return true if cancellation was requested for this context
+    */
+    bool isCanceled() const;
+};
+
+class GAPI_EXPORTS GAsyncCanceled : public std::exception {
+public:
+    virtual const char* what() const noexcept CV_OVERRIDE;
+};
+} // namespace wip
+} // namespace gapi
+} // namespace cv
+
+#endif //OPENCV_GAPI_GASYNC_CONTEXT_HPP
diff --git a/IPL/include/opencv/opencv2/gapi/gcall.hpp b/IPL/include/opencv/opencv2/gapi/gcall.hpp
new file mode 100644
index 0000000..ed5ba5f
--- /dev/null
+++ b/IPL/include/opencv/opencv2/gapi/gcall.hpp
@@ -0,0 +1,71 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2018 Intel Corporation
+
+
+#ifndef OPENCV_GAPI_GCALL_HPP
+#define OPENCV_GAPI_GCALL_HPP
+
+#include <opencv2/gapi/garg.hpp>      // GArg
+#include <opencv2/gapi/gmat.hpp>      // GMat
+#include <opencv2/gapi/gscalar.hpp>   // GScalar
+#include <opencv2/gapi/garray.hpp>    // GArray<T>
+#include <opencv2/gapi/gopaque.hpp>   // GOpaque<T>
+
+namespace cv {
+
+struct GKernel;
+
+// The whole idea of this class is to represent an operation
+// which is applied to arguments. This is part of public API,
+// since it is what users should use to define kernel interfaces.
+
+class GAPI_EXPORTS GCall final
+{
+public:
+    class Priv;
+
+    explicit GCall(const GKernel &k);
+    ~GCall();
+
+    template<typename... Ts>
+    GCall& pass(Ts&&... args) // wraps every argument into a cv::GArg and hands the list to setArgs()
+    {
+        setArgs({cv::GArg(std::move(args))...}); // NOTE(review): args are forwarding references; std::move casts lvalue arguments to rvalues too - confirm intended
+        return *this;
+    }
+
+    // Generic yield methods - obtain a link to a particular output of the operation
+    GMat    yield      (int output = 0);
+    GMatP   yieldP     (int output = 0);
+    GScalar yieldScalar(int output = 0);
+
+    template<class T> GArray<T> yieldArray(int output = 0) // typed wrapper over the protected GArrayU overload
+    {
+        return GArray<T>(yieldArray(output));
+    }
+
+    template<class T> GOpaque<T> yieldOpaque(int output = 0) // typed wrapper over the protected GOpaqueU overload
+    {
+        return GOpaque<T>(yieldOpaque(output));
+    }
+
+    // Internal use only
+    Priv& priv();
+    const Priv& priv() const;
+
+protected:
+    std::shared_ptr<Priv> m_priv;
+
+    void setArgs(std::vector<GArg> &&args);
+
+    // The public templates return a typed array or opaque; these untyped overloads are implementation details
+    detail::GArrayU yieldArray(int output = 0);
+    detail::GOpaqueU yieldOpaque(int output = 0);
+};
+
+} // namespace cv
+
+#endif // OPENCV_GAPI_GCALL_HPP
diff --git a/IPL/include/opencv/opencv2/gapi/gcommon.hpp b/IPL/include/opencv/opencv2/gapi/gcommon.hpp
new file mode 100644
index 0000000..9ee75f7
--- /dev/null
+++ b/IPL/include/opencv/opencv2/gapi/gcommon.hpp
@@ -0,0 +1,169 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2018 Intel Corporation
+
+
+#ifndef OPENCV_GAPI_GCOMMON_HPP
+#define OPENCV_GAPI_GCOMMON_HPP
+
+#include <functional>   // std::hash
+#include <vector>       // std::vector
+#include <type_traits>  // decay
+
+#include <opencv2/gapi/opencv_includes.hpp>
+
+#include <opencv2/gapi/util/any.hpp>
+#include <opencv2/gapi/own/exports.hpp>
+#include <opencv2/gapi/own/assert.hpp>
+
+namespace cv {
+
+namespace detail
+{
+    // This is a trait-like structure to mark backend-specific compile arguments
+    // with tags
+    template<typename T> struct CompileArgTag;
+
+    // These structures are tags which separate kernels and transformations
+    struct KernelTag
+    {};
+    struct TransformTag
+    {};
+}
+
+// This definition is here because it is reused by both public(?) and internal
+// modules. Keeping it here wouldn't expose public details (e.g., API-level)
+// to components which are internal and operate on lower-level entities
+// (e.g., compiler, backends).
+// FIXME: merge with ArgKind?
+// FIXME: replace with variant[format desc]?
+enum class GShape: int
+{
+    GMAT,
+    GSCALAR,
+    GARRAY,
+    GOPAQUE,
+};
+
+struct GCompileArg;
+
+namespace detail {
+    template<typename T>
+    using is_compile_arg = std::is_same<GCompileArg, typename std::decay<T>::type>;
+}
+// CompileArg is a unified interface over backend-specific compilation
+// information
+// FIXME: Move to a separate file?
+/** \addtogroup gapi_compile_args
+ * @{
+ *
+ * @brief Compilation arguments: data structures controlling the
+ * compilation process
+ *
+ * G-API comes with a number of graph compilation options which can be
+ * passed to cv::GComputation::apply() or
+ * cv::GComputation::compile(). Known compilation options are listed
+ * in this page, while extra backends may introduce their own
+ * compilation options (G-API transparently accepts _everything_ which
+ * can be passed to cv::compile_args(), it depends on underlying
+ * backends if an option would be interpreted or not).
+ *
+ * For example, if an example computation is executed like this:
+ *
+ * @snippet modules/gapi/samples/api_ref_snippets.cpp graph_decl_apply
+ *
+ * Extra parameter specifying which kernels to compile with can be
+ * passed like this:
+ *
+ * @snippet modules/gapi/samples/api_ref_snippets.cpp apply_with_param
+ */
+
+/**
+ * @brief Represents an arbitrary compilation argument.
+ *
+ * Any value can be wrapped into cv::GCompileArg, but only known ones
+ * (to G-API or its backends) can be interpreted correctly.
+ *
+ * Normally objects of this class shouldn't be created manually, use
+ * cv::compile_args() function which automatically wraps everything
+ * passed in (a variadic template parameter pack) into a vector of
+ * cv::GCompileArg objects.
+ */
+struct GAPI_EXPORTS GCompileArg
+{
+public:
+    std::string tag;
+
+    // FIXME: use decay in GArg/other trait-based wrapper before leg is shot!
+    template<typename T, typename std::enable_if<!detail::is_compile_arg<T>::value, int>::type = 0>
+    explicit GCompileArg(T &&t)
+        : tag(detail::CompileArgTag<typename std::decay<T>::type>::tag())
+        , arg(t)
+    {
+    }
+
+    template<typename T> T& get()
+    {
+        return util::any_cast<T>(arg);
+    }
+
+    template<typename T> const T& get() const
+    {
+        return util::any_cast<T>(arg);
+    }
+
+private:
+    util::any arg;
+};
+
+using GCompileArgs = std::vector<GCompileArg>;
+
+/**
+ * Wraps a list of arguments (a parameter pack) into a vector of
+ * compilation arguments (cv::GCompileArg).
+ */
+template<typename... Ts> GCompileArgs compile_args(Ts&&... args)
+{
+    return GCompileArgs{ GCompileArg(args)... };
+}
+
+/**
+ * @brief Ask G-API to dump compiled graph in Graphviz format under
+ * the given file name.
+ *
+ * Specifies a graph dump path (path to .dot file to be generated).
+ * G-API will dump a .dot file under specified path during a
+ * compilation process if this flag is passed.
+ */
+struct graph_dump_path
+{
+    std::string m_dump_path;
+};
+/** @} */
+
+namespace detail
+{
+    template<> struct CompileArgTag<cv::graph_dump_path>
+    {
+        static const char* tag() { return "gapi.graph_dump_path"; }
+    };
+}
+
+} // namespace cv
+
+// std::hash overload for GShape
+namespace std
+{
+template<> struct hash<cv::GShape>
+{
+    size_t operator() (cv::GShape sh) const
+    {
+        return std::hash<int>()(static_cast<int>(sh));
+    }
+};
+} // namespace std
+
+
+#endif // OPENCV_GAPI_GCOMMON_HPP
diff --git a/IPL/include/opencv/opencv2/gapi/gcompiled.hpp b/IPL/include/opencv/opencv2/gapi/gcompiled.hpp
new file mode 100644
index 0000000..b08451a
--- /dev/null
+++ b/IPL/include/opencv/opencv2/gapi/gcompiled.hpp
@@ -0,0 +1,219 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2018 Intel Corporation
+
+
+#ifndef OPENCV_GAPI_GCOMPILED_HPP
+#define OPENCV_GAPI_GCOMPILED_HPP
+
+#include <vector>
+
+#include <opencv2/gapi/opencv_includes.hpp>
+#include <opencv2/gapi/own/assert.hpp>
+#include <opencv2/gapi/garg.hpp>
+
+namespace cv {
+
+// This class represents a compiled computation.
+// In theory (and ideally), it can be used w/o the rest of APIs.
+// In theory (and ideally), it can be serialized/deserialized.
+// It can enable scenarios like deployment to an autonomous device, FuSa, etc.
+//
+// Currently GCompiled assumes all GMats you used to pass data to G-API
+// are valid and not destroyed while you use a GCompiled object.
+//
+// FIXME: In future, there should be a way to name I/O objects and specify it
+// to GCompiled externally (for example, when it is loaded on the target system).
+
+/**
+ * \addtogroup gapi_main_classes
+ * @{
+ */
+/**
+ * @brief Represents a compiled computation (graph). Can only be used
+ * with image / data formats & resolutions it was compiled for, with
+ * some exceptions.
+ *
+ * This class represents a product of graph compilation (calling
+ * cv::GComputation::compile()). Objects of this class actually do
+ * data processing, and graph execution is encapsulated into objects
+ * of this class. Execution model itself depends on kernels and
+ * backends which were used during the compilation, see @ref
+ * gapi_compile_args for details.
+ *
+ * In a general case, GCompiled objects can be applied to data only in
+ * those formats/resolutions they were compiled for (see @ref
+ * gapi_meta_args). However, if the underlying backends allow, a
+ * compiled object can be _reshaped_ to handle data (images) of
+ * different resolution, though formats and types must remain the same.
+ *
+ * GCompiled is very similar to `std::function<>` in its semantics --
+ * running it looks like a function call in the user code.
+ *
+ * At the moment, GCompiled objects are not reentrant -- generally,
+ * the objects are stateful since graph execution itself is a stateful
+ * process and this state is now maintained in GCompiled's own memory
+ * (not on the process stack).
+ *
+ * At the same time, two different GCompiled objects produced from the
+ * single cv::GComputation are completely independent and can be used
+ * concurrently.
+ *
+ * @sa GStreamingCompiled
+ */
+class GAPI_EXPORTS GCompiled
+{
+public:
+    /// @private
+    class GAPI_EXPORTS Priv;
+
+    /**
+     * @brief Constructs an empty object
+     */
+    GCompiled();
+
+    /**
+     * @brief Run the compiled computation, a generic version.
+     *
+     * @param ins vector of inputs to process.
+     * @param outs vector of outputs to produce.
+     *
+     * Input/output vectors must have the same number of elements as
+     * defined in the cv::GComputation protocol (at the moment of its
+     * construction). Shapes of elements also must conform to protocol
+     * (e.g. cv::Mat needs to be passed where cv::GMat has been
+     * declared as input, and so on). Run-time exception is generated
+     * otherwise.
+     *
+     * Objects in output vector may remain empty (like cv::Mat) --
+     * G-API will automatically initialize output objects to proper formats.
+     *
+     * @note Don't construct GRunArgs/GRunArgsP objects manually, use
+     * cv::gin()/cv::gout() wrappers instead.
+     */
+    void operator() (GRunArgs &&ins, GRunArgsP &&outs);          // Generic arg-to-arg
+#if !defined(GAPI_STANDALONE)
+
+    /**
+     * @brief Execute an unary computation
+     *
+     * @overload
+     * @param in input cv::Mat for unary computation
+     * @param out output cv::Mat for unary computation
+     * process.
+     */
+    void operator() (cv::Mat in, cv::Mat &out);                  // Unary overload
+
+    /**
+     * @brief Execute an unary computation
+     *
+     * @overload
+     * @param in input cv::Mat for unary computation
+     * @param out output cv::Scalar for unary computation
+     * process.
+     */
+    void operator() (cv::Mat in, cv::Scalar &out);               // Unary overload (scalar)
+
+    /**
+     * @brief Execute a binary computation
+     *
+     * @overload
+     * @param in1 first input cv::Mat for binary computation
+     * @param in2 second input cv::Mat for binary computation
+     * @param out output cv::Mat for binary computation
+     * process.
+     */
+    void operator() (cv::Mat in1, cv::Mat in2, cv::Mat &out);    // Binary overload
+
+    /**
+     * @brief Execute a binary computation
+     *
+     * @overload
+     * @param in1 first input cv::Mat for binary computation
+     * @param in2 second input cv::Mat for binary computation
+     * @param out output cv::Scalar for binary computation
+     * process.
+     */
+    void operator() (cv::Mat in1, cv::Mat in2, cv::Scalar &out); // Binary overload (scalar)
+
+    /**
+     * @brief Execute a computation with arbitrary number of
+     * inputs/outputs.
+     *
+     * @overload
+     * @param ins vector of input cv::Mat objects to process by the
+     * computation.
+     * @param outs vector of output cv::Mat objects to produce by the
+     * computation.
+     *
+     * Numbers of elements in ins/outs vectors must match numbers of
+     * inputs/outputs which were used to define the source GComputation.
+     */
+    void operator() (const std::vector<cv::Mat> &ins,            // Compatibility overload
+                     const std::vector<cv::Mat> &outs);
+#endif  // !defined(GAPI_STANDALONE)
+    /// @private
+    Priv& priv();
+
+    /**
+     * @brief Check if compiled object is valid (non-empty)
+     *
+     * @return true if the object is runnable (valid), false otherwise
+     */
+    explicit operator bool () const;
+
+    /**
+     * @brief Vector of metadata this graph was compiled for.
+     *
+     * @return Unless _reshape_ is supported, return value is the
+     * same vector which was passed to cv::GComputation::compile() to
+     * produce this compiled object. Otherwise, it is the latest
+     * metadata vector passed to reshape() (if that call was
+     * successful).
+     */
+    const GMetaArgs& metas() const; // Meta passed to compile()
+
+    /**
+     * @brief Vector of metadata descriptions of graph outputs
+     *
+     * @return vector with formats/resolutions of graph's output
+     * objects, auto-inferred from input metadata vector by
+     * operations which form this computation.
+     *
+     * @note GCompiled objects produced from the same
+     * cv::GComputation graph with different input metas may return
+     * different values in this vector.
+     */
+    const GMetaArgs& outMetas() const;
+
+    /**
+     * @brief Check if the underlying backends support reshape or not.
+     *
+     * @return true if supported, false otherwise.
+     */
+    bool canReshape() const;
+
+    /**
+     * @brief Reshape a compiled graph to support new image
+     * resolutions.
+     *
+     * Throws an exception if an error occurs.
+     *
+     * @param inMetas new metadata to reshape on. Vector size and
+     * metadata shapes must match the computation's protocol.
+     * @param args compilation arguments to use.
+     */
+    // FIXME: Why it requires compile args?
+    void reshape(const GMetaArgs& inMetas, const GCompileArgs& args);
+
+protected:
+    /// @private
+    std::shared_ptr<Priv> m_priv;
+};
+/** @} */
+
+}
+
+#endif // OPENCV_GAPI_GCOMPILED_HPP
diff --git a/IPL/include/opencv/opencv2/gapi/gcompiled_async.hpp b/IPL/include/opencv/opencv2/gapi/gcompiled_async.hpp
new file mode 100644
index 0000000..a0c2917
--- /dev/null
+++ b/IPL/include/opencv/opencv2/gapi/gcompiled_async.hpp
@@ -0,0 +1,73 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2019 Intel Corporation
+
+
+#ifndef OPENCV_GAPI_GCOMPILED_ASYNC_HPP
+#define OPENCV_GAPI_GCOMPILED_ASYNC_HPP
+
+#include <future>           //for std::future
+#include <exception>        //for std::exception_ptr
+#include <functional>       //for std::function
+#include <opencv2/gapi/garg.hpp>
+#include <opencv2/gapi/own/exports.hpp>
+
+namespace cv {
+    //fwd declaration
+    class GCompiled;
+
+namespace gapi{
+namespace wip {
+    class GAsyncContext;
+    /**
+    These functions asynchronously (i.e. probably on a separate thread of execution) call the GCompiled::operator() member function of their first argument with copies of the rest of the arguments (except callback) passed in.
+    The difference between the functions is the way to get the completion notification (via a callback or by waiting on a std::future object).
+    If an exception occurs during that call, it is transferred to the callback (via its function parameter) or passed to the future (and will be thrown on a call to std::future::get).
+
+    N.B. :
+    Input arguments are copied on the call to the async function (actually on the call to cv::gin) and thus do not have to outlive the actual completion of the asynchronous activity.
+    Output arguments, in contrast, are "captured" by reference (pointer) and therefore _must_ outlive the asynchronous activity
+    (i.e. live at least until the callback is called or the future is unblocked).
+
+    @param gcmpld       Compiled computation (graph) to start asynchronously
+    @param callback     Callback to be called when execution of gcmpld is done
+    @param ins          Input parameters for gcmpld
+    @param outs         Output parameters for gcmpld
+    */
+    GAPI_EXPORTS void                async(GCompiled& gcmpld, std::function<void(std::exception_ptr)>&& callback, GRunArgs &&ins, GRunArgsP &&outs);
+
+    /** @overload
+    @param gcmpld       Compiled computation (graph) to run asynchronously
+    @param callback     Callback to be called when execution of gcmpld is done
+    @param ins          Input parameters for gcmpld
+    @param outs         Output parameters for gcmpld
+    @param ctx          Context this request belongs to
+    @see   async GAsyncContext
+    */
+    GAPI_EXPORTS void                async(GCompiled& gcmpld, std::function<void(std::exception_ptr)>&& callback, GRunArgs &&ins, GRunArgsP &&outs, GAsyncContext& ctx);
+
+    /** @overload
+    @param gcmpld       Compiled computation (graph) to run asynchronously
+    @param ins          Input parameters for gcmpld
+    @param outs         Output parameters for gcmpld
+    @return             std::future<void> object to wait for completion of the async operation
+    @see async
+    */
+    GAPI_EXPORTS std::future<void>   async(GCompiled& gcmpld, GRunArgs &&ins, GRunArgsP &&outs);
+
+    /** @overload
+    @param gcmpld       Compiled computation (graph) to run asynchronously
+    @param ins          Input parameters for gcmpld
+    @param outs         Output parameters for gcmpld
+    @param ctx          Context this request belongs to
+    @return             std::future<void> object to wait for completion of the async operation
+    @see   async GAsyncContext
+    */
+    GAPI_EXPORTS std::future<void>   async(GCompiled& gcmpld, GRunArgs &&ins, GRunArgsP &&outs, GAsyncContext& ctx);
+} // namespace wip
+} // namespace gapi
+} // namespace cv
+
+#endif // OPENCV_GAPI_GCOMPILED_ASYNC_HPP
diff --git a/IPL/include/opencv/opencv2/gapi/gcompoundkernel.hpp b/IPL/include/opencv/opencv2/gapi/gcompoundkernel.hpp
new file mode 100644
index 0000000..2f17064
--- /dev/null
+++ b/IPL/include/opencv/opencv2/gapi/gcompoundkernel.hpp
@@ -0,0 +1,129 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2018-2019 Intel Corporation
+
+
+#ifndef OPENCV_GAPI_GCOMPOUNDKERNEL_HPP
+#define OPENCV_GAPI_GCOMPOUNDKERNEL_HPP
+
+#include <opencv2/gapi/opencv_includes.hpp>
+#include <opencv2/gapi/gcommon.hpp>
+#include <opencv2/gapi/gkernel.hpp>
+#include <opencv2/gapi/garg.hpp>
+
+namespace cv {
+namespace gapi
+{
+namespace compound
+{
+    // FIXME: users should not need to know about this function; it is
+    // needed so that users may define compound kernels (as they do CPU kernels).
+    GAPI_EXPORTS cv::gapi::GBackend backend(); // returned by cv::detail::GCompoundKernelImpl::backend()
+} // namespace compound
+} // namespace gapi
+
+namespace detail
+{
+
+struct GCompoundContext // state handed to a compound kernel expansion: inputs (m_args) and results (m_results)
+{
+    explicit GCompoundContext(const GArgs& in_args); // defined out of line; presumably seeds m_args from in_args -- TODO confirm
+    template<typename T>
+    const T& inArg(int input) { return m_args.at(input).get<T>(); } // input argument #input, read as T via the element's get<T>()
+
+    GArgs m_args;    // kernel inputs; GArray/GOpaque slots are overwritten by the get_compound_in<> specializations
+    GArgs m_results; // outputs of Impl::expand(), assigned in GCompoundCallHelper::expand_impl()
+};
+
+class GAPI_EXPORTS GCompoundKernel
+{
+// Compound kernel must use all of its inputs
+public:
+    using F = std::function<void(GCompoundContext& ctx)>; // expansion callback operating on a GCompoundContext
+
+    explicit GCompoundKernel(const F& f); // defined out of line; presumably stores f in m_f -- TODO confirm
+    void apply(GCompoundContext& ctx);    // defined out of line; presumably invokes m_f(ctx) -- TODO confirm
+
+protected:
+    F m_f; // callback of type F held by this kernel
+};
+
+template<typename T> struct get_compound_in // generic case: pass the stored input argument through
+{
+    static T get(GCompoundContext &ctx, int idx) { return ctx.inArg<T>(idx); } // input #idx read as T, returned by value
+};
+
+template<typename U> struct get_compound_in<cv::GArray<U>> // cv::GArray<U> inputs: substitute a newly created object
+{
+    static cv::GArray<U> get(GCompoundContext &ctx, int idx)
+    {
+        auto array = cv::GArray<U>();  // default-constructed GArray, not the stored argument
+        ctx.m_args[idx] = GArg(array); // overwrite input slot #idx with the new object
+        return array;                  // returned for use as the matching Impl::expand() argument
+    }
+};
+
+template<typename U> struct get_compound_in<cv::GOpaque<U>> // cv::GOpaque<U> inputs: substitute a newly created object
+{
+    static cv::GOpaque<U> get(GCompoundContext &ctx, int idx)
+    {
+        auto opaq = cv::GOpaque<U>(); // default-constructed GOpaque, not the stored argument
+        ctx.m_args[idx] = GArg(opaq); // overwrite input slot #idx with the new object
+        return opaq;                  // returned for use as the matching Impl::expand() argument
+    }
+};
+
+template<typename, typename, typename>
+struct GCompoundCallHelper; // primary template, declared only; the tuple-based specialization below does the work
+
+template<typename Impl, typename... Ins, typename... Outs>
+struct GCompoundCallHelper<Impl, std::tuple<Ins...>, std::tuple<Outs...> >
+{
+    template<int... IIs, int... OIs>
+    static void expand_impl(GCompoundContext &ctx, detail::Seq<IIs...>, detail::Seq<OIs...>)
+    {
+        auto result = Impl::expand(get_compound_in<Ins>::get(ctx, IIs)...); // fetch input #IIs as type Ins, expanded pairwise
+        auto tuple_return = tuple_wrap_helper<decltype(result)>::get(std::move(result)); // presumably normalizes the result to a tuple -- TODO confirm
+        ctx.m_results = { cv::GArg(std::get<OIs>(tuple_return))... }; // one GArg per output index OIs
+    }
+
+    static void expand(GCompoundContext &ctx) // entry point wrapped by GCompoundKernelImpl::kernel()
+    {
+        expand_impl(ctx,
+                    typename detail::MkSeq<sizeof...(Ins)>::type(),   // index pack sized by the number of inputs
+                    typename detail::MkSeq<sizeof...(Outs)>::type()); // index pack sized by the number of outputs
+    }
+};
+
+template<class Impl, class K>
+class GCompoundKernelImpl: public cv::detail::GCompoundCallHelper<Impl, typename K::InArgs, typename K::OutArgs>,
+                           public cv::detail::KernelTag
+{
+    using P = cv::detail::GCompoundCallHelper<Impl, typename K::InArgs, typename K::OutArgs>; // shorthand for the call-helper base class
+
+public:
+    using API = K; // the kernel interface this implementation is declared for
+
+    static cv::gapi::GBackend backend() { return cv::gapi::compound::backend(); } // forwards to cv::gapi::compound::backend()
+    static GCompoundKernel    kernel()  { return GCompoundKernel(&P::expand);   } // wraps P::expand in a GCompoundKernel
+};
+
+} // namespace detail
+
+
+/**
+ * Declares a new compound kernel: opens a struct derived from
+ * cv::detail::GCompoundKernelImpl<Name, API>; a body providing a static expand() must follow.
+ * See this [documentation chapter](@ref gapi_kernel_compound) for more details.
+ *
+ * @param Name type name for new kernel
+ * @param API the interface this kernel implements
+ */
+#define GAPI_COMPOUND_KERNEL(Name, API) \
+    struct Name: public cv::detail::GCompoundKernelImpl<Name, API>
+
+} // namespace cv
+
+#endif // OPENCV_GAPI_GCOMPOUNDKERNEL_HPP
diff --git a/IPL/include/opencv/opencv2/gapi/gcomputation.hpp b/IPL/include/opencv/opencv2/gapi/gcomputation.hpp
new file mode 100644
index 0000000..1ff874a
--- /dev/null
+++ b/IPL/include/opencv/opencv2/gapi/gcomputation.hpp
@@ -0,0 +1,559 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2018 Intel Corporation
+
+
+#ifndef OPENCV_GAPI_GCOMPUTATION_HPP
+#define OPENCV_GAPI_GCOMPUTATION_HPP
+
+#include <functional>
+
+#include <opencv2/gapi/util/util.hpp>
+#include <opencv2/gapi/gcommon.hpp>
+#include <opencv2/gapi/gproto.hpp>
+#include <opencv2/gapi/garg.hpp>
+#include <opencv2/gapi/gcompiled.hpp>
+#include <opencv2/gapi/gstreaming.hpp>
+
+namespace cv {
+
+namespace detail
+{
+    // FIXME: move to algorithm, cover with separate tests
+    // FIXME: replace with O(1) version (both memory and compilation time)
+    template<typename...>
+    struct last_type; // primary template, declared only (no definition for an empty pack)
+
+    template<typename T>
+    struct last_type<T> { using type = T;}; // base case: a single remaining type is the last one
+
+    template<typename T, typename... Ts>
+    struct last_type<T, Ts...> { using type = typename last_type<Ts...>::type; }; // recursive case: drop the head, recurse on the tail
+
+    template<typename... Ts>
+    using last_type_t = typename last_type<Ts...>::type; // convenience alias for last_type<Ts...>::type
+} // namespace detail
+
+/**
+ * \addtogroup gapi_main_classes
+ * @{
+ *
+ * @brief G-API classes for constructed and compiled graphs.
+ */
+/**
+ * @brief GComputation class represents a captured computation
+ * graph. GComputation objects form boundaries for expression code
+ * user writes with G-API, allowing to compile and execute it.
+ *
+ * G-API computations are defined with input/output data
+ * objects. G-API will track automatically which operations connect
+ * specified outputs to the inputs, forming up a call graph to be
+ * executed. The below example expresses calculation of Sobel operator
+ * for edge detection (\f$G = \sqrt{G_x^2 + G_y^2}\f$):
+ *
+ * @snippet modules/gapi/samples/api_ref_snippets.cpp graph_def
+ *
+ * Full pipeline can be now captured with this object declaration:
+ *
+ * @snippet modules/gapi/samples/api_ref_snippets.cpp graph_cap_full
+ *
+ * Input/output data objects on which a call graph should be
+ * reconstructed are passed using special wrappers cv::GIn and
+ * cv::GOut. G-API will track automatically which operations form a
+ * path from inputs to outputs and build the execution graph appropriately.
+ *
+ * Note that cv::GComputation doesn't take ownership of the data objects
+ * it is defined on. Moreover, multiple GComputation objects may be
+ * defined on the same expressions, e.g. a smaller pipeline which
+ * expects that image gradients are already pre-calculated may be
+ * defined like this:
+ *
+ * @snippet modules/gapi/samples/api_ref_snippets.cpp graph_cap_sub
+ *
+ * The resulting graph would expect two inputs and produce one
+ * output. In this case, it doesn't matter if gx/gy data objects are
+ * results of cv::gapi::Sobel operators -- G-API will stop unrolling
+ * expressions and building the underlying graph once it reaches these
+ * data objects.
+ *
+ * The way how GComputation is defined is important as its definition
+ * specifies graph _protocol_ -- the way how the graph should be
+ * used. Protocol is defined by number of inputs, number of outputs,
+ * and shapes of inputs and outputs.
+ *
+ * In the above example, sobelEdge expects one Mat on input and
+ * produces one Mat; while sobelEdgeSub expects two Mats on input and
+ * produces one Mat. GComputation's protocol defines how other
+ * computation methods should be used -- cv::GComputation::compile() and
+ * cv::GComputation::apply(). For example, if a graph is defined on
+ * two GMat inputs, two cv::Mat objects have to be passed to apply()
+ * for execution. GComputation checks protocol correctness in runtime
+ * so passing a different number of objects in apply() or passing
+ * cv::Scalar instead of cv::Mat there would compile well as a C++
+ * source but raise an exception in run-time. G-API also comes with a
+ * typed wrapper cv::GComputationT<> which introduces this type-checking in
+ * compile-time.
+ *
+ * cv::GComputation itself is a thin object which just captures what
+ * the graph is. The compiled graph (which actually processes data) is
+ * represented by class GCompiled. Use compile() method to generate a
+ * compiled graph with given compile options. cv::GComputation can
+ * also be used to process data with implicit graph compilation
+ * on-the-fly, see apply() for details.
+ *
+ * GComputation is a reference-counted object -- once defined, all its
+ * copies will refer to the same instance.
+ *
+ * @sa GCompiled
+ */
+class GAPI_EXPORTS GComputation
+{
+public:
+    class Priv;                                        // implementation class, only declared here
+    typedef std::function<GComputation()> Generator;   // callable which takes nothing and returns a GComputation
+
+    // Various constructors enable different ways to define a computation: /////
+    // 1. Generic constructors
+    /**
+     * @brief Define a computation using a generator function.
+     *
+     * Graph can be defined in-place directly at the moment of its
+     * construction with a lambda:
+     *
+     * @snippet modules/gapi/samples/api_ref_snippets.cpp graph_gen
+     *
+     * This may be useful since all temporary objects (cv::GMats) and
+     * namespaces can be localized to scope of lambda, without
+     * contaminating the parent scope with probably unnecessary objects
+     * and information.
+     *
+     * @param gen generator function which returns a cv::GComputation,
+     * see Generator.
+     */
+    GComputation(const Generator& gen);                // Generator
+                                                       // overload
+
+    /**
+     * @brief Generic GComputation constructor.
+     *
+     * Constructs a new graph with a given protocol, specified as a
+     * flow of operations connecting input/output objects. Throws if
+     * the passed boundaries are invalid, e.g. if there's no
+     * functional dependency (path) between given outputs and inputs.
+     *
+     * @param ins Input data vector.
+     * @param outs Output data vector.
+     *
+     * @note Don't construct GProtoInputArgs/GProtoOutputArgs objects
+     * directly, use cv::GIn()/cv::GOut() wrapper functions instead.
+     *
+     * @sa @ref gapi_data_objects
+     */
+    GComputation(GProtoInputArgs &&ins,
+                 GProtoOutputArgs &&outs);             // Arg-to-arg overload
+
+    // 2. Syntax sugar and compatibility overloads
+    /**
+     * @brief Defines a unary (one input -- one output) computation
+     *
+     * @overload
+     * @param in input GMat of the defined unary computation
+     * @param out output GMat of the defined unary computation
+     */
+    GComputation(GMat in, GMat out);                   // Unary overload
+
+    /**
+     * @brief Defines a unary (one input -- one output) computation
+     *
+     * @overload
+     * @param in input GMat of the defined unary computation
+     * @param out output GScalar of the defined unary computation
+     */
+    GComputation(GMat in, GScalar out);                // Unary overload (scalar)
+
+    /**
+     * @brief Defines a binary (two inputs -- one output) computation
+     *
+     * @overload
+     * @param in1 first input GMat of the defined binary computation
+     * @param in2 second input GMat of the defined binary computation
+     * @param out output GMat of the defined binary computation
+     */
+    GComputation(GMat in1, GMat in2, GMat out);        // Binary overload
+
+    /**
+     * @brief Defines a binary (two inputs -- one output) computation
+     *
+     * @overload
+     * @param in1 first input GMat of the defined binary computation
+     * @param in2 second input GMat of the defined binary computation
+     * @param out output GScalar of the defined binary computation
+     */
+    GComputation(GMat in1, GMat in2, GScalar out);     // Binary
+                                                       // overload
+                                                       // (scalar)
+
+    /**
+     * @brief Defines a computation with arbitrary input/output number.
+     *
+     * @overload
+     * @param ins vector of inputs GMats for this computation
+     * @param outs vector of outputs GMats for this computation
+     *
+     * Use this overload for cases when number of computation
+     * inputs/outputs is not known in compile-time -- e.g. when graph
+     * is programmatically generated to build an image pyramid with
+     * the given number of levels, etc.
+     */
+    GComputation(const std::vector<GMat> &ins,         // Compatibility overload
+                 const std::vector<GMat> &outs);
+
+    // Various versions of apply(): ////////////////////////////////////////////
+    // 1. Generic apply()
+    /**
+     * @brief Compile graph on-the-fly and immediately execute it on
+     * the inputs data vectors.
+     *
+     * Number of input/output data objects must match GComputation's
+     * protocol, also types of host data objects (cv::Mat, cv::Scalar)
+     * must match the shapes of data objects from protocol (cv::GMat,
+     * cv::GScalar). If there's a mismatch, a run-time exception will
+     * be generated.
+     *
+     * Internally, a cv::GCompiled object is created for the given
+     * input format configuration, which then is executed on the input
+     * data immediately. cv::GComputation caches compiled objects
+     * produced within apply() -- if this method would be called next
+     * time with the same input parameters (image formats, image
+     * resolution, etc), the underlying compiled graph will be reused
+     * without recompilation. If new metadata doesn't match the cached
+     * one, the underlying compiled graph is regenerated.
+     *
+     * @note compile() always triggers a compilation process and
+     * produces a new GCompiled object regardless if a similar one has
+     * been cached via apply() or not.
+     *
+     * @param ins vector of input data to process. Don't create
+     * GRunArgs object manually, use cv::gin() wrapper instead.
+     * @param outs vector of output data to fill results in. cv::Mat
+     * objects may be empty in this vector, G-API will automatically
+     * initialize it with the required format & dimensions. Don't
+     * create GRunArgsP object manually, use cv::gout() wrapper instead.
+     * @param args a list of compilation arguments to pass to the
+     * underlying compilation process. Don't create GCompileArgs
+     * object manually, use cv::compile_args() wrapper instead.
+     *
+     * @sa @ref gapi_data_objects, @ref gapi_compile_args
+     */
+    void apply(GRunArgs &&ins, GRunArgsP &&outs, GCompileArgs &&args = {});       // Arg-to-arg overload
+
+    /// @private -- Exclude this function from OpenCV documentation
+    void apply(const std::vector<cv::gapi::own::Mat>& ins,                        // Compatibility overload
+               const std::vector<cv::gapi::own::Mat>& outs,
+               GCompileArgs &&args = {});
+
+    // 2. Syntax sugar and compatibility overloads
+#if !defined(GAPI_STANDALONE)
+    /**
+     * @brief Execute a unary computation (with compilation on the fly)
+     *
+     * @overload
+     * @param in input cv::Mat for unary computation
+     * @param out output cv::Mat for unary computation
+     * @param args compilation arguments for underlying compilation
+     * process.
+     */
+    void apply(cv::Mat in, cv::Mat &out, GCompileArgs &&args = {});               // Unary overload
+
+    /**
+     * @brief Execute a unary computation (with compilation on the fly)
+     *
+     * @overload
+     * @param in input cv::Mat for unary computation
+     * @param out output cv::Scalar for unary computation
+     * @param args compilation arguments for underlying compilation
+     * process.
+     */
+    void apply(cv::Mat in, cv::Scalar &out, GCompileArgs &&args = {});            // Unary overload (scalar)
+
+    /**
+     * @brief Execute a binary computation (with compilation on the fly)
+     *
+     * @overload
+     * @param in1 first input cv::Mat for binary computation
+     * @param in2 second input cv::Mat for binary computation
+     * @param out output cv::Mat for binary computation
+     * @param args compilation arguments for underlying compilation
+     * process.
+     */
+    void apply(cv::Mat in1, cv::Mat in2, cv::Mat &out, GCompileArgs &&args = {}); // Binary overload
+
+    /**
+     * @brief Execute a binary computation (with compilation on the fly)
+     *
+     * @overload
+     * @param in1 first input cv::Mat for binary computation
+     * @param in2 second input cv::Mat for binary computation
+     * @param out output cv::Scalar for binary computation
+     * @param args compilation arguments for underlying compilation
+     * process.
+     */
+    void apply(cv::Mat in1, cv::Mat in2, cv::Scalar &out, GCompileArgs &&args = {}); // Binary overload (scalar)
+
+    /**
+     * @brief Execute a computation with arbitrary number of
+     * inputs/outputs (with compilation on-the-fly).
+     *
+     * @overload
+     * @param ins vector of input cv::Mat objects to process by the
+     * computation.
+     * @param outs vector of output cv::Mat objects to produce by the
+     * computation.
+     * @param args compilation arguments for underlying compilation
+     * process.
+     *
+     * Numbers of elements in ins/outs vectors must match numbers of
+     * inputs/outputs which were used to define this GComputation.
+     */
+    void apply(const std::vector<cv::Mat>& ins,         // Compatibility overload
+                     std::vector<cv::Mat>& outs,
+               GCompileArgs &&args = {});
+#endif // !defined(GAPI_STANDALONE)
+    // Various versions of compile(): //////////////////////////////////////////
+    // 1. Generic compile() - requires metas to be passed as vector
+    /**
+     * @brief Compile the computation for specific input format(s).
+     *
+     * This method triggers compilation process and produces a new
+     * GCompiled object which then can process data of the given
+     * format. Passing data with different format to the compiled
+     * computation will generate a run-time exception.
+     *
+     * @param in_metas vector of input metadata configuration. Grab
+     * metadata from real data objects (like cv::Mat or cv::Scalar)
+     * using cv::descr_of(), or create it on your own.
+     * @param args compilation arguments for this compilation
+     * process. Compilation arguments directly affect what kind of
+     * executable object would be produced, e.g. which kernels (and
+     * thus, devices) would be used to execute computation.
+     *
+     * @return GCompiled, an executable computation compiled
+     * specifically for the given input parameters.
+     *
+     * @sa @ref gapi_compile_args
+     */
+    GCompiled compile(GMetaArgs &&in_metas, GCompileArgs &&args = {});
+
+    // 2. Syntax sugar - variadic list of metas, no extra compile args
+    // FIXME: SFINAE looks ugly in the generated documentation
+    /**
+     * @overload
+     *
+     * Takes a variadic parameter pack with metadata
+     * descriptors for which a compiled object needs to be produced.
+     *
+     * @return GCompiled, an executable computation compiled
+     * specifically for the given input parameters.
+     */
+    template<typename... Ts>
+    auto compile(const Ts&... metas) ->
+        typename std::enable_if<detail::are_meta_descrs<Ts...>::value, GCompiled>::type
+    {
+        return compile(GMetaArgs{GMetaArg(metas)...}, GCompileArgs()); // forwards to the generic overload with default-constructed compile args
+    }
+
+    // 3. Syntax sugar - variadic list of metas, extra compile args
+    // (seems optional parameters don't work well when a variadic template
+    // parameter pack comes first)
+    //
+    // Ideally it should look like:
+    //
+    //     template<typename... Ts>
+    //     GCompiled compile(const Ts&... metas, GCompileArgs &&args)
+    //
+    // But not all compilers can handle this (and seems they shouldn't be able to).
+    // FIXME: SFINAE looks ugly in the generated documentation
+    /**
+     * @overload
+     *
+     * Takes a variadic parameter pack with metadata
+     * descriptors for which a compiled object needs to be produced,
+     * followed by GCompileArgs object representing compilation
+     * arguments for this process.
+     *
+     * @return GCompiled, an executable computation compiled
+     * specifically for the given input parameters.
+     */
+    template<typename... Ts>
+    auto compile(const Ts&... meta_and_compile_args) ->
+        typename std::enable_if<detail::are_meta_descrs_but_last<Ts...>::value
+                                && std::is_same<GCompileArgs, detail::last_type_t<Ts...> >::value,
+                                GCompiled>::type
+    {
+        //FIXME: wrapping meta_and_compile_args into a tuple to unwrap them inside a helper function is overkill
+        return compile(std::make_tuple(meta_and_compile_args...),
+                       typename detail::MkSeq<sizeof...(Ts)-1>::type()); // index pack sized for all arguments except the trailing GCompileArgs
+    }
+
+
+    // FIXME: Document properly in the Doxygen format
+    // Video-oriented pipeline compilation:
+    // 1. A generic version
+    /**
+     * @brief Compile the computation for streaming mode.
+     *
+     * This method triggers compilation process and produces a new
+     * GStreamingCompiled object which then can process video stream
+     * data of the given format. Passing a stream in a different
+     * format to the compiled computation will generate a run-time
+     * exception.
+     *
+     * @param in_metas vector of input metadata configuration. Grab
+     * metadata from real data objects (like cv::Mat or cv::Scalar)
+     * using cv::descr_of(), or create it on your own.
+     *
+     * @param args compilation arguments for this compilation
+     * process. Compilation arguments directly affect what kind of
+     * executable object would be produced, e.g. which kernels (and
+     * thus, devices) would be used to execute computation.
+     *
+     * @return GStreamingCompiled, a streaming-oriented executable
+     * computation compiled specifically for the given input
+     * parameters.
+     *
+     * @sa @ref gapi_compile_args
+     */
+    GStreamingCompiled compileStreaming(GMetaArgs &&in_metas, GCompileArgs &&args = {});
+
+    /**
+     * @brief Compile the computation for streaming mode.
+     *
+     * This method triggers compilation process and produces a new
+     * GStreamingCompiled object which then can process video stream
+     * data in any format. Underlying mechanisms will be adjusted to
+     * every new input video stream automatically, but please note that
+     * _not all_ existing backends support this (see reshape()).
+     *
+     * @param args compilation arguments for this compilation
+     * process. Compilation arguments directly affect what kind of
+     * executable object would be produced, e.g. which kernels (and
+     * thus, devices) would be used to execute computation.
+     *
+     * @return GStreamingCompiled, a streaming-oriented executable
+     * computation compiled for any input image format.
+     *
+     * @sa @ref gapi_compile_args
+     */
+    GStreamingCompiled compileStreaming(GCompileArgs &&args = {});
+
+    // 2. Direct metadata version
+    /**
+     * @overload
+     *
+     * Takes a variadic parameter pack with metadata
+     * descriptors for which a compiled object needs to be produced.
+     *
+     * @return GStreamingCompiled, a streaming-oriented executable
+     * computation compiled specifically for the given input
+     * parameters.
+     */
+    template<typename... Ts>
+    auto compileStreaming(const Ts&... metas) ->
+        typename std::enable_if<detail::are_meta_descrs<Ts...>::value, GStreamingCompiled>::type
+    {
+        return compileStreaming(GMetaArgs{GMetaArg(metas)...}, GCompileArgs()); // forwards to the generic overload with default-constructed compile args
+    }
+
+    // 3. Direct metadata + compile arguments version
+    /**
+     * @overload
+     *
+     * Takes a variadic parameter pack with metadata
+     * descriptors for which a compiled object needs to be produced,
+     * followed by GCompileArgs object representing compilation
+     * arguments for this process.
+     *
+     * @return GStreamingCompiled, a streaming-oriented executable
+     * computation compiled specifically for the given input
+     * parameters.
+     */
+    template<typename... Ts>
+    auto compileStreaming(const Ts&... meta_and_compile_args) ->
+        typename std::enable_if<detail::are_meta_descrs_but_last<Ts...>::value
+                                && std::is_same<GCompileArgs, detail::last_type_t<Ts...> >::value,
+                                GStreamingCompiled>::type
+    {
+        //FIXME: wrapping meta_and_compile_args into a tuple to unwrap them inside a helper function is overkill
+        return compileStreaming(std::make_tuple(meta_and_compile_args...),
+                                typename detail::MkSeq<sizeof...(Ts)-1>::type()); // index pack sized for all arguments except the trailing GCompileArgs
+    }
+
+    // Internal use only
+    /// @private
+    Priv& priv();
+    /// @private
+    const Priv& priv() const;
+
+protected:
+
+    // 4. Helper methods for (3)
+    /// @private
+    template<typename... Ts, int... IIs>
+    GCompiled compile(const std::tuple<Ts...> &meta_and_compile_args, detail::Seq<IIs...>)
+    {
+        GMetaArgs meta_args = {GMetaArg(std::get<IIs>(meta_and_compile_args))...};  // tuple elements selected by IIs become the metas
+        GCompileArgs comp_args = std::get<sizeof...(Ts)-1>(meta_and_compile_args);  // last tuple element is the GCompileArgs
+        return compile(std::move(meta_args), std::move(comp_args));                 // delegate to the generic compile()
+    }
+    template<typename... Ts, int... IIs> // streaming counterpart of the compile() helper above
+    GStreamingCompiled compileStreaming(const std::tuple<Ts...> &meta_and_compile_args, detail::Seq<IIs...>)
+    {
+        GMetaArgs meta_args = {GMetaArg(std::get<IIs>(meta_and_compile_args))...};  // tuple elements selected by IIs become the metas
+        GCompileArgs comp_args = std::get<sizeof...(Ts)-1>(meta_and_compile_args);  // last tuple element is the GCompileArgs
+        return compileStreaming(std::move(meta_args), std::move(comp_args));        // delegate to the generic compileStreaming()
+    }
+    /// @private
+    std::shared_ptr<Priv> m_priv;
+};
+/** @} */
+
+namespace gapi
+{
+    // FIXME: all these standalone functions need to be added to some
+    // common documentation section
+    /**
+     * @brief Define a tagged island (subgraph) within a computation.
+     *
+     * Declare an Island tagged with `name` and defined from `ins` to `outs`
+     * (exclusively, as ins/outs are data objects, and regioning is done on
+     * operations level).
+     * Throws if any operation between `ins` and `outs` is already assigned
+     * to another island.
+     *
+     * Islands allow to partition graph into subgraphs, fine-tuning
+     * the way it is scheduled by the underlying executor.
+     *
+     * @param name name of the Island to create
+     * @param ins vector of input data objects where the subgraph
+     * begins
+     * @param outs vector of output data objects where the subgraph
+     * ends.
+     *
+     * The way how an island is defined is similar to how
+     * cv::GComputation is defined on input/output data objects.
+     * Same rules apply here as well -- if there's no functional
+     * dependency between inputs and outputs or not enough
+     * input data objects were specified to properly calculate all
+     * outputs, an exception is thrown.
+     *
+     * Use cv::GIn() / cv::GOut() to specify input/output vectors.
+     */
+    void GAPI_EXPORTS island(const std::string &name,
+                             GProtoInputArgs  &&ins,
+                             GProtoOutputArgs &&outs);
+} // namespace gapi
+
+} // namespace cv
+#endif // OPENCV_GAPI_GCOMPUTATION_HPP
diff --git a/IPL/include/opencv/opencv2/gapi/gcomputation_async.hpp b/IPL/include/opencv/opencv2/gapi/gcomputation_async.hpp
new file mode 100644
index 0000000..8af603e
--- /dev/null
+++ b/IPL/include/opencv/opencv2/gapi/gcomputation_async.hpp
@@ -0,0 +1,69 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2019 Intel Corporation
+
+#ifndef OPENCV_GAPI_GCOMPUTATION_ASYNC_HPP
+#define OPENCV_GAPI_GCOMPUTATION_ASYNC_HPP
+
+
+#include <future>                           //for std::future
+#include <exception>                        //for std::exception_ptr
+#include <functional>                       //for std::function
+#include <opencv2/gapi/garg.hpp>            //for GRunArgs, GRunArgsP
+#include <opencv2/gapi/gcommon.hpp>         //for GCompileArgs
+#include <opencv2/gapi/own/exports.hpp>
+
+
+namespace cv {
+    //fwd declaration
+    class GComputation;
+namespace gapi {
+namespace wip  {
+    class GAsyncContext;
+    /** In contrast to async() functions, these do call GComputation::apply() member function of the GComputation passed in.
+
+    @param gcomp        Computation (graph) to run asynchronously
+    @param callback     Callback to be called when execution of gcomp is done
+    @param ins          Input parameters for gcomp
+    @param outs         Output parameters for gcomp
+    @param args         Compile arguments to pass to GComputation::apply()
+    @see                async
+    */
+    GAPI_EXPORTS void                async_apply(GComputation& gcomp, std::function<void(std::exception_ptr)>&& callback, GRunArgs &&ins, GRunArgsP &&outs, GCompileArgs &&args = {});
+    /** @overload
+    @param gcomp        Computation (graph) to run asynchronously
+    @param callback     Callback to be called when execution of gcomp is done
+    @param ins          Input parameters for gcomp
+    @param outs         Output parameters for gcomp
+    @param args         Compile arguments to pass to GComputation::apply()
+    @param ctx          Context this request belongs to
+    @see                async_apply async GAsyncContext
+    */
+    GAPI_EXPORTS void                async_apply(GComputation& gcomp, std::function<void(std::exception_ptr)>&& callback, GRunArgs &&ins, GRunArgsP &&outs, GCompileArgs &&args, GAsyncContext& ctx);
+    /** @overload
+    @param gcomp        Computation (graph) to run asynchronously
+    @param ins          Input parameters for gcomp
+    @param outs         Output parameters for gcomp
+    @param args         Compile arguments to pass to GComputation::apply()
+    @return             std::future<void> object to wait for completion of async operation
+    @see                async_apply async
+    */
+    GAPI_EXPORTS std::future<void>   async_apply(GComputation& gcomp, GRunArgs &&ins, GRunArgsP &&outs, GCompileArgs &&args = {});
+    /** @overload
+    @param gcomp        Computation (graph) to run asynchronously
+    @param ins          Input parameters for gcomp
+    @param outs         Output parameters for gcomp
+    @param args         Compile arguments to pass to GComputation::apply()
+    @param ctx          Context this request belongs to
+    @return             std::future<void> object to wait for completion of async operation
+    @see                async_apply async GAsyncContext
+    */
+    GAPI_EXPORTS std::future<void>   async_apply(GComputation& gcomp, GRunArgs &&ins, GRunArgsP &&outs, GCompileArgs &&args,  GAsyncContext& ctx);
+} // namespace wip
+} // namespace gapi
+} // namespace cv
+
+
+#endif //OPENCV_GAPI_GCOMPUTATION_ASYNC_HPP
diff --git a/IPL/include/opencv/opencv2/gapi/gkernel.hpp b/IPL/include/opencv/opencv2/gapi/gkernel.hpp
new file mode 100644
index 0000000..478d7d3
--- /dev/null
+++ b/IPL/include/opencv/opencv2/gapi/gkernel.hpp
@@ -0,0 +1,703 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2018-2019 Intel Corporation
+
+
+#ifndef OPENCV_GAPI_GKERNEL_HPP
+#define OPENCV_GAPI_GKERNEL_HPP
+
+#include <functional>
+#include <iostream>
+#include <string>  // string
+#include <type_traits> // false_type, true_type
+#include <unordered_map> // map (for GKernelPackage)
+#include <utility> // tuple
+
+#include <opencv2/gapi/gcommon.hpp> // CompileArgTag
+#include <opencv2/gapi/util/util.hpp> // Seq
+#include <opencv2/gapi/gcall.hpp>
+#include <opencv2/gapi/garg.hpp>      // GArg
+#include <opencv2/gapi/gmetaarg.hpp>  // GMetaArg
+#include <opencv2/gapi/gtype_traits.hpp> // GTypeTraits
+#include <opencv2/gapi/util/compiler_hints.hpp> //suppress_unused_warning
+#include <opencv2/gapi/gtransform.hpp>
+
+namespace cv {
+
+using GShapes = std::vector<GShape>;
+
+// GKernel describes a kernel API (not an implementation) to the system
+// FIXME: add attributes of a kernel, (e.g. number and types
+// of inputs, etc)
+struct GAPI_EXPORTS GKernel
+{
+    using M = std::function<GMetaArgs(const GMetaArgs &, const GArgs &)>; // (input metas, input args) -> output metas
+
+    const std::string name;       // kernel ID, defined by its API (signature)
+    const std::string tag;        // some (implementation-specific) tag
+    const M           outMeta;    // generic adaptor to API::outMeta(...)
+    const GShapes     outShapes;  // types (shapes) of the kernel's outputs
+};
+
+// GKernelImpl describes particular kernel implementation to the system
+struct GAPI_EXPORTS GKernelImpl
+{
+    util::any         opaque;    // backend-specific opaque info
+};
+
+template<typename, typename> class GKernelTypeM;
+
+namespace detail
+{
+    ////////////////////////////////////////////////////////////////////////////
+    // Yield<T>::yield() is used at graph construction time as a generic way to
+    // obtain the lazy "return value" (output #i) of a G-API operation call.
+    //
+    // NB: Yield<> lives directly in cv::detail, not in an unnamed namespace, so
+    // that inline/template code using it names the same type in every TU (ODR).
+    template<typename T> struct Yield; // primary template: declared only
+    template<> struct Yield<cv::GMat>
+    {
+        static inline cv::GMat yield(cv::GCall &call, int i) { return call.yield(i); }
+    };
+    template<> struct Yield<cv::GMatP>
+    {
+        static inline cv::GMatP yield(cv::GCall &call, int i) { return call.yieldP(i); }
+    };
+    template<> struct Yield<cv::GScalar>
+    {
+        static inline cv::GScalar yield(cv::GCall &call, int i) { return call.yieldScalar(i); }
+    };
+    template<typename U> struct Yield<cv::GArray<U> >
+    {
+        static inline cv::GArray<U> yield(cv::GCall &call, int i) { return call.yieldArray<U>(i); }
+    };
+    template<typename U> struct Yield<cv::GOpaque<U> >
+    {
+        static inline cv::GOpaque<U> yield(cv::GCall &call, int i) { return call.yieldOpaque<U>(i); }
+    };
+    // end of Yield<> specializations
+
+    ////////////////////////////////////////////////////////////////////////////
+    // Helper classes which bring outMeta() marshalling to kernel
+    // implementations
+    //
+    // 1. MetaType establishes G#Type -> G#Meta mapping between G-API dynamic
+    //    types and its metadata descriptor types.
+    //    This mapping is used to transform types to call outMeta() callback.
+    template<typename T> struct MetaType;
+    template<> struct MetaType<cv::GMat>    { using type = GMatDesc; };
+    template<> struct MetaType<cv::GMatP>   { using type = GMatDesc; };
+    template<> struct MetaType<cv::GFrame>  { using type = GMatDesc; };
+    template<> struct MetaType<cv::GScalar> { using type = GScalarDesc; };
+    template<typename U> struct MetaType<cv::GArray<U> >  { using type = GArrayDesc; };
+    template<typename U> struct MetaType<cv::GOpaque<U> > { using type = GOpaqueDesc; };
+    template<typename T> struct MetaType    { using type = T; }; // opaque args passed as-is
+
+    // 2. Hacky test based on MetaType to check if we operate on G-* type or not
+    template<typename T> using is_nongapi_type = std::is_same<T, typename MetaType<T>::type>;
+
+    // 3. Two ways to transform input arguments to its meta - for G-* and non-G* types:
+    template<typename T>
+    typename std::enable_if<!is_nongapi_type<T>::value, typename MetaType<T>::type>
+    ::type get_in_meta(const GMetaArgs &in_meta, const GArgs &, int idx)
+    {
+        return util::get<typename MetaType<T>::type>(in_meta.at(idx));
+    }
+
+    template<typename T>
+    typename std::enable_if<is_nongapi_type<T>::value, T>
+    ::type get_in_meta(const GMetaArgs &, const GArgs &in_args, int idx)
+    {
+        return in_args.at(idx).template get<T>();
+    }
+
+    // 4. The MetaHelper itself: an entity which generates outMeta() call
+    //    based on kernel signature, with arguments properly substituted.
+    // 4.1 - case for multiple return values
+    // FIXME: probably can be simplified with std::apply or analogue.
+    template<typename, typename, typename>
+    struct MetaHelper;
+
+    template<typename K, typename... Ins, typename... Outs>
+    struct MetaHelper<K, std::tuple<Ins...>, std::tuple<Outs...> >
+    {
+        template<int... IIs, int... OIs>
+        static GMetaArgs getOutMeta_impl(const GMetaArgs &in_meta,
+                                         const GArgs &in_args,
+                                         detail::Seq<IIs...>,
+                                         detail::Seq<OIs...>)
+        {
+            // FIXME: decay?
+            using R   = std::tuple<typename MetaType<Outs>::type...>;
+            const R r = K::outMeta( get_in_meta<Ins>(in_meta, in_args, IIs)... );
+            return GMetaArgs{ GMetaArg(std::get<OIs>(r))... };
+        }
+        // FIXME: help users identify how outMeta must look like (via default impl w/static_assert?)
+
+        static GMetaArgs getOutMeta(const GMetaArgs &in_meta,
+                                    const GArgs &in_args)
+        {
+            return getOutMeta_impl(in_meta,
+                                   in_args,
+                                   typename detail::MkSeq<sizeof...(Ins)>::type(),
+                                   typename detail::MkSeq<sizeof...(Outs)>::type());
+        }
+    };
+
+    // 4.2 - case for a single return value
+    // FIXME: How to avoid duplication here?
+    template<typename K, typename... Ins, typename Out>
+    struct MetaHelper<K, std::tuple<Ins...>, Out >
+    {
+        template<int... IIs>
+        static GMetaArgs getOutMeta_impl(const GMetaArgs &in_meta,
+                                         const GArgs &in_args,
+                                         detail::Seq<IIs...>)
+        {
+            // FIXME: decay?
+            using R = typename MetaType<Out>::type;
+            const R r = K::outMeta( get_in_meta<Ins>(in_meta, in_args, IIs)... );
+            return GMetaArgs{ GMetaArg(r) };
+        }
+        // FIXME: help users identify how outMeta must look like (via default impl w/static_assert?)
+
+        static GMetaArgs getOutMeta(const GMetaArgs &in_meta,
+                                    const GArgs &in_args)
+        {
+            return getOutMeta_impl(in_meta,
+                                   in_args,
+                                   typename detail::MkSeq<sizeof...(Ins)>::type());
+        }
+    };
+
+    ////////////////////////////////////////////////////////////////////////////
+    // Helper class to introduce tags to calls. By default there's no tag
+    struct NoTag {
+        static constexpr const char *tag() { return ""; }
+    };
+
+} // namespace detail
+
+// GKernelType and GKernelTypeM are base classes which implement typed ::on()
+// method based on kernel signature. GKernelTypeM stands for multiple-return-value kernels
+//
+// G_TYPED_KERNEL and G_TYPED_KERNEL_M macros inherit user classes from GKernelType and
+// GKernelTypeM respectively.
+
+template<typename K, typename... R, typename... Args>
+class GKernelTypeM<K, std::function<std::tuple<R...>(Args...)> >
+    : public detail::MetaHelper<K, std::tuple<Args...>, std::tuple<R...>>
+    , public detail::NoTag
+{
+    template<int... IIs>
+    static std::tuple<R...> yield(cv::GCall &call, detail::Seq<IIs...>)
+    {
+        return std::make_tuple(detail::Yield<R>::yield(call, IIs)...);
+    }
+
+public:
+    using InArgs  = std::tuple<Args...>;
+    using OutArgs = std::tuple<R...>;
+
+    static std::tuple<R...> on(Args... args)
+    {
+        cv::GCall call(GKernel{K::id(), K::tag(), &K::getOutMeta, {detail::GTypeTraits<R>::shape...}});
+        call.pass(args...);
+        return yield(call, typename detail::MkSeq<sizeof...(R)>::type());
+    }
+};
+
+template<typename, typename> class GKernelType;
+
+template<typename K, typename R, typename... Args>
+class GKernelType<K, std::function<R(Args...)> >
+    : public detail::MetaHelper<K, std::tuple<Args...>, R>
+    , public detail::NoTag
+{
+public:
+    using InArgs  = std::tuple<Args...>;
+    using OutArgs = std::tuple<R>;
+
+    static_assert(!cv::detail::contains<GFrame, OutArgs>::value, "Values of GFrame type can't be used as operation outputs");
+
+    static R on(Args... args)
+    {
+        cv::GCall call(GKernel{K::id(), K::tag(), &K::getOutMeta, {detail::GTypeTraits<R>::shape}});
+        call.pass(args...);
+        return detail::Yield<R>::yield(call, 0);
+    }
+};
+
+namespace detail {
+// This tiny class eliminates the semantic difference between
+// GKernelType and GKernelTypeM.
+template<typename, typename> class KernelTypeMedium;
+
+template<typename K, typename... R, typename... Args>
+class KernelTypeMedium<K, std::function<std::tuple<R...>(Args...)>> :
+    public cv::GKernelTypeM<K, std::function<std::tuple<R...>(Args...)>> {};
+
+template<typename K, typename R, typename... Args>
+class KernelTypeMedium<K, std::function<R(Args...)>> :
+    public cv::GKernelType<K, std::function<R(Args...)>> {};
+} // namespace detail
+
+} // namespace cv
+
+
+// FIXME: I don't know a better way so far. Feel free to suggest one
+// The problem is that every typed kernel should have ::id() but body
+// of the class is defined by user (with outMeta, other stuff)
+
+//! @cond IGNORED
+#define G_ID_HELPER_CLASS(Class)  Class##IdHelper
+
+#define G_ID_HELPER_BODY(Class, Id)                                         \
+    struct G_ID_HELPER_CLASS(Class)                                         \
+    {                                                                       \
+        static constexpr const char * id() {return Id;}                     \
+    };                                                                      \
+//! @endcond
+
+#define GET_G_TYPED_KERNEL(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, NAME, ...) NAME
+#define COMBINE_SIGNATURE(...) __VA_ARGS__
+// Ensure correct __VA_ARGS__ expansion on Windows
+#define __WRAP_VAARGS(x) x
+
+/**
+ * Helper for G_TYPED_KERNEL declares a new G-API Operation. See [Kernel API](@ref gapi_kernel_api)
+ * for more details.
+ *
+ * @param Class type name for this operation.
+ * @param API an `std::function<>`-like signature for the operation;
+ *        return type is a single value or a tuple of multiple values.
+ * @param Id string identifier for the operation. Must be unique.
+ */
+#define G_TYPED_KERNEL_HELPER(Class, API, Id)                                               \
+    G_ID_HELPER_BODY(Class, Id)                                                             \
+    struct Class final: public cv::detail::KernelTypeMedium<Class, std::function API >,     \
+                        public G_ID_HELPER_CLASS(Class)
+// {body} is to be defined by user
+
+#define G_TYPED_KERNEL_HELPER_2(Class, _1, _2, Id) \
+G_TYPED_KERNEL_HELPER(Class, COMBINE_SIGNATURE(_1, _2), Id)
+
+#define G_TYPED_KERNEL_HELPER_3(Class, _1, _2, _3, Id) \
+G_TYPED_KERNEL_HELPER(Class, COMBINE_SIGNATURE(_1, _2, _3), Id)
+
+#define G_TYPED_KERNEL_HELPER_4(Class, _1, _2, _3, _4, Id) \
+G_TYPED_KERNEL_HELPER(Class, COMBINE_SIGNATURE(_1, _2, _3, _4), Id)
+
+#define G_TYPED_KERNEL_HELPER_5(Class, _1, _2, _3, _4, _5, Id) \
+G_TYPED_KERNEL_HELPER(Class, COMBINE_SIGNATURE(_1, _2, _3, _4, _5), Id)
+
+#define G_TYPED_KERNEL_HELPER_6(Class, _1, _2, _3, _4, _5, _6, Id) \
+G_TYPED_KERNEL_HELPER(Class, COMBINE_SIGNATURE(_1, _2, _3, _4, _5, _6), Id)
+
+#define G_TYPED_KERNEL_HELPER_7(Class, _1, _2, _3, _4, _5, _6, _7, Id) \
+G_TYPED_KERNEL_HELPER(Class, COMBINE_SIGNATURE(_1, _2, _3, _4, _5, _6, _7), Id)
+
+#define G_TYPED_KERNEL_HELPER_8(Class, _1, _2, _3, _4, _5, _6, _7, _8, Id) \
+G_TYPED_KERNEL_HELPER(Class, COMBINE_SIGNATURE(_1, _2, _3, _4, _5, _6, _7, _8), Id)
+
+#define G_TYPED_KERNEL_HELPER_9(Class, _1, _2, _3, _4, _5, _6, _7, _8, _9, Id) \
+G_TYPED_KERNEL_HELPER(Class, COMBINE_SIGNATURE(_1, _2, _3, _4, _5, _6, _7, _8, _9), Id)
+
+#define G_TYPED_KERNEL_HELPER_10(Class, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, Id) \
+G_TYPED_KERNEL_HELPER(Class, COMBINE_SIGNATURE(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10), Id)
+
+/**
+ * Declares a new G-API Operation. See [Kernel API](@ref gapi_kernel_api)
+ * for more details.
+ *
+ * @param Class type name for this operation.
+ */
+#define G_TYPED_KERNEL(Class, ...) __WRAP_VAARGS(GET_G_TYPED_KERNEL(__VA_ARGS__, \
+                                                 G_TYPED_KERNEL_HELPER_10, \
+                                                 G_TYPED_KERNEL_HELPER_9, \
+                                                 G_TYPED_KERNEL_HELPER_8, \
+                                                 G_TYPED_KERNEL_HELPER_7, \
+                                                 G_TYPED_KERNEL_HELPER_6, \
+                                                 G_TYPED_KERNEL_HELPER_5, \
+                                                 G_TYPED_KERNEL_HELPER_4, \
+                                                 G_TYPED_KERNEL_HELPER_3, \
+                                                 G_TYPED_KERNEL_HELPER_2, \
+                                                 G_TYPED_KERNEL_HELPER)(Class, __VA_ARGS__)) \
+
+/**
+ * Declares a new G-API Operation. See [Kernel API](@ref gapi_kernel_api) for more details.
+ *
+ * @deprecated This macro is deprecated in favor of `G_TYPED_KERNEL` that is used for declaring any
+ * G-API Operation.
+ *
+ * @param Class type name for this operation.
+ */
+#define G_TYPED_KERNEL_M G_TYPED_KERNEL
+
+#define G_API_OP   G_TYPED_KERNEL
+#define G_API_OP_M G_API_OP
+
+namespace cv
+{
+namespace gapi
+{
+    // Prework: model "Device" API before it gets to G-API headers.
+    // FIXME: Don't mix with internal Backends class!
+    class GAPI_EXPORTS GBackend
+    {
+    public:
+        class Priv;
+
+        // TODO: make it template (call `new` within??)
+        GBackend();
+        explicit GBackend(std::shared_ptr<Priv> &&p);
+
+        Priv& priv();
+        const Priv& priv() const;
+        std::size_t hash() const;
+
+        bool operator== (const GBackend &rhs) const;
+
+    private:
+        std::shared_ptr<Priv> m_priv;
+    };
+
+    inline bool operator != (const GBackend &lhs, const GBackend &rhs)
+    {
+        return !(lhs == rhs);
+    }
+} // namespace gapi
+} // namespace cv
+
+namespace std
+{
+    template<> struct hash<cv::gapi::GBackend>
+    {
+        std::size_t operator() (const cv::gapi::GBackend &b) const
+        {
+            return b.hash();
+        }
+    };
+} // namespace std
+
+
+namespace cv {
+namespace gapi {
+    class GFunctor
+    {
+    public:
+        virtual cv::GKernelImpl impl()       const = 0; // stored by GKernelPackage::include() as the kernel impl
+        virtual cv::gapi::GBackend backend() const = 0; // stored by GKernelPackage::include() next to impl()
+        const char* id()                     const { return m_id; } // the id passed at construction
+        // NB: the id is kept as a raw pointer (no copy is made) -- it must outlive the functor.
+        virtual ~GFunctor() = default;
+    protected:
+        GFunctor(const char* id) : m_id(id) { }
+    private:
+        const char* m_id;
+    };
+
+    /** \addtogroup gapi_compile_args
+     * @{
+     */
+
+    // FIXME: Hide implementation
+    /**
+     * @brief A container class for heterogeneous kernel
+     * implementation collections and graph transformations.
+     *
+     * GKernelPackage is a special container class which stores kernel
+     * _implementations_ and graph _transformations_. Objects of this class
+     * are created and passed to cv::GComputation::compile() to specify
+     * which kernels to use and which transformations to apply in the
+     * compiled graph. GKernelPackage may contain kernels of
+     * different backends, e.g. be heterogeneous.
+     *
+     * The easiest way to create a kernel package is to use function
+     * cv::gapi::kernels(). This template function takes kernel
+     * implementations in the form of a type list (variadic template) and
+     * generates a kernel package on top of that.
+     *
+     * Kernel packages can be also generated programmatically, starting
+     * with an empty package (created with the default constructor)
+     * and then by populating it with kernels via call to
+     * GKernelPackage::include(). Note this method is also a template
+     * one since G-API kernel and transformation implementations are _types_,
+     * not objects.
+     *
+     * Finally, two kernel packages can be combined into a new one
+     * with function cv::gapi::combine().
+     */
+    class GAPI_EXPORTS GKernelPackage
+    {
+
+        /// @private
+        using M = std::unordered_map<std::string, std::pair<GBackend, GKernelImpl>>;
+
+        /// @private
+        M m_id_kernels;
+
+        /// @private
+        std::vector<GTransform> m_transformations;
+
+    protected:
+        /// @private
+        // Check if package contains ANY implementation of a kernel API
+        // by API textual id.
+        bool includesAPI(const std::string &id) const;
+
+        /// @private
+        // Remove ALL implementations of the given API (identified by ID)
+        void removeAPI(const std::string &id);
+
+        /// @private
+        // Partial include() specialization for kernels
+        template <typename KImpl>
+        typename std::enable_if<(std::is_base_of<detail::KernelTag, KImpl>::value), void>::type
+        includeHelper()
+        {
+            auto backend     = KImpl::backend();
+            auto kernel_id   = KImpl::API::id();
+            auto kernel_impl = GKernelImpl{KImpl::kernel()};
+            removeAPI(kernel_id);
+
+            m_id_kernels[kernel_id] = std::make_pair(backend, kernel_impl);
+        }
+
+        /// @private
+        // Partial include() specialization for transformations
+        template <typename TImpl>
+        typename std::enable_if<(std::is_base_of<detail::TransformTag, TImpl>::value), void>::type
+        includeHelper()
+        {
+            m_transformations.emplace_back(TImpl::transformation());
+        }
+
+    public:
+        void include(const GFunctor& functor)
+        {
+            m_id_kernels[functor.id()] = std::make_pair(functor.backend(), functor.impl());
+        }
+        /**
+         * @brief Returns total number of kernels
+         * in the package (across all backends included)
+         *
+         * @return a number of kernels in the package
+         */
+        std::size_t size() const;
+
+        /**
+         * @brief Returns vector of transformations included in the package
+         *
+         * @return vector of transformations included in the package
+         */
+        const std::vector<GTransform>& get_transformations() const;
+
+        /**
+         * @brief Test if a particular kernel _implementation_ KImpl is
+         * included in this kernel package.
+         *
+         * @sa includesAPI()
+         *
+         * @note cannot be applied to transformations
+         *
+         * @return true if there is such kernel, false otherwise.
+         */
+        template<typename KImpl>
+        bool includes() const
+        {
+            static_assert(std::is_base_of<detail::KernelTag, KImpl>::value,
+                          "includes() can be applied to kernels only");
+
+            auto kernel_it = m_id_kernels.find(KImpl::API::id());
+            return kernel_it != m_id_kernels.end() &&
+                   kernel_it->second.first == KImpl::backend();
+        }
+
+        /**
+         * @brief Remove all kernels associated with the given backend
+         * from the package.
+         *
+         * Does nothing if there's no kernels of this backend in the package.
+         *
+         * @param backend backend which kernels to remove
+         */
+        void remove(const GBackend& backend);
+
+        /**
+         * @brief Remove all kernels implementing the given API from
+         * the package.
+         *
+         * Does nothing if there's no kernels implementing the given interface.
+         */
+        template<typename KAPI>
+        void remove()
+        {
+            removeAPI(KAPI::id());
+        }
+
+        // FIXME: Rename to includes() and distinguish API/impl case by
+        //     statically?
+        /**
+         * Check if package contains ANY implementation of a kernel API
+         * by API type.
+         */
+        template<typename KAPI>
+        bool includesAPI() const
+        {
+            return includesAPI(KAPI::id());
+        }
+
+        // FIXME: who needs this function?
+        /**
+         * @brief Find the backend of a kernel (by its API)
+         *
+         * Returns the backend part of the (backend, implementation) pair
+         * found for KAPI::id(). Throws if nothing found.
+         *
+         * @return Backend which hosts matching kernel implementation.
+         *
+         */
+        template<typename KAPI>
+        GBackend lookup() const
+        {
+            return lookup(KAPI::id()).first;
+        }
+
+        /// @private
+        std::pair<cv::gapi::GBackend, cv::GKernelImpl>
+        lookup(const std::string &id) const;
+
+        // FIXME: No overwrites allowed?
+        /**
+         * @brief Put a new kernel implementation or a new transformation
+         * KImpl into the package.
+         */
+        template<typename KImpl>
+        void include()
+        {
+            includeHelper<KImpl>();
+        }
+
+        /**
+         * @brief Lists all backends which are included into package
+         *
+         * @return vector of backends
+         */
+        std::vector<GBackend> backends() const;
+
+        // TODO: Doxygen bug -- it wants me to place this comment
+        // here, not below.
+        /**
+         * @brief Create a new package based on `lhs` and `rhs`.
+         *
+         * @param lhs "Left-hand-side" package in the process
+         * @param rhs "Right-hand-side" package in the process
+         * @return a new kernel package.
+         */
+        friend GAPI_EXPORTS GKernelPackage combine(const GKernelPackage  &lhs,
+                                                   const GKernelPackage  &rhs);
+    };
+
+    /**
+     * @brief Create a kernel package object containing kernels
+     * and transformations specified in variadic template argument.
+     *
+     * In G-API, kernel implementations and transformations are _types_.
+     * Every backend has its own kernel API (like GAPI_OCV_KERNEL() and
+     * GAPI_FLUID_KERNEL()) but all of those APIs define a new type for
+     * each kernel implementation.
+     *
+     * Use this function to pass kernel implementations (defined in
+     * either way) and transformations to the system. Example:
+     *
+     * @snippet modules/gapi/samples/api_ref_snippets.cpp kernels_snippet
+     *
+     * Note that kernels() itself is a function returning object, not
+     * a type, so having `()` at the end is important -- it must be a
+     * function call.
+     */
+    template<typename... KK> GKernelPackage kernels()
+    {
+        // FIXME: currently there is no check that transformations' signatures are unique
+        // and that there won't be any intersection at the graph compilation stage
+        static_assert(detail::all_unique<typename KK::API...>::value, "Kernels API must be unique");
+
+        GKernelPackage pkg;
+
+        // For those who wonder - below is a trick to call a method once per
+        // element of a parameter pack: every call is wrapped into an array
+        // initializer element, which is a context where the pack can expand.
+        // Just note that `f(),a` always evaluates to `a` (with f() called!)
+        // and parentheses keep each call+comma a single initializer element.
+        // Leading 0 handles the case when KK is an empty list (kernels<>()).
+        int unused[] = { 0, (pkg.include<KK>(), 0)... };
+        cv::util::suppress_unused_warning(unused);
+        return pkg;
+    }
+
+    template<typename... FF> // builds a package from functor objects passed as arguments
+    GKernelPackage kernels(FF&... functors)
+    {
+        GKernelPackage pkg; // start from an empty package
+        int unused[] = { 0, (pkg.include(functors), 0)... }; // include() each functor, left to right
+        cv::util::suppress_unused_warning(unused);
+        return pkg;
+    }
+
+    /** @} */
+
+    // FYI - this function is already commented above
+    GAPI_EXPORTS GKernelPackage combine(const GKernelPackage  &lhs,
+                                        const GKernelPackage  &rhs);
+
+    /**
+     * @brief Combines multiple G-API kernel packages into one
+     *
+     * @overload
+     *
+     * This function successively combines the passed kernel packages using a right fold.
+     * Calling `combine(a, b, c)` is equal to `combine(a, combine(b, c))`.
+     *
+     * @return The resulting kernel package
+     */
+    template<typename... Ps>
+    GKernelPackage combine(const GKernelPackage &a, const GKernelPackage &b, Ps&&... rest)
+    {
+        return combine(a, combine(b, rest...));
+    }
+
+    /** \addtogroup gapi_compile_args
+     * @{
+     */
+    /**
+     * @brief cv::use_only() is a special combinator which hints G-API to use only
+     * kernels specified in cv::GComputation::compile() (and not to extend kernels available by
+     * default with that package).
+     */
+    struct GAPI_EXPORTS use_only
+    {
+        GKernelPackage pkg;
+    };
+    /** @} */
+
+} // namespace gapi
+
+namespace detail
+{
+    template<> struct CompileArgTag<cv::gapi::GKernelPackage>
+    {
+        static const char* tag() { return "gapi.kernel_package"; }
+    };
+
+    template<> struct CompileArgTag<cv::gapi::use_only>
+    {
+        static const char* tag() { return "gapi.use_only"; }
+    };
+} // namespace detail
+} // namespace cv
+
+#endif // OPENCV_GAPI_GKERNEL_HPP
diff --git a/IPL/include/opencv/opencv2/gapi/gmat.hpp b/IPL/include/opencv/opencv2/gapi/gmat.hpp
new file mode 100644
index 0000000..eed9364
--- /dev/null
+++ b/IPL/include/opencv/opencv2/gapi/gmat.hpp
@@ -0,0 +1,232 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2018 Intel Corporation
+
+
+#ifndef OPENCV_GAPI_GMAT_HPP
+#define OPENCV_GAPI_GMAT_HPP
+
+#include <ostream>
+#include <memory>                 // std::shared_ptr
+
+#include <opencv2/gapi/opencv_includes.hpp>
+#include <opencv2/gapi/gcommon.hpp> // GShape
+
+#include <opencv2/gapi/own/assert.hpp>
+
+// TODO GAPI_EXPORTS or so
+namespace cv
+{
+// Forward declaration; GNode and GOrigin are internal
+// (user-inaccessible) classes.
+class GNode;
+struct GOrigin;
+
+/** \addtogroup gapi_data_objects
+ * @{
+ *
+ * @brief G-API data objects used to build G-API expressions.
+ *
+ * These objects do not own any particular data (except compile-time
+ * associated values like with cv::GScalar) and are used to construct
+ * graphs.
+ *
+ * Every graph in G-API starts and ends with data objects.
+ *
+ * Once constructed and compiled, G-API operates with regular host-side
+ * data instead. Refer to the below table to find the mapping between
+ * G-API and regular data types.
+ *
+ *    G-API data type    | I/O data type
+ *    ------------------ | -------------
+ *    cv::GMat           | cv::Mat
+ *    cv::GScalar        | cv::Scalar
+ *    `cv::GArray<T>`    | std::vector<T>
+ *    `cv::GOpaque<T>`   | T
+ */
+class GAPI_EXPORTS GMat
+{
+public:
+    GMat();                                 // Empty constructor
+    GMat(const GNode &n, std::size_t out);  // Operation result constructor
+
+    GOrigin& priv();                        // Internal use only
+    const GOrigin& priv()  const;           // Internal use only
+
+private:
+    std::shared_ptr<GOrigin> m_priv;        // Shared handle: copies of a GMat refer to the same GOrigin
+};
+
+class GAPI_EXPORTS GMatP : public GMat // Distinct GMat-derived type; NOTE(review): presumably marks planar data - confirm
+{
+public:
+    using GMat::GMat;                   // Inherit all GMat constructors
+};
+
+class GAPI_EXPORTS GFrame : public GMat // Distinct GMat-derived type; NOTE(review): presumably marks video frames - confirm
+{
+public:
+    using GMat::GMat;                    // Inherit all GMat constructors
+};
+
+namespace gapi { namespace own {
+    class Mat;
+}}//gapi::own
+
+/** @} */
+
+/**
+ * \addtogroup gapi_meta_args
+ * @{
+ */
+struct GAPI_EXPORTS GMatDesc
+{
+    // FIXME: Default initializers in C++14
+    int depth;
+    int chan;
+    cv::Size size; // NB.: no multi-dimensional cases covered yet
+    bool planar;
+    std::vector<int> dims; // FIXME: Maybe it's really questionable to have it here
+
+    GMatDesc(int d, int c, cv::Size s, bool p = false) // 2D case: dims stays empty, so isND() is false
+        : depth(d), chan(c), size(s), planar(p) {}
+
+    GMatDesc(int d, const std::vector<int> &dd) // N-dimensional case: chan and size are set to -1
+        : depth(d), chan(-1), size{-1,-1}, planar(false), dims(dd) {}
+
+    GMatDesc(int d, std::vector<int> &&dd) // Same as above, but takes ownership of the dims vector
+        : depth(d), chan(-1), size{-1,-1}, planar(false), dims(std::move(dd)) {}
+
+    GMatDesc() : GMatDesc(-1, -1, {-1,-1}) {} // "Empty" descriptor: every numeric field is -1
+
+    inline bool operator== (const GMatDesc &rhs) const
+    {
+        return    depth  == rhs.depth
+               && chan   == rhs.chan
+               && size   == rhs.size
+               && planar == rhs.planar
+               && dims   == rhs.dims;
+    }
+
+    inline bool operator!= (const GMatDesc &rhs) const
+    {
+        return !(*this == rhs);
+    }
+
+    bool isND() const { return !dims.empty(); } // True when the descriptor carries an explicit dims vector
+
+    // Checks if the passed mat can be described by this descriptor
+    // (it handles the case when
+    // 1-channel mat can be reinterpreted as is (1-channel mat)
+    // and as a 3-channel planar mat with height divided by 3)
+    bool canDescribe(const cv::gapi::own::Mat& mat) const;
+
+    // Meta combinator: return a new GMatDesc which differs in size by delta
+    // (all other fields are taken unchanged from this GMatDesc)
+    // FIXME: a better name?
+    GMatDesc withSizeDelta(cv::Size delta) const
+    {
+        GMatDesc desc(*this);
+        desc.size += delta;
+        return desc;
+    }
+#if !defined(GAPI_STANDALONE)
+    bool canDescribe(const cv::Mat& mat) const; // Overload of canDescribe() for cv::Mat (non-standalone builds only)
+#endif // !defined(GAPI_STANDALONE)
+    // Meta combinator: return a new GMatDesc which differs in size by delta
+    // (all other fields are taken unchanged from this GMatDesc)
+    //
+    // This is an overload.
+    GMatDesc withSizeDelta(int dx, int dy) const
+    {
+        return withSizeDelta(cv::Size{dx,dy});
+    }
+
+    GMatDesc withSize(cv::Size sz) const // Meta combinator: copy of this GMatDesc with size replaced by sz
+    {
+        GMatDesc desc(*this);
+        desc.size = sz;
+        return desc;
+    }
+
+    // Meta combinator: return a new GMatDesc with specified data depth.
+    // (all other fields are taken unchanged from this GMatDesc)
+    GMatDesc withDepth(int ddepth) const
+    {
+        GAPI_Assert(CV_MAT_CN(ddepth) == 1 || ddepth == -1);
+        GMatDesc desc(*this);
+        if (ddepth != -1) desc.depth = ddepth; // -1 means "keep the current depth"
+        return desc;
+    }
+
+    // Meta combinator: return a new GMatDesc with specified data depth
+    // and number of channels.
+    // (all other fields are taken unchanged from this GMatDesc)
+    GMatDesc withType(int ddepth, int dchan) const
+    {
+        GAPI_Assert(CV_MAT_CN(ddepth) == 1 || ddepth == -1); // the same check is repeated inside withDepth()
+        GMatDesc desc = withDepth(ddepth);
+        desc.chan = dchan;
+        return desc;
+    }
+
+    // Meta combinator: return a new GMatDesc with planar flag set
+    // (no size changes are performed, only channel interpretation is changed
+    // (interleaved -> planar))
+    GMatDesc asPlanar() const
+    {
+        GAPI_Assert(planar == false);
+        GMatDesc desc(*this);
+        desc.planar = true;
+        return desc;
+    }
+
+    // Meta combinator: return a new GMatDesc
+    // reinterpreting 1-channel input as planar image
+    // (size height is divided by plane number)
+    GMatDesc asPlanar(int planes) const
+    {
+        GAPI_Assert(planar == false);
+        GAPI_Assert(chan == 1);
+        GAPI_Assert(planes > 1);
+        GAPI_Assert(size.height % planes == 0);
+        GMatDesc desc(*this);
+        desc.size.height /=  planes;
+        desc.chan = planes;
+        return desc.asPlanar();
+    }
+
+    // Meta combinator: return a new GMatDesc with planar flag set to false
+    // (no size changes are performed, only channel interpretation is changed
+    // (planar -> interleaved))
+    GMatDesc asInterleaved() const
+    {
+        GAPI_Assert(planar == true);
+        GMatDesc desc(*this);
+        desc.planar = false;
+        return desc;
+    }
+};
+
+static inline GMatDesc empty_gmat_desc() { return GMatDesc{-1,-1,{-1,-1}}; }
+
+#if !defined(GAPI_STANDALONE)
+class Mat;
+GAPI_EXPORTS GMatDesc descr_of(const cv::Mat &mat);
+GAPI_EXPORTS GMatDesc descr_of(const cv::UMat &mat);
+#endif // !defined(GAPI_STANDALONE)
+
+/** @} */
+
+// FIXME: WHY??? WHY it is under different namespace?
+namespace gapi { namespace own {
+    GAPI_EXPORTS GMatDesc descr_of(const Mat &mat);
+}}//gapi::own
+
+GAPI_EXPORTS std::ostream& operator<<(std::ostream& os, const cv::GMatDesc &desc);
+
+} // namespace cv
+
+#endif // OPENCV_GAPI_GMAT_HPP
diff --git a/IPL/include/opencv/opencv2/gapi/gmetaarg.hpp b/IPL/include/opencv/opencv2/gapi/gmetaarg.hpp
new file mode 100644
index 0000000..39f087f
--- /dev/null
+++ b/IPL/include/opencv/opencv2/gapi/gmetaarg.hpp
@@ -0,0 +1,80 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2018 Intel Corporation
+
+
+#ifndef OPENCV_GAPI_GMETAARG_HPP
+#define OPENCV_GAPI_GMETAARG_HPP
+
+#include <vector>
+#include <type_traits>
+
+#include <opencv2/gapi/util/util.hpp>
+#include <opencv2/gapi/util/variant.hpp>
+
+#include <opencv2/gapi/gmat.hpp>
+#include <opencv2/gapi/gscalar.hpp>
+#include <opencv2/gapi/garray.hpp>
+#include <opencv2/gapi/gopaque.hpp>
+
+namespace cv
+{
+// FIXME: Rename to GMeta?
+// FIXME: user shouldn't deal with it - put to detail?
+// GMetaArg is a union type over descriptions of G-types which can serve as
+// GComputation's in/output slots.
+//
+// GMetaArg objects are passed as arguments to GComputation::compile()
+// to specify which data a compiled computation should be specialized on.
+// For manual compile(), user must supply this metadata, in case of apply()
+// this metadata is taken from arguments computation should operate on.
+//
+// The first type (monostate) is equal to "uninitialized"/"unresolved" meta.
+using GMetaArg = util::variant
+    < util::monostate
+    , GMatDesc
+    , GScalarDesc
+    , GArrayDesc
+    , GOpaqueDesc
+    >;
+GAPI_EXPORTS std::ostream& operator<<(std::ostream& os, const GMetaArg &);
+
+using GMetaArgs = std::vector<GMetaArg>;
+
+namespace detail
+{
+    // These traits are used by GComputation::compile()
+
+    // FIXME: is_constructible<T> doesn't work as variant doesn't do any SFINAE
+    // in its current template constructor
+
+    template<typename T> struct is_meta_descr    : std::false_type {};
+    template<> struct is_meta_descr<GMatDesc>    : std::true_type {};
+    template<> struct is_meta_descr<GScalarDesc> : std::true_type {};
+    template<> struct is_meta_descr<GArrayDesc>  : std::true_type {};
+    template<> struct is_meta_descr<GOpaqueDesc> : std::true_type {};
+
+    template<typename... Ts>
+    using are_meta_descrs = all_satisfy<is_meta_descr, Ts...>;
+
+    template<typename... Ts>
+    using are_meta_descrs_but_last = all_satisfy<is_meta_descr, typename all_but_last<Ts...>::type>;
+
+} // namespace detail
+
+// Note: descr_of(std::vector<..>) returns a GArrayDesc, while
+//       descrs_of(std::vector<..>) returns an array of Meta args!
+class Mat;
+class UMat;
+GAPI_EXPORTS cv::GMetaArgs descrs_of(const std::vector<cv::Mat> &vec);
+GAPI_EXPORTS cv::GMetaArgs descrs_of(const std::vector<cv::UMat> &vec);
+namespace gapi { namespace own {
+    class Mat;
+    GAPI_EXPORTS cv::GMetaArgs descrs_of(const std::vector<Mat> &vec);
+}} // namespace gapi::own
+
+} // namespace cv
+
+#endif // OPENCV_GAPI_GMETAARG_HPP
diff --git a/IPL/include/opencv/opencv2/gapi/gopaque.hpp b/IPL/include/opencv/opencv2/gapi/gopaque.hpp
new file mode 100644
index 0000000..f5d06bb
--- /dev/null
+++ b/IPL/include/opencv/opencv2/gapi/gopaque.hpp
@@ -0,0 +1,294 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2019 Intel Corporation
+
+
+#ifndef OPENCV_GAPI_GOPAQUE_HPP
+#define OPENCV_GAPI_GOPAQUE_HPP
+
+#include <functional>
+#include <ostream>
+#include <memory>
+
+#include <opencv2/gapi/own/exports.hpp>
+#include <opencv2/gapi/opencv_includes.hpp>
+
+#include <opencv2/gapi/util/variant.hpp>
+#include <opencv2/gapi/util/throw.hpp>
+#include <opencv2/gapi/own/assert.hpp>
+
+namespace cv
+{
+// Forward declaration; GNode and GOrigin are internal
+// (user-inaccessible) classes.
+class GNode;
+struct GOrigin;
+template<typename T> class GOpaque;
+
+/**
+ * \addtogroup gapi_meta_args
+ * @{
+ */
+struct GOpaqueDesc
+{
+    // FIXME: Body
+    // FIXME: Also implement proper operator== then
+    bool operator== (const GOpaqueDesc&) const { return true; } // Placeholder: the struct has no fields, so all instances compare equal
+};
+template<typename U> GOpaqueDesc descr_of(const U &) { return {};}
+static inline GOpaqueDesc empty_gopaque_desc() {return {}; }
+/** @} */
+
+std::ostream& operator<<(std::ostream& os, const cv::GOpaqueDesc &desc);
+
+namespace detail
+{
+    // ConstructOpaque is a callback which stores information about T and is used by
+    // G-API runtime to construct an object in host memory (T remains opaque for G-API).
+    // ConstructOpaque is carried into G-API internals by GOpaqueU.
+    // Currently it is suitable for Host (CPU) plugins only, real offload may require
+    // more information for manual memory allocation on-device.
+    class OpaqueRef;
+    using ConstructOpaque = std::function<void(OpaqueRef&)>;
+
+    // FIXME: garray.hpp already contains hint classes (for actual T type verification),
+    // need to think where it can be moved (currently opaque uses it from garray)
+
+    // This class strips type information from GOpaque<T> and makes it usable
+    // in the G-API graph compiler (expression unrolling, graph generation, etc).
+    // Part of GProtoArg.
+    class GAPI_EXPORTS GOpaqueU
+    {
+    public:
+        GOpaqueU(const GNode &n, std::size_t out); // Operation result constructor
+
+        template <typename T>
+        bool holds() const;                       // Check if was created from GOpaque<T>
+
+        GOrigin& priv();                          // Internal use only
+        const GOrigin& priv() const;              // Internal use only
+
+    protected:
+        GOpaqueU();                                // Default constructor
+        template<class> friend class cv::GOpaque;  // (available for GOpaque<T> only)
+
+        void setConstructFcn(ConstructOpaque &&cv);  // Store T-aware constructor
+
+        template <typename T>
+        void specifyType();                       // Store type of initial GOpaque<T>
+
+        std::shared_ptr<GOrigin> m_priv;
+        std::shared_ptr<TypeHintBase> m_hint;
+    };
+
+    template <typename T>
+    bool GOpaqueU::holds() const{ // True if the stored type hint matches the decayed T
+        GAPI_Assert(m_hint != nullptr);
+        using U = typename std::decay<T>::type;
+        return dynamic_cast<TypeHint<U>*>(m_hint.get()) != nullptr;
+    }
+
+    template <typename T>
+    void GOpaqueU::specifyType(){ // Replaces the stored type hint with one for the decayed T
+        m_hint.reset(new TypeHint<typename std::decay<T>::type>);
+    }
+
+    // This class represents a typed object reference.
+    // Depending on origins, this reference may be either "just a" reference to
+    // an object created externally, OR actually own the underlying object
+    // (be value holder).
+    class BasicOpaqueRef
+    {
+    public:
+        cv::GOpaqueDesc m_desc;                     // Descriptor associated with the referenced object
+        virtual ~BasicOpaqueRef() {}
+
+        virtual void mov(BasicOpaqueRef &ref) = 0;  // Move the object held by ref into this reference
+        virtual const void* ptr() const = 0;        // Type-erased address of the underlying object
+    };
+
+    template<typename T> class OpaqueRefT final: public BasicOpaqueRef // Typed holder: empty, external RO/RW pointer, or owned T
+    {
+        using empty_t  = util::monostate;
+        using ro_ext_t = const T *;
+        using rw_ext_t =       T *;
+        using rw_own_t =       T  ;
+        util::variant<empty_t, ro_ext_t, rw_ext_t, rw_own_t> m_ref;
+
+        inline bool isEmpty() const { return util::holds_alternative<empty_t>(m_ref);  }
+        inline bool isROExt() const { return util::holds_alternative<ro_ext_t>(m_ref); }
+        inline bool isRWExt() const { return util::holds_alternative<rw_ext_t>(m_ref); }
+        inline bool isRWOwn() const { return util::holds_alternative<rw_own_t>(m_ref); }
+
+        void init(const T* obj = nullptr) // Fills m_desc from *obj; a null obj leaves m_desc untouched
+        {
+            if (obj) m_desc = cv::descr_of(*obj);
+        }
+
+    public:
+        OpaqueRefT() { init(); }
+        virtual ~OpaqueRefT() {}
+
+        explicit OpaqueRefT(const T&  obj) : m_ref(&obj)           { init(&obj); }
+        explicit OpaqueRefT(      T&  obj) : m_ref(&obj)           { init(&obj); }
+        explicit OpaqueRefT(      T&& obj) : m_ref(std::move(obj)) { init(&util::get<rw_own_t>(m_ref)); } // describe the owned value, not the moved-from obj
+
+        // Reset an OpaqueRefT. Called only for objects instantiated
+        // internally in G-API (e.g. temporary GOpaque<T>'s within a
+        // computation).  Reset here means both initialization
+        // (creating an object) and reset (discarding its existing
+        // content before the next execution). Must never be called
+        // for external OpaqueRefTs.
+        void reset()
+        {
+            if (isEmpty())
+            {
+                T empty_obj{};
+                m_desc = cv::descr_of(empty_obj);
+                m_ref  = std::move(empty_obj);
+                GAPI_Assert(isRWOwn());
+            }
+            else if (isRWOwn())
+            {
+                util::get<rw_own_t>(m_ref) = {};
+            }
+            else GAPI_Assert(false); // shouldn't be called in *EXT modes
+        }
+
+        // Obtain a WRITE reference to underlying object
+        // Used by CPU kernel API wrappers when a kernel execution frame
+        // is created
+        T& wref()
+        {
+            GAPI_Assert(isRWExt() || isRWOwn());
+            if (isRWExt()) return *util::get<rw_ext_t>(m_ref);
+            if (isRWOwn()) return  util::get<rw_own_t>(m_ref);
+            util::throw_error(std::logic_error("Impossible happened"));
+        }
+
+        // Obtain a READ reference to underlying object
+        // Used by CPU kernel API wrappers when a kernel execution frame
+        // is created
+        const T& rref() const
+        {
+            // ANY object can be accessed for reading, even if it declared for
+            // output. Example -- a GComputation from [in] to [out1,out2]
+            // where [out2] is a result of operation applied to [out1]:
+            //
+            //            GComputation boundary
+            //            . . . . . . .
+            //            .           .
+            //     [in] ----> foo() ----> [out1]
+            //            .           .    :
+            //            .           . . .:. . .
+            //            .                V    .
+            //            .              bar() ---> [out2]
+            //            . . . . . . . . . . . .
+            //
+            if (isROExt()) return *util::get<ro_ext_t>(m_ref);
+            if (isRWExt()) return *util::get<rw_ext_t>(m_ref);
+            if (isRWOwn()) return  util::get<rw_own_t>(m_ref);
+            util::throw_error(std::logic_error("Impossible happened"));
+        }
+
+        virtual void mov(BasicOpaqueRef &v) override { // Move-assigns v's object into ours; v must be an OpaqueRefT<T>
+            OpaqueRefT<T> *tv = dynamic_cast<OpaqueRefT<T>*>(&v);
+            GAPI_Assert(tv != nullptr);
+            wref() = std::move(tv->wref());
+        }
+
+        virtual const void* ptr() const override { return &rref(); }
+    };
+
+    // This class strips type information from OpaqueRefT<> and makes it usable
+    // in the G-API executables (carrying run-time data/information to kernels).
+    // Part of GRunArg.
+    // Its methods are typed proxies to OpaqueRefT<T>.
+    // OpaqueRef maintains "reference" semantics so two copies of OpaqueRef refer
+    // to the same underlying object.
+    class OpaqueRef
+    {
+        std::shared_ptr<BasicOpaqueRef> m_ref; // Shared, type-erased holder; copies of OpaqueRef alias it
+
+        template<typename T> inline void check() const // Asserts that m_ref really is an OpaqueRefT<T>
+        {
+            GAPI_DbgAssert(dynamic_cast<OpaqueRefT<T>*>(m_ref.get()) != nullptr);
+        }
+
+    public:
+        OpaqueRef() = default; // Leaves m_ref null; NOTE(review): only reset<T>() handles that state
+
+        template<typename T> explicit OpaqueRef(T&& obj) :
+            m_ref(new OpaqueRefT<typename std::decay<T>::type>(std::forward<T>(obj))) {}
+
+        template<typename T> void reset() // Creates the typed holder on first use, then resets its content
+        {
+            if (!m_ref) m_ref.reset(new OpaqueRefT<T>());
+
+            check<T>();
+            static_cast<OpaqueRefT<T>&>(*m_ref).reset();
+        }
+
+        template<typename T> T& wref() // Typed proxy to OpaqueRefT<T>::wref()
+        {
+            check<T>();
+            return static_cast<OpaqueRefT<T>&>(*m_ref).wref();
+        }
+
+        template<typename T> const T& rref() const // Typed proxy to OpaqueRefT<T>::rref()
+        {
+            check<T>();
+            return static_cast<OpaqueRefT<T>&>(*m_ref).rref();
+        }
+
+        void mov(OpaqueRef &v) // NOTE(review): dereferences both m_ref and v.m_ref without a null check
+        {
+            m_ref->mov(*v.m_ref);
+        }
+
+        cv::GOpaqueDesc descr_of() const // NOTE(review): dereferences m_ref without a null check
+        {
+            return m_ref->m_desc;
+        }
+
+        // May be used to uniquely identify this object internally
+        const void *ptr() const { return m_ref->ptr(); }
+    };
+} // namespace detail
+
+/** \addtogroup gapi_data_objects
+ * @{
+ */
+
+template<typename T> class GOpaque
+{
+public:
+    GOpaque() { putDetails(); }              // Empty constructor
+    explicit GOpaque(detail::GOpaqueU &&ref) // GOpaqueU-based constructor
+        : m_ref(std::move(ref)) { putDetails(); } // (used by GCall, not for users); moves instead of copying the rvalue
+
+    detail::GOpaqueU strip() const { return m_ref; } // Returns a copy of the type-erased handle
+
+private:
+    // Host type (or Flat type) - the type this GOpaque is actually
+    // specified to.
+    using HT = typename detail::flatten_g<typename std::decay<T>::type>::type;
+
+    static void CTor(detail::OpaqueRef& ref) { // Construction callback: (re)creates an HT inside ref
+        ref.reset<HT>();
+    }
+    void putDetails() {                        // Registers the HT-aware callback and type hint on m_ref
+        m_ref.setConstructFcn(&CTor);
+        m_ref.specifyType<HT>();
+    }
+
+    detail::GOpaqueU m_ref;
+};
+
+/** @} */
+
+} // namespace cv
+
+#endif // OPENCV_GAPI_GOPAQUE_HPP
diff --git a/IPL/include/opencv/opencv2/gapi/gproto.hpp b/IPL/include/opencv/opencv2/gapi/gproto.hpp
new file mode 100644
index 0000000..8858199
--- /dev/null
+++ b/IPL/include/opencv/opencv2/gapi/gproto.hpp
@@ -0,0 +1,136 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2018 Intel Corporation
+
+
+#ifndef OPENCV_GAPI_GPROTO_HPP
+#define OPENCV_GAPI_GPROTO_HPP
+
+#include <type_traits>
+#include <vector>
+#include <ostream>
+
+#include <opencv2/gapi/util/variant.hpp>
+
+#include <opencv2/gapi/gmat.hpp>
+#include <opencv2/gapi/gscalar.hpp>
+#include <opencv2/gapi/garray.hpp>
+#include <opencv2/gapi/gopaque.hpp>
+#include <opencv2/gapi/garg.hpp>
+#include <opencv2/gapi/gmetaarg.hpp>
+
+namespace cv {
+
+// FIXME: user shouldn't deal with it - put to detail?
+// GProtoArg is a union type over G-types which can serve as
+// GComputation's in/output slots. In other words, GProtoArg
+// wraps any type which can serve as G-API exchange type.
+//
+// In Runtime, GProtoArgs are substituted with appropriate GRunArgs.
+//
+// GProtoArg objects are constructed in-place when user describes
+// (captures) computations, user doesn't interact with these types
+// directly.
+using GProtoArg = util::variant
+    < GMat
+    , GMatP
+    , GFrame
+    , GScalar
+    , detail::GArrayU  // instead of GArray<T>
+    , detail::GOpaqueU // instead of GOpaque<T>
+    >;
+
+using GProtoArgs = std::vector<GProtoArg>;
+
+namespace detail
+{
+template<typename... Ts> inline GProtoArgs packArgs(Ts... args)
+{
+    return GProtoArgs{ GProtoArg(wrap_gapi_helper<Ts>::wrap(args))... };
+}
+
+}
+
+template<class Tag>
+struct GIOProtoArgs
+{
+public:
+    explicit GIOProtoArgs(const GProtoArgs& args) : m_args(args) {}
+    explicit GIOProtoArgs(GProtoArgs &&args)      : m_args(std::move(args)) {}
+
+    GProtoArgs m_args;
+};
+
+struct In_Tag{};
+struct Out_Tag{};
+
+using GProtoInputArgs  = GIOProtoArgs<In_Tag>;
+using GProtoOutputArgs = GIOProtoArgs<Out_Tag>;
+
+// Perfect forwarding
+template<typename... Ts> inline GProtoInputArgs GIn(Ts&&... ts)
+{
+    return GProtoInputArgs(detail::packArgs(std::forward<Ts>(ts)...));
+}
+
+template<typename... Ts> inline GProtoOutputArgs GOut(Ts&&... ts)
+{
+    return GProtoOutputArgs(detail::packArgs(std::forward<Ts>(ts)...));
+}
+
+namespace detail
+{
+    // Extract elements from tuple
+    // FIXME: Someday utilize a generic tuple_to_vec<> routine
+    template<typename... Ts, int... Indexes>
+    static GProtoOutputArgs getGOut_impl(const std::tuple<Ts...>& ts, detail::Seq<Indexes...>)
+    {
+        return GProtoOutputArgs{ detail::packArgs(std::get<Indexes>(ts)...)};
+    }
+}
+
+template<typename... Ts> inline GProtoOutputArgs GOut(const std::tuple<Ts...>& ts)
+{
+    // TODO: think of std::forward(ts)
+    return detail::getGOut_impl(ts, typename detail::MkSeq<sizeof...(Ts)>::type());
+}
+
+// Takes rvalue as input arg
+template<typename... Ts> inline GProtoOutputArgs GOut(std::tuple<Ts...>&& ts)
+{
+    // TODO: think of std::forward(ts)
+    return detail::getGOut_impl(ts, typename detail::MkSeq<sizeof...(Ts)>::type());
+}
+
+// Extract run-time arguments from node origin
+// Can be used to extract constant values associated with G-objects
+// (like GScalar) at graph construction time
+GRunArg value_of(const GOrigin &origin);
+
+// Transform run-time computation arguments into a collection of metadata
+// extracted from those arguments
+GMetaArg  GAPI_EXPORTS descr_of(const GRunArg  &arg );
+GMetaArgs GAPI_EXPORTS descr_of(const GRunArgs &args);
+
+// Transform a run-time operation result argument into the metadata extracted from that argument.
+// Used to compare the metadata generated at compile time with the metadata of the operation result at run time
+GMetaArg GAPI_EXPORTS descr_of(const GRunArgP& argp);
+
+// Checks if run-time computation argument can be described by metadata
+bool GAPI_EXPORTS can_describe(const GMetaArg&  meta,  const GRunArg&  arg);
+bool GAPI_EXPORTS can_describe(const GMetaArgs& metas, const GRunArgs& args);
+
+// Checks if run-time computation result argument can be described by metadata.
+// Used to check if the metadata generated at compile time
+// coincides with output arguments passed to computation in cpu and ocl backends
+bool GAPI_EXPORTS can_describe(const GMetaArg&  meta,  const GRunArgP& argp);
+
+// Validates input arguments
+void GAPI_EXPORTS validate_input_arg(const GRunArg& arg);
+void GAPI_EXPORTS validate_input_args(const GRunArgs& args);
+
+} // namespace cv
+
+#endif // OPENCV_GAPI_GPROTO_HPP
diff --git a/IPL/include/opencv/opencv2/gapi/gpu/core.hpp b/IPL/include/opencv/opencv2/gapi/gpu/core.hpp
new file mode 100644
index 0000000..a7ee595
--- /dev/null
+++ b/IPL/include/opencv/opencv2/gapi/gpu/core.hpp
@@ -0,0 +1,27 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2018 Intel Corporation
+
+
+#ifndef OPENCV_GAPI_GPU_CORE_API_HPP
+#define OPENCV_GAPI_GPU_CORE_API_HPP
+/** @file
+* @deprecated Use <opencv2/gapi/ocl/core.hpp> instead.
+*/
+
+#include <opencv2/gapi/ocl/core.hpp>
+
+namespace cv {
+namespace gapi {
+namespace core {
+namespace gpu {
+    using namespace ocl;
+} // namespace gpu
+} // namespace core
+} // namespace gapi
+} // namespace cv
+
+
+#endif // OPENCV_GAPI_GPU_CORE_API_HPP
diff --git a/IPL/include/opencv/opencv2/gapi/gpu/ggpukernel.hpp b/IPL/include/opencv/opencv2/gapi/gpu/ggpukernel.hpp
new file mode 100644
index 0000000..b52c21d
--- /dev/null
+++ b/IPL/include/opencv/opencv2/gapi/gpu/ggpukernel.hpp
@@ -0,0 +1,18 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2018 Intel Corporation
+
+
+#ifndef OPENCV_GAPI_GGPUKERNEL_HPP
+#define OPENCV_GAPI_GGPUKERNEL_HPP
+/** @file
+* @deprecated Use <opencv2/gapi/ocl/goclkernel.hpp> instead.
+*/
+
+#include <opencv2/gapi/ocl/goclkernel.hpp>
+#define GAPI_GPU_KERNEL GAPI_OCL_KERNEL
+
+
+#endif // OPENCV_GAPI_GGPUKERNEL_HPP
diff --git a/IPL/include/opencv/opencv2/gapi/gpu/imgproc.hpp b/IPL/include/opencv/opencv2/gapi/gpu/imgproc.hpp
new file mode 100644
index 0000000..b0df7ae
--- /dev/null
+++ b/IPL/include/opencv/opencv2/gapi/gpu/imgproc.hpp
@@ -0,0 +1,28 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2018 Intel Corporation
+
+
+#ifndef OPENCV_GAPI_GPU_IMGPROC_API_HPP
+#define OPENCV_GAPI_GPU_IMGPROC_API_HPP
+/** @file
+* @deprecated Use <opencv2/gapi/ocl/imgproc.hpp> instead.
+*/
+
+#include <opencv2/gapi/ocl/imgproc.hpp>
+
+
+namespace cv {
+namespace gapi {
+namespace imgproc {
+namespace gpu {
+    using namespace ocl;
+} // namespace gpu
+} // namespace imgproc
+} // namespace gapi
+} // namespace cv
+
+
+#endif // OPENCV_GAPI_GPU_IMGPROC_API_HPP
diff --git a/IPL/include/opencv/opencv2/gapi/gscalar.hpp b/IPL/include/opencv/opencv2/gapi/gscalar.hpp
new file mode 100644
index 0000000..be20048
--- /dev/null
+++ b/IPL/include/opencv/opencv2/gapi/gscalar.hpp
@@ -0,0 +1,75 @@
+// This file is part of OpenCV project.
+
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2018 Intel Corporation
+
+
+#ifndef OPENCV_GAPI_GSCALAR_HPP
+#define OPENCV_GAPI_GSCALAR_HPP
+
+#include <ostream>
+
+#include <opencv2/gapi/opencv_includes.hpp>
+#include <opencv2/gapi/gcommon.hpp> // GShape
+#include <opencv2/gapi/util/optional.hpp>
+
+namespace cv
+{
+// Forward declaration; GNode and GOrigin are internal
+// (user-inaccessible) classes.
+class GNode;
+struct GOrigin;
+
+/** \addtogroup gapi_data_objects
+ * @{
+ */
+
+class GAPI_EXPORTS GScalar
+{
+public:
+    GScalar();                                         // Empty constructor
+    explicit GScalar(const cv::Scalar& s);  // Constant value constructor from cv::Scalar
+    explicit GScalar(cv::Scalar&& s);       // Constant value move-constructor from cv::Scalar
+
+    GScalar(double v0);                                // Constant value constructor from double
+    GScalar(const GNode &n, std::size_t out);          // Operation result constructor
+
+    GOrigin& priv();                                   // Internal use only
+    const GOrigin& priv()  const;                      // Internal use only
+
+private:
+    std::shared_ptr<GOrigin> m_priv;
+};
+
+/** @} */
+
+/**
+ * \addtogroup gapi_meta_args
+ * @{
+ */
+struct GScalarDesc
+{
+    // NB.: right now it is empty
+
+    inline bool operator== (const GScalarDesc &) const
+    {
+        return true; // NB: implement this method if GScalar meta appears
+    }
+
+    inline bool operator!= (const GScalarDesc &rhs) const
+    {
+        return !(*this == rhs);
+    }
+};
+
+static inline GScalarDesc empty_scalar_desc() { return GScalarDesc(); }
+
+GAPI_EXPORTS GScalarDesc descr_of(const cv::Scalar            &scalar);
+
+std::ostream& operator<<(std::ostream& os, const cv::GScalarDesc &desc);
+
+} // namespace cv
+
+#endif // OPENCV_GAPI_GSCALAR_HPP
diff --git a/IPL/include/opencv/opencv2/gapi/gstreaming.hpp b/IPL/include/opencv/opencv2/gapi/gstreaming.hpp
new file mode 100644
index 0000000..7079042
--- /dev/null
+++ b/IPL/include/opencv/opencv2/gapi/gstreaming.hpp
@@ -0,0 +1,231 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2018 Intel Corporation
+
+
+#ifndef OPENCV_GAPI_GSTREAMING_COMPILED_HPP
+#define OPENCV_GAPI_GSTREAMING_COMPILED_HPP
+
+#include <vector>
+
+#include <opencv2/gapi/opencv_includes.hpp>
+#include <opencv2/gapi/own/assert.hpp>
+#include <opencv2/gapi/garg.hpp>
+#include <opencv2/gapi/streaming/source.hpp>
+
+namespace cv {
+
+/**
+ * \addtogroup gapi_main_classes
+ * @{
+ */
+/**
+ * @brief Represents a computation (graph) compiled for streaming.
+ *
+ * This class represents a product of graph compilation (calling
+ * cv::GComputation::compileStreaming()). Objects of this class
+ * actually do stream processing, and the whole pipeline execution
+ * complexity is encapsulated into objects of this class. Execution
+ * model has two levels: at the very top, the execution of a
+ * heterogeneous graph is aggressively pipelined; at the very bottom
+ * the execution of every internal block is determined by its
+ * associated backend. Backends are selected based on kernel packages
+ * passed via compilation arguments ( see @ref gapi_compile_args,
+ * GNetworkPackage, GKernelPackage for details).
+ *
+ * GStreamingCompiled objects have a "player" semantics -- there are
+ * methods like start() and stop(). GStreamingCompiled has a full
+ * control over a videostream and so is stateful. You need to specify the
+ * input stream data using setSource() and then call start() to
+ * actually start processing. After that, use pull() or try_pull() to
+ * obtain next processed data frame from the graph in a blocking or
+ * non-blocking way, respectively.
+ *
+ * Currently a single GStreamingCompiled can process only one video
+ * stream at a time. Produce multiple GStreamingCompiled objects to run the
+ * same graph on multiple video streams.
+ *
+ * @sa GCompiled
+ */
+class GAPI_EXPORTS GStreamingCompiled
+{
+public:
+    class GAPI_EXPORTS Priv; // Implementation class; only declared in this header
+    GStreamingCompiled();
+
+    // FIXME: More overloads?
+    /**
+     * @brief Specify the input data to GStreamingCompiled for
+     * processing, a generic version.
+     *
+     * Use gin() to create an input parameter vector.
+     *
+     * Input vectors must have the same number of elements as defined
+     * in the cv::GComputation protocol (at the moment of its
+     * construction). Shapes of elements also must conform to protocol
+     * (e.g. cv::Mat needs to be passed where cv::GMat has been
+     * declared as input, and so on). Run-time exception is generated
+     * on type mismatch.
+     *
+     * In contrast with regular GCompiled, user can also pass an
+     * object of type GVideoCapture for a GMat parameter of the parent
+     * GComputation.  The compiled pipeline will start fetching data
+     * from that GVideoCapture and feeding it into the
+     * pipeline. Pipeline stops when a GVideoCapture marks end of the
+     * stream (or when stop() is called).
+     *
+     * Passing a regular Mat for a GMat parameter makes it "infinite"
+     * source -- pipeline may run forever feeding with this Mat until
+     * stopped explicitly.
+     *
+     * Currently only a single GVideoCapture is supported as input. If
+     * the parent GComputation is declared with multiple input GMat's,
+     * one of those can be specified as GVideoCapture but all others
+     * must be regular Mat objects.
+     *
+     * Throws if pipeline is already running. Use stop() and then
+     * setSource() to run the graph on a new video stream.
+     *
+     * @note This method is not thread-safe (with respect to the user
+     * side) at the moment. Protect the access if
+     * start()/stop()/setSource() may be called on the same object in
+     * multiple threads in your application.
+     *
+     * @param ins vector of inputs to process.
+     * @sa gin
+     */
+    void setSource(GRunArgs &&ins);
+
+    /**
+     * @brief Specify an input video stream for a single-input
+     * computation pipeline.
+     *
+     * Throws if pipeline is already running. Use stop() and then
+     * setSource() to run the graph on a new video stream.
+     *
+     * @overload
+     * @param s a shared pointer to IStreamSource representing the
+     * input video stream.
+     */
+    void setSource(const gapi::wip::IStreamSource::Ptr& s);
+
+    /**
+     * @brief Start the pipeline execution.
+     *
+     * Use pull()/try_pull() to obtain data. Throws an exception if
+     * a video source was not specified.
+     *
+     * setSource() must be called first, even if the pipeline has been
+     * working already and then stopped (explicitly via stop() or due to
+     * stream completion).
+     *
+     * @note This method is not thread-safe (with respect to the user
+     * side) at the moment. Protect the access if
+     * start()/stop()/setSource() may be called on the same object in
+     * multiple threads in your application.
+     */
+    void start();
+
+    /**
+     * @brief Get the next processed frame from the pipeline.
+     *
+     * Use gout() to create an output parameter vector.
+     *
+     * Output vectors must have the same number of elements as defined
+     * in the cv::GComputation protocol (at the moment of its
+     * construction). Shapes of elements also must conform to protocol
+     * (e.g. cv::Mat needs to be passed where cv::GMat has been
+     * declared as output, and so on). Run-time exception is generated
+     * on type mismatch.
+     *
+     * This method writes new data into objects passed via output
+     * vector.  If there is no data ready yet, this method blocks. Use
+     * try_pull() if you need a non-blocking version.
+     *
+     * @param outs vector of output parameters to obtain.
+     * @return true if next result has been obtained,
+     *    false marks end of the stream.
+     */
+    bool pull(cv::GRunArgsP &&outs);
+
+    /**
+     * @brief Try to get the next processed frame from the pipeline.
+     *
+     * Use gout() to create an output parameter vector.
+     *
+     * This method writes new data into objects passed via output
+     * vector.  If there is no data ready yet, the output vector
+     * remains unchanged and false is returned.
+     *
+     * @return true if data has been obtained, and false if it was
+     *    not. Note: false here doesn't mark the end of the stream.
+     */
+    bool try_pull(cv::GRunArgsP &&outs);
+
+    /**
+     * @brief Stop (abort) processing the pipeline.
+     *
+     * Note - it is not pause but a complete stop. Calling start()
+     * will cause G-API to start processing the stream from the very beginning.
+     *
+     * Throws if the pipeline is not running.
+     */
+    void stop();
+
+    /**
+     * @brief Test if the pipeline is running.
+     *
+     * @note This method is not thread-safe (with respect to the user
+     * side) at the moment. Protect the access if
+     * start()/stop()/setSource() may be called on the same object in
+     * multiple threads in your application.
+     *
+     * @return true if the current stream is not over yet.
+     */
+    bool running() const;
+
+    /// @private
+    Priv& priv();
+
+    /**
+     * @brief Check if compiled object is valid (non-empty)
+     *
+     * @return true if the object is runnable (valid), false otherwise
+     */
+    explicit operator bool () const;
+
+    /**
+     * @brief Vector of metadata this graph was compiled for.
+     *
+     * @return Unless _reshape_ is not supported, return value is the
+     * same vector which was passed to cv::GComputation::compile() to
+     * produce this compiled object. Otherwise, it is the latest
+     * metadata vector passed to reshape() (if that call was
+     * successful).
+     */
+    const GMetaArgs& metas() const; // Meta passed to compile()
+
+    /**
+     * @brief Vector of metadata descriptions of graph outputs
+     *
+     * @return vector with formats/resolutions of graph's output
+     * objects, auto-inferred from input metadata vector by
+     * operations which form this computation.
+     *
+     * @note GCompiled objects produced from the same
+     * cv::GComputation graph with different input metas may return
+     * different values in this vector.
+     */
+    const GMetaArgs& outMetas() const;
+
+protected:
+    /// @private
+    std::shared_ptr<Priv> m_priv; // No copy operations are declared, so copies of this object share one Priv
+};
+/** @} */
+
+}
+
+#endif // OPENCV_GAPI_GSTREAMING_COMPILED_HPP
diff --git a/IPL/include/opencv/opencv2/gapi/gtransform.hpp b/IPL/include/opencv/opencv2/gapi/gtransform.hpp
new file mode 100644
index 0000000..5d1b91b
--- /dev/null
+++ b/IPL/include/opencv/opencv2/gapi/gtransform.hpp
@@ -0,0 +1,103 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2019 Intel Corporation
+
+#ifndef OPENCV_GAPI_GTRANSFORM_HPP
+#define OPENCV_GAPI_GTRANSFORM_HPP
+
+#include <functional>
+#include <type_traits>
+#include <utility>
+
+#include <opencv2/gapi/gcommon.hpp>
+#include <opencv2/gapi/util/util.hpp>
+#include <opencv2/gapi/garg.hpp>
+#include <opencv2/gapi/gtype_traits.hpp>
+#include <opencv2/gapi/util/compiler_hints.hpp>
+#include <opencv2/gapi/gcomputation.hpp>
+
+namespace cv
+{
+
+struct GAPI_EXPORTS GTransform
+{
+    // FIXME: consider another simplified
+    // class instead of GComputation
+    using F = std::function<GComputation()>; // nullary factory that returns a GComputation
+
+    std::string description; // free-form text; GTransformImpl::transformation() passes K::descr() here
+    F pattern;               // factory for the pattern computation
+    F substitute;            // factory for the substitute computation
+
+    GTransform(const std::string& d, const F &p, const F &s) : description(d), pattern(p), substitute(s){}; // copies all three arguments into the members
+};
+
+namespace detail
+{
+
+template <typename, typename, typename>
+struct TransHelper; // primary template is declared only; the partial specialization below supplies the members
+
+template <typename K, typename... Ins, typename Out>
+struct TransHelper<K, std::tuple<Ins...>, Out> // K: transformation class, Ins...: input types, Out: result type of K::pattern / K::substitute
+{
+    template <typename Callable, int... IIs, int... OIs>
+    static GComputation invoke(Callable f, Seq<IIs...>, Seq<OIs...>) // IIs index the inputs tuple, OIs index the result tuple
+    {
+        const std::tuple<Ins...> ins; // default-constructed input objects, handed both to f and to cv::GIn below
+        const auto r = tuple_wrap_helper<Out>::get(f(std::get<IIs>(ins)...)); // call f once; the result must be indexable by std::get (presumably a lone value gets wrapped into a tuple -- confirm)
+        return GComputation(cv::GIn(std::get<IIs>(ins)...),
+                            cv::GOut(std::get<OIs>(r)...));
+    }
+
+    static GComputation get_pattern() // builds a GComputation by invoking K::pattern
+    {
+        return invoke(K::pattern, typename MkSeq<sizeof...(Ins)>::type(), // sequence sized by the number of inputs
+                      typename MkSeq<std::tuple_size<typename tuple_wrap_helper<Out>::type>::value>::type()); // sequence sized by the number of outputs
+    }
+    static GComputation get_substitute() // same as get_pattern(), but invokes K::substitute
+    {
+        return invoke(K::substitute, typename MkSeq<sizeof...(Ins)>::type(),
+                      typename MkSeq<std::tuple_size<typename tuple_wrap_helper<Out>::type>::value>::type());
+    }
+};
+} // namespace detail
+
+template <typename, typename>
+class GTransformImpl; // primary template is declared only; see the std::function specialization below
+
+template <typename K, typename R, typename... Args>
+class GTransformImpl<K, std::function<R(Args...)>> : public cv::detail::TransHelper<K, std::tuple<Args...>, R>,
+                                                     public cv::detail::TransformTag
+{
+public:
+    // FIXME: currently there is no check that transformations' signatures are unique
+    // and won't be any intersection in graph compilation stage
+    using API = K; // K is the user's transformation class (the first template argument)
+
+    static GTransform transformation() // packs K's description and its two factories into a GTransform
+    {
+        return GTransform(K::descr(), &K::get_pattern, &K::get_substitute); // K must expose descr(), get_pattern and get_substitute; the latter two are declared in TransHelper
+    }
+};
+} // namespace cv
+} // namespace cv
+
+#define G_DESCR_HELPER_CLASS(Class) Class##DescrHelper // token-pastes the helper struct name: <Class>DescrHelper
+
+#define G_DESCR_HELPER_BODY(Class, Descr) /* defines detail::<Class>DescrHelper whose static descr() returns Descr */ \
+    namespace detail                                            \
+    {                                                           \
+    struct G_DESCR_HELPER_CLASS(Class)                          \
+    {                                                           \
+        static constexpr const char *descr() { return Descr; }; \
+    };                                                          \
+    }
+
+#define GAPI_TRANSFORM(Class, API, Descr) /* emits the helper, then opens `struct Class final : ...`; the caller writes the struct body after the macro */ \
+    G_DESCR_HELPER_BODY(Class, Descr)                                         \
+    struct Class final : public cv::GTransformImpl<Class, std::function API>, \
+                         public detail::G_DESCR_HELPER_CLASS(Class)
+
+#endif // OPENCV_GAPI_GTRANSFORM_HPP
diff --git a/IPL/include/opencv/opencv2/gapi/gtype_traits.hpp b/IPL/include/opencv/opencv2/gapi/gtype_traits.hpp
new file mode 100644
index 0000000..fbdac15
--- /dev/null
+++ b/IPL/include/opencv/opencv2/gapi/gtype_traits.hpp
@@ -0,0 +1,188 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2018 Intel Corporation
+
+
+#ifndef OPENCV_GAPI_GTYPE_TRAITS_HPP
+#define OPENCV_GAPI_GTYPE_TRAITS_HPP
+
+#include <vector>
+#include <type_traits>
+
+#include <opencv2/gapi/gmat.hpp>
+#include <opencv2/gapi/gscalar.hpp>
+#include <opencv2/gapi/garray.hpp>
+#include <opencv2/gapi/gopaque.hpp>
+#include <opencv2/gapi/streaming/source.hpp>
+#include <opencv2/gapi/gcommon.hpp>
+#include <opencv2/gapi/own/convert.hpp>
+
+namespace cv
+{
+namespace detail
+{
+    // FIXME: This enum, the traits below, and the possibly numerous
+    // switch(kind) blocks may be replaced with a special Handler<T>
+    // object or with double dispatch.
+    enum class ArgKind: int
+    {
+        OPAQUE_VAL,   // Unknown, generic, opaque-to-GAPI data type - STATIC
+                      // Note: OPAQUE is sometimes defined as a macro in Windows system headers, hence the guard below
+#if !defined(OPAQUE) && !defined(CV_DOXYGEN)
+        OPAQUE = OPAQUE_VAL,  // deprecated alias kept for compatibility, use OPAQUE_VAL instead; same value, so the enumerators below are numbered identically with or without it
+#endif
+        GOBJREF,      // <internal> reference to object
+        GMAT,         // a cv::GMat
+        GMATP,        // a cv::GMatP
+        GFRAME,       // a cv::GFrame
+        GSCALAR,      // a cv::GScalar
+        GARRAY,       // a cv::GArrayU  (note - exactly GArrayU,  not GArray<T>!)
+        GOPAQUE,      // a cv::GOpaqueU (note - exactly GOpaqueU, not GOpaque<T>!)
+    };
+
+    // Describe G-API types (G-types) with traits.  Mostly used by
+    // cv::GArg to store meta information about types passed into
+    // operation arguments. Please note that cv::GComputation is
+    // defined on GProtoArgs, not GArgs!
+    template<typename T> struct GTypeTraits;
+    template<typename T> struct GTypeTraits // fallback for every type without a specialization below
+    {
+        static constexpr const ArgKind kind = ArgKind::OPAQUE_VAL; // note: the fallback has no `shape` member
+    };
+    template<>           struct GTypeTraits<cv::GMat>
+    {
+        static constexpr const ArgKind kind = ArgKind::GMAT;
+        static constexpr const GShape shape = GShape::GMAT;
+    };
+    template<>           struct GTypeTraits<cv::GMatP>
+    {
+        static constexpr const ArgKind kind = ArgKind::GMATP;
+        static constexpr const GShape shape = GShape::GMAT; // own kind, but same shape as GMat
+    };
+    template<>           struct GTypeTraits<cv::GFrame>
+    {
+        static constexpr const ArgKind kind = ArgKind::GFRAME;
+        static constexpr const GShape shape = GShape::GMAT; // own kind, but same shape as GMat
+    };
+    template<>           struct GTypeTraits<cv::GScalar>
+    {
+        static constexpr const ArgKind kind = ArgKind::GSCALAR;
+        static constexpr const GShape shape = GShape::GSCALAR;
+    };
+    template<class T> struct GTypeTraits<cv::GArray<T> >
+    {
+        static constexpr const ArgKind kind = ArgKind::GARRAY;
+        static constexpr const GShape shape = GShape::GARRAY;
+        using host_type  = std::vector<T>;          // host-side counterpart of GArray<T>
+        using strip_type = cv::detail::VectorRef;   // return type of wrap_in()/wrap_out() below
+        static cv::detail::GArrayU   wrap_value(const cv::GArray<T>  &t) { return t.strip();} // G-type -> GArrayU; the presence of wrap_value is what has_custom_wrap detects
+        static cv::detail::VectorRef wrap_in   (const std::vector<T> &t) { return detail::VectorRef(t); } // from a const host vector
+        static cv::detail::VectorRef wrap_out  (      std::vector<T> &t) { return detail::VectorRef(t); } // from a mutable host vector
+    };
+    template<class T> struct GTypeTraits<cv::GOpaque<T> >
+    {
+        static constexpr const ArgKind kind = ArgKind::GOPAQUE;
+        static constexpr const GShape shape = GShape::GOPAQUE;
+        using host_type  = T;                       // host-side counterpart of GOpaque<T>
+        using strip_type = cv::detail::OpaqueRef;   // return type of wrap_in()/wrap_out() below
+        static cv::detail::GOpaqueU  wrap_value(const cv::GOpaque<T>  &t) { return t.strip();} // G-type -> GOpaqueU
+        static cv::detail::OpaqueRef wrap_in   (const T &t) { return detail::OpaqueRef(t); } // from a const host object
+        static cv::detail::OpaqueRef wrap_out  (      T &t) { return detail::OpaqueRef(t); } // from a mutable host object
+    };
+
+    // Tests if Trait for type T requires extra marshalling ("custom wrap") or not.
+    // If GTypeTraits<T> has wrap_value() defined, it does.
+    template<class T> struct has_custom_wrap
+    {
+        template<class,class> class check; // declared only; used just to form a pointer type in the overload below
+        template<typename C> static std::true_type  test(check<C, decltype(&GTypeTraits<C>::wrap_value)> *); // viable only if &GTypeTraits<C>::wrap_value is well-formed
+        template<typename C> static std::false_type test(...); // fallback overload, chosen when the one above drops out
+        using type = decltype(test<T>(nullptr)); // std::true_type or std::false_type
+        static const constexpr bool value = std::is_same<std::true_type, decltype(test<T>(nullptr))>::value;
+    };
+
+    // Resolve a Host type back to its associated G-Type.
+    // FIXME: Probably it can be avoided
+    // FIXME: GMatP is not present here.
+    // (Actually this trait is used only to check
+    // if the associated G-type has custom wrap functions,
+    // and the GMat behavior is correct for GMatP)
+    template<typename T> struct GTypeOf;
+#if !defined(GAPI_STANDALONE)
+    template<>           struct GTypeOf<cv::Mat>               { using type = cv::GMat;      };
+    template<>           struct GTypeOf<cv::UMat>              { using type = cv::GMat;      };
+#endif // !defined(GAPI_STANDALONE)
+    template<>           struct GTypeOf<cv::gapi::own::Mat>    { using type = cv::GMat;      };
+    template<>           struct GTypeOf<cv::Scalar>            { using type = cv::GScalar;   };
+    template<typename U> struct GTypeOf<std::vector<U> >       { using type = cv::GArray<U>; };
+    template<typename U> struct GTypeOf                        { using type = cv::GOpaque<U>;}; // primary template: any host type not listed here maps to GOpaque<U>
+    // FIXME: This is not quite correct since IStreamSource may produce not only Mat but also Scalar
+    // and vector data. TODO: Extend the type dispatching on these types too.
+    template<>           struct GTypeOf<cv::gapi::wip::IStreamSource::Ptr> { using type = cv::GMat;};
+    template<class T> using g_type_of_t = typename GTypeOf<T>::type; // shorthand for GTypeOf<T>::type
+
+    // Marshalling helper for G-types and their Host types. Helps G-API
+    // to store G types in internal generic containers for further
+    // processing. Implements the following callbacks:
+    //
+    // * wrap() - converts user-facing G-type into an internal one
+    //   for internal storage.
+    //   Used when G-API operation is instantiated (G<Kernel>::on(),
+    //   etc) during expressing a pipeline. Mostly returns input
+    //   value "as is" except the case when G-type is a template. For
+    //   template G-classes, calls custom wrap() from Traits.
+    //   The value returned by wrap() is then wrapped into GArg() and
+    //   stored in G-API metadata.
+    //
+    //   Example:
+    //   - cv::GMat arguments are passed as-is.
+    //   - integers, pointers, STL containers, user types are passed as-is.
+    //   - cv::GArray<T> is converted to cv::GArrayU.
+    //
+    // * wrap_in() / wrap_out() - convert Host type associated with
+    //   G-type to internal representation type.
+    //
+    //   - For "simple" (non-template) G-types, returns value as-is.
+    //     Example: cv::GMat has host type cv::Mat, when user passes a
+    //              cv::Mat, system stores it internally as cv::Mat.
+    //
+    //   - For "complex" (template) G-types, utilizes custom
+    //     wrap_in()/wrap_out() as described in Traits.
+    //     Example: cv::GArray<T> has host type std::vector<T>, when
+    //              user passes a std::vector<T>, system stores it
+    //              internally as VectorRef (with <T> stripped away).
+    template<typename T, class Custom = void> struct WrapValue // generic case: selected when has_custom_wrap<T>::value is false
+    {
+        static auto wrap(const T& t) ->
+            typename std::remove_reference<T>::type
+        {
+            return static_cast<typename std::remove_reference<T>::type>(t); // returned by value, i.e. a copy of t
+        }
+
+        template<typename U> static U  wrap_in (const U &u) { return  u;  } // input: a copy of the host value
+        template<typename U> static U* wrap_out(U &u)       { return &u;  } // output: the address of the caller's object
+    };
+    template<typename T> struct WrapValue<T, typename std::enable_if<has_custom_wrap<T>::value>::type> // custom case: GTypeTraits<T> provides wrap_value()
+    {
+        static auto wrap(const T& t) -> decltype(GTypeTraits<T>::wrap_value(t))
+        {
+            return GTypeTraits<T>::wrap_value(t); // forwards to the trait
+        }
+        template<typename U> static auto wrap_in (const U &u) -> typename GTypeTraits<T>::strip_type
+        {
+            return GTypeTraits<T>::wrap_in(u); // forwards to the trait
+        }
+        template<typename U> static auto wrap_out(U &u) -> typename GTypeTraits<T>::strip_type
+        {
+            return GTypeTraits<T>::wrap_out(u); // forwards to the trait
+        }
+    };
+
+    template<typename T> using wrap_gapi_helper = WrapValue<typename std::decay<T>::type>; // T is a G-type: WrapValue of the decayed T
+    template<typename T> using wrap_host_helper = WrapValue<typename std::decay<g_type_of_t<T> >::type>; // T is a host type: WrapValue of the G-type that GTypeOf maps it to
+} // namespace detail
+} // namespace cv
+
+#endif // OPENCV_GAPI_GTYPE_TRAITS_HPP
diff --git a/IPL/include/opencv/opencv2/gapi/gtyped.hpp b/IPL/include/opencv/opencv2/gapi/gtyped.hpp
new file mode 100644
index 0000000..1ce6201
--- /dev/null
+++ b/IPL/include/opencv/opencv2/gapi/gtyped.hpp
@@ -0,0 +1,229 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2018 Intel Corporation
+
+
+#ifndef OPENCV_GAPI_GTYPED_HPP
+#define OPENCV_GAPI_GTYPED_HPP
+#if !defined(GAPI_STANDALONE)
+
+#include <vector>
+
+#include <opencv2/gapi/gcomputation.hpp>
+#include <opencv2/gapi/gcompiled.hpp>
+#include <opencv2/gapi/gproto.hpp>
+#include <opencv2/gapi/gcommon.hpp>
+
+namespace cv {
+
+namespace detail
+{
+    // FIXME: How to prevent coolhackers from extending it by their own types?
+    // FIXME: ...Should we care?
+    template<typename T> struct ProtoToParam; // maps a G-type to the type used in apply()/operator() parameter lists; declared only for unlisted types
+    template<> struct ProtoToParam<cv::GMat>    { using type = cv::Mat; };
+    template<> struct ProtoToParam<cv::GScalar> { using type = cv::Scalar; };
+    template<typename U> struct ProtoToParam<cv::GArray<U> >  { using type = std::vector<U>; };
+    template<typename U> struct ProtoToParam<cv::GOpaque<U> > { using type = U; };
+    template<typename T> using ProtoToParamT = typename ProtoToParam<T>::type;
+
+    template<typename T> struct ProtoToMeta; // maps a G-type to the descriptor type used in compile() parameter lists; declared only for unlisted types
+    template<> struct ProtoToMeta<cv::GMat>     { using type = cv::GMatDesc; };
+    template<> struct ProtoToMeta<cv::GScalar>  { using type = cv::GScalarDesc; };
+    template<typename U> struct ProtoToMeta<cv::GArray<U> >  { using type = cv::GArrayDesc; };
+    template<typename U> struct ProtoToMeta<cv::GOpaque<U> > { using type = cv::GOpaqueDesc; };
+    template<typename T> using ProtoToMetaT = typename ProtoToMeta<T>::type;
+
+    // Workaround for an MSVC 19.0 bug: returns a T initialized from empty braces
+    template <typename T>
+    auto make_default()->decltype(T{}) {return {};}
+}; // detail
+
+/**
+ * @brief This class is a typed wrapper over a regular GComputation.
+ *
+ * `std::function<>`-like template parameter specifies the graph
+ *  signature, so the object's constructor, methods like
+ *  `apply()` and the derived `GCompiledT::operator()` also become
+ *  typed.
+ *
+ *  There is no need to use cv::gin() or cv::gout() modifiers with
+ *  objects of this class.  Instead, all input arguments are followed
+ *  by all output arguments in the order from the template argument
+ *  signature.
+ *
+ *  Refer to the following example. Regular (untyped) code is written this way:
+ *
+ *  @snippet modules/gapi/samples/api_ref_snippets.cpp Untyped_Example
+ *
+ *  Here:
+ *
+ *  - cv::GComputation object is created with a lambda constructor
+ *    where it is defined as a two-input, one-output graph.
+ *
+ *  - Its method `apply()` in fact takes arbitrary number of arguments
+ *    (as vectors) so user can pass wrong number of inputs/outputs
+ *    here. C++ compiler wouldn't notice that since the cv::GComputation
+ *    API is polymorphic, and only a run-time error will be generated.
+ *
+ *  Now the same code written with typed API:
+ *
+ *  @snippet modules/gapi/samples/api_ref_snippets.cpp Typed_Example
+ *
+ *  The key difference is:
+ *
+ *  - Now the constructor lambda *must take* parameters and *must
+ *    return* values as defined in the `GComputationT<>` signature.
+ *  - Its method `apply()` does not require any extra specifiers to
+ *    separate input arguments from the output ones
+ *  - A `GCompiledT` (compilation product) takes input/output
+ *    arguments with no extra specifiers as well.
+ */
+template<typename> class GComputationT; // primary template is declared only; the specializations are keyed on a function signature
+
+// Single return value implementation
+template<typename R, typename... Args> class GComputationT<R(Args...)>
+{
+public:
+    typedef std::function<R(Args...)> Gen; // graph generator: takes the input G-types, returns the output G-type
+
+    class GCompiledT
+    {
+    private:
+        friend class GComputationT<R(Args...)>; // the enclosing class is the only one that can call the private constructor
+
+        cv::GCompiled m_comp;
+
+        explicit GCompiledT(const cv::GCompiled &comp) : m_comp(comp) {}
+
+    public:
+        GCompiledT() {} // wraps a default-constructed cv::GCompiled
+
+        void operator()(detail::ProtoToParamT<Args>... inArgs, // inputs are taken by value
+                        detail::ProtoToParamT<R> &outArg)       // the single output is taken by reference
+        {
+            m_comp(cv::gin(inArgs...), cv::gout(outArg));
+        }
+
+        explicit operator bool() const
+        {
+            return static_cast<bool>(m_comp); // forwards to cv::GCompiled's bool conversion
+        }
+    };
+
+private:
+    typedef std::pair<R, GProtoInputArgs > Captured; // first: generator result, second: protocol inputs
+
+    Captured capture(const Gen& g, Args... args)
+    {
+        return Captured(g(args...), cv::GIn(args...)); // the same args go to the generator and into cv::GIn
+    }
+
+    Captured m_capture;      // must stay declared before m_comp: the constructor's m_comp initializer reads it
+    cv::GComputation m_comp;
+
+public:
+    GComputationT(const Gen &generator)
+        : m_capture(capture(generator, detail::make_default<Args>()...)) // runs the generator once on make_default<Args>() objects
+        , m_comp(cv::GProtoInputArgs(std::move(m_capture.second)),      // m_capture.second is moved-from afterwards
+                 cv::GOut(m_capture.first))
+    {
+    }
+
+    void apply(detail::ProtoToParamT<Args>... inArgs, // inputs are taken by value
+               detail::ProtoToParamT<R> &outArg)
+    {
+        m_comp.apply(cv::gin(inArgs...), cv::gout(outArg));
+    }
+
+    GCompiledT compile(detail::ProtoToMetaT<Args>... inDescs) // one descriptor per input; empty compile arguments
+    {
+        GMetaArgs inMetas = { GMetaArg(inDescs)... };
+        return GCompiledT(m_comp.compile(std::move(inMetas), GCompileArgs()));
+    }
+
+    GCompiledT compile(detail::ProtoToMetaT<Args>... inDescs, GCompileArgs &&args) // same, with caller-supplied compile arguments
+    {
+        GMetaArgs inMetas = { GMetaArg(inDescs)... };
+        return GCompiledT(m_comp.compile(std::move(inMetas), std::move(args)));
+    }
+};
+
+// Multiple (fixed) return value implementation. FIXME: How to avoid copy-paste?
+template<typename... R, typename... Args> class GComputationT<std::tuple<R...>(Args...)>
+{
+public:
+    typedef std::function<std::tuple<R...>(Args...)> Gen; // graph generator: takes the input G-types, returns a tuple of output G-types
+
+    class GCompiledT
+    {
+    private:
+        friend class GComputationT<std::tuple<R...>(Args...)>; // the enclosing class is the only one that can call the private constructor
+
+        cv::GCompiled m_comp;
+        explicit GCompiledT(const cv::GCompiled &comp) : m_comp(comp) {}
+
+    public:
+        GCompiledT() {} // wraps a default-constructed cv::GCompiled
+
+        void operator()(detail::ProtoToParamT<Args>... inArgs, // inputs are taken by value
+                        detail::ProtoToParamT<R>&... outArgs)   // one reference per tuple element
+        {
+            m_comp(cv::gin(inArgs...), cv::gout(outArgs...));
+        }
+
+        explicit operator bool() const
+        {
+            return static_cast<bool>(m_comp); // forwards to cv::GCompiled's bool conversion
+        }
+    };
+
+private:
+    typedef std::pair<GProtoArgs, GProtoArgs> Captured; // first: outputs, second: inputs (see capture() below)
+
+    template<int... IIs>
+    Captured capture(GProtoArgs &&args, const std::tuple<R...> &rr, detail::Seq<IIs...>) // IIs index the result tuple
+    {
+        return Captured(cv::GOut(std::get<IIs>(rr)...).m_args, args); // `args` is a named reference here, so it is copied rather than moved
+    }
+
+    Captured capture(const Gen& g, Args... args)
+    {
+        return capture(cv::GIn(args...).m_args, g(args...), typename detail::MkSeq<sizeof...(R)>::type()); // the same args go into cv::GIn and to the generator
+    }
+
+    Captured m_capture;      // must stay declared before m_comp: the constructor's m_comp initializer reads it
+    cv::GComputation m_comp;
+
+public:
+    GComputationT(const Gen &generator)
+        : m_capture(capture(generator, detail::make_default<Args>()...)) // runs the generator once on make_default<Args>() objects
+        , m_comp(cv::GProtoInputArgs(std::move(m_capture.second)),      // both halves of m_capture are moved-from afterwards
+                 cv::GProtoOutputArgs(std::move(m_capture.first)))
+    {
+    }
+
+    void apply(detail::ProtoToParamT<Args>... inArgs, // inputs are taken by value
+               detail::ProtoToParamT<R>&... outArgs)
+    {
+        m_comp.apply(cv::gin(inArgs...), cv::gout(outArgs...));
+    }
+
+    GCompiledT compile(detail::ProtoToMetaT<Args>... inDescs) // one descriptor per input; empty compile arguments
+    {
+        GMetaArgs inMetas = { GMetaArg(inDescs)... };
+        return GCompiledT(m_comp.compile(std::move(inMetas), GCompileArgs()));
+    }
+
+    GCompiledT compile(detail::ProtoToMetaT<Args>... inDescs, GCompileArgs &&args) // same, with caller-supplied compile arguments
+    {
+        GMetaArgs inMetas = { GMetaArg(inDescs)... };
+        return GCompiledT(m_comp.compile(std::move(inMetas), std::move(args)));
+    }
+};
+
+} // namespace cv
+#endif // !defined(GAPI_STANDALONE)
+#endif // OPENCV_GAPI_GTYPED_HPP
diff --git a/IPL/include/opencv/opencv2/gapi/imgproc.hpp b/IPL/include/opencv/opencv2/gapi/imgproc.hpp
new file mode 100644
index 0000000..4faf5e1
--- /dev/null
+++ b/IPL/include/opencv/opencv2/gapi/imgproc.hpp
@@ -0,0 +1,1009 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2018-2020 Intel Corporation
+
+
+#ifndef OPENCV_GAPI_IMGPROC_HPP
+#define OPENCV_GAPI_IMGPROC_HPP
+
+#include <opencv2/imgproc.hpp>
+
+#include <utility> // std::tuple
+
+#include <opencv2/gapi/gkernel.hpp>
+#include <opencv2/gapi/gmat.hpp>
+#include <opencv2/gapi/gscalar.hpp>
+
+
+/** \defgroup gapi_imgproc G-API Image processing functionality
+@{
+    @defgroup gapi_filters Graph API: Image filters
+    @defgroup gapi_colorconvert Graph API: Converting image from one color space to another
+@}
+ */
+
+namespace cv { namespace gapi {
+
+namespace imgproc {
+    using GMat2 = std::tuple<GMat,GMat>;
+    using GMat3 = std::tuple<GMat,GMat,GMat>; // FIXME: how to avoid this?
+
+    G_TYPED_KERNEL(GFilter2D, <GMat(GMat,int,Mat,Point,Scalar,int,Scalar)>,"org.opencv.imgproc.filters.filter2D") {
+        static GMatDesc outMeta(GMatDesc in, int ddepth, Mat, Point, Scalar, int, Scalar) {
+            return in.withDepth(ddepth);
+        }
+    };
+
+    G_TYPED_KERNEL(GSepFilter, <GMat(GMat,int,Mat,Mat,Point,Scalar,int,Scalar)>, "org.opencv.imgproc.filters.sepfilter") {
+        static GMatDesc outMeta(GMatDesc in, int ddepth, Mat, Mat, Point, Scalar, int, Scalar) {
+            return in.withDepth(ddepth);
+        }
+    };
+
+    G_TYPED_KERNEL(GBoxFilter, <GMat(GMat,int,Size,Point,bool,int,Scalar)>, "org.opencv.imgproc.filters.boxfilter") {
+        static GMatDesc outMeta(GMatDesc in, int ddepth, Size, Point, bool, int, Scalar) {
+            return in.withDepth(ddepth);
+        }
+    };
+
+    G_TYPED_KERNEL(GBlur, <GMat(GMat,Size,Point,int,Scalar)>,         "org.opencv.imgproc.filters.blur"){
+        static GMatDesc outMeta(GMatDesc in, Size, Point, int, Scalar) {
+            return in;
+        }
+    };
+
+    G_TYPED_KERNEL(GGaussBlur, <GMat(GMat,Size,double,double,int,Scalar)>, "org.opencv.imgproc.filters.gaussianBlur") {
+        static GMatDesc outMeta(GMatDesc in, Size, double, double, int, Scalar) {
+            return in;
+        }
+    };
+
+    G_TYPED_KERNEL(GMedianBlur, <GMat(GMat,int)>, "org.opencv.imgproc.filters.medianBlur") {
+        static GMatDesc outMeta(GMatDesc in, int) {
+            return in;
+        }
+    };
+
+    G_TYPED_KERNEL(GErode, <GMat(GMat,Mat,Point,int,int,Scalar)>, "org.opencv.imgproc.filters.erode") {
+        static GMatDesc outMeta(GMatDesc in, Mat, Point, int, int, Scalar) {
+            return in;
+        }
+    };
+
+    G_TYPED_KERNEL(GDilate, <GMat(GMat,Mat,Point,int,int,Scalar)>, "org.opencv.imgproc.filters.dilate") {
+        static GMatDesc outMeta(GMatDesc in, Mat, Point, int, int, Scalar) {
+            return in;
+        }
+    };
+
+    G_TYPED_KERNEL(GSobel, <GMat(GMat,int,int,int,int,double,double,int,Scalar)>, "org.opencv.imgproc.filters.sobel") {
+        static GMatDesc outMeta(GMatDesc in, int ddepth, int, int, int, double, double, int, Scalar) {
+            return in.withDepth(ddepth);
+        }
+    };
+
+    G_TYPED_KERNEL_M(GSobelXY, <GMat2(GMat,int,int,int,double,double,int,Scalar)>, "org.opencv.imgproc.filters.sobelxy") {
+        static std::tuple<GMatDesc, GMatDesc> outMeta(GMatDesc in, int ddepth, int, int, double, double, int, Scalar) {
+            return std::make_tuple(in.withDepth(ddepth), in.withDepth(ddepth));
+        }
+    };
+
+    G_TYPED_KERNEL(GEqHist, <GMat(GMat)>, "org.opencv.imgproc.equalizeHist"){
+        static GMatDesc outMeta(GMatDesc in) {
+            return in.withType(CV_8U, 1);
+        }
+    };
+
+    G_TYPED_KERNEL(GCanny, <GMat(GMat,double,double,int,bool)>, "org.opencv.imgproc.canny"){
+        static GMatDesc outMeta(GMatDesc in, double, double, int, bool) {
+            return in.withType(CV_8U, 1);
+        }
+    };
+
+    G_TYPED_KERNEL(GGoodFeatures,
+                   <cv::GArray<cv::Point2f>(GMat,int,double,double,Mat,int,bool,double)>,
+                   "org.opencv.imgproc.goodFeaturesToTrack") {
+        static GArrayDesc outMeta(GMatDesc, int, double, double, const Mat&, int, bool, double) {
+            return empty_array_desc();
+        }
+    };
+
+    G_TYPED_KERNEL(GRGB2YUV, <GMat(GMat)>, "org.opencv.imgproc.colorconvert.rgb2yuv") {
+        static GMatDesc outMeta(GMatDesc in) {
+            return in; // type still remains CV_8UC3;
+        }
+    };
+
+    G_TYPED_KERNEL(GYUV2RGB, <GMat(GMat)>, "org.opencv.imgproc.colorconvert.yuv2rgb") {
+        static GMatDesc outMeta(GMatDesc in) {
+            return in; // type still remains CV_8UC3;
+        }
+    };
+
+    G_TYPED_KERNEL(GNV12toRGB, <GMat(GMat, GMat)>, "org.opencv.imgproc.colorconvert.nv12torgb") {
+        static GMatDesc outMeta(GMatDesc in_y, GMatDesc in_uv) {
+            GAPI_Assert(in_y.chan == 1);
+            GAPI_Assert(in_uv.chan == 2);
+            GAPI_Assert(in_y.depth == CV_8U);
+            GAPI_Assert(in_uv.depth == CV_8U);
+            // UV size should be aligned with Y
+            GAPI_Assert(in_y.size.width == 2 * in_uv.size.width);
+            GAPI_Assert(in_y.size.height == 2 * in_uv.size.height);
+            return in_y.withType(CV_8U, 3); // type will be CV_8UC3;
+        }
+    };
+
+    G_TYPED_KERNEL(GNV12toBGR, <GMat(GMat, GMat)>, "org.opencv.imgproc.colorconvert.nv12tobgr") {
+        static GMatDesc outMeta(GMatDesc in_y, GMatDesc in_uv) {
+            GAPI_Assert(in_y.chan == 1);
+            GAPI_Assert(in_uv.chan == 2);
+            GAPI_Assert(in_y.depth == CV_8U);
+            GAPI_Assert(in_uv.depth == CV_8U);
+            // UV size should be aligned with Y
+            GAPI_Assert(in_y.size.width == 2 * in_uv.size.width);
+            GAPI_Assert(in_y.size.height == 2 * in_uv.size.height);
+            return in_y.withType(CV_8U, 3); // type will be CV_8UC3;
+        }
+    };
+
+    G_TYPED_KERNEL(GRGB2Lab, <GMat(GMat)>, "org.opencv.imgproc.colorconvert.rgb2lab") {
+        static GMatDesc outMeta(GMatDesc in) {
+            return in; // type still remains CV_8UC3;
+        }
+    };
+
+    G_TYPED_KERNEL(GBGR2LUV, <GMat(GMat)>, "org.opencv.imgproc.colorconvert.bgr2luv") {
+        static GMatDesc outMeta(GMatDesc in) {
+            return in; // type still remains CV_8UC3;
+        }
+    };
+
+    G_TYPED_KERNEL(GLUV2BGR, <GMat(GMat)>, "org.opencv.imgproc.colorconvert.luv2bgr") {
+        static GMatDesc outMeta(GMatDesc in) {
+            return in; // type still remains CV_8UC3;
+        }
+    };
+
+    G_TYPED_KERNEL(GYUV2BGR, <GMat(GMat)>, "org.opencv.imgproc.colorconvert.yuv2bgr") {
+        static GMatDesc outMeta(GMatDesc in) {
+            return in; // type still remains CV_8UC3;
+        }
+    };
+
+    G_TYPED_KERNEL(GBGR2YUV, <GMat(GMat)>, "org.opencv.imgproc.colorconvert.bgr2yuv") {
+        static GMatDesc outMeta(GMatDesc in) {
+            return in; // type still remains CV_8UC3;
+        }
+    };
+
+    G_TYPED_KERNEL(GRGB2Gray, <GMat(GMat)>, "org.opencv.imgproc.colorconvert.rgb2gray") {
+        static GMatDesc outMeta(GMatDesc in) {
+            return in.withType(CV_8U, 1);
+        }
+    };
+
+    G_TYPED_KERNEL(GRGB2GrayCustom, <GMat(GMat,float,float,float)>, "org.opencv.imgproc.colorconvert.rgb2graycustom") {
+        static GMatDesc outMeta(GMatDesc in, float, float, float) {
+            return in.withType(CV_8U, 1);
+        }
+    };
+
+    G_TYPED_KERNEL(GBGR2Gray, <GMat(GMat)>, "org.opencv.imgproc.colorconvert.bgr2gray") {
+        static GMatDesc outMeta(GMatDesc in) {
+            return in.withType(CV_8U, 1);
+        }
+    };
+
+    G_TYPED_KERNEL(GBayerGR2RGB, <cv::GMat(cv::GMat)>, "org.opencv.imgproc.colorconvert.bayergr2rgb") {
+        static cv::GMatDesc outMeta(cv::GMatDesc in) {
+            return in.withType(CV_8U, 3);
+        }
+    };
+
+    G_TYPED_KERNEL(GRGB2HSV, <cv::GMat(cv::GMat)>, "org.opencv.imgproc.colorconvert.rgb2hsv") {
+        static cv::GMatDesc outMeta(cv::GMatDesc in) {
+            return in;
+        }
+    };
+
+    G_TYPED_KERNEL(GRGB2YUV422, <cv::GMat(cv::GMat)>, "org.opencv.imgproc.colorconvert.rgb2yuv422") {
+        static cv::GMatDesc outMeta(cv::GMatDesc in) {
+            GAPI_Assert(in.depth == CV_8U);
+            GAPI_Assert(in.chan == 3);
+            return in.withType(in.depth, 2);
+        }
+    };
+
+    G_TYPED_KERNEL(GNV12toRGBp, <GMatP(GMat,GMat)>, "org.opencv.colorconvert.imgproc.nv12torgbp") {
+        static GMatDesc outMeta(GMatDesc inY, GMatDesc inUV) {
+            GAPI_Assert(inY.depth == CV_8U);
+            GAPI_Assert(inUV.depth == CV_8U);
+            GAPI_Assert(inY.chan == 1);
+            GAPI_Assert(inY.planar == false);
+            GAPI_Assert(inUV.chan == 2);
+            GAPI_Assert(inUV.planar == false);
+            GAPI_Assert(inY.size.width  == 2 * inUV.size.width);
+            GAPI_Assert(inY.size.height == 2 * inUV.size.height);
+            return inY.withType(CV_8U, 3).asPlanar();
+        }
+    };
+
+    G_TYPED_KERNEL(GNV12toGray, <GMat(GMat,GMat)>, "org.opencv.colorconvert.imgproc.nv12togray") {
+        static GMatDesc outMeta(GMatDesc inY, GMatDesc inUV) {
+            GAPI_Assert(inY.depth   == CV_8U);
+            GAPI_Assert(inUV.depth  == CV_8U);
+            GAPI_Assert(inY.chan    == 1);
+            GAPI_Assert(inY.planar  == false);
+            GAPI_Assert(inUV.chan   == 2);
+            GAPI_Assert(inUV.planar == false);
+
+            GAPI_Assert(inY.size.width  == 2 * inUV.size.width);
+            GAPI_Assert(inY.size.height == 2 * inUV.size.height);
+            return inY.withType(CV_8U, 1);
+        }
+    };
+
+    G_TYPED_KERNEL(GNV12toBGRp, <GMatP(GMat,GMat)>, "org.opencv.colorconvert.imgproc.nv12tobgrp") {
+        static GMatDesc outMeta(GMatDesc inY, GMatDesc inUV) {
+            GAPI_Assert(inY.depth == CV_8U);
+            GAPI_Assert(inUV.depth == CV_8U);
+            GAPI_Assert(inY.chan == 1);
+            GAPI_Assert(inY.planar == false);
+            GAPI_Assert(inUV.chan == 2);
+            GAPI_Assert(inUV.planar == false);
+            GAPI_Assert(inY.size.width  == 2 * inUV.size.width);
+            GAPI_Assert(inY.size.height == 2 * inUV.size.height);
+            return inY.withType(CV_8U, 3).asPlanar();
+        }
+    };
+
+} //namespace imgproc
+
+//! @addtogroup gapi_filters
+//! @{
+/** @brief Applies a separable linear filter to a matrix(image).
+
+The function applies a separable linear filter to the matrix. That is, first, every row of src is
+filtered with the 1D kernel kernelX. Then, every column of the result is filtered with the 1D
+kernel kernelY. The final result is returned.
+
+Supported matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, @ref CV_32FC1.
+Output image must have the same type, size, and number of channels as the input image.
+@note In case of floating-point computation, rounding to nearest even is performed
+if the hardware supports it (otherwise, rounding to the nearest value is used).
+
+@note Function textual ID is "org.opencv.imgproc.filters.sepfilter"
+@param src Source image.
+@param ddepth desired depth of the destination image (the following combinations of src.depth() and ddepth are supported:
+
+        src.depth() = CV_8U, ddepth = -1/CV_16S/CV_32F/CV_64F
+        src.depth() = CV_16U/CV_16S, ddepth = -1/CV_32F/CV_64F
+        src.depth() = CV_32F, ddepth = -1/CV_32F/CV_64F
+        src.depth() = CV_64F, ddepth = -1/CV_64F
+
+when ddepth=-1, the output image will have the same depth as the source)
+@param kernelX Coefficients for filtering each row.
+@param kernelY Coefficients for filtering each column.
+@param anchor Anchor position within the kernel. The default value \f$(-1,-1)\f$ means that the anchor
+is at the kernel center.
+@param delta Value added to the filtered results before storing them.
+@param borderType Pixel extrapolation method, see cv::BorderTypes
+@param borderValue border value in case of constant border type
+@sa  boxFilter, gaussianBlur, medianBlur
+ */
+GAPI_EXPORTS GMat sepFilter(const GMat& src, int ddepth, const Mat& kernelX, const Mat& kernelY, const Point& anchor /*FIXME: = Point(-1,-1)*/,
+                            const Scalar& delta /*FIXME = GScalar(0)*/, int borderType = BORDER_DEFAULT,
+                            const Scalar& borderValue = Scalar(0));
+
+/** @brief Convolves an image with the kernel.
+
+The function applies an arbitrary linear filter to an image. When
+the aperture is partially outside the image, the function interpolates outlier pixel values
+according to the specified border mode.
+
+The function does actually compute correlation, not the convolution:
+
+\f[\texttt{dst} (x,y) =  \sum _{ \stackrel{0\leq x' < \texttt{kernel.cols},}{0\leq y' < \texttt{kernel.rows}} }  \texttt{kernel} (x',y')* \texttt{src} (x+x'- \texttt{anchor.x} ,y+y'- \texttt{anchor.y} )\f]
+
+That is, the kernel is not mirrored around the anchor point. If you need a real convolution, flip
+the kernel using flip and set the new anchor to `(kernel.cols - anchor.x - 1, kernel.rows -
+anchor.y - 1)`.
+
+Supported matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, @ref CV_32FC1.
+Output image must have the same size and number of channels as the input image.
+@note Rounding to nearest even is performed if the hardware supports it; otherwise, rounding to nearest is used.
+
+@note Function textual ID is "org.opencv.imgproc.filters.filter2D"
+
+@param src input image.
+@param ddepth desired depth of the destination image
+@param kernel convolution kernel (or rather a correlation kernel), a single-channel floating point
+matrix; if you want to apply different kernels to different channels, split the image into
+separate color planes using split and process them individually.
+@param anchor anchor of the kernel that indicates the relative position of a filtered point within
+the kernel; the anchor should lie within the kernel; default value (-1,-1) means that the anchor
+is at the kernel center.
+@param delta optional value added to the filtered pixels before storing them in dst.
+@param borderType pixel extrapolation method, see cv::BorderTypes
+@param borderValue border value in case of constant border type
+@sa  sepFilter
+ */
+GAPI_EXPORTS GMat filter2D(const GMat& src, int ddepth, const Mat& kernel, const Point& anchor = Point(-1,-1), const Scalar& delta = Scalar(0),
+                           int borderType = BORDER_DEFAULT, const Scalar& borderValue = Scalar(0));
+
+
+/** @brief Blurs an image using the box filter.
+
+The function smooths an image using the kernel:
+
+\f[\texttt{K} =  \alpha \begin{bmatrix} 1 & 1 & 1 &  \cdots & 1 & 1  \\ 1 & 1 & 1 &  \cdots & 1 & 1  \\ \hdotsfor{6} \\ 1 & 1 & 1 &  \cdots & 1 & 1 \end{bmatrix}\f]
+
+where
+
+\f[\alpha = \fork{\frac{1}{\texttt{ksize.width*ksize.height}}}{when \texttt{normalize=true}}{1}{otherwise}\f]
+
+Unnormalized box filter is useful for computing various integral characteristics over each pixel
+neighborhood, such as covariance matrices of image derivatives (used in dense optical flow
+algorithms, and so on). If you need to compute pixel sums over variable-size windows, use cv::integral.
+
+Supported input matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, @ref CV_32FC1.
+Output image must have the same type, size, and number of channels as the input image.
+@note Rounding to nearest even is performed if the hardware supports it; otherwise, rounding to nearest is used.
+
+@note Function textual ID is "org.opencv.imgproc.filters.boxfilter"
+
+@param src Source image.
+@param dtype the output image depth (-1 to set the input image data type).
+@param ksize blurring kernel size.
+@param anchor Anchor position within the kernel. The default value \f$(-1,-1)\f$ means that the anchor
+is at the kernel center.
+@param normalize flag, specifying whether the kernel is normalized by its area or not.
+@param borderType Pixel extrapolation method, see cv::BorderTypes
+@param borderValue border value in case of constant border type
+@sa  sepFilter, gaussianBlur, medianBlur, integral
+ */
+GAPI_EXPORTS GMat boxFilter(const GMat& src, int dtype, const Size& ksize, const Point& anchor = Point(-1,-1),
+                            bool normalize = true, int borderType = BORDER_DEFAULT,
+                            const Scalar& borderValue = Scalar(0));
+
+/** @brief Blurs an image using the normalized box filter.
+
+The function smooths an image using the kernel:
+
+\f[\texttt{K} =  \frac{1}{\texttt{ksize.width*ksize.height}} \begin{bmatrix} 1 & 1 & 1 &  \cdots & 1 & 1  \\ 1 & 1 & 1 &  \cdots & 1 & 1  \\ \hdotsfor{6} \\ 1 & 1 & 1 &  \cdots & 1 & 1  \\ \end{bmatrix}\f]
+
+The call `blur(src, ksize, anchor, borderType)` is equivalent to `boxFilter(src, -1, ksize,
+anchor, true, borderType)`.
+
+Supported input matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, @ref CV_32FC1.
+Output image must have the same type, size, and number of channels as the input image.
+@note Rounding to nearest even is performed if the hardware supports it; otherwise, rounding to nearest is used.
+
+@note Function textual ID is "org.opencv.imgproc.filters.blur"
+
+@param src Source image.
+@param ksize blurring kernel size.
+@param anchor anchor point; default value Point(-1,-1) means that the anchor is at the kernel
+center.
+@param borderType border mode used to extrapolate pixels outside of the image, see cv::BorderTypes
+@param borderValue border value in case of constant border type
+@sa  boxFilter, bilateralFilter, GaussianBlur, medianBlur
+ */
+GAPI_EXPORTS GMat blur(const GMat& src, const Size& ksize, const Point& anchor = Point(-1,-1),
+                       int borderType = BORDER_DEFAULT, const Scalar& borderValue = Scalar(0));
+
+
+//GAPI_EXPORTS_W void blur( InputArray src, OutputArray dst,
+ //                       Size ksize, Point anchor = Point(-1,-1),
+ //                       int borderType = BORDER_DEFAULT );
+
+
+/** @brief Blurs an image using a Gaussian filter.
+
+The function convolves the source image with the specified Gaussian kernel.
+Output image must have the same type and number of channels as the input image.
+
+Supported input matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, @ref CV_32FC1.
+Output image must have the same type, size, and number of channels as the input image.
+@note Rounding to nearest even is performed if the hardware supports it; otherwise, rounding to nearest is used.
+
+@note Function textual ID is "org.opencv.imgproc.filters.gaussianBlur"
+
+@param src input image;
+@param ksize Gaussian kernel size. ksize.width and ksize.height can differ but they both must be
+positive and odd. Alternatively, both can be zero, in which case they are computed from sigma.
+@param sigmaX Gaussian kernel standard deviation in X direction.
+@param sigmaY Gaussian kernel standard deviation in Y direction; if sigmaY is zero, it is set to be
+equal to sigmaX, if both sigmas are zeros, they are computed from ksize.width and ksize.height,
+respectively (see cv::getGaussianKernel for details); to fully control the result regardless of
+possible future modifications of all this semantics, it is recommended to specify all of ksize,
+sigmaX, and sigmaY.
+@param borderType pixel extrapolation method, see cv::BorderTypes
+@param borderValue border value in case of constant border type
+@sa  sepFilter, boxFilter, medianBlur
+ */
+GAPI_EXPORTS GMat gaussianBlur(const GMat& src, const Size& ksize, double sigmaX, double sigmaY = 0,
+                               int borderType = BORDER_DEFAULT, const Scalar& borderValue = Scalar(0));
+
+/** @brief Blurs an image using the median filter.
+
+The function smoothes an image using the median filter with the \f$\texttt{ksize} \times
+\texttt{ksize}\f$ aperture. Each channel of a multi-channel image is processed independently.
+Output image must have the same type, size, and number of channels as the input image.
+@note Rounding to nearest even is performed if the hardware supports it; otherwise, rounding to nearest is used.
+The median filter uses cv::BORDER_REPLICATE internally to cope with border pixels, see cv::BorderTypes
+
+@note Function textual ID is "org.opencv.imgproc.filters.medianBlur"
+
+@param src input matrix (image)
+@param ksize aperture linear size; it must be odd and greater than 1, for example: 3, 5, 7 ...
+@sa  boxFilter, gaussianBlur
+ */
+GAPI_EXPORTS GMat medianBlur(const GMat& src, int ksize);
+
+/** @brief Erodes an image by using a specific structuring element.
+
+The function erodes the source image using the specified structuring element that determines the
+shape of a pixel neighborhood over which the minimum is taken:
+
+\f[\texttt{dst} (x,y) =  \min _{(x',y'):  \, \texttt{element} (x',y') \ne0 } \texttt{src} (x+x',y+y')\f]
+
+Erosion can be applied several (iterations) times. In case of multi-channel images, each channel is processed independently.
+Supported input matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, and @ref CV_32FC1.
+Output image must have the same type, size, and number of channels as the input image.
+@note Rounding to nearest even is performed if the hardware supports it; otherwise, rounding to nearest is used.
+
+@note Function textual ID is "org.opencv.imgproc.filters.erode"
+
+@param src input image
+@param kernel structuring element used for erosion; if `element=Mat()`, a `3 x 3` rectangular
+structuring element is used. Kernel can be created using getStructuringElement.
+@param anchor position of the anchor within the element; default value (-1, -1) means that the
+anchor is at the element center.
+@param iterations number of times erosion is applied.
+@param borderType pixel extrapolation method, see cv::BorderTypes
+@param borderValue border value in case of a constant border
+@sa  dilate
+ */
+GAPI_EXPORTS GMat erode(const GMat& src, const Mat& kernel, const Point& anchor = Point(-1,-1), int iterations = 1,
+                        int borderType = BORDER_CONSTANT,
+                        const  Scalar& borderValue = morphologyDefaultBorderValue());
+
+/** @brief Erodes an image by using 3 by 3 rectangular structuring element.
+
+The function erodes the source image using the rectangular structuring element with rectangle center as an anchor.
+Erosion can be applied several (iterations) times. In case of multi-channel images, each channel is processed independently.
+Supported input matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, and @ref CV_32FC1.
+Output image must have the same type, size, and number of channels as the input image.
+@note Rounding to nearest even is performed if the hardware supports it; otherwise, rounding to nearest is used.
+
+@param src input image
+@param iterations number of times erosion is applied.
+@param borderType pixel extrapolation method, see cv::BorderTypes
+@param borderValue border value in case of a constant border
+@sa  erode, dilate3x3
+ */
+GAPI_EXPORTS GMat erode3x3(const GMat& src, int iterations = 1,
+                           int borderType = BORDER_CONSTANT,
+                           const  Scalar& borderValue = morphologyDefaultBorderValue());
+
+/** @brief Dilates an image by using a specific structuring element.
+
+The function dilates the source image using the specified structuring element that determines the
+shape of a pixel neighborhood over which the maximum is taken:
+\f[\texttt{dst} (x,y) =  \max _{(x',y'):  \, \texttt{element} (x',y') \ne0 } \texttt{src} (x+x',y+y')\f]
+
+Dilation can be applied several (iterations) times. In case of multi-channel images, each channel is processed independently.
+Supported input matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, and @ref CV_32FC1.
+Output image must have the same type, size, and number of channels as the input image.
+@note Rounding to nearest even is performed if the hardware supports it; otherwise, rounding to nearest is used.
+
+@note Function textual ID is "org.opencv.imgproc.filters.dilate"
+
+@param src input image.
+@param kernel structuring element used for dilation; if `element=Mat()`, a `3 x 3` rectangular
+structuring element is used. Kernel can be created using getStructuringElement.
+@param anchor position of the anchor within the element; default value (-1, -1) means that the
+anchor is at the element center.
+@param iterations number of times dilation is applied.
+@param borderType pixel extrapolation method, see cv::BorderTypes
+@param borderValue border value in case of a constant border
+@sa  erode, morphologyEx, getStructuringElement
+ */
+GAPI_EXPORTS GMat dilate(const GMat& src, const Mat& kernel, const Point& anchor = Point(-1,-1), int iterations = 1,
+                         int borderType = BORDER_CONSTANT,
+                         const  Scalar& borderValue = morphologyDefaultBorderValue());
+
+/** @brief Dilates an image by using 3 by 3 rectangular structuring element.
+
+The function dilates the source image using a 3 by 3 rectangular structuring element that determines the
+shape of a pixel neighborhood over which the maximum is taken:
+\f[\texttt{dst} (x,y) =  \max _{(x',y'):  \, \texttt{element} (x',y') \ne0 } \texttt{src} (x+x',y+y')\f]
+
+Dilation can be applied several (iterations) times. In case of multi-channel images, each channel is processed independently.
+Supported input matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, and @ref CV_32FC1.
+Output image must have the same type, size, and number of channels as the input image.
+@note Rounding to nearest even is performed if the hardware supports it; otherwise, rounding to nearest is used.
+
+@note Function textual ID is "org.opencv.imgproc.filters.dilate"
+
+@param src input image.
+@param iterations number of times dilation is applied.
+@param borderType pixel extrapolation method, see cv::BorderTypes
+@param borderValue border value in case of a constant border
+@sa  dilate, erode3x3
+ */
+
+GAPI_EXPORTS GMat dilate3x3(const GMat& src, int iterations = 1,
+                            int borderType = BORDER_CONSTANT,
+                            const  Scalar& borderValue = morphologyDefaultBorderValue());
+
+/** @brief Calculates the first, second, third, or mixed image derivatives using an extended Sobel operator.
+
+In all cases except one, the \f$\texttt{ksize} \times \texttt{ksize}\f$ separable kernel is used to
+calculate the derivative. When \f$\texttt{ksize = 1}\f$, the \f$3 \times 1\f$ or \f$1 \times 3\f$
+kernel is used (that is, no Gaussian smoothing is done). `ksize = 1` can only be used for the first
+or the second x- or y- derivatives.
+
+There is also the special value `ksize = FILTER_SCHARR (-1)` that corresponds to the \f$3\times3\f$ Scharr
+filter that may give more accurate results than the \f$3\times3\f$ Sobel. The Scharr aperture is
+
+\f[\vecthreethree{-3}{0}{3}{-10}{0}{10}{-3}{0}{3}\f]
+
+for the x-derivative, or transposed for the y-derivative.
+
+The function calculates an image derivative by convolving the image with the appropriate kernel:
+
+\f[\texttt{dst} =  \frac{\partial^{xorder+yorder} \texttt{src}}{\partial x^{xorder} \partial y^{yorder}}\f]
+
+The Sobel operators combine Gaussian smoothing and differentiation, so the result is more or less
+resistant to the noise. Most often, the function is called with ( xorder = 1, yorder = 0, ksize = 3)
+or ( xorder = 0, yorder = 1, ksize = 3) to calculate the first x- or y- image derivative. The first
+case corresponds to a kernel of:
+
+\f[\vecthreethree{-1}{0}{1}{-2}{0}{2}{-1}{0}{1}\f]
+
+The second case corresponds to a kernel of:
+
+\f[\vecthreethree{-1}{-2}{-1}{0}{0}{0}{1}{2}{1}\f]
+
+@note Rounding to nearest even is performed if the hardware supports it; otherwise, rounding to nearest is used.
+
+@note Function textual ID is "org.opencv.imgproc.filters.sobel"
+
+@param src input image.
+@param ddepth output image depth, see @ref filter_depths "combinations"; in the case of
+    8-bit input images it will result in truncated derivatives.
+@param dx order of the derivative x.
+@param dy order of the derivative y.
+@param ksize size of the extended Sobel kernel; it must be odd.
+@param scale optional scale factor for the computed derivative values; by default, no scaling is
+applied (see cv::getDerivKernels for details).
+@param delta optional delta value that is added to the results prior to storing them in dst.
+@param borderType pixel extrapolation method, see cv::BorderTypes
+@param borderValue border value in case of constant border type
+@sa filter2D, gaussianBlur, cartToPolar
+ */
+GAPI_EXPORTS GMat Sobel(const GMat& src, int ddepth, int dx, int dy, int ksize = 3,
+                        double scale = 1, double delta = 0,
+                        int borderType = BORDER_DEFAULT,
+                        const Scalar& borderValue = Scalar(0));
+
+/** @brief Calculates the first, second, third, or mixed image derivatives using an extended Sobel operator.
+
+In all cases except one, the \f$\texttt{ksize} \times \texttt{ksize}\f$ separable kernel is used to
+calculate the derivative. When \f$\texttt{ksize = 1}\f$, the \f$3 \times 1\f$ or \f$1 \times 3\f$
+kernel is used (that is, no Gaussian smoothing is done). `ksize = 1` can only be used for the first
+or the second x- or y- derivatives.
+
+There is also the special value `ksize = FILTER_SCHARR (-1)` that corresponds to the \f$3\times3\f$ Scharr
+filter that may give more accurate results than the \f$3\times3\f$ Sobel. The Scharr aperture is
+
+\f[\vecthreethree{-3}{0}{3}{-10}{0}{10}{-3}{0}{3}\f]
+
+for the x-derivative, or transposed for the y-derivative.
+
+The function calculates an image derivative by convolving the image with the appropriate kernel:
+
+\f[\texttt{dst} =  \frac{\partial^{xorder+yorder} \texttt{src}}{\partial x^{xorder} \partial y^{yorder}}\f]
+
+The Sobel operators combine Gaussian smoothing and differentiation, so the result is more or less
+resistant to the noise. Most often, the function is called with ( xorder = 1, yorder = 0, ksize = 3)
+or ( xorder = 0, yorder = 1, ksize = 3) to calculate the first x- or y- image derivative. The first
+case corresponds to a kernel of:
+
+\f[\vecthreethree{-1}{0}{1}{-2}{0}{2}{-1}{0}{1}\f]
+
+The second case corresponds to a kernel of:
+
+\f[\vecthreethree{-1}{-2}{-1}{0}{0}{0}{1}{2}{1}\f]
+
+@note The first returned matrix corresponds to the dx derivative, while the second one corresponds to dy.
+
+@note Rounding to nearest even is performed if the hardware supports it; otherwise, rounding to nearest is used.
+
+@note Function textual ID is "org.opencv.imgproc.filters.sobelxy"
+
+@param src input image.
+@param ddepth output image depth, see @ref filter_depths "combinations"; in the case of
+    8-bit input images it will result in truncated derivatives.
+@param order order of the derivatives.
+@param ksize size of the extended Sobel kernel; it must be odd.
+@param scale optional scale factor for the computed derivative values; by default, no scaling is
+applied (see cv::getDerivKernels for details).
+@param delta optional delta value that is added to the results prior to storing them in dst.
+@param borderType pixel extrapolation method, see cv::BorderTypes
+@param borderValue border value in case of constant border type
+@sa filter2D, gaussianBlur, cartToPolar
+ */
+GAPI_EXPORTS std::tuple<GMat, GMat> SobelXY(const GMat& src, int ddepth, int order, int ksize = 3,
+                        double scale = 1, double delta = 0,
+                        int borderType = BORDER_DEFAULT,
+                        const Scalar& borderValue = Scalar(0));
+
+/** @brief Finds edges in an image using the Canny algorithm.
+
+The function finds edges in the input image and marks them in the output map edges using the
+Canny algorithm. The smallest value between threshold1 and threshold2 is used for edge linking. The
+largest value is used to find initial segments of strong edges. See
+<http://en.wikipedia.org/wiki/Canny_edge_detector>
+
+@note Function textual ID is "org.opencv.imgproc.canny"
+
+@param image 8-bit input image.
+@param threshold1 first threshold for the hysteresis procedure.
+@param threshold2 second threshold for the hysteresis procedure.
+@param apertureSize aperture size for the Sobel operator.
+@param L2gradient a flag, indicating whether a more accurate \f$L_2\f$ norm
+\f$=\sqrt{(dI/dx)^2 + (dI/dy)^2}\f$ should be used to calculate the image gradient magnitude (
+L2gradient=true ), or whether the default \f$L_1\f$ norm \f$=|dI/dx|+|dI/dy|\f$ is enough (
+L2gradient=false ).
+ */
+GAPI_EXPORTS GMat Canny(const GMat& image, double threshold1, double threshold2,
+                        int apertureSize = 3, bool L2gradient = false);
+
+/** @brief Determines strong corners on an image.
+
+The function finds the most prominent corners in the image or in the specified image region, as
+described in @cite Shi94
+
+-   Function calculates the corner quality measure at every source image pixel using the
+    #cornerMinEigenVal or #cornerHarris .
+-   Function performs a non-maximum suppression (the local maximums in *3 x 3* neighborhood are
+    retained).
+-   The corners with the minimal eigenvalue less than
+    \f$\texttt{qualityLevel} \cdot \max_{x,y} qualityMeasureMap(x,y)\f$ are rejected.
+-   The remaining corners are sorted by the quality measure in the descending order.
+-   Function throws away each corner for which there is a stronger corner at a distance less than
+    maxDistance.
+
+The function can be used to initialize a point-based tracker of an object.
+
+@note If the function is called with different values A and B of the parameter qualityLevel , and
+A \> B, the vector of returned corners with qualityLevel=A will be the prefix of the output vector
+with qualityLevel=B .
+
+@note Function textual ID is "org.opencv.imgproc.goodFeaturesToTrack"
+
+@param image Input 8-bit or floating-point 32-bit, single-channel image.
+@param maxCorners Maximum number of corners to return. If there are more corners than are found,
+the strongest of them is returned. `maxCorners <= 0` implies that no limit on the maximum is set
+and all detected corners are returned.
+@param qualityLevel Parameter characterizing the minimal accepted quality of image corners. The
+parameter value is multiplied by the best corner quality measure, which is the minimal eigenvalue
+(see #cornerMinEigenVal ) or the Harris function response (see #cornerHarris ). The corners with the
+quality measure less than the product are rejected. For example, if the best corner has the
+quality measure = 1500, and the qualityLevel=0.01 , then all the corners with the quality measure
+less than 15 are rejected.
+@param minDistance Minimum possible Euclidean distance between the returned corners.
+@param mask Optional region of interest. If the mask is not empty (it needs to have the type
+CV_8UC1 and the same size as image ), it specifies the region in which the corners are detected.
+@param blockSize Size of an average block for computing a derivative covariation matrix over each
+pixel neighborhood. See cornerEigenValsAndVecs .
+@param useHarrisDetector Parameter indicating whether to use a Harris detector (see #cornerHarris)
+or #cornerMinEigenVal.
+@param k Free parameter of the Harris detector.
+
+@return vector of detected corners.
+ */
+GAPI_EXPORTS GArray<Point2f> goodFeaturesToTrack(const GMat  &image,
+                                                       int    maxCorners,
+                                                       double qualityLevel,
+                                                       double minDistance,
+                                                 const Mat   &mask = Mat(),
+                                                       int    blockSize = 3,
+                                                       bool   useHarrisDetector = false,
+                                                       double k = 0.04);
+
+/** @brief Equalizes the histogram of a grayscale image.
+
+The function equalizes the histogram of the input image using the following algorithm:
+
+- Calculate the histogram \f$H\f$ for src .
+- Normalize the histogram so that the sum of histogram bins is 255.
+- Compute the integral of the histogram:
+\f[H'_i =  \sum _{0  \le j < i} H(j)\f]
+- Transform the image using \f$H'\f$ as a look-up table: \f$\texttt{dst}(x,y) = H'(\texttt{src}(x,y))\f$
+
+The algorithm normalizes the brightness and increases the contrast of the image.
+@note The returned image is of the same size and type as input.
+
+@note Function textual ID is "org.opencv.imgproc.equalizeHist"
+
+@param src Source 8-bit single channel image.
+ */
+GAPI_EXPORTS GMat equalizeHist(const GMat& src);
+
+//! @} gapi_filters
+
+//! @addtogroup gapi_colorconvert
+//! @{
+/** @brief Converts an image from RGB color space to gray-scaled.
+The conventional ranges for R, G, and B channel values are 0 to 255.
+Resulting gray color value computed as
+\f[\texttt{dst} (I)= \texttt{0.299} * \texttt{src}(I).R + \texttt{0.587} * \texttt{src}(I).G  + \texttt{0.114} * \texttt{src}(I).B \f]
+
+@note Function textual ID is "org.opencv.imgproc.colorconvert.rgb2gray"
+
+@param src input image: 8-bit unsigned 3-channel image @ref CV_8UC3.
+@sa RGB2YUV
+ */
+GAPI_EXPORTS GMat RGB2Gray(const GMat& src);
+
+/** @overload
+Resulting gray color value computed as
+\f[\texttt{dst} (I)= \texttt{rY} * \texttt{src}(I).R + \texttt{gY} * \texttt{src}(I).G  + \texttt{bY} * \texttt{src}(I).B \f]
+
+@note Function textual ID is "org.opencv.imgproc.colorconvert.rgb2graycustom"
+
+@param src input image: 8-bit unsigned 3-channel image @ref CV_8UC3.
+@param rY float multiplier for R channel.
+@param gY float multiplier for G channel.
+@param bY float multiplier for B channel.
+@sa RGB2YUV
+ */
+GAPI_EXPORTS GMat RGB2Gray(const GMat& src, float rY, float gY, float bY);
+
+/** @brief Converts an image from BGR color space to gray-scaled.
+The conventional ranges for B, G, and R channel values are 0 to 255.
+Resulting gray color value computed as
+\f[\texttt{dst} (I)= \texttt{0.114} * \texttt{src}(I).B + \texttt{0.587} * \texttt{src}(I).G  + \texttt{0.299} * \texttt{src}(I).R \f]
+
+@note Function textual ID is "org.opencv.imgproc.colorconvert.bgr2gray"
+
+@param src input image: 8-bit unsigned 3-channel image @ref CV_8UC3.
+@sa BGR2LUV
+ */
+GAPI_EXPORTS GMat BGR2Gray(const GMat& src);
+
+/** @brief Converts an image from RGB color space to YUV color space.
+
+The function converts an input image from RGB color space to YUV.
+The conventional ranges for R, G, and B channel values are 0 to 255.
+
+In case of linear transformations, the range does not matter. But in case of a non-linear
+transformation, an input RGB image should be normalized to the proper value range to get the correct
+results, like here, at RGB \f$\rightarrow\f$ Y\*u\*v\* transformation.
+Output image must be 8-bit unsigned 3-channel image @ref CV_8UC3.
+
+@note Function textual ID is "org.opencv.imgproc.colorconvert.rgb2yuv"
+
+@param src input image: 8-bit unsigned 3-channel image @ref CV_8UC3.
+@sa YUV2RGB, RGB2Lab
+*/
+GAPI_EXPORTS GMat RGB2YUV(const GMat& src);
+
+/** @brief Converts an image from BGR color space to LUV color space.
+
+The function converts an input image from BGR color space to LUV.
+The conventional ranges for B, G, and R channel values are 0 to 255.
+
+Output image must be 8-bit unsigned 3-channel image @ref CV_8UC3.
+
+@note Function textual ID is "org.opencv.imgproc.colorconvert.bgr2luv"
+
+@param src input image: 8-bit unsigned 3-channel image @ref CV_8UC3.
+@sa RGB2Lab, RGB2LUV
+*/
+GAPI_EXPORTS GMat BGR2LUV(const GMat& src);
+
+/** @brief Converts an image from LUV color space to BGR color space.
+
+The function converts an input image from LUV color space to BGR.
+The conventional ranges for B, G, and R channel values are 0 to 255.
+
+Output image must be 8-bit unsigned 3-channel image @ref CV_8UC3.
+
+@note Function textual ID is "org.opencv.imgproc.colorconvert.luv2bgr"
+
+@param src input image: 8-bit unsigned 3-channel image @ref CV_8UC3.
+@sa BGR2LUV
+*/
+GAPI_EXPORTS GMat LUV2BGR(const GMat& src);
+
+/** @brief Converts an image from YUV color space to BGR color space.
+
+The function converts an input image from YUV color space to BGR.
+The conventional ranges for B, G, and R channel values are 0 to 255.
+
+Output image must be 8-bit unsigned 3-channel image @ref CV_8UC3.
+
+@note Function textual ID is "org.opencv.imgproc.colorconvert.yuv2bgr"
+
+@param src input image: 8-bit unsigned 3-channel image @ref CV_8UC3.
+@sa BGR2YUV
+*/
+GAPI_EXPORTS GMat YUV2BGR(const GMat& src);
+
+/** @brief Converts an image from BGR color space to YUV color space.
+
+The function converts an input image from BGR color space to YUV.
+The conventional ranges for B, G, and R channel values are 0 to 255.
+
+Output image must be 8-bit unsigned 3-channel image @ref CV_8UC3.
+
+@note Function textual ID is "org.opencv.imgproc.colorconvert.bgr2yuv"
+
+@param src input image: 8-bit unsigned 3-channel image @ref CV_8UC3.
+@sa YUV2BGR
+*/
+GAPI_EXPORTS GMat BGR2YUV(const GMat& src);
+
+/** @brief Converts an image from RGB color space to Lab color space.
+
+The function converts an input image from RGB color space to Lab.
+The conventional ranges for R, G, and B channel values are 0 to 255.
+
+Output image must be 8-bit unsigned 3-channel image @ref CV_8UC3.
+
+@note Function textual ID is "org.opencv.imgproc.colorconvert.rgb2lab"
+
+@param src input image: 8-bit unsigned 3-channel image @ref CV_8UC3.
+@sa RGB2YUV, RGB2LUV
+*/
+GAPI_EXPORTS GMat RGB2Lab(const GMat& src);
+
+/** @brief Converts an image from YUV color space to RGB.
+The function converts an input image from YUV color space to RGB.
+The conventional ranges for Y, U, and V channel values are 0 to 255.
+
+Output image must be 8-bit unsigned 3-channel image @ref CV_8UC3.
+
+@note Function textual ID is "org.opencv.imgproc.colorconvert.yuv2rgb"
+
+@param src input image: 8-bit unsigned 3-channel image @ref CV_8UC3.
+
+@sa RGB2Lab, RGB2YUV
+*/
+GAPI_EXPORTS GMat YUV2RGB(const GMat& src);
+
+/** @brief Converts an image from NV12 (YUV420p) color space to RGB.
+The function converts an input image from NV12 color space to RGB.
+The conventional ranges for Y, U, and V channel values are 0 to 255.
+
+Output image must be 8-bit unsigned 3-channel image @ref CV_8UC3.
+
+@note Function textual ID is "org.opencv.imgproc.colorconvert.nv12torgb"
+
+@param src_y input image: 8-bit unsigned 1-channel image @ref CV_8UC1.
+@param src_uv input image: 8-bit unsigned 2-channel image @ref CV_8UC2.
+
+@sa YUV2RGB, NV12toBGR
+*/
+GAPI_EXPORTS GMat NV12toRGB(const GMat& src_y, const GMat& src_uv);
+
+/** @brief Converts an image from NV12 (YUV420p) color space to gray-scaled.
+The function converts an input image from NV12 color space to gray-scaled.
+The conventional ranges for Y, U, and V channel values are 0 to 255.
+
+Output image must be 8-bit unsigned 1-channel image @ref CV_8UC1.
+
+@note Function textual ID is "org.opencv.imgproc.colorconvert.nv12togray"
+
+@param src_y input image: 8-bit unsigned 1-channel image @ref CV_8UC1.
+@param src_uv input image: 8-bit unsigned 2-channel image @ref CV_8UC2.
+
+@sa YUV2RGB, NV12toBGR
+*/
+GAPI_EXPORTS GMat NV12toGray(const GMat& src_y, const GMat& src_uv);
+
+/** @brief Converts an image from NV12 (YUV420p) color space to BGR.
+The function converts an input image from NV12 color space to BGR.
+The conventional ranges for Y, U, and V channel values are 0 to 255.
+
+Output image must be 8-bit unsigned 3-channel image @ref CV_8UC3.
+
+@note Function textual ID is "org.opencv.imgproc.colorconvert.nv12tobgr"
+
+@param src_y input image: 8-bit unsigned 1-channel image @ref CV_8UC1.
+@param src_uv input image: 8-bit unsigned 2-channel image @ref CV_8UC2.
+
+@sa YUV2BGR, NV12toRGB
+*/
+GAPI_EXPORTS GMat NV12toBGR(const GMat& src_y, const GMat& src_uv);
+
+/** @brief Converts an image from BayerGR color space to RGB.
+The function converts an input image from BayerGR color space to RGB.
+The conventional ranges for G, R, and B channel values are 0 to 255.
+
+Output image must be 8-bit unsigned 3-channel image @ref CV_8UC3.
+
+@note Function textual ID is "org.opencv.imgproc.colorconvert.bayergr2rgb"
+
+@param src_gr input image: 8-bit unsigned 1-channel image @ref CV_8UC1.
+
+@sa YUV2BGR, NV12toRGB
+*/
+GAPI_EXPORTS GMat BayerGR2RGB(const GMat& src_gr);
+
+/** @brief Converts an image from RGB color space to HSV.
+The function converts an input image from RGB color space to HSV.
+The conventional ranges for R, G, and B channel values are 0 to 255.
+
+Output image must be 8-bit unsigned 3-channel image @ref CV_8UC3.
+
+@note Function textual ID is "org.opencv.imgproc.colorconvert.rgb2hsv"
+
+@param src input image: 8-bit unsigned 3-channel image @ref CV_8UC3.
+
+@sa YUV2BGR, NV12toRGB
+*/
+GAPI_EXPORTS GMat RGB2HSV(const GMat& src);
+
+/** @brief Converts an image from RGB color space to YUV422.
+The function converts an input image from RGB color space to YUV422.
+The conventional ranges for R, G, and B channel values are 0 to 255.
+
+Output image must be 8-bit unsigned 2-channel image @ref CV_8UC2.
+
+@note Function textual ID is "org.opencv.imgproc.colorconvert.rgb2yuv422"
+
+@param src input image: 8-bit unsigned 3-channel image @ref CV_8UC3.
+
+@sa YUV2BGR, NV12toRGB
+*/
+GAPI_EXPORTS GMat RGB2YUV422(const GMat& src);
+
+/** @brief Converts an image from NV12 (YUV420p) color space to RGB.
+The function converts an input image from NV12 color space to RGB.
+The conventional ranges for Y, U, and V channel values are 0 to 255.
+
+Output image must be 8-bit unsigned planar 3-channel image @ref CV_8UC1.
+Planar image memory layout is three planes laying in the memory contiguously,
+so the image height should be plane_height*plane_number,
+image type is @ref CV_8UC1.
+
+@note Function textual ID is "org.opencv.imgproc.colorconvert.nv12torgbp"
+
+@param src_y input image: 8-bit unsigned 1-channel image @ref CV_8UC1.
+@param src_uv input image: 8-bit unsigned 2-channel image @ref CV_8UC2.
+
+@sa YUV2RGB, NV12toBGRp, NV12toRGB
+*/
+GAPI_EXPORTS GMatP NV12toRGBp(const GMat &src_y, const GMat &src_uv);
+
+/** @brief Converts an image from NV12 (YUV420p) color space to BGR.
+The function converts an input image from NV12 color space to BGR.
+The conventional ranges for Y, U, and V channel values are 0 to 255.
+
+Output image must be 8-bit unsigned planar 3-channel image @ref CV_8UC1.
+Planar image memory layout is three planes laying in the memory contiguously,
+so the image height should be plane_height*plane_number,
+image type is @ref CV_8UC1.
+
+@note Function textual ID is "org.opencv.imgproc.colorconvert.nv12tobgrp"
+
+@param src_y input image: 8-bit unsigned 1-channel image @ref CV_8UC1.
+@param src_uv input image: 8-bit unsigned 2-channel image @ref CV_8UC2.
+
+@sa YUV2RGB, NV12toRGBp, NV12toBGR
+*/
+GAPI_EXPORTS GMatP NV12toBGRp(const GMat &src_y, const GMat &src_uv);
+
+//! @} gapi_colorconvert
+} //namespace gapi
+} //namespace cv
+
+#endif // OPENCV_GAPI_IMGPROC_HPP
diff --git a/IPL/include/opencv/opencv2/gapi/infer.hpp b/IPL/include/opencv/opencv2/gapi/infer.hpp
new file mode 100644
index 0000000..5a4caff
--- /dev/null
+++ b/IPL/include/opencv/opencv2/gapi/infer.hpp
@@ -0,0 +1,219 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2019 Intel Corporation
+
+
+#ifndef OPENCV_GAPI_INFER_HPP
+#define OPENCV_GAPI_INFER_HPP
+
+// FIXME: Inference API is currently only available in full mode
+#if !defined(GAPI_STANDALONE)
+
+#include <functional>
+#include <string>  // string
+#include <utility> // tuple
+
+#include <opencv2/gapi/util/any.hpp>  // any<>
+#include <opencv2/gapi/gkernel.hpp>   // GKernelType[M], GBackend
+#include <opencv2/gapi/garg.hpp>      // GArg
+#include <opencv2/gapi/gcommon.hpp>   // CompileArgTag
+#include <opencv2/gapi/gmetaarg.hpp>  // GMetaArg
+
+namespace cv {
+
+template<typename, typename> class GNetworkType;
+
+// TODO: maybe tuple_wrap_helper from util.hpp may help with this.
+// Multiple-return-value network definition (specialized base class)
+template<typename K, typename... R, typename... Args>
+class GNetworkType<K, std::function<std::tuple<R...>(Args...)> >
+{
+public:
+    using InArgs  = std::tuple<Args...>;
+    using OutArgs = std::tuple<R...>;
+
+    using Result  = OutArgs;
+    using API     = std::function<Result(Args...)>;
+
+    using ResultL = std::tuple< cv::GArray<R>... >;
+    using APIList = std::function<ResultL(cv::GArray<cv::Rect>, Args...)>;
+};
+
+// Single-return-value network definition (specialized base class)
+template<typename K, typename R, typename... Args>
+class GNetworkType<K, std::function<R(Args...)> >
+{
+public:
+    using InArgs  = std::tuple<Args...>;
+    using OutArgs = std::tuple<R>;
+
+    using Result  = R;
+    using API     = std::function<R(Args...)>;
+
+    using ResultL = cv::GArray<R>;
+    using APIList = std::function<ResultL(cv::GArray<cv::Rect>, Args...)>;
+};
+
+// Base "Infer" kernel. Note - for whatever network, kernel ID
+// is always the same. Different inference calls are distinguished by
+// network _tag_ (an extra field in GCall)
+//
+// getOutMeta is a stub callback collected by G-API kernel subsystem
+// automatically. This is a rare case when this callback is defined by
+// a particular backend, not by a network itself.
+struct GInferBase {
+    static constexpr const char * id() {
+        return "org.opencv.dnn.infer";     // Universal stub
+    }
+    static GMetaArgs getOutMeta(const GMetaArgs &, const GArgs &) {
+        return GMetaArgs{};                // One more universal stub
+    }
+};
+
+
+// Base "Infer list" kernel.
+// All notes from "Infer" kernel apply here as well.
+struct GInferListBase {
+    static constexpr const char * id() {
+        return "org.opencv.dnn.infer-roi"; // Universal stub
+    }
+    static GMetaArgs getOutMeta(const GMetaArgs &, const GArgs &) {
+        return GMetaArgs{};                // One more universal stub
+    }
+};
+
+// A generic inference kernel. API (::on()) is fully defined by the Net
+// template parameter.
+// Acts as a regular kernel in graph (via KernelTypeMedium).
+template<typename Net>
+struct GInfer final
+    : public GInferBase
+    , public detail::KernelTypeMedium< GInfer<Net>
+                                     , typename Net::API > {
+    using GInferBase::getOutMeta; // FIXME: name lookup conflict workaround?
+
+    static constexpr const char* tag() { return Net::tag(); }
+};
+
+// A generic roi-list inference kernel. API (::on()) is derived from
+// the Net template parameter (see more in infer<> overload).
+template<typename Net>
+struct GInferList final
+    : public GInferListBase
+    , public detail::KernelTypeMedium< GInferList<Net>
+                                     , typename Net::APIList > {
+    using GInferListBase::getOutMeta; // FIXME: name lookup conflict workaround?
+
+    static constexpr const char* tag() { return Net::tag(); }
+};
+
+} // namespace cv
+
+// FIXME: Probably the <API> signature makes a function/tuple/function round-trip
+#define G_API_NET(Class, API, Tag)                                      \
+    struct Class final: public cv::GNetworkType<Class, std::function API> { \
+        static constexpr const char * tag() { return Tag; }             \
+    }
+
+namespace cv {
+namespace gapi {
+
+
+/** @brief Calculates responses for the specified network (template
+ *     parameter) for every region in the source image.
+ *
+ * @tparam Net A network type defined with G_API_NET() macro.
+ * @param roi a list of rectangles describing regions of interest
+ *   in the source image. Usually an output of object detector or tracker.
+ * @param args network's input parameters as specified in G_API_NET() macro.
+ *   NOTE: verified to work reliably with 1-input topologies only.
+ * @return a list of objects of return type as defined in G_API_NET().
+ *   If a network has multiple return values (defined with a tuple), a tuple of
+ *   GArray<> objects is returned with the appropriate types inside.
+ * @sa  G_API_NET()
+ */
+template<typename Net, typename... Args>
+typename Net::ResultL infer(cv::GArray<cv::Rect> roi, Args&&... args) {
+    return GInferList<Net>::on(roi, std::forward<Args>(args)...);
+}
+
+/**
+ * @brief Calculates response for the specified network (template
+ *     parameter) given the input data.
+ *
+ * @tparam Net A network type defined with G_API_NET() macro.
+ * @param args network's input parameters as specified in G_API_NET() macro.
+ * @return an object of return type as defined in G_API_NET().
+ *   If a network has multiple return values (defined with a tuple), a tuple of
+ *   objects of appropriate type is returned.
+ * @sa  G_API_NET()
+ */
+template<typename Net, typename... Args>
+typename Net::Result infer(Args&&... args) {
+    return GInfer<Net>::on(std::forward<Args>(args)...);
+}
+
+
+} // namespace gapi
+} // namespace cv
+
+#endif // GAPI_STANDALONE
+
+namespace cv {
+namespace gapi {
+
+// Note: the below code _is_ part of STANDALONE build,
+// just to make our compiler code compileable.
+
+// A type-erased form of network parameters.
+// Similar to how a type-erased GKernel is represented and used.
+struct GAPI_EXPORTS GNetParam {
+    std::string tag;     // FIXME: const?
+    GBackend backend;    // Specifies the execution model
+    util::any params;    // Backend-interpreted parameter structure
+};
+
+/** \addtogroup gapi_compile_args
+ * @{
+ */
+/**
+ * @brief A container class for network configurations. Similar to
+ * GKernelPackage. Use cv::gapi::networks() to construct this object.
+ *
+ * @sa cv::gapi::networks
+ */
+struct GAPI_EXPORTS GNetPackage {
+    GNetPackage() : GNetPackage({}) {}
+    explicit GNetPackage(std::initializer_list<GNetParam> &&ii);
+    std::vector<GBackend> backends() const;
+    std::vector<GNetParam> networks;
+};
+/** @} gapi_compile_args */
+} // namespace gapi
+
+namespace detail {
+template<typename T>
+gapi::GNetParam strip(T&& t) {
+    return gapi::GNetParam { t.tag()
+                           , t.backend()
+                           , t.params()
+                           };
+}
+
+template<> struct CompileArgTag<cv::gapi::GNetPackage> {
+    static const char* tag() { return "gapi.net_package"; }
+};
+
+} // namespace cv::detail
+
+namespace gapi {
+template<typename... Args>
+cv::gapi::GNetPackage networks(Args&&... args) {
+    return cv::gapi::GNetPackage({ cv::detail::strip(args)... });
+}
+} // namespace gapi
+} // namespace cv
+
+#endif // OPENCV_GAPI_INFER_HPP
diff --git a/IPL/include/opencv/opencv2/gapi/infer/ie.hpp b/IPL/include/opencv/opencv2/gapi/infer/ie.hpp
new file mode 100644
index 0000000..6e8c2c3
--- /dev/null
+++ b/IPL/include/opencv/opencv2/gapi/infer/ie.hpp
@@ -0,0 +1,123 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2019 Intel Corporation
+
+#ifndef OPENCV_GAPI_INFER_IE_HPP
+#define OPENCV_GAPI_INFER_IE_HPP
+
+#include <unordered_map>
+#include <string>
+#include <array>
+#include <tuple> // tuple, tuple_size
+
+#include <opencv2/gapi/opencv_includes.hpp>
+#include <opencv2/gapi/util/any.hpp>
+
+#include <opencv2/core/cvdef.h>     // GAPI_EXPORTS
+#include <opencv2/gapi/gkernel.hpp> // GKernelPackage
+
+namespace cv {
+namespace gapi {
+// FIXME: introduce a new sub-namespace for NN?
+namespace ie {
+
+GAPI_EXPORTS cv::gapi::GBackend backend();
+
+/**
+ * Specify how G-API and IE should treat input data
+ *
+ * In OpenCV, the same cv::Mat is used to represent both
+ * image and tensor data. Sometimes those are hardly distinguishable,
+ * so this extra parameter is used to give G-API a hint.
+ *
+ * This hint controls how G-API reinterprets the data when converting
+ * it to IE Blob format (and which layout/etc is assigned to this data).
+ */
+enum class TraitAs: int
+{
+    TENSOR, //!< G-API treats an associated cv::Mat as a raw tensor and passes dimensions as-is
+    IMAGE   //!< G-API treats an associated cv::Mat as an image, so it creates an "image" blob (NCHW/NHWC, etc)
+};
+
+namespace detail {
+    struct ParamDesc {
+        std::string model_path;
+        std::string weights_path;
+        std::string device_id;
+
+        // NB: The order of the names here follows the `Net` API
+        std::vector<std::string> input_names;
+        std::vector<std::string> output_names;
+
+        using ConstInput = std::pair<cv::Mat, TraitAs>;
+        std::unordered_map<std::string, ConstInput> const_inputs;
+
+        // NB: num_* may differ from the topology's real input/output port counts
+        // (e.g. topology's partial execution)
+        std::size_t num_in;  // How many inputs are defined in the operation
+        std::size_t num_out; // How many outputs are defined in the operation
+    };
+} // namespace detail
+
+// FIXME: this is probably a shared (reusable) thing
+template<typename Net>
+struct PortCfg {
+    using In = std::array
+        < std::string
+        , std::tuple_size<typename Net::InArgs>::value >;
+    using Out = std::array
+        < std::string
+        , std::tuple_size<typename Net::OutArgs>::value >;
+};
+
+template<typename Net> class Params {
+public:
+    Params(const std::string &model,
+           const std::string &weights,
+           const std::string &device)
+        : desc{ model, weights, device, {}, {}, {}
+              , std::tuple_size<typename Net::InArgs>::value
+              , std::tuple_size<typename Net::OutArgs>::value
+              } {
+    };
+
+    Params<Net>& cfgInputLayers(const typename PortCfg<Net>::In &ll) {
+        desc.input_names.clear();
+        desc.input_names.reserve(ll.size());
+        std::copy(ll.begin(), ll.end(),
+                  std::back_inserter(desc.input_names));
+        return *this;
+    }
+
+    Params<Net>& cfgOutputLayers(const typename PortCfg<Net>::Out &ll) {
+        desc.output_names.clear();
+        desc.output_names.reserve(ll.size());
+        std::copy(ll.begin(), ll.end(),
+                  std::back_inserter(desc.output_names));
+        return *this;
+    }
+
+    Params<Net>& constInput(const std::string &layer_name,
+                            const cv::Mat &data,
+                            TraitAs hint = TraitAs::TENSOR) {
+        desc.const_inputs[layer_name] = {data, hint};
+        return *this;
+    }
+
+    // BEGIN(G-API's network parametrization API)
+    GBackend      backend() const { return cv::gapi::ie::backend();  }
+    std::string   tag()     const { return Net::tag(); }
+    cv::util::any params()  const { return { desc }; }
+    // END(G-API's network parametrization API)
+
+protected:
+    detail::ParamDesc desc;
+};
+
+} // namespace ie
+} // namespace gapi
+} // namespace cv
+
+#endif // OPENCV_GAPI_INFER_IE_HPP
diff --git a/IPL/include/opencv/opencv2/gapi/ocl/core.hpp b/IPL/include/opencv/opencv2/gapi/ocl/core.hpp
new file mode 100644
index 0000000..4ab85e2
--- /dev/null
+++ b/IPL/include/opencv/opencv2/gapi/ocl/core.hpp
@@ -0,0 +1,27 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2018 Intel Corporation
+
+
+#ifndef OPENCV_GAPI_OCL_CORE_API_HPP
+#define OPENCV_GAPI_OCL_CORE_API_HPP
+
+#include <opencv2/core/cvdef.h>     // GAPI_EXPORTS
+#include <opencv2/gapi/gkernel.hpp> // GKernelPackage
+
+namespace cv {
+namespace gapi {
+namespace core {
+namespace ocl {
+
+        GAPI_EXPORTS GKernelPackage kernels();
+
+} // namespace ocl
+} // namespace core
+} // namespace gapi
+} // namespace cv
+
+
+#endif // OPENCV_GAPI_OCL_CORE_API_HPP
diff --git a/IPL/include/opencv/opencv2/gapi/ocl/goclkernel.hpp b/IPL/include/opencv/opencv2/gapi/ocl/goclkernel.hpp
new file mode 100644
index 0000000..ee363c0
--- /dev/null
+++ b/IPL/include/opencv/opencv2/gapi/ocl/goclkernel.hpp
@@ -0,0 +1,246 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2018-2019 Intel Corporation
+
+
+#ifndef OPENCV_GAPI_GOCLKERNEL_HPP
+#define OPENCV_GAPI_GOCLKERNEL_HPP
+
+#include <vector>
+#include <functional>
+#include <map>
+#include <unordered_map>
+
+#include <opencv2/core/mat.hpp>
+#include <opencv2/gapi/gcommon.hpp>
+#include <opencv2/gapi/gkernel.hpp>
+#include <opencv2/gapi/garg.hpp>
+
+// FIXME: namespace scheme for backends?
+namespace cv {
+
+namespace gimpl
+{
+    // Forward-declare an internal class
+    class GOCLExecutable;
+} // namespace gimpl
+
+namespace gapi
+{
+namespace ocl
+{
+    /**
+     * \addtogroup gapi_std_backends G-API Standard Backends
+     * @{
+     */
+    /**
+     * @brief Get a reference to OCL backend.
+     *
+     * At the moment, the OCL backend is built atop of OpenCV
+     * "Transparent API" (T-API), see cv::UMat for details.
+     *
+     * @sa gapi_std_backends
+     */
+    GAPI_EXPORTS cv::gapi::GBackend backend();
+    /** @} */
+} // namespace ocl
+} // namespace gapi
+
+
+// Represents the arguments which are passed to a wrapped OCL function
+// FIXME: put into detail?
+class GAPI_EXPORTS GOCLContext
+{
+public:
+    // Generic accessor: returns input argument #input as T (m_args.at() bounds-checks the index)
+    template<typename T>
+    const T& inArg(int input) { return m_args.at(input).get<T>(); }
+
+    // Syntax sugar
+    const cv::UMat&  inMat(int input);
+    cv::UMat&  outMatR(int output); // FIXME: Avoid cv::Mat m = ctx.outMatR()
+
+    const cv::Scalar& inVal(int input);
+    cv::Scalar& outValR(int output); // FIXME: Avoid cv::Scalar s = ctx.outValR()
+    template<typename T> std::vector<T>& outVecR(int output) // FIXME: the same issue
+    {
+        return outVecRef(output).wref<T>();
+    }
+    template<typename T> T& outOpaqueR(int output) // FIXME: the same issue
+    {
+        return outOpaqueRef(output).wref<T>();
+    }
+
+protected:
+    detail::VectorRef& outVecRef(int output);
+    detail::VectorRef& outOpaqueRef(int output); // NOTE(review): returns VectorRef although ocl_get_in<GOpaque> uses OpaqueRef - confirm against the exported symbol before changing
+
+    std::vector<GArg> m_args;
+    std::unordered_map<std::size_t, GRunArgP> m_results;
+
+
+    friend class gimpl::GOCLExecutable;
+};
+
+class GAPI_EXPORTS GOCLKernel
+{
+public:
+    // This function is kernel's execution entry point (does the processing work)
+    using F = std::function<void(GOCLContext &)>;
+
+    GOCLKernel();
+    explicit GOCLKernel(const F& f);
+
+    void apply(GOCLContext &ctx);
+
+protected:
+    F m_f;
+};
+
+// FIXME: This is an ugly ad-hoc implementation. TODO: refactor
+
+namespace detail
+{
+template<class T> struct ocl_get_in;
+template<> struct ocl_get_in<cv::GMat>
+{
+    static cv::UMat    get(GOCLContext &ctx, int idx) { return ctx.inMat(idx); }
+};
+template<> struct ocl_get_in<cv::GScalar>
+{
+    static cv::Scalar get(GOCLContext &ctx, int idx) { return ctx.inVal(idx); }
+};
+template<typename U> struct ocl_get_in<cv::GArray<U> >
+{
+    static const std::vector<U>& get(GOCLContext &ctx, int idx) { return ctx.inArg<VectorRef>(idx).rref<U>(); }
+};
+template<typename U> struct ocl_get_in<cv::GOpaque<U> >
+{
+    static const U& get(GOCLContext &ctx, int idx) { return ctx.inArg<OpaqueRef>(idx).rref<U>(); }
+};
+template<class T> struct ocl_get_in
+{
+    static T get(GOCLContext &ctx, int idx) { return ctx.inArg<T>(idx); }
+};
+
+struct tracked_cv_umat{
+    //TODO Think if T - API could reallocate UMat to a proper size - how do we handle this ?
+    //tracked_cv_umat(cv::UMat& m) : r{(m)}, original_data{m.getMat(ACCESS_RW).data} {}
+    tracked_cv_umat(cv::UMat& m) : r(m), original_data{ nullptr } {}
+    cv::UMat &r; // FIXME: It was a value (not a reference) before.
+                 // Actually OCL backend should allocate its internal data!
+    uchar* original_data;
+
+    operator cv::UMat& (){ return r;}
+    void validate() const{
+        //if (r.getMat(ACCESS_RW).data != original_data)
+        //{
+        //    util::throw_error
+        //        (std::logic_error
+        //         ("OpenCV kernel output parameter was reallocated. \n"
+        //          "Incorrect meta data was provided ?"));
+        //}
+
+    }
+};
+
+template<typename... Outputs>
+void postprocess_ocl(Outputs&... outs)
+{
+    struct
+    {
+        void operator()(tracked_cv_umat* bm) { bm->validate(); }
+        void operator()(...) {                  }
+
+    } validate;
+    //dummy array to unfold parameter pack
+    int dummy[] = { 0, (validate(&outs), 0)... };
+    cv::util::suppress_unused_warning(dummy);
+}
+
+template<class T> struct ocl_get_out;
+template<> struct ocl_get_out<cv::GMat>
+{
+    static tracked_cv_umat get(GOCLContext &ctx, int idx)
+    {
+        auto& r = ctx.outMatR(idx);
+        return{ r };
+    }
+};
+template<> struct ocl_get_out<cv::GScalar>
+{
+    static cv::Scalar& get(GOCLContext &ctx, int idx)
+    {
+        return ctx.outValR(idx);
+    }
+};
+template<typename U> struct ocl_get_out<cv::GArray<U> >
+{
+    static std::vector<U>& get(GOCLContext &ctx, int idx) { return ctx.outVecR<U>(idx);  }
+};
+template<typename U> struct ocl_get_out<cv::GOpaque<U> >
+{
+    static U& get(GOCLContext &ctx, int idx) { return ctx.outOpaqueR<U>(idx);  }
+};
+
+template<typename, typename, typename>
+struct OCLCallHelper;
+
+// FIXME: probably can be simplified with std::apply or analogue.
+template<typename Impl, typename... Ins, typename... Outs>
+struct OCLCallHelper<Impl, std::tuple<Ins...>, std::tuple<Outs...> >
+{
+    template<typename... Inputs>
+    struct call_and_postprocess
+    {
+        template<typename... Outputs>
+        static void call(Inputs&&... ins, Outputs&&... outs)
+        {
+            // NB: outs are deliberately NOT std::forward'ed: they are passed on as lvalues, so an
+            // Impl::run() that takes its outputs by rvalue reference fails to compile.
+            Impl::run(std::forward<Inputs>(ins)..., outs...);
+
+            postprocess_ocl(outs...);
+        }
+    };
+
+    template<int... IIs, int... OIs>
+    static void call_impl(GOCLContext &ctx, detail::Seq<IIs...>, detail::Seq<OIs...>)
+    {
+        //TODO: Make sure that OpenCV kernels do not reallocate memory for output parameters
+        //by comparing its state (data ptr) before and after the call.
+        //Convert own::Scalar to cv::Scalar before calling the kernel, then run the kernel;
+        //convert cv::Scalar to own::Scalar after the kernel call and write back the results.
+        call_and_postprocess<decltype(ocl_get_in<Ins>::get(ctx, IIs))...>::call(ocl_get_in<Ins>::get(ctx, IIs)..., ocl_get_out<Outs>::get(ctx, OIs)...);
+    }
+
+    static void call(GOCLContext &ctx)
+    {
+        call_impl(ctx,
+            typename detail::MkSeq<sizeof...(Ins)>::type(),
+            typename detail::MkSeq<sizeof...(Outs)>::type());
+    }
+};
+
+} // namespace detail
+
+template<class Impl, class K>
+class GOCLKernelImpl: public cv::detail::OCLCallHelper<Impl, typename K::InArgs, typename K::OutArgs>,
+                      public cv::detail::KernelTag
+{
+    using P = detail::OCLCallHelper<Impl, typename K::InArgs, typename K::OutArgs>;
+
+public:
+    using API = K;
+
+    static cv::gapi::GBackend backend()  { return cv::gapi::ocl::backend(); }
+    static cv::GOCLKernel     kernel()   { return GOCLKernel(&P::call);     }
+};
+
+#define GAPI_OCL_KERNEL(Name, API) struct Name: public cv::GOCLKernelImpl<Name, API>
+
+} // namespace cv
+
+#endif // OPENCV_GAPI_GOCLKERNEL_HPP
diff --git a/IPL/include/opencv/opencv2/gapi/ocl/imgproc.hpp b/IPL/include/opencv/opencv2/gapi/ocl/imgproc.hpp
new file mode 100644
index 0000000..1bb5911
--- /dev/null
+++ b/IPL/include/opencv/opencv2/gapi/ocl/imgproc.hpp
@@ -0,0 +1,27 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2018 Intel Corporation
+
+
+#ifndef OPENCV_GAPI_OCL_IMGPROC_API_HPP
+#define OPENCV_GAPI_OCL_IMGPROC_API_HPP
+
+#include <opencv2/core/cvdef.h>     // GAPI_EXPORTS
+#include <opencv2/gapi/gkernel.hpp> // GKernelPackage
+
+namespace cv {
+namespace gapi {
+namespace imgproc {
+namespace ocl {
+
+    GAPI_EXPORTS GKernelPackage kernels();
+
+} // namespace ocl
+} // namespace imgproc
+} // namespace gapi
+} // namespace cv
+
+
+#endif // OPENCV_GAPI_OCL_IMGPROC_API_HPP
diff --git a/IPL/include/opencv/opencv2/gapi/opencv_includes.hpp b/IPL/include/opencv/opencv2/gapi/opencv_includes.hpp
new file mode 100644
index 0000000..5acf280
--- /dev/null
+++ b/IPL/include/opencv/opencv2/gapi/opencv_includes.hpp
@@ -0,0 +1,21 @@
+// This file is part of OpenCV project.
+
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2018 Intel Corporation
+
+
+#ifndef OPENCV_GAPI_OPENCV_INCLUDES_HPP
+#define OPENCV_GAPI_OPENCV_INCLUDES_HPP
+
+#if !defined(GAPI_STANDALONE)
+#  include <opencv2/core/mat.hpp>
+#  include <opencv2/core/cvdef.h>
+#  include <opencv2/core/types.hpp>
+#  include <opencv2/core/base.hpp>
+#else   // Without OpenCV
+#  include <opencv2/gapi/own/cvdefs.hpp>
+#endif // !defined(GAPI_STANDALONE)
+
+#endif // OPENCV_GAPI_OPENCV_INCLUDES_HPP
diff --git a/IPL/include/opencv/opencv2/gapi/operators.hpp b/IPL/include/opencv/opencv2/gapi/operators.hpp
new file mode 100644
index 0000000..b20062c
--- /dev/null
+++ b/IPL/include/opencv/opencv2/gapi/operators.hpp
@@ -0,0 +1,69 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2018 Intel Corporation
+
+
+#ifndef OPENCV_GAPI_OPERATORS_HPP
+#define OPENCV_GAPI_OPERATORS_HPP
+
+#include <opencv2/gapi/gmat.hpp>
+#include <opencv2/gapi/gscalar.hpp>
+
+GAPI_EXPORTS cv::GMat operator+(const cv::GMat&    lhs, const cv::GMat&    rhs);
+
+GAPI_EXPORTS cv::GMat operator+(const cv::GMat&    lhs, const cv::GScalar& rhs);
+GAPI_EXPORTS cv::GMat operator+(const cv::GScalar& lhs, const cv::GMat&    rhs);
+
+GAPI_EXPORTS cv::GMat operator-(const cv::GMat&    lhs, const cv::GMat&    rhs);
+
+GAPI_EXPORTS cv::GMat operator-(const cv::GMat&    lhs, const cv::GScalar& rhs);
+GAPI_EXPORTS cv::GMat operator-(const cv::GScalar& lhs, const cv::GMat&    rhs);
+
+GAPI_EXPORTS cv::GMat operator*(const cv::GMat&    lhs, float              rhs);
+GAPI_EXPORTS cv::GMat operator*(float              lhs, const cv::GMat&    rhs);
+GAPI_EXPORTS cv::GMat operator*(const cv::GMat&    lhs, const cv::GScalar& rhs);
+GAPI_EXPORTS cv::GMat operator*(const cv::GScalar& lhs, const cv::GMat&    rhs);
+
+GAPI_EXPORTS cv::GMat operator/(const cv::GMat&    lhs, const cv::GScalar& rhs);
+GAPI_EXPORTS cv::GMat operator/(const cv::GScalar& lhs, const cv::GMat&    rhs);
+GAPI_EXPORTS cv::GMat operator/(const cv::GMat&    lhs, const cv::GMat&    rhs);
+
+GAPI_EXPORTS cv::GMat operator&(const cv::GMat&    lhs, const cv::GMat&    rhs);
+GAPI_EXPORTS cv::GMat operator|(const cv::GMat&    lhs, const cv::GMat&    rhs);
+GAPI_EXPORTS cv::GMat operator^(const cv::GMat&    lhs, const cv::GMat&    rhs);
+GAPI_EXPORTS cv::GMat operator~(const cv::GMat&    lhs);
+
+GAPI_EXPORTS cv::GMat operator&(const cv::GScalar& lhs, const cv::GMat&    rhs);
+GAPI_EXPORTS cv::GMat operator|(const cv::GScalar& lhs, const cv::GMat&    rhs);
+GAPI_EXPORTS cv::GMat operator^(const cv::GScalar& lhs, const cv::GMat&    rhs);
+
+GAPI_EXPORTS cv::GMat operator&(const cv::GMat& lhs, const cv::GScalar&    rhs);
+GAPI_EXPORTS cv::GMat operator|(const cv::GMat& lhs, const cv::GScalar&    rhs);
+GAPI_EXPORTS cv::GMat operator^(const cv::GMat& lhs, const cv::GScalar&    rhs);
+
+GAPI_EXPORTS cv::GMat operator>(const cv::GMat&    lhs, const cv::GMat&    rhs);
+GAPI_EXPORTS cv::GMat operator>=(const cv::GMat&   lhs, const cv::GMat&    rhs);
+GAPI_EXPORTS cv::GMat operator<(const cv::GMat&    lhs, const cv::GMat&    rhs);
+GAPI_EXPORTS cv::GMat operator<=(const cv::GMat&   lhs, const cv::GMat&    rhs);
+GAPI_EXPORTS cv::GMat operator==(const cv::GMat&   lhs, const cv::GMat&    rhs);
+GAPI_EXPORTS cv::GMat operator!=(const cv::GMat&   lhs, const cv::GMat&    rhs);
+
+GAPI_EXPORTS cv::GMat operator>(const cv::GMat&    lhs, const cv::GScalar& rhs);
+GAPI_EXPORTS cv::GMat operator>=(const cv::GMat&   lhs, const cv::GScalar& rhs);
+GAPI_EXPORTS cv::GMat operator<(const cv::GMat&    lhs, const cv::GScalar& rhs);
+GAPI_EXPORTS cv::GMat operator<=(const cv::GMat&   lhs, const cv::GScalar& rhs);
+GAPI_EXPORTS cv::GMat operator==(const cv::GMat&   lhs, const cv::GScalar& rhs);
+GAPI_EXPORTS cv::GMat operator!=(const cv::GMat&   lhs, const cv::GScalar& rhs);
+
+GAPI_EXPORTS cv::GMat operator>(const cv::GScalar&    lhs, const cv::GMat& rhs);
+GAPI_EXPORTS cv::GMat operator>=(const cv::GScalar&   lhs, const cv::GMat& rhs);
+GAPI_EXPORTS cv::GMat operator<(const cv::GScalar&    lhs, const cv::GMat& rhs);
+GAPI_EXPORTS cv::GMat operator<=(const cv::GScalar&   lhs, const cv::GMat& rhs);
+GAPI_EXPORTS cv::GMat operator==(const cv::GScalar&   lhs, const cv::GMat& rhs);
+GAPI_EXPORTS cv::GMat operator!=(const cv::GScalar&   lhs, const cv::GMat& rhs);
+
+
+
+#endif // OPENCV_GAPI_OPERATORS_HPP
diff --git a/IPL/include/opencv/opencv2/gapi/own/assert.hpp b/IPL/include/opencv/opencv2/gapi/own/assert.hpp
new file mode 100644
index 0000000..d0e0f1c
--- /dev/null
+++ b/IPL/include/opencv/opencv2/gapi/own/assert.hpp
@@ -0,0 +1,43 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2018 Intel Corporation
+
+
+#ifndef OPENCV_GAPI_OWN_ASSERT_HPP
+#define OPENCV_GAPI_OWN_ASSERT_HPP
+
+#if !defined(GAPI_STANDALONE)
+#include <opencv2/core/base.hpp>
+#define GAPI_Assert CV_Assert
+#define GAPI_DbgAssert CV_DbgAssert
+
+#else
+#include <stdexcept>
+#include <sstream>
+#include <opencv2/gapi/util/throw.hpp>
+
+namespace detail
+{
+    [[noreturn]] inline void assert_abort(const char* str, int line, const char* file, const char* func)
+    {
+        // Compose "<file>:<line>: Assertion <expr> in function <func> failed" and raise it as std::logic_error.
+        std::ostringstream text; text << file << ":" << line << ": Assertion " << str << " in function " << func << " failed\n";
+        cv::util::throw_error(std::logic_error(text.str()));
+    }
+}
+
+#define GAPI_Assert(expr) /* do/while(0): expands to one statement, safe in unbraced if/else */ \
+do { if (!(expr)) ::detail::assert_abort(#expr, __LINE__, __FILE__, __func__); } while (0)
+
+
+#ifdef NDEBUG
+#  define GAPI_DbgAssert(expr)
+#else
+#  define GAPI_DbgAssert(expr) GAPI_Assert(expr)
+#endif
+
+#endif // GAPI_STANDALONE
+
+#endif // OPENCV_GAPI_OWN_ASSERT_HPP
diff --git a/IPL/include/opencv/opencv2/gapi/own/convert.hpp b/IPL/include/opencv/opencv2/gapi/own/convert.hpp
new file mode 100644
index 0000000..58f291c
--- /dev/null
+++ b/IPL/include/opencv/opencv2/gapi/own/convert.hpp
@@ -0,0 +1,51 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2018 Intel Corporation
+
+
+#ifndef OPENCV_GAPI_OWN_CONVERT_HPP
+#define OPENCV_GAPI_OWN_CONVERT_HPP
+
+#if !defined(GAPI_STANDALONE)
+
+#include <opencv2/gapi/opencv_includes.hpp>
+#include <opencv2/gapi/own/mat.hpp>
+
+namespace cv
+{
+    template<typename T>
+    std::vector<T> to_own(const cv::MatSize &sz) {
+        std::vector<T> result;
+        result.reserve(sz.dims());
+        // Note: cv::MatSize is not iterable, so it is copied index by index
+        for (int axis = 0; axis < sz.dims(); ++axis)
+            result.push_back(static_cast<T>(sz[axis]));
+        return result;
+    }
+
+           cv::gapi::own::Mat to_own(Mat&&) = delete;
+
+    inline cv::gapi::own::Mat to_own(Mat const& m) {
+        return (m.dims == 2) // 2D: keep rows/cols/step; otherwise describe the shape through the ND size vector
+            ?  cv::gapi::own::Mat{m.rows, m.cols, m.type(), m.data, m.step}
+            :  cv::gapi::own::Mat{to_own<int>(m.size), m.type(), m.data};
+    }
+namespace gapi
+{
+namespace own
+{
+    inline cv::Mat to_ocv(Mat const& m) {
+        // ND header when a dims vector is recorded, plain 2D header (rows/cols/step) otherwise
+        if (!m.dims.empty()) return cv::Mat{m.dims, m.type(), m.data};
+        return cv::Mat{m.rows, m.cols, m.type(), m.data, m.step};
+    }
+           cv::Mat to_ocv(Mat&&)    = delete;
+} // namespace own
+} // namespace gapi
+} // namespace cv
+
+#endif // !defined(GAPI_STANDALONE)
+
+#endif // OPENCV_GAPI_OWN_CONVERT_HPP
diff --git a/IPL/include/opencv/opencv2/gapi/own/cvdefs.hpp b/IPL/include/opencv/opencv2/gapi/own/cvdefs.hpp
new file mode 100644
index 0000000..354609b
--- /dev/null
+++ b/IPL/include/opencv/opencv2/gapi/own/cvdefs.hpp
@@ -0,0 +1,157 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2018 Intel Corporation
+
+
+#ifndef OPENCV_GAPI_CV_DEFS_HPP
+#define OPENCV_GAPI_CV_DEFS_HPP
+
+#if defined(GAPI_STANDALONE)
+#include <opencv2/gapi/own/types.hpp> // cv::gapi::own::Rect/Size/Point
+#include <opencv2/gapi/own/scalar.hpp> // cv::gapi::own::Scalar
+
+// Simulate OpenCV definitions taken from various
+// OpenCV interface headers if G-API is built in a
+// standalone mode.
+
+// interface.h:
+
+typedef unsigned char uchar;
+typedef   signed char schar;  // explicitly signed: plain char is unsigned on some ABIs, and schar backs CV_8S
+
+typedef unsigned short ushort;
+
+#define CV_CN_MAX     512
+#define CV_CN_SHIFT   3
+#define CV_DEPTH_MAX  (1 << CV_CN_SHIFT)
+
+
+#define CV_8U   0
+#define CV_8S   1
+#define CV_16U  2
+#define CV_16S  3
+#define CV_32S  4
+#define CV_32F  5
+#define CV_64F  6
+#define CV_USRTYPE1 7
+
+#define CV_MAT_DEPTH_MASK       (CV_DEPTH_MAX - 1)
+#define CV_MAT_DEPTH(flags)     ((flags) & CV_MAT_DEPTH_MASK)
+
+#define CV_MAKETYPE(depth,cn) (CV_MAT_DEPTH(depth) + (((cn)-1) << CV_CN_SHIFT))
+#define CV_MAKE_TYPE CV_MAKETYPE
+
+#define CV_8UC1 CV_MAKETYPE(CV_8U,1)
+#define CV_8UC2 CV_MAKETYPE(CV_8U,2)
+#define CV_8UC3 CV_MAKETYPE(CV_8U,3)
+#define CV_8UC4 CV_MAKETYPE(CV_8U,4)
+#define CV_8UC(n) CV_MAKETYPE(CV_8U,(n))
+
+#define CV_8SC1 CV_MAKETYPE(CV_8S,1)
+#define CV_8SC2 CV_MAKETYPE(CV_8S,2)
+#define CV_8SC3 CV_MAKETYPE(CV_8S,3)
+#define CV_8SC4 CV_MAKETYPE(CV_8S,4)
+#define CV_8SC(n) CV_MAKETYPE(CV_8S,(n))
+
+#define CV_16UC1 CV_MAKETYPE(CV_16U,1)
+#define CV_16UC2 CV_MAKETYPE(CV_16U,2)
+#define CV_16UC3 CV_MAKETYPE(CV_16U,3)
+#define CV_16UC4 CV_MAKETYPE(CV_16U,4)
+#define CV_16UC(n) CV_MAKETYPE(CV_16U,(n))
+
+#define CV_16SC1 CV_MAKETYPE(CV_16S,1)
+#define CV_16SC2 CV_MAKETYPE(CV_16S,2)
+#define CV_16SC3 CV_MAKETYPE(CV_16S,3)
+#define CV_16SC4 CV_MAKETYPE(CV_16S,4)
+#define CV_16SC(n) CV_MAKETYPE(CV_16S,(n))
+
+#define CV_32SC1 CV_MAKETYPE(CV_32S,1)
+#define CV_32SC2 CV_MAKETYPE(CV_32S,2)
+#define CV_32SC3 CV_MAKETYPE(CV_32S,3)
+#define CV_32SC4 CV_MAKETYPE(CV_32S,4)
+#define CV_32SC(n) CV_MAKETYPE(CV_32S,(n))
+
+#define CV_32FC1 CV_MAKETYPE(CV_32F,1)
+#define CV_32FC2 CV_MAKETYPE(CV_32F,2)
+#define CV_32FC3 CV_MAKETYPE(CV_32F,3)
+#define CV_32FC4 CV_MAKETYPE(CV_32F,4)
+#define CV_32FC(n) CV_MAKETYPE(CV_32F,(n))
+
+#define CV_64FC1 CV_MAKETYPE(CV_64F,1)
+#define CV_64FC2 CV_MAKETYPE(CV_64F,2)
+#define CV_64FC3 CV_MAKETYPE(CV_64F,3)
+#define CV_64FC4 CV_MAKETYPE(CV_64F,4)
+#define CV_64FC(n) CV_MAKETYPE(CV_64F,(n))
+
+// cvdef.h:
+
+#define CV_MAT_CN_MASK          ((CV_CN_MAX - 1) << CV_CN_SHIFT)
+#define CV_MAT_CN(flags)        ((((flags) & CV_MAT_CN_MASK) >> CV_CN_SHIFT) + 1)
+#define CV_MAT_TYPE_MASK        (CV_DEPTH_MAX*CV_CN_MAX - 1)
+#define CV_MAT_TYPE(flags)      ((flags) & CV_MAT_TYPE_MASK)
+#define CV_MAT_CONT_FLAG_SHIFT  14
+#define CV_MAT_CONT_FLAG        (1 << CV_MAT_CONT_FLAG_SHIFT)
+#define CV_IS_MAT_CONT(flags)   ((flags) & CV_MAT_CONT_FLAG)
+#define CV_IS_CONT_MAT          CV_IS_MAT_CONT
+#define CV_SUBMAT_FLAG_SHIFT    15
+#define CV_SUBMAT_FLAG          (1 << CV_SUBMAT_FLAG_SHIFT)
+#define CV_IS_SUBMAT(flags)     ((flags) & CV_SUBMAT_FLAG) // tests the sub-matrix bit defined on the line above
+
+///** Size of each channel item,
+//   0x8442211 = 1000 0100 0100 0010 0010 0001 0001 ~ array of sizeof(arr_type_elem) */
+//#define CV_ELEM_SIZE1(type) \
+//    ((((sizeof(size_t)<<28)|0x8442211) >> CV_MAT_DEPTH(type)*4) & 15)
+
+#define CV_MAT_TYPE(flags)      ((flags) & CV_MAT_TYPE_MASK)
+
+/** 0x3a50 = 11 10 10 01 01 00 00 ~ array of log2(sizeof(arr_type_elem)) */
+#define CV_ELEM_SIZE(type) \
+    (CV_MAT_CN(type) << ((((sizeof(size_t)/4+1)*16384|0x3a50) >> CV_MAT_DEPTH(type)*2) & 3))
+
+#ifndef CV_OVERRIDE
+#  define CV_OVERRIDE override
+#endif
+
+// base.h:
+namespace cv
+{
+enum BorderTypes {
+    BORDER_CONSTANT    = 0, //!< `iiiiii|abcdefgh|iiiiiii`  with some specified `i`
+    BORDER_REPLICATE   = 1, //!< `aaaaaa|abcdefgh|hhhhhhh`
+    BORDER_REFLECT     = 2, //!< `fedcba|abcdefgh|hgfedcb`
+    BORDER_WRAP        = 3, //!< `cdefgh|abcdefgh|abcdefg`
+    BORDER_REFLECT_101 = 4, //!< `gfedcb|abcdefgh|gfedcba`
+    BORDER_TRANSPARENT = 5, //!< `uvwxyz|abcdefgh|ijklmno`
+
+    BORDER_REFLECT101  = BORDER_REFLECT_101, //!< same as BORDER_REFLECT_101
+    BORDER_DEFAULT     = BORDER_REFLECT_101, //!< same as BORDER_REFLECT_101
+    BORDER_ISOLATED    = 16 //!< do not look outside of ROI
+};
+// imgproc.hpp:
+enum InterpolationFlags{
+    INTER_NEAREST        = 0,
+    INTER_LINEAR         = 1,
+    INTER_CUBIC          = 2,
+    INTER_AREA           = 3,
+    INTER_LANCZOS4       = 4,
+    INTER_LINEAR_EXACT   = 5,
+    INTER_MAX            = 7,
+};
+// replacement of cv's structures:
+using Rect   = gapi::own::Rect;
+using Size   = gapi::own::Size;
+using Point  = gapi::own::Point;
+using Scalar = gapi::own::Scalar;
+} // namespace cv
+
+static inline int cvFloor( double value )
+{
+    const int truncated = static_cast<int>(value); // conversion truncates toward zero
+    return (truncated > value) ? truncated - 1 : truncated; // step down once for negative non-integers
+}
+
+#endif //  defined(GAPI_STANDALONE)
+
+#endif //  OPENCV_GAPI_CV_DEFS_HPP
diff --git a/IPL/include/opencv/opencv2/gapi/own/exports.hpp b/IPL/include/opencv/opencv2/gapi/own/exports.hpp
new file mode 100644
index 0000000..53bff2a
--- /dev/null
+++ b/IPL/include/opencv/opencv2/gapi/own/exports.hpp
@@ -0,0 +1,31 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2018 Intel Corporation
+
+
+#ifndef OPENCV_GAPI_OWN_TYPES_HPP
+#define OPENCV_GAPI_OWN_TYPES_HPP
+
+#   if defined(__OPENCV_BUILD)
+#       include <opencv2/core/base.hpp>
+#       define GAPI_EXPORTS CV_EXPORTS
+#   else
+#       define GAPI_EXPORTS
+
+#if 0  // Note: the following version currently is not needed for non-OpenCV build
+#       if defined _WIN32
+#           define GAPI_EXPORTS __declspec(dllexport)
+#       elif defined __GNUC__ && __GNUC__ >= 4
+#           define GAPI_EXPORTS __attribute__ ((visibility ("default")))
+#       endif
+
+#       ifndef GAPI_EXPORTS
+#           define GAPI_EXPORTS
+#       endif
+#endif
+
+#   endif
+
+#endif // OPENCV_GAPI_OWN_TYPES_HPP
diff --git a/IPL/include/opencv/opencv2/gapi/own/mat.hpp b/IPL/include/opencv/opencv2/gapi/own/mat.hpp
new file mode 100644
index 0000000..a5f5b5e
--- /dev/null
+++ b/IPL/include/opencv/opencv2/gapi/own/mat.hpp
@@ -0,0 +1,341 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2018 Intel Corporation
+
+
+#ifndef OPENCV_GAPI_OWN_MAT_HPP
+#define OPENCV_GAPI_OWN_MAT_HPP
+
+#include <opencv2/gapi/opencv_includes.hpp>
+#include <opencv2/gapi/own/types.hpp>
+#include <opencv2/gapi/own/scalar.hpp>
+#include <opencv2/gapi/own/saturate.hpp>
+#include <opencv2/gapi/own/assert.hpp>
+
+#include <memory>                   //std::shared_ptr
+#include <cstring>                  //std::memcpy
+#include <numeric>                  //std::accumulate
+#include <opencv2/gapi/util/throw.hpp>
+
+namespace cv { namespace gapi { namespace own {
+    namespace detail {
+        template <typename T, unsigned char channels>
+        void assign_row(void* ptr, int cols, Scalar const& s)
+        {
+            T* out = static_cast<T*>(ptr); // typed cursor over the row; advances one pixel (channels elements) per column
+            for (int col = 0; col < cols; ++col, out += channels)
+            {
+                for (int ch = 0; ch < channels; ++ch)
+                {
+                    out[ch] = saturate<T>(s[ch], roundd); // scalar component ch, rounded and saturated to T
+                }
+            }
+        }
+
+        inline size_t default_step(int type, int cols) // bytes in one unpadded row: element size times column count
+        {
+            return static_cast<size_t>(CV_ELEM_SIZE(type)) * static_cast<size_t>(cols); // widen before multiplying: int*int could overflow
+        }
+        //Matrix header, i.e. fields that are unique to each Mat object.
+        //A dedicated class is needed to implement custom behavior on move (resetting the state of the moved-from object)
+        struct MatHeader{
+            enum { AUTO_STEP = 0};
+            enum { TYPE_MASK = 0x00000FFF  };
+
+            MatHeader() = default;
+
+            MatHeader(int _rows, int _cols, int type, void* _data, size_t _step)
+            : flags((type & TYPE_MASK)), rows(_rows), cols(_cols), data((uchar*)_data), step(_step == AUTO_STEP ? detail::default_step(type, _cols) : _step)
+            {}
+
+            MatHeader(const std::vector<int> &_dims, int type, void* _data)
+            : flags((type & TYPE_MASK)), data((uchar*)_data), step(0), dims(_dims)
+            {}
+
+            MatHeader(const MatHeader& ) = default;
+            MatHeader(MatHeader&& src) : MatHeader(src) // reuse copy constructor here
+            {
+                MatHeader empty; //give it a name to call copy(not move) assignment below
+                src = empty; //reset the moved-from header to the default-constructed state
+            }
+            MatHeader& operator=(const MatHeader& ) = default;
+            MatHeader& operator=(MatHeader&& src)
+            {
+                *this = src; //calling a copy assignment here, not move one
+                MatHeader empty; //give it a name to call copy(not move) assignment below
+                src = empty; //reset the moved-from header to the default-constructed state
+                return *this;
+            }
+            /*! includes several bit-fields:
+                 - depth
+                 - number of channels
+             */
+            int flags = 0;
+
+            //! the number of rows and columns; the ND constructor above does not set them, so they stay 0 in the ND case
+            int rows = 0, cols = 0;
+            //! pointer to the data
+            uchar* data = nullptr;
+            size_t step = 0; //!< bytes per row; set to 0 by the ND constructor
+            //! dimensions (ND-case)
+            std::vector<int> dims;
+        };
+    } // namespace detail
+    //concise version of cv::Mat suitable for GAPI needs (used when no dependence on OpenCV is required)
+    class Mat : public detail::MatHeader{
+    public:
+
+        Mat() = default;
+
+        /** @overload
+        @param _rows Number of rows in a 2D array.
+        @param _cols Number of columns in a 2D array.
+        @param _type Array type. Use CV_8UC1, ..., CV_64FC4 to create 1-4 channel matrices, or
+        CV_8UC(n), ..., CV_64FC(n) to create multi-channel (up to CV_CN_MAX channels) matrices.
+        @param _data Pointer to the user data. Matrix constructors that take data and step parameters do not
+        allocate matrix data. Instead, they just initialize the matrix header that points to the specified
+        data, which means that no data is copied. This operation is very efficient and can be used to
+        process external data using OpenCV functions. The external data is not automatically deallocated, so
+        you should take care of it.
+        @param _step Number of bytes each matrix row occupies. The value should include the padding bytes at
+        the end of each row, if any. If the parameter is missing (set to AUTO_STEP ), no padding is assumed
+        and the actual step is calculated as cols*elemSize(). See Mat::elemSize.
+        */
+        Mat(int _rows, int _cols, int _type, void* _data, size_t _step = AUTO_STEP)
+        : MatHeader (_rows, _cols, _type, _data, _step)
+        {}
+
+        Mat(const std::vector<int> &_dims, int _type, void* _data)
+        : MatHeader (_dims, _type, _data)
+        {}
+
+        Mat(std::vector<int> &&_dims, int _type, void* _data)
+        : MatHeader (std::move(_dims), _type, _data)
+        {}
+
+        Mat(Mat const& src, const Rect& roi )
+        : Mat(src)
+        {
+           rows = roi.height;
+           cols = roi.width;
+           data = ptr(roi.y, roi.x);
+        }
+
+        Mat(Mat const& src) = default;
+        Mat(Mat&& src) = default;
+
+        Mat& operator=(Mat const& src) = default;
+        Mat& operator=(Mat&& src) = default;
+
+        /** @brief Sets all or some of the array elements to the specified value.
+        @param s Assigned scalar converted to the actual array type.
+        */
+        Mat& operator = (const Scalar& s)
+        {
+            constexpr unsigned max_channels = 4; //Scalar can't fit more than 4
+            using func_p_t = void (*)(void*, int, Scalar const&);
+            using detail::assign_row;
+            #define TABLE_ENTRY(type)  {assign_row<type, 1>, assign_row<type, 2>, assign_row<type, 3>, assign_row<type, 4>}
+            static constexpr func_p_t func_tbl[][max_channels] = {
+                    TABLE_ENTRY(uchar),
+                    TABLE_ENTRY(schar),
+                    TABLE_ENTRY(ushort),
+                    TABLE_ENTRY(short),
+                    TABLE_ENTRY(int),
+                    TABLE_ENTRY(float),
+                    TABLE_ENTRY(double)
+            };
+            #undef TABLE_ENTRY
+
+            static_assert(CV_8U == 0 && CV_8S == 1  && CV_16U == 2 && CV_16S == 3
+                       && CV_32S == 4 && CV_32F == 5 && CV_64F == 6,
+                       "OCV type ids used as indexes to array, thus exact numbers are important!"
+            );
+
+            const auto depth = static_cast<unsigned int>(this->depth());
+            GAPI_Assert(depth < sizeof(func_tbl)/sizeof(func_tbl[0]));
+
+            if (dims.empty())
+            {
+                const auto channels = static_cast<unsigned int>(this->channels());
+                GAPI_Assert(channels <= max_channels);
+
+                auto* f = func_tbl[depth][channels - 1];
+                for (int r = 0; r < rows; ++r)
+                {
+                    (*f)(static_cast<void *>(ptr(r)), cols, s );
+                }
+            }
+            else
+            {
+                auto* f = func_tbl[depth][0];
+                // FIXME: better to refactor assign_row to use std::size_t by default
+                (*f)(static_cast<void *>(data), static_cast<int>(total()), s);
+            }
+            return *this;
+        }
+
+        /** @brief Returns the matrix element size in bytes.
+
+        The method returns the matrix element size in bytes. For example, if the matrix type is CV_16SC3 ,
+        the method returns 3\*sizeof(short) or 6.
+         */
+        size_t elemSize() const
+        {
+            return CV_ELEM_SIZE(type());
+        }
+        /** @brief Returns the type of a matrix element.
+
+        The method returns a matrix element type. This is an identifier compatible with the CvMat type
+        system, like CV_16SC3 or 16-bit signed 3-channel array, and so on.
+         */
+        int type() const            {return CV_MAT_TYPE(flags);}
+
+        /** @brief Returns the depth of a matrix element.
+
+        The method returns the identifier of the matrix element depth (the type of each individual channel).
+        For example, for a 16-bit signed element array, the method returns CV_16S . A complete list of
+        matrix types contains the following values:
+        -   CV_8U - 8-bit unsigned integers ( 0..255 )
+        -   CV_8S - 8-bit signed integers ( -128..127 )
+        -   CV_16U - 16-bit unsigned integers ( 0..65535 )
+        -   CV_16S - 16-bit signed integers ( -32768..32767 )
+        -   CV_32S - 32-bit signed integers ( -2147483648..2147483647 )
+        -   CV_32F - 32-bit floating-point numbers ( -FLT_MAX..FLT_MAX, INF, NAN )
+        -   CV_64F - 64-bit floating-point numbers ( -DBL_MAX..DBL_MAX, INF, NAN )
+         */
+        int depth() const           {return CV_MAT_DEPTH(flags);}
+
+        /** @brief Returns the number of matrix channels.
+
+        The method returns the number of matrix channels.
+        If matrix is N-dimensional, -1 is returned.
+         */
+        int channels() const        {return dims.empty() ? CV_MAT_CN(flags) : -1;}
+
+        /**
+        @param _rows New number of rows.
+        @param _cols New number of columns.
+        @param _type New matrix type.
+         */
+        void create(int _rows, int _cols, int _type)
+        {
+            create(Size{_cols, _rows}, _type);
+        }
+        /** @overload
+        @param _size Alternative new matrix size specification: Size(cols, rows)
+        @param _type New matrix type.
+        */
+        void create(Size _size, int _type) // (re)allocates owned storage unless both size and element type already match
+        {
+            GAPI_Assert(_size.height >= 0 && _size.width >= 0);
+            if (_size != Size{cols, rows} || (_type & TYPE_MASK) != type()) // a type change alters bytes per element, so it must reallocate too
+            {
+                Mat tmp{_size.height, _size.width, _type, nullptr};
+                tmp.memory.reset(new uchar[ tmp.step * tmp.rows], [](uchar * p){delete[] p;});
+                tmp.data = tmp.memory.get();
+
+                *this = std::move(tmp);
+            }
+        }
+
+        void create(const std::vector<int> &_dims, int _type)
+        {
+            // FIXME: make a proper reallocation-on-demands
+            // WARNING: no tensor views, so no strides
+            Mat tmp{_dims, _type, nullptr};
+            // FIXME: this accumulate duplicates a lot
+            const auto sz = std::accumulate(_dims.begin(), _dims.end(), 1, std::multiplies<int>());
+            tmp.memory.reset(new uchar[CV_ELEM_SIZE(_type)*sz], [](uchar * p){delete[] p;});
+            tmp.data = tmp.memory.get();
+            *this = std::move(tmp);
+        }
+
+        /** @brief Copies the matrix to another one.
+
+        The method copies the matrix data to another matrix. Before copying the data, the method invokes :
+        @code
+            m.create(this->size(), this->type());
+        @endcode
+        so that the destination matrix is reallocated if needed. While m.copyTo(m); works flawlessly, the
+        function does not handle the case of a partial overlap between the source and the destination
+        matrices.
+         */
+        void copyTo(Mat& dst) const
+        {
+            if (dims.empty())
+            {
+                dst.create(rows, cols, type());
+                for (int r = 0; r < rows; ++r)
+                {
+                    std::copy_n(ptr(r), detail::default_step(type(),cols), dst.ptr(r)); // payload bytes only; row padding is not copied
+                }
+            }
+            else
+            {
+                dst.create(dims, type()); // allocate with the full type so the buffer holds total()*elemSize() bytes
+                std::copy_n(data, total()*elemSize(), dst.data); // write into the destination buffer, not back onto the source
+            }
+        }
+
+        /** @brief Returns true if the array has no elements.
+
+        The method returns true if Mat::total() is 0 or if Mat::data is NULL. Because of pop_back() and
+        resize() methods `M.total() == 0` does not imply that `M.data == NULL`.
+         */
+        bool empty() const
+        {
+            return data == 0 || total() == 0;
+        }
+
+        /** @brief Returns the total number of array elements.
+
+        The method returns the number of array elements (a number of pixels if the array represents an
+        image).
+         */
+        size_t total() const
+        {
+            if (!dims.empty()) // ND case: product of every recorded dimension
+                return std::accumulate(dims.begin(), dims.end(), static_cast<std::size_t>(1), std::multiplies<size_t>());
+            return static_cast<std::size_t>(rows) * cols; // 2D case
+        }
+
+        /** @overload
+        @param roi Extracted submatrix specified as a rectangle.
+        */
+        Mat operator()( const Rect& roi ) const
+        {
+            return Mat{*this, roi};
+        }
+
+
+        /** @brief Returns a pointer to the specified matrix row.
+
+        The methods return `uchar*` or typed pointer to the specified matrix row. See the sample in
+        Mat::isContinuous to know how to use these methods.
+        @param row Index along the dimension 0
+        @param col Index along the dimension 1
+        */
+        uchar* ptr(int row, int col = 0)
+        {
+            return const_cast<uchar*>(const_cast<const Mat*>(this)->ptr(row,col));
+        }
+        /** @overload */
+        const uchar* ptr(int row, int col = 0) const
+        {
+            return data + step * row + CV_ELEM_SIZE(type()) * col;
+        }
+
+
+    private:
+        //actual memory allocated for storage, or nullptr if the object is a non-owning view over other memory
+        std::shared_ptr<uchar> memory;
+    };
+
+} //namespace own
+} //namespace gapi
+} //namespace cv
+
+#endif /* OPENCV_GAPI_OWN_MAT_HPP */
diff --git a/IPL/include/opencv/opencv2/gapi/own/saturate.hpp b/IPL/include/opencv/opencv2/gapi/own/saturate.hpp
new file mode 100644
index 0000000..5b23247
--- /dev/null
+++ b/IPL/include/opencv/opencv2/gapi/own/saturate.hpp
@@ -0,0 +1,90 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2018 Intel Corporation
+
+
+#ifndef OPENCV_GAPI_OWN_SATURATE_HPP
+#define OPENCV_GAPI_OWN_SATURATE_HPP
+
+#include <math.h>
+
+#include <limits>
+#include <type_traits>
+
+#include <opencv2/gapi/own/assert.hpp>
+
+namespace cv { namespace gapi { namespace own {
+//-----------------------------
+//
+// Numeric cast with saturation
+//
+//-----------------------------
+
+template<typename DST, typename SRC>
+static inline DST saturate(SRC x)
+{
+    // only integral types please!
+    GAPI_DbgAssert(std::is_integral<DST>::value &&
+                   std::is_integral<SRC>::value);
+
+    if (std::is_same<DST, SRC>::value)
+        return static_cast<DST>(x); // same type: nothing to clamp
+
+    if (sizeof(DST) > sizeof(SRC))
+        return static_cast<DST>(x); // strictly wider destination: plain cast, no clamping applied
+
+    // the compiler must recognize this saturation pattern,
+    // so that saturate<s16>(a + b) compiles to a saturating-add
+    // instruction (e.g.: _mm_adds_epi16 on x86)
+    return x < std::numeric_limits<DST>::min()?
+               std::numeric_limits<DST>::min():
+           x > std::numeric_limits<DST>::max()?
+               std::numeric_limits<DST>::max():
+           static_cast<DST>(x);
+}
+
+// Note, that OpenCV rounds differently:
+// - like std::round() for add, subtract
+// - like std::rint() for multiply, divide
+template<typename DST, typename SRC, typename R> // R: rounding callable, applied only on the floating-point -> integral path
+static inline DST saturate(SRC x, R round)
+{
+    if (std::is_floating_point<DST>::value)
+    {
+        return static_cast<DST>(x); // floating-point destination: plain cast, no rounding or clamping
+    }
+    else if (std::is_integral<SRC>::value)
+    {
+        GAPI_DbgAssert(std::is_integral<DST>::value &&
+                       std::is_integral<SRC>::value);
+        return saturate<DST>(x); // integral -> integral: defer to the clamping overload
+    }
+    else
+    {
+        GAPI_DbgAssert(std::is_integral<DST>::value &&
+                 std::is_floating_point<SRC>::value);
+#ifdef _MSC_VER
+// Suppress the warning about converting x to floating-point (x is already floating-point here)
+#pragma warning(push)
+#pragma warning(disable: 4244)
+#endif
+        int ix = static_cast<int>(round(x)); // round first, then clamp the integer below
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
+        return saturate<DST>(ix);
+    }
+}
+
+// explicit suffix 'd' for double type
+inline double  ceild(double x) { return ceil(x); }
+inline double floord(double x) { return floor(x); }
+inline double roundd(double x) { return round(x); }
+inline double  rintd(double x) { return rint(x); }
+
+} //namespace own
+} //namespace gapi
+} //namespace cv
+#endif /* OPENCV_GAPI_OWN_SATURATE_HPP */
diff --git a/IPL/include/opencv/opencv2/gapi/own/scalar.hpp b/IPL/include/opencv/opencv2/gapi/own/scalar.hpp
new file mode 100644
index 0000000..bda91c8
--- /dev/null
+++ b/IPL/include/opencv/opencv2/gapi/own/scalar.hpp
@@ -0,0 +1,47 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2018 Intel Corporation
+
+
+#ifndef OPENCV_GAPI_GAPI_OWN_SCALAR_HPP
+#define OPENCV_GAPI_GAPI_OWN_SCALAR_HPP
+
+#include <opencv2/gapi/own/exports.hpp>
+
+namespace cv
+{
+namespace gapi
+{
+namespace own
+{
+
+class GAPI_EXPORTS Scalar
+{
+public:
+    Scalar() = default;                           // all four channels are 0 (see the `val` initializer)
+    explicit Scalar(double v0) { val[0] = v0; };  // sets channel 0 only; channels 1..3 keep their 0 default
+    Scalar(double v0, double v1, double v2 = 0, double v3 = 0)
+        : val{v0, v1, v2, v3}
+    {
+    }
+
+    const double& operator[](int i) const { return val[i]; }  // no bounds check on i
+          double& operator[](int i)       { return val[i]; }  // no bounds check on i
+
+    static Scalar all(double v0) { return Scalar(v0, v0, v0, v0); }  // same value in every channel
+
+    double val[4] = {0};  // channel storage, zero-initialized by default
+};
+
+inline bool operator==(const Scalar& lhs, const Scalar& rhs)  // true when all four channels compare equal
+{
+    return lhs.val[0] == rhs.val[0] && lhs.val[1] == rhs.val[1] && lhs.val[2] == rhs.val[2] && lhs.val[3] == rhs.val[3];  // spelled out: needs no <algorithm>/<iterator>
+}
+
+} // namespace own
+} // namespace gapi
+} // namespace cv
+
+#endif // OPENCV_GAPI_GAPI_OWN_SCALAR_HPP
diff --git a/IPL/include/opencv/opencv2/gapi/own/types.hpp b/IPL/include/opencv/opencv2/gapi/own/types.hpp
new file mode 100644
index 0000000..20445ee
--- /dev/null
+++ b/IPL/include/opencv/opencv2/gapi/own/types.hpp
@@ -0,0 +1,135 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2018 Intel Corporation
+
+
+#ifndef OPENCV_GAPI_TYPES_HPP
+#define OPENCV_GAPI_TYPES_HPP
+
+#include <algorithm>              // std::max, std::min
+#include <ostream>
+
+namespace cv
+{
+namespace gapi
+{
+namespace own
+{
+
+class Point
+{
+public:
+    Point() = default;
+    Point(int _x, int _y) : x(_x),  y(_y)  {};
+
+    int x = 0;
+    int y = 0;
+};
+
+class Rect
+{
+public:
+    Rect() = default;  // all-zero rectangle (see member initializers below)
+    Rect(int _x, int _y, int _width, int _height) : x(_x), y(_y),   width(_width),  height(_height)  {}
+#if !defined(GAPI_STANDALONE)
+    Rect(const cv::Rect& other) : x(other.x), y(other.y), width(other.width), height(other.height) {}  // non-explicit: cv::Rect converts implicitly
+    inline Rect& operator=(const cv::Rect& other)
+    {
+        x = other.x;
+        y = other.y;  // must mirror the converting constructor: y comes from other.y, not other.x
+        width  = other.width;
+        height = other.height;
+        return *this;
+    }
+#endif // !defined(GAPI_STANDALONE)
+
+    int x      = 0; //!< x coordinate of the top-left corner
+    int y      = 0; //!< y coordinate of the top-left corner
+    int width  = 0; //!< width of the rectangle
+    int height = 0; //!< height of the rectangle
+};
+
+inline bool operator==(const Rect& lhs, const Rect& rhs)
+{
+    return lhs.x == rhs.x && lhs.y == rhs.y && lhs.width == rhs.width && lhs.height == rhs.height;
+}
+
+inline bool operator!=(const Rect& lhs, const Rect& rhs)
+{
+    return !(lhs == rhs);
+}
+
+inline Rect& operator&=(Rect& lhs, const Rect& rhs)  // in-place intersection of lhs with rhs
+{
+    int x1 = std::max(lhs.x, rhs.x);  // top-left corner of the overlap
+    int y1 = std::max(lhs.y, rhs.y);
+    lhs.width  = std::min(lhs.x + lhs.width,  rhs.x + rhs.width) -  x1;  // right edge of the overlap minus its left edge
+    lhs.height = std::min(lhs.y + lhs.height, rhs.y + rhs.height) - y1;  // bottom edge of the overlap minus its top edge
+    lhs.x = x1;
+    lhs.y = y1;
+    if( lhs.width <= 0 || lhs.height <= 0 )
+        lhs = Rect();  // no overlap (or zero area): collapse to the default all-zero Rect
+    return lhs;
+}
+
+inline const Rect operator&(const Rect& lhs, const Rect& rhs)
+{
+    Rect result = lhs;
+    return result &= rhs;
+}
+
+inline std::ostream& operator<<(std::ostream& o, const Rect& rect)
+{
+    return o << "[" << rect.width << " x " << rect.height << " from (" << rect.x << ", " << rect.y << ")]";
+}
+
+class Size
+{
+public:
+    Size() = default;
+    Size(int _width, int _height) : width(_width),  height(_height)  {};
+#if !defined(GAPI_STANDALONE)
+    Size(const cv::Size& other) : width(other.width), height(other.height) {};
+    inline Size& operator=(const cv::Size& rhs)
+    {
+        width  = rhs.width;
+        height = rhs.height;
+        return *this;
+    }
+#endif // !defined(GAPI_STANDALONE)
+
+    int width  = 0;
+    int height = 0;
+};
+
+inline Size& operator+=(Size& lhs, const Size& rhs)
+{
+    lhs.width  += rhs.width;
+    lhs.height += rhs.height;
+    return lhs;
+}
+
+inline bool operator==(const Size& lhs, const Size& rhs)
+{
+    return lhs.width == rhs.width && lhs.height == rhs.height;
+}
+
+inline bool operator!=(const Size& lhs, const Size& rhs)
+{
+    return !(lhs == rhs);
+}
+
+
+inline std::ostream& operator<<(std::ostream& o, const Size& s)
+{
+    o << "[" << s.width << " x " << s.height << "]";
+    return o;
+}
+
+} // namespace own
+} // namespace gapi
+} // namespace cv
+
+#endif // OPENCV_GAPI_TYPES_HPP
diff --git a/IPL/include/opencv/opencv2/gapi/plaidml/core.hpp b/IPL/include/opencv/opencv2/gapi/plaidml/core.hpp
new file mode 100644
index 0000000..47ac486
--- /dev/null
+++ b/IPL/include/opencv/opencv2/gapi/plaidml/core.hpp
@@ -0,0 +1,20 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2019 Intel Corporation
+
+
+#ifndef OPENCV_GAPI_PLAIDML_CORE_HPP
+#define OPENCV_GAPI_PLAIDML_CORE_HPP
+
+#include <opencv2/gapi/gkernel.hpp>     // GKernelPackage
+#include <opencv2/gapi/own/exports.hpp> // GAPI_EXPORTS
+
+namespace cv { namespace gapi { namespace core { namespace plaidml {
+
+GAPI_EXPORTS GKernelPackage kernels();
+
+}}}}
+
+#endif // OPENCV_GAPI_PLAIDML_CORE_HPP
diff --git a/IPL/include/opencv/opencv2/gapi/plaidml/gplaidmlkernel.hpp b/IPL/include/opencv/opencv2/gapi/plaidml/gplaidmlkernel.hpp
new file mode 100644
index 0000000..7ce00cf
--- /dev/null
+++ b/IPL/include/opencv/opencv2/gapi/plaidml/gplaidmlkernel.hpp
@@ -0,0 +1,140 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2019 Intel Corporation
+//
+
+
+#ifndef OPENCV_GAPI_GPLAIDMLKERNEL_HPP
+#define OPENCV_GAPI_GPLAIDMLKERNEL_HPP
+
+#include <opencv2/gapi/gkernel.hpp>
+#include <opencv2/gapi/garg.hpp>
+
+namespace plaidml
+{
+namespace edsl
+{
+    class Tensor;
+} // namespace edsl
+} // namespace plaidml
+
+namespace cv
+{
+namespace gapi
+{
+namespace plaidml
+{
+
+GAPI_EXPORTS cv::gapi::GBackend backend();
+
+} // namespace plaidml
+} // namespace gapi
+
+struct GPlaidMLContext
+{
+    // Generic accessor API: input argument number `input` viewed as T (vector::at() rejects a bad index)
+    template<typename T>
+    const T& inArg(int input) { return m_args.at(input).get<T>(); }
+
+    // Syntax sugar: input argument number `input` as a PlaidML tensor
+    const plaidml::edsl::Tensor& inTensor(int input)
+    {
+        return inArg<plaidml::edsl::Tensor>(input);
+    }
+
+    plaidml::edsl::Tensor& outTensor(int output)
+    {
+        return *(m_results.at(output).get<plaidml::edsl::Tensor*>());  // result is stored as a Tensor* and dereferenced here
+    }
+
+    std::vector<GArg> m_args;                          // kernel inputs, addressed by position
+    std::unordered_map<std::size_t, GArg> m_results;   // kernel outputs, keyed by output index
+};
+
+class GAPI_EXPORTS GPlaidMLKernel
+{
+public:
+    using F = std::function<void(GPlaidMLContext &)>;
+
+    GPlaidMLKernel() = default;
+    explicit GPlaidMLKernel(const F& f) : m_f(f) {};
+
+    void apply(GPlaidMLContext &ctx) const
+    {
+        GAPI_Assert(m_f);
+        m_f(ctx);
+    }
+
+protected:
+    F m_f;
+};
+
+
+namespace detail
+{
+
+template<class T> struct plaidml_get_in;
+template<> struct plaidml_get_in<cv::GMat>
+{
+    static const plaidml::edsl::Tensor& get(GPlaidMLContext& ctx, int idx)
+    {
+        return ctx.inTensor(idx);
+    }
+};
+
+template<class T> struct plaidml_get_in
+{
+    static T get(GPlaidMLContext &ctx, int idx) { return ctx.inArg<T>(idx); }
+};
+
+template<class T> struct plaidml_get_out;
+template<> struct plaidml_get_out<cv::GMat>
+{
+    static plaidml::edsl::Tensor& get(GPlaidMLContext& ctx, int idx)
+    {
+        return ctx.outTensor(idx);
+    }
+};
+
+template<typename, typename, typename>
+struct PlaidMLCallHelper;
+
+template<typename Impl, typename... Ins, typename... Outs>
+struct PlaidMLCallHelper<Impl, std::tuple<Ins...>, std::tuple<Outs...> >
+{
+    template<int... IIs, int... OIs>
+    static void call_impl(GPlaidMLContext &ctx, detail::Seq<IIs...>, detail::Seq<OIs...>)
+    {
+        Impl::run(plaidml_get_in<Ins>::get(ctx, IIs)..., plaidml_get_out<Outs>::get(ctx, OIs)...);
+    }
+
+    static void call(GPlaidMLContext& ctx)
+    {
+        call_impl(ctx,
+                  typename detail::MkSeq<sizeof...(Ins)>::type(),
+                  typename detail::MkSeq<sizeof...(Outs)>::type());
+    }
+};
+
+} // namespace detail
+
+template<class Impl, class K>
+class GPlaidMLKernelImpl: public cv::detail::PlaidMLCallHelper<Impl, typename K::InArgs, typename K::OutArgs>,
+                          public cv::detail::KernelTag
+{
+    using P = detail::PlaidMLCallHelper<Impl, typename K::InArgs, typename K::OutArgs>;
+
+public:
+    using API = K;
+
+    static cv::gapi::GBackend backend()  { return cv::gapi::plaidml::backend(); }
+    static cv::GPlaidMLKernel kernel()   { return GPlaidMLKernel(&P::call);     }
+};
+
+#define GAPI_PLAIDML_KERNEL(Name, API) struct Name: public cv::GPlaidMLKernelImpl<Name, API>
+
+} // namespace cv
+
+#endif // OPENCV_GAPI_GPLAIDMLKERNEL_HPP
diff --git a/IPL/include/opencv/opencv2/gapi/plaidml/plaidml.hpp b/IPL/include/opencv/opencv2/gapi/plaidml/plaidml.hpp
new file mode 100644
index 0000000..bd12d25
--- /dev/null
+++ b/IPL/include/opencv/opencv2/gapi/plaidml/plaidml.hpp
@@ -0,0 +1,48 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2019 Intel Corporation
+
+
+#ifndef OPENCV_GAPI_PLAIDML_PLAIDML_HPP
+#define OPENCV_GAPI_PLAIDML_PLAIDML_HPP
+
+#include <string>
+#include <opencv2/gapi/gcommon.hpp> // CompileArgTag
+
+namespace cv
+{
+namespace gapi
+{
+namespace plaidml
+{
+
+/** \addtogroup gapi_compile_args
+ * @{
+ */
+/**
+ * @brief This structure represents the basic parameters for the experimental
+ * PlaidML backend.
+ */
+struct config
+{
+    std::string dev_id; //!< Device ID. Refer to PlaidML documentation for details.
+    std::string trg_id; //!< Target ID. Refer to PlaidML documentation for details.
+};
+/** @} gapi_compile_args */
+
+} // namespace plaidml
+} // namespace gapi
+
+namespace detail
+{
+    template<> struct CompileArgTag<cv::gapi::plaidml::config>
+    {
+        static const char* tag() { return "gapi.plaidml.config"; }
+    };
+} // namespace detail
+
+} // namespace cv
+
+#endif // OPENCV_GAPI_PLAIDML_PLAIDML_HPP
diff --git a/IPL/include/opencv/opencv2/gapi/render.hpp b/IPL/include/opencv/opencv2/gapi/render.hpp
new file mode 100644
index 0000000..52e55b0
--- /dev/null
+++ b/IPL/include/opencv/opencv2/gapi/render.hpp
@@ -0,0 +1,14 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2019 Intel Corporation
+
+#ifndef OPENCV_GAPI_RENDER_ROOT_HPP
+#define OPENCV_GAPI_RENDER_ROOT_HPP
+
+// This file is just a shortcut to render/render.hpp
+
+#include <opencv2/gapi/render/render.hpp>
+
+#endif // OPENCV_GAPI_RENDER_ROOT_HPP
diff --git a/IPL/include/opencv/opencv2/gapi/render/render.hpp b/IPL/include/opencv/opencv2/gapi/render/render.hpp
new file mode 100644
index 0000000..fcb69cb
--- /dev/null
+++ b/IPL/include/opencv/opencv2/gapi/render/render.hpp
@@ -0,0 +1,469 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2018-2019 Intel Corporation
+
+
+#ifndef OPENCV_GAPI_RENDER_HPP
+#define OPENCV_GAPI_RENDER_HPP
+
+#include <string>
+#include <vector>
+
+#include <opencv2/imgproc.hpp>
+#include <opencv2/gapi.hpp>
+
+#include <opencv2/gapi/opencv_includes.hpp>
+#include <opencv2/gapi/util/variant.hpp>
+#include <opencv2/gapi/own/exports.hpp>
+
+
+/** \defgroup gapi_draw G-API Drawing and composition functionality
+ *  @{
+ *
+ *  @brief Functions for in-graph drawing.
+ *
+ *  @note This is a Work in Progress functionality and APIs may
+ *  change in the future releases.
+ *
+ *  G-API can do some in-graph drawing with generic operations and a
+ *  set of [rendering primitives](@ref gapi_draw_prims).
+ *  In contrast with traditional OpenCV, in G-API the user needs to form a
+ *  *rendering list* of primitives to draw. This list can be built
+ *  manually or generated within a graph. This list is passed to
+ *  [special operations or functions](@ref gapi_draw_api) where all
+ *  primitives are interpreted and applied to the image.
+ *
+ *  For example, in a complex pipeline a list of detected objects
+ *  can be translated in-graph to a list of cv::gapi::wip::draw::Rect
+ *  primitives to highlight those with bounding boxes, or a list of
+ *  detected faces can be translated in-graph to a list of
+ *  cv::gapi::wip::draw::Mosaic primitives to hide sensitive content
+ *  or protect privacy.
+ *
+ *  Like any other operations, rendering in G-API can be reimplemented
+ *  by different backends. Currently only an OpenCV-based backend is
+ *  available.
+ *
+ *  In addition to the graph-level operations, there are also regular
+ *  (immediate) OpenCV-like functions available -- see
+ *  cv::gapi::wip::draw::render(). These functions are just wrappers
+ *  over regular G-API and build the rendering graphs on the fly, so
+ *  take compilation arguments as parameters.
+ *
+ *  Currently this API is more machine-oriented than human-oriented.
+ *  The main purpose is to translate a set of domain-specific objects
+ *  to a list of primitives to draw. For example, in order to generate
+ *  a picture like this:
+ *
+ *  ![](modules/gapi/doc/pics/render_example.png)
+ *
+ *  Rendering list needs to be generated as follows:
+ *
+ *  @include modules/gapi/samples/draw_example.cpp
+ *
+ *  @defgroup gapi_draw_prims Drawing primitives
+ *  @defgroup gapi_draw_api Drawing operations and functions
+ *  @}
+ */
+
+namespace cv
+{
+namespace gapi
+{
+namespace wip
+{
+namespace draw
+{
+
+/**
+ * @brief This structure specifies which FreeType font to use by FText primitives.
+ */
+struct freetype_font
+{
+    /*@{*/
+    std::string path; //!< The path to the font file (.ttf)
+    /*@}*/
+};
+
+//! @addtogroup gapi_draw_prims
+//! @{
+/**
+ * @brief This structure represents a text string to draw.
+ *
+ * Parameters match cv::putText().
+ */
+struct Text
+{
+    /**
+     * @brief Text constructor
+     *
+     * @param text_               The text string to be drawn
+     * @param org_                The bottom-left corner of the text string in the image
+     * @param ff_                 The font type, see #HersheyFonts
+     * @param fs_                 The font scale factor that is multiplied by the font-specific base size
+     * @param color_              The text color
+     * @param thick_              The thickness of the lines used to draw a text
+     * @param lt_                 The line type. See #LineTypes
+     * @param bottom_left_origin_ When true, the image data origin is at the bottom-left corner. Otherwise, it is at the top-left corner
+     */
+    Text(const std::string& text_,
+         const cv::Point& org_,
+         int ff_,
+         double fs_,
+         const cv::Scalar& color_,
+         int thick_ = 1,
+         int lt_ = cv::LINE_8,
+         bool bottom_left_origin_ = false) :
+        text(text_), org(org_), ff(ff_), fs(fs_),
+        color(color_), thick(thick_), lt(lt_), bottom_left_origin(bottom_left_origin_)
+    {
+    }
+
+    /*@{*/
+    std::string text;               //!< The text string to be drawn
+    cv::Point   org;                //!< The bottom-left corner of the text string in the image
+    int         ff;                 //!< The font type, see #HersheyFonts
+    double      fs;                 //!< The font scale factor that is multiplied by the font-specific base size
+    cv::Scalar  color;              //!< The text color
+    int         thick;              //!< The thickness of the lines used to draw a text
+    int         lt;                 //!< The line type. See #LineTypes
+    bool        bottom_left_origin; //!< When true, the image data origin is at the bottom-left corner. Otherwise, it is at the top-left corner
+    /*@}*/
+};
+
+/**
+ * @brief This structure represents a text string to draw using
+ * FreeType renderer.
+ *
+ * If OpenCV is built without FreeType support, this primitive will
+ * fail at the execution stage.
+ */
+struct FText
+{
+    /**
+     * @brief FText constructor
+     *
+     * @param text_ The text string to be drawn
+     * @param org_  The bottom-left corner of the text string in the image
+     * @param fh_   The height of text
+     * @param color_ The text color
+     */
+    FText(const std::wstring& text_,
+          const cv::Point& org_,
+          int fh_,
+          const cv::Scalar& color_) :
+        text(text_), org(org_), fh(fh_), color(color_)
+    {
+    }
+
+    /*@{*/
+    std::wstring text;              //!< The text string to be drawn
+    cv::Point    org;               //!< The bottom-left corner of the text string in the image
+    int          fh;                //!< The height of text
+    cv::Scalar   color;             //!< The text color
+    /*@}*/
+};
+
+/**
+ * @brief This structure represents a rectangle to draw.
+ *
+ * Parameters match cv::rectangle().
+ */
+struct Rect
+{
+    /**
+     * @brief Rect constructor
+     *
+     * @param rect_   Coordinates of the rectangle
+     * @param color_  The rectangle color or brightness (grayscale image)
+     * @param thick_  The thickness of lines that make up the rectangle. Negative values, like #FILLED, mean that the function has to draw a filled rectangle
+     * @param lt_     The type of the line. See #LineTypes
+     * @param shift_  The number of fractional bits in the point coordinates
+     */
+    Rect(const cv::Rect& rect_,
+         const cv::Scalar& color_,
+         int thick_ = 1,
+         int lt_ = cv::LINE_8,
+         int shift_ = 0) :
+        rect(rect_), color(color_), thick(thick_), lt(lt_), shift(shift_)
+    {
+    }
+
+    /*@{*/
+    cv::Rect   rect;  //!< Coordinates of the rectangle
+    cv::Scalar color; //!< The rectangle color or brightness (grayscale image)
+    int        thick; //!< The thickness of lines that make up the rectangle. Negative values, like #FILLED, mean that the function has to draw a filled rectangle
+    int        lt;    //!< The type of the line. See #LineTypes
+    int        shift; //!< The number of fractional bits in the point coordinates
+    /*@}*/
+};
+
+/**
+ * @brief This structure represents a circle to draw.
+ *
+ * Parameters match cv::circle().
+ */
+struct Circle
+{
+    /**
+     * @brief Circle constructor
+     *
+     * @param  center_ The center of the circle
+     * @param  radius_ The radius of the circle
+     * @param  color_  The color of the  circle
+     * @param  thick_  The thickness of the circle outline, if positive. Negative values, like #FILLED, mean that a filled circle is to be drawn
+     * @param  lt_     The Type of the circle boundary. See #LineTypes
+     * @param  shift_  The Number of fractional bits in the coordinates of the center and in the radius value
+     */
+    Circle(const cv::Point& center_,
+           int radius_,
+           const cv::Scalar& color_,
+           int thick_ = 1,
+           int lt_ = cv::LINE_8,
+           int shift_ = 0) :
+        center(center_), radius(radius_), color(color_), thick(thick_), lt(lt_), shift(shift_)
+    {
+    }
+
+    /*@{*/
+    cv::Point  center; //!< The center of the circle
+    int        radius; //!< The radius of the circle
+    cv::Scalar color;  //!< The color of the  circle
+    int        thick;  //!< The thickness of the circle outline, if positive. Negative values, like #FILLED, mean that a filled circle is to be drawn
+    int        lt;     //!< The Type of the circle boundary. See #LineTypes
+    int        shift;  //!< The Number of fractional bits in the coordinates of the center and in the radius value
+    /*@}*/
+};
+
+/**
+ * @brief This structure represents a line to draw.
+ *
+ * Parameters match cv::line().
+ */
+struct Line
+{
+    /**
+     * @brief Line constructor
+     *
+     * @param  pt1_    The first point of the line segment
+     * @param  pt2_    The second point of the line segment
+     * @param  color_  The line color
+     * @param  thick_  The thickness of line
+     * @param  lt_     The Type of the line. See #LineTypes
+     * @param  shift_  The number of fractional bits in the point coordinates
+    */
+    Line(const cv::Point& pt1_,
+         const cv::Point& pt2_,
+         const cv::Scalar& color_,
+         int thick_ = 1,
+         int lt_ = cv::LINE_8,
+         int shift_ = 0) :
+        pt1(pt1_), pt2(pt2_), color(color_), thick(thick_), lt(lt_), shift(shift_)
+    {
+    }
+
+    /*@{*/
+    cv::Point  pt1;    //!< The first point of the line segment
+    cv::Point  pt2;    //!< The second point of the line segment
+    cv::Scalar color;  //!< The line color
+    int        thick;  //!< The thickness of line
+    int        lt;     //!< The Type of the line. See #LineTypes
+    int        shift;  //!< The number of fractional bits in the point coordinates
+    /*@}*/
+};
+
+/**
+ * @brief This structure represents a mosaicing operation.
+ *
+ * Mosaicing is a very basic method to obfuscate regions in the image.
+ */
+struct Mosaic
+{
+    /**
+     * @brief Mosaic constructor
+     *
+     * @param mos_    Coordinates of the mosaic
+     * @param cellSz_ Cell size (same for X, Y)
+     * @param decim_  Decimation (0 stands for no decimation)
+    */
+    Mosaic(const cv::Rect& mos_,
+           int cellSz_,
+           int decim_) :
+        mos(mos_), cellSz(cellSz_), decim(decim_)
+    {
+    }
+
+    /*@{*/
+    cv::Rect   mos;    //!< Coordinates of the mosaic
+    int        cellSz; //!< Cell size (same for X, Y)
+    int        decim;  //!< Decimation (0 stands for no decimation)
+    /*@}*/
+};
+
+/**
+ * @brief This structure represents an image to draw.
+ *
+ * Image is blended on a frame using the specified mask.
+ */
+struct Image
+{
+    /**
+     * @brief Image constructor
+     *
+     * @param  org_   The bottom-left corner of the image
+     * @param  img_   Image to draw
+     * @param  alpha_ Alpha channel for image to draw (same size and number of channels)
+    */
+    Image(const cv::Point& org_,
+          const cv::Mat& img_,
+          const cv::Mat& alpha_) :
+        org(org_), img(img_), alpha(alpha_)
+    {
+    }
+
+    /*@{*/
+    cv::Point org;   //!< The bottom-left corner of the image
+    cv::Mat   img;   //!< Image to draw
+    cv::Mat   alpha; //!< Alpha channel for image to draw (same size and number of channels)
+    /*@}*/
+};
+
+/**
+ * @brief This structure represents a polygon to draw.
+ */
+struct Poly
+{
+    /**
+     * @brief Poly constructor
+     *
+     * @param points_ Points to connect
+     * @param color_  The line color
+     * @param thick_  The thickness of line
+     * @param lt_     The Type of the line. See #LineTypes
+     * @param shift_  The number of fractional bits in the point coordinate
+    */
+    Poly(const std::vector<cv::Point>& points_,
+         const cv::Scalar& color_,
+         int thick_ = 1,
+         int lt_ = cv::LINE_8,
+         int shift_ = 0) :
+        points(points_), color(color_), thick(thick_), lt(lt_), shift(shift_)
+    {
+    }
+
+    /*@{*/
+    std::vector<cv::Point> points;  //!< Points to connect
+    cv::Scalar             color;   //!< The line color
+    int                    thick;   //!< The thickness of line
+    int                    lt;      //!< The Type of the line. See #LineTypes
+    int                    shift;   //!< The number of fractional bits in the point coordinate
+    /*@}*/
+};
+
+using Prim  = util::variant
+    < Text
+    , FText
+    , Rect
+    , Circle
+    , Line
+    , Mosaic
+    , Image
+    , Poly
+    >;
+
+using Prims     = std::vector<Prim>;
+//! @} gapi_draw_prims
+
+using GMat2     = std::tuple<cv::GMat,cv::GMat>;
+using GMatDesc2 = std::tuple<cv::GMatDesc,cv::GMatDesc>;
+
+
+//! @addtogroup gapi_draw_api
+//! @{
+/** @brief The function renders the passed drawing primitives on the input image
+
+@param bgr input image: 8-bit unsigned 3-channel image @ref CV_8UC3.
+@param prims vector of drawing primitives
+@param args graph compile time parameters
+*/
+void GAPI_EXPORTS render(cv::Mat& bgr,
+                         const Prims& prims,
+                         cv::GCompileArgs&& args = {});
+
+/** @brief The function renders the passed drawing primitives on two NV12 planes
+
+@param y_plane input image: 8-bit unsigned 1-channel image @ref CV_8UC1.
+@param uv_plane input image: 8-bit unsigned 2-channel image @ref CV_8UC2.
+@param prims vector of drawing primitives
+@param args graph compile time parameters
+*/
+void GAPI_EXPORTS render(cv::Mat& y_plane,
+                         cv::Mat& uv_plane,
+                         const Prims& prims,
+                         cv::GCompileArgs&& args = {});
+
+G_TYPED_KERNEL_M(GRenderNV12, <GMat2(cv::GMat,cv::GMat,cv::GArray<wip::draw::Prim>)>, "org.opencv.render.nv12")
+{
+     static GMatDesc2 outMeta(GMatDesc y_plane, GMatDesc uv_plane, GArrayDesc)
+     {
+         return std::make_tuple(y_plane, uv_plane);
+     }
+};
+
+G_TYPED_KERNEL(GRenderBGR, <cv::GMat(cv::GMat,cv::GArray<wip::draw::Prim>)>, "org.opencv.render.bgr")
+{
+     static GMatDesc outMeta(GMatDesc bgr, GArrayDesc)
+     {
+         return bgr;
+     }
+};
+
+/** @brief Renders on 3 channels input
+
+Output image must be 8-bit unsigned planar 3-channel image
+
+@param src input image: 8-bit unsigned 3-channel image @ref CV_8UC3
+@param prims draw primitives
+*/
+GAPI_EXPORTS GMat render3ch(const GMat& src, const GArray<Prim>& prims);
+
+/** @brief Renders on two planes
+
+Output y image must be 8-bit unsigned planar 1-channel image @ref CV_8UC1
+uv image must be 8-bit unsigned planar 2-channel image @ref CV_8UC2
+
+@param y  input image: 8-bit unsigned 1-channel image @ref CV_8UC1
+@param uv input image: 8-bit unsigned 2-channel image @ref CV_8UC2
+@param prims draw primitives
+*/
+GAPI_EXPORTS GMat2 renderNV12(const GMat& y,
+                              const GMat& uv,
+                              const GArray<Prim>& prims);
+//! @} gapi_draw_api
+
+} // namespace draw
+} // namespace wip
+
+namespace render
+{
+namespace ocv
+{
+    GAPI_EXPORTS cv::gapi::GKernelPackage kernels();
+
+} // namespace ocv
+} // namespace render
+} // namespace gapi
+
+namespace detail
+{
+    template<> struct CompileArgTag<cv::gapi::wip::draw::freetype_font>
+    {
+        static const char* tag() { return "gapi.freetype_font"; }
+    };
+} // namespace detail
+
+} // namespace cv
+
+#endif // OPENCV_GAPI_RENDER_HPP
diff --git a/IPL/include/opencv/opencv2/gapi/streaming/cap.hpp b/IPL/include/opencv/opencv2/gapi/streaming/cap.hpp
new file mode 100644
index 0000000..faa5550
--- /dev/null
+++ b/IPL/include/opencv/opencv2/gapi/streaming/cap.hpp
@@ -0,0 +1,110 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2019 Intel Corporation
+
+#ifndef OPENCV_GAPI_STREAMING_CAP_HPP
+#define OPENCV_GAPI_STREAMING_CAP_HPP
+
+/**
+ * YOUR ATTENTION PLEASE!
+ *
+ * This is a header-only implementation of cv::VideoCapture-based
+ * Stream source.  It is not built by default with G-API as G-API
+ * doesn't depend on videoio module.
+ *
+ * If you want to use it in your application, please make sure
+ * videoio is available in your OpenCV package and is linked to your
+ * application.
+ *
+ * Note for developers: please don't put videoio dependency in G-API
+ * because of this file.
+ */
+
+#include <opencv2/videoio.hpp>
+#include <opencv2/gapi/garg.hpp>
+
+namespace cv {
+namespace gapi {
+namespace wip {
+
+/**
+ * @brief OpenCV's VideoCapture-based streaming source.
+ *
+ * This class implements IStreamSource interface.
+ * Its constructor takes the same parameters as cv::VideoCapture does.
+ *
+ * Please make sure that videoio OpenCV module is available before using
+ * this in your application (G-API doesn't depend on it directly).
+ *
+ * @note stream sources are passed to G-API via shared pointers, so
+ *  please use gapi::wip::make_src<> to create objects and ptr() to pass a
+ *  GCaptureSource to cv::gin().
+ */
+class GCaptureSource: public IStreamSource
+{
+public:
+    explicit GCaptureSource(int id) : cap(id) { prep(); }
+    explicit GCaptureSource(const std::string &path) : cap(path) { prep(); }
+
+    // TODO: Add more constructor overloads to make it
+    // fully compatible with VideoCapture's interface.
+
+protected:
+    cv::VideoCapture cap;
+    cv::Mat first;
+    bool first_pulled = false;
+
+    void prep()
+    {
+        // Prepare first frame to report its meta to engine
+        // when needed
+        GAPI_Assert(first.empty());
+        cv::Mat tmp;
+        if (!cap.read(tmp))
+        {
+            GAPI_Assert(false && "Couldn't grab the very first frame");
+        }
+        // NOTE: Some decode/media VideoCapture backends continue
+        // owning the video buffer under cv::Mat so in order to
+        // process it safely in a highly concurrent pipeline, clone()
+        // is the only right way.
+        first = tmp.clone();
+    }
+
+    virtual bool pull(cv::gapi::wip::Data &data) override
+    {
+        if (!first_pulled)
+        {
+            GAPI_Assert(!first.empty());
+            first_pulled = true;
+            data = first; // no need to clone here since it was cloned already
+            return true;
+        }
+
+        if (!cap.isOpened()) return false;
+
+        cv::Mat frame;
+        if (!cap.read(frame))
+        {
+            // end-of-stream happened
+            return false;
+        }
+        // Same reason to clone as in prep()
+        data = frame.clone();
+        return true;
+    }
+
+    virtual GMetaArg descr_of() const override
+    {
+        GAPI_Assert(!first.empty());
+        return cv::GMetaArg{cv::descr_of(first)};
+    }
+};
+
+} // namespace wip
+} // namespace gapi
+} // namespace cv
+
+#endif // OPENCV_GAPI_STREAMING_CAP_HPP
diff --git a/IPL/include/opencv/opencv2/gapi/streaming/source.hpp b/IPL/include/opencv/opencv2/gapi/streaming/source.hpp
new file mode 100644
index 0000000..6597cad
--- /dev/null
+++ b/IPL/include/opencv/opencv2/gapi/streaming/source.hpp
@@ -0,0 +1,62 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2019 Intel Corporation
+
+#ifndef OPENCV_GAPI_STREAMING_SOURCE_HPP
+#define OPENCV_GAPI_STREAMING_SOURCE_HPP
+
+#include <memory>                      // shared_ptr
+#include <type_traits>                 // is_base_of
+
+#include <opencv2/gapi/gmetaarg.hpp>   // GMetaArg
+
+
+namespace cv {
+namespace gapi {
+namespace wip {
+    struct Data; // "forward-declaration" of GRunArg
+
+/**
+ * @brief Abstract streaming pipeline source.
+ *
+ * Implement this interface if you want to customize the way data is
+ * streamed into GStreamingCompiled.
+ *
+ * Objects implementing this interface can be passed to
+ * GStreamingCompiled using setSource() with cv::gin(). Regular
+ * compiled graphs (GCompiled) don't support input objects of this
+ * type.
+ *
+ * Default cv::VideoCapture-based implementation is available, see
+ * cv::gapi::wip::GCaptureSource.
+ *
+ * @note stream sources are passed to G-API via shared pointers, so
+ *  please use ptr() when passing an IStreamSource implementation to
+ *  cv::gin().
+ */
+class IStreamSource: public std::enable_shared_from_this<IStreamSource>
+{
+public:
+    using Ptr = std::shared_ptr<IStreamSource>;
+    Ptr ptr() { return shared_from_this(); }
+    virtual bool pull(Data &data) = 0;
+    virtual GMetaArg descr_of() const = 0;
+    virtual ~IStreamSource() = default;
+};
+
+template<class T, class... Args> // T: concrete source type; Args: forwarded to T's constructor
+inline IStreamSource::Ptr make_src(Args&&... args) // Creates a T and returns it as a shared IStreamSource pointer
+{
+    static_assert(std::is_base_of<IStreamSource, T>::value,
+                  "T must implement the cv::gapi::wip::IStreamSource interface!");
+    auto src_ptr = std::make_shared<T>(std::forward<Args>(args)...);
+    return src_ptr->ptr(); // ptr() is shared_from_this(): the result shares ownership with src_ptr
+}
+
+} // namespace wip
+} // namespace gapi
+} // namespace cv
+
+#endif // OPENCV_GAPI_STREAMING_SOURCE_HPP
diff --git a/IPL/include/opencv/opencv2/gapi/util/any.hpp b/IPL/include/opencv/opencv2/gapi/util/any.hpp
new file mode 100644
index 0000000..5f97e95
--- /dev/null
+++ b/IPL/include/opencv/opencv2/gapi/util/any.hpp
@@ -0,0 +1,186 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2018 Intel Corporation
+
+
+#ifndef OPENCV_GAPI_UTIL_ANY_HPP
+#define OPENCV_GAPI_UTIL_ANY_HPP
+
+#include <memory>
+#include <type_traits>
+#include <typeinfo>
+#include <utility>
+
+#include <opencv2/gapi/util/throw.hpp>
+
+#if defined(_MSC_VER)
+   // disable MSVC warning on "multiple copy constructors specified"
+#  pragma warning(disable: 4521)
+#endif
+
+namespace cv
+{
+
+namespace internal
+{
+    template <class T, class Source> // T: target type of the cast; Source: type of the value being cast
+    T down_cast(Source operand) // dynamic_cast when RTTI is available, unchecked static_cast otherwise
+    {
+#if defined(__GXX_RTTI) || defined(_CPPRTTI) // compiler-provided "RTTI is enabled" macros
+       return dynamic_cast<T>(operand);
+#else
+    #warning used static cast instead of dynamic because RTTI is disabled
+       return static_cast<T>(operand);
+#endif
+    }
+}
+
+namespace util
+{
+   class bad_any_cast : public std::bad_cast
+   {
+   public:
+       virtual const char* what() const noexcept override
+       {
+           return "Bad any cast";
+       }
+   };
+
+   //modeled against C++17 std::any
+
+   class any // Type-erased holder of a single copy-constructible value; may also be empty
+   {
+   private:
+      struct holder;
+      using holder_ptr = std::unique_ptr<holder>;
+      struct holder // type-erased base through which the stored value is cloned and destroyed
+      {
+         virtual holder_ptr clone() = 0; // returns a newly allocated copy of the concrete holder
+         virtual ~holder() = default;
+      };
+
+      template <typename value_t>
+      struct holder_impl : holder // concrete holder owning a value of type value_t
+      {
+         value_t v;
+         template<typename arg_t>
+         holder_impl(arg_t&& a) : v(std::forward<arg_t>(a)) {}
+         holder_ptr clone() override { return holder_ptr(new holder_impl (v));}
+      };
+
+      holder_ptr hldr; // null while the any holds nothing
+   public:
+      template<class value_t> // wraps arg; it is stored as its decayed type
+      any(value_t&& arg) :  hldr(new holder_impl<typename std::decay<value_t>::type>( std::forward<value_t>(arg))) {}
+
+      any(any const& src) : hldr( src.hldr ? src.hldr->clone() : nullptr) {}
+      // Non-const lvalue overload, so copying from a non-const any is not taken by the template constructor above (avoids an enable_if<not any> there)
+      any(any & src) : any (const_cast<any const&>(src)) {}
+
+      any()       = default;
+      any(any&& ) = default;
+
+      any& operator=(any&&) = default;
+
+      any& operator=(any const& src) // copy first, then swap the copy in
+      {
+         any copy(src);
+         swap(*this, copy);
+         return *this;
+      }
+
+      template<class value_t>
+      friend value_t* any_cast(any* operand);
+
+      template<class value_t>
+      friend const value_t* any_cast(const any* operand);
+
+      template<class value_t>
+      friend value_t& unsafe_any_cast(any& operand);
+
+      template<class value_t>
+      friend const value_t& unsafe_any_cast(const any& operand);
+
+      friend void swap(any & lhs, any& rhs) // exchanges the held values by swapping the holder pointers
+      {
+         swap(lhs.hldr, rhs.hldr);
+      }
+
+   };
+
+   template<class value_t>
+   value_t* any_cast(any* operand)
+   {
+      auto casted = internal::down_cast<any::holder_impl<typename std::decay<value_t>::type> *>(operand->hldr.get());
+      if (casted){
+         return & (casted->v);
+      }
+      return nullptr;
+   }
+
+   template<class value_t>
+   const value_t* any_cast(const any* operand)
+   {
+      auto casted = internal::down_cast<any::holder_impl<typename std::decay<value_t>::type> *>(operand->hldr.get());
+      if (casted){
+         return & (casted->v);
+      }
+      return nullptr;
+   }
+
+   template<class value_t>
+   value_t& any_cast(any& operand)
+   {
+      auto ptr = any_cast<value_t>(&operand);
+      if (ptr)
+      {
+         return *ptr;
+      }
+
+      throw_error(bad_any_cast());
+   }
+
+
+   template<class value_t>
+   const value_t& any_cast(const any& operand)
+   {
+      auto ptr = any_cast<value_t>(&operand);
+      if (ptr)
+      {
+         return *ptr;
+      }
+
+      throw_error(bad_any_cast());
+   }
+
+   template<class value_t>
+   inline value_t& unsafe_any_cast(any& operand)
+   {
+#ifdef DEBUG
+      return any_cast<value_t>(operand);
+#else
+      return static_cast<any::holder_impl<typename std::decay<value_t>::type> *>(operand.hldr.get())->v;
+#endif
+   }
+
+   template<class value_t>
+   inline const value_t& unsafe_any_cast(const any& operand)
+   {
+#ifdef DEBUG
+      return any_cast<value_t>(operand);
+#else
+      return static_cast<any::holder_impl<typename std::decay<value_t>::type> *>(operand.hldr.get())->v;
+#endif
+   }
+
+} // namespace util
+} // namespace cv
+
+#if defined(_MSC_VER)
+   // Enable "multiple copy constructors specified" back
+#  pragma warning(default: 4521)
+#endif
+
+#endif // OPENCV_GAPI_UTIL_ANY_HPP
diff --git a/IPL/include/opencv/opencv2/gapi/util/compiler_hints.hpp b/IPL/include/opencv/opencv2/gapi/util/compiler_hints.hpp
new file mode 100644
index 0000000..a41a971
--- /dev/null
+++ b/IPL/include/opencv/opencv2/gapi/util/compiler_hints.hpp
@@ -0,0 +1,19 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2018 Intel Corporation
+
+#ifndef OPENCV_GAPI_UTIL_COMPILER_HINTS_HPP
+#define OPENCV_GAPI_UTIL_COMPILER_HINTS_HPP
+
+namespace cv
+{
+namespace util
+{
+    //! Utility template function to prevent "unused" warnings by various compilers.
+    template<typename T> void suppress_unused_warning( const T& ) {}
+} // namespace util
+} // namespace cv
+
+#endif /* OPENCV_GAPI_UTIL_COMPILER_HINTS_HPP */
diff --git a/IPL/include/opencv/opencv2/gapi/util/optional.hpp b/IPL/include/opencv/opencv2/gapi/util/optional.hpp
new file mode 100644
index 0000000..1aa2b26
--- /dev/null
+++ b/IPL/include/opencv/opencv2/gapi/util/optional.hpp
@@ -0,0 +1,178 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2018 Intel Corporation
+
+
+#ifndef OPENCV_GAPI_UTIL_OPTIONAL_HPP
+#define OPENCV_GAPI_UTIL_OPTIONAL_HPP
+
+#include <opencv2/gapi/util/variant.hpp>
+
+// A poor man's `optional` implementation, incompletely modeled against C++17 spec.
+namespace cv
+{
+namespace util
+{
+    class bad_optional_access: public std::exception
+    {
+    public:
+        virtual const char *what() const noexcept override
+        {
+            return "Bad optional access";
+        }
+    };
+
+    // TODO: nullopt_t
+
+    // Interface ///////////////////////////////////////////////////////////////
+    template<typename T> class optional
+    {
+    public:
+        // Constructors
+        // NB.: there were issues with Clang 3.8 when =default() was used
+        // instead {}
+        optional() {};
+        optional(const optional&) = default;
+        explicit optional(T &&value) noexcept;
+        explicit optional(const T &value) noexcept;
+        optional(optional &&) noexcept;
+        // TODO: optional(nullopt_t) noexcept;
+        // TODO: optional(const optional<U> &)
+        // TODO: optional(optional<U> &&)
+        // TODO: optional(Args&&...)
+        // TODO: optional(initializer_list<U>)
+        // TODO: optional(U&& value);
+
+        // Assignment
+        optional& operator=(const optional& rhs) = default;
+        optional& operator=(optional&& rhs);
+
+        // Observers
+        T* operator-> ();
+        const T* operator-> () const;
+        T& operator* ();
+        const T& operator* () const;
+        // TODO: && versions
+
+        operator bool() const noexcept;
+        bool has_value() const noexcept;
+
+        T& value();
+        const T& value() const;
+        // TODO: && versions
+
+        template<class U>
+        T value_or(U &&default_value) const;
+
+        void swap(optional &other) noexcept;
+        void reset() noexcept;
+        // TODO: emplace
+
+        // TODO: operator==, !=, <, <=, >, >=
+
+    private:
+        struct nothing {};
+        util::variant<nothing, T> m_holder;
+    };
+
+    template<class T>
+    optional<typename std::decay<T>::type> make_optional(T&& value);
+
+    // TODO: Args... and initializer_list versions
+
+    // Implementation //////////////////////////////////////////////////////////
+    template<class T> optional<T>::optional(T &&v) noexcept // Constructs an engaged optional that takes over v
+        : m_holder(std::move(v)) // v is a named rvalue reference, i.e. an lvalue here: without std::move it is copied
+    {
+    }
+
+    template<class T> optional<T>::optional(const T &v) noexcept
+        : m_holder(v)
+    {
+    }
+
+    template<class T> optional<T>::optional(optional&& rhs) noexcept
+        : m_holder(std::move(rhs.m_holder))
+    {
+        rhs.reset();
+    }
+
+    template<class T> optional<T>& optional<T>::operator=(optional&& rhs)
+    {
+        m_holder = std::move(rhs.m_holder);
+        rhs.reset();
+        return *this;
+    }
+
+    template<class T> T* optional<T>::operator-> ()
+    {
+        return & *(*this);
+    }
+
+    template<class T> const T* optional<T>::operator-> () const
+    {
+        return & *(*this);
+    }
+
+    template<class T> T& optional<T>::operator* ()
+    {
+        return this->value();
+    }
+
+    template<class T> const T& optional<T>::operator* () const
+    {
+        return this->value();
+    }
+
+    template<class T> optional<T>::operator bool() const noexcept
+    {
+        return this->has_value();
+    }
+
+    template<class T> bool optional<T>::has_value() const noexcept
+    {
+        return util::holds_alternative<T>(m_holder);
+    }
+
+    template<class T> T& optional<T>::value()
+    {
+        if (!this->has_value())
+            throw_error(bad_optional_access());
+        return util::get<T>(m_holder);
+    }
+
+    template<class T> const T& optional<T>::value() const
+    {
+        if (!this->has_value())
+            throw_error(bad_optional_access());
+        return util::get<T>(m_holder);
+    }
+
+    template<class T> // Returns a copy of the held value, or T built from default_value when empty
+    template<class U> T optional<T>::value_or(U &&default_value) const
+    {
+        return (this->has_value() ? this->value() : T(std::forward<U>(default_value))); // forward: an rvalue default is moved, not copied
+    }
+
+    template<class T> void optional<T>::swap(optional<T> &other) noexcept
+    {
+        m_holder.swap(other.m_holder);
+    }
+
+    template<class T> void optional<T>::reset() noexcept
+    {
+        if (this->has_value())
+            m_holder = nothing{};
+    }
+
+    template<class T>
+    optional<typename std::decay<T>::type> make_optional(T&& value)
+    {
+        return optional<typename std::decay<T>::type>(std::forward<T>(value));
+    }
+} // namespace util
+} // namespace cv
+
+#endif // OPENCV_GAPI_UTIL_OPTIONAL_HPP
diff --git a/IPL/include/opencv/opencv2/gapi/util/throw.hpp b/IPL/include/opencv/opencv2/gapi/util/throw.hpp
new file mode 100644
index 0000000..689bf58
--- /dev/null
+++ b/IPL/include/opencv/opencv2/gapi/util/throw.hpp
@@ -0,0 +1,36 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2018 Intel Corporation
+
+
+#ifndef OPENCV_GAPI_UTIL_THROW_HPP
+#define OPENCV_GAPI_UTIL_THROW_HPP
+
+#include <utility>  // std::forward
+
+#if !defined(__EXCEPTIONS)
+#include <stdlib.h>
+#include <stdio.h>
+#endif
+
+namespace cv
+{
+namespace util
+{
+template <class ExceptionType> // ExceptionType: type of the exception object; the fallback path calls its what()
+[[noreturn]] void throw_error(ExceptionType &&e) // Throws e, or prints its message and aborts when exceptions are unavailable
+{
+#if defined(__EXCEPTIONS) || defined(_CPPUNWIND) // exception support is enabled in this build
+    throw std::forward<ExceptionType>(e);
+#else
+    fprintf(stderr, "An exception thrown! %s\n" , e.what()); // cannot throw: report on stderr instead
+    fflush(stderr);
+    abort(); // does not return, in line with [[noreturn]]
+#endif
+}
+} // namespace util
+} // namespace cv
+
+#endif // OPENCV_GAPI_UTIL_THROW_HPP
diff --git a/IPL/include/opencv/opencv2/gapi/util/util.hpp b/IPL/include/opencv/opencv2/gapi/util/util.hpp
new file mode 100644
index 0000000..afcf559
--- /dev/null
+++ b/IPL/include/opencv/opencv2/gapi/util/util.hpp
@@ -0,0 +1,124 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2018-2019 Intel Corporation
+
+
+#ifndef OPENCV_GAPI_UTIL_HPP
+#define OPENCV_GAPI_UTIL_HPP
+
+#include <tuple>
+
+// \cond HIDDEN_SYMBOLS
+// This header file contains some generic utility functions which are
+// used in other G-API Public API headers.
+//
+// PLEASE don't put any stuff here if it is NOT used in public API headers!
+
+namespace cv
+{
+namespace detail
+{
+    // Recursive integer sequence type, useful for enumerating elements of
+    // template parameter packs.
+    template<int... I> struct Seq     { using next = Seq<I..., sizeof...(I)>; };
+    template<int Sz>   struct MkSeq   { using type = typename MkSeq<Sz-1>::type::next; };
+    template<>         struct MkSeq<0>{ using type = Seq<>; };
+
+    // Checks if elements of variadic template satisfy the given Predicate.
+    // Implemented via tuple, with an interface to accept plain type lists
+    template<template<class> class, typename, typename...> struct all_satisfy;
+
+    template<template<class> class F, typename T, typename... Ts>
+    struct all_satisfy<F, std::tuple<T, Ts...> >
+    {
+        static const constexpr bool value = F<T>::value
+            && all_satisfy<F, std::tuple<Ts...> >::value;
+    };
+    template<template<class> class F, typename T>
+    struct all_satisfy<F, std::tuple<T> >
+    {
+        static const constexpr bool value = F<T>::value;
+    };
+
+    template<template<class> class F, typename T, typename... Ts>
+    struct all_satisfy: public all_satisfy<F, std::tuple<T, Ts...> > {};
+
+    // Permute given tuple type C with given integer sequence II
+    // Sequence may be less than tuple C size.
+    template<class, class> struct permute_tuple;
+
+    template<class C, int... IIs>
+    struct permute_tuple<C, Seq<IIs...> >
+    {
+        using type = std::tuple< typename std::tuple_element<IIs, C>::type... >;
+    };
+
+    // Given T..., generates a type sequence of sizeof...(T)-1 elements
+    // which is T... without its last element
+    // Implemented via tuple, with an interface to accept plain type lists
+    template<typename T, typename... Ts> struct all_but_last;
+
+    template<typename T, typename... Ts>
+    struct all_but_last<std::tuple<T, Ts...> >
+    {
+        using C    = std::tuple<T, Ts...>;
+        using S    = typename MkSeq<std::tuple_size<C>::value - 1>::type;
+        using type = typename permute_tuple<C, S>::type;
+    };
+
+    template<typename T, typename... Ts>
+    struct all_but_last: public all_but_last<std::tuple<T, Ts...> > {};
+
+    template<typename... Ts>
+    using all_but_last_t = typename all_but_last<Ts...>::type;
+
+    // NB.: This is here because there's no constexpr std::max in C++11
+    template<std::size_t S0, std::size_t... SS> struct max_of_t
+    {
+        static constexpr const std::size_t rest  = max_of_t<SS...>::value;
+        static constexpr const std::size_t value = rest > S0 ? rest : S0;
+    };
+    template<std::size_t S> struct max_of_t<S>
+    {
+        static constexpr const std::size_t value = S;
+    };
+
+    template <typename...>
+    struct contains : std::false_type{};
+
+    template <typename T1, typename T2, typename... Ts>
+    struct contains<T1, T2, Ts...> : std::integral_constant<bool, std::is_same<T1, T2>::value ||
+                                                                  contains<T1, Ts...>::value> {};
+    template<typename T, typename... Types>
+    struct contains<T, std::tuple<Types...>> : std::integral_constant<bool, contains<T, Types...>::value> {};
+
+    template <typename...>
+    struct all_unique : std::true_type{};
+
+    template <typename T1, typename... Ts>
+    struct all_unique<T1, Ts...> : std::integral_constant<bool, !contains<T1, Ts...>::value &&
+                                                                 all_unique<Ts...>::value> {};
+
+    template<typename>
+    struct tuple_wrap_helper;
+
+    template<typename T> struct tuple_wrap_helper
+    {
+        using type = std::tuple<T>;
+        static type get(T&& obj) { return std::make_tuple(std::move(obj)); }
+    };
+
+    template<typename... Objs>
+    struct tuple_wrap_helper<std::tuple<Objs...>>
+    {
+        using type = std::tuple<Objs...>;
+        static type get(std::tuple<Objs...>&& objs) { return std::forward<std::tuple<Objs...>>(objs); }
+    };
+} // namespace detail
+} // namespace cv
+
+// \endcond
+
+#endif //  OPENCV_GAPI_UTIL_HPP
diff --git a/IPL/include/opencv/opencv2/gapi/util/variant.hpp b/IPL/include/opencv/opencv2/gapi/util/variant.hpp
new file mode 100644
index 0000000..22dfb2e
--- /dev/null
+++ b/IPL/include/opencv/opencv2/gapi/util/variant.hpp
@@ -0,0 +1,392 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2018 Intel Corporation
+
+
+#ifndef OPENCV_GAPI_UTIL_VARIANT_HPP
+#define OPENCV_GAPI_UTIL_VARIANT_HPP
+
+#include <array>
+#include <type_traits>
+
+#include <opencv2/gapi/util/throw.hpp>
+#include <opencv2/gapi/util/util.hpp> // max_of_t
+
+// A poor man's `variant` implementation, incompletely modeled against C++17 spec.
+namespace cv
+{
+namespace util
+{
+    namespace detail
+    {
+        template<std::size_t I, typename Target, typename First, typename... Remaining>
+        struct type_list_index_helper
+        {
+            static const constexpr bool is_same = std::is_same<Target, First>::value;
+            static const constexpr std::size_t value =
+                std::conditional<is_same, std::integral_constant<std::size_t, I>, type_list_index_helper<I + 1, Target, Remaining...>>::type::value;
+        };
+
+        template<std::size_t I, typename Target, typename First>
+        struct type_list_index_helper<I, Target, First>
+        {
+            static_assert(std::is_same<Target, First>::value, "Type not found");
+            static const constexpr std::size_t value = I;
+        };
+
+        template< bool B, class T = void >
+        using enable_if_t = typename std::enable_if<B,T>::type;
+
+        template<class T, class U, class V = void>
+        using are_different_t = enable_if_t<
+                !std::is_same<typename std::decay<T>::type,
+                              typename std::decay<U>::type>::value,
+                 V>;
+    }
+
+    template<typename Target, typename... Types>
+    struct type_list_index
+    {
+        static const constexpr std::size_t value = detail::type_list_index_helper<0, Target, Types...>::value;
+    };
+
+    class bad_variant_access: public std::exception
+    {
+    public:
+        virtual const char *what() const noexcept override
+        {
+            return "Bad variant access";
+        }
+    };
+
+    // Interface ///////////////////////////////////////////////////////////////
+    struct monostate {};
+    inline bool operator==(const util::monostate&, const util::monostate&)
+    {
+        return true;
+    }
+
+    template<typename... Ts> // FIXME: no references, arrays, and void
+    class variant // Tagged union over Ts...: aligned raw storage plus the index of the alternative it holds
+    {
+        // FIXME: Replace with std::aligned_union after gcc4.8 support is dropped
+        static constexpr const std::size_t S = cv::detail::max_of_t<sizeof(Ts)...>::value;  // largest size among Ts
+        static constexpr const std::size_t A = cv::detail::max_of_t<alignof(Ts)...>::value; // strictest alignment among Ts
+        using Memory = typename std::aligned_storage<S, A>::type[1];
+        // Per-alternative helpers: each one performs a single operation on storage holding a T
+        template<typename T> struct cctr_h {
+            static void help(Memory memory, const Memory from) {
+                new (memory) T(*reinterpret_cast<const T*>(from));
+            }
+        };
+
+        template<typename T> struct vctr_h {
+            static void help(Memory memory, const void* pval) {
+                new (memory) T(*reinterpret_cast<const T*>(pval));
+            }
+        };
+
+        template<typename T> struct mctr_h {
+            static void help(Memory memory, void *pval) {
+                new (memory) T(std::move(*reinterpret_cast<T*>(pval)));
+            }
+        };
+
+        template<typename T> struct copy_h {
+            static void help(Memory to, const Memory from) {
+                *reinterpret_cast<T*>(to) = *reinterpret_cast<const T*>(from);
+            }
+        };
+
+        template<typename T> struct move_h {
+            static void help(Memory to, Memory from) { // source is non-const: std::move() on a const T would silently copy
+                *reinterpret_cast<T*>(to) = std::move(*reinterpret_cast<T*>(from));
+            }
+        };
+
+        template<typename T> struct swap_h {
+            static void help(Memory to, Memory from) {
+                std::swap(*reinterpret_cast<T*>(to), *reinterpret_cast<T*>(from));
+            }
+        };
+
+        template<typename T> struct dtor_h {
+            static void help(Memory memory) {
+                (void) memory; // MSVC warning
+                reinterpret_cast<T*>(memory)->~T();
+            }
+        };
+
+        template<typename T> struct equal_h {
+            static bool help(const Memory lhs, const Memory rhs) {
+                const T& t_lhs = *reinterpret_cast<const T*>(lhs);
+                const T& t_rhs = *reinterpret_cast<const T*>(rhs);
+                return t_lhs == t_rhs;
+            }
+        };
+
+        typedef void (*CCtr) (Memory, const Memory);  // Copy c-tor (variant)
+        typedef void (*VCtr) (Memory, const void*);   // Copy c-tor (value)
+        typedef void (*MCtr) (Memory, void*);         // Generic move c-tor
+        typedef void (*Copy) (Memory, const Memory);  // Copy assignment
+        typedef void (*Move) (Memory, Memory);        // Move assignment
+        typedef void (*Swap) (Memory, Memory);        // Swap
+        typedef void (*Dtor) (Memory);                // Destructor
+
+        typedef bool (*Equal)(const Memory, const Memory); // Equality test (external)
+        // Dispatch tables: entry i is the helper instantiated for the i-th type of Ts...
+        static constexpr std::array<CCtr, sizeof...(Ts)> cctrs(){ return {{(&cctr_h<Ts>::help)...}};}
+        static constexpr std::array<VCtr, sizeof...(Ts)> vctrs(){ return {{(&vctr_h<Ts>::help)...}};}
+        static constexpr std::array<MCtr, sizeof...(Ts)> mctrs(){ return {{(&mctr_h<Ts>::help)...}};}
+        static constexpr std::array<Copy, sizeof...(Ts)> cpyrs(){ return {{(&copy_h<Ts>::help)...}};}
+        static constexpr std::array<Move, sizeof...(Ts)> mvers(){ return {{(&move_h<Ts>::help)...}};}
+        static constexpr std::array<Swap, sizeof...(Ts)> swprs(){ return {{(&swap_h<Ts>::help)...}};}
+        static constexpr std::array<Dtor, sizeof...(Ts)> dtors(){ return {{(&dtor_h<Ts>::help)...}};}
+
+        std::size_t m_index = 0; // index (within Ts...) of the alternative currently stored in memory
+
+    protected:
+        template<typename T, typename... Us> friend T& get(variant<Us...> &v);
+        template<typename T, typename... Us> friend const T& get(const variant<Us...> &v);
+        template<typename... Us> friend bool operator==(const variant<Us...> &lhs,
+                                                        const variant<Us...> &rhs);
+        Memory memory; // raw storage of the current alternative
+
+    public:
+        // Constructors
+        variant() noexcept;
+        variant(const variant& other);
+        variant(variant&& other) noexcept;
+        template<typename T> explicit variant(const T& t);
+        // are_different_t is a SFINAE trick to avoid variant(T &&t) with T=variant
+        // for some reason, this version is called instead of variant(variant&& o) when
+        // variant is used in STL containers (examples: vector assignment).
+        // detail::enable_if_t<! std::is_lvalue_reference<T>::value> is a SFINAE
+        // trick to limit this constructor only to rvalue reference argument
+        template<
+            typename T,
+            typename = detail::are_different_t<variant, T>,
+            typename = detail::enable_if_t<! std::is_lvalue_reference<T>::value>
+        >
+        explicit variant(T&& t);
+        // template<class T, class... Args> explicit variant(Args&&... args);
+        // FIXME: other constructors
+
+        // Destructor
+        ~variant();
+
+        // Assignment
+        variant& operator=(const variant& rhs);
+        variant& operator=(variant &&rhs) noexcept;
+
+        // SFINAE trick to avoid operator=(T&&) with T=variant<>, see comment above
+        template<
+            typename T,
+            typename = detail::are_different_t<variant, T>
+        >
+        variant& operator=(T&& t) noexcept;
+
+        // Observers
+        std::size_t index() const noexcept;
+        // FIXME: valueless_by_exception()
+
+        // Modifiers
+        // FIXME: emplace()
+        void swap(variant &rhs) noexcept;
+
+        // Non-standard extension (not part of the C++17 variant interface)
+        template<typename T> static constexpr std::size_t index_of();
+    };
+
+    // FIXME: visit
+
+    template<typename T, typename... Types>
+    T& get(util::variant<Types...> &v);
+
+    template<typename T, typename... Types>
+    const T& get(const util::variant<Types...> &v);
+
+    template<typename T, typename... Types>
+    bool holds_alternative(const util::variant<Types...> &v) noexcept;
+
+    // FIXME: T&&, const T&& versions.
+
+    // Implementation //////////////////////////////////////////////////////////
+    template<typename... Ts>
+    variant<Ts...>::variant() noexcept
+    {
+        typedef typename std::tuple_element<0, std::tuple<Ts...> >::type TFirst;
+        new (memory) TFirst();
+    }
+
+    template<typename... Ts>
+    variant<Ts...>::variant(const variant &other)
+        : m_index(other.m_index)
+    {
+        (cctrs()[m_index])(memory, other.memory);
+    }
+
+    template<typename... Ts>
+    variant<Ts...>::variant(variant &&other) noexcept
+        : m_index(other.m_index)
+    {
+        (mctrs()[m_index])(memory, other.memory);
+    }
+
+    template<typename... Ts>
+    template<class T>
+    variant<Ts...>::variant(const T& t)
+        : m_index(util::type_list_index<T, Ts...>::value)
+    {
+        (vctrs()[m_index])(memory, &t);
+    }
+
+    template<typename... Ts>
+    template<class T, typename , typename>
+    variant<Ts...>::variant(T&& t)
+        : m_index(util::type_list_index<typename std::remove_reference<T>::type, Ts...>::value)
+    {
+        (mctrs()[m_index])(memory, &t);
+    }
+
+    template<typename... Ts>
+    variant<Ts...>::~variant()
+    {
+        (dtors()[m_index])(memory);
+    }
+
+    template<typename... Ts>
+    variant<Ts...>& variant<Ts...>::operator=(const variant<Ts...> &rhs)
+    {
+        if (m_index != rhs.m_index)
+        {
+            (dtors()[    m_index])(memory);
+            (cctrs()[rhs.m_index])(memory, rhs.memory);
+            m_index = rhs.m_index;
+        }
+        else
+        {
+            (cpyrs()[rhs.m_index])(memory, rhs.memory);
+        }
+        return *this;
+    }
+
+    template<typename... Ts>
+    variant<Ts...>& variant<Ts...>::operator=(variant<Ts...> &&rhs) noexcept
+    {
+        if (m_index != rhs.m_index)
+        {
+            (dtors()[    m_index])(memory);
+            (mctrs()[rhs.m_index])(memory, rhs.memory);
+            m_index = rhs.m_index;
+        }
+        else
+        {
+            (mvers()[rhs.m_index])(memory, rhs.memory);
+        }
+        return *this;
+    }
+
+    template<typename... Ts>
+    template<typename T, typename>
+    variant<Ts...>& variant<Ts...>::operator=(T&& t) noexcept
+    {
+        using decayed_t = typename std::decay<T>::type;
+        // FIXME: No version with implicit type conversion available!
+        static const constexpr std::size_t t_index =
+            util::type_list_index<decayed_t, Ts...>::value;
+
+        if (t_index == m_index)
+        {
+            util::get<decayed_t>(*this) = std::forward<T>(t);
+            return *this;
+        }
+        else return (*this = variant(std::forward<T>(t)));
+    }
+
+    template<typename... Ts>
+    std::size_t util::variant<Ts...>::index() const noexcept
+    {
+        return m_index;
+    }
+
+    template<typename... Ts>
+    void variant<Ts...>::swap(variant<Ts...> &rhs) noexcept
+    {
+        if (m_index == rhs.index())
+        {
+            (swprs()[m_index](memory, rhs.memory));
+        }
+        else
+        {
+            variant<Ts...> tmp(std::move(*this));
+            *this = std::move(rhs);
+            rhs   = std::move(tmp);
+        }
+    }
+
+    template<typename... Ts>
+    template<typename T>
+    constexpr std::size_t variant<Ts...>::index_of()
+    {
+        return util::type_list_index<T, Ts...>::value; // FIXME: tests!
+    }
+
+    template<typename T, typename... Types>
+    T& get(util::variant<Types...> &v)
+    {
+        const constexpr std::size_t t_index =
+            util::type_list_index<T, Types...>::value;
+
+        if (v.index() == t_index)
+            return *(T*)(&v.memory);  // workaround for ICC 2019
+            // original code: return reinterpret_cast<T&>(v.memory);
+        else
+            throw_error(bad_variant_access());
+    }
+
+    template<typename T, typename... Types>
+    const T& get(const util::variant<Types...> &v)
+    {
+        const constexpr std::size_t t_index =
+            util::type_list_index<T, Types...>::value;
+
+        if (v.index() == t_index)
+            return *(const T*)(&v.memory);  // workaround for ICC 2019
+            // original code: return reinterpret_cast<const T&>(v.memory);
+        else
+            throw_error(bad_variant_access());
+    }
+
+    template<typename T, typename... Types>
+    bool holds_alternative(const util::variant<Types...> &v) noexcept
+    {
+        return v.index() == util::variant<Types...>::template index_of<T>();
+    }
+
+    template<typename... Us> bool operator==(const variant<Us...> &lhs,
+                                             const variant<Us...> &rhs)
+    {
+        using V = variant<Us...>;
+
+        // Instantiate table only here since it requires operator== for <Us...>
+        // <Us...> should have operator== only if this one is used, not in general
+        static const std::array<typename V::Equal, sizeof...(Us)> eqs = {
+            {(&V::template equal_h<Us>::help)...}
+        };
+        if (lhs.index() != rhs.index())
+            return false;
+        return (eqs[lhs.index()])(lhs.memory, rhs.memory);
+    }
+
+    template<typename... Us> bool operator!=(const variant<Us...> &lhs,
+                                             const variant<Us...> &rhs)
+    {
+        return !(lhs == rhs);
+    }
+} // namespace util
+} // namespace cv
+
+#endif // OPENCV_GAPI_UTIL_VARIANT_HPP
diff --git a/IPL/include/opencv/opencv2/gapi/video.hpp b/IPL/include/opencv/opencv2/gapi/video.hpp
new file mode 100644
index 0000000..7602e1d
--- /dev/null
+++ b/IPL/include/opencv/opencv2/gapi/video.hpp
@@ -0,0 +1,135 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2020 Intel Corporation
+
+#ifndef OPENCV_GAPI_VIDEO_HPP
+#define OPENCV_GAPI_VIDEO_HPP
+
+#include <utility> // std::tuple
+
+#include <opencv2/gapi/gkernel.hpp>
+
+
+/** \defgroup gapi_video G-API Video processing functionality
+ */
+
+namespace cv { namespace gapi {
+namespace  video
+{
+    using GOptFlowLKOutput = std::tuple<cv::GArray<cv::Point2f>,
+                                        cv::GArray<uchar>,
+                                        cv::GArray<float>>;
+
+    G_TYPED_KERNEL(GCalcOptFlowLK,
+                   <GOptFlowLKOutput(GMat,GMat,cv::GArray<cv::Point2f>,cv::GArray<cv::Point2f>,Size,
+                                     int,TermCriteria,int,double)>,
+                   "org.opencv.video.calcOpticalFlowPyrLK")
+    {
+        static std::tuple<GArrayDesc,GArrayDesc,GArrayDesc> outMeta(GMatDesc,GMatDesc,GArrayDesc,
+                                                                    GArrayDesc,const Size&,int,
+                                                                    const TermCriteria&,int,double)
+        {
+            return std::make_tuple(empty_array_desc(), empty_array_desc(), empty_array_desc());
+        }
+
+    };
+
+    G_TYPED_KERNEL(GCalcOptFlowLKForPyr,
+                   <GOptFlowLKOutput(cv::GArray<cv::GMat>,cv::GArray<cv::GMat>,
+                                     cv::GArray<cv::Point2f>,cv::GArray<cv::Point2f>,Size,int,
+                                     TermCriteria,int,double)>,
+                   "org.opencv.video.calcOpticalFlowPyrLKForPyr")
+    {
+        static std::tuple<GArrayDesc,GArrayDesc,GArrayDesc> outMeta(GArrayDesc,GArrayDesc,
+                                                                    GArrayDesc,GArrayDesc,
+                                                                    const Size&,int,
+                                                                    const TermCriteria&,int,double)
+        {
+            return std::make_tuple(empty_array_desc(), empty_array_desc(), empty_array_desc());
+        }
+    };
+} //namespace video
+
+//! @addtogroup gapi_video
+//! @{
+/** @brief Calculates an optical flow for a sparse feature set using the iterative Lucas-Kanade
+method with pyramids.
+
+See @cite Bouguet00 .
+
+@note Function textual ID is "org.opencv.video.calcOpticalFlowPyrLK"
+
+@param prevImg first 8-bit input image (GMat) or pyramid (GArray<GMat>) constructed by
+buildOpticalFlowPyramid.
+@param nextImg second input image (GMat) or pyramid (GArray<GMat>) of the same size and the same
+type as prevImg.
+@param prevPts GArray of 2D points for which the flow needs to be found; point coordinates must be
+single-precision floating-point numbers.
+@param predPts GArray of initial 2D points for the flow search; makes sense only when
+OPTFLOW_USE_INITIAL_FLOW flag is passed; in that case the vector must have the same size as in
+the input.
+@param winSize size of the search window at each pyramid level.
+@param maxLevel 0-based maximal pyramid level number; if set to 0, pyramids are not used (single
+level), if set to 1, two levels are used, and so on; if pyramids are passed to input then
+algorithm will use as many levels as pyramids have but no more than maxLevel.
+@param criteria parameter, specifying the termination criteria of the iterative search algorithm
+(after the specified maximum number of iterations criteria.maxCount or when the search window
+moves by less than criteria.epsilon).
+@param flags operation flags:
+ -   **OPTFLOW_USE_INITIAL_FLOW** uses initial estimations, stored in predPts; if the flag is
+     not set, then prevPts is copied to nextPts and is considered the initial estimate.
+ -   **OPTFLOW_LK_GET_MIN_EIGENVALS** use minimum eigen values as an error measure (see
+     minEigThresh description); if the flag is not set, then L1 distance between patches
+     around the original and a moved point, divided by number of pixels in a window, is used as
+     an error measure.
+@param minEigThresh the algorithm calculates the minimum eigen value of a 2x2 normal matrix of
+optical flow equations (this matrix is called a spatial gradient matrix in @cite Bouguet00), divided
+by number of pixels in a window; if this value is less than minEigThresh, then a corresponding
+feature is filtered out and its flow is not processed, so it allows removing bad points and getting a
+performance boost.
+
+@return GArray of 2D points (with single-precision floating-point coordinates)
+containing the calculated new positions of input features in the second image.
+@return status GArray (of unsigned chars); each element of the vector is set to 1 if
+the flow for the corresponding features has been found, otherwise, it is set to 0.
+@return GArray of errors (floats); each element of the vector is set to an error for the
+corresponding feature, type of the error measure can be set in flags parameter; if the flow wasn't
+found then the error is not defined (use the status parameter to find such cases).
+ */
+GAPI_EXPORTS std::tuple<GArray<Point2f>, GArray<uchar>, GArray<float>>
+calcOpticalFlowPyrLK(const GMat            &prevImg,
+                     const GMat            &nextImg,
+                     const GArray<Point2f> &prevPts,
+                     const GArray<Point2f> &predPts,
+                     const Size            &winSize      = Size(21, 21),
+                           int              maxLevel     = 3,
+                     const TermCriteria    &criteria     = TermCriteria(TermCriteria::COUNT |
+                                                                        TermCriteria::EPS,
+                                                                        30, 0.01),
+                           int              flags        = 0,
+                           double           minEigThresh = 1e-4);
+
+/**
+@overload
+@note Function textual ID is "org.opencv.video.calcOpticalFlowPyrLKForPyr"
+*/
+GAPI_EXPORTS std::tuple<GArray<Point2f>, GArray<uchar>, GArray<float>>
+calcOpticalFlowPyrLK(const GArray<GMat>    &prevPyr,
+                     const GArray<GMat>    &nextPyr,
+                     const GArray<Point2f> &prevPts,
+                     const GArray<Point2f> &predPts,
+                     const Size            &winSize      = Size(21, 21),
+                           int              maxLevel     = 3,
+                     const TermCriteria    &criteria     = TermCriteria(TermCriteria::COUNT |
+                                                                        TermCriteria::EPS,
+                                                                        30, 0.01),
+                           int              flags        = 0,
+                           double           minEigThresh = 1e-4);
+
+//! @} gapi_video
+} //namespace gapi
+} //namespace cv
+
+#endif // OPENCV_GAPI_VIDEO_HPP
diff --git a/IPL/include/opencv/opencv2/hfs.hpp b/IPL/include/opencv/opencv2/hfs.hpp
new file mode 100644
index 0000000..948f117
--- /dev/null
+++ b/IPL/include/opencv/opencv2/hfs.hpp
@@ -0,0 +1,153 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#include "opencv2/core.hpp"
+
+namespace cv { namespace hfs {
+
+/** @defgroup hfs Hierarchical Feature Selection for Efficient Image Segmentation
+
+The opencv hfs module contains an efficient algorithm to segment an image.
+This module is implemented based on the paper Hierarchical Feature Selection for Efficient
+Image Segmentation, ECCV 2016. The original project was developed by
+Yun Liu(https://github.com/yun-liu/hfs).
+
+
+Introduction to Hierarchical Feature Selection
+----------------------------------------------
+
+
+This algorithm is executed in 3 stages:
+
+In the first stage, the algorithm uses SLIC (simple linear iterative clustering) algorithm
+to obtain the superpixel of the input image.
+
+In the second stage, the algorithm views each superpixel as a node in the graph.
+It will calculate a feature vector for each edge of the graph. It then calculates a weight
+for each edge based on the feature vector and trained SVM parameters. After obtaining
+weight for each edge, it will exploit the EGB (Efficient Graph-based Image Segmentation)
+algorithm to merge some nodes in the graph, thus obtaining a coarser segmentation.
+After these operations, a post process will be executed to merge regions that are smaller
+than a specific number of pixels into their nearby region.
+
+In the third stage, the algorithm exploits the similar mechanism to further merge
+the small regions obtained in the second stage into even coarser segmentation.
+
+After these three stages, we can obtain the final segmentation of the image.
+For further details about the algorithm, please refer to the original paper:
+Hierarchical Feature Selection for Efficient Image Segmentation, ECCV 2016
+
+*/
+
+//! @addtogroup hfs
+//! @{
+class CV_EXPORTS_W HfsSegment : public Algorithm {
+public:
+
+/** @brief: set and get the parameter segEgbThresholdI.
+* This parameter is used in the second stage mentioned above.
+* It is a constant used to threshold weights of the edge when merging
+* adjacent nodes when applying EGB algorithm. The segmentation result
+* tends to have more regions remained if this value is large and vice versa.
+*/
+CV_WRAP virtual void setSegEgbThresholdI(float c) = 0;
+CV_WRAP virtual float getSegEgbThresholdI() = 0;
+
+
+/** @brief: set and get the parameter minRegionSizeI.
+* This parameter is used in the second stage
+* mentioned above. After the EGB segmentation, regions that have fewer
+* pixels than this parameter will be merged into their adjacent region.
+*/
+CV_WRAP virtual void setMinRegionSizeI(int n) = 0;
+CV_WRAP virtual int getMinRegionSizeI() = 0;
+
+
+/** @brief: set and get the parameter segEgbThresholdII.
+* This parameter is used in the third stage
+* mentioned above. It serves the same purpose as segEgbThresholdI.
+* The segmentation result tends to have more regions remained if
+* this value is large and vice versa.
+*/
+CV_WRAP virtual void setSegEgbThresholdII(float c) = 0;
+CV_WRAP virtual float getSegEgbThresholdII() = 0;
+
+
+/** @brief: set and get the parameter minRegionSizeII.
+* This parameter is used in the third stage
+* mentioned above. It serves the same purpose as minRegionSizeI
+*/
+CV_WRAP virtual void setMinRegionSizeII(int n) = 0;
+CV_WRAP virtual int getMinRegionSizeII() = 0;
+
+
+/** @brief: set and get the parameter spatialWeight.
+* This parameter is used in the first stage
+* mentioned above(the SLIC stage). It describes how important is the role
+* of position when calculating the distance between each pixel and its
+* center. The exact formula to calculate the distance is
+* \f$colorDistance + spatialWeight \times spatialDistance\f$.
+* The segmentation result tends to have more local consistency
+* if this value is larger.
+*/
+CV_WRAP virtual void setSpatialWeight(float w) = 0;
+CV_WRAP virtual float getSpatialWeight() = 0;
+
+
+/** @brief: set and get the parameter slicSpixelSize.
+* This parameter is used in the first stage mentioned
+* above(the SLIC stage). It describes the size of each
+* superpixel when initializing SLIC. Every superpixel
+* approximately has \f$slicSpixelSize \times slicSpixelSize\f$
+* pixels in the beginning.
+*/
+CV_WRAP virtual void setSlicSpixelSize(int n) = 0;
+CV_WRAP virtual int getSlicSpixelSize() = 0;
+
+
+/** @brief: set and get the parameter numSlicIter.
+* This parameter is used in the first stage. It
+* describes how many iterations to perform when executing SLIC.
+*/
+CV_WRAP virtual void setNumSlicIter(int n) = 0;
+CV_WRAP virtual int getNumSlicIter() = 0;
+
+/** @brief do segmentation gpu
+* @param src: the input image
+* @param ifDraw: whether to draw the image in the returned Mat. If this parameter is false,
+* then the content of the returned Mat is a matrix of indices, describing the region
+* each pixel belongs to, and its data type is CV_16U. If this parameter is true,
+* then the returned Mat is a segmented picture, and the color of each region is the
+* average color of all pixels in that region, and its data type is the same as
+* the input image.
+*/
+CV_WRAP virtual Mat performSegmentGpu(InputArray src, bool ifDraw = true) = 0;
+
+/** @brief do segmentation with cpu
+* This method is only implemented for reference.
+* It is highly NOT recommended to use it.
+*/
+CV_WRAP virtual Mat performSegmentCpu(InputArray src, bool ifDraw = true) = 0;
+
+/** @brief: create a hfs object
+* @param height: the height of the input image
+* @param width: the width of the input image
+* @param segEgbThresholdI: parameter segEgbThresholdI
+* @param minRegionSizeI: parameter minRegionSizeI
+* @param segEgbThresholdII: parameter segEgbThresholdII
+* @param minRegionSizeII: parameter minRegionSizeII
+* @param spatialWeight: parameter spatialWeight
+* @param slicSpixelSize: parameter slicSpixelSize
+* @param numSlicIter: parameter numSlicIter
+*/
+CV_WRAP static Ptr<HfsSegment> create(int height, int width,
+    float segEgbThresholdI = 0.08f, int minRegionSizeI = 100,
+    float segEgbThresholdII = 0.28f, int minRegionSizeII = 200,
+    float spatialWeight = 0.6f, int slicSpixelSize = 8, int numSlicIter = 5);
+
+};
+
+//! @}
+
+}} // namespace cv { namespace hfs {
diff --git a/IPL/include/opencv/opencv2/highgui.hpp b/IPL/include/opencv/opencv2/highgui.hpp
index 41bd8af..f109640 100644
--- a/IPL/include/opencv/opencv2/highgui.hpp
+++ b/IPL/include/opencv/opencv2/highgui.hpp
@@ -40,12 +40,16 @@
 //
 //M*/
 
-#ifndef __OPENCV_HIGHGUI_HPP__
-#define __OPENCV_HIGHGUI_HPP__
+#ifndef OPENCV_HIGHGUI_HPP
+#define OPENCV_HIGHGUI_HPP
 
 #include "opencv2/core.hpp"
+#ifdef HAVE_OPENCV_IMGCODECS
 #include "opencv2/imgcodecs.hpp"
+#endif
+#ifdef HAVE_OPENCV_VIDEOIO
 #include "opencv2/videoio.hpp"
+#endif
 
 /**
 @defgroup highgui High-level GUI
@@ -182,15 +186,19 @@ enum WindowFlags {
 
        WINDOW_FULLSCREEN = 1,          //!< change the window to fullscreen.
        WINDOW_FREERATIO  = 0x00000100, //!< the image expends as much as it can (no ratio constraint).
-       WINDOW_KEEPRATIO  = 0x00000000  //!< the ratio of the image is respected.
-     };
+       WINDOW_KEEPRATIO  = 0x00000000, //!< the ratio of the image is respected.
+       WINDOW_GUI_EXPANDED=0x00000000, //!< status bar and tool bar
+       WINDOW_GUI_NORMAL = 0x00000010, //!< old-fashioned way
+    };
 
 //! Flags for cv::setWindowProperty / cv::getWindowProperty
 enum WindowPropertyFlags {
        WND_PROP_FULLSCREEN   = 0, //!< fullscreen property    (can be WINDOW_NORMAL or WINDOW_FULLSCREEN).
        WND_PROP_AUTOSIZE     = 1, //!< autosize property      (can be WINDOW_NORMAL or WINDOW_AUTOSIZE).
        WND_PROP_ASPECT_RATIO = 2, //!< window's aspect ration (can be set to WINDOW_FREERATIO or WINDOW_KEEPRATIO).
-       WND_PROP_OPENGL       = 3  //!< opengl support.
+       WND_PROP_OPENGL       = 3, //!< opengl support.
+       WND_PROP_VISIBLE      = 4, //!< checks whether the window exists and is visible
+       WND_PROP_TOPMOST      = 5  //!< property to toggle normal window being topmost or not
      };
 
 //! Mouse Events see cv::MouseCallback
@@ -237,9 +245,10 @@ enum QtFontStyles {
 
 //! Qt "button" type
 enum QtButtonTypes {
-       QT_PUSH_BUTTON = 0, //!< Push button.
-       QT_CHECKBOX    = 1, //!< Checkbox button.
-       QT_RADIOBOX    = 2  //!< Radiobox button.
+       QT_PUSH_BUTTON   = 0,    //!< Push button.
+       QT_CHECKBOX      = 1,    //!< Checkbox button.
+       QT_RADIOBOX      = 2,    //!< Radiobox button.
+       QT_NEW_BUTTONBAR = 1024  //!< Button should create a new buttonbar
      };
 
 /** @brief Callback function for mouse events. see cv::setMouseCallback
@@ -287,9 +296,9 @@ Qt backend supports additional flags:
      displayed image (see imshow ), and you cannot change the window size manually.
  -   **WINDOW_FREERATIO or WINDOW_KEEPRATIO:** WINDOW_FREERATIO adjusts the image
      with no respect to its ratio, whereas WINDOW_KEEPRATIO keeps the image ratio.
- -   **CV_GUI_NORMAL or CV_GUI_EXPANDED:** CV_GUI_NORMAL is the old way to draw the window
-     without statusbar and toolbar, whereas CV_GUI_EXPANDED is a new enhanced GUI.
-By default, flags == WINDOW_AUTOSIZE | WINDOW_KEEPRATIO | CV_GUI_EXPANDED
+ -   **WINDOW_GUI_NORMAL or WINDOW_GUI_EXPANDED:** WINDOW_GUI_NORMAL is the old way to draw the window
+     without statusbar and toolbar, whereas WINDOW_GUI_EXPANDED is a new enhanced GUI.
+By default, flags == WINDOW_AUTOSIZE | WINDOW_KEEPRATIO | WINDOW_GUI_EXPANDED
 
 @param winname Name of the window in the window caption that may be used as a window identifier.
 @param flags Flags of the window. The supported flags are: (cv::WindowFlags)
@@ -312,6 +321,15 @@ CV_EXPORTS_W void destroyAllWindows();
 
 CV_EXPORTS_W int startWindowThread();
 
+/** @brief Similar to #waitKey, but returns full key code.
+
+@note
+
+Key code is implementation specific and depends on used backend: QT/GTK/Win32/etc
+
+*/
+CV_EXPORTS_W int waitKeyEx(int delay = 0);
+
 /** @brief Waits for a pressed key.
 
 The function waitKey waits for a key event infinitely (when \f$\texttt{delay}\leq 0\f$ ) or for delay
@@ -344,7 +362,7 @@ Otherwise, the image is scaled to fit the window. The function may scale the ima
 -   If the image is 8-bit unsigned, it is displayed as is.
 -   If the image is 16-bit unsigned or 32-bit integer, the pixels are divided by 256. That is, the
     value range [0,255\*256] is mapped to [0,255].
--   If the image is 32-bit floating-point, the pixel values are multiplied by 255. That is, the
+-   If the image is 32-bit or 64-bit floating-point, the pixel values are multiplied by 255. That is, the
     value range [0,1] is mapped to [0,255].
 
 If window was created with OpenGL support, cv::imshow also support ogl::Buffer , ogl::Texture2D and
@@ -384,6 +402,12 @@ CV_EXPORTS_W void imshow(const String& winname, InputArray mat);
  */
 CV_EXPORTS_W void resizeWindow(const String& winname, int width, int height);
 
+/** @overload
+@param winname Window name.
+@param size The new window size.
+*/
+CV_EXPORTS_W void resizeWindow(const String& winname, const cv::Size& size);
+
 /** @brief Moves window to the specified position
 
 @param winname Name of the window.
@@ -419,12 +443,23 @@ The function getWindowProperty returns properties of a window.
  */
 CV_EXPORTS_W double getWindowProperty(const String& winname, int prop_id);
 
+/** @brief Provides rectangle of image in the window.
+
+The function getWindowImageRect returns the client screen coordinates, width and height of the image rendering area.
+
+@param winname Name of the window.
+
+@sa resizeWindow moveWindow
+ */
+CV_EXPORTS_W Rect getWindowImageRect(const String& winname);
+
+/** @example samples/cpp/create_mask.cpp
+This program demonstrates using mouse events and how to make and use a mask image (black and white) .
+*/
 /** @brief Sets mouse handler for the specified window
 
 @param winname Name of the window.
-@param onMouse Mouse callback. See OpenCV samples, such as
-<https://github.com/Itseez/opencv/tree/master/samples/cpp/ffilldemo.cpp>, on how to specify and
-use the callback.
+@param onMouse Callback function for mouse events. See OpenCV samples on how to specify and use the callback.
 @param userdata The optional parameter passed to the callback.
  */
 CV_EXPORTS void setMouseCallback(const String& winname, MouseCallback onMouse, void* userdata = 0);
@@ -451,6 +486,44 @@ Mouse-wheel events are currently supported only on Windows.
  */
 CV_EXPORTS int getMouseWheelDelta(int flags);
 
+/** @brief Selects ROI on the given image.
+Function creates a window and allows user to select a ROI using mouse.
+Controls: use `space` or `enter` to finish selection, use key `c` to cancel selection (function will return the zero cv::Rect).
+
+@param windowName name of the window where selection process will be shown.
+@param img image to select a ROI.
+@param showCrosshair if true crosshair of selection rectangle will be shown.
+@param fromCenter if true center of selection will match initial mouse position. In opposite case a corner of
+selection rectangle will correspond to the initial mouse position.
+@return selected ROI or empty rect if selection canceled.
+
+@note The function sets its own mouse callback for specified window using cv::setMouseCallback(windowName, ...).
+After the work is finished, an empty callback will be set for the used window.
+ */
+CV_EXPORTS_W Rect selectROI(const String& windowName, InputArray img, bool showCrosshair = true, bool fromCenter = false);
+
+/** @overload
+ */
+CV_EXPORTS_W Rect selectROI(InputArray img, bool showCrosshair = true, bool fromCenter = false);
+
+/** @brief Selects ROIs on the given image.
+Function creates a window and allows user to select ROIs using mouse.
+Controls: use `space` or `enter` to finish current selection and start a new one,
+use `esc` to terminate multiple ROI selection process.
+
+@param windowName name of the window where selection process will be shown.
+@param img image to select a ROI.
+@param boundingBoxes selected ROIs.
+@param showCrosshair if true crosshair of selection rectangle will be shown.
+@param fromCenter if true center of selection will match initial mouse position. In opposite case a corner of
+selection rectangle will correspond to the initial mouse position.
+
+@note The function sets its own mouse callback for specified window using cv::setMouseCallback(windowName, ...).
+After the work is finished, an empty callback will be set for the used window.
+ */
+CV_EXPORTS_W void selectROIs(const String& windowName, InputArray img,
+                             CV_OUT std::vector<Rect>& boundingBoxes, bool showCrosshair = true, bool fromCenter = false);
+
 /** @brief Creates a trackbar and attaches it to the specified window.
 
 The function createTrackbar creates a trackbar (a slider or range control) with the specified name
@@ -460,7 +533,7 @@ displayed in the specified window winname.
 
 @note
 
-[__Qt Backend Only__] winname can be empty (or NULL) if the trackbar should be attached to the
+[__Qt Backend Only__] winname can be empty if the trackbar should be attached to the
 control panel.
 
 Clicking the label of each trackbar enables editing the trackbar values manually.
@@ -488,7 +561,7 @@ The function returns the current position of the specified trackbar.
 
 @note
 
-[__Qt Backend Only__] winname can be empty (or NULL) if the trackbar is attached to the control
+[__Qt Backend Only__] winname can be empty if the trackbar is attached to the control
 panel.
 
 @param trackbarname Name of the trackbar.
@@ -502,7 +575,7 @@ The function sets the position of the specified trackbar in the specified window
 
 @note
 
-[__Qt Backend Only__] winname can be empty (or NULL) if the trackbar is attached to the control
+[__Qt Backend Only__] winname can be empty if the trackbar is attached to the control
 panel.
 
 @param trackbarname Name of the trackbar.
@@ -517,7 +590,7 @@ The function sets the maximum position of the specified trackbar in the specifie
 
 @note
 
-[__Qt Backend Only__] winname can be empty (or NULL) if the trackbar is attached to the control
+[__Qt Backend Only__] winname can be empty if the trackbar is attached to the control
 panel.
 
 @param trackbarname Name of the trackbar.
@@ -532,12 +605,12 @@ The function sets the minimum position of the specified trackbar in the specifie
 
 @note
 
-[__Qt Backend Only__] winname can be empty (or NULL) if the trackbar is attached to the control
+[__Qt Backend Only__] winname can be empty if the trackbar is attached to the control
 panel.
 
 @param trackbarname Name of the trackbar.
 @param winname Name of the window that is the parent of trackbar.
-@param minval New maximum position.
+@param minval New minimum position.
  */
 CV_EXPORTS_W void setTrackbarMin(const String& trackbarname, const String& winname, int minval);
 
@@ -663,6 +736,23 @@ The function addText draws *text* on the image *img* using a specific font *font
  */
 CV_EXPORTS void addText( const Mat& img, const String& text, Point org, const QtFont& font);
 
+/** @brief Draws a text on the image.
+
+@param img 8-bit 3-channel image where the text should be drawn.
+@param text Text to write on an image.
+@param org Point(x,y) where the text should start on an image.
+@param nameFont Name of the font. The name should match the name of a system font (such as
+*Times*). If the font is not found, a default one is used.
+@param pointSize Size of the font. If not specified, equal zero or negative, the point size of the
+font is set to a system-dependent default value. Generally, this is 12 points.
+@param color Color of the font in BGRA where A = 255 is fully transparent.
+@param weight Font weight. Available operation flags are : cv::QtFontWeights You can also specify a positive integer for better control.
+@param style Font style. Available operation flags are : cv::QtFontStyles
+@param spacing Spacing between characters. It can be negative or positive.
+ */
+CV_EXPORTS_W void addText(const Mat& img, const String& text, Point org, const String& nameFont, int pointSize = -1, Scalar color = Scalar::all(0),
+        int weight = QT_FONT_NORMAL, int style = QT_STYLE_NORMAL, int spacing = 0);
+
 /** @brief Displays a text on a window image as an overlay for a specified duration.
 
 The function displayOverlay displays useful information/tips on top of the window for a certain
@@ -675,7 +765,7 @@ after the specified delay the original content of the window is restored.
 function is called before the previous overlay text timed out, the timer is restarted and the text
 is updated. If this value is zero, the text never disappears.
  */
-CV_EXPORTS void displayOverlay(const String& winname, const String& text, int delayms = 0);
+CV_EXPORTS_W void displayOverlay(const String& winname, const String& text, int delayms = 0);
 
 /** @brief Displays a text on the window statusbar during the specified period of time.
 
@@ -689,7 +779,7 @@ created with the CV_GUI_EXPANDED flags).
 the previous text timed out, the timer is restarted and the text is updated. If this value is
 zero, the text never disappears.
  */
-CV_EXPORTS void displayStatusBar(const String& winname, const String& text, int delayms = 0);
+CV_EXPORTS_W void displayStatusBar(const String& winname, const String& text, int delayms = 0);
 
 /** @brief Saves parameters of the specified window.
 
@@ -717,15 +807,17 @@ CV_EXPORTS  void stopLoop();
 
 The function createButton attaches a button to the control panel. Each button is added to a
 buttonbar to the right of the last button. A new buttonbar is created if nothing was attached to the
-control panel before, or if the last element attached to the control panel was a trackbar.
+control panel before, or if the last element attached to the control panel was a trackbar or if the
+QT_NEW_BUTTONBAR flag is added to the type.
 
 See below various examples of the cv::createButton function call: :
 @code
-    createButton(NULL,callbackButton);//create a push button "button 0", that will call callbackButton.
+    createButton("",callbackButton);//create a push button "button 0", that will call callbackButton.
     createButton("button2",callbackButton,NULL,QT_CHECKBOX,0);
     createButton("button3",callbackButton,&value);
     createButton("button5",callbackButton1,NULL,QT_RADIOBOX);
     createButton("button6",callbackButton2,NULL,QT_PUSH_BUTTON,1);
+    createButton("button6",callbackButton2,NULL,QT_PUSH_BUTTON|QT_NEW_BUTTONBAR);// create a push button in a new row
 @endcode
 
 @param  bar_name Name of the button.
@@ -747,8 +839,4 @@ CV_EXPORTS int createButton( const String& bar_name, ButtonCallback on_change,
 
 } // cv
 
-#ifndef DISABLE_OPENCV_24_COMPATIBILITY
-#include "opencv2/highgui/highgui_c.h"
-#endif
-
 #endif
diff --git a/IPL/include/opencv/opencv2/highgui/highgui_c.h b/IPL/include/opencv/opencv2/highgui/highgui_c.h
index 47fdb84..5d20b95 100644
--- a/IPL/include/opencv/opencv2/highgui/highgui_c.h
+++ b/IPL/include/opencv/opencv2/highgui/highgui_c.h
@@ -39,13 +39,11 @@
 //
 //M*/
 
-#ifndef __OPENCV_HIGHGUI_H__
-#define __OPENCV_HIGHGUI_H__
+#ifndef OPENCV_HIGHGUI_H
+#define OPENCV_HIGHGUI_H
 
 #include "opencv2/core/core_c.h"
 #include "opencv2/imgproc/imgproc_c.h"
-#include "opencv2/imgcodecs/imgcodecs_c.h"
-#include "opencv2/videoio/videoio_c.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -107,6 +105,7 @@ enum
     CV_WND_PROP_AUTOSIZE   = 1, //to change/get window's autosize property
     CV_WND_PROP_ASPECTRATIO= 2, //to change/get window's aspectratio property
     CV_WND_PROP_OPENGL     = 3, //to change/get window's opengl support
+    CV_WND_PROP_VISIBLE    = 4,
 
     //These 2 flags are used by cvNamedWindow and cvSet/GetWindowProperty
     CV_WINDOW_NORMAL       = 0x00000000, //the user can resize the window (no constraint)  / also use to switch a fullscreen window to a normal size
@@ -130,6 +129,11 @@ CVAPI(int) cvNamedWindow( const char* name, int flags CV_DEFAULT(CV_WINDOW_AUTOS
 CVAPI(void) cvSetWindowProperty(const char* name, int prop_id, double prop_value);
 CVAPI(double) cvGetWindowProperty(const char* name, int prop_id);
 
+#ifdef __cplusplus  // FIXIT remove in OpenCV 4.0
+/* Get window image rectangle coordinates, width and height */
+CVAPI(cv::Rect)cvGetWindowImageRect(const char* name);
+#endif
+
 /* display image within window (highgui windows remember their content) */
 CVAPI(void) cvShowImage( const char* name, const CvArr* image );
 
@@ -234,7 +238,7 @@ CVAPI(void) cvUpdateWindow(const char* window_name);
 #define set_preprocess_func cvSetPreprocessFuncWin32
 #define set_postprocess_func cvSetPostprocessFuncWin32
 
-#if defined WIN32 || defined _WIN32
+#if defined _WIN32
 
 CVAPI(void) cvSetPreprocessFuncWin32_(const void* callback);
 CVAPI(void) cvSetPostprocessFuncWin32_(const void* callback);
diff --git a/IPL/include/opencv/opencv2/img_hash.hpp b/IPL/include/opencv/opencv2/img_hash.hpp
new file mode 100644
index 0000000..5e7a928
--- /dev/null
+++ b/IPL/include/opencv/opencv2/img_hash.hpp
@@ -0,0 +1,78 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_IMG_HASH_H
+#define OPENCV_IMG_HASH_H
+
+#include "opencv2/img_hash/average_hash.hpp"
+#include "opencv2/img_hash/block_mean_hash.hpp"
+#include "opencv2/img_hash/color_moment_hash.hpp"
+#include "opencv2/img_hash/marr_hildreth_hash.hpp"
+#include "opencv2/img_hash/phash.hpp"
+#include "opencv2/img_hash/radial_variance_hash.hpp"
+
+/**
+@defgroup img_hash The module brings implementations of different image hashing algorithms.
+
+Provides algorithms to extract the hash of images and a fast way to figure out the most similar
+images in a huge data set.
+
+Namespace for all functions is cv::img_hash.
+
+### Supported Algorithms
+
+- Average hash (also called Different hash)
+- PHash (also called Perceptual hash)
+- Marr Hildreth Hash
+- Radial Variance Hash
+- Block Mean Hash (modes 0 and 1)
+- Color Moment Hash (this is the one and only hash algorithm that resists rotation attacks (-90~90 degrees))
+
+You can study more about image hashing from following paper and websites:
+
+- "Implementation and benchmarking of perceptual image hash functions" @cite zauner2010implementation
+- "Looks Like It" @cite lookslikeit
+
+### Code Example
+
+@include samples/hash_samples.cpp
+
+### Performance under different attacks
+
+![Performance chart](img_hash/doc/attack_performance.JPG)
+
+### Speed comparison with PHash library (100 images from ukbench)
+
+![Hash Computation chart](img_hash/doc/hash_computation_chart.JPG)
+![Hash comparison chart](img_hash/doc/hash_comparison_chart.JPG)
+
+As you can see, hash computation speed of img_hash module outperforms [PHash library](http://www.phash.org/) a lot.
+
+PS : I do not list out the comparison of Average hash, PHash and Color Moment hash, because I cannot
+find them in PHash.
+
+### Motivation
+
+Collects useful image hash algorithms into opencv, so we do not need to rewrite them by ourselves
+again and again or rely on another 3rd party library(ex : PHash library). BOVW or correlation
+matching are good and robust, but they are very slow compared with image hash; if you need to deal
+with a large scale CBIR(content based image retrieval) problem, image hash is a more reasonable
+solution.
+
+### More info
+
+You can learn more about img_hash modules from the following links; these links show you how to find
+similar images from the ukbench dataset and provide a thorough benchmark of different attacks(contrast, blur,
+noise(gaussian,pepper and salt), jpeg compression, watermark, resize).
+
+* [Introduction to image hash module of opencv](http://qtandopencv.blogspot.my/2016/06/introduction-to-image-hash-module-of.html)
+* [Speed up image hashing of opencv(img_hash) and introduce color moment hash](http://qtandopencv.blogspot.my/2016/06/speed-up-image-hashing-of-opencvimghash.html)
+
+### Contributors
+
+Tham Ngap Wei, thamngapwei@gmail.com
+
+*/
+
+#endif // OPENCV_IMG_HASH_H
diff --git a/IPL/include/opencv/opencv2/img_hash/average_hash.hpp b/IPL/include/opencv/opencv2/img_hash/average_hash.hpp
new file mode 100644
index 0000000..1204441
--- /dev/null
+++ b/IPL/include/opencv/opencv2/img_hash/average_hash.hpp
@@ -0,0 +1,39 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_AVERAGE_HASH_HPP
+#define OPENCV_AVERAGE_HASH_HPP
+
+#include "img_hash_base.hpp"
+
+namespace cv {
+namespace img_hash {
+
+//! @addtogroup img_hash
+//! @{
+
+/** @brief Computes average hash value of the input image
+
+This is a fast image hashing algorithm, but it only works in simple cases. For more details, please
+refer to @cite lookslikeit
+*/
+class CV_EXPORTS_W AverageHash : public ImgHashBase
+{
+public:
+    CV_WRAP static Ptr<AverageHash> create();
+protected:
+    AverageHash() {}
+};
+
+/** @brief Calculates img_hash::AverageHash in one call
+@param inputArr input image whose hash value is to be computed, type should be CV_8UC4, CV_8UC3 or CV_8UC1.
+@param outputArr Hash value of input, it will contain 16 hexadecimal numbers, return type is CV_8U
+*/
+CV_EXPORTS_W void averageHash(cv::InputArray inputArr, cv::OutputArray outputArr);
+
+//! @}
+
+}} // cv::img_hash::
+
+#endif // OPENCV_AVERAGE_HASH_HPP
diff --git a/IPL/include/opencv/opencv2/img_hash/block_mean_hash.hpp b/IPL/include/opencv/opencv2/img_hash/block_mean_hash.hpp
new file mode 100644
index 0000000..dbf2a5f
--- /dev/null
+++ b/IPL/include/opencv/opencv2/img_hash/block_mean_hash.hpp
@@ -0,0 +1,52 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_BLOCK_MEAN_HASH_HPP
+#define OPENCV_BLOCK_MEAN_HASH_HPP
+
+#include "img_hash_base.hpp"
+
+namespace cv {
+namespace img_hash {
+
+//! @addtogroup img_hash
+//! @{
+
+enum BlockMeanHashMode
+{
+    BLOCK_MEAN_HASH_MODE_0 = 0, //!< use fewer blocks and generate a 16*16/8 uchar hash value
+    BLOCK_MEAN_HASH_MODE_1 = 1, //!< use more blocks (step size/2) and generate a 31*31/8 + 1 uchar hash value
+};
+
+/** @brief Image hash based on block mean.
+
+See @cite zauner2010implementation for details.
+*/
+class CV_EXPORTS_W BlockMeanHash : public ImgHashBase
+{
+public:
+    /** @brief Set the mode of the BlockMeanHash object
+        @param mode the mode, see BlockMeanHashMode
+    */
+    CV_WRAP void setMode(int mode);
+    CV_WRAP std::vector<double> getMean() const;
+    CV_WRAP static Ptr<BlockMeanHash> create(int mode = BLOCK_MEAN_HASH_MODE_0);
+protected:
+    BlockMeanHash() {}
+};
+
+/** @brief Computes block mean hash of the input image
+    @param inputArr input image whose hash value is to be computed, type should be CV_8UC4, CV_8UC3 or CV_8UC1.
+    @param outputArr Hash value of input, its size depends on mode (see BlockMeanHashMode), return type is CV_8U
+    @param mode the mode, see BlockMeanHashMode
+*/
+CV_EXPORTS_W void blockMeanHash(cv::InputArray inputArr,
+                                cv::OutputArray outputArr,
+                                int mode = BLOCK_MEAN_HASH_MODE_0);
+
+//! @}
+
+}} // cv::img_hash::
+
+#endif // OPENCV_BLOCK_MEAN_HASH_HPP
diff --git a/IPL/include/opencv/opencv2/img_hash/color_moment_hash.hpp b/IPL/include/opencv/opencv2/img_hash/color_moment_hash.hpp
new file mode 100644
index 0000000..d0a820b
--- /dev/null
+++ b/IPL/include/opencv/opencv2/img_hash/color_moment_hash.hpp
@@ -0,0 +1,41 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_COLOR_MOMENT_HASH_HPP
+#define OPENCV_COLOR_MOMENT_HASH_HPP
+
+#include "img_hash_base.hpp"
+
+namespace cv {
+namespace img_hash {
+
+//! @addtogroup img_hash
+//! @{
+
+/** @brief Image hash based on color moments.
+
+See @cite tang2012perceptual for details.
+*/
+class CV_EXPORTS_W ColorMomentHash : public ImgHashBase
+{
+public:
+    CV_WRAP static Ptr<ColorMomentHash> create();
+protected:
+    ColorMomentHash() {}
+};
+
+/** @brief Computes color moment hash of the input, the algorithm
+    comes from the paper "Perceptual Hashing for Color Images
+    Using Invariant Moments"
+    @param inputArr input image whose hash value is to be computed,
+    type should be CV_8UC4, CV_8UC3 or CV_8UC1.
+    @param outputArr 42 hash values with type CV_64F(double)
+     */
+CV_EXPORTS_W void colorMomentHash(cv::InputArray inputArr, cv::OutputArray outputArr);
+
+//! @}
+
+}} // cv::img_hash::
+
+#endif // OPENCV_COLOR_MOMENT_HASH_HPP
diff --git a/IPL/include/opencv/opencv2/img_hash/img_hash_base.hpp b/IPL/include/opencv/opencv2/img_hash/img_hash_base.hpp
new file mode 100644
index 0000000..f0cc451
--- /dev/null
+++ b/IPL/include/opencv/opencv2/img_hash/img_hash_base.hpp
@@ -0,0 +1,46 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_IMG_HASH_BASE_HPP
+#define OPENCV_IMG_HASH_BASE_HPP
+
+#include "opencv2/core.hpp"
+
+namespace cv {
+namespace img_hash {
+
+//! @addtogroup img_hash
+//! @{
+
+/** @brief The base class for image hash algorithms
+ */
+class CV_EXPORTS_W ImgHashBase : public Algorithm
+{
+public:
+    class ImgHashImpl;
+
+    ~ImgHashBase();
+    /** @brief Computes hash of the input image
+        @param inputArr input image whose hash value is to be computed
+        @param outputArr hash of the image
+    */
+    CV_WRAP void compute(cv::InputArray inputArr, cv::OutputArray outputArr);
+    /** @brief Compare the hash values hashOne and hashTwo
+        @param hashOne Hash value one
+        @param hashTwo Hash value two
+        @return value indicating the similarity between hashOne and hashTwo; the meaning
+        of the value varies from algorithm to algorithm
+    */
+    CV_WRAP double compare(cv::InputArray hashOne, cv::InputArray hashTwo) const;
+protected:
+    ImgHashBase();
+protected:
+    Ptr<ImgHashImpl> pImpl;
+};
+
+//! @}
+
+} } // cv::img_hash::
+
+#endif // OPENCV_IMG_HASH_BASE_HPP
diff --git a/IPL/include/opencv/opencv2/img_hash/marr_hildreth_hash.hpp b/IPL/include/opencv/opencv2/img_hash/marr_hildreth_hash.hpp
new file mode 100644
index 0000000..a9b04f9
--- /dev/null
+++ b/IPL/include/opencv/opencv2/img_hash/marr_hildreth_hash.hpp
@@ -0,0 +1,64 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_MARR_HILDRETH_HASH_HPP
+#define OPENCV_MARR_HILDRETH_HASH_HPP
+
+#include "img_hash_base.hpp"
+
+namespace cv {
+namespace img_hash {
+
+//! @addtogroup img_hash
+//! @{
+
+/** @brief Marr-Hildreth Operator Based Hash, slowest but more discriminative.
+
+See @cite zauner2010implementation for details.
+*/
+class CV_EXPORTS_W MarrHildrethHash : public ImgHashBase
+{
+public:
+    /**
+     * @brief Returns the alpha kernel parameter, see setKernelParam
+     */
+    CV_WRAP float getAlpha() const;
+
+    /**
+     * @brief Returns the scale kernel parameter, see setKernelParam
+     */
+    CV_WRAP float getScale() const;
+
+    /** @brief Set Mh kernel parameters
+        @param alpha scale factor for marr wavelet (default=2).
+        @param scale level of scale factor (default = 1)
+    */
+    CV_WRAP void setKernelParam(float alpha, float scale);
+
+    /**
+        @param alpha scale factor for marr wavelet (default=2).
+        @param scale level of scale factor (default = 1)
+    */
+    CV_WRAP static Ptr<MarrHildrethHash> create(float alpha = 2.0f, float scale = 1.0f);
+protected:
+    MarrHildrethHash() {}
+};
+
+/** @brief Computes Marr-Hildreth hash value of the input image
+    @param inputArr input image whose hash value is to be computed,
+    type should be CV_8UC4, CV_8UC3, CV_8UC1.
+    @param outputArr Hash value of input, it will contain 16 hex
+    decimal number, return type is CV_8U
+    @param alpha scale factor for marr wavelet (default=2).
+    @param scale level of scale factor (default = 1)
+*/
+CV_EXPORTS_W void marrHildrethHash(cv::InputArray inputArr,
+                                   cv::OutputArray outputArr,
+                                   float alpha = 2.0f, float scale = 1.0f);
+
+//! @}
+
+}} // cv::img_hash::
+
+#endif // OPENCV_MARR_HILDRETH_HASH_HPP
diff --git a/IPL/include/opencv/opencv2/img_hash/phash.hpp b/IPL/include/opencv/opencv2/img_hash/phash.hpp
new file mode 100644
index 0000000..d57cd6f
--- /dev/null
+++ b/IPL/include/opencv/opencv2/img_hash/phash.hpp
@@ -0,0 +1,41 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_PHASH_HPP
+#define OPENCV_PHASH_HPP
+
+#include "img_hash_base.hpp"
+
+namespace cv {
+namespace img_hash {
+
+//! @addtogroup img_hash
+//! @{
+
+/** @brief pHash
+
+Slower than averageHash, but tolerant of minor modifications.
+
+This algorithm can cope with more variation than averageHash; for more details please refer to @cite lookslikeit
+*/
+class CV_EXPORTS_W PHash : public ImgHashBase
+{
+public:
+    CV_WRAP static Ptr<PHash> create();
+protected:
+    PHash() {}
+};
+
+/** @brief Computes pHash value of the input image
+    @param inputArr input image whose hash value is to be computed,
+     type should be CV_8UC4, CV_8UC3, CV_8UC1.
+    @param outputArr Hash value of input, it will contain 8 uchar values
+*/
+CV_EXPORTS_W void pHash(cv::InputArray inputArr, cv::OutputArray outputArr);
+
+//! @}
+
+} } // cv::img_hash::
+
+#endif // OPENCV_PHASH_HPP
diff --git a/IPL/include/opencv/opencv2/img_hash/radial_variance_hash.hpp b/IPL/include/opencv/opencv2/img_hash/radial_variance_hash.hpp
new file mode 100644
index 0000000..455f285
--- /dev/null
+++ b/IPL/include/opencv/opencv2/img_hash/radial_variance_hash.hpp
@@ -0,0 +1,58 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_RADIAL_VARIANCE_HASH_HPP
+#define OPENCV_RADIAL_VARIANCE_HASH_HPP
+
+#include "img_hash_base.hpp"
+
+namespace cv {
+namespace img_hash {
+
+//! @addtogroup img_hash
+//! @{
+
+
+/** @brief Image hash based on Radon transform.
+
+See @cite tang2012perceptual for details.
+*/
+class CV_EXPORTS_W RadialVarianceHash : public ImgHashBase
+{
+public:
+    CV_WRAP static Ptr<RadialVarianceHash> create(double sigma = 1, int numOfAngleLine = 180);
+
+    CV_WRAP int getNumOfAngleLine() const;
+    CV_WRAP double getSigma() const;
+
+    CV_WRAP void setNumOfAngleLine(int value);
+    CV_WRAP void setSigma(double value);
+
+    // internals (declared without CV_WRAP)
+    std::vector<double> getFeatures();
+    cv::Mat getHash();
+    Mat getPixPerLine(Mat const &input);
+    Mat getProjection();
+protected:
+    RadialVarianceHash() {}
+};
+
+/** @brief Computes radial variance hash of the input image
+    @param inputArr input image whose hash value is to be computed,
+    type should be CV_8UC4, CV_8UC3, CV_8UC1.
+    @param outputArr Hash value of input
+    @param sigma Gaussian kernel standard deviation
+    @param numOfAngleLine The number of angles to consider
+     */
+CV_EXPORTS_W void radialVarianceHash(cv::InputArray inputArr,
+                                     cv::OutputArray outputArr,
+                                     double sigma = 1,
+                                     int numOfAngleLine = 180);
+
+
+//! @}
+
+}} // cv::img_hash::
+
+#endif // OPENCV_RADIAL_VARIANCE_HASH_HPP
diff --git a/IPL/include/opencv/opencv2/imgcodecs.hpp b/IPL/include/opencv/opencv2/imgcodecs.hpp
index ac0fd24..89b014d 100644
--- a/IPL/include/opencv/opencv2/imgcodecs.hpp
+++ b/IPL/include/opencv/opencv2/imgcodecs.hpp
@@ -40,8 +40,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_IMGCODECS_HPP__
-#define __OPENCV_IMGCODECS_HPP__
+#ifndef OPENCV_IMGCODECS_HPP
+#define OPENCV_IMGCODECS_HPP
 
 #include "opencv2/core.hpp"
 
@@ -62,8 +62,8 @@ namespace cv
 
 //! Imread flags
 enum ImreadModes {
-       IMREAD_UNCHANGED            = -1, //!< If set, return the loaded image as is (with alpha channel, otherwise it gets cropped).
-       IMREAD_GRAYSCALE            = 0,  //!< If set, always convert image to the single channel grayscale image.
+       IMREAD_UNCHANGED            = -1, //!< If set, return the loaded image as is (with alpha channel, otherwise it gets cropped). Ignore EXIF orientation.
+       IMREAD_GRAYSCALE            = 0,  //!< If set, always convert image to the single channel grayscale image (codec internal conversion).
        IMREAD_COLOR                = 1,  //!< If set, always convert image to the 3 channel BGR color image.
        IMREAD_ANYDEPTH             = 2,  //!< If set, return 16-bit/32-bit image when the input has the corresponding depth, otherwise convert it to 8-bit.
        IMREAD_ANYCOLOR             = 4,  //!< If set, the image is read in any possible color format.
@@ -73,7 +73,8 @@ enum ImreadModes {
        IMREAD_REDUCED_GRAYSCALE_4  = 32, //!< If set, always convert image to the single channel grayscale image and the image size reduced 1/4.
        IMREAD_REDUCED_COLOR_4      = 33, //!< If set, always convert image to the 3 channel BGR color image and the image size reduced 1/4.
        IMREAD_REDUCED_GRAYSCALE_8  = 64, //!< If set, always convert image to the single channel grayscale image and the image size reduced 1/8.
-       IMREAD_REDUCED_COLOR_8      = 65  //!< If set, always convert image to the 3 channel BGR color image and the image size reduced 1/8.
+       IMREAD_REDUCED_COLOR_8      = 65, //!< If set, always convert image to the 3 channel BGR color image and the image size reduced 1/8.
+       IMREAD_IGNORE_ORIENTATION   = 128 //!< If set, do not rotate the image according to EXIF's orientation flag.
      };
 
 //! Imwrite flags
@@ -84,11 +85,24 @@ enum ImwriteFlags {
        IMWRITE_JPEG_RST_INTERVAL   = 4,  //!< JPEG restart interval, 0 - 65535, default is 0 - no restart.
        IMWRITE_JPEG_LUMA_QUALITY   = 5,  //!< Separate luma quality level, 0 - 100, default is 0 - don't use.
        IMWRITE_JPEG_CHROMA_QUALITY = 6,  //!< Separate chroma quality level, 0 - 100, default is 0 - don't use.
-       IMWRITE_PNG_COMPRESSION     = 16, //!< For PNG, it can be the compression level from 0 to 9. A higher value means a smaller size and longer compression time. Default value is 3.
-       IMWRITE_PNG_STRATEGY        = 17, //!< One of cv::ImwritePNGFlags, default is IMWRITE_PNG_STRATEGY_DEFAULT.
+       IMWRITE_PNG_COMPRESSION     = 16, //!< For PNG, it can be the compression level from 0 to 9. A higher value means a smaller size and longer compression time. If specified, strategy is changed to IMWRITE_PNG_STRATEGY_DEFAULT (Z_DEFAULT_STRATEGY). Default value is 1 (best speed setting).
+       IMWRITE_PNG_STRATEGY        = 17, //!< One of cv::ImwritePNGFlags, default is IMWRITE_PNG_STRATEGY_RLE.
        IMWRITE_PNG_BILEVEL         = 18, //!< Binary level PNG, 0 or 1, default is 0.
        IMWRITE_PXM_BINARY          = 32, //!< For PPM, PGM, or PBM, it can be a binary format flag, 0 or 1. Default value is 1.
-       IMWRITE_WEBP_QUALITY        = 64  //!< For WEBP, it can be a quality from 1 to 100 (the higher is the better). By default (without any parameter) and for quality above 100 the lossless compression is used.
+       IMWRITE_EXR_TYPE            = (3 << 4) + 0, /* 48 */ //!< override EXR storage type (FLOAT (FP32) is default)
+       IMWRITE_WEBP_QUALITY        = 64, //!< For WEBP, it can be a quality from 1 to 100 (the higher is the better). By default (without any parameter) and for quality above 100 the lossless compression is used.
+       IMWRITE_PAM_TUPLETYPE       = 128,//!< For PAM, sets the TUPLETYPE field to the corresponding string value that is defined for the format
+       IMWRITE_TIFF_RESUNIT = 256,//!< For TIFF, use to specify which DPI resolution unit to set; see libtiff documentation for valid values
+       IMWRITE_TIFF_XDPI = 257,//!< For TIFF, use to specify the X direction DPI
+       IMWRITE_TIFF_YDPI = 258, //!< For TIFF, use to specify the Y direction DPI
+       IMWRITE_TIFF_COMPRESSION = 259, //!< For TIFF, use to specify the image compression scheme. See libtiff for integer constants corresponding to compression formats. Note, for images whose depth is CV_32F, only libtiff's SGILOG compression scheme is used. For other supported depths, the compression scheme can be specified by this flag; LZW compression is the default.
+       IMWRITE_JPEG2000_COMPRESSION_X1000 = 272 //!< For JPEG2000, use to specify the target compression rate (multiplied by 1000). The value can be from 0 to 1000. Default is 1000.
+     };
+
+enum ImwriteEXRTypeFlags {
+       /*IMWRITE_EXR_TYPE_UINT = 0, //!< not supported */
+       IMWRITE_EXR_TYPE_HALF = 1,   //!< store as HALF (FP16)
+       IMWRITE_EXR_TYPE_FLOAT = 2   //!< store as FP32 (default)
      };
 
 //! Imwrite PNG specific flags used to tune the compression algorithm.
@@ -107,6 +121,16 @@ enum ImwritePNGFlags {
        IMWRITE_PNG_STRATEGY_FIXED        = 4  //!< Using this value prevents the use of dynamic Huffman codes, allowing for a simpler decoder for special applications.
      };
 
+//! Imwrite PAM specific tupletype flags used to define the 'TUPLTYPE' field of a PAM file.
+enum ImwritePAMFlags {
+       IMWRITE_PAM_FORMAT_NULL = 0,
+       IMWRITE_PAM_FORMAT_BLACKANDWHITE = 1,
+       IMWRITE_PAM_FORMAT_GRAYSCALE = 2,
+       IMWRITE_PAM_FORMAT_GRAYSCALE_ALPHA = 3,
+       IMWRITE_PAM_FORMAT_RGB = 4,
+       IMWRITE_PAM_FORMAT_RGB_ALPHA = 5,
+     };
+
 /** @brief Loads an image from a file.
 
 @anchor imread
@@ -118,21 +142,23 @@ returns an empty matrix ( Mat::data==NULL ).
 Currently, the following file formats are supported:
 
 -   Windows bitmaps - \*.bmp, \*.dib (always supported)
--   JPEG files - \*.jpeg, \*.jpg, \*.jpe (see the *Notes* section)
--   JPEG 2000 files - \*.jp2 (see the *Notes* section)
--   Portable Network Graphics - \*.png (see the *Notes* section)
--   WebP - \*.webp (see the *Notes* section)
+-   JPEG files - \*.jpeg, \*.jpg, \*.jpe (see the *Note* section)
+-   JPEG 2000 files - \*.jp2 (see the *Note* section)
+-   Portable Network Graphics - \*.png (see the *Note* section)
+-   WebP - \*.webp (see the *Note* section)
 -   Portable image format - \*.pbm, \*.pgm, \*.ppm \*.pxm, \*.pnm (always supported)
+-   PFM files - \*.pfm (see the *Note* section)
 -   Sun rasters - \*.sr, \*.ras (always supported)
--   TIFF files - \*.tiff, \*.tif (see the *Notes* section)
--   OpenEXR Image files - \*.exr (see the *Notes* section)
+-   TIFF files - \*.tiff, \*.tif (see the *Note* section)
+-   OpenEXR Image files - \*.exr (see the *Note* section)
 -   Radiance HDR - \*.hdr, \*.pic (always supported)
--   Raster and Vector geospatial data supported by Gdal (see the *Notes* section)
+-   Raster and Vector geospatial data supported by GDAL (see the *Note* section)
 
 @note
-
 -   The function determines the type of an image by the content, not by the file extension.
 -   In the case of color images, the decoded images will have the channels stored in **B G R** order.
+-   When using IMREAD_GRAYSCALE, the codec's internal grayscale conversion will be used, if available.
+    Results may differ from the output of cvtColor().
 -   On Microsoft Windows\* OS and MacOSX\*, the codecs shipped with an OpenCV image (libjpeg,
     libpng, libtiff, and libjasper) are used by default. So, OpenCV can always read JPEGs, PNGs,
     and TIFFs. On MacOSX, there is also an option to use native MacOSX image readers. But beware
@@ -143,9 +169,16 @@ Currently, the following file formats are supported:
     files, for example, "libjpeg-dev", in Debian\* and Ubuntu\*) to get the codec support or turn
     on the OPENCV_BUILD_3RDPARTY_LIBS flag in CMake.
 -   In the case you set *WITH_GDAL* flag to true in CMake and @ref IMREAD_LOAD_GDAL to load the image,
-    then [GDAL](http://www.gdal.org) driver will be used in order to decode the image by supporting
+    then the [GDAL](http://www.gdal.org) driver will be used in order to decode the image, supporting
     the following formats: [Raster](http://www.gdal.org/formats_list.html),
     [Vector](http://www.gdal.org/ogr_formats.html).
+-   If EXIF information is embedded in the image file, the EXIF orientation will be taken into account
+    and thus the image will be rotated accordingly except if the flags @ref IMREAD_IGNORE_ORIENTATION
+    or @ref IMREAD_UNCHANGED are passed.
+-   Use the IMREAD_UNCHANGED flag to keep the floating point values from PFM image.
+-   By default number of pixels must be less than 2^30. Limit can be set using system
+    variable OPENCV_IO_MAX_IMAGE_PIXELS
+
 @param filename Name of file to be loaded.
 @param flags Flag that can take values of cv::ImreadModes
 */
@@ -159,65 +192,30 @@ The function imreadmulti loads a multi-page image from the specified file into a
 @param mats A vector of Mat objects holding each page, if more than one.
 @sa cv::imread
 */
-CV_EXPORTS_W bool imreadmulti(const String& filename, std::vector<Mat>& mats, int flags = IMREAD_ANYCOLOR);
+CV_EXPORTS_W bool imreadmulti(const String& filename, CV_OUT std::vector<Mat>& mats, int flags = IMREAD_ANYCOLOR);
 
 /** @brief Saves an image to a specified file.
 
 The function imwrite saves the image to the specified file. The image format is chosen based on the
-filename extension (see cv::imread for the list of extensions). Only 8-bit (or 16-bit unsigned (CV_16U)
-in case of PNG, JPEG 2000, and TIFF) single-channel or 3-channel (with 'BGR' channel order) images
-can be saved using this function. If the format, depth or channel order is different, use
-Mat::convertTo , and cv::cvtColor to convert it before saving. Or, use the universal FileStorage I/O
+filename extension (see cv::imread for the list of extensions). In general, only 8-bit
+single-channel or 3-channel (with 'BGR' channel order) images
+can be saved using this function, with these exceptions:
+
+- 16-bit unsigned (CV_16U) images can be saved in the case of PNG, JPEG 2000, and TIFF formats
+- 32-bit float (CV_32F) images can be saved in PFM, TIFF, OpenEXR, and Radiance HDR formats;
+  3-channel (CV_32FC3) TIFF images will be saved using the LogLuv high dynamic range encoding
+  (4 bytes per pixel)
+- PNG images with an alpha channel can be saved using this function. To do this, create
+8-bit (or 16-bit) 4-channel image BGRA, where the alpha channel goes last. Fully transparent pixels
+should have alpha set to 0, fully opaque pixels should have alpha set to 255/65535 (see the code sample below).
+
+If the format, depth or channel order is different, use
+Mat::convertTo and cv::cvtColor to convert it before saving. Or, use the universal FileStorage I/O
 functions to save the image to XML or YAML format.
 
-It is possible to store PNG images with an alpha channel using this function. To do this, create
-8-bit (or 16-bit) 4-channel image BGRA, where the alpha channel goes last. Fully transparent pixels
-should have alpha set to 0, fully opaque pixels should have alpha set to 255/65535.
-
-The sample below shows how to create such a BGRA image and store to PNG file. It also demonstrates how to set custom
-compression parameters :
-@code
-    #include <opencv2/opencv.hpp>
-
-    using namespace cv;
-    using namespace std;
-
-    void createAlphaMat(Mat &mat)
-    {
-        CV_Assert(mat.channels() == 4);
-        for (int i = 0; i < mat.rows; ++i) {
-            for (int j = 0; j < mat.cols; ++j) {
-                Vec4b& bgra = mat.at<Vec4b>(i, j);
-                bgra[0] = UCHAR_MAX; // Blue
-                bgra[1] = saturate_cast<uchar>((float (mat.cols - j)) / ((float)mat.cols) * UCHAR_MAX); // Green
-                bgra[2] = saturate_cast<uchar>((float (mat.rows - i)) / ((float)mat.rows) * UCHAR_MAX); // Red
-                bgra[3] = saturate_cast<uchar>(0.5 * (bgra[1] + bgra[2])); // Alpha
-            }
-        }
-    }
-
-    int main(int argv, char **argc)
-    {
-        // Create mat with alpha channel
-        Mat mat(480, 640, CV_8UC4);
-        createAlphaMat(mat);
-
-        vector<int> compression_params;
-        compression_params.push_back(IMWRITE_PNG_COMPRESSION);
-        compression_params.push_back(9);
-
-        try {
-            imwrite("alpha.png", mat, compression_params);
-        }
-        catch (cv::Exception& ex) {
-            fprintf(stderr, "Exception converting image to PNG format: %s\n", ex.what());
-            return 1;
-        }
-
-        fprintf(stdout, "Saved PNG file with alpha data.\n");
-        return 0;
-    }
-@endcode
+The sample below shows how to create a BGRA image and save it to a PNG file. It also demonstrates how to set custom
+compression parameters:
+@include snippets/imgcodecs_imwrite.cpp
 @param filename Name of the file.
 @param img Image to be saved.
 @param params Format-specific parameters encoded as pairs (paramId_1, paramValue_1, paramId_2, paramValue_2, ... .) see cv::ImwriteFlags
@@ -260,8 +258,21 @@ CV_EXPORTS_W bool imencode( const String& ext, InputArray img,
                             CV_OUT std::vector<uchar>& buf,
                             const std::vector<int>& params = std::vector<int>());
 
+/** @brief Returns true if the specified image can be decoded by OpenCV
+
+@param filename File name of the image
+*/
+CV_EXPORTS_W bool haveImageReader( const String& filename );
+
+/** @brief Returns true if an image with the specified filename can be encoded by OpenCV
+
+ @param filename File name of the image
+ */
+CV_EXPORTS_W bool haveImageWriter( const String& filename );
+
+
 //! @} imgcodecs
 
 } // cv
 
-#endif //__OPENCV_IMGCODECS_HPP__
+#endif //OPENCV_IMGCODECS_HPP
diff --git a/IPL/include/opencv/opencv2/imgcodecs/imgcodecs_c.h b/IPL/include/opencv/opencv2/imgcodecs/imgcodecs_c.h
index ad793cc..c78b3f7 100644
--- a/IPL/include/opencv/opencv2/imgcodecs/imgcodecs_c.h
+++ b/IPL/include/opencv/opencv2/imgcodecs/imgcodecs_c.h
@@ -1,137 +1 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                        Intel License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000, Intel Corporation, all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of Intel Corporation may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifndef __OPENCV_IMGCODECS_H__
-#define __OPENCV_IMGCODECS_H__
-
-#include "opencv2/core/core_c.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/** @addtogroup imgcodecs_c
-  @{
-  */
-
-enum
-{
-/* 8bit, color or not */
-    CV_LOAD_IMAGE_UNCHANGED  =-1,
-/* 8bit, gray */
-    CV_LOAD_IMAGE_GRAYSCALE  =0,
-/* ?, color */
-    CV_LOAD_IMAGE_COLOR      =1,
-/* any depth, ? */
-    CV_LOAD_IMAGE_ANYDEPTH   =2,
-/* ?, any color */
-    CV_LOAD_IMAGE_ANYCOLOR   =4
-};
-
-/* load image from file
-  iscolor can be a combination of above flags where CV_LOAD_IMAGE_UNCHANGED
-  overrides the other flags
-  using CV_LOAD_IMAGE_ANYCOLOR alone is equivalent to CV_LOAD_IMAGE_UNCHANGED
-  unless CV_LOAD_IMAGE_ANYDEPTH is specified images are converted to 8bit
-*/
-CVAPI(IplImage*) cvLoadImage( const char* filename, int iscolor CV_DEFAULT(CV_LOAD_IMAGE_COLOR));
-CVAPI(CvMat*) cvLoadImageM( const char* filename, int iscolor CV_DEFAULT(CV_LOAD_IMAGE_COLOR));
-
-enum
-{
-    CV_IMWRITE_JPEG_QUALITY =1,
-    CV_IMWRITE_JPEG_PROGRESSIVE =2,
-    CV_IMWRITE_JPEG_OPTIMIZE =3,
-    CV_IMWRITE_JPEG_RST_INTERVAL =4,
-    CV_IMWRITE_JPEG_LUMA_QUALITY =5,
-    CV_IMWRITE_JPEG_CHROMA_QUALITY =6,
-    CV_IMWRITE_PNG_COMPRESSION =16,
-    CV_IMWRITE_PNG_STRATEGY =17,
-    CV_IMWRITE_PNG_BILEVEL =18,
-    CV_IMWRITE_PNG_STRATEGY_DEFAULT =0,
-    CV_IMWRITE_PNG_STRATEGY_FILTERED =1,
-    CV_IMWRITE_PNG_STRATEGY_HUFFMAN_ONLY =2,
-    CV_IMWRITE_PNG_STRATEGY_RLE =3,
-    CV_IMWRITE_PNG_STRATEGY_FIXED =4,
-    CV_IMWRITE_PXM_BINARY =32,
-    CV_IMWRITE_WEBP_QUALITY =64
-};
-
-/* save image to file */
-CVAPI(int) cvSaveImage( const char* filename, const CvArr* image,
-                        const int* params CV_DEFAULT(0) );
-
-/* decode image stored in the buffer */
-CVAPI(IplImage*) cvDecodeImage( const CvMat* buf, int iscolor CV_DEFAULT(CV_LOAD_IMAGE_COLOR));
-CVAPI(CvMat*) cvDecodeImageM( const CvMat* buf, int iscolor CV_DEFAULT(CV_LOAD_IMAGE_COLOR));
-
-/* encode image and store the result as a byte vector (single-row 8uC1 matrix) */
-CVAPI(CvMat*) cvEncodeImage( const char* ext, const CvArr* image,
-                             const int* params CV_DEFAULT(0) );
-
-enum
-{
-    CV_CVTIMG_FLIP      =1,
-    CV_CVTIMG_SWAP_RB   =2
-};
-
-/* utility function: convert one image to another with optional vertical flip */
-CVAPI(void) cvConvertImage( const CvArr* src, CvArr* dst, int flags CV_DEFAULT(0));
-
-CVAPI(int) cvHaveImageReader(const char* filename);
-CVAPI(int) cvHaveImageWriter(const char* filename);
-
-
-/****************************************************************************************\
-*                              Obsolete functions/synonyms                               *
-\****************************************************************************************/
-
-#define cvvLoadImage(name) cvLoadImage((name),1)
-#define cvvSaveImage cvSaveImage
-#define cvvConvertImage cvConvertImage
-
-/** @} imgcodecs_c */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif // __OPENCV_IMGCODECS_H__
+#error "This header with legacy C API declarations has been removed from OpenCV. Legacy constants are available from legacy/constants_c.h file."
diff --git a/IPL/include/opencv/opencv2/imgcodecs/ios.h b/IPL/include/opencv/opencv2/imgcodecs/ios.h
index fbd6371..a90c6d3 100644
--- a/IPL/include/opencv/opencv2/imgcodecs/ios.h
+++ b/IPL/include/opencv/opencv2/imgcodecs/ios.h
@@ -50,8 +50,8 @@
 //! @addtogroup imgcodecs_ios
 //! @{
 
-UIImage* MatToUIImage(const cv::Mat& image);
-void UIImageToMat(const UIImage* image,
-                         cv::Mat& m, bool alphaExist = false);
+CV_EXPORTS UIImage* MatToUIImage(const cv::Mat& image);
+CV_EXPORTS void UIImageToMat(const UIImage* image,
+                             cv::Mat& m, bool alphaExist = false);
 
 //! @}
diff --git a/IPL/include/opencv/opencv2/imgcodecs/legacy/constants_c.h b/IPL/include/opencv/opencv2/imgcodecs/legacy/constants_c.h
new file mode 100644
index 0000000..de7be4f
--- /dev/null
+++ b/IPL/include/opencv/opencv2/imgcodecs/legacy/constants_c.h
@@ -0,0 +1,54 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_IMGCODECS_LEGACY_CONSTANTS_H
+#define OPENCV_IMGCODECS_LEGACY_CONSTANTS_H
+
+/* duplicate of "ImreadModes" enumeration for better compatibility with OpenCV 3.x */
+enum
+{
+/* 8bit, color or not */
+    CV_LOAD_IMAGE_UNCHANGED  =-1,
+/* 8bit, gray */
+    CV_LOAD_IMAGE_GRAYSCALE  =0,
+/* ?, color */
+    CV_LOAD_IMAGE_COLOR      =1,
+/* any depth, ? */
+    CV_LOAD_IMAGE_ANYDEPTH   =2,
+/* ?, any color */
+    CV_LOAD_IMAGE_ANYCOLOR   =4,
+/* ?, no rotate */
+    CV_LOAD_IMAGE_IGNORE_ORIENTATION  =128
+};
+
+/* duplicate of "ImwriteFlags" enumeration for better compatibility with OpenCV 3.x */
+enum
+{
+    CV_IMWRITE_JPEG_QUALITY =1,
+    CV_IMWRITE_JPEG_PROGRESSIVE =2,
+    CV_IMWRITE_JPEG_OPTIMIZE =3,
+    CV_IMWRITE_JPEG_RST_INTERVAL =4,
+    CV_IMWRITE_JPEG_LUMA_QUALITY =5,
+    CV_IMWRITE_JPEG_CHROMA_QUALITY =6,
+    CV_IMWRITE_PNG_COMPRESSION =16,
+    CV_IMWRITE_PNG_STRATEGY =17,
+    CV_IMWRITE_PNG_BILEVEL =18,
+    CV_IMWRITE_PNG_STRATEGY_DEFAULT =0,
+    CV_IMWRITE_PNG_STRATEGY_FILTERED =1,
+    CV_IMWRITE_PNG_STRATEGY_HUFFMAN_ONLY =2,
+    CV_IMWRITE_PNG_STRATEGY_RLE =3,
+    CV_IMWRITE_PNG_STRATEGY_FIXED =4,
+    CV_IMWRITE_PXM_BINARY =32,
+    CV_IMWRITE_EXR_TYPE = 48,
+    CV_IMWRITE_WEBP_QUALITY =64,
+    CV_IMWRITE_PAM_TUPLETYPE = 128,
+    CV_IMWRITE_PAM_FORMAT_NULL = 0,
+    CV_IMWRITE_PAM_FORMAT_BLACKANDWHITE = 1,
+    CV_IMWRITE_PAM_FORMAT_GRAYSCALE = 2,
+    CV_IMWRITE_PAM_FORMAT_GRAYSCALE_ALPHA = 3,
+    CV_IMWRITE_PAM_FORMAT_RGB = 4,
+    CV_IMWRITE_PAM_FORMAT_RGB_ALPHA = 5,
+};
+
+#endif // OPENCV_IMGCODECS_LEGACY_CONSTANTS_H
diff --git a/IPL/include/opencv/opencv2/imgproc.hpp b/IPL/include/opencv/opencv2/imgproc.hpp
index 1f330f2..e7ccf28 100644
--- a/IPL/include/opencv/opencv2/imgproc.hpp
+++ b/IPL/include/opencv/opencv2/imgproc.hpp
@@ -40,13 +40,16 @@
 //
 //M*/
 
-#ifndef __OPENCV_IMGPROC_HPP__
-#define __OPENCV_IMGPROC_HPP__
+#ifndef OPENCV_IMGPROC_HPP
+#define OPENCV_IMGPROC_HPP
 
 #include "opencv2/core.hpp"
 
 /**
-  @defgroup imgproc Image processing
+  @defgroup imgproc Image Processing
+
+This module includes image-processing functions.
+
   @{
     @defgroup imgproc_filter Image Filtering
 
@@ -67,7 +70,7 @@ processing the left-most pixels in each row, you need pixels to the left of them
 of the image. You can let these pixels be the same as the left-most image pixels ("replicated
 border" extrapolation method), or assume that all the non-existing pixels are zeros ("constant
 border" extrapolation method), and so on. OpenCV enables you to specify the extrapolation method.
-For details, see cv::BorderTypes
+For details, see #BorderTypes
 
 @anchor filter_depths
 ### Depth combinations
@@ -102,7 +105,7 @@ the simplest and the fastest resize, need to solve two main problems with the ab
 previous section, for some \f$(x,y)\f$, either one of \f$f_x(x,y)\f$, or \f$f_y(x,y)\f$, or both
 of them may fall outside of the image. In this case, an extrapolation method needs to be used.
 OpenCV provides the same selection of extrapolation methods as in the filtering functions. In
-addition, it provides the method BORDER_TRANSPARENT. This means that the corresponding pixels in
+addition, it provides the method #BORDER_TRANSPARENT. This means that the corresponding pixels in
 the destination image will not be modified at all.
 
 - Interpolation of pixel values. Usually \f$f_x(x,y)\f$ and \f$f_y(x,y)\f$ are floating-point
@@ -117,6 +120,8 @@ f_y(x,y))\f$, and then the value of the polynomial at \f$(f_x(x,y), f_y(x,y))\f$
 interpolated pixel value. In OpenCV, you can choose between several interpolation methods. See
 resize for details.
 
+@note The geometrical transformations do not work with `CV_8S` or `CV_32S` images.
+
     @defgroup imgproc_misc Miscellaneous Image Transformations
     @defgroup imgproc_draw Drawing Functions
 
@@ -146,6 +151,7 @@ case, the color[3] is simply copied to the repainted pixels. Thus, if you want t
 semi-transparent shapes, you can paint them in a separate buffer and then blend it with the main
 image.
 
+    @defgroup imgproc_color_conversions Color Space Conversions
     @defgroup imgproc_colormap ColorMaps in OpenCV
 
 The human perception isn't built for observing fine changes in grayscale images. Human eyes are more
@@ -157,49 +163,16 @@ In OpenCV you only need applyColorMap to apply a colormap on a given image. The
 code reads the path to an image from command line, applies a Jet colormap on it and shows the
 result:
 
-@code
-#include <opencv2/core.hpp>
-#include <opencv2/imgproc.hpp>
-#include <opencv2/imgcodecs.hpp>
-#include <opencv2/highgui.hpp>
-using namespace cv;
-
-#include <iostream>
-using namespace std;
-
-int main(int argc, const char *argv[])
-{
-    // We need an input image. (can be grayscale or color)
-    if (argc < 2)
-    {
-        cerr << "We need an image to process here. Please run: colorMap [path_to_image]" << endl;
-        return -1;
-    }
-    Mat img_in = imread(argv[1]);
-    if(img_in.empty())
-    {
-        cerr << "Sample image (" << argv[1] << ") is empty. Please adjust your path, so it points to a valid input image!" << endl;
-        return -1;
-    }
-    // Holds the colormap version of the image:
-    Mat img_color;
-    // Apply the colormap:
-    applyColorMap(img_in, img_color, COLORMAP_JET);
-    // Show the result:
-    imshow("colorMap", img_color);
-    waitKey(0);
-    return 0;
-}
-@endcode
+@include snippets/imgproc_applyColorMap.cpp
 
-@see cv::ColormapTypes
+@see #ColormapTypes
 
     @defgroup imgproc_subdiv2d Planar Subdivision
 
 The Subdiv2D class described in this section is used to perform various planar subdivision on
 a set of 2D points (represented as vector of Point2f). OpenCV subdivides a plane into triangles
-using the Delaunay’s algorithm, which corresponds to the dual graph of the Voronoi diagram.
-In the figure below, the Delaunay’s triangulation is marked with black lines and the Voronoi
+using the Delaunay's algorithm, which corresponds to the dual graph of the Voronoi diagram.
+In the figure below, the Delaunay's triangulation is marked with black lines and the Voronoi
 diagram with red lines.
 
 ![Delaunay triangulation (black) and Voronoi (red)](pics/delaunay_voronoi.png)
@@ -213,6 +186,11 @@ location of points on the plane, building special graphs (such as NNG,RNG), and
     @defgroup imgproc_feature Feature Detection
     @defgroup imgproc_object Object Detection
     @defgroup imgproc_c C API
+    @defgroup imgproc_hal Hardware Acceleration Layer
+    @{
+        @defgroup imgproc_hal_functions Functions
+        @defgroup imgproc_hal_interface Interface
+    @}
   @}
 */
 
@@ -226,10 +204,14 @@ namespace cv
 //! @addtogroup imgproc_filter
 //! @{
 
+enum SpecialFilter {
+    FILTER_SCHARR = -1
+};
+
 //! type of morphological operation
 enum MorphTypes{
-    MORPH_ERODE    = 0, //!< see cv::erode
-    MORPH_DILATE   = 1, //!< see cv::dilate
+    MORPH_ERODE    = 0, //!< see #erode
+    MORPH_DILATE   = 1, //!< see #dilate
     MORPH_OPEN     = 2, //!< an opening operation
                         //!< \f[\texttt{dst} = \mathrm{open} ( \texttt{src} , \texttt{element} )= \mathrm{dilate} ( \mathrm{erode} ( \texttt{src} , \texttt{element} ))\f]
     MORPH_CLOSE    = 3, //!< a closing operation
@@ -240,8 +222,8 @@ enum MorphTypes{
                         //!< \f[\texttt{dst} = \mathrm{tophat} ( \texttt{src} , \texttt{element} )= \texttt{src} - \mathrm{open} ( \texttt{src} , \texttt{element} )\f]
     MORPH_BLACKHAT = 6, //!< "black hat"
                         //!< \f[\texttt{dst} = \mathrm{blackhat} ( \texttt{src} , \texttt{element} )= \mathrm{close} ( \texttt{src} , \texttt{element} )- \texttt{src}\f]
-    MORPH_HITMISS  = 7  //!< "hit and miss"
-                        //!<   .- Only supported for CV_8UC1 binary images. Tutorial can be found in [this page](http://opencv-code.com/tutorials/hit-or-miss-transform-in-opencv/)
+    MORPH_HITMISS  = 7  //!< "hit or miss"
+                        //!<   .- Only supported for CV_8UC1 binary images. A tutorial can be found in the documentation
 };
 
 //! shape of the structuring element
@@ -272,6 +254,8 @@ enum InterpolationFlags{
     INTER_AREA           = 3,
     /** Lanczos interpolation over 8x8 neighborhood */
     INTER_LANCZOS4       = 4,
+    /** Bit exact bilinear interpolation */
+    INTER_LINEAR_EXACT = 5,
     /** mask for interpolation codes */
     INTER_MAX            = 7,
     /** flag, fills all of the destination image pixels. If some of them correspond to outliers in the
@@ -279,13 +263,22 @@ enum InterpolationFlags{
     WARP_FILL_OUTLIERS   = 8,
     /** flag, inverse transformation
 
-    For example, polar transforms:
-    - flag is __not__ set: \f$dst( \phi , \rho ) = src(x,y)\f$
-    - flag is set: \f$dst(x,y) = src( \phi , \rho )\f$
+    For example, #linearPolar or #logPolar transforms:
+    - flag is __not__ set: \f$dst( \rho , \phi ) = src(x,y)\f$
+    - flag is set: \f$dst(x,y) = src( \rho , \phi )\f$
     */
     WARP_INVERSE_MAP     = 16
 };
 
+/** \brief Specify the polar mapping mode
+@sa warpPolar
+*/
+enum WarpPolarMode
+{
+    WARP_POLAR_LINEAR = 0, ///< Remaps an image to/from polar space.
+    WARP_POLAR_LOG = 256   ///< Remaps an image to/from semilog-polar space.
+};
+
 enum InterpolationMasks {
        INTER_BITS      = 5,
        INTER_BITS2     = INTER_BITS * 2,
@@ -299,7 +292,7 @@ enum InterpolationMasks {
 //! @{
 
 //! Distance types for Distance Transform and M-estimators
-//! @see cv::distanceTransform, cv::fitLine
+//! @see distanceTransform, fitLine
 enum DistanceTypes {
     DIST_USER    = -1,  //!< User defined distance
     DIST_L1      = 1,   //!< distance = |x1-x2| + |y1-y2|
@@ -332,7 +325,7 @@ enum ThresholdTypes {
 };
 
 //! adaptive threshold algorithm
-//! see cv::adaptiveThreshold
+//! @see adaptiveThreshold
 enum AdaptiveThresholdTypes {
     /** the threshold value \f$T(x,y)\f$ is a mean of the \f$\texttt{blockSize} \times
     \texttt{blockSize}\f$ neighborhood of \f$(x, y)\f$ minus C */
@@ -340,16 +333,10 @@ enum AdaptiveThresholdTypes {
     /** the threshold value \f$T(x, y)\f$ is a weighted sum (cross-correlation with a Gaussian
     window) of the \f$\texttt{blockSize} \times \texttt{blockSize}\f$ neighborhood of \f$(x, y)\f$
     minus C . The default sigma (standard deviation) is used for the specified blockSize . See
-    cv::getGaussianKernel*/
+    #getGaussianKernel*/
     ADAPTIVE_THRESH_GAUSSIAN_C = 1
 };
 
-//! cv::undistort mode
-enum UndistortTypes {
-       PROJ_SPHERICAL_ORTHO  = 0,
-       PROJ_SPHERICAL_EQRECT = 1
-     };
-
 //! class of the pixel in GrabCut algorithm
 enum GrabCutClasses {
     GC_BGD    = 0,  //!< an obvious background pixels
@@ -368,7 +355,9 @@ enum GrabCutModes {
     automatically initialized with GC_BGD .*/
     GC_INIT_WITH_MASK  = 1,
     /** The value means that the algorithm should just resume. */
-    GC_EVAL            = 2
+    GC_EVAL            = 2,
+    /** The value means that the algorithm should just run the grabCut algorithm (a single iteration) with the fixed model */
+    GC_EVAL_FREEZE_MODEL = 3
 };
 
 //! distanceTransform algorithm flags
@@ -405,7 +394,16 @@ enum ConnectedComponentsTypes {
     CC_STAT_WIDTH  = 2, //!< The horizontal size of the bounding box
     CC_STAT_HEIGHT = 3, //!< The vertical size of the bounding box
     CC_STAT_AREA   = 4, //!< The total area (in pixels) of the connected component
-    CC_STAT_MAX    = 5
+#ifndef CV_DOXYGEN
+    CC_STAT_MAX    = 5 //!< Max enumeration value. Used internally only for memory allocation
+#endif
+};
+
+//! connected components algorithm
+enum ConnectedComponentsAlgorithmsTypes {
+    CCL_WU      = 0,  //!< SAUF algorithm for 8-way connectivity, SAUF algorithm for 4-way connectivity
+    CCL_DEFAULT = -1, //!< BBDT algorithm for 8-way connectivity, SAUF algorithm for 4-way connectivity
+    CCL_GRANA   = 1   //!< BBDT algorithm for 8-way connectivity, SAUF algorithm for 4-way connectivity
 };
 
 //! mode of the contour retrieval algorithm
@@ -440,8 +438,25 @@ enum ContourApproximationModes {
     CHAIN_APPROX_TC89_KCOS = 4
 };
 
+/** @brief Shape matching methods
+
+\f$A\f$ denotes object1, \f$B\f$ denotes object2
+
+\f$\begin{array}{l} m^A_i =  \mathrm{sign} (h^A_i)  \cdot \log{h^A_i} \\ m^B_i =  \mathrm{sign} (h^B_i)  \cdot \log{h^B_i} \end{array}\f$
+
+and \f$h^A_i, h^B_i\f$ are the Hu moments of \f$A\f$ and \f$B\f$ , respectively.
+*/
+enum ShapeMatchModes {
+    CONTOURS_MATCH_I1  =1, //!< \f[I_1(A,B) =  \sum _{i=1...7}  \left |  \frac{1}{m^A_i} -  \frac{1}{m^B_i} \right |\f]
+    CONTOURS_MATCH_I2  =2, //!< \f[I_2(A,B) =  \sum _{i=1...7}  \left | m^A_i - m^B_i  \right |\f]
+    CONTOURS_MATCH_I3  =3  //!< \f[I_3(A,B) =  \max _{i=1...7}  \frac{ \left| m^A_i - m^B_i \right| }{ \left| m^A_i \right| }\f]
+};
+
 //! @} imgproc_shape
 
+//! @addtogroup imgproc_feature
+//! @{
+
 //! Variants of a Hough transform
 enum HoughModes {
 
@@ -458,11 +473,11 @@ enum HoughModes {
     /** multi-scale variant of the classical Hough transform. The lines are encoded the same way as
     HOUGH_STANDARD. */
     HOUGH_MULTI_SCALE   = 2,
-    HOUGH_GRADIENT      = 3 //!< basically *21HT*, described in @cite Yuen90
+    HOUGH_GRADIENT      = 3, //!< basically *21HT*, described in @cite Yuen90
+    HOUGH_GRADIENT_ALT  = 4, //!< variation of HOUGH_GRADIENT to get better accuracy
 };
 
 //! Variants of Line Segment %Detector
-//! @ingroup imgproc_feature
 enum LineSegmentDetectorModes {
     LSD_REFINE_NONE = 0, //!< No refinement applied
     LSD_REFINE_STD  = 1, //!< Standard refinement is applied. E.g. breaking arches into smaller straighter line approximations.
@@ -470,6 +485,8 @@ enum LineSegmentDetectorModes {
                          //!< refined through increase of precision, decrement in size, etc.
 };
 
+//! @} imgproc_feature
+
 /** Histogram comparison methods
   @ingroup imgproc_hist
 */
@@ -500,9 +517,9 @@ enum HistCompMethods {
     HISTCMP_KL_DIV        = 5
 };
 
-/** the color conversion code
+/** the color conversion codes
 @see @ref imgproc_color_conversions
-@ingroup imgproc_misc
+@ingroup imgproc_color_conversions
  */
 enum ColorConversionCodes {
     COLOR_BGR2BGRA     = 0, //!< add alpha channel to RGB or BGR image
@@ -587,7 +604,7 @@ enum ColorConversionCodes {
     COLOR_HLS2BGR      = 60,
     COLOR_HLS2RGB      = 61,
 
-    COLOR_BGR2HSV_FULL = 66, //!<
+    COLOR_BGR2HSV_FULL = 66,
     COLOR_RGB2HSV_FULL = 67,
     COLOR_BGR2HLS_FULL = 68,
     COLOR_RGB2HLS_FULL = 69,
@@ -757,135 +774,215 @@ enum ColorConversionCodes {
     COLOR_BayerRG2RGB_EA  = COLOR_BayerBG2BGR_EA,
     COLOR_BayerGR2RGB_EA  = COLOR_BayerGB2BGR_EA,
 
+    //! Demosaicing with alpha channel
+    COLOR_BayerBG2BGRA = 139,
+    COLOR_BayerGB2BGRA = 140,
+    COLOR_BayerRG2BGRA = 141,
+    COLOR_BayerGR2BGRA = 142,
 
-    COLOR_COLORCVT_MAX  = 139
+    COLOR_BayerBG2RGBA = COLOR_BayerRG2BGRA,
+    COLOR_BayerGB2RGBA = COLOR_BayerGR2BGRA,
+    COLOR_BayerRG2RGBA = COLOR_BayerBG2BGRA,
+    COLOR_BayerGR2RGBA = COLOR_BayerGB2BGRA,
+
+    COLOR_COLORCVT_MAX  = 143
 };
 
-/** types of intersection between rectangles
-@ingroup imgproc_shape
-*/
+//! @addtogroup imgproc_shape
+//! @{
+
+//! types of intersection between rectangles
 enum RectanglesIntersectTypes {
     INTERSECT_NONE = 0, //!< No intersection
     INTERSECT_PARTIAL  = 1, //!< There is a partial intersection
     INTERSECT_FULL  = 2 //!< One of the rectangle is fully enclosed in the other
 };
 
-//! finds arbitrary template in the grayscale image using Generalized Hough Transform
-class CV_EXPORTS GeneralizedHough : public Algorithm
+/** types of line
+@ingroup imgproc_draw
+*/
+enum LineTypes {
+    FILLED  = -1,
+    LINE_4  = 4, //!< 4-connected line
+    LINE_8  = 8, //!< 8-connected line
+    LINE_AA = 16 //!< antialiased line
+};
+
+/** Only a subset of Hershey fonts <https://en.wikipedia.org/wiki/Hershey_fonts> are supported
+@ingroup imgproc_draw
+*/
+enum HersheyFonts {
+    FONT_HERSHEY_SIMPLEX        = 0, //!< normal size sans-serif font
+    FONT_HERSHEY_PLAIN          = 1, //!< small size sans-serif font
+    FONT_HERSHEY_DUPLEX         = 2, //!< normal size sans-serif font (more complex than FONT_HERSHEY_SIMPLEX)
+    FONT_HERSHEY_COMPLEX        = 3, //!< normal size serif font
+    FONT_HERSHEY_TRIPLEX        = 4, //!< normal size serif font (more complex than FONT_HERSHEY_COMPLEX)
+    FONT_HERSHEY_COMPLEX_SMALL  = 5, //!< smaller version of FONT_HERSHEY_COMPLEX
+    FONT_HERSHEY_SCRIPT_SIMPLEX = 6, //!< hand-writing style font
+    FONT_HERSHEY_SCRIPT_COMPLEX = 7, //!< more complex variant of FONT_HERSHEY_SCRIPT_SIMPLEX
+    FONT_ITALIC                 = 16 //!< flag for italic font
+};
+
+/** Possible set of marker types used for the cv::drawMarker function
+@ingroup imgproc_draw
+*/
+enum MarkerTypes
+{
+    MARKER_CROSS = 0,           //!< A crosshair marker shape
+    MARKER_TILTED_CROSS = 1,    //!< A 45 degree tilted crosshair marker shape
+    MARKER_STAR = 2,            //!< A star marker shape, combination of cross and tilted cross
+    MARKER_DIAMOND = 3,         //!< A diamond marker shape
+    MARKER_SQUARE = 4,          //!< A square marker shape
+    MARKER_TRIANGLE_UP = 5,     //!< An upwards pointing triangle marker shape
+    MARKER_TRIANGLE_DOWN = 6    //!< A downwards pointing triangle marker shape
+};
+
+/** @brief finds arbitrary template in the grayscale image using Generalized Hough Transform
+*/
+class CV_EXPORTS_W GeneralizedHough : public Algorithm
 {
 public:
     //! set template to search
-    virtual void setTemplate(InputArray templ, Point templCenter = Point(-1, -1)) = 0;
-    virtual void setTemplate(InputArray edges, InputArray dx, InputArray dy, Point templCenter = Point(-1, -1)) = 0;
+    CV_WRAP virtual void setTemplate(InputArray templ, Point templCenter = Point(-1, -1)) = 0;
+    CV_WRAP virtual void setTemplate(InputArray edges, InputArray dx, InputArray dy, Point templCenter = Point(-1, -1)) = 0;
 
     //! find template on image
-    virtual void detect(InputArray image, OutputArray positions, OutputArray votes = noArray()) = 0;
-    virtual void detect(InputArray edges, InputArray dx, InputArray dy, OutputArray positions, OutputArray votes = noArray()) = 0;
+    CV_WRAP virtual void detect(InputArray image, OutputArray positions, OutputArray votes = noArray()) = 0;
+    CV_WRAP virtual void detect(InputArray edges, InputArray dx, InputArray dy, OutputArray positions, OutputArray votes = noArray()) = 0;
 
     //! Canny low threshold.
-    virtual void setCannyLowThresh(int cannyLowThresh) = 0;
-    virtual int getCannyLowThresh() const = 0;
+    CV_WRAP virtual void setCannyLowThresh(int cannyLowThresh) = 0;
+    CV_WRAP virtual int getCannyLowThresh() const = 0;
 
     //! Canny high threshold.
-    virtual void setCannyHighThresh(int cannyHighThresh) = 0;
-    virtual int getCannyHighThresh() const = 0;
+    CV_WRAP virtual void setCannyHighThresh(int cannyHighThresh) = 0;
+    CV_WRAP virtual int getCannyHighThresh() const = 0;
 
     //! Minimum distance between the centers of the detected objects.
-    virtual void setMinDist(double minDist) = 0;
-    virtual double getMinDist() const = 0;
+    CV_WRAP virtual void setMinDist(double minDist) = 0;
+    CV_WRAP virtual double getMinDist() const = 0;
 
     //! Inverse ratio of the accumulator resolution to the image resolution.
-    virtual void setDp(double dp) = 0;
-    virtual double getDp() const = 0;
+    CV_WRAP virtual void setDp(double dp) = 0;
+    CV_WRAP virtual double getDp() const = 0;
 
     //! Maximal size of inner buffers.
-    virtual void setMaxBufferSize(int maxBufferSize) = 0;
-    virtual int getMaxBufferSize() const = 0;
+    CV_WRAP virtual void setMaxBufferSize(int maxBufferSize) = 0;
+    CV_WRAP virtual int getMaxBufferSize() const = 0;
 };
 
-//! Ballard, D.H. (1981). Generalizing the Hough transform to detect arbitrary shapes. Pattern Recognition 13 (2): 111-122.
-//! Detects position only without traslation and rotation
-class CV_EXPORTS GeneralizedHoughBallard : public GeneralizedHough
+/** @brief finds arbitrary template in the grayscale image using Generalized Hough Transform
+
+Detects position only without translation and rotation @cite Ballard1981 .
+*/
+class CV_EXPORTS_W GeneralizedHoughBallard : public GeneralizedHough
 {
 public:
     //! R-Table levels.
-    virtual void setLevels(int levels) = 0;
-    virtual int getLevels() const = 0;
+    CV_WRAP virtual void setLevels(int levels) = 0;
+    CV_WRAP virtual int getLevels() const = 0;
 
     //! The accumulator threshold for the template centers at the detection stage. The smaller it is, the more false positions may be detected.
-    virtual void setVotesThreshold(int votesThreshold) = 0;
-    virtual int getVotesThreshold() const = 0;
+    CV_WRAP virtual void setVotesThreshold(int votesThreshold) = 0;
+    CV_WRAP virtual int getVotesThreshold() const = 0;
 };
 
-//! Guil, N., González-Linares, J.M. and Zapata, E.L. (1999). Bidimensional shape detection using an invariant approach. Pattern Recognition 32 (6): 1025-1038.
-//! Detects position, traslation and rotation
-class CV_EXPORTS GeneralizedHoughGuil : public GeneralizedHough
+/** @brief finds arbitrary template in the grayscale image using Generalized Hough Transform
+
+Detects position, translation and rotation @cite Guil1999 .
+*/
+class CV_EXPORTS_W GeneralizedHoughGuil : public GeneralizedHough
 {
 public:
     //! Angle difference in degrees between two points in feature.
-    virtual void setXi(double xi) = 0;
-    virtual double getXi() const = 0;
+    CV_WRAP virtual void setXi(double xi) = 0;
+    CV_WRAP virtual double getXi() const = 0;
 
     //! Feature table levels.
-    virtual void setLevels(int levels) = 0;
-    virtual int getLevels() const = 0;
+    CV_WRAP virtual void setLevels(int levels) = 0;
+    CV_WRAP virtual int getLevels() const = 0;
 
     //! Maximal difference between angles that treated as equal.
-    virtual void setAngleEpsilon(double angleEpsilon) = 0;
-    virtual double getAngleEpsilon() const = 0;
+    CV_WRAP virtual void setAngleEpsilon(double angleEpsilon) = 0;
+    CV_WRAP virtual double getAngleEpsilon() const = 0;
 
     //! Minimal rotation angle to detect in degrees.
-    virtual void setMinAngle(double minAngle) = 0;
-    virtual double getMinAngle() const = 0;
+    CV_WRAP virtual void setMinAngle(double minAngle) = 0;
+    CV_WRAP virtual double getMinAngle() const = 0;
 
     //! Maximal rotation angle to detect in degrees.
-    virtual void setMaxAngle(double maxAngle) = 0;
-    virtual double getMaxAngle() const = 0;
+    CV_WRAP virtual void setMaxAngle(double maxAngle) = 0;
+    CV_WRAP virtual double getMaxAngle() const = 0;
 
     //! Angle step in degrees.
-    virtual void setAngleStep(double angleStep) = 0;
-    virtual double getAngleStep() const = 0;
+    CV_WRAP virtual void setAngleStep(double angleStep) = 0;
+    CV_WRAP virtual double getAngleStep() const = 0;
 
     //! Angle votes threshold.
-    virtual void setAngleThresh(int angleThresh) = 0;
-    virtual int getAngleThresh() const = 0;
+    CV_WRAP virtual void setAngleThresh(int angleThresh) = 0;
+    CV_WRAP virtual int getAngleThresh() const = 0;
 
     //! Minimal scale to detect.
-    virtual void setMinScale(double minScale) = 0;
-    virtual double getMinScale() const = 0;
+    CV_WRAP virtual void setMinScale(double minScale) = 0;
+    CV_WRAP virtual double getMinScale() const = 0;
 
     //! Maximal scale to detect.
-    virtual void setMaxScale(double maxScale) = 0;
-    virtual double getMaxScale() const = 0;
+    CV_WRAP virtual void setMaxScale(double maxScale) = 0;
+    CV_WRAP virtual double getMaxScale() const = 0;
 
     //! Scale step.
-    virtual void setScaleStep(double scaleStep) = 0;
-    virtual double getScaleStep() const = 0;
+    CV_WRAP virtual void setScaleStep(double scaleStep) = 0;
+    CV_WRAP virtual double getScaleStep() const = 0;
 
     //! Scale votes threshold.
-    virtual void setScaleThresh(int scaleThresh) = 0;
-    virtual int getScaleThresh() const = 0;
+    CV_WRAP virtual void setScaleThresh(int scaleThresh) = 0;
+    CV_WRAP virtual int getScaleThresh() const = 0;
 
     //! Position votes threshold.
-    virtual void setPosThresh(int posThresh) = 0;
-    virtual int getPosThresh() const = 0;
+    CV_WRAP virtual void setPosThresh(int posThresh) = 0;
+    CV_WRAP virtual int getPosThresh() const = 0;
 };
 
+//! @} imgproc_shape
+
+//! @addtogroup imgproc_hist
+//! @{
 
+/** @brief Base class for Contrast Limited Adaptive Histogram Equalization.
+*/
 class CV_EXPORTS_W CLAHE : public Algorithm
 {
 public:
+    /** @brief Equalizes the histogram of a grayscale image using Contrast Limited Adaptive Histogram Equalization.
+
+    @param src Source image of type CV_8UC1 or CV_16UC1.
+    @param dst Destination image.
+     */
     CV_WRAP virtual void apply(InputArray src, OutputArray dst) = 0;
 
+    /** @brief Sets threshold for contrast limiting.
+
+    @param clipLimit threshold value.
+    */
     CV_WRAP virtual void setClipLimit(double clipLimit) = 0;
+
+    //! Returns threshold value for contrast limiting.
     CV_WRAP virtual double getClipLimit() const = 0;
 
+    /** @brief Sets size of grid for histogram equalization. Input image will be divided into
+    equally sized rectangular tiles.
+
+    @param tileGridSize defines the number of tiles in row and column.
+    */
     CV_WRAP virtual void setTilesGridSize(Size tileGridSize) = 0;
+
+    //! @brief Returns the Size that defines the number of tiles in row and column.
     CV_WRAP virtual Size getTilesGridSize() const = 0;
 
     CV_WRAP virtual void collectGarbage() = 0;
 };
 
+//! @} imgproc_hist
 
 //! @addtogroup imgproc_subdiv2d
 //! @{
@@ -913,13 +1010,13 @@ class CV_EXPORTS_W Subdiv2D
          };
 
     /** creates an empty Subdiv2D object.
-    To create a new empty Delaunay subdivision you need to use the initDelaunay() function.
+    To create a new empty Delaunay subdivision you need to use the #initDelaunay function.
      */
     CV_WRAP Subdiv2D();
 
     /** @overload
 
-    @param rect – Rectangle that includes all of the 2D points that are to be added to the subdivision.
+    @param rect Rectangle that includes all of the 2D points that are to be added to the subdivision.
 
     The function creates an empty Delaunay subdivision where 2D points can be added using the function
     insert() . All of the points to be added must be within the specified rectangle, otherwise a runtime
@@ -929,14 +1026,14 @@ class CV_EXPORTS_W Subdiv2D
 
     /** @brief Creates a new empty Delaunay subdivision
 
-    @param rect – Rectangle that includes all of the 2D points that are to be added to the subdivision.
+    @param rect Rectangle that includes all of the 2D points that are to be added to the subdivision.
 
      */
     CV_WRAP void initDelaunay(Rect rect);
 
     /** @brief Insert a single point into a Delaunay triangulation.
 
-    @param pt – Point to insert.
+    @param pt Point to insert.
 
     The function inserts a single point into a subdivision and modifies the subdivision topology
     appropriately. If a point with the same coordinates exists already, no new point is added.
@@ -948,7 +1045,7 @@ class CV_EXPORTS_W Subdiv2D
 
     /** @brief Insert multiple points into a Delaunay triangulation.
 
-    @param ptvec – Points to insert.
+    @param ptvec Points to insert.
 
     The function inserts a vector of points into a subdivision and modifies the subdivision topology
     appropriately.
@@ -957,30 +1054,30 @@ class CV_EXPORTS_W Subdiv2D
 
     /** @brief Returns the location of a point within a Delaunay triangulation.
 
-    @param pt – Point to locate.
-    @param edge – Output edge that the point belongs to or is located to the right of it.
-    @param vertex – Optional output vertex the input point coincides with.
+    @param pt Point to locate.
+    @param edge Output edge that the point belongs to or is located to the right of it.
+    @param vertex Optional output vertex the input point coincides with.
 
     The function locates the input point within the subdivision and gives one of the triangle edges
     or vertices.
 
     @returns an integer which specify one of the following five cases for point location:
-    -  The point falls into some facet. The function returns PTLOC_INSIDE and edge will contain one of
+    -  The point falls into some facet. The function returns #PTLOC_INSIDE and edge will contain one of
        edges of the facet.
-    -  The point falls onto the edge. The function returns PTLOC_ON_EDGE and edge will contain this edge.
-    -  The point coincides with one of the subdivision vertices. The function returns PTLOC_VERTEX and
+    -  The point falls onto the edge. The function returns #PTLOC_ON_EDGE and edge will contain this edge.
+    -  The point coincides with one of the subdivision vertices. The function returns #PTLOC_VERTEX and
        vertex will contain a pointer to the vertex.
-    -  The point is outside the subdivision reference rectangle. The function returns PTLOC_OUTSIDE_RECT
+    -  The point is outside the subdivision reference rectangle. The function returns #PTLOC_OUTSIDE_RECT
        and no pointers are filled.
-    -  One of input arguments is invalid. A runtime error is raised or, if silent or “parent” error
-       processing mode is selected, CV_PTLOC_ERROR is returnd.
+    -  One of input arguments is invalid. A runtime error is raised or, if silent or "parent" error
+       processing mode is selected, #PTLOC_ERROR is returned.
      */
     CV_WRAP int locate(Point2f pt, CV_OUT int& edge, CV_OUT int& vertex);
 
     /** @brief Finds the subdivision vertex closest to the given point.
 
-    @param pt – Input point.
-    @param nearestPt – Output subdivision vertex point.
+    @param pt Input point.
+    @param nearestPt Output subdivision vertex point.
 
     The function is another function that locates the input point within the subdivision. It finds the
     subdivision vertex that is the closest to the input point. It is not necessarily one of vertices
@@ -993,27 +1090,35 @@ class CV_EXPORTS_W Subdiv2D
 
     /** @brief Returns a list of all edges.
 
-    @param edgeList – Output vector.
+    @param edgeList Output vector.
 
     The function gives each edge as a 4 numbers vector, where each two are one of the edge
     vertices. i.e. org_x = v[0], org_y = v[1], dst_x = v[2], dst_y = v[3].
      */
     CV_WRAP void getEdgeList(CV_OUT std::vector<Vec4f>& edgeList) const;
 
+    /** @brief Returns a list of leading edge IDs, one for each triangle.
+
+    @param leadingEdgeList Output vector.
+
+    The function gives one edge ID for each triangle.
+     */
+    CV_WRAP void getLeadingEdgeList(CV_OUT std::vector<int>& leadingEdgeList) const;
+
     /** @brief Returns a list of all triangles.
 
-    @param triangleList – Output vector.
+    @param triangleList Output vector.
 
     The function gives each triangle as a 6 numbers vector, where each two are one of the triangle
     vertices. i.e. p1_x = v[0], p1_y = v[1], p2_x = v[2], p2_y = v[3], p3_x = v[4], p3_y = v[5].
      */
     CV_WRAP void getTriangleList(CV_OUT std::vector<Vec6f>& triangleList) const;
 
-    /** @brief Returns a list of all Voroni facets.
+    /** @brief Returns a list of all Voronoi facets.
 
-    @param idx – Vector of vertices IDs to consider. For all vertices you can pass empty vector.
-    @param facetList – Output vector of the Voroni facets.
-    @param facetCenters – Output vector of the Voroni facets center points.
+    @param idx Vector of vertex IDs to consider. Pass an empty vector to consider all vertices.
+    @param facetList Output vector of the Voronoi facets.
+    @param facetCenters Output vector of the Voronoi facets center points.
 
      */
     CV_WRAP void getVoronoiFacetList(const std::vector<int>& idx, CV_OUT std::vector<std::vector<Point2f> >& facetList,
@@ -1021,8 +1126,8 @@ class CV_EXPORTS_W Subdiv2D
 
     /** @brief Returns vertex location from vertex ID.
 
-    @param vertex – vertex ID.
-    @param firstEdge – Optional. The first edge ID which is connected to the vertex.
+    @param vertex vertex ID.
+    @param firstEdge Optional. The first edge ID which is connected to the vertex.
     @returns vertex (x,y)
 
      */
@@ -1030,8 +1135,8 @@ class CV_EXPORTS_W Subdiv2D
 
     /** @brief Returns one of the edges related to the given edge.
 
-    @param edge – Subdivision edge ID.
-    @param nextEdgeType - Parameter specifying which of the related edges to return.
+    @param edge Subdivision edge ID.
+    @param nextEdgeType Parameter specifying which of the related edges to return.
     The following values are possible:
     -   NEXT_AROUND_ORG next around the edge origin ( eOnext on the picture below if e is the input edge)
     -   NEXT_AROUND_DST next around the edge vertex ( eDnext )
@@ -1050,7 +1155,7 @@ class CV_EXPORTS_W Subdiv2D
 
     /** @brief Returns next edge around the edge origin.
 
-    @param edge – Subdivision edge ID.
+    @param edge Subdivision edge ID.
 
     @returns an integer which is next edge ID around the edge origin: eOnext on the
     picture above if e is the input edge).
@@ -1059,8 +1164,8 @@ class CV_EXPORTS_W Subdiv2D
 
     /** @brief Returns another edge of the same quad-edge.
 
-    @param edge – Subdivision edge ID.
-    @param rotate - Parameter specifying which of the edges of the same quad-edge as the input
+    @param edge Subdivision edge ID.
+    @param rotate Parameter specifying which of the edges of the same quad-edge as the input
     one to return. The following values are possible:
     -   0 - the input edge ( e on the picture below if e is the input edge)
     -   1 - the rotated edge ( eRot )
@@ -1074,8 +1179,8 @@ class CV_EXPORTS_W Subdiv2D
 
     /** @brief Returns the edge origin.
 
-    @param edge – Subdivision edge ID.
-    @param orgpt – Output vertex location.
+    @param edge Subdivision edge ID.
+    @param orgpt Output vertex location.
 
     @returns vertex ID.
      */
@@ -1083,8 +1188,8 @@ class CV_EXPORTS_W Subdiv2D
 
     /** @brief Returns the edge destination.
 
-    @param edge – Subdivision edge ID.
-    @param dstpt – Output vertex location.
+    @param edge Subdivision edge ID.
+    @param dstpt Output vertex location.
 
     @returns vertex ID.
      */
@@ -1146,13 +1251,12 @@ class CV_EXPORTS_W Subdiv2D
 //! @addtogroup imgproc_feature
 //! @{
 
-/** @example lsd_lines.cpp
-An example using the LineSegmentDetector
-*/
-
 /** @brief Line segment detector class
 
 following the algorithm described at @cite Rafael12 .
+
+@note Implementation has been removed due to original code license conflict
+
 */
 class CV_EXPORTS_W LineSegmentDetector : public Algorithm
 {
@@ -1176,14 +1280,14 @@ class CV_EXPORTS_W LineSegmentDetector : public Algorithm
     - -1 corresponds to 10 mean false alarms
     - 0 corresponds to 1 mean false alarm
     - 1 corresponds to 0.1 mean false alarms
-    This vector will be calculated only when the objects type is LSD_REFINE_ADV.
+    This vector will be calculated only when the object's type is #LSD_REFINE_ADV.
     */
     CV_WRAP virtual void detect(InputArray _image, OutputArray _lines,
                         OutputArray width = noArray(), OutputArray prec = noArray(),
                         OutputArray nfa = noArray()) = 0;
 
     /** @brief Draws the line segments on a given image.
-    @param _image The image, where the liens will be drawn. Should be bigger or equal to the image,
+    @param _image The image, where the lines will be drawn. Should be bigger or equal to the image,
     where the lines were found.
     @param lines A vector of the lines that needed to be drawn.
      */
@@ -1207,15 +1311,17 @@ class CV_EXPORTS_W LineSegmentDetector : public Algorithm
 The LineSegmentDetector algorithm is defined using the standard values. Only advanced users may want
 to edit those, as to tailor it for their own application.
 
-@param _refine The way found lines will be refined, see cv::LineSegmentDetectorModes
+@param _refine The way found lines will be refined, see #LineSegmentDetectorModes
 @param _scale The scale of the image that will be used to find the lines. Range (0..1].
 @param _sigma_scale Sigma for Gaussian filter. It is computed as sigma = _sigma_scale/_scale.
 @param _quant Bound to the quantization error on the gradient norm.
 @param _ang_th Gradient angle tolerance in degrees.
-@param _log_eps Detection threshold: -log10(NFA) \> log_eps. Used only when advancent refinement
+@param _log_eps Detection threshold: -log10(NFA) \> log_eps. Used only when advanced refinement
 is chosen.
 @param _density_th Minimal density of aligned region points in the enclosing rectangle.
 @param _n_bins Number of bins in pseudo-ordering of gradient modulus.
+
+@note Implementation has been removed due to original code license conflict
  */
 CV_EXPORTS_W Ptr<LineSegmentDetector> createLineSegmentDetector(
     int _refine = LSD_REFINE_STD, double _scale = 0.8,
@@ -1241,7 +1347,7 @@ smoothing kernels (a symmetrical kernel with sum of weights equal to 1) and hand
 You may also use the higher-level GaussianBlur.
 @param ksize Aperture size. It should be odd ( \f$\texttt{ksize} \mod 2 = 1\f$ ) and positive.
 @param sigma Gaussian standard deviation. If it is non-positive, it is computed from ksize as
-`sigma = 0.3\*((ksize-1)\*0.5 - 1) + 0.8`.
+`sigma = 0.3*((ksize-1)*0.5 - 1) + 0.8`.
 @param ktype Type of filter coefficients. It can be CV_32F or CV_64F .
 @sa  sepFilter2D, getDerivKernels, getStructuringElement, GaussianBlur
  */
@@ -1250,14 +1356,14 @@ CV_EXPORTS_W Mat getGaussianKernel( int ksize, double sigma, int ktype = CV_64F
 /** @brief Returns filter coefficients for computing spatial image derivatives.
 
 The function computes and returns the filter coefficients for spatial image derivatives. When
-`ksize=CV_SCHARR`, the Scharr \f$3 \times 3\f$ kernels are generated (see cv::Scharr). Otherwise, Sobel
-kernels are generated (see cv::Sobel). The filters are normally passed to sepFilter2D or to
+`ksize=FILTER_SCHARR`, the Scharr \f$3 \times 3\f$ kernels are generated (see #Scharr). Otherwise, Sobel
+kernels are generated (see #Sobel). The filters are normally passed to #sepFilter2D.
 
 @param kx Output matrix of row filter coefficients. It has the type ktype .
 @param ky Output matrix of column filter coefficients. It has the type ktype .
 @param dx Derivative order in respect of x.
 @param dy Derivative order in respect of y.
-@param ksize Aperture size. It can be CV_SCHARR, 1, 3, 5, or 7.
+@param ksize Aperture size. It can be FILTER_SCHARR, 1, 3, 5, or 7.
 @param normalize Flag indicating whether to normalize (scale down) the filter coefficients or not.
 Theoretically, the coefficients should have the denominator \f$=2^{ksize*2-dx-dy-2}\f$. If you are
 going to filter floating-point images, you are likely to use the normalized kernels. But if you
@@ -1290,11 +1396,11 @@ static inline Scalar morphologyDefaultBorderValue() { return Scalar::all(DBL_MAX
 
 /** @brief Returns a structuring element of the specified size and shape for morphological operations.
 
-The function constructs and returns the structuring element that can be further passed to cv::erode,
-cv::dilate or cv::morphologyEx. But you can also construct an arbitrary binary mask yourself and use it as
+The function constructs and returns the structuring element that can be further passed to #erode,
+#dilate or #morphologyEx. But you can also construct an arbitrary binary mask yourself and use it as
 the structuring element.
 
-@param shape Element shape that could be one of cv::MorphShapes
+@param shape Element shape that could be one of #MorphShapes
 @param ksize Size of the structuring element.
 @param anchor Anchor position within the element. The default value \f$(-1, -1)\f$ means that the
 anchor is at the center. Note that only the shape of a cross-shaped element depends on the anchor
@@ -1303,12 +1409,20 @@ operation is shifted.
  */
 CV_EXPORTS_W Mat getStructuringElement(int shape, Size ksize, Point anchor = Point(-1,-1));
 
+/** @example samples/cpp/tutorial_code/ImgProc/Smoothing/Smoothing.cpp
+Sample code for simple filters
+![Sample screenshot](Smoothing_Tutorial_Result_Median_Filter.jpg)
+Check @ref tutorial_gausian_median_blur_bilateral_filter "the corresponding tutorial" for more details
+ */
+
 /** @brief Blurs an image using the median filter.
 
 The function smoothes an image using the median filter with the \f$\texttt{ksize} \times
 \texttt{ksize}\f$ aperture. Each channel of a multi-channel image is processed independently.
 In-place operation is supported.
 
+@note The median filter uses #BORDER_REPLICATE internally to cope with border pixels, see #BorderTypes
+
 @param src input 1-, 3-, or 4-channel image; when ksize is 3 or 5, the image depth should be
 CV_8U, CV_16U, or CV_32F, for larger aperture sizes, it can only be CV_8U.
 @param dst destination array of the same size and type as src.
@@ -1330,10 +1444,10 @@ positive and odd. Or, they can be zero's and then they are computed from sigma.
 @param sigmaX Gaussian kernel standard deviation in X direction.
 @param sigmaY Gaussian kernel standard deviation in Y direction; if sigmaY is zero, it is set to be
 equal to sigmaX, if both sigmas are zeros, they are computed from ksize.width and ksize.height,
-respectively (see cv::getGaussianKernel for details); to fully control the result regardless of
+respectively (see #getGaussianKernel for details); to fully control the result regardless of
 possible future modifications of all this semantics, it is recommended to specify all of ksize,
 sigmaX, and sigmaY.
-@param borderType pixel extrapolation method, see cv::BorderTypes
+@param borderType pixel extrapolation method, see #BorderTypes. #BORDER_WRAP is not supported.
 
 @sa  sepFilter2D, filter2D, blur, boxFilter, bilateralFilter, medianBlur
  */
@@ -1367,7 +1481,7 @@ in larger areas of semi-equal color.
 farther pixels will influence each other as long as their colors are close enough (see sigmaColor
 ). When d\>0, it specifies the neighborhood size regardless of sigmaSpace. Otherwise, d is
 proportional to sigmaSpace.
-@param borderType border mode used to extrapolate pixels outside of the image, see cv::BorderTypes
+@param borderType border mode used to extrapolate pixels outside of the image, see #BorderTypes
  */
 CV_EXPORTS_W void bilateralFilter( InputArray src, OutputArray dst, int d,
                                    double sigmaColor, double sigmaSpace,
@@ -1375,7 +1489,7 @@ CV_EXPORTS_W void bilateralFilter( InputArray src, OutputArray dst, int d,
 
 /** @brief Blurs an image using the box filter.
 
-The function smoothes an image using the kernel:
+The function smooths an image using the kernel:
 
 \f[\texttt{K} =  \alpha \begin{bmatrix} 1 & 1 & 1 &  \cdots & 1 & 1  \\ 1 & 1 & 1 &  \cdots & 1 & 1  \\ \hdotsfor{6} \\ 1 & 1 & 1 &  \cdots & 1 & 1 \end{bmatrix}\f]
 
@@ -1385,7 +1499,7 @@ where
 
 Unnormalized box filter is useful for computing various integral characteristics over each pixel
 neighborhood, such as covariance matrices of image derivatives (used in dense optical flow
-algorithms, and so on). If you need to compute pixel sums over variable-size windows, use cv::integral.
+algorithms, and so on). If you need to compute pixel sums over variable-size windows, use #integral.
 
 @param src input image.
 @param dst output image of the same size and type as src.
@@ -1394,7 +1508,7 @@ algorithms, and so on). If you need to compute pixel sums over variable-size win
 @param anchor anchor point; default value Point(-1,-1) means that the anchor is at the kernel
 center.
 @param normalize flag, specifying whether the kernel is normalized by its area or not.
-@param borderType border mode used to extrapolate pixels outside of the image, see cv::BorderTypes
+@param borderType border mode used to extrapolate pixels outside of the image, see #BorderTypes. #BORDER_WRAP is not supported.
 @sa  blur, bilateralFilter, GaussianBlur, medianBlur, integral
  */
 CV_EXPORTS_W void boxFilter( InputArray src, OutputArray dst, int ddepth,
@@ -1410,24 +1524,24 @@ pixel values which overlap the filter placed over the pixel \f$ (x, y) \f$.
 The unnormalized square box filter can be useful in computing local image statistics such as the the local
 variance and standard deviation around the neighborhood of a pixel.
 
-@param _src input image
-@param _dst output image of the same size and type as _src
+@param src input image
+@param dst output image of the same size and type as src
 @param ddepth the output image depth (-1 to use src.depth())
 @param ksize kernel size
 @param anchor kernel anchor point. The default value of Point(-1, -1) denotes that the anchor is at the kernel
 center.
 @param normalize flag, specifying whether the kernel is to be normalized by it's area or not.
-@param borderType border mode used to extrapolate pixels outside of the image, see cv::BorderTypes
+@param borderType border mode used to extrapolate pixels outside of the image, see #BorderTypes. #BORDER_WRAP is not supported.
 @sa boxFilter
 */
-CV_EXPORTS_W void sqrBoxFilter( InputArray _src, OutputArray _dst, int ddepth,
+CV_EXPORTS_W void sqrBoxFilter( InputArray src, OutputArray dst, int ddepth,
                                 Size ksize, Point anchor = Point(-1, -1),
                                 bool normalize = true,
                                 int borderType = BORDER_DEFAULT );
 
 /** @brief Blurs an image using the normalized box filter.
 
-The function smoothes an image using the kernel:
+The function smooths an image using the kernel:
 
 \f[\texttt{K} =  \frac{1}{\texttt{ksize.width*ksize.height}} \begin{bmatrix} 1 & 1 & 1 &  \cdots & 1 & 1  \\ 1 & 1 & 1 &  \cdots & 1 & 1  \\ \hdotsfor{6} \\ 1 & 1 & 1 &  \cdots & 1 & 1  \\ \end{bmatrix}\f]
 
@@ -1440,7 +1554,7 @@ the depth should be CV_8U, CV_16U, CV_16S, CV_32F or CV_64F.
 @param ksize blurring kernel size.
 @param anchor anchor point; default value Point(-1,-1) means that the anchor is at the kernel
 center.
-@param borderType border mode used to extrapolate pixels outside of the image, see cv::BorderTypes
+@param borderType border mode used to extrapolate pixels outside of the image, see #BorderTypes. #BORDER_WRAP is not supported.
 @sa  boxFilter, bilateralFilter, GaussianBlur, medianBlur
  */
 CV_EXPORTS_W void blur( InputArray src, OutputArray dst,
@@ -1458,7 +1572,7 @@ The function does actually compute correlation, not the convolution:
 \f[\texttt{dst} (x,y) =  \sum _{ \stackrel{0\leq x' < \texttt{kernel.cols},}{0\leq y' < \texttt{kernel.rows}} }  \texttt{kernel} (x',y')* \texttt{src} (x+x'- \texttt{anchor.x} ,y+y'- \texttt{anchor.y} )\f]
 
 That is, the kernel is not mirrored around the anchor point. If you need a real convolution, flip
-the kernel using cv::flip and set the new anchor to `(kernel.cols - anchor.x - 1, kernel.rows -
+the kernel using #flip and set the new anchor to `(kernel.cols - anchor.x - 1, kernel.rows -
 anchor.y - 1)`.
 
 The function uses the DFT-based algorithm in case of sufficiently large kernels (~`11 x 11` or
@@ -1474,7 +1588,7 @@ separate color planes using split and process them individually.
 the kernel; the anchor should lie within the kernel; default value (-1,-1) means that the anchor
 is at the kernel center.
 @param delta optional value added to the filtered pixels before storing them in dst.
-@param borderType pixel extrapolation method, see cv::BorderTypes
+@param borderType pixel extrapolation method, see #BorderTypes. #BORDER_WRAP is not supported.
 @sa  sepFilter2D, dft, matchTemplate
  */
 CV_EXPORTS_W void filter2D( InputArray src, OutputArray dst, int ddepth,
@@ -1495,7 +1609,7 @@ kernel kernelY. The final result shifted by delta is stored in dst .
 @param anchor Anchor position within the kernel. The default value \f$(-1,-1)\f$ means that the anchor
 is at the kernel center.
 @param delta Value added to the filtered results before storing them.
-@param borderType Pixel extrapolation method, see cv::BorderTypes
+@param borderType Pixel extrapolation method, see #BorderTypes. #BORDER_WRAP is not supported.
 @sa  filter2D, Sobel, GaussianBlur, boxFilter, blur
  */
 CV_EXPORTS_W void sepFilter2D( InputArray src, OutputArray dst, int ddepth,
@@ -1503,6 +1617,12 @@ CV_EXPORTS_W void sepFilter2D( InputArray src, OutputArray dst, int ddepth,
                                Point anchor = Point(-1,-1),
                                double delta = 0, int borderType = BORDER_DEFAULT );
 
+/** @example samples/cpp/tutorial_code/ImgTrans/Sobel_Demo.cpp
+Sample code using Sobel and/or Scharr OpenCV functions to make a simple Edge Detector
+![Sample screenshot](Sobel_Derivatives_Tutorial_Result.jpg)
+Check @ref tutorial_sobel_derivatives "the corresponding tutorial" for more details
+*/
+
 /** @brief Calculates the first, second, third, or mixed image derivatives using an extended Sobel operator.
 
 In all cases except one, the \f$\texttt{ksize} \times \texttt{ksize}\f$ separable kernel is used to
@@ -1510,7 +1630,7 @@ calculate the derivative. When \f$\texttt{ksize = 1}\f$, the \f$3 \times 1\f$ or
 kernel is used (that is, no Gaussian smoothing is done). `ksize = 1` can only be used for the first
 or the second x- or y- derivatives.
 
-There is also the special value `ksize = CV_SCHARR (-1)` that corresponds to the \f$3\times3\f$ Scharr
+There is also the special value `ksize = FILTER_SCHARR (-1)` that corresponds to the \f$3\times3\f$ Scharr
 filter that may give more accurate results than the \f$3\times3\f$ Sobel. The Scharr aperture is
 
 \f[\vecthreethree{-3}{0}{3}{-10}{0}{10}{-3}{0}{3}\f]
@@ -1540,9 +1660,9 @@ The second case corresponds to a kernel of:
 @param dy order of the derivative y.
 @param ksize size of the extended Sobel kernel; it must be 1, 3, 5, or 7.
 @param scale optional scale factor for the computed derivative values; by default, no scaling is
-applied (see cv::getDerivKernels for details).
+applied (see #getDerivKernels for details).
 @param delta optional delta value that is added to the results prior to storing them in dst.
-@param borderType pixel extrapolation method, see cv::BorderTypes
+@param borderType pixel extrapolation method, see #BorderTypes. #BORDER_WRAP is not supported.
 @sa  Scharr, Laplacian, sepFilter2D, filter2D, GaussianBlur, cartToPolar
  */
 CV_EXPORTS_W void Sobel( InputArray src, OutputArray dst, int ddepth,
@@ -1563,7 +1683,8 @@ Sobel( src, dy, CV_16SC1, 0, 1, 3 );
 @param dx output image with first-order derivative in x.
 @param dy output image with first-order derivative in y.
 @param ksize size of Sobel kernel. It must be 3.
-@param borderType pixel extrapolation method, see cv::BorderTypes
+@param borderType pixel extrapolation method, see #BorderTypes.
+                  Only #BORDER_DEFAULT=#BORDER_REFLECT_101 and #BORDER_REPLICATE are supported.
 
 @sa Sobel
  */
@@ -1581,7 +1702,7 @@ call
 
 is equivalent to
 
-\f[\texttt{Sobel(src, dst, ddepth, dx, dy, CV\_SCHARR, scale, delta, borderType)} .\f]
+\f[\texttt{Sobel(src, dst, ddepth, dx, dy, FILTER\_SCHARR, scale, delta, borderType)} .\f]
 
 @param src input image.
 @param dst output image of the same size and the same number of channels as src.
@@ -1589,17 +1710,17 @@ is equivalent to
 @param dx order of the derivative x.
 @param dy order of the derivative y.
 @param scale optional scale factor for the computed derivative values; by default, no scaling is
-applied (see getDerivKernels for details).
+applied (see #getDerivKernels for details).
 @param delta optional delta value that is added to the results prior to storing them in dst.
-@param borderType pixel extrapolation method, see cv::BorderTypes
+@param borderType pixel extrapolation method, see #BorderTypes. #BORDER_WRAP is not supported.
 @sa  cartToPolar
  */
 CV_EXPORTS_W void Scharr( InputArray src, OutputArray dst, int ddepth,
                           int dx, int dy, double scale = 1, double delta = 0,
                           int borderType = BORDER_DEFAULT );
 
-/** @example laplace.cpp
-  An example using Laplace transformations for edge detection
+/** @example samples/cpp/laplace.cpp
+An example using Laplace transformations for edge detection
 */
 
 /** @brief Calculates the Laplacian of an image.
@@ -1617,12 +1738,12 @@ with the following \f$3 \times 3\f$ aperture:
 @param src Source image.
 @param dst Destination image of the same size and the same number of channels as src .
 @param ddepth Desired depth of the destination image.
-@param ksize Aperture size used to compute the second-derivative filters. See getDerivKernels for
+@param ksize Aperture size used to compute the second-derivative filters. See #getDerivKernels for
 details. The size must be positive and odd.
 @param scale Optional scale factor for the computed Laplacian values. By default, no scaling is
-applied. See getDerivKernels for details.
+applied. See #getDerivKernels for details.
 @param delta Optional delta value that is added to the results prior to storing them in dst .
-@param borderType Pixel extrapolation method, see cv::BorderTypes
+@param borderType Pixel extrapolation method, see #BorderTypes. #BORDER_WRAP is not supported.
 @sa  Sobel, Scharr
  */
 CV_EXPORTS_W void Laplacian( InputArray src, OutputArray dst, int ddepth,
@@ -1634,13 +1755,15 @@ CV_EXPORTS_W void Laplacian( InputArray src, OutputArray dst, int ddepth,
 //! @addtogroup imgproc_feature
 //! @{
 
-/** @example edge.cpp
-  An example on using the canny edge detector
+/** @example samples/cpp/edge.cpp
+This program demonstrates usage of the Canny edge detector
+
+Check @ref tutorial_canny_detector "the corresponding tutorial" for more details
 */
 
 /** @brief Finds edges in an image using the Canny algorithm @cite Canny86 .
 
-The function finds edges in the input image image and marks them in the output map edges using the
+The function finds edges in the input image and marks them in the output map edges using the
 Canny algorithm. The smallest value between threshold1 and threshold2 is used for edge linking. The
 largest value is used to find initial segments of strong edges. See
 <http://en.wikipedia.org/wiki/Canny_edge_detector>
@@ -1659,6 +1782,25 @@ CV_EXPORTS_W void Canny( InputArray image, OutputArray edges,
                          double threshold1, double threshold2,
                          int apertureSize = 3, bool L2gradient = false );
 
+/** \overload
+
+Finds edges in an image using the Canny algorithm with custom image gradient.
+
+@param dx 16-bit x derivative of input image (CV_16SC1 or CV_16SC3).
+@param dy 16-bit y derivative of input image (same type as dx).
+@param edges output edge map; single-channel 8-bit image, which has the same size as dx and dy .
+@param threshold1 first threshold for the hysteresis procedure.
+@param threshold2 second threshold for the hysteresis procedure.
+@param L2gradient a flag, indicating whether a more accurate \f$L_2\f$ norm
+\f$=\sqrt{(dI/dx)^2 + (dI/dy)^2}\f$ should be used to calculate the image gradient magnitude (
+L2gradient=true ), or whether the default \f$L_1\f$ norm \f$=|dI/dx|+|dI/dy|\f$ is enough (
+L2gradient=false ).
+ */
+CV_EXPORTS_W void Canny( InputArray dx, InputArray dy,
+                         OutputArray edges,
+                         double threshold1, double threshold2,
+                         bool L2gradient = false );
+
 /** @brief Calculates the minimal eigenvalue of gradient matrices for corner detection.
 
 The function is similar to cornerEigenValsAndVecs but it calculates and stores only the minimal
@@ -1668,9 +1810,9 @@ of the formulae in the cornerEigenValsAndVecs description.
 @param src Input single-channel 8-bit or floating-point image.
 @param dst Image to store the minimal eigenvalues. It has the type CV_32FC1 and the same size as
 src .
-@param blockSize Neighborhood size (see the details on cornerEigenValsAndVecs ).
+@param blockSize Neighborhood size (see the details on #cornerEigenValsAndVecs ).
 @param ksize Aperture parameter for the Sobel operator.
-@param borderType Pixel extrapolation method. See cv::BorderTypes.
+@param borderType Pixel extrapolation method. See #BorderTypes. #BORDER_WRAP is not supported.
  */
 CV_EXPORTS_W void cornerMinEigenVal( InputArray src, OutputArray dst,
                                      int blockSize, int ksize = 3,
@@ -1690,10 +1832,10 @@ Corners in the image can be found as the local maxima of this response map.
 @param src Input single-channel 8-bit or floating-point image.
 @param dst Image to store the Harris detector responses. It has the type CV_32FC1 and the same
 size as src .
-@param blockSize Neighborhood size (see the details on cornerEigenValsAndVecs ).
+@param blockSize Neighborhood size (see the details on #cornerEigenValsAndVecs ).
 @param ksize Aperture parameter for the Sobel operator.
-@param k Harris detector free parameter. See the formula below.
-@param borderType Pixel extrapolation method. See cv::BorderTypes.
+@param k Harris detector free parameter. See the formula above.
+@param borderType Pixel extrapolation method. See #BorderTypes. #BORDER_WRAP is not supported.
  */
 CV_EXPORTS_W void cornerHarris( InputArray src, OutputArray dst, int blockSize,
                                 int ksize, double k,
@@ -1721,7 +1863,7 @@ The output of the function can be used for robust edge or corner detection.
 @param dst Image to store the results. It has the same size as src and the type CV_32FC(6) .
 @param blockSize Neighborhood size (see details below).
 @param ksize Aperture parameter for the Sobel operator.
-@param borderType Pixel extrapolation method. See cv::BorderTypes.
+@param borderType Pixel extrapolation method. See #BorderTypes. #BORDER_WRAP is not supported.
 
 @sa  cornerMinEigenVal, cornerHarris, preCornerDetect
  */
@@ -1750,7 +1892,7 @@ The corners can be found as local maximums of the functions, as shown below:
 @param src Source single-channel 8-bit of floating-point image.
 @param dst Output image that has the type CV_32F and the same size as src .
 @param ksize %Aperture size of the Sobel .
-@param borderType Pixel extrapolation method. See cv::BorderTypes.
+@param borderType Pixel extrapolation method. See #BorderTypes. #BORDER_WRAP is not supported.
  */
 CV_EXPORTS_W void preCornerDetect( InputArray src, OutputArray dst, int ksize,
                                    int borderType = BORDER_DEFAULT );
@@ -1772,7 +1914,7 @@ where \f${DI_{p_i}}\f$ is an image gradient at one of the points \f$p_i\f$ in a
 value of \f$q\f$ is to be found so that \f$\epsilon_i\f$ is minimized. A system of equations may be set up
 with \f$\epsilon_i\f$ set to zero:
 
-\f[\sum _i(DI_{p_i}  \cdot {DI_{p_i}}^T) -  \sum _i(DI_{p_i}  \cdot {DI_{p_i}}^T  \cdot p_i)\f]
+\f[\sum _i(DI_{p_i}  \cdot {DI_{p_i}}^T) \cdot q -  \sum _i(DI_{p_i}  \cdot {DI_{p_i}}^T  \cdot p_i)\f]
 
 where the gradients are summed within a neighborhood ("search window") of \f$q\f$ . Calling the first
 gradient term \f$G\f$ and the second gradient term \f$b\f$ gives:
@@ -1782,11 +1924,11 @@ gradient term \f$G\f$ and the second gradient term \f$b\f$ gives:
 The algorithm sets the center of the neighborhood window at this new center \f$q\f$ and then iterates
 until the center stays within a set threshold.
 
-@param image Input image.
+@param image Input single-channel, 8-bit or float image.
 @param corners Initial coordinates of the input corners and refined coordinates provided for
 output.
 @param winSize Half of the side length of the search window. For example, if winSize=Size(5,5) ,
-then a \f$5*2+1 \times 5*2+1 = 11 \times 11\f$ search window is used.
+then a \f$(5*2+1) \times (5*2+1) = 11 \times 11\f$ search window is used.
 @param zeroZone Half of the size of the dead region in the middle of the search zone over which
 the summation in the formula below is not done. It is used sometimes to avoid possible
 singularities of the autocorrelation matrix. The value of (-1,-1) indicates that there is no such
@@ -1805,7 +1947,7 @@ The function finds the most prominent corners in the image or in the specified i
 described in @cite Shi94
 
 -   Function calculates the corner quality measure at every source image pixel using the
-    cornerMinEigenVal or cornerHarris .
+    #cornerMinEigenVal or #cornerHarris .
 -   Function performs a non-maximum suppression (the local maximums in *3 x 3* neighborhood are
     retained).
 -   The corners with the minimal eigenvalue less than
@@ -1823,10 +1965,11 @@ with qualityLevel=B .
 @param image Input 8-bit or floating-point 32-bit, single-channel image.
 @param corners Output vector of detected corners.
 @param maxCorners Maximum number of corners to return. If there are more corners than are found,
-the strongest of them is returned.
+the strongest of them is returned. `maxCorners <= 0` implies that no limit on the maximum is set
+and all detected corners are returned.
 @param qualityLevel Parameter characterizing the minimal accepted quality of image corners. The
 parameter value is multiplied by the best corner quality measure, which is the minimal eigenvalue
-(see cornerMinEigenVal ) or the Harris function response (see cornerHarris ). The corners with the
+(see #cornerMinEigenVal ) or the Harris function response (see #cornerHarris ). The corners with the
 quality measure less than the product are rejected. For example, if the best corner has the
 quality measure = 1500, and the qualityLevel=0.01 , then all the corners with the quality measure
 less than 15 are rejected.
@@ -1835,19 +1978,26 @@ less than 15 are rejected.
 CV_8UC1 and the same size as image ), it specifies the region in which the corners are detected.
 @param blockSize Size of an average block for computing a derivative covariation matrix over each
 pixel neighborhood. See cornerEigenValsAndVecs .
-@param useHarrisDetector Parameter indicating whether to use a Harris detector (see cornerHarris)
-or cornerMinEigenVal.
+@param useHarrisDetector Parameter indicating whether to use a Harris detector (see #cornerHarris)
+or #cornerMinEigenVal.
 @param k Free parameter of the Harris detector.
 
 @sa  cornerMinEigenVal, cornerHarris, calcOpticalFlowPyrLK, estimateRigidTransform,
  */
+
 CV_EXPORTS_W void goodFeaturesToTrack( InputArray image, OutputArray corners,
                                      int maxCorners, double qualityLevel, double minDistance,
                                      InputArray mask = noArray(), int blockSize = 3,
                                      bool useHarrisDetector = false, double k = 0.04 );
 
-/** @example houghlines.cpp
+CV_EXPORTS_W void goodFeaturesToTrack( InputArray image, OutputArray corners,
+                                     int maxCorners, double qualityLevel, double minDistance,
+                                     InputArray mask, int blockSize,
+                                     int gradientSize, bool useHarrisDetector = false,
+                                     double k = 0.04 );
+/** @example samples/cpp/tutorial_code/ImgTrans/houghlines.cpp
 An example using the Hough line detector
+![Sample input image](Hough_Lines_Tutorial_Original_Image.jpg) ![Output image](Hough_Lines_Tutorial_Result.jpg)
 */
 
 /** @brief Finds lines in a binary image using the standard Hough transform.
@@ -1857,10 +2007,11 @@ detection. See <http://homepages.inf.ed.ac.uk/rbf/HIPR2/hough.htm> for a good ex
 transform.
 
 @param image 8-bit, single-channel binary source image. The image may be modified by the function.
-@param lines Output vector of lines. Each line is represented by a two-element vector
-\f$(\rho, \theta)\f$ . \f$\rho\f$ is the distance from the coordinate origin \f$(0,0)\f$ (top-left corner of
+@param lines Output vector of lines. Each line is represented by a 2 or 3 element vector
+\f$(\rho, \theta)\f$ or \f$(\rho, \theta, \textrm{votes})\f$ . \f$\rho\f$ is the distance from the coordinate origin \f$(0,0)\f$ (top-left corner of
 the image). \f$\theta\f$ is the line rotation angle in radians (
 \f$0 \sim \textrm{vertical line}, \pi/2 \sim \textrm{horizontal line}\f$ ).
+\f$\textrm{votes}\f$ is the value of the accumulator.
 @param rho Distance resolution of the accumulator in pixels.
 @param theta Angle resolution of the accumulator in radians.
 @param threshold Accumulator threshold parameter. Only those lines are returned that get enough
@@ -1886,58 +2037,7 @@ The function implements the probabilistic Hough transform algorithm for line det
 in @cite Matas00
 
 See the line detection example below:
-
-@code
-    #include <opencv2/imgproc.hpp>
-    #include <opencv2/highgui.hpp>
-
-    using namespace cv;
-    using namespace std;
-
-    int main(int argc, char** argv)
-    {
-        Mat src, dst, color_dst;
-        if( argc != 2 || !(src=imread(argv[1], 0)).data)
-            return -1;
-
-        Canny( src, dst, 50, 200, 3 );
-        cvtColor( dst, color_dst, COLOR_GRAY2BGR );
-
-    #if 0
-        vector<Vec2f> lines;
-        HoughLines( dst, lines, 1, CV_PI/180, 100 );
-
-        for( size_t i = 0; i < lines.size(); i++ )
-        {
-            float rho = lines[i][0];
-            float theta = lines[i][1];
-            double a = cos(theta), b = sin(theta);
-            double x0 = a*rho, y0 = b*rho;
-            Point pt1(cvRound(x0 + 1000*(-b)),
-                      cvRound(y0 + 1000*(a)));
-            Point pt2(cvRound(x0 - 1000*(-b)),
-                      cvRound(y0 - 1000*(a)));
-            line( color_dst, pt1, pt2, Scalar(0,0,255), 3, 8 );
-        }
-    #else
-        vector<Vec4i> lines;
-        HoughLinesP( dst, lines, 1, CV_PI/180, 80, 30, 10 );
-        for( size_t i = 0; i < lines.size(); i++ )
-        {
-            line( color_dst, Point(lines[i][0], lines[i][1]),
-                Point(lines[i][2], lines[i][3]), Scalar(0,0,255), 3, 8 );
-        }
-    #endif
-        namedWindow( "Source", 1 );
-        imshow( "Source", src );
-
-        namedWindow( "Detected Lines", 1 );
-        imshow( "Detected Lines", color_dst );
-
-        waitKey(0);
-        return 0;
-    }
-@endcode
+@include snippets/imgproc_HoughLinesP.cpp
 This is a sample picture the function parameters have been tuned for:
 
 ![image](pics/building.jpg)
@@ -1963,7 +2063,28 @@ CV_EXPORTS_W void HoughLinesP( InputArray image, OutputArray lines,
                                double rho, double theta, int threshold,
                                double minLineLength = 0, double maxLineGap = 0 );
 
-/** @example houghcircles.cpp
+/** @brief Finds lines in a set of points using the standard Hough transform.
+
+The function finds lines in a set of points using a modification of the Hough transform.
+@include snippets/imgproc_HoughLinesPointSet.cpp
+@param _point Input vector of points. Each vector must be encoded as a Point vector \f$(x,y)\f$. Type must be CV_32FC2 or CV_32SC2.
+@param _lines Output vector of found lines. Each vector is encoded as a vector<Vec3d> \f$(votes, rho, theta)\f$.
+The larger the value of 'votes', the higher the reliability of the Hough line.
+@param lines_max Max count of hough lines.
+@param threshold Accumulator threshold parameter. Only those lines are returned that get enough
+votes ( \f$>\texttt{threshold}\f$ )
+@param min_rho Minimum Distance value of the accumulator in pixels.
+@param max_rho Maximum Distance value of the accumulator in pixels.
+@param rho_step Distance resolution of the accumulator in pixels.
+@param min_theta Minimum angle value of the accumulator in radians.
+@param max_theta Maximum angle value of the accumulator in radians.
+@param theta_step Angle resolution of the accumulator in radians.
+ */
+CV_EXPORTS_W void HoughLinesPointSet( InputArray _point, OutputArray _lines, int lines_max, int threshold,
+                                      double min_rho, double max_rho, double rho_step,
+                                      double min_theta, double max_theta, double theta_step );
+
+/** @example samples/cpp/tutorial_code/ImgTrans/houghcircles.cpp
 An example using the Hough circle detector
 */
 
@@ -1972,65 +2093,41 @@ An example using the Hough circle detector
 The function finds circles in a grayscale image using a modification of the Hough transform.
 
 Example: :
-@code
-    #include <opencv2/imgproc.hpp>
-    #include <opencv2/highgui.hpp>
-    #include <math.h>
-
-    using namespace cv;
-    using namespace std;
-
-    int main(int argc, char** argv)
-    {
-        Mat img, gray;
-        if( argc != 2 || !(img=imread(argv[1], 1)).data)
-            return -1;
-        cvtColor(img, gray, COLOR_BGR2GRAY);
-        // smooth it, otherwise a lot of false circles may be detected
-        GaussianBlur( gray, gray, Size(9, 9), 2, 2 );
-        vector<Vec3f> circles;
-        HoughCircles(gray, circles, HOUGH_GRADIENT,
-                     2, gray.rows/4, 200, 100 );
-        for( size_t i = 0; i < circles.size(); i++ )
-        {
-             Point center(cvRound(circles[i][0]), cvRound(circles[i][1]));
-             int radius = cvRound(circles[i][2]);
-             // draw the circle center
-             circle( img, center, 3, Scalar(0,255,0), -1, 8, 0 );
-             // draw the circle outline
-             circle( img, center, radius, Scalar(0,0,255), 3, 8, 0 );
-        }
-        namedWindow( "circles", 1 );
-        imshow( "circles", img );
-
-        waitKey(0);
-        return 0;
-    }
-@endcode
+@include snippets/imgproc_HoughLinesCircles.cpp
 
 @note Usually the function detects the centers of circles well. However, it may fail to find correct
 radii. You can assist to the function by specifying the radius range ( minRadius and maxRadius ) if
-you know it. Or, you may ignore the returned radius, use only the center, and find the correct
-radius using an additional procedure.
+you know it. Or, in the case of #HOUGH_GRADIENT method you may set maxRadius to a negative number
+to return centers only without radius search, and find the correct radius using an additional procedure.
+
+It also helps to smooth image a bit unless it's already soft. For example,
+GaussianBlur() with 7x7 kernel and 1.5x1.5 sigma or similar blurring may help.
 
 @param image 8-bit, single-channel, grayscale input image.
-@param circles Output vector of found circles. Each vector is encoded as a 3-element
-floating-point vector \f$(x, y, radius)\f$ .
-@param method Detection method, see cv::HoughModes. Currently, the only implemented method is HOUGH_GRADIENT
+@param circles Output vector of found circles. Each vector is encoded as a 3 or 4 element
+floating-point vector \f$(x, y, radius)\f$ or \f$(x, y, radius, votes)\f$ .
+@param method Detection method, see #HoughModes. The available methods are #HOUGH_GRADIENT and #HOUGH_GRADIENT_ALT.
 @param dp Inverse ratio of the accumulator resolution to the image resolution. For example, if
 dp=1 , the accumulator has the same resolution as the input image. If dp=2 , the accumulator has
-half as big width and height.
+half as big width and height. For #HOUGH_GRADIENT_ALT the recommended value is dp=1.5,
+unless some very small circles need to be detected.
 @param minDist Minimum distance between the centers of the detected circles. If the parameter is
 too small, multiple neighbor circles may be falsely detected in addition to a true one. If it is
 too large, some circles may be missed.
-@param param1 First method-specific parameter. In case of CV_HOUGH_GRADIENT , it is the higher
-threshold of the two passed to the Canny edge detector (the lower one is twice smaller).
-@param param2 Second method-specific parameter. In case of CV_HOUGH_GRADIENT , it is the
+@param param1 First method-specific parameter. In case of #HOUGH_GRADIENT and #HOUGH_GRADIENT_ALT,
+it is the higher threshold of the two passed to the Canny edge detector (the lower one is twice smaller).
+Note that #HOUGH_GRADIENT_ALT uses #Scharr algorithm to compute image derivatives, so the threshold value
+should normally be higher, such as 300 for normally exposed and contrasty images.
+@param param2 Second method-specific parameter. In case of #HOUGH_GRADIENT, it is the
 accumulator threshold for the circle centers at the detection stage. The smaller it is, the more
 false circles may be detected. Circles, corresponding to the larger accumulator values, will be
-returned first.
+returned first. In the case of #HOUGH_GRADIENT_ALT algorithm, this is the circle "perfectness" measure.
+The closer it is to 1, the better shaped circles the algorithm selects. In most cases 0.9 should be fine.
+If you want to get better detection of small circles, you may decrease it to 0.85, 0.8 or even less.
+But then also try to limit the search range [minRadius, maxRadius] to avoid many false circles.
 @param minRadius Minimum circle radius.
-@param maxRadius Maximum circle radius.
+@param maxRadius Maximum circle radius. If <= 0, uses the maximum image dimension. If < 0, #HOUGH_GRADIENT returns
+centers without finding the radius. #HOUGH_GRADIENT_ALT always computes circle radiuses.
 
 @sa fitEllipse, minEnclosingCircle
  */
@@ -2044,8 +2141,10 @@ CV_EXPORTS_W void HoughCircles( InputArray image, OutputArray circles,
 //! @addtogroup imgproc_filter
 //! @{
 
-/** @example morphology2.cpp
-  An example using the morphological operations
+/** @example samples/cpp/tutorial_code/ImgProc/Morphology_2.cpp
+Advanced morphology Transformations sample code
+![Sample screenshot](Morphology_2_Tutorial_Result.jpg)
+Check @ref tutorial_opening_closing_hats "the corresponding tutorial" for more details
 */
 
 /** @brief Erodes an image by using a specific structuring element.
@@ -2062,11 +2161,11 @@ case of multi-channel images, each channel is processed independently.
 CV_8U, CV_16U, CV_16S, CV_32F or CV_64F.
 @param dst output image of the same size and type as src.
 @param kernel structuring element used for erosion; if `element=Mat()`, a `3 x 3` rectangular
-structuring element is used. Kernel can be created using getStructuringElement.
+structuring element is used. Kernel can be created using #getStructuringElement.
 @param anchor position of the anchor within the element; default value (-1, -1) means that the
 anchor is at the element center.
 @param iterations number of times erosion is applied.
-@param borderType pixel extrapolation method, see cv::BorderTypes
+@param borderType pixel extrapolation method, see #BorderTypes. #BORDER_WRAP is not supported.
 @param borderValue border value in case of a constant border
 @sa  dilate, morphologyEx, getStructuringElement
  */
@@ -2075,6 +2174,12 @@ CV_EXPORTS_W void erode( InputArray src, OutputArray dst, InputArray kernel,
                          int borderType = BORDER_CONSTANT,
                          const Scalar& borderValue = morphologyDefaultBorderValue() );
 
+/** @example samples/cpp/tutorial_code/ImgProc/Morphology_1.cpp
+Erosion and Dilation sample code
+![Sample Screenshot-Erosion](Morphology_1_Tutorial_Erosion_Result.jpg)![Sample Screenshot-Dilation](Morphology_1_Tutorial_Dilation_Result.jpg)
+Check @ref tutorial_erosion_dilatation "the corresponding tutorial" for more details
+*/
+
 /** @brief Dilates an image by using a specific structuring element.
 
 The function dilates the source image using the specified structuring element that determines the
@@ -2086,13 +2191,13 @@ case of multi-channel images, each channel is processed independently.
 
 @param src input image; the number of channels can be arbitrary, but the depth should be one of
 CV_8U, CV_16U, CV_16S, CV_32F or CV_64F.
-@param dst output image of the same size and type as src\`.
+@param dst output image of the same size and type as src.
 @param kernel structuring element used for dilation; if elemenat=Mat(), a 3 x 3 rectangular
-structuring element is used. Kernel can be created using getStructuringElement
+structuring element is used. Kernel can be created using #getStructuringElement
 @param anchor position of the anchor within the element; default value (-1, -1) means that the
 anchor is at the element center.
 @param iterations number of times dilation is applied.
-@param borderType pixel extrapolation method, see cv::BorderTypes
+@param borderType pixel extrapolation method, see #BorderTypes. #BORDER_WRAP is not supported.
 @param borderValue border value in case of a constant border
 @sa  erode, morphologyEx, getStructuringElement
  */
@@ -2103,7 +2208,7 @@ CV_EXPORTS_W void dilate( InputArray src, OutputArray dst, InputArray kernel,
 
 /** @brief Performs advanced morphological transformations.
 
-The function morphologyEx can perform advanced morphological transformations using an erosion and dilation as
+The function cv::morphologyEx can perform advanced morphological transformations using an erosion and dilation as
 basic operations.
 
 Any of the operations can be done in-place. In case of multi-channel images, each channel is
@@ -2112,15 +2217,18 @@ processed independently.
 @param src Source image. The number of channels can be arbitrary. The depth should be one of
 CV_8U, CV_16U, CV_16S, CV_32F or CV_64F.
 @param dst Destination image of the same size and type as source image.
-@param op Type of a morphological operation, see cv::MorphTypes
-@param kernel Structuring element. It can be created using cv::getStructuringElement.
+@param op Type of a morphological operation, see #MorphTypes
+@param kernel Structuring element. It can be created using #getStructuringElement.
 @param anchor Anchor position with the kernel. Negative values mean that the anchor is at the
 kernel center.
 @param iterations Number of times erosion and dilation are applied.
-@param borderType Pixel extrapolation method, see cv::BorderTypes
+@param borderType Pixel extrapolation method, see #BorderTypes. #BORDER_WRAP is not supported.
 @param borderValue Border value in case of a constant border. The default value has a special
 meaning.
 @sa  dilate, erode, getStructuringElement
+@note The number of iterations is the number of times the erosion or dilation operation will be applied.
+For instance, an opening operation (#MORPH_OPEN) with two iterations is equivalent to applying
+successively: erode -> erode -> dilate -> dilate (and not erode -> dilate -> erode -> dilate).
  */
 CV_EXPORTS_W void morphologyEx( InputArray src, OutputArray dst,
                                 int op, InputArray kernel,
@@ -2149,8 +2257,8 @@ If you want to decimate the image by factor of 2 in each direction, you can call
     // specify fx and fy and let the function compute the destination image size.
     resize(src, dst, Size(), 0.5, 0.5, interpolation);
 @endcode
-To shrink an image, it will generally look best with cv::INTER_AREA interpolation, whereas to
-enlarge an image, it will generally look best with cv::INTER_CUBIC (slow) or cv::INTER_LINEAR
+To shrink an image, it will generally look best with #INTER_AREA interpolation, whereas to
+enlarge an image, it will generally look best with #INTER_CUBIC (slow) or #INTER_LINEAR
 (faster but still looks OK).
 
 @param src input image.
@@ -2163,7 +2271,7 @@ src.size(), fx, and fy; the type of dst is the same as of src.
 \f[\texttt{(double)dsize.width/src.cols}\f]
 @param fy scale factor along the vertical axis; when it equals 0, it is computed as
 \f[\texttt{(double)dsize.height/src.rows}\f]
-@param interpolation interpolation method, see cv::InterpolationFlags
+@param interpolation interpolation method, see #InterpolationFlags
 
 @sa  warpAffine, warpPerspective, remap
  */
@@ -2177,19 +2285,19 @@ The function warpAffine transforms the source image using the specified matrix:
 
 \f[\texttt{dst} (x,y) =  \texttt{src} ( \texttt{M} _{11} x +  \texttt{M} _{12} y +  \texttt{M} _{13}, \texttt{M} _{21} x +  \texttt{M} _{22} y +  \texttt{M} _{23})\f]
 
-when the flag WARP_INVERSE_MAP is set. Otherwise, the transformation is first inverted
-with cv::invertAffineTransform and then put in the formula above instead of M. The function cannot
+when the flag #WARP_INVERSE_MAP is set. Otherwise, the transformation is first inverted
+with #invertAffineTransform and then put in the formula above instead of M. The function cannot
 operate in-place.
 
 @param src input image.
 @param dst output image that has the size dsize and the same type as src .
 @param M \f$2\times 3\f$ transformation matrix.
 @param dsize size of the output image.
-@param flags combination of interpolation methods (see cv::InterpolationFlags) and the optional
-flag WARP_INVERSE_MAP that means that M is the inverse transformation (
+@param flags combination of interpolation methods (see #InterpolationFlags) and the optional
+flag #WARP_INVERSE_MAP that means that M is the inverse transformation (
 \f$\texttt{dst}\rightarrow\texttt{src}\f$ ).
-@param borderMode pixel extrapolation method (see cv::BorderTypes); when
-borderMode=BORDER_TRANSPARENT, it means that the pixels in the destination image corresponding to
+@param borderMode pixel extrapolation method (see #BorderTypes); when
+borderMode=#BORDER_TRANSPARENT, it means that the pixels in the destination image corresponding to
 the "outliers" in the source image are not modified by the function.
 @param borderValue value used in case of a constant border; by default, it is 0.
 
@@ -2201,6 +2309,10 @@ CV_EXPORTS_W void warpAffine( InputArray src, OutputArray dst,
                               int borderMode = BORDER_CONSTANT,
                               const Scalar& borderValue = Scalar());
 
+/** @example samples/cpp/warpPerspective_demo.cpp
+An example program showing how to use cv::findHomography and cv::warpPerspective for image warping
+*/
+
 /** @brief Applies a perspective transformation to an image.
 
 The function warpPerspective transforms the source image using the specified matrix:
@@ -2208,17 +2320,17 @@ The function warpPerspective transforms the source image using the specified mat
 \f[\texttt{dst} (x,y) =  \texttt{src} \left ( \frac{M_{11} x + M_{12} y + M_{13}}{M_{31} x + M_{32} y + M_{33}} ,
      \frac{M_{21} x + M_{22} y + M_{23}}{M_{31} x + M_{32} y + M_{33}} \right )\f]
 
-when the flag WARP_INVERSE_MAP is set. Otherwise, the transformation is first inverted with invert
+when the flag #WARP_INVERSE_MAP is set. Otherwise, the transformation is first inverted with invert
 and then put in the formula above instead of M. The function cannot operate in-place.
 
 @param src input image.
 @param dst output image that has the size dsize and the same type as src .
 @param M \f$3\times 3\f$ transformation matrix.
 @param dsize size of the output image.
-@param flags combination of interpolation methods (INTER_LINEAR or INTER_NEAREST) and the
-optional flag WARP_INVERSE_MAP, that sets M as the inverse transformation (
+@param flags combination of interpolation methods (#INTER_LINEAR or #INTER_NEAREST) and the
+optional flag #WARP_INVERSE_MAP, that sets M as the inverse transformation (
 \f$\texttt{dst}\rightarrow\texttt{src}\f$ ).
-@param borderMode pixel extrapolation method (BORDER_CONSTANT or BORDER_REPLICATE).
+@param borderMode pixel extrapolation method (#BORDER_CONSTANT or #BORDER_REPLICATE).
 @param borderValue value used in case of a constant border; by default, it equals 0.
 
 @sa  warpAffine, resize, remap, getRectSubPix, perspectiveTransform
@@ -2252,12 +2364,14 @@ CV_32FC1, or CV_32FC2. See convertMaps for details on converting a floating poin
 representation to fixed-point for speed.
 @param map2 The second map of y values having the type CV_16UC1, CV_32FC1, or none (empty map
 if map1 is (x,y) points), respectively.
-@param interpolation Interpolation method (see cv::InterpolationFlags). The method INTER_AREA is
+@param interpolation Interpolation method (see #InterpolationFlags). The method #INTER_AREA is
 not supported by this function.
-@param borderMode Pixel extrapolation method (see cv::BorderTypes). When
-borderMode=BORDER_TRANSPARENT, it means that the pixels in the destination image that
+@param borderMode Pixel extrapolation method (see #BorderTypes). When
+borderMode=#BORDER_TRANSPARENT, it means that the pixels in the destination image that
 corresponds to the "outliers" in the source image are not modified by the function.
 @param borderValue Value used in case of a constant border. By default, it is 0.
+@note
+Due to current implementation limitations the size of an input and output images should be less than 32767x32767.
  */
 CV_EXPORTS_W void remap( InputArray src, OutputArray dst,
                          InputArray map1, InputArray map2,
@@ -2270,13 +2384,13 @@ The function converts a pair of maps for remap from one representation to anothe
 options ( (map1.type(), map2.type()) \f$\rightarrow\f$ (dstmap1.type(), dstmap2.type()) ) are
 supported:
 
-- \f$\texttt{(CV\_32FC1, CV\_32FC1)} \rightarrow \texttt{(CV\_16SC2, CV\_16UC1)}\f$. This is the
+- \f$\texttt{(CV_32FC1, CV_32FC1)} \rightarrow \texttt{(CV_16SC2, CV_16UC1)}\f$. This is the
 most frequently used conversion operation, in which the original floating-point maps (see remap )
 are converted to a more compact and much faster fixed-point representation. The first output array
 contains the rounded coordinates and the second array (created only when nninterpolation=false )
 contains indices in the interpolation tables.
 
-- \f$\texttt{(CV\_32FC2)} \rightarrow \texttt{(CV\_16SC2, CV\_16UC1)}\f$. The same as above but
+- \f$\texttt{(CV_32FC2)} \rightarrow \texttt{(CV_16SC2, CV_16UC1)}\f$. The same as above but
 the original maps are stored in one 2-channel matrix.
 
 - Reverse conversion. Obviously, the reconstructed floating-point maps will not be exactly the same
@@ -2317,16 +2431,22 @@ coordinate origin is assumed to be the top-left corner).
 
 @sa  getAffineTransform, warpAffine, transform
  */
-CV_EXPORTS_W Mat getRotationMatrix2D( Point2f center, double angle, double scale );
+CV_EXPORTS_W Mat getRotationMatrix2D(Point2f center, double angle, double scale);
 
-//! returns 3x3 perspective transformation for the corresponding 4 point pairs.
-CV_EXPORTS Mat getPerspectiveTransform( const Point2f src[], const Point2f dst[] );
+/** @sa getRotationMatrix2D */
+CV_EXPORTS Matx23d getRotationMatrix2D_(Point2f center, double angle, double scale);
+
+inline
+Mat getRotationMatrix2D(Point2f center, double angle, double scale)
+{
+    return Mat(getRotationMatrix2D_(center, angle, scale), true);
+}
 
 /** @brief Calculates an affine transform from three pairs of the corresponding points.
 
 The function calculates the \f$2 \times 3\f$ matrix of an affine transform so that:
 
-\f[\begin{bmatrix} x'_i \\ y'_i \end{bmatrix} = \texttt{map\_matrix} \cdot \begin{bmatrix} x_i \\ y_i \\ 1 \end{bmatrix}\f]
+\f[\begin{bmatrix} x'_i \\ y'_i \end{bmatrix} = \texttt{map_matrix} \cdot \begin{bmatrix} x_i \\ y_i \\ 1 \end{bmatrix}\f]
 
 where
 
@@ -2356,7 +2476,7 @@ CV_EXPORTS_W void invertAffineTransform( InputArray M, OutputArray iM );
 
 The function calculates the \f$3 \times 3\f$ matrix of a perspective transform so that:
 
-\f[\begin{bmatrix} t_i x'_i \\ t_i y'_i \\ t_i \end{bmatrix} = \texttt{map\_matrix} \cdot \begin{bmatrix} x_i \\ y_i \\ 1 \end{bmatrix}\f]
+\f[\begin{bmatrix} t_i x'_i \\ t_i y'_i \\ t_i \end{bmatrix} = \texttt{map_matrix} \cdot \begin{bmatrix} x_i \\ y_i \\ 1 \end{bmatrix}\f]
 
 where
 
@@ -2364,10 +2484,15 @@ where
 
 @param src Coordinates of quadrangle vertices in the source image.
 @param dst Coordinates of the corresponding quadrangle vertices in the destination image.
+@param solveMethod method passed to cv::solve (#DecompTypes)
 
 @sa  findHomography, warpPerspective, perspectiveTransform
  */
-CV_EXPORTS_W Mat getPerspectiveTransform( InputArray src, InputArray dst );
+CV_EXPORTS_W Mat getPerspectiveTransform(InputArray src, InputArray dst, int solveMethod = DECOMP_LU);
+
+/** @overload */
+CV_EXPORTS Mat getPerspectiveTransform(const Point2f src[], const Point2f dst[], int solveMethod = DECOMP_LU);
+
 
 CV_EXPORTS_W Mat getAffineTransform( InputArray src, InputArray dst );
 
@@ -2375,13 +2500,12 @@ CV_EXPORTS_W Mat getAffineTransform( InputArray src, InputArray dst );
 
 The function getRectSubPix extracts pixels from src:
 
-\f[dst(x, y) = src(x +  \texttt{center.x} - ( \texttt{dst.cols} -1)*0.5, y +  \texttt{center.y} - ( \texttt{dst.rows} -1)*0.5)\f]
+\f[patch(x, y) = src(x +  \texttt{center.x} - ( \texttt{patch.cols} -1)*0.5, y +  \texttt{center.y} - ( \texttt{patch.rows} -1)*0.5)\f]
 
 where the values of the pixels at non-integer coordinates are retrieved using bilinear
-interpolation. Every channel of multi-channel images is processed independently. While the center of
-the rectangle must be inside the image, parts of the rectangle may be outside. In this case, the
-replication border mode (see cv::BorderTypes) is used to extrapolate the pixel values outside of
-the image.
+interpolation. Every channel of multi-channel images is processed independently. Also
+the image should be a single channel or three channel image. While the center of the
+rectangle must be inside the image, parts of the rectangle may be outside.
 
 @param image Source image.
 @param patchSize Size of the extracted patch.
@@ -2395,48 +2519,185 @@ source image. The center must be inside the image.
 CV_EXPORTS_W void getRectSubPix( InputArray image, Size patchSize,
                                  Point2f center, OutputArray patch, int patchType = -1 );
 
-/** @example polar_transforms.cpp
+/** @example samples/cpp/polar_transforms.cpp
 An example using the cv::linearPolar and cv::logPolar operations
 */
 
-/** @brief Remaps an image to log-polar space.
+/** @brief Remaps an image to semilog-polar coordinates space.
+
+@deprecated This function produces same result as cv::warpPolar(src, dst, src.size(), center, maxRadius, flags+WARP_POLAR_LOG);
+
+@internal
+Transform the source image using the following transformation (See @ref polar_remaps_reference_image "Polar remaps reference image d)"):
+\f[\begin{array}{l}
+  dst( \rho , \phi ) = src(x,y) \\
+  dst.size() \leftarrow src.size()
+\end{array}\f]
 
-transforms the source image using the following transformation:
-\f[dst( \phi , \rho ) = src(x,y)\f]
 where
-\f[\rho = M  \cdot \log{\sqrt{x^2 + y^2}} , \phi =atan(y/x)\f]
+\f[\begin{array}{l}
+  I = (dx,dy) = (x - center.x,y - center.y) \\
+  \rho = M \cdot log_e(\texttt{magnitude} (I)) ,\\
+  \phi = Kangle \cdot \texttt{angle} (I) \\
+\end{array}\f]
 
-The function emulates the human "foveal" vision and can be used for fast scale and
-rotation-invariant template matching, for object tracking and so forth. The function can not operate
-in-place.
+and
+\f[\begin{array}{l}
+  M = src.cols / log_e(maxRadius) \\
+  Kangle = src.rows / 2\Pi \\
+\end{array}\f]
 
+The function emulates the human "foveal" vision and can be used for fast scale and
+rotation-invariant template matching, for object tracking and so forth.
 @param src Source image
-@param dst Destination image
+@param dst Destination image. It will have same size and type as src.
 @param center The transformation center; where the output precision is maximal
-@param M Magnitude scale parameter.
-@param flags A combination of interpolation methods, see cv::InterpolationFlags
- */
+@param M Magnitude scale parameter. It determines the radius of the bounding circle to transform too.
+@param flags A combination of interpolation methods, see #InterpolationFlags
+
+@note
+-   The function can not operate in-place.
+-   To calculate magnitude and angle in degrees #cartToPolar is used internally thus angles are measured from 0 to 360 with accuracy about 0.3 degrees.
+
+@sa cv::linearPolar
+@endinternal
+*/
 CV_EXPORTS_W void logPolar( InputArray src, OutputArray dst,
                             Point2f center, double M, int flags );
 
-/** @brief Remaps an image to polar space.
+/** @brief Remaps an image to polar coordinates space.
+
+@deprecated This function produces same result as cv::warpPolar(src, dst, src.size(), center, maxRadius, flags)
+
+@internal
+Transform the source image using the following transformation (See @ref polar_remaps_reference_image "Polar remaps reference image c)"):
+\f[\begin{array}{l}
+  dst( \rho , \phi ) = src(x,y) \\
+  dst.size() \leftarrow src.size()
+\end{array}\f]
 
-transforms the source image using the following transformation:
-\f[dst( \phi , \rho ) = src(x,y)\f]
 where
-\f[\rho = (src.width/maxRadius)  \cdot \sqrt{x^2 + y^2} , \phi =atan(y/x)\f]
+\f[\begin{array}{l}
+  I = (dx,dy) = (x - center.x,y - center.y) \\
+  \rho = Kmag \cdot \texttt{magnitude} (I) ,\\
+  \phi = Kangle \cdot \texttt{angle} (I)
+\end{array}\f]
+
+and
+\f[\begin{array}{l}
+  Kmag = src.cols / maxRadius \\
+  Kangle = src.rows / 2\Pi
+\end{array}\f]
 
-The function can not operate in-place.
 
 @param src Source image
-@param dst Destination image
+@param dst Destination image. It will have same size and type as src.
 @param center The transformation center;
-@param maxRadius Inverse magnitude scale parameter
-@param flags A combination of interpolation methods, see cv::InterpolationFlags
- */
+@param maxRadius The radius of the bounding circle to transform. It determines the inverse magnitude scale parameter too.
+@param flags A combination of interpolation methods, see #InterpolationFlags
+
+@note
+-   The function can not operate in-place.
+-   To calculate magnitude and angle in degrees #cartToPolar is used internally thus angles are measured from 0 to 360 with accuracy about 0.3 degrees.
+
+@sa cv::logPolar
+@endinternal
+*/
 CV_EXPORTS_W void linearPolar( InputArray src, OutputArray dst,
                                Point2f center, double maxRadius, int flags );
 
+
+/** \brief Remaps an image to polar or semilog-polar coordinates space
+
+@anchor polar_remaps_reference_image
+![Polar remaps reference](pics/polar_remap_doc.png)
+
+Transform the source image using the following transformation:
+\f[
+dst(\rho , \phi ) = src(x,y)
+\f]
+
+where
+\f[
+\begin{array}{l}
+\vec{I} = (x - center.x, \;y - center.y) \\
+\phi = Kangle \cdot \texttt{angle} (\vec{I}) \\
+\rho = \left\{\begin{matrix}
+Klin \cdot \texttt{magnitude} (\vec{I}) & default \\
+Klog \cdot log_e(\texttt{magnitude} (\vec{I})) & if \; semilog \\
+\end{matrix}\right.
+\end{array}
+\f]
+
+and
+\f[
+\begin{array}{l}
+Kangle = dsize.height / 2\Pi \\
+Klin = dsize.width / maxRadius \\
+Klog = dsize.width / log_e(maxRadius) \\
+\end{array}
+\f]
+
+
+\par Linear vs semilog mapping
+
+Polar mapping can be linear or semi-log. Add one of #WarpPolarMode to `flags` to specify the polar mapping mode.
+
+Linear is the default mode.
+
+The semilog mapping emulates the human "foveal" vision that permit very high acuity on the line of sight (central vision)
+in contrast to peripheral vision where acuity is minor.
+
+\par Option on `dsize`:
+
+- if both values in `dsize <=0 ` (default),
+the destination image will have (almost) same area of source bounding circle:
+\f[\begin{array}{l}
+dsize.area  \leftarrow (maxRadius^2 \cdot \Pi) \\
+dsize.width = \texttt{cvRound}(maxRadius) \\
+dsize.height = \texttt{cvRound}(maxRadius \cdot \Pi) \\
+\end{array}\f]
+
+
+- if only `dsize.height <= 0`,
+the destination image area will be proportional to the bounding circle area but scaled by `Kx * Kx`:
+\f[\begin{array}{l}
+dsize.height = \texttt{cvRound}(dsize.width \cdot \Pi) \\
+\end{array}
+\f]
+
+- if both values in `dsize > 0 `,
+the destination image will have the given size therefore the area of the bounding circle will be scaled to `dsize`.
+
+
+\par Reverse mapping
+
+You can get reverse mapping adding #WARP_INVERSE_MAP to `flags`
+\snippet polar_transforms.cpp InverseMap
+
+In addition, to calculate the original coordinate from a polar mapped coordinate \f$(\rho, \phi) \rightarrow (x, y)\f$:
+\snippet polar_transforms.cpp InverseCoordinate
+
+@param src Source image.
+@param dst Destination image. It will have same type as src.
+@param dsize The destination image size (see description for valid options).
+@param center The transformation center.
+@param maxRadius The radius of the bounding circle to transform. It determines the inverse magnitude scale parameter too.
+@param flags A combination of interpolation methods, #InterpolationFlags + #WarpPolarMode.
+            - Add #WARP_POLAR_LINEAR to select linear polar mapping (default)
+            - Add #WARP_POLAR_LOG to select semilog polar mapping
+            - Add #WARP_INVERSE_MAP for reverse mapping.
+@note
+-  The function can not operate in-place.
+-  To calculate magnitude and angle in degrees #cartToPolar is used internally thus angles are measured from 0 to 360 with accuracy about 0.3 degrees.
+-  This function uses #remap. Due to current implementation limitations the size of an input and output images should be less than 32767x32767.
+
+@sa cv::remap
+*/
+CV_EXPORTS_W void warpPolar(InputArray src, OutputArray dst, Size dsize,
+                            Point2f center, double maxRadius, int flags);
+
+
 //! @} imgproc_transform
 
 //! @addtogroup imgproc_misc
@@ -2451,7 +2712,7 @@ CV_EXPORTS_AS(integral2) void integral( InputArray src, OutputArray sum,
 
 /** @brief Calculates the integral of an image.
 
-The functions calculate one or more integral images for the source image as follows:
+The function calculates one or more integral images for the source image as follows:
 
 \f[\texttt{sum} (X,Y) =  \sum _{x<X,y<Y}  \texttt{image} (x,y)\f]
 
@@ -2492,7 +2753,7 @@ CV_EXPORTS_AS(integral3) void integral( InputArray src, OutputArray sum,
 //! @addtogroup imgproc_motion
 //! @{
 
-/** @brief Adds an image to the accumulator.
+/** @brief Adds an image to the accumulator image.
 
 The function adds src or some of its elements to dst :
 
@@ -2500,12 +2761,11 @@ The function adds src or some of its elements to dst :
 
 The function supports multi-channel images. Each channel is processed independently.
 
-The functions accumulate\* can be used, for example, to collect statistics of a scene background
+The function cv::accumulate can be used, for example, to collect statistics of a scene background
 viewed by a still camera and for the further foreground-background segmentation.
 
-@param src Input image as 1- or 3-channel, 8-bit or 32-bit floating point.
-@param dst %Accumulator image with the same number of channels as input image, 32-bit or 64-bit
-floating-point.
+@param src Input image of type CV_8UC(n), CV_16UC(n), CV_32FC(n) or CV_64FC(n), where n is a positive integer.
+@param dst %Accumulator image with the same number of channels as input image, and a depth of CV_32F or CV_64F.
 @param mask Optional operation mask.
 
 @sa  accumulateSquare, accumulateProduct, accumulateWeighted
@@ -2513,7 +2773,7 @@ floating-point.
 CV_EXPORTS_W void accumulate( InputArray src, InputOutputArray dst,
                               InputArray mask = noArray() );
 
-/** @brief Adds the square of a source image to the accumulator.
+/** @brief Adds the square of a source image to the accumulator image.
 
 The function adds the input image src or its selected region, raised to a power of 2, to the
 accumulator dst :
@@ -2532,7 +2792,7 @@ floating-point.
 CV_EXPORTS_W void accumulateSquare( InputArray src, InputOutputArray dst,
                                     InputArray mask = noArray() );
 
-/** @brief Adds the per-element product of two input images to the accumulator.
+/** @brief Adds the per-element product of two input images to the accumulator image.
 
 The function adds the product of two images or their selected regions to the accumulator dst :
 
@@ -2542,7 +2802,7 @@ The function supports multi-channel images. Each channel is processed independen
 
 @param src1 First input image, 1- or 3-channel, 8-bit or 32-bit floating point.
 @param src2 Second input image of the same type and the same size as src1 .
-@param dst %Accumulator with the same number of channels as input images, 32-bit or 64-bit
+@param dst %Accumulator image with the same number of channels as input images, 32-bit or 64-bit
 floating-point.
 @param mask Optional operation mask.
 
@@ -2622,7 +2882,7 @@ An example is shown below:
     createHanningWindow(hann, Size(100, 100), CV_32F);
 @endcode
 @param dst Destination array to place Hann coefficients in
-@param winSize The window size specifications
+@param winSize The window size specifications (both width and height must be > 1)
 @param type Created array type
  */
 CV_EXPORTS_W void createHanningWindow(OutputArray dst, Size winSize, int type);
@@ -2634,24 +2894,25 @@ CV_EXPORTS_W void createHanningWindow(OutputArray dst, Size winSize, int type);
 
 /** @brief Applies a fixed-level threshold to each array element.
 
-The function applies fixed-level thresholding to a single-channel array. The function is typically
-used to get a bi-level (binary) image out of a grayscale image ( cv::compare could be also used for
+The function applies fixed-level thresholding to a multiple-channel array. The function is typically
+used to get a bi-level (binary) image out of a grayscale image ( #compare could be also used for
 this purpose) or for removing a noise, that is, filtering out pixels with too small or too large
 values. There are several types of thresholding supported by the function. They are determined by
 type parameter.
 
-Also, the special values cv::THRESH_OTSU or cv::THRESH_TRIANGLE may be combined with one of the
+Also, the special values #THRESH_OTSU or #THRESH_TRIANGLE may be combined with one of the
 above values. In these cases, the function determines the optimal threshold value using the Otsu's
-or Triangle algorithm and uses it instead of the specified thresh . The function returns the
-computed threshold value. Currently, the Otsu's and Triangle methods are implemented only for 8-bit
-images.
+or Triangle algorithm and uses it instead of the specified thresh.
+
+@note Currently, the Otsu's and Triangle methods are implemented only for 8-bit single-channel images.
 
-@param src input array (single-channel, 8-bit or 32-bit floating point).
-@param dst output array of the same size and type as src.
+@param src input array (multiple-channel, 8-bit or 32-bit floating point).
+@param dst output array of the same size, type, and number of channels as src.
 @param thresh threshold value.
-@param maxval maximum value to use with the THRESH_BINARY and THRESH_BINARY_INV thresholding
+@param maxval maximum value to use with the #THRESH_BINARY and #THRESH_BINARY_INV thresholding
 types.
-@param type thresholding type (see the cv::ThresholdTypes).
+@param type thresholding type (see #ThresholdTypes).
+@return the computed threshold value if Otsu's or Triangle methods are used.
 
 @sa  adaptiveThreshold, findContours, compare, min, max
  */
@@ -2673,9 +2934,10 @@ The function can process the image in-place.
 @param src Source 8-bit single-channel image.
 @param dst Destination image of the same size and the same type as src.
 @param maxValue Non-zero value assigned to the pixels for which the condition is satisfied
-@param adaptiveMethod Adaptive thresholding algorithm to use, see cv::AdaptiveThresholdTypes
-@param thresholdType Thresholding type that must be either THRESH_BINARY or THRESH_BINARY_INV,
-see cv::ThresholdTypes.
+@param adaptiveMethod Adaptive thresholding algorithm to use, see #AdaptiveThresholdTypes.
+The #BORDER_REPLICATE | #BORDER_ISOLATED is used to process boundaries.
+@param thresholdType Thresholding type that must be either #THRESH_BINARY or #THRESH_BINARY_INV,
+see #ThresholdTypes.
 @param blockSize Size of a pixel neighborhood that is used to calculate a threshold value for the
 pixel: 3, 5, 7, and so on.
 @param C Constant subtracted from the mean or weighted mean (see the details below). Normally, it
@@ -2692,6 +2954,10 @@ CV_EXPORTS_W void adaptiveThreshold( InputArray src, OutputArray dst,
 //! @addtogroup imgproc_filter
 //! @{
 
+/** @example samples/cpp/tutorial_code/ImgProc/Pyramids/Pyramids.cpp
+An example using pyrDown and pyrUp functions
+*/
+
 /** @brief Blurs an image and downsamples it.
 
 By default, size of the output image is computed as `Size((src.cols+1)/2, (src.rows+1)/2)`, but in
@@ -2709,7 +2975,7 @@ Then, it downsamples the image by rejecting even rows and columns.
 @param src input image.
 @param dst output image; it has the specified size and the same type as src.
 @param dstsize size of the output image.
-@param borderType Pixel extrapolation method, see cv::BorderTypes (BORDER_CONSTANT isn't supported)
+@param borderType Pixel extrapolation method, see #BorderTypes (#BORDER_CONSTANT isn't supported)
  */
 CV_EXPORTS_W void pyrDown( InputArray src, OutputArray dst,
                            const Size& dstsize = Size(), int borderType = BORDER_DEFAULT );
@@ -2729,7 +2995,7 @@ pyrDown multiplied by 4.
 @param src input image.
 @param dst output image. It has the specified size and the same type as src .
 @param dstsize size of the output image.
-@param borderType Pixel extrapolation method, see cv::BorderTypes (only BORDER_DEFAULT is supported)
+@param borderType Pixel extrapolation method, see #BorderTypes (only #BORDER_DEFAULT is supported)
  */
 CV_EXPORTS_W void pyrUp( InputArray src, OutputArray dst,
                          const Size& dstsize = Size(), int borderType = BORDER_DEFAULT );
@@ -2743,261 +3009,28 @@ pyrDown to the previously built pyramid layers, starting from `dst[0]==src`.
 @param dst Destination vector of maxlevel+1 images of the same type as src. dst[0] will be the
 same as src. dst[1] is the next pyramid layer, a smoothed and down-sized src, and so on.
 @param maxlevel 0-based index of the last (the smallest) pyramid layer. It must be non-negative.
-@param borderType Pixel extrapolation method, see cv::BorderTypes (BORDER_CONSTANT isn't supported)
+@param borderType Pixel extrapolation method, see #BorderTypes (#BORDER_CONSTANT isn't supported)
  */
 CV_EXPORTS void buildPyramid( InputArray src, OutputArrayOfArrays dst,
                               int maxlevel, int borderType = BORDER_DEFAULT );
 
 //! @} imgproc_filter
 
-//! @addtogroup imgproc_transform
-//! @{
-
-/** @brief Transforms an image to compensate for lens distortion.
-
-The function transforms an image to compensate radial and tangential lens distortion.
-
-The function is simply a combination of cv::initUndistortRectifyMap (with unity R ) and cv::remap
-(with bilinear interpolation). See the former function for details of the transformation being
-performed.
-
-Those pixels in the destination image, for which there is no correspondent pixels in the source
-image, are filled with zeros (black color).
-
-A particular subset of the source image that will be visible in the corrected image can be regulated
-by newCameraMatrix. You can use cv::getOptimalNewCameraMatrix to compute the appropriate
-newCameraMatrix depending on your requirements.
-
-The camera matrix and the distortion parameters can be determined using cv::calibrateCamera. If
-the resolution of images is different from the resolution used at the calibration stage, \f$f_x,
-f_y, c_x\f$ and \f$c_y\f$ need to be scaled accordingly, while the distortion coefficients remain
-the same.
-
-@param src Input (distorted) image.
-@param dst Output (corrected) image that has the same size and type as src .
-@param cameraMatrix Input camera matrix \f$A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ .
-@param distCoeffs Input vector of distortion coefficients
-\f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6[, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$
-of 4, 5, 8, 12 or 14 elements. If the vector is NULL/empty, the zero distortion coefficients are assumed.
-@param newCameraMatrix Camera matrix of the distorted image. By default, it is the same as
-cameraMatrix but you may additionally scale and shift the result by using a different matrix.
- */
-CV_EXPORTS_W void undistort( InputArray src, OutputArray dst,
-                             InputArray cameraMatrix,
-                             InputArray distCoeffs,
-                             InputArray newCameraMatrix = noArray() );
-
-/** @brief Computes the undistortion and rectification transformation map.
-
-The function computes the joint undistortion and rectification transformation and represents the
-result in the form of maps for remap. The undistorted image looks like original, as if it is
-captured with a camera using the camera matrix =newCameraMatrix and zero distortion. In case of a
-monocular camera, newCameraMatrix is usually equal to cameraMatrix, or it can be computed by
-cv::getOptimalNewCameraMatrix for a better control over scaling. In case of a stereo camera,
-newCameraMatrix is normally set to P1 or P2 computed by cv::stereoRectify .
-
-Also, this new camera is oriented differently in the coordinate space, according to R. That, for
-example, helps to align two heads of a stereo camera so that the epipolar lines on both images
-become horizontal and have the same y- coordinate (in case of a horizontally aligned stereo camera).
-
-The function actually builds the maps for the inverse mapping algorithm that is used by remap. That
-is, for each pixel \f$(u, v)\f$ in the destination (corrected and rectified) image, the function
-computes the corresponding coordinates in the source image (that is, in the original image from
-camera). The following process is applied:
-\f[
-\begin{array}{l}
-x  \leftarrow (u - {c'}_x)/{f'}_x  \\
-y  \leftarrow (v - {c'}_y)/{f'}_y  \\
-{[X\,Y\,W]} ^T  \leftarrow R^{-1}*[x \, y \, 1]^T  \\
-x'  \leftarrow X/W  \\
-y'  \leftarrow Y/W  \\
-r^2  \leftarrow x'^2 + y'^2 \\
-x''  \leftarrow x' \frac{1 + k_1 r^2 + k_2 r^4 + k_3 r^6}{1 + k_4 r^2 + k_5 r^4 + k_6 r^6}
-+ 2p_1 x' y' + p_2(r^2 + 2 x'^2)  + s_1 r^2 + s_2 r^4\\
-y''  \leftarrow y' \frac{1 + k_1 r^2 + k_2 r^4 + k_3 r^6}{1 + k_4 r^2 + k_5 r^4 + k_6 r^6}
-+ p_1 (r^2 + 2 y'^2) + 2 p_2 x' y' + s_3 r^2 + s_4 r^4 \\
-s\vecthree{x'''}{y'''}{1} =
-\vecthreethree{R_{33}(\tau_x, \tau_y)}{0}{-R_{13}((\tau_x, \tau_y)}
-{0}{R_{33}(\tau_x, \tau_y)}{-R_{23}(\tau_x, \tau_y)}
-{0}{0}{1} R(\tau_x, \tau_y) \vecthree{x''}{y''}{1}\\
-map_x(u,v)  \leftarrow x''' f_x + c_x  \\
-map_y(u,v)  \leftarrow y''' f_y + c_y
-\end{array}
-\f]
-where \f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6[, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$
-are the distortion coefficients.
-
-In case of a stereo camera, this function is called twice: once for each camera head, after
-stereoRectify, which in its turn is called after cv::stereoCalibrate. But if the stereo camera
-was not calibrated, it is still possible to compute the rectification transformations directly from
-the fundamental matrix using cv::stereoRectifyUncalibrated. For each camera, the function computes
-homography H as the rectification transformation in a pixel domain, not a rotation matrix R in 3D
-space. R can be computed from H as
-\f[\texttt{R} = \texttt{cameraMatrix} ^{-1} \cdot \texttt{H} \cdot \texttt{cameraMatrix}\f]
-where cameraMatrix can be chosen arbitrarily.
-
-@param cameraMatrix Input camera matrix \f$A=\vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ .
-@param distCoeffs Input vector of distortion coefficients
-\f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6[, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$
-of 4, 5, 8, 12 or 14 elements. If the vector is NULL/empty, the zero distortion coefficients are assumed.
-@param R Optional rectification transformation in the object space (3x3 matrix). R1 or R2 ,
-computed by stereoRectify can be passed here. If the matrix is empty, the identity transformation
-is assumed. In cvInitUndistortMap R assumed to be an identity matrix.
-@param newCameraMatrix New camera matrix \f$A'=\vecthreethree{f_x'}{0}{c_x'}{0}{f_y'}{c_y'}{0}{0}{1}\f$.
-@param size Undistorted image size.
-@param m1type Type of the first output map that can be CV_32FC1 or CV_16SC2, see cv::convertMaps
-@param map1 The first output map.
-@param map2 The second output map.
- */
-CV_EXPORTS_W void initUndistortRectifyMap( InputArray cameraMatrix, InputArray distCoeffs,
-                           InputArray R, InputArray newCameraMatrix,
-                           Size size, int m1type, OutputArray map1, OutputArray map2 );
-
-//! initializes maps for cv::remap() for wide-angle
-CV_EXPORTS_W float initWideAngleProjMap( InputArray cameraMatrix, InputArray distCoeffs,
-                                         Size imageSize, int destImageWidth,
-                                         int m1type, OutputArray map1, OutputArray map2,
-                                         int projType = PROJ_SPHERICAL_EQRECT, double alpha = 0);
-
-/** @brief Returns the default new camera matrix.
-
-The function returns the camera matrix that is either an exact copy of the input cameraMatrix (when
-centerPrinicipalPoint=false ), or the modified one (when centerPrincipalPoint=true).
-
-In the latter case, the new camera matrix will be:
-
-\f[\begin{bmatrix} f_x && 0 && ( \texttt{imgSize.width} -1)*0.5  \\ 0 && f_y && ( \texttt{imgSize.height} -1)*0.5  \\ 0 && 0 && 1 \end{bmatrix} ,\f]
-
-where \f$f_x\f$ and \f$f_y\f$ are \f$(0,0)\f$ and \f$(1,1)\f$ elements of cameraMatrix, respectively.
-
-By default, the undistortion functions in OpenCV (see initUndistortRectifyMap, undistort) do not
-move the principal point. However, when you work with stereo, it is important to move the principal
-points in both views to the same y-coordinate (which is required by most of stereo correspondence
-algorithms), and may be to the same x-coordinate too. So, you can form the new camera matrix for
-each view where the principal points are located at the center.
-
-@param cameraMatrix Input camera matrix.
-@param imgsize Camera view image size in pixels.
-@param centerPrincipalPoint Location of the principal point in the new camera matrix. The
-parameter indicates whether this location should be at the image center or not.
- */
-CV_EXPORTS_W Mat getDefaultNewCameraMatrix( InputArray cameraMatrix, Size imgsize = Size(),
-                                            bool centerPrincipalPoint = false );
-
-/** @brief Computes the ideal point coordinates from the observed point coordinates.
-
-The function is similar to cv::undistort and cv::initUndistortRectifyMap but it operates on a
-sparse set of points instead of a raster image. Also the function performs a reverse transformation
-to projectPoints. In case of a 3D object, it does not reconstruct its 3D coordinates, but for a
-planar object, it does, up to a translation vector, if the proper R is specified.
-@code
-    // (u,v) is the input point, (u', v') is the output point
-    // camera_matrix=[fx 0 cx; 0 fy cy; 0 0 1]
-    // P=[fx' 0 cx' tx; 0 fy' cy' ty; 0 0 1 tz]
-    x" = (u - cx)/fx
-    y" = (v - cy)/fy
-    (x',y') = undistort(x",y",dist_coeffs)
-    [X,Y,W]T = R*[x' y' 1]T
-    x = X/W, y = Y/W
-    // only performed if P=[fx' 0 cx' [tx]; 0 fy' cy' [ty]; 0 0 1 [tz]] is specified
-    u' = x*fx' + cx'
-    v' = y*fy' + cy',
-@endcode
-where cv::undistort is an approximate iterative algorithm that estimates the normalized original
-point coordinates out of the normalized distorted point coordinates ("normalized" means that the
-coordinates do not depend on the camera matrix).
-
-The function can be used for both a stereo camera head or a monocular camera (when R is empty).
-
-@param src Observed point coordinates, 1xN or Nx1 2-channel (CV_32FC2 or CV_64FC2).
-@param dst Output ideal point coordinates after undistortion and reverse perspective
-transformation. If matrix P is identity or omitted, dst will contain normalized point coordinates.
-@param cameraMatrix Camera matrix \f$\vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ .
-@param distCoeffs Input vector of distortion coefficients
-\f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6[, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$
-of 4, 5, 8, 12 or 14 elements. If the vector is NULL/empty, the zero distortion coefficients are assumed.
-@param R Rectification transformation in the object space (3x3 matrix). R1 or R2 computed by
-cv::stereoRectify can be passed here. If the matrix is empty, the identity transformation is used.
-@param P New camera matrix (3x3) or new projection matrix (3x4). P1 or P2 computed by
-cv::stereoRectify can be passed here. If the matrix is empty, the identity new camera matrix is used.
- */
-CV_EXPORTS_W void undistortPoints( InputArray src, OutputArray dst,
-                                   InputArray cameraMatrix, InputArray distCoeffs,
-                                   InputArray R = noArray(), InputArray P = noArray());
-
-//! @} imgproc_transform
-
 //! @addtogroup imgproc_hist
 //! @{
 
-/** @example demhist.cpp
+/** @example samples/cpp/demhist.cpp
 An example for creating histograms of an image
 */
 
 /** @brief Calculates a histogram of a set of arrays.
 
-The functions calcHist calculate the histogram of one or more arrays. The elements of a tuple used
+The function cv::calcHist calculates the histogram of one or more arrays. The elements of a tuple used
 to increment a histogram bin are taken from the corresponding input arrays at the same location. The
 sample below shows how to compute a 2D Hue-Saturation histogram for a color image. :
-@code
-    #include <opencv2/imgproc.hpp>
-    #include <opencv2/highgui.hpp>
-
-    using namespace cv;
+@include snippets/imgproc_calcHist.cpp
 
-    int main( int argc, char** argv )
-    {
-        Mat src, hsv;
-        if( argc != 2 || !(src=imread(argv[1], 1)).data )
-            return -1;
-
-        cvtColor(src, hsv, COLOR_BGR2HSV);
-
-        // Quantize the hue to 30 levels
-        // and the saturation to 32 levels
-        int hbins = 30, sbins = 32;
-        int histSize[] = {hbins, sbins};
-        // hue varies from 0 to 179, see cvtColor
-        float hranges[] = { 0, 180 };
-        // saturation varies from 0 (black-gray-white) to
-        // 255 (pure spectrum color)
-        float sranges[] = { 0, 256 };
-        const float* ranges[] = { hranges, sranges };
-        MatND hist;
-        // we compute the histogram from the 0-th and 1-st channels
-        int channels[] = {0, 1};
-
-        calcHist( &hsv, 1, channels, Mat(), // do not use mask
-                 hist, 2, histSize, ranges,
-                 true, // the histogram is uniform
-                 false );
-        double maxVal=0;
-        minMaxLoc(hist, 0, &maxVal, 0, 0);
-
-        int scale = 10;
-        Mat histImg = Mat::zeros(sbins*scale, hbins*10, CV_8UC3);
-
-        for( int h = 0; h < hbins; h++ )
-            for( int s = 0; s < sbins; s++ )
-            {
-                float binVal = hist.at<float>(h, s);
-                int intensity = cvRound(binVal*255/maxVal);
-                rectangle( histImg, Point(h*scale, s*scale),
-                            Point( (h+1)*scale - 1, (s+1)*scale - 1),
-                            Scalar::all(intensity),
-                            CV_FILLED );
-            }
-
-        namedWindow( "Source", 1 );
-        imshow( "Source", src );
-
-        namedWindow( "H-S Histogram", 1 );
-        imshow( "H-S Histogram", histImg );
-        waitKey();
-    }
-@endcode
-
-@param images Source arrays. They all should have the same depth, CV_8U or CV_32F , and the same
+@param images Source arrays. They all should have the same depth, CV_8U, CV_16U or CV_32F , and the same
 size. Each of them can have an arbitrary number of channels.
 @param nimages Number of source images.
 @param channels List of the dims channels used to compute the histogram. The first array channels
@@ -3030,7 +3063,7 @@ CV_EXPORTS void calcHist( const Mat* images, int nimages,
 
 /** @overload
 
-this variant uses cv::SparseMat for output
+this variant uses %SparseMat for output
 */
 CV_EXPORTS void calcHist( const Mat* images, int nimages,
                           const int* channels, InputArray mask,
@@ -3048,8 +3081,8 @@ CV_EXPORTS_W void calcHist( InputArrayOfArrays images,
 
 /** @brief Calculates the back projection of a histogram.
 
-The functions calcBackProject calculate the back project of the histogram. That is, similarly to
-cv::calcHist , at each location (x, y) the function collects the values from the selected channels
+The function cv::calcBackProject calculates the back projection of the histogram. That is, similarly to
+#calcHist , at each location (x, y) the function collects the values from the selected channels
 in the input images and finds the corresponding histogram bin. But instead of incrementing it, the
 function reads the bin value, scales it by scale , and stores in backProject(x,y) . In terms of
 statistics, the function computes probability of each element value in respect with the empirical
@@ -3069,7 +3102,7 @@ component.
 
 This is an approximate algorithm of the CamShift color object tracker.
 
-@param images Source arrays. They all should have the same depth, CV_8U or CV_32F , and the same
+@param images Source arrays. They all should have the same depth, CV_8U, CV_16U or CV_32F , and the same
 size. Each of them can have an arbitrary number of channels.
 @param nimages Number of source images.
 @param channels The list of channels used to compute the back projection. The number of channels
@@ -3079,11 +3112,11 @@ images[0].channels() + images[1].channels()-1, and so on.
 @param hist Input histogram that can be dense or sparse.
 @param backProject Destination back projection array that is a single-channel array of the same
 size and depth as images[0] .
-@param ranges Array of arrays of the histogram bin boundaries in each dimension. See calcHist .
+@param ranges Array of arrays of the histogram bin boundaries in each dimension. See #calcHist .
 @param scale Optional scale factor for the output back projection.
 @param uniform Flag indicating whether the histogram is uniform or not (see above).
 
-@sa cv::calcHist, cv::compareHist
+@sa calcHist, compareHist
  */
 CV_EXPORTS void calcBackProject( const Mat* images, int nimages,
                                  const int* channels, InputArray hist,
@@ -3104,18 +3137,18 @@ CV_EXPORTS_W void calcBackProject( InputArrayOfArrays images, const std::vector<
 
 /** @brief Compares two histograms.
 
-The function compare two dense or two sparse histograms using the specified method.
+The function cv::compareHist compares two dense or two sparse histograms using the specified method.
 
 The function returns \f$d(H_1, H_2)\f$ .
 
 While the function works well with 1-, 2-, 3-dimensional dense histograms, it may not be suitable
 for high-dimensional sparse histograms. In such histograms, because of aliasing and sampling
 problems, the coordinates of non-zero histogram bins can slightly shift. To compare such histograms
-or more general sparse configurations of weighted points, consider using the cv::EMD function.
+or more general sparse configurations of weighted points, consider using the #EMD function.
 
 @param H1 First compared histogram.
 @param H2 Second compared histogram of the same size as H1 .
-@param method Comparison method, see cv::HistCompMethods
+@param method Comparison method, see #HistCompMethods
  */
 CV_EXPORTS_W double compareHist( InputArray H1, InputArray H2, int method );
 
@@ -3139,6 +3172,14 @@ The algorithm normalizes the brightness and increases the contrast of the image.
  */
 CV_EXPORTS_W void equalizeHist( InputArray src, OutputArray dst );
 
+/** @brief Creates a smart pointer to a cv::CLAHE class and initializes it.
+
+@param clipLimit Threshold for contrast limiting.
+@param tileGridSize Size of grid for histogram equalization. Input image will be divided into
+equally sized rectangular tiles. tileGridSize defines the number of tiles in row and column.
+ */
+CV_EXPORTS_W Ptr<CLAHE> createCLAHE(double clipLimit = 40.0, Size tileGridSize = Size(8, 8));
+
 /** @brief Computes the "minimal work" distance between two weighted point configurations.
 
 The function computes the earth mover distance and/or a lower boundary of the distance between the
@@ -3152,11 +3193,13 @@ same object.
 
 @param signature1 First signature, a \f$\texttt{size1}\times \texttt{dims}+1\f$ floating-point matrix.
 Each row stores the point weight followed by the point coordinates. The matrix is allowed to have
-a single column (weights only) if the user-defined cost matrix is used.
+a single column (weights only) if the user-defined cost matrix is used. The weights must be
+non-negative and have at least one non-zero value.
 @param signature2 Second signature of the same format as signature1 , though the number of rows
 may be different. The total weights may be different. In this case an extra "dummy" point is added
-to either signature1 or signature2 .
-@param distType Used metric. See cv::DistanceTypes.
+to either signature1 or signature2. The weights must be non-negative and have at least one non-zero
+value.
+@param distType Used metric. See #DistanceTypes.
 @param cost User-defined \f$\texttt{size1}\times \texttt{size2}\f$ cost matrix. Also, if a cost matrix
 is used, lower boundary lowerBound cannot be calculated because it needs a metric function.
 @param lowerBound Optional input/output parameter: lower boundary of a distance between the two
@@ -3175,11 +3218,15 @@ CV_EXPORTS float EMD( InputArray signature1, InputArray signature2,
                       int distType, InputArray cost=noArray(),
                       float* lowerBound = 0, OutputArray flow = noArray() );
 
+CV_EXPORTS_AS(EMD) float wrapperEMD( InputArray signature1, InputArray signature2,
+                      int distType, InputArray cost=noArray(),
+                      CV_IN_OUT Ptr<float> lowerBound = Ptr<float>(), OutputArray flow = noArray() );
+
 //! @} imgproc_hist
 
-/** @example watershed.cpp
+/** @example samples/cpp/watershed.cpp
 An example using the watershed algorithm
- */
+*/
 
 /** @brief Performs a marker-based image segmentation using the watershed algorithm.
 
@@ -3189,7 +3236,7 @@ algorithm, described in @cite Meyer92 .
 Before passing the image to the function, you have to roughly outline the desired regions in the
 image markers with positive (\>0) indices. So, every region is represented as one or more connected
 components with the pixel values 1, 2, 3, and so on. Such markers can be retrieved from a binary
-mask using findContours and drawContours (see the watershed.cpp demo). The markers are "seeds" of
+mask using #findContours and #drawContours (see the watershed.cpp demo). The markers are "seeds" of
 the future image regions. All the other pixels in markers , whose relation to the outlined regions
 is not known and should be defined by the algorithm, should be set to 0's. In the function output,
 each pixel in markers is set to a value of the "seed" components or to -1 at boundaries between the
@@ -3257,9 +3304,10 @@ CV_EXPORTS_W void pyrMeanShiftFiltering( InputArray src, OutputArray dst,
 //! @addtogroup imgproc_misc
 //! @{
 
-/** @example grabcut.cpp
+/** @example samples/cpp/grabcut.cpp
 An example using the GrabCut algorithm
- */
+![Sample Screenshot](grabcut_output1.jpg)
+*/
 
 /** @brief Runs the GrabCut algorithm.
 
@@ -3267,33 +3315,32 @@ The function implements the [GrabCut image segmentation algorithm](http://en.wik
 
 @param img Input 8-bit 3-channel image.
 @param mask Input/output 8-bit single-channel mask. The mask is initialized by the function when
-mode is set to GC_INIT_WITH_RECT. Its elements may have one of the cv::GrabCutClasses.
+mode is set to #GC_INIT_WITH_RECT. Its elements may have one of the #GrabCutClasses.
 @param rect ROI containing a segmented object. The pixels outside of the ROI are marked as
-"obvious background". The parameter is only used when mode==GC_INIT_WITH_RECT .
+"obvious background". The parameter is only used when mode==#GC_INIT_WITH_RECT .
 @param bgdModel Temporary array for the background model. Do not modify it while you are
 processing the same image.
 @param fgdModel Temporary arrays for the foreground model. Do not modify it while you are
 processing the same image.
 @param iterCount Number of iterations the algorithm should make before returning the result. Note
-that the result can be refined with further calls with mode==GC_INIT_WITH_MASK or
+that the result can be refined with further calls with mode==#GC_INIT_WITH_MASK or
 mode==GC_EVAL .
-@param mode Operation mode that could be one of the cv::GrabCutModes
+@param mode Operation mode that could be one of the #GrabCutModes
  */
 CV_EXPORTS_W void grabCut( InputArray img, InputOutputArray mask, Rect rect,
                            InputOutputArray bgdModel, InputOutputArray fgdModel,
                            int iterCount, int mode = GC_EVAL );
 
-/** @example distrans.cpp
-An example on using the distance transform\
+/** @example samples/cpp/distrans.cpp
+An example on using the distance transform
 */
 
-
 /** @brief Calculates the distance to the closest zero pixel for each pixel of the source image.
 
-The functions distanceTransform calculate the approximate or precise distance from every binary
+The function cv::distanceTransform calculates the approximate or precise distance from every binary
 image pixel to the nearest zero pixel. For zero image pixels, the distance will obviously be zero.
 
-When maskSize == DIST_MASK_PRECISE and distanceType == DIST_L2 , the function runs the
+When maskSize == #DIST_MASK_PRECISE and distanceType == #DIST_L2 , the function runs the
 algorithm described in @cite Felzenszwalb04 . This algorithm is parallelized with the TBB library.
 
 In other cases, the algorithm @cite Borgefors86 is used. This means that for a pixel the function
@@ -3302,8 +3349,8 @@ diagonal, or knight's move (the latest is available for a \f$5\times 5\f$ mask).
 distance is calculated as a sum of these basic distances. Since the distance function should be
 symmetric, all of the horizontal and vertical shifts must have the same cost (denoted as a ), all
 the diagonal shifts must have the same cost (denoted as `b`), and all knight's moves must have the
-same cost (denoted as `c`). For the cv::DIST_C and cv::DIST_L1 types, the distance is calculated
-precisely, whereas for cv::DIST_L2 (Euclidean distance) the distance can be calculated only with a
+same cost (denoted as `c`). For the #DIST_C and #DIST_L1 types, the distance is calculated
+precisely, whereas for #DIST_L2 (Euclidean distance) the distance can be calculated only with a
 relative error (a \f$5\times 5\f$ mask gives more accurate results). For `a`,`b`, and `c`, OpenCV
 uses the values suggested in the original paper:
 - DIST_L1: `a = 1, b = 2`
@@ -3312,21 +3359,21 @@ uses the values suggested in the original paper:
     - `5 x 5`: `a=1, b=1.4, c=2.1969`
 - DIST_C: `a = 1, b = 1`
 
-Typically, for a fast, coarse distance estimation DIST_L2, a \f$3\times 3\f$ mask is used. For a
-more accurate distance estimation DIST_L2, a \f$5\times 5\f$ mask or the precise algorithm is used.
+Typically, for a fast, coarse distance estimation #DIST_L2, a \f$3\times 3\f$ mask is used. For a
+more accurate distance estimation #DIST_L2, a \f$5\times 5\f$ mask or the precise algorithm is used.
 Note that both the precise and the approximate algorithms are linear on the number of pixels.
 
 This variant of the function does not only compute the minimum distance for each pixel \f$(x, y)\f$
 but also identifies the nearest connected component consisting of zero pixels
-(labelType==DIST_LABEL_CCOMP) or the nearest zero pixel (labelType==DIST_LABEL_PIXEL). Index of the
-component/pixel is stored in `labels(x, y)`. When labelType==DIST_LABEL_CCOMP, the function
+(labelType==#DIST_LABEL_CCOMP) or the nearest zero pixel (labelType==#DIST_LABEL_PIXEL). Index of the
+component/pixel is stored in `labels(x, y)`. When labelType==#DIST_LABEL_CCOMP, the function
 automatically finds connected components of zero pixels in the input image and marks them with
-distinct labels. When labelType==DIST_LABEL_CCOMP, the function scans through the input image and
+distinct labels. When labelType==#DIST_LABEL_PIXEL, the function scans through the input image and
 marks all the zero pixels with distinct labels.
 
 In this mode, the complexity is still linear. That is, the function provides a very fast way to
 compute the Voronoi diagram for a binary image. Currently, the second variant can use only the
-approximate distance transform algorithm, i.e. maskSize=DIST_MASK_PRECISE is not supported
+approximate distance transform algorithm, i.e. maskSize=#DIST_MASK_PRECISE is not supported
 yet.
 
 @param src 8-bit, single-channel (binary) source image.
@@ -3334,12 +3381,12 @@ yet.
 single-channel image of the same size as src.
 @param labels Output 2D array of labels (the discrete Voronoi diagram). It has the type
 CV_32SC1 and the same size as src.
-@param distanceType Type of distance, see cv::DistanceTypes
-@param maskSize Size of the distance transform mask, see cv::DistanceTransformMasks.
-DIST_MASK_PRECISE is not supported by this variant. In case of the DIST_L1 or DIST_C distance type,
+@param distanceType Type of distance, see #DistanceTypes
+@param maskSize Size of the distance transform mask, see #DistanceTransformMasks.
+#DIST_MASK_PRECISE is not supported by this variant. In case of the #DIST_L1 or #DIST_C distance type,
 the parameter is forced to 3 because a \f$3\times 3\f$ mask gives the same result as \f$5\times
 5\f$ or any larger aperture.
-@param labelType Type of the label array to build, see cv::DistanceTransformLabelTypes.
+@param labelType Type of the label array to build, see #DistanceTransformLabelTypes.
  */
 CV_EXPORTS_AS(distanceTransformWithLabels) void distanceTransform( InputArray src, OutputArray dst,
                                      OutputArray labels, int distanceType, int maskSize,
@@ -3349,18 +3396,18 @@ CV_EXPORTS_AS(distanceTransformWithLabels) void distanceTransform( InputArray sr
 @param src 8-bit, single-channel (binary) source image.
 @param dst Output image with calculated distances. It is a 8-bit or 32-bit floating-point,
 single-channel image of the same size as src .
-@param distanceType Type of distance, see cv::DistanceTypes
-@param maskSize Size of the distance transform mask, see cv::DistanceTransformMasks. In case of the
-DIST_L1 or DIST_C distance type, the parameter is forced to 3 because a \f$3\times 3\f$ mask gives
+@param distanceType Type of distance, see #DistanceTypes
+@param maskSize Size of the distance transform mask, see #DistanceTransformMasks. In case of the
+#DIST_L1 or #DIST_C distance type, the parameter is forced to 3 because a \f$3\times 3\f$ mask gives
 the same result as \f$5\times 5\f$ or any larger aperture.
 @param dstType Type of output image. It can be CV_8U or CV_32F. Type CV_8U can be used only for
-the first variant of the function and distanceType == DIST_L1.
+the first variant of the function and distanceType == #DIST_L1.
 */
 CV_EXPORTS_W void distanceTransform( InputArray src, OutputArray dst,
                                      int distanceType, int maskSize, int dstType=CV_32F);
 
-/** @example ffilldemo.cpp
-  An example using the FloodFill technique
+/** @example samples/cpp/ffilldemo.cpp
+An example using the FloodFill technique
 */
 
 /** @overload
@@ -3374,7 +3421,7 @@ CV_EXPORTS int floodFill( InputOutputArray image,
 
 /** @brief Fills a connected component with the given color.
 
-The functions floodFill fill a connected component starting from the seed point with the specified
+The function cv::floodFill fills a connected component starting from the seed point with the specified
 color. The connectivity is determined by the color/brightness closeness of the neighbor pixels. The
 pixel at \f$(x,y)\f$ is considered to belong to the repainted domain if:
 
@@ -3411,14 +3458,15 @@ Use these functions to either mark a connected component with the specified colo
 a mask and then extract the contour, or copy the region to another image, and so on.
 
 @param image Input/output 1- or 3-channel, 8-bit, or floating-point image. It is modified by the
-function unless the FLOODFILL_MASK_ONLY flag is set in the second variant of the function. See
+function unless the #FLOODFILL_MASK_ONLY flag is set in the second variant of the function. See
 the details below.
 @param mask Operation mask that should be a single-channel 8-bit image, 2 pixels wider and 2 pixels
 taller than image. Since this is both an input and output parameter, you must take responsibility
 of initializing it. Flood-filling cannot go across non-zero pixels in the input mask. For example,
 an edge detector output can be used as a mask to stop filling at edges. On output, pixels in the
 mask corresponding to filled pixels in the image are set to 1 or to the a value specified in flags
-as described below. It is therefore possible to use the same mask in multiple calls to the function
+as described below. Additionally, the function fills the border of the mask with ones to simplify
+internal processing. It is therefore possible to use the same mask in multiple calls to the function
 to make sure the filled areas do not overlap.
 @param seedPoint Starting point.
 @param newVal New value of the repainted domain pixels.
@@ -3435,7 +3483,7 @@ will be considered. The next 8 bits (8-16) contain a value between 1 and 255 wit
 the mask (the default value is 1). For example, 4 | ( 255 \<\< 8 ) will consider 4 nearest
 neighbours and fill the mask with a value of 255. The following additional options occupy higher
 bits and therefore may be further combined with the connectivity and mask fill values using
-bit-wise or (|), see cv::FloodFillFlags.
+bit-wise or (|), see #FloodFillFlags.
 
 @note Since the mask is larger than the filled image, a pixel \f$(x, y)\f$ in image corresponds to the
 pixel \f$(x+1, y+1)\f$ in the mask .
@@ -3447,6 +3495,20 @@ CV_EXPORTS_W int floodFill( InputOutputArray image, InputOutputArray mask,
                             Scalar loDiff = Scalar(), Scalar upDiff = Scalar(),
                             int flags = 4 );
 
+//! Performs linear blending of two images:
+//! \f[ \texttt{dst}(i,j) = \texttt{weights1}(i,j)*\texttt{src1}(i,j) + \texttt{weights2}(i,j)*\texttt{src2}(i,j) \f]
+//! @param src1 It has a type of CV_8UC(n) or CV_32FC(n), where n is a positive integer.
+//! @param src2 It has the same type and size as src1.
+//! @param weights1 It has a type of CV_32FC1 and the same size as src1.
+//! @param weights2 It has a type of CV_32FC1 and the same size as src1.
+//! @param dst It is created if it does not have the same size and type as src1.
+CV_EXPORTS void blendLinear(InputArray src1, InputArray src2, InputArray weights1, InputArray weights2, OutputArray dst);
+
+//! @} imgproc_misc
+
+//! @addtogroup imgproc_color_conversions
+//! @{
+
 /** @brief Converts an image from one color space to another.
 
 The function converts an input image from one color space to another. In case of a transformation
@@ -3465,13 +3527,13 @@ In case of linear transformations, the range does not matter. But in case of a n
 transformation, an input RGB image should be normalized to the proper value range to get the correct
 results, for example, for RGB \f$\rightarrow\f$ L\*u\*v\* transformation. For example, if you have a
 32-bit floating-point image directly converted from an 8-bit image without any scaling, then it will
-have the 0..255 value range instead of 0..1 assumed by the function. So, before calling cvtColor ,
+have the 0..255 value range instead of 0..1 assumed by the function. So, before calling #cvtColor ,
 you need first to scale the image down:
 @code
     img *= 1./255;
     cvtColor(img, img, COLOR_BGR2Luv);
 @endcode
-If you use cvtColor with 8-bit images, the conversion will have some information lost. For many
+If you use #cvtColor with 8-bit images, the conversion will have some information lost. For many
 applications, this will not be noticeable but it is recommended to use 32-bit images in applications
 that need the full range of colors or that convert an image before an operation and then convert
 back.
@@ -3482,7 +3544,7 @@ range: 255 for CV_8U, 65535 for CV_16U, 1 for CV_32F.
 @param src input image: 8-bit unsigned, 16-bit unsigned ( CV_16UC... ), or single-precision
 floating-point.
 @param dst output image of the same size and depth as src.
-@param code color space conversion code (see cv::ColorConversionCodes).
+@param code color space conversion code (see #ColorConversionCodes).
 @param dstCn number of channels in the destination image; if the parameter is 0, the number of the
 channels is derived automatically from src and code.
 
@@ -3490,10 +3552,59 @@ channels is derived automatically from src and code.
  */
 CV_EXPORTS_W void cvtColor( InputArray src, OutputArray dst, int code, int dstCn = 0 );
 
-//! @} imgproc_misc
+/** @brief Converts an image from one color space to another where the source image is
+stored in two planes.
+
+This function only supports YUV420 to RGB conversion as of now.
+
+@param src1 8-bit image (#CV_8U) of the Y plane.
+@param src2 image containing interleaved U/V plane.
+@param dst output image.
+@param code Specifies the type of conversion. It can take any of the following values:
+- #COLOR_YUV2BGR_NV12
+- #COLOR_YUV2RGB_NV12
+- #COLOR_YUV2BGRA_NV12
+- #COLOR_YUV2RGBA_NV12
+- #COLOR_YUV2BGR_NV21
+- #COLOR_YUV2RGB_NV21
+- #COLOR_YUV2BGRA_NV21
+- #COLOR_YUV2RGBA_NV21
+*/
+CV_EXPORTS_W void cvtColorTwoPlane( InputArray src1, InputArray src2, OutputArray dst, int code );
+
+/** @brief main function for all demosaicing processes
+
+@param src input image: 8-bit unsigned or 16-bit unsigned.
+@param dst output image of the same size and depth as src.
+@param code Color space conversion code (see the description below).
+@param dstCn number of channels in the destination image; if the parameter is 0, the number of the
+channels is derived automatically from src and code.
+
+The function can do the following transformations:
+
+-   Demosaicing using bilinear interpolation
+
+    #COLOR_BayerBG2BGR , #COLOR_BayerGB2BGR , #COLOR_BayerRG2BGR , #COLOR_BayerGR2BGR
+
+    #COLOR_BayerBG2GRAY , #COLOR_BayerGB2GRAY , #COLOR_BayerRG2GRAY , #COLOR_BayerGR2GRAY
+
+-   Demosaicing using Variable Number of Gradients.
+
+    #COLOR_BayerBG2BGR_VNG , #COLOR_BayerGB2BGR_VNG , #COLOR_BayerRG2BGR_VNG , #COLOR_BayerGR2BGR_VNG
+
+-   Edge-Aware Demosaicing.
+
+    #COLOR_BayerBG2BGR_EA , #COLOR_BayerGB2BGR_EA , #COLOR_BayerRG2BGR_EA , #COLOR_BayerGR2BGR_EA
+
+-   Demosaicing with alpha channel
+
+    #COLOR_BayerBG2BGRA , #COLOR_BayerGB2BGRA , #COLOR_BayerRG2BGRA , #COLOR_BayerGR2BGRA
+
+@sa cvtColor
+*/
+CV_EXPORTS_W void demosaicing(InputArray src, OutputArray dst, int code, int dstCn = 0);
 
-// main function for all demosaicing procceses
-CV_EXPORTS_W void demosaicing(InputArray _src, OutputArray _dst, int code, int dcn = 0);
+//! @} imgproc_color_conversions
 
 //! @addtogroup imgproc_shape
 //! @{
@@ -3509,6 +3620,9 @@ results are returned in the structure cv::Moments.
 used for images only.
 @returns moments.
 
+@note Only applicable to contour moments calculations from Python bindings: Note that the numpy
+type for the input array should be either np.int32 or np.float32.
+
 @sa  contourArea, arcLength
  */
 CV_EXPORTS_W Moments moments( InputArray array, bool binaryImage = false );
@@ -3554,6 +3668,10 @@ enum TemplateMatchModes {
     TM_CCOEFF_NORMED = 5  //!< \f[R(x,y)= \frac{ \sum_{x',y'} (T'(x',y') \cdot I'(x+x',y+y')) }{ \sqrt{\sum_{x',y'}T'(x',y')^2 \cdot \sum_{x',y'} I'(x+x',y+y')^2} }\f]
 };
 
+/** @example samples/cpp/tutorial_code/Histograms_Matching/MatchTemplate_Demo.cpp
+An example using Template Matching algorithm
+*/
+
 /** @brief Compares a template against overlapped image regions.
 
 The function slides through image , compares the overlapped patches of size \f$w \times h\f$ against
@@ -3562,8 +3680,8 @@ for the available comparison methods ( \f$I\f$ denotes image, \f$T\f$ template,
 is done over template and/or the image patch: \f$x' = 0...w-1, y' = 0...h-1\f$
 
 After the function finishes the comparison, the best matches can be found as global minimums (when
-TM_SQDIFF was used) or maximums (when TM_CCORR or TM_CCOEFF was used) using the
-minMaxLoc function. In case of a color image, template summation in the numerator and each sum in
+#TM_SQDIFF was used) or maximums (when #TM_CCORR or #TM_CCOEFF was used) using the
+#minMaxLoc function. In case of a color image, template summation in the numerator and each sum in
 the denominator is done over all of the channels and separate mean values are used for each channel.
 That is, the function can take a color template and a color image. The result will still be a
 single-channel image, which is easier to analyze.
@@ -3573,9 +3691,9 @@ single-channel image, which is easier to analyze.
 data type.
 @param result Map of comparison results. It must be single-channel 32-bit floating-point. If image
 is \f$W \times H\f$ and templ is \f$w \times h\f$ , then result is \f$(W-w+1) \times (H-h+1)\f$ .
-@param method Parameter specifying the comparison method, see cv::TemplateMatchModes
+@param method Parameter specifying the comparison method, see #TemplateMatchModes
 @param mask Mask of searched template. It must have the same datatype and size with templ. It is
-not set by default.
+not set by default. Currently, only the #TM_SQDIFF and #TM_CCORR_NORMED methods are supported.
  */
 CV_EXPORTS_W void matchTemplate( InputArray image, InputArray templ,
                                  OutputArray result, int method, InputArray mask = noArray() );
@@ -3585,27 +3703,74 @@ CV_EXPORTS_W void matchTemplate( InputArray image, InputArray templ,
 //! @addtogroup imgproc_shape
 //! @{
 
+/** @example samples/cpp/connected_components.cpp
+This program demonstrates connected components and use of the trackbar
+*/
+
 /** @brief computes the connected components labeled image of boolean image
 
 image with 4 or 8 way connectivity - returns N, the total number of labels [0, N-1] where 0
 represents the background label. ltype specifies the output label image type, an important
 consideration based on the total number of labels or alternatively the total number of pixels in
-the source image.
+the source image. ccltype specifies the connected components labeling algorithm to use, currently
+Grana's (BBDT) and Wu's (SAUF) algorithms are supported, see the #ConnectedComponentsAlgorithmsTypes
+for details. Note that SAUF algorithm forces a row major ordering of labels while BBDT does not.
+This function uses parallel version of both Grana and Wu's algorithms if at least one allowed
+parallel framework is enabled and if the rows of the image are at least twice the number returned by #getNumberOfCPUs.
 
 @param image the 8-bit single-channel image to be labeled
 @param labels destination labeled image
 @param connectivity 8 or 4 for 8-way or 4-way connectivity respectively
 @param ltype output image label type. Currently CV_32S and CV_16U are supported.
- */
+@param ccltype connected components algorithm type (see the #ConnectedComponentsAlgorithmsTypes).
+*/
+CV_EXPORTS_AS(connectedComponentsWithAlgorithm) int connectedComponents(InputArray image, OutputArray labels,
+                                                                        int connectivity, int ltype, int ccltype);
+
+
+/** @overload
+
+@param image the 8-bit single-channel image to be labeled
+@param labels destination labeled image
+@param connectivity 8 or 4 for 8-way or 4-way connectivity respectively
+@param ltype output image label type. Currently CV_32S and CV_16U are supported.
+*/
 CV_EXPORTS_W int connectedComponents(InputArray image, OutputArray labels,
                                      int connectivity = 8, int ltype = CV_32S);
 
+
+/** @brief computes the connected components labeled image of boolean image and also produces a statistics output for each label
+
+image with 4 or 8 way connectivity - returns N, the total number of labels [0, N-1] where 0
+represents the background label. ltype specifies the output label image type, an important
+consideration based on the total number of labels or alternatively the total number of pixels in
+the source image. ccltype specifies the connected components labeling algorithm to use, currently
+Grana's (BBDT) and Wu's (SAUF) algorithms are supported, see the #ConnectedComponentsAlgorithmsTypes
+for details. Note that SAUF algorithm forces a row major ordering of labels while BBDT does not.
+This function uses parallel version of both Grana and Wu's algorithms (statistics included) if at least one allowed
+parallel framework is enabled and if the rows of the image are at least twice the number returned by #getNumberOfCPUs.
+
+@param image the 8-bit single-channel image to be labeled
+@param labels destination labeled image
+@param stats statistics output for each label, including the background label, see below for
+available statistics. Statistics are accessed via stats(label, COLUMN) where COLUMN is one of
+#ConnectedComponentsTypes. The data type is CV_32S.
+@param centroids centroid output for each label, including the background label. Centroids are
+accessed via centroids(label, 0) for x and centroids(label, 1) for y. The data type is CV_64F.
+@param connectivity 8 or 4 for 8-way or 4-way connectivity respectively
+@param ltype output image label type. Currently CV_32S and CV_16U are supported.
+@param ccltype connected components algorithm type (see #ConnectedComponentsAlgorithmsTypes).
+*/
+CV_EXPORTS_AS(connectedComponentsWithStatsWithAlgorithm) int connectedComponentsWithStats(InputArray image, OutputArray labels,
+                                                                                          OutputArray stats, OutputArray centroids,
+                                                                                          int connectivity, int ltype, int ccltype);
+
 /** @overload
 @param image the 8-bit single-channel image to be labeled
 @param labels destination labeled image
 @param stats statistics output for each label, including the background label, see below for
 available statistics. Statistics are accessed via stats(label, COLUMN) where COLUMN is one of
-cv::ConnectedComponentsTypes. The data type is CV_32S.
+#ConnectedComponentsTypes. The data type is CV_32S.
 @param centroids centroid output for each label, including the background label. Centroids are
 accessed via centroids(label, 0) for x and centroids(label, 1) for y. The data type CV_64F.
 @param connectivity 8 or 4 for 8-way or 4-way connectivity respectively
@@ -3619,42 +3784,49 @@ CV_EXPORTS_W int connectedComponentsWithStats(InputArray image, OutputArray labe
 /** @brief Finds contours in a binary image.
 
 The function retrieves contours from the binary image using the algorithm @cite Suzuki85 . The contours
-are a useful tool for shape analysis and object detection and recognition. See squares.c in the
+are a useful tool for shape analysis and object detection and recognition. See squares.cpp in the
 OpenCV sample directory.
-
-@note Source image is modified by this function. Also, the function does not take into account
-1-pixel border of the image (it's filled with 0's and used for neighbor analysis in the algorithm),
-therefore the contours touching the image border will be clipped.
+@note Since OpenCV 3.2, the source image is not modified by this function.
 
 @param image Source, an 8-bit single-channel image. Non-zero pixels are treated as 1's. Zero
-pixels remain 0's, so the image is treated as binary . You can use compare , inRange , threshold ,
-adaptiveThreshold , Canny , and others to create a binary image out of a grayscale or color one.
-The function modifies the image while extracting the contours. If mode equals to RETR_CCOMP
-or RETR_FLOODFILL, the input can also be a 32-bit integer image of labels (CV_32SC1).
-@param contours Detected contours. Each contour is stored as a vector of points.
-@param hierarchy Optional output vector, containing information about the image topology. It has
-as many elements as the number of contours. For each i-th contour contours[i] , the elements
-hierarchy[i][0] , hiearchy[i][1] , hiearchy[i][2] , and hiearchy[i][3] are set to 0-based indices
+pixels remain 0's, so the image is treated as binary . You can use #compare, #inRange, #threshold ,
+#adaptiveThreshold, #Canny, and others to create a binary image out of a grayscale or color one.
+If mode equals to #RETR_CCOMP or #RETR_FLOODFILL, the input can also be a 32-bit integer image of labels (CV_32SC1).
+@param contours Detected contours. Each contour is stored as a vector of points (e.g.
+std::vector<std::vector<cv::Point> >).
+@param hierarchy Optional output vector (e.g. std::vector<cv::Vec4i>), containing information about the image topology. It has
+as many elements as the number of contours. For each i-th contour contours[i], the elements
+hierarchy[i][0] , hierarchy[i][1] , hierarchy[i][2] , and hierarchy[i][3] are set to 0-based indices
 in contours of the next and previous contours at the same hierarchical level, the first child
 contour and the parent contour, respectively. If for the contour i there are no next, previous,
 parent, or nested contours, the corresponding elements of hierarchy[i] will be negative.
-@param mode Contour retrieval mode, see cv::RetrievalModes
-@param method Contour approximation method, see cv::ContourApproximationModes
+@param mode Contour retrieval mode, see #RetrievalModes
+@param method Contour approximation method, see #ContourApproximationModes
 @param offset Optional offset by which every contour point is shifted. This is useful if the
 contours are extracted from the image ROI and then they should be analyzed in the whole image
 context.
  */
-CV_EXPORTS_W void findContours( InputOutputArray image, OutputArrayOfArrays contours,
+CV_EXPORTS_W void findContours( InputArray image, OutputArrayOfArrays contours,
                               OutputArray hierarchy, int mode,
                               int method, Point offset = Point());
 
 /** @overload */
-CV_EXPORTS void findContours( InputOutputArray image, OutputArrayOfArrays contours,
+CV_EXPORTS void findContours( InputArray image, OutputArrayOfArrays contours,
                               int mode, int method, Point offset = Point());
 
+/** @example samples/cpp/squares.cpp
+A program using pyramid scaling, Canny, contours and contour simplification to find
+squares in a list of images (pic1-6.png). Returns sequence of squares detected on the image.
+*/
+
+/** @example samples/tapi/squares.cpp
+A program using pyramid scaling, Canny, contours and contour simplification to find
+squares in the input image.
+*/
+
 /** @brief Approximates a polygonal curve(s) with the specified precision.
 
-The functions approxPolyDP approximate a curve or a polygon with another curve/polygon with less
+The function cv::approxPolyDP approximates a curve or a polygon with another curve/polygon with fewer
 vertices so that the distance between them is less or equal to the specified precision. It uses the
 Douglas-Peucker algorithm <http://en.wikipedia.org/wiki/Ramer-Douglas-Peucker_algorithm>
 
@@ -3678,19 +3850,20 @@ The function computes a curve length or a closed contour perimeter.
  */
 CV_EXPORTS_W double arcLength( InputArray curve, bool closed );
 
-/** @brief Calculates the up-right bounding rectangle of a point set.
+/** @brief Calculates the up-right bounding rectangle of a point set or non-zero pixels of gray-scale image.
 
-The function calculates and returns the minimal up-right bounding rectangle for the specified point set.
+The function calculates and returns the minimal up-right bounding rectangle for the specified point set or
+non-zero pixels of gray-scale image.
 
-@param points Input 2D point set, stored in std::vector or Mat.
+@param array Input gray-scale image or 2D point set, stored in std::vector or Mat.
  */
-CV_EXPORTS_W Rect boundingRect( InputArray points );
+CV_EXPORTS_W Rect boundingRect( InputArray array );
 
 /** @brief Calculates a contour area.
 
 The function computes a contour area. Similarly to moments , the area is computed using the Green
 formula. Thus, the returned area and the number of non-zero pixels, if you draw the contour using
-drawContours or fillPoly , can be different. Also, the function will most certainly give a wrong
+#drawContours or #fillPoly , can be different. Also, the function will most certainly give wrong
 results for contours with self-intersections.
 
 Example:
@@ -3721,9 +3894,8 @@ CV_EXPORTS_W double contourArea( InputArray contour, bool oriented = false );
 /** @brief Finds a rotated rectangle of the minimum area enclosing the input 2D point set.
 
 The function calculates and returns the minimum-area bounding rectangle (possibly rotated) for a
-specified point set. See the OpenCV sample minarea.cpp . Developer should keep in mind that the
-returned rotatedRect can contain negative indices when data is close to the containing Mat element
-boundary.
+specified point set. Developer should keep in mind that the returned RotatedRect can contain negative
+indices when data is close to the containing Mat element boundary.
 
 @param points Input vector of 2D points, stored in std::vector\<\> or Mat
  */
@@ -3732,10 +3904,8 @@ CV_EXPORTS_W RotatedRect minAreaRect( InputArray points );
 /** @brief Finds the four vertices of a rotated rect. Useful to draw the rotated rectangle.
 
 The function finds the four vertices of a rotated rectangle. This function is useful to draw the
-rectangle. In C++, instead of using this function, you can directly use box.points() method. Please
-visit the [tutorial on bounding
-rectangle](http://docs.opencv.org/doc/tutorials/imgproc/shapedescriptors/bounding_rects_circles/bounding_rects_circles.html#bounding-rects-circles)
-for more information.
+rectangle. In C++, instead of using this function, you can directly use RotatedRect::points method. Please
+visit the @ref tutorial_bounding_rotated_ellipses "tutorial on Creating Bounding rotated boxes and ellipses for contours" for more information.
 
 @param box The input rotated rectangle. It may be the output of
 @param points The output array of four vertices of rectangles.
@@ -3744,8 +3914,7 @@ CV_EXPORTS_W void boxPoints(RotatedRect box, OutputArray points);
 
 /** @brief Finds a circle of the minimum area enclosing a 2D point set.
 
-The function finds the minimal enclosing circle of a 2D point set using an iterative algorithm. See
-the OpenCV sample minarea.cpp .
+The function finds the minimal enclosing circle of a 2D point set using an iterative algorithm.
 
 @param points Input vector of 2D points, stored in std::vector\<\> or Mat
 @param center Output center of the circle.
@@ -3754,8 +3923,8 @@ the OpenCV sample minarea.cpp .
 CV_EXPORTS_W void minEnclosingCircle( InputArray points,
                                       CV_OUT Point2f& center, CV_OUT float& radius );
 
-/** @example minarea.cpp
-  */
+/** @example samples/cpp/minarea.cpp
+*/
 
 /** @brief Finds a triangle of minimum area enclosing a 2D point set and returns its area.
 
@@ -3767,9 +3936,9 @@ area. The output for a given 2D point set is shown in the image below. 2D points
 
 The implementation of the algorithm is based on O'Rourke's @cite ORourke86 and Klee and Laskowski's
 @cite KleeLaskowski85 papers. O'Rourke provides a \f$\theta(n)\f$ algorithm for finding the minimal
-enclosing triangle of a 2D convex polygon with n vertices. Since the minEnclosingTriangle function
+enclosing triangle of a 2D convex polygon with n vertices. Since the #minEnclosingTriangle function
 takes a 2D point set as input an additional preprocessing step of computing the convex hull of the
-2D point set is required. The complexity of the convexHull function is \f$O(n log(n))\f$ which is higher
+2D point set is required. The complexity of the #convexHull function is \f$O(n log(n))\f$ which is higher
 than \f$\theta(n)\f$. Thus the overall complexity of the function is \f$O(n log(n))\f$.
 
 @param points Input vector of 2D points with depth CV_32S or CV_32F, stored in std::vector\<\> or Mat
@@ -3780,25 +3949,24 @@ CV_EXPORTS_W double minEnclosingTriangle( InputArray points, CV_OUT OutputArray
 
 /** @brief Compares two shapes.
 
-The function compares two shapes. All three implemented methods use the Hu invariants (see cv::HuMoments)
+The function compares two shapes. All three implemented methods use the Hu invariants (see #HuMoments)
 
 @param contour1 First contour or grayscale image.
 @param contour2 Second contour or grayscale image.
-@param method Comparison method, see ::ShapeMatchModes
+@param method Comparison method, see #ShapeMatchModes
 @param parameter Method-specific parameter (not supported now).
  */
 CV_EXPORTS_W double matchShapes( InputArray contour1, InputArray contour2,
                                  int method, double parameter );
 
-/** @example convexhull.cpp
+/** @example samples/cpp/convexhull.cpp
 An example using the convexHull functionality
 */
 
 /** @brief Finds the convex hull of a point set.
 
-The functions find the convex hull of a 2D point set using the Sklansky's algorithm @cite Sklansky82
-that has *O(N logN)* complexity in the current implementation. See the OpenCV sample convexhull.cpp
-that demonstrates the usage of different function variants.
+The function cv::convexHull finds the convex hull of a 2D point set using Sklansky's algorithm @cite Sklansky82
+that has *O(N logN)* complexity in the current implementation.
 
 @param points Input 2D point set, stored in std::vector or Mat.
 @param hull Output convex hull. It is either an integer vector of indices or vector of points. In
@@ -3811,8 +3979,16 @@ to the right, and its Y axis pointing upwards.
 @param returnPoints Operation flag. In case of a matrix, when the flag is true, the function
 returns convex hull points. Otherwise, it returns indices of the convex hull points. When the
 output array is std::vector, the flag is ignored, and the output depends on the type of the
-vector: std::vector\<int\> implies returnPoints=true, std::vector\<Point\> implies
-returnPoints=false.
+vector: std::vector\<int\> implies returnPoints=false, std::vector\<Point\> implies
+returnPoints=true.
+
+@note `points` and `hull` should be different arrays, inplace processing isn't supported.
+
+Check @ref tutorial_hull "the corresponding tutorial" for more details.
+
+Useful links:
+
+https://www.learnopencv.com/convex-hull-using-opencv-in-python-and-c/
  */
 CV_EXPORTS_W void convexHull( InputArray points, OutputArray hull,
                               bool clockwise = false, bool returnPoints = true );
@@ -3827,7 +4003,7 @@ The figure below displays convexity defects of a hand contour:
 @param convexhull Convex hull obtained using convexHull that should contain indices of the contour
 points that make the hull.
 @param convexityDefects The output vector of convexity defects. In C++ and the new Python/Java
-interface each convexity defect is represented as 4-element integer vector (a.k.a. cv::Vec4i):
+interface each convexity defect is represented as 4-element integer vector (a.k.a. #Vec4i):
 (start_index, end_index, farthest_pt_index, fixpt_depth), where indices are 0-based indices
 in the original contour of the convexity defect beginning, end and the farthest point, and
 fixpt_depth is fixed-point approximation (with 8 fractional bits) of the distance between the
@@ -3845,12 +4021,28 @@ without self-intersections. Otherwise, the function output is undefined.
  */
 CV_EXPORTS_W bool isContourConvex( InputArray contour );
 
-//! finds intersection of two convex polygons
+/** @example samples/cpp/intersectExample.cpp
+Examples of how intersectConvexConvex works
+*/
+
+/** @brief Finds intersection of two convex polygons
+
+@param _p1 First polygon
+@param _p2 Second polygon
+@param _p12 Output polygon describing the intersecting area
+@param handleNested When true, an intersection is found if one of the polygons is fully enclosed in the other.
+When false, no intersection is found. If the polygons share a side or the vertex of one polygon lies on an edge
+of the other, they are not considered nested and an intersection will be found regardless of the value of handleNested.
+
+@returns Absolute value of area of intersecting polygon
+
+@note intersectConvexConvex doesn't confirm that both polygons are convex and will return invalid results if they aren't.
+ */
 CV_EXPORTS_W float intersectConvexConvex( InputArray _p1, InputArray _p2,
                                           OutputArray _p12, bool handleNested = true );
 
-/** @example fitellipse.cpp
-  An example using the fitEllipse technique
+/** @example samples/cpp/fitellipse.cpp
+An example using the fitEllipse technique
 */
 
 /** @brief Fits an ellipse around a set of 2D points.
@@ -3865,6 +4057,88 @@ border of the containing Mat element.
  */
 CV_EXPORTS_W RotatedRect fitEllipse( InputArray points );
 
+/** @brief Fits an ellipse around a set of 2D points.
+
+ The function calculates the ellipse that fits a set of 2D points.
+ It returns the rotated rectangle in which the ellipse is inscribed.
+ The Approximate Mean Square (AMS) proposed by @cite Taubin1991 is used.
+
+ For an ellipse, this basis set is \f$ \chi= \left(x^2, x y, y^2, x, y, 1\right) \f$,
+ which is a set of six free coefficients \f$ A^T=\left\{A_{\text{xx}},A_{\text{xy}},A_{\text{yy}},A_x,A_y,A_0\right\} \f$.
+ However, to specify an ellipse, all that is needed is five numbers; the major and minor axes lengths \f$ (a,b) \f$,
+ the position \f$ (x_0,y_0) \f$, and the orientation \f$ \theta \f$. This is because the basis set includes lines,
+ quadratics, parabolic and hyperbolic functions as well as elliptical functions as possible fits.
+ If the fit is found to be a parabolic or hyperbolic function then the standard #fitEllipse method is used.
+ The AMS method restricts the fit to parabolic, hyperbolic and elliptical curves
+ by imposing the condition that \f$ A^T ( D_x^T D_x  +   D_y^T D_y) A = 1 \f$ where
+ the matrices \f$ D_x \f$ and \f$ D_y \f$ are the partial derivatives of the design matrix \f$ D \f$ with
+ respect to x and y. The matrices are formed row by row applying the following to
+ each of the points in the set:
+ \f{align*}{
+ D(i,:)&=\left\{x_i^2, x_i y_i, y_i^2, x_i, y_i, 1\right\} &
+ D_x(i,:)&=\left\{2 x_i,y_i,0,1,0,0\right\} &
+ D_y(i,:)&=\left\{0,x_i,2 y_i,0,1,0\right\}
+ \f}
+ The AMS method minimizes the cost function
+ \f{equation*}{
+ \epsilon ^2=\frac{ A^T D^T D A }{ A^T (D_x^T D_x +  D_y^T D_y) A }
+ \f}
+
+ The minimum cost is found by solving the generalized eigenvalue problem.
+
+ \f{equation*}{
+ D^T D A = \lambda  \left( D_x^T D_x +  D_y^T D_y\right) A
+ \f}
+
+ @param points Input 2D point set, stored in std::vector\<\> or Mat
+ */
+CV_EXPORTS_W RotatedRect fitEllipseAMS( InputArray points );
+
+
+/** @brief Fits an ellipse around a set of 2D points.
+
+ The function calculates the ellipse that fits a set of 2D points.
+ It returns the rotated rectangle in which the ellipse is inscribed.
+ The Direct least square (Direct) method by @cite Fitzgibbon1999 is used.
+
+ For an ellipse, this basis set is \f$ \chi= \left(x^2, x y, y^2, x, y, 1\right) \f$,
+ which is a set of six free coefficients \f$ A^T=\left\{A_{\text{xx}},A_{\text{xy}},A_{\text{yy}},A_x,A_y,A_0\right\} \f$.
+ However, to specify an ellipse, all that is needed is five numbers; the major and minor axes lengths \f$ (a,b) \f$,
+ the position \f$ (x_0,y_0) \f$, and the orientation \f$ \theta \f$. This is because the basis set includes lines,
+ quadratics, parabolic and hyperbolic functions as well as elliptical functions as possible fits.
+ The Direct method confines the fit to ellipses by ensuring that \f$ 4 A_{xx} A_{yy}- A_{xy}^2 > 0 \f$.
+ The condition imposed is that \f$ 4 A_{xx} A_{yy}- A_{xy}^2=1 \f$, which satisfies the inequality
+ and, since the coefficients can be arbitrarily scaled, is not overly restrictive.
+
+ \f{equation*}{
+ \epsilon ^2= A^T D^T D A \quad \text{with} \quad A^T C A =1 \quad \text{and} \quad C=\left(\begin{matrix}
+ 0 & 0  & 2  & 0  & 0  &  0  \\
+ 0 & -1  & 0  & 0  & 0  &  0 \\
+ 2 & 0  & 0  & 0  & 0  &  0 \\
+ 0 & 0  & 0  & 0  & 0  &  0 \\
+ 0 & 0  & 0  & 0  & 0  &  0 \\
+ 0 & 0  & 0  & 0  & 0  &  0
+ \end{matrix} \right)
+ \f}
+
+ The minimum cost is found by solving the generalized eigenvalue problem.
+
+ \f{equation*}{
+ D^T D A = \lambda  \left( C\right) A
+ \f}
+
+ The system produces only one positive eigenvalue \f$ \lambda\f$ which is chosen as the solution
+ with its eigenvector \f$\mathbf{u}\f$. These are used to find the coefficients
+
+ \f{equation*}{
+ A = \sqrt{\frac{1}{\mathbf{u}^T C \mathbf{u}}}  \mathbf{u}
+ \f}
+ The scaling factor guarantees that  \f$A^T C A =1\f$.
+
+ @param points Input 2D point set, stored in std::vector\<\> or Mat
+ */
+CV_EXPORTS_W RotatedRect fitEllipseDirect( InputArray points );
+
 /** @brief Fits a line to a 2D or 3D point set.
 
 The function fitLine fits a line to a 2D or 3D point set by minimizing \f$\sum_i \rho(r_i)\f$ where
@@ -3893,7 +4167,7 @@ weights \f$w_i\f$ are adjusted to be inversely proportional to \f$\rho(r_i)\f$ .
 (x0, y0) is a point on the line. In case of 3D fitting, it should be a vector of 6 elements (like
 Vec6f) - (vx, vy, vz, x0, y0, z0), where (vx, vy, vz) is a normalized vector collinear to the line
 and (x0, y0, z0) is a point on the line.
-@param distType Distance used by the M-estimator, see cv::DistanceTypes
+@param distType Distance used by the M-estimator, see #DistanceTypes
 @param param Numerical parameter ( C ) for some types of distances. If it is 0, an optimal value
 is chosen.
 @param reps Sufficient accuracy for the radius (distance between the coordinate origin and the line).
@@ -3922,7 +4196,7 @@ CV_EXPORTS_W double pointPolygonTest( InputArray contour, Point2f pt, bool measu
 
 /** @brief Finds out if there is any intersection between two rotated rectangles.
 
-If there is then the vertices of the interesecting region are returned as well.
+If there is then the vertices of the intersecting region are returned as well.
 
 Below are some examples of intersection configurations. The hatched pattern indicates the
 intersecting region and the red vertices are returned by the function.
@@ -3931,26 +4205,21 @@ intersecting region and the red vertices are returned by the function.
 
 @param rect1 First rectangle
 @param rect2 Second rectangle
-@param intersectingRegion The output array of the verticies of the intersecting region. It returns
+@param intersectingRegion The output array of the vertices of the intersecting region. It returns
 at most 8 vertices. Stored as std::vector\<cv::Point2f\> or cv::Mat as Mx1 of type CV_32FC2.
-@returns One of cv::RectanglesIntersectTypes
+@returns One of #RectanglesIntersectTypes
  */
 CV_EXPORTS_W int rotatedRectangleIntersection( const RotatedRect& rect1, const RotatedRect& rect2, OutputArray intersectingRegion  );
 
-//! @} imgproc_shape
-
-CV_EXPORTS_W Ptr<CLAHE> createCLAHE(double clipLimit = 40.0, Size tileGridSize = Size(8, 8));
-
-//! Ballard, D.H. (1981). Generalizing the Hough transform to detect arbitrary shapes. Pattern Recognition 13 (2): 111-122.
-//! Detects position only without traslation and rotation
-CV_EXPORTS Ptr<GeneralizedHoughBallard> createGeneralizedHoughBallard();
+/** @brief Creates a smart pointer to a cv::GeneralizedHoughBallard class and initializes it.
+*/
+CV_EXPORTS_W Ptr<GeneralizedHoughBallard> createGeneralizedHoughBallard();
 
-//! Guil, N., González-Linares, J.M. and Zapata, E.L. (1999). Bidimensional shape detection using an invariant approach. Pattern Recognition 32 (6): 1025-1038.
-//! Detects position, traslation and rotation
-CV_EXPORTS Ptr<GeneralizedHoughGuil> createGeneralizedHoughGuil();
+/** @brief Creates a smart pointer to a cv::GeneralizedHoughGuil class and initializes it.
+*/
+CV_EXPORTS_W Ptr<GeneralizedHoughGuil> createGeneralizedHoughGuil();
 
-//! Performs linear blending of two images
-CV_EXPORTS void blendLinear(InputArray src1, InputArray src2, InputArray weights1, InputArray weights2, OutputArray dst);
+//! @} imgproc_shape
 
 //! @addtogroup imgproc_colormap
 //! @{
@@ -3970,22 +4239,46 @@ enum ColormapTypes
     COLORMAP_HSV = 9, //!< ![HSV](pics/colormaps/colorscale_hsv.jpg)
     COLORMAP_PINK = 10, //!< ![pink](pics/colormaps/colorscale_pink.jpg)
     COLORMAP_HOT = 11, //!< ![hot](pics/colormaps/colorscale_hot.jpg)
-    COLORMAP_PARULA = 12 //!< ![parula](pics/colormaps/colorscale_parula.jpg)
+    COLORMAP_PARULA = 12, //!< ![parula](pics/colormaps/colorscale_parula.jpg)
+    COLORMAP_MAGMA = 13, //!< ![magma](pics/colormaps/colorscale_magma.jpg)
+    COLORMAP_INFERNO = 14, //!< ![inferno](pics/colormaps/colorscale_inferno.jpg)
+    COLORMAP_PLASMA = 15, //!< ![plasma](pics/colormaps/colorscale_plasma.jpg)
+    COLORMAP_VIRIDIS = 16, //!< ![viridis](pics/colormaps/colorscale_viridis.jpg)
+    COLORMAP_CIVIDIS = 17, //!< ![cividis](pics/colormaps/colorscale_cividis.jpg)
+    COLORMAP_TWILIGHT = 18, //!< ![twilight](pics/colormaps/colorscale_twilight.jpg)
+    COLORMAP_TWILIGHT_SHIFTED = 19, //!< ![twilight shifted](pics/colormaps/colorscale_twilight_shifted.jpg)
+    COLORMAP_TURBO = 20 //!< ![turbo](pics/colormaps/colorscale_turbo.jpg)
 };
 
+/** @example samples/cpp/falsecolor.cpp
+An example using applyColorMap function
+*/
+
 /** @brief Applies a GNU Octave/MATLAB equivalent colormap on a given image.
 
-@param src The source image, grayscale or colored does not matter.
+@param src The source image, grayscale or colored of type CV_8UC1 or CV_8UC3.
 @param dst The result is the colormapped source image. Note: Mat::create is called on dst.
-@param colormap The colormap to apply, see cv::ColormapTypes
- */
+@param colormap The colormap to apply, see #ColormapTypes
+*/
 CV_EXPORTS_W void applyColorMap(InputArray src, OutputArray dst, int colormap);
 
+/** @brief Applies a user colormap on a given image.
+
+@param src The source image, grayscale or colored of type CV_8UC1 or CV_8UC3.
+@param dst The result is the colormapped source image. Note: Mat::create is called on dst.
+@param userColor The colormap to apply of type CV_8UC1 or CV_8UC3 and size 256
+*/
+CV_EXPORTS_W void applyColorMap(InputArray src, OutputArray dst, InputArray userColor);
+
 //! @} imgproc_colormap
 
 //! @addtogroup imgproc_draw
 //! @{
 
+
+/** OpenCV color channel order is BGR[A] */
+#define CV_RGB(r, g, b)  cv::Scalar((b), (g), (r), 0)
+
 /** @brief Draws a line segment connecting two points.
 
 The function line draws the line segment between pt1 and pt2 points in the image. The line is
@@ -3998,7 +4291,7 @@ lines are drawn using Gaussian filtering.
 @param pt2 Second point of the line segment.
 @param color Line color.
 @param thickness Line thickness.
-@param lineType Type of the line, see cv::LineTypes.
+@param lineType Type of the line. See #LineTypes.
 @param shift Number of fractional bits in the point coordinates.
  */
 CV_EXPORTS_W void line(InputOutputArray img, Point pt1, Point pt2, const Scalar& color,
@@ -4006,14 +4299,14 @@ CV_EXPORTS_W void line(InputOutputArray img, Point pt1, Point pt2, const Scalar&
 
 /** @brief Draws a arrow segment pointing from the first point to the second one.
 
-The function arrowedLine draws an arrow between pt1 and pt2 points in the image. See also cv::line.
+The function cv::arrowedLine draws an arrow between pt1 and pt2 points in the image. See also #line.
 
 @param img Image.
 @param pt1 The point the arrow starts from.
 @param pt2 The point the arrow points to.
 @param color Line color.
 @param thickness Line thickness.
-@param line_type Type of the line, see cv::LineTypes
+@param line_type Type of the line. See #LineTypes
 @param shift Number of fractional bits in the point coordinates.
 @param tipLength The length of the arrow tip in relation to the arrow length
  */
@@ -4022,16 +4315,16 @@ CV_EXPORTS_W void arrowedLine(InputOutputArray img, Point pt1, Point pt2, const
 
 /** @brief Draws a simple, thick, or filled up-right rectangle.
 
-The function rectangle draws a rectangle outline or a filled rectangle whose two opposite corners
+The function cv::rectangle draws a rectangle outline or a filled rectangle whose two opposite corners
 are pt1 and pt2.
 
 @param img Image.
 @param pt1 Vertex of the rectangle.
 @param pt2 Vertex of the rectangle opposite to pt1 .
 @param color Rectangle color or brightness (grayscale image).
-@param thickness Thickness of lines that make up the rectangle. Negative values, like CV_FILLED ,
+@param thickness Thickness of lines that make up the rectangle. Negative values, like #FILLED,
 mean that the function has to draw a filled rectangle.
-@param lineType Type of the line. See the line description.
+@param lineType Type of the line. See #LineTypes
 @param shift Number of fractional bits in the point coordinates.
  */
 CV_EXPORTS_W void rectangle(InputOutputArray img, Point pt1, Point pt2,
@@ -4043,20 +4336,24 @@ CV_EXPORTS_W void rectangle(InputOutputArray img, Point pt1, Point pt2,
 use `rec` parameter as alternative specification of the drawn rectangle: `r.tl() and
 r.br()-Point(1,1)` are opposite corners
 */
-CV_EXPORTS void rectangle(CV_IN_OUT Mat& img, Rect rec,
+CV_EXPORTS_W void rectangle(InputOutputArray img, Rect rec,
                           const Scalar& color, int thickness = 1,
                           int lineType = LINE_8, int shift = 0);
 
+/** @example samples/cpp/tutorial_code/ImgProc/basic_drawing/Drawing_2.cpp
+An example using drawing functions
+*/
+
 /** @brief Draws a circle.
 
-The function circle draws a simple or filled circle with a given center and radius.
+The function cv::circle draws a simple or filled circle with a given center and radius.
 @param img Image where the circle is drawn.
 @param center Center of the circle.
 @param radius Radius of the circle.
 @param color Circle color.
-@param thickness Thickness of the circle outline, if positive. Negative thickness means that a
-filled circle is to be drawn.
-@param lineType Type of the circle boundary. See the line description.
+@param thickness Thickness of the circle outline, if positive. Negative values, like #FILLED,
+mean that a filled circle is to be drawn.
+@param lineType Type of the circle boundary. See #LineTypes
 @param shift Number of fractional bits in the coordinates of the center and in the radius value.
  */
 CV_EXPORTS_W void circle(InputOutputArray img, Point center, int radius,
@@ -4065,14 +4362,16 @@ CV_EXPORTS_W void circle(InputOutputArray img, Point center, int radius,
 
 /** @brief Draws a simple or thick elliptic arc or fills an ellipse sector.
 
-The functions ellipse with less parameters draw an ellipse outline, a filled ellipse, an elliptic
-arc, or a filled ellipse sector. A piecewise-linear curve is used to approximate the elliptic arc
+The function cv::ellipse with more parameters draws an ellipse outline, a filled ellipse, an elliptic
+arc, or a filled ellipse sector. The drawing code uses general parametric form.
+A piecewise-linear curve is used to approximate the elliptic arc
 boundary. If you need more control of the ellipse rendering, you can retrieve the curve using
-ellipse2Poly and then render it with polylines or fill it with fillPoly . If you use the first
-variant of the function and want to draw the whole ellipse, not an arc, pass startAngle=0 and
-endAngle=360 . The figure below explains the meaning of the parameters.
+#ellipse2Poly and then render it with #polylines or fill it with #fillPoly. If you use the first
+variant of the function and want to draw the whole ellipse, not an arc, pass `startAngle=0` and
+`endAngle=360`. If `startAngle` is greater than `endAngle`, they are swapped. The figure below explains
+the meaning of the parameters to draw the blue arc.
 
-![Parameters of Elliptic Arc](pics/ellipse.png)
+![Parameters of Elliptic Arc](pics/ellipse.svg)
 
 @param img Image.
 @param center Center of the ellipse.
@@ -4083,7 +4382,7 @@ endAngle=360 . The figure below explains the meaning of the parameters.
 @param color Ellipse color.
 @param thickness Thickness of the ellipse arc outline, if positive. Otherwise, this indicates that
 a filled ellipse sector is to be drawn.
-@param lineType Type of the ellipse boundary. See the line description.
+@param lineType Type of the ellipse boundary. See #LineTypes
 @param shift Number of fractional bits in the coordinates of the center and values of axes.
  */
 CV_EXPORTS_W void ellipse(InputOutputArray img, Point center, Size axes,
@@ -4098,7 +4397,7 @@ an ellipse inscribed in the rotated rectangle.
 @param color Ellipse color.
 @param thickness Thickness of the ellipse arc outline, if positive. Otherwise, this indicates that
 a filled ellipse sector is to be drawn.
-@param lineType Type of the ellipse boundary. See the line description.
+@param lineType Type of the ellipse boundary. See #LineTypes
 */
 CV_EXPORTS_W void ellipse(InputOutputArray img, const RotatedRect& box, const Scalar& color,
                         int thickness = 1, int lineType = LINE_8);
@@ -4107,32 +4406,20 @@ CV_EXPORTS_W void ellipse(InputOutputArray img, const RotatedRect& box, const Sc
 /* ADDING A SET OF PREDEFINED MARKERS WHICH COULD BE USED TO HIGHLIGHT POSITIONS IN AN IMAGE */
 /* ----------------------------------------------------------------------------------------- */
 
-//! Possible set of marker types used for the cv::drawMarker function
-enum MarkerTypes
-{
-    MARKER_CROSS = 0,           //!< A crosshair marker shape
-    MARKER_TILTED_CROSS = 1,    //!< A 45 degree tilted crosshair marker shape
-    MARKER_STAR = 2,            //!< A star marker shape, combination of cross and tilted cross
-    MARKER_DIAMOND = 3,         //!< A diamond marker shape
-    MARKER_SQUARE = 4,          //!< A square marker shape
-    MARKER_TRIANGLE_UP = 5,     //!< An upwards pointing triangle marker shape
-    MARKER_TRIANGLE_DOWN = 6    //!< A downwards pointing triangle marker shape
-};
-
 /** @brief Draws a marker on a predefined position in an image.
 
-The function drawMarker draws a marker on a given position in the image. For the moment several
-marker types are supported, see cv::MarkerTypes for more information.
+The function cv::drawMarker draws a marker on a given position in the image. For the moment several
+marker types are supported, see #MarkerTypes for more information.
 
 @param img Image.
 @param position The point where the crosshair is positioned.
 @param color Line color.
-@param markerType The specific type of marker you want to use, see cv::MarkerTypes
+@param markerType The specific type of marker you want to use, see #MarkerTypes
 @param thickness Line thickness.
-@param line_type Type of the line, see cv::LineTypes
+@param line_type Type of the line. See #LineTypes
 @param markerSize The length of the marker axis [default = 20 pixels]
  */
-CV_EXPORTS_W void drawMarker(CV_IN_OUT Mat& img, Point position, const Scalar& color,
+CV_EXPORTS_W void drawMarker(InputOutputArray img, Point position, const Scalar& color,
                              int markerType = MARKER_CROSS, int markerSize=20, int thickness=1,
                              int line_type=8);
 
@@ -4141,21 +4428,21 @@ CV_EXPORTS_W void drawMarker(CV_IN_OUT Mat& img, Point position, const Scalar& c
 /* ----------------------------------------------------------------------------------------- */
 
 /** @overload */
-CV_EXPORTS void fillConvexPoly(Mat& img, const Point* pts, int npts,
+CV_EXPORTS void fillConvexPoly(InputOutputArray img, const Point* pts, int npts,
                                const Scalar& color, int lineType = LINE_8,
                                int shift = 0);
 
 /** @brief Fills a convex polygon.
 
-The function fillConvexPoly draws a filled convex polygon. This function is much faster than the
-function cv::fillPoly . It can fill not only convex polygons but any monotonic polygon without
+The function cv::fillConvexPoly draws a filled convex polygon. This function is much faster than the
+function #fillPoly . It can fill not only convex polygons but any monotonic polygon without
 self-intersections, that is, a polygon whose contour intersects every horizontal line (scan line)
 twice at the most (though, its top-most and/or the bottom edge could be horizontal).
 
 @param img Image.
 @param points Polygon vertices.
 @param color Polygon color.
-@param lineType Type of the polygon boundaries. See the line description.
+@param lineType Type of the polygon boundaries. See #LineTypes
 @param shift Number of fractional bits in the vertex coordinates.
  */
 CV_EXPORTS_W void fillConvexPoly(InputOutputArray img, InputArray points,
@@ -4163,21 +4450,26 @@ CV_EXPORTS_W void fillConvexPoly(InputOutputArray img, InputArray points,
                                  int shift = 0);
 
 /** @overload */
-CV_EXPORTS void fillPoly(Mat& img, const Point** pts,
+CV_EXPORTS void fillPoly(InputOutputArray img, const Point** pts,
                          const int* npts, int ncontours,
                          const Scalar& color, int lineType = LINE_8, int shift = 0,
                          Point offset = Point() );
 
+/** @example samples/cpp/tutorial_code/ImgProc/basic_drawing/Drawing_1.cpp
+An example using drawing functions
+Check @ref tutorial_random_generator_and_text "the corresponding tutorial" for more details
+*/
+
 /** @brief Fills the area bounded by one or more polygons.
 
-The function fillPoly fills an area bounded by several polygonal contours. The function can fill
+The function cv::fillPoly fills an area bounded by several polygonal contours. The function can fill
 complex areas, for example, areas with holes, contours with self-intersections (some of their
 parts), and so forth.
 
 @param img Image.
 @param pts Array of polygons where each polygon is represented as an array of points.
 @param color Polygon color.
-@param lineType Type of the polygon boundaries. See the line description.
+@param lineType Type of the polygon boundaries. See #LineTypes
 @param shift Number of fractional bits in the vertex coordinates.
 @param offset Optional offset of all points of the contours.
  */
@@ -4186,7 +4478,7 @@ CV_EXPORTS_W void fillPoly(InputOutputArray img, InputArrayOfArrays pts,
                            Point offset = Point() );
 
 /** @overload */
-CV_EXPORTS void polylines(Mat& img, const Point* const* pts, const int* npts,
+CV_EXPORTS void polylines(InputOutputArray img, const Point* const* pts, const int* npts,
                           int ncontours, bool isClosed, const Scalar& color,
                           int thickness = 1, int lineType = LINE_8, int shift = 0 );
 
@@ -4198,77 +4490,38 @@ CV_EXPORTS void polylines(Mat& img, const Point* const* pts, const int* npts,
 the function draws a line from the last vertex of each curve to its first vertex.
 @param color Polyline color.
 @param thickness Thickness of the polyline edges.
-@param lineType Type of the line segments. See the line description.
+@param lineType Type of the line segments. See #LineTypes
 @param shift Number of fractional bits in the vertex coordinates.
 
-The function polylines draws one or more polygonal curves.
+The function cv::polylines draws one or more polygonal curves.
  */
 CV_EXPORTS_W void polylines(InputOutputArray img, InputArrayOfArrays pts,
                             bool isClosed, const Scalar& color,
                             int thickness = 1, int lineType = LINE_8, int shift = 0 );
 
-/** @example contours2.cpp
-  An example using the drawContour functionality
+/** @example samples/cpp/contours2.cpp
+An example program that illustrates the use of cv::findContours and cv::drawContours
+\image html WindowsQtContoursOutput.png "Screenshot of the program"
 */
 
-/** @example segment_objects.cpp
+/** @example samples/cpp/segment_objects.cpp
 An example using drawContours to clean up a background segmentation result
- */
+*/
 
 /** @brief Draws contours outlines or filled contours.
 
 The function draws contour outlines in the image if \f$\texttt{thickness} \ge 0\f$ or fills the area
 bounded by the contours if \f$\texttt{thickness}<0\f$ . The example below shows how to retrieve
 connected components from the binary image and label them: :
-@code
-    #include "opencv2/imgproc.hpp"
-    #include "opencv2/highgui.hpp"
-
-    using namespace cv;
-    using namespace std;
-
-    int main( int argc, char** argv )
-    {
-        Mat src;
-        // the first command-line parameter must be a filename of the binary
-        // (black-n-white) image
-        if( argc != 2 || !(src=imread(argv[1], 0)).data)
-            return -1;
-
-        Mat dst = Mat::zeros(src.rows, src.cols, CV_8UC3);
-
-        src = src > 1;
-        namedWindow( "Source", 1 );
-        imshow( "Source", src );
-
-        vector<vector<Point> > contours;
-        vector<Vec4i> hierarchy;
-
-        findContours( src, contours, hierarchy,
-            RETR_CCOMP, CHAIN_APPROX_SIMPLE );
-
-        // iterate through all the top-level contours,
-        // draw each connected component with its own random color
-        int idx = 0;
-        for( ; idx >= 0; idx = hierarchy[idx][0] )
-        {
-            Scalar color( rand()&255, rand()&255, rand()&255 );
-            drawContours( dst, contours, idx, color, FILLED, 8, hierarchy );
-        }
-
-        namedWindow( "Components", 1 );
-        imshow( "Components", dst );
-        waitKey(0);
-    }
-@endcode
+@include snippets/imgproc_drawContours.cpp
 
 @param image Destination image.
 @param contours All the input contours. Each contour is stored as a point vector.
 @param contourIdx Parameter indicating a contour to draw. If it is negative, all the contours are drawn.
 @param color Color of the contours.
 @param thickness Thickness of lines the contours are drawn with. If it is negative (for example,
-thickness=CV_FILLED ), the contour interiors are drawn.
-@param lineType Line connectivity. See cv::LineTypes.
+thickness=#FILLED ), the contour interiors are drawn.
+@param lineType Line connectivity. See #LineTypes
 @param hierarchy Optional information about hierarchy. It is only needed if you want to draw only
 some of the contours (see maxLevel ).
 @param maxLevel Maximal level for drawn contours. If it is 0, only the specified contour is drawn.
@@ -4277,6 +4530,11 @@ draws the contours, all the nested contours, all the nested-to-nested contours,
 parameter is only taken into account when there is hierarchy available.
 @param offset Optional contour shift parameter. Shift all the drawn contours by the specified
 \f$\texttt{offset}=(dx,dy)\f$ .
+@note When thickness=#FILLED, the function is designed to handle connected components with holes correctly
+even when no hierarchy data is provided. This is done by analyzing all the outlines together
+using even-odd rule. This may give incorrect results if you have a joint collection of separately retrieved
+contours. In order to solve this problem, you need to call #drawContours separately for each sub-group
+of contours, or iterate over the collection using contourIdx parameter.
  */
 CV_EXPORTS_W void drawContours( InputOutputArray image, InputArrayOfArrays contours,
                               int contourIdx, const Scalar& color,
@@ -4286,15 +4544,22 @@ CV_EXPORTS_W void drawContours( InputOutputArray image, InputArrayOfArrays conto
 
 /** @brief Clips the line against the image rectangle.
 
-The functions clipLine calculate a part of the line segment that is entirely within the specified
-rectangle. They return false if the line segment is completely outside the rectangle. Otherwise,
-they return true .
+The function cv::clipLine calculates a part of the line segment that is entirely within the specified
+rectangle. It returns false if the line segment is completely outside the rectangle. Otherwise,
+it returns true .
 @param imgSize Image size. The image rectangle is Rect(0, 0, imgSize.width, imgSize.height) .
 @param pt1 First line point.
 @param pt2 Second line point.
  */
 CV_EXPORTS bool clipLine(Size imgSize, CV_IN_OUT Point& pt1, CV_IN_OUT Point& pt2);
 
+/** @overload
+@param imgSize Image size. The image rectangle is Rect(0, 0, imgSize.width, imgSize.height) .
+@param pt1 First line point.
+@param pt2 Second line point.
+*/
+CV_EXPORTS bool clipLine(Size2l imgSize, CV_IN_OUT Point2l& pt1, CV_IN_OUT Point2l& pt2);
+
 /** @overload
 @param imgRect Image rectangle.
 @param pt1 First line point.
@@ -4305,11 +4570,11 @@ CV_EXPORTS_W bool clipLine(Rect imgRect, CV_OUT CV_IN_OUT Point& pt1, CV_OUT CV_
 /** @brief Approximates an elliptic arc with a polyline.
 
 The function ellipse2Poly computes the vertices of a polyline that approximates the specified
-elliptic arc. It is used by cv::ellipse.
+elliptic arc. It is used by #ellipse. If `arcStart` is greater than `arcEnd`, they are swapped.
 
 @param center Center of the arc.
-@param axes Half of the size of the ellipse main axes. See the ellipse for details.
-@param angle Rotation angle of the ellipse in degrees. See the ellipse for details.
+@param axes Half of the size of the ellipse main axes. See #ellipse for details.
+@param angle Rotation angle of the ellipse in degrees. See #ellipse for details.
 @param arcStart Starting angle of the elliptic arc in degrees.
 @param arcEnd Ending angle of the elliptic arc in degrees.
 @param delta Angle between the subsequent polyline vertices. It defines the approximation
@@ -4320,20 +4585,33 @@ CV_EXPORTS_W void ellipse2Poly( Point center, Size axes, int angle,
                                 int arcStart, int arcEnd, int delta,
                                 CV_OUT std::vector<Point>& pts );
 
+/** @overload
+@param center Center of the arc.
+@param axes Half of the size of the ellipse main axes. See #ellipse for details.
+@param angle Rotation angle of the ellipse in degrees. See #ellipse for details.
+@param arcStart Starting angle of the elliptic arc in degrees.
+@param arcEnd Ending angle of the elliptic arc in degrees.
+@param delta Angle between the subsequent polyline vertices. It defines the approximation accuracy.
+@param pts Output vector of polyline vertices.
+*/
+CV_EXPORTS void ellipse2Poly(Point2d center, Size2d axes, int angle,
+                             int arcStart, int arcEnd, int delta,
+                             CV_OUT std::vector<Point2d>& pts);
+
 /** @brief Draws a text string.
 
-The function putText renders the specified text string in the image. Symbols that cannot be rendered
-using the specified font are replaced by question marks. See getTextSize for a text rendering code
+The function cv::putText renders the specified text string in the image. Symbols that cannot be rendered
+using the specified font are replaced by question marks. See #getTextSize for a text rendering code
 example.
 
 @param img Image.
 @param text Text string to be drawn.
 @param org Bottom-left corner of the text string in the image.
-@param fontFace Font type, see cv::HersheyFonts.
+@param fontFace Font type, see #HersheyFonts.
 @param fontScale Font scale factor that is multiplied by the font-specific base size.
 @param color Text color.
 @param thickness Thickness of the lines used to draw a text.
-@param lineType Line type. See the line for details.
+@param lineType Line type. See #LineTypes
 @param bottomLeftOrigin When true, the image data origin is at the bottom-left corner. Otherwise,
 it is at the top-left corner.
  */
@@ -4344,7 +4622,7 @@ CV_EXPORTS_W void putText( InputOutputArray img, const String& text, Point org,
 
 /** @brief Calculates the width and height of a text string.
 
-The function getTextSize calculates and returns the size of a box that contains the specified text.
+The function cv::getTextSize calculates and returns the size of a box that contains the specified text.
 That is, the following code renders some text, the tight box surrounding it, and the baseline: :
 @code
     String text = "Funny text inside the box";
@@ -4378,19 +4656,33 @@ That is, the following code renders some text, the tight box surrounding it, and
 @endcode
 
 @param text Input text string.
-@param fontFace Font to use, see cv::HersheyFonts.
+@param fontFace Font to use, see #HersheyFonts.
 @param fontScale Font scale factor that is multiplied by the font-specific base size.
-@param thickness Thickness of lines used to render the text. See putText for details.
+@param thickness Thickness of lines used to render the text. See #putText for details.
 @param[out] baseLine y-coordinate of the baseline relative to the bottom-most text
 point.
 @return The size of a box that contains the specified text.
 
-@see cv::putText
+@see putText
  */
 CV_EXPORTS_W Size getTextSize(const String& text, int fontFace,
                             double fontScale, int thickness,
                             CV_OUT int* baseLine);
 
+
+/** @brief Calculates the font-specific size to use to achieve a given height in pixels.
+
+@param fontFace Font to use, see cv::HersheyFonts.
+@param pixelHeight Pixel height to compute the fontScale for
+@param thickness Thickness of lines used to render the text. See putText for details.
+@return The fontScale to use for cv::putText
+
+@see cv::putText
+*/
+CV_EXPORTS_W double getFontScaleFromHeight(const int fontFace,
+                                           const int pixelHeight,
+                                           const int thickness = 1);
+
 /** @brief Line iterator
 
 The class is used to iterate over all the pixels on the raster line
@@ -4413,7 +4705,7 @@ LineIterator it2 = it;
 vector<Vec3b> buf(it.count);
 
 for(int i = 0; i < it.count; i++, ++it)
-    buf[i] = *(const Vec3b)*it;
+    buf[i] = *(const Vec3b*)*it;
 
 // alternative way of iterating through the line
 for(int i = 0; i < it2.count; i++, ++it2)
@@ -4426,17 +4718,46 @@ for(int i = 0; i < it2.count; i++, ++it2)
 class CV_EXPORTS LineIterator
 {
 public:
-    /** @brief intializes the iterator
+    /** @brief initializes the iterator
 
     creates iterators for the line connecting pt1 and pt2
     the line will be clipped on the image boundaries
     the line is 8-connected or 4-connected
     If leftToRight=true, then the iteration is always done
     from the left-most point to the right most,
-    not to depend on the ordering of pt1 and pt2 parameters
+    not to depend on the ordering of pt1 and pt2 parameters.
     */
     LineIterator( const Mat& img, Point pt1, Point pt2,
-                  int connectivity = 8, bool leftToRight = false );
+                  int connectivity = 8, bool leftToRight = false )
+    {
+        init(&img, Rect(0, 0, img.cols, img.rows), pt1, pt2, connectivity, leftToRight); // bounding area = the whole image
+        ptmode = false; // image mode: operator*() returns the pixel pointer
+    }
+    LineIterator( Point pt1, Point pt2, // image-less overload: only coordinates are iterated
+                  int connectivity = 8, bool leftToRight = false )
+    {
+        init(0, Rect(std::min(pt1.x, pt2.x), // null image; bounds = smallest rectangle holding both points
+                     std::min(pt1.y, pt2.y),
+                     std::max(pt1.x, pt2.x) - std::min(pt1.x, pt2.x) + 1, // +1: both end columns are inside
+                     std::max(pt1.y, pt2.y) - std::min(pt1.y, pt2.y) + 1), // +1: both end rows are inside
+             pt1, pt2, connectivity, leftToRight);
+        ptmode = true; // point mode: operator*() returns 0, pos() returns the tracked point p
+    }
+    LineIterator( Size boundingAreaSize, Point pt1, Point pt2, // image-less overload bounded by a size
+                  int connectivity = 8, bool leftToRight = false )
+    {
+        init(0, Rect(0, 0, boundingAreaSize.width, boundingAreaSize.height), // null image; bounds start at (0, 0)
+             pt1, pt2, connectivity, leftToRight);
+        ptmode = true; // point mode: operator*() returns 0, pos() returns the tracked point p
+    }
+    LineIterator( Rect boundingAreaRect, Point pt1, Point pt2, // image-less overload bounded by a rectangle
+                  int connectivity = 8, bool leftToRight = false )
+    {
+        init(0, boundingAreaRect, pt1, pt2, connectivity, leftToRight); // null image; the rectangle is passed through as given
+        ptmode = true; // point mode: operator*() returns 0, pos() returns the tracked point p
+    }
+    void init(const Mat* img, Rect boundingAreaRect, Point pt1, Point pt2, int connectivity, bool leftToRight); // shared setup called by every constructor; img is 0 for the image-less overloads
+
     /** @brief returns pointer to the current pixel
     */
     uchar* operator *();
@@ -4456,6 +4777,9 @@ class CV_EXPORTS LineIterator
     int err, count;
     int minusDelta, plusDelta;
     int minusStep, plusStep;
+    int minusShift, plusShift;
+    Point p;
+    bool ptmode;
 };
 
 //! @cond IGNORED
@@ -4465,7 +4789,7 @@ class CV_EXPORTS LineIterator
 inline
 uchar* LineIterator::operator *()
 {
-    return ptr;
+    return ptmode ? 0 : ptr; // point mode is set by the image-less constructors, so there is no pixel to point at
 }
 
 inline
@@ -4473,7 +4797,15 @@ LineIterator& LineIterator::operator ++()
 {
     int mask = err < 0 ? -1 : 0;
     err += minusDelta + (plusDelta & mask);
-    ptr += minusStep + (plusStep & mask);
+    if(!ptmode)
+    {
+        ptr += minusStep + (plusStep & mask);
+    }
+    else
+    {
+        p.x += minusShift + (plusShift & mask);
+        p.y += minusStep + (plusStep & mask);
+    }
     return *this;
 }
 
@@ -4488,9 +4820,13 @@ LineIterator LineIterator::operator ++(int)
 inline
 Point LineIterator::pos() const
 {
-    Point p;
-    p.y = (int)((ptr - ptr0)/step);
-    p.x = (int)(((ptr - ptr0) - p.y*step)/elemSize);
+    if(!ptmode) // image mode: recover (x, y) from the current pixel pointer
+    {
+        size_t offset = (size_t)(ptr - ptr0); // distance from ptr0 (presumably the image origin - confirm in init())
+        int y = (int)(offset/step); // number of whole rows, assuming step is the row stride
+        int x = (int)((offset - (size_t)y*step)/elemSize); // remainder within the row, in elements
+        return Point(x, y);
+    }
     return p;
 }
 
@@ -4502,8 +4838,4 @@ Point LineIterator::pos() const
 
 } // cv
 
-#ifndef DISABLE_OPENCV_24_COMPATIBILITY
-#include "opencv2/imgproc/imgproc_c.h"
-#endif
-
 #endif
diff --git a/IPL/include/opencv/opencv2/imgproc/detail/distortion_model.hpp b/IPL/include/opencv/opencv2/imgproc/detail/distortion_model.hpp
deleted file mode 100644
index ca29304..0000000
--- a/IPL/include/opencv/opencv2/imgproc/detail/distortion_model.hpp
+++ /dev/null
@@ -1,123 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifndef __OPENCV_IMGPROC_DETAIL_DISTORTION_MODEL_HPP__
-#define __OPENCV_IMGPROC_DETAIL_DISTORTION_MODEL_HPP__
-
-//! @cond IGNORED
-
-namespace cv { namespace detail {
-/**
-Computes the matrix for the projection onto a tilted image sensor
-\param tauX angular parameter rotation around x-axis
-\param tauY angular parameter rotation around y-axis
-\param matTilt if not NULL returns the matrix
-\f[
-\vecthreethree{R_{33}(\tau_x, \tau_y)}{0}{-R_{13}((\tau_x, \tau_y)}
-{0}{R_{33}(\tau_x, \tau_y)}{-R_{23}(\tau_x, \tau_y)}
-{0}{0}{1} R(\tau_x, \tau_y)
-\f]
-where
-\f[
-R(\tau_x, \tau_y) =
-\vecthreethree{\cos(\tau_y)}{0}{-\sin(\tau_y)}{0}{1}{0}{\sin(\tau_y)}{0}{\cos(\tau_y)}
-\vecthreethree{1}{0}{0}{0}{\cos(\tau_x)}{\sin(\tau_x)}{0}{-\sin(\tau_x)}{\cos(\tau_x)} =
-\vecthreethree{\cos(\tau_y)}{\sin(\tau_y)\sin(\tau_x)}{-\sin(\tau_y)\cos(\tau_x)}
-{0}{\cos(\tau_x)}{\sin(\tau_x)}
-{\sin(\tau_y)}{-\cos(\tau_y)\sin(\tau_x)}{\cos(\tau_y)\cos(\tau_x)}.
-\f]
-\param dMatTiltdTauX if not NULL it returns the derivative of matTilt with
-respect to \f$\tau_x\f$.
-\param dMatTiltdTauY if not NULL it returns the derivative of matTilt with
-respect to \f$\tau_y\f$.
-\param invMatTilt if not NULL it returns the inverse of matTilt
-**/
-template <typename FLOAT>
-void computeTiltProjectionMatrix(FLOAT tauX,
-    FLOAT tauY,
-    Matx<FLOAT, 3, 3>* matTilt = 0,
-    Matx<FLOAT, 3, 3>* dMatTiltdTauX = 0,
-    Matx<FLOAT, 3, 3>* dMatTiltdTauY = 0,
-    Matx<FLOAT, 3, 3>* invMatTilt = 0)
-{
-    FLOAT cTauX = cos(tauX);
-    FLOAT sTauX = sin(tauX);
-    FLOAT cTauY = cos(tauY);
-    FLOAT sTauY = sin(tauY);
-    Matx<FLOAT, 3, 3> matRotX = Matx<FLOAT, 3, 3>(1,0,0,0,cTauX,sTauX,0,-sTauX,cTauX);
-    Matx<FLOAT, 3, 3> matRotY = Matx<FLOAT, 3, 3>(cTauY,0,-sTauY,0,1,0,sTauY,0,cTauY);
-    Matx<FLOAT, 3, 3> matRotXY = matRotY * matRotX;
-    Matx<FLOAT, 3, 3> matProjZ = Matx<FLOAT, 3, 3>(matRotXY(2,2),0,-matRotXY(0,2),0,matRotXY(2,2),-matRotXY(1,2),0,0,1);
-    if (matTilt)
-    {
-        // Matrix for trapezoidal distortion of tilted image sensor
-        *matTilt = matProjZ * matRotXY;
-    }
-    if (dMatTiltdTauX)
-    {
-        // Derivative with respect to tauX
-        Matx<FLOAT, 3, 3> dMatRotXYdTauX = matRotY * Matx<FLOAT, 3, 3>(0,0,0,0,-sTauX,cTauX,0,-cTauX,-sTauX);
-        Matx<FLOAT, 3, 3> dMatProjZdTauX = Matx<FLOAT, 3, 3>(dMatRotXYdTauX(2,2),0,-dMatRotXYdTauX(0,2),
-          0,dMatRotXYdTauX(2,2),-dMatRotXYdTauX(1,2),0,0,0);
-        *dMatTiltdTauX = (matProjZ * dMatRotXYdTauX) + (dMatProjZdTauX * matRotXY);
-    }
-    if (dMatTiltdTauY)
-    {
-        // Derivative with respect to tauY
-        Matx<FLOAT, 3, 3> dMatRotXYdTauY = Matx<FLOAT, 3, 3>(-sTauY,0,-cTauY,0,0,0,cTauY,0,-sTauY) * matRotX;
-        Matx<FLOAT, 3, 3> dMatProjZdTauY = Matx<FLOAT, 3, 3>(dMatRotXYdTauY(2,2),0,-dMatRotXYdTauY(0,2),
-          0,dMatRotXYdTauY(2,2),-dMatRotXYdTauY(1,2),0,0,0);
-        *dMatTiltdTauY = (matProjZ * dMatRotXYdTauY) + (dMatProjZdTauY * matRotXY);
-    }
-    if (invMatTilt)
-    {
-        FLOAT inv = 1./matRotXY(2,2);
-        Matx<FLOAT, 3, 3> invMatProjZ = Matx<FLOAT, 3, 3>(inv,0,inv*matRotXY(0,2),0,inv,inv*matRotXY(1,2),0,0,1);
-        *invMatTilt = matRotXY.t()*invMatProjZ;
-    }
-}
-}} // namespace detail, cv
-
-
-//! @endcond
-
-#endif // __OPENCV_IMGPROC_DETAIL_DISTORTION_MODEL_HPP__
diff --git a/IPL/include/opencv/opencv2/imgproc/detail/gcgraph.hpp b/IPL/include/opencv/opencv2/imgproc/detail/gcgraph.hpp
new file mode 100644
index 0000000..db2ea0f
--- /dev/null
+++ b/IPL/include/opencv/opencv2/imgproc/detail/gcgraph.hpp
@@ -0,0 +1,393 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                        Intel License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000, Intel Corporation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of Intel Corporation may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_IMGPROC_DETAIL_GCGRAPH_HPP
+#define OPENCV_IMGPROC_DETAIL_GCGRAPH_HPP
+
+//! @cond IGNORED
+
+namespace cv { namespace detail {
+template <class TWeight> class GCGraph // flow graph whose vertices carry a terminal (source/sink) weight
+{
+public:
+    GCGraph();
+    GCGraph( unsigned int vtxCount, unsigned int edgeCount );
+    ~GCGraph();
+    void create( unsigned int vtxCount, unsigned int edgeCount ); // reserves storage and zeroes flow; does not remove existing vertices/edges
+    int addVtx(); // appends a zeroed vertex and returns its index
+    void addEdges( int i, int j, TWeight w, TWeight revw ); // adds edge i->j (weight w) and its pair j->i (weight revw)
+    void addTermWeights( int i, TWeight sourceW, TWeight sinkW ); // adds source/sink weights to vertex i
+    TWeight maxFlow(); // runs the search/augment/restore loop and returns the total flow
+    bool inSourceSegment( int i ); // true when vertex i has t == 0; t is assigned by maxFlow()
+private:
+    class Vtx
+    {
+    public:
+        Vtx *next; // initialized and used in maxFlow() only
+        int parent; // index of the edge leading to the parent; 0 = not in a tree, -1 = TERMINAL, -2 = ORPHAN
+        int first; // index of the first edge in this vertex's edge list; 0 = no edges
+        int ts; // timestamp (curr_ts in maxFlow()) at which dist was last set
+        int dist; // distance to the terminal, valid for timestamp ts
+        TWeight weight; // remaining terminal weight: > 0 towards the source, < 0 towards the sink
+        uchar t; // tree label: 0 = source tree, 1 = sink tree
+    };
+    class Edge
+    {
+    public:
+        int dst; // index of the destination vertex
+        int next; // next edge leaving the same vertex; 0 ends the list
+        TWeight weight; // remaining capacity; maxFlow() moves it between an edge and its pair
+    };
+
+    std::vector<Vtx> vtcs;
+    std::vector<Edge> edges; // slots 0 and 1 are placeholders so that index 0 can mean "none"; real edges come in pairs (e, e^1)
+    TWeight flow; // flow accumulated so far by addTermWeights() and maxFlow()
+};
+
+template <class TWeight>
+GCGraph<TWeight>::GCGraph()
+    : flow(0) // an empty graph carries no flow
+{
+}
+template <class TWeight>
+GCGraph<TWeight>::GCGraph( unsigned int vtxCount, unsigned int edgeCount ) // counts are capacity hints only
+{
+    create( vtxCount, edgeCount ); // reserves storage and zeroes flow; no vertex or edge is created yet
+}
+template <class TWeight>
+GCGraph<TWeight>::~GCGraph() // nothing to release by hand: vtcs and edges are std::vector members
+{
+}
+template <class TWeight>
+void GCGraph<TWeight>::create( unsigned int vtxCount, unsigned int edgeCount ) // pre-allocates; existing content is kept
+{
+    vtcs.reserve( vtxCount );
+    edges.reserve( edgeCount + 2 ); // +2 for the two placeholder slots addEdges() creates on first use
+    flow = 0;
+}
+
+template <class TWeight>
+int GCGraph<TWeight>::addVtx()
+{
+    // Value-initialize instead of memset(): every field (next, parent, first, ts, dist,
+    // weight, t) still starts at zero, but a non-trivial TWeight is never treated as raw bytes.
+    vtcs.push_back(Vtx());
+    return (int)vtcs.size() - 1; // index of the vertex just appended
+}
+
+template <class TWeight>
+void GCGraph<TWeight>::addEdges( int i, int j, TWeight w, TWeight revw )
+{
+    CV_Assert( i>=0 && i<(int)vtcs.size() );
+    CV_Assert( j>=0 && j<(int)vtcs.size() );
+    CV_Assert( w>=0 && revw>=0 );
+    CV_Assert( i != j );
+    // Slots 0 and 1 are placeholders: edge index 0 terminates every edge list.
+    if( edges.empty() )
+        edges.resize( 2 );
+    // Forward edge i -> j is prepended to i's list and lands on an even index.
+    Edge forward, backward;
+    forward.dst = j;
+    forward.weight = w;
+    forward.next = vtcs[i].first;
+    vtcs[i].first = (int)edges.size();
+    edges.push_back( forward );
+    // Reverse edge j -> i is stored right after it, so the pair is (e, e^1).
+    backward.dst = i;
+    backward.weight = revw;
+    backward.next = vtcs[j].first;
+    vtcs[j].first = (int)edges.size();
+    edges.push_back( backward );
+}
+
+template <class TWeight>
+void GCGraph<TWeight>::addTermWeights( int i, TWeight sourceW, TWeight sinkW )
+{
+    CV_Assert( i>=0 && i<(int)vtcs.size() );
+    // Merge the terminal weight already stored on the vertex into the new pair.
+    const TWeight stored = vtcs[i].weight;
+    if( stored > 0 )
+        sourceW += stored;
+    else
+        sinkW -= stored;
+    flow += (sinkW > sourceW) ? sourceW : sinkW; // the smaller weight counts as flow right away
+    vtcs[i].weight = sourceW - sinkW; // only the difference stays on the vertex
+}
+
+template <class TWeight>
+TWeight GCGraph<TWeight>::maxFlow() // returns the total flow; assigns Vtx::t, which inSourceSegment() reads
+{
+    const int TERMINAL = -1, ORPHAN = -2; // special Vtx::parent values; 0 means "not in a tree"
+    Vtx stub, *nilNode = &stub, *first = nilNode, *last = nilNode; // stub is the sentinel of the active list
+    int curr_ts = 0;
+    stub.next = nilNode;
+    Vtx *vtxPtr = vtcs.empty() ? 0 : &vtcs[0];     // &v[0] is undefined on an empty vector
+    Edge *edgePtr = edges.empty() ? 0 : &edges[0]; // edges is empty when addEdges() was never called
+
+    std::vector<Vtx*> orphans;
+
+    // initialize the active queue and the graph vertices
+    for( int i = 0; i < (int)vtcs.size(); i++ )
+    {
+        Vtx* v = vtxPtr + i;
+        v->ts = 0;
+        if( v->weight != 0 )
+        {
+            last = last->next = v;
+            v->dist = 1;
+            v->parent = TERMINAL;
+            v->t = v->weight < 0; // 0: source tree, 1: sink tree
+        }
+        else
+            v->parent = 0; // free vertex, not part of any tree yet
+    }
+    first = first->next;
+    last->next = nilNode;
+    nilNode->next = 0;
+
+    // run the search-path -> augment-graph -> restore-trees loop
+    for(;;)
+    {
+        Vtx* v, *u;
+        int e0 = -1, ei = 0, ej = 0;
+        TWeight minWeight, weight;
+        uchar vt;
+
+        // grow S & T search trees, find an edge connecting them
+        while( first != nilNode )
+        {
+            v = first;
+            if( v->parent )
+            {
+                vt = v->t;
+                for( ei = v->first; ei != 0; ei = edgePtr[ei].next )
+                {
+                    if( edgePtr[ei^vt].weight == 0 ) // source tree checks edge ei, sink tree its pair ei^1
+                        continue;
+                    u = vtxPtr+edgePtr[ei].dst;
+                    if( !u->parent )
+                    {
+                        u->t = vt;
+                        u->parent = ei ^ 1; // paired edge u -> v, i.e. the edge leading back to the parent
+                        u->ts = v->ts;
+                        u->dist = v->dist + 1;
+                        if( !u->next )
+                        {
+                            u->next = nilNode;
+                            last = last->next = u;
+                        }
+                        continue;
+                    }
+
+                    if( u->t != vt )
+                    {
+                        e0 = ei ^ vt;
+                        break;
+                    }
+
+                    if( u->dist > v->dist+1 && u->ts <= v->ts )
+                    {
+                        // reassign the parent
+                        u->parent = ei ^ 1;
+                        u->ts = v->ts;
+                        u->dist = v->dist + 1;
+                    }
+                }
+                if( e0 > 0 )
+                    break;
+            }
+            // exclude the vertex from the active list
+            first = first->next;
+            v->next = 0;
+        }
+
+        if( e0 <= 0 )
+            break;
+
+        // find the minimum edge weight along the path
+        minWeight = edgePtr[e0].weight;
+        CV_Assert( minWeight > 0 );
+        // k = 1: source tree, k = 0: destination tree
+        for( int k = 1; k >= 0; k-- )
+        {
+            for( v = vtxPtr+edgePtr[e0^k].dst;; v = vtxPtr+edgePtr[ei].dst )
+            {
+                if( (ei = v->parent) < 0 )
+                    break;
+                weight = edgePtr[ei^k].weight;
+                minWeight = MIN(minWeight, weight);
+                CV_Assert( minWeight > 0 );
+            }
+            weight = fabs(v->weight);
+            minWeight = MIN(minWeight, weight);
+            CV_Assert( minWeight > 0 );
+        }
+
+        // modify weights of the edges along the path and collect orphans
+        edgePtr[e0].weight -= minWeight;
+        edgePtr[e0^1].weight += minWeight;
+        flow += minWeight;
+
+        // k = 1: source tree, k = 0: destination tree
+        for( int k = 1; k >= 0; k-- )
+        {
+            for( v = vtxPtr+edgePtr[e0^k].dst;; v = vtxPtr+edgePtr[ei].dst )
+            {
+                if( (ei = v->parent) < 0 )
+                    break;
+                edgePtr[ei^(k^1)].weight += minWeight;
+                if( (edgePtr[ei^k].weight -= minWeight) == 0 )
+                {
+                    orphans.push_back(v);
+                    v->parent = ORPHAN;
+                }
+            }
+
+            v->weight = v->weight + minWeight*(1-k*2); // k = 1: weight -= minWeight, k = 0: weight += minWeight
+            if( v->weight == 0 )
+            {
+               orphans.push_back(v);
+               v->parent = ORPHAN;
+            }
+        }
+
+        // restore the search trees by finding new parents for the orphans
+        curr_ts++;
+        while( !orphans.empty() )
+        {
+            Vtx* v2 = orphans.back();
+            orphans.pop_back();
+
+            int d, minDist = INT_MAX;
+            e0 = 0;
+            vt = v2->t;
+
+            for( ei = v2->first; ei != 0; ei = edgePtr[ei].next )
+            {
+                if( edgePtr[ei^(vt^1)].weight == 0 )
+                    continue;
+                u = vtxPtr+edgePtr[ei].dst;
+                if( u->t != vt || u->parent == 0 )
+                    continue;
+                // compute the distance to the tree root
+                for( d = 0;; )
+                {
+                    if( u->ts == curr_ts )
+                    {
+                        d += u->dist;
+                        break;
+                    }
+                    ej = u->parent;
+                    d++;
+                    if( ej < 0 )
+                    {
+                        if( ej == ORPHAN )
+                            d = INT_MAX-1;
+                        else
+                        {
+                            u->ts = curr_ts;
+                            u->dist = 1;
+                        }
+                        break;
+                    }
+                    u = vtxPtr+edgePtr[ej].dst;
+                }
+
+                // update the distance
+                if( ++d < INT_MAX )
+                {
+                    if( d < minDist )
+                    {
+                        minDist = d;
+                        e0 = ei;
+                    }
+                    for( u = vtxPtr+edgePtr[ei].dst; u->ts != curr_ts; u = vtxPtr+edgePtr[u->parent].dst )
+                    {
+                        u->ts = curr_ts;
+                        u->dist = --d;
+                    }
+                }
+            }
+
+            if( (v2->parent = e0) > 0 )
+            {
+                v2->ts = curr_ts;
+                v2->dist = minDist;
+                continue;
+            }
+
+            /* no parent is found */
+            v2->ts = 0;
+            for( ei = v2->first; ei != 0; ei = edgePtr[ei].next )
+            {
+                u = vtxPtr+edgePtr[ei].dst;
+                ej = u->parent;
+                if( u->t != vt || !ej )
+                    continue;
+                if( edgePtr[ei^(vt^1)].weight && !u->next )
+                {
+                    u->next = nilNode;
+                    last = last->next = u;
+                }
+                if( ej > 0 && vtxPtr+edgePtr[ej].dst == v2 )
+                {
+                    orphans.push_back(u);
+                    u->parent = ORPHAN;
+                }
+            }
+        }
+    }
+    return flow;
+}
+
+template <class TWeight>
+bool GCGraph<TWeight>::inSourceSegment( int i )
+{
+    CV_Assert( i>=0 && i<(int)vtcs.size() );
+    return !vtcs[i].t; // t: 0 = source tree, 1 = sink tree (assigned in maxFlow())
+}
+
+}} // namespace detail, cv
+
+
+//! @endcond
+
+#endif  // OPENCV_IMGPROC_DETAIL_GCGRAPH_HPP
diff --git a/IPL/include/opencv/opencv2/imgproc/hal/hal.hpp b/IPL/include/opencv/opencv2/imgproc/hal/hal.hpp
new file mode 100644
index 0000000..ac20725
--- /dev/null
+++ b/IPL/include/opencv/opencv2/imgproc/hal/hal.hpp
@@ -0,0 +1,241 @@
+#ifndef CV_IMGPROC_HAL_HPP
+#define CV_IMGPROC_HAL_HPP
+
+#include "opencv2/core/cvdef.h"
+#include "opencv2/core/cvstd.hpp"
+#include "opencv2/core/hal/interface.h"
+
+namespace cv { namespace hal {
+
+//! @addtogroup imgproc_hal_functions
+//! @{
+
+//---------------------------
+//! @cond IGNORED
+
+struct CV_EXPORTS Filter2D // abstract interface (apply() is pure virtual); sits inside the @cond IGNORED section
+{
+    CV_DEPRECATED static Ptr<hal::Filter2D> create(uchar * , size_t , int , // deprecated factory; parameter names are not given here
+                                     int , int ,
+                                     int , int ,
+                                     int , int ,
+                                     int , double ,
+                                     int , int ,
+                                     bool , bool );
+    virtual void apply(uchar * , size_t , // NOTE(review): the two (uchar*, size_t) pairs look like src/dst data+step - confirm
+                       uchar * , size_t ,
+                       int , int ,
+                       int , int ,
+                       int , int ) = 0;
+    virtual ~Filter2D() {} // virtual: create() hands instances out as Ptr<hal::Filter2D>
+};
+
+struct CV_EXPORTS SepFilter2D // abstract interface (apply() is pure virtual); sits inside the @cond IGNORED section
+{
+    CV_DEPRECATED static Ptr<hal::SepFilter2D> create(int , int , int , // deprecated factory; parameter names are not given here
+                                        uchar * , int ,
+                                        uchar * , int ,
+                                        int , int ,
+                                        double , int );
+    virtual void apply(uchar * , size_t , // NOTE(review): the two (uchar*, size_t) pairs look like src/dst data+step - confirm
+                       uchar * , size_t ,
+                       int , int ,
+                       int , int ,
+                       int , int ) = 0;
+    virtual ~SepFilter2D() {} // virtual: create() hands instances out as Ptr<hal::SepFilter2D>
+};
+
+
+struct CV_EXPORTS Morph // abstract interface (apply() is pure virtual); sits inside the @cond IGNORED section
+{
+    CV_DEPRECATED static Ptr<hal::Morph> create(int , int , int , int , int , // deprecated factory; parameter names are not given here
+                                    int , uchar * , size_t ,
+                                    int , int ,
+                                    int , int ,
+                                    int , const double *,
+                                    int , bool , bool );
+    virtual void apply(uchar * , size_t , uchar * , size_t , int , int , // NOTE(review): the two (uchar*, size_t) pairs look like src/dst data+step - confirm
+                       int , int , int , int ,
+                       int , int , int , int ) = 0;
+    virtual ~Morph() {} // virtual: create() hands instances out as Ptr<hal::Morph>
+};
+
+//! @endcond
+//---------------------------
+
+CV_EXPORTS void filter2D(int stype, int dtype, int kernel_type,
+                         uchar * src_data, size_t src_step,
+                         uchar * dst_data, size_t dst_step,
+                         int width, int height,
+                         int full_width, int full_height,
+                         int offset_x, int offset_y,
+                         uchar * kernel_data, size_t kernel_step,
+                         int kernel_width, int kernel_height,
+                         int anchor_x, int anchor_y,
+                         double delta, int borderType,
+                         bool isSubmatrix);
+
+CV_EXPORTS void sepFilter2D(int stype, int dtype, int ktype,
+                            uchar * src_data, size_t src_step,
+                            uchar * dst_data, size_t dst_step,
+                            int width, int height,
+                            int full_width, int full_height,
+                            int offset_x, int offset_y,
+                            uchar * kernelx_data, int kernelx_len,
+                            uchar * kernely_data, int kernely_len,
+                            int anchor_x, int anchor_y,
+                            double delta, int borderType);
+
+CV_EXPORTS void morph(int op, int src_type, int dst_type,
+                      uchar * src_data, size_t src_step,
+                      uchar * dst_data, size_t dst_step,
+                      int width, int height,
+                      int roi_width, int roi_height, int roi_x, int roi_y,
+                      int roi_width2, int roi_height2, int roi_x2, int roi_y2,
+                      int kernel_type, uchar * kernel_data, size_t kernel_step,
+                      int kernel_width, int kernel_height, int anchor_x, int anchor_y,
+                      int borderType, const double borderValue[4],
+                      int iterations, bool isSubmatrix);
+
+
+CV_EXPORTS void resize(int src_type,
+                       const uchar * src_data, size_t src_step, int src_width, int src_height,
+                       uchar * dst_data, size_t dst_step, int dst_width, int dst_height,
+                       double inv_scale_x, double inv_scale_y, int interpolation);
+
+CV_EXPORTS void warpAffine(int src_type,
+                           const uchar * src_data, size_t src_step, int src_width, int src_height,
+                           uchar * dst_data, size_t dst_step, int dst_width, int dst_height,
+                           const double M[6], int interpolation, int borderType, const double borderValue[4]);
+
+CV_EXPORTS void warpPerspective(int src_type,
+                               const uchar * src_data, size_t src_step, int src_width, int src_height,
+                               uchar * dst_data, size_t dst_step, int dst_width, int dst_height,
+                               const double M[9], int interpolation, int borderType, const double borderValue[4]);
+
+CV_EXPORTS void cvtBGRtoBGR(const uchar * src_data, size_t src_step,
+                            uchar * dst_data, size_t dst_step,
+                            int width, int height,
+                            int depth, int scn, int dcn, bool swapBlue);
+
+CV_EXPORTS void cvtBGRtoBGR5x5(const uchar * src_data, size_t src_step,
+                               uchar * dst_data, size_t dst_step,
+                               int width, int height,
+                               int scn, bool swapBlue, int greenBits);
+
+CV_EXPORTS void cvtBGR5x5toBGR(const uchar * src_data, size_t src_step,
+                               uchar * dst_data, size_t dst_step,
+                               int width, int height,
+                               int dcn, bool swapBlue, int greenBits);
+
+CV_EXPORTS void cvtBGRtoGray(const uchar * src_data, size_t src_step,
+                             uchar * dst_data, size_t dst_step,
+                             int width, int height,
+                             int depth, int scn, bool swapBlue);
+
+CV_EXPORTS void cvtGraytoBGR(const uchar * src_data, size_t src_step,
+                             uchar * dst_data, size_t dst_step,
+                             int width, int height,
+                             int depth, int dcn);
+
+CV_EXPORTS void cvtBGR5x5toGray(const uchar * src_data, size_t src_step,
+                                uchar * dst_data, size_t dst_step,
+                                int width, int height,
+                                int greenBits);
+
+CV_EXPORTS void cvtGraytoBGR5x5(const uchar * src_data, size_t src_step,
+                                uchar * dst_data, size_t dst_step,
+                                int width, int height,
+                                int greenBits);
+CV_EXPORTS void cvtBGRtoYUV(const uchar * src_data, size_t src_step,
+                            uchar * dst_data, size_t dst_step,
+                            int width, int height,
+                            int depth, int scn, bool swapBlue, bool isCbCr);
+
+CV_EXPORTS void cvtYUVtoBGR(const uchar * src_data, size_t src_step,
+                            uchar * dst_data, size_t dst_step,
+                            int width, int height,
+                            int depth, int dcn, bool swapBlue, bool isCbCr);
+
+CV_EXPORTS void cvtBGRtoXYZ(const uchar * src_data, size_t src_step,
+                            uchar * dst_data, size_t dst_step,
+                            int width, int height,
+                            int depth, int scn, bool swapBlue);
+
+CV_EXPORTS void cvtXYZtoBGR(const uchar * src_data, size_t src_step,
+                            uchar * dst_data, size_t dst_step,
+                            int width, int height,
+                            int depth, int dcn, bool swapBlue);
+
+CV_EXPORTS void cvtBGRtoHSV(const uchar * src_data, size_t src_step,
+                            uchar * dst_data, size_t dst_step,
+                            int width, int height,
+                            int depth, int scn, bool swapBlue, bool isFullRange, bool isHSV);
+
+CV_EXPORTS void cvtHSVtoBGR(const uchar * src_data, size_t src_step,
+                            uchar * dst_data, size_t dst_step,
+                            int width, int height,
+                            int depth, int dcn, bool swapBlue, bool isFullRange, bool isHSV);
+
+CV_EXPORTS void cvtBGRtoLab(const uchar * src_data, size_t src_step,
+                            uchar * dst_data, size_t dst_step,
+                            int width, int height,
+                            int depth, int scn, bool swapBlue, bool isLab, bool srgb);
+
+CV_EXPORTS void cvtLabtoBGR(const uchar * src_data, size_t src_step,
+                            uchar * dst_data, size_t dst_step,
+                            int width, int height,
+                            int depth, int dcn, bool swapBlue, bool isLab, bool srgb);
+
+CV_EXPORTS void cvtTwoPlaneYUVtoBGR(const uchar * src_data, size_t src_step,
+                                    uchar * dst_data, size_t dst_step,
+                                    int dst_width, int dst_height,
+                                    int dcn, bool swapBlue, int uIdx);
+
+//! Separate Y and UV planes
+CV_EXPORTS void cvtTwoPlaneYUVtoBGR(const uchar * y_data, const uchar * uv_data, size_t src_step,
+                                    uchar * dst_data, size_t dst_step,
+                                    int dst_width, int dst_height,
+                                    int dcn, bool swapBlue, int uIdx);
+
+CV_EXPORTS void cvtThreePlaneYUVtoBGR(const uchar * src_data, size_t src_step,
+                                      uchar * dst_data, size_t dst_step,
+                                      int dst_width, int dst_height,
+                                      int dcn, bool swapBlue, int uIdx);
+
+CV_EXPORTS void cvtBGRtoThreePlaneYUV(const uchar * src_data, size_t src_step,
+                                      uchar * dst_data, size_t dst_step,
+                                      int width, int height,
+                                      int scn, bool swapBlue, int uIdx);
+
+//! Separate Y and UV planes
+CV_EXPORTS void cvtBGRtoTwoPlaneYUV(const uchar * src_data, size_t src_step,
+                                    uchar * y_data, uchar * uv_data, size_t dst_step,
+                                    int width, int height,
+                                    int scn, bool swapBlue, int uIdx);
+
+CV_EXPORTS void cvtOnePlaneYUVtoBGR(const uchar * src_data, size_t src_step,
+                                    uchar * dst_data, size_t dst_step,
+                                    int width, int height,
+                                    int dcn, bool swapBlue, int uIdx, int ycn);
+
+CV_EXPORTS void cvtRGBAtoMultipliedRGBA(const uchar * src_data, size_t src_step,
+                                        uchar * dst_data, size_t dst_step,
+                                        int width, int height);
+
+CV_EXPORTS void cvtMultipliedRGBAtoRGBA(const uchar * src_data, size_t src_step,
+                                        uchar * dst_data, size_t dst_step,
+                                        int width, int height);
+
+CV_EXPORTS void integral(int depth, int sdepth, int sqdepth,
+                         const uchar* src, size_t srcstep,
+                         uchar* sum, size_t sumstep,
+                         uchar* sqsum, size_t sqsumstep,
+                         uchar* tilted, size_t tstep,
+                         int width, int height, int cn);
+
+//! @}
+
+}}
+
+#endif // CV_IMGPROC_HAL_HPP
diff --git a/IPL/include/opencv/opencv2/imgproc/hal/interface.h b/IPL/include/opencv/opencv2/imgproc/hal/interface.h
new file mode 100644
index 0000000..f8dbcfe
--- /dev/null
+++ b/IPL/include/opencv/opencv2/imgproc/hal/interface.h
@@ -0,0 +1,46 @@
+#ifndef OPENCV_IMGPROC_HAL_INTERFACE_H
+#define OPENCV_IMGPROC_HAL_INTERFACE_H
+
+//! @addtogroup imgproc_hal_interface
+//! @{
+
+//! @name Interpolation modes
+//! @sa cv::InterpolationFlags
+//! @{
+#define CV_HAL_INTER_NEAREST 0
+#define CV_HAL_INTER_LINEAR 1
+#define CV_HAL_INTER_CUBIC 2
+#define CV_HAL_INTER_AREA 3
+#define CV_HAL_INTER_LANCZOS4 4
+//! @}
+
+//! @name Morphology operations
+//! @sa cv::MorphTypes
+//! @{
+#define CV_HAL_MORPH_ERODE 0
+#define CV_HAL_MORPH_DILATE 1
+//! @}
+
+//! @name Threshold types
+//! @sa cv::ThresholdTypes
+//! @{
+#define CV_HAL_THRESH_BINARY      0
+#define CV_HAL_THRESH_BINARY_INV  1
+#define CV_HAL_THRESH_TRUNC       2
+#define CV_HAL_THRESH_TOZERO      3
+#define CV_HAL_THRESH_TOZERO_INV  4
+#define CV_HAL_THRESH_MASK        7
+#define CV_HAL_THRESH_OTSU        8
+#define CV_HAL_THRESH_TRIANGLE    16
+//! @}
+
+//! @name Adaptive threshold algorithm
+//! @sa cv::AdaptiveThresholdTypes
+//! @{
+#define CV_HAL_ADAPTIVE_THRESH_MEAN_C     0
+#define CV_HAL_ADAPTIVE_THRESH_GAUSSIAN_C 1
+//! @}
+
+//! @}
+
+#endif
diff --git a/IPL/include/opencv/opencv2/imgproc/imgproc_c.h b/IPL/include/opencv/opencv2/imgproc/imgproc_c.h
index 87518d7..13d7cbb 100644
--- a/IPL/include/opencv/opencv2/imgproc/imgproc_c.h
+++ b/IPL/include/opencv/opencv2/imgproc/imgproc_c.h
@@ -40,8 +40,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_IMGPROC_IMGPROC_C_H__
-#define __OPENCV_IMGPROC_IMGPROC_C_H__
+#ifndef OPENCV_IMGPROC_IMGPROC_C_H
+#define OPENCV_IMGPROC_IMGPROC_C_H
 
 #include "opencv2/imgproc/types_c.h"
 
@@ -174,7 +174,7 @@ CVAPI(void)  cvReleasePyramid( CvMat*** pyramid, int extra_layers );
 */
 CVAPI(void) cvPyrMeanShiftFiltering( const CvArr* src, CvArr* dst,
     double sp, double sr, int max_level CV_DEFAULT(1),
-    CvTermCriteria termcrit CV_DEFAULT(cvTermCriteria(CV_TERMCRIT_ITER+CV_TERMCRIT_EPS,5,1)));
+    CvTermCriteria termcrit CV_DEFAULT(cvTermCriteria(cv::TermCriteria::MAX_ITER+cv::TermCriteria::EPS,5,1)));
 
 /** @brief Segments image using seed "markers"
 @see cv::watershed
@@ -260,52 +260,19 @@ CVAPI(void)  cvConvertMaps( const CvArr* mapx, const CvArr* mapy,
                             CvArr* mapxy, CvArr* mapalpha );
 
 /** @brief Performs forward or inverse log-polar image transform
-@see cv::logPolar
+@see cv::warpPolar
 */
 CVAPI(void)  cvLogPolar( const CvArr* src, CvArr* dst,
                          CvPoint2D32f center, double M,
                          int flags CV_DEFAULT(CV_INTER_LINEAR+CV_WARP_FILL_OUTLIERS));
 
 /** Performs forward or inverse linear-polar image transform
-@see cv::linearPolar
+@see cv::warpPolar
 */
 CVAPI(void)  cvLinearPolar( const CvArr* src, CvArr* dst,
                          CvPoint2D32f center, double maxRadius,
                          int flags CV_DEFAULT(CV_INTER_LINEAR+CV_WARP_FILL_OUTLIERS));
 
-/** @brief Transforms the input image to compensate lens distortion
-@see cv::undistort
-*/
-CVAPI(void) cvUndistort2( const CvArr* src, CvArr* dst,
-                          const CvMat* camera_matrix,
-                          const CvMat* distortion_coeffs,
-                          const CvMat* new_camera_matrix CV_DEFAULT(0) );
-
-/** @brief Computes transformation map from intrinsic camera parameters
-   that can used by cvRemap
-*/
-CVAPI(void) cvInitUndistortMap( const CvMat* camera_matrix,
-                                const CvMat* distortion_coeffs,
-                                CvArr* mapx, CvArr* mapy );
-
-/** @brief Computes undistortion+rectification map for a head of stereo camera
-@see cv::initUndistortRectifyMap
-*/
-CVAPI(void) cvInitUndistortRectifyMap( const CvMat* camera_matrix,
-                                       const CvMat* dist_coeffs,
-                                       const CvMat *R, const CvMat* new_camera_matrix,
-                                       CvArr* mapx, CvArr* mapy );
-
-/** @brief Computes the original (undistorted) feature coordinates
-   from the observed (distorted) coordinates
-@see cv::undistortPoints
-*/
-CVAPI(void) cvUndistortPoints( const CvMat* src, CvMat* dst,
-                               const CvMat* camera_matrix,
-                               const CvMat* dist_coeffs,
-                               const CvMat* R CV_DEFAULT(0),
-                               const CvMat* P CV_DEFAULT(0));
-
 /** @brief Returns a structuring element of the specified size and shape for morphological operations.
 
 @note the created structuring element IplConvKernel\* element must be released in the end using
@@ -982,10 +949,9 @@ CVAPI(void)  cvFitLine( const CvArr* points, int dist_type, double param,
 *       If a drawn figure is partially or completely outside of the image, it is clipped.*
 \****************************************************************************************/
 
-#define CV_RGB( r, g, b )  cvScalar( (b), (g), (r), 0 )
 #define CV_FILLED -1
 
-#define CV_AA 16
+#define CV_AA 16 /* antialiased line type, legacy C-API name (same value as cv::LINE_AA) */
 
 /** @brief Draws 4-connected, 8-connected or antialiased line segment connecting two points
 @see cv::line
@@ -1037,9 +1003,10 @@ CV_INLINE  void  cvEllipseBox( CvArr* img, CvBox2D box, CvScalar color,
                                int thickness CV_DEFAULT(1),
                                int line_type CV_DEFAULT(8), int shift CV_DEFAULT(0) )
 {
-    CvSize axes;
-    axes.width = cvRound(box.size.width*0.5);
-    axes.height = cvRound(box.size.height*0.5);
+    CvSize axes = cvSize(
+        cvRound(box.size.width*0.5),
+        cvRound(box.size.height*0.5)
+    );
 
     cvEllipse( img, cvPointFrom32f( box.center ), axes, box.angle,
                0, 360, color, thickness, line_type, shift );
@@ -1157,7 +1124,7 @@ CVAPI(void)  cvInitFont( CvFont* font, int font_face,
 CV_INLINE CvFont cvFont( double scale, int thickness CV_DEFAULT(1) )
 {
     CvFont font;
-    cvInitFont( &font, CV_FONT_HERSHEY_PLAIN, scale, scale, 0, thickness, CV_AA );
+    cvInitFont( &font, CV_FONT_HERSHEY_PLAIN, scale, scale, 0, thickness, CV_AA ); /* C-API macro: keeps this header usable without imgproc.hpp */
     return font;
 }
 
@@ -1184,7 +1151,7 @@ CVAPI(CvScalar)  cvColorToScalar( double packed_color, int arrtype );
 /** @brief Returns the polygon points which make up the given ellipse.
 
 The ellipse is define by the box of size 'axes' rotated 'angle' around the 'center'. A partial
-sweep of the ellipse arc can be done by spcifying arc_start and arc_end to be something other than
+sweep of the ellipse arc can be done by specifying arc_start and arc_end to be something other than
 0 and 360, respectively. The input array 'pts' must be large enough to hold the result. The total
 number of points stored into 'pts' is returned by this function.
 @see cv::ellipse2Poly
diff --git a/IPL/include/opencv/opencv2/imgproc/types_c.h b/IPL/include/opencv/opencv2/imgproc/types_c.h
index 5ecb460..d3e55f5 100644
--- a/IPL/include/opencv/opencv2/imgproc/types_c.h
+++ b/IPL/include/opencv/opencv2/imgproc/types_c.h
@@ -40,8 +40,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_IMGPROC_TYPES_C_H__
-#define __OPENCV_IMGPROC_TYPES_C_H__
+#ifndef OPENCV_IMGPROC_TYPES_C_H
+#define OPENCV_IMGPROC_TYPES_C_H
 
 #include "opencv2/core/core_c.h"
 
@@ -349,7 +349,17 @@ enum
     CV_BayerRG2RGB_EA = CV_BayerBG2BGR_EA,
     CV_BayerGR2RGB_EA = CV_BayerGB2BGR_EA,
 
-    CV_COLORCVT_MAX  = 139
+    CV_BayerBG2BGRA =139,
+    CV_BayerGB2BGRA =140,
+    CV_BayerRG2BGRA =141,
+    CV_BayerGR2BGRA =142,
+
+    CV_BayerBG2RGBA =CV_BayerRG2BGRA,
+    CV_BayerGB2RGBA =CV_BayerGR2BGRA,
+    CV_BayerRG2RGBA =CV_BayerBG2BGRA,
+    CV_BayerGR2RGBA =CV_BayerGB2BGRA,
+
+    CV_COLORCVT_MAX  = 143
 };
 
 
@@ -400,7 +410,7 @@ typedef struct CvMoments
     double  mu20, mu11, mu02, mu30, mu21, mu12, mu03; /**< central moments */
     double  inv_sqrt_m00; /**< m00 != 0 ? 1/sqrt(m00) : 0 */
 
-#ifdef __cplusplus
+#if defined(CV__ENABLE_C_API_CTORS) && defined(__cplusplus)
     CvMoments(){}
     CvMoments(const cv::Moments& m)
     {
@@ -420,6 +430,36 @@ typedef struct CvMoments
 }
 CvMoments;
 
+#ifdef __cplusplus
+} // extern "C"
+/** Returns a CvMoments value that is not derived from any source moments. */
+CV_INLINE CvMoments cvMoments()
+{
+#if !defined(CV__ENABLE_C_API_CTORS)  // CvMoments declares no constructors in this configuration
+    CvMoments self = CV_STRUCT_INITIALIZER; return self;  // so initialise it through the CV_STRUCT_INITIALIZER macro
+#else
+    return CvMoments();  // constructors are compiled in: use the default one
+#endif
+}
+/** Converts a cv::Moments value @p m into the C-API CvMoments struct. */
+CV_INLINE CvMoments cvMoments(const cv::Moments& m)
+{
+#if !defined(CV__ENABLE_C_API_CTORS)  // no converting constructor available: fill the struct by hand
+    double am00 = std::abs(m.m00);
+    CvMoments self = {
+        m.m00, m.m10, m.m01, m.m20, m.m11, m.m02, m.m30, m.m21, m.m12, m.m03,
+        m.mu20, m.mu11, m.mu02, m.mu30, m.mu21, m.mu12, m.mu03,
+        am00 > DBL_EPSILON ? 1./std::sqrt(am00) : 0  // last member: 1/sqrt(|m00|), or 0 when |m00| <= DBL_EPSILON
+    };
+    return self;
+#else
+    return CvMoments(m);  // delegate to the struct's converting constructor
+#endif
+}
+
+extern "C" {
+#endif // __cplusplus
+
 /** Hu invariants */
 typedef struct CvHuMoments
 {
@@ -491,15 +531,8 @@ enum
     CV_POLY_APPROX_DP = 0
 };
 
-/** @brief Shape matching methods
-
-\f$A\f$ denotes object1,\f$B\f$ denotes object2
-
-\f$\begin{array}{l} m^A_i =  \mathrm{sign} (h^A_i)  \cdot \log{h^A_i} \\ m^B_i =  \mathrm{sign} (h^B_i)  \cdot \log{h^B_i} \end{array}\f$
-
-and \f$h^A_i, h^B_i\f$ are the Hu moments of \f$A\f$ and \f$B\f$ , respectively.
-*/
-enum ShapeMatchModes
+/** Shape matching methods */
+enum
 {
     CV_CONTOURS_MATCH_I1  =1, //!< \f[I_1(A,B) =  \sum _{i=1...7}  \left |  \frac{1}{m^A_i} -  \frac{1}{m^B_i} \right |\f]
     CV_CONTOURS_MATCH_I2  =2, //!< \f[I_2(A,B) =  \sum _{i=1...7}  \left | m^A_i - m^B_i  \right |\f]
diff --git a/IPL/include/opencv/opencv2/intensity_transform.hpp b/IPL/include/opencv/opencv2/intensity_transform.hpp
new file mode 100644
index 0000000..12447f5
--- /dev/null
+++ b/IPL/include/opencv/opencv2/intensity_transform.hpp
@@ -0,0 +1,78 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_INTENSITY_TRANSFORM_H
+#define OPENCV_INTENSITY_TRANSFORM_H
+
+#include "opencv2/core.hpp"
+
+/**
+ * @defgroup intensity_transform The module brings implementations of intensity transformation algorithms to adjust image contrast.
+ *
+ * Namespace for all functions is cv::intensity_transform.
+ *
+ * ### Supported Algorithms
+ * - Autoscaling
+ * - Log Transformations
+ * - Power-Law (Gamma) Transformations
+ * - Contrast Stretching
+ *
+ * Reference from following book and websites:
+ * - Digital Image Processing 4th Edition Chapter 3 [Rafael C. Gonzalez, Richard E. Woods] @cite Gonzalez2018
+ * - http://www.cs.uregina.ca/Links/class-info/425/Lab3/ @cite lcs435lab
+ * - https://theailearner.com/2019/01/30/contrast-stretching/ @cite theailearner
+*/
+
+namespace cv {
+namespace intensity_transform {
+
+//! @addtogroup intensity_transform
+//! @{
+
+/**
+ * @brief Given an input bgr or grayscale image and constant c, apply log transformation to the image
+ * on domain [0, 255] and return the resulting image.
+ *
+ * @param input input bgr or grayscale image.
+ * @param output resulting image of log transformations.
+*/
+CV_EXPORTS_W void logTransform(const Mat input, Mat& output);
+
+/**
+ * @brief Given an input bgr or grayscale image and constant gamma, apply power-law transformation,
+ * a.k.a. gamma correction to the image on domain [0, 255] and return the resulting image.
+ *
+ * @param input input bgr or grayscale image.
+ * @param output resulting image of gamma corrections.
+ * @param gamma constant in c*r^gamma where r is pixel value.
+*/
+CV_EXPORTS_W void gammaCorrection(const Mat input, Mat& output, const float gamma);
+
+/**
+ * @brief Given an input bgr or grayscale image, apply autoscaling on domain [0, 255] to increase
+ * the contrast of the input image and return the resulting image.
+ *
+ * @param input input bgr or grayscale image.
+ * @param output resulting image of autoscaling.
+*/
+CV_EXPORTS_W void autoscaling(const Mat input, Mat& output);
+
+/**
+ * @brief Given an input bgr or grayscale image, apply linear contrast stretching on domain [0, 255]
+ * and return the resulting image.
+ *
+ * @param input input bgr or grayscale image.
+ * @param output resulting image of contrast stretching.
+ * @param r1 x coordinate of first point (r1, s1) in the transformation function.
+ * @param s1 y coordinate of first point (r1, s1) in the transformation function.
+ * @param r2 x coordinate of second point (r2, s2) in the transformation function.
+ * @param s2 y coordinate of second point (r2, s2) in the transformation function.
+*/
+CV_EXPORTS_W void contrastStretching(const Mat input, Mat& output, const int r1, const int s1, const int r2, const int s2);
+
+//! @}
+
+}} // cv::intensity_transform::
+
+#endif
\ No newline at end of file
diff --git a/IPL/include/opencv/opencv2/line_descriptor/descriptor.hpp b/IPL/include/opencv/opencv2/line_descriptor/descriptor.hpp
index 65c4395..15e8fe7 100644
--- a/IPL/include/opencv/opencv2/line_descriptor/descriptor.hpp
+++ b/IPL/include/opencv/opencv2/line_descriptor/descriptor.hpp
@@ -56,10 +56,7 @@
 #include <iostream>
 
 #include "opencv2/core/utility.hpp"
-//#include "opencv2/core/private.hpp"
 #include <opencv2/imgproc.hpp>
-#include <opencv2/features2d.hpp>
-#include <opencv2/highgui.hpp>
 #include "opencv2/core.hpp"
 
 /* define data types */
@@ -102,73 +99,73 @@ Apart from fields inspired to KeyPoint class, KeyLines stores information about
 original image and in octave it was extracted from, about line's length and number of pixels it
 covers.
  */
-struct CV_EXPORTS KeyLine
+struct CV_EXPORTS_W_SIMPLE KeyLine
 {
  public:
   /** orientation of the line */
-  float angle;
+  CV_PROP_RW float angle;
 
   /** object ID, that can be used to cluster keylines by the line they represent */
-  int class_id;
+  CV_PROP_RW int class_id;
 
   /** octave (pyramid layer), from which the keyline has been extracted */
-  int octave;
+  CV_PROP_RW int octave;
 
   /** coordinates of the middlepoint */
-  Point2f pt;
+  CV_PROP_RW Point2f pt;
 
   /** the response, by which the strongest keylines have been selected.
    It's represented by the ratio between line's length and maximum between
    image's width and height */
-  float response;
+  CV_PROP_RW float response;
 
   /** minimum area containing line */
-  float size;
+  CV_PROP_RW float size;
 
   /** lines's extremes in original image */
-  float startPointX;
-  float startPointY;
-  float endPointX;
-  float endPointY;
+  CV_PROP_RW float startPointX;
+  CV_PROP_RW float startPointY;
+  CV_PROP_RW float endPointX;
+  CV_PROP_RW float endPointY;
 
   /** line's extremes in image it was extracted from */
-  float sPointInOctaveX;
-  float sPointInOctaveY;
-  float ePointInOctaveX;
-  float ePointInOctaveY;
+  CV_PROP_RW float sPointInOctaveX;
+  CV_PROP_RW float sPointInOctaveY;
+  CV_PROP_RW float ePointInOctaveX;
+  CV_PROP_RW float ePointInOctaveY;
 
   /** the length of line */
-  float lineLength;
+  CV_PROP_RW float lineLength;
 
   /** number of pixels covered by the line */
-  int numOfPixels;
+  CV_PROP_RW int numOfPixels;
 
   /** Returns the start point of the line in the original image */
-  Point2f getStartPoint() const
+  CV_WRAP Point2f getStartPoint() const
   {
     return Point2f(startPointX, startPointY);
   }
 
   /** Returns the end point of the line in the original image */
-  Point2f getEndPoint() const
+  CV_WRAP Point2f getEndPoint() const
   {
     return Point2f(endPointX, endPointY);
   }
 
   /** Returns the start point of the line in the octave it was extracted from */
-  Point2f getStartPointInOctave() const
+  CV_WRAP Point2f getStartPointInOctave() const
   {
     return Point2f(sPointInOctaveX, sPointInOctaveY);
   }
 
   /** Returns the end point of the line in the octave it was extracted from */
-  Point2f getEndPointInOctave() const
+  CV_WRAP Point2f getEndPointInOctave() const
   {
     return Point2f(ePointInOctaveX, ePointInOctaveY);
   }
 
   /** constructor */
-  KeyLine()
+  CV_WRAP KeyLine()
   {
   }
 };
@@ -180,7 +177,7 @@ Class' interface is mainly based on the ones of classical detectors and extracto
 Feature2d's @ref features2d_main and @ref features2d_match. Retrieved information about lines is
 stored in line_descriptor::KeyLine objects.
  */
-class CV_EXPORTS BinaryDescriptor : public Algorithm
+class CV_EXPORTS_W BinaryDescriptor : public Algorithm
 {
 
  public:
@@ -217,14 +214,14 @@ class CV_EXPORTS BinaryDescriptor : public Algorithm
   @param parameters configuration parameters BinaryDescriptor::Params
 
   If no argument is provided, constructor sets default values (see comments in the code snippet in
-  previous section). Default values are strongly reccomended.
+  previous section). Default values are strongly recommended.
   */
   BinaryDescriptor( const BinaryDescriptor::Params &parameters = BinaryDescriptor::Params() );
 
   /** @brief Create a BinaryDescriptor object with default parameters (or with the ones provided)
   and return a smart pointer to it
      */
-  static Ptr<BinaryDescriptor> createBinaryDescriptor();
+  CV_WRAP static Ptr<BinaryDescriptor> createBinaryDescriptor();
   static Ptr<BinaryDescriptor> createBinaryDescriptor( Params parameters );
 
   /** destructor */
@@ -232,37 +229,37 @@ class CV_EXPORTS BinaryDescriptor : public Algorithm
 
   /** @brief Get current number of octaves
   */
-  int getNumOfOctaves();/*CV_WRAP*/
+  CV_WRAP int getNumOfOctaves();
   /** @brief Set number of octaves
     @param octaves number of octaves
      */
-  void setNumOfOctaves( int octaves );/*CV_WRAP*/
+  CV_WRAP void setNumOfOctaves( int octaves );
   /** @brief Get current width of bands
     */
-  int getWidthOfBand();/*CV_WRAP*/
+  CV_WRAP int getWidthOfBand();
   /** @brief Set width of bands
     @param width width of bands
     */
-  void setWidthOfBand( int width );/*CV_WRAP*/
+  CV_WRAP void setWidthOfBand( int width );
   /** @brief Get current reduction ratio (used in Gaussian pyramids)
     */
-  int getReductionRatio();/*CV_WRAP*/
+  CV_WRAP int getReductionRatio();
   /** @brief Set reduction ratio (used in Gaussian pyramids)
     @param rRatio reduction ratio
      */
-  void setReductionRatio( int rRatio );
+  CV_WRAP void setReductionRatio( int rRatio );
 
   /** @brief Read parameters from a FileNode object and store them
 
     @param fn source FileNode file
      */
-  virtual void read( const cv::FileNode& fn );
+  virtual void read( const cv::FileNode& fn ) CV_OVERRIDE;
 
   /** @brief Store parameters to a FileStorage object
 
     @param fs output FileStorage file
      */
-  virtual void write( cv::FileStorage& fs ) const;
+  virtual void write( cv::FileStorage& fs ) const CV_OVERRIDE;
 
   /** @brief Requires line detection
 
@@ -270,7 +267,7 @@ class CV_EXPORTS BinaryDescriptor : public Algorithm
     @param keypoints vector that will store extracted lines for one or more images
     @param mask mask matrix to detect only KeyLines of interest
      */
-  void detect( const Mat& image, CV_OUT std::vector<KeyLine>& keypoints, const Mat& mask = Mat() );
+  CV_WRAP void detect( const Mat& image, CV_OUT std::vector<KeyLine>& keypoints, const Mat& mask = Mat() );
 
   /** @overload
 
@@ -288,7 +285,7 @@ class CV_EXPORTS BinaryDescriptor : public Algorithm
     @param descriptors
     @param returnFloatDescr flag (when set to true, original non-binary descriptors are returned)
      */
-  void compute( const Mat& image, CV_OUT CV_IN_OUT std::vector<KeyLine>& keylines, CV_OUT Mat& descriptors, bool returnFloatDescr = false ) const;
+  CV_WRAP void compute( const Mat& image, CV_OUT CV_IN_OUT std::vector<KeyLine>& keylines, CV_OUT Mat& descriptors, bool returnFloatDescr = false ) const;
 
   /** @overload
 
@@ -404,6 +401,12 @@ class CV_EXPORTS BinaryDescriptor : public Algorithm
     unsigned int octaveCount;
     //the decriptor of line
     std::vector<float> descriptor;
+    /** Default constructor: sets every numeric member named in the initialiser list to 0 and starts with an empty descriptor. */
+    OctaveSingleLine() : startPointX(0), startPointY(0), endPointX(0), endPointY(0),
+        sPointInOctaveX(0), sPointInOctaveY(0), ePointInOctaveX(0), ePointInOctaveY(0),
+        direction(0), salience(0), lineLength(0), numOfPixels(0), octaveCount(0),
+        descriptor(std::vector<float>())
+    {}
   };
 
   struct Pixel
@@ -429,15 +432,15 @@ class CV_EXPORTS BinaryDescriptor : public Algorithm
 
   typedef std::list<Pixel> PixelChain;  //each edge is a pixel chain
 
-  struct EDLineParam
+  struct CV_EXPORTS_W_SIMPLE EDLineParam
   {
-    int ksize;
-    float sigma;
-    float gradientThreshold;
-    float anchorThreshold;
-    int scanIntervals;
-    int minLineLen;
-    double lineFitErrThreshold;
+    CV_PROP_RW int ksize;
+    CV_PROP_RW float sigma;
+    CV_PROP_RW float gradientThreshold;
+    CV_PROP_RW float anchorThreshold;
+    CV_PROP_RW int scanIntervals;
+    CV_PROP_RW int minLineLen;
+    CV_PROP_RW double lineFitErrThreshold;
   };
 
   #define RELATIVE_ERROR_FACTOR   100.0
@@ -452,13 +455,19 @@ class CV_EXPORTS BinaryDescriptor : public Algorithm
    * PS: The linking step of edge detection has a little bit difference with the Edge drawing algorithm
    *     described in the paper. The edge chain doesn't stop when the pixel direction is changed.
    */
-  class EDLineDetector
+  class CV_EXPORTS_W EDLineDetector
   {
    public:
-    EDLineDetector();
-    EDLineDetector( EDLineParam param );
+    CV_WRAP EDLineDetector();
+    CV_WRAP_AS(EDLineDetectorWithParams) EDLineDetector( EDLineParam param );
     ~EDLineDetector();
 
+    /** @brief Creates an EDLineDetector object, using smart pointers.
+     */
+    CV_WRAP static Ptr<EDLineDetector> createEDLineDetector();
+
+    /** @overload Factory taking explicit EDLineParam settings; declared via CV_WRAP_AS under the name createEDLineDetectorWithParams. */
+    CV_WRAP_AS(createEDLineDetectorWithParams) static Ptr<EDLineDetector> createEDLineDetector(EDLineParam params);
     /*extract edges from image
      *image:    In, gray image;
      *edges:    Out, store the edges, each edge is a pixel chain
@@ -474,7 +483,7 @@ class CV_EXPORTS BinaryDescriptor : public Algorithm
     int EDline( cv::Mat &image, LineChains &lines );
 
     /** extract line from image, and store them */
-    int EDline( cv::Mat &image );
+    CV_WRAP int EDline( cv::Mat &image );
 
     cv::Mat dxImg_;  //store the dxImg;
 
@@ -619,211 +628,6 @@ class CV_EXPORTS BinaryDescriptor : public Algorithm
 
     cv::Mat_<float> tempVecLineFit;    //the vector used in line fit function;
 
-    /** Compare doubles by relative error.
-     The resulting rounding error after floating point computations
-     depend on the specific operations done. The same number computed by
-     different algorithms could present different rounding errors. For a
-     useful comparison, an estimation of the relative rounding error
-     should be considered and compared to a factor times EPS. The factor
-     should be related to the accumulated rounding error in the chain of
-     computation. Here, as a simplification, a fixed factor is used.
-     */
-    static int double_equal( double a, double b )
-    {
-      double abs_diff, aa, bb, abs_max;
-      /* trivial case */
-      if( a == b )
-        return true;
-      abs_diff = fabs( a - b );
-      aa = fabs( a );
-      bb = fabs( b );
-      abs_max = aa > bb ? aa : bb;
-
-      /* DBL_MIN is the smallest normalized number, thus, the smallest
-       number whose relative error is bounded by DBL_EPSILON. For
-       smaller numbers, the same quantization steps as for DBL_MIN
-       are used. Then, for smaller numbers, a meaningful "relative"
-       error should be computed by dividing the difference by DBL_MIN. */
-      if( abs_max < DBL_MIN )
-        abs_max = DBL_MIN;
-
-      /* equal if relative error <= factor x eps */
-      return ( abs_diff / abs_max ) <= ( RELATIVE_ERROR_FACTOR * DBL_EPSILON );
-    }
-
-    /** Computes the natural logarithm of the absolute value of
-     the gamma function of x using the Lanczos approximation.
-     See http://www.rskey.org/gamma.htm
-     The formula used is
-     @f[
-     \Gamma(x) = \frac{ \sum_{n=0}^{N} q_n x^n }{ \Pi_{n=0}^{N} (x+n) }
-     (x+5.5)^{x+0.5} e^{-(x+5.5)}
-     @f]
-     so
-     @f[
-     \log\Gamma(x) = \log\left( \sum_{n=0}^{N} q_n x^n \right)
-     + (x+0.5) \log(x+5.5) - (x+5.5) - \sum_{n=0}^{N} \log(x+n)
-     @f]
-     and
-     q0 = 75122.6331530,
-     q1 = 80916.6278952,
-     q2 = 36308.2951477,
-     q3 = 8687.24529705,
-     q4 = 1168.92649479,
-     q5 = 83.8676043424,
-     q6 = 2.50662827511.
-     */
-    static double log_gamma_lanczos( double x )
-    {
-      static double q[7] =
-      { 75122.6331530, 80916.6278952, 36308.2951477, 8687.24529705, 1168.92649479, 83.8676043424, 2.50662827511 };
-      double a = ( x + 0.5 ) * log( x + 5.5 ) - ( x + 5.5 );
-      double b = 0.0;
-      int n;
-      for ( n = 0; n < 7; n++ )
-      {
-        a -= log( x + (double) n );
-        b += q[n] * pow( x, (double) n );
-      }
-      return a + log( b );
-    }
-
-    /** Computes the natural logarithm of the absolute value of
-     the gamma function of x using Windschitl method.
-     See http://www.rskey.org/gamma.htm
-     The formula used is
-     @f[
-     \Gamma(x) = \sqrt{\frac{2\pi}{x}} \left( \frac{x}{e}
-     \sqrt{ x\sinh(1/x) + \frac{1}{810x^6} } \right)^x
-     @f]
-     so
-     @f[
-     \log\Gamma(x) = 0.5\log(2\pi) + (x-0.5)\log(x) - x
-     + 0.5x\log\left( x\sinh(1/x) + \frac{1}{810x^6} \right).
-     @f]
-     This formula is a good approximation when x > 15.
-     */
-    static double log_gamma_windschitl( double x )
-    {
-      return 0.918938533204673 + ( x - 0.5 ) * log( x ) - x + 0.5 * x * log( x * sinh( 1 / x ) + 1 / ( 810.0 * pow( x, 6.0 ) ) );
-    }
-
-    /** Computes -log10(NFA).
-     NFA stands for Number of False Alarms:
-     @f[
-     \mathrm{NFA} = NT \cdot B(n,k,p)
-     @f]
-     - NT       - number of tests
-     - B(n,k,p) - tail of binomial distribution with parameters n,k and p:
-     @f[
-     B(n,k,p) = \sum_{j=k}^n
-     \left(\begin{array}{c}n\\j\end{array}\right)
-     p^{j} (1-p)^{n-j}
-     @f]
-     The value -log10(NFA) is equivalent but more intuitive than NFA:
-     - -1 corresponds to 10 mean false alarms
-     -  0 corresponds to 1 mean false alarm
-     -  1 corresponds to 0.1 mean false alarms
-     -  2 corresponds to 0.01 mean false alarms
-     -  ...
-     Used this way, the bigger the value, better the detection,
-     and a logarithmic scale is used.
-     @param n,k,p binomial parameters.
-     @param logNT logarithm of Number of Tests
-     The computation is based in the gamma function by the following
-     relation:
-     @f[
-     \left(\begin{array}{c}n\\k\end{array}\right)
-     = \frac{ \Gamma(n+1) }{ \Gamma(k+1) \cdot \Gamma(n-k+1) }.
-     @f]
-     We use efficient algorithms to compute the logarithm of
-     the gamma function.
-     To make the computation faster, not all the sum is computed, part
-     of the terms are neglected based on a bound to the error obtained
-     (an error of 10% in the result is accepted).
-     */
-    static double nfa( int n, int k, double p, double logNT )
-    {
-      double tolerance = 0.1; /* an error of 10% in the result is accepted */
-      double log1term, term, bin_term, mult_term, bin_tail, err, p_term;
-      int i;
-
-      /* check parameters */
-      if( n < 0 || k < 0 || k > n || p <= 0.0 || p >= 1.0 )
-      {
-        std::cout << "nfa: wrong n, k or p values." << std::endl;
-        exit( 0 );
-      }
-      /* trivial cases */
-      if( n == 0 || k == 0 )
-        return -logNT;
-      if( n == k )
-        return -logNT - (double) n * log10( p );
-
-      /* probability term */
-      p_term = p / ( 1.0 - p );
-
-      /* compute the first term of the series */
-      /*
-       binomial_tail(n,k,p) = sum_{i=k}^n bincoef(n,i) * p^i * (1-p)^{n-i}
-       where bincoef(n,i) are the binomial coefficients.
-       But
-       bincoef(n,k) = gamma(n+1) / ( gamma(k+1) * gamma(n-k+1) ).
-       We use this to compute the first term. Actually the log of it.
-       */
-      log1term = log_gamma( (double) n + 1.0 )- log_gamma( (double ) k + 1.0 )- log_gamma( (double ) ( n - k ) + 1.0 )
-+ (double) k * log( p )
-+ (double) ( n - k ) * log( 1.0 - p );
-term = exp( log1term );
-
-/* in some cases no more computations are needed */
-if( double_equal( term, 0.0 ) )
-{ /* the first term is almost zero */
-  if( (double) k > (double) n * p ) /* at begin or end of the tail?  */
-  return -log1term / MLN10 - logNT; /* end: use just the first term  */
-  else
-  return -logNT; /* begin: the tail is roughly 1  */
-}
-
-/* compute more terms if needed */
-bin_tail = term;
-for ( i = k + 1; i <= n; i++ )
-{
-  /*    As
-   term_i = bincoef(n,i) * p^i * (1-p)^(n-i)
-   and
-   bincoef(n,i)/bincoef(n,i-1) = n-i+1 / i,
-   then,
-   term_i / term_i-1 = (n-i+1)/i * p/(1-p)
-   and
-   term_i = term_i-1 * (n-i+1)/i * p/(1-p).
-   p/(1-p) is computed only once and stored in 'p_term'.
-   */
-  bin_term = (double) ( n - i + 1 ) / (double) i;
-  mult_term = bin_term * p_term;
-  term *= mult_term;
-  bin_tail += term;
-  if( bin_term < 1.0 )
-  {
-    /* When bin_term<1 then mult_term_j<mult_term_i for j>i.
-     Then, the error on the binomial tail when truncated at
-     the i term can be bounded by a geometric series of form
-     term_i * sum mult_term_i^j.                            */
-    err = term * ( ( 1.0 - pow( mult_term, (double) ( n - i + 1 ) ) ) / ( 1.0 - mult_term ) - 1.0 );
-    /* One wants an error at most of tolerance*final_result, or:
-     tolerance * abs(-log10(bin_tail)-logNT).
-     Now, the error that can be accepted on bin_tail is
-     given by tolerance*final_result divided by the derivative
-     of -log10(x) when x=bin_tail. that is:
-     tolerance * abs(-log10(bin_tail)-logNT) / (1/bin_tail)
-     Finally, we truncate the tail if the error is less than:
-     tolerance * abs(-log10(bin_tail)-logNT) * bin_tail        */
-    if( err < tolerance * fabs( -log10( bin_tail ) - logNT ) * bin_tail )
-    break;
-  }
-}
-return -log10( bin_tail ) - logNT;
-}
 };
 
   // Specifies a vector of lines.
@@ -892,20 +696,49 @@ the one used in *BinaryDescriptor* class, data associated to a line's extremes i
 in octave it was extracted from, coincide. KeyLine's field *class_id* is used as an index to
 indicate the order of extraction of a line inside a single octave.
 */
-class CV_EXPORTS LSDDetector : public Algorithm
+struct CV_EXPORTS_W_SIMPLE LSDParam
+{
+  CV_PROP_RW double scale ;
+  CV_PROP_RW double sigma_scale;
+  CV_PROP_RW double quant;
+  CV_PROP_RW double ang_th;
+  CV_PROP_RW double log_eps;
+  CV_PROP_RW double density_th ;
+  CV_PROP_RW int n_bins ;
+
+
+CV_WRAP LSDParam():scale(0.8),
+                   sigma_scale(0.6),
+                   quant(2.0),
+                   ang_th(22.5),
+                   log_eps(0),
+                   density_th(0.7),
+                   n_bins(1024){}
+
+};
+
+class CV_EXPORTS_W LSDDetector : public Algorithm
 {
 public:
 
 /* constructor */
-/*CV_WRAP*/
-LSDDetector()
+CV_WRAP LSDDetector() : params()
+{
+}
+;
+
+CV_WRAP_AS(LSDDetectorWithParams) LSDDetector(LSDParam _params) : params(_params)
 {
 }
 ;
 
 /** @brief Creates ad LSDDetector object, using smart pointers.
  */
-static Ptr<LSDDetector> createLSDDetector();
+CV_WRAP static Ptr<LSDDetector> createLSDDetector();
+
+
+CV_WRAP_AS(createLSDDetectorWithParams) static Ptr<LSDDetector> createLSDDetector(LSDParam params);
+
 
 /** @brief Detect lines inside an image.
 
@@ -915,7 +748,7 @@ static Ptr<LSDDetector> createLSDDetector();
 @param numOctaves number of octaves inside pyramid
 @param mask mask matrix to detect only KeyLines of interest
  */
-void detect( const Mat& image, CV_OUT std::vector<KeyLine>& keypoints, int scale, int numOctaves, const Mat& mask = Mat() );
+CV_WRAP void detect( const Mat& image, CV_OUT std::vector<KeyLine>& keypoints, int scale, int numOctaves, const Mat& mask = Mat() );
 
 /** @overload
 @param images input images
@@ -924,7 +757,7 @@ void detect( const Mat& image, CV_OUT std::vector<KeyLine>& keypoints, int scale
 @param numOctaves number of octaves inside pyramid
 @param masks vector of mask matrices to detect only KeyLines of interest from each input image
 */
-void detect( const std::vector<Mat>& images, std::vector<std::vector<KeyLine> >& keylines, int scale, int numOctaves,
+CV_WRAP void detect( const std::vector<Mat>& images, std::vector<std::vector<KeyLine> >& keylines, int scale, int numOctaves,
 const std::vector<Mat>& masks = std::vector<Mat>() ) const;
 
 private:
@@ -936,6 +769,9 @@ void detectImpl( const Mat& imageSrc, std::vector<KeyLine>& keylines, int numOct
 
 /* matrices for Gaussian pyramids */
 std::vector<cv::Mat> gaussianPyrs;
+
+/* parameters */
+LSDParam params;
 };
 
 /** @brief furnishes all functionalities for querying a dataset provided by user or internal to
@@ -976,7 +812,7 @@ candidates \f$\mathcal{N}_i(\mathbf{q})\f$ is obtained. The union of sets
 of **q**. Then, last step of algorithm is computing the Hamming distance between **q** and each
 element in \f$\mathcal{N}(\mathbf{q})\f$, deleting the codes that are distant more that *r* from **q**.
 */
-class CV_EXPORTS BinaryDescriptorMatcher : public Algorithm
+class CV_EXPORTS_W BinaryDescriptorMatcher : public Algorithm
 {
 
 public:
@@ -988,7 +824,7 @@ or from the one internal to class
 @param matches vector to host retrieved matches
 @param mask mask to select which input descriptors must be matched to one in dataset
  */
-void match( const Mat& queryDescriptors, const Mat& trainDescriptors, std::vector<DMatch>& matches, const Mat& mask = Mat() ) const;
+CV_WRAP void match( const Mat& queryDescriptors, const Mat& trainDescriptors, CV_OUT std::vector<DMatch>& matches, const Mat& mask = Mat() ) const;
 
 /** @overload
 @param queryDescriptors query descriptors
@@ -997,7 +833,7 @@ void match( const Mat& queryDescriptors, const Mat& trainDescriptors, std::vecto
 (the *i*-th mask in vector indicates whether each input query can be matched with descriptors in
 dataset relative to *i*-th image)
 */
-void match( const Mat& queryDescriptors, std::vector<DMatch>& matches, const std::vector<Mat>& masks = std::vector<Mat>() );
+CV_WRAP_AS(matchQuery) void match( const Mat& queryDescriptors, CV_OUT std::vector<DMatch>& matches, const std::vector<Mat>& masks = std::vector<Mat>() );
 
 /** @brief For every input query descriptor, retrieve the best *k* matching ones from a dataset provided from
 user or from the one internal to class
@@ -1010,7 +846,7 @@ user or from the one internal to class
 @param compactResult flag to obtain a compact result (if true, a vector that doesn't contain any
 matches for a given query is not inserted in final result)
  */
-void knnMatch( const Mat& queryDescriptors, const Mat& trainDescriptors, std::vector<std::vector<DMatch> >& matches, int k, const Mat& mask = Mat(),
+CV_WRAP void knnMatch( const Mat& queryDescriptors, const Mat& trainDescriptors, CV_OUT std::vector<std::vector<DMatch> >& matches, int k, const Mat& mask = Mat(),
 bool compactResult = false ) const;
 
 /** @overload
@@ -1023,7 +859,7 @@ dataset relative to *i*-th image)
 @param compactResult flag to obtain a compact result (if true, a vector that doesn't contain any
 matches for a given query is not inserted in final result)
 */
-void knnMatch( const Mat& queryDescriptors, std::vector<std::vector<DMatch> >& matches, int k, const std::vector<Mat>& masks = std::vector<Mat>(),
+CV_WRAP_AS(knnMatchQuery) void knnMatch( const Mat& queryDescriptors, std::vector<std::vector<DMatch> >& matches, int k, const std::vector<Mat>& masks = std::vector<Mat>(),
 bool compactResult = false );
 
 /** @brief For every input query descriptor, retrieve, from a dataset provided from user or from the one
@@ -1076,13 +912,13 @@ static Ptr<BinaryDescriptorMatcher> createBinaryDescriptorMatcher();
 
 /** @brief Clear dataset and internal data
  */
-void clear();
+void clear() CV_OVERRIDE;
 
 /** @brief Constructor.
 
 The BinaryDescriptorMatcher constructed is able to store and manage 256-bits long entries.
  */
-BinaryDescriptorMatcher();
+CV_WRAP BinaryDescriptorMatcher();
 
 /** destructor */
 ~BinaryDescriptorMatcher()
@@ -1095,7 +931,7 @@ class BucketGroup
 
 public:
 /** constructor */
-BucketGroup();
+BucketGroup(bool needAllocateGroup = true);
 
 /** destructor */
 ~BucketGroup();
@@ -1126,7 +962,7 @@ class SparseHashtable
 static const int MAX_B;
 
 /** Bins (each bin is an Array object for duplicates of the same key) */
-BucketGroup *table;
+std::vector<BucketGroup> table;
 
 public:
 
@@ -1172,12 +1008,15 @@ length = 0;
 /** constructor setting sequence's length */
 bitarray( UINT64 _bits )
 {
+arr = NULL;
 init( _bits );
 }
 
 /** initializer of private fields */
 void init( UINT64 _bits )
 {
+if( arr )
+delete[] arr;
 length = (UINT32) ceil( _bits / 32.00 );
 arr = new UINT32[length];
 erase();
@@ -1248,13 +1087,13 @@ UINT64 N;
 cv::Mat codes;
 
 /** Counter for eliminating duplicate results (it is not thread safe) */
-bitarray *counter;
+Ptr<bitarray> counter;
 
 /** Array of m hashtables */
-SparseHashtable *H;
+std::vector<SparseHashtable> H;
 
 /** Volume of a b-bit Hamming ball with radius s (for s = 0 to d) */
-UINT32 *xornum;
+std::vector<UINT32> xornum;
 
 /** Used within generation of binary codes at a certain Hamming distance */
 int power[100];
@@ -1293,7 +1132,7 @@ Mat descriptorsMat;
 std::map<int, int> indexesMap;
 
 /** internal MiHaser representing dataset */
-Mihasher* dataset;
+Ptr<Mihasher> dataset;
 
 /** index from which next added descriptors' bunch must begin */
 int nextAddedIndex;
@@ -1311,9 +1150,9 @@ int descrInDS;
  -------------------------------------------------------------------------------------------- */
 
 /** struct for drawing options */
-struct CV_EXPORTS DrawLinesMatchesFlags
+struct CV_EXPORTS_W_SIMPLE DrawLinesMatchesFlags
 {
-enum
+CV_PROP_RW enum
 {
 DEFAULT = 0,  //!< Output image matrix will be created (Mat::create),
               //!< i.e. existing memory of output image may be reused.
@@ -1342,10 +1181,10 @@ NOT_DRAW_SINGLE_LINES = 2//!< Single keylines will not be drawn.
 @note If both *matchColor* and *singleLineColor* are set to their default values, function draws
 matched lines and line connecting them with same color
  */
-CV_EXPORTS void drawLineMatches( const Mat& img1, const std::vector<KeyLine>& keylines1, const Mat& img2, const std::vector<KeyLine>& keylines2,
-                                 const std::vector<DMatch>& matches1to2, Mat& outImg, const Scalar& matchColor = Scalar::all( -1 ),
-                                 const Scalar& singleLineColor = Scalar::all( -1 ), const std::vector<char>& matchesMask = std::vector<char>(),
-                                 int flags = DrawLinesMatchesFlags::DEFAULT );
+CV_EXPORTS_W void drawLineMatches( const Mat& img1, const std::vector<KeyLine>& keylines1, const Mat& img2, const std::vector<KeyLine>& keylines2,
+                                   const std::vector<DMatch>& matches1to2, CV_OUT Mat& outImg, const Scalar& matchColor = Scalar::all( -1 ),
+                                   const Scalar& singleLineColor = Scalar::all( -1 ), const std::vector<char>& matchesMask = std::vector<char>(),
+                                   int flags = DrawLinesMatchesFlags::DEFAULT );
 
 /** @brief Draws keylines.
 
@@ -1355,8 +1194,8 @@ CV_EXPORTS void drawLineMatches( const Mat& img1, const std::vector<KeyLine>& ke
 @param color color of lines to be drawn (if set to defaul value, color is chosen randomly)
 @param flags drawing flags
  */
-CV_EXPORTS void drawKeylines( const Mat& image, const std::vector<KeyLine>& keylines, Mat& outImage, const Scalar& color = Scalar::all( -1 ),
-                              int flags = DrawLinesMatchesFlags::DEFAULT );
+CV_EXPORTS_W void drawKeylines( const Mat& image, const std::vector<KeyLine>& keylines, CV_OUT Mat& outImage, const Scalar& color = Scalar::all( -1 ),
+                                int flags = DrawLinesMatchesFlags::DEFAULT );
 
 //! @}
 
diff --git a/IPL/include/opencv/opencv2/ml.hpp b/IPL/include/opencv/opencv2/ml.hpp
index 0b90269..7fdb460 100644
--- a/IPL/include/opencv/opencv2/ml.hpp
+++ b/IPL/include/opencv/opencv2/ml.hpp
@@ -41,8 +41,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_ML_HPP__
-#define __OPENCV_ML_HPP__
+#ifndef OPENCV_ML_HPP
+#define OPENCV_ML_HPP
 
 #ifdef __cplusplus
 #  include "opencv2/core.hpp"
@@ -104,7 +104,7 @@ enum SampleTypes
 It is used for optimizing statmodel accuracy by varying model parameters, the accuracy estimate
 being computed by cross-validation.
  */
-class CV_EXPORTS ParamGrid
+class CV_EXPORTS_W ParamGrid
 {
 public:
     /** @brief Default constructor */
@@ -112,17 +112,25 @@ class CV_EXPORTS ParamGrid
     /** @brief Constructor with parameters */
     ParamGrid(double _minVal, double _maxVal, double _logStep);
 
-    double minVal; //!< Minimum value of the statmodel parameter. Default value is 0.
-    double maxVal; //!< Maximum value of the statmodel parameter. Default value is 0.
+    CV_PROP_RW double minVal; //!< Minimum value of the statmodel parameter. Default value is 0.
+    CV_PROP_RW double maxVal; //!< Maximum value of the statmodel parameter. Default value is 0.
     /** @brief Logarithmic step for iterating the statmodel parameter.
 
     The grid determines the following iteration sequence of the statmodel parameter values:
     \f[(minVal, minVal*step, minVal*{step}^2, \dots,  minVal*{logStep}^n),\f]
     where \f$n\f$ is the maximal index satisfying
     \f[\texttt{minVal} * \texttt{logStep} ^n <  \texttt{maxVal}\f]
-    The grid is logarithmic, so logStep must always be greater then 1. Default value is 1.
+    The grid is logarithmic, so logStep must always be greater than 1. Default value is 1.
     */
-    double logStep;
+    CV_PROP_RW double logStep;
+
+    /** @brief Creates a ParamGrid Ptr that can be given to the %SVM::trainAuto method
+
+    @param minVal minimum value of the parameter grid
+    @param maxVal maximum value of the parameter grid
+    @param logstep Logarithmic step for iterating the statmodel parameter
+    */
+    CV_WRAP static Ptr<ParamGrid> create(double minVal=0., double maxVal=0., double logstep=1.);
 };
 
 /** @brief Class encapsulating training data.
@@ -190,6 +198,7 @@ class CV_EXPORTS_W TrainData
     CV_WRAP virtual Mat getTestSampleWeights() const = 0;
     CV_WRAP virtual Mat getVarIdx() const = 0;
     CV_WRAP virtual Mat getVarType() const = 0;
+    CV_WRAP virtual Mat getVarSymbolFlags() const = 0;
     CV_WRAP virtual int getResponseType() const = 0;
     CV_WRAP virtual Mat getTrainSampleIdx() const = 0;
     CV_WRAP virtual Mat getTestSampleIdx() const = 0;
@@ -225,9 +234,23 @@ class CV_EXPORTS_W TrainData
     CV_WRAP virtual void shuffleTrainTest() = 0;
 
     /** @brief Returns matrix of test samples */
-    CV_WRAP Mat getTestSamples() const;
+    CV_WRAP virtual Mat getTestSamples() const = 0;
+
+    /** @brief Returns vector of symbolic names captured in loadFromCSV() */
+    CV_WRAP virtual void getNames(std::vector<String>& names) const = 0;
+
+    /** @brief Extract from 1D vector elements specified by passed indexes.
+    @param vec input vector (supported types: CV_32S, CV_32F, CV_64F)
+    @param idx 1D index vector
+     */
+    static CV_WRAP Mat getSubVector(const Mat& vec, const Mat& idx);
 
-    CV_WRAP static Mat getSubVector(const Mat& vec, const Mat& idx);
+    /** @brief Extract from matrix rows/cols specified by passed indexes.
+    @param matrix input matrix (supported types: CV_32S, CV_32F, CV_64F)
+    @param idx 1D index vector
+    @param layout specifies to extract rows (cv::ml::ROW_SAMPLES) or to extract columns (cv::ml::COL_SAMPLES)
+     */
+    static CV_WRAP Mat getSubMatrix(const Mat& matrix, const Mat& idx, int layout);
 
     /** @brief Reads the dataset from a .csv file and returns the ready-to-use training data.
 
@@ -306,7 +329,7 @@ class CV_EXPORTS_W StatModel : public Algorithm
     /** @brief Returns the number of variables in training samples */
     CV_WRAP virtual int getVarCount() const = 0;
 
-    CV_WRAP virtual bool empty() const;
+    CV_WRAP virtual bool empty() const CV_OVERRIDE;
 
     /** @brief Returns true if the model is trained */
     CV_WRAP virtual bool isTrained() const = 0;
@@ -389,6 +412,17 @@ class CV_EXPORTS_W NormalBayesClassifier : public StatModel
     /** Creates empty model
     Use StatModel::train to train the model after creation. */
     CV_WRAP static Ptr<NormalBayesClassifier> create();
+
+    /** @brief Loads and creates a serialized NormalBayesClassifier from a file
+     *
+     * Use NormalBayesClassifier::save to serialize and store a NormalBayesClassifier to disk.
+     * Load the NormalBayesClassifier from this file again, by calling this function with the path to the file.
+     * Optionally specify the node for the file containing the classifier
+     *
+     * @param filepath path to serialized NormalBayesClassifier
+     * @param nodeName name of node containing the classifier
+     */
+    CV_WRAP static Ptr<NormalBayesClassifier> load(const String& filepath , const String& nodeName = String());
 };
 
 /****************************************************************************************\
@@ -471,6 +505,14 @@ class CV_EXPORTS_W KNearest : public StatModel
     The static method creates empty %KNearest classifier. It should be then trained using StatModel::train method.
      */
     CV_WRAP static Ptr<KNearest> create();
+    /** @brief Loads and creates a serialized KNearest from a file
+     *
+     * Use KNearest::save to serialize and store a KNearest to disk.
+     * Load the KNearest from this file again, by calling this function with the path to the file.
+     *
+     * @param filepath path to serialized KNearest
+     */
+    CV_WRAP static Ptr<KNearest> load(const String& filepath);
 };
 
 /****************************************************************************************\
@@ -668,14 +710,54 @@ class CV_EXPORTS_W SVM : public StatModel
     the usual %SVM with parameters specified in params is executed.
      */
     virtual bool trainAuto( const Ptr<TrainData>& data, int kFold = 10,
-                    ParamGrid Cgrid = SVM::getDefaultGrid(SVM::C),
-                    ParamGrid gammaGrid  = SVM::getDefaultGrid(SVM::GAMMA),
-                    ParamGrid pGrid      = SVM::getDefaultGrid(SVM::P),
-                    ParamGrid nuGrid     = SVM::getDefaultGrid(SVM::NU),
-                    ParamGrid coeffGrid  = SVM::getDefaultGrid(SVM::COEF),
-                    ParamGrid degreeGrid = SVM::getDefaultGrid(SVM::DEGREE),
+                    ParamGrid Cgrid = getDefaultGrid(C),
+                    ParamGrid gammaGrid  = getDefaultGrid(GAMMA),
+                    ParamGrid pGrid      = getDefaultGrid(P),
+                    ParamGrid nuGrid     = getDefaultGrid(NU),
+                    ParamGrid coeffGrid  = getDefaultGrid(COEF),
+                    ParamGrid degreeGrid = getDefaultGrid(DEGREE),
                     bool balanced=false) = 0;
 
+    /** @brief Trains an %SVM with optimal parameters
+
+    @param samples training samples
+    @param layout See ml::SampleTypes.
+    @param responses vector of responses associated with the training samples.
+    @param kFold Cross-validation parameter. The training set is divided into kFold subsets. One
+        subset is used to test the model, the others form the train set. So, the %SVM algorithm is executed kFold times.
+    @param Cgrid grid for C
+    @param gammaGrid grid for gamma
+    @param pGrid grid for p
+    @param nuGrid grid for nu
+    @param coeffGrid grid for coeff
+    @param degreeGrid grid for degree
+    @param balanced If true and the problem is 2-class classification then the method creates more
+        balanced cross-validation subsets that is proportions between classes in subsets are close
+        to such proportion in the whole train dataset.
+
+    The method trains the %SVM model automatically by choosing the optimal parameters C, gamma, p,
+    nu, coef0, degree. Parameters are considered optimal when the cross-validation
+    estimate of the test set error is minimal.
+
+    This function only makes use of SVM::getDefaultGrid for parameter optimization and thus only
+    offers rudimentary parameter options.
+
+    This function works for the classification (SVM::C_SVC or SVM::NU_SVC) as well as for the
+    regression (SVM::EPS_SVR or SVM::NU_SVR). If it is SVM::ONE_CLASS, no optimization is made and
+    the usual %SVM with parameters specified in params is executed.
+    */
+    CV_WRAP virtual bool trainAuto(InputArray samples,
+            int layout,
+            InputArray responses,
+            int kFold = 10,
+            Ptr<ParamGrid> Cgrid = SVM::getDefaultGridPtr(SVM::C),
+            Ptr<ParamGrid> gammaGrid  = SVM::getDefaultGridPtr(SVM::GAMMA),
+            Ptr<ParamGrid> pGrid      = SVM::getDefaultGridPtr(SVM::P),
+            Ptr<ParamGrid> nuGrid     = SVM::getDefaultGridPtr(SVM::NU),
+            Ptr<ParamGrid> coeffGrid  = SVM::getDefaultGridPtr(SVM::COEF),
+            Ptr<ParamGrid> degreeGrid = SVM::getDefaultGridPtr(SVM::DEGREE),
+            bool balanced=false) = 0;
+
     /** @brief Retrieves all the support vectors
 
     The method returns all the support vectors as a floating-point matrix, where support vectors are
@@ -689,7 +771,7 @@ class CV_EXPORTS_W SVM : public StatModel
     support vector, used for prediction, was derived from. They are returned in a floating-point
     matrix, where the support vectors are stored as matrix rows.
      */
-    CV_WRAP Mat getUncompressedSupportVectors() const;
+    CV_WRAP virtual Mat getUncompressedSupportVectors() const = 0;
 
     /** @brief Retrieves the decision function
 
@@ -718,6 +800,16 @@ class CV_EXPORTS_W SVM : public StatModel
      */
     static ParamGrid getDefaultGrid( int param_id );
 
+    /** @brief Generates a grid for %SVM parameters.
+
+    @param param_id %SVM parameters IDs that must be one of the SVM::ParamTypes. The grid is
+    generated for the parameter with this ID.
+
+    The function generates a grid pointer for the specified parameter of the %SVM algorithm.
+    The grid may be passed to the function SVM::trainAuto.
+     */
+    CV_WRAP static Ptr<ParamGrid> getDefaultGridPtr( int param_id );
+
     /** Creates empty model.
     Use StatModel::train to train the model. Since %SVM has several parameters, you may want to
     find the best parameters for your problem, it can be done with SVM::trainAuto. */
@@ -814,6 +906,15 @@ class CV_EXPORTS_W EM : public StatModel
      */
     CV_WRAP virtual void getCovs(CV_OUT std::vector<Mat>& covs) const = 0;
 
+    /** @brief Returns posterior probabilities for the provided samples
+
+    @param samples The input samples, floating-point matrix
+    @param results The optional output \f$ nSamples \times nClusters\f$ matrix of results. It contains
+    posterior probabilities for each sample from the input
+    @param flags This parameter will be ignored
+     */
+    CV_WRAP virtual float predict( InputArray samples, OutputArray results=noArray(), int flags=0 ) const CV_OVERRIDE = 0;
+
     /** @brief Returns a likelihood logarithm value and an index of the most probable mixture component
     for the given sample.
 
@@ -903,7 +1004,7 @@ class CV_EXPORTS_W EM : public StatModel
     @param samples Samples from which the Gaussian mixture model will be estimated. It should be a
         one-channel matrix, each row of which is a sample. If the matrix does not have CV_64F type
         it will be converted to the inner matrix of such type for the further computing.
-    @param probs0
+    @param probs0 the probabilities
     @param logLikelihoods The optional output matrix that contains a likelihood logarithm value for
         each sample. It has \f$nsamples \times 1\f$ size and CV_64FC1 type.
     @param labels The optional output "class label" for each sample:
@@ -923,6 +1024,17 @@ class CV_EXPORTS_W EM : public StatModel
     can use one of the EM::train\* methods or load it from file using Algorithm::load\<EM\>(filename).
      */
     CV_WRAP static Ptr<EM> create();
+
+    /** @brief Loads and creates a serialized EM from a file
+     *
+     * Use EM::save to serialize and store an EM to disk.
+     * Load the EM from this file again, by calling this function with the path to the file.
+     * Optionally specify the node for the file containing the classifier
+     *
+     * @param filepath path to serialized EM
+     * @param nodeName name of node containing the classifier
+     */
+    CV_WRAP static Ptr<EM> load(const String& filepath , const String& nodeName = String());
 };
 
 /****************************************************************************************\
@@ -1111,6 +1223,17 @@ class CV_EXPORTS_W DTrees : public StatModel
     file using Algorithm::load\<DTrees\>(filename).
      */
     CV_WRAP static Ptr<DTrees> create();
+
+    /** @brief Loads and creates a serialized DTrees from a file
+     *
+     * Use DTrees::save to serialize and store a DTrees model to disk.
+     * Load the DTrees model from this file again, by calling this function with the path to the file.
+     * Optionally specify the node for the file containing the classifier
+     *
+     * @param filepath path to serialized DTrees model
+     * @param nodeName name of node containing the classifier
+     */
+    CV_WRAP static Ptr<DTrees> load(const String& filepath , const String& nodeName = String());
 };
 
 /****************************************************************************************\
@@ -1160,11 +1283,33 @@ class CV_EXPORTS_W RTrees : public DTrees
      */
     CV_WRAP virtual Mat getVarImportance() const = 0;
 
+    /** Returns the result of each individual tree in the forest.
+    In case the model is a regression problem, the method will return each of the trees'
+    results for each of the sample cases. If the model is a classifier, it will return
+    a Mat with samples + 1 rows, where the first row gives the class number and the
+    following rows return the votes each class had for each sample.
+        @param samples Array containing the samples for which votes will be calculated.
+        @param results Array where the result of the calculation will be written.
+        @param flags Flags for defining the type of RTrees.
+    */
+    CV_WRAP virtual void getVotes(InputArray samples, OutputArray results, int flags) const = 0;
+
     /** Creates the empty model.
     Use StatModel::train to train the model, StatModel::train to create and train the model,
     Algorithm::load to load the pre-trained model.
      */
     CV_WRAP static Ptr<RTrees> create();
+
+    /** @brief Loads and creates a serialized RTrees from a file
+     *
+     * Use RTrees::save to serialize and store an RTrees model to disk.
+     * Load the RTrees model from this file again, by calling this function with the path to the file.
+     * Optionally specify the node for the file containing the classifier
+     *
+     * @param filepath path to serialized RTrees
+     * @param nodeName name of node containing the classifier
+     */
+    CV_WRAP static Ptr<RTrees> load(const String& filepath , const String& nodeName = String());
 };
 
 /****************************************************************************************\
@@ -1214,6 +1359,17 @@ class CV_EXPORTS_W Boost : public DTrees
     /** Creates the empty model.
     Use StatModel::train to train the model, Algorithm::load\<Boost\>(filename) to load the pre-trained model. */
     CV_WRAP static Ptr<Boost> create();
+
+    /** @brief Loads and creates a serialized Boost from a file
+     *
+     * Use Boost::save to serialize and store a Boost model to disk.
+     * Load the Boost from this file again, by calling this function with the path to the file.
+     * Optionally specify the node for the file containing the classifier
+     *
+     * @param filepath path to serialized Boost
+     * @param nodeName name of node containing the classifier
+     */
+    CV_WRAP static Ptr<Boost> load(const String& filepath , const String& nodeName = String());
 };
 
 /****************************************************************************************\
@@ -1269,13 +1425,14 @@ class CV_EXPORTS_W ANN_MLP : public StatModel
     /** Available training methods */
     enum TrainingMethods {
         BACKPROP=0, //!< The back-propagation algorithm.
-        RPROP=1 //!< The RPROP algorithm. See @cite RPROP93 for details.
+        RPROP = 1, //!< The RPROP algorithm. See @cite RPROP93 for details.
+        ANNEAL = 2 //!< The simulated annealing algorithm. See @cite Kirkpatrick83 for details.
     };
 
     /** Sets training method and common parameters.
     @param method Default value is ANN_MLP::RPROP. See ANN_MLP::TrainingMethods.
-    @param param1 passed to setRpropDW0 for ANN_MLP::RPROP and to setBackpropWeightScale for ANN_MLP::BACKPROP
-    @param param2 passed to setRpropDWMin for ANN_MLP::RPROP and to setBackpropMomentumScale for ANN_MLP::BACKPROP.
+    @param param1 passed to setRpropDW0 for ANN_MLP::RPROP and to setBackpropWeightScale for ANN_MLP::BACKPROP and to initialT for ANN_MLP::ANNEAL.
+    @param param2 passed to setRpropDWMin for ANN_MLP::RPROP and to setBackpropMomentumScale for ANN_MLP::BACKPROP and to finalT for ANN_MLP::ANNEAL.
     */
     CV_WRAP virtual void setTrainMethod(int method, double param1 = 0, double param2 = 0) = 0;
 
@@ -1362,18 +1519,53 @@ class CV_EXPORTS_W ANN_MLP : public StatModel
     /** @copybrief getRpropDWMax @see getRpropDWMax */
     CV_WRAP virtual void setRpropDWMax(double val) = 0;
 
+    /** ANNEAL: Update initial temperature.
+    It must be \>=0. Default value is 10.*/
+    /** @see setAnnealInitialT */
+    CV_WRAP virtual double getAnnealInitialT() const = 0;
+    /** @copybrief getAnnealInitialT @see getAnnealInitialT */
+    CV_WRAP virtual void setAnnealInitialT(double val) = 0;
+
+    /** ANNEAL: Update final temperature.
+    It must be \>=0 and less than initialT. Default value is 0.1.*/
+    /** @see setAnnealFinalT */
+    CV_WRAP virtual double getAnnealFinalT() const = 0;
+    /** @copybrief getAnnealFinalT @see getAnnealFinalT */
+    CV_WRAP virtual void setAnnealFinalT(double val) = 0;
+
+    /** ANNEAL: Update cooling ratio.
+    It must be \>0 and less than 1. Default value is 0.95.*/
+    /** @see setAnnealCoolingRatio */
+    CV_WRAP virtual double getAnnealCoolingRatio() const = 0;
+    /** @copybrief getAnnealCoolingRatio @see getAnnealCoolingRatio */
+    CV_WRAP virtual void setAnnealCoolingRatio(double val) = 0;
+
+    /** ANNEAL: Update iteration per step.
+    It must be \>0 . Default value is 10.*/
+    /** @see setAnnealItePerStep */
+    CV_WRAP virtual int getAnnealItePerStep() const = 0;
+    /** @copybrief getAnnealItePerStep @see getAnnealItePerStep */
+    CV_WRAP virtual void setAnnealItePerStep(int val) = 0;
+
+    /** @brief Set/initialize anneal RNG */
+    virtual void setAnnealEnergyRNG(const RNG& rng) = 0;
+
     /** possible activation functions */
     enum ActivationFunctions {
         /** Identity function: \f$f(x)=x\f$ */
         IDENTITY = 0,
-        /** Symmetrical sigmoid: \f$f(x)=\beta*(1-e^{-\alpha x})/(1+e^{-\alpha x}\f$
+        /** Symmetrical sigmoid: \f$f(x)=\beta*(1-e^{-\alpha x})/(1+e^{-\alpha x})\f$
         @note
         If you are using the default sigmoid activation function with the default parameter values
         fparam1=0 and fparam2=0 then the function used is y = 1.7159\*tanh(2/3 \* x), so the output
         will range from [-1.7159, 1.7159], instead of [0,1].*/
         SIGMOID_SYM = 1,
         /** Gaussian function: \f$f(x)=\beta e^{-\alpha x*x}\f$ */
-        GAUSSIAN = 2
+        GAUSSIAN = 2,
+        /** ReLU function: \f$f(x)=max(0,x)\f$ */
+        RELU = 3,
+        /** Leaky ReLU function: for x>0 \f$f(x)=x \f$ and x<=0 \f$f(x)=\alpha x \f$*/
+        LEAKYRELU= 4
     };
 
     /** Train options */
@@ -1413,6 +1605,10 @@ class CV_EXPORTS_W ANN_MLP : public StatModel
 
 };
 
+#ifndef DISABLE_OPENCV_3_COMPATIBILITY
+typedef ANN_MLP ANN_MLP_ANNEAL;
+#endif
+
 /****************************************************************************************\
 *                           Logistic Regression                                          *
 \****************************************************************************************/
@@ -1483,11 +1679,11 @@ class CV_EXPORTS_W LogisticRegression : public StatModel
     @param results Predicted labels as a column matrix of type CV_32S.
     @param flags Not used.
      */
-    CV_WRAP virtual float predict( InputArray samples, OutputArray results=noArray(), int flags=0 ) const = 0;
+    CV_WRAP virtual float predict( InputArray samples, OutputArray results=noArray(), int flags=0 ) const CV_OVERRIDE = 0;
 
-    /** @brief This function returns the trained paramters arranged across rows.
+    /** @brief This function returns the trained parameters arranged across rows.
 
-    For a two class classifcation problem, it returns a row matrix. It returns learnt paramters of
+    For a two class classification problem, it returns a row matrix. It returns learnt parameters of
     the Logistic Regression as a matrix of type CV_32F.
      */
     CV_WRAP virtual Mat get_learnt_thetas() const = 0;
@@ -1497,10 +1693,191 @@ class CV_EXPORTS_W LogisticRegression : public StatModel
     Creates Logistic Regression model with parameters given.
      */
     CV_WRAP static Ptr<LogisticRegression> create();
+
+    /** @brief Loads and creates a serialized LogisticRegression from a file
+     *
+     * Use LogisticRegression::save to serialize and store a LogisticRegression to disk.
+     * Load the LogisticRegression from this file again, by calling this function with the path to the file.
+     * Optionally specify the node for the file containing the classifier
+     *
+     * @param filepath path to serialized LogisticRegression
+     * @param nodeName name of node containing the classifier
+     */
+    CV_WRAP static Ptr<LogisticRegression> load(const String& filepath , const String& nodeName = String());
 };
 
+
 /****************************************************************************************\
-*                           Auxilary functions declarations                              *
+*                        Stochastic Gradient Descent SVM Classifier                      *
+\****************************************************************************************/
+
+/*!
+@brief Stochastic Gradient Descent SVM classifier
+
+SVMSGD provides a fast and easy-to-use implementation of the SVM classifier using the Stochastic Gradient Descent approach,
+as presented in @cite bottou2010large.
+
+The classifier has the following parameters:
+- model type,
+- margin type,
+- margin regularization (\f$\lambda\f$),
+- initial step size (\f$\gamma_0\f$),
+- step decreasing power (\f$c\f$),
+- and termination criteria.
+
+The model type may have one of the following values: \ref SGD and \ref ASGD.
+
+- \ref SGD is the classic version of SVMSGD classifier: every next step is calculated by the formula
+  \f[w_{t+1} = w_t - \gamma(t) \frac{dQ_i}{dw} |_{w = w_t}\f]
+  where
+  - \f$w_t\f$ is the weights vector for decision function at step \f$t\f$,
+  - \f$\gamma(t)\f$ is the step size of model parameters at the iteration \f$t\f$, it is decreased on each step by the formula
+    \f$\gamma(t) = \gamma_0  (1 + \lambda  \gamma_0 t) ^ {-c}\f$
+  - \f$Q_i\f$ is the target functional from SVM task for sample with number \f$i\f$, this sample is chosen stochastically on each step of the algorithm.
+
+- \ref ASGD is Average Stochastic Gradient Descent SVM Classifier. ASGD classifier averages weights vector on each step of algorithm by the formula
+\f$\widehat{w}_{t+1} = \frac{t}{1+t}\widehat{w}_{t} + \frac{1}{1+t}w_{t+1}\f$
+
+The recommended model type is ASGD (following @cite bottou2010large).
+
+The margin type may have one of the following values: \ref SOFT_MARGIN or \ref HARD_MARGIN.
+
+- You should use \ref HARD_MARGIN type, if you have linearly separable sets.
+- You should use \ref SOFT_MARGIN type, if you have non-linearly separable sets or sets with outliers.
+- In the general case (if you know nothing about linear separability of your sets), use SOFT_MARGIN.
+
+The other parameters may be described as follows:
+- Margin regularization parameter is responsible for weights decreasing at each step and for the strength of restrictions on outliers
+  (the less the parameter, the less probability that an outlier will be ignored).
+  Recommended value for SGD model is 0.0001, for ASGD model is 0.00001.
+
+- Initial step size parameter is the initial value for the step size \f$\gamma(t)\f$.
+  You will have to find the best initial step for your problem.
+
+- Step decreasing power is the power parameter for \f$\gamma(t)\f$ decreasing by the formula, mentioned above.
+  Recommended value for SGD model is 1, for ASGD model is 0.75.
+
+- Termination criteria can be TermCriteria::COUNT, TermCriteria::EPS or TermCriteria::COUNT + TermCriteria::EPS.
+  You will have to find the best termination criteria for your problem.
+
+Note that the parameters margin regularization, initial step size, and step decreasing power should be positive.
+
+To use SVMSGD algorithm do as follows:
+
+- first, create the SVMSGD object. The algorithm will set optimal parameters by default, but you can set your own parameters via functions setSvmsgdType(),
+  setMarginType(), setMarginRegularization(), setInitialStepSize(), and setStepDecreasingPower().
+
+- then the SVM model can be trained using the training features and the corresponding labels by the method train().
+
+- after that, the label of a new feature vector can be predicted using the method predict().
+
+@code
+// Create empty object
+cv::Ptr<SVMSGD> svmsgd = SVMSGD::create();
+
+// Train the Stochastic Gradient Descent SVM
+svmsgd->train(trainData);
+
+// Predict labels for the new samples
+svmsgd->predict(samples, responses);
+@endcode
+
+*/
+
+class CV_EXPORTS_W SVMSGD : public cv::ml::StatModel
+{
+public:
+
+    /** SVMSGD type.
+    ASGD is often the preferable choice. */
+    enum SvmsgdType
+    {
+        SGD, //!< Stochastic Gradient Descent
+        ASGD //!< Average Stochastic Gradient Descent
+    };
+
+    /** Margin type.*/
+    enum MarginType
+    {
+        SOFT_MARGIN, //!< General case, suits to the case of non-linearly separable sets, allows outliers.
+        HARD_MARGIN  //!< More accurate for the case of linearly separable sets.
+    };
+
+    /**
+     * @return the weights of the trained model (decision function f(x) = weights * x + shift).
+    */
+    CV_WRAP virtual Mat getWeights() = 0;
+
+    /**
+     * @return the shift of the trained model (decision function f(x) = weights * x + shift).
+    */
+    CV_WRAP virtual float getShift() = 0;
+
+    /** @brief Creates empty model.
+     * Use StatModel::train to train the model. Since %SVMSGD has several parameters, you may want to
+     * find the best parameters for your problem or use setOptimalParameters() to set some default parameters.
+    */
+    CV_WRAP static Ptr<SVMSGD> create();
+
+    /** @brief Loads and creates a serialized SVMSGD from a file
+     *
+     * Use SVMSGD::save to serialize and store an SVMSGD to disk.
+     * Load the SVMSGD from this file again, by calling this function with the path to the file.
+     * Optionally specify the node for the file containing the classifier
+     *
+     * @param filepath path to serialized SVMSGD
+     * @param nodeName name of node containing the classifier
+     */
+    CV_WRAP static Ptr<SVMSGD> load(const String& filepath , const String& nodeName = String());
+
+    /** @brief Function sets optimal parameters values for chosen SVM SGD model.
+     * @param svmsgdType is the type of SVMSGD classifier.
+     * @param marginType is the type of margin constraint.
+    */
+    CV_WRAP virtual void setOptimalParameters(int svmsgdType = SVMSGD::ASGD, int marginType = SVMSGD::SOFT_MARGIN) = 0;
+
+    /** @brief %Algorithm type, one of SVMSGD::SvmsgdType. */
+    /** @see setSvmsgdType */
+    CV_WRAP virtual int getSvmsgdType() const = 0;
+    /** @copybrief getSvmsgdType @see getSvmsgdType */
+    CV_WRAP virtual void setSvmsgdType(int svmsgdType) = 0;
+
+    /** @brief %Margin type, one of SVMSGD::MarginType. */
+    /** @see setMarginType */
+    CV_WRAP virtual int getMarginType() const = 0;
+    /** @copybrief getMarginType @see getMarginType */
+    CV_WRAP virtual void setMarginType(int marginType) = 0;
+
+    /** @brief Parameter marginRegularization of a %SVMSGD optimization problem. */
+    /** @see setMarginRegularization */
+    CV_WRAP virtual float getMarginRegularization() const = 0;
+    /** @copybrief getMarginRegularization @see getMarginRegularization */
+    CV_WRAP virtual void setMarginRegularization(float marginRegularization) = 0;
+
+    /** @brief Parameter initialStepSize of a %SVMSGD optimization problem. */
+    /** @see setInitialStepSize */
+    CV_WRAP virtual float getInitialStepSize() const = 0;
+    /** @copybrief getInitialStepSize @see getInitialStepSize */
+    CV_WRAP virtual void setInitialStepSize(float InitialStepSize) = 0;
+
+    /** @brief Parameter stepDecreasingPower of a %SVMSGD optimization problem. */
+    /** @see setStepDecreasingPower */
+    CV_WRAP virtual float getStepDecreasingPower() const = 0;
+    /** @copybrief getStepDecreasingPower @see getStepDecreasingPower */
+    CV_WRAP virtual void setStepDecreasingPower(float stepDecreasingPower) = 0;
+
+    /** @brief Termination criteria of the training algorithm.
+    You can specify the maximum number of iterations (maxCount) and/or how much the error could
+    change between the iterations to make the algorithm continue (epsilon).*/
+    /** @see setTermCriteria */
+    CV_WRAP virtual TermCriteria getTermCriteria() const = 0;
+    /** @copybrief getTermCriteria @see getTermCriteria */
+    CV_WRAP virtual void setTermCriteria(const cv::TermCriteria &val) = 0;
+};
+
+
+/****************************************************************************************\
+*                           Auxiliary functions declarations                              *
 \****************************************************************************************/
 
 /** @brief Generates _sample_ from multivariate normal distribution
@@ -1516,12 +1893,55 @@ CV_EXPORTS void randMVNormal( InputArray mean, InputArray cov, int nsamples, Out
 CV_EXPORTS void createConcentricSpheresTestSet( int nsamples, int nfeatures, int nclasses,
                                                 OutputArray samples, OutputArray responses);
 
+
+/****************************************************************************************\
+*                                   Simulated annealing solver                             *
+\****************************************************************************************/
+
+#ifdef CV_DOXYGEN
+/** @brief This class declares example interface for system state used in simulated annealing optimization algorithm.
+
+@note This class is not defined in C++ code and can't be used directly - you need your own implementation with the same methods.
+*/
+struct SimulatedAnnealingSolverSystem
+{
+    /** Give energy value for a state of system.*/
+    double energy() const;
+    /** Function which changes the state of system (random perturbation).*/
+    void changeState();
+    /** Function to reverse to the previous state. Can be called once only after changeState(). */
+    void reverseState();
+};
+#endif // CV_DOXYGEN
+
+/** @brief This function implements simulated annealing for optimization.
+
+@cite Kirkpatrick83 for details
+
+@param solverSystem optimization system (see SimulatedAnnealingSolverSystem)
+@param initialTemperature initial temperature
+@param finalTemperature final temperature
+@param coolingRatio multiplier applied to the temperature after each step (must be less than 1)
+@param iterationsPerStep number of iterations per temperature changing step
+@param lastTemperature optional output for last used temperature
+@param rngEnergy specify custom random numbers generator (cv::theRNG() by default)
+*/
+template<class SimulatedAnnealingSolverSystem>
+int simulatedAnnealingSolver(SimulatedAnnealingSolverSystem& solverSystem,
+     double initialTemperature, double finalTemperature, double coolingRatio,
+     size_t iterationsPerStep,
+     CV_OUT double* lastTemperature = NULL,
+     cv::RNG& rngEnergy = cv::theRNG()
+);
+
 //! @} ml
 
 }
 }
 
+#include <opencv2/ml/ml.inl.hpp>
+
 #endif // __cplusplus
-#endif // __OPENCV_ML_HPP__
+#endif // OPENCV_ML_HPP
 
 /* End of file. */
diff --git a/IPL/include/opencv/opencv2/ml/ml.inl.hpp b/IPL/include/opencv/opencv2/ml/ml.inl.hpp
new file mode 100644
index 0000000..dc9c783
--- /dev/null
+++ b/IPL/include/opencv/opencv2/ml/ml.inl.hpp
@@ -0,0 +1,60 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_ML_INL_HPP
+#define OPENCV_ML_INL_HPP
+
+namespace cv { namespace ml {
+
+// Simulated annealing loop (declared in ml.hpp): perturbs solverSystem step by step and returns how many state changes were kept.
+template<class SimulatedAnnealingSolverSystem>
+int simulatedAnnealingSolver(SimulatedAnnealingSolverSystem& solverSystem,
+     double initialTemperature, double finalTemperature, double coolingRatio,
+     size_t iterationsPerStep,
+     CV_OUT double* lastTemperature,
+     cv::RNG& rngEnergy
+)
+{
+    CV_Assert(finalTemperature > 0); // Ti is used as a divisor below, so the loop must stop above zero
+    CV_Assert(initialTemperature > finalTemperature); // guarantees at least one temperature step is executed
+    CV_Assert(iterationsPerStep > 0);
+    CV_Assert(coolingRatio < 1.0f); // the temperature must shrink each step; note that no lower bound is checked here
+    double Ti = initialTemperature; // current temperature
+    double previousEnergy = solverSystem.energy(); // energy of the last accepted state
+    int exchange = 0; // number of accepted state changes (the return value)
+    while (Ti > finalTemperature)
+    {
+        for (size_t i = 0; i < iterationsPerStep; i++)
+        {
+            solverSystem.changeState(); // propose a new state
+            double newEnergy = solverSystem.energy();
+            if (newEnergy < previousEnergy)
+            {
+                // strictly lower energy: always keep the new state
+                previousEnergy = newEnergy;
+                exchange++;
+            }
+            else
+            {
+                double r = rngEnergy.uniform(0.0, 1.0); // random draw compared against the acceptance threshold
+                if (r < std::exp(-(newEnergy - previousEnergy) / Ti)) // threshold shrinks as the energy increase grows or Ti drops
+                {
+                    // equal-or-higher energy state kept anyway
+                    previousEnergy = newEnergy;
+                    exchange++;
+                }
+                else
+                {
+                    solverSystem.reverseState(); // rejected: undo the change just made by changeState()
+                }
+            }
+        }
+        Ti *= coolingRatio; // cool down before the next batch of iterations
+    }
+    if (lastTemperature) // optional output; skipped when the caller passes a null pointer
+        *lastTemperature = Ti; // value of Ti after the final cooling multiplication (no longer above finalTemperature)
+    return exchange;
+}
+
+}} //namespace
+
+#endif // OPENCV_ML_INL_HPP
diff --git a/IPL/include/opencv/opencv2/objdetect.hpp b/IPL/include/opencv/opencv2/objdetect.hpp
index 6587b3d..097c592 100644
--- a/IPL/include/opencv/opencv2/objdetect.hpp
+++ b/IPL/include/opencv/opencv2/objdetect.hpp
@@ -41,8 +41,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_OBJDETECT_HPP__
-#define __OPENCV_OBJDETECT_HPP__
+#ifndef OPENCV_OBJDETECT_HPP
+#define OPENCV_OBJDETECT_HPP
 
 #include "opencv2/core.hpp"
 
@@ -91,7 +91,7 @@ compensate for the differences in the size of areas. The sums of pixel values ov
 regions are calculated rapidly using integral images (see below and the integral description).
 
 To see the object detector at work, have a look at the facedetect demo:
-<https://github.com/Itseez/opencv/tree/master/samples/cpp/dbt_face_detection.cpp>
+<https://github.com/opencv/opencv/tree/master/samples/cpp/dbt_face_detection.cpp>
 
 The following reference is for the detection part only. There is a separate application called
 opencv_traincascade that can train a cascade of boosted classifiers from a set of samples.
@@ -124,7 +124,7 @@ class CV_EXPORTS SimilarRects
     SimilarRects(double _eps) : eps(_eps) {}
     inline bool operator()(const Rect& r1, const Rect& r2) const
     {
-        double delta = eps*(std::min(r1.width, r2.width) + std::min(r1.height, r2.height))*0.5;
+        double delta = eps * ((std::min)(r1.width, r2.width) + (std::min)(r1.height, r2.height)) * 0.5;
         return std::abs(r1.x - r2.x) <= delta &&
             std::abs(r1.y - r2.y) <= delta &&
             std::abs(r1.x + r1.width - r2.x - r2.width) <= delta &&
@@ -163,7 +163,7 @@ CV_EXPORTS   void groupRectangles_meanshift(std::vector<Rect>& rectList, std::ve
                                             std::vector<double>& foundScales,
                                             double detectThreshold = 0.0, Size winDetSize = Size(64, 128));
 
-template<> CV_EXPORTS void DefaultDeleter<CvHaarClassifierCascade>::operator ()(CvHaarClassifierCascade* obj) const;
+template<> struct DefaultDeleter<CvHaarClassifierCascade>{ CV_EXPORTS void operator ()(CvHaarClassifierCascade* obj) const; };
 
 enum { CASCADE_DO_CANNY_PRUNING    = 1,
        CASCADE_SCALE_IMAGE         = 2,
@@ -175,7 +175,7 @@ class CV_EXPORTS_W BaseCascadeClassifier : public Algorithm
 {
 public:
     virtual ~BaseCascadeClassifier();
-    virtual bool empty() const = 0;
+    virtual bool empty() const CV_OVERRIDE = 0;
     virtual bool load( const String& filename ) = 0;
     virtual void detectMultiScale( InputArray image,
                            CV_OUT std::vector<Rect>& objects,
@@ -215,6 +215,10 @@ class CV_EXPORTS_W BaseCascadeClassifier : public Algorithm
     virtual Ptr<MaskGenerator> getMaskGenerator() = 0;
 };
 
+/** @example samples/cpp/facedetect.cpp
+This program demonstrates usage of the Cascade classifier class
+\image html Cascade_Classifier_Tutorial_Result_Haar.jpg "Sample screenshot" width=321 height=254
+*/
 /** @brief Cascade classifier class for object detection.
  */
 class CV_EXPORTS_W CascadeClassifier
@@ -255,7 +259,7 @@ class CV_EXPORTS_W CascadeClassifier
     @param flags Parameter with the same meaning for an old cascade as in the function
     cvHaarDetectObjects. It is not used for a new cascade.
     @param minSize Minimum possible object size. Objects smaller than that are ignored.
-    @param maxSize Maximum possible object size. Objects larger than that are ignored.
+    @param maxSize Maximum possible object size. Objects larger than that are ignored. If `maxSize == minSize`, the model is evaluated on a single scale.
 
     The function is parallelized with the TBB library.
 
@@ -283,7 +287,7 @@ class CV_EXPORTS_W CascadeClassifier
     @param flags Parameter with the same meaning for an old cascade as in the function
     cvHaarDetectObjects. It is not used for a new cascade.
     @param minSize Minimum possible object size. Objects smaller than that are ignored.
-    @param maxSize Maximum possible object size. Objects larger than that are ignored.
+    @param maxSize Maximum possible object size. Objects larger than that are ignored. If `maxSize == minSize`, the model is evaluated on a single scale.
     */
     CV_WRAP_AS(detectMultiScale2) void detectMultiScale( InputArray image,
                           CV_OUT std::vector<Rect>& objects,
@@ -294,7 +298,21 @@ class CV_EXPORTS_W CascadeClassifier
                           Size maxSize=Size() );
 
     /** @overload
-    if `outputRejectLevels` is `true` returns `rejectLevels` and `levelWeights`
+    This function allows you to retrieve the final stage decision certainty of classification.
+    For this, one needs to set `outputRejectLevels` to true and provide the `rejectLevels` and `levelWeights` parameters.
+    For each resulting detection, `levelWeights` will then contain the certainty of classification at the final stage.
+    This value can then be used to separate strong from weaker classifications.
+
+    A code sample on how to use it efficiently can be found below:
+    @code
+    Mat img;
+    vector<double> weights;
+    vector<int> levels;
+    vector<Rect> detections;
+    CascadeClassifier model("/path/to/your/model.xml");
+    model.detectMultiScale(img, detections, levels, weights, 1.1, 3, 0, Size(), Size(), true);
+    cerr << "Detection " << detections[0] << " with weight " << weights[0] << endl;
+    @endcode
     */
     CV_WRAP_AS(detectMultiScale3) void detectMultiScale( InputArray image,
                                   CV_OUT std::vector<Rect>& objects,
@@ -328,29 +346,65 @@ struct DetectionROI
 {
    //! scale(size) of the bounding box
    double scale;
-   //! set of requrested locations to be evaluated
+   //! set of requested locations to be evaluated
    std::vector<cv::Point> locations;
    //! vector that will contain confidence values for each location
    std::vector<double> confidences;
 };
 
+/**@brief Implementation of HOG (Histogram of Oriented Gradients) descriptor and object detector.
+
+The HOG descriptor algorithm was introduced by Navneet Dalal and Bill Triggs @cite Dalal2005 .
+
+Useful links:
+
+https://hal.inria.fr/inria-00548512/document/
+
+https://en.wikipedia.org/wiki/Histogram_of_oriented_gradients
+
+https://software.intel.com/en-us/ipp-dev-reference-histogram-of-oriented-gradients-hog-descriptor
+
+http://www.learnopencv.com/histogram-of-oriented-gradients
+
+http://www.learnopencv.com/handwritten-digits-classification-an-opencv-c-python-tutorial
+
+ */
 struct CV_EXPORTS_W HOGDescriptor
 {
 public:
-    enum { L2Hys = 0
+    enum HistogramNormType { L2Hys = 0 //!< Default histogramNormType
          };
-    enum { DEFAULT_NLEVELS = 64
+    enum { DEFAULT_NLEVELS = 64 //!< Default nlevels value.
          };
+    enum DescriptorStorageFormat { DESCR_FORMAT_COL_BY_COL, DESCR_FORMAT_ROW_BY_ROW };
 
+    /**@brief Creates the HOG descriptor and detector with default params.
+
+    equal to HOGDescriptor(Size(64,128), Size(16,16), Size(8,8), Size(8,8), 9 )
+    */
     CV_WRAP HOGDescriptor() : winSize(64,128), blockSize(16,16), blockStride(8,8),
         cellSize(8,8), nbins(9), derivAperture(1), winSigma(-1),
         histogramNormType(HOGDescriptor::L2Hys), L2HysThreshold(0.2), gammaCorrection(true),
         free_coef(-1.f), nlevels(HOGDescriptor::DEFAULT_NLEVELS), signedGradient(false)
     {}
 
+    /** @overload
+    @param _winSize sets winSize with given value.
+    @param _blockSize sets blockSize with given value.
+    @param _blockStride sets blockStride with given value.
+    @param _cellSize sets cellSize with given value.
+    @param _nbins sets nbins with given value.
+    @param _derivAperture sets derivAperture with given value.
+    @param _winSigma sets winSigma with given value.
+    @param _histogramNormType sets histogramNormType with given value.
+    @param _L2HysThreshold sets L2HysThreshold with given value.
+    @param _gammaCorrection sets gammaCorrection with given value.
+    @param _nlevels sets nlevels with given value.
+    @param _signedGradient sets signedGradient with given value.
+    */
     CV_WRAP HOGDescriptor(Size _winSize, Size _blockSize, Size _blockStride,
                   Size _cellSize, int _nbins, int _derivAperture=1, double _winSigma=-1,
-                  int _histogramNormType=HOGDescriptor::L2Hys,
+                  HOGDescriptor::HistogramNormType _histogramNormType=HOGDescriptor::L2Hys,
                   double _L2HysThreshold=0.2, bool _gammaCorrection=false,
                   int _nlevels=HOGDescriptor::DEFAULT_NLEVELS, bool _signedGradient=false)
     : winSize(_winSize), blockSize(_blockSize), blockStride(_blockStride), cellSize(_cellSize),
@@ -359,108 +413,341 @@ struct CV_EXPORTS_W HOGDescriptor
     gammaCorrection(_gammaCorrection), free_coef(-1.f), nlevels(_nlevels), signedGradient(_signedGradient)
     {}
 
+    /** @overload
+    @param filename The file name containing HOGDescriptor properties and coefficients for the linear SVM classifier.
+    */
     CV_WRAP HOGDescriptor(const String& filename)
     {
         load(filename);
     }
 
+    /** @overload
+    @param d the HOGDescriptor which is cloned to create a new one.
+    */
     HOGDescriptor(const HOGDescriptor& d)
     {
         d.copyTo(*this);
     }
 
+    /**@brief Default destructor.
+    */
     virtual ~HOGDescriptor() {}
 
+    /**@brief Returns the number of coefficients required for the classification.
+    */
     CV_WRAP size_t getDescriptorSize() const;
+
+    /** @brief Checks if detector size is equal to descriptor size.
+    */
     CV_WRAP bool checkDetectorSize() const;
+
+    /** @brief Returns winSigma value
+    */
     CV_WRAP double getWinSigma() const;
 
-    CV_WRAP virtual void setSVMDetector(InputArray _svmdetector);
+    /**@example samples/cpp/peopledetect.cpp
+    */
+    /**@brief Sets coefficients for the linear SVM classifier.
+    @param svmdetector coefficients for the linear SVM classifier.
+    */
+    CV_WRAP virtual void setSVMDetector(InputArray svmdetector);
 
+    /** @brief Reads HOGDescriptor parameters from a cv::FileNode.
+    @param fn File node
+    */
     virtual bool read(FileNode& fn);
+
+    /** @brief Stores HOGDescriptor parameters in a cv::FileStorage.
+    @param fs File storage
+    @param objname Object name
+    */
     virtual void write(FileStorage& fs, const String& objname) const;
 
+    /** @brief loads HOGDescriptor parameters and coefficients for the linear SVM classifier from a file.
+    @param filename Path of the file to read.
+    @param objname The optional name of the node to read (if empty, the first top-level node will be used).
+    */
     CV_WRAP virtual bool load(const String& filename, const String& objname = String());
+
+    /** @brief saves HOGDescriptor parameters and coefficients for the linear SVM classifier to a file
+    @param filename File name
+    @param objname Object name
+    */
     CV_WRAP virtual void save(const String& filename, const String& objname = String()) const;
+
+    /** @brief clones the HOGDescriptor
+    @param c cloned HOGDescriptor
+    */
     virtual void copyTo(HOGDescriptor& c) const;
 
+    /**@example samples/cpp/train_HOG.cpp
+    */
+    /** @brief Computes HOG descriptors of given image.
+    @param img Matrix of the type CV_8U containing an image where HOG features will be calculated.
+    @param descriptors Matrix of the type CV_32F
+    @param winStride Window stride. It must be a multiple of block stride.
+    @param padding Padding
+    @param locations Vector of Point
+    */
     CV_WRAP virtual void compute(InputArray img,
                          CV_OUT std::vector<float>& descriptors,
                          Size winStride = Size(), Size padding = Size(),
                          const std::vector<Point>& locations = std::vector<Point>()) const;
 
-    //! with found weights output
-    CV_WRAP virtual void detect(const Mat& img, CV_OUT std::vector<Point>& foundLocations,
+    /** @brief Performs object detection without a multi-scale window.
+    @param img Matrix of the type CV_8U or CV_8UC3 containing an image where objects are detected.
+    @param foundLocations Vector of points where each point is the top-left corner of a detected object's boundary.
+    @param weights Vector that will contain confidence values for each detected object.
+    @param hitThreshold Threshold for the distance between features and SVM classifying plane.
+    Usually it is 0 and should be specified in the detector coefficients (as the last free coefficient).
+    But if the free coefficient is omitted (which is allowed), you can specify it manually here.
+    @param winStride Window stride. It must be a multiple of block stride.
+    @param padding Padding
+    @param searchLocations Vector of Point containing the set of requested locations to be evaluated.
+    */
+    CV_WRAP virtual void detect(InputArray img, CV_OUT std::vector<Point>& foundLocations,
                         CV_OUT std::vector<double>& weights,
                         double hitThreshold = 0, Size winStride = Size(),
                         Size padding = Size(),
                         const std::vector<Point>& searchLocations = std::vector<Point>()) const;
-    //! without found weights output
-    virtual void detect(const Mat& img, CV_OUT std::vector<Point>& foundLocations,
+
+    /** @brief Performs object detection without a multi-scale window.
+    @param img Matrix of the type CV_8U or CV_8UC3 containing an image where objects are detected.
+    @param foundLocations Vector of points where each point is the top-left corner of a detected object's boundary.
+    @param hitThreshold Threshold for the distance between features and SVM classifying plane.
+    Usually it is 0 and should be specified in the detector coefficients (as the last free coefficient).
+    But if the free coefficient is omitted (which is allowed), you can specify it manually here.
+    @param winStride Window stride. It must be a multiple of block stride.
+    @param padding Padding
+    @param searchLocations Vector of Point containing the locations to search.
+    */
+    virtual void detect(InputArray img, CV_OUT std::vector<Point>& foundLocations,
                         double hitThreshold = 0, Size winStride = Size(),
                         Size padding = Size(),
                         const std::vector<Point>& searchLocations=std::vector<Point>()) const;
 
-    //! with result weights output
+    /** @brief Detects objects of different sizes in the input image. The detected objects are returned as a list
+    of rectangles.
+    @param img Matrix of the type CV_8U or CV_8UC3 containing an image where objects are detected.
+    @param foundLocations Vector of rectangles where each rectangle contains the detected object.
+    @param foundWeights Vector that will contain confidence values for each detected object.
+    @param hitThreshold Threshold for the distance between features and SVM classifying plane.
+    Usually it is 0 and should be specified in the detector coefficients (as the last free coefficient).
+    But if the free coefficient is omitted (which is allowed), you can specify it manually here.
+    @param winStride Window stride. It must be a multiple of block stride.
+    @param padding Padding
+    @param scale Coefficient of the detection window increase.
+    @param finalThreshold Final threshold
+    @param useMeanshiftGrouping indicates grouping algorithm
+    */
     CV_WRAP virtual void detectMultiScale(InputArray img, CV_OUT std::vector<Rect>& foundLocations,
                                   CV_OUT std::vector<double>& foundWeights, double hitThreshold = 0,
                                   Size winStride = Size(), Size padding = Size(), double scale = 1.05,
                                   double finalThreshold = 2.0,bool useMeanshiftGrouping = false) const;
-    //! without found weights output
+
+    /** @brief Detects objects of different sizes in the input image. The detected objects are returned as a list
+    of rectangles.
+    @param img Matrix of the type CV_8U or CV_8UC3 containing an image where objects are detected.
+    @param foundLocations Vector of rectangles where each rectangle contains the detected object.
+    @param hitThreshold Threshold for the distance between features and SVM classifying plane.
+    Usually it is 0 and should be specified in the detector coefficients (as the last free coefficient).
+    But if the free coefficient is omitted (which is allowed), you can specify it manually here.
+    @param winStride Window stride. It must be a multiple of block stride.
+    @param padding Padding
+    @param scale Coefficient of the detection window increase.
+    @param finalThreshold Final threshold
+    @param useMeanshiftGrouping indicates grouping algorithm
+    */
     virtual void detectMultiScale(InputArray img, CV_OUT std::vector<Rect>& foundLocations,
                                   double hitThreshold = 0, Size winStride = Size(),
                                   Size padding = Size(), double scale = 1.05,
                                   double finalThreshold = 2.0, bool useMeanshiftGrouping = false) const;
 
-    CV_WRAP virtual void computeGradient(const Mat& img, CV_OUT Mat& grad, CV_OUT Mat& angleOfs,
+    /** @brief  Computes gradients and quantized gradient orientations.
+    @param img Matrix containing the image for which gradients are computed
+    @param grad Matrix of type CV_32FC2 containing the computed gradients
+    @param angleOfs Matrix of type CV_8UC2 containing the quantized gradient orientations
+    @param paddingTL Padding from top-left
+    @param paddingBR Padding from bottom-right
+    */
+    CV_WRAP virtual void computeGradient(InputArray img, InputOutputArray grad, InputOutputArray angleOfs,
                                  Size paddingTL = Size(), Size paddingBR = Size()) const;
 
+    /** @brief Returns coefficients of the classifier trained for people detection (for 64x128 windows).
+    */
     CV_WRAP static std::vector<float> getDefaultPeopleDetector();
+
+    /**@example samples/tapi/hog.cpp
+    */
+    /** @brief Returns coefficients of the classifier trained for people detection (for 48x96 windows).
+    */
     CV_WRAP static std::vector<float> getDaimlerPeopleDetector();
 
+    //! Detection window size. Align to block size and block stride. Default value is Size(64,128).
     CV_PROP Size winSize;
+
+    //! Block size in pixels. Align to cell size. Default value is Size(16,16).
     CV_PROP Size blockSize;
+
+    //! Block stride. It must be a multiple of cell size. Default value is Size(8,8).
     CV_PROP Size blockStride;
+
+    //! Cell size. Default value is Size(8,8).
     CV_PROP Size cellSize;
+
+    //! Number of bins used in the calculation of histogram of gradients. Default value is 9.
     CV_PROP int nbins;
+
+    //! not documented
     CV_PROP int derivAperture;
+
+    //! Gaussian smoothing window parameter.
     CV_PROP double winSigma;
-    CV_PROP int histogramNormType;
+
+    //! histogramNormType
+    CV_PROP HOGDescriptor::HistogramNormType histogramNormType;
+
+    //! L2-Hys normalization method shrinkage.
     CV_PROP double L2HysThreshold;
+
+    //! Flag to specify whether the gamma correction preprocessing is required or not.
     CV_PROP bool gammaCorrection;
+
+    //! coefficients for the linear SVM classifier.
     CV_PROP std::vector<float> svmDetector;
+
+    //! coefficients for the linear SVM classifier used when OpenCL is enabled
     UMat oclSvmDetector;
+
+    //! not documented
     float free_coef;
+
+    //! Maximum number of detection window increases. Default value is 64
     CV_PROP int nlevels;
-    CV_PROP bool signedGradient;
 
+    //! Indicates whether signed gradient will be used or not
+    CV_PROP bool signedGradient;
 
-    //! evaluate specified ROI and return confidence value for each location
-    virtual void detectROI(const cv::Mat& img, const std::vector<cv::Point> &locations,
+    /** @brief evaluate specified ROI and return confidence value for each location
+    @param img Matrix of the type CV_8U or CV_8UC3 containing an image where objects are detected.
+    @param locations Vector of Point
+    @param foundLocations Vector of Point where each Point is detected object's top-left point.
+    @param confidences confidences
+    @param hitThreshold Threshold for the distance between features and SVM classifying plane. Usually
+    it is 0 and should be specified in the detector coefficients (as the last free coefficient). But if
+    the free coefficient is omitted (which is allowed), you can specify it manually here
+    @param winStride winStride
+    @param padding padding
+    */
+    virtual void detectROI(InputArray img, const std::vector<cv::Point> &locations,
                                    CV_OUT std::vector<cv::Point>& foundLocations, CV_OUT std::vector<double>& confidences,
                                    double hitThreshold = 0, cv::Size winStride = Size(),
                                    cv::Size padding = Size()) const;
 
-    //! evaluate specified ROI and return confidence value for each location in multiple scales
-    virtual void detectMultiScaleROI(const cv::Mat& img,
-                                                       CV_OUT std::vector<cv::Rect>& foundLocations,
-                                                       std::vector<DetectionROI>& locations,
-                                                       double hitThreshold = 0,
-                                                       int groupThreshold = 0) const;
-
-    //! read/parse Dalal's alt model file
-    void readALTModel(String modelfile);
+    /** @brief evaluate specified ROI and return confidence value for each location in multiple scales
+    @param img Matrix of the type CV_8U or CV_8UC3 containing an image where objects are detected.
+    @param foundLocations Vector of rectangles where each rectangle contains the detected object.
+    @param locations Vector of DetectionROI
+    @param hitThreshold Threshold for the distance between features and SVM classifying plane. Usually it is 0 and should be specified
+    in the detector coefficients (as the last free coefficient). But if the free coefficient is omitted (which is allowed), you can specify it manually here.
+    @param groupThreshold Minimum possible number of rectangles minus 1. The threshold is used in a group of rectangles to retain it.
+    */
+    virtual void detectMultiScaleROI(InputArray img,
+                                     CV_OUT std::vector<cv::Rect>& foundLocations,
+                                     std::vector<DetectionROI>& locations,
+                                     double hitThreshold = 0,
+                                     int groupThreshold = 0) const;
+
+    /** @brief Groups the object candidate rectangles.
+    @param rectList  Input/output vector of rectangles. Output vector includes retained and grouped rectangles. (The Python list is not modified in place.)
+    @param weights Input/output vector of weights of rectangles. Output vector includes weights of retained and grouped rectangles. (The Python list is not modified in place.)
+    @param groupThreshold Minimum possible number of rectangles minus 1. The threshold is used in a group of rectangles to retain it.
+    @param eps Relative difference between sides of the rectangles to merge them into a group.
+    */
     void groupRectangles(std::vector<cv::Rect>& rectList, std::vector<double>& weights, int groupThreshold, double eps) const;
 };
 
-//! @} objdetect
+class CV_EXPORTS_W QRCodeDetector
+{
+public:
+    CV_WRAP QRCodeDetector();
+    ~QRCodeDetector();
+
+    /** @brief sets the epsilon used during the horizontal scan of QR code stop marker detection.
+     @param epsX Epsilon neighborhood, which allows you to determine the horizontal pattern
+     of the scheme 1:1:3:1:1 according to QR code standard.
+    */
+    CV_WRAP void setEpsX(double epsX);
+    /** @brief sets the epsilon used during the vertical scan of QR code stop marker detection.
+     @param epsY Epsilon neighborhood, which allows you to determine the vertical pattern
+     of the scheme 1:1:3:1:1 according to QR code standard.
+     */
+    CV_WRAP void setEpsY(double epsY);
+
+    /** @brief Detects QR code in image and returns the quadrangle containing the code.
+     @param img grayscale or color (BGR) image containing (or not) QR code.
+     @param points Output vector of vertices of the minimum-area quadrangle containing the code.
+     */
+    CV_WRAP bool detect(InputArray img, OutputArray points) const;
+
+    /** @brief Decodes QR code in image once it's found by the detect() method.
+
+     Returns UTF8-encoded output string or empty string if the code cannot be decoded.
+     @param img grayscale or color (BGR) image containing QR code.
+     @param points Quadrangle vertices found by detect() method (or some other algorithm).
+     @param straight_qrcode The optional output image containing rectified and binarized QR code
+     */
+    CV_WRAP std::string decode(InputArray img, InputArray points, OutputArray straight_qrcode = noArray());
+
+    /** @brief Both detects and decodes QR code
 
+     @param img grayscale or color (BGR) image containing QR code.
+     @param points optional output array of vertices of the found QR code quadrangle. Will be empty if not found.
+     @param straight_qrcode The optional output image containing rectified and binarized QR code
+     */
+    CV_WRAP std::string detectAndDecode(InputArray img, OutputArray points=noArray(),
+                                        OutputArray straight_qrcode = noArray());
+    /** @brief Detects QR codes in image and returns the vector of the quadrangles containing the codes.
+     @param img grayscale or color (BGR) image containing (or not) QR codes.
+     @param points Output vector of vector of vertices of the minimum-area quadrangle containing the codes.
+     */
+    CV_WRAP
+    bool detectMulti(InputArray img, OutputArray points) const;
+
+    /** @brief Decodes QR codes in image once they are found by the detect() method.
+     @param img grayscale or color (BGR) image containing QR codes.
+     @param decoded_info UTF8-encoded output vector of string or empty vector of string if the codes cannot be decoded.
+     @param points vector of Quadrangle vertices found by detect() method (or some other algorithm).
+     @param straight_qrcode The optional output vector of images containing rectified and binarized QR codes
+     */
+    CV_WRAP
+    bool decodeMulti(
+            InputArray img, InputArray points,
+            CV_OUT std::vector<std::string>& decoded_info,
+            OutputArrayOfArrays straight_qrcode = noArray()
+    ) const;
+
+    /** @brief Both detects and decodes QR codes
+    @param img grayscale or color (BGR) image containing QR codes.
+    @param decoded_info UTF8-encoded output vector of string or empty vector of string if the codes cannot be decoded.
+    @param points optional output vector of vertices of the found QR code quadrangles. Will be empty if not found.
+    @param straight_qrcode The optional output vector of images containing rectified and binarized QR codes
+    */
+    CV_WRAP
+    bool detectAndDecodeMulti(
+            InputArray img, CV_OUT std::vector<std::string>& decoded_info,
+            OutputArray points = noArray(),
+            OutputArrayOfArrays straight_qrcode = noArray()
+    ) const;
+
+protected:
+    struct Impl;
+    Ptr<Impl> p;
+};
+
+//! @} objdetect
 }
 
 #include "opencv2/objdetect/detection_based_tracker.hpp"
 
-#ifndef DISABLE_OPENCV_24_COMPATIBILITY
-#include "opencv2/objdetect/objdetect_c.h"
-#endif
-
 #endif
diff --git a/IPL/include/opencv/opencv2/objdetect/detection_based_tracker.hpp b/IPL/include/opencv/opencv2/objdetect/detection_based_tracker.hpp
index 1f5f1d3..18cde13 100644
--- a/IPL/include/opencv/opencv2/objdetect/detection_based_tracker.hpp
+++ b/IPL/include/opencv/opencv2/objdetect/detection_based_tracker.hpp
@@ -41,12 +41,10 @@
 //
 //M*/
 
-#ifndef __OPENCV_OBJDETECT_DBT_HPP__
-#define __OPENCV_OBJDETECT_DBT_HPP__
+#ifndef OPENCV_OBJDETECT_DBT_HPP
+#define OPENCV_OBJDETECT_DBT_HPP
 
-// After this condition removal update blacklist for bindings: modules/python/common.cmake
-#if defined(__linux__) || defined(LINUX) || defined(__APPLE__) || defined(__ANDROID__) || \
-  (defined(__cplusplus) &&  __cplusplus > 201103L) || (defined(_MSC_VER) && _MSC_VER >= 1700)
+#include <opencv2/core.hpp>
 
 #include <vector>
 
@@ -59,7 +57,7 @@ namespace cv
 class CV_EXPORTS DetectionBasedTracker
 {
     public:
-        struct Parameters
+        struct CV_EXPORTS Parameters
         {
             int maxTrackLifetime;
             int minDetectionPeriod; //the minimal time between run of the big object detector (on the whole frame) in ms (1000 mean 1 sec), default=0
@@ -220,6 +218,5 @@ class CV_EXPORTS DetectionBasedTracker
 //! @} objdetect
 
 } //end of cv namespace
-#endif
 
 #endif
diff --git a/IPL/include/opencv/opencv2/objdetect/objdetect_c.h b/IPL/include/opencv/opencv2/objdetect/objdetect_c.h
deleted file mode 100644
index 632a438..0000000
--- a/IPL/include/opencv/opencv2/objdetect/objdetect_c.h
+++ /dev/null
@@ -1,165 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                          License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifndef __OPENCV_OBJDETECT_C_H__
-#define __OPENCV_OBJDETECT_C_H__
-
-#include "opencv2/core/core_c.h"
-
-#ifdef __cplusplus
-#include <deque>
-#include <vector>
-
-extern "C" {
-#endif
-
-/** @addtogroup objdetect_c
-  @{
-  */
-
-/****************************************************************************************\
-*                         Haar-like Object Detection functions                           *
-\****************************************************************************************/
-
-#define CV_HAAR_MAGIC_VAL    0x42500000
-#define CV_TYPE_NAME_HAAR    "opencv-haar-classifier"
-
-#define CV_IS_HAAR_CLASSIFIER( haar )                                                    \
-    ((haar) != NULL &&                                                                   \
-    (((const CvHaarClassifierCascade*)(haar))->flags & CV_MAGIC_MASK)==CV_HAAR_MAGIC_VAL)
-
-#define CV_HAAR_FEATURE_MAX  3
-
-typedef struct CvHaarFeature
-{
-    int tilted;
-    struct
-    {
-        CvRect r;
-        float weight;
-    } rect[CV_HAAR_FEATURE_MAX];
-} CvHaarFeature;
-
-typedef struct CvHaarClassifier
-{
-    int count;
-    CvHaarFeature* haar_feature;
-    float* threshold;
-    int* left;
-    int* right;
-    float* alpha;
-} CvHaarClassifier;
-
-typedef struct CvHaarStageClassifier
-{
-    int  count;
-    float threshold;
-    CvHaarClassifier* classifier;
-
-    int next;
-    int child;
-    int parent;
-} CvHaarStageClassifier;
-
-typedef struct CvHidHaarClassifierCascade CvHidHaarClassifierCascade;
-
-typedef struct CvHaarClassifierCascade
-{
-    int  flags;
-    int  count;
-    CvSize orig_window_size;
-    CvSize real_window_size;
-    double scale;
-    CvHaarStageClassifier* stage_classifier;
-    CvHidHaarClassifierCascade* hid_cascade;
-} CvHaarClassifierCascade;
-
-typedef struct CvAvgComp
-{
-    CvRect rect;
-    int neighbors;
-} CvAvgComp;
-
-/* Loads haar classifier cascade from a directory.
-   It is obsolete: convert your cascade to xml and use cvLoad instead */
-CVAPI(CvHaarClassifierCascade*) cvLoadHaarClassifierCascade(
-                    const char* directory, CvSize orig_window_size);
-
-CVAPI(void) cvReleaseHaarClassifierCascade( CvHaarClassifierCascade** cascade );
-
-#define CV_HAAR_DO_CANNY_PRUNING    1
-#define CV_HAAR_SCALE_IMAGE         2
-#define CV_HAAR_FIND_BIGGEST_OBJECT 4
-#define CV_HAAR_DO_ROUGH_SEARCH     8
-
-CVAPI(CvSeq*) cvHaarDetectObjects( const CvArr* image,
-                     CvHaarClassifierCascade* cascade, CvMemStorage* storage,
-                     double scale_factor CV_DEFAULT(1.1),
-                     int min_neighbors CV_DEFAULT(3), int flags CV_DEFAULT(0),
-                     CvSize min_size CV_DEFAULT(cvSize(0,0)), CvSize max_size CV_DEFAULT(cvSize(0,0)));
-
-/* sets images for haar classifier cascade */
-CVAPI(void) cvSetImagesForHaarClassifierCascade( CvHaarClassifierCascade* cascade,
-                                                const CvArr* sum, const CvArr* sqsum,
-                                                const CvArr* tilted_sum, double scale );
-
-/* runs the cascade on the specified window */
-CVAPI(int) cvRunHaarClassifierCascade( const CvHaarClassifierCascade* cascade,
-                                       CvPoint pt, int start_stage CV_DEFAULT(0));
-
-/** @} objdetect_c */
-
-#ifdef __cplusplus
-}
-
-CV_EXPORTS CvSeq* cvHaarDetectObjectsForROC( const CvArr* image,
-                     CvHaarClassifierCascade* cascade, CvMemStorage* storage,
-                     std::vector<int>& rejectLevels, std::vector<double>& levelWeightds,
-                     double scale_factor = 1.1,
-                     int min_neighbors = 3, int flags = 0,
-                     CvSize min_size = cvSize(0, 0), CvSize max_size = cvSize(0, 0),
-                     bool outputRejectLevels = false );
-
-#endif
-
-#endif /* __OPENCV_OBJDETECT_C_H__ */
diff --git a/IPL/include/opencv/opencv2/opencv.hpp b/IPL/include/opencv/opencv2/opencv.hpp
index 49b6a66..d17b94a 100644
--- a/IPL/include/opencv/opencv2/opencv.hpp
+++ b/IPL/include/opencv/opencv2/opencv.hpp
@@ -40,41 +40,56 @@
 //
 //M*/
 
-#ifndef __OPENCV_ALL_HPP__
-#define __OPENCV_ALL_HPP__
+#ifndef OPENCV_ALL_HPP
+#define OPENCV_ALL_HPP
 
+// File that defines which modules were included during the build of OpenCV
+// These are purely the defines of the correct HAVE_OPENCV_modulename values
 #include "opencv2/opencv_modules.hpp"
 
+// Then the list of defines is checked to include the correct headers
+// Core library is always included --> without it, no OpenCV functionality is available
 #include "opencv2/core.hpp"
-#ifdef HAVE_OPENCV_IMGPROC
-#include "opencv2/imgproc.hpp"
-#endif
-#ifdef HAVE_OPENCV_PHOTO
-#include "opencv2/photo.hpp"
-#endif
-#ifdef HAVE_OPENCV_VIDEO
-#include "opencv2/video.hpp"
+
+// Then the optional modules are checked
+#ifdef HAVE_OPENCV_CALIB3D
+#include "opencv2/calib3d.hpp"
 #endif
 #ifdef HAVE_OPENCV_FEATURES2D
 #include "opencv2/features2d.hpp"
 #endif
-#ifdef HAVE_OPENCV_OBJDETECT
-#include "opencv2/objdetect.hpp"
+#ifdef HAVE_OPENCV_DNN
+#include "opencv2/dnn.hpp"
 #endif
-#ifdef HAVE_OPENCV_CALIB3D
-#include "opencv2/calib3d.hpp"
+#ifdef HAVE_OPENCV_FLANN
+#include "opencv2/flann.hpp"
+#endif
+#ifdef HAVE_OPENCV_HIGHGUI
+#include "opencv2/highgui.hpp"
 #endif
 #ifdef HAVE_OPENCV_IMGCODECS
 #include "opencv2/imgcodecs.hpp"
 #endif
-#ifdef HAVE_OPENCV_VIDEOIO
-#include "opencv2/videoio.hpp"
-#endif
-#ifdef HAVE_OPENCV_HIGHGUI
-#include "opencv2/highgui.hpp"
+#ifdef HAVE_OPENCV_IMGPROC
+#include "opencv2/imgproc.hpp"
 #endif
 #ifdef HAVE_OPENCV_ML
 #include "opencv2/ml.hpp"
 #endif
+#ifdef HAVE_OPENCV_OBJDETECT
+#include "opencv2/objdetect.hpp"
+#endif
+#ifdef HAVE_OPENCV_PHOTO
+#include "opencv2/photo.hpp"
+#endif
+#ifdef HAVE_OPENCV_STITCHING
+#include "opencv2/stitching.hpp"
+#endif
+#ifdef HAVE_OPENCV_VIDEO
+#include "opencv2/video.hpp"
+#endif
+#ifdef HAVE_OPENCV_VIDEOIO
+#include "opencv2/videoio.hpp"
+#endif
 
 #endif
diff --git a/IPL/include/opencv/opencv2/opencv_modules.hpp b/IPL/include/opencv/opencv2/opencv_modules.hpp
index 9b85f33..cc3f93e 100644
--- a/IPL/include/opencv/opencv2/opencv_modules.hpp
+++ b/IPL/include/opencv/opencv2/opencv_modules.hpp
@@ -1,50 +1,63 @@
-/*
- *      ** File generated automatically, do not modify **
- *
- * This file defines the list of modules available in current build configuration
- *
- *
-*/
-
-#define HAVE_OPENCV_ARUCO
-#define HAVE_OPENCV_BGSEGM
-#define HAVE_OPENCV_BIOINSPIRED
-#define HAVE_OPENCV_CALIB3D
-#define HAVE_OPENCV_CCALIB
-#define HAVE_OPENCV_CORE
-#define HAVE_OPENCV_DATASETS
-#define HAVE_OPENCV_DNN
-#define HAVE_OPENCV_DPM
-#define HAVE_OPENCV_FACE
-#define HAVE_OPENCV_FEATURES2D
-#define HAVE_OPENCV_FLANN
-#define HAVE_OPENCV_FUZZY
-#define HAVE_OPENCV_HIGHGUI
-#define HAVE_OPENCV_IMGCODECS
-#define HAVE_OPENCV_IMGPROC
-#define HAVE_OPENCV_LINE_DESCRIPTOR
-#define HAVE_OPENCV_ML
-#define HAVE_OPENCV_OBJDETECT
-#define HAVE_OPENCV_OPTFLOW
-#define HAVE_OPENCV_PHOTO
-#define HAVE_OPENCV_PLOT
-#define HAVE_OPENCV_REG
-#define HAVE_OPENCV_RGBD
-#define HAVE_OPENCV_SALIENCY
-#define HAVE_OPENCV_SHAPE
-#define HAVE_OPENCV_STEREO
-#define HAVE_OPENCV_STITCHING
-#define HAVE_OPENCV_STRUCTURED_LIGHT
-#define HAVE_OPENCV_SUPERRES
-#define HAVE_OPENCV_SURFACE_MATCHING
-#define HAVE_OPENCV_TEXT
-#define HAVE_OPENCV_TRACKING
-#define HAVE_OPENCV_VIDEO
-#define HAVE_OPENCV_VIDEOIO
-#define HAVE_OPENCV_VIDEOSTAB
-#define HAVE_OPENCV_XFEATURES2D
-#define HAVE_OPENCV_XIMGPROC
-#define HAVE_OPENCV_XOBJDETECT
-#define HAVE_OPENCV_XPHOTO
-
-
+/*
+ *      ** File generated automatically, do not modify **
+ *
+ * This file defines the list of modules available in current build configuration
+ *
+ *
+*/
+
+// This definition means that OpenCV is built with enabled non-free code.
+// For example, patented algorithms for non-profit/non-commercial use only.
+/* #undef OPENCV_ENABLE_NONFREE */
+
+#define HAVE_OPENCV_ARUCO
+#define HAVE_OPENCV_BGSEGM
+#define HAVE_OPENCV_BIOINSPIRED
+#define HAVE_OPENCV_CALIB3D
+#define HAVE_OPENCV_CCALIB
+#define HAVE_OPENCV_CORE
+#define HAVE_OPENCV_DATASETS
+#define HAVE_OPENCV_DNN
+#define HAVE_OPENCV_DNN_OBJDETECT
+#define HAVE_OPENCV_DNN_SUPERRES
+#define HAVE_OPENCV_DPM
+#define HAVE_OPENCV_FACE
+#define HAVE_OPENCV_FEATURES2D
+#define HAVE_OPENCV_FLANN
+#define HAVE_OPENCV_FUZZY
+#define HAVE_OPENCV_GAPI
+#define HAVE_OPENCV_HFS
+#define HAVE_OPENCV_HIGHGUI
+#define HAVE_OPENCV_IMG_HASH
+#define HAVE_OPENCV_IMGCODECS
+#define HAVE_OPENCV_IMGPROC
+#define HAVE_OPENCV_INTENSITY_TRANSFORM
+#define HAVE_OPENCV_LINE_DESCRIPTOR
+#define HAVE_OPENCV_ML
+#define HAVE_OPENCV_OBJDETECT
+#define HAVE_OPENCV_OPTFLOW
+#define HAVE_OPENCV_PHASE_UNWRAPPING
+#define HAVE_OPENCV_PHOTO
+#define HAVE_OPENCV_PLOT
+#define HAVE_OPENCV_QUALITY
+#define HAVE_OPENCV_RAPID
+#define HAVE_OPENCV_REG
+#define HAVE_OPENCV_RGBD
+#define HAVE_OPENCV_SALIENCY
+#define HAVE_OPENCV_SHAPE
+#define HAVE_OPENCV_STEREO
+#define HAVE_OPENCV_STITCHING
+#define HAVE_OPENCV_STRUCTURED_LIGHT
+#define HAVE_OPENCV_SUPERRES
+#define HAVE_OPENCV_SURFACE_MATCHING
+#define HAVE_OPENCV_TEXT
+#define HAVE_OPENCV_TRACKING
+#define HAVE_OPENCV_VIDEO
+#define HAVE_OPENCV_VIDEOIO
+#define HAVE_OPENCV_VIDEOSTAB
+#define HAVE_OPENCV_XFEATURES2D
+#define HAVE_OPENCV_XIMGPROC
+#define HAVE_OPENCV_XOBJDETECT
+#define HAVE_OPENCV_XPHOTO
+
+
diff --git a/IPL/include/opencv/opencv2/optflow.hpp b/IPL/include/opencv/opencv2/optflow.hpp
index 667adcb..093b5fe 100644
--- a/IPL/include/opencv/opencv2/optflow.hpp
+++ b/IPL/include/opencv/opencv2/optflow.hpp
@@ -66,11 +66,14 @@ Functions reading and writing .flo files in "Middlebury" format, see: <http://vi
 
  */
 
+#include "opencv2/optflow/pcaflow.hpp"
+#include "opencv2/optflow/sparse_matching_gpc.hpp"
+#include "opencv2/optflow/rlofflow.hpp"
 namespace cv
 {
 namespace optflow
 {
-    
+
 //! @addtogroup optflow
 //! @{
 
@@ -134,27 +137,6 @@ CV_EXPORTS_W void calcOpticalFlowSparseToDense ( InputArray from, InputArray to,
                                                  bool use_post_proc = true, float fgs_lambda = 500.0f,
                                                  float fgs_sigma = 1.5f );
 
-/** @brief Read a .flo file
-
-@param path Path to the file to be loaded
-
-The function readOpticalFlow loads a flow field from a file and returns it as a single matrix.
-Resulting Mat has a type CV_32FC2 - floating-point, 2-channel. First channel corresponds to the
-flow in the horizontal direction (u), second - vertical (v).
- */
-CV_EXPORTS_W Mat readOpticalFlow( const String& path );
-/** @brief Write a .flo to disk
-
-@param path Path to the file to be written
-@param flow Flow field to be stored
-
-The function stores a flow field in a file, returns true on success, false otherwise.
-The flow field must be a 2-channel, floating-point matrix (CV_32FC2). First channel corresponds
-to the flow in the horizontal direction (u), second - vertical (v).
- */
-CV_EXPORTS_W bool writeOpticalFlow( const String& path, InputArray flow );
-
-
 /** @brief DeepFlow optical flow algorithm implementation.
 
 The class implements the DeepFlow optical flow algorithm described in @cite Weinzaepfel2013 . See
@@ -191,6 +173,132 @@ CV_EXPORTS_W Ptr<DenseOpticalFlow> createOptFlow_Farneback();
 //! Additional interface to the SparseToDenseFlow algorithm - calcOpticalFlowSparseToDense()
 CV_EXPORTS_W Ptr<DenseOpticalFlow> createOptFlow_SparseToDense();
 
+/** @brief "Dual TV L1" Optical Flow Algorithm.
+
+The class implements the "Dual TV L1" optical flow algorithm described in @cite Zach2007 and
+@cite Javier2012 .
+Here are important members of the class that control the algorithm, which you can set after
+constructing the class instance:
+
+-   member double tau
+    Time step of the numerical scheme.
+
+-   member double lambda
+    Weight parameter for the data term, attachment parameter. This is the most relevant
+    parameter, which determines the smoothness of the output. The smaller this parameter is,
+    the smoother the solutions we obtain. It depends on the range of motions of the images, so
+    its value should be adapted to each image sequence.
+
+-   member double theta
+    Weight parameter for (u - v)\^2, tightness parameter. It serves as a link between the
+    attachment and the regularization terms. In theory, it should have a small value in order
+    to maintain both parts in correspondence. The method is stable for a large range of values
+    of this parameter.
+
+-   member int nscales
+    Number of scales used to create the pyramid of images.
+
+-   member int warps
+    Number of warpings per scale. Represents the number of times that I1(x+u0) and grad(
+    I1(x+u0) ) are computed per scale. This is a parameter that assures the stability of the
+    method. It also affects the running time, so it is a compromise between speed and
+    accuracy.
+
+-   member double epsilon
+    Stopping criterion threshold used in the numerical scheme, which is a trade-off between
+    precision and running time. A small value will yield more accurate solutions at the
+    expense of a slower convergence.
+
+-   member int iterations
+    Stopping criterion iterations number used in the numerical scheme.
+
+C. Zach, T. Pock and H. Bischof, "A Duality Based Approach for Realtime TV-L1 Optical Flow".
+Javier Sanchez, Enric Meinhardt-Llopis and Gabriele Facciolo. "TV-L1 Optical Flow Estimation".
+*/
+class CV_EXPORTS_W DualTVL1OpticalFlow : public DenseOpticalFlow
+{
+public:
+    //! @brief Time step of the numerical scheme
+    /** @see setTau */
+    CV_WRAP virtual double getTau() const = 0;
+    /** @copybrief getTau @see getTau */
+    CV_WRAP virtual void setTau(double val) = 0;
+    //! @brief Weight parameter for the data term, attachment parameter
+    /** @see setLambda */
+    CV_WRAP virtual double getLambda() const = 0;
+    /** @copybrief getLambda @see getLambda */
+    CV_WRAP virtual void setLambda(double val) = 0;
+    //! @brief Weight parameter for (u - v)^2, tightness parameter
+    /** @see setTheta */
+    CV_WRAP virtual double getTheta() const = 0;
+    /** @copybrief getTheta @see getTheta */
+    CV_WRAP virtual void setTheta(double val) = 0;
+    //! @brief coefficient for additional illumination variation term
+    /** @see setGamma */
+    CV_WRAP virtual double getGamma() const = 0;
+    /** @copybrief getGamma @see getGamma */
+    CV_WRAP virtual void setGamma(double val) = 0;
+    //! @brief Number of scales used to create the pyramid of images
+    /** @see setScalesNumber */
+    CV_WRAP virtual int getScalesNumber() const = 0;
+    /** @copybrief getScalesNumber @see getScalesNumber */
+    CV_WRAP virtual void setScalesNumber(int val) = 0;
+    //! @brief Number of warpings per scale
+    /** @see setWarpingsNumber */
+    CV_WRAP virtual int getWarpingsNumber() const = 0;
+    /** @copybrief getWarpingsNumber @see getWarpingsNumber */
+    CV_WRAP virtual void setWarpingsNumber(int val) = 0;
+    //! @brief Stopping criterion threshold used in the numerical scheme, which is a trade-off between precision and running time
+    /** @see setEpsilon */
+    CV_WRAP virtual double getEpsilon() const = 0;
+    /** @copybrief getEpsilon @see getEpsilon */
+    CV_WRAP virtual void setEpsilon(double val) = 0;
+    //! @brief Inner iterations (between outlier filtering) used in the numerical scheme
+    /** @see setInnerIterations */
+    CV_WRAP virtual int getInnerIterations() const = 0;
+    /** @copybrief getInnerIterations @see getInnerIterations */
+    CV_WRAP virtual void setInnerIterations(int val) = 0;
+    //! @brief Outer iterations (number of inner loops) used in the numerical scheme
+    /** @see setOuterIterations */
+    CV_WRAP virtual int getOuterIterations() const = 0;
+    /** @copybrief getOuterIterations @see getOuterIterations */
+    CV_WRAP virtual void setOuterIterations(int val) = 0;
+    //! @brief Use initial flow
+    /** @see setUseInitialFlow */
+    CV_WRAP virtual bool getUseInitialFlow() const = 0;
+    /** @copybrief getUseInitialFlow @see getUseInitialFlow */
+    CV_WRAP virtual void setUseInitialFlow(bool val) = 0;
+    //! @brief Step between scales (<1)
+    /** @see setScaleStep */
+    CV_WRAP virtual double getScaleStep() const = 0;
+    /** @copybrief getScaleStep @see getScaleStep */
+    CV_WRAP virtual void setScaleStep(double val) = 0;
+    //! @brief Median filter kernel size (1 = no filter) (3 or 5)
+    /** @see setMedianFiltering */
+    CV_WRAP virtual int getMedianFiltering() const = 0;
+    /** @copybrief getMedianFiltering @see getMedianFiltering */
+    CV_WRAP virtual void setMedianFiltering(int val) = 0;
+
+    /** @brief Creates instance of cv::optflow::DualTVL1OpticalFlow*/
+    CV_WRAP static Ptr<DualTVL1OpticalFlow> create(
+                                            double tau = 0.25,
+                                            double lambda = 0.15,
+                                            double theta = 0.3,
+                                            int nscales = 5,
+                                            int warps = 5,
+                                            double epsilon = 0.01,
+                                            int innnerIterations = 30,
+                                            int outerIterations = 10,
+                                            double scaleStep = 0.8,
+                                            double gamma = 0.0,
+                                            int medianFiltering = 5,
+                                            bool useInitialFlow = false);
+};
+
+/** @brief Creates instance of cv::optflow::DualTVL1OpticalFlow
+*/
+CV_EXPORTS_W Ptr<DualTVL1OpticalFlow> createOptFlow_DualTVL1();
+
 //! @}
 
 } //optflow
diff --git a/IPL/include/opencv/opencv2/optflow/pcaflow.hpp b/IPL/include/opencv/opencv2/optflow/pcaflow.hpp
new file mode 100644
index 0000000..78946f6
--- /dev/null
+++ b/IPL/include/opencv/opencv2/optflow/pcaflow.hpp
@@ -0,0 +1,149 @@
+/*
+By downloading, copying, installing or using the software you agree to this
+license. If you do not agree to this license, do not download, install,
+copy or use the software.
+
+
+                          License Agreement
+               For Open Source Computer Vision Library
+                       (3-clause BSD License)
+
+Copyright (C) 2016, OpenCV Foundation, all rights reserved.
+Third party copyrights are property of their respective owners.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+  * Redistributions of source code must retain the above copyright notice,
+    this list of conditions and the following disclaimer.
+
+  * Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+  * Neither the names of the copyright holders nor the names of the contributors
+    may be used to endorse or promote products derived from this software
+    without specific prior written permission.
+
+This software is provided by the copyright holders and contributors "as is" and
+any express or implied warranties, including, but not limited to, the implied
+warranties of merchantability and fitness for a particular purpose are
+disclaimed. In no event shall copyright holders or contributors be liable for
+any direct, indirect, incidental, special, exemplary, or consequential damages
+(including, but not limited to, procurement of substitute goods or services;
+loss of use, data, or profits; or business interruption) however caused
+and on any theory of liability, whether in contract, strict liability,
+or tort (including negligence or otherwise) arising in any way out of
+the use of this software, even if advised of the possibility of such damage.
+*/
+
+/**
+ * @file   pcaflow.hpp
+ * @author Vladislav Samsonov <vvladxx@gmail.com>
+ * @brief  Implementation of the PCAFlow algorithm from the following paper:
+ * http://files.is.tue.mpg.de/black/papers/cvpr2015_pcaflow.pdf
+ *
+ * @cite Wulff:CVPR:2015
+ *
+ * There are some key differences which distinguish this algorithm from the original PCAFlow (see paper):
+ * - Discrete Cosine Transform basis is used instead of basis extracted with PCA.
+ *   Reasoning: DCT basis has comparable performance and it doesn't require additional storage space.
+ *   Also, this decision helps to avoid overloading the algorithm with a lot of external input.
+ * - Usage of built-in OpenCV feature tracking instead of libviso.
+*/
+
+#ifndef __OPENCV_OPTFLOW_PCAFLOW_HPP__
+#define __OPENCV_OPTFLOW_PCAFLOW_HPP__
+
+#include "opencv2/core.hpp"
+#include "opencv2/video.hpp"
+
+namespace cv
+{
+namespace optflow
+{
+
+//! @addtogroup optflow
+//! @{
+
+/** @brief
+ * This class can be used for imposing a learned prior on the resulting optical flow.
+ * Solution will be regularized according to this prior.
+ * You need to generate appropriate prior file with "learn_prior.py" script beforehand.
+ */
+class CV_EXPORTS_W PCAPrior
+{
+private:
+  Mat L1;
+  Mat L2;
+  Mat c1;
+  Mat c2;
+
+public:
+  PCAPrior( const char *pathToPrior );
+
+  int getPadding() const { return L1.size().height; }
+
+  int getBasisSize() const { return L1.size().width; }
+
+  void fillConstraints( float *A1, float *A2, float *b1, float *b2 ) const;
+};
+
+/** @brief PCAFlow algorithm.
+ */
+class CV_EXPORTS_W OpticalFlowPCAFlow : public DenseOpticalFlow
+{
+protected:
+  const Ptr<const PCAPrior> prior;
+  const Size basisSize;
+  const float sparseRate;              // (0 .. 0.1)
+  const float retainedCornersFraction; // [0 .. 1]
+  const float occlusionsThreshold;
+  const float dampingFactor;
+  const float claheClip;
+  bool useOpenCL;
+
+public:
+  /** @brief Creates an instance of PCAFlow algorithm.
+   * @param _prior Learned prior or no prior (default). @see cv::optflow::PCAPrior
+   * @param _basisSize Number of basis vectors.
+   * @param _sparseRate Controls density of sparse matches.
+   * @param _retainedCornersFraction Retained corners fraction.
+   * @param _occlusionsThreshold Occlusion threshold.
+   * @param _dampingFactor Regularization term for solving least-squares. It is not related to the prior regularization.
+   * @param _claheClip Clip parameter for CLAHE.
+   */
+  OpticalFlowPCAFlow( Ptr<const PCAPrior> _prior = Ptr<const PCAPrior>(), const Size _basisSize = Size( 18, 14 ),
+                      float _sparseRate = 0.024, float _retainedCornersFraction = 0.2,
+                      float _occlusionsThreshold = 0.0003, float _dampingFactor = 0.00002, float _claheClip = 14 );
+
+  void calc( InputArray I0, InputArray I1, InputOutputArray flow ) CV_OVERRIDE;
+  void collectGarbage() CV_OVERRIDE;
+
+private:
+  void findSparseFeatures( UMat &from, UMat &to, std::vector<Point2f> &features,
+                           std::vector<Point2f> &predictedFeatures ) const;
+
+  void removeOcclusions( UMat &from, UMat &to, std::vector<Point2f> &features,
+                         std::vector<Point2f> &predictedFeatures ) const;
+
+  void getSystem( OutputArray AOut, OutputArray b1Out, OutputArray b2Out, const std::vector<Point2f> &features,
+                  const std::vector<Point2f> &predictedFeatures, const Size size );
+
+  void getSystem( OutputArray A1Out, OutputArray A2Out, OutputArray b1Out, OutputArray b2Out,
+                  const std::vector<Point2f> &features, const std::vector<Point2f> &predictedFeatures,
+                  const Size size );
+
+  OpticalFlowPCAFlow& operator=( const OpticalFlowPCAFlow& ); // make it non-assignable
+};
+
+/** @brief Creates an instance of PCAFlow
+*/
+CV_EXPORTS_W Ptr<DenseOpticalFlow> createOptFlow_PCAFlow();
+
+//! @}
+
+}
+}
+
+#endif
diff --git a/IPL/include/opencv/opencv2/optflow/rlofflow.hpp b/IPL/include/opencv/opencv2/optflow/rlofflow.hpp
new file mode 100644
index 0000000..58daf53
--- /dev/null
+++ b/IPL/include/opencv/opencv2/optflow/rlofflow.hpp
@@ -0,0 +1,551 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+#ifndef __OPENCV_OPTFLOW_RLOFFLOW_HPP__
+#define __OPENCV_OPTFLOW_RLOFFLOW_HPP__
+
+#include "opencv2/core.hpp"
+#include "opencv2/video.hpp"
+
+namespace cv
+{
+namespace optflow
+{
+//! @addtogroup optflow
+//! @{
+
+enum SupportRegionType {
+    SR_FIXED = 0,           /**<  Apply a constant support region */
+    SR_CROSS = 1            /**<  Apply an adaptive support region obtained by cross-based segmentation
+                             *    as described in @cite Senst2014
+                            */
+};
+enum SolverType {
+    ST_STANDART = 0,        /**< Apply standard iterative refinement */
+    ST_BILINEAR = 1         /**< Apply optimized iterative refinement based on bilinear equation solutions
+                             *   as described in @cite Senst2013
+                            */
+};
+
+enum InterpolationType
+{
+    INTERP_GEO = 0,    /**<  Fast geodesic interpolation, see @cite Geistert2016 */
+    INTERP_EPIC = 1,   /**<  Edge-preserving interpolation using ximgproc::EdgeAwareInterpolator, see @cite Revaud2015,Geistert2016. */
+    INTERP_RIC = 2,    /**<  SLIC based robust interpolation using ximgproc::RICInterpolator, see @cite Hu2017. */
+};
+
+/** @brief This is used to store and set up the parameters of the robust local optical flow (RLOF) algorithm.
+ *
+ * The RLOF is a fast local optical flow approach described in @cite Senst2012 @cite Senst2013 @cite Senst2014
+ * and @cite Senst2016 similar to the pyramidal iterative Lucas-Kanade method as
+ * proposed by @cite Bouguet00. More details and experiments can be found in the following thesis @cite Senst2019.
+ * The implementation is derived from optflow::calcOpticalFlowPyrLK().
+ * This RLOF implementation can be seen as an improved pyramidal iterative Lucas-Kanade and includes
+ * a set of improving modules. The main improvements in respect to the pyramidal iterative Lucas-Kanade
+ * are:
+ *  - A more robust redescending M-estimator framework (see @cite Senst2012) to improve the accuracy at
+ *     motion boundaries and appearing and disappearing pixels.
+ *  - An adaptive support region strategy to improve the accuracy at motion boundaries and to reduce the
+ *     corona effect, i.e. oversmoothing of the PLK at motion/object boundaries. The cross-based segmentation
+ *     strategy (SR_CROSS) proposed in @cite Senst2014 uses a simple segmentation approach to obtain the optimal
+ *     shape of the support region.
+ *  - To deal with illumination changes (outdoor sequences and shadow) the intensity constancy assumption
+ *     based optical flow equation has been adapted with the Gennert and Negahdaripour illumination model
+ *     (see @cite Senst2016). This model can be switched on/off with the useIlluminationModel variable.
+ *  - By using a global motion prior initialization (see @cite Senst2016) of the iterative refinement
+ *     the accuracy could be significantly improved for large displacements. This initialization can be
+ *     switched on and off with the useGlobalMotionPrior variable.
+ *
+ * The RLOF can be computed with the SparseOpticalFlow class or function interface to track a set of features
+ * or with the DenseOpticalFlow class or function interface to compute dense optical flow.
+ *
+ * @see optflow::DenseRLOFOpticalFlow, optflow::calcOpticalFlowDenseRLOF(), optflow::SparseRLOFOpticalFlow, optflow::calcOpticalFlowSparseRLOF()
+ */
+class CV_EXPORTS_W RLOFOpticalFlowParameter{
+public:
+    RLOFOpticalFlowParameter()
+        :solverType(ST_BILINEAR)
+        ,supportRegionType(SR_CROSS)
+        ,normSigma0(std::numeric_limits<float>::max())
+        ,normSigma1(std::numeric_limits<float>::max())
+        ,smallWinSize(9)
+        ,largeWinSize(21)
+        ,crossSegmentationThreshold(25)
+        ,maxLevel(4)
+        ,useInitialFlow(false)
+        ,useIlluminationModel(true)
+        ,useGlobalMotionPrior(true)
+        ,maxIteration(30)
+        ,minEigenValue(0.0001f)
+        ,globalMotionRansacThreshold(10)
+    {}
+
+    SolverType solverType;
+    /**< Variable specifies the iterative refinement strategy. Please consider citing  @cite Senst2013 when
+     *  using ST_BILINEAR.
+    */
+
+    SupportRegionType supportRegionType;
+    /**< Variable specifies the support region shape extraction or shrinking strategy.
+    */
+
+    float normSigma0;
+    /**< &sigma; parameter of the shrinked Hampel norm introduced in @cite Senst2012. If
+     * &sigma; = std::numeric_limits<float>::max() the least-square estimator will be used
+     * instead of the M-estimator. Although the M-estimator is more robust against outliers in the support
+     * region, the least-square estimator can be faster in computation.
+    */
+    float normSigma1;
+    /**< &sigma; parameter of the shrinked Hampel norm introduced in @cite Senst2012. If
+     * &sigma; = std::numeric_limits<float>::max() the least-square estimator will be used
+     * instead of the M-estimator. Although the M-estimator is more robust against outliers in the support
+     * region, the least-square estimator can be faster in computation.
+    */
+    int smallWinSize;
+    /**< Minimal window size of the support region. This parameter is only used if supportRegionType is SR_CROSS.
+    */
+    int largeWinSize;
+    /**< Maximal window size of the support region. If supportRegionType is SR_FIXED this gives the exact support
+     * region size. The speed of the RLOF is related to the applied win sizes. The smaller the window size the lower is the runtime,
+     * but the more sensitive to noise is the method.
+    */
+    int crossSegmentationThreshold;
+    /**< Color similarity threshold used by cross-based segmentation following @cite Senst2014 .
+     *   (Only used if supportRegionType is SR_CROSS). With the cross-based segmentation
+     *   motion boundaries can be computed more accurately.
+    */
+    int maxLevel;
+    /**< Maximal number of pyramid levels used. The larger this value is, the more likely it is
+     *   to obtain accurate solutions for long-range motions. The runtime is linearly related to
+     *   this parameter.
+    */
+    bool useInitialFlow;
+    /**< Use next point list as initial values. A good initialization can improve the algorithm
+     *   accuracy and reduce the runtime by a faster convergence of the iteration refinement.
+    */
+    bool useIlluminationModel;
+    /**< Use the Gennert and Negahdaripour illumination model instead of the intensity brightness
+     *   constraint. (proposed in @cite Senst2016 ) This model is defined as follows:
+     *   \f[ I(\mathbf{x},t) + m \cdot I(\mathbf{x},t) + c = I(\mathbf{x},t+1) \f]
+     *   and contains with m and c a multiplicative and additive term which makes the estimate
+     *   more robust against illumination changes. The computational complexity is increased by
+     *   enabling the illumination model.
+    */
+    bool useGlobalMotionPrior;
+    /**< Use global motion prior initialisation has been introduced in @cite Senst2016 . It
+     *   allows to be more accurate for long-range motion. The computational complexity is
+     *   slightly increased by enabling the global motion prior initialisation.
+    */
+    int maxIteration;
+    /**< Number of maximal iterations used for the iterative refinement. Lower values can
+     *   reduce the runtime but also the accuracy.
+    */
+    float minEigenValue;
+    /**< Threshold for the minimal eigenvalue of the gradient matrix defines when to abort the
+     *   iterative refinement.
+    */
+    float globalMotionRansacThreshold;
+    /**< To apply the global motion prior, motion vectors will be computed on a regularly sampled grid, which
+     *   are the basis for Homography estimation using RANSAC. The reprojection threshold is based on
+     *   the n-th percentile (given by this value [0 ... 100]) of the motion vector magnitudes.
+     *   See @cite Senst2016 for more details.
+    */
+
+    //! @brief Enable M-estimator or disable and use least-square estimator.
+    /** Enables M-estimator by setting sigma parameters to (3.2, 7.0). Disabling M-estimator can reduce
+     *  runtime, while enabling can improve the accuracy.
+     *  @param val If true M-estimator is used. If false least-square estimator is used.
+     *    @see setNormSigma0, setNormSigma1
+    */
+    CV_WRAP void setUseMEstimator(bool val);
+
+    CV_WRAP void setSolverType(SolverType val);
+    CV_WRAP SolverType getSolverType() const;
+
+    CV_WRAP void setSupportRegionType(SupportRegionType val);
+    CV_WRAP SupportRegionType getSupportRegionType() const;
+
+    CV_WRAP void setNormSigma0(float val);
+    CV_WRAP float getNormSigma0() const;
+
+    CV_WRAP void setNormSigma1(float val);
+    CV_WRAP float getNormSigma1() const;
+
+    CV_WRAP void setSmallWinSize(int val);
+    CV_WRAP int getSmallWinSize() const;
+
+    CV_WRAP void setLargeWinSize(int val);
+    CV_WRAP int getLargeWinSize() const;
+
+    CV_WRAP void setCrossSegmentationThreshold(int val);
+    CV_WRAP int getCrossSegmentationThreshold() const;
+
+    CV_WRAP void setMaxLevel(int val);
+    CV_WRAP int getMaxLevel() const;
+
+    CV_WRAP void setUseInitialFlow(bool val);
+    CV_WRAP bool getUseInitialFlow() const;
+
+    CV_WRAP void setUseIlluminationModel(bool val);
+    CV_WRAP bool getUseIlluminationModel() const;
+
+    CV_WRAP void setUseGlobalMotionPrior(bool val);
+    CV_WRAP bool getUseGlobalMotionPrior() const;
+
+    CV_WRAP void setMaxIteration(int val);
+    CV_WRAP int getMaxIteration() const;
+
+    CV_WRAP void setMinEigenValue(float val);
+    CV_WRAP float getMinEigenValue() const;
+
+    CV_WRAP void setGlobalMotionRansacThreshold(float val);
+    CV_WRAP float getGlobalMotionRansacThreshold() const;
+
+    //! @brief Creates instance of optflow::RLOFOpticalFlowParameter
+    CV_WRAP static Ptr<RLOFOpticalFlowParameter> create();
+};
+
+/** @brief Fast dense optical flow computation based on robust local optical flow (RLOF) algorithms and sparse-to-dense interpolation
+ * scheme.
+ *
+ * The RLOF is a fast local optical flow approach described in @cite Senst2012 @cite Senst2013 @cite Senst2014
+ * and @cite Senst2016 similar to the pyramidal iterative Lucas-Kanade method as
+ * proposed by @cite Bouguet00. More details and experiments can be found in the following thesis @cite Senst2019.
+ * The implementation is derived from optflow::calcOpticalFlowPyrLK().
+ *
+ * The sparse-to-dense interpolation scheme allows for fast computation of dense optical flow using RLOF (see @cite Geistert2016).
+ * For this scheme the following steps are applied:
+ * -# motion vectors seeded at a regularly sampled grid are computed. The sparsity of this grid can be configured with setGridStep
+ * -# (optionally) erroneous motion vectors are filtered based on the forward backward confidence. The threshold can be configured
+ * with setForwardBackward. The filter is only applied if the threshold >0 but then the runtime is doubled due to the estimation
+ * of the backward flow.
+ * -# Vector field interpolation is applied to the motion vector set to obtain a dense vector field.
+ *
+ * For the RLOF configuration see optflow::RLOFOpticalFlowParameter for further details.
+ * Parameters have been described in @cite Senst2012 @cite Senst2013 @cite Senst2014 and @cite Senst2016.
+ *
+ * @note If the grid size is set to (1,1) and the forward backward threshold <= 0 then a pixelwise dense optical flow field is
+ * computed by RLOF without using interpolation.
+ *
+ * @see optflow::calcOpticalFlowDenseRLOF(), optflow::RLOFOpticalFlowParameter
+*/
+class CV_EXPORTS_W DenseRLOFOpticalFlow : public DenseOpticalFlow
+{
+public:
+    //! @brief Configuration of the RLOF algorithm.
+    /**
+        @see optflow::RLOFOpticalFlowParameter, getRLOFOpticalFlowParameter
+    */
+    CV_WRAP virtual void setRLOFOpticalFlowParameter(Ptr<RLOFOpticalFlowParameter>  val) = 0;
+    /** @copybrief setRLOFOpticalFlowParameter
+        @see optflow::RLOFOpticalFlowParameter, setRLOFOpticalFlowParameter
+    */
+    CV_WRAP virtual Ptr<RLOFOpticalFlowParameter>  getRLOFOpticalFlowParameter() const = 0;
+    //! @brief Threshold for the forward backward confidence check
+    /**For each grid point \f$ \mathbf{x} \f$ a motion vector \f$ d_{I0,I1}(\mathbf{x}) \f$ is computed.
+     *     If the forward backward error \f[ EP_{FB} = || d_{I0,I1} + d_{I1,I0} || \f]
+     *     is larger than threshold given by this function then the motion vector will not be used by the following
+     *    vector field interpolation. \f$ d_{I1,I0} \f$ denotes the backward flow. Note, the forward backward test
+     *    will only be applied if the threshold > 0. This may result in a doubled runtime for the motion estimation.
+     *    @see getForwardBackward, setGridStep
+    */
+    CV_WRAP virtual void setForwardBackward(float val) = 0;
+    /** @copybrief setForwardBackward
+        @see setForwardBackward
+    */
+    CV_WRAP virtual float getForwardBackward() const = 0;
+    //! @brief Size of the grid to spawn the motion vectors.
+    /** For each grid point a motion vector is computed. Some motion vectors will be removed due to the forward backward
+     *  threshold (if set >0). The rest will be the base of the vector field interpolation.
+     *    @see getForwardBackward, setGridStep
+    */
+    CV_WRAP virtual Size getGridStep() const = 0;
+    /** @copybrief getGridStep
+     *    @see getGridStep
+     */
+    CV_WRAP virtual void setGridStep(Size val) = 0;
+
+    //! @brief Interpolation used to compute the dense optical flow.
+    /** Two interpolation algorithms are supported
+     * - **INTERP_GEO** applies the fast geodesic interpolation, see @cite Geistert2016.
+     * - **INTERP_EPIC** applies the edge-preserving interpolation, see @cite Revaud2015,Geistert2016.
+     * @see ximgproc::EdgeAwareInterpolator, getInterpolation
+    */
+    CV_WRAP virtual void setInterpolation(InterpolationType val) = 0;
+    /** @copybrief setInterpolation
+     *    @see ximgproc::EdgeAwareInterpolator, setInterpolation
+     */
+    CV_WRAP virtual InterpolationType getInterpolation() const = 0;
+    //! @brief see ximgproc::EdgeAwareInterpolator() K value.
+    /** K is a number of nearest-neighbor matches considered, when fitting a locally affine
+     *    model. Usually it should be around 128. However, lower values would make the interpolation noticeably faster.
+     *    @see ximgproc::EdgeAwareInterpolator,  setEPICK
+    */
+    CV_WRAP virtual int getEPICK() const = 0;
+    /** @copybrief getEPICK
+     *    @see ximgproc::EdgeAwareInterpolator, getEPICK
+     */
+    CV_WRAP virtual void setEPICK(int val) = 0;
+    //! @brief see ximgproc::EdgeAwareInterpolator() sigma value.
+    /** Sigma is a parameter defining how fast the weights decrease in the locally-weighted affine
+     *  fitting. Higher values can help preserve fine details, lower values can help to get rid of noise in the
+     *  output flow.
+     *    @see ximgproc::EdgeAwareInterpolator, setEPICSigma
+    */
+    CV_WRAP virtual float getEPICSigma() const = 0;
+    /** @copybrief getEPICSigma
+     *  @see ximgproc::EdgeAwareInterpolator, getEPICSigma
+     */
+    CV_WRAP virtual void setEPICSigma(float val) = 0;
+    //! @brief  see ximgproc::EdgeAwareInterpolator() lambda value.
+    /** Lambda is a parameter defining the weight of the edge-aware term in geodesic distance,
+     *    should be in the range of 0 to 1000.
+     *    @see ximgproc::EdgeAwareInterpolator, setEPICLambda
+    */
+    CV_WRAP virtual float getEPICLambda() const = 0;
+    /** @copybrief getEPICLambda
+     *    @see ximgproc::EdgeAwareInterpolator, getEPICLambda
+    */
+    CV_WRAP virtual void setEPICLambda(float val) = 0;
+    //! @brief see ximgproc::EdgeAwareInterpolator().
+    /** Sets the respective fastGlobalSmootherFilter() parameter.
+     *    @see ximgproc::EdgeAwareInterpolator, setFgsLambda
+    */
+    CV_WRAP virtual float getFgsLambda() const = 0;
+    /** @copybrief getFgsLambda
+     *    @see ximgproc::EdgeAwareInterpolator, ximgproc::fastGlobalSmootherFilter, getFgsLambda
+    */
+    CV_WRAP virtual void setFgsLambda(float val) = 0;
+    //! @brief see ximgproc::EdgeAwareInterpolator().
+    /** Sets the respective fastGlobalSmootherFilter() parameter.
+     *    @see ximgproc::EdgeAwareInterpolator, ximgproc::fastGlobalSmootherFilter, setFgsSigma
+    */
+    CV_WRAP virtual float getFgsSigma() const = 0;
+    /** @copybrief getFgsSigma
+     *    @see ximgproc::EdgeAwareInterpolator, ximgproc::fastGlobalSmootherFilter, getFgsSigma
+     */
+    CV_WRAP virtual void setFgsSigma(float val) = 0;
+    //! @brief enables ximgproc::fastGlobalSmootherFilter
+    /**
+     * @see getUsePostProc
+     */
+    CV_WRAP virtual void setUsePostProc(bool val) = 0;
+    /** @copybrief setUsePostProc
+     *    @see ximgproc::fastGlobalSmootherFilter, setUsePostProc
+     */
+    CV_WRAP virtual bool getUsePostProc() const = 0;
+    //! @brief enables VariationalRefinement
+    /**
+     * @see getUseVariationalRefinement
+     */
+    CV_WRAP virtual void setUseVariationalRefinement(bool val) = 0;
+    /** @copybrief setUseVariationalRefinement
+     *    @see setUseVariationalRefinement
+     */
+    CV_WRAP virtual bool getUseVariationalRefinement() const = 0;
+    //! @brief Parameter to tune the approximate size of the superpixel used for oversegmentation.
+    /**
+     * @see cv::ximgproc::createSuperpixelSLIC, cv::ximgproc::RICInterpolator
+     */
+    CV_WRAP virtual void setRICSPSize(int val) = 0;
+    /** @copybrief setRICSPSize
+    *    @see setRICSPSize
+    */
+    CV_WRAP virtual int  getRICSPSize() const = 0;
+    /** @brief Parameter to choose superpixel algorithm variant to use:
+     * - cv::ximgproc::SLICType SLIC segments image using a desired region_size (value: 100)
+     * - cv::ximgproc::SLICType SLICO will optimize using adaptive compactness factor (value: 101)
+     * - cv::ximgproc::SLICType MSLIC will optimize using manifold methods resulting in more content-sensitive superpixels (value: 102).
+     *  @see cv::ximgproc::createSuperpixelSLIC, cv::ximgproc::RICInterpolator
+    */
+    CV_WRAP virtual void setRICSLICType(int val) = 0;
+    /** @copybrief setRICSLICType
+     *    @see setRICSLICType
+     */
+    CV_WRAP virtual int  getRICSLICType() const = 0;
+    //! @brief Creates instance of optflow::DenseRLOFOpticalFlow
+    /**
+     *    @param rlofParam see optflow::RLOFOpticalFlowParameter
+     *    @param forwardBackwardThreshold see setForwardBackward
+     *    @param gridStep see setGridStep
+     *    @param interp_type see setInterpolation
+     *    @param epicK see setEPICK
+     *    @param epicSigma see setEPICSigma
+     *    @param epicLambda see setEPICLambda
+     *    @param ricSPSize see setRICSPSize
+     *    @param ricSLICType see setRICSLICType
+     *    @param use_post_proc see setUsePostProc
+     *    @param fgsLambda see setFgsLambda
+     *    @param fgsSigma see setFgsSigma
+     *    @param use_variational_refinement see setUseVariationalRefinement
+    */
+    CV_WRAP static Ptr<DenseRLOFOpticalFlow> create(
+        Ptr<RLOFOpticalFlowParameter> rlofParam = Ptr<RLOFOpticalFlowParameter>(),
+        float forwardBackwardThreshold = 1.f,
+        Size gridStep = Size(6, 6),
+        InterpolationType interp_type = InterpolationType::INTERP_EPIC,
+        int epicK = 128,
+        float epicSigma = 0.05f,
+        float epicLambda = 999.0f,
+        int ricSPSize = 15,
+        int ricSLICType = 100,
+        bool use_post_proc = true,
+        float fgsLambda = 500.0f,
+        float fgsSigma = 1.5f,
+        bool use_variational_refinement = false);
+};
+
+/** @brief Class used for calculating sparse optical flow and feature tracking with robust local optical flow (RLOF) algorithms.
+*
+* The RLOF is a fast local optical flow approach described in @cite Senst2012 @cite Senst2013 @cite Senst2014
+ * and @cite Senst2016 similar to the pyramidal iterative Lucas-Kanade method as
+* proposed by @cite Bouguet00. More details and experiments can be found in the following thesis @cite Senst2019.
+* The implementation is derived from optflow::calcOpticalFlowPyrLK().
+*
+* For the RLOF configuration see optflow::RLOFOpticalFlowParameter for further details.
+* Parameters have been described in @cite Senst2012, @cite Senst2013, @cite Senst2014 and @cite Senst2016.
+*
+* @note SIMD parallelization is only available when compiling with SSE4.1.
+* @see optflow::calcOpticalFlowSparseRLOF(), optflow::RLOFOpticalFlowParameter
+*/
+class CV_EXPORTS_W SparseRLOFOpticalFlow : public SparseOpticalFlow
+{
+public:
+    /** @copydoc DenseRLOFOpticalFlow::setRLOFOpticalFlowParameter
+    */
+    CV_WRAP virtual void setRLOFOpticalFlowParameter(Ptr<RLOFOpticalFlowParameter> val) = 0;
+    /** @copybrief setRLOFOpticalFlowParameter
+     *    @see setRLOFOpticalFlowParameter
+    */
+    CV_WRAP virtual Ptr<RLOFOpticalFlowParameter>  getRLOFOpticalFlowParameter() const = 0;
+    //! @brief Threshold for the forward backward confidence check
+    /** For each feature point a motion vector \f$ d_{I0,I1}(\mathbf{x}) \f$ is computed.
+     *     If the forward backward error \f[ EP_{FB} = || d_{I0,I1} + d_{I1,I0} || \f]
+     *     is larger than the threshold given by this function then the status will not be used by the following
+     *    vector field interpolation. \f$ d_{I1,I0} \f$ denotes the backward flow. Note, the forward backward test
+     *    will only be applied if the threshold > 0. This may result in a doubled runtime for the motion estimation.
+     *    @see getForwardBackward
+    */
+    CV_WRAP virtual void setForwardBackward(float val) = 0;
+    /** @copybrief setForwardBackward
+     *    @see setForwardBackward
+    */
+    CV_WRAP virtual float getForwardBackward() const = 0;
+
+    //! @brief Creates instance of SparseRLOFOpticalFlow
+    /**
+     *    @param rlofParam see setRLOFOpticalFlowParameter
+     *    @param forwardBackwardThreshold see setForwardBackward
+    */
+    CV_WRAP static Ptr<SparseRLOFOpticalFlow> create(
+        Ptr<RLOFOpticalFlowParameter> rlofParam = Ptr<RLOFOpticalFlowParameter>(),
+        float forwardBackwardThreshold = 1.f);
+
+};
+
+/** @brief Fast dense optical flow computation based on robust local optical flow (RLOF) algorithms and sparse-to-dense interpolation scheme.
+ *
+ * The RLOF is a fast local optical flow approach described in @cite Senst2012 @cite Senst2013 @cite Senst2014
+ * and @cite Senst2016 similar to the pyramidal iterative Lucas-Kanade method as
+ * proposed by @cite Bouguet00. More details and experiments can be found in the following thesis @cite Senst2019.
+ * The implementation is derived from optflow::calcOpticalFlowPyrLK().
+ *
+ * The sparse-to-dense interpolation scheme allows for fast computation of dense optical flow using RLOF (see @cite Geistert2016).
+ * For this scheme the following steps are applied:
+ * -# motion vectors seeded at a regularly sampled grid are computed. The sparsity of this grid can be configured with setGridStep
+ * -# (optionally) erroneous motion vectors are filtered based on the forward backward confidence. The threshold can be configured
+ * with setForwardBackward. The filter is only applied if the threshold > 0, but then the runtime is doubled due to the estimation
+ * of the backward flow.
+ * -# Vector field interpolation is applied to the motion vector set to obtain a dense vector field.
+ *
+ * @param I0 first 8-bit input image. If The cross-based RLOF is used (by selecting optflow::RLOFOpticalFlowParameter::supportRegionType
+ * = SupportRegionType::SR_CROSS) image has to be a 8-bit 3 channel image.
+ * @param I1 second 8-bit input image. If The cross-based RLOF is used (by selecting optflow::RLOFOpticalFlowParameter::supportRegionType
+ * = SupportRegionType::SR_CROSS) image has to be a 8-bit 3 channel image.
+ * @param flow computed flow image that has the same size as I0 and type CV_32FC2.
+ * @param rlofParam see optflow::RLOFOpticalFlowParameter
+ * @param forwardBackwardThreshold Threshold for the forward backward confidence check.
+ * For each grid point \f$ \mathbf{x} \f$ a motion vector \f$ d_{I0,I1}(\mathbf{x}) \f$ is computed.
+ * If the forward backward error \f[ EP_{FB} = || d_{I0,I1} + d_{I1,I0} || \f]
+ * is larger than threshold given by this function then the motion vector will not be used by the following
+ * vector field interpolation. \f$ d_{I1,I0} \f$ denotes the backward flow. Note, the forward backward test
+ *    will only be applied if the threshold > 0. This may result in a doubled runtime for the motion estimation.
+ * @param gridStep Size of the grid to spawn the motion vectors. For each grid point a motion vector is computed.
+ * Some motion vectors will be removed due to the forward backward threshold (if set >0). The rest will be the
+ * base of the vector field interpolation.
+ * @param interp_type interpolation method used to compute the dense optical flow. Two interpolation algorithms are
+ * supported:
+ * - **INTERP_GEO** applies the fast geodesic interpolation, see @cite Geistert2016.
+ * - **INTERP_EPIC** applies the edge-preserving interpolation, see @cite Revaud2015,Geistert2016.
+ * @param epicK see ximgproc::EdgeAwareInterpolator sets the respective parameter.
+ * @param epicSigma see ximgproc::EdgeAwareInterpolator sets the respective parameter.
+ * @param epicLambda see ximgproc::EdgeAwareInterpolator sets the respective parameter.
+ * @param ricSPSize  see ximgproc::RICInterpolator sets the respective parameter.
+ * @param ricSLICType see ximgproc::RICInterpolator sets the respective parameter.
+ * @param use_post_proc enables ximgproc::fastGlobalSmootherFilter() parameter.
+ * @param fgsLambda sets the respective ximgproc::fastGlobalSmootherFilter() parameter.
+ * @param fgsSigma sets the respective ximgproc::fastGlobalSmootherFilter() parameter.
+ * @param use_variational_refinement enables VariationalRefinement
+ *
+ * Parameters have been described in @cite Senst2012, @cite Senst2013, @cite Senst2014, @cite Senst2016.
+ * For the RLOF configuration see optflow::RLOFOpticalFlowParameter for further details.
+ * @note If the grid size is set to (1,1) and the forward backward threshold <= 0 then the dense optical flow field is purely
+ * computed with the RLOF.
+ *
+ * @note SIMD parallelization is only available when compiling with SSE4.1.
+ *
+ * @sa optflow::DenseRLOFOpticalFlow, optflow::RLOFOpticalFlowParameter
+*/
+CV_EXPORTS_W void calcOpticalFlowDenseRLOF(InputArray I0, InputArray I1, InputOutputArray flow,
+    Ptr<RLOFOpticalFlowParameter> rlofParam = Ptr<RLOFOpticalFlowParameter>(),
+    float forwardBackwardThreshold = 0, Size gridStep = Size(6, 6),
+    InterpolationType interp_type = InterpolationType::INTERP_EPIC,
+    int epicK = 128, float epicSigma = 0.05f, float epicLambda = 100.f,
+    int ricSPSize = 15, int ricSLICType = 100,
+    bool use_post_proc = true, float fgsLambda = 500.0f, float fgsSigma = 1.5f,
+    bool use_variational_refinement = false);
+
+/** @brief Calculates fast optical flow for a sparse feature set using the robust local optical flow (RLOF) similar
+* to optflow::calcOpticalFlowPyrLK().
+*
+* The RLOF is a fast local optical flow approach described in @cite Senst2012 @cite Senst2013 @cite Senst2014
+ * and @cite Senst2016 similar to the pyramidal iterative Lucas-Kanade method as
+* proposed by @cite Bouguet00. More details and experiments can be found in the following thesis @cite Senst2019.
+* The implementation is derived from optflow::calcOpticalFlowPyrLK().
+*
+* @param prevImg first 8-bit input image. If The cross-based RLOF is used (by selecting optflow::RLOFOpticalFlowParameter::supportRegionType
+* = SupportRegionType::SR_CROSS) image has to be a 8-bit 3 channel image.
+* @param nextImg second 8-bit input image. If The cross-based RLOF is used (by selecting optflow::RLOFOpticalFlowParameter::supportRegionType
+* = SupportRegionType::SR_CROSS) image has to be a 8-bit 3 channel image.
+* @param prevPts vector of 2D points for which the flow needs to be found; point coordinates must be single-precision
+* floating-point numbers.
+* @param nextPts output vector of 2D points (with single-precision floating-point coordinates) containing the calculated
+* new positions of input features in the second image; when optflow::RLOFOpticalFlowParameter::useInitialFlow variable is true  the vector must
+* have the same size as in the input and contain the initialization point correspondences.
+* @param status output status vector (of unsigned chars); each element of the vector is set to 1 if the flow for the
+* corresponding features has passed the forward backward check.
+* @param err output vector of errors; each element of the vector is set to the forward backward error for the corresponding feature.
+* @param rlofParam see optflow::RLOFOpticalFlowParameter
+* @param forwardBackwardThreshold Threshold for the forward backward confidence check. If forwardBackwardThreshold <= 0 the forward backward check is not applied.
+*
+* @note SIMD parallelization is only available when compiling with SSE4.1.
+*
+* Parameters have been described in @cite Senst2012, @cite Senst2013, @cite Senst2014 and @cite Senst2016.
+* For the RLOF configuration see optflow::RLOFOpticalFlowParameter for further details.
+*/
+CV_EXPORTS_W void calcOpticalFlowSparseRLOF(InputArray prevImg, InputArray nextImg,
+    InputArray prevPts, InputOutputArray nextPts,
+    OutputArray status, OutputArray err,
+    Ptr<RLOFOpticalFlowParameter> rlofParam = Ptr<RLOFOpticalFlowParameter>(),
+    float forwardBackwardThreshold = 0);
+
+//! Additional interface to the Dense RLOF algorithm - optflow::calcOpticalFlowDenseRLOF()
+CV_EXPORTS_W Ptr<DenseOpticalFlow> createOptFlow_DenseRLOF();
+
+//! Additional interface to the Sparse RLOF algorithm - optflow::calcOpticalFlowSparseRLOF()
+CV_EXPORTS_W Ptr<SparseOpticalFlow> createOptFlow_SparseRLOF();
+//! @}
+
+} // namespace
+} // namespace
+#endif
diff --git a/IPL/include/opencv/opencv2/optflow/sparse_matching_gpc.hpp b/IPL/include/opencv/opencv2/optflow/sparse_matching_gpc.hpp
new file mode 100644
index 0000000..5256534
--- /dev/null
+++ b/IPL/include/opencv/opencv2/optflow/sparse_matching_gpc.hpp
@@ -0,0 +1,372 @@
+/*
+By downloading, copying, installing or using the software you agree to this
+license. If you do not agree to this license, do not download, install,
+copy or use the software.
+
+
+                          License Agreement
+               For Open Source Computer Vision Library
+                       (3-clause BSD License)
+
+Copyright (C) 2016, OpenCV Foundation, all rights reserved.
+Third party copyrights are property of their respective owners.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+  * Redistributions of source code must retain the above copyright notice,
+    this list of conditions and the following disclaimer.
+
+  * Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+  * Neither the names of the copyright holders nor the names of the contributors
+    may be used to endorse or promote products derived from this software
+    without specific prior written permission.
+
+This software is provided by the copyright holders and contributors "as is" and
+any express or implied warranties, including, but not limited to, the implied
+warranties of merchantability and fitness for a particular purpose are
+disclaimed. In no event shall copyright holders or contributors be liable for
+any direct, indirect, incidental, special, exemplary, or consequential damages
+(including, but not limited to, procurement of substitute goods or services;
+loss of use, data, or profits; or business interruption) however caused
+and on any theory of liability, whether in contract, strict liability,
+or tort (including negligence or otherwise) arising in any way out of
+the use of this software, even if advised of the possibility of such damage.
+*/
+
+/**
+ * @file   sparse_matching_gpc.hpp
+ * @author Vladislav Samsonov <vvladxx@gmail.com>
+ * @brief  Implementation of the Global Patch Collider.
+ *
+ * Implementation of the Global Patch Collider algorithm from the following paper:
+ * http://research.microsoft.com/en-us/um/people/pkohli/papers/wfrik_cvpr2016.pdf
+ *
+ * @cite Wang_2016_CVPR
+ */
+
+#ifndef __OPENCV_OPTFLOW_SPARSE_MATCHING_GPC_HPP__
+#define __OPENCV_OPTFLOW_SPARSE_MATCHING_GPC_HPP__
+
+#include "opencv2/core.hpp"
+#include "opencv2/imgproc.hpp"
+
+namespace cv
+{
+namespace optflow
+{
+
+//! @addtogroup optflow
+//! @{
+
+struct CV_EXPORTS_W GPCPatchDescriptor
+{
+  static const unsigned nFeatures = 18; //!< number of features in a patch descriptor
+  Vec< double, nFeatures > feature; //!< feature values; feature[0] doubles as the "separated" marker (see below)
+  //! Defined out of line; presumably the dot product of feature with coef - TODO confirm.
+  double dot( const Vec< double, nFeatures > &coef ) const;
+  //! Flags this descriptor as separated by overwriting feature[0] with a quiet NaN.
+  void markAsSeparated() { feature[0] = std::numeric_limits< double >::quiet_NaN(); }
+  //! True when feature[0] is NaN, i.e. after markAsSeparated() (or if feature[0] was NaN for any other reason).
+  bool isSeparated() const { return cvIsNaN( feature[0] ) != 0; }
+};
+
+struct CV_EXPORTS_W GPCPatchSample
+{
+  GPCPatchDescriptor ref; //!< presumably the reference patch - TODO confirm
+  GPCPatchDescriptor pos; //!< presumably the matching (positive) patch - TODO confirm
+  GPCPatchDescriptor neg; //!< presumably the non-matching (negative) patch - TODO confirm
+  //! Defined out of line; writes one flag per descriptor for the hyperplane (coef, rhs) - presumably the side each one falls on.
+  void getDirections( bool &refdir, bool &posdir, bool &negdir, const Vec< double, GPCPatchDescriptor::nFeatures > &coef, double rhs ) const;
+};
+
+typedef std::vector< GPCPatchSample > GPCSamplesVector; //!< container of patch samples
+
+/** @brief Descriptor types for the Global Patch Collider.
+ *  GPC_DESCRIPTOR_DCT is the default descriptor type of GPCTrainingParams. */
+enum GPCDescType
+{
+  GPC_DESCRIPTOR_DCT = 0, //!< Better quality but slow
+  GPC_DESCRIPTOR_WHT      //!< Worse quality but much faster
+};
+
+/** @brief Class encapsulating training samples.
+ */
+class CV_EXPORTS_W GPCTrainingSamples
+{
+private:
+  GPCSamplesVector samples; //!< the stored samples, exposed through size() and the conversion operator
+  int descriptorType;       //!< descriptor type reported by type()
+
+public:
+  /** @brief This function can be used to extract samples from a pair of images and a ground truth flow.
+   * Sizes of all the provided vectors must be equal.
+   */
+  static Ptr< GPCTrainingSamples > create( const std::vector< String > &imagesFrom, const std::vector< String > &imagesTo,
+                                           const std::vector< String > &gt, int descriptorType );
+  //! Overload taking InputArrayOfArrays instead of string vectors; presumably the same extraction - TODO confirm.
+  static Ptr< GPCTrainingSamples > create( InputArrayOfArrays imagesFrom, InputArrayOfArrays imagesTo, InputArrayOfArrays gt,
+                                           int descriptorType );
+  //! Number of stored samples.
+  size_t size() const { return samples.size(); }
+  //! Descriptor type stored alongside the samples.
+  int type() const { return descriptorType; }
+  //! Implicit conversion exposing the stored sample vector by mutable reference.
+  operator GPCSamplesVector &() { return samples; }
+};
+
+/** @brief Class encapsulating training parameters.
+ */
+struct GPCTrainingParams
+{
+  unsigned maxTreeDepth;  //!< Maximum tree depth to stop partitioning.
+  int minNumberOfSamples; //!< Minimum number of samples in the node to stop partitioning.
+  int descriptorType;     //!< Type of descriptors to use.
+  bool printProgress;     //!< Print progress to stdout.
+  //! Stores the arguments and asserts (CV_Assert) that check() holds for them.
+  GPCTrainingParams( unsigned _maxTreeDepth = 20, int _minNumberOfSamples = 3, GPCDescType _descriptorType = GPC_DESCRIPTOR_DCT,
+                     bool _printProgress = true )
+      : maxTreeDepth( _maxTreeDepth ), minNumberOfSamples( _minNumberOfSamples ), descriptorType( _descriptorType ),
+        printProgress( _printProgress )
+  {
+    CV_Assert( check() );
+  }
+  //! Validity test: both maxTreeDepth and minNumberOfSamples must be greater than 1.
+  bool check() const { return maxTreeDepth > 1 && minNumberOfSamples > 1; }
+};
+
+/** @brief Class encapsulating matching parameters.
+ */
+struct GPCMatchingParams
+{
+  bool useOpenCL; //!< Whether to use OpenCL to speed up the matching.
+  //! Constructs the parameters; OpenCL use is off unless requested.
+  GPCMatchingParams( bool _useOpenCL = false ) : useOpenCL( _useOpenCL ) {}
+  //! Copy constructor; copies the useOpenCL flag.
+  GPCMatchingParams( const GPCMatchingParams &params ) : useOpenCL( params.useOpenCL ) {}
+};
+
+/** @brief Class for individual tree.
+ */
+class CV_EXPORTS_W GPCTree : public Algorithm
+{
+public:
+  struct Node
+  {
+    Vec< double, GPCPatchDescriptor::nFeatures > coef; //!< Hyperplane coefficients
+    double rhs;                                        //!< Bias term of the hyperplane
+    unsigned left;                                     //!< presumably the index of the left child in the node array - TODO confirm
+    unsigned right;                                    //!< presumably the index of the right child in the node array - TODO confirm
+    //! Member-wise equality over all four fields.
+    bool operator==( const Node &n ) const { return coef == n.coef && rhs == n.rhs && left == n.left && right == n.right; }
+  };
+
+private:
+  typedef GPCSamplesVector::iterator SIter;
+
+  std::vector< Node > nodes; //!< nodes of the tree; the only state compared by operator==
+  GPCTrainingParams params;  //!< stored parameters; source of getDescriptorType()
+  //! Training helper for one node over the samples between begin and end; defined out of line.
+  bool trainNode( size_t nodeId, SIter begin, SIter end, unsigned depth );
+
+public:
+  void train( GPCTrainingSamples &samples, const GPCTrainingParams params = GPCTrainingParams() ); // defined out of line
+  //! Algorithm override; defined out of line.
+  void write( FileStorage &fs ) const CV_OVERRIDE;
+  //! Algorithm override; defined out of line.
+  void read( const FileNode &fn ) CV_OVERRIDE;
+  //! Defined out of line; presumably returns the identifier of the leaf that descr falls into - TODO confirm.
+  unsigned findLeafForPatch( const GPCPatchDescriptor &descr ) const;
+  //! Creates a default-constructed tree via makePtr.
+  static Ptr< GPCTree > create() { return makePtr< GPCTree >(); }
+  //! Trees compare equal when their node vectors are equal; params is not compared.
+  bool operator==( const GPCTree &t ) const { return nodes == t.nodes; }
+  //! Descriptor type taken from the stored training parameters.
+  int getDescriptorType() const { return params.descriptorType; }
+};
+
+template < int T > class GPCForest : public Algorithm
+{
+private:
+  struct Trail
+  {
+    unsigned leaf[T]; //!< Inside which leaf of the tree 0..T the patch fell?
+    Point2i coord;    //!< Patch coordinates.
+    //! Equality compares the leaf ids only; coord is ignored.
+    bool operator==( const Trail &trail ) const { return memcmp( leaf, trail.leaf, sizeof( leaf ) ) == 0; }
+    //! Lexicographic ordering on the leaf ids; coord is ignored.
+    bool operator<( const Trail &trail ) const
+    {
+      for ( int i = 0; i < T - 1; ++i )
+        if ( leaf[i] != trail.leaf[i] )
+          return leaf[i] < trail.leaf[i];
+      return leaf[T - 1] < trail.leaf[T - 1];
+    }
+  };
+  //! parallel_for_ body: for each tree index in the range, stores every descriptor's leaf into the trail with the same index.
+  class ParallelTrailsFilling : public ParallelLoopBody
+  {
+  private:
+    const GPCForest *forest;
+    const std::vector< GPCPatchDescriptor > *descr;
+    std::vector< Trail > *trails;
+    //! Private copy assignment, declared without a definition in this header.
+    ParallelTrailsFilling &operator=( const ParallelTrailsFilling & );
+
+  public:
+    ParallelTrailsFilling( const GPCForest *_forest, const std::vector< GPCPatchDescriptor > *_descr, std::vector< Trail > *_trails )
+        : forest( _forest ), descr( _descr ), trails( _trails ){};
+    //! range holds tree indices; trails needs at least descr->size() elements because at() is bounds-checked.
+    void operator()( const Range &range ) const CV_OVERRIDE
+    {
+      for ( int t = range.start; t < range.end; ++t )
+        for ( size_t i = 0; i < descr->size(); ++i )
+          trails->at( i ).leaf[t] = forest->tree[t].findLeafForPatch( descr->at( i ) );
+    }
+  };
+
+  GPCTree tree[T]; //!< The T trees of the forest.
+
+public:
+  /** @brief Train the forest using one sample set for every tree.
+   * Please, consider using the next method instead of this one for better quality.
+   */
+  void train( GPCTrainingSamples &samples, const GPCTrainingParams params = GPCTrainingParams() )
+  {
+    for ( int i = 0; i < T; ++i )
+      tree[i].train( samples, params );
+  }
+
+  /** @brief Train the forest using individual samples for each tree.
+   * It is generally better to use this instead of the first method.
+   */
+  void train( const std::vector< String > &imagesFrom, const std::vector< String > &imagesTo, const std::vector< String > &gt,
+              const GPCTrainingParams params = GPCTrainingParams() )
+  {
+    for ( int i = 0; i < T; ++i )
+    {
+      Ptr< GPCTrainingSamples > samples =
+        GPCTrainingSamples::create( imagesFrom, imagesTo, gt, params.descriptorType ); // Create training set for the tree
+      tree[i].train( *samples, params );
+    }
+  }
+  //! Same per-tree scheme as the overload above, with the inputs given as InputArrayOfArrays.
+  void train( InputArrayOfArrays imagesFrom, InputArrayOfArrays imagesTo, InputArrayOfArrays gt,
+              const GPCTrainingParams params = GPCTrainingParams() )
+  {
+    for ( int i = 0; i < T; ++i )
+    {
+      Ptr< GPCTrainingSamples > samples =
+        GPCTrainingSamples::create( imagesFrom, imagesTo, gt, params.descriptorType ); // Create training set for the tree
+      tree[i].train( *samples, params );
+    }
+  }
+  //! Writes "ntrees" (= T) followed by a "trees" sequence holding one map per tree.
+  void write( FileStorage &fs ) const CV_OVERRIDE
+  {
+    fs << "ntrees" << T << "trees"
+       << "[";
+    for ( int i = 0; i < T; ++i )
+    {
+      fs << "{";
+      tree[i].write( fs );
+      fs << "}";
+    }
+    fs << "]";
+  }
+  //! Reads T trees from the "trees" node; asserts that the stored "ntrees" is at least T.
+  void read( const FileNode &fn ) CV_OVERRIDE
+  {
+    CV_Assert( T <= (int)fn["ntrees"] );
+    FileNodeIterator it = fn["trees"].begin();
+    for ( int i = 0; i < T; ++i, ++it )
+      tree[i].read( *it );
+  }
+
+  /** @brief Find correspondences between two images.
+   * @param[in] imgFrom First image in a sequence.
+   * @param[in] imgTo Second image in a sequence.
+   * @param[out] corr Output vector with pairs of corresponding points.
+   * @param[in] params Additional matching parameters for fine-tuning.
+   */
+  void findCorrespondences( InputArray imgFrom, InputArray imgTo, std::vector< std::pair< Point2i, Point2i > > &corr,
+                            const GPCMatchingParams params = GPCMatchingParams() ) const;
+  //! Creates a default-constructed forest via makePtr.
+  static Ptr< GPCForest > create() { return makePtr< GPCForest >(); }
+};
+
+class CV_EXPORTS_W GPCDetails
+{
+public:
+  static void dropOutliers( std::vector< std::pair< Point2i, Point2i > > &corr ); // defined out of line; takes corr by mutable reference
+  //! Defined out of line; fills descr from the channel array imgCh (findCorrespondences passes three split planes).
+  static void getAllDescriptorsForImage( const Mat *imgCh, std::vector< GPCPatchDescriptor > &descr, const GPCMatchingParams &mp,
+                                         int type );
+  //! Defined out of line; presumably maps a linear descriptor index to patch coordinates (x, y) for an image of size sz - TODO confirm.
+  static void getCoordinatesFromIndex( size_t index, Size sz, int &x, int &y );
+};
+
+template < int T >
+void GPCForest< T >::findCorrespondences( InputArray imgFrom, InputArray imgTo, std::vector< std::pair< Point2i, Point2i > > &corr,
+                                          const GPCMatchingParams params ) const
+{
+  CV_Assert( imgFrom.channels() == 3 );
+  CV_Assert( imgTo.channels() == 3 );
+  // Both inputs are converted to 32-bit float, then BGR -> YCrCb, then split into single-channel planes.
+  Mat from, to;
+  imgFrom.getMat().convertTo( from, CV_32FC3 );
+  imgTo.getMat().convertTo( to, CV_32FC3 );
+  cvtColor( from, from, COLOR_BGR2YCrCb );
+  cvtColor( to, to, COLOR_BGR2YCrCb );
+
+  Mat fromCh[3], toCh[3];
+  split( from, fromCh );
+  split( to, toCh );
+  // One trail per descriptor of the first image: the leaf reached in each of the T trees plus the patch coordinates.
+  std::vector< GPCPatchDescriptor > descr;
+  GPCDetails::getAllDescriptorsForImage( fromCh, descr, params, tree[0].getDescriptorType() );
+  std::vector< Trail > trailsFrom( descr.size() );
+
+  for ( size_t i = 0; i < descr.size(); ++i )
+    GPCDetails::getCoordinatesFromIndex( i, from.size(), trailsFrom[i].coord.x, trailsFrom[i].coord.y );
+  parallel_for_( Range( 0, T ), ParallelTrailsFilling( this, &descr, &trailsFrom ) );
+
+  descr.clear();
+  GPCDetails::getAllDescriptorsForImage( toCh, descr, params, tree[0].getDescriptorType() );
+  std::vector< Trail > trailsTo( descr.size() ); // sized from the second image's own descriptors; the two counts may differ
+  for ( size_t i = 0; i < descr.size(); ++i )
+    GPCDetails::getCoordinatesFromIndex( i, to.size(), trailsTo[i].coord.x, trailsTo[i].coord.y );
+  parallel_for_( Range( 0, T ), ParallelTrailsFilling( this, &descr, &trailsTo ) );
+  // Sorting by leaf ids makes equal trails adjacent, so duplicates are detectable by comparing neighbours.
+  std::sort( trailsFrom.begin(), trailsFrom.end() );
+  std::sort( trailsTo.begin(), trailsTo.end() );
+  // A pair is emitted only when the trail occurs exactly once in the first image and exactly once in the second.
+  for ( size_t i = 0; i < trailsFrom.size(); ++i )
+  {
+    bool uniq = true;
+    while ( i + 1 < trailsFrom.size() && trailsFrom[i] == trailsFrom[i + 1] )
+      ++i, uniq = false;
+    if ( uniq )
+    {
+      typename std::vector< Trail >::const_iterator lb = std::lower_bound( trailsTo.begin(), trailsTo.end(), trailsFrom[i] );
+      if ( lb != trailsTo.end() && *lb == trailsFrom[i] && ( ( lb + 1 ) == trailsTo.end() || !( *lb == *( lb + 1 ) ) ) )
+        corr.push_back( std::make_pair( trailsFrom[i].coord, lb->coord ) );
+    }
+  }
+
+  GPCDetails::dropOutliers( corr );
+}
+
+//! @}
+
+} // namespace optflow
+
+CV_EXPORTS void write( FileStorage &fs, const String &name, const optflow::GPCTree::Node &node ); // FileStorage writer for a tree node; defined out of line
+// FileStorage reader for a tree node; the unnamed third parameter is presumably a default value - TODO confirm.
+CV_EXPORTS void read( const FileNode &fn, optflow::GPCTree::Node &node, optflow::GPCTree::Node );
+} // namespace cv
+
+#endif
diff --git a/IPL/include/opencv/opencv2/phase_unwrapping.hpp b/IPL/include/opencv/opencv2/phase_unwrapping.hpp
new file mode 100644
index 0000000..0e15e71
--- /dev/null
+++ b/IPL/include/opencv/opencv2/phase_unwrapping.hpp
@@ -0,0 +1,61 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+ //
+ //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+ //
+ //  By downloading, copying, installing or using the software you agree to this license.
+ //  If you do not agree to this license, do not download, install,
+ //  copy or use the software.
+ //
+ //
+ //                           License Agreement
+ //                For Open Source Computer Vision Library
+ //
+ // Copyright (C) 2015, OpenCV Foundation, all rights reserved.
+ // Third party copyrights are property of their respective owners.
+ //
+ // Redistribution and use in source and binary forms, with or without modification,
+ // are permitted provided that the following conditions are met:
+ //
+ //   * Redistribution's of source code must retain the above copyright notice,
+ //     this list of conditions and the following disclaimer.
+ //
+ //   * Redistribution's in binary form must reproduce the above copyright notice,
+ //     this list of conditions and the following disclaimer in the documentation
+ //     and/or other materials provided with the distribution.
+ //
+ //   * The name of the copyright holders may not be used to endorse or promote products
+ //     derived from this software without specific prior written permission.
+ //
+ // This software is provided by the copyright holders and contributors "as is" and
+ // any express or implied warranties, including, but not limited to, the implied
+ // warranties of merchantability and fitness for a particular purpose are disclaimed.
+ // In no event shall the Intel Corporation or contributors be liable for any direct,
+ // indirect, incidental, special, exemplary, or consequential damages
+ // (including, but not limited to, procurement of substitute goods or services;
+ // loss of use, data, or profits; or business interruption) however caused
+ // and on any theory of liability, whether in contract, strict liability,
+ // or tort (including negligence or otherwise) arising in any way out of
+ // the use of this software, even if advised of the possibility of such damage.
+ //
+ //M*/
+
+#include "opencv2/phase_unwrapping/phase_unwrapping.hpp"
+#include "opencv2/phase_unwrapping/histogramphaseunwrapping.hpp"
+
+/** @defgroup phase_unwrapping Phase Unwrapping API
+
+Two-dimensional phase unwrapping is found in different applications like terrain elevation estimation
+in synthetic aperture radar (SAR), field mapping in magnetic resonance imaging or as a way of finding
+corresponding pixels in structured light reconstruction with sinusoidal patterns.
+
+Given a phase map, wrapped between [-pi; pi], phase unwrapping aims at finding the "true" phase map
+by adding the right number of 2*pi to each pixel.
+
+The problem is straightforward for a perfectly wrapped phase map, but real data are usually not noise-free.
+Among the different algorithms that were developed, quality-guided phase unwrapping methods are fast
+and efficient. They follow a path that unwraps high quality pixels first,
+avoiding error propagation from the start.
+
+In this module, a quality-guided phase unwrapping is implemented following the approach described in @cite histogramUnwrapping .
+
+*/
\ No newline at end of file
diff --git a/IPL/include/opencv/opencv2/phase_unwrapping/histogramphaseunwrapping.hpp b/IPL/include/opencv/opencv2/phase_unwrapping/histogramphaseunwrapping.hpp
new file mode 100644
index 0000000..177cb43
--- /dev/null
+++ b/IPL/include/opencv/opencv2/phase_unwrapping/histogramphaseunwrapping.hpp
@@ -0,0 +1,108 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+ //
+ //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+ //
+ //  By downloading, copying, installing or using the software you agree to this license.
+ //  If you do not agree to this license, do not download, install,
+ //  copy or use the software.
+ //
+ //
+ //                           License Agreement
+ //                For Open Source Computer Vision Library
+ //
+ // Copyright (C) 2015, OpenCV Foundation, all rights reserved.
+ // Third party copyrights are property of their respective owners.
+ //
+ // Redistribution and use in source and binary forms, with or without modification,
+ // are permitted provided that the following conditions are met:
+ //
+ //   * Redistribution's of source code must retain the above copyright notice,
+ //     this list of conditions and the following disclaimer.
+ //
+ //   * Redistribution's in binary form must reproduce the above copyright notice,
+ //     this list of conditions and the following disclaimer in the documentation
+ //     and/or other materials provided with the distribution.
+ //
+ //   * The name of the copyright holders may not be used to endorse or promote products
+ //     derived from this software without specific prior written permission.
+ //
+ // This software is provided by the copyright holders and contributors "as is" and
+ // any express or implied warranties, including, but not limited to, the implied
+ // warranties of merchantability and fitness for a particular purpose are disclaimed.
+ // In no event shall the Intel Corporation or contributors be liable for any direct,
+ // indirect, incidental, special, exemplary, or consequential damages
+ // (including, but not limited to, procurement of substitute goods or services;
+ // loss of use, data, or profits; or business interruption) however caused
+ // and on any theory of liability, whether in contract, strict liability,
+ // or tort (including negligence or otherwise) arising in any way out of
+ // the use of this software, even if advised of the possibility of such damage.
+ //
+ //M*/
+
+#ifndef __OPENCV_HISTOGRAM_PHASE_UNWRAPPING_HPP__
+#define __OPENCV_HISTOGRAM_PHASE_UNWRAPPING_HPP__
+
+#include "opencv2/core.hpp"
+#include <opencv2/imgproc.hpp>
+#include "opencv2/phase_unwrapping/phase_unwrapping.hpp"
+
+namespace cv {
+namespace phase_unwrapping {
+//! @addtogroup phase_unwrapping
+//! @{
+
+    /** @brief Class implementing two-dimensional phase unwrapping based on @cite histogramUnwrapping
+     * This algorithm belongs to the quality-guided phase unwrapping methods.
+     * First, it computes a reliability map from second differences between a pixel and its eight neighbours.
+     * Reliability values lie between 0 and 16*pi*pi. Then, this reliability map is used to compute
+     * the reliabilities of "edges". An edge is an entity defined by two pixels that are connected
+     * horizontally or vertically. Its reliability is found by adding the reliabilities of the
+     * two pixels connected through it. Edges are sorted in a histogram based on their reliability values.
+     * This histogram is then used to unwrap pixels, starting from the highest quality pixel.
+
+     * The wrapped phase map and the unwrapped result are stored in CV_32FC1 Mat.
+     */
+class CV_EXPORTS_W HistogramPhaseUnwrapping : public PhaseUnwrapping
+{
+
+public:
+    /**
+     * @brief Parameters of phaseUnwrapping constructor.
+
+     * @param width Phase map width.
+     * @param height Phase map height.
+     * @param histThresh Bins in the histogram are not of equal size. Default value is 3*pi*pi. The bins below the "histThresh" value are smaller.
+     * @param nbrOfSmallBins Number of bins between 0 and "histThresh". Default value is 10.
+     * @param nbrOfLargeBins Number of bins between "histThresh" and 32*pi*pi (highest edge reliability value). Default value is 5.
+     */
+    struct CV_EXPORTS_W_SIMPLE Params
+    {
+        CV_WRAP Params();
+        CV_PROP_RW int width;
+        CV_PROP_RW int height;
+        CV_PROP_RW float histThresh;
+        CV_PROP_RW int nbrOfSmallBins;
+        CV_PROP_RW int nbrOfLargeBins;
+    };
+    /**
+     * @brief Constructor
+
+     * @param parameters HistogramPhaseUnwrapping parameters HistogramPhaseUnwrapping::Params: width,height of the phase map and histogram characteristics.
+     */
+    CV_WRAP
+    static Ptr<HistogramPhaseUnwrapping> create( const HistogramPhaseUnwrapping::Params &parameters =
+                                                 HistogramPhaseUnwrapping::Params() );
+
+    /**
+     * @brief Get the reliability map computed from the wrapped phase map.
+
+     * @param reliabilityMap Image where the reliability map is stored.
+     */
+    CV_WRAP
+    virtual void getInverseReliabilityMap( OutputArray reliabilityMap ) = 0;
+};
+
+//! @}
+}
+}
+#endif
\ No newline at end of file
diff --git a/IPL/include/opencv/opencv2/phase_unwrapping/phase_unwrapping.hpp b/IPL/include/opencv/opencv2/phase_unwrapping/phase_unwrapping.hpp
new file mode 100644
index 0000000..5b5cb51
--- /dev/null
+++ b/IPL/include/opencv/opencv2/phase_unwrapping/phase_unwrapping.hpp
@@ -0,0 +1,74 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+ //
+ //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+ //
+ //  By downloading, copying, installing or using the software you agree to this license.
+ //  If you do not agree to this license, do not download, install,
+ //  copy or use the software.
+ //
+ //
+ //                           License Agreement
+ //                For Open Source Computer Vision Library
+ //
+ // Copyright (C) 2015, OpenCV Foundation, all rights reserved.
+ // Third party copyrights are property of their respective owners.
+ //
+ // Redistribution and use in source and binary forms, with or without modification,
+ // are permitted provided that the following conditions are met:
+ //
+ //   * Redistribution's of source code must retain the above copyright notice,
+ //     this list of conditions and the following disclaimer.
+ //
+ //   * Redistribution's in binary form must reproduce the above copyright notice,
+ //     this list of conditions and the following disclaimer in the documentation
+ //     and/or other materials provided with the distribution.
+ //
+ //   * The name of the copyright holders may not be used to endorse or promote products
+ //     derived from this software without specific prior written permission.
+ //
+ // This software is provided by the copyright holders and contributors "as is" and
+ // any express or implied warranties, including, but not limited to, the implied
+ // warranties of merchantability and fitness for a particular purpose are disclaimed.
+ // In no event shall the Intel Corporation or contributors be liable for any direct,
+ // indirect, incidental, special, exemplary, or consequential damages
+ // (including, but not limited to, procurement of substitute goods or services;
+ // loss of use, data, or profits; or business interruption) however caused
+ // and on any theory of liability, whether in contract, strict liability,
+ // or tort (including negligence or otherwise) arising in any way out of
+ // the use of this software, even if advised of the possibility of such damage.
+ //
+ //M*/
+
+#ifndef __OPENCV_PHASE_UNWRAPPING_HPP__
+#define __OPENCV_PHASE_UNWRAPPING_HPP__
+
+#include "opencv2/core.hpp"
+
+namespace cv {
+namespace phase_unwrapping {
+//! @addtogroup phase_unwrapping
+//! @{
+
+    /**
+     @brief Abstract base class for phase unwrapping.
+    */
+class CV_EXPORTS_W PhaseUnwrapping : public virtual Algorithm
+{
+public:
+    /**
+     * @brief Unwraps a 2D phase map.
+
+     * @param wrappedPhaseMap The wrapped phase map that needs to be unwrapped.
+     * @param unwrappedPhaseMap The unwrapped phase map.
+     * @param shadowMask Optional parameter used when some pixels do not hold any phase information in the wrapped phase map.
+     */
+    CV_WRAP
+    virtual void unwrapPhaseMap( InputArray wrappedPhaseMap, OutputArray unwrappedPhaseMap,
+                                 InputArray shadowMask = noArray() ) = 0;
+
+};
+
+//! @}
+}
+}
+#endif
\ No newline at end of file
diff --git a/IPL/include/opencv/opencv2/photo.hpp b/IPL/include/opencv/opencv2/photo.hpp
index c093f65..c2e89a3 100644
--- a/IPL/include/opencv/opencv2/photo.hpp
+++ b/IPL/include/opencv/opencv2/photo.hpp
@@ -40,23 +40,44 @@
 //
 //M*/
 
-#ifndef __OPENCV_PHOTO_HPP__
-#define __OPENCV_PHOTO_HPP__
+#ifndef OPENCV_PHOTO_HPP
+#define OPENCV_PHOTO_HPP
 
 #include "opencv2/core.hpp"
 #include "opencv2/imgproc.hpp"
 
 /**
 @defgroup photo Computational Photography
+
+This module includes photo processing algorithms
 @{
+    @defgroup photo_inpaint Inpainting
     @defgroup photo_denoise Denoising
     @defgroup photo_hdr HDR imaging
 
 This section describes high dynamic range imaging algorithms namely tonemapping, exposure alignment,
 camera calibration with multiple exposures and exposure fusion.
 
+    @defgroup photo_decolor Contrast Preserving Decolorization
+
+Useful links:
+
+http://www.cse.cuhk.edu.hk/leojia/projects/color2gray/index.html
+
     @defgroup photo_clone Seamless Cloning
+
+Useful links:
+
+https://www.learnopencv.com/seamless-cloning-using-opencv-python-cpp
+
     @defgroup photo_render Non-Photorealistic Rendering
+
+Useful links:
+
+http://www.inf.ufrgs.br/~eslgastal/DomainTransform
+
+https://www.learnopencv.com/non-photorealistic-rendering-using-opencv-python-c/
+
     @defgroup photo_c C API
 @}
   */
@@ -67,37 +88,24 @@ namespace cv
 //! @addtogroup photo
 //! @{
 
+//! @addtogroup photo_inpaint
+//! @{
 //! the inpainting algorithm
 enum
 {
-    INPAINT_NS    = 0, // Navier-Stokes algorithm
-    INPAINT_TELEA = 1 // A. Telea algorithm
-};
-
-enum
-{
-    NORMAL_CLONE = 1,
-    MIXED_CLONE  = 2,
-    MONOCHROME_TRANSFER = 3
-};
-
-enum
-{
-    RECURS_FILTER = 1,
-    NORMCONV_FILTER = 2
+    INPAINT_NS    = 0, //!< Use Navier-Stokes based method
+    INPAINT_TELEA = 1 //!< Use the algorithm proposed by Alexandru Telea @cite Telea04
 };
 
 /** @brief Restores the selected region in an image using the region neighborhood.
 
-@param src Input 8-bit 1-channel or 3-channel image.
+@param src Input 8-bit, 16-bit unsigned or 32-bit float 1-channel or 8-bit 3-channel image.
 @param inpaintMask Inpainting mask, 8-bit 1-channel image. Non-zero pixels indicate the area that
 needs to be inpainted.
 @param dst Output image with the same size and type as src .
 @param inpaintRadius Radius of a circular neighborhood of each point inpainted that is considered
 by the algorithm.
-@param flags Inpainting method that could be one of the following:
--   **INPAINT_NS** Navier-Stokes based method [Navier01]
--   **INPAINT_TELEA** Method by Alexandru Telea @cite Telea04 .
+@param flags Inpainting method that could be cv::INPAINT_NS or cv::INPAINT_TELEA
 
 The function reconstructs the selected image area from the pixel near the area boundary. The
 function may be used to remove dust and scratches from a scanned photo, or to remove undesirable
@@ -106,12 +114,14 @@ objects from still images or video. See <http://en.wikipedia.org/wiki/Inpainting
 @note
    -   An example using the inpainting technique can be found at
         opencv_source_code/samples/cpp/inpaint.cpp
-    -   (Python) An example using the inpainting technique can be found at
+   -   (Python) An example using the inpainting technique can be found at
         opencv_source_code/samples/python/inpaint.py
  */
 CV_EXPORTS_W void inpaint( InputArray src, InputArray inpaintMask,
         OutputArray dst, double inpaintRadius, int flags );
 
+//! @} photo_inpaint
+
 //! @addtogroup photo_denoise
 //! @{
 
@@ -189,7 +199,7 @@ CV_EXPORTS_W void fastNlMeansDenoisingColored( InputArray src, OutputArray dst,
         float h = 3, float hColor = 3,
         int templateWindowSize = 7, int searchWindowSize = 21);
 
-/** @brief Modification of fastNlMeansDenoising function for images sequence where consequtive images have been
+/** @brief Modification of fastNlMeansDenoising function for images sequence where consecutive images have been
 captured in small period of time. For example video. This version of the function is for grayscale
 images or for manual manipulation with colorspaces. For more details see
 <http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.131.6394>
@@ -216,7 +226,7 @@ CV_EXPORTS_W void fastNlMeansDenoisingMulti( InputArrayOfArrays srcImgs, OutputA
         int imgToDenoiseIndex, int temporalWindowSize,
         float h = 3, int templateWindowSize = 7, int searchWindowSize = 21);
 
-/** @brief Modification of fastNlMeansDenoising function for images sequence where consequtive images have been
+/** @brief Modification of fastNlMeansDenoising function for images sequence where consecutive images have been
 captured in small period of time. For example video. This version of the function is for grayscale
 images or for manual manipulation with colorspaces. For more details see
 <http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.131.6394>
@@ -328,8 +338,8 @@ class CV_EXPORTS_W Tonemap : public Algorithm
 public:
     /** @brief Tonemaps image
 
-    @param src source image - 32-bit 3-channel Mat
-    @param dst destination image - 32-bit 3-channel Mat with values in [0, 1] range
+    @param src source image - CV_32FC3 Mat (float 32 bits 3 channels)
+    @param dst destination image - CV_32FC3 Mat with values in [0, 1] range
      */
     CV_WRAP virtual void process(InputArray src, OutputArray dst) = 0;
 
@@ -376,43 +386,6 @@ results, default value is 0.85.
  */
 CV_EXPORTS_W Ptr<TonemapDrago> createTonemapDrago(float gamma = 1.0f, float saturation = 1.0f, float bias = 0.85f);
 
-/** @brief This algorithm decomposes image into two layers: base layer and detail layer using bilateral filter
-and compresses contrast of the base layer thus preserving all the details.
-
-This implementation uses regular bilateral filter from opencv.
-
-Saturation enhancement is possible as in ocvTonemapDrago.
-
-For more information see @cite DD02 .
- */
-class CV_EXPORTS_W TonemapDurand : public Tonemap
-{
-public:
-
-    CV_WRAP virtual float getSaturation() const = 0;
-    CV_WRAP virtual void setSaturation(float saturation) = 0;
-
-    CV_WRAP virtual float getContrast() const = 0;
-    CV_WRAP virtual void setContrast(float contrast) = 0;
-
-    CV_WRAP virtual float getSigmaSpace() const = 0;
-    CV_WRAP virtual void setSigmaSpace(float sigma_space) = 0;
-
-    CV_WRAP virtual float getSigmaColor() const = 0;
-    CV_WRAP virtual void setSigmaColor(float sigma_color) = 0;
-};
-
-/** @brief Creates TonemapDurand object
-
-@param gamma gamma value for gamma correction. See createTonemap
-@param contrast resulting contrast on logarithmic scale, i. e. log(max / min), where max and min
-are maximum and minimum luminance values of the resulting image.
-@param saturation saturation enhancement value. See createTonemapDrago
-@param sigma_space bilateral filter sigma in color space
-@param sigma_color bilateral filter sigma in coordinate space
- */
-CV_EXPORTS_W Ptr<TonemapDurand>
-createTonemapDurand(float gamma = 1.0f, float contrast = 4.0f, float saturation = 1.0f, float sigma_space = 2.0f, float sigma_color = 2.0f);
 
 /** @brief This is a global tonemapping operator that models human visual system.
 
@@ -502,7 +475,7 @@ class CV_EXPORTS_W AlignMTB : public AlignExposures
 {
 public:
     CV_WRAP virtual void process(InputArrayOfArrays src, std::vector<Mat>& dst,
-                                 InputArray times, InputArray response) = 0;
+                                 InputArray times, InputArray response) CV_OVERRIDE = 0;
 
     /** @brief Short version of process, that doesn't take extra arguments.
 
@@ -591,7 +564,7 @@ class CV_EXPORTS_W CalibrateDebevec : public CalibrateCRF
 @param samples number of pixel locations to use
 @param lambda smoothness term weight. Greater values produce smoother results, but can alter the
 response.
-@param random if true sample pixel locations are chosen at random, otherwise the form a
+@param random if true sample pixel locations are chosen at random, otherwise they form a
 rectangular grid.
  */
 CV_EXPORTS_W Ptr<CalibrateDebevec> createCalibrateDebevec(int samples = 70, float lambda = 10.0f, bool random = false);
@@ -646,7 +619,7 @@ class CV_EXPORTS_W MergeDebevec : public MergeExposures
 {
 public:
     CV_WRAP virtual void process(InputArrayOfArrays src, OutputArray dst,
-                                 InputArray times, InputArray response) = 0;
+                                 InputArray times, InputArray response) CV_OVERRIDE = 0;
     CV_WRAP virtual void process(InputArrayOfArrays src, OutputArray dst, InputArray times) = 0;
 };
 
@@ -669,7 +642,7 @@ class CV_EXPORTS_W MergeMertens : public MergeExposures
 {
 public:
     CV_WRAP virtual void process(InputArrayOfArrays src, OutputArray dst,
-                                 InputArray times, InputArray response) = 0;
+                                 InputArray times, InputArray response) CV_OVERRIDE = 0;
     /** @brief Short version of process, that doesn't take extra arguments.
 
     @param src vector of input images
@@ -705,7 +678,7 @@ class CV_EXPORTS_W MergeRobertson : public MergeExposures
 {
 public:
     CV_WRAP virtual void process(InputArrayOfArrays src, OutputArray dst,
-                                 InputArray times, InputArray response) = 0;
+                                 InputArray times, InputArray response) CV_OVERRIDE = 0;
     CV_WRAP virtual void process(InputArrayOfArrays src, OutputArray dst, InputArray times) = 0;
 };
 
@@ -715,6 +688,9 @@ CV_EXPORTS_W Ptr<MergeRobertson> createMergeRobertson();
 
 //! @} photo_hdr
 
+//! @addtogroup photo_decolor
+//! @{
+
 /** @brief Transforms a color image to a grayscale image. It is a basic tool in digital printing, stylized
 black-and-white photograph rendering, and in many single channel image processing applications
 @cite CL12 .
@@ -727,9 +703,27 @@ This function is to be applied on color images.
  */
 CV_EXPORTS_W void decolor( InputArray src, OutputArray grayscale, OutputArray color_boost);
 
+//! @} photo_decolor
+
 //! @addtogroup photo_clone
 //! @{
 
+
+//! seamlessClone algorithm flags
+enum
+{
+    /** The power of the method is fully expressed when inserting objects with complex outlines into a new background*/
+    NORMAL_CLONE = 1,
+    /** The classic method, color-based selection and alpha masking might be time consuming and often leaves an undesirable
+    halo. Seamless cloning, even averaged with the original image, is not effective. Mixed seamless cloning based on a loose selection proves effective.*/
+    MIXED_CLONE  = 2,
+    /** Monochrome transfer allows the user to easily replace certain features of one object by alternative features.*/
+    MONOCHROME_TRANSFER = 3};
+
+
+/** @example samples/cpp/tutorial_code/photo/seamless_cloning/cloning_demo.cpp
+An example using seamlessClone function
+*/
 /** @brief Image editing tasks concern either global changes (color/intensity corrections, filters,
 deformations) or local changes concerned to a selection. Here we are interested in achieving local
 changes, ones that are restricted to a region manually selected (ROI), in a seamless and effortless
@@ -741,15 +735,7 @@ content @cite PM03 .
 @param mask Input 8-bit 1 or 3-channel image.
 @param p Point in dst image where object is placed.
 @param blend Output image with the same size and type as dst.
-@param flags Cloning method that could be one of the following:
--   **NORMAL_CLONE** The power of the method is fully expressed when inserting objects with
-complex outlines into a new background
--   **MIXED_CLONE** The classic method, color-based selection and alpha masking might be time
-consuming and often leaves an undesirable halo. Seamless cloning, even averaged with the
-original image, is not effective. Mixed seamless cloning based on a loose selection proves
-effective.
--   **FEATURE_EXCHANGE** Feature exchange allows the user to easily replace certain features of
-one object by alternative features.
+@param flags Cloning method that could be cv::NORMAL_CLONE, cv::MIXED_CLONE or cv::MONOCHROME_TRANSFER
  */
 CV_EXPORTS_W void seamlessClone( InputArray src, InputArray dst, InputArray mask, Point p,
         OutputArray blend, int flags);
@@ -784,18 +770,16 @@ CV_EXPORTS_W void illuminationChange(InputArray src, InputArray mask, OutputArra
         float alpha = 0.2f, float beta = 0.4f);
 
 /** @brief By retaining only the gradients at edge locations, before integrating with the Poisson solver, one
-washes out the texture of the selected region, giving its contents a flat aspect. Here Canny Edge
-Detector is used.
+washes out the texture of the selected region, giving its contents a flat aspect. Here Canny Edge %Detector is used.
 
 @param src Input 8-bit 3-channel image.
 @param mask Input 8-bit 1 or 3-channel image.
 @param dst Output image with the same size and type as src.
-@param low_threshold Range from 0 to 100.
+@param low_threshold %Range from 0 to 100.
 @param high_threshold Value \> 100.
 @param kernel_size The size of the Sobel kernel to be used.
 
-**NOTE:**
-
+@note
 The algorithm assumes that the color of the source image is close to that of the destination. This
 assumption means that when the colors don't match, the source image color gets tinted toward the
 color of the destination image.
@@ -809,16 +793,21 @@ CV_EXPORTS_W void textureFlattening(InputArray src, InputArray mask, OutputArray
 //! @addtogroup photo_render
 //! @{
 
+//! Edge preserving filters
+enum
+{
+    RECURS_FILTER = 1, //!< Recursive Filtering
+    NORMCONV_FILTER = 2 //!< Normalized Convolution Filtering
+};
+
 /** @brief Filtering is the fundamental operation in image and video processing. Edge-preserving smoothing
 filters are used in many different applications @cite EM11 .
 
 @param src Input 8-bit 3-channel image.
 @param dst Output 8-bit 3-channel image.
-@param flags Edge preserving filters:
--   **RECURS_FILTER** = 1
--   **NORMCONV_FILTER** = 2
-@param sigma_s Range between 0 to 200.
-@param sigma_r Range between 0 to 1.
+@param flags Edge preserving filters: cv::RECURS_FILTER or cv::NORMCONV_FILTER
+@param sigma_s %Range between 0 to 200.
+@param sigma_r %Range between 0 to 1.
  */
 CV_EXPORTS_W void edgePreservingFilter(InputArray src, OutputArray dst, int flags = 1,
         float sigma_s = 60, float sigma_r = 0.4f);
@@ -827,20 +816,23 @@ CV_EXPORTS_W void edgePreservingFilter(InputArray src, OutputArray dst, int flag
 
 @param src Input 8-bit 3-channel image.
 @param dst Output image with the same size and type as src.
-@param sigma_s Range between 0 to 200.
-@param sigma_r Range between 0 to 1.
+@param sigma_s %Range between 0 to 200.
+@param sigma_r %Range between 0 to 1.
  */
 CV_EXPORTS_W void detailEnhance(InputArray src, OutputArray dst, float sigma_s = 10,
         float sigma_r = 0.15f);
 
+/** @example samples/cpp/tutorial_code/photo/non_photorealistic_rendering/npr_demo.cpp
+An example using non-photorealistic line drawing functions
+*/
 /** @brief Pencil-like non-photorealistic line drawing
 
 @param src Input 8-bit 3-channel image.
 @param dst1 Output 8-bit 1-channel image.
 @param dst2 Output image with the same size and type as src.
-@param sigma_s Range between 0 to 200.
-@param sigma_r Range between 0 to 1.
-@param shade_factor Range between 0 to 0.1.
+@param sigma_s %Range between 0 to 200.
+@param sigma_r %Range between 0 to 1.
+@param shade_factor %Range between 0 to 0.1.
  */
 CV_EXPORTS_W void pencilSketch(InputArray src, OutputArray dst1, OutputArray dst2,
         float sigma_s = 60, float sigma_r = 0.07f, float shade_factor = 0.02f);
@@ -851,8 +843,8 @@ contrast while preserving, or enhancing, high-contrast features.
 
 @param src Input 8-bit 3-channel image.
 @param dst Output image with the same size and type as src.
-@param sigma_s Range between 0 to 200.
-@param sigma_r Range between 0 to 1.
+@param sigma_s %Range between 0 to 200.
+@param sigma_r %Range between 0 to 1.
  */
 CV_EXPORTS_W void stylization(InputArray src, OutputArray dst, float sigma_s = 60,
         float sigma_r = 0.45f);
@@ -863,8 +855,4 @@ CV_EXPORTS_W void stylization(InputArray src, OutputArray dst, float sigma_s = 6
 
 } // cv
 
-#ifndef DISABLE_OPENCV_24_COMPATIBILITY
-#include "opencv2/photo/photo_c.h"
-#endif
-
 #endif
diff --git a/IPL/include/opencv/opencv2/photo/cuda.hpp b/IPL/include/opencv/opencv2/photo/cuda.hpp
index aeac1fa..a2f3816 100644
--- a/IPL/include/opencv/opencv2/photo/cuda.hpp
+++ b/IPL/include/opencv/opencv2/photo/cuda.hpp
@@ -40,8 +40,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_PHOTO_CUDA_HPP__
-#define __OPENCV_PHOTO_CUDA_HPP__
+#ifndef OPENCV_PHOTO_CUDA_HPP
+#define OPENCV_PHOTO_CUDA_HPP
 
 #include "opencv2/core/cuda.hpp"
 
@@ -129,4 +129,4 @@ CV_EXPORTS void fastNlMeansDenoisingColored(InputArray src, OutputArray dst,
 
 }} // namespace cv { namespace cuda {
 
-#endif /* __OPENCV_PHOTO_CUDA_HPP__ */
+#endif /* OPENCV_PHOTO_CUDA_HPP */
diff --git a/IPL/include/opencv/opencv2/photo/legacy/constants_c.h b/IPL/include/opencv/opencv2/photo/legacy/constants_c.h
new file mode 100644
index 0000000..ec1d440
--- /dev/null
+++ b/IPL/include/opencv/opencv2/photo/legacy/constants_c.h
@@ -0,0 +1,14 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_PHOTO_LEGACY_CONSTANTS_H
+#define OPENCV_PHOTO_LEGACY_CONSTANTS_H
+
+enum InpaintingModes
+{
+    CV_INPAINT_NS      =0,
+    CV_INPAINT_TELEA   =1
+};
+
+#endif // OPENCV_PHOTO_LEGACY_CONSTANTS_H
diff --git a/IPL/include/opencv/opencv2/plot.hpp b/IPL/include/opencv/opencv2/plot.hpp
index 8243985..06a12a1 100644
--- a/IPL/include/opencv/opencv2/plot.hpp
+++ b/IPL/include/opencv/opencv2/plot.hpp
@@ -59,6 +59,9 @@ namespace cv
 {
     namespace plot
     {
+    //! @addtogroup plot
+    //! @{
+
         class CV_EXPORTS_W Plot2d : public Algorithm
         {
             public:
@@ -68,17 +71,48 @@ namespace cv
             CV_WRAP virtual void setMaxX(double _plotMaxX) = 0;
             CV_WRAP virtual void setMaxY(double _plotMaxY) = 0;
             CV_WRAP virtual void setPlotLineWidth(int _plotLineWidth) = 0;
+            /**
+             * @brief Switches data visualization mode
+             *
+             * @param _needPlotLine if true then neighbour plot points will be connected by lines.
+             * In other case data will be plotted as a set of standalone points.
+             */
+            CV_WRAP virtual void setNeedPlotLine(bool _needPlotLine) = 0;
             CV_WRAP virtual void setPlotLineColor(Scalar _plotLineColor) = 0;
             CV_WRAP virtual void setPlotBackgroundColor(Scalar _plotBackgroundColor) = 0;
             CV_WRAP virtual void setPlotAxisColor(Scalar _plotAxisColor) = 0;
             CV_WRAP virtual void setPlotGridColor(Scalar _plotGridColor) = 0;
             CV_WRAP virtual void setPlotTextColor(Scalar _plotTextColor) = 0;
             CV_WRAP virtual void setPlotSize(int _plotSizeWidth, int _plotSizeHeight) = 0;
-            CV_WRAP virtual void render(Mat &_plotResult) = 0;
-        };
+            CV_WRAP virtual void setShowGrid(bool needShowGrid) = 0;
+            CV_WRAP virtual void setShowText(bool needShowText) = 0;
+            CV_WRAP virtual void setGridLinesNumber(int gridLinesNumber) = 0;
+            CV_WRAP virtual void setInvertOrientation(bool _invertOrientation) = 0;
+            /**
+             * @brief Sets the index of a point whose coordinates will be printed on the top left corner of the plot (if ShowText flag is true).
+             *
+             * @param pointIdx index of the required point in data array.
+             */
+            CV_WRAP virtual void setPointIdxToPrint(int pointIdx) = 0;
+            CV_WRAP virtual void render(OutputArray _plotResult) = 0;
 
-        CV_EXPORTS_W Ptr<Plot2d> createPlot2d(Mat data);
-        CV_EXPORTS_W Ptr<Plot2d> createPlot2d(Mat dataX, Mat dataY);
+            /**
+             * @brief Creates Plot2d object
+             *
+             * @param data \f$1xN\f$ or \f$Nx1\f$ matrix containing \f$Y\f$ values of points to plot. \f$X\f$ values
+             * will be equal to indexes of corresponding elements in data matrix.
+             */
+            CV_WRAP static Ptr<Plot2d> create(InputArray data);
+
+            /**
+             * @brief Creates Plot2d object
+             *
+             * @param dataX \f$1xN\f$ or \f$Nx1\f$ matrix containing \f$X\f$ values of points to plot.
+             * @param dataY \f$1xN\f$ or \f$Nx1\f$ matrix containing \f$Y\f$ values of points to plot.
+             */
+            CV_WRAP static Ptr<Plot2d> create(InputArray dataX, InputArray dataY);
+        };
+    //! @}
     }
 }
 
diff --git a/IPL/include/opencv/opencv2/quality.hpp b/IPL/include/opencv/opencv2/quality.hpp
new file mode 100644
index 0000000..8470f08
--- /dev/null
+++ b/IPL/include/opencv/opencv2/quality.hpp
@@ -0,0 +1,15 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_QUALITY_HPP
+#define OPENCV_QUALITY_HPP
+
+#include "quality/qualitybase.hpp"
+#include "quality/qualitymse.hpp"
+#include "quality/qualitypsnr.hpp"
+#include "quality/qualityssim.hpp"
+#include "quality/qualitygmsd.hpp"
+#include "quality/qualitybrisque.hpp"
+
+#endif
\ No newline at end of file
diff --git a/IPL/include/opencv/opencv2/quality/quality_utils.hpp b/IPL/include/opencv/opencv2/quality/quality_utils.hpp
new file mode 100644
index 0000000..4b3df51
--- /dev/null
+++ b/IPL/include/opencv/opencv2/quality/quality_utils.hpp
@@ -0,0 +1,109 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_QUALITY_QUALITY_UTILS_HPP
+#define OPENCV_QUALITY_QUALITY_UTILS_HPP
+
+#include "qualitybase.hpp"
+
+namespace cv
+{
+namespace quality
+{
+namespace quality_utils
+{
+
+// default type of matrix to expand to
+static CV_CONSTEXPR const int EXPANDED_MAT_DEFAULT_TYPE = CV_32F;
+
+// convert inputarray to specified mat type.  set type == -1 to preserve existing type
+template <typename R>
+inline R extract_mat(InputArray in, const int type = -1)
+{
+    R result = {};
+    if ( in.isMat() )
+        in.getMat().convertTo( result, (type != -1) ? type : in.getMat().type());
+    else if ( in.isUMat() )
+        in.getUMat().convertTo( result, (type != -1) ? type : in.getUMat().type());
+    else
+        CV_Error(Error::StsNotImplemented, "Unsupported input type");
+
+    return result;
+}
+
+// extract and expand matrix to target type
+template <typename R>
+inline R expand_mat( InputArray src, int TYPE_DEFAULT = EXPANDED_MAT_DEFAULT_TYPE)
+{
+    auto result = extract_mat<R>(src, -1);
+
+    // by default, expand to 32F unless we already have >= 32 bits, then go to 64
+    //  if/when we can detect OpenCL CV_16F support, opt for that when input depth == 8
+    //  note that this may impact the precision of the algorithms and would need testing
+    int type = TYPE_DEFAULT;
+
+    switch (result.depth())
+    {
+    case CV_32F:
+    case CV_32S:
+    case CV_64F:
+        type = CV_64F;
+    };  // switch
+
+    result.convertTo(result, type);
+    return result;
+}
+
+// return mat of observed min/max pair per column
+//  row 0:  min per column
+//  row 1:  max per column
+// template <typename T>
+inline cv::Mat get_column_range( const cv::Mat& data )
+{
+    CV_Assert(data.channels() == 1);
+    CV_Assert(data.rows > 0);
+
+    cv::Mat result( cv::Size( data.cols, 2 ), data.type() );
+
+    auto
+        row_min = result.row(0)
+        , row_max = result.row(1)
+        ;
+
+    // set initial min/max
+    data.row(0).copyTo(row_min);
+    data.row(0).copyTo(row_max);
+
+    for (int y = 1; y < data.rows; ++y)
+    {
+        auto row = data.row(y);
+        cv::min(row,row_min, row_min);
+        cv::max(row, row_max, row_max);
+    }
+    return result;
+}   // get_column_range
+
+// linear scale of each column from min to max
+//  range is column-wise pair of observed min/max.  See get_column_range
+template <typename T>
+inline void scale( cv::Mat& mat, const cv::Mat& range, const T min, const T max )
+{
+    // value = lower + (upper - lower) * (value - feature_min[index]) / (feature_max[index] - feature_min[index]);
+    // where [lower] = lower bound, [upper] = upper bound
+
+    for (int y = 0; y < mat.rows; ++y)
+    {
+        auto row = mat.row(y);
+        auto row_min = range.row(0);
+        auto row_max = range.row(1);
+        // NOTE(review): the divisor below is zero for any column whose observed min equals its max - confirm callers never pass such a range
+        for (int x = 0; x < mat.cols; ++x)
+            row.at<T>(x) = min + (max - min) * (row.at<T>(x) - row_min.at<T>(x) ) / (row_max.at<T>(x) - row_min.at<T>(x));
+    }
+}
+
+}   // quality_utils
+}   // quality
+}   // cv
+#endif
\ No newline at end of file
diff --git a/IPL/include/opencv/opencv2/quality/qualitybase.hpp b/IPL/include/opencv/opencv2/quality/qualitybase.hpp
new file mode 100644
index 0000000..1f4887f
--- /dev/null
+++ b/IPL/include/opencv/opencv2/quality/qualitybase.hpp
@@ -0,0 +1,63 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_QUALITYBASE_HPP
+#define OPENCV_QUALITYBASE_HPP
+
+#include <opencv2/core.hpp>
+
+/**
+@defgroup quality Image Quality Analysis (IQA) API
+*/
+
+namespace cv
+{
+namespace quality
+{
+
+//! @addtogroup quality
+//! @{
+
+/************************************ Quality Base Class ************************************/
+class CV_EXPORTS_W QualityBase
+    : public virtual Algorithm
+{
+public:
+
+    /** @brief Destructor */
+    virtual ~QualityBase() = default;
+
+    /**
+    @brief Compute quality score per channel with the per-channel score in each element of the resulting cv::Scalar.  See specific algorithm for interpreting result scores
+    @param img comparison image, or image to evaluate for no-reference quality algorithms
+    */
+    virtual CV_WRAP cv::Scalar compute( InputArray img ) = 0;
+
+    /** @brief Returns output quality map that was generated during computation, if supported by the algorithm  */
+    virtual CV_WRAP void getQualityMap(OutputArray dst) const
+    {
+        if (!dst.needed() || _qualityMap.empty() )
+            return;
+        dst.assign(_qualityMap);
+    }
+
+    /** @brief Implements Algorithm::clear()  */
+    CV_WRAP void clear() CV_OVERRIDE { _qualityMap = _mat_type(); Algorithm::clear(); }
+
+    /** @brief Implements Algorithm::empty()  */
+    CV_WRAP bool empty() const CV_OVERRIDE { return _qualityMap.empty(); }
+
+protected:
+
+    /** @brief internal mat type default */
+    using _mat_type = cv::UMat;
+
+    /** @brief Output quality maps if generated by algorithm */
+    _mat_type _qualityMap;
+
+};  // QualityBase
+//! @}
+}   // quality
+}   // cv
+#endif
\ No newline at end of file
diff --git a/IPL/include/opencv/opencv2/quality/qualitybrisque.hpp b/IPL/include/opencv/opencv2/quality/qualitybrisque.hpp
new file mode 100644
index 0000000..9732f82
--- /dev/null
+++ b/IPL/include/opencv/opencv2/quality/qualitybrisque.hpp
@@ -0,0 +1,82 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_QUALITY_QUALITYBRISQUE_HPP
+#define OPENCV_QUALITY_QUALITYBRISQUE_HPP
+
+#include "qualitybase.hpp"
+#include "opencv2/ml.hpp"
+
+namespace cv
+{
+namespace quality
+{
+
+/**
+@brief BRISQUE (Blind/Referenceless Image Spatial Quality Evaluator) is a No Reference Image Quality Assessment (NR-IQA) algorithm.
+
+BRISQUE computes a score based on extracting Natural Scene Statistics (https://en.wikipedia.org/wiki/Scene_statistics)
+and calculating feature vectors. See Mittal et al. @cite Mittal2 for original paper and original implementation @cite Mittal2_software .
+
+A trained model is provided in the /samples/ directory and is trained on the LIVE-R2 database @cite Sheikh as in the original implementation.
+When evaluated against the TID2008 database @cite Ponomarenko , the SROCC is -0.8424 versus the SROCC of -0.8354 in the original implementation.
+C++ code for the BRISQUE LIVE-R2 trainer and TID2008 evaluator are also provided in the /samples/ directory.
+*/
+class CV_EXPORTS_W QualityBRISQUE : public QualityBase {
+public:
+
+    /** @brief Computes BRISQUE quality score for input image
+    @param img Image for which to compute quality
+    @returns cv::Scalar with the score in the first element.  The score ranges from 0 (best quality) to 100 (worst quality)
+    */
+    CV_WRAP cv::Scalar compute( InputArray img ) CV_OVERRIDE;
+
+    /**
+    @brief Create an object which calculates quality
+    @param model_file_path cv::String which contains a path to the BRISQUE model data, eg. /path/to/brisque_model_live.yml
+    @param range_file_path cv::String which contains a path to the BRISQUE range data, eg. /path/to/brisque_range_live.yml
+    */
+    CV_WRAP static Ptr<QualityBRISQUE> create( const cv::String& model_file_path, const cv::String& range_file_path );
+
+    /**
+    @brief Create an object which calculates quality
+    @param model cv::Ptr<cv::ml::SVM> which contains a loaded BRISQUE model
+    @param range cv::Mat which contains BRISQUE range data
+    */
+    CV_WRAP static Ptr<QualityBRISQUE> create( const cv::Ptr<cv::ml::SVM>& model, const cv::Mat& range );
+
+    /**
+    @brief static method for computing quality
+    @param img image for which to compute quality
+    @param model_file_path cv::String which contains a path to the BRISQUE model data, eg. /path/to/brisque_model_live.yml
+    @param range_file_path cv::String which contains a path to the BRISQUE range data, eg. /path/to/brisque_range_live.yml
+    @returns cv::Scalar with the score in the first element.  The score ranges from 0 (best quality) to 100 (worst quality)
+    */
+    CV_WRAP static cv::Scalar compute( InputArray img, const cv::String& model_file_path, const cv::String& range_file_path );
+
+    /**
+    @brief static method for computing image features used by the BRISQUE algorithm
+    @param img image (BGR(A) or grayscale) for which to compute features
+    @param features output row vector of features to cv::Mat or cv::UMat
+    */
+    CV_WRAP static void computeFeatures(InputArray img, OutputArray features);
+
+protected:
+
+    cv::Ptr<cv::ml::SVM> _model = nullptr;
+    cv::Mat _range;
+
+    /** @brief Internal constructor */
+    QualityBRISQUE( const cv::String& model_file_path, const cv::String& range_file_path );
+
+    /** @brief Internal constructor */
+    QualityBRISQUE(const cv::Ptr<cv::ml::SVM>& model, const cv::Mat& range )
+        : _model{ model }
+        , _range{ range }
+    {}
+
+};  // QualityBRISQUE
+}   // quality
+}   // cv
+#endif
diff --git a/IPL/include/opencv/opencv2/quality/qualitygmsd.hpp b/IPL/include/opencv/opencv2/quality/qualitygmsd.hpp
new file mode 100644
index 0000000..3a9cd0b
--- /dev/null
+++ b/IPL/include/opencv/opencv2/quality/qualitygmsd.hpp
@@ -0,0 +1,92 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_QUALITY_QUALITYGMSD_HPP
+#define OPENCV_QUALITY_QUALITYGMSD_HPP
+
+#include "qualitybase.hpp"
+
+namespace cv
+{
+namespace quality
+{
+
+/**
+@brief Full reference GMSD algorithm
+http://www4.comp.polyu.edu.hk/~cslzhang/IQA/GMSD/GMSD.htm
+*/
+class CV_EXPORTS_W QualityGMSD
+    : public QualityBase {
+public:
+
+    /**
+    @brief Compute GMSD
+    @param cmp comparison image
+    @returns cv::Scalar with per-channel quality value.  Values range from 0 (worst) to 1 (best)
+    */
+    CV_WRAP cv::Scalar compute( InputArray cmp ) CV_OVERRIDE;
+
+    /** @brief Implements Algorithm::empty()  */
+    CV_WRAP bool empty() const CV_OVERRIDE { return _refImgData.empty() && QualityBase::empty(); }
+
+    /** @brief Implements Algorithm::clear()  */
+    CV_WRAP void clear() CV_OVERRIDE { _refImgData = _mat_data(); QualityBase::clear(); }
+
+    /**
+    @brief Create an object which calculates image quality
+    @param ref reference image
+    */
+    CV_WRAP static Ptr<QualityGMSD> create( InputArray ref );
+
+    /**
+    @brief static method for computing quality
+    @param ref reference image
+    @param cmp comparison image
+    @param qualityMap output quality map, or cv::noArray()
+    @returns cv::Scalar with per-channel quality value.  Values range from 0 (worst) to 1 (best)
+    */
+    CV_WRAP static cv::Scalar compute( InputArray ref, InputArray cmp, OutputArray qualityMap );
+
+protected:
+
+    // holds computed values for a mat
+    struct _mat_data
+    {
+        // internal mat type
+        using mat_type = QualityBase::_mat_type;
+
+        mat_type
+            gradient_map
+            , gradient_map_squared
+            ;
+
+        // allow default construction
+        _mat_data() = default;
+
+        // construct from mat_type
+        _mat_data(const mat_type&);
+
+        // construct from inputarray
+        _mat_data(InputArray);
+
+        // returns flag if empty
+        bool empty() const { return this->gradient_map.empty() && this->gradient_map_squared.empty(); }
+
+        // compute for a single frame
+        static std::pair<cv::Scalar, mat_type> compute(const _mat_data& lhs, const _mat_data& rhs);
+
+    };  // mat_data
+
+    /** @brief Reference image data */
+    _mat_data _refImgData;
+
+    // internal constructor
+    QualityGMSD(_mat_data refImgData)
+        : _refImgData(std::move(refImgData))
+    {}
+
+};  // QualityGMSD
+}   // quality
+}   // cv
+#endif
\ No newline at end of file
diff --git a/IPL/include/opencv/opencv2/quality/qualitymse.hpp b/IPL/include/opencv/opencv2/quality/qualitymse.hpp
new file mode 100644
index 0000000..7763a58
--- /dev/null
+++ b/IPL/include/opencv/opencv2/quality/qualitymse.hpp
@@ -0,0 +1,64 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_QUALITY_QUALITYMSE_HPP
+#define OPENCV_QUALITY_QUALITYMSE_HPP
+
+#include "qualitybase.hpp"
+
+namespace cv
+{
+namespace quality
+{
+
+/**
+@brief Full reference mean square error algorithm  https://en.wikipedia.org/wiki/Mean_squared_error
+*/
+class CV_EXPORTS_W QualityMSE : public QualityBase {
+public:
+
+    /** @brief Computes MSE for reference images supplied in class constructor and provided comparison images
+    @param cmpImgs Comparison image(s)
+    @returns cv::Scalar with per-channel quality values.  Values range from 0 (best) to potentially max float (worst)
+    */
+    CV_WRAP cv::Scalar compute( InputArrayOfArrays cmpImgs ) CV_OVERRIDE;
+
+    /** @brief Implements Algorithm::empty()  */
+    CV_WRAP bool empty() const CV_OVERRIDE { return _ref.empty() && QualityBase::empty(); }
+
+    /** @brief Implements Algorithm::clear()  */
+    CV_WRAP void clear() CV_OVERRIDE { _ref = _mat_type(); QualityBase::clear(); }
+
+    /**
+    @brief Create an object which calculates quality
+    @param ref input image to use as the reference for comparison
+    */
+    CV_WRAP static Ptr<QualityMSE> create(InputArray ref);
+
+    /**
+    @brief static method for computing quality
+    @param ref reference image
+    @param cmp comparison image
+    @param qualityMap output quality map, or cv::noArray()
+    @returns cv::Scalar with per-channel quality values.  Values range from 0 (best) to max float (worst)
+    */
+    CV_WRAP static cv::Scalar compute( InputArray ref, InputArray cmp, OutputArray qualityMap );
+
+protected:
+
+    /** @brief Reference image, converted to internal mat type */
+    QualityBase::_mat_type _ref;
+
+    /**
+    @brief Constructor
+    @param ref reference image, converted to internal type
+    */
+    QualityMSE(QualityBase::_mat_type ref)
+        : _ref(std::move(ref))
+    {}
+
+};  // QualityMSE
+}   // quality
+}   // cv
+#endif
\ No newline at end of file
diff --git a/IPL/include/opencv/opencv2/quality/qualitypsnr.hpp b/IPL/include/opencv/opencv2/quality/qualitypsnr.hpp
new file mode 100644
index 0000000..59b7325
--- /dev/null
+++ b/IPL/include/opencv/opencv2/quality/qualitypsnr.hpp
@@ -0,0 +1,120 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_QUALITY_QUALITYPSNR_HPP
+#define OPENCV_QUALITY_QUALITYPSNR_HPP
+
+#include <limits>   // numeric_limits
+#include "qualitybase.hpp"
+#include "qualitymse.hpp"
+
+namespace cv
+{
+namespace quality
+{
+
+/**
+@brief Full reference peak signal to noise ratio (PSNR) algorithm  https://en.wikipedia.org/wiki/Peak_signal-to-noise_ratio
+*/
+class CV_EXPORTS_W QualityPSNR
+    : public QualityBase {
+
+public:
+
+    /** @brief Default maximum pixel value */
+#if __cplusplus >= 201103L || (defined(_MSC_VER) && _MSC_VER >= 1900/*MSVS 2015*/)
+    static constexpr double MAX_PIXEL_VALUE_DEFAULT = 255.;
+#else
+    // support MSVS 2013
+    static const int MAX_PIXEL_VALUE_DEFAULT = 255;
+#endif
+
+    /**
+    @brief Create an object which calculates quality
+    @param ref input image to use as the source for comparison
+    @param maxPixelValue maximum per-channel value for any individual pixel; eg 255 for uint8 image
+    */
+    CV_WRAP static Ptr<QualityPSNR> create( InputArray ref, double maxPixelValue = QualityPSNR::MAX_PIXEL_VALUE_DEFAULT )
+    {
+        return Ptr<QualityPSNR>(new QualityPSNR(QualityMSE::create(ref), maxPixelValue));
+    }
+
+    /**
+    @brief Compute the PSNR
+    @param cmp Comparison image
+    @returns Per-channel PSNR value, or std::numeric_limits<double>::infinity() if the MSE between the two images == 0
+    */
+    CV_WRAP cv::Scalar compute( InputArray cmp ) CV_OVERRIDE
+    {
+        auto result = _qualityMSE->compute( cmp );
+        _qualityMSE->getQualityMap(_qualityMap);  // copy from internal obj to this obj
+        return _mse_to_psnr(
+            result
+            , _maxPixelValue
+        );
+    }
+
+    /** @brief Implements Algorithm::empty()  */
+    CV_WRAP bool empty() const CV_OVERRIDE { return _qualityMSE->empty() && QualityBase::empty(); }
+
+    /** @brief Implements Algorithm::clear()  */
+    CV_WRAP void clear() CV_OVERRIDE { _qualityMSE->clear(); QualityBase::clear(); }
+
+    /**
+    @brief static method for computing quality
+    @param ref reference image
+    @param cmp comparison image
+    @param qualityMap output quality map, or cv::noArray()
+    @param maxPixelValue maximum per-channel value for any individual pixel; eg 255 for uint8 image
+    @returns PSNR value, or std::numeric_limits<double>::infinity() if the MSE between the two images == 0
+    */
+    CV_WRAP static cv::Scalar compute( InputArray ref, InputArray cmp, OutputArray qualityMap, double maxPixelValue = QualityPSNR::MAX_PIXEL_VALUE_DEFAULT)
+    {
+        return _mse_to_psnr(
+            QualityMSE::compute(ref, cmp, qualityMap)
+            , maxPixelValue
+        );
+    }
+
+    /** @brief return the maximum pixel value used for PSNR computation */
+    CV_WRAP double getMaxPixelValue() const { return _maxPixelValue; }
+
+    /**
+    @brief sets the maximum pixel value used for PSNR computation
+    @param val Maximum pixel value
+    */
+    CV_WRAP void setMaxPixelValue(double val) { this->_maxPixelValue = val; }
+
+protected:
+
+    Ptr<QualityMSE> _qualityMSE;
+    double _maxPixelValue = QualityPSNR::MAX_PIXEL_VALUE_DEFAULT;
+
+    /** @brief Constructor */
+    QualityPSNR( Ptr<QualityMSE> qualityMSE, double maxPixelValue )
+        : _qualityMSE(std::move(qualityMSE))
+        , _maxPixelValue(maxPixelValue)
+    {}
+
+    // convert mse to psnr
+    static double _mse_to_psnr(double mse, double max_pixel_value)
+    {
+        return (mse == 0.)
+            ? std::numeric_limits<double>::infinity()
+            : 10. * std::log10((max_pixel_value * max_pixel_value) / mse)
+            ;
+    }
+
+    // convert scalar of mses to psnrs
+    static cv::Scalar _mse_to_psnr(cv::Scalar mse, double max_pixel_value)
+    {
+        for (int i = 0; i < mse.rows; ++i)
+            mse(i) = _mse_to_psnr(mse(i), max_pixel_value);
+        return mse;
+    }
+
+};    // QualityPSNR
+}   // quality
+}   // cv
+#endif
\ No newline at end of file
diff --git a/IPL/include/opencv/opencv2/quality/qualityssim.hpp b/IPL/include/opencv/opencv2/quality/qualityssim.hpp
new file mode 100644
index 0000000..edbd3ae
--- /dev/null
+++ b/IPL/include/opencv/opencv2/quality/qualityssim.hpp
@@ -0,0 +1,97 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_QUALITY_QUALITYSSIM_HPP
+#define OPENCV_QUALITY_QUALITYSSIM_HPP
+
+#include "qualitybase.hpp"
+
+namespace cv
+{
+namespace quality
+{
+
+/**
+@brief Full reference structural similarity algorithm  https://en.wikipedia.org/wiki/Structural_similarity
+*/
+class CV_EXPORTS_W QualitySSIM
+    : public QualityBase {
+public:
+
+    /**
+    @brief Computes SSIM
+    @param cmp Comparison image
+    @returns cv::Scalar with per-channel quality values.  Values range from 0 (worst) to 1 (best)
+    */
+    CV_WRAP cv::Scalar compute( InputArray cmp ) CV_OVERRIDE;
+
+    /** @brief Implements Algorithm::empty()  */
+    CV_WRAP bool empty() const CV_OVERRIDE { return _refImgData.empty() && QualityBase::empty(); }
+
+    /** @brief Implements Algorithm::clear()  */
+    CV_WRAP void clear() CV_OVERRIDE { _refImgData = _mat_data(); QualityBase::clear(); }
+
+    /**
+    @brief Create an object which calculates quality
+    @param ref input image to use as the reference image for comparison
+    */
+    CV_WRAP static Ptr<QualitySSIM> create( InputArray ref );
+
+    /**
+    @brief static method for computing quality
+    @param ref reference image
+    @param cmp comparison image
+    @param qualityMap output quality map, or cv::noArray()
+    @returns cv::Scalar with per-channel quality values.  Values range from 0 (worst) to 1 (best)
+    */
+    CV_WRAP static cv::Scalar compute( InputArray ref, InputArray cmp, OutputArray qualityMap );
+
+protected:
+
+    // holds computed values for a mat
+    struct _mat_data
+    {
+        // internal mat type
+        using mat_type = QualityBase::_mat_type;
+
+        mat_type
+            I
+            , I_2
+            , mu
+            , mu_2
+            , sigma_2
+            ;
+
+        // allow default construction
+        _mat_data() = default;
+
+        // construct from mat_type
+        _mat_data(const mat_type&);
+
+        // construct from inputarray
+        _mat_data(InputArray);
+
+        // return flag if this is empty
+        bool empty() const { return I.empty() && I_2.empty() && mu.empty() && mu_2.empty() && sigma_2.empty(); }
+
+        // computes ssim and quality map for single frame
+        static std::pair<cv::Scalar, mat_type> compute(const _mat_data& lhs, const _mat_data& rhs);
+
+    };  // mat_data
+
+    /** @brief Reference image data */
+    _mat_data _refImgData;
+
+    /**
+    @brief Constructor
+    @param refImgData reference image, converted to internal type
+    */
+    QualitySSIM( _mat_data refImgData )
+        : _refImgData( std::move(refImgData) )
+    {}
+
+};  // QualitySSIM
+}   // quality
+}   // cv
+#endif
\ No newline at end of file
diff --git a/IPL/include/opencv/opencv2/rapid.hpp b/IPL/include/opencv/opencv2/rapid.hpp
new file mode 100644
index 0000000..829bd86
--- /dev/null
+++ b/IPL/include/opencv/opencv2/rapid.hpp
@@ -0,0 +1,126 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_RAPID_HPP_
+#define OPENCV_RAPID_HPP_
+
+#include <opencv2/core.hpp>
+#include <opencv2/imgproc.hpp>
+
+/**
+@defgroup rapid silhouette based 3D object tracking
+
+implements "RAPID-a video rate object tracker" @cite harris1990rapid with the dynamic control point extraction of @cite drummond2002real
+*/
+
+namespace cv
+{
+namespace rapid
+{
+//! @addtogroup rapid
+//! @{
+
+/**
+ * Debug draw markers of matched correspondences onto a lineBundle
+ * @param bundle the lineBundle
+ * @param srcLocations the corresponding source locations
+ * @param newLocations matched source locations
+ * @param colors colors for the markers. Defaults to white.
+ */
+CV_EXPORTS_W void drawCorrespondencies(InputOutputArray bundle, InputArray srcLocations,
+                                       InputArray newLocations, InputArray colors = noArray());
+/**
+ * Debug draw search lines onto an image
+ * @param img the output image
+ * @param locations the source locations of a line bundle
+ * @param color the line color
+ */
+CV_EXPORTS_W void drawSearchLines(InputOutputArray img, InputArray locations, const Scalar& color);
+
+/**
+ * Draw a wireframe of a triangle mesh
+ * @param img the output image
+ * @param pts2d the 2d points obtained by @ref projectPoints
+ * @param tris triangle face connectivity
+ * @param color line color
+ * @param type line type. See @ref LineTypes.
+ * @param cullBackface enable back-face culling based on CCW order
+ */
+CV_EXPORTS_W void drawWireframe(InputOutputArray img, InputArray pts2d, InputArray tris,
+                                const Scalar& color, int type = LINE_8, bool cullBackface = false);
+/**
+ * Extract control points from the projected silhouette of a mesh
+ *
+ * see @cite drummond2002real Sec 2.1, Step b
+ * @param num number of control points
+ * @param len search radius (used to restrict the ROI)
+ * @param pts3d the 3D points of the mesh
+ * @param rvec rotation between mesh and camera
+ * @param tvec translation between mesh and camera
+ * @param K camera intrinsic
+ * @param imsize size of the video frame
+ * @param tris triangle face connectivity
+ * @param ctl2d the 2D locations of the control points
+ * @param ctl3d matching 3D points of the mesh
+ */
+CV_EXPORTS_W void extractControlPoints(int num, int len, InputArray pts3d, InputArray rvec, InputArray tvec,
+                                       InputArray K, const Size& imsize, InputArray tris, OutputArray ctl2d,
+                                       OutputArray ctl3d);
+/**
+ * Extract the line bundle from an image
+ * @param len the search radius. The bundle will have `2*len + 1` columns.
+ * @param ctl2d the search lines will be centered at these points and orthogonal to the contour defined by
+ * them. The bundle will have as many rows.
+ * @param img the image to read the pixel intensities values from
+ * @param bundle line bundle image with size `ctl2d.rows() x (2 * len + 1)` and the same type as @p img
+ * @param srcLocations the source pixel locations of @p bundle in @p img as CV_16SC2
+ */
+CV_EXPORTS_W void extractLineBundle(int len, InputArray ctl2d, InputArray img, OutputArray bundle,
+                                    OutputArray srcLocations);
+
+/**
+ * Find corresponding image locations by searching for a maximal sobel edge along the search line (a single
+ * row in the bundle)
+ * @param bundle the line bundle
+ * @param srcLocations the corresponding source image locations
+ * @param newLocations image locations with maximal edge along the search line
+ * @param response the sobel response for the selected point
+ */
+CV_EXPORTS_W void findCorrespondencies(InputArray bundle, InputArray srcLocations, OutputArray newLocations,
+                                       OutputArray response = noArray());
+
+/**
+ * Filter corresponding 2d and 3d points based on mask
+ * @param pts2d 2d points
+ * @param pts3d 3d points
+ * @param mask mask containing non-zero values for the elements to be retained
+ */
+CV_EXPORTS_W void filterCorrespondencies(InputOutputArray pts2d, InputOutputArray pts3d, InputArray mask);
+
+/**
+ * High level function to execute a single rapid @cite harris1990rapid iteration
+ *
+ * 1. @ref extractControlPoints
+ * 2. @ref extractLineBundle
+ * 3. @ref findCorrespondencies
+ * 4. @ref filterCorrespondencies
+ * 5. @ref solvePnPRefineLM
+ *
+ * @param img the video frame
+ * @param num number of search lines
+ * @param len search line radius
+ * @param pts3d the 3D points of the mesh
+ * @param tris triangle face connectivity
+ * @param K camera matrix
+ * @param rvec rotation between mesh and camera. Input values are used as an initial solution.
+ * @param tvec translation between mesh and camera. Input values are used as an initial solution.
+ * @return ratio of search lines that could be extracted and matched
+ */
+CV_EXPORTS_W float rapid(InputArray img, int num, int len, InputArray pts3d, InputArray tris, InputArray K,
+                         InputOutputArray rvec, InputOutputArray tvec);
+//! @}
+} /* namespace rapid */
+} /* namespace cv */
+
+#endif /* OPENCV_RAPID_HPP_ */
diff --git a/IPL/include/opencv/opencv2/reg/map.hpp b/IPL/include/opencv/opencv2/reg/map.hpp
index 26b29e3..a885a28 100644
--- a/IPL/include/opencv/opencv2/reg/map.hpp
+++ b/IPL/include/opencv/opencv2/reg/map.hpp
@@ -121,13 +121,13 @@ namespace reg {
 
 The class is only used to define the common interface for any possible map.
  */
-class CV_EXPORTS Map
+class CV_EXPORTS_W Map
 {
 public:
     /*!
      * Virtual destructor
      */
-    virtual ~Map(void);
+    virtual ~Map();
 
     /*!
      * Warps image to a new coordinate frame. The calculation is img2(x)=img1(T^{-1}(x)), as we
@@ -136,7 +136,7 @@ class CV_EXPORTS Map
      * \param[in] img1 Original image
      * \param[out] img2 Warped image
      */
-    virtual void warp(const cv::Mat& img1, cv::Mat& img2) const;
+    CV_WRAP virtual void warp(InputArray img1, OutputArray img2) const;
 
     /*!
      * Warps image to a new coordinate frame. The calculation is img2(x)=img1(T(x)), so in fact
@@ -145,27 +145,27 @@ class CV_EXPORTS Map
      * \param[in] img1 Original image
      * \param[out] img2 Warped image
      */
-    virtual void inverseWarp(const cv::Mat& img1, cv::Mat& img2) const = 0;
+    CV_WRAP virtual void inverseWarp(InputArray img1, OutputArray img2) const = 0;
 
     /*!
      * Calculates the inverse map
      * \return Inverse map
      */
-    virtual cv::Ptr<Map> inverseMap(void) const = 0;
+    CV_WRAP virtual cv::Ptr<Map> inverseMap() const = 0;
 
     /*!
      * Changes the map composing the current transformation with the one provided in the call.
      * The order is first the current transformation, then the input argument.
      * \param[in] map Transformation to compose with.
      */
-    virtual void compose(const Map& map) = 0;
+    CV_WRAP virtual void compose(cv::Ptr<Map> map) = 0;
 
     /*!
      * Scales the map by a given factor as if the coordinates system is expanded/compressed
      * by that factor.
      * \param[in] factor Expansion if bigger than one, compression if smaller than one
      */
-    virtual void scale(double factor) = 0;
+    CV_WRAP virtual void scale(double factor) = 0;
 };
 
 //! @}
diff --git a/IPL/include/opencv/opencv2/reg/mapaffine.hpp b/IPL/include/opencv/opencv2/reg/mapaffine.hpp
index 1c91326..52ab8d6 100644
--- a/IPL/include/opencv/opencv2/reg/mapaffine.hpp
+++ b/IPL/include/opencv/opencv2/reg/mapaffine.hpp
@@ -49,33 +49,33 @@ namespace reg {
 /*!
  * Defines an affine transformation
  */
-class CV_EXPORTS MapAffine : public Map
+class CV_EXPORTS_W MapAffine : public Map
 {
 public:
     /*!
      * Default constructor builds an identity map
      */
-    MapAffine(void);
+    CV_WRAP MapAffine();
 
     /*!
      * Constructor providing explicit values
      * \param[in] linTr Linear part of the affine transformation
      * \param[in] shift Displacement part of the affine transformation
      */
-    MapAffine(const cv::Matx<double, 2, 2>& linTr, const cv::Vec<double, 2>& shift);
+    CV_WRAP MapAffine(InputArray linTr, InputArray shift);
 
     /*!
      * Destructor
      */
-    ~MapAffine(void);
+    ~MapAffine();
 
-    void inverseWarp(const cv::Mat& img1, cv::Mat& img2) const;
+    CV_WRAP void inverseWarp(InputArray img1, OutputArray img2) const CV_OVERRIDE;
 
-    cv::Ptr<Map> inverseMap(void) const;
+    CV_WRAP cv::Ptr<Map> inverseMap() const CV_OVERRIDE;
 
-    void compose(const Map& map);
+    CV_WRAP void compose(cv::Ptr<Map> map) CV_OVERRIDE;
 
-    void scale(double factor);
+    CV_WRAP void scale(double factor) CV_OVERRIDE;
 
     /*!
      * Return linear part of the affine transformation
@@ -85,6 +85,10 @@ class CV_EXPORTS MapAffine : public Map
         return linTr_;
     }
 
+    CV_WRAP void getLinTr(OutputArray linTr) const {
+        Mat(linTr_).copyTo(linTr);
+    }
+
     /*!
      * Return displacement part of the affine transformation
      * \return Displacement part of the affine transformation
@@ -93,6 +97,10 @@ class CV_EXPORTS MapAffine : public Map
         return shift_;
     }
 
+    CV_WRAP void getShift(OutputArray shift) const {
+        Mat(shift_).copyTo(shift);
+    }
+
 private:
     cv::Matx<double, 2, 2> linTr_;
     cv::Vec<double, 2> shift_;
diff --git a/IPL/include/opencv/opencv2/reg/mapper.hpp b/IPL/include/opencv/opencv2/reg/mapper.hpp
index 8abadd1..2c2862d 100644
--- a/IPL/include/opencv/opencv2/reg/mapper.hpp
+++ b/IPL/include/opencv/opencv2/reg/mapper.hpp
@@ -47,11 +47,11 @@ namespace reg {
 //! @addtogroup reg
 //! @{
 
-/** @brief Base class for modelling an algorithm for calculating a
+/** @brief Base class for modelling an algorithm for calculating a map
 
 The class is only used to define the common interface for any possible mapping algorithm.
  */
-class CV_EXPORTS Mapper
+class CV_EXPORTS_W Mapper
 {
 public:
     virtual ~Mapper(void) {}
@@ -60,16 +60,16 @@ class CV_EXPORTS Mapper
      * Calculate mapping between two images
      * \param[in] img1 Reference image
      * \param[in] img2 Warped image
-     * \param[in,out] res Map from img1 to img2, stored in a smart pointer. If present as input,
-     *       it is an initial rough estimation that the mapper will try to refine.
+     * \param[in] init If present, it is an initial rough estimation that the mapper will try to refine.
+     * \return Map from img1 to img2, stored in a smart pointer.
      */
-    virtual void calculate(const cv::Mat& img1, const cv::Mat& img2, cv::Ptr<Map>& res) const = 0;
+    CV_WRAP virtual cv::Ptr<Map> calculate(InputArray img1, InputArray img2, cv::Ptr<Map> init = cv::Ptr<Map>()) const = 0;
 
     /*
      * Returns a map compatible with the Mapper class
      * \return Pointer to identity Map
      */
-    virtual cv::Ptr<Map> getMap(void) const = 0;
+    CV_WRAP virtual cv::Ptr<Map> getMap() const = 0;
 
 protected:
     /*
diff --git a/IPL/include/opencv/opencv2/reg/mappergradaffine.hpp b/IPL/include/opencv/opencv2/reg/mappergradaffine.hpp
index 08d5397..49b0bc7 100644
--- a/IPL/include/opencv/opencv2/reg/mappergradaffine.hpp
+++ b/IPL/include/opencv/opencv2/reg/mappergradaffine.hpp
@@ -49,15 +49,15 @@ namespace reg {
 /*!
  * Mapper for affine motion
  */
-class CV_EXPORTS MapperGradAffine: public Mapper
+class CV_EXPORTS_W MapperGradAffine: public Mapper
 {
 public:
-    MapperGradAffine(void);
+    CV_WRAP MapperGradAffine();
     ~MapperGradAffine(void);
 
-    virtual void calculate(const cv::Mat& img1, const cv::Mat& img2, cv::Ptr<Map>& res) const;
+    CV_WRAP virtual cv::Ptr<Map> calculate(InputArray img1, InputArray img2, cv::Ptr<Map> init = cv::Ptr<Map>()) const CV_OVERRIDE;
 
-    cv::Ptr<Map> getMap(void) const;
+    CV_WRAP cv::Ptr<Map> getMap() const CV_OVERRIDE;
 };
 
 //! @}
diff --git a/IPL/include/opencv/opencv2/reg/mappergradeuclid.hpp b/IPL/include/opencv/opencv2/reg/mappergradeuclid.hpp
index 29c49cb..4f6c5c8 100644
--- a/IPL/include/opencv/opencv2/reg/mappergradeuclid.hpp
+++ b/IPL/include/opencv/opencv2/reg/mappergradeuclid.hpp
@@ -49,15 +49,15 @@ namespace reg {
 /*!
  * Mapper for euclidean motion: rotation plus shift
  */
-class CV_EXPORTS MapperGradEuclid: public Mapper
+class CV_EXPORTS_W MapperGradEuclid: public Mapper
 {
 public:
-    MapperGradEuclid(void);
-    ~MapperGradEuclid(void);
+    CV_WRAP MapperGradEuclid();
+    ~MapperGradEuclid();
 
-    virtual void calculate(const cv::Mat& img1, const cv::Mat& img2, cv::Ptr<Map>& res) const;
+    CV_WRAP virtual cv::Ptr<Map> calculate(InputArray img1, InputArray img2, cv::Ptr<Map> init = cv::Ptr<Map>()) const CV_OVERRIDE;
 
-    cv::Ptr<Map> getMap(void) const;
+    CV_WRAP cv::Ptr<Map> getMap() const CV_OVERRIDE;
 };
 
 //! @}
diff --git a/IPL/include/opencv/opencv2/reg/mappergradproj.hpp b/IPL/include/opencv/opencv2/reg/mappergradproj.hpp
index f1721e8..1e01c71 100644
--- a/IPL/include/opencv/opencv2/reg/mappergradproj.hpp
+++ b/IPL/include/opencv/opencv2/reg/mappergradproj.hpp
@@ -49,15 +49,15 @@ namespace reg {
 /*!
  * Gradient mapper for a projective transformation
  */
-class CV_EXPORTS MapperGradProj: public Mapper
+class CV_EXPORTS_W MapperGradProj: public Mapper
 {
 public:
-    MapperGradProj(void);
-    ~MapperGradProj(void);
+    CV_WRAP MapperGradProj();
+    ~MapperGradProj();
 
-    virtual void calculate(const cv::Mat& img1, const cv::Mat& img2, cv::Ptr<Map>& res) const;
+    CV_WRAP virtual cv::Ptr<Map> calculate(InputArray img1, InputArray img2, cv::Ptr<Map> init = cv::Ptr<Map>()) const CV_OVERRIDE;
 
-    cv::Ptr<Map> getMap(void) const;
+    CV_WRAP cv::Ptr<Map> getMap() const CV_OVERRIDE;
 };
 
 //! @}
diff --git a/IPL/include/opencv/opencv2/reg/mappergradshift.hpp b/IPL/include/opencv/opencv2/reg/mappergradshift.hpp
index a9f75b3..cd812d9 100644
--- a/IPL/include/opencv/opencv2/reg/mappergradshift.hpp
+++ b/IPL/include/opencv/opencv2/reg/mappergradshift.hpp
@@ -49,15 +49,15 @@ namespace reg {
 /*!
  * Gradient mapper for a translation
  */
-class CV_EXPORTS MapperGradShift: public Mapper
+class CV_EXPORTS_W MapperGradShift: public Mapper
 {
 public:
-    MapperGradShift(void);
-    virtual ~MapperGradShift(void);
+    CV_WRAP MapperGradShift();
+    virtual ~MapperGradShift();
 
-    virtual void calculate(const cv::Mat& img1, const cv::Mat& img2, cv::Ptr<Map>& res) const;
+    CV_WRAP virtual cv::Ptr<Map> calculate(InputArray img1, InputArray img2, cv::Ptr<Map> init = cv::Ptr<Map>()) const CV_OVERRIDE;
 
-    cv::Ptr<Map> getMap(void) const;
+    CV_WRAP cv::Ptr<Map> getMap() const CV_OVERRIDE;
 };
 
 //! @}
diff --git a/IPL/include/opencv/opencv2/reg/mappergradsimilar.hpp b/IPL/include/opencv/opencv2/reg/mappergradsimilar.hpp
index ea45ab9..07d64d9 100644
--- a/IPL/include/opencv/opencv2/reg/mappergradsimilar.hpp
+++ b/IPL/include/opencv/opencv2/reg/mappergradsimilar.hpp
@@ -49,15 +49,15 @@ namespace reg {
 /*!
  * Calculates a similarity transformation between to images (scale, rotation, and shift)
  */
-class CV_EXPORTS MapperGradSimilar: public Mapper
+class CV_EXPORTS_W MapperGradSimilar: public Mapper
 {
 public:
-    MapperGradSimilar(void);
-    ~MapperGradSimilar(void);
+    CV_WRAP MapperGradSimilar();
+    ~MapperGradSimilar();
 
-    virtual void calculate(const cv::Mat& img1, const cv::Mat& img2, cv::Ptr<Map>& res) const;
+    CV_WRAP virtual cv::Ptr<Map> calculate(InputArray img1, InputArray img2, cv::Ptr<Map> init = cv::Ptr<Map>()) const CV_OVERRIDE;
 
-    cv::Ptr<Map> getMap(void) const;
+    CV_WRAP cv::Ptr<Map> getMap() const CV_OVERRIDE;
 };
 
 //! @}
diff --git a/IPL/include/opencv/opencv2/reg/mapperpyramid.hpp b/IPL/include/opencv/opencv2/reg/mapperpyramid.hpp
index 33440bd..6106f49 100644
--- a/IPL/include/opencv/opencv2/reg/mapperpyramid.hpp
+++ b/IPL/include/opencv/opencv2/reg/mapperpyramid.hpp
@@ -39,7 +39,9 @@
 #define MAPPERPYRAMID_H_
 
 #include "mapper.hpp"
-
+#include "mapaffine.hpp"
+#include "mapprojec.hpp"
+#include "mapshift.hpp"
 
 namespace cv {
 namespace reg {
@@ -50,27 +52,52 @@ namespace reg {
 /*!
  * Calculates a map using a gaussian pyramid
  */
-class CV_EXPORTS MapperPyramid: public Mapper
+class CV_EXPORTS_W MapperPyramid: public Mapper
 {
 public:
     /*
      * Constructor
      * \param[in] baseMapper Base mapper used for the refinements
      */
-    MapperPyramid(const Mapper& baseMapper);
+    CV_WRAP MapperPyramid(Ptr<Mapper> baseMapper);
 
-    void calculate(const cv::Mat& img1, const cv::Mat& img2, cv::Ptr<Map>& res) const;
+    CV_WRAP virtual cv::Ptr<Map> calculate(InputArray img1, InputArray img2, cv::Ptr<Map> init = cv::Ptr<Map>()) const CV_OVERRIDE;
 
-    cv::Ptr<Map> getMap(void) const;
+    CV_WRAP cv::Ptr<Map> getMap() const CV_OVERRIDE;
 
-    unsigned numLev_;           /*!< Number of levels of the pyramid */
-    unsigned numIterPerScale_;  /*!< Number of iterations at a given scale of the pyramid */
+    CV_PROP_RW int numLev_;           /*!< Number of levels of the pyramid */
+    CV_PROP_RW int numIterPerScale_;  /*!< Number of iterations at a given scale of the pyramid */
 
 private:
     MapperPyramid& operator=(const MapperPyramid&);
     const Mapper& baseMapper_;  /*!< Mapper used in inner level */
 };
 
+/*!
+ * Converts a pointer to a Map returned by MapperPyramid::calculate into the specified Map pointer type
+ */
+class CV_EXPORTS_W MapTypeCaster
+{
+public:
+    CV_WRAP static Ptr<MapAffine> toAffine(Ptr<Map> sourceMap)
+    {
+        MapAffine& affineMap = dynamic_cast<MapAffine&>(*sourceMap);
+        return Ptr<MapAffine>(new MapAffine(affineMap));
+    }
+
+    CV_WRAP static Ptr<MapShift> toShift(Ptr<Map> sourceMap)
+    {
+        MapShift& shiftMap = dynamic_cast<MapShift&>(*sourceMap);
+        return Ptr<MapShift>(new MapShift(shiftMap));
+    }
+
+    CV_WRAP static Ptr<MapProjec> toProjec(Ptr<Map> sourceMap)
+    {
+        MapProjec& projecMap = dynamic_cast<MapProjec&>(*sourceMap);
+        return Ptr<MapProjec>(new MapProjec(projecMap));
+    }
+};
+
 //! @}
 
 }}  // namespace cv::reg
diff --git a/IPL/include/opencv/opencv2/reg/mapprojec.hpp b/IPL/include/opencv/opencv2/reg/mapprojec.hpp
index 57ef146..6d1e565 100644
--- a/IPL/include/opencv/opencv2/reg/mapprojec.hpp
+++ b/IPL/include/opencv/opencv2/reg/mapprojec.hpp
@@ -50,32 +50,32 @@ namespace reg {
 /*!
  * Defines an transformation that consists on a projective transformation
  */
-class CV_EXPORTS MapProjec : public Map
+class CV_EXPORTS_W MapProjec : public Map
 {
 public:
     /*!
      * Default constructor builds an identity map
      */
-    MapProjec(void);
+    CV_WRAP MapProjec();
 
     /*!
      * Constructor providing explicit values
      * \param[in] projTr Projective transformation
      */
-    MapProjec(const cv::Matx<double, 3, 3>& projTr);
+    CV_WRAP MapProjec(InputArray projTr);
 
     /*!
      * Destructor
      */
-    ~MapProjec(void);
+    ~MapProjec();
 
-    void inverseWarp(const cv::Mat& img1, cv::Mat& img2) const;
+    CV_WRAP void inverseWarp(InputArray img1, OutputArray img2) const CV_OVERRIDE;
 
-    cv::Ptr<Map> inverseMap(void) const;
+    CV_WRAP cv::Ptr<Map> inverseMap() const CV_OVERRIDE;
 
-    void compose(const Map& map);
+    CV_WRAP void compose(cv::Ptr<Map> map) CV_OVERRIDE;
 
-    void scale(double factor);
+    CV_WRAP void scale(double factor) CV_OVERRIDE;
 
     /*!
      * Returns projection matrix
@@ -85,10 +85,14 @@ class CV_EXPORTS MapProjec : public Map
         return projTr_;
     }
 
+    CV_WRAP void getProjTr(OutputArray projTr) const {
+        Mat(projTr_).copyTo(projTr);
+    }
+
     /*!
      * Normalizes object's homography
      */
-    void normalize(void) {
+    CV_WRAP void normalize() {
         double z = 1./projTr_(2, 2);
         for(size_t v_i = 0; v_i < sizeof(projTr_.val)/sizeof(projTr_.val[0]); ++v_i)
             projTr_.val[v_i] *= z;
diff --git a/IPL/include/opencv/opencv2/reg/mapshift.hpp b/IPL/include/opencv/opencv2/reg/mapshift.hpp
index e5f54a4..ada9545 100644
--- a/IPL/include/opencv/opencv2/reg/mapshift.hpp
+++ b/IPL/include/opencv/opencv2/reg/mapshift.hpp
@@ -50,32 +50,33 @@ namespace reg {
 /*!
  * Defines an transformation that consists on a simple displacement
  */
-class CV_EXPORTS MapShift : public Map
+class CV_EXPORTS_W MapShift : public Map
 {
 public:
     /*!
      * Default constructor builds an identity map
      */
-    MapShift(void);
+    CV_WRAP MapShift();
 
     /*!
      * Constructor providing explicit values
      * \param[in] shift Displacement
      */
-    MapShift(const cv::Vec<double, 2>& shift);
+
+    CV_WRAP MapShift(InputArray shift);
 
     /*!
      * Destructor
      */
-    ~MapShift(void);
+    ~MapShift();
 
-    void inverseWarp(const cv::Mat& img1, cv::Mat& img2) const;
+    CV_WRAP void inverseWarp(InputArray img1, OutputArray img2) const CV_OVERRIDE;
 
-    cv::Ptr<Map> inverseMap(void) const;
+    CV_WRAP cv::Ptr<Map> inverseMap() const CV_OVERRIDE;
 
-    void compose(const Map& map);
+    CV_WRAP void compose(cv::Ptr<Map> map) CV_OVERRIDE;
 
-    void scale(double factor);
+    CV_WRAP void scale(double factor) CV_OVERRIDE;
 
     /*!
      * Return displacement
@@ -85,6 +86,10 @@ class CV_EXPORTS MapShift : public Map
         return shift_;
     }
 
+    CV_WRAP void getShift(OutputArray shift) const {
+        Mat(shift_).copyTo(shift);
+    }
+
 private:
     cv::Vec<double, 2> shift_;      /*< Displacement */
 };
diff --git a/IPL/include/opencv/opencv2/rgbd.hpp b/IPL/include/opencv/opencv2/rgbd.hpp
index b25bd3d..37b2927 100644
--- a/IPL/include/opencv/opencv2/rgbd.hpp
+++ b/IPL/include/opencv/opencv2/rgbd.hpp
@@ -1,1049 +1,23 @@
-/*
- * Software License Agreement (BSD License)
- *
- *  Copyright (c) 2009, Willow Garage, Inc.
- *  All rights reserved.
- *
- *  Redistribution and use in source and binary forms, with or without
- *  modification, are permitted provided that the following conditions
- *  are met:
- *
- *   * Redistributions of source code must retain the above copyright
- *     notice, this list of conditions and the following disclaimer.
- *   * Redistributions in binary form must reproduce the above
- *     copyright notice, this list of conditions and the following
- *     disclaimer in the documentation and/or other materials provided
- *     with the distribution.
- *   * Neither the name of Willow Garage, Inc. nor the names of its
- *     contributors may be used to endorse or promote products derived
- *     from this software without specific prior written permission.
- *
- *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- *  FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- *  COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- *  BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- *  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
- *  CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- *  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
- *  ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- *  POSSIBILITY OF SUCH DAMAGE.
- *
- */
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html
+
+// This code is also subject to the license terms in the LICENSE_KinectFusion.md file found in this module's directory
+
+// This code is also subject to the license terms in the LICENSE_WillowGarage.md file found in this module's directory
 
 #ifndef __OPENCV_RGBD_HPP__
 #define __OPENCV_RGBD_HPP__
 
-#ifdef __cplusplus
+#include "opencv2/rgbd/linemod.hpp"
+#include "opencv2/rgbd/depth.hpp"
+#include "opencv2/rgbd/kinfu.hpp"
+#include "opencv2/rgbd/dynafu.hpp"
 
-#include <opencv2/core.hpp>
-#include <limits>
 
 /** @defgroup rgbd RGB-Depth Processing
 */
 
-namespace cv
-{
-namespace rgbd
-{
-
-//! @addtogroup rgbd
-//! @{
-
-  /** Checks if the value is a valid depth. For CV_16U or CV_16S, the convention is to be invalid if it is
-   * a limit. For a float/double, we just check if it is a NaN
-   * @param depth the depth to check for validity
-   */
-  CV_EXPORTS
-  inline bool
-  isValidDepth(const float & depth)
-  {
-    return !cvIsNaN(depth);
-  }
-  CV_EXPORTS
-  inline bool
-  isValidDepth(const double & depth)
-  {
-    return !cvIsNaN(depth);
-  }
-  CV_EXPORTS
-  inline bool
-  isValidDepth(const short int & depth)
-  {
-    return (depth != std::numeric_limits<short int>::min()) && (depth != std::numeric_limits<short int>::max());
-  }
-  CV_EXPORTS
-  inline bool
-  isValidDepth(const unsigned short int & depth)
-  {
-    return (depth != std::numeric_limits<unsigned short int>::min())
-        && (depth != std::numeric_limits<unsigned short int>::max());
-  }
-  CV_EXPORTS
-  inline bool
-  isValidDepth(const int & depth)
-  {
-    return (depth != std::numeric_limits<int>::min()) && (depth != std::numeric_limits<int>::max());
-  }
-  CV_EXPORTS
-  inline bool
-  isValidDepth(const unsigned int & depth)
-  {
-    return (depth != std::numeric_limits<unsigned int>::min()) && (depth != std::numeric_limits<unsigned int>::max());
-  }
-
-  /** Object that can compute the normals in an image.
-   * It is an object as it can cache data for speed efficiency
-   * The implemented methods are either:
-   * - FALS (the fastest) and SRI from
-   * ``Fast and Accurate Computation of Surface Normals from Range Images``
-   * by H. Badino, D. Huber, Y. Park and T. Kanade
-   * - the normals with bilateral filtering on a depth image from
-   * ``Gradient Response Maps for Real-Time Detection of Texture-Less Objects``
-   * by S. Hinterstoisser, C. Cagniart, S. Ilic, P. Sturm, N. Navab, P. Fua, and V. Lepetit
-   */
-  class CV_EXPORTS RgbdNormals: public Algorithm
-  {
-  public:
-    enum RGBD_NORMALS_METHOD
-    {
-      RGBD_NORMALS_METHOD_FALS, RGBD_NORMALS_METHOD_LINEMOD, RGBD_NORMALS_METHOD_SRI
-    };
-
-    RgbdNormals()
-        :
-          rows_(0),
-          cols_(0),
-          depth_(0),
-          K_(Mat()),
-          window_size_(0),
-          method_(RGBD_NORMALS_METHOD_FALS),
-          rgbd_normals_impl_(0)
-    {
-    }
-
-    /** Constructor
-     * @param rows the number of rows of the depth image normals will be computed on
-     * @param cols the number of cols of the depth image normals will be computed on
-     * @param depth the depth of the normals (only CV_32F or CV_64F)
-     * @param K the calibration matrix to use
-     * @param window_size the window size to compute the normals: can only be 1,3,5 or 7
-     * @param method one of the methods to use: RGBD_NORMALS_METHOD_SRI, RGBD_NORMALS_METHOD_FALS
-     */
-    RgbdNormals(int rows, int cols, int depth, InputArray K, int window_size = 5, int method =
-        RGBD_NORMALS_METHOD_FALS);
-
-    ~RgbdNormals();
-
-    /** Given a set of 3d points in a depth image, compute the normals at each point.
-     * @param points a rows x cols x 3 matrix of CV_32F/CV64F or a rows x cols x 1 CV_U16S
-     * @param normals a rows x cols x 3 matrix
-     */
-    void
-    operator()(InputArray points, OutputArray normals) const;
-
-    /** Initializes some data that is cached for later computation
-     * If that function is not called, it will be called the first time normals are computed
-     */
-    void
-    initialize() const;
-
-    int getRows() const
-    {
-        return rows_;
-    }
-    void setRows(int val)
-    {
-        rows_ = val;
-    }
-    int getCols() const
-    {
-        return cols_;
-    }
-    void setCols(int val)
-    {
-        cols_ = val;
-    }
-    int getWindowSize() const
-    {
-        return window_size_;
-    }
-    void setWindowSize(int val)
-    {
-        window_size_ = val;
-    }
-    int getDepth() const
-    {
-        return depth_;
-    }
-    void setDepth(int val)
-    {
-        depth_ = val;
-    }
-    cv::Mat getK() const
-    {
-        return K_;
-    }
-    void setK(const cv::Mat &val)
-    {
-        K_ = val;
-    }
-    int getMethod() const
-    {
-        return method_;
-    }
-    void setMethod(int val)
-    {
-        method_ = val;
-    }
-
-  protected:
-    void
-    initialize_normals_impl(int rows, int cols, int depth, const Mat & K, int window_size, int method) const;
-
-    int rows_, cols_, depth_;
-    Mat K_;
-    int window_size_;
-    int method_;
-    mutable void* rgbd_normals_impl_;
-  };
-
-  /** Object that can clean a noisy depth image
-   */
-  class CV_EXPORTS DepthCleaner: public Algorithm
-  {
-  public:
-    /** NIL method is from
-     * ``Modeling Kinect Sensor Noise for Improved 3d Reconstruction and Tracking``
-     * by C. Nguyen, S. Izadi, D. Lovel
-     */
-    enum DEPTH_CLEANER_METHOD
-    {
-      DEPTH_CLEANER_NIL
-    };
-
-    DepthCleaner()
-        :
-          depth_(0),
-          window_size_(0),
-          method_(DEPTH_CLEANER_NIL),
-          depth_cleaner_impl_(0)
-    {
-    }
-
-    /** Constructor
-     * @param depth the depth of the normals (only CV_32F or CV_64F)
-     * @param window_size the window size to compute the normals: can only be 1,3,5 or 7
-     * @param method one of the methods to use: RGBD_NORMALS_METHOD_SRI, RGBD_NORMALS_METHOD_FALS
-     */
-    DepthCleaner(int depth, int window_size = 5, int method = DEPTH_CLEANER_NIL);
-
-    ~DepthCleaner();
-
-    /** Given a set of 3d points in a depth image, compute the normals at each point.
-     * @param points a rows x cols x 3 matrix of CV_32F/CV64F or a rows x cols x 1 CV_U16S
-     * @param depth a rows x cols matrix of the cleaned up depth
-     */
-    void
-    operator()(InputArray points, OutputArray depth) const;
-
-    /** Initializes some data that is cached for later computation
-     * If that function is not called, it will be called the first time normals are computed
-     */
-    void
-    initialize() const;
-
-    int getWindowSize() const
-    {
-        return window_size_;
-    }
-    void setWindowSize(int val)
-    {
-        window_size_ = val;
-    }
-    int getDepth() const
-    {
-        return depth_;
-    }
-    void setDepth(int val)
-    {
-        depth_ = val;
-    }
-    int getMethod() const
-    {
-        return method_;
-    }
-    void setMethod(int val)
-    {
-        method_ = val;
-    }
-
-  protected:
-    void
-    initialize_cleaner_impl() const;
-
-    int depth_;
-    int window_size_;
-    int method_;
-    mutable void* depth_cleaner_impl_;
-  };
-
-
-  /** Registers depth data to an external camera
-   * Registration is performed by creating a depth cloud, transforming the cloud by
-   * the rigid body transformation between the cameras, and then projecting the
-   * transformed points into the RGB camera.
-   *
-   * uv_rgb = K_rgb * [R | t] * z * inv(K_ir) * uv_ir
-   *
-   * Currently does not check for negative depth values.
-   *
-   * @param unregisteredCameraMatrix the camera matrix of the depth camera
-   * @param registeredCameraMatrix the camera matrix of the external camera
-   * @param registeredDistCoeffs the distortion coefficients of the external camera
-   * @param Rt the rigid body transform between the cameras. Transforms points from depth camera frame to external camera frame.
-   * @param unregisteredDepth the input depth data
-   * @param outputImagePlaneSize the image plane dimensions of the external camera (width, height)
-   * @param registeredDepth the result of transforming the depth into the external camera
-   * @param depthDilation whether or not the depth is dilated to avoid holes and occlusion errors (optional)
-   */
-  CV_EXPORTS
-  void
-  registerDepth(InputArray unregisteredCameraMatrix, InputArray registeredCameraMatrix, InputArray registeredDistCoeffs,
-                InputArray Rt, InputArray unregisteredDepth, const Size& outputImagePlaneSize,
-                OutputArray registeredDepth, bool depthDilation=false);
-
-  /**
-   * @param depth the depth image
-   * @param in_K
-   * @param in_points the list of xy coordinates
-   * @param points3d the resulting 3d points
-   */
-  CV_EXPORTS
-  void
-  depthTo3dSparse(InputArray depth, InputArray in_K, InputArray in_points, OutputArray points3d);
-
-  /** Converts a depth image to an organized set of 3d points.
-   * The coordinate system is x pointing left, y down and z away from the camera
-   * @param depth the depth image (if given as short int CV_U, it is assumed to be the depth in millimeters
-   *              (as done with the Microsoft Kinect), otherwise, if given as CV_32F or CV_64F, it is assumed in meters)
-   * @param K The calibration matrix
-   * @param points3d the resulting 3d points. They are of depth the same as `depth` if it is CV_32F or CV_64F, and the
-   *        depth of `K` if `depth` is of depth CV_U
-   * @param mask the mask of the points to consider (can be empty)
-   */
-  CV_EXPORTS
-  void
-  depthTo3d(InputArray depth, InputArray K, OutputArray points3d, InputArray mask = noArray());
-
-  /** If the input image is of type CV_16UC1 (like the Kinect one), the image is converted to floats, divided
-   * by 1000 to get a depth in meters, and the values 0 are converted to std::numeric_limits<float>::quiet_NaN()
-   * Otherwise, the image is simply converted to floats
-   * @param in the depth image (if given as short int CV_U, it is assumed to be the depth in millimeters
-   *              (as done with the Microsoft Kinect), it is assumed in meters)
-   * @param depth the desired output depth (floats or double)
-   * @param out The rescaled float depth image
-   */
-  CV_EXPORTS
-  void
-  rescaleDepth(InputArray in, int depth, OutputArray out);
-
-  /** Object that can compute planes in an image
-   */
-  class CV_EXPORTS RgbdPlane: public Algorithm
-  {
-  public:
-    enum RGBD_PLANE_METHOD
-    {
-      RGBD_PLANE_METHOD_DEFAULT
-    };
-
-    RgbdPlane(RGBD_PLANE_METHOD method = RGBD_PLANE_METHOD_DEFAULT)
-        :
-          method_(method),
-          block_size_(40),
-          min_size_(block_size_*block_size_),
-          threshold_(0.01),
-          sensor_error_a_(0),
-          sensor_error_b_(0),
-          sensor_error_c_(0)
-    {
-    }
-
-    /** Find The planes in a depth image
-     * @param points3d the 3d points organized like the depth image: rows x cols with 3 channels
-     * @param normals the normals for every point in the depth image
-     * @param mask An image where each pixel is labeled with the plane it belongs to
-     *        and 255 if it does not belong to any plane
-     * @param plane_coefficients the coefficients of the corresponding planes (a,b,c,d) such that ax+by+cz+d=0, norm(a,b,c)=1
-     *        and c < 0 (so that the normal points towards the camera)
-     */
-    void
-    operator()(InputArray points3d, InputArray normals, OutputArray mask,
-               OutputArray plane_coefficients);
-
-    /** Find The planes in a depth image but without doing a normal check, which is faster but less accurate
-     * @param points3d the 3d points organized like the depth image: rows x cols with 3 channels
-     * @param mask An image where each pixel is labeled with the plane it belongs to
-     *        and 255 if it does not belong to any plane
-     * @param plane_coefficients the coefficients of the corresponding planes (a,b,c,d) such that ax+by+cz+d=0
-     */
-    void
-    operator()(InputArray points3d, OutputArray mask, OutputArray plane_coefficients);
-
-    int getBlockSize() const
-    {
-        return block_size_;
-    }
-    void setBlockSize(int val)
-    {
-        block_size_ = val;
-    }
-    int getMinSize() const
-    {
-        return min_size_;
-    }
-    void setMinSize(int val)
-    {
-        min_size_ = val;
-    }
-    int getMethod() const
-    {
-        return method_;
-    }
-    void setMethod(int val)
-    {
-        method_ = val;
-    }
-    double getThreshold() const
-    {
-        return threshold_;
-    }
-    void setThreshold(double val)
-    {
-        threshold_ = val;
-    }
-    double getSensorErrorA() const
-    {
-        return sensor_error_a_;
-    }
-    void setSensorErrorA(double val)
-    {
-        sensor_error_a_ = val;
-    }
-    double getSensorErrorB() const
-    {
-        return sensor_error_b_;
-    }
-    void setSensorErrorB(double val)
-    {
-        sensor_error_b_ = val;
-    }
-    double getSensorErrorC() const
-    {
-        return sensor_error_c_;
-    }
-    void setSensorErrorC(double val)
-    {
-        sensor_error_c_ = val;
-    }
-
-  private:
-    /** The method to use to compute the planes */
-    int method_;
-    /** The size of the blocks to look at for a stable MSE */
-    int block_size_;
-    /** The minimum size of a cluster to be considered a plane */
-    int min_size_;
-    /** How far a point can be from a plane to belong to it (in meters) */
-    double threshold_;
-    /** coefficient of the sensor error with respect to the. All 0 by default but you want a=0.0075 for a Kinect */
-    double sensor_error_a_, sensor_error_b_, sensor_error_c_;
-  };
-
-  /** Object that contains a frame data.
-   */
-  struct CV_EXPORTS RgbdFrame
-  {
-      RgbdFrame();
-      RgbdFrame(const Mat& image, const Mat& depth, const Mat& mask=Mat(), const Mat& normals=Mat(), int ID=-1);
-      virtual ~RgbdFrame();
-
-      virtual void
-      release();
-
-      int ID;
-      Mat image;
-      Mat depth;
-      Mat mask;
-      Mat normals;
-  };
-
-  /** Object that contains a frame data that is possibly needed for the Odometry.
-   * It's used for the efficiency (to pass precomputed/cached data of the frame that participates
-   * in the Odometry processing several times).
-   */
-  struct CV_EXPORTS OdometryFrame : public RgbdFrame
-  {
-    /** These constants are used to set a type of cache which has to be prepared depending on the frame role:
-     * srcFrame or dstFrame (see compute method of the Odometry class). For the srcFrame and dstFrame different cache data may be required,
-     * some part of a cache may be common for both frame roles.
-     * @param CACHE_SRC The cache data for the srcFrame will be prepared.
-     * @param CACHE_DST The cache data for the dstFrame will be prepared.
-     * @param CACHE_ALL The cache data for both srcFrame and dstFrame roles will be computed.
-     */
-    enum
-    {
-      CACHE_SRC = 1, CACHE_DST = 2, CACHE_ALL = CACHE_SRC + CACHE_DST
-    };
-
-    OdometryFrame();
-    OdometryFrame(const Mat& image, const Mat& depth, const Mat& mask=Mat(), const Mat& normals=Mat(), int ID=-1);
-
-    virtual void
-    release();
-
-    void
-    releasePyramids();
-
-    std::vector<Mat> pyramidImage;
-    std::vector<Mat> pyramidDepth;
-    std::vector<Mat> pyramidMask;
-
-    std::vector<Mat> pyramidCloud;
-
-    std::vector<Mat> pyramid_dI_dx;
-    std::vector<Mat> pyramid_dI_dy;
-    std::vector<Mat> pyramidTexturedMask;
-
-    std::vector<Mat> pyramidNormals;
-    std::vector<Mat> pyramidNormalsMask;
-  };
-
-  /** Base class for computation of odometry.
-   */
-  class CV_EXPORTS Odometry: public Algorithm
-  {
-  public:
-
-    /** A class of transformation*/
-    enum
-    {
-      ROTATION = 1, TRANSLATION = 2, RIGID_BODY_MOTION = 4
-    };
-
-    static inline float
-    DEFAULT_MIN_DEPTH()
-    {
-      return 0.f; // in meters
-    }
-    static inline float
-    DEFAULT_MAX_DEPTH()
-    {
-      return 4.f; // in meters
-    }
-    static inline float
-    DEFAULT_MAX_DEPTH_DIFF()
-    {
-      return 0.07f; // in meters
-    }
-    static inline float
-    DEFAULT_MAX_POINTS_PART()
-    {
-      return 0.07f; // in [0, 1]
-    }
-    static inline float
-    DEFAULT_MAX_TRANSLATION()
-    {
-      return 0.15f; // in meters
-    }
-    static inline float
-    DEFAULT_MAX_ROTATION()
-    {
-      return 15; // in degrees
-    }
-
-    /** Method to compute a transformation from the source frame to the destination one.
-     * Some odometry algorithms do not used some data of frames (eg. ICP does not use images).
-     * In such case corresponding arguments can be set as empty Mat.
-     * The method returns true if all internal computions were possible (e.g. there were enough correspondences,
-     * system of equations has a solution, etc) and resulting transformation satisfies some test if it's provided
-     * by the Odometry inheritor implementation (e.g. thresholds for maximum translation and rotation).
-     * @param srcImage Image data of the source frame (CV_8UC1)
-     * @param srcDepth Depth data of the source frame (CV_32FC1, in meters)
-     * @param srcMask Mask that sets which pixels have to be used from the source frame (CV_8UC1)
-     * @param dstImage Image data of the destination frame (CV_8UC1)
-     * @param dstDepth Depth data of the destination frame (CV_32FC1, in meters)
-     * @param dstMask Mask that sets which pixels have to be used from the destination frame (CV_8UC1)
-     * @param Rt Resulting transformation from the source frame to the destination one (rigid body motion):
-     dst_p = Rt * src_p, where dst_p is a homogeneous point in the destination frame and src_p is
-     homogeneous point in the source frame,
-     Rt is 4x4 matrix of CV_64FC1 type.
-     * @param initRt Initial transformation from the source frame to the destination one (optional)
-     */
-    bool
-    compute(const Mat& srcImage, const Mat& srcDepth, const Mat& srcMask, const Mat& dstImage, const Mat& dstDepth,
-            const Mat& dstMask, Mat& Rt, const Mat& initRt = Mat()) const;
-
-    /** One more method to compute a transformation from the source frame to the destination one.
-     * It is designed to save on computing the frame data (image pyramids, normals, etc.).
-     */
-    bool
-    compute(Ptr<OdometryFrame>& srcFrame, Ptr<OdometryFrame>& dstFrame, Mat& Rt, const Mat& initRt = Mat()) const;
-
-    /** Prepare a cache for the frame. The function checks the precomputed/passed data (throws the error if this data
-     * does not satisfy) and computes all remaining cache data needed for the frame. Returned size is a resolution
-     * of the prepared frame.
-     * @param frame The odometry which will process the frame.
-     * @param cacheType The cache type: CACHE_SRC, CACHE_DST or CACHE_ALL.
-     */
-    virtual Size prepareFrameCache(Ptr<OdometryFrame>& frame, int cacheType) const;
-
-    static Ptr<Odometry> create(const String & odometryType);
-
-    /** @see setCameraMatrix */
-    virtual cv::Mat getCameraMatrix() const = 0;
-    /** @copybrief getCameraMatrix @see getCameraMatrix */
-    virtual void setCameraMatrix(const cv::Mat &val) = 0;
-    /** @see setTransformType */
-    virtual int getTransformType() const = 0;
-    /** @copybrief getTransformType @see getTransformType */
-    virtual void setTransformType(int val) = 0;
-
-  protected:
-    virtual void
-    checkParams() const = 0;
-
-    virtual bool
-    computeImpl(const Ptr<OdometryFrame>& srcFrame, const Ptr<OdometryFrame>& dstFrame, Mat& Rt,
-                const Mat& initRt) const = 0;
-  };
-
-  /** Odometry based on the paper "Real-Time Visual Odometry from Dense RGB-D Images",
-   * F. Steinbucker, J. Strum, D. Cremers, ICCV, 2011.
-   */
-  class CV_EXPORTS RgbdOdometry: public Odometry
-  {
-  public:
-    RgbdOdometry();
-    /** Constructor.
-     * @param cameraMatrix Camera matrix
-     * @param minDepth Pixels with depth less than minDepth will not be used (in meters)
-     * @param maxDepth Pixels with depth larger than maxDepth will not be used (in meters)
-     * @param maxDepthDiff Correspondences between pixels of two given frames will be filtered out
-     *                     if their depth difference is larger than maxDepthDiff (in meters)
-     * @param iterCounts Count of iterations on each pyramid level.
-     * @param minGradientMagnitudes For each pyramid level the pixels will be filtered out
-     *                              if they have gradient magnitude less than minGradientMagnitudes[level].
-     * @param maxPointsPart The method uses a random pixels subset of size frameWidth x frameHeight x pointsPart
-     * @param transformType Class of transformation
-     */
-    RgbdOdometry(const Mat& cameraMatrix, float minDepth = DEFAULT_MIN_DEPTH(), float maxDepth = DEFAULT_MAX_DEPTH(),
-                 float maxDepthDiff = DEFAULT_MAX_DEPTH_DIFF(), const std::vector<int>& iterCounts = std::vector<int>(),
-                 const std::vector<float>& minGradientMagnitudes = std::vector<float>(), float maxPointsPart = DEFAULT_MAX_POINTS_PART(),
-                 int transformType = RIGID_BODY_MOTION);
-
-    virtual Size prepareFrameCache(Ptr<OdometryFrame>& frame, int cacheType) const;
-
-    cv::Mat getCameraMatrix() const
-    {
-        return cameraMatrix;
-    }
-    void setCameraMatrix(const cv::Mat &val)
-    {
-        cameraMatrix = val;
-    }
-    double getMinDepth() const
-    {
-        return minDepth;
-    }
-    void setMinDepth(double val)
-    {
-        minDepth = val;
-    }
-    double getMaxDepth() const
-    {
-        return maxDepth;
-    }
-    void setMaxDepth(double val)
-    {
-        maxDepth = val;
-    }
-    double getMaxDepthDiff() const
-    {
-        return maxDepthDiff;
-    }
-    void setMaxDepthDiff(double val)
-    {
-        maxDepthDiff = val;
-    }
-    cv::Mat getIterationCounts() const
-    {
-        return iterCounts;
-    }
-    void setIterationCounts(const cv::Mat &val)
-    {
-        iterCounts = val;
-    }
-    cv::Mat getMinGradientMagnitudes() const
-    {
-        return minGradientMagnitudes;
-    }
-    void setMinGradientMagnitudes(const cv::Mat &val)
-    {
-        minGradientMagnitudes = val;
-    }
-    double getMaxPointsPart() const
-    {
-        return maxPointsPart;
-    }
-    void setMaxPointsPart(double val)
-    {
-        maxPointsPart = val;
-    }
-    int getTransformType() const
-    {
-        return transformType;
-    }
-    void setTransformType(int val)
-    {
-        transformType = val;
-    }
-    double getMaxTranslation() const
-    {
-        return maxTranslation;
-    }
-    void setMaxTranslation(double val)
-    {
-        maxTranslation = val;
-    }
-    double getMaxRotation() const
-    {
-        return maxRotation;
-    }
-    void setMaxRotation(double val)
-    {
-        maxRotation = val;
-    }
-
-  protected:
-    virtual void
-    checkParams() const;
-
-    virtual bool
-    computeImpl(const Ptr<OdometryFrame>& srcFrame, const Ptr<OdometryFrame>& dstFrame, Mat& Rt,
-                const Mat& initRt) const;
-
-    // Some params have commented desired type. It's due to AlgorithmInfo::addParams does not support it now.
-    /*float*/
-    double minDepth, maxDepth, maxDepthDiff;
-    /*vector<int>*/
-    Mat iterCounts;
-    /*vector<float>*/
-    Mat minGradientMagnitudes;
-    double maxPointsPart;
-
-    Mat cameraMatrix;
-    int transformType;
-
-    double maxTranslation, maxRotation;
-  };
-
-  /** Odometry based on the paper "KinectFusion: Real-Time Dense Surface Mapping and Tracking",
-   * Richard A. Newcombe, Andrew Fitzgibbon, at al, SIGGRAPH, 2011.
-   */
-  class ICPOdometry: public Odometry
-  {
-  public:
-    ICPOdometry();
-    /** Constructor.
-     * @param cameraMatrix Camera matrix
-     * @param minDepth Pixels with depth less than minDepth will not be used
-     * @param maxDepth Pixels with depth larger than maxDepth will not be used
-     * @param maxDepthDiff Correspondences between pixels of two given frames will be filtered out
-     *                     if their depth difference is larger than maxDepthDiff
-     * @param maxPointsPart The method uses a random pixels subset of size frameWidth x frameHeight x pointsPart
-     * @param iterCounts Count of iterations on each pyramid level.
-     * @param transformType Class of trasformation
-     */
-    ICPOdometry(const Mat& cameraMatrix, float minDepth = DEFAULT_MIN_DEPTH(), float maxDepth = DEFAULT_MAX_DEPTH(),
-                float maxDepthDiff = DEFAULT_MAX_DEPTH_DIFF(), float maxPointsPart = DEFAULT_MAX_POINTS_PART(),
-                const std::vector<int>& iterCounts = std::vector<int>(), int transformType = RIGID_BODY_MOTION);
-
-    virtual Size prepareFrameCache(Ptr<OdometryFrame>& frame, int cacheType) const;
-
-    cv::Mat getCameraMatrix() const
-    {
-        return cameraMatrix;
-    }
-    void setCameraMatrix(const cv::Mat &val)
-    {
-        cameraMatrix = val;
-    }
-    double getMinDepth() const
-    {
-        return minDepth;
-    }
-    void setMinDepth(double val)
-    {
-        minDepth = val;
-    }
-    double getMaxDepth() const
-    {
-        return maxDepth;
-    }
-    void setMaxDepth(double val)
-    {
-        maxDepth = val;
-    }
-    double getMaxDepthDiff() const
-    {
-        return maxDepthDiff;
-    }
-    void setMaxDepthDiff(double val)
-    {
-        maxDepthDiff = val;
-    }
-    cv::Mat getIterationCounts() const
-    {
-        return iterCounts;
-    }
-    void setIterationCounts(const cv::Mat &val)
-    {
-        iterCounts = val;
-    }
-    double getMaxPointsPart() const
-    {
-        return maxPointsPart;
-    }
-    void setMaxPointsPart(double val)
-    {
-        maxPointsPart = val;
-    }
-    int getTransformType() const
-    {
-        return transformType;
-    }
-    void setTransformType(int val)
-    {
-        transformType = val;
-    }
-    double getMaxTranslation() const
-    {
-        return maxTranslation;
-    }
-    void setMaxTranslation(double val)
-    {
-        maxTranslation = val;
-    }
-    double getMaxRotation() const
-    {
-        return maxRotation;
-    }
-    void setMaxRotation(double val)
-    {
-        maxRotation = val;
-    }
-    Ptr<RgbdNormals> getNormalsComputer() const
-    {
-        return normalsComputer;
-    }
-
-  protected:
-    virtual void
-    checkParams() const;
-
-    virtual bool
-    computeImpl(const Ptr<OdometryFrame>& srcFrame, const Ptr<OdometryFrame>& dstFrame, Mat& Rt,
-                const Mat& initRt) const;
-
-    // Some params have commented desired type. It's due to AlgorithmInfo::addParams does not support it now.
-    /*float*/
-    double minDepth, maxDepth, maxDepthDiff;
-    /*float*/
-    double maxPointsPart;
-    /*vector<int>*/
-    Mat iterCounts;
-
-    Mat cameraMatrix;
-    int transformType;
-
-    double maxTranslation, maxRotation;
-
-    mutable Ptr<RgbdNormals> normalsComputer;
-  };
-
-  /** Odometry that merges RgbdOdometry and ICPOdometry by minimize sum of their energy functions.
-   */
-
-  class RgbdICPOdometry: public Odometry
-  {
-  public:
-    RgbdICPOdometry();
-    /** Constructor.
-     * @param cameraMatrix Camera matrix
-     * @param minDepth Pixels with depth less than minDepth will not be used
-     * @param maxDepth Pixels with depth larger than maxDepth will not be used
-     * @param maxDepthDiff Correspondences between pixels of two given frames will be filtered out
-     *                     if their depth difference is larger than maxDepthDiff
-     * @param maxPointsPart The method uses a random pixels subset of size frameWidth x frameHeight x pointsPart
-     * @param iterCounts Count of iterations on each pyramid level.
-     * @param minGradientMagnitudes For each pyramid level the pixels will be filtered out
-     *                              if they have gradient magnitude less than minGradientMagnitudes[level].
-     * @param transformType Class of trasformation
-     */
-    RgbdICPOdometry(const Mat& cameraMatrix, float minDepth = DEFAULT_MIN_DEPTH(), float maxDepth = DEFAULT_MAX_DEPTH(),
-                    float maxDepthDiff = DEFAULT_MAX_DEPTH_DIFF(), float maxPointsPart = DEFAULT_MAX_POINTS_PART(),
-                    const std::vector<int>& iterCounts = std::vector<int>(),
-                    const std::vector<float>& minGradientMagnitudes = std::vector<float>(),
-                    int transformType = RIGID_BODY_MOTION);
-
-    virtual Size prepareFrameCache(Ptr<OdometryFrame>& frame, int cacheType) const;
-
-    cv::Mat getCameraMatrix() const
-    {
-        return cameraMatrix;
-    }
-    void setCameraMatrix(const cv::Mat &val)
-    {
-        cameraMatrix = val;
-    }
-    double getMinDepth() const
-    {
-        return minDepth;
-    }
-    void setMinDepth(double val)
-    {
-        minDepth = val;
-    }
-    double getMaxDepth() const
-    {
-        return maxDepth;
-    }
-    void setMaxDepth(double val)
-    {
-        maxDepth = val;
-    }
-    double getMaxDepthDiff() const
-    {
-        return maxDepthDiff;
-    }
-    void setMaxDepthDiff(double val)
-    {
-        maxDepthDiff = val;
-    }
-    double getMaxPointsPart() const
-    {
-        return maxPointsPart;
-    }
-    void setMaxPointsPart(double val)
-    {
-        maxPointsPart = val;
-    }
-    cv::Mat getIterationCounts() const
-    {
-        return iterCounts;
-    }
-    void setIterationCounts(const cv::Mat &val)
-    {
-        iterCounts = val;
-    }
-    cv::Mat getMinGradientMagnitudes() const
-    {
-        return minGradientMagnitudes;
-    }
-    void setMinGradientMagnitudes(const cv::Mat &val)
-    {
-        minGradientMagnitudes = val;
-    }
-    int getTransformType() const
-    {
-        return transformType;
-    }
-    void setTransformType(int val)
-    {
-        transformType = val;
-    }
-    double getMaxTranslation() const
-    {
-        return maxTranslation;
-    }
-    void setMaxTranslation(double val)
-    {
-        maxTranslation = val;
-    }
-    double getMaxRotation() const
-    {
-        return maxRotation;
-    }
-    void setMaxRotation(double val)
-    {
-        maxRotation = val;
-    }
-    Ptr<RgbdNormals> getNormalsComputer() const
-    {
-        return normalsComputer;
-    }
-
-  protected:
-    virtual void
-    checkParams() const;
-
-    virtual bool
-    computeImpl(const Ptr<OdometryFrame>& srcFrame, const Ptr<OdometryFrame>& dstFrame, Mat& Rt,
-                const Mat& initRt) const;
-
-    // Some params have commented desired type. It's due to AlgorithmInfo::addParams does not support it now.
-    /*float*/
-    double minDepth, maxDepth, maxDepthDiff;
-    /*float*/
-    double maxPointsPart;
-    /*vector<int>*/
-    Mat iterCounts;
-    /*vector<float>*/
-    Mat minGradientMagnitudes;
-
-    Mat cameraMatrix;
-    int transformType;
-
-    double maxTranslation, maxRotation;
-
-    mutable Ptr<RgbdNormals> normalsComputer;
-  };
-
-  /** Warp the image: compute 3d points from the depth, transform them using given transformation,
-   * then project color point cloud to an image plane.
-   * This function can be used to visualize results of the Odometry algorithm.
-   * @param image The image (of CV_8UC1 or CV_8UC3 type)
-   * @param depth The depth (of type used in depthTo3d fuction)
-   * @param mask The mask of used pixels (of CV_8UC1), it can be empty
-   * @param Rt The transformation that will be applied to the 3d points computed from the depth
-   * @param cameraMatrix Camera matrix
-   * @param distCoeff Distortion coefficients
-   * @param warpedImage The warped image.
-   * @param warpedDepth The warped depth.
-   * @param warpedMask The warped mask.
-   */
-  CV_EXPORTS
-  void
-  warpFrame(const Mat& image, const Mat& depth, const Mat& mask, const Mat& Rt, const Mat& cameraMatrix,
-            const Mat& distCoeff, Mat& warpedImage, Mat* warpedDepth = 0, Mat* warpedMask = 0);
-
-// TODO Depth interpolation
-// Curvature
-// Get rescaleDepth return dubles if asked for
-
-//! @}
-
-} /* namespace rgbd */
-} /* namespace cv */
-
-#include "opencv2/rgbd/linemod.hpp"
-
-#endif /* __cplusplus */
 #endif
 
 /* End of file. */
-
diff --git a/IPL/include/opencv/opencv2/rgbd/depth.hpp b/IPL/include/opencv/opencv2/rgbd/depth.hpp
new file mode 100644
index 0000000..94fdca6
--- /dev/null
+++ b/IPL/include/opencv/opencv2/rgbd/depth.hpp
@@ -0,0 +1,1192 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html
+
+// This code is also subject to the license terms in the LICENSE_WillowGarage.md file found in this module's directory
+
+#ifndef __OPENCV_RGBD_DEPTH_HPP__
+#define __OPENCV_RGBD_DEPTH_HPP__
+
+#include <opencv2/core.hpp>
+#include <limits>
+
+namespace cv
+{
+namespace rgbd
+{
+
+//! @addtogroup rgbd
+//! @{
+
+  /** Checks if the value is a valid depth. For integer depths (e.g. CV_16U or CV_16S), the convention is to be
+   * invalid if the value equals the minimum or maximum of its type. For a float/double, we just check if it is a NaN.
+   * @param depth the depth to check for validity
+   */
+  CV_EXPORTS
+  inline bool
+  isValidDepth(const float & depth)
+  {
+    return !cvIsNaN(depth);
+  }
+  CV_EXPORTS
+  inline bool
+  isValidDepth(const double & depth)
+  {
+    return !cvIsNaN(depth);
+  }
+  CV_EXPORTS
+  inline bool
+  isValidDepth(const short int & depth)
+  {
+    return (depth != std::numeric_limits<short int>::min()) && (depth != std::numeric_limits<short int>::max());
+  }
+  CV_EXPORTS
+  inline bool
+  isValidDepth(const unsigned short int & depth)
+  {
+    return (depth != std::numeric_limits<unsigned short int>::min())
+        && (depth != std::numeric_limits<unsigned short int>::max());
+  }
+  CV_EXPORTS
+  inline bool
+  isValidDepth(const int & depth)
+  {
+    return (depth != std::numeric_limits<int>::min()) && (depth != std::numeric_limits<int>::max());
+  }
+  CV_EXPORTS
+  inline bool
+  isValidDepth(const unsigned int & depth)
+  {
+    return (depth != std::numeric_limits<unsigned int>::min()) && (depth != std::numeric_limits<unsigned int>::max());
+  }
+
+  /** Object that can compute the normals in an image.
+   * It is an object as it can cache data for speed efficiency
+   * The implemented methods are either:
+   * - FALS (the fastest) and SRI from
+   * ``Fast and Accurate Computation of Surface Normals from Range Images``
+   * by H. Badino, D. Huber, Y. Park and T. Kanade
+   * - the normals with bilateral filtering on a depth image from
+   * ``Gradient Response Maps for Real-Time Detection of Texture-Less Objects``
+   * by S. Hinterstoisser, C. Cagniart, S. Ilic, P. Sturm, N. Navab, P. Fua, and V. Lepetit
+   */
+  class CV_EXPORTS_W RgbdNormals: public Algorithm
+  {
+  public:
+    enum RGBD_NORMALS_METHOD
+    {
+      RGBD_NORMALS_METHOD_FALS = 0,
+      RGBD_NORMALS_METHOD_LINEMOD = 1,
+      RGBD_NORMALS_METHOD_SRI = 2
+    };
+
+    RgbdNormals()
+        :
+          rows_(0),
+          cols_(0),
+          depth_(0),
+          K_(Mat()),
+          window_size_(0),
+          method_(RGBD_NORMALS_METHOD_FALS),
+          rgbd_normals_impl_(0)
+    {
+    }
+
+    /** Constructor
+     * @param rows the number of rows of the depth image normals will be computed on
+     * @param cols the number of cols of the depth image normals will be computed on
+     * @param depth the depth of the normals (only CV_32F or CV_64F)
+     * @param K the calibration matrix to use
+     * @param window_size the window size to compute the normals: can only be 1,3,5 or 7
+     * @param method one of the methods to use: RGBD_NORMALS_METHOD_FALS, RGBD_NORMALS_METHOD_LINEMOD or RGBD_NORMALS_METHOD_SRI
+     */
+    RgbdNormals(int rows, int cols, int depth, InputArray K, int window_size = 5, int method =
+        RgbdNormals::RGBD_NORMALS_METHOD_FALS);
+
+    ~RgbdNormals();
+
+    CV_WRAP static Ptr<RgbdNormals> create(int rows, int cols, int depth, InputArray K, int window_size = 5, int method =
+        RgbdNormals::RGBD_NORMALS_METHOD_FALS);
+
+    /** Given a set of 3d points in a depth image, compute the normals at each point.
+     * @param points a rows x cols x 3 matrix of CV_32F/CV_64F or a rows x cols x 1 CV_U16S
+     * @param normals a rows x cols x 3 matrix
+     */
+    CV_WRAP_AS(apply) void
+    operator()(InputArray points, OutputArray normals) const;
+
+    /** Initializes some data that is cached for later computation
+     * If that function is not called, it will be called the first time normals are computed
+     */
+    CV_WRAP void
+    initialize() const;
+
+    CV_WRAP int getRows() const
+    {
+        return rows_;
+    }
+    CV_WRAP void setRows(int val)
+    {
+        rows_ = val;
+    }
+    CV_WRAP int getCols() const
+    {
+        return cols_;
+    }
+    CV_WRAP void setCols(int val)
+    {
+        cols_ = val;
+    }
+    CV_WRAP int getWindowSize() const
+    {
+        return window_size_;
+    }
+    CV_WRAP void setWindowSize(int val)
+    {
+        window_size_ = val;
+    }
+    CV_WRAP int getDepth() const
+    {
+        return depth_;
+    }
+    CV_WRAP void setDepth(int val)
+    {
+        depth_ = val;
+    }
+    CV_WRAP cv::Mat getK() const
+    {
+        return K_;
+    }
+    CV_WRAP void setK(const cv::Mat &val)
+    {
+        K_ = val;
+    }
+    CV_WRAP int getMethod() const
+    {
+        return method_;
+    }
+    CV_WRAP void setMethod(int val)
+    {
+        method_ = val;
+    }
+
+  protected:
+    void
+    initialize_normals_impl(int rows, int cols, int depth, const Mat & K, int window_size, int method) const;
+
+    int rows_, cols_, depth_;
+    Mat K_;
+    int window_size_;
+    int method_;
+    mutable void* rgbd_normals_impl_;
+  };
+
+  /** Object that can clean a noisy depth image
+   */
+  class CV_EXPORTS_W DepthCleaner: public Algorithm
+  {
+  public:
+    /** NIL method is from
+     * ``Modeling Kinect Sensor Noise for Improved 3d Reconstruction and Tracking``
+     * by C. Nguyen, S. Izadi, D. Lovell
+     */
+    enum DEPTH_CLEANER_METHOD
+    {
+      DEPTH_CLEANER_NIL
+    };
+
+    DepthCleaner()
+        :
+          depth_(0),
+          window_size_(0),
+          method_(DEPTH_CLEANER_NIL),
+          depth_cleaner_impl_(0)
+    {
+    }
+
+    /** Constructor
+     * @param depth the depth of the image to clean (only CV_32F or CV_64F)
+     * @param window_size the window size used to clean the depth: can only be 1,3,5 or 7
+     * @param method one of the methods to use: DEPTH_CLEANER_NIL
+     */
+    DepthCleaner(int depth, int window_size = 5, int method = DepthCleaner::DEPTH_CLEANER_NIL);
+
+    ~DepthCleaner();
+
+    CV_WRAP static Ptr<DepthCleaner> create(int depth, int window_size = 5, int method = DepthCleaner::DEPTH_CLEANER_NIL);
+
+    /** Given a set of 3d points in a depth image, compute the cleaned up depth.
+     * @param points a rows x cols x 3 matrix of CV_32F/CV_64F or a rows x cols x 1 CV_U16S
+     * @param depth a rows x cols matrix of the cleaned up depth
+     */
+    CV_WRAP_AS(apply) void
+    operator()(InputArray points, OutputArray depth) const;
+
+    /** Initializes some data that is cached for later computation
+     * If that function is not called, it will be called the first time the depth is cleaned
+     */
+    CV_WRAP void
+    initialize() const;
+
+    CV_WRAP int getWindowSize() const
+    {
+        return window_size_;
+    }
+    CV_WRAP void setWindowSize(int val)
+    {
+        window_size_ = val;
+    }
+    CV_WRAP int getDepth() const
+    {
+        return depth_;
+    }
+    CV_WRAP void setDepth(int val)
+    {
+        depth_ = val;
+    }
+    CV_WRAP int getMethod() const
+    {
+        return method_;
+    }
+    CV_WRAP void setMethod(int val)
+    {
+        method_ = val;
+    }
+
+  protected:
+    void
+    initialize_cleaner_impl() const;
+
+    int depth_;
+    int window_size_;
+    int method_;
+    mutable void* depth_cleaner_impl_;
+  };
+
+
+  /** Registers depth data to an external camera
+   * Registration is performed by creating a depth cloud, transforming the cloud by
+   * the rigid body transformation between the cameras, and then projecting the
+   * transformed points into the RGB camera.
+   *
+   * uv_rgb = K_rgb * [R | t] * z * inv(K_ir) * uv_ir
+   *
+   * Currently does not check for negative depth values.
+   *
+   * @param unregisteredCameraMatrix the camera matrix of the depth camera
+   * @param registeredCameraMatrix the camera matrix of the external camera
+   * @param registeredDistCoeffs the distortion coefficients of the external camera
+   * @param Rt the rigid body transform between the cameras. Transforms points from depth camera frame to external camera frame.
+   * @param unregisteredDepth the input depth data
+   * @param outputImagePlaneSize the image plane dimensions of the external camera (width, height)
+   * @param registeredDepth the result of transforming the depth into the external camera
+   * @param depthDilation whether or not the depth is dilated to avoid holes and occlusion errors (optional)
+   */
+  CV_EXPORTS_W
+  void
+  registerDepth(InputArray unregisteredCameraMatrix, InputArray registeredCameraMatrix, InputArray registeredDistCoeffs,
+                InputArray Rt, InputArray unregisteredDepth, const Size& outputImagePlaneSize,
+                OutputArray registeredDepth, bool depthDilation=false);
+
+  /**
+   * @param depth the depth image
+   * @param in_K the calibration matrix
+   * @param in_points the list of xy coordinates
+   * @param points3d the resulting 3d points
+   */
+  CV_EXPORTS_W
+  void
+  depthTo3dSparse(InputArray depth, InputArray in_K, InputArray in_points, OutputArray points3d);
+
+  /** Converts a depth image to an organized set of 3d points.
+   * The coordinate system is x pointing left, y down and z away from the camera
+   * @param depth the depth image (if given as short int CV_U, it is assumed to be the depth in millimeters
+   *              (as done with the Microsoft Kinect), otherwise, if given as CV_32F or CV_64F, it is assumed in meters)
+   * @param K The calibration matrix
+   * @param points3d the resulting 3d points. They are of depth the same as `depth` if it is CV_32F or CV_64F, and the
+   *        depth of `K` if `depth` is of depth CV_U
+   * @param mask the mask of the points to consider (can be empty)
+   */
+  CV_EXPORTS_W
+  void
+  depthTo3d(InputArray depth, InputArray K, OutputArray points3d, InputArray mask = noArray());
+
+  /** If the input image is of type CV_16UC1 (like the Kinect one), the image is converted to floats, divided
+   * by 1000 to get a depth in meters, and the values 0 are converted to std::numeric_limits<float>::quiet_NaN()
+   * Otherwise, the image is simply converted to floats
+   * @param in the depth image (if given as short int CV_U, it is assumed to be the depth in millimeters
+   *              (as done with the Microsoft Kinect), otherwise it is assumed in meters)
+   * @param depth the desired output depth (floats or double)
+   * @param out The rescaled float depth image
+   */
+  CV_EXPORTS_W
+  void
+  rescaleDepth(InputArray in, int depth, OutputArray out);
+
+  /** Object that can compute planes in an image
+   */
+  class CV_EXPORTS_W RgbdPlane: public Algorithm
+  {
+  public:
+    enum RGBD_PLANE_METHOD
+    {
+      RGBD_PLANE_METHOD_DEFAULT
+    };
+
+      RgbdPlane(int method = RgbdPlane::RGBD_PLANE_METHOD_DEFAULT)
+        :
+          method_(method),
+          block_size_(40),
+          min_size_(block_size_*block_size_),
+          threshold_(0.01),
+          sensor_error_a_(0),
+          sensor_error_b_(0),
+          sensor_error_c_(0)
+    {
+    }
+
+    /** Constructor
+     * @param block_size The size of the blocks to look at for a stable MSE
+     * @param min_size The minimum size of a cluster to be considered a plane
+     * @param threshold The maximum distance of a point from a plane to belong to it (in meters)
+     * @param sensor_error_a coefficient of the sensor error. 0 by default, 0.0075 for a Kinect
+     * @param sensor_error_b coefficient of the sensor error. 0 by default
+     * @param sensor_error_c coefficient of the sensor error. 0 by default
+     * @param method The method to use to compute the planes.
+     */
+    RgbdPlane(int method, int block_size,
+              int min_size, double threshold, double sensor_error_a = 0,
+              double sensor_error_b = 0, double sensor_error_c = 0);
+
+    ~RgbdPlane();
+
+    CV_WRAP static Ptr<RgbdPlane> create(int method, int block_size, int min_size, double threshold,
+                                         double sensor_error_a = 0, double sensor_error_b = 0,
+                                         double sensor_error_c = 0);
+
+    /** Find the planes in a depth image
+     * @param points3d the 3d points organized like the depth image: rows x cols with 3 channels
+     * @param normals the normals for every point in the depth image
+     * @param mask An image where each pixel is labeled with the plane it belongs to
+     *        and 255 if it does not belong to any plane
+     * @param plane_coefficients the coefficients of the corresponding planes (a,b,c,d) such that ax+by+cz+d=0, norm(a,b,c)=1
+     *        and c < 0 (so that the normal points towards the camera)
+     */
+    CV_WRAP_AS(apply) void
+    operator()(InputArray points3d, InputArray normals, OutputArray mask,
+               OutputArray plane_coefficients);
+
+    /** Find the planes in a depth image but without doing a normal check, which is faster but less accurate
+     * @param points3d the 3d points organized like the depth image: rows x cols with 3 channels
+     * @param mask An image where each pixel is labeled with the plane it belongs to
+     *        and 255 if it does not belong to any plane
+     * @param plane_coefficients the coefficients of the corresponding planes (a,b,c,d) such that ax+by+cz+d=0
+     */
+    CV_WRAP_AS(apply) void
+    operator()(InputArray points3d, OutputArray mask, OutputArray plane_coefficients);
+
+    CV_WRAP int getBlockSize() const
+    {
+        return block_size_;
+    }
+    CV_WRAP void setBlockSize(int val)
+    {
+        block_size_ = val;
+    }
+    CV_WRAP int getMinSize() const
+    {
+        return min_size_;
+    }
+    CV_WRAP void setMinSize(int val)
+    {
+        min_size_ = val;
+    }
+    CV_WRAP int getMethod() const
+    {
+        return method_;
+    }
+    CV_WRAP void setMethod(int val)
+    {
+        method_ = val;
+    }
+    CV_WRAP double getThreshold() const
+    {
+        return threshold_;
+    }
+    CV_WRAP void setThreshold(double val)
+    {
+        threshold_ = val;
+    }
+    CV_WRAP double getSensorErrorA() const
+    {
+        return sensor_error_a_;
+    }
+    CV_WRAP void setSensorErrorA(double val)
+    {
+        sensor_error_a_ = val;
+    }
+    CV_WRAP double getSensorErrorB() const
+    {
+        return sensor_error_b_;
+    }
+    CV_WRAP void setSensorErrorB(double val)
+    {
+        sensor_error_b_ = val;
+    }
+    CV_WRAP double getSensorErrorC() const
+    {
+        return sensor_error_c_;
+    }
+    CV_WRAP void setSensorErrorC(double val)
+    {
+        sensor_error_c_ = val;
+    }
+
+  private:
+    /** The method to use to compute the planes */
+    int method_;
+    /** The size of the blocks to look at for a stable MSE */
+    int block_size_;
+    /** The minimum size of a cluster to be considered a plane */
+    int min_size_;
+    /** How far a point can be from a plane to belong to it (in meters) */
+    double threshold_;
+    /** coefficients of the sensor error (a, b, c). All 0 by default but you want a=0.0075 for a Kinect */
+    double sensor_error_a_, sensor_error_b_, sensor_error_c_;
+  };
+
+  /** Object that contains a frame data.
+   */
+  struct CV_EXPORTS_W RgbdFrame
+  {
+      RgbdFrame();
+      RgbdFrame(const Mat& image, const Mat& depth, const Mat& mask=Mat(), const Mat& normals=Mat(), int ID=-1);
+      virtual ~RgbdFrame();
+
+      CV_WRAP static Ptr<RgbdFrame> create(const Mat& image=Mat(), const Mat& depth=Mat(), const Mat& mask=Mat(), const Mat& normals=Mat(), int ID=-1);
+
+      CV_WRAP virtual void
+      release();
+
+      CV_PROP int ID;
+      CV_PROP Mat image;
+      CV_PROP Mat depth;
+      CV_PROP Mat mask;
+      CV_PROP Mat normals;
+  };
+
+  /** Object that contains a frame data that is possibly needed for the Odometry.
+   * It's used for the efficiency (to pass precomputed/cached data of the frame that participates
+   * in the Odometry processing several times).
+   */
+  struct CV_EXPORTS_W OdometryFrame : public RgbdFrame
+  {
+    /** These constants are used to set a type of cache which has to be prepared depending on the frame role:
+     * srcFrame or dstFrame (see compute method of the Odometry class). For the srcFrame and dstFrame different cache data may be required,
+     * some part of a cache may be common for both frame roles.
+     * @param CACHE_SRC The cache data for the srcFrame will be prepared.
+     * @param CACHE_DST The cache data for the dstFrame will be prepared.
+     * @param CACHE_ALL The cache data for both srcFrame and dstFrame roles will be computed.
+     */
+    enum
+    {
+      CACHE_SRC = 1, CACHE_DST = 2, CACHE_ALL = CACHE_SRC + CACHE_DST
+    };
+
+    OdometryFrame();
+    OdometryFrame(const Mat& image, const Mat& depth, const Mat& mask=Mat(), const Mat& normals=Mat(), int ID=-1);
+
+    CV_WRAP static Ptr<OdometryFrame> create(const Mat& image=Mat(), const Mat& depth=Mat(), const Mat& mask=Mat(), const Mat& normals=Mat(), int ID=-1);
+
+    CV_WRAP virtual void
+    release() CV_OVERRIDE;
+
+    CV_WRAP void
+    releasePyramids();
+
+    CV_PROP std::vector<Mat> pyramidImage;
+    CV_PROP std::vector<Mat> pyramidDepth;
+    CV_PROP std::vector<Mat> pyramidMask;
+
+    CV_PROP std::vector<Mat> pyramidCloud;
+
+    CV_PROP std::vector<Mat> pyramid_dI_dx;
+    CV_PROP std::vector<Mat> pyramid_dI_dy;
+    CV_PROP std::vector<Mat> pyramidTexturedMask;
+
+    CV_PROP std::vector<Mat> pyramidNormals;
+    CV_PROP std::vector<Mat> pyramidNormalsMask;
+  };
+
+  /** Base class for computation of odometry.
+   */
+  class CV_EXPORTS_W Odometry: public Algorithm
+  {
+  public:
+
+    /** A class of transformation*/
+    enum
+    {
+      ROTATION = 1, TRANSLATION = 2, RIGID_BODY_MOTION = 4
+    };
+
+    CV_WRAP static inline float
+    DEFAULT_MIN_DEPTH()
+    {
+      return 0.f; // in meters
+    }
+    CV_WRAP static inline float
+    DEFAULT_MAX_DEPTH()
+    {
+      return 4.f; // in meters
+    }
+    CV_WRAP static inline float
+    DEFAULT_MAX_DEPTH_DIFF()
+    {
+      return 0.07f; // in meters
+    }
+    CV_WRAP static inline float
+    DEFAULT_MAX_POINTS_PART()
+    {
+      return 0.07f; // in [0, 1]
+    }
+    CV_WRAP static inline float
+    DEFAULT_MAX_TRANSLATION()
+    {
+      return 0.15f; // in meters
+    }
+    CV_WRAP static inline float
+    DEFAULT_MAX_ROTATION()
+    {
+      return 15; // in degrees
+    }
+
+    /** Method to compute a transformation from the source frame to the destination one.
+     * Some odometry algorithms do not use some data of frames (eg. ICP does not use images).
+     * In such case corresponding arguments can be set as empty Mat.
+     * The method returns true if all internal computations were possible (e.g. there were enough correspondences,
+     * system of equations has a solution, etc) and resulting transformation satisfies some test if it's provided
+     * by the Odometry inheritor implementation (e.g. thresholds for maximum translation and rotation).
+     * @param srcImage Image data of the source frame (CV_8UC1)
+     * @param srcDepth Depth data of the source frame (CV_32FC1, in meters)
+     * @param srcMask Mask that sets which pixels have to be used from the source frame (CV_8UC1)
+     * @param dstImage Image data of the destination frame (CV_8UC1)
+     * @param dstDepth Depth data of the destination frame (CV_32FC1, in meters)
+     * @param dstMask Mask that sets which pixels have to be used from the destination frame (CV_8UC1)
+     * @param Rt Resulting transformation from the source frame to the destination one (rigid body motion):
+     dst_p = Rt * src_p, where dst_p is a homogeneous point in the destination frame and src_p is
+     homogeneous point in the source frame,
+     Rt is 4x4 matrix of CV_64FC1 type.
+     * @param initRt Initial transformation from the source frame to the destination one (optional)
+     */
+    CV_WRAP bool
+    compute(const Mat& srcImage, const Mat& srcDepth, const Mat& srcMask, const Mat& dstImage, const Mat& dstDepth,
+            const Mat& dstMask, OutputArray Rt, const Mat& initRt = Mat()) const;
+
+    /** One more method to compute a transformation from the source frame to the destination one.
+     * It is designed to save on computing the frame data (image pyramids, normals, etc.).
+     */
+    CV_WRAP_AS(compute2) bool
+    compute(Ptr<OdometryFrame>& srcFrame, Ptr<OdometryFrame>& dstFrame, OutputArray Rt, const Mat& initRt = Mat()) const;
+
+    /** Prepare a cache for the frame. The function checks the precomputed/passed data (throws the error if this data
+     * does not satisfy) and computes all remaining cache data needed for the frame. Returned size is a resolution
+     * of the prepared frame.
+     * @param frame The frame to prepare the cache for (it will be processed by this odometry).
+     * @param cacheType The cache type: CACHE_SRC, CACHE_DST or CACHE_ALL.
+     */
+    CV_WRAP virtual Size prepareFrameCache(Ptr<OdometryFrame>& frame, int cacheType) const;
+
+    CV_WRAP static Ptr<Odometry> create(const String & odometryType);
+
+    /** @see setCameraMatrix */
+    CV_WRAP virtual cv::Mat getCameraMatrix() const = 0;
+    /** @copybrief getCameraMatrix @see getCameraMatrix */
+    CV_WRAP virtual void setCameraMatrix(const cv::Mat &val) = 0;
+    /** @see setTransformType */
+    CV_WRAP virtual int getTransformType() const = 0;
+    /** @copybrief getTransformType @see getTransformType */
+    CV_WRAP virtual void setTransformType(int val) = 0;
+
+  protected:
+    virtual void
+    checkParams() const = 0;
+
+    virtual bool
+    computeImpl(const Ptr<OdometryFrame>& srcFrame, const Ptr<OdometryFrame>& dstFrame, OutputArray Rt,
+                const Mat& initRt) const = 0;
+  };
+
+  /** Odometry based on the paper "Real-Time Visual Odometry from Dense RGB-D Images",
+   * F. Steinbrucker, J. Sturm, D. Cremers, ICCV, 2011.
+   */
+  class CV_EXPORTS_W RgbdOdometry: public Odometry
+  {
+  public:
+    RgbdOdometry();
+    /** Constructor.
+     * @param cameraMatrix Camera matrix
+     * @param minDepth Pixels with depth less than minDepth will not be used (in meters)
+     * @param maxDepth Pixels with depth larger than maxDepth will not be used (in meters)
+     * @param maxDepthDiff Correspondences between pixels of two given frames will be filtered out
+     *                     if their depth difference is larger than maxDepthDiff (in meters)
+     * @param iterCounts Count of iterations on each pyramid level.
+     * @param minGradientMagnitudes For each pyramid level the pixels will be filtered out
+     *                              if they have gradient magnitude less than minGradientMagnitudes[level].
+     * @param maxPointsPart The method uses a random subset of pixels of size frameWidth x frameHeight x maxPointsPart
+     * @param transformType Class of transformation (defaults to Odometry::RIGID_BODY_MOTION)
+     */
+    RgbdOdometry(const Mat& cameraMatrix, float minDepth = Odometry::DEFAULT_MIN_DEPTH(), float maxDepth = Odometry::DEFAULT_MAX_DEPTH(),
+                 float maxDepthDiff = Odometry::DEFAULT_MAX_DEPTH_DIFF(), const std::vector<int>& iterCounts = std::vector<int>(),
+                 const std::vector<float>& minGradientMagnitudes = std::vector<float>(), float maxPointsPart = Odometry::DEFAULT_MAX_POINTS_PART(),
+                 int transformType = Odometry::RIGID_BODY_MOTION);
+
+    CV_WRAP static Ptr<RgbdOdometry> create(const Mat& cameraMatrix = Mat(), float minDepth = Odometry::DEFAULT_MIN_DEPTH(), float maxDepth = Odometry::DEFAULT_MAX_DEPTH(),
+                 float maxDepthDiff = Odometry::DEFAULT_MAX_DEPTH_DIFF(), const std::vector<int>& iterCounts = std::vector<int>(),
+                 const std::vector<float>& minGradientMagnitudes = std::vector<float>(), float maxPointsPart = Odometry::DEFAULT_MAX_POINTS_PART(),
+                 int transformType = Odometry::RIGID_BODY_MOTION);
+
+    CV_WRAP virtual Size prepareFrameCache(Ptr<OdometryFrame>& frame, int cacheType) const CV_OVERRIDE;
+
+    CV_WRAP cv::Mat getCameraMatrix() const CV_OVERRIDE
+    {
+        return cameraMatrix;
+    }
+    CV_WRAP void setCameraMatrix(const cv::Mat &val) CV_OVERRIDE
+    {
+        cameraMatrix = val;
+    }
+    CV_WRAP double getMinDepth() const
+    {
+        return minDepth;
+    }
+    CV_WRAP void setMinDepth(double val)
+    {
+        minDepth = val;
+    }
+    CV_WRAP double getMaxDepth() const
+    {
+        return maxDepth;
+    }
+    CV_WRAP void setMaxDepth(double val)
+    {
+        maxDepth = val;
+    }
+    CV_WRAP double getMaxDepthDiff() const
+    {
+        return maxDepthDiff;
+    }
+    CV_WRAP void setMaxDepthDiff(double val)
+    {
+        maxDepthDiff = val;
+    }
+    CV_WRAP cv::Mat getIterationCounts() const
+    {
+        return iterCounts;
+    }
+    CV_WRAP void setIterationCounts(const cv::Mat &val)
+    {
+        iterCounts = val;
+    }
+    CV_WRAP cv::Mat getMinGradientMagnitudes() const
+    {
+        return minGradientMagnitudes;
+    }
+    CV_WRAP void setMinGradientMagnitudes(const cv::Mat &val)
+    {
+        minGradientMagnitudes = val;
+    }
+    CV_WRAP double getMaxPointsPart() const
+    {
+        return maxPointsPart;
+    }
+    CV_WRAP void setMaxPointsPart(double val)
+    {
+        maxPointsPart = val;
+    }
+    CV_WRAP int getTransformType() const CV_OVERRIDE
+    {
+        return transformType;
+    }
+    CV_WRAP void setTransformType(int val) CV_OVERRIDE
+    {
+        transformType = val;
+    }
+    CV_WRAP double getMaxTranslation() const
+    {
+        return maxTranslation;
+    }
+    CV_WRAP void setMaxTranslation(double val)
+    {
+        maxTranslation = val;
+    }
+    CV_WRAP double getMaxRotation() const
+    {
+        return maxRotation;
+    }
+    CV_WRAP void setMaxRotation(double val)
+    {
+        maxRotation = val;
+    }
+
+  protected:
+    virtual void
+    checkParams() const CV_OVERRIDE;
+
+    virtual bool
+    computeImpl(const Ptr<OdometryFrame>& srcFrame, const Ptr<OdometryFrame>& dstFrame, OutputArray Rt,
+                const Mat& initRt) const CV_OVERRIDE;
+
+    // Some params carry their desired type in a comment, because AlgorithmInfo::addParams does not support that type yet.
+    /*float*/
+    double minDepth, maxDepth, maxDepthDiff;
+    /*vector<int>*/
+    Mat iterCounts;
+    /*vector<float>*/
+    Mat minGradientMagnitudes;
+    double maxPointsPart;
+
+    Mat cameraMatrix;
+    int transformType;
+
+    double maxTranslation, maxRotation;
+  };
+
+  /** Odometry based on the paper "KinectFusion: Real-Time Dense Surface Mapping and Tracking",
+   * Richard A. Newcombe, Andrew Fitzgibbon, et al., SIGGRAPH, 2011.
+   */
+  class CV_EXPORTS_W ICPOdometry: public Odometry
+  {
+  public:
+    ICPOdometry();
+    /** Constructor.
+     * @param cameraMatrix Camera matrix
+     * @param minDepth Pixels with depth less than minDepth will not be used
+     * @param maxDepth Pixels with depth larger than maxDepth will not be used
+     * @param maxDepthDiff Correspondences between pixels of two given frames will be filtered out
+     *                     if their depth difference is larger than maxDepthDiff
+     * @param maxPointsPart The method uses a random subset of pixels of size frameWidth x frameHeight x maxPointsPart
+     * @param iterCounts Count of iterations on each pyramid level.
+     * @param transformType Class of transformation (defaults to Odometry::RIGID_BODY_MOTION)
+     */
+    ICPOdometry(const Mat& cameraMatrix, float minDepth = Odometry::DEFAULT_MIN_DEPTH(), float maxDepth = Odometry::DEFAULT_MAX_DEPTH(),
+                float maxDepthDiff = Odometry::DEFAULT_MAX_DEPTH_DIFF(), float maxPointsPart = Odometry::DEFAULT_MAX_POINTS_PART(),
+                const std::vector<int>& iterCounts = std::vector<int>(), int transformType = Odometry::RIGID_BODY_MOTION);
+
+    CV_WRAP static Ptr<ICPOdometry> create(const Mat& cameraMatrix = Mat(), float minDepth = Odometry::DEFAULT_MIN_DEPTH(), float maxDepth = Odometry::DEFAULT_MAX_DEPTH(),
+                float maxDepthDiff = Odometry::DEFAULT_MAX_DEPTH_DIFF(), float maxPointsPart = Odometry::DEFAULT_MAX_POINTS_PART(),
+                const std::vector<int>& iterCounts = std::vector<int>(), int transformType = Odometry::RIGID_BODY_MOTION);
+
+    CV_WRAP virtual Size prepareFrameCache(Ptr<OdometryFrame>& frame, int cacheType) const CV_OVERRIDE;
+
+    CV_WRAP cv::Mat getCameraMatrix() const CV_OVERRIDE
+    {
+        return cameraMatrix;
+    }
+    CV_WRAP void setCameraMatrix(const cv::Mat &val) CV_OVERRIDE
+    {
+        cameraMatrix = val;
+    }
+    CV_WRAP double getMinDepth() const
+    {
+        return minDepth;
+    }
+    CV_WRAP void setMinDepth(double val)
+    {
+        minDepth = val;
+    }
+    CV_WRAP double getMaxDepth() const
+    {
+        return maxDepth;
+    }
+    CV_WRAP void setMaxDepth(double val)
+    {
+        maxDepth = val;
+    }
+    CV_WRAP double getMaxDepthDiff() const
+    {
+        return maxDepthDiff;
+    }
+    CV_WRAP void setMaxDepthDiff(double val)
+    {
+        maxDepthDiff = val;
+    }
+    CV_WRAP cv::Mat getIterationCounts() const
+    {
+        return iterCounts;
+    }
+    CV_WRAP void setIterationCounts(const cv::Mat &val)
+    {
+        iterCounts = val;
+    }
+    CV_WRAP double getMaxPointsPart() const
+    {
+        return maxPointsPart;
+    }
+    CV_WRAP void setMaxPointsPart(double val)
+    {
+        maxPointsPart = val;
+    }
+    CV_WRAP int getTransformType() const CV_OVERRIDE
+    {
+        return transformType;
+    }
+    CV_WRAP void setTransformType(int val) CV_OVERRIDE
+    {
+        transformType = val;
+    }
+    CV_WRAP double getMaxTranslation() const
+    {
+        return maxTranslation;
+    }
+    CV_WRAP void setMaxTranslation(double val)
+    {
+        maxTranslation = val;
+    }
+    CV_WRAP double getMaxRotation() const
+    {
+        return maxRotation;
+    }
+    CV_WRAP void setMaxRotation(double val)
+    {
+        maxRotation = val;
+    }
+    CV_WRAP Ptr<RgbdNormals> getNormalsComputer() const
+    {
+        return normalsComputer;
+    }
+
+  protected:
+    virtual void
+    checkParams() const CV_OVERRIDE;
+
+    virtual bool
+    computeImpl(const Ptr<OdometryFrame>& srcFrame, const Ptr<OdometryFrame>& dstFrame, OutputArray Rt,
+                const Mat& initRt) const CV_OVERRIDE;
+
+    // Some params carry their desired type in a comment, because AlgorithmInfo::addParams does not support that type yet.
+    /*float*/
+    double minDepth, maxDepth, maxDepthDiff;
+    /*float*/
+    double maxPointsPart;
+    /*vector<int>*/
+    Mat iterCounts;
+
+    Mat cameraMatrix;
+    int transformType;
+
+    double maxTranslation, maxRotation;
+
+    mutable Ptr<RgbdNormals> normalsComputer;
+  };
+
+  /** Odometry that merges RgbdOdometry and ICPOdometry by minimizing the sum of their energy functions.
+   */
+
+  class CV_EXPORTS_W RgbdICPOdometry: public Odometry
+  {
+  public:
+    RgbdICPOdometry();
+    /** Constructor.
+     * @param cameraMatrix Camera matrix
+     * @param minDepth Pixels with depth less than minDepth will not be used
+     * @param maxDepth Pixels with depth larger than maxDepth will not be used
+     * @param maxDepthDiff Correspondences between pixels of two given frames will be filtered out
+     *                     if their depth difference is larger than maxDepthDiff
+     * @param maxPointsPart The method uses a random subset of pixels of size frameWidth x frameHeight x maxPointsPart
+     * @param iterCounts Count of iterations on each pyramid level.
+     * @param minGradientMagnitudes For each pyramid level the pixels will be filtered out
+     *                              if they have gradient magnitude less than minGradientMagnitudes[level].
+     * @param transformType Class of transformation (defaults to Odometry::RIGID_BODY_MOTION)
+     */
+    RgbdICPOdometry(const Mat& cameraMatrix, float minDepth = Odometry::DEFAULT_MIN_DEPTH(), float maxDepth = Odometry::DEFAULT_MAX_DEPTH(),
+                    float maxDepthDiff = Odometry::DEFAULT_MAX_DEPTH_DIFF(), float maxPointsPart = Odometry::DEFAULT_MAX_POINTS_PART(),
+                    const std::vector<int>& iterCounts = std::vector<int>(),
+                    const std::vector<float>& minGradientMagnitudes = std::vector<float>(),
+                    int transformType = Odometry::RIGID_BODY_MOTION);
+
+    CV_WRAP static Ptr<RgbdICPOdometry> create(const Mat& cameraMatrix = Mat(), float minDepth = Odometry::DEFAULT_MIN_DEPTH(), float maxDepth = Odometry::DEFAULT_MAX_DEPTH(),
+                    float maxDepthDiff = Odometry::DEFAULT_MAX_DEPTH_DIFF(), float maxPointsPart = Odometry::DEFAULT_MAX_POINTS_PART(),
+                    const std::vector<int>& iterCounts = std::vector<int>(),
+                    const std::vector<float>& minGradientMagnitudes = std::vector<float>(),
+                    int transformType = Odometry::RIGID_BODY_MOTION);
+
+    CV_WRAP virtual Size prepareFrameCache(Ptr<OdometryFrame>& frame, int cacheType) const CV_OVERRIDE;
+
+    CV_WRAP cv::Mat getCameraMatrix() const CV_OVERRIDE
+    {
+        return cameraMatrix;
+    }
+    CV_WRAP void setCameraMatrix(const cv::Mat &val) CV_OVERRIDE
+    {
+        cameraMatrix = val;
+    }
+    CV_WRAP double getMinDepth() const
+    {
+        return minDepth;
+    }
+    CV_WRAP void setMinDepth(double val)
+    {
+        minDepth = val;
+    }
+    CV_WRAP double getMaxDepth() const
+    {
+        return maxDepth;
+    }
+    CV_WRAP void setMaxDepth(double val)
+    {
+        maxDepth = val;
+    }
+    CV_WRAP double getMaxDepthDiff() const
+    {
+        return maxDepthDiff;
+    }
+    CV_WRAP void setMaxDepthDiff(double val)
+    {
+        maxDepthDiff = val;
+    }
+    CV_WRAP double getMaxPointsPart() const
+    {
+        return maxPointsPart;
+    }
+    CV_WRAP void setMaxPointsPart(double val)
+    {
+        maxPointsPart = val;
+    }
+    CV_WRAP cv::Mat getIterationCounts() const
+    {
+        return iterCounts;
+    }
+    CV_WRAP void setIterationCounts(const cv::Mat &val)
+    {
+        iterCounts = val;
+    }
+    CV_WRAP cv::Mat getMinGradientMagnitudes() const
+    {
+        return minGradientMagnitudes;
+    }
+    CV_WRAP void setMinGradientMagnitudes(const cv::Mat &val)
+    {
+        minGradientMagnitudes = val;
+    }
+    CV_WRAP int getTransformType() const CV_OVERRIDE
+    {
+        return transformType;
+    }
+    CV_WRAP void setTransformType(int val) CV_OVERRIDE
+    {
+        transformType = val;
+    }
+    CV_WRAP double getMaxTranslation() const
+    {
+        return maxTranslation;
+    }
+    CV_WRAP void setMaxTranslation(double val)
+    {
+        maxTranslation = val;
+    }
+    CV_WRAP double getMaxRotation() const
+    {
+        return maxRotation;
+    }
+    CV_WRAP void setMaxRotation(double val)
+    {
+        maxRotation = val;
+    }
+    CV_WRAP Ptr<RgbdNormals> getNormalsComputer() const
+    {
+        return normalsComputer;
+    }
+
+  protected:
+    virtual void
+    checkParams() const CV_OVERRIDE;
+
+    virtual bool
+    computeImpl(const Ptr<OdometryFrame>& srcFrame, const Ptr<OdometryFrame>& dstFrame, OutputArray Rt,
+                const Mat& initRt) const CV_OVERRIDE;
+
+    // Some params carry their desired type in a comment, because AlgorithmInfo::addParams does not support that type yet.
+    /*float*/
+    double minDepth, maxDepth, maxDepthDiff;
+    /*float*/
+    double maxPointsPart;
+    /*vector<int>*/
+    Mat iterCounts;
+    /*vector<float>*/
+    Mat minGradientMagnitudes;
+
+    Mat cameraMatrix;
+    int transformType;
+
+    double maxTranslation, maxRotation;
+
+    mutable Ptr<RgbdNormals> normalsComputer;
+  };
+
+  /** A faster version of ICPOdometry which is used in the KinectFusion implementation.
+   * Partial list of differences:
+   * - Works in parallel
+   * - Written in universal intrinsics
+   * - Filters points by angle
+   * - Interpolates points and normals
+   * - Doesn't use masks or min/max depth filtering
+   * - Doesn't use random subsets of points
+   * - Supports only Rt transform type
+   * - Supports only 4-float vectors as input type
+   */
+  class CV_EXPORTS_W FastICPOdometry: public Odometry
+  {
+  public:
+    FastICPOdometry();
+    /** Constructor.
+     * @param cameraMatrix Camera matrix
+     * @param maxDistDiff Correspondences between pixels of two given frames will be filtered out
+     *                     if their depth difference is larger than maxDistDiff
+     * @param angleThreshold Correspondence will be filtered out
+     *                     if an angle between their normals is bigger than angleThreshold (in radians)
+     * @param sigmaDepth Depth sigma in meters for bilateral smooth
+     * @param sigmaSpatial Spatial sigma in pixels for bilateral smooth
+     * @param kernelSize Kernel size in pixels for bilateral smooth
+     * @param iterCounts Count of iterations on each pyramid level
+     */
+    FastICPOdometry(const Mat& cameraMatrix,
+                    float maxDistDiff = Odometry::DEFAULT_MAX_DEPTH_DIFF(),
+                    float angleThreshold = (float)(30. * CV_PI / 180.),
+                    float sigmaDepth = 0.04f,
+                    float sigmaSpatial = 4.5f,
+                    int kernelSize = 7,
+                    const std::vector<int>& iterCounts = std::vector<int>());
+
+    CV_WRAP static Ptr<FastICPOdometry> create(const Mat& cameraMatrix,
+                                               float maxDistDiff = Odometry::DEFAULT_MAX_DEPTH_DIFF(),
+                                               float angleThreshold = (float)(30. * CV_PI / 180.),
+                                               float sigmaDepth = 0.04f,
+                                               float sigmaSpatial = 4.5f,
+                                               int kernelSize = 7,
+                                               const std::vector<int>& iterCounts = std::vector<int>());
+
+    CV_WRAP virtual Size prepareFrameCache(Ptr<OdometryFrame>& frame, int cacheType) const CV_OVERRIDE;
+
+    CV_WRAP cv::Mat getCameraMatrix() const CV_OVERRIDE
+    {
+        return cameraMatrix;
+    }
+    CV_WRAP void setCameraMatrix(const cv::Mat &val) CV_OVERRIDE
+    {
+        cameraMatrix = val;
+    }
+    CV_WRAP double getMaxDistDiff() const
+    {
+        return maxDistDiff;
+    }
+    CV_WRAP void setMaxDistDiff(float val)
+    {
+        maxDistDiff = val;
+    }
+    CV_WRAP float getAngleThreshold() const
+    {
+        return angleThreshold;
+    }
+    CV_WRAP void setAngleThreshold(float f)
+    {
+        angleThreshold = f;
+    }
+    CV_WRAP float getSigmaDepth() const
+    {
+        return sigmaDepth;
+    }
+    CV_WRAP void setSigmaDepth(float f)
+    {
+        sigmaDepth = f;
+    }
+    CV_WRAP float getSigmaSpatial() const
+    {
+        return sigmaSpatial;
+    }
+    CV_WRAP void setSigmaSpatial(float f)
+    {
+        sigmaSpatial = f;
+    }
+    CV_WRAP int getKernelSize() const
+    {
+        return kernelSize;
+    }
+    CV_WRAP void setKernelSize(int f)
+    {
+        kernelSize = f;
+    }
+    CV_WRAP cv::Mat getIterationCounts() const
+    {
+        return iterCounts;
+    }
+    CV_WRAP void setIterationCounts(const cv::Mat &val)
+    {
+        iterCounts = val;
+    }
+    CV_WRAP int getTransformType() const CV_OVERRIDE
+    {
+        return Odometry::RIGID_BODY_MOTION;
+    }
+    CV_WRAP void setTransformType(int val) CV_OVERRIDE
+    {
+        if(val != Odometry::RIGID_BODY_MOTION)
+            throw std::runtime_error("Rigid Body Motion is the only accepted transformation type"
+                                     " for this odometry method");
+    }
+
+  protected:
+    virtual void
+    checkParams() const CV_OVERRIDE;
+
+    virtual bool
+    computeImpl(const Ptr<OdometryFrame>& srcFrame, const Ptr<OdometryFrame>& dstFrame, OutputArray Rt,
+                const Mat& initRt) const CV_OVERRIDE;
+
+    // Some params carry their desired type in a comment, because AlgorithmInfo::addParams does not support that type yet.
+    float maxDistDiff;
+
+    float angleThreshold;
+
+    float sigmaDepth;
+
+    float sigmaSpatial;
+
+    int kernelSize;
+
+    /*vector<int>*/
+    Mat iterCounts;
+
+    Mat cameraMatrix;
+  };
+
+  /** Warp the image: compute 3d points from the depth, transform them using the given transformation,
+   * then project the colored point cloud onto an image plane.
+   * This function can be used to visualize results of the Odometry algorithm.
+   * @param image The image (of CV_8UC1 or CV_8UC3 type)
+   * @param depth The depth (of type used in depthTo3d function)
+   * @param mask The mask of used pixels (of CV_8UC1), it can be empty
+   * @param Rt The transformation that will be applied to the 3d points computed from the depth
+   * @param cameraMatrix Camera matrix
+   * @param distCoeff Distortion coefficients
+   * @param warpedImage The warped image.
+   * @param warpedDepth The warped depth.
+   * @param warpedMask The warped mask.
+   */
+  CV_EXPORTS_W
+  void
+  warpFrame(const Mat& image, const Mat& depth, const Mat& mask, const Mat& Rt, const Mat& cameraMatrix,
+            const Mat& distCoeff, OutputArray warpedImage, OutputArray warpedDepth = noArray(), OutputArray warpedMask = noArray());
+
+// TODO Depth interpolation
+// Curvature
+// Make rescaleDepth return doubles if asked for
+
+//! @}
+
+} /* namespace rgbd */
+} /* namespace cv */
+
+#endif
+
+/* End of file. */
diff --git a/IPL/include/opencv/opencv2/rgbd/dynafu.hpp b/IPL/include/opencv/opencv2/rgbd/dynafu.hpp
new file mode 100644
index 0000000..d057ebe
--- /dev/null
+++ b/IPL/include/opencv/opencv2/rgbd/dynafu.hpp
@@ -0,0 +1,206 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html
+
+// This code is also subject to the license terms in the LICENSE_KinectFusion.md file found in this module's directory
+
+#ifndef __OPENCV_RGBD_DYNAFU_HPP__
+#define __OPENCV_RGBD_DYNAFU_HPP__
+
+#include "opencv2/core.hpp"
+#include "opencv2/core/affine.hpp"
+
+namespace cv {
+namespace dynafu {
+
+struct CV_EXPORTS_W Params
+{
+    /** @brief Default parameters
+    A set of parameters which provides better model quality, can be very slow.
+    */
+    CV_WRAP static Ptr<Params> defaultParams();
+
+    /** @brief Coarse parameters
+    A set of parameters which provides better speed, can fail to match frames
+    in case of rapid sensor motion.
+    */
+    CV_WRAP static Ptr<Params> coarseParams();
+
+    /** @brief frame size in pixels */
+    CV_PROP_RW Size frameSize;
+
+    /** @brief camera intrinsics */
+    CV_PROP Matx33f intr;
+
+    /** @brief pre-scale per 1 meter for input values
+
+    Typical values are:
+         * 5000 per 1 meter for the 16-bit PNG files of TUM database
+         * 1000 per 1 meter for Kinect 2 device
+         * 1 per 1 meter for the 32-bit float images in the ROS bag files
+    */
+    CV_PROP_RW float depthFactor;
+
+    /** @brief Depth sigma in meters for bilateral smooth */
+    CV_PROP_RW float bilateral_sigma_depth;
+    /** @brief Spatial sigma in pixels for bilateral smooth */
+    CV_PROP_RW float bilateral_sigma_spatial;
+    /** @brief Kernel size in pixels for bilateral smooth */
+    CV_PROP_RW int   bilateral_kernel_size;
+
+    /** @brief Number of pyramid levels for ICP */
+    CV_PROP_RW int pyramidLevels;
+
+    /** @brief Resolution of voxel space
+
+    Number of voxels in each dimension.
+    */
+    CV_PROP_RW Vec3i volumeDims;
+    /** @brief Size of voxel in meters */
+    CV_PROP_RW float voxelSize;
+
+    /** @brief Minimal camera movement in meters
+
+    Integrate new depth frame only if camera movement exceeds this value.
+    */
+    CV_PROP_RW float tsdf_min_camera_movement;
+
+    /** @brief initial volume pose in meters */
+    Affine3f volumePose;
+
+    /** @brief distance to truncate in meters
+
+    Distances to surface that exceed this value will be truncated to 1.0.
+    */
+    CV_PROP_RW float tsdf_trunc_dist;
+
+    /** @brief max number of frames per voxel
+
+    Each voxel keeps running average of distances no longer than this value.
+    */
+    CV_PROP_RW int tsdf_max_weight;
+
+    /** @brief A length of one raycast step
+
+    How many voxel sizes are skipped on each raycast step
+    */
+    CV_PROP_RW float raycast_step_factor;
+
+    // gradient delta in voxel sizes
+    // fixed at 1.0f
+    // float gradient_delta_factor;
+
+    /** @brief light pose for rendering in meters */
+    CV_PROP Vec3f lightPose;
+
+    /** @brief distance threshold for ICP in meters */
+    CV_PROP_RW float icpDistThresh;
+    /** @brief angle threshold for ICP in radians */
+    CV_PROP_RW float icpAngleThresh;
+    /** @brief number of ICP iterations for each pyramid level */
+    CV_PROP std::vector<int> icpIterations;
+
+    /** @brief Threshold for depth truncation in meters
+
+    All depth values beyond this threshold will be set to zero
+    */
+    CV_PROP_RW float truncateThreshold;
+};
+
+/** @brief DynamicFusion implementation
+
+  This class implements a 3d reconstruction algorithm as described in @cite dynamicfusion.
+
+  It takes a sequence of depth images taken from a depth sensor
+  (or any depth images source such as stereo camera matching algorithm or even raymarching renderer).
+  The output can be obtained as a vector of points and their normals
+  or can be Phong-rendered from given camera pose.
+
+  It extends the KinectFusion algorithm to handle non-rigidly deforming scenes by maintaining a sparse
+  set of nodes covering the geometry such that each node contains a warp to transform it from a canonical
+  space to the live frame.
+
+  An internal representation of a model is a voxel cuboid that keeps TSDF values
+  which are a sort of distances to the surface (for details read the @cite kinectfusion article about TSDF).
+  There is no interface to that representation yet.
+
+  Note that DynamicFusion is based on the KinectFusion algorithm which is patented and its use may be
+  restricted by the list of patents mentioned in README.md file in this module directory.
+
+  That's why you need to set the OPENCV_ENABLE_NONFREE option in CMake to use DynamicFusion.
+*/
+class CV_EXPORTS_W DynaFu
+{
+public:
+    CV_WRAP static Ptr<DynaFu> create(const Ptr<Params>& _params);
+    virtual ~DynaFu();
+
+    /** @brief Get current parameters */
+    virtual const Params& getParams() const = 0;
+
+    /** @brief Renders a volume into an image
+
+      Renders a 0-surface of TSDF using Phong shading into a CV_8UC4 Mat.
+      Light pose is fixed in DynaFu params.
+
+        @param image resulting image
+        @param cameraPose pose of camera to render from. If empty then render from current pose
+        which is the last frame camera pose. NOTE(review): default is Matx44f::eye(); confirm it is treated as "empty".
+    */
+
+    CV_WRAP virtual void render(OutputArray image, const Matx44f& cameraPose = Matx44f::eye()) const = 0;
+
+    /** @brief Gets points and normals of current 3d mesh
+
+      The order of normals corresponds to order of points.
+      The order of points is undefined.
+
+        @param points vector of points which are 4-float vectors
+        @param normals vector of normals which are 4-float vectors
+     */
+    CV_WRAP virtual void getCloud(OutputArray points, OutputArray normals) const = 0;
+
+    /** @brief Gets points of current 3d mesh
+
+     The order of points is undefined.
+
+        @param points vector of points which are 4-float vectors
+     */
+    CV_WRAP virtual void getPoints(OutputArray points) const = 0;
+
+    /** @brief Calculates normals for given points
+        @param points input vector of points which are 4-float vectors
+        @param normals output vector of corresponding normals which are 4-float vectors
+     */
+    CV_WRAP virtual  void getNormals(InputArray points, OutputArray normals) const = 0;
+
+    /** @brief Resets the algorithm
+
+    Clears current model and resets a pose.
+    */
+    CV_WRAP virtual void reset() = 0;
+
+    /** @brief Get current pose in voxel space */
+    virtual const Affine3f getPose() const = 0;
+
+    /** @brief Process next depth frame
+
+      Integrates depth into voxel space with respect to its ICP-calculated pose.
+      Input image is converted to CV_32F internally if it has another type.
+
+    @param depth one-channel image whose size and depth scale are described in the algorithm's parameters
+    @return true if succeeded to align new frame with current scene, false if opposite
+    */
+    CV_WRAP virtual bool update(InputArray depth) = 0;
+
+    virtual std::vector<Point3f> getNodesPos() const = 0;
+
+    virtual void marchCubes(OutputArray vertices, OutputArray edges) const = 0;
+
+    virtual void renderSurface(OutputArray depthImage, OutputArray vertImage, OutputArray normImage, bool warp=true) = 0;
+};
+
+//! @}
+}
+}
+#endif
diff --git a/IPL/include/opencv/opencv2/rgbd/kinfu.hpp b/IPL/include/opencv/opencv2/rgbd/kinfu.hpp
new file mode 100644
index 0000000..a2c55bd
--- /dev/null
+++ b/IPL/include/opencv/opencv2/rgbd/kinfu.hpp
@@ -0,0 +1,244 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html
+
+// This code is also subject to the license terms in the LICENSE_KinectFusion.md file found in this module's directory
+
+#ifndef __OPENCV_RGBD_KINFU_HPP__
+#define __OPENCV_RGBD_KINFU_HPP__
+
+#include "opencv2/core.hpp"
+#include "opencv2/core/affine.hpp"
+
+namespace cv {
+namespace kinfu {
+//! @addtogroup kinect_fusion
+//! @{
+
+struct CV_EXPORTS_W Params
+{
+
+    CV_WRAP Params(){}
+
+    /**
+     * @brief Constructor for Params
+     * Sets the initial pose of the TSDF volume.
+     * @param volumeIntialPoseRot rotation matrix
+     * @param volumeIntialPoseTransl translation vector
+     */
+    CV_WRAP Params(Matx33f volumeIntialPoseRot, Vec3f volumeIntialPoseTransl)
+    {
+      setInitialVolumePose(volumeIntialPoseRot,volumeIntialPoseTransl);
+    }
+
+    /**
+     * @brief Constructor for Params
+     * Sets the initial pose of the TSDF volume.
+     * @param volumeIntialPose 4 by 4 Homogeneous Transform matrix to set the initial pose of the TSDF volume
+     */
+    CV_WRAP Params(Matx44f volumeIntialPose)
+    {
+      setInitialVolumePose(volumeIntialPose);
+    }
+
+    /**
+     * @brief Set Initial Volume Pose
+     * Sets the initial pose of the TSDF volume.
+     * @param R rotation matrix
+     * @param t translation vector
+     */
+    CV_WRAP void setInitialVolumePose(Matx33f R, Vec3f t);
+
+    /**
+     * @brief Set Initial Volume Pose
+     * Sets the initial pose of the TSDF volume.
+     * @param homogen_tf 4 by 4 Homogeneous Transform matrix to set the initial pose of the TSDF volume
+     */
+    CV_WRAP void setInitialVolumePose(Matx44f homogen_tf);
+
+    /**
+     * @brief Default parameters
+     * A set of parameters which provides better model quality, can be very slow.
+     */
+    CV_WRAP static Ptr<Params> defaultParams();
+
+    /** @brief Coarse parameters
+    A set of parameters which provides better speed, can fail to match frames
+    in case of rapid sensor motion.
+    */
+    CV_WRAP static Ptr<Params> coarseParams();
+
+    /** @brief frame size in pixels */
+    CV_PROP_RW Size frameSize;
+
+    /** @brief camera intrinsics */
+    CV_PROP_RW Matx33f intr;
+
+    /** @brief pre-scale per 1 meter for input values
+
+    Typical values are:
+         * 5000 per 1 meter for the 16-bit PNG files of TUM database
+         * 1000 per 1 meter for Kinect 2 device
+         * 1 per 1 meter for the 32-bit float images in the ROS bag files
+    */
+    CV_PROP_RW float depthFactor;
+
+    /** @brief Depth sigma in meters for bilateral smooth */
+    CV_PROP_RW float bilateral_sigma_depth;
+    /** @brief Spatial sigma in pixels for bilateral smooth */
+    CV_PROP_RW float bilateral_sigma_spatial;
+    /** @brief Kernel size in pixels for bilateral smooth */
+    CV_PROP_RW int   bilateral_kernel_size;
+
+    /** @brief Number of pyramid levels for ICP */
+    CV_PROP_RW int pyramidLevels;
+
+    /** @brief Resolution of voxel space
+
+    Number of voxels in each dimension.
+    */
+    CV_PROP_RW Vec3i volumeDims;
+    /** @brief Size of voxel in meters */
+    CV_PROP_RW float voxelSize;
+
+    /** @brief Minimal camera movement in meters
+
+    Integrate new depth frame only if camera movement exceeds this value.
+    */
+    CV_PROP_RW float tsdf_min_camera_movement;
+
+    /** @brief initial volume pose in meters */
+    Affine3f volumePose;
+
+    /** @brief distance to truncate in meters
+
+    Distances to surface that exceed this value will be truncated to 1.0.
+    */
+    CV_PROP_RW float tsdf_trunc_dist;
+
+    /** @brief max number of frames per voxel
+
+    Each voxel keeps running average of distances no longer than this value.
+    */
+    CV_PROP_RW int tsdf_max_weight;
+
+    /** @brief A length of one raycast step
+
+    How many voxel sizes we skip on each raycast step
+    */
+    CV_PROP_RW float raycast_step_factor;
+
+    // gradient delta in voxel sizes
+    // fixed at 1.0f
+    // float gradient_delta_factor;
+
+    /** @brief light pose for rendering in meters */
+    CV_PROP_RW Vec3f lightPose;
+
+    /** @brief distance threshold for ICP in meters */
+    CV_PROP_RW float icpDistThresh;
+    /** @brief angle threshold for ICP in radians */
+    CV_PROP_RW float icpAngleThresh;
+    /** @brief number of ICP iterations for each pyramid level */
+    CV_PROP_RW std::vector<int> icpIterations;
+
+    /** @brief Threshold for depth truncation in meters
+
+    All depth values beyond this threshold will be set to zero
+    */
+    CV_PROP_RW float truncateThreshold;
+};
+
+/** @brief KinectFusion implementation
+
+  This class implements a 3d reconstruction algorithm described in
+  @cite kinectfusion paper.
+
+  It takes a sequence of depth images taken from depth sensor
+  (or any depth images source such as stereo camera matching algorithm or even raymarching renderer).
+  The output can be obtained as a vector of points and their normals
+  or can be Phong-rendered from given camera pose.
+
+  An internal representation of a model is a voxel cuboid that keeps TSDF values
+  which are a sort of distances to the surface (for details read the @cite kinectfusion article about TSDF).
+  There is no interface to that representation yet.
+
+  KinFu uses OpenCL acceleration automatically if available.
+  To enable or disable it explicitly use cv::setUseOptimized() or cv::ocl::setUseOpenCL().
+
+  This implementation is based on [kinfu-remake](https://github.com/Nerei/kinfu_remake).
+
+  Note that the KinectFusion algorithm was patented and its use may be restricted by
+  the list of patents mentioned in README.md file in this module directory.
+
+  That's why you need to set the OPENCV_ENABLE_NONFREE option in CMake to use KinectFusion.
+*/
+class CV_EXPORTS_W KinFu
+{
+public:
+    CV_WRAP static Ptr<KinFu> create(const Ptr<Params>& _params);
+    virtual ~KinFu();
+
+    /** @brief Get current parameters */
+    virtual const Params& getParams() const = 0;
+
+    /** @brief Renders a volume into an image
+
+      Renders a 0-surface of TSDF using Phong shading into a CV_8UC4 Mat.
+      Light pose is fixed in KinFu params.
+
+        @param image resulting image
+        @param cameraPose pose of camera to render from. If empty then render from current pose
+        which is a last frame camera pose.
+    */
+
+    CV_WRAP virtual void render(OutputArray image, const Matx44f& cameraPose = Matx44f::eye()) const = 0;
+
+    /** @brief Gets points and normals of current 3d mesh
+
+      The order of normals corresponds to order of points.
+      The order of points is undefined.
+
+        @param points vector of points which are 4-float vectors
+        @param normals vector of normals which are 4-float vectors
+     */
+    CV_WRAP virtual void getCloud(OutputArray points, OutputArray normals) const = 0;
+
+    /** @brief Gets points of current 3d mesh
+
+     The order of points is undefined.
+
+        @param points vector of points which are 4-float vectors
+     */
+    CV_WRAP virtual void getPoints(OutputArray points) const = 0;
+
+    /** @brief Calculates normals for given points
+        @param points input vector of points which are 4-float vectors
+        @param normals output vector of corresponding normals which are 4-float vectors
+     */
+    CV_WRAP virtual  void getNormals(InputArray points, OutputArray normals) const = 0;
+
+    /** @brief Resets the algorithm
+
+    Clears current model and resets a pose.
+    */
+    CV_WRAP virtual void reset() = 0;
+
+    /** @brief Get current pose in voxel space */
+    virtual const Affine3f getPose() const = 0;
+
+    /** @brief Process next depth frame
+
+      Integrates depth into voxel space with respect to its ICP-calculated pose.
+      Input image is converted to CV_32F internally if it has another type.
+
+    @param depth one-channel image whose size and depth scale are described in the algorithm's parameters
+    @return true if the new frame was successfully aligned with the current scene, false otherwise
+    */
+    CV_WRAP virtual bool update(InputArray depth) = 0;
+};
+
+//! @}
+}
+}
+#endif
diff --git a/IPL/include/opencv/opencv2/rgbd/linemod.hpp b/IPL/include/opencv/opencv2/rgbd/linemod.hpp
index ac56291..76b61bf 100644
--- a/IPL/include/opencv/opencv2/rgbd/linemod.hpp
+++ b/IPL/include/opencv/opencv2/rgbd/linemod.hpp
@@ -1,48 +1,11 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                          License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifndef __OPENCV_OBJDETECT_LINEMOD_HPP__
-#define __OPENCV_OBJDETECT_LINEMOD_HPP__
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html
+
+// This code is also subject to the license terms in the LICENSE_WillowGarage.md file found in this module's directory
+
+#ifndef __OPENCV_RGBD_LINEMOD_HPP__
+#define __OPENCV_RGBD_LINEMOD_HPP__
 
 #include "opencv2/core.hpp"
 #include <map>
@@ -60,14 +23,14 @@ namespace linemod {
 /**
  * \brief Discriminant feature described by its location and label.
  */
-struct CV_EXPORTS Feature
+struct CV_EXPORTS_W_SIMPLE Feature
 {
-  int x; ///< x offset
-  int y; ///< y offset
-  int label; ///< Quantization
+  CV_PROP_RW int x; ///< x offset
+  CV_PROP_RW int y; ///< y offset
+  CV_PROP_RW int label; ///< Quantization
 
-  Feature() : x(0), y(0), label(0) {}
-  Feature(int x, int y, int label);
+  CV_WRAP Feature() : x(0), y(0), label(0) {}
+  CV_WRAP Feature(int x, int y, int label);
 
   void read(const FileNode& fn);
   void write(FileStorage& fs) const;
@@ -75,12 +38,12 @@ struct CV_EXPORTS Feature
 
 inline Feature::Feature(int _x, int _y, int _label) : x(_x), y(_y), label(_label) {}
 
-struct CV_EXPORTS Template
+struct CV_EXPORTS_W_SIMPLE Template
 {
-  int width;
-  int height;
-  int pyramid_level;
-  std::vector<Feature> features;
+  CV_PROP int width;
+  CV_PROP int height;
+  CV_PROP int pyramid_level;
+  CV_PROP std::vector<Feature> features;
 
   void read(const FileNode& fn);
   void write(FileStorage& fs) const;
@@ -89,7 +52,7 @@ struct CV_EXPORTS Template
 /**
  * \brief Represents a modality operating over an image pyramid.
  */
-class QuantizedPyramid
+class CV_EXPORTS_W QuantizedPyramid
 {
 public:
   // Virtual destructor
@@ -101,21 +64,21 @@ class QuantizedPyramid
    * \param[out] dst The destination 8-bit image. For each pixel at most one bit is set,
    *                 representing its classification.
    */
-  virtual void quantize(Mat& dst) const =0;
+  CV_WRAP virtual void quantize(CV_OUT Mat& dst) const =0;
 
   /**
    * \brief Extract most discriminant features at current pyramid level to form a new template.
    *
    * \param[out] templ The new template.
    */
-  virtual bool extractTemplate(Template& templ) const =0;
+  CV_WRAP virtual bool extractTemplate(CV_OUT Template& templ) const =0;
 
   /**
    * \brief Go to the next pyramid level.
    *
    * \todo Allow pyramid scale factor other than 2
    */
-  virtual void pyrDown() =0;
+  CV_WRAP virtual void pyrDown() =0;
 
 protected:
   /// Candidate feature with a score
@@ -153,7 +116,7 @@ inline QuantizedPyramid::Candidate::Candidate(int x, int y, int label, float _sc
  *
  * \todo Max response, to allow optimization of summing (255/MAX) features as uint8
  */
-class CV_EXPORTS Modality
+class CV_EXPORTS_W Modality
 {
 public:
   // Virtual destructor
@@ -166,15 +129,15 @@ class CV_EXPORTS Modality
    * \param[in] mask Optional mask. If not empty, unmasked pixels are set to zero
    *                 in quantized image and cannot be extracted as features.
    */
-  Ptr<QuantizedPyramid> process(const Mat& src,
+  CV_WRAP Ptr<QuantizedPyramid> process(const Mat& src,
                     const Mat& mask = Mat()) const
   {
     return processImpl(src, mask);
   }
 
-  virtual String name() const =0;
+  CV_WRAP virtual String name() const =0;
 
-  virtual void read(const FileNode& fn) =0;
+  CV_WRAP virtual void read(const FileNode& fn) =0;
   virtual void write(FileStorage& fs) const =0;
 
   /**
@@ -184,12 +147,12 @@ class CV_EXPORTS Modality
    * - "ColorGradient"
    * - "DepthNormal"
    */
-  static Ptr<Modality> create(const String& modality_type);
+  CV_WRAP static Ptr<Modality> create(const String& modality_type);
 
   /**
    * \brief Load a modality from file.
    */
-  static Ptr<Modality> create(const FileNode& fn);
+  CV_WRAP static Ptr<Modality> create(const FileNode& fn);
 
 protected:
   // Indirection is because process() has a default parameter.
@@ -200,7 +163,7 @@ class CV_EXPORTS Modality
 /**
  * \brief Modality that computes quantized gradient orientations from a color image.
  */
-class CV_EXPORTS ColorGradient : public Modality
+class CV_EXPORTS_W ColorGradient : public Modality
 {
 public:
   /**
@@ -218,24 +181,26 @@ class CV_EXPORTS ColorGradient : public Modality
    */
   ColorGradient(float weak_threshold, size_t num_features, float strong_threshold);
 
-  virtual String name() const;
+  CV_WRAP static Ptr<ColorGradient> create(float weak_threshold, size_t num_features, float strong_threshold);
 
-  virtual void read(const FileNode& fn);
-  virtual void write(FileStorage& fs) const;
+  virtual String name() const CV_OVERRIDE;
 
-  float weak_threshold;
-  size_t num_features;
-  float strong_threshold;
+  virtual void read(const FileNode& fn) CV_OVERRIDE;
+  virtual void write(FileStorage& fs) const CV_OVERRIDE;
+
+  CV_PROP float weak_threshold;
+  CV_PROP size_t num_features;
+  CV_PROP float strong_threshold;
 
 protected:
   virtual Ptr<QuantizedPyramid> processImpl(const Mat& src,
-                        const Mat& mask) const;
+                        const Mat& mask) const CV_OVERRIDE;
 };
 
 /**
  * \brief Modality that computes quantized surface normals from a dense depth map.
  */
-class CV_EXPORTS DepthNormal : public Modality
+class CV_EXPORTS_W DepthNormal : public Modality
 {
 public:
   /**
@@ -256,36 +221,48 @@ class CV_EXPORTS DepthNormal : public Modality
   DepthNormal(int distance_threshold, int difference_threshold, size_t num_features,
               int extract_threshold);
 
-  virtual String name() const;
+  CV_WRAP static Ptr<DepthNormal> create(int distance_threshold, int difference_threshold,
+                                         size_t num_features, int extract_threshold);
+
+  virtual String name() const CV_OVERRIDE;
 
-  virtual void read(const FileNode& fn);
-  virtual void write(FileStorage& fs) const;
+  virtual void read(const FileNode& fn) CV_OVERRIDE;
+  virtual void write(FileStorage& fs) const CV_OVERRIDE;
 
-  int distance_threshold;
-  int difference_threshold;
-  size_t num_features;
-  int extract_threshold;
+  CV_PROP int distance_threshold;
+  CV_PROP int difference_threshold;
+  CV_PROP size_t num_features;
+  CV_PROP int extract_threshold;
 
 protected:
   virtual Ptr<QuantizedPyramid> processImpl(const Mat& src,
-                        const Mat& mask) const;
+                        const Mat& mask) const CV_OVERRIDE;
 };
 
 /**
  * \brief Debug function to colormap a quantized image for viewing.
  */
-void colormap(const Mat& quantized, Mat& dst);
+CV_EXPORTS_W void colormap(const Mat& quantized, CV_OUT Mat& dst);
+
+/**
+ * \brief Debug function to draw linemod features
+ * @param img
+ * @param templates see @ref Detector::addTemplate
+ * @param tl template bbox top-left offset see @ref Detector::addTemplate
+ * @param size marker size see @ref cv::drawMarker
+ */
+CV_EXPORTS_W void drawFeatures(InputOutputArray img, const std::vector<Template>& templates, const Point2i& tl, int size = 10);
 
 /**
  * \brief Represents a successful template match.
  */
-struct CV_EXPORTS Match
+struct CV_EXPORTS_W_SIMPLE Match
 {
-  Match()
+  CV_WRAP Match()
   {
   }
 
-  Match(int x, int y, float similarity, const String& class_id, int template_id);
+  CV_WRAP Match(int x, int y, float similarity, const String& class_id, int template_id);
 
   /// Sort matches with high similarity to the front
   bool operator<(const Match& rhs) const
@@ -302,11 +279,11 @@ struct CV_EXPORTS Match
     return x == rhs.x && y == rhs.y && similarity == rhs.similarity && class_id == rhs.class_id;
   }
 
-  int x;
-  int y;
-  float similarity;
-  String class_id;
-  int template_id;
+  CV_PROP_RW int x;
+  CV_PROP_RW int y;
+  CV_PROP_RW float similarity;
+  CV_PROP_RW String class_id;
+  CV_PROP_RW int template_id;
 };
 
 inline
@@ -318,13 +295,13 @@ Match::Match(int _x, int _y, float _similarity, const String& _class_id, int _te
  * \brief Object detector using the LINE template matching algorithm with any set of
  * modalities.
  */
-class CV_EXPORTS Detector
+class CV_EXPORTS_W Detector
 {
 public:
   /**
    * \brief Empty constructor, initialize with read().
    */
-  Detector();
+  CV_WRAP Detector();
 
   /**
    * \brief Constructor.
@@ -333,7 +310,7 @@ class CV_EXPORTS Detector
    * \param T_pyramid        Value of the sampling step T at each pyramid level. The
    *                         number of pyramid levels is T_pyramid.size().
    */
-  Detector(const std::vector< Ptr<Modality> >& modalities, const std::vector<int>& T_pyramid);
+  CV_WRAP Detector(const std::vector< Ptr<Modality> >& modalities, const std::vector<int>& T_pyramid);
 
   /**
    * \brief Detect objects by template matching.
@@ -350,7 +327,7 @@ class CV_EXPORTS Detector
    *                       the same size as sources.  Each element must be
    *                       empty or the same size as its corresponding source.
    */
-  void match(const std::vector<Mat>& sources, float threshold, std::vector<Match>& matches,
+  CV_WRAP void match(const std::vector<Mat>& sources, float threshold, CV_OUT std::vector<Match>& matches,
              const std::vector<String>& class_ids = std::vector<String>(),
              OutputArrayOfArrays quantized_images = noArray(),
              const std::vector<Mat>& masks = std::vector<Mat>()) const;
@@ -365,13 +342,13 @@ class CV_EXPORTS Detector
    *
    * \return Template ID, or -1 if failed to extract a valid template.
    */
-  int addTemplate(const std::vector<Mat>& sources, const String& class_id,
-          const Mat& object_mask, Rect* bounding_box = NULL);
+  CV_WRAP int addTemplate(const std::vector<Mat>& sources, const String& class_id,
+          const Mat& object_mask, CV_OUT Rect* bounding_box = NULL);
 
   /**
    * \brief Add a new object template computed by external means.
    */
-  int addSyntheticTemplate(const std::vector<Template>& templates, const String& class_id);
+  CV_WRAP int addSyntheticTemplate(const std::vector<Template>& templates, const String& class_id);
 
   /**
    * \brief Get the modalities used by this detector.
@@ -379,17 +356,17 @@ class CV_EXPORTS Detector
    * You are not permitted to add/remove modalities, but you may dynamic_cast them to
    * tweak parameters.
    */
-  const std::vector< Ptr<Modality> >& getModalities() const { return modalities; }
+  CV_WRAP const std::vector< Ptr<Modality> >& getModalities() const { return modalities; }
 
   /**
    * \brief Get sampling step T at pyramid_level.
    */
-  int getT(int pyramid_level) const { return T_at_level[pyramid_level]; }
+  CV_WRAP int getT(int pyramid_level) const { return T_at_level[pyramid_level]; }
 
   /**
    * \brief Get number of pyramid levels used by this detector.
    */
-  int pyramidLevels() const { return pyramid_levels; }
+  CV_WRAP int pyramidLevels() const { return pyramid_levels; }
 
   /**
    * \brief Get the template pyramid identified by template_id.
@@ -397,23 +374,23 @@ class CV_EXPORTS Detector
    * For example, with 2 modalities (Gradient, Normal) and two pyramid levels
    * (L0, L1), the order is (GradientL0, NormalL0, GradientL1, NormalL1).
    */
-  const std::vector<Template>& getTemplates(const String& class_id, int template_id) const;
+  CV_WRAP const std::vector<Template>& getTemplates(const String& class_id, int template_id) const;
 
-  int numTemplates() const;
-  int numTemplates(const String& class_id) const;
-  int numClasses() const { return static_cast<int>(class_templates.size()); }
+  CV_WRAP int numTemplates() const;
+  CV_WRAP int numTemplates(const String& class_id) const;
+  CV_WRAP int numClasses() const { return static_cast<int>(class_templates.size()); }
 
-  std::vector<String> classIds() const;
+  CV_WRAP std::vector<String> classIds() const;
 
-  void read(const FileNode& fn);
+  CV_WRAP void read(const FileNode& fn);
   void write(FileStorage& fs) const;
 
   String readClass(const FileNode& fn, const String &class_id_override = "");
   void writeClass(const String& class_id, FileStorage& fs) const;
 
-  void readClasses(const std::vector<String>& class_ids,
+  CV_WRAP void readClasses(const std::vector<String>& class_ids,
                    const String& format = "templates_%s.yml.gz");
-  void writeClasses(const String& format = "templates_%s.yml.gz") const;
+  CV_WRAP void writeClasses(const String& format = "templates_%s.yml.gz") const;
 
 protected:
   std::vector< Ptr<Modality> > modalities;
@@ -440,7 +417,7 @@ class CV_EXPORTS Detector
  *
  * Default parameter settings suitable for VGA images.
  */
-CV_EXPORTS Ptr<Detector> getDefaultLINE();
+CV_EXPORTS_W Ptr<linemod::Detector> getDefaultLINE();
 
 /**
  * \brief Factory function for detector using LINE-MOD algorithm with color gradients
@@ -448,7 +425,7 @@ CV_EXPORTS Ptr<Detector> getDefaultLINE();
  *
  * Default parameter settings suitable for VGA images.
  */
-CV_EXPORTS Ptr<Detector> getDefaultLINEMOD();
+CV_EXPORTS_W Ptr<linemod::Detector> getDefaultLINEMOD();
 
 //! @}
 
diff --git a/IPL/include/opencv/opencv2/saliency/saliencyBaseClasses.hpp b/IPL/include/opencv/opencv2/saliency/saliencyBaseClasses.hpp
index 84b4d8f..06fb4e2 100644
--- a/IPL/include/opencv/opencv2/saliency/saliencyBaseClasses.hpp
+++ b/IPL/include/opencv/opencv2/saliency/saliencyBaseClasses.hpp
@@ -59,7 +59,7 @@ namespace saliency
 
 /************************************ Saliency Base Class ************************************/
 
-class CV_EXPORTS Saliency : public virtual Algorithm
+class CV_EXPORTS_W Saliency : public virtual Algorithm
 {
  public:
   /**
@@ -67,24 +67,13 @@ class CV_EXPORTS Saliency : public virtual Algorithm
    */
   virtual ~Saliency();
 
-  /**
-   * \brief Create Saliency by saliency type.
-   */
-  static Ptr<Saliency> create( const String& saliencyType );
-
   /**
    * \brief Compute the saliency
    * \param image        The image.
    * \param saliencyMap      The computed saliency map.
    * \return true if the saliency map is computed, false otherwise
    */
-  bool computeSaliency( InputArray image, OutputArray saliencyMap );
-
-  /**
-   * \brief Get the name of the specific saliency type
-   * \return The name of the tracker initializer
-   */
-  String getClassName() const;
+  CV_WRAP bool computeSaliency( InputArray image, OutputArray saliencyMap );
 
  protected:
 
@@ -93,7 +82,7 @@ class CV_EXPORTS Saliency : public virtual Algorithm
 };
 
 /************************************ Static Saliency Base Class ************************************/
-class CV_EXPORTS StaticSaliency : public virtual Saliency
+class CV_EXPORTS_W StaticSaliency : public virtual Saliency
 {
  public:
 
@@ -104,35 +93,35 @@ class CV_EXPORTS StaticSaliency : public virtual Saliency
     targets, a segmentation by clustering is performed, using *K-means algorithm*. Then, to gain a
     binary representation of clustered saliency map, since values of the map can vary according to
     the characteristics of frame under analysis, it is not convenient to use a fixed threshold. So,
-    *Otsu’s algorithm* is used, which assumes that the image to be thresholded contains two classes
+    *Otsu's algorithm* is used, which assumes that the image to be thresholded contains two classes
     of pixels or bi-modal histograms (e.g. foreground and back-ground pixels); later on, the
     algorithm calculates the optimal threshold separating those two classes, so that their
     intra-class variance is minimal.
 
-    @param saliencyMap the saliency map obtained through one of the specialized algorithms
-    @param binaryMap the binary map
+    @param _saliencyMap the saliency map obtained through one of the specialized algorithms
+    @param _binaryMap the binary map
      */
-  bool computeBinaryMap( const Mat& saliencyMap, Mat& binaryMap );
+  CV_WRAP bool computeBinaryMap( InputArray _saliencyMap, OutputArray _binaryMap );
  protected:
-  virtual bool computeSaliencyImpl( InputArray image, OutputArray saliencyMap )=0;
+  virtual bool computeSaliencyImpl( InputArray image, OutputArray saliencyMap ) CV_OVERRIDE = 0;
 
 };
 
 /************************************ Motion Saliency Base Class ************************************/
-class CV_EXPORTS MotionSaliency : public virtual Saliency
+class CV_EXPORTS_W MotionSaliency : public virtual Saliency
 {
 
  protected:
-  virtual bool computeSaliencyImpl( InputArray image, OutputArray saliencyMap )=0;
+  virtual bool computeSaliencyImpl( InputArray image, OutputArray saliencyMap ) CV_OVERRIDE = 0;
 
 };
 
 /************************************ Objectness Base Class ************************************/
-class CV_EXPORTS Objectness : public virtual Saliency
+class CV_EXPORTS_W Objectness : public virtual Saliency
 {
 
  protected:
-  virtual bool computeSaliencyImpl( InputArray image, OutputArray saliencyMap )=0;
+  virtual bool computeSaliencyImpl( InputArray image, OutputArray saliencyMap ) CV_OVERRIDE = 0;
 
 };
 
diff --git a/IPL/include/opencv/opencv2/saliency/saliencySpecializedClasses.hpp b/IPL/include/opencv/opencv2/saliency/saliencySpecializedClasses.hpp
index 6a5f7a3..c3149bf 100644
--- a/IPL/include/opencv/opencv2/saliency/saliencySpecializedClasses.hpp
+++ b/IPL/include/opencv/opencv2/saliency/saliencySpecializedClasses.hpp
@@ -42,11 +42,11 @@
 #ifndef __OPENCV_SALIENCY_SPECIALIZED_CLASSES_HPP__
 #define __OPENCV_SALIENCY_SPECIALIZED_CLASSES_HPP__
 
-//#include "opencv2/saliency/kyheader.hpp"
 #include <cstdio>
 #include <string>
 #include <iostream>
 #include <stdint.h>
+#include "saliencyBaseClasses.hpp"
 #include "opencv2/core.hpp"
 
 namespace cv
@@ -66,40 +66,95 @@ pre-attentive visual search. The algorithm analyze the log spectrum of each imag
 spectral residual. Then transform the spectral residual to spatial domain to obtain the saliency
 map, which suggests the positions of proto-objects.
  */
-class CV_EXPORTS StaticSaliencySpectralResidual : public StaticSaliency
+class CV_EXPORTS_W StaticSaliencySpectralResidual : public StaticSaliency
 {
 public:
 
   StaticSaliencySpectralResidual();
   virtual ~StaticSaliencySpectralResidual();
 
-  void read( const FileNode& fn );
-  void write( FileStorage& fs ) const;
+  CV_WRAP static Ptr<StaticSaliencySpectralResidual> create()
+  {
+    return makePtr<StaticSaliencySpectralResidual>();
+  }
+
+  CV_WRAP bool computeSaliency( InputArray image, OutputArray saliencyMap )
+  {
+    if( image.empty() )
+      return false;
+
+    return computeSaliencyImpl( image, saliencyMap );
+  }
 
-  int getImageWidth() const
+  CV_WRAP void read( const FileNode& fn ) CV_OVERRIDE;
+  void write( FileStorage& fs ) const CV_OVERRIDE;
+
+  CV_WRAP int getImageWidth() const
   {
     return resImWidth;
   }
-  inline void setImageWidth(int val)
+  CV_WRAP inline void setImageWidth(int val)
   {
     resImWidth = val;
   }
-  int getImageHeight() const
+  CV_WRAP int getImageHeight() const
   {
     return resImHeight;
   }
-  void setImageHeight(int val)
+  CV_WRAP void setImageHeight(int val)
   {
     resImHeight = val;
   }
 
 protected:
-  bool computeSaliencyImpl( InputArray image, OutputArray saliencyMap );
-  int resImWidth;
-  int resImHeight;
+  bool computeSaliencyImpl( InputArray image, OutputArray saliencyMap ) CV_OVERRIDE;
+  CV_PROP_RW int resImWidth;
+  CV_PROP_RW int resImHeight;
 
 };
 
+
+/** @brief the Fine Grained Saliency approach from @cite FGS
+
+This method calculates saliency based on center-surround differences.
+High resolution saliency maps are generated in real time by using integral images.
+ */
+class CV_EXPORTS_W StaticSaliencyFineGrained : public StaticSaliency
+{
+public:
+
+  StaticSaliencyFineGrained();
+
+  CV_WRAP static Ptr<StaticSaliencyFineGrained> create()
+  {
+    return makePtr<StaticSaliencyFineGrained>();
+  }
+
+  CV_WRAP bool computeSaliency( InputArray image, OutputArray saliencyMap )
+  {
+    if( image.empty() )
+      return false;
+
+    return computeSaliencyImpl( image, saliencyMap );
+  }
+  virtual ~StaticSaliencyFineGrained();
+
+protected:
+  bool computeSaliencyImpl( InputArray image, OutputArray saliencyMap ) CV_OVERRIDE;
+
+private:
+  void calcIntensityChannel(Mat src, Mat dst);
+  void copyImage(Mat src, Mat dst);
+  void getIntensityScaled(Mat integralImage, Mat gray, Mat saliencyOn, Mat saliencyOff, int neighborhood);
+  float getMean(Mat srcArg, Point2i PixArg, int neighbourhood, int centerVal);
+  void mixScales(Mat *saliencyOn, Mat intensityOn, Mat *saliencyOff, Mat intensityOff, const int numScales);
+  void mixOnOff(Mat intensityOn, Mat intensityOff, Mat intensity);
+  void getIntensity(Mat srcArg, Mat dstArg,  Mat dstOnArg,  Mat dstOffArg, bool generateOnOff);
+};
+
+
+
+
 /************************************ Specific Motion Saliency Specialized Classes ************************************/
 
 /*!
@@ -111,36 +166,49 @@ class CV_EXPORTS StaticSaliencySpectralResidual : public StaticSaliency
  */
 /** @brief the Fast Self-tuning Background Subtraction Algorithm from @cite BinWangApr2014
  */
-class CV_EXPORTS MotionSaliencyBinWangApr2014 : public MotionSaliency
+class CV_EXPORTS_W MotionSaliencyBinWangApr2014 : public MotionSaliency
 {
 public:
   MotionSaliencyBinWangApr2014();
   virtual ~MotionSaliencyBinWangApr2014();
 
+  CV_WRAP static Ptr<MotionSaliencyBinWangApr2014> create()
+  {
+    return makePtr<MotionSaliencyBinWangApr2014>();
+  }
+
+  CV_WRAP bool computeSaliency( InputArray image, OutputArray saliencyMap )
+  {
+    if( image.empty() )
+      return false;
+
+    return computeSaliencyImpl( image, saliencyMap );
+  }
+
   /** @brief This is a utility function that allows to set the correct size (taken from the input image) in the
     corresponding variables that will be used to size the data structures of the algorithm.
     @param W width of input image
     @param H height of input image
   */
-  void setImagesize( int W, int H );
+  CV_WRAP void setImagesize( int W, int H );
   /** @brief This function allows the correct initialization of all data structures that will be used by the
     algorithm.
   */
-  bool init();
+  CV_WRAP bool init();
 
-  int getImageWidth() const
+  CV_WRAP int getImageWidth() const
   {
     return imageWidth;
   }
-  inline void setImageWidth(int val)
+  CV_WRAP inline void setImageWidth(int val)
   {
     imageWidth = val;
   }
-  int getImageHeight() const
+  CV_WRAP int getImageHeight() const
   {
     return imageHeight;
   }
-  void setImageHeight(int val)
+  CV_WRAP void setImageHeight(int val)
   {
     imageHeight = val;
   }
@@ -154,7 +222,7 @@ class CV_EXPORTS MotionSaliencyBinWangApr2014 : public MotionSaliency
        The saliency map is given by a single *Mat* (one for each frame of an hypothetical video
         stream).
   */
-  bool computeSaliencyImpl( InputArray image, OutputArray saliencyMap );
+  bool computeSaliencyImpl( InputArray image, OutputArray saliencyMap ) CV_OVERRIDE;
 
 private:
 
@@ -166,19 +234,32 @@ class CV_EXPORTS MotionSaliencyBinWangApr2014 : public MotionSaliency
   bool templateOrdering();
   bool templateReplacement( const Mat& finalBFMask, const Mat& image );
 
+  // Decision threshold adaptation and Activity control functions
+  bool activityControl(const Mat& current_noisePixelsMask);
+  bool decisionThresholdAdaptation();
+
   // changing structure
   std::vector<Ptr<Mat> > backgroundModel;// The vector represents the background template T0---TK of reference paper.
   // Matrices are two-channel matrix. In the first layer there are the B (background value)
   // for each pixel. In the second layer, there are the C (efficacy) value for each pixel
   Mat potentialBackground;// Two channel Matrix. For each pixel, in the first level there are the Ba value (potential background value)
                           // and in the secon level there are the Ca value, the counter for each potential value.
-  Mat epslonPixelsValue;  // epslon threshold
+  Mat epslonPixelsValue;// epslon threshold
+
+  Mat activityPixelsValue;// Activity level of each pixel
+
+  //vector<Mat> noisePixelMask; // We define a ‘noise-pixel’ as a pixel that has been classified as a foreground pixel during the full resolution
+  Mat noisePixelMask;// We define a ‘noise-pixel’ as a pixel that has been classified as a foreground pixel during the full resolution
+  // detection process; however, after the low resolution detection, it has become a
+  // background pixel. The matrix is a two-channel matrix. In the first layer there is the mask (the identified noise-pixels are set to 1 while other pixels are 0)
+  // for each pixel. In the second layer, there is the value of activity level A for each pixel.
 
   //fixed parameter
+  bool activityControlFlag;
   bool neighborhoodCheck;
   int N_DS;// Number of template to be downsampled and used in lowResolutionDetection function
-  int imageWidth;// Width of input image
-  int imageHeight;//Height of input image
+  CV_PROP_RW int imageWidth;// Width of input image
+  CV_PROP_RW int imageHeight;//Height of input image
   int K;// Number of background model template
   int N;// NxN is the size of the block for downsampling in the lowlowResolutionDetection
   float alpha;// Learning rate
@@ -189,6 +270,13 @@ class CV_EXPORTS MotionSaliencyBinWangApr2014 : public MotionSaliency
             // long-term template, regardless of any subsequent background changes. A relatively large (eg gamma=3) will
             //restrain the generation of ghosts.
 
+  uchar Ainc;// Activity Incrementation;
+  int Bmax;// Upper-bound value for pixel activity
+  int Bth;// Max activity threshold
+  int Binc, Bdec;// Threshold for pixel-level decision threshold (epslon) adaptation
+  float deltaINC, deltaDEC;// Increment-decrement value for epslon adaptation
+  int epslonMIN, epslonMAX;// Range values for epslon threshold
+
 };
 
 /************************************ Specific Objectness Specialized Classes ************************************/
@@ -200,15 +288,28 @@ class CV_EXPORTS MotionSaliencyBinWangApr2014 : public MotionSaliency
 
 /** @brief the Binarized normed gradients algorithm from @cite BING
  */
-class CV_EXPORTS ObjectnessBING : public Objectness
+class CV_EXPORTS_W ObjectnessBING : public Objectness
 {
 public:
 
   ObjectnessBING();
   virtual ~ObjectnessBING();
 
-  void read();
-  void write() const;
+  CV_WRAP static Ptr<ObjectnessBING> create()
+  {
+    return makePtr<ObjectnessBING>();
+  }
+
+  CV_WRAP bool computeSaliency( InputArray image, OutputArray saliencyMap )
+  {
+    if( image.empty() )
+      return false;
+
+    return computeSaliencyImpl( image, saliencyMap );
+  }
+
+  CV_WRAP void read();
+  CV_WRAP void write() const;
 
   /** @brief Return the list of the rectangles' objectness value,
 
@@ -216,13 +317,13 @@ class CV_EXPORTS ObjectnessBING : public Objectness
     computeSaliencyImpl function). The bigger value these scores are, it is more likely to be an
     object window.
      */
-  std::vector<float> getobjectnessValues();
+  CV_WRAP std::vector<float> getobjectnessValues();
 
   /** @brief This is a utility function that allows to set the correct path from which the algorithm will load
     the trained model.
     @param trainingPath trained model path
      */
-  void setTrainingPath( std::string trainingPath );
+  CV_WRAP void setTrainingPath( const String& trainingPath );
 
   /** @brief This is a utility function that allows to set an arbitrary path in which the algorithm will save the
     optional results
@@ -231,29 +332,29 @@ class CV_EXPORTS ObjectnessBING : public Objectness
     each row).
     @param resultsDir results' folder path
      */
-  void setBBResDir( std::string resultsDir );
+  CV_WRAP void setBBResDir( const String& resultsDir );
 
-  double getBase() const
+  CV_WRAP double getBase() const
   {
     return _base;
   }
-  inline void setBase(double val)
+  CV_WRAP inline void setBase(double val)
   {
     _base = val;
   }
-  int getNSS() const
+  CV_WRAP int getNSS() const
   {
     return _NSS;
   }
-  void setNSS(int val)
+  CV_WRAP void setNSS(int val)
   {
     _NSS = val;
   }
-  int getW() const
+  CV_WRAP int getW() const
   {
     return _W;
   }
-  void setW(int val)
+  CV_WRAP void setW(int val)
   {
     _W = val;
   }
@@ -268,7 +369,7 @@ class CV_EXPORTS ObjectnessBING : public Objectness
     specialized algorithm, the objectnessBoundingBox is a *vector\<Vec4i\>*. Each bounding box is
     represented by a *Vec4i* for (minX, minY, maxX, maxY).
      */
-  bool computeSaliencyImpl( InputArray image, OutputArray objectnessBoundingBox );
+  bool computeSaliencyImpl( InputArray image, OutputArray objectnessBoundingBox ) CV_OVERRIDE;
 
 private:
 
@@ -336,7 +437,7 @@ class CV_EXPORTS ObjectnessBING : public Objectness
   int _Clr;//
   static const char* _clrName[3];
 
-  // Names and paths to read model and to store results
+// Names and paths to read model and to store results
   std::string _modelName, _bbResDir, _trainingPath, _resultsDir;
 
   std::vector<int> _svmSzIdxs;// Indexes of active size. It's equal to _svmFilters.size() and _svmReW1f.rows
@@ -344,12 +445,12 @@ class CV_EXPORTS ObjectnessBING : public Objectness
   FilterTIG _tigF;// TIG filter
   Mat _svmReW1f;// Re-weight parameters learned at stage II.
 
-  // List of the rectangles' objectness value, in the same order as
-  // the  vector<Vec4i> objectnessBoundingBox returned by the algorithm (in computeSaliencyImpl function)
+// List of the rectangles' objectness value, in the same order as
+// the  vector<Vec4i> objectnessBoundingBox returned by the algorithm (in computeSaliencyImpl function)
   std::vector<float> objectnessValues;
 
 private:
-  // functions
+// functions
 
   inline static float LoG( float x, float y, float delta )
   {
@@ -357,17 +458,17 @@ class CV_EXPORTS ObjectnessBING : public Objectness
     return -1.0f / ( (float) ( CV_PI ) * pow( delta, 4 ) ) * ( 1 + d ) * exp( d );
   }  // Laplacian of Gaussian
 
-  // Read matrix from binary file
+// Read matrix from binary file
   static bool matRead( const std::string& filename, Mat& M );
 
   void setColorSpace( int clr = MAXBGR );
 
-  // Load trained model.
-  int loadTrainedModel( std::string modelName = "" );// Return -1, 0, or 1 if partial, none, or all loaded
+// Load trained model.
+  int loadTrainedModel();// Return -1, 0, or 1 if partial, none, or all loaded
 
-  // Get potential bounding boxes, each of which is represented by a Vec4i for (minX, minY, maxX, maxY).
-  // The trained model should be prepared before calling this function: loadTrainedModel() or trainStageI() + trainStageII().
-  // Use numDet to control the final number of proposed bounding boxes, and number of per size (scale and aspect ratio)
+// Get potential bounding boxes, each of which is represented by a Vec4i for (minX, minY, maxX, maxY).
+// The trained model should be prepared before calling this function: loadTrainedModel() or trainStageI() + trainStageII().
+// Use numDet to control the final number of proposed bounding boxes, and number of per size (scale and aspect ratio)
   void getObjBndBoxes( Mat &img3u, ValStructVec<float, Vec4i> &valBoxes, int numDetPerSize = 120 );
   void getObjBndBoxesForSingleImage( Mat img, ValStructVec<float, Vec4i> &boxes, int numDetPerSize );
 
@@ -379,7 +480,7 @@ class CV_EXPORTS ObjectnessBING : public Objectness
   void predictBBoxSI( Mat &mag3u, ValStructVec<float, Vec4i> &valBoxes, std::vector<int> &sz, int NUM_WIN_PSZ = 100, bool fast = true );
   void predictBBoxSII( ValStructVec<float, Vec4i> &valBoxes, const std::vector<int> &sz );
 
-  // Calculate the image gradient: center option as in VLFeat
+// Calculate the image gradient: center option as in VLFeat
   void gradientMag( Mat &imgBGR3u, Mat &mag1u );
 
   static void gradientRGB( Mat &bgr3u, Mat &mag1u );
@@ -398,7 +499,7 @@ class CV_EXPORTS ObjectnessBING : public Objectness
     return abs( u[0] - v[0] ) + abs( u[1] - v[1] ) + abs( u[2] - v[2] );
   }
 
-  //Non-maximal suppress
+//Non-maximal suppress
   static void nonMaxSup( Mat &matchCost1f, ValStructVec<float, Point> &matchCost, int NSS = 1, int maxPoint = 50, bool fast = true );
 
 };
diff --git a/IPL/include/opencv/opencv2/shape.hpp b/IPL/include/opencv/opencv2/shape.hpp
index 6999476..f302b6b 100644
--- a/IPL/include/opencv/opencv2/shape.hpp
+++ b/IPL/include/opencv/opencv2/shape.hpp
@@ -40,8 +40,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_SHAPE_HPP__
-#define __OPENCV_SHAPE_HPP__
+#ifndef OPENCV_SHAPE_HPP
+#define OPENCV_SHAPE_HPP
 
 #include "opencv2/shape/emdL1.hpp"
 #include "opencv2/shape/shape_transformer.hpp"
diff --git a/IPL/include/opencv/opencv2/shape/emdL1.hpp b/IPL/include/opencv/opencv2/shape/emdL1.hpp
index 1dfa758..a15d68c 100644
--- a/IPL/include/opencv/opencv2/shape/emdL1.hpp
+++ b/IPL/include/opencv/opencv2/shape/emdL1.hpp
@@ -40,8 +40,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_EMD_L1_HPP__
-#define __OPENCV_EMD_L1_HPP__
+#ifndef OPENCV_EMD_L1_HPP
+#define OPENCV_EMD_L1_HPP
 
 #include "opencv2/core.hpp"
 
diff --git a/IPL/include/opencv/opencv2/shape/hist_cost.hpp b/IPL/include/opencv/opencv2/shape/hist_cost.hpp
index 15c0a87..21d0d68 100644
--- a/IPL/include/opencv/opencv2/shape/hist_cost.hpp
+++ b/IPL/include/opencv/opencv2/shape/hist_cost.hpp
@@ -41,8 +41,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_HIST_COST_HPP__
-#define __OPENCV_HIST_COST_HPP__
+#ifndef OPENCV_HIST_COST_HPP
+#define OPENCV_HIST_COST_HPP
 
 #include "opencv2/imgproc.hpp"
 
diff --git a/IPL/include/opencv/opencv2/shape/shape_distance.hpp b/IPL/include/opencv/opencv2/shape/shape_distance.hpp
index 4b0c3b5..94e20bc 100644
--- a/IPL/include/opencv/opencv2/shape/shape_distance.hpp
+++ b/IPL/include/opencv/opencv2/shape/shape_distance.hpp
@@ -41,8 +41,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_SHAPE_SHAPE_DISTANCE_HPP__
-#define __OPENCV_SHAPE_SHAPE_DISTANCE_HPP__
+#ifndef OPENCV_SHAPE_SHAPE_DISTANCE_HPP
+#define OPENCV_SHAPE_SHAPE_DISTANCE_HPP
 #include "opencv2/core.hpp"
 #include "opencv2/shape/hist_cost.hpp"
 #include "opencv2/shape/shape_transformer.hpp"
@@ -53,6 +53,9 @@ namespace cv
 //! @addtogroup shape
 //! @{
 
+/** @example modules/shape/samples/shape_example.cpp
+An example using shape distance algorithm
+*/
 /** @brief Abstract base class for shape distance algorithms.
  */
 class CV_EXPORTS_W ShapeDistanceExtractor : public Algorithm
diff --git a/IPL/include/opencv/opencv2/shape/shape_transformer.hpp b/IPL/include/opencv/opencv2/shape/shape_transformer.hpp
index 2180613..3c3ce20 100644
--- a/IPL/include/opencv/opencv2/shape/shape_transformer.hpp
+++ b/IPL/include/opencv/opencv2/shape/shape_transformer.hpp
@@ -41,8 +41,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_SHAPE_SHAPE_TRANSFORM_HPP__
-#define __OPENCV_SHAPE_SHAPE_TRANSFORM_HPP__
+#ifndef OPENCV_SHAPE_SHAPE_TRANSFORM_HPP
+#define OPENCV_SHAPE_SHAPE_TRANSFORM_HPP
 #include <vector>
 #include "opencv2/core.hpp"
 #include "opencv2/imgproc.hpp"
@@ -92,7 +92,7 @@ class CV_EXPORTS_W ShapeTransformer : public Algorithm
 
 /** @brief Definition of the transformation
 
-ocupied in the paper "Principal Warps: Thin-Plate Splines and Decomposition of Deformations", by
+occupied in the paper "Principal Warps: Thin-Plate Splines and Decomposition of Deformations", by
 F.L. Bookstein (PAMI 1989). :
  */
 class CV_EXPORTS_W ThinPlateSplineShapeTransformer : public ShapeTransformer
diff --git a/IPL/include/opencv/opencv2/stereo.hpp b/IPL/include/opencv/opencv2/stereo.hpp
index fc22938..9fd789d 100644
--- a/IPL/include/opencv/opencv2/stereo.hpp
+++ b/IPL/include/opencv/opencv2/stereo.hpp
@@ -45,10 +45,8 @@
 #define __OPENCV_STEREO_HPP__
 
 #include "opencv2/core.hpp"
-#include "opencv2/features2d.hpp"
-#include "opencv2/core/affine.hpp"
 #include "opencv2/stereo/descriptor.hpp"
-#include "opencv2/stereo/matching.hpp"
+#include <opencv2/stereo/quasi_dense_stereo.hpp>
 
 /**
 @defgroup stereo Stereo Correspondance Algorithms
@@ -61,8 +59,6 @@ namespace cv
     {
         //! @addtogroup stereo
         //! @{
-        //		 void correctMatches( InputArray F, InputArray points1, InputArray points2,
-        //	OutputArray newPoints1, OutputArray newPoints2 );
         /** @brief Filters off small noise blobs (speckles) in the disparity map
         @param img The input 16-bit signed disparity image
         @param newVal The disparity value used to paint-off the speckles
diff --git a/IPL/include/opencv/opencv2/stereo/descriptor.hpp b/IPL/include/opencv/opencv2/stereo/descriptor.hpp
index bdbd7ce..e7dfc28 100644
--- a/IPL/include/opencv/opencv2/stereo/descriptor.hpp
+++ b/IPL/include/opencv/opencv2/stereo/descriptor.hpp
@@ -1,452 +1,44 @@
-//By downloading, copying, installing or using the software you agree to this license.
-//If you do not agree to this license, do not download, install,
-//copy or use the software.
-//
-//
-//                          License Agreement
-//               For Open Source Computer Vision Library
-//                       (3-clause BSD License)
-//
-//Copyright (C) 2000-2015, Intel Corporation, all rights reserved.
-//Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved.
-//Copyright (C) 2009-2015, NVIDIA Corporation, all rights reserved.
-//Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved.
-//Copyright (C) 2015, OpenCV Foundation, all rights reserved.
-//Copyright (C) 2015, Itseez Inc., all rights reserved.
-//Third party copyrights are property of their respective owners.
-//
-//Redistribution and use in source and binary forms, with or without modification,
-//are permitted provided that the following conditions are met:
-//
-//  * Redistributions of source code must retain the above copyright notice,
-//    this list of conditions and the following disclaimer.
-//
-//  * Redistributions in binary form must reproduce the above copyright notice,
-//    this list of conditions and the following disclaimer in the documentation
-//    and/or other materials provided with the distribution.
-//
-//  * Neither the names of the copyright holders nor the names of the contributors
-//    may be used to endorse or promote products derived from this software
-//    without specific prior written permission.
-//
-//This software is provided by the copyright holders and contributors "as is" and
-//any express or implied warranties, including, but not limited to, the implied
-//warranties of merchantability and fitness for a particular purpose are disclaimed.
-//In no event shall copyright holders or contributors be liable for any direct,
-//indirect, incidental, special, exemplary, or consequential damages
-//(including, but not limited to, procurement of substitute goods or services;
-//loss of use, data, or profits; or business interruption) however caused
-//and on any theory of liability, whether in contract, strict liability,
-//or tort (including negligence or otherwise) arising in any way out of
-//the use of this software, even if advised of the possibility of such damage.
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
 
-/*****************************************************************************************************************\
-*   The interface contains the main descriptors that will be implemented in the descriptor class                  *
-\*****************************************************************************************************************/
+#ifndef _OPENCV_STEREO_DESCRIPTOR_HPP_
+#define _OPENCV_STEREO_DESCRIPTOR_HPP_
 
-#include <stdint.h>
-#ifndef _OPENCV_DESCRIPTOR_HPP_
-#define _OPENCV_DESCRIPTOR_HPP_
-#ifdef __cplusplus
+namespace cv { namespace stereo {
 
-namespace cv
-{
-    namespace stereo
-    {
-        //types of supported kernels
-        enum {
-            CV_DENSE_CENSUS, CV_SPARSE_CENSUS,
-            CV_CS_CENSUS, CV_MODIFIED_CS_CENSUS, CV_MODIFIED_CENSUS_TRANSFORM,
-            CV_MEAN_VARIATION, CV_STAR_KERNEL
-        };
-        //!Mean Variation is a robust kernel that compares a pixel
-        //!not just with the center but also with the mean of the window
-        template<int num_images>
-        struct MVKernel
-        {
-            uint8_t *image[num_images];
-            int *integralImage[num_images];
-            int stop;
-            MVKernel(){}
-            MVKernel(uint8_t **images, int **integral)
-            {
-                for(int i = 0; i < num_images; i++)
-                {
-                    image[i] = images[i];
-                    integralImage[i] = integral[i];
-                }
-                stop = num_images;
-            }
-            void operator()(int rrWidth,int w2, int rWidth, int jj, int j, int c[num_images]) const
-            {
-                (void)w2;
-                for (int i = 0; i < stop; i++)
-                {
-                    if (image[i][rrWidth + jj] > image[i][rWidth + j])
-                    {
-                        c[i] = c[i] + 1;
-                    }
-                    c[i] = c[i] << 1;
-                    if (integralImage[i][rrWidth + jj] > image[i][rWidth + j])
-                    {
-                        c[i] = c[i] + 1;
-                    }
-                    c[i] = c[i] << 1;
-                }
-            }
-        };
-        //!Compares pixels from a patch giving high weights to pixels in which
-        //!the intensity is higher. The other pixels receive a lower weight
-        template <int num_images>
-        struct MCTKernel
-        {
-            uint8_t *image[num_images];
-            int t,imageStop;
-            MCTKernel(){}
-            MCTKernel(uint8_t ** images, int threshold)
-            {
-                for(int i = 0; i < num_images; i++)
-                {
-                    image[i] = images[i];
-                }
-                imageStop = num_images;
-                t = threshold;
-            }
-            void operator()(int rrWidth,int w2, int rWidth, int jj, int j, int c[num_images]) const
-            {
-                (void)w2;
-                for(int i = 0; i < imageStop; i++)
-                {
-                    if (image[i][rrWidth + jj] > image[i][rWidth + j] - t)
-                    {
-                        c[i] = c[i] << 1;
-                        c[i] = c[i] + 1;
-                        c[i] = c[i] << 1;
-                        c[i] = c[i] + 1;
-                    }
-                    else if (image[i][rWidth + j] - t < image[i][rrWidth + jj] && image[i][rWidth + j] + t >= image[i][rrWidth + jj])
-                    {
-                        c[i] = c[i] << 2;
-                        c[i] = c[i] + 1;
-                    }
-                    else
-                    {
-                        c[i] <<= 2;
-                    }
-                }
-            }
-        };
-        //!A madified cs census that compares a pixel with the imediat neightbour starting
-        //!from the center
-        template<int num_images>
-        struct ModifiedCsCensus
-        {
-            uint8_t *image[num_images];
-            int n2;
-            int imageStop;
-            ModifiedCsCensus(){}
-            ModifiedCsCensus(uint8_t **images, int ker)
-            {
-                for(int i = 0; i < num_images; i++)
-                    image[i] = images[i];
-                imageStop = num_images;
-                n2 = ker;
-            }
-            void operator()(int rrWidth,int w2, int rWidth, int jj, int j, int c[num_images]) const
-            {
-                (void)j;
-                (void)rWidth;
-                for(int i = 0; i < imageStop; i++)
-                {
-                    if (image[i][(rrWidth + jj)] > image[i][(w2 + (jj + n2))])
-                    {
-                        c[i] = c[i] + 1;
-                    }
-                    c[i] = c[i] * 2;
-                }
-            }
-        };
-        //!A kernel in which a pixel is compared with the center of the window
-        template<int num_images>
-        struct CensusKernel
-        {
-            uint8_t *image[num_images];
-            int imageStop;
-            CensusKernel(){}
-            CensusKernel(uint8_t **images)
-            {
-                for(int i = 0; i < num_images; i++)
-                    image[i] = images[i];
-                imageStop = num_images;
-            }
-            void operator()(int rrWidth,int w2, int rWidth, int jj, int j, int c[num_images]) const
-            {
-                (void)w2;
-                for(int i = 0; i < imageStop; i++)
-                {
-                    ////compare a pixel with the center from the kernel
-                    if (image[i][rrWidth + jj] > image[i][rWidth + j])
-                    {
-                        c[i] += 1;
-                    }
-                    c[i] <<= 1;
-                }
-            }
-        };
-        //template clas which efficiently combines the descriptors
-        template <int step_start, int step_end, int step_inc,int nr_img, typename Kernel>
-        class CombinedDescriptor:public ParallelLoopBody
-        {
-        private:
-            int width, height,n2;
-            int stride_;
-            int *dst[nr_img];
-            Kernel kernel_;
-            int n2_stop;
-        public:
-            CombinedDescriptor(int w, int h,int stride, int k2, int **distance, Kernel kernel,int k2Stop)
-            {
-                width = w;
-                height = h;
-                n2 = k2;
-                stride_ = stride;
-                for(int i = 0; i < nr_img; i++)
-                    dst[i] = distance[i];
-                kernel_ = kernel;
-                n2_stop = k2Stop;
-            }
-            void operator()(const cv::Range &r) const {
-                for (int i = r.start; i <= r.end ; i++)
-                {
-                    int rWidth = i * stride_;
-                    for (int j = n2 + 2; j <= width - n2 - 2; j++)
-                    {
-                        int c[nr_img];
-                        memset(c,0,nr_img);
-                        for(int step = step_start; step <= step_end; step += step_inc)
-                        {
-                            for (int ii = - n2; ii <= + n2_stop; ii += step)
-                            {
-                                int rrWidth = (ii + i) * stride_;
-                                int rrWidthC = (ii + i + n2) * stride_;
-                                for (int jj = j - n2; jj <= j + n2; jj += step)
-                                {
-                                    if (ii != i || jj != j)
-                                    {
-                                        kernel_(rrWidth,rrWidthC, rWidth, jj, j,c);
-                                    }
-                                }
-                            }
-                        }
-                        for(int l = 0; l < nr_img; l++)
-                            dst[l][rWidth + j] = c[l];
-                    }
-                }
-            }
-        };
-        //!calculate the mean of every windowSizexWindwoSize block from the integral Image
-        //!this is a preprocessing for MV kernel
-        class MeanKernelIntegralImage : public ParallelLoopBody
-        {
-        private:
-            int *img;
-            int windowSize,width;
-            float scalling;
-            int *c;
-        public:
-            MeanKernelIntegralImage(const cv::Mat &image, int window,float scale, int *cost):
-                img((int *)image.data),windowSize(window) ,width(image.cols) ,scalling(scale) , c(cost){};
-            void operator()(const cv::Range &r) const{
-                for (int i = r.start; i <= r.end; i++)
-                {
-                    int iw = i * width;
-                    for (int j = windowSize + 1; j <= width - windowSize - 1; j++)
-                    {
-                        c[iw + j] = (int)((img[(i + windowSize - 1) * width + j + windowSize - 1] + img[(i - windowSize - 1) * width + j - windowSize - 1]
-                        - img[(i + windowSize) * width + j - windowSize] - img[(i - windowSize) * width + j + windowSize]) * scalling);
-                    }
-                }
-            }
-        };
-        //!implementation for the star kernel descriptor
-        template<int num_images>
-        class StarKernelCensus:public ParallelLoopBody
-        {
-        private:
-            uint8_t *image[num_images];
-            int *dst[num_images];
-            int n2, width, height, im_num,stride_;
-        public:
-            StarKernelCensus(const cv::Mat *img, int k2, int **distance)
-            {
-                for(int i = 0; i < num_images; i++)
-                {
-                    image[i] = img[i].data;
-                    dst[i] = distance[i];
-                }
-                n2 = k2;
-                width = img[0].cols;
-                height = img[0].rows;
-                im_num = num_images;
-                stride_ = (int)img[0].step;
-            }
-            void operator()(const cv::Range &r) const {
-                for (int i = r.start; i <= r.end ; i++)
-                {
-                    int rWidth = i * stride_;
-                    for (int j = n2; j <= width - n2; j++)
-                    {
-                        for(int d = 0 ; d < im_num; d++)
-                        {
-                            int c = 0;
-                            for(int step = 4; step > 0; step--)
-                            {
-                                for (int ii = i - step; ii <= i + step; ii += step)
-                                {
-                                    int rrWidth = ii * stride_;
-                                    for (int jj = j - step; jj <= j + step; jj += step)
-                                    {
-                                        if (image[d][rrWidth + jj] > image[d][rWidth + j])
-                                        {
-                                            c = c + 1;
-                                        }
-                                        c = c * 2;
-                                    }
-                                }
-                            }
-                            for (int ii = -1; ii <= +1; ii++)
-                            {
-                                int rrWidth = (ii + i) * stride_;
-                                if (i == -1)
-                                {
-                                    if (ii + i != i)
-                                    {
-                                        if (image[d][rrWidth + j] > image[d][rWidth + j])
-                                        {
-                                            c = c + 1;
-                                        }
-                                        c = c * 2;
-                                    }
-                                }
-                                else if (i == 0)
-                                {
-                                    for (int j2 = -1; j2 <= 1; j2 += 2)
-                                    {
-                                        if (ii + i != i)
-                                        {
-                                            if (image[d][rrWidth + j + j2] > image[d][rWidth + j])
-                                            {
-                                                c = c + 1;
-                                            }
-                                            c = c * 2;
-                                        }
-                                    }
-                                }
-                                else
-                                {
-                                    if (ii + i != i)
-                                    {
-                                        if (image[d][rrWidth + j] > image[d][rWidth + j])
-                                        {
-                                            c = c + 1;
-                                        }
-                                        c = c * 2;
-                                    }
-                                }
-                            }
-                            dst[d][rWidth + j] = c;
-                        }
-                    }
-                }
-            }
-        };
-        //!paralel implementation of the center symetric census
-        template <int num_images>
-        class SymetricCensus:public ParallelLoopBody
-        {
-        private:
-            uint8_t *image[num_images];
-            int *dst[num_images];
-            int n2, width, height, im_num,stride_;
-        public:
-            SymetricCensus(const cv::Mat *img, int k2, int **distance)
-            {
-                for(int i = 0; i < num_images; i++)
-                {
-                    image[i] = img[i].data;
-                    dst[i] = distance[i];
-                }
-                n2 = k2;
-                width = img[0].cols;
-                height = img[0].rows;
-                im_num = num_images;
-                stride_ = (int)img[0].step;
-            }
-            void operator()(const cv::Range &r) const {
-                for (int i = r.start; i <= r.end ; i++)
-                {
-                    int distV = i*stride_;
-                    for (int j = n2; j <= width - n2; j++)
-                    {
-                        for(int d = 0; d < im_num; d++)
-                        {
-                            int c = 0;
-                            //the classic center symetric census which compares the curent pixel with its symetric not its center.
-                            for (int ii = -n2; ii <= 0; ii++)
-                            {
-                                int rrWidth = (ii + i) * stride_;
-                                for (int jj = -n2; jj <= +n2; jj++)
-                                {
-                                    if (image[d][(rrWidth + (jj + j))] > image[d][((ii * (-1) + i) * width + (-1 * jj) + j)])
-                                    {
-                                        c = c + 1;
-                                    }
-                                    c = c * 2;
-                                    if(ii == 0 && jj < 0)
-                                    {
-                                        if (image[d][(i * width + (jj + j))] > image[d][(i * width + (-1 * jj) + j)])
-                                        {
-                                            c = c + 1;
-                                        }
-                                        c = c * 2;
-                                    }
-                                }
-                            }
-                            dst[d][(distV + j)] = c;
-                        }
-                    }
-                }
-            }
-        };
-        /**
-        Two variations of census applied on input images
-        Implementation of a census transform which is taking into account just the some pixels from the census kernel thus allowing for larger block sizes
-        **/
-        //void applyCensusOnImages(const cv::Mat &im1,const cv::Mat &im2, int kernelSize, cv::Mat &dist, cv::Mat &dist2, const int type);
-        CV_EXPORTS void censusTransform(const cv::Mat &image1, const cv::Mat &image2, int kernelSize, cv::Mat &dist1, cv::Mat &dist2, const int type);
-        //single image census transform
-        CV_EXPORTS void censusTransform(const cv::Mat &image1, int kernelSize, cv::Mat &dist1, const int type);
-        /**
-        STANDARD_MCT - Modified census which is memorizing for each pixel 2 bits and includes a tolerance to the pixel comparison
-        MCT_MEAN_VARIATION - Implementation of a modified census transform which is also taking into account the variation to the mean of the window not just the center pixel
-        **/
-        CV_EXPORTS void modifiedCensusTransform(const cv::Mat &img1, const cv::Mat &img2, int kernelSize, cv::Mat &dist1,cv::Mat &dist2, const int type, int t = 0 , const cv::Mat &IntegralImage1 = cv::Mat::zeros(100,100,CV_8UC1), const cv::Mat &IntegralImage2 = cv::Mat::zeros(100,100,CV_8UC1));
-        //single version of modified census transform descriptor
-        CV_EXPORTS void modifiedCensusTransform(const cv::Mat &img1, int kernelSize, cv::Mat &dist, const int type, int t = 0 ,const cv::Mat &IntegralImage = cv::Mat::zeros(100,100,CV_8UC1));
-        /**The classical center symetric census
-        A modified version of cs census which is comparing a pixel with its correspondent after the center
-        **/
-        CV_EXPORTS void symetricCensusTransform(const cv::Mat &img1, const cv::Mat &img2, int kernelSize, cv::Mat &dist1, cv::Mat &dist2, const int type);
-        //single version of census transform
-        CV_EXPORTS void symetricCensusTransform(const cv::Mat &img1, int kernelSize, cv::Mat &dist1, const int type);
-        //in a 9x9 kernel only certain positions are choosen
-        CV_EXPORTS void starCensusTransform(const cv::Mat &img1, const cv::Mat &img2, int kernelSize, cv::Mat &dist1,cv::Mat &dist2);
-        //single image version of star kernel
-        CV_EXPORTS void starCensusTransform(const cv::Mat &img1, int kernelSize, cv::Mat &dist);
-        //integral image computation used in the Mean Variation Census Transform
-        void imageMeanKernelSize(const cv::Mat &img, int windowSize, cv::Mat &c);
-    }
-}
-#endif
+// FIXIT deprecate and remove CV_ prefix
+/// types of supported kernels
+enum {
+    CV_DENSE_CENSUS, CV_SPARSE_CENSUS,
+    CV_CS_CENSUS, CV_MODIFIED_CS_CENSUS, CV_MODIFIED_CENSUS_TRANSFORM,
+    CV_MEAN_VARIATION, CV_STAR_KERNEL
+};
+
+/**
+Two variations of census applied on input images
+Implementation of a census transform which is taking into account just the some pixels from the census kernel thus allowing for larger block sizes
+**/
+CV_EXPORTS void censusTransform(const Mat &image1, const Mat &image2, int kernelSize, Mat &dist1, Mat &dist2, const int type);
+/// single image census transform
+CV_EXPORTS void censusTransform(const Mat &image1, int kernelSize, Mat &dist1, const int type);
+/**
+STANDARD_MCT - Modified census which is memorizing for each pixel 2 bits and includes a tolerance to the pixel comparison
+MCT_MEAN_VARIATION - Implementation of a modified census transform which is also taking into account the variation to the mean of the window not just the center pixel
+**/
+CV_EXPORTS void modifiedCensusTransform(const Mat &img1, const Mat &img2, int kernelSize, Mat &dist1, Mat &dist2, const int type, int t = 0, const Mat &integralImage1 = Mat(), const Mat &integralImage2 = Mat());
+///single version of modified census transform descriptor
+CV_EXPORTS void modifiedCensusTransform(const Mat &img1, int kernelSize, Mat &dist, const int type, int t = 0, const Mat &integralImage = Mat());
+/**The classical center symetric census
+A modified version of cs census which is comparing a pixel with its correspondent after the center
+**/
+CV_EXPORTS void symetricCensusTransform(const Mat &img1, const Mat &img2, int kernelSize, Mat &dist1, Mat &dist2, const int type);
+///single version of census transform
+CV_EXPORTS void symetricCensusTransform(const Mat &img1, int kernelSize, Mat &dist1, const int type);
+///in a 9x9 kernel only certain positions are choosen
+CV_EXPORTS void starCensusTransform(const Mat &img1, const Mat &img2, int kernelSize, Mat &dist1, Mat &dist2);
+///single image version of star kernel
+CV_EXPORTS void starCensusTransform(const Mat &img1, int kernelSize, Mat &dist);
+
+}}  // namespace
 #endif
-/*End of file*/
diff --git a/IPL/include/opencv/opencv2/stereo/matching.hpp b/IPL/include/opencv/opencv2/stereo/matching.hpp
deleted file mode 100644
index 2238961..0000000
--- a/IPL/include/opencv/opencv2/stereo/matching.hpp
+++ /dev/null
@@ -1,624 +0,0 @@
-//By downloading, copying, installing or using the software you agree to this license.
-//If you do not agree to this license, do not download, install,
-//copy or use the software.
-//
-//
-//                          License Agreement
-//               For Open Source Computer Vision Library
-//                       (3-clause BSD License)
-//
-//Copyright (C) 2000-2015, Intel Corporation, all rights reserved.
-//Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved.
-//Copyright (C) 2009-2015, NVIDIA Corporation, all rights reserved.
-//Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved.
-//Copyright (C) 2015, OpenCV Foundation, all rights reserved.
-//Copyright (C) 2015, Itseez Inc., all rights reserved.
-//Third party copyrights are property of their respective owners.
-//
-//Redistribution and use in source and binary forms, with or without modification,
-//are permitted provided that the following conditions are met:
-//
-//  * Redistributions of source code must retain the above copyright notice,
-//    this list of conditions and the following disclaimer.
-//
-//  * Redistributions in binary form must reproduce the above copyright notice,
-//    this list of conditions and the following disclaimer in the documentation
-//    and/or other materials provided with the distribution.
-//
-//  * Neither the names of the copyright holders nor the names of the contributors
-//    may be used to endorse or promote products derived from this software
-//    without specific prior written permission.
-//
-//This software is provided by the copyright holders and contributors "as is" and
-//any express or implied warranties, including, but not limited to, the implied
-//warranties of merchantability and fitness for a particular purpose are disclaimed.
-//In no event shall copyright holders or contributors be liable for any direct,
-//indirect, incidental, special, exemplary, or consequential damages
-//(including, but not limited to, procurement of substitute goods or services;
-//loss of use, data, or profits; or business interruption) however caused
-//and on any theory of liability, whether in contract, strict liability,
-//or tort (including negligence or otherwise) arising in any way out of
-//the use of this software, even if advised of the possibility of such damage.
-
-/*****************************************************************************************************************\
-*   The interface contains the main methods for computing the matching between the left and right images	      *
-*                                                                                                                 *
-\******************************************************************************************************************/
-#include <stdint.h>
-
-#ifndef _OPENCV_MATCHING_HPP_
-#define _OPENCV_MATCHING_HPP_
-#ifdef __cplusplus
-
-namespace cv
-{
-    namespace stereo
-    {
-        class Matching
-        {
-        private:
-            //!The maximum disparity
-            int maxDisparity;
-            //!the factor by which we are multiplying the disparity
-            int scallingFactor;
-            //!the confidence to which a min disparity found is good or not
-            double confidenceCheck;
-            //!the LUT used in case SSE is not available
-            int hamLut[65537];
-            //!function used for getting the minimum disparity from the cost volume"
-            static int minim(short *c, int iwpj, int widthDisp,const double confidence, const int search_region)
-            {
-                double mini, mini2, mini3;
-                mini = mini2 = mini3 = DBL_MAX;
-                int index = 0;
-                int iw = iwpj;
-                int widthDisp2;
-                widthDisp2 = widthDisp;
-                widthDisp -= 1;
-                for (int i = 0; i <= widthDisp; i++)
-                {
-                    if (c[(iw + i * search_region) * widthDisp2 + i] < mini)
-                    {
-                        mini3 = mini2;
-                        mini2 = mini;
-                        mini = c[(iw + i * search_region) * widthDisp2 + i];
-                        index = i;
-                    }
-                    else if (c[(iw + i * search_region) * widthDisp2 + i] < mini2)
-                    {
-                        mini3 = mini2;
-                        mini2 = c[(iw + i * search_region) * widthDisp2 + i];
-                    }
-                    else if (c[(iw + i * search_region) * widthDisp2 + i] < mini3)
-                    {
-                        mini3 = c[(iw + i * search_region) * widthDisp2 + i];
-                    }
-                }
-                if(mini != 0)
-                {
-                    if (mini3 / mini <= confidence)
-                        return index;
-                }
-                return -1;
-            }
-            //!Interpolate in order to obtain better results
-            //!function for refining the disparity at sub pixel using simetric v
-            static double symetricVInterpolation(short *c, int iwjp, int widthDisp, int winDisp,const int search_region)
-            {
-                if (winDisp == 0 || winDisp == widthDisp - 1)
-                    return winDisp;
-                double m2m1, m3m1, m3, m2, m1;
-                m2 = c[(iwjp + (winDisp - 1) * search_region) * widthDisp + winDisp - 1];
-                m3 = c[(iwjp + (winDisp + 1) * search_region)* widthDisp + winDisp + 1];
-                m1 = c[(iwjp + winDisp * search_region) * widthDisp + winDisp];
-                m2m1 = m2 - m1;
-                m3m1 = m3 - m1;
-                if (m2m1 == 0 || m3m1 == 0) return winDisp;
-                double p;
-                p = 0;
-                if (m2 > m3)
-                {
-                    p = (0.5 - 0.25 * ((m3m1 * m3m1) / (m2m1 * m2m1) + (m3m1 / m2m1)));
-                }
-                else
-                {
-                    p = -1 * (0.5 - 0.25 * ((m2m1 * m2m1) / (m3m1 * m3m1) + (m2m1 / m3m1)));
-                }
-                if (p >= -0.5 && p <= 0.5)
-                    p = winDisp + p;
-                return p;
-            }
-            //!a pre processing function that generates the Hamming LUT in case the algorithm will ever be used on platform where SSE is not available
-            void hammingLut()
-            {
-                for (int i = 0; i <= 65536; i++)
-                {
-                    int dist = 0;
-                    int j = i;
-                    //we number the bits from our number
-                    while (j)
-                    {
-                        dist = dist + 1;
-                        j = j & (j - 1);
-                    }
-                    hamLut[i] = dist;
-                }
-            }
-            //!the class used in computing the hamming distance
-            class hammingDistance : public ParallelLoopBody
-            {
-            private:
-                int *left, *right;
-                short *c;
-                int v,kernelSize, width;
-                int MASK;
-                int *hammLut;
-            public :
-                hammingDistance(const Mat &leftImage, const Mat &rightImage, short *cost, int maxDisp, int kerSize, int *hammingLUT):
-                    left((int *)leftImage.data), right((int *)rightImage.data), c(cost), v(maxDisp),kernelSize(kerSize),width(leftImage.cols), MASK(65535), hammLut(hammingLUT){}
-                void operator()(const cv::Range &r) const {
-                    for (int i = r.start; i <= r.end ; i++)
-                    {
-                        int iw = i * width;
-                        for (int j = kernelSize; j < width - kernelSize; j++)
-                        {
-                            int j2;
-                            int xorul;
-                            int iwj;
-                            iwj = iw + j;
-                            for (int d = 0; d <= v; d++)
-                            {
-                                j2 = (0 > j - d) ? (0) : (j - d);
-                                xorul = left[(iwj)] ^ right[(iw + j2)];
-#if CV_POPCNT
-                                if (checkHardwareSupport(CV_CPU_POPCNT))
-                                {
-                                    c[(iwj)* (v + 1) + d] = (short)_mm_popcnt_u32(xorul);
-                                }
-                                else
-#endif
-                                {
-                                    c[(iwj)* (v + 1) + d] = (short)(hammLut[xorul & MASK] + hammLut[(xorul >> 16) & MASK]);
-                                }
-                            }
-                        }
-                    }
-                }
-            };
-            //!cost aggregation
-            class agregateCost:public ParallelLoopBody
-            {
-            private:
-                int win;
-                short *c, *parSum;
-                int maxDisp,width, height;
-            public:
-                agregateCost(const Mat &partialSums, int windowSize, int maxDispa, Mat &cost)
-                {
-                    win = windowSize / 2;
-                    c = (short *)cost.data;
-                    maxDisp = maxDispa;
-                    width = cost.cols / ( maxDisp + 1) - 1;
-                    height = cost.rows - 1;
-                    parSum = (short *)partialSums.data;
-                }
-                void operator()(const cv::Range &r) const {
-                    for (int i = r.start; i <= r.end; i++)
-                    {
-                        int iwi = (i - 1) * width;
-                        for (int j = win + 1; j <= width - win - 1; j++)
-                        {
-                            int w1 = ((i + win + 1) * width + j + win) * (maxDisp + 1);
-                            int w2 = ((i - win) * width + j - win - 1) * (maxDisp + 1);
-                            int w3 = ((i + win + 1) * width + j - win - 1) * (maxDisp + 1);
-                            int w4 = ((i - win) * width + j + win) * (maxDisp + 1);
-                            int w = (iwi + j - 1) * (maxDisp + 1);
-                            for (int d = 0; d <= maxDisp; d++)
-                            {
-                                c[w + d] = parSum[w1 + d] + parSum[w2 + d]
-                                - parSum[w3 + d] - parSum[w4 + d];
-                            }
-                        }
-                    }
-                }
-            };
-            //!class that is responsable for generating the disparity map
-            class makeMap:public ParallelLoopBody
-            {
-            private:
-                //enum used to notify wether we are searching on the vertical ie (lr) or diagonal (rl)
-                enum {CV_VERTICAL_SEARCH, CV_DIAGONAL_SEARCH};
-                int width,disparity,scallingFact,th;
-                double confCheck;
-                uint8_t *map;
-                short *c;
-            public:
-                makeMap(const Mat &costVolume, int threshold, int maxDisp, double confidence,int scale, Mat &mapFinal)
-                {
-                    c = (short *)costVolume.data;
-                    map = mapFinal.data;
-                    disparity = maxDisp;
-                    width = costVolume.cols / ( disparity + 1) - 1;
-                    th = threshold;
-                    scallingFact = scale;
-                    confCheck = confidence;
-                }
-                void operator()(const cv::Range &r) const {
-                    for (int i = r.start; i <= r.end ; i++)
-                    {
-                        int lr;
-                        int v = -1;
-                        double p1, p2;
-                        int iw = i * width;
-                        for (int j = 0; j < width; j++)
-                        {
-                            lr = Matching:: minim(c, iw + j, disparity + 1, confCheck,CV_VERTICAL_SEARCH);
-                            if (lr != -1)
-                            {
-                                v = Matching::minim(c, iw + j - lr, disparity + 1, confCheck,CV_DIAGONAL_SEARCH);
-                                if (v != -1)
-                                {
-                                    p1 = Matching::symetricVInterpolation(c, iw + j - lr, disparity + 1, v,CV_DIAGONAL_SEARCH);
-                                    p2 = Matching::symetricVInterpolation(c, iw + j, disparity + 1, lr,CV_VERTICAL_SEARCH);
-                                    if (abs(p1 - p2) <= th)
-                                        map[iw + j] = (uint8_t)((p2)* scallingFact);
-                                    else
-                                    {
-                                        map[iw + j] = 0;
-                                    }
-                                }
-                                else
-                                {
-                                    if (width - j <= disparity)
-                                    {
-                                        p2 = Matching::symetricVInterpolation(c, iw + j, disparity + 1, lr,CV_VERTICAL_SEARCH);
-                                        map[iw + j] = (uint8_t)(p2* scallingFact);
-                                    }
-                                }
-                            }
-                            else
-                            {
-                                map[iw + j] = 0;
-                            }
-                        }
-                    }
-                }
-            };
-            //!median 1x9 paralelized filter
-            template <typename T>
-            class Median1x9:public ParallelLoopBody
-            {
-            private:
-                T *original;
-                T *filtered;
-                int height, width;
-            public:
-                Median1x9(const Mat &originalImage, Mat &filteredImage)
-                {
-                    original = (T *)originalImage.data;
-                    filtered = (T *)filteredImage.data;
-                    height = originalImage.rows;
-                    width = originalImage.cols;
-                }
-                void operator()(const cv::Range &r) const{
-                    for (int m = r.start; m <= r.end; m++)
-                    {
-                        for (int n = 4; n < width - 4; ++n)
-                        {
-                            int k = 0;
-                            T window[9];
-                            for (int i = n - 4; i <= n + 4; ++i)
-                                window[k++] = original[m * width + i];
-                            for (int j = 0; j < 5; ++j)
-                            {
-                                int min = j;
-                                for (int l = j + 1; l < 9; ++l)
-                                    if (window[l] < window[min])
-                                        min = l;
-                                const T temp = window[j];
-                                window[j] = window[min];
-                                window[min] = temp;
-                            }
-                            filtered[m  * width + n] = window[4];
-                        }
-                    }
-                }
-            };
-            //!median 9x1 paralelized filter
-            template <typename T>
-            class Median9x1:public ParallelLoopBody
-            {
-            private:
-                T *original;
-                T *filtered;
-                int height, width;
-            public:
-                Median9x1(const Mat &originalImage, Mat &filteredImage)
-                {
-                    original = (T *)originalImage.data;
-                    filtered = (T *)filteredImage.data;
-                    height = originalImage.rows;
-                    width = originalImage.cols;
-                }
-                void operator()(const Range &r) const{
-                    for (int n = r.start; n <= r.end; ++n)
-                    {
-                        for (int m = 4; m < height - 4; ++m)
-                        {
-                            int k = 0;
-                            T window[9];
-                            for (int i = m - 4; i <= m + 4; ++i)
-                                window[k++] = original[i * width + n];
-                            for (int j = 0; j < 5; j++)
-                            {
-                                int min = j;
-                                for (int l = j + 1; l < 9; ++l)
-                                    if (window[l] < window[min])
-                                        min = l;
-                                const T temp = window[j];
-                                window[j] = window[min];
-                                window[min] = temp;
-                            }
-                            filtered[m  * width + n] = window[4];
-                        }
-                    }
-                }
-            };
-        protected:
-            //arrays used in the region removal
-            Mat speckleY;
-            Mat speckleX;
-            Mat puss;
-            //int *specklePointX;
-            //int *specklePointY;
-            //long long *pus;
-            int previous_size;
-            //!method for setting the maximum disparity
-            void setMaxDisparity(int val)
-            {
-                CV_Assert(val > 10);
-                this->maxDisparity = val;
-            }
-            //!method for getting the disparity
-            int getMaxDisparity()
-            {
-                return this->maxDisparity;
-            }
-            //! a number by which the disparity will be multiplied for better display
-            void setScallingFactor(int val)
-            {
-                CV_Assert(val > 0);
-                this->scallingFactor = val;
-            }
-            //!method for getting the scalling factor
-            int getScallingFactor()
-            {
-                return scallingFactor;
-            }
-            //!setter for the confidence check
-            void setConfidence(double val)
-            {
-                CV_Assert(val >= 1);
-                this->confidenceCheck = val;
-            }
-            //getter for confidence check
-            double getConfidence()
-            {
-                return confidenceCheck;
-            }
-            //! Hamming distance computation method
-            //! leftImage and rightImage are the two transformed images
-            //! the cost is the resulted cost volume and kernel Size is the size of the matching window
-            void hammingDistanceBlockMatching(const Mat &leftImage, const Mat &rightImage, Mat &cost, const int kernelSize= 9)
-            {
-                CV_Assert(leftImage.cols == rightImage.cols);
-                CV_Assert(leftImage.rows == rightImage.rows);
-                CV_Assert(kernelSize % 2 != 0);
-                CV_Assert(cost.rows == leftImage.rows);
-                CV_Assert(cost.cols / (maxDisparity + 1) == leftImage.cols);
-                short *c = (short *)cost.data;
-                memset(c, 0, sizeof(c[0]) * leftImage.cols * leftImage.rows * (maxDisparity + 1));
-                parallel_for_(cv::Range(kernelSize / 2,leftImage.rows - kernelSize / 2), hammingDistance(leftImage,rightImage,(short *)cost.data,maxDisparity,kernelSize / 2,hamLut));
-            }
-            //preprocessing the cost volume in order to get it ready for aggregation
-            void costGathering(const Mat &hammingDistanceCost, Mat &cost)
-            {
-                CV_Assert(hammingDistanceCost.rows == hammingDistanceCost.rows);
-                CV_Assert(hammingDistanceCost.type() == CV_16S);
-                CV_Assert(cost.type() == CV_16S);
-                int maxDisp = maxDisparity;
-                int width = cost.cols / ( maxDisp + 1) - 1;
-                int height = cost.rows - 1;
-                short *c = (short *)cost.data;
-                short *ham = (short *)hammingDistanceCost.data;
-                memset(c, 0, sizeof(c[0]) * (width + 1) * (height + 1) * (maxDisp + 1));
-                for (int i = 1; i <= height; i++)
-                {
-                    int iw = i * width;
-                    int iwi = (i - 1) * width;
-                    for (int j = 1; j <= width; j++)
-                    {
-                        int iwj = (iw + j) * (maxDisp + 1);
-                        int iwjmu = (iw + j - 1) * (maxDisp + 1);
-                        int iwijmu = (iwi + j - 1) * (maxDisp + 1);
-                        for (int d = 0; d <= maxDisp; d++)
-                        {
-                            c[iwj + d] = ham[iwijmu + d] + c[iwjmu + d];
-                        }
-                    }
-                }
-                for (int i = 1; i <= height; i++)
-                {
-                    for (int j = 1; j <= width; j++)
-                    {
-                        int iwj = (i * width + j) * (maxDisp + 1);
-                        int iwjmu = ((i - 1)  * width + j) * (maxDisp + 1);
-                        for (int d = 0; d <= maxDisp; d++)
-                        {
-                            c[iwj + d] += c[iwjmu + d];
-                        }
-                    }
-                }
-            }
-            //!The aggregation on the cost volume
-            void blockAgregation(const Mat &partialSums, int windowSize, Mat &cost)
-            {
-                CV_Assert(windowSize % 2 != 0);
-                CV_Assert(partialSums.rows == cost.rows);
-                CV_Assert(partialSums.cols == cost.cols);
-                int win = windowSize / 2;
-                short *c = (short *)cost.data;
-                int maxDisp = maxDisparity;
-                int width = cost.cols / ( maxDisp + 1) - 1;
-                int height = cost.rows - 1;
-                memset(c, 0, sizeof(c[0]) * width * height * (maxDisp + 1));
-                parallel_for_(cv::Range(win + 1,height - win - 1), agregateCost(partialSums,windowSize,maxDisp,cost));
-            }
-            //!remove small regions that have an area smaller than t, we fill the region with the average of the good pixels around it
-            template <typename T>
-            void smallRegionRemoval(const Mat &currentMap, int t, Mat &out)
-            {
-                CV_Assert(currentMap.cols == out.cols);
-                CV_Assert(currentMap.rows == out.rows);
-                CV_Assert(t >= 0);
-                int *pus = (int *)puss.data;
-                int *specklePointX = (int *)speckleX.data;
-                int *specklePointY = (int *)speckleY.data;
-                memset(pus, 0, previous_size * sizeof(pus[0]));
-                T *map = (T *)currentMap.data;
-                T *outputMap = (T *)out.data;
-                int height = currentMap.rows;
-                int width = currentMap.cols;
-                T k = 1;
-                int st, dr;
-                int di[] = { -1, -1, -1, 0, 1, 1, 1, 0 },
-                    dj[] = { -1, 0, 1, 1, 1, 0, -1, -1 };
-                int speckle_size = 0;
-                st = 0;
-                dr = 0;
-                for (int i = 1; i < height - 1; i++)
-                {
-                    int iw = i * width;
-                    for (int j = 1; j < width - 1; j++)
-                    {
-                        if (map[iw + j] != 0)
-                        {
-                            outputMap[iw + j] = map[iw + j];
-                        }
-                        else if (map[iw + j] == 0)
-                        {
-                            T nr = 1;
-                            T avg = 0;
-                            speckle_size = dr;
-                            specklePointX[dr] = i;
-                            specklePointY[dr] = j;
-                            pus[i * width + j] = 1;
-                            dr++;
-                            map[iw + j] = k;
-                            while (st < dr)
-                            {
-                                int ii = specklePointX[st];
-                                int jj = specklePointY[st];
-                                //going on 8 directions
-                                for (int d = 0; d < 8; d++)
-                                {//if insisde
-                                    if (ii + di[d] >= 0 && ii + di[d] < height && jj + dj[d] >= 0 && jj + dj[d] < width &&
-                                        pus[(ii + di[d]) * width + jj + dj[d]] == 0)
-                                    {
-                                        T val = map[(ii + di[d]) * width + jj + dj[d]];
-                                        if (val == 0)
-                                        {
-                                            map[(ii + di[d]) * width + jj + dj[d]] = k;
-                                            specklePointX[dr] = (ii + di[d]);
-                                            specklePointY[dr] = (jj + dj[d]);
-                                            dr++;
-                                            pus[(ii + di[d]) * width + jj + dj[d]] = 1;
-                                        }//this means that my point is a good point to be used in computing the final filling value
-                                        else if (val >= 1 && val < 250)
-                                        {
-                                            avg += val;
-                                            nr++;
-                                        }
-                                    }
-                                }
-                                st++;
-                            }//if hole size is smaller than a specified threshold we fill the respective hole with the average of the good neighbours
-                            if (st - speckle_size <= t)
-                            {
-                                T fillValue = (T)(avg / nr);
-                                while (speckle_size < st)
-                                {
-                                    int ii = specklePointX[speckle_size];
-                                    int jj = specklePointY[speckle_size];
-                                    outputMap[ii * width + jj] = fillValue;
-                                    speckle_size++;
-                                }
-                            }
-                        }
-                    }
-                }
-            }
-            //!Method responsible for generating the disparity map
-            //!function for generating disparity maps at sub pixel level
-            /* costVolume - represents the cost volume
-            * width, height - represent the width and height of the iage
-            *disparity - represents the maximum disparity
-            *map - is the disparity map that will result
-            *th - is the LR threshold
-            */
-            void dispartyMapFormation(const Mat &costVolume, Mat &mapFinal, int th)
-            {
-                uint8_t *map = mapFinal.data;
-                int disparity = maxDisparity;
-                int width = costVolume.cols / ( disparity + 1) - 1;
-                int height = costVolume.rows - 1;
-                memset(map, 0, sizeof(map[0]) * width * height);
-                parallel_for_(Range(0,height - 1), makeMap(costVolume,th,disparity,confidenceCheck,scallingFactor,mapFinal));
-            }
-        public:
-            //!a median filter of 1x9 and 9x1
-            //!1x9 median filter
-            template<typename T>
-            void Median1x9Filter(const Mat &originalImage, Mat &filteredImage)
-            {
-                CV_Assert(originalImage.rows == filteredImage.rows);
-                CV_Assert(originalImage.cols == filteredImage.cols);
-                parallel_for_(Range(1,originalImage.rows - 2), Median1x9<T>(originalImage,filteredImage));
-            }
-            //!9x1 median filter
-            template<typename T>
-            void Median9x1Filter(const Mat &originalImage, Mat &filteredImage)
-            {
-                CV_Assert(originalImage.cols == filteredImage.cols);
-                CV_Assert(originalImage.cols == filteredImage.cols);
-                parallel_for_(Range(1,originalImage.cols - 2), Median9x1<T>(originalImage,filteredImage));
-            }
-            //!constructor for the matching class
-            //!maxDisp - represents the maximum disparity
-            Matching(void)
-            {
-                hammingLut();
-            }
-            ~Matching(void)
-            {
-            }
-            //constructor for the matching class
-            //maxDisp - represents the maximum disparity
-            //confidence - represents the confidence check
-            Matching(int maxDisp, int scalling = 4, int confidence = 6)
-            {
-                //set the maximum disparity
-                setMaxDisparity(maxDisp);
-                //set scalling factor
-                setScallingFactor(scalling);
-                //set the value for the confidence
-                setConfidence(confidence);
-                //generate the hamming lut in case SSE is not available
-                hammingLut();
-            }
-        };
-    }
-}
-#endif
-#endif
-/*End of file*/
diff --git a/IPL/include/opencv/opencv2/stereo/quasi_dense_stereo.hpp b/IPL/include/opencv/opencv2/stereo/quasi_dense_stereo.hpp
new file mode 100644
index 0000000..b302c13
--- /dev/null
+++ b/IPL/include/opencv/opencv2/stereo/quasi_dense_stereo.hpp
@@ -0,0 +1,197 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+//authors: Danail Stoyanov, Evangelos Mazomenos, Dimitrios Psychogyios
+
+
+//__OPENCV_QUASI_DENSE_STEREO_H__
+#ifndef __OPENCV_QUASI_DENSE_STEREO_H__
+#define __OPENCV_QUASI_DENSE_STEREO_H__
+
+
+
+#include <opencv2/core.hpp>
+
+
+namespace cv
+{
+namespace stereo
+{
+/** \addtogroup stereo
+ *  @{
+ */
+
+
+// A basic match structure
+struct CV_EXPORTS Match
+{
+    cv::Point2i p0;
+    cv::Point2i p1;
+    float	corr;
+
+    bool operator < (const Match & rhs) const//fixme  may be used uninitialized in this function
+    {
+        return this->corr < rhs.corr;
+    }
+};
+struct CV_EXPORTS PropagationParameters
+{
+    int	corrWinSizeX;			// similarity window
+    int	corrWinSizeY;
+
+    int borderX;					// border to ignore
+    int borderY;
+
+    //matching
+    float correlationThreshold;	// correlation threshold
+    float textrureThreshold;		// texture threshold
+
+    int	  neighborhoodSize;		// neighborhood size
+    int	  disparityGradient;	// disparity gradient threshold
+
+    // Parameters for LK flow algorithm
+    int lkTemplateSize;
+    int lkPyrLvl;
+    int lkTermParam1;
+    float lkTermParam2;
+
+    // Parameters for GFT algorithm.
+    float gftQualityThres;
+    int gftMinSeperationDist;
+    int gftMaxNumFeatures;
+
+};
+
+
+/**
+ * @brief Class containing the methods needed for Quasi Dense Stereo computation.
+ *
+ * This module contains the code to perform quasi dense stereo matching.
+ * The method initially starts with a sparse 3D reconstruction based on feature matching across a
+ * stereo image pair and subsequently propagates the structure into neighboring image regions.
+ * To obtain initial seed correspondences, the algorithm locates Shi and Tomasi features in the
+ * left image of the stereo pair and then tracks them using pyramidal Lucas-Kanade in the right image.
+ * To densify the sparse correspondences, the algorithm computes the zero-mean normalized
+ * cross-correlation (ZNCC) in small patches around every seed pair and uses it as a quality metric
+ * for each match. In this code, we introduce a custom structure to store the location and ZNCC value
+ * of correspondences called "Match". Seed Matches are stored in a priority queue sorted according to
+ * their ZNCC value, allowing for the best quality Match to be readily available. The algorithm pops
+ * Matches and uses them to extract new matches around them. This is done by considering a small
+ * neighboring area around each Seed and retrieving correspondences above a certain texture threshold
+ * that are not previously computed. New matches are stored in the seed priority queue and used as seeds.
+ * The propagation process ends when no additional matches can be retrieved.
+ *
+ *
+ * @sa This code represents the work presented in @cite Stoyanov2010.
+ * If this code is useful for your work please cite @cite Stoyanov2010.
+ *
+ * Also the original growing scheme idea is described in @cite Lhuillier2000
+ *
+ */
+
+class CV_EXPORTS QuasiDenseStereo
+{
+public:
+    /**
+     * @brief destructor
+     * Method to free all the memory allocated by matrices and vectors in this class.
+     */
+    virtual ~QuasiDenseStereo() = 0;
+
+
+    /**
+     * @brief Load a file containing the configuration parameters of the class.
+     * @param[in] filepath The location of the .YAML file containing the configuration parameters.
+     * @note default value is an empty string in which case the default parameters will be loaded.
+     * @retval 1: If the path is not empty and the program loaded the parameters successfully.
+     * @retval 0: If the path is empty and the program loaded default parameters.
+     * @retval -1: If the file location is not valid or the program could not open the file and
+     * loaded default parameters from defaults.hpp.
+     * @note The method is automatically called in the constructor and configures the class.
+     * @note Loading different parameters will have an effect on the output. This is useful for tuning
+     * in case of video processing.
+     * @sa loadParameters
+     */
+    virtual int loadParameters(cv::String filepath) = 0;
+
+
+    /**
+     * @brief Save a file containing all the configuration parameters the class is currently set to.
+     * @param[in] filepath The location to store the parameters file.
+     * @note Calling this method with no arguments will result in storing class parameters to a file
+     * named "qds_parameters.yaml" in the root project folder.
+     * @note This method can be used to generate a template file for tuning the class.
+     * @sa loadParameters
+     */
+    virtual int saveParameters(cv::String filepath) = 0;
+
+
+    /**
+     * @brief Get The sparse corresponding points.
+     * @param[out] sMatches A vector containing all sparse correspondences.
+     * @note The method clears the sMatches vector.
+     * @note The returned Match elements inside the sMatches vector, do not use corr member.
+     */
+    virtual void getSparseMatches(std::vector<stereo::Match> &sMatches) = 0;
+
+
+    /**
+     * @brief Get The dense corresponding points.
+     * @param[out] denseMatches A vector containing all dense matches.
+     * @note The method clears the denseMatches vector.
+     * @note The returned Match elements inside the denseMatches vector do not use the corr member.
+     */
+    virtual void getDenseMatches(std::vector<stereo::Match> &denseMatches) = 0;
+
+
+
+    /**
+     * @brief Main process of the algorithm. This method computes the sparse seeds and then densifies them.
+     *
+     * Initially input images are converted to gray-scale and then the sparseMatching method
+     * is called to obtain the sparse stereo. Finally quasiDenseMatching is called to densify the corresponding
+     * points.
+     * @param[in] imgLeft The left Channel of a stereo image pair.
+     * @param[in] imgRight The right Channel of a stereo image pair.
+     * @note If input images are in color, the method assumes that they are BGR and converts them to grayscale.
+     * @sa sparseMatching
+     * @sa quasiDenseMatching
+     */
+    virtual void process(const cv::Mat &imgLeft ,const cv::Mat &imgRight) = 0;
+
+
+    /**
+     * @brief Specify pixel coordinates in the left image and get its corresponding location in the right image.
+     * @param[in] x The x pixel coordinate in the left image channel.
+     * @param[in] y The y pixel coordinate in the left image channel.
+     * @retval cv::Point(x, y) The location of the corresponding pixel in the right image.
+     * @retval cv::Point(0, 0) (NO_MATCH)  if no match is found in the right image for the specified pixel location in the left image.
+     * @note This method should be always called after process, otherwise the matches will not be correct.
+     */
+    virtual cv::Point2f getMatch(const int x, const int y) = 0;
+
+
+    /**
+     * @brief Compute and return the disparity map based on the correspondences found in the "process" method.
+     * @param[in] disparityLvls The level of detail in output disparity image.
+     * @note Default level is 50
+     * @return cv::Mat containing the disparity image in grayscale.
+     * @sa computeDisparity
+     * @sa quantizeDisparity
+     */
+    virtual cv::Mat getDisparity(uint8_t disparityLvls=50) = 0;
+
+
+    static cv::Ptr<QuasiDenseStereo> create(cv::Size monoImgSize, cv::String paramFilepath = cv::String());
+
+
+    PropagationParameters Param;
+};
+
+} //namespace stereo
+} //namespace cv
+
+/** @}*/
+
+#endif // __OPENCV_QUASI_DENSE_STEREO_H__
diff --git a/IPL/include/opencv/opencv2/stitching.hpp b/IPL/include/opencv/opencv2/stitching.hpp
index 96cde14..016e7d8 100644
--- a/IPL/include/opencv/opencv2/stitching.hpp
+++ b/IPL/include/opencv/opencv2/stitching.hpp
@@ -40,8 +40,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_STITCHING_STITCHER_HPP__
-#define __OPENCV_STITCHING_STITCHER_HPP__
+#ifndef OPENCV_STITCHING_STITCHER_HPP
+#define OPENCV_STITCHING_STITCHER_HPP
 
 #include "opencv2/core.hpp"
 #include "opencv2/features2d.hpp"
@@ -53,6 +53,12 @@
 #include "opencv2/stitching/detail/blenders.hpp"
 #include "opencv2/stitching/detail/camera.hpp"
 
+
+#if defined(Status)
+#  warning Detected X11 'Status' macro definition, it can cause build conflicts. Please, include this header before any X11 headers.
+#endif
+
+
 /**
 @defgroup stitching Images stitching
 
@@ -63,7 +69,29 @@ one can combine and use them separately.
 
 The implemented stitching pipeline is very similar to the one proposed in @cite BL07 .
 
-![image](StitchingPipeline.jpg)
+![stitching pipeline](StitchingPipeline.jpg)
+
+Camera models
+-------------
+
+There are currently 2 camera models implemented in stitching pipeline.
+
+- _Homography model_ expecting perspective transformations between images
+  implemented in @ref cv::detail::BestOf2NearestMatcher cv::detail::HomographyBasedEstimator
+  cv::detail::BundleAdjusterReproj cv::detail::BundleAdjusterRay
+- _Affine model_ expecting affine transformation with 6 DOF or 4 DOF implemented in
+  @ref cv::detail::AffineBestOf2NearestMatcher cv::detail::AffineBasedEstimator
+  cv::detail::BundleAdjusterAffine cv::detail::BundleAdjusterAffinePartial cv::AffineWarper
+
+Homography model is useful for creating photo panoramas captured by camera,
+while affine-based model can be used to stitch scans and object captured by
+specialized devices. Use @ref cv::Stitcher::create to get preconfigured pipeline for one
+of those models.
+
+@note
+Certain detailed settings of @ref cv::Stitcher might not make sense. Especially
+you should not mix classes implementing affine model and classes implementing
+Homography model, as they work with different transformations.
 
 @{
     @defgroup stitching_match Features Finding and Images Matching
@@ -81,6 +109,18 @@ namespace cv {
 //! @addtogroup stitching
 //! @{
 
+/** @example samples/cpp/stitching.cpp
+A basic example on image stitching
+*/
+
+/** @example samples/python/stitching.py
+A basic example on image stitching in Python.
+*/
+
+/** @example samples/cpp/stitching_detailed.cpp
+A detailed example on image stitching
+*/
+
 /** @brief High level image stitcher.
 
 It's possible to use this class without being aware of the entire stitching pipeline. However, to
@@ -88,15 +128,27 @@ be able to achieve higher stitching stability and quality of the final images at
 familiar with the theory is recommended.
 
 @note
-   -   A basic example on image stitching can be found at
-        opencv_source_code/samples/cpp/stitching.cpp
-    -   A detailed example on image stitching can be found at
-        opencv_source_code/samples/cpp/stitching_detailed.cpp
+-   A basic example on image stitching can be found at
+    opencv_source_code/samples/cpp/stitching.cpp
+-   A basic example on image stitching in Python can be found at
+    opencv_source_code/samples/python/stitching.py
+-   A detailed example on image stitching can be found at
+    opencv_source_code/samples/cpp/stitching_detailed.cpp
  */
 class CV_EXPORTS_W Stitcher
 {
 public:
-    enum { ORIG_RESOL = -1 };
+    /**
+     * When setting a resolution for stitching, this value is a placeholder
+     * for preserving the original resolution.
+     */
+#if __cplusplus >= 201103L || (defined(_MSC_VER) && _MSC_VER >= 1900/*MSVS 2015*/)
+    static constexpr double ORIG_RESOL = -1.0;
+#else
+    // support MSVS 2013
+    static const double ORIG_RESOL; // Initialized in stitcher.cpp
+#endif
+
     enum Status
     {
         OK = 0,
@@ -105,13 +157,31 @@ class CV_EXPORTS_W Stitcher
         ERR_CAMERA_PARAMS_ADJUST_FAIL = 3
     };
 
-   // Stitcher() {}
-    /** @brief Creates a stitcher with the default parameters.
+    enum Mode
+    {
+        /** Mode for creating photo panoramas. Expects images under perspective
+        transformation and projects resulting pano to sphere.
 
-    @param try_use_gpu Flag indicating whether GPU should be used whenever it's possible.
+        @sa detail::BestOf2NearestMatcher SphericalWarper
+        */
+        PANORAMA = 0,
+        /** Mode for composing scans. Expects images under affine transformation and does
+        not compensate exposure by default.
+
+        @sa detail::AffineBestOf2NearestMatcher AffineWarper
+        */
+        SCANS = 1,
+
+    };
+
+    /** @brief Creates a Stitcher configured in one of the stitching modes.
+
+    @param mode Scenario for stitcher operation. This is usually determined by source of images
+    to stitch and their transformation. Default parameters will be chosen for operation in given
+    scenario.
     @return Stitcher class instance.
      */
-    static Stitcher createDefault(bool try_use_gpu = false);
+    CV_WRAP static Ptr<Stitcher> create(Mode mode = Stitcher::PANORAMA);
 
     CV_WRAP double registrationResol() const { return registr_resol_; }
     CV_WRAP void setRegistrationResol(double resol_mpx) { registr_resol_ = resol_mpx; }
@@ -128,12 +198,15 @@ class CV_EXPORTS_W Stitcher
     CV_WRAP bool waveCorrection() const { return do_wave_correct_; }
     CV_WRAP void setWaveCorrection(bool flag) { do_wave_correct_ = flag; }
 
+    CV_WRAP InterpolationFlags interpolationFlags() const { return interp_flags_; }
+    CV_WRAP void setInterpolationFlags(InterpolationFlags interp_flags) { interp_flags_ = interp_flags; }
+
     detail::WaveCorrectKind waveCorrectKind() const { return wave_correct_kind_; }
     void setWaveCorrectKind(detail::WaveCorrectKind kind) { wave_correct_kind_ = kind; }
 
-    Ptr<detail::FeaturesFinder> featuresFinder() { return features_finder_; }
-    const Ptr<detail::FeaturesFinder> featuresFinder() const { return features_finder_; }
-    void setFeaturesFinder(Ptr<detail::FeaturesFinder> features_finder)
+    Ptr<Feature2D> featuresFinder() { return features_finder_; }
+    const Ptr<Feature2D> featuresFinder() const { return features_finder_; }
+    void setFeaturesFinder(Ptr<Feature2D> features_finder)
         { features_finder_ = features_finder; }
 
     Ptr<detail::FeaturesMatcher> featuresMatcher() { return features_matcher_; }
@@ -153,6 +226,11 @@ class CV_EXPORTS_W Stitcher
     void setBundleAdjuster(Ptr<detail::BundleAdjusterBase> bundle_adjuster)
         { bundle_adjuster_ = bundle_adjuster; }
 
+    Ptr<detail::Estimator> estimator() { return estimator_; }
+    const Ptr<detail::Estimator> estimator() const { return estimator_; }
+    void setEstimator(Ptr<detail::Estimator> estimator)
+        { estimator_ = estimator; }
+
     Ptr<WarperCreator> warper() { return warper_; }
     const Ptr<WarperCreator> warper() const { return warper_; }
     void setWarper(Ptr<WarperCreator> creator) { warper_ = creator; }
@@ -170,18 +248,16 @@ class CV_EXPORTS_W Stitcher
     const Ptr<detail::Blender> blender() const { return blender_; }
     void setBlender(Ptr<detail::Blender> b) { blender_ = b; }
 
-    /** @overload */
-    CV_WRAP Status estimateTransform(InputArrayOfArrays images);
     /** @brief These functions try to match the given images and to estimate rotations of each camera.
 
     @note Use the functions only if you're aware of the stitching pipeline, otherwise use
     Stitcher::stitch.
 
     @param images Input images.
-    @param rois Region of interest rectangles.
+    @param masks Masks for each input image specifying where to look for keypoints (optional).
     @return Status code.
      */
-    Status estimateTransform(InputArrayOfArrays images, const std::vector<std::vector<Rect> > &rois);
+    CV_WRAP Status estimateTransform(InputArrayOfArrays images, InputArrayOfArrays masks = noArray());
 
     /** @overload */
     CV_WRAP Status composePanorama(OutputArray pano);
@@ -203,19 +279,18 @@ class CV_EXPORTS_W Stitcher
     /** @brief These functions try to stitch the given images.
 
     @param images Input images.
-    @param rois Region of interest rectangles.
+    @param masks Masks for each input image specifying where to look for keypoints (optional).
     @param pano Final pano.
     @return Status code.
      */
-    Status stitch(InputArrayOfArrays images, const std::vector<std::vector<Rect> > &rois, OutputArray pano);
+    CV_WRAP Status stitch(InputArrayOfArrays images, InputArrayOfArrays masks, OutputArray pano);
 
     std::vector<int> component() const { return indices_; }
     std::vector<detail::CameraParams> cameras() const { return cameras_; }
     CV_WRAP double workScale() const { return work_scale_; }
+    UMat resultMask() const { return result_mask_; }
 
 private:
-    //Stitcher() {}
-
     Status matchImages();
     Status estimateCameraParams();
 
@@ -223,10 +298,12 @@ class CV_EXPORTS_W Stitcher
     double seam_est_resol_;
     double compose_resol_;
     double conf_thresh_;
-    Ptr<detail::FeaturesFinder> features_finder_;
+    InterpolationFlags interp_flags_;
+    Ptr<Feature2D> features_finder_;
     Ptr<detail::FeaturesMatcher> features_matcher_;
     cv::UMat matching_mask_;
     Ptr<detail::BundleAdjusterBase> bundle_adjuster_;
+    Ptr<detail::Estimator> estimator_;
     bool do_wave_correct_;
     detail::WaveCorrectKind wave_correct_kind_;
     Ptr<WarperCreator> warper_;
@@ -235,23 +312,32 @@ class CV_EXPORTS_W Stitcher
     Ptr<detail::Blender> blender_;
 
     std::vector<cv::UMat> imgs_;
-    std::vector<std::vector<cv::Rect> > rois_;
+    std::vector<cv::UMat> masks_;
     std::vector<cv::Size> full_img_sizes_;
     std::vector<detail::ImageFeatures> features_;
     std::vector<detail::MatchesInfo> pairwise_matches_;
     std::vector<cv::UMat> seam_est_imgs_;
     std::vector<int> indices_;
     std::vector<detail::CameraParams> cameras_;
+    UMat result_mask_;
     double work_scale_;
     double seam_scale_;
     double seam_work_aspect_;
     double warped_image_scale_;
 };
 
-CV_EXPORTS_W Ptr<Stitcher> createStitcher(bool try_use_gpu = false);
+/**
+ * @deprecated use Stitcher::create
+ */
+CV_DEPRECATED Ptr<Stitcher> createStitcher(bool try_use_gpu = false);
+
+/**
+ * @deprecated use Stitcher::create
+ */
+CV_DEPRECATED Ptr<Stitcher> createStitcherScans(bool try_use_gpu = false);
 
 //! @} stitching
 
 } // namespace cv
 
-#endif // __OPENCV_STITCHING_STITCHER_HPP__
+#endif // OPENCV_STITCHING_STITCHER_HPP
diff --git a/IPL/include/opencv/opencv2/stitching/detail/autocalib.hpp b/IPL/include/opencv/opencv2/stitching/detail/autocalib.hpp
index ccc0aa1..8eb6212 100644
--- a/IPL/include/opencv/opencv2/stitching/detail/autocalib.hpp
+++ b/IPL/include/opencv/opencv2/stitching/detail/autocalib.hpp
@@ -40,8 +40,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_STITCHING_AUTOCALIB_HPP__
-#define __OPENCV_STITCHING_AUTOCALIB_HPP__
+#ifndef OPENCV_STITCHING_AUTOCALIB_HPP
+#define OPENCV_STITCHING_AUTOCALIB_HPP
 
 #include "opencv2/core.hpp"
 #include "matchers.hpp"
@@ -64,7 +64,7 @@ undergoes rotations around its centre only.
 See "Construction of Panoramic Image Mosaics with Global and Local Alignment"
 by Heung-Yeung Shum and Richard Szeliski.
  */
-void CV_EXPORTS focalsFromHomography(const Mat &H, double &f0, double &f1, bool &f0_ok, bool &f1_ok);
+void CV_EXPORTS_W focalsFromHomography(const Mat &H, double &f0, double &f1, bool &f0_ok, bool &f1_ok);
 
 /** @brief Estimates focal lengths for each given camera.
 
@@ -76,11 +76,11 @@ void CV_EXPORTS estimateFocal(const std::vector<ImageFeatures> &features,
                               const std::vector<MatchesInfo> &pairwise_matches,
                               std::vector<double> &focals);
 
-bool CV_EXPORTS calibrateRotatingCamera(const std::vector<Mat> &Hs, Mat &K);
+bool CV_EXPORTS_W calibrateRotatingCamera(const std::vector<Mat> &Hs,CV_OUT Mat &K);
 
 //! @} stitching_autocalib
 
 } // namespace detail
 } // namespace cv
 
-#endif // __OPENCV_STITCHING_AUTOCALIB_HPP__
+#endif // OPENCV_STITCHING_AUTOCALIB_HPP
diff --git a/IPL/include/opencv/opencv2/stitching/detail/blenders.hpp b/IPL/include/opencv/opencv2/stitching/detail/blenders.hpp
index 3d20036..ec35aa7 100644
--- a/IPL/include/opencv/opencv2/stitching/detail/blenders.hpp
+++ b/IPL/include/opencv/opencv2/stitching/detail/blenders.hpp
@@ -40,14 +40,15 @@
 //
 //M*/
 
-#ifndef __OPENCV_STITCHING_BLENDERS_HPP__
-#define __OPENCV_STITCHING_BLENDERS_HPP__
+#ifndef OPENCV_STITCHING_BLENDERS_HPP
+#define OPENCV_STITCHING_BLENDERS_HPP
 
 #if defined(NO)
 #  warning Detected Apple 'NO' macro definition, it can cause build conflicts. Please, include this header before any Apple headers.
 #endif
 
 #include "opencv2/core.hpp"
+#include "opencv2/core/cuda.hpp"
 
 namespace cv {
 namespace detail {
@@ -59,35 +60,35 @@ namespace detail {
 
 Simple blender which puts one image over another
 */
-class CV_EXPORTS Blender
+class CV_EXPORTS_W Blender
 {
 public:
     virtual ~Blender() {}
 
     enum { NO, FEATHER, MULTI_BAND };
-    static Ptr<Blender> createDefault(int type, bool try_gpu = false);
+    CV_WRAP static Ptr<Blender> createDefault(int type, bool try_gpu = false);
 
     /** @brief Prepares the blender for blending.
 
     @param corners Source images top-left corners
     @param sizes Source image sizes
      */
-    void prepare(const std::vector<Point> &corners, const std::vector<Size> &sizes);
+    CV_WRAP virtual void prepare(const std::vector<Point> &corners, const std::vector<Size> &sizes);
     /** @overload */
-    virtual void prepare(Rect dst_roi);
+    CV_WRAP virtual void prepare(Rect dst_roi);
     /** @brief Processes the image.
 
     @param img Source image
     @param mask Source image mask
     @param tl Source image top-left corners
      */
-    virtual void feed(InputArray img, InputArray mask, Point tl);
+    CV_WRAP virtual void feed(InputArray img, InputArray mask, Point tl);
     /** @brief Blends and returns the final pano.
 
     @param dst Final pano
     @param dst_mask Final pano mask
      */
-    virtual void blend(InputOutputArray dst, InputOutputArray dst_mask);
+    CV_WRAP virtual void blend(CV_IN_OUT InputOutputArray dst,CV_IN_OUT  InputOutputArray dst_mask);
 
 protected:
     UMat dst_, dst_mask_;
@@ -96,22 +97,22 @@ class CV_EXPORTS Blender
 
 /** @brief Simple blender which mixes images at its borders.
  */
-class CV_EXPORTS FeatherBlender : public Blender
+class CV_EXPORTS_W FeatherBlender : public Blender
 {
 public:
-    FeatherBlender(float sharpness = 0.02f);
+    CV_WRAP FeatherBlender(float sharpness = 0.02f);
 
-    float sharpness() const { return sharpness_; }
-    void setSharpness(float val) { sharpness_ = val; }
+    CV_WRAP float sharpness() const { return sharpness_; }
+    CV_WRAP void setSharpness(float val) { sharpness_ = val; }
 
-    void prepare(Rect dst_roi);
-    void feed(InputArray img, InputArray mask, Point tl);
-    void blend(InputOutputArray dst, InputOutputArray dst_mask);
+    CV_WRAP void prepare(Rect dst_roi) CV_OVERRIDE;
+    CV_WRAP void feed(InputArray img, InputArray mask, Point tl) CV_OVERRIDE;
+    CV_WRAP void blend(InputOutputArray dst, InputOutputArray dst_mask) CV_OVERRIDE;
 
     //! Creates weight maps for fixed set of source images by their masks and top-left corners.
     //! Final image can be obtained by simple weighting of the source images.
-    Rect createWeightMaps(const std::vector<UMat> &masks, const std::vector<Point> &corners,
-                          std::vector<UMat> &weight_maps);
+    CV_WRAP Rect createWeightMaps(const std::vector<UMat> &masks, const std::vector<Point> &corners,
+        CV_IN_OUT std::vector<UMat> &weight_maps);
 
 private:
     float sharpness_;
@@ -123,17 +124,17 @@ inline FeatherBlender::FeatherBlender(float _sharpness) { setSharpness(_sharpnes
 
 /** @brief Blender which uses multi-band blending algorithm (see @cite BA83).
  */
-class CV_EXPORTS MultiBandBlender : public Blender
+class CV_EXPORTS_W MultiBandBlender : public Blender
 {
 public:
-    MultiBandBlender(int try_gpu = false, int num_bands = 5, int weight_type = CV_32F);
+    CV_WRAP MultiBandBlender(int try_gpu = false, int num_bands = 5, int weight_type = CV_32F);
 
-    int numBands() const { return actual_num_bands_; }
-    void setNumBands(int val) { actual_num_bands_ = val; }
+    CV_WRAP int numBands() const { return actual_num_bands_; }
+    CV_WRAP void setNumBands(int val) { actual_num_bands_ = val; }
 
-    void prepare(Rect dst_roi);
-    void feed(InputArray img, InputArray mask, Point tl);
-    void blend(InputOutputArray dst, InputOutputArray dst_mask);
+    CV_WRAP void prepare(Rect dst_roi) CV_OVERRIDE;
+    CV_WRAP void feed(InputArray img, InputArray mask, Point tl) CV_OVERRIDE;
+    CV_WRAP void blend(CV_IN_OUT InputOutputArray dst, CV_IN_OUT InputOutputArray dst_mask) CV_OVERRIDE;
 
 private:
     int actual_num_bands_, num_bands_;
@@ -142,26 +143,42 @@ class CV_EXPORTS MultiBandBlender : public Blender
     Rect dst_roi_final_;
     bool can_use_gpu_;
     int weight_type_; //CV_32F or CV_16S
+#if defined(HAVE_OPENCV_CUDAARITHM) && defined(HAVE_OPENCV_CUDAWARPING)
+    std::vector<cuda::GpuMat> gpu_dst_pyr_laplace_;
+    std::vector<cuda::GpuMat> gpu_dst_band_weights_;
+    std::vector<Point> gpu_tl_points_;
+    std::vector<cuda::GpuMat> gpu_imgs_with_border_;
+    std::vector<std::vector<cuda::GpuMat> > gpu_weight_pyr_gauss_vec_;
+    std::vector<std::vector<cuda::GpuMat> > gpu_src_pyr_laplace_vec_;
+    std::vector<std::vector<cuda::GpuMat> > gpu_ups_;
+    cuda::GpuMat gpu_dst_mask_;
+    cuda::GpuMat gpu_mask_;
+    cuda::GpuMat gpu_img_;
+    cuda::GpuMat gpu_weight_map_;
+    cuda::GpuMat gpu_add_mask_;
+    int gpu_feed_idx_;
+    bool gpu_initialized_;
+#endif
 };
 
 
 //////////////////////////////////////////////////////////////////////////////
 // Auxiliary functions
 
-void CV_EXPORTS normalizeUsingWeightMap(InputArray weight, InputOutputArray src);
+void CV_EXPORTS_W normalizeUsingWeightMap(InputArray weight, CV_IN_OUT InputOutputArray src);
 
-void CV_EXPORTS createWeightMap(InputArray mask, float sharpness, InputOutputArray weight);
+void CV_EXPORTS_W createWeightMap(InputArray mask, float sharpness, CV_IN_OUT InputOutputArray weight);
 
-void CV_EXPORTS createLaplacePyr(InputArray img, int num_levels, std::vector<UMat>& pyr);
-void CV_EXPORTS createLaplacePyrGpu(InputArray img, int num_levels, std::vector<UMat>& pyr);
+void CV_EXPORTS_W createLaplacePyr(InputArray img, int num_levels, CV_IN_OUT std::vector<UMat>& pyr);
+void CV_EXPORTS_W createLaplacePyrGpu(InputArray img, int num_levels, CV_IN_OUT std::vector<UMat>& pyr);
 
 // Restores source image
-void CV_EXPORTS restoreImageFromLaplacePyr(std::vector<UMat>& pyr);
-void CV_EXPORTS restoreImageFromLaplacePyrGpu(std::vector<UMat>& pyr);
+void CV_EXPORTS_W restoreImageFromLaplacePyr(CV_IN_OUT std::vector<UMat>& pyr);
+void CV_EXPORTS_W restoreImageFromLaplacePyrGpu(CV_IN_OUT std::vector<UMat>& pyr);
 
 //! @}
 
 } // namespace detail
 } // namespace cv
 
-#endif // __OPENCV_STITCHING_BLENDERS_HPP__
+#endif // OPENCV_STITCHING_BLENDERS_HPP
diff --git a/IPL/include/opencv/opencv2/stitching/detail/camera.hpp b/IPL/include/opencv/opencv2/stitching/detail/camera.hpp
index c231ba5..14ecf60 100644
--- a/IPL/include/opencv/opencv2/stitching/detail/camera.hpp
+++ b/IPL/include/opencv/opencv2/stitching/detail/camera.hpp
@@ -40,8 +40,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_STITCHING_CAMERA_HPP__
-#define __OPENCV_STITCHING_CAMERA_HPP__
+#ifndef OPENCV_STITCHING_CAMERA_HPP
+#define OPENCV_STITCHING_CAMERA_HPP
 
 #include "opencv2/core.hpp"
 
@@ -55,19 +55,19 @@ namespace detail {
 
 @note Translation is assumed to be zero during the whole stitching pipeline. :
  */
-struct CV_EXPORTS CameraParams
+struct CV_EXPORTS_W_SIMPLE CameraParams
 {
     CameraParams();
     CameraParams(const CameraParams& other);
-    const CameraParams& operator =(const CameraParams& other);
-    Mat K() const;
+    CameraParams& operator =(const CameraParams& other);
+    CV_WRAP Mat K() const;
 
-    double focal; // Focal length
-    double aspect; // Aspect ratio
-    double ppx; // Principal point X
-    double ppy; // Principal point Y
-    Mat R; // Rotation
-    Mat t; // Translation
+    CV_PROP_RW double focal; // Focal length
+    CV_PROP_RW double aspect; // Aspect ratio
+    CV_PROP_RW double ppx; // Principal point X
+    CV_PROP_RW double ppy; // Principal point Y
+    CV_PROP_RW Mat R; // Rotation
+    CV_PROP_RW Mat t; // Translation
 };
 
 //! @}
@@ -75,4 +75,4 @@ struct CV_EXPORTS CameraParams
 } // namespace detail
 } // namespace cv
 
-#endif // #ifndef __OPENCV_STITCHING_CAMERA_HPP__
+#endif // #ifndef OPENCV_STITCHING_CAMERA_HPP
diff --git a/IPL/include/opencv/opencv2/stitching/detail/exposure_compensate.hpp b/IPL/include/opencv/opencv2/stitching/detail/exposure_compensate.hpp
index 7855956..2b76d09 100644
--- a/IPL/include/opencv/opencv2/stitching/detail/exposure_compensate.hpp
+++ b/IPL/include/opencv/opencv2/stitching/detail/exposure_compensate.hpp
@@ -40,8 +40,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_STITCHING_EXPOSURE_COMPENSATE_HPP__
-#define __OPENCV_STITCHING_EXPOSURE_COMPENSATE_HPP__
+#ifndef OPENCV_STITCHING_EXPOSURE_COMPENSATE_HPP
+#define OPENCV_STITCHING_EXPOSURE_COMPENSATE_HPP
 
 #if defined(NO)
 #  warning Detected Apple 'NO' macro definition, it can cause build conflicts. Please, include this header before any Apple headers.
@@ -57,80 +57,174 @@ namespace detail {
 
 /** @brief Base class for all exposure compensators.
  */
-class CV_EXPORTS ExposureCompensator
+class CV_EXPORTS_W ExposureCompensator
 {
 public:
+    ExposureCompensator(): updateGain(true) {} // updateGain starts out true
     virtual ~ExposureCompensator() {}
 
-    enum { NO, GAIN, GAIN_BLOCKS };
-    static Ptr<ExposureCompensator> createDefault(int type);
+    enum { NO, GAIN, GAIN_BLOCKS, CHANNELS, CHANNELS_BLOCKS };
+    CV_WRAP static Ptr<ExposureCompensator> createDefault(int type);
 
     /**
     @param corners Source image top-left corners
     @param images Source images
     @param masks Image masks to update (second value in pair specifies the value which should be used
     to detect where image is)
-     */
-    void feed(const std::vector<Point> &corners, const std::vector<UMat> &images,
-              const std::vector<UMat> &masks);
+        */
+    CV_WRAP void feed(const std::vector<Point> &corners, const std::vector<UMat> &images,
+        const std::vector<UMat> &masks);
     /** @overload */
     virtual void feed(const std::vector<Point> &corners, const std::vector<UMat> &images,
-                      const std::vector<std::pair<UMat,uchar> > &masks) = 0;
+        const std::vector<std::pair<UMat, uchar> > &masks) = 0;
     /** @brief Compensate exposure in the specified image.
 
     @param index Image index
     @param corner Image top-left corner
     @param image Image to process
     @param mask Image mask
-     */
-    virtual void apply(int index, Point corner, InputOutputArray image, InputArray mask) = 0;
+        */
+    CV_WRAP virtual void apply(int index, Point corner, InputOutputArray image, InputArray mask) = 0;
+    CV_WRAP virtual void getMatGains(CV_OUT std::vector<Mat>& ) {CV_Error(Error::StsInternal, "");}; // base version only reports Error::StsInternal via CV_Error; subclasses override it
+    CV_WRAP virtual void setMatGains(std::vector<Mat>& ) { CV_Error(Error::StsInternal, ""); }; // base version only reports Error::StsInternal via CV_Error; subclasses override it
+    CV_WRAP void setUpdateGain(bool b) { updateGain = b; }; // plain setter for the updateGain flag
+    CV_WRAP bool getUpdateGain() { return updateGain; }; // plain getter for the updateGain flag
+protected :
+    bool updateGain; // initialized to true by the constructor; changed only through setUpdateGain() in this class
 };
 
 /** @brief Stub exposure compensator which does nothing.
  */
-class CV_EXPORTS NoExposureCompensator : public ExposureCompensator
+class CV_EXPORTS_W NoExposureCompensator : public ExposureCompensator
 {
 public:
     void feed(const std::vector<Point> &/*corners*/, const std::vector<UMat> &/*images*/,
-              const std::vector<std::pair<UMat,uchar> > &/*masks*/) { }
-    void apply(int /*index*/, Point /*corner*/, InputOutputArray /*image*/, InputArray /*mask*/) { }
+              const std::vector<std::pair<UMat,uchar> > &/*masks*/) CV_OVERRIDE { }
+    CV_WRAP void apply(int /*index*/, Point /*corner*/, InputOutputArray /*image*/, InputArray /*mask*/) CV_OVERRIDE { }
+    CV_WRAP void getMatGains(CV_OUT std::vector<Mat>& umv) CV_OVERRIDE { umv.clear(); return; };
+    CV_WRAP void setMatGains(std::vector<Mat>& umv) CV_OVERRIDE { umv.clear(); return; };
 };
 
 /** @brief Exposure compensator which tries to remove exposure related artifacts by adjusting image
 intensities, see @cite BL07 and @cite WJ10 for details.
  */
-class CV_EXPORTS GainCompensator : public ExposureCompensator
+class CV_EXPORTS_W GainCompensator : public ExposureCompensator
 {
 public:
+    // This Constructor only exists to make source level compatibility detector happy
+    CV_WRAP GainCompensator()
+            : GainCompensator(1) {}
+    CV_WRAP GainCompensator(int nr_feeds)
+            : nr_feeds_(nr_feeds) {}
     void feed(const std::vector<Point> &corners, const std::vector<UMat> &images,
-              const std::vector<std::pair<UMat,uchar> > &masks);
-    void apply(int index, Point corner, InputOutputArray image, InputArray mask);
+              const std::vector<std::pair<UMat,uchar> > &masks) CV_OVERRIDE;
+    void singleFeed(const std::vector<Point> &corners, const std::vector<UMat> &images,
+                    const std::vector<std::pair<UMat,uchar> > &masks);
+    CV_WRAP void apply(int index, Point corner, InputOutputArray image, InputArray mask) CV_OVERRIDE;
+    CV_WRAP void getMatGains(CV_OUT std::vector<Mat>& umv) CV_OVERRIDE ;
+    CV_WRAP void setMatGains(std::vector<Mat>& umv) CV_OVERRIDE ;
+    CV_WRAP void setNrFeeds(int nr_feeds) { nr_feeds_ = nr_feeds; }
+    CV_WRAP int getNrFeeds() { return nr_feeds_; }
     std::vector<double> gains() const;
 
 private:
     Mat_<double> gains_;
+    int nr_feeds_;
 };
 
-/** @brief Exposure compensator which tries to remove exposure related artifacts by adjusting image block
-intensities, see @cite UES01 for details.
+/** @brief Exposure compensator which tries to remove exposure related artifacts by adjusting image
+intensities on each channel independently.
  */
-class CV_EXPORTS BlocksGainCompensator : public ExposureCompensator
+class CV_EXPORTS_W ChannelsCompensator : public ExposureCompensator
 {
 public:
-    BlocksGainCompensator(int bl_width = 32, int bl_height = 32)
-            : bl_width_(bl_width), bl_height_(bl_height) {}
+    CV_WRAP ChannelsCompensator(int nr_feeds=1) : nr_feeds_(nr_feeds) {}
+    void feed(const std::vector<Point> &corners, const std::vector<UMat> &images,
+              const std::vector<std::pair<UMat,uchar> > &masks) CV_OVERRIDE;
+    CV_WRAP void apply(int index, Point corner, InputOutputArray image, InputArray mask) CV_OVERRIDE;
+    CV_WRAP void getMatGains(CV_OUT std::vector<Mat>& umv) CV_OVERRIDE;
+    CV_WRAP void setMatGains(std::vector<Mat>& umv) CV_OVERRIDE;
+    CV_WRAP void setNrFeeds(int nr_feeds) { nr_feeds_ = nr_feeds; }
+    CV_WRAP int getNrFeeds() { return nr_feeds_; }
+    std::vector<Scalar> gains() const { return gains_; }
+
+private:
+    std::vector<Scalar> gains_;
+    int nr_feeds_;
+};
+
+/** @brief Exposure compensator which tries to remove exposure related artifacts by adjusting image blocks.
+ */
+class CV_EXPORTS_W BlocksCompensator : public ExposureCompensator
+{
+public:
+    BlocksCompensator(int bl_width=32, int bl_height=32, int nr_feeds=1)
+            : bl_width_(bl_width), bl_height_(bl_height), nr_feeds_(nr_feeds), nr_gain_filtering_iterations_(2) {} // filtering iteration count is not a constructor argument; it always starts at 2
+    CV_WRAP void apply(int index, Point corner, InputOutputArray image, InputArray mask) CV_OVERRIDE;
+    CV_WRAP void getMatGains(CV_OUT std::vector<Mat>& umv) CV_OVERRIDE;
+    CV_WRAP void setMatGains(std::vector<Mat>& umv) CV_OVERRIDE;
+    CV_WRAP void setNrFeeds(int nr_feeds) { nr_feeds_ = nr_feeds; }
+    CV_WRAP int getNrFeeds() { return nr_feeds_; }
+    CV_WRAP void setBlockSize(int width, int height) { bl_width_ = width; bl_height_ = height; }
+    CV_WRAP void setBlockSize(Size size) { setBlockSize(size.width, size.height); } // convenience overload, forwards to the (width, height) version
+    CV_WRAP Size getBlockSize() const { return Size(bl_width_, bl_height_); }
+    CV_WRAP void setNrGainsFilteringIterations(int nr_iterations) { nr_gain_filtering_iterations_ = nr_iterations; }
+    CV_WRAP int getNrGainsFilteringIterations() const { return nr_gain_filtering_iterations_; }
+
+protected:
+    template<class Compensator> // non-virtual helper; BlocksGainCompensator / BlocksChannelsCompensator declare the virtual feed() overrides
     void feed(const std::vector<Point> &corners, const std::vector<UMat> &images,
               const std::vector<std::pair<UMat,uchar> > &masks);
-    void apply(int index, Point corner, InputOutputArray image, InputArray mask);
 
 private:
+    UMat getGainMap(const GainCompensator& compensator, int bl_idx, Size bl_per_img); // overload for GainCompensator
+    UMat getGainMap(const ChannelsCompensator& compensator, int bl_idx, Size bl_per_img); // overload for ChannelsCompensator
+
     int bl_width_, bl_height_;
     std::vector<UMat> gain_maps_;
+    int nr_feeds_; // constructor default: 1; see setNrFeeds()/getNrFeeds()
+    int nr_gain_filtering_iterations_; // set to 2 by the constructor; see setNrGainsFilteringIterations()
+};
+
+/** @brief Exposure compensator which tries to remove exposure related artifacts by adjusting image block
+intensities, see @cite UES01 for details.
+ */
+class CV_EXPORTS_W BlocksGainCompensator : public BlocksCompensator
+{
+public:
+    // This Constructor only exists to make source level compatibility detector happy
+    CV_WRAP BlocksGainCompensator(int bl_width = 32, int bl_height = 32)
+            : BlocksGainCompensator(bl_width, bl_height, 1) {}
+    CV_WRAP BlocksGainCompensator(int bl_width, int bl_height, int nr_feeds)
+            : BlocksCompensator(bl_width, bl_height, nr_feeds) {}
+
+    void feed(const std::vector<Point> &corners, const std::vector<UMat> &images,
+              const std::vector<std::pair<UMat,uchar> > &masks) CV_OVERRIDE;
+
+    // This function only exists to make source level compatibility detector happy
+    CV_WRAP void apply(int index, Point corner, InputOutputArray image, InputArray mask) CV_OVERRIDE {
+        BlocksCompensator::apply(index, corner, image, mask); }
+    // This function only exists to make source level compatibility detector happy
+    CV_WRAP void getMatGains(CV_OUT std::vector<Mat>& umv) CV_OVERRIDE { BlocksCompensator::getMatGains(umv); }
+    // This function only exists to make source level compatibility detector happy
+    CV_WRAP void setMatGains(std::vector<Mat>& umv) CV_OVERRIDE { BlocksCompensator::setMatGains(umv); }
 };
 
+/** @brief Exposure compensator which tries to remove exposure related artifacts by adjusting image blocks
+on each channel.
+ */
+class CV_EXPORTS_W BlocksChannelsCompensator : public BlocksCompensator
+{
+public:
+    CV_WRAP BlocksChannelsCompensator(int bl_width=32, int bl_height=32, int nr_feeds=1)
+            : BlocksCompensator(bl_width, bl_height, nr_feeds) {} // all arguments are forwarded unchanged to BlocksCompensator
+
+    void feed(const std::vector<Point> &corners, const std::vector<UMat> &images,
+              const std::vector<std::pair<UMat,uchar> > &masks) CV_OVERRIDE;
+};
 //! @}
 
 } // namespace detail
 } // namespace cv
 
-#endif // __OPENCV_STITCHING_EXPOSURE_COMPENSATE_HPP__
+#endif // OPENCV_STITCHING_EXPOSURE_COMPENSATE_HPP
diff --git a/IPL/include/opencv/opencv2/stitching/detail/matchers.hpp b/IPL/include/opencv/opencv2/stitching/detail/matchers.hpp
index 8f34bd2..ef4684f 100644
--- a/IPL/include/opencv/opencv2/stitching/detail/matchers.hpp
+++ b/IPL/include/opencv/opencv2/stitching/detail/matchers.hpp
@@ -40,18 +40,14 @@
 //
 //M*/
 
-#ifndef __OPENCV_STITCHING_MATCHERS_HPP__
-#define __OPENCV_STITCHING_MATCHERS_HPP__
+#ifndef OPENCV_STITCHING_MATCHERS_HPP
+#define OPENCV_STITCHING_MATCHERS_HPP
 
 #include "opencv2/core.hpp"
 #include "opencv2/features2d.hpp"
 
 #include "opencv2/opencv_modules.hpp"
 
-#ifdef HAVE_OPENCV_XFEATURES2D
-#  include "opencv2/xfeatures2d/cuda.hpp"
-#endif
-
 namespace cv {
 namespace detail {
 
@@ -59,132 +55,77 @@ namespace detail {
 //! @{
 
 /** @brief Structure containing image keypoints and descriptors. */
-struct CV_EXPORTS ImageFeatures
+struct CV_EXPORTS_W_SIMPLE ImageFeatures
 {
-    int img_idx;
-    Size img_size;
+    CV_PROP_RW int img_idx;
+    CV_PROP_RW Size img_size;
     std::vector<KeyPoint> keypoints;
-    UMat descriptors;
+    CV_PROP_RW UMat descriptors;
+    CV_WRAP std::vector<KeyPoint> getKeypoints() { return keypoints; };
 };
+/** @brief Computes features (detail::ImageFeatures) for a set of images using the given Feature2D instance.
 
-/** @brief Feature finders base class */
-class CV_EXPORTS FeaturesFinder
-{
-public:
-    virtual ~FeaturesFinder() {}
-    /** @overload */
-    void operator ()(InputArray image, ImageFeatures &features);
-    /** @brief Finds features in the given image.
-
-    @param image Source image
-    @param features Found features
-    @param rois Regions of interest
-
-    @sa detail::ImageFeatures, Rect_
-    */
-    void operator ()(InputArray image, ImageFeatures &features, const std::vector<cv::Rect> &rois);
-    /** @brief Frees unused memory allocated before if there is any. */
-    virtual void collectGarbage() {}
-
-protected:
-    /** @brief This method must implement features finding logic in order to make the wrappers
-    detail::FeaturesFinder::operator()_ work.
-
-    @param image Source image
-    @param features Found features
-
-    @sa detail::ImageFeatures */
-    virtual void find(InputArray image, ImageFeatures &features) = 0;
-};
-
-/** @brief SURF features finder.
-
-@sa detail::FeaturesFinder, SURF
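+@param featuresFinder Feature2D instance used to compute the features
+@param images Input images
+@param features Output vector of ImageFeatures (CV_OUT)
+@param masks Optional masks (defaults to noArray())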
 */
-class CV_EXPORTS SurfFeaturesFinder : public FeaturesFinder
-{
-public:
-    SurfFeaturesFinder(double hess_thresh = 300., int num_octaves = 3, int num_layers = 4,
-                       int num_octaves_descr = /*4*/3, int num_layers_descr = /*2*/4);
-
-private:
-    void find(InputArray image, ImageFeatures &features);
-
-    Ptr<FeatureDetector> detector_;
-    Ptr<DescriptorExtractor> extractor_;
-    Ptr<Feature2D> surf;
-};
-
-/** @brief ORB features finder. :
-
-@sa detail::FeaturesFinder, ORB
+CV_EXPORTS_W void computeImageFeatures(
+    const Ptr<Feature2D> &featuresFinder,
+    InputArrayOfArrays  images,
+    CV_OUT std::vector<ImageFeatures> &features,
+    InputArrayOfArrays masks = noArray());
+
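+/** @brief Single-image overload of computeImageFeatures (exported to the bindings as computeImageFeatures2).
+
+@param featuresFinder Feature2D instance used to compute the features
+@param image Input image
+@param features Output ImageFeatures (CV_OUT)
+@param mask Optional mask (defaults to noArray())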
 */
-class CV_EXPORTS OrbFeaturesFinder : public FeaturesFinder
-{
-public:
-    OrbFeaturesFinder(Size _grid_size = Size(3,1), int nfeatures=1500, float scaleFactor=1.3f, int nlevels=5);
-
-private:
-    void find(InputArray image, ImageFeatures &features);
-
-    Ptr<ORB> orb;
-    Size grid_size;
-};
-
-
-#ifdef HAVE_OPENCV_XFEATURES2D
-class CV_EXPORTS SurfFeaturesFinderGpu : public FeaturesFinder
-{
-public:
-    SurfFeaturesFinderGpu(double hess_thresh = 300., int num_octaves = 3, int num_layers = 4,
-                          int num_octaves_descr = 4, int num_layers_descr = 2);
-
-    void collectGarbage();
-
-private:
-    void find(InputArray image, ImageFeatures &features);
-
-    cuda::GpuMat image_;
-    cuda::GpuMat gray_image_;
-    cuda::SURF_CUDA surf_;
-    cuda::GpuMat keypoints_;
-    cuda::GpuMat descriptors_;
-    int num_octaves_, num_layers_;
-    int num_octaves_descr_, num_layers_descr_;
-};
-#endif
+CV_EXPORTS_AS(computeImageFeatures2) void computeImageFeatures(
+    const Ptr<Feature2D> &featuresFinder,
+    InputArray image,
+    CV_OUT ImageFeatures &features,
+    InputArray mask = noArray());
 
 /** @brief Structure containing information about matches between two images.
 
-It's assumed that there is a homography between those images.
+It's assumed that there is a transformation between those images. Transformation may be
+homography or affine transformation based on selected matcher.
+
+@sa detail::FeaturesMatcher
 */
-struct CV_EXPORTS MatchesInfo
+struct CV_EXPORTS_W_SIMPLE MatchesInfo
 {
     MatchesInfo();
     MatchesInfo(const MatchesInfo &other);
-    const MatchesInfo& operator =(const MatchesInfo &other);
+    MatchesInfo& operator =(const MatchesInfo &other);
 
-    int src_img_idx, dst_img_idx;       //!< Images indices (optional)
+    CV_PROP_RW int src_img_idx;
+    CV_PROP_RW int dst_img_idx;       //!< Images indices (optional)
     std::vector<DMatch> matches;
     std::vector<uchar> inliers_mask;    //!< Geometrically consistent matches mask
-    int num_inliers;                    //!< Number of geometrically consistent matches
-    Mat H;                              //!< Estimated homography
-    double confidence;                  //!< Confidence two images are from the same panorama
+    CV_PROP_RW int num_inliers;                    //!< Number of geometrically consistent matches
+    CV_PROP_RW Mat H;                              //!< Estimated transformation
+    CV_PROP_RW double confidence;                  //!< Confidence two images are from the same panorama
+    CV_WRAP std::vector<DMatch> getMatches() { return matches; };
+    CV_WRAP std::vector<uchar> getInliers() { return inliers_mask; };
 };
 
 /** @brief Feature matchers base class. */
-class CV_EXPORTS FeaturesMatcher
+class CV_EXPORTS_W FeaturesMatcher
 {
 public:
-    virtual ~FeaturesMatcher() {}
+    CV_WRAP virtual ~FeaturesMatcher() {}
 
     /** @overload
     @param features1 First image features
     @param features2 Second image features
     @param matches_info Found matches
     */
-    void operator ()(const ImageFeatures &features1, const ImageFeatures &features2,
-                     MatchesInfo& matches_info) { match(features1, features2, matches_info); }
+    CV_WRAP_AS(apply) void operator ()(const ImageFeatures &features1, const ImageFeatures &features2,
+                     CV_OUT MatchesInfo& matches_info) { match(features1, features2, matches_info); }
 
     /** @brief Performs images matching.
 
@@ -196,16 +137,16 @@ class CV_EXPORTS FeaturesMatcher
 
     @sa detail::MatchesInfo
     */
-    void operator ()(const std::vector<ImageFeatures> &features, std::vector<MatchesInfo> &pairwise_matches,
+    CV_WRAP_AS(apply2) void operator ()(const std::vector<ImageFeatures> &features, CV_OUT std::vector<MatchesInfo> &pairwise_matches,
                      const cv::UMat &mask = cv::UMat());
 
     /** @return True, if it's possible to use the same matcher instance in parallel, false otherwise
     */
-    bool isThreadSafe() const { return is_thread_safe_; }
+   CV_WRAP bool isThreadSafe() const { return is_thread_safe_; }
 
     /** @brief Frees unused memory allocated before if there is any.
     */
-    virtual void collectGarbage() {}
+   CV_WRAP virtual void collectGarbage() {}
 
 protected:
     FeaturesMatcher(bool is_thread_safe = false) : is_thread_safe_(is_thread_safe) {}
@@ -228,7 +169,7 @@ ratio between descriptor distances is greater than the threshold match_conf
 
 @sa detail::FeaturesMatcher
  */
-class CV_EXPORTS BestOf2NearestMatcher : public FeaturesMatcher
+class CV_EXPORTS_W BestOf2NearestMatcher : public FeaturesMatcher
 {
 public:
     /** @brief Constructs a "best of 2 nearest" matcher.
@@ -240,23 +181,25 @@ class CV_EXPORTS BestOf2NearestMatcher : public FeaturesMatcher
     @param num_matches_thresh2 Minimum number of matches required for the 2D projective transform
     re-estimation on inliers
      */
-    BestOf2NearestMatcher(bool try_use_gpu = false, float match_conf = 0.3f, int num_matches_thresh1 = 6,
+    CV_WRAP BestOf2NearestMatcher(bool try_use_gpu = false, float match_conf = 0.3f, int num_matches_thresh1 = 6,
                           int num_matches_thresh2 = 6);
 
-    void collectGarbage();
+    CV_WRAP void collectGarbage() CV_OVERRIDE;
+    CV_WRAP static Ptr<BestOf2NearestMatcher> create(bool try_use_gpu = false, float match_conf = 0.3f, int num_matches_thresh1 = 6,
+        int num_matches_thresh2 = 6);
 
 protected:
-    void match(const ImageFeatures &features1, const ImageFeatures &features2, MatchesInfo &matches_info);
 
+    void match(const ImageFeatures &features1, const ImageFeatures &features2, MatchesInfo &matches_info) CV_OVERRIDE;
     int num_matches_thresh1_;
     int num_matches_thresh2_;
     Ptr<FeaturesMatcher> impl_;
 };
 
-class CV_EXPORTS BestOf2NearestRangeMatcher : public BestOf2NearestMatcher
+class CV_EXPORTS_W BestOf2NearestRangeMatcher : public BestOf2NearestMatcher
 {
 public:
-    BestOf2NearestRangeMatcher(int range_width = 5, bool try_use_gpu = false, float match_conf = 0.3f,
+    CV_WRAP BestOf2NearestRangeMatcher(int range_width = 5, bool try_use_gpu = false, float match_conf = 0.3f,
                             int num_matches_thresh1 = 6, int num_matches_thresh2 = 6);
 
     void operator ()(const std::vector<ImageFeatures> &features, std::vector<MatchesInfo> &pairwise_matches,
@@ -267,9 +210,44 @@ class CV_EXPORTS BestOf2NearestRangeMatcher : public BestOf2NearestMatcher
     int range_width_;
 };
 
+/** @brief Features matcher similar to cv::detail::BestOf2NearestMatcher which
+finds two best matches for each feature and leaves the best one only if the
+ratio between descriptor distances is greater than the threshold match_conf.
+
+Unlike cv::detail::BestOf2NearestMatcher this matcher uses affine
+transformation (affine transformation estimate will be placed in matches_info).
+
+@sa cv::detail::FeaturesMatcher cv::detail::BestOf2NearestMatcher
+ */
+class CV_EXPORTS_W AffineBestOf2NearestMatcher : public BestOf2NearestMatcher
+{
+public:
+    /** @brief Constructs a "best of 2 nearest" matcher that expects affine transformation
+    between images
+
+    @param full_affine whether to use full affine transformation with 6 degrees of freedom or reduced
+    transformation with 4 degrees of freedom using only rotation, translation and uniform scaling
+    @param try_use_gpu Should try to use GPU or not
+    @param match_conf Match distances ratio threshold
+    @param num_matches_thresh1 Minimum number of matches required for the 2D affine transform
+    estimation used in the inliers classification step
+
+    @sa cv::estimateAffine2D cv::estimateAffinePartial2D
+     */
+    CV_WRAP AffineBestOf2NearestMatcher(bool full_affine = false, bool try_use_gpu = false,
+                                float match_conf = 0.3f, int num_matches_thresh1 = 6) :
+        BestOf2NearestMatcher(try_use_gpu, match_conf, num_matches_thresh1, num_matches_thresh1), // num_matches_thresh1 is passed for both base-class thresholds
+        full_affine_(full_affine) {}
+
+protected:
+    void match(const ImageFeatures &features1, const ImageFeatures &features2, MatchesInfo &matches_info) CV_OVERRIDE;
+
+    bool full_affine_; //!< true: full 6-DOF affine; false: reduced 4-DOF transformation (see constructor)
+};
+
 //! @} stitching_match
 
 } // namespace detail
 } // namespace cv
 
-#endif // __OPENCV_STITCHING_MATCHERS_HPP__
+#endif // OPENCV_STITCHING_MATCHERS_HPP
diff --git a/IPL/include/opencv/opencv2/stitching/detail/motion_estimators.hpp b/IPL/include/opencv/opencv2/stitching/detail/motion_estimators.hpp
index 2c86e63..ff05af1 100644
--- a/IPL/include/opencv/opencv2/stitching/detail/motion_estimators.hpp
+++ b/IPL/include/opencv/opencv2/stitching/detail/motion_estimators.hpp
@@ -40,8 +40,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_STITCHING_MOTION_ESTIMATORS_HPP__
-#define __OPENCV_STITCHING_MOTION_ESTIMATORS_HPP__
+#ifndef OPENCV_STITCHING_MOTION_ESTIMATORS_HPP
+#define OPENCV_STITCHING_MOTION_ESTIMATORS_HPP
 
 #include "opencv2/core.hpp"
 #include "matchers.hpp"
@@ -62,7 +62,7 @@ cameras.
 @note The coordinate system origin is implementation-dependent, but you can always normalize the
 rotations in respect to the first camera, for instance. :
  */
-class CV_EXPORTS Estimator
+class CV_EXPORTS_W Estimator
 {
 public:
     virtual ~Estimator() {}
@@ -74,10 +74,12 @@ class CV_EXPORTS Estimator
     @param cameras Estimated camera parameters
     @return True in case of success, false otherwise
      */
-    bool operator ()(const std::vector<ImageFeatures> &features,
-                     const std::vector<MatchesInfo> &pairwise_matches,
-                     std::vector<CameraParams> &cameras)
-        { return estimate(features, pairwise_matches, cameras); }
+    CV_WRAP_AS(apply) bool operator ()(const std::vector<ImageFeatures> &features,
+        const std::vector<MatchesInfo> &pairwise_matches,
+        CV_OUT CV_IN_OUT std::vector<CameraParams> &cameras)
+    {
+        return estimate(features, pairwise_matches, cameras);
+    }
 
 protected:
     /** @brief This method must implement camera parameters estimation logic in order to make the wrapper
@@ -90,42 +92,59 @@ class CV_EXPORTS Estimator
      */
     virtual bool estimate(const std::vector<ImageFeatures> &features,
                           const std::vector<MatchesInfo> &pairwise_matches,
-                          std::vector<CameraParams> &cameras) = 0;
+                          CV_OUT std::vector<CameraParams> &cameras) = 0;
 };
 
 /** @brief Homography based rotation estimator.
  */
-class CV_EXPORTS HomographyBasedEstimator : public Estimator
+class CV_EXPORTS_W HomographyBasedEstimator : public Estimator
 {
 public:
-    HomographyBasedEstimator(bool is_focals_estimated = false)
+    CV_WRAP HomographyBasedEstimator(bool is_focals_estimated = false)
         : is_focals_estimated_(is_focals_estimated) {}
 
 private:
     virtual bool estimate(const std::vector<ImageFeatures> &features,
                           const std::vector<MatchesInfo> &pairwise_matches,
-                          std::vector<CameraParams> &cameras);
+                          std::vector<CameraParams> &cameras) CV_OVERRIDE;
 
     bool is_focals_estimated_;
 };
 
+/** @brief Affine transformation based estimator.
+
+This estimator uses pairwise transformations estimated by matcher to estimate
+final transformation for each camera.
+
+@sa cv::detail::HomographyBasedEstimator
+ */
+class CV_EXPORTS_W AffineBasedEstimator : public Estimator
+{
+public:
+    CV_WRAP AffineBasedEstimator(){}
+private:
+    virtual bool estimate(const std::vector<ImageFeatures> &features,
+                          const std::vector<MatchesInfo> &pairwise_matches,
+                          std::vector<CameraParams> &cameras) CV_OVERRIDE;
+};
+
 /** @brief Base class for all camera parameters refinement methods.
  */
-class CV_EXPORTS BundleAdjusterBase : public Estimator
+class CV_EXPORTS_W BundleAdjusterBase : public Estimator
 {
 public:
-    const Mat refinementMask() const { return refinement_mask_.clone(); }
-    void setRefinementMask(const Mat &mask)
+    CV_WRAP const Mat refinementMask() const { return refinement_mask_.clone(); }
+    CV_WRAP void setRefinementMask(const Mat &mask)
     {
         CV_Assert(mask.type() == CV_8U && mask.size() == Size(3, 3));
         refinement_mask_ = mask.clone();
     }
 
-    double confThresh() const { return conf_thresh_; }
-    void setConfThresh(double conf_thresh) { conf_thresh_ = conf_thresh; }
+    CV_WRAP double confThresh() const { return conf_thresh_; }
+    CV_WRAP void setConfThresh(double conf_thresh) { conf_thresh_ = conf_thresh; }
 
-    TermCriteria termCriteria() { return term_criteria_; }
-    void setTermCriteria(const TermCriteria& term_criteria) { term_criteria_ = term_criteria; }
+    CV_WRAP TermCriteria termCriteria() { return term_criteria_; }
+    CV_WRAP void setTermCriteria(const TermCriteria& term_criteria) { term_criteria_ = term_criteria; }
 
 protected:
     /** @brief Construct a bundle adjuster base instance.
@@ -134,8 +153,10 @@ class CV_EXPORTS BundleAdjusterBase : public Estimator
     @param num_errs_per_measurement Number of error terms (components) per match
      */
     BundleAdjusterBase(int num_params_per_cam, int num_errs_per_measurement)
-        : num_params_per_cam_(num_params_per_cam),
-          num_errs_per_measurement_(num_errs_per_measurement)
+        : num_images_(0), total_num_matches_(0),
+          num_params_per_cam_(num_params_per_cam),
+          num_errs_per_measurement_(num_errs_per_measurement),
+          features_(0), pairwise_matches_(0), conf_thresh_(0)
     {
         setRefinementMask(Mat::ones(3, 3, CV_8U));
         setConfThresh(1.);
@@ -145,7 +166,7 @@ class CV_EXPORTS BundleAdjusterBase : public Estimator
     // Runs bundle adjustment
     virtual bool estimate(const std::vector<ImageFeatures> &features,
                           const std::vector<MatchesInfo> &pairwise_matches,
-                          std::vector<CameraParams> &cameras);
+                          std::vector<CameraParams> &cameras) CV_OVERRIDE;
 
     /** @brief Sets initial camera parameter to refine.
 
@@ -184,7 +205,7 @@ class CV_EXPORTS BundleAdjusterBase : public Estimator
     // Threshold to filter out poorly matched image pairs
     double conf_thresh_;
 
-    //Levenberg–Marquardt algorithm termination criteria
+    //Levenberg-Marquardt algorithm termination criteria
     TermCriteria term_criteria_;
 
     // Camera parameters matrix (CV_64F)
@@ -195,22 +216,42 @@ class CV_EXPORTS BundleAdjusterBase : public Estimator
 };
 
 
+/** @brief Stub bundle adjuster that does nothing: estimate() returns true and every other override is empty.
+ */
+class CV_EXPORTS_W NoBundleAdjuster : public BundleAdjusterBase
+{
+public:
+    CV_WRAP NoBundleAdjuster() : BundleAdjusterBase(0, 0) {} // zero parameters per camera, zero error terms per match
+
+private:
+    bool estimate(const std::vector<ImageFeatures> &, const std::vector<MatchesInfo> &,
+                  std::vector<CameraParams> &) CV_OVERRIDE
+    {
+        return true; // arguments are unnamed and unused; always reports success
+    }
+    void setUpInitialCameraParams(const std::vector<CameraParams> &) CV_OVERRIDE {} // intentionally empty
+    void obtainRefinedCameraParams(std::vector<CameraParams> &) const CV_OVERRIDE {} // intentionally empty
+    void calcError(Mat &) CV_OVERRIDE {} // intentionally empty
+    void calcJacobian(Mat &) CV_OVERRIDE {} // intentionally empty
+};
+
+
 /** @brief Implementation of the camera parameters refinement algorithm which minimizes sum of the reprojection
 error squares
 
 It can estimate focal length, aspect ratio, principal point.
 You can affect only on them via the refinement mask.
  */
-class CV_EXPORTS BundleAdjusterReproj : public BundleAdjusterBase
+class CV_EXPORTS_W BundleAdjusterReproj : public BundleAdjusterBase
 {
 public:
-    BundleAdjusterReproj() : BundleAdjusterBase(7, 2) {}
+    CV_WRAP BundleAdjusterReproj() : BundleAdjusterBase(7, 2) {}
 
 private:
-    void setUpInitialCameraParams(const std::vector<CameraParams> &cameras);
-    void obtainRefinedCameraParams(std::vector<CameraParams> &cameras) const;
-    void calcError(Mat &err);
-    void calcJacobian(Mat &jac);
+    void setUpInitialCameraParams(const std::vector<CameraParams> &cameras) CV_OVERRIDE;
+    void obtainRefinedCameraParams(std::vector<CameraParams> &cameras) const CV_OVERRIDE;
+    void calcError(Mat &err) CV_OVERRIDE;
+    void calcJacobian(Mat &jac) CV_OVERRIDE;
 
     Mat err1_, err2_;
 };
@@ -221,16 +262,64 @@ between the rays passing through the camera center and a feature. :
 
 It can estimate focal length. It ignores the refinement mask for now.
  */
-class CV_EXPORTS BundleAdjusterRay : public BundleAdjusterBase
+class CV_EXPORTS_W BundleAdjusterRay : public BundleAdjusterBase
+{
+public:
+    CV_WRAP BundleAdjusterRay() : BundleAdjusterBase(4, 3) {}
+
+private:
+    void setUpInitialCameraParams(const std::vector<CameraParams> &cameras) CV_OVERRIDE;
+    void obtainRefinedCameraParams(std::vector<CameraParams> &cameras) const CV_OVERRIDE;
+    void calcError(Mat &err) CV_OVERRIDE;
+    void calcJacobian(Mat &jac) CV_OVERRIDE;
+
+    Mat err1_, err2_;
+};
+
+
+/** @brief Bundle adjuster that expects an affine transformation,
+represented in homogeneous coordinates, in R for each camera param. Implements
+a camera parameter refinement algorithm which minimizes the sum of squared
+reprojection errors.
+
+It estimates all transformation parameters. Refinement mask is ignored.
+
+@sa AffineBasedEstimator AffineBestOf2NearestMatcher BundleAdjusterAffinePartial
+ */
+class CV_EXPORTS_W BundleAdjusterAffine : public BundleAdjusterBase
+{
+public:
+    CV_WRAP BundleAdjusterAffine() : BundleAdjusterBase(6, 2) {}
+
+private:
+    void setUpInitialCameraParams(const std::vector<CameraParams> &cameras) CV_OVERRIDE;
+    void obtainRefinedCameraParams(std::vector<CameraParams> &cameras) const CV_OVERRIDE;
+    void calcError(Mat &err) CV_OVERRIDE;
+    void calcJacobian(Mat &jac) CV_OVERRIDE;
+
+    Mat err1_, err2_;
+};
+
+
+/** @brief Bundle adjuster that expects an affine transformation with 4 DOF,
+represented in homogeneous coordinates, in R for each camera param. Implements
+a camera parameter refinement algorithm which minimizes the sum of squared
+reprojection errors.
+
+It estimates all transformation parameters. Refinement mask is ignored.
+
+@sa AffineBasedEstimator AffineBestOf2NearestMatcher BundleAdjusterAffine
+ */
+class CV_EXPORTS_W BundleAdjusterAffinePartial : public BundleAdjusterBase
 {
 public:
-    BundleAdjusterRay() : BundleAdjusterBase(4, 3) {}
+    CV_WRAP BundleAdjusterAffinePartial() : BundleAdjusterBase(4, 2) {}
 
 private:
-    void setUpInitialCameraParams(const std::vector<CameraParams> &cameras);
-    void obtainRefinedCameraParams(std::vector<CameraParams> &cameras) const;
-    void calcError(Mat &err);
-    void calcJacobian(Mat &jac);
+    void setUpInitialCameraParams(const std::vector<CameraParams> &cameras) CV_OVERRIDE;
+    void obtainRefinedCameraParams(std::vector<CameraParams> &cameras) const CV_OVERRIDE;
+    void calcError(Mat &err) CV_OVERRIDE;
+    void calcJacobian(Mat &jac) CV_OVERRIDE;
 
     Mat err1_, err2_;
 };
@@ -247,17 +336,17 @@ enum WaveCorrectKind
 @param rmats Camera rotation matrices.
 @param kind Correction kind, see detail::WaveCorrectKind.
  */
-void CV_EXPORTS waveCorrect(std::vector<Mat> &rmats, WaveCorrectKind kind);
+void CV_EXPORTS_W waveCorrect(CV_IN_OUT std::vector<Mat> &rmats, WaveCorrectKind kind);
 
 
 //////////////////////////////////////////////////////////////////////////////
 // Auxiliary functions
 
 // Returns matches graph representation in DOT language
-String CV_EXPORTS matchesGraphAsString(std::vector<String> &pathes, std::vector<MatchesInfo> &pairwise_matches,
+String CV_EXPORTS_W matchesGraphAsString(std::vector<String> &pathes, std::vector<MatchesInfo> &pairwise_matches,
                                             float conf_threshold);
 
-std::vector<int> CV_EXPORTS leaveBiggestComponent(
+CV_EXPORTS_W std::vector<int>  leaveBiggestComponent(
         std::vector<ImageFeatures> &features,
         std::vector<MatchesInfo> &pairwise_matches,
         float conf_threshold);
@@ -271,4 +360,4 @@ void CV_EXPORTS findMaxSpanningTree(
 } // namespace detail
 } // namespace cv
 
-#endif // __OPENCV_STITCHING_MOTION_ESTIMATORS_HPP__
+#endif // OPENCV_STITCHING_MOTION_ESTIMATORS_HPP
diff --git a/IPL/include/opencv/opencv2/stitching/detail/seam_finders.hpp b/IPL/include/opencv/opencv2/stitching/detail/seam_finders.hpp
index 4ff22c4..71dae7f 100644
--- a/IPL/include/opencv/opencv2/stitching/detail/seam_finders.hpp
+++ b/IPL/include/opencv/opencv2/stitching/detail/seam_finders.hpp
@@ -40,8 +40,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_STITCHING_SEAM_FINDERS_HPP__
-#define __OPENCV_STITCHING_SEAM_FINDERS_HPP__
+#ifndef OPENCV_STITCHING_SEAM_FINDERS_HPP
+#define OPENCV_STITCHING_SEAM_FINDERS_HPP
 
 #include <set>
 #include "opencv2/core.hpp"
@@ -55,35 +55,37 @@ namespace detail {
 
 /** @brief Base class for a seam estimator.
  */
-class CV_EXPORTS SeamFinder
+class CV_EXPORTS_W SeamFinder
 {
 public:
-    virtual ~SeamFinder() {}
+    CV_WRAP  virtual ~SeamFinder() {}
+    enum { NO, VORONOI_SEAM, DP_SEAM };
     /** @brief Estimates seams.
 
     @param src Source images
     @param corners Source image top-left corners
     @param masks Source image masks to update
      */
-    virtual void find(const std::vector<UMat> &src, const std::vector<Point> &corners,
-                      std::vector<UMat> &masks) = 0;
+    CV_WRAP virtual void find(const std::vector<UMat> &src, const std::vector<Point> &corners,
+                      CV_IN_OUT std::vector<UMat> &masks) = 0;
+    CV_WRAP static Ptr<SeamFinder> createDefault(int type);
 };
 
 /** @brief Stub seam estimator which does nothing.
  */
-class CV_EXPORTS NoSeamFinder : public SeamFinder
+class CV_EXPORTS_W NoSeamFinder : public SeamFinder
 {
 public:
-    void find(const std::vector<UMat>&, const std::vector<Point>&, std::vector<UMat>&) {}
+    CV_WRAP void find(const std::vector<UMat>&, const std::vector<Point>&, CV_IN_OUT std::vector<UMat>&) CV_OVERRIDE {}
 };
 
 /** @brief Base class for all pairwise seam estimators.
  */
-class CV_EXPORTS PairwiseSeamFinder : public SeamFinder
+class CV_EXPORTS_W PairwiseSeamFinder : public SeamFinder
 {
 public:
-    virtual void find(const std::vector<UMat> &src, const std::vector<Point> &corners,
-                      std::vector<UMat> &masks);
+    CV_WRAP virtual void find(const std::vector<UMat> &src, const std::vector<Point> &corners,
+                      CV_IN_OUT std::vector<UMat> &masks) CV_OVERRIDE;
 
 protected:
     void run();
@@ -103,30 +105,32 @@ class CV_EXPORTS PairwiseSeamFinder : public SeamFinder
 
 /** @brief Voronoi diagram-based seam estimator.
  */
-class CV_EXPORTS VoronoiSeamFinder : public PairwiseSeamFinder
+class CV_EXPORTS_W VoronoiSeamFinder : public PairwiseSeamFinder
 {
 public:
-    virtual void find(const std::vector<UMat> &src, const std::vector<Point> &corners,
-                      std::vector<UMat> &masks);
+    CV_WRAP virtual void find(const std::vector<UMat> &src, const std::vector<Point> &corners,
+                      CV_IN_OUT std::vector<UMat> &masks) CV_OVERRIDE;
     virtual void find(const std::vector<Size> &size, const std::vector<Point> &corners,
                       std::vector<UMat> &masks);
 private:
-    void findInPair(size_t first, size_t second, Rect roi);
+    void findInPair(size_t first, size_t second, Rect roi) CV_OVERRIDE;
 };
 
 
-class CV_EXPORTS DpSeamFinder : public SeamFinder
+class CV_EXPORTS_W DpSeamFinder : public SeamFinder
 {
 public:
     enum CostFunction { COLOR, COLOR_GRAD };
 
     DpSeamFinder(CostFunction costFunc = COLOR);
+    CV_WRAP DpSeamFinder(String costFunc );
 
     CostFunction costFunction() const { return costFunc_; }
     void setCostFunction(CostFunction val) { costFunc_ = val; }
+    CV_WRAP void setCostFunction(String val);
 
     virtual void find(const std::vector<UMat> &src, const std::vector<Point> &corners,
-                      std::vector<UMat> &masks);
+                      std::vector<UMat> &masks) CV_OVERRIDE;
 
 private:
     enum ComponentState
@@ -233,16 +237,18 @@ class CV_EXPORTS GraphCutSeamFinderBase
 
 /** @brief Minimum graph cut-based seam estimator. See details in @cite V03 .
  */
-class CV_EXPORTS GraphCutSeamFinder : public GraphCutSeamFinderBase, public SeamFinder
+class CV_EXPORTS_W GraphCutSeamFinder : public GraphCutSeamFinderBase, public SeamFinder
 {
 public:
     GraphCutSeamFinder(int cost_type = COST_COLOR_GRAD, float terminal_cost = 10000.f,
                        float bad_region_penalty = 1000.f);
+    CV_WRAP GraphCutSeamFinder(String cost_type,float terminal_cost = 10000.f,
+        float bad_region_penalty = 1000.f);
 
     ~GraphCutSeamFinder();
 
-    void find(const std::vector<UMat> &src, const std::vector<Point> &corners,
-              std::vector<UMat> &masks);
+    CV_WRAP void find(const std::vector<UMat> &src, const std::vector<Point> &corners,
+              std::vector<UMat> &masks) CV_OVERRIDE;
 
 private:
     // To avoid GCGraph dependency
@@ -261,8 +267,8 @@ class CV_EXPORTS GraphCutSeamFinderGpu : public GraphCutSeamFinderBase, public P
                             bad_region_penalty_(bad_region_penalty) {}
 
     void find(const std::vector<cv::UMat> &src, const std::vector<cv::Point> &corners,
-              std::vector<cv::UMat> &masks);
-    void findInPair(size_t first, size_t second, Rect roi);
+              std::vector<cv::UMat> &masks) CV_OVERRIDE;
+    void findInPair(size_t first, size_t second, Rect roi) CV_OVERRIDE;
 
 private:
     void setGraphWeightsColor(const cv::Mat &img1, const cv::Mat &img2, const cv::Mat &mask1, const cv::Mat &mask2,
@@ -282,4 +288,4 @@ class CV_EXPORTS GraphCutSeamFinderGpu : public GraphCutSeamFinderBase, public P
 } // namespace detail
 } // namespace cv
 
-#endif // __OPENCV_STITCHING_SEAM_FINDERS_HPP__
+#endif // OPENCV_STITCHING_SEAM_FINDERS_HPP
diff --git a/IPL/include/opencv/opencv2/stitching/detail/timelapsers.hpp b/IPL/include/opencv/opencv2/stitching/detail/timelapsers.hpp
index d64c03c..f6f3da8 100644
--- a/IPL/include/opencv/opencv2/stitching/detail/timelapsers.hpp
+++ b/IPL/include/opencv/opencv2/stitching/detail/timelapsers.hpp
@@ -41,8 +41,8 @@
 //M*/
 
 
-#ifndef __OPENCV_STITCHING_TIMELAPSERS_HPP__
-#define __OPENCV_STITCHING_TIMELAPSERS_HPP__
+#ifndef OPENCV_STITCHING_TIMELAPSERS_HPP
+#define OPENCV_STITCHING_TIMELAPSERS_HPP
 
 #include "opencv2/core.hpp"
 
@@ -54,7 +54,7 @@ namespace detail {
 
 //  Base Timelapser class, takes a sequence of images, applies appropriate shift, stores result in dst_.
 
-class CV_EXPORTS Timelapser
+class CV_EXPORTS_W Timelapser
 {
 public:
 
@@ -62,11 +62,11 @@ class CV_EXPORTS Timelapser
 
     virtual ~Timelapser() {}
 
-    static Ptr<Timelapser> createDefault(int type);
+    CV_WRAP static Ptr<Timelapser> createDefault(int type);
 
-    virtual void initialize(const std::vector<Point> &corners, const std::vector<Size> &sizes);
-    virtual void process(InputArray img, InputArray mask, Point tl);
-    virtual const UMat& getDst() {return dst_;}
+    CV_WRAP virtual void initialize(const std::vector<Point> &corners, const std::vector<Size> &sizes);
+    CV_WRAP virtual void process(InputArray img, InputArray mask, Point tl);
+    CV_WRAP virtual const UMat& getDst() {return dst_;}
 
 protected:
 
@@ -77,10 +77,10 @@ class CV_EXPORTS Timelapser
 };
 
 
-class CV_EXPORTS TimelapserCrop : public Timelapser
+class CV_EXPORTS_W TimelapserCrop : public Timelapser
 {
 public:
-    virtual void initialize(const std::vector<Point> &corners, const std::vector<Size> &sizes);
+    virtual void initialize(const std::vector<Point> &corners, const std::vector<Size> &sizes) CV_OVERRIDE;
 };
 
 //! @}
@@ -88,4 +88,4 @@ class CV_EXPORTS TimelapserCrop : public Timelapser
 } // namespace detail
 } // namespace cv
 
-#endif // __OPENCV_STITCHING_TIMELAPSERS_HPP__
+#endif // OPENCV_STITCHING_TIMELAPSERS_HPP
diff --git a/IPL/include/opencv/opencv2/stitching/detail/util.hpp b/IPL/include/opencv/opencv2/stitching/detail/util.hpp
index 3845ba5..bf7a390 100644
--- a/IPL/include/opencv/opencv2/stitching/detail/util.hpp
+++ b/IPL/include/opencv/opencv2/stitching/detail/util.hpp
@@ -40,62 +40,12 @@
 //
 //M*/
 
-#ifndef __OPENCV_STITCHING_UTIL_HPP__
-#define __OPENCV_STITCHING_UTIL_HPP__
+#ifndef OPENCV_STITCHING_UTIL_HPP
+#define OPENCV_STITCHING_UTIL_HPP
 
 #include <list>
 #include "opencv2/core.hpp"
 
-#ifndef ENABLE_LOG
-#define ENABLE_LOG 0
-#endif
-
-// TODO remove LOG macros, add logging class
-#if ENABLE_LOG
-#ifdef ANDROID
-  #include <iostream>
-  #include <sstream>
-  #include <android/log.h>
-  #define LOG_STITCHING_MSG(msg) \
-    do { \
-        Stringstream _os; \
-        _os << msg; \
-       __android_log_print(ANDROID_LOG_DEBUG, "STITCHING", "%s", _os.str().c_str()); \
-    } while(0);
-#else
-  #include <iostream>
-  #define LOG_STITCHING_MSG(msg) for(;;) { std::cout << msg; std::cout.flush(); break; }
-#endif
-#else
-  #define LOG_STITCHING_MSG(msg)
-#endif
-
-#define LOG_(_level, _msg)                     \
-    for(;;)                                    \
-    {                                          \
-        using namespace std;                   \
-        if ((_level) >= ::cv::detail::stitchingLogLevel()) \
-        {                                      \
-            LOG_STITCHING_MSG(_msg);           \
-        }                                      \
-    break;                                 \
-    }
-
-
-#define LOG(msg) LOG_(1, msg)
-#define LOG_CHAT(msg) LOG_(0, msg)
-
-#define LOGLN(msg) LOG(msg << std::endl)
-#define LOGLN_CHAT(msg) LOG_CHAT(msg << std::endl)
-
-//#if DEBUG_LOG_CHAT
-//  #define LOG_CHAT(msg) LOG(msg)
-//  #define LOGLN_CHAT(msg) LOGLN(msg)
-//#else
-//  #define LOG_CHAT(msg) do{}while(0)
-//  #define LOGLN_CHAT(msg) do{}while(0)
-//#endif
-
 namespace cv {
 namespace detail {
 
@@ -150,16 +100,16 @@ class CV_EXPORTS Graph
 //////////////////////////////////////////////////////////////////////////////
 // Auxiliary functions
 
-CV_EXPORTS bool overlapRoi(Point tl1, Point tl2, Size sz1, Size sz2, Rect &roi);
-CV_EXPORTS Rect resultRoi(const std::vector<Point> &corners, const std::vector<UMat> &images);
-CV_EXPORTS Rect resultRoi(const std::vector<Point> &corners, const std::vector<Size> &sizes);
-CV_EXPORTS Rect resultRoiIntersection(const std::vector<Point> &corners, const std::vector<Size> &sizes);
-CV_EXPORTS Point resultTl(const std::vector<Point> &corners);
+CV_EXPORTS_W bool overlapRoi(Point tl1, Point tl2, Size sz1, Size sz2, Rect &roi);
+CV_EXPORTS_W Rect resultRoi(const std::vector<Point> &corners, const std::vector<UMat> &images);
+CV_EXPORTS_W Rect resultRoi(const std::vector<Point> &corners, const std::vector<Size> &sizes);
+CV_EXPORTS_W Rect resultRoiIntersection(const std::vector<Point> &corners, const std::vector<Size> &sizes);
+CV_EXPORTS_W Point resultTl(const std::vector<Point> &corners);
 
 // Returns random 'count' element subset of the {0,1,...,size-1} set
-CV_EXPORTS void selectRandomSubset(int count, int size, std::vector<int> &subset);
+CV_EXPORTS_W void selectRandomSubset(int count, int size, std::vector<int> &subset);
 
-CV_EXPORTS int& stitchingLogLevel();
+CV_EXPORTS_W int& stitchingLogLevel();
 
 //! @}
 
@@ -168,4 +118,4 @@ CV_EXPORTS int& stitchingLogLevel();
 
 #include "util_inl.hpp"
 
-#endif // __OPENCV_STITCHING_UTIL_HPP__
+#endif // OPENCV_STITCHING_UTIL_HPP
diff --git a/IPL/include/opencv/opencv2/stitching/detail/util_inl.hpp b/IPL/include/opencv/opencv2/stitching/detail/util_inl.hpp
index 6ac6f8e..dafab8b 100644
--- a/IPL/include/opencv/opencv2/stitching/detail/util_inl.hpp
+++ b/IPL/include/opencv/opencv2/stitching/detail/util_inl.hpp
@@ -40,8 +40,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_STITCHING_UTIL_INL_HPP__
-#define __OPENCV_STITCHING_UTIL_INL_HPP__
+#ifndef OPENCV_STITCHING_UTIL_INL_HPP
+#define OPENCV_STITCHING_UTIL_INL_HPP
 
 #include <queue>
 #include "opencv2/core.hpp"
@@ -128,4 +128,4 @@ static inline double sqr(double x) { return x * x; }
 
 //! @endcond
 
-#endif // __OPENCV_STITCHING_UTIL_INL_HPP__
+#endif // OPENCV_STITCHING_UTIL_INL_HPP
diff --git a/IPL/include/opencv/opencv2/stitching/detail/warpers.hpp b/IPL/include/opencv/opencv2/stitching/detail/warpers.hpp
index 0cb9e42..bc2c6e3 100644
--- a/IPL/include/opencv/opencv2/stitching/detail/warpers.hpp
+++ b/IPL/include/opencv/opencv2/stitching/detail/warpers.hpp
@@ -40,8 +40,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_STITCHING_WARPERS_HPP__
-#define __OPENCV_STITCHING_WARPERS_HPP__
+#ifndef OPENCV_STITCHING_WARPERS_HPP
+#define OPENCV_STITCHING_WARPERS_HPP
 
 #include "opencv2/core.hpp"
 #include "opencv2/core/cuda.hpp"
@@ -92,7 +92,7 @@ class CV_EXPORTS RotationWarper
     @return Project image top-left corner
      */
     virtual Point warp(InputArray src, InputArray K, InputArray R, int interp_mode, int border_mode,
-                       OutputArray dst) = 0;
+                       CV_OUT OutputArray dst) = 0;
 
     /** @brief Projects the image backward.
 
@@ -105,7 +105,7 @@ class CV_EXPORTS RotationWarper
     @param dst Backward-projected image
      */
     virtual void warpBackward(InputArray src, InputArray K, InputArray R, int interp_mode, int border_mode,
-                              Size dst_size, OutputArray dst) = 0;
+                              Size dst_size, CV_OUT OutputArray dst) = 0;
 
     /**
     @param src_size Source image bounding box
@@ -121,7 +121,7 @@ class CV_EXPORTS RotationWarper
 
 /** @brief Base class for warping logic implementation.
  */
-struct CV_EXPORTS ProjectorBase
+struct CV_EXPORTS_W_SIMPLE ProjectorBase
 {
     void setCameraParams(InputArray K = Mat::eye(3, 3, CV_32F),
                          InputArray R = Mat::eye(3, 3, CV_32F),
@@ -138,23 +138,23 @@ struct CV_EXPORTS ProjectorBase
 /** @brief Base class for rotation-based warper using a detail::ProjectorBase_ derived class.
  */
 template <class P>
-class CV_EXPORTS RotationWarperBase : public RotationWarper
+class CV_EXPORTS_TEMPLATE RotationWarperBase : public RotationWarper
 {
 public:
-    Point2f warpPoint(const Point2f &pt, InputArray K, InputArray R);
+    Point2f warpPoint(const Point2f &pt, InputArray K, InputArray R) CV_OVERRIDE;
 
-    Rect buildMaps(Size src_size, InputArray K, InputArray R, OutputArray xmap, OutputArray ymap);
+    Rect buildMaps(Size src_size, InputArray K, InputArray R, OutputArray xmap, OutputArray ymap) CV_OVERRIDE;
 
     Point warp(InputArray src, InputArray K, InputArray R, int interp_mode, int border_mode,
-               OutputArray dst);
+               OutputArray dst) CV_OVERRIDE;
 
     void warpBackward(InputArray src, InputArray K, InputArray R, int interp_mode, int border_mode,
-                      Size dst_size, OutputArray dst);
+                      Size dst_size, OutputArray dst) CV_OVERRIDE;
 
-    Rect warpRoi(Size src_size, InputArray K, InputArray R);
+    Rect warpRoi(Size src_size, InputArray K, InputArray R) CV_OVERRIDE;
 
-    float getScale() const { return projector_.scale; }
-    void setScale(float val) { projector_.scale = val; }
+    float getScale() const  CV_OVERRIDE{ return projector_.scale; }
+    void setScale(float val) CV_OVERRIDE { projector_.scale = val; }
 
 protected:
 
@@ -186,36 +186,99 @@ class CV_EXPORTS PlaneWarper : public RotationWarperBase<PlaneProjector>
      */
     PlaneWarper(float scale = 1.f) { projector_.scale = scale; }
 
-    Point2f warpPoint(const Point2f &pt, InputArray K, InputArray R);
+    Point2f warpPoint(const Point2f &pt, InputArray K, InputArray R) CV_OVERRIDE;
     Point2f warpPoint(const Point2f &pt, InputArray K, InputArray R, InputArray T);
 
-    virtual Rect buildMaps(Size src_size, InputArray K, InputArray R, InputArray T, OutputArray xmap, OutputArray ymap);
-    Rect buildMaps(Size src_size, InputArray K, InputArray R, OutputArray xmap, OutputArray ymap);
+    virtual Rect buildMaps(Size src_size, InputArray K, InputArray R, InputArray T, CV_OUT OutputArray xmap, CV_OUT OutputArray ymap);
+    Rect buildMaps(Size src_size, InputArray K, InputArray R, CV_OUT OutputArray xmap, CV_OUT OutputArray ymap) CV_OVERRIDE;
 
     Point warp(InputArray src, InputArray K, InputArray R,
-               int interp_mode, int border_mode, OutputArray dst);
+               int interp_mode, int border_mode, CV_OUT OutputArray dst) CV_OVERRIDE;
     virtual Point warp(InputArray src, InputArray K, InputArray R, InputArray T, int interp_mode, int border_mode,
-               OutputArray dst);
+        CV_OUT OutputArray dst);
 
-    Rect warpRoi(Size src_size, InputArray K, InputArray R);
+    Rect warpRoi(Size src_size, InputArray K, InputArray R) CV_OVERRIDE;
     Rect warpRoi(Size src_size, InputArray K, InputArray R, InputArray T);
 
 protected:
-    void detectResultRoi(Size src_size, Point &dst_tl, Point &dst_br);
+    void detectResultRoi(Size src_size, Point &dst_tl, Point &dst_br) CV_OVERRIDE;
 };
 
 
-struct CV_EXPORTS SphericalProjector : ProjectorBase
+/** @brief Affine warper that uses rotations and translations
+
+ Uses an affine transformation in homogeneous coordinates to represent both rotation and
+ translation in the camera rotation matrix.
+ */
+class CV_EXPORTS AffineWarper : public PlaneWarper
 {
-    void mapForward(float x, float y, float &u, float &v);
-    void mapBackward(float u, float v, float &x, float &y);
+public:
+    /** @brief Construct an instance of the affine warper class.
+
+    @param scale Projected image scale multiplier
+     */
+    AffineWarper(float scale = 1.f) : PlaneWarper(scale) {}
+
+    /** @brief Projects the image point.
+
+    @param pt Source point
+    @param K Camera intrinsic parameters
+    @param H Camera extrinsic parameters
+    @return Projected point
+     */
+    Point2f warpPoint(const Point2f &pt, InputArray K, InputArray H) CV_OVERRIDE;
+
+    /** @brief Builds the projection maps according to the given camera data.
+
+    @param src_size Source image size
+    @param K Camera intrinsic parameters
+    @param H Camera extrinsic parameters
+    @param xmap Projection map for the x axis
+    @param ymap Projection map for the y axis
+    @return Projected image minimum bounding box
+     */
+    Rect buildMaps(Size src_size, InputArray K, InputArray H, OutputArray xmap, OutputArray ymap) CV_OVERRIDE;
+
+    /** @brief Projects the image.
+
+    @param src Source image
+    @param K Camera intrinsic parameters
+    @param H Camera extrinsic parameters
+    @param interp_mode Interpolation mode
+    @param border_mode Border extrapolation mode
+    @param dst Projected image
+    @return Projected image top-left corner
+     */
+    Point warp(InputArray src, InputArray K, InputArray H,
+               int interp_mode, int border_mode, OutputArray dst) CV_OVERRIDE;
+
+    /**
+    @param src_size Source image bounding box
+    @param K Camera intrinsic parameters
+    @param H Camera extrinsic parameters
+    @return Projected image minimum bounding box
+     */
+    Rect warpRoi(Size src_size, InputArray K, InputArray H) CV_OVERRIDE;
+
+protected:
+    /** @brief Extracts rotation and translation matrices from matrix H representing
+        affine transformation in homogeneous coordinates
+     */
+    void getRTfromHomogeneous(InputArray H, Mat &R, Mat &T);
+};
+
+
+struct CV_EXPORTS_W_SIMPLE SphericalProjector : ProjectorBase
+{
+    CV_WRAP void mapForward(float x, float y, float &u, float &v);
+    CV_WRAP void mapBackward(float u, float v, float &x, float &y);
 };
 
 
 /** @brief Warper that maps an image onto the unit sphere located at the origin.
 
  Projects image onto unit sphere with origin at (0, 0, 0) and radius scale, measured in pixels.
- A 360° panorama would therefore have a resulting width of 2 * scale * PI pixels.
+ A 360-degree panorama would therefore have a resulting width of 2 * scale * PI pixels.
  Poles are located at (0, -1, 0) and (0, 1, 0) points.
 */
 class CV_EXPORTS SphericalWarper : public RotationWarperBase<SphericalProjector>
@@ -228,10 +291,10 @@ class CV_EXPORTS SphericalWarper : public RotationWarperBase<SphericalProjector>
      */
     SphericalWarper(float scale) { projector_.scale = scale; }
 
-    Rect buildMaps(Size src_size, InputArray K, InputArray R, OutputArray xmap, OutputArray ymap);
-    Point warp(InputArray src, InputArray K, InputArray R, int interp_mode, int border_mode, OutputArray dst);
+    Rect buildMaps(Size src_size, InputArray K, InputArray R, OutputArray xmap, OutputArray ymap) CV_OVERRIDE;
+    Point warp(InputArray src, InputArray K, InputArray R, int interp_mode, int border_mode, OutputArray dst) CV_OVERRIDE;
 protected:
-    void detectResultRoi(Size src_size, Point &dst_tl, Point &dst_br);
+    void detectResultRoi(Size src_size, Point &dst_tl, Point &dst_br) CV_OVERRIDE;
 };
 
 
@@ -253,10 +316,10 @@ class CV_EXPORTS CylindricalWarper : public RotationWarperBase<CylindricalProjec
      */
     CylindricalWarper(float scale) { projector_.scale = scale; }
 
-    Rect buildMaps(Size src_size, InputArray K, InputArray R, OutputArray xmap, OutputArray ymap);
-    Point warp(InputArray src, InputArray K, InputArray R, int interp_mode, int border_mode, OutputArray dst);
+    Rect buildMaps(Size src_size, InputArray K, InputArray R, OutputArray xmap, OutputArray ymap) CV_OVERRIDE;
+    Point warp(InputArray src, InputArray K, InputArray R, int interp_mode, int border_mode, OutputArray dst) CV_OVERRIDE;
 protected:
-    void detectResultRoi(Size src_size, Point &dst_tl, Point &dst_br)
+    void detectResultRoi(Size src_size, Point &dst_tl, Point &dst_br) CV_OVERRIDE
     {
         RotationWarperBase<CylindricalProjector>::detectResultRoiByBorder(src_size, dst_tl, dst_br);
     }
@@ -409,7 +472,7 @@ class CV_EXPORTS PlaneWarperGpu : public PlaneWarper
 public:
     PlaneWarperGpu(float scale = 1.f) : PlaneWarper(scale) {}
 
-    Rect buildMaps(Size src_size, InputArray K, InputArray R, OutputArray xmap, OutputArray ymap)
+    Rect buildMaps(Size src_size, InputArray K, InputArray R, OutputArray xmap, OutputArray ymap) CV_OVERRIDE
     {
         Rect result = buildMaps(src_size, K, R, d_xmap_, d_ymap_);
         d_xmap_.download(xmap);
@@ -417,7 +480,7 @@ class CV_EXPORTS PlaneWarperGpu : public PlaneWarper
         return result;
     }
 
-    Rect buildMaps(Size src_size, InputArray K, InputArray R, InputArray T, OutputArray xmap, OutputArray ymap)
+    Rect buildMaps(Size src_size, InputArray K, InputArray R, InputArray T, OutputArray xmap, OutputArray ymap) CV_OVERRIDE
     {
         Rect result = buildMaps(src_size, K, R, T, d_xmap_, d_ymap_);
         d_xmap_.download(xmap);
@@ -426,7 +489,7 @@ class CV_EXPORTS PlaneWarperGpu : public PlaneWarper
     }
 
     Point warp(InputArray src, InputArray K, InputArray R, int interp_mode, int border_mode,
-               OutputArray dst)
+               OutputArray dst) CV_OVERRIDE
     {
         d_src_.upload(src);
         Point result = warp(d_src_, K, R, interp_mode, border_mode, d_dst_);
@@ -435,7 +498,7 @@ class CV_EXPORTS PlaneWarperGpu : public PlaneWarper
     }
 
     Point warp(InputArray src, InputArray K, InputArray R, InputArray T, int interp_mode, int border_mode,
-               OutputArray dst)
+               OutputArray dst) CV_OVERRIDE
     {
         d_src_.upload(src);
         Point result = warp(d_src_, K, R, T, interp_mode, border_mode, d_dst_);
@@ -463,7 +526,7 @@ class CV_EXPORTS SphericalWarperGpu : public SphericalWarper
 public:
     SphericalWarperGpu(float scale) : SphericalWarper(scale) {}
 
-    Rect buildMaps(Size src_size, InputArray K, InputArray R, OutputArray xmap, OutputArray ymap)
+    Rect buildMaps(Size src_size, InputArray K, InputArray R, OutputArray xmap, OutputArray ymap) CV_OVERRIDE
     {
         Rect result = buildMaps(src_size, K, R, d_xmap_, d_ymap_);
         d_xmap_.download(xmap);
@@ -472,7 +535,7 @@ class CV_EXPORTS SphericalWarperGpu : public SphericalWarper
     }
 
     Point warp(InputArray src, InputArray K, InputArray R, int interp_mode, int border_mode,
-               OutputArray dst)
+               OutputArray dst) CV_OVERRIDE
     {
         d_src_.upload(src);
         Point result = warp(d_src_, K, R, interp_mode, border_mode, d_dst_);
@@ -495,7 +558,7 @@ class CV_EXPORTS CylindricalWarperGpu : public CylindricalWarper
 public:
     CylindricalWarperGpu(float scale) : CylindricalWarper(scale) {}
 
-    Rect buildMaps(Size src_size, InputArray K, InputArray R, OutputArray xmap, OutputArray ymap)
+    Rect buildMaps(Size src_size, InputArray K, InputArray R, OutputArray xmap, OutputArray ymap) CV_OVERRIDE
     {
         Rect result = buildMaps(src_size, K, R, d_xmap_, d_ymap_);
         d_xmap_.download(xmap);
@@ -504,7 +567,7 @@ class CV_EXPORTS CylindricalWarperGpu : public CylindricalWarper
     }
 
     Point warp(InputArray src, InputArray K, InputArray R, int interp_mode, int border_mode,
-               OutputArray dst)
+               OutputArray dst) CV_OVERRIDE
     {
         d_src_.upload(src);
         Point result = warp(d_src_, K, R, interp_mode, border_mode, d_dst_);
@@ -522,7 +585,7 @@ class CV_EXPORTS CylindricalWarperGpu : public CylindricalWarper
 };
 
 
-struct SphericalPortraitProjector : ProjectorBase
+struct CV_EXPORTS SphericalPortraitProjector : ProjectorBase
 {
     void mapForward(float x, float y, float &u, float &v);
     void mapBackward(float u, float v, float &x, float &y);
@@ -537,10 +600,10 @@ class CV_EXPORTS SphericalPortraitWarper : public RotationWarperBase<SphericalPo
     SphericalPortraitWarper(float scale) { projector_.scale = scale; }
 
 protected:
-    void detectResultRoi(Size src_size, Point &dst_tl, Point &dst_br);
+    void detectResultRoi(Size src_size, Point &dst_tl, Point &dst_br) CV_OVERRIDE;
 };
 
-struct CylindricalPortraitProjector : ProjectorBase
+struct CV_EXPORTS CylindricalPortraitProjector : ProjectorBase
 {
     void mapForward(float x, float y, float &u, float &v);
     void mapBackward(float u, float v, float &x, float &y);
@@ -553,13 +616,13 @@ class CV_EXPORTS CylindricalPortraitWarper : public RotationWarperBase<Cylindric
     CylindricalPortraitWarper(float scale) { projector_.scale = scale; }
 
 protected:
-    void detectResultRoi(Size src_size, Point &dst_tl, Point &dst_br)
+    void detectResultRoi(Size src_size, Point &dst_tl, Point &dst_br) CV_OVERRIDE
     {
         RotationWarperBase<CylindricalPortraitProjector>::detectResultRoiByBorder(src_size, dst_tl, dst_br);
     }
 };
 
-struct PlanePortraitProjector : ProjectorBase
+struct CV_EXPORTS PlanePortraitProjector : ProjectorBase
 {
     void mapForward(float x, float y, float &u, float &v);
     void mapBackward(float u, float v, float &x, float &y);
@@ -572,7 +635,7 @@ class CV_EXPORTS PlanePortraitWarper : public RotationWarperBase<PlanePortraitPr
     PlanePortraitWarper(float scale) { projector_.scale = scale; }
 
 protected:
-    void detectResultRoi(Size src_size, Point &dst_tl, Point &dst_br)
+    void detectResultRoi(Size src_size, Point &dst_tl, Point &dst_br) CV_OVERRIDE
     {
         RotationWarperBase<PlanePortraitProjector>::detectResultRoiByBorder(src_size, dst_tl, dst_br);
     }
@@ -585,4 +648,4 @@ class CV_EXPORTS PlanePortraitWarper : public RotationWarperBase<PlanePortraitPr
 
 #include "warpers_inl.hpp"
 
-#endif // __OPENCV_STITCHING_WARPERS_HPP__
+#endif // OPENCV_STITCHING_WARPERS_HPP
diff --git a/IPL/include/opencv/opencv2/stitching/detail/warpers_inl.hpp b/IPL/include/opencv/opencv2/stitching/detail/warpers_inl.hpp
index 0416ecb..f4a19d9 100644
--- a/IPL/include/opencv/opencv2/stitching/detail/warpers_inl.hpp
+++ b/IPL/include/opencv/opencv2/stitching/detail/warpers_inl.hpp
@@ -40,8 +40,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_STITCHING_WARPERS_INL_HPP__
-#define __OPENCV_STITCHING_WARPERS_INL_HPP__
+#ifndef OPENCV_STITCHING_WARPERS_INL_HPP
+#define OPENCV_STITCHING_WARPERS_INL_HPP
 
 #include "opencv2/core.hpp"
 #include "warpers.hpp" // Make your IDE see declarations
@@ -150,10 +150,10 @@ Rect RotationWarperBase<P>::warpRoi(Size src_size, InputArray K, InputArray R)
 template <class P>
 void RotationWarperBase<P>::detectResultRoi(Size src_size, Point &dst_tl, Point &dst_br)
 {
-    float tl_uf = std::numeric_limits<float>::max();
-    float tl_vf = std::numeric_limits<float>::max();
-    float br_uf = -std::numeric_limits<float>::max();
-    float br_vf = -std::numeric_limits<float>::max();
+    float tl_uf = (std::numeric_limits<float>::max)();
+    float tl_vf = (std::numeric_limits<float>::max)();
+    float br_uf = -(std::numeric_limits<float>::max)();
+    float br_vf = -(std::numeric_limits<float>::max)();
 
     float u, v;
     for (int y = 0; y < src_size.height; ++y)
@@ -161,8 +161,8 @@ void RotationWarperBase<P>::detectResultRoi(Size src_size, Point &dst_tl, Point
         for (int x = 0; x < src_size.width; ++x)
         {
             projector_.mapForward(static_cast<float>(x), static_cast<float>(y), u, v);
-            tl_uf = std::min(tl_uf, u); tl_vf = std::min(tl_vf, v);
-            br_uf = std::max(br_uf, u); br_vf = std::max(br_vf, v);
+            tl_uf = (std::min)(tl_uf, u); tl_vf = (std::min)(tl_vf, v);
+            br_uf = (std::max)(br_uf, u); br_vf = (std::max)(br_vf, v);
         }
     }
 
@@ -176,31 +176,31 @@ void RotationWarperBase<P>::detectResultRoi(Size src_size, Point &dst_tl, Point
 template <class P>
 void RotationWarperBase<P>::detectResultRoiByBorder(Size src_size, Point &dst_tl, Point &dst_br)
 {
-    float tl_uf = std::numeric_limits<float>::max();
-    float tl_vf = std::numeric_limits<float>::max();
-    float br_uf = -std::numeric_limits<float>::max();
-    float br_vf = -std::numeric_limits<float>::max();
+    float tl_uf = (std::numeric_limits<float>::max)();
+    float tl_vf = (std::numeric_limits<float>::max)();
+    float br_uf = -(std::numeric_limits<float>::max)();
+    float br_vf = -(std::numeric_limits<float>::max)();
 
     float u, v;
     for (float x = 0; x < src_size.width; ++x)
     {
         projector_.mapForward(static_cast<float>(x), 0, u, v);
-        tl_uf = std::min(tl_uf, u); tl_vf = std::min(tl_vf, v);
-        br_uf = std::max(br_uf, u); br_vf = std::max(br_vf, v);
+        tl_uf = (std::min)(tl_uf, u); tl_vf = (std::min)(tl_vf, v);
+        br_uf = (std::max)(br_uf, u); br_vf = (std::max)(br_vf, v);
 
         projector_.mapForward(static_cast<float>(x), static_cast<float>(src_size.height - 1), u, v);
-        tl_uf = std::min(tl_uf, u); tl_vf = std::min(tl_vf, v);
-        br_uf = std::max(br_uf, u); br_vf = std::max(br_vf, v);
+        tl_uf = (std::min)(tl_uf, u); tl_vf = (std::min)(tl_vf, v);
+        br_uf = (std::max)(br_uf, u); br_vf = (std::max)(br_vf, v);
     }
     for (int y = 0; y < src_size.height; ++y)
     {
         projector_.mapForward(0, static_cast<float>(y), u, v);
-        tl_uf = std::min(tl_uf, u); tl_vf = std::min(tl_vf, v);
-        br_uf = std::max(br_uf, u); br_vf = std::max(br_vf, v);
+        tl_uf = (std::min)(tl_uf, u); tl_vf = (std::min)(tl_vf, v);
+        br_uf = (std::max)(br_uf, u); br_vf = (std::max)(br_vf, v);
 
         projector_.mapForward(static_cast<float>(src_size.width - 1), static_cast<float>(y), u, v);
-        tl_uf = std::min(tl_uf, u); tl_vf = std::min(tl_vf, v);
-        br_uf = std::max(br_uf, u); br_vf = std::max(br_vf, v);
+        tl_uf = (std::min)(tl_uf, u); tl_vf = (std::min)(tl_vf, v);
+        br_uf = (std::max)(br_uf, u); br_vf = (std::max)(br_vf, v);
     }
 
     dst_tl.x = static_cast<int>(tl_uf);
@@ -771,4 +771,4 @@ void PlanePortraitProjector::mapBackward(float u0, float v0, float &x, float &y)
 
 //! @endcond
 
-#endif // __OPENCV_STITCHING_WARPERS_INL_HPP__
+#endif // OPENCV_STITCHING_WARPERS_INL_HPP
diff --git a/IPL/include/opencv/opencv2/stitching/warpers.hpp b/IPL/include/opencv/opencv2/stitching/warpers.hpp
index 7e570d3..ff43386 100644
--- a/IPL/include/opencv/opencv2/stitching/warpers.hpp
+++ b/IPL/include/opencv/opencv2/stitching/warpers.hpp
@@ -40,63 +40,141 @@
 //
 //M*/
 
-#ifndef __OPENCV_STITCHING_WARPER_CREATORS_HPP__
-#define __OPENCV_STITCHING_WARPER_CREATORS_HPP__
+#ifndef OPENCV_STITCHING_WARPER_CREATORS_HPP
+#define OPENCV_STITCHING_WARPER_CREATORS_HPP
 
 #include "opencv2/stitching/detail/warpers.hpp"
+#include <string>
 
 namespace cv {
+    class CV_EXPORTS_W PyRotationWarper
+    {
+        Ptr<detail::RotationWarper> rw;
+
+    public:
+        CV_WRAP PyRotationWarper(String type, float scale);
+        CV_WRAP PyRotationWarper() {};
+        ~PyRotationWarper() {}
+
+        /** @brief Projects the image point.
+
+        @param pt Source point
+        @param K Camera intrinsic parameters
+        @param R Camera rotation matrix
+        @return Projected point
+        */
+        CV_WRAP Point2f warpPoint(const Point2f &pt, InputArray K, InputArray R);
+
+        /** @brief Builds the projection maps according to the given camera data.
+
+        @param src_size Source image size
+        @param K Camera intrinsic parameters
+        @param R Camera rotation matrix
+        @param xmap Projection map for the x axis
+        @param ymap Projection map for the y axis
+        @return Projected image minimum bounding box
+        */
+        CV_WRAP Rect buildMaps(Size src_size, InputArray K, InputArray R, OutputArray xmap, OutputArray ymap);
+
+        /** @brief Projects the image.
+
+        @param src Source image
+        @param K Camera intrinsic parameters
+        @param R Camera rotation matrix
+        @param interp_mode Interpolation mode
+        @param border_mode Border extrapolation mode
+        @param dst Projected image
+        @return Projected image top-left corner
+        */
+        CV_WRAP Point warp(InputArray src, InputArray K, InputArray R, int interp_mode, int border_mode,
+            CV_OUT OutputArray dst);
+
+        /** @brief Projects the image backward.
+
+        @param src Projected image
+        @param K Camera intrinsic parameters
+        @param R Camera rotation matrix
+        @param interp_mode Interpolation mode
+        @param border_mode Border extrapolation mode
+        @param dst_size Backward-projected image size
+        @param dst Backward-projected image
+        */
+        CV_WRAP void warpBackward(InputArray src, InputArray K, InputArray R, int interp_mode, int border_mode,
+            Size dst_size, CV_OUT OutputArray dst);
+
+        /**
+        @param src_size Source image bounding box
+        @param K Camera intrinsic parameters
+        @param R Camera rotation matrix
+        @return Projected image minimum bounding box
+        */
+        CV_WRAP Rect warpRoi(Size src_size, InputArray K, InputArray R);
+
+        CV_WRAP float getScale() const { return 1.f; }
+        CV_WRAP void setScale(float) {}
+    };
 
 //! @addtogroup stitching_warp
 //! @{
 
 /** @brief Image warper factories base class.
  */
-class WarperCreator
+
+class CV_EXPORTS_W WarperCreator
 {
 public:
-    virtual ~WarperCreator() {}
+    CV_WRAP virtual ~WarperCreator() {}
     virtual Ptr<detail::RotationWarper> create(float scale) const = 0;
 };
 
+
 /** @brief Plane warper factory class.
   @sa detail::PlaneWarper
  */
-class PlaneWarper : public WarperCreator
+class CV_EXPORTS  PlaneWarper : public WarperCreator
+{
+public:
+    Ptr<detail::RotationWarper> create(float scale) const CV_OVERRIDE { return makePtr<detail::PlaneWarper>(scale); }
+};
+
+/** @brief Affine warper factory class.
+  @sa detail::AffineWarper
+ */
+class CV_EXPORTS  AffineWarper : public WarperCreator
 {
 public:
-    Ptr<detail::RotationWarper> create(float scale) const { return makePtr<detail::PlaneWarper>(scale); }
+    Ptr<detail::RotationWarper> create(float scale) const CV_OVERRIDE { return makePtr<detail::AffineWarper>(scale); }
 };
 
 /** @brief Cylindrical warper factory class.
 @sa detail::CylindricalWarper
 */
-class CylindricalWarper: public WarperCreator
+class CV_EXPORTS CylindricalWarper: public WarperCreator
 {
 public:
-    Ptr<detail::RotationWarper> create(float scale) const { return makePtr<detail::CylindricalWarper>(scale); }
+    Ptr<detail::RotationWarper> create(float scale) const CV_OVERRIDE { return makePtr<detail::CylindricalWarper>(scale); }
 };
 
 /** @brief Spherical warper factory class */
-class SphericalWarper: public WarperCreator
+class CV_EXPORTS SphericalWarper: public WarperCreator
 {
 public:
-    Ptr<detail::RotationWarper> create(float scale) const { return makePtr<detail::SphericalWarper>(scale); }
+    Ptr<detail::RotationWarper> create(float scale) const CV_OVERRIDE { return makePtr<detail::SphericalWarper>(scale); }
 };
 
-class FisheyeWarper : public WarperCreator
+class CV_EXPORTS FisheyeWarper : public WarperCreator
 {
 public:
-    Ptr<detail::RotationWarper> create(float scale) const { return makePtr<detail::FisheyeWarper>(scale); }
+    Ptr<detail::RotationWarper> create(float scale) const CV_OVERRIDE { return makePtr<detail::FisheyeWarper>(scale); }
 };
 
-class StereographicWarper: public WarperCreator
+class CV_EXPORTS StereographicWarper: public WarperCreator
 {
 public:
-    Ptr<detail::RotationWarper> create(float scale) const { return makePtr<detail::StereographicWarper>(scale); }
+    Ptr<detail::RotationWarper> create(float scale) const CV_OVERRIDE { return makePtr<detail::StereographicWarper>(scale); }
 };
 
-class CompressedRectilinearWarper: public WarperCreator
+class CV_EXPORTS CompressedRectilinearWarper: public WarperCreator
 {
     float a, b;
 public:
@@ -104,10 +182,10 @@ class CompressedRectilinearWarper: public WarperCreator
     {
         a = A; b = B;
     }
-    Ptr<detail::RotationWarper> create(float scale) const { return makePtr<detail::CompressedRectilinearWarper>(scale, a, b); }
+    Ptr<detail::RotationWarper> create(float scale) const CV_OVERRIDE { return makePtr<detail::CompressedRectilinearWarper>(scale, a, b); }
 };
 
-class CompressedRectilinearPortraitWarper: public WarperCreator
+class CV_EXPORTS CompressedRectilinearPortraitWarper: public WarperCreator
 {
     float a, b;
 public:
@@ -115,10 +193,10 @@ class CompressedRectilinearPortraitWarper: public WarperCreator
     {
         a = A; b = B;
     }
-    Ptr<detail::RotationWarper> create(float scale) const { return makePtr<detail::CompressedRectilinearPortraitWarper>(scale, a, b); }
+    Ptr<detail::RotationWarper> create(float scale) const CV_OVERRIDE { return makePtr<detail::CompressedRectilinearPortraitWarper>(scale, a, b); }
 };
 
-class PaniniWarper: public WarperCreator
+class CV_EXPORTS PaniniWarper: public WarperCreator
 {
     float a, b;
 public:
@@ -126,10 +204,10 @@ class PaniniWarper: public WarperCreator
     {
         a = A; b = B;
     }
-    Ptr<detail::RotationWarper> create(float scale) const { return makePtr<detail::PaniniWarper>(scale, a, b); }
+    Ptr<detail::RotationWarper> create(float scale) const CV_OVERRIDE { return makePtr<detail::PaniniWarper>(scale, a, b); }
 };
 
-class PaniniPortraitWarper: public WarperCreator
+class CV_EXPORTS PaniniPortraitWarper: public WarperCreator
 {
     float a, b;
 public:
@@ -137,19 +215,19 @@ class PaniniPortraitWarper: public WarperCreator
     {
         a = A; b = B;
     }
-    Ptr<detail::RotationWarper> create(float scale) const { return makePtr<detail::PaniniPortraitWarper>(scale, a, b); }
+    Ptr<detail::RotationWarper> create(float scale) const CV_OVERRIDE { return makePtr<detail::PaniniPortraitWarper>(scale, a, b); }
 };
 
-class MercatorWarper: public WarperCreator
+class CV_EXPORTS MercatorWarper: public WarperCreator
 {
 public:
-    Ptr<detail::RotationWarper> create(float scale) const { return makePtr<detail::MercatorWarper>(scale); }
+    Ptr<detail::RotationWarper> create(float scale) const CV_OVERRIDE { return makePtr<detail::MercatorWarper>(scale); }
 };
 
-class TransverseMercatorWarper: public WarperCreator
+class CV_EXPORTS TransverseMercatorWarper: public WarperCreator
 {
 public:
-    Ptr<detail::RotationWarper> create(float scale) const { return makePtr<detail::TransverseMercatorWarper>(scale); }
+    Ptr<detail::RotationWarper> create(float scale) const CV_OVERRIDE { return makePtr<detail::TransverseMercatorWarper>(scale); }
 };
 
 
@@ -158,21 +236,21 @@ class TransverseMercatorWarper: public WarperCreator
 class PlaneWarperGpu: public WarperCreator
 {
 public:
-    Ptr<detail::RotationWarper> create(float scale) const { return makePtr<detail::PlaneWarperGpu>(scale); }
+    Ptr<detail::RotationWarper> create(float scale) const CV_OVERRIDE { return makePtr<detail::PlaneWarperGpu>(scale); }
 };
 
 
 class CylindricalWarperGpu: public WarperCreator
 {
 public:
-    Ptr<detail::RotationWarper> create(float scale) const { return makePtr<detail::CylindricalWarperGpu>(scale); }
+    Ptr<detail::RotationWarper> create(float scale) const CV_OVERRIDE { return makePtr<detail::CylindricalWarperGpu>(scale); }
 };
 
 
 class SphericalWarperGpu: public WarperCreator
 {
 public:
-    Ptr<detail::RotationWarper> create(float scale) const { return makePtr<detail::SphericalWarperGpu>(scale); }
+    Ptr<detail::RotationWarper> create(float scale) const CV_OVERRIDE { return makePtr<detail::SphericalWarperGpu>(scale); }
 };
 #endif
 
@@ -180,4 +258,4 @@ class SphericalWarperGpu: public WarperCreator
 
 } // namespace cv
 
-#endif // __OPENCV_STITCHING_WARPER_CREATORS_HPP__
+#endif // OPENCV_STITCHING_WARPER_CREATORS_HPP
diff --git a/IPL/include/opencv/opencv2/structured_light.hpp b/IPL/include/opencv/opencv2/structured_light.hpp
index b06cdfb..4508d89 100644
--- a/IPL/include/opencv/opencv2/structured_light.hpp
+++ b/IPL/include/opencv/opencv2/structured_light.hpp
@@ -45,6 +45,7 @@
 
 #include "opencv2/structured_light/structured_light.hpp"
 #include "opencv2/structured_light/graycodepattern.hpp"
+#include "opencv2/structured_light/sinusoidalpattern.hpp"
 
 /** @defgroup structured_light Structured Light API
 
diff --git a/IPL/include/opencv/opencv2/structured_light/graycodepattern.hpp b/IPL/include/opencv/opencv2/structured_light/graycodepattern.hpp
index cf01f70..f1d6331 100644
--- a/IPL/include/opencv/opencv2/structured_light/graycodepattern.hpp
+++ b/IPL/include/opencv/opencv2/structured_light/graycodepattern.hpp
@@ -43,6 +43,7 @@
 #define __OPENCV_GRAY_CODE_PATTERN_HPP__
 
 #include "opencv2/core.hpp"
+#include "opencv2/structured_light/structured_light.hpp"
 
 namespace cv {
 namespace structured_light {
@@ -72,22 +73,22 @@ class CV_EXPORTS_W GrayCodePattern : public StructuredLightPattern
    *  @param width Projector's width. Default value is 1024.
    *  @param height Projector's height. Default value is 768.
    */
-  struct CV_EXPORTS_W_SIMPLE Params
+  struct CV_EXPORTS Params
   {
-    CV_WRAP
     Params();
-    CV_PROP_RW
     int width;
-    CV_PROP_RW
     int height;
   };
 
   /** @brief Constructor
    @param parameters GrayCodePattern parameters GrayCodePattern::Params: the width and the height of the projector.
    */
-  CV_WRAP
   static Ptr<GrayCodePattern> create( const GrayCodePattern::Params &parameters = GrayCodePattern::Params() );
 
+  // alias for scripting
+  CV_WRAP
+  static Ptr<GrayCodePattern> create( int width, int height );
+
   /** @brief Get the number of pattern images needed for the graycode pattern.
    *
    * @return The number of pattern images needed for the graycode pattern.
@@ -136,13 +137,13 @@ class CV_EXPORTS_W GrayCodePattern : public StructuredLightPattern
    *  @param patternImages The pattern images acquired by the camera, stored in a grayscale vector < Mat >.
    *  @param x x coordinate of the image pixel.
    *  @param y y coordinate of the image pixel.
-   *  @param projPix Projector's pixel corresponding to the camera's pixel: projPix.x and projPix.y are the image coordinates of the projector’s pixel corresponding to the pixel being decoded in a camera.
+   *  @param projPix Projector's pixel corresponding to the camera's pixel: projPix.x and projPix.y are the image coordinates of the projector's pixel corresponding to the pixel being decoded in a camera.
    */
   CV_WRAP
-  virtual bool getProjPixel( InputArrayOfArrays patternImages, int x, int y, Point &projPix ) const = 0;
+  virtual bool getProjPixel( InputArrayOfArrays patternImages, int x, int y, CV_OUT Point &projPix ) const = 0;
 };
 
 //! @}
 }
 }
-#endif
\ No newline at end of file
+#endif
diff --git a/IPL/include/opencv/opencv2/structured_light/sinusoidalpattern.hpp b/IPL/include/opencv/opencv2/structured_light/sinusoidalpattern.hpp
new file mode 100644
index 0000000..cc2efe2
--- /dev/null
+++ b/IPL/include/opencv/opencv2/structured_light/sinusoidalpattern.hpp
@@ -0,0 +1,151 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2015, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef __OPENCV_SINUSOIDAL_PATTERN_HPP__
+#define __OPENCV_SINUSOIDAL_PATTERN_HPP__
+
+#include "opencv2/core.hpp"
+#include "opencv2/imgproc.hpp"
+#include "opencv2/structured_light/structured_light.hpp"
+#include <opencv2/phase_unwrapping.hpp>
+#include <opencv2/calib3d.hpp>
+
+namespace cv {
+namespace structured_light {
+//! @addtogroup structured_light
+//! @{
+
+ //! Type of sinusoidal pattern profilometry methods.
+enum{
+  FTP = 0,
+  PSP = 1,
+  FAPS = 2
+ };
+/**
+ * @brief Class implementing Fourier transform profilometry (FTP), phase-shifting profilometry (PSP)
+ * and Fourier-assisted phase-shifting profilometry (FAPS) based on @cite faps.
+
+ * This class generates sinusoidal patterns that can be used with FTP, PSP and FAPS.
+*/
+class CV_EXPORTS_W SinusoidalPattern : public StructuredLightPattern
+{
+public:
+    /**
+     * @brief Parameters of SinusoidalPattern constructor
+     * @param width Projector's width.
+     * @param height Projector's height.
+     * @param nbrOfPeriods Number of periods along the pattern direction.
+     * @param shiftValue Phase shift between two consecutive patterns.
+     * @param methodId Selects the method to use: FTP, PSP or FAPS.
+     * @param nbrOfPixelsBetweenMarkers Number of pixels between two consecutive markers on the same row.
+     * @param setMarkers Allows setting markers on the patterns.
+     * @param markersLocation Vector used to store the locations of the markers on the patterns.
+     */
+    struct CV_EXPORTS_W Params
+    {
+        CV_WRAP Params();
+        CV_PROP_RW int width;
+        CV_PROP_RW int height;
+        CV_PROP_RW int nbrOfPeriods;
+        CV_PROP_RW float shiftValue;
+        CV_PROP_RW int methodId;
+        CV_PROP_RW int nbrOfPixelsBetweenMarkers;
+        CV_PROP_RW bool horizontal;
+        CV_PROP_RW bool setMarkers;
+        std::vector<Point2f> markersLocation;
+    };
+    /**
+     * @brief Constructor.
+     * @param parameters SinusoidalPattern parameters (SinusoidalPattern::Params): width and height of the projector, and the pattern parameters.
+     *
+     */
+    CV_WRAP static Ptr<SinusoidalPattern> create( Ptr<SinusoidalPattern::Params> parameters =
+                                          makePtr<SinusoidalPattern::Params>() );
+    /**
+     * @brief Compute a wrapped phase map from sinusoidal patterns.
+     * @param patternImages Input data to compute the wrapped phase map.
+     * @param wrappedPhaseMap Wrapped phase map obtained through one of the three methods.
+     * @param shadowMask Mask used to discard shadow regions.
+     * @param fundamental Fundamental matrix used to compute epipolar lines and ease the matching step.
+     */
+    CV_WRAP
+    virtual void computePhaseMap( InputArrayOfArrays patternImages,
+                                  OutputArray wrappedPhaseMap,
+                                  OutputArray shadowMask = noArray(),
+                                  InputArray fundamental = noArray()) = 0;
+    /**
+     * @brief Unwrap the wrapped phase map to remove phase ambiguities.
+     * @param wrappedPhaseMap The wrapped phase map computed from the pattern.
+     * @param unwrappedPhaseMap The unwrapped phase map used to find correspondences between the two devices.
+     * @param camSize Resolution of the camera.
+     * @param shadowMask Mask used to discard shadow regions.
+     */
+    CV_WRAP
+    virtual void unwrapPhaseMap( InputArray wrappedPhaseMap,
+                                 OutputArray unwrappedPhaseMap,
+                                 cv::Size camSize,
+                                 InputArray shadowMask = noArray() ) = 0;
+    /**
+     * @brief Find correspondences between the two devices thanks to unwrapped phase maps.
+     * @param projUnwrappedPhaseMap Projector's unwrapped phase map.
+     * @param camUnwrappedPhaseMap Camera's unwrapped phase map.
+     * @param matches Images used to display correspondences map.
+     */
+    CV_WRAP
+    virtual void findProCamMatches( InputArray projUnwrappedPhaseMap, InputArray camUnwrappedPhaseMap,
+                                    OutputArrayOfArrays matches ) = 0;
+
+    /**
+     * @brief Compute the data modulation term.
+     * @param patternImages Captured images with projected patterns.
+     * @param dataModulationTerm Mat where the data modulation term is saved.
+     * @param shadowMask Mask used to discard shadow regions.
+     */
+    CV_WRAP
+    virtual void computeDataModulationTerm( InputArrayOfArrays patternImages,
+                                            OutputArray dataModulationTerm,
+                                            InputArray shadowMask ) = 0;
+
+};
+//! @}
+}
+}
+#endif
diff --git a/IPL/include/opencv/opencv2/structured_light/structured_light.hpp b/IPL/include/opencv/opencv2/structured_light/structured_light.hpp
index aa7e014..4d4b6fa 100644
--- a/IPL/include/opencv/opencv2/structured_light/structured_light.hpp
+++ b/IPL/include/opencv/opencv2/structured_light/structured_light.hpp
@@ -53,7 +53,7 @@ namespace structured_light {
 // other algorithms can be implemented
 enum
 {
-  DECODE_3D_UNDERWORLD = 0  //!< Kyriakos Herakleous, Charalambos Poullis. “3DUNDERWORLD-SLS: An Open-Source Structured-Light Scanning System for Rapid Geometry Acquisition”, arXiv preprint arXiv:1406.6595 (2014).
+  DECODE_3D_UNDERWORLD = 0  //!< Kyriakos Herakleous, Charalambos Poullis. "3DUNDERWORLD-SLS: An Open-Source Structured-Light Scanning System for Rapid Geometry Acquisition", arXiv preprint arXiv:1406.6595 (2014).
 };
 
 /** @brief Abstract base class for generating and decoding structured light patterns.
@@ -78,13 +78,14 @@ class CV_EXPORTS_W StructuredLightPattern : public virtual Algorithm
    @note All the images must be at the same resolution.
    */
   CV_WRAP
-  virtual bool decode( InputArrayOfArrays patternImages, OutputArray disparityMap, InputArrayOfArrays blackImages =
-                          noArray(),
-                      InputArrayOfArrays whiteImages = noArray(), int flags = DECODE_3D_UNDERWORLD ) const = 0;
+  virtual bool decode( const std::vector< std::vector<Mat> >& patternImages, OutputArray disparityMap,
+                       InputArrayOfArrays blackImages = noArray(),
+                       InputArrayOfArrays whiteImages = noArray(),
+                       int flags = DECODE_3D_UNDERWORLD ) const = 0;
 };
 
 //! @}
 
 }
 }
-#endif
\ No newline at end of file
+#endif
diff --git a/IPL/include/opencv/opencv2/superres.hpp b/IPL/include/opencv/opencv2/superres.hpp
index dec8e4e..792dffd 100644
--- a/IPL/include/opencv/opencv2/superres.hpp
+++ b/IPL/include/opencv/opencv2/superres.hpp
@@ -40,8 +40,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_SUPERRES_HPP__
-#define __OPENCV_SUPERRES_HPP__
+#ifndef OPENCV_SUPERRES_HPP
+#define OPENCV_SUPERRES_HPP
 
 #include "opencv2/core.hpp"
 #include "opencv2/superres/optical_flow.hpp"
@@ -50,7 +50,7 @@
   @defgroup superres Super Resolution
 
 The Super Resolution module contains a set of functions and classes that can be used to solve the
-problem of resolution enhancement. There are a few methods implemented, most of them are descibed in
+problem of resolution enhancement. There are a few methods implemented, most of them are described in
 the papers @cite Farsiu03 and @cite Mitzel09 .
 
  */
@@ -97,8 +97,8 @@ namespace cv
 
             @param frame Output result
              */
-            void nextFrame(OutputArray frame);
-            void reset();
+            void nextFrame(OutputArray frame) CV_OVERRIDE;
+            void reset() CV_OVERRIDE;
 
             /** @brief Clear all inner buffers.
             */
@@ -123,10 +123,10 @@ namespace cv
             virtual void setTau(double val) = 0;
 
             //! @brief Weight parameter to balance data term and smoothness term
-            /** @see setLabmda */
-            virtual double getLabmda() const = 0;
-            /** @copybrief getLabmda @see getLabmda */
-            virtual void setLabmda(double val) = 0;
+            /** @see setLambda */
+            virtual double getLambda() const = 0;
+            /** @copybrief getLambda @see getLambda */
+            virtual void setLambda(double val) = 0;
 
             //! @brief Parameter of spacial distribution in Bilateral-TV
             /** @see setAlpha */
@@ -204,4 +204,4 @@ namespace cv
     }
 }
 
-#endif // __OPENCV_SUPERRES_HPP__
+#endif // OPENCV_SUPERRES_HPP
diff --git a/IPL/include/opencv/opencv2/superres/optical_flow.hpp b/IPL/include/opencv/opencv2/superres/optical_flow.hpp
index d2f29a3..07e7ca9 100644
--- a/IPL/include/opencv/opencv2/superres/optical_flow.hpp
+++ b/IPL/include/opencv/opencv2/superres/optical_flow.hpp
@@ -40,8 +40,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_SUPERRES_OPTICAL_FLOW_HPP__
-#define __OPENCV_SUPERRES_OPTICAL_FLOW_HPP__
+#ifndef OPENCV_SUPERRES_OPTICAL_FLOW_HPP
+#define OPENCV_SUPERRES_OPTICAL_FLOW_HPP
 
 #include "opencv2/core.hpp"
 
@@ -200,4 +200,4 @@ namespace cv
     }
 }
 
-#endif // __OPENCV_SUPERRES_OPTICAL_FLOW_HPP__
+#endif // OPENCV_SUPERRES_OPTICAL_FLOW_HPP
diff --git a/IPL/include/opencv/opencv2/surface_matching/icp.hpp b/IPL/include/opencv/opencv2/surface_matching/icp.hpp
index d894702..a0052d4 100644
--- a/IPL/include/opencv/opencv2/surface_matching/icp.hpp
+++ b/IPL/include/opencv/opencv2/surface_matching/icp.hpp
@@ -77,17 +77,17 @@ namespace ppf_match_3d
 * 5. Linearization of Point-to-Plane metric by Kok Lim Low:
 * https://www.comp.nus.edu.sg/~lowkl/publications/lowk_point-to-plane_icp_techrep.pdf
 */
-class CV_EXPORTS ICP
+class CV_EXPORTS_W ICP
 {
 public:
 
-  enum ICP_SAMPLING_TYPE
+  CV_WRAP enum
   {
-    ICP_SAMPLING_TYPE_UNIFORM,
-    ICP_SAMPLING_TYPE_GELFAND
+    ICP_SAMPLING_TYPE_UNIFORM = 0,
+    ICP_SAMPLING_TYPE_GELFAND = 1
   };
 
-  ICP()
+  CV_WRAP ICP()
   {
     m_tolerance = 0.005f;
     m_rejectionScale = 2.5f;
@@ -114,7 +114,7 @@ class CV_EXPORTS ICP
             applied. Leave it as 0.
      *  @param [in] numMaxCorr Currently this parameter is ignored and only PickyICP is applied. Leave it as 1.
      */
-  ICP(const int iterations, const float tolerence=0.05, const float rejectionScale=2.5, const int numLevels=6, const ICP_SAMPLING_TYPE sampleType = ICP_SAMPLING_TYPE_UNIFORM, const int numMaxCorr=1)
+  CV_WRAP ICP(const int iterations, const float tolerence = 0.05f, const float rejectionScale = 2.5f, const int numLevels = 6, const int sampleType = ICP::ICP_SAMPLING_TYPE_UNIFORM, const int numMaxCorr = 1)
   {
     m_tolerance = tolerence;
     m_numNeighborsCorr = numMaxCorr;
@@ -136,7 +136,7 @@ class CV_EXPORTS ICP
      *
      *  \details It is assumed that the model is registered on the scene. Scene remains static, while the model transforms. The output poses transform the models onto the scene. Because of the point to plane minimization, the scene is expected to have the normals available. Expected to have the normals (Nx6).
      */
-  int registerModelToScene(const Mat& srcPC, const Mat& dstPC, double& residual, double pose[16]);
+  CV_WRAP int registerModelToScene(const Mat& srcPC, const Mat& dstPC, CV_OUT double& residual, CV_OUT Matx44d& pose);
 
   /**
      *  \brief Perform registration with multiple initial poses
@@ -149,7 +149,7 @@ class CV_EXPORTS ICP
      *
      *  \details It is assumed that the model is registered on the scene. Scene remains static, while the model transforms. The output poses transform the models onto the scene. Because of the point to plane minimization, the scene is expected to have the normals available. Expected to have the normals (Nx6).
      */
-  int registerModelToScene(const Mat& srcPC, const Mat& dstPC, std::vector<Pose3DPtr>& poses);
+  CV_WRAP int registerModelToScene(const Mat& srcPC, const Mat& dstPC, CV_IN_OUT std::vector<Pose3DPtr>& poses);
 
 private:
   float m_tolerance;
diff --git a/IPL/include/opencv/opencv2/surface_matching/pose_3d.hpp b/IPL/include/opencv/opencv2/surface_matching/pose_3d.hpp
index cd9c4a4..b015db9 100644
--- a/IPL/include/opencv/opencv2/surface_matching/pose_3d.hpp
+++ b/IPL/include/opencv/opencv2/surface_matching/pose_3d.hpp
@@ -67,53 +67,51 @@ typedef Ptr<PoseCluster3D> PoseCluster3DPtr;
 * various helper methods to work with poses
 *
 */
-class CV_EXPORTS Pose3D
+class CV_EXPORTS_W Pose3D
 {
 public:
-  Pose3D()
+  CV_WRAP Pose3D()
   {
     alpha=0;
     modelIndex=0;
     numVotes=0;
     residual = 0;
 
-    for (int i=0; i<16; i++)
-      pose[i]=0;
+    pose = Matx44d::all(0);
   }
 
-  Pose3D(double Alpha, unsigned int ModelIndex=0, unsigned int NumVotes=0)
+  CV_WRAP Pose3D(double Alpha, size_t ModelIndex=0, size_t NumVotes=0)
   {
     alpha = Alpha;
     modelIndex = ModelIndex;
     numVotes = NumVotes;
     residual=0;
 
-    for (int i=0; i<16; i++)
-      pose[i]=0;
+    pose = Matx44d::all(0);
   }
 
   /**
    *  \brief Updates the pose with the new one
    *  \param [in] NewPose New pose to overwrite
    */
-  void updatePose(double NewPose[16]);
+  CV_WRAP void updatePose(Matx44d& NewPose);
 
   /**
    *  \brief Updates the pose with the new one
    */
-  void updatePose(double NewR[9], double NewT[3]);
+  CV_WRAP void updatePose(Matx33d& NewR, Vec3d& NewT);
 
   /**
    *  \brief Updates the pose with the new one, but this time using quaternions to represent rotation
    */
-  void updatePoseQuat(double Q[4], double NewT[3]);
+  CV_WRAP void updatePoseQuat(Vec4d& Q, Vec3d& NewT);
 
   /**
    *  \brief Left multiplies the existing pose in order to update the transformation
    *  \param [in] IncrementalPose New pose to apply
    */
-  void appendPose(double IncrementalPose[16]);
-  void printPose();
+  CV_WRAP void appendPose(Matx44d& IncrementalPose);
+  CV_WRAP void printPose();
 
   Pose3DPtr clone();
 
@@ -124,18 +122,20 @@ class CV_EXPORTS Pose3D
 
   virtual ~Pose3D() {}
 
-  double alpha, residual;
-  unsigned int modelIndex;
-  unsigned int numVotes;
-  double pose[16], angle, t[3], q[4];
+  CV_PROP double alpha, residual;
+  CV_PROP size_t modelIndex, numVotes;
+  CV_PROP Matx44d pose;
+  CV_PROP double angle;
+  CV_PROP Vec3d t;
+  CV_PROP Vec4d q;
 };
 
 /**
-* @brief When multiple poses (see Pose3D) are grouped together (contribute to the same transformation) 
+* @brief When multiple poses (see Pose3D) are grouped together (contribute to the same transformation)
 * pose clusters occur. This class is a general container for such groups of poses. It is possible to store,
 * load and perform IO on these poses.
 */
-class CV_EXPORTS PoseCluster3D
+class CV_EXPORTS_W PoseCluster3D
 {
 public:
   PoseCluster3D()
@@ -175,7 +175,7 @@ class CV_EXPORTS PoseCluster3D
   int readPoseCluster(const std::string& FileName);
 
   std::vector<Pose3DPtr> poseList;
-  int numVotes;
+  size_t numVotes;
   int id;
 };
 
diff --git a/IPL/include/opencv/opencv2/surface_matching/ppf_helpers.hpp b/IPL/include/opencv/opencv2/surface_matching/ppf_helpers.hpp
index 6f3cb77..8e02b15 100644
--- a/IPL/include/opencv/opencv2/surface_matching/ppf_helpers.hpp
+++ b/IPL/include/opencv/opencv2/surface_matching/ppf_helpers.hpp
@@ -59,16 +59,16 @@ namespace ppf_match_3d
  *  @param [in] fileName The PLY model to read
  *  @param [in] withNormals Flag wheather the input PLY contains normal information,
  *  and whether it should be loaded or not
- *  @return Returns the matrix on successfull load
+ *  @return Returns the matrix on successful load
  */
-CV_EXPORTS Mat loadPLYSimple(const char* fileName, int withNormals);
+CV_EXPORTS_W Mat loadPLYSimple(const char* fileName, int withNormals = 0);
 
 /**
  *  @brief Write a point cloud to PLY file
  *  @param [in] PC Input point cloud
  *  @param [in] fileName The PLY model file to write
 */
-CV_EXPORTS void writePLY(Mat PC, const char* fileName);
+CV_EXPORTS_W void writePLY(Mat PC, const char* fileName);
 
 /**
 *  @brief Used for debbuging pruposes, writes a point cloud to a PLY file with the tip
@@ -76,7 +76,7 @@ CV_EXPORTS void writePLY(Mat PC, const char* fileName);
 *  @param [in] PC Input point cloud
 *  @param [in] fileName The PLY model file to write
 */
-CV_EXPORTS void writePLYVisibleNormals(Mat PC, const char* fileName);
+CV_EXPORTS_W void writePLYVisibleNormals(Mat PC, const char* fileName);
 
 Mat samplePCUniform(Mat PC, int sampleStep);
 Mat samplePCUniformInd(Mat PC, int sampleStep, std::vector<int>& indices);
@@ -89,31 +89,20 @@ Mat samplePCUniformInd(Mat PC, int sampleStep, std::vector<int>& indices);
  *  @param [in] zrange Z components (min and max) of the bounding box of the model
  *  @param [in] sample_step_relative The point cloud is sampled such that all points
  *  have a certain minimum distance. This minimum distance is determined relatively using
- *  the parameter sample_step_relative. 
+ *  the parameter sample_step_relative.
  *  @param [in] weightByCenter The contribution of the quantized data points can be weighted
  *  by the distance to the origin. This parameter enables/disables the use of weighting.
  *  @return Sampled point cloud
 */
-CV_EXPORTS Mat samplePCByQuantization(Mat pc, float xrange[2], float yrange[2], float zrange[2], float sample_step_relative, int weightByCenter=0);
+CV_EXPORTS_W Mat samplePCByQuantization(Mat pc, Vec2f& xrange, Vec2f& yrange, Vec2f& zrange, float sample_step_relative, int weightByCenter=0);
 
-void computeBboxStd(Mat pc, float xRange[2], float yRange[2], float zRange[2]);
+void computeBboxStd(Mat pc, Vec2f& xRange, Vec2f& yRange, Vec2f& zRange);
 
 void* indexPCFlann(Mat pc);
 void destroyFlann(void* flannIndex);
 void queryPCFlann(void* flannIndex, Mat& pc, Mat& indices, Mat& distances);
 void queryPCFlann(void* flannIndex, Mat& pc, Mat& indices, Mat& distances, const int numNeighbors);
 
-/**
- *  Mostly for visualization purposes. Normalizes the point cloud in a Hartley-Zissermann
- *  fashion. In other words, the point cloud is centered, and scaled such that the largest
- *  distance from the origin is sqrt(2). Finally a rescaling is applied.
- *  @param [in] pc Input point cloud (CV_32F family). Point clouds with 3 or 6 elements per
- *  row are expected.
- *  @param [in] scale The scale after normalization. Default to 1.
- *  @return Normalized point cloud
-*/
-CV_EXPORTS Mat normalize_pc(Mat pc, float scale);
-
 Mat normalizePCCoeff(Mat pc, float scale, float* Cx, float* Cy, float* Cz, float* MinVal, float* MaxVal);
 Mat transPCCoeff(Mat pc, float scale, float Cx, float Cy, float Cz, float MinVal, float MaxVal);
 
@@ -125,20 +114,20 @@ Mat transPCCoeff(Mat pc, float scale, float Cx, float Cy, float Cz, float MinVal
  *  @param [in] Pose 4x4 pose matrix, but linearized in row-major form.
  *  @return Transformed point cloud
 */
-CV_EXPORTS Mat transformPCPose(Mat pc, double Pose[16]);
+CV_EXPORTS_W Mat transformPCPose(Mat pc, const Matx44d& Pose);
 
 /**
  *  Generate a random 4x4 pose matrix
  *  @param [out] Pose The random pose
 */
-CV_EXPORTS void getRandomPose(double Pose[16]);
+CV_EXPORTS_W void getRandomPose(Matx44d& Pose);
 
 /**
  *  Adds a uniform noise in the given scale to the input point cloud
- *  @param [in] pc Input point cloud (CV_32F family). 
+ *  @param [in] pc Input point cloud (CV_32F family).
  *  @param [in] scale Input scale of the noise. The larger the scale, the more noisy the output
 */
-CV_EXPORTS Mat addNoisePC(Mat pc, double scale);
+CV_EXPORTS_W Mat addNoisePC(Mat pc, double scale);
 
 /**
  *  @brief Compute the normals of an arbitrary point cloud
@@ -148,13 +137,13 @@ CV_EXPORTS Mat addNoisePC(Mat pc, double scale);
  *  If PCNormals is provided to be an Nx6 matrix, then no new allocation
  *  is made, instead the existing memory is overwritten.
  *  @param [in] PC Input point cloud to compute the normals for.
- *  @param [in] PCNormals Output point cloud
+ *  @param [out] PCNormals Output point cloud
  *  @param [in] NumNeighbors Number of neighbors to take into account in a local region
  *  @param [in] FlipViewpoint Should normals be flipped to a viewing direction?
  *  @param [in] viewpoint
  *  @return Returns 0 on success
  */
-CV_EXPORTS int computeNormalsPC3d(const Mat& PC, Mat& PCNormals, const int NumNeighbors, const bool FlipViewpoint, const double viewpoint[3]);
+CV_EXPORTS_W int computeNormalsPC3d(const Mat& PC, CV_OUT Mat& PCNormals, const int NumNeighbors, const bool FlipViewpoint, const Vec3f& viewpoint);
 
 //! @}
 
diff --git a/IPL/include/opencv/opencv2/surface_matching/ppf_match_3d.hpp b/IPL/include/opencv/opencv2/surface_matching/ppf_match_3d.hpp
index ffc8615..b967e6a 100644
--- a/IPL/include/opencv/opencv2/surface_matching/ppf_match_3d.hpp
+++ b/IPL/include/opencv/opencv2/surface_matching/ppf_match_3d.hpp
@@ -94,14 +94,14 @@ typedef struct THash
   * detector.match(pcTest, results, 1.0/5.0,0.05);
   * @endcode
   */
-class CV_EXPORTS PPF3DDetector
+class CV_EXPORTS_W PPF3DDetector
 {
 public:
 
   /**
    * \brief Empty constructor. Sets default arguments
    */
-  PPF3DDetector();
+  CV_WRAP PPF3DDetector();
 
   /**
     * Constructor with arguments
@@ -109,7 +109,7 @@ class CV_EXPORTS PPF3DDetector
     * @param [in] relativeDistanceStep The discretization distance of the point pair distance relative to the model's diameter. This value has a direct impact on the hashtable. Using small values would lead to too fine discretization, and thus ambiguity in the bins of hashtable. Too large values would lead to no discrimination over the feature vectors and different point pair features would be assigned to the same bin. This argument defaults to the value of RelativeSamplingStep. For noisy scenes, the value can be increased to improve the robustness of the matching against noisy points.
     * @param [in] numAngles Set the discretization of the point pair orientation as the number of subdivisions of the angle. This value is the equivalent of RelativeDistanceStep for the orientations. Increasing the value increases the precision of the matching but decreases the robustness against incorrect normal directions. Decreasing the value decreases the precision of the matching but increases the robustness against incorrect normal directions. For very noisy scenes where the normal directions can not be computed accurately, the value can be set to 25 or 20.
     */
-  PPF3DDetector(const double relativeSamplingStep, const double relativeDistanceStep=0.05, const double numAngles=30);
+  CV_WRAP PPF3DDetector(const double relativeSamplingStep, const double relativeDistanceStep=0.05, const double numAngles=30);
 
   virtual ~PPF3DDetector();
 
@@ -128,7 +128,7 @@ class CV_EXPORTS PPF3DDetector
     *
     *  \details Uses the parameters set in the constructor to downsample and learn a new model. When the model is learnt, the instance gets ready for calling "match".
     */
-  void trainModel(const Mat& Model);
+  CV_WRAP void trainModel(const Mat& Model);
 
   /**
     *  \brief Matches a trained model across a provided scene.
@@ -138,7 +138,7 @@ class CV_EXPORTS PPF3DDetector
     *  @param [in] relativeSceneSampleStep The ratio of scene points to be used for the matching after sampling with relativeSceneDistance. For example, if this value is set to 1.0/5.0, every 5th point from the scene is used for pose estimation. This parameter allows an easy trade-off between speed and accuracy of the matching. Increasing the value leads to less points being used and in turn to a faster but less accurate pose computation. Decreasing the value has the inverse effect.
     *  @param [in] relativeSceneDistance Set the distance threshold relative to the diameter of the model. This parameter is equivalent to relativeSamplingStep in the training stage. This parameter acts like a prior sampling with the relativeSceneSampleStep parameter.
     */
-  void match(const Mat& scene, std::vector<Pose3DPtr> &results, const double relativeSceneSampleStep=1.0/5.0, const double relativeSceneDistance=0.03);
+  CV_WRAP void match(const Mat& scene, CV_OUT std::vector<Pose3DPtr> &results, const double relativeSceneSampleStep=1.0/5.0, const double relativeSceneDistance=0.03);
 
   void read(const FileNode& fn);
   void write(FileStorage& fs) const;
@@ -148,7 +148,7 @@ class CV_EXPORTS PPF3DDetector
   double angle_step, angle_step_radians, distance_step;
   double sampling_step_relative, angle_step_relative, distance_step_relative;
   Mat sampled_pc, ppf;
-  int num_ref_points, ppf_step;
+  int num_ref_points;
   hashtable_int* hash_table;
   THash* hash_nodes;
 
@@ -160,13 +160,13 @@ class CV_EXPORTS PPF3DDetector
   void clearTrainingModels();
 
 private:
-  void computePPFFeatures(const double p1[4], const double n1[4],
-                          const double p2[4], const double n2[4],
-                          double f[4]);
+  void computePPFFeatures(const Vec3d& p1, const Vec3d& n1,
+                          const Vec3d& p2, const Vec3d& n2,
+                          Vec4d& f);
 
   bool matchPose(const Pose3D& sourcePose, const Pose3D& targetPose);
 
-  void clusterPoses(std::vector<Pose3DPtr> poseList, int numPoses, std::vector<Pose3DPtr> &finalPoses);
+  void clusterPoses(std::vector<Pose3DPtr>& poseList, int numPoses, std::vector<Pose3DPtr> &finalPoses);
 
   bool trained;
 };
diff --git a/IPL/include/opencv/opencv2/surface_matching/t_hash_int.hpp b/IPL/include/opencv/opencv2/surface_matching/t_hash_int.hpp
index 9e251e2..4e9b5bb 100644
--- a/IPL/include/opencv/opencv2/surface_matching/t_hash_int.hpp
+++ b/IPL/include/opencv/opencv2/surface_matching/t_hash_int.hpp
@@ -55,7 +55,7 @@ namespace ppf_match_3d
 //! @addtogroup surface_matching
 //! @{
 
-typedef unsigned int KeyType;
+typedef uint KeyType;
 
 typedef struct hashnode_i
 {
@@ -68,7 +68,7 @@ typedef struct HSHTBL_i
 {
   size_t size;
   struct hashnode_i **nodes;
-  size_t (*hashfunc)(unsigned int);
+  size_t (*hashfunc)(uint);
 } hashtable_int;
 
 
@@ -76,7 +76,7 @@ typedef struct HSHTBL_i
 
 from http://www-graphics.stanford.edu/~seander/bithacks.html
 */
-inline static unsigned int next_power_of_two(unsigned int value)
+inline static uint next_power_of_two(uint value)
 {
 
   --value;
@@ -90,7 +90,7 @@ inline static unsigned int next_power_of_two(unsigned int value)
   return value;
 }
 
-hashtable_int *hashtableCreate(size_t size, size_t (*hashfunc)(unsigned int));
+hashtable_int *hashtableCreate(size_t size, size_t (*hashfunc)(uint));
 void hashtableDestroy(hashtable_int *hashtbl);
 int hashtableInsert(hashtable_int *hashtbl, KeyType key, void *data);
 int hashtableInsertHashed(hashtable_int *hashtbl, KeyType key, void *data);
diff --git a/IPL/include/opencv/opencv2/text.hpp b/IPL/include/opencv/opencv2/text.hpp
index 591424c..c06c889 100644
--- a/IPL/include/opencv/opencv2/text.hpp
+++ b/IPL/include/opencv/opencv2/text.hpp
@@ -41,6 +41,7 @@ the use of this software, even if advised of the possibility of such damage.
 
 #include "opencv2/text/erfilter.hpp"
 #include "opencv2/text/ocr.hpp"
+#include "opencv2/text/textDetector.hpp"
 
 /** @defgroup text Scene Text Detection and Recognition
 
@@ -54,7 +55,7 @@ Class-specific Extremal Regions for Scene Text Detection
 --------------------------------------------------------
 
 The scene text detection algorithm described below has been initially proposed by Lukás Neumann &
-Jiri Matas [Neumann12]. The main idea behind Class-specific Extremal Regions is similar to the MSER
+Jiri Matas @cite Neumann11. The main idea behind Class-specific Extremal Regions is similar to the MSER
 in that suitable Extremal Regions (ERs) are selected from the whole component tree of the image.
 However, this technique differs from MSER in that selection of suitable ERs is done by a sequential
 classifier trained for character detection, i.e. dropping the stability requirement of MSERs and
@@ -66,12 +67,12 @@ hierarchy by their inclusion relation:
 
 ![image](pics/component_tree.png)
 
-The component tree may conatain a huge number of regions even for a very simple image as shown in
+The component tree may contain a huge number of regions even for a very simple image as shown in
 the previous image. This number can easily reach the order of 1 x 10\^6 regions for an average 1
 Megapixel image. In order to efficiently select suitable regions among all the ERs the algorithm
 make use of a sequential classifier with two differentiated stages.
 
-In the first stage incrementally computable descriptors (area, perimeter, bounding box, and euler
+In the first stage incrementally computable descriptors (area, perimeter, bounding box, and Euler
 number) are computed (in O(1)) for each region r and used as features for a classifier which
 estimates the class-conditional probability p(r|character). Only the ERs which correspond to local
 maximum of the probability p(r|character) are selected (if their probability is above a global limit
@@ -87,12 +88,12 @@ order to increase the character localization recall.
 
 After the ER filtering is done on each input channel, character candidates must be grouped in
 high-level text blocks (i.e. words, text lines, paragraphs, ...). The opencv_text module implements
-two different grouping algorithms: the Exhaustive Search algorithm proposed in [Neumann11] for
+two different grouping algorithms: the Exhaustive Search algorithm proposed in @cite Neumann12 for
 grouping horizontally aligned text, and the method proposed by Lluis Gomez and Dimosthenis Karatzas
-in [Gomez13][Gomez14] for grouping arbitrary oriented text (see erGrouping).
+in @cite Gomez13 @cite Gomez14 for grouping arbitrary oriented text (see erGrouping).
 
 To see the text detector at work, have a look at the textdetection demo:
-<https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/textdetection.cpp>
+<https://github.com/opencv/opencv_contrib/blob/master/modules/text/samples/textdetection.cpp>
 
     @defgroup text_recognize Scene Text Recognition
   @}
diff --git a/IPL/include/opencv/opencv2/text/erfilter.hpp b/IPL/include/opencv/opencv2/text/erfilter.hpp
index 7b4f4c1..8007142 100644
--- a/IPL/include/opencv/opencv2/text/erfilter.hpp
+++ b/IPL/include/opencv/opencv2/text/erfilter.hpp
@@ -78,11 +78,11 @@ struct CV_EXPORTS ERStat
     //! incrementally computable features
     int area;
     int perimeter;
-    int euler;                 //!< euler number
+    int euler;                 //!< Euler number
     Rect rect;
     double raw_moments[2];     //!< order 1 raw moments to derive the centroid
     double central_moments[3]; //!< order 2 central moments to construct the covariance matrix
-    std::deque<int> *crossings;//!< horizontal crossings
+    Ptr<std::deque<int> > crossings;//!< horizontal crossings
     float med_crossings;       //!< median of the crossings at three different height levels
 
     //! 2nd stage features
@@ -105,17 +105,17 @@ struct CV_EXPORTS ERStat
     ERStat* next;
     ERStat* prev;
 
-    //! wenever the regions is a local maxima of the probability
+    //! whether the region is a local maximum of the probability
     bool local_maxima;
     ERStat* max_probability_ancestor;
     ERStat* min_probability_ancestor;
 };
 
-/** @brief Base class for 1st and 2nd stages of Neumann and Matas scene text detection algorithm [Neumann12]. :
+/** @brief Base class for 1st and 2nd stages of Neumann and Matas scene text detection algorithm @cite Neumann12. :
 
 Extracts the component tree (if needed) and filter the extremal regions (ER's) by using a given classifier.
  */
-class CV_EXPORTS ERFilter : public Algorithm
+class CV_EXPORTS_W ERFilter : public Algorithm
 {
 public:
 
@@ -124,7 +124,7 @@ class CV_EXPORTS ERFilter : public Algorithm
     By doing it we hide SVM, Boost etc. Developers can provide their own classifiers to the
     ERFilter algorithm.
      */
-    class CV_EXPORTS Callback
+    class CV_EXPORTS_W Callback
     {
     public:
         virtual ~Callback() { }
@@ -159,44 +159,21 @@ class CV_EXPORTS ERFilter : public Algorithm
     virtual void setMinProbability(float minProbability) = 0;
     virtual void setMinProbabilityDiff(float minProbabilityDiff) = 0;
     virtual void setNonMaxSuppression(bool nonMaxSuppression) = 0;
-    virtual int  getNumRejected() = 0;
+    virtual int  getNumRejected() const = 0;
 };
 
 
-/*!
-    Create an Extremal Region Filter for the 1st stage classifier of N&M algorithm
-    Neumann L., Matas J.: Real-Time Scene Text Localization and Recognition, CVPR 2012
-
-    The component tree of the image is extracted by a threshold increased step by step
-    from 0 to 255, incrementally computable descriptors (aspect_ratio, compactness,
-    number of holes, and number of horizontal crossings) are computed for each ER
-    and used as features for a classifier which estimates the class-conditional
-    probability P(er|character). The value of P(er|character) is tracked using the inclusion
-    relation of ER across all thresholds and only the ERs which correspond to local maximum
-    of the probability P(er|character) are selected (if the local maximum of the
-    probability is above a global limit pmin and the difference between local maximum and
-    local minimum is greater than minProbabilityDiff).
-
-    @param cb – Callback with the classifier. Default classifier can be implicitly load with function
-        loadClassifierNM1(), e.g. from file in samples/cpp/trained_classifierNM1.xml
-    @param thresholdDelta – Threshold step in subsequent thresholds when extracting the component tree
-    @param minArea – The minimum area (% of image size) allowed for retreived ER’s
-    @param maxArea – The maximum area (% of image size) allowed for retreived ER’s
-    @param minProbability – The minimum probability P(er|character) allowed for retreived ER’s
-    @param nonMaxSuppression – Whenever non-maximum suppression is done over the branch probabilities
-    @param minProbabilityDiff – The minimum probability difference between local maxima and local minima ERs
-*/
-
-/** @brief Create an Extremal Region Filter for the 1st stage classifier of N&M algorithm [Neumann12].
+
+/** @brief Create an Extremal Region Filter for the 1st stage classifier of N&M algorithm @cite Neumann12.
 
 @param  cb :   Callback with the classifier. Default classifier can be implicitly load with function
 loadClassifierNM1, e.g. from file in samples/cpp/trained_classifierNM1.xml
 @param  thresholdDelta :   Threshold step in subsequent thresholds when extracting the component tree
 @param  minArea :   The minimum area (% of image size) allowed for retreived ER's
-@param  minArea :   The maximum area (% of image size) allowed for retreived ER's
+@param  maxArea :   The maximum area (% of image size) allowed for retreived ER's
 @param  minProbability :   The minimum probability P(er|character) allowed for retreived ER's
 @param  nonMaxSuppression :   Whenever non-maximum suppression is done over the branch probabilities
-@param  minProbability :   The minimum probability difference between local maxima and local minima ERs
+@param  minProbabilityDiff :   The minimum probability difference between local maxima and local minima ERs
 
 The component tree of the image is extracted by a threshold increased step by step from 0 to 255,
 incrementally computable descriptors (aspect_ratio, compactness, number of holes, and number of
@@ -207,13 +184,13 @@ the probability P(er|character) are selected (if the local maximum of the probab
 global limit pmin and the difference between local maximum and local minimum is greater than
 minProbabilityDiff).
  */
-CV_EXPORTS Ptr<ERFilter> createERFilterNM1(const Ptr<ERFilter::Callback>& cb,
-                                                  int thresholdDelta = 1, float minArea = 0.00025,
-                                                  float maxArea = 0.13, float minProbability = 0.4,
+CV_EXPORTS_W Ptr<ERFilter> createERFilterNM1(const Ptr<ERFilter::Callback>& cb,
+                                                  int thresholdDelta = 1, float minArea = (float)0.00025,
+                                                  float maxArea = (float)0.13, float minProbability = (float)0.4,
                                                   bool nonMaxSuppression = true,
-                                                  float minProbabilityDiff = 0.1);
+                                                  float minProbabilityDiff = (float)0.1);
 
-/** @brief Create an Extremal Region Filter for the 2nd stage classifier of N&M algorithm [Neumann12].
+/** @brief Create an Extremal Region Filter for the 2nd stage classifier of N&M algorithm @cite Neumann12.
 
 @param  cb :   Callback with the classifier. Default classifier can be implicitly load with function
 loadClassifierNM2, e.g. from file in samples/cpp/trained_classifierNM2.xml
@@ -224,9 +201,27 @@ non-character classes using more informative but also more computationally expen
 classifier uses all the features calculated in the first stage and the following additional
 features: hole area ratio, convex hull ratio, and number of outer inflexion points.
  */
-CV_EXPORTS Ptr<ERFilter> createERFilterNM2(const Ptr<ERFilter::Callback>& cb,
-                                                  float minProbability = 0.3);
+CV_EXPORTS_W Ptr<ERFilter> createERFilterNM2(const Ptr<ERFilter::Callback>& cb,
+                                                  float minProbability = (float)0.3);
+
+/** @brief Reads an Extremal Region Filter for the 1st stage classifier of N&M algorithm
+    from the provided path e.g. /path/to/cpp/trained_classifierNM1.xml
+
+@overload
+ */
+CV_EXPORTS_W  Ptr<ERFilter> createERFilterNM1(const String& filename,
+                                                  int thresholdDelta = 1, float minArea = (float)0.00025,
+                                                  float maxArea = (float)0.13, float minProbability = (float)0.4,
+                                                  bool nonMaxSuppression = true,
+                                                  float minProbabilityDiff = (float)0.1);
 
+/** @brief Reads an Extremal Region Filter for the 2nd stage classifier of N&M algorithm
+    from the provided path e.g. /path/to/cpp/trained_classifierNM2.xml
+
+@overload
+ */
+CV_EXPORTS_W Ptr<ERFilter> createERFilterNM2(const String& filename,
+                                                  float minProbability = (float)0.3);
 
 /** @brief Allow to implicitly load the default classifier when creating an ERFilter object.
 
@@ -234,7 +229,7 @@ CV_EXPORTS Ptr<ERFilter> createERFilterNM2(const Ptr<ERFilter::Callback>& cb,
 
 returns a pointer to ERFilter::Callback.
  */
-CV_EXPORTS Ptr<ERFilter::Callback> loadClassifierNM1(const std::string& filename);
+CV_EXPORTS_W Ptr<ERFilter::Callback> loadClassifierNM1(const String& filename);
 
 /** @brief Allow to implicitly load the default classifier when creating an ERFilter object.
 
@@ -242,7 +237,7 @@ CV_EXPORTS Ptr<ERFilter::Callback> loadClassifierNM1(const std::string& filename
 
 returns a pointer to ERFilter::Callback.
  */
-CV_EXPORTS Ptr<ERFilter::Callback> loadClassifierNM2(const std::string& filename);
+CV_EXPORTS_W Ptr<ERFilter::Callback> loadClassifierNM2(const String& filename);
 
 
 //! computeNMChannels operation modes
@@ -250,7 +245,7 @@ enum { ERFILTER_NM_RGBLGrad,
        ERFILTER_NM_IHSGrad
      };
 
-/** @brief Compute the different channels to be processed independently in the N&M algorithm [Neumann12].
+/** @brief Compute the different channels to be processed independently in the N&M algorithm @cite Neumann12.
 
 @param _src Source image. Must be RGB CV_8UC3.
 
@@ -264,14 +259,14 @@ channels (Grad) are used in order to obtain high localization recall. This imple
 provides an alternative combination of red (R), green (G), blue (B), lightness (L), and gradient
 magnitude (Grad).
  */
-CV_EXPORTS void computeNMChannels(InputArray _src, OutputArrayOfArrays _channels, int _mode = ERFILTER_NM_RGBLGrad);
+CV_EXPORTS_W void computeNMChannels(InputArray _src, CV_OUT OutputArrayOfArrays _channels, int _mode = ERFILTER_NM_RGBLGrad);
 
 
 
 //! text::erGrouping operation modes
 enum erGrouping_Modes {
 
-    /** Exhaustive Search algorithm proposed in [Neumann11] for grouping horizontally aligned text.
+    /** Exhaustive Search algorithm proposed in @cite Neumann11 for grouping horizontally aligned text.
     The algorithm models a verification function for all the possible ER sequences. The
     verification fuction for ER pairs consists in a set of threshold-based pairwise rules which
     compare measurements of two regions (height ratio, centroid angle, and region distance). The
@@ -282,13 +277,15 @@ enum erGrouping_Modes {
     consistent.
     */
     ERGROUPING_ORIENTATION_HORIZ,
-    /** Text grouping method proposed in [Gomez13][Gomez14] for grouping arbitrary oriented text. Regions
+    /** Text grouping method proposed in @cite Gomez13 @cite Gomez14 for grouping arbitrary oriented text. Regions
     are agglomerated by Single Linkage Clustering in a weighted feature space that combines proximity
     (x,y coordinates) and similarity measures (color, size, gradient magnitude, stroke width, etc.).
     SLC provides a dendrogram where each node represents a text group hypothesis. Then the algorithm
     finds the branches corresponding to text groups by traversing this dendrogram with a stopping rule
     that combines the output of a rotation invariant text group classifier and a probabilistic measure
     for hierarchical clustering validity assessment.
+
+    @note This mode is not supported due to NFA code removal ( https://github.com/opencv/opencv_contrib/issues/2235 )
      */
     ERGROUPING_ORIENTATION_ANY
 };
@@ -299,7 +296,7 @@ enum erGrouping_Modes {
 
 @param channels Vector of single channel images CV_8UC1 from wich the regions were extracted.
 
-@param regions Vector of ER's retreived from the ERFilter algorithm from each channel.
+@param regions Vector of ER's retrieved from the ERFilter algorithm from each channel.
 
 @param groups The output of the algorithm is stored in this parameter as set of lists of indexes to
 provided regions.
@@ -324,11 +321,18 @@ CV_EXPORTS void erGrouping(InputArray img, InputArrayOfArrays channels,
                                            const std::string& filename = std::string(),
                                            float minProbablity = 0.5);
 
+CV_EXPORTS_W void erGrouping(InputArray image, InputArray channel,
+                                           std::vector<std::vector<Point> > regions,
+                                           CV_OUT std::vector<Rect> &groups_rects,
+                                           int method = ERGROUPING_ORIENTATION_HORIZ,
+                                           const String& filename = String(),
+                                           float minProbablity = (float)0.5);
+
 /** @brief Converts MSER contours (vector\<Point\>) to ERStat regions.
 
 @param image Source image CV_8UC1 from which the MSERs where extracted.
 
-@param contours Intput vector with all the contours (vector\<Point\>).
+@param contours Input vector with all the contours (vector\<Point\>).
 
 @param regions Output where the ERStat regions are stored.
 
@@ -338,11 +342,32 @@ single vector\<Point\>, the function separates them in two different vectors (th
 ERStats where extracted from two different channels).
 
 An example of MSERsToERStats in use can be found in the text detection webcam_demo:
-<https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/webcam_demo.cpp>
+<https://github.com/opencv/opencv_contrib/blob/master/modules/text/samples/webcam_demo.cpp>
  */
 CV_EXPORTS void MSERsToERStats(InputArray image, std::vector<std::vector<Point> > &contours,
                                std::vector<std::vector<ERStat> > &regions);
 
+// Utility function for scripting
+CV_EXPORTS_W void detectRegions(InputArray image, const Ptr<ERFilter>& er_filter1, const Ptr<ERFilter>& er_filter2, CV_OUT std::vector< std::vector<Point> >& regions);
+
+
+/** @brief Extracts text regions from image.
+
+@param image Source image where text blocks need to be extracted from.  Should be CV_8UC3 (color).
+@param er_filter1 Extremal Region Filter for the 1st stage classifier of N&M algorithm @cite Neumann12
+@param er_filter2 Extremal Region Filter for the 2nd stage classifier of N&M algorithm @cite Neumann12
+@param groups_rects Output list of rectangle blocks with text
+@param method Grouping method (see text::erGrouping_Modes). Can be one of ERGROUPING_ORIENTATION_HORIZ, ERGROUPING_ORIENTATION_ANY.
+@param filename The XML or YAML file with the classifier model (e.g. samples/trained_classifier_erGrouping.xml). Only used when the grouping method is ERGROUPING_ORIENTATION_ANY.
+@param minProbability The minimum probability for accepting a group. Only used when the grouping method is ERGROUPING_ORIENTATION_ANY.
+
+
+ */
+CV_EXPORTS_W void detectRegions(InputArray image, const Ptr<ERFilter>& er_filter1, const Ptr<ERFilter>& er_filter2, CV_OUT std::vector<Rect> &groups_rects,
+                                           int method = ERGROUPING_ORIENTATION_HORIZ,
+                                           const String& filename = String(),
+                                           float minProbability = (float)0.5);
+
 //! @}
 
 }
diff --git a/IPL/include/opencv/opencv2/text/ocr.hpp b/IPL/include/opencv/opencv2/text/ocr.hpp
index 651934b..0137c37 100644
--- a/IPL/include/opencv/opencv2/text/ocr.hpp
+++ b/IPL/include/opencv/opencv2/text/ocr.hpp
@@ -44,6 +44,8 @@
 #ifndef __OPENCV_TEXT_OCR_HPP__
 #define __OPENCV_TEXT_OCR_HPP__
 
+#include <opencv2/core.hpp>
+
 #include <vector>
 #include <string>
 
@@ -61,6 +63,31 @@ enum
     OCR_LEVEL_TEXTLINE
 };
 
+//! Tesseract.PageSegMode Enumeration (no explicit initializers: values are 0..10 in declaration order)
+enum page_seg_mode
+{
+    PSM_OSD_ONLY,               // 0
+    PSM_AUTO_OSD,               // 1
+    PSM_AUTO_ONLY,              // 2
+    PSM_AUTO,                   // 3, default psmode argument of OCRTesseract::create()
+    PSM_SINGLE_COLUMN,          // 4
+    PSM_SINGLE_BLOCK_VERT_TEXT, // 5
+    PSM_SINGLE_BLOCK,           // 6
+    PSM_SINGLE_LINE,            // 7
+    PSM_SINGLE_WORD,            // 8
+    PSM_CIRCLE_WORD,            // 9
+    PSM_SINGLE_CHAR             // 10
+};
+
+//! Tesseract.OcrEngineMode Enumeration (no explicit initializers: values are 0..3 in declaration order)
+enum ocr_engine_mode
+{
+    OEM_TESSERACT_ONLY,          // 0
+    OEM_CUBE_ONLY,               // 1
+    OEM_TESSERACT_CUBE_COMBINED, // 2
+    OEM_DEFAULT                  // 3, default oem argument of OCRTesseract::create()
+};
+
 //base class BaseOCR declares a common API that would be used in a typical text recognition scenario
 class CV_EXPORTS_W BaseOCR
 {
@@ -81,10 +108,10 @@ Notice that it is compiled only when tesseract-ocr is correctly installed.
 @note
    -   (C++) An example of OCRTesseract recognition combined with scene text detection can be found
         at the end_to_end_recognition demo:
-        <https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/end_to_end_recognition.cpp>
+        <https://github.com/opencv/opencv_contrib/blob/master/modules/text/samples/end_to_end_recognition.cpp>
     -   (C++) Another example of OCRTesseract recognition combined with scene text detection can be
         found at the webcam_demo:
-        <https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/webcam_demo.cpp>
+        <https://github.com/opencv/opencv_contrib/blob/master/modules/text/samples/webcam_demo.cpp>
  */
 class CV_EXPORTS_W OCRTesseract : public BaseOCR
 {
@@ -103,15 +130,15 @@ class CV_EXPORTS_W OCRTesseract : public BaseOCR
     recognition of individual text elements found (e.g. words or text lines).
     @param component_confidences If provided the method will output a list of confidence values
     for the recognition of individual text elements found (e.g. words or text lines).
-    @param component_level OCR_LEVEL_WORD (by default), or OCR_LEVEL_TEXT_LINE.
+    @param component_level OCR_LEVEL_WORD (by default), or OCR_LEVEL_TEXTLINE.
      */
     virtual void run(Mat& image, std::string& output_text, std::vector<Rect>* component_rects=NULL,
                      std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
-                     int component_level=0);
+                     int component_level=0) CV_OVERRIDE;
 
     virtual void run(Mat& image, Mat& mask, std::string& output_text, std::vector<Rect>* component_rects=NULL,
                      std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
-                     int component_level=0);
+                     int component_level=0) CV_OVERRIDE;
 
     // aliases for scripting
     CV_WRAP String run(InputArray image, int min_confidence, int component_level=0);
@@ -128,7 +155,7 @@ class CV_EXPORTS_W OCRTesseract : public BaseOCR
     @param language an ISO 639-3 code or NULL will default to "eng".
     @param char_whitelist specifies the list of characters used for recognition. NULL defaults to
     "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ".
-    @param oem tesseract-ocr offers different OCR Engine Modes (OEM), by deffault
+    @param oem tesseract-ocr offers different OCR Engine Modes (OEM), by default
     tesseract::OEM_DEFAULT is used. See the tesseract-ocr API documentation for other possible
     values.
     @param psmode tesseract-ocr offers different Page Segmentation Modes (PSM) tesseract::PSM_AUTO
@@ -136,7 +163,7 @@ class CV_EXPORTS_W OCRTesseract : public BaseOCR
     possible values.
      */
     CV_WRAP static Ptr<OCRTesseract> create(const char* datapath=NULL, const char* language=NULL,
-                                    const char* char_whitelist=NULL, int oem=3, int psmode=3);
+                                    const char* char_whitelist=NULL, int oem=OEM_DEFAULT, int psmode=PSM_AUTO);
 };
 
 
@@ -147,12 +174,19 @@ enum decoder_mode
     OCR_DECODER_VITERBI = 0 // Other algorithms may be added
 };
 
+/* OCR character classifier type; passed as the int `classifier` argument of loadOCRHMMClassifier() and of the filename-based OCRHMMDecoder::create() */
+enum classifier_type
+{
+    OCR_KNN_CLASSIFIER = 0, // default `classifier` value of the filename-based OCRHMMDecoder::create()
+    OCR_CNN_CLASSIFIER = 1
+};
+
 /** @brief OCRHMMDecoder class provides an interface for OCR using Hidden Markov Models.
 
 @note
    -   (C++) An example on using OCRHMMDecoder recognition combined with scene text detection can
         be found at the webcam_demo sample:
-        <https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/webcam_demo.cpp>
+        <https://github.com/opencv/opencv_contrib/blob/master/modules/text/samples/webcam_demo.cpp>
  */
 class CV_EXPORTS_W OCRHMMDecoder : public BaseOCR
 {
@@ -163,9 +197,9 @@ class CV_EXPORTS_W OCRHMMDecoder : public BaseOCR
     This way it hides the feature extractor and the classifier itself, so developers can write
     their own OCR code.
 
-    The default character classifier and feature extractor can be loaded using the utility funtion
+    The default character classifier and feature extractor can be loaded using the utility function
     loadOCRHMMClassifierNM and KNN model provided in
-    <https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/OCRHMM_knn_model_data.xml.gz>.
+    <https://github.com/opencv/opencv_contrib/blob/master/modules/text/samples/OCRHMM_knn_model_data.xml.gz>.
      */
     class CV_EXPORTS_W ClassifierCallback
     {
@@ -206,7 +240,7 @@ class CV_EXPORTS_W OCRHMMDecoder : public BaseOCR
      */
     virtual void run(Mat& image, std::string& output_text, std::vector<Rect>* component_rects=NULL,
                      std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
-                     int component_level=0);
+                     int component_level=0) CV_OVERRIDE;
 
     /** @brief Recognize text using HMM.
 
@@ -233,7 +267,7 @@ class CV_EXPORTS_W OCRHMMDecoder : public BaseOCR
      */
     virtual void run(Mat& image, Mat& mask, std::string& output_text, std::vector<Rect>* component_rects=NULL,
                      std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
-                     int component_level=0);
+                     int component_level=0) CV_OVERRIDE;
 
     // aliases for scripting
     CV_WRAP String run(InputArray image, int min_confidence, int component_level=0);
@@ -256,24 +290,31 @@ class CV_EXPORTS_W OCRHMMDecoder : public BaseOCR
     @param mode HMM Decoding algorithm. Only OCR_DECODER_VITERBI is available for the moment
     (<http://en.wikipedia.org/wiki/Viterbi_algorithm>).
      */
-    static Ptr<OCRHMMDecoder> create(const Ptr<OCRHMMDecoder::ClassifierCallback> classifier,// The character classifier with built in feature extractor
-                                     const std::string& vocabulary,                    // The language vocabulary (chars when ascii english text)
+
+    CV_WRAP static Ptr<OCRHMMDecoder> create(const Ptr<OCRHMMDecoder::ClassifierCallback> classifier,// The character classifier with built in feature extractor
+                                     const String& vocabulary,                    // The language vocabulary (chars when ASCII English text)
                                                                                        //     size() must be equal to the number of classes
                                      InputArray transition_probabilities_table,        // Table with transition probabilities between character pairs
-                                                                                       //     cols == rows == vocabulari.size()
+                                                                                       //     cols == rows == vocabulary.size()
                                      InputArray emission_probabilities_table,          // Table with observation emission probabilities
-                                                                                       //     cols == rows == vocabulari.size()
-                                     decoder_mode mode = OCR_DECODER_VITERBI);         // HMM Decoding algorithm (only Viterbi for the moment)
+                                                                                       //     cols == rows == vocabulary.size()
+                                     int mode = OCR_DECODER_VITERBI);         // HMM Decoding algorithm (only Viterbi for the moment)
 
-    CV_WRAP static Ptr<OCRHMMDecoder> create(const Ptr<OCRHMMDecoder::ClassifierCallback> classifier,// The character classifier with built in feature extractor
-                                     const String& vocabulary,                    // The language vocabulary (chars when ascii english text)
+    /** @brief Creates an instance of the OCRHMMDecoder class. Loads and initializes HMMDecoder from the specified path
+
+     @overload
+     */
+    CV_WRAP static Ptr<OCRHMMDecoder> create(const String& filename,
+
+                                     const String& vocabulary,                    // The language vocabulary (chars when ASCII English text)
                                                                                        //     size() must be equal to the number of classes
                                      InputArray transition_probabilities_table,        // Table with transition probabilities between character pairs
-                                                                                       //     cols == rows == vocabulari.size()
+                                                                                       //     cols == rows == vocabulary.size()
                                      InputArray emission_probabilities_table,          // Table with observation emission probabilities
-                                                                                       //     cols == rows == vocabulari.size()
-                                     int mode = OCR_DECODER_VITERBI);         // HMM Decoding algorithm (only Viterbi for the moment)
+                                                                                       //     cols == rows == vocabulary.size()
+                                     int mode = OCR_DECODER_VITERBI,                    // HMM Decoding algorithm (only Viterbi for the moment)
 
+                                     int classifier = OCR_KNN_CLASSIFIER);              // The character classifier type
 protected:
 
     Ptr<OCRHMMDecoder::ClassifierCallback> classifier;
@@ -293,6 +334,8 @@ fixed size, while retaining the centroid and aspect ratio, in order to extract a
 based on gradient orientations along the chain-code of its perimeter. Then, the region is classified
 using a KNN model trained with synthetic data of rendered characters with different standard font
 types.
+
+@deprecated use loadOCRHMMClassifier instead
  */
 
 CV_EXPORTS_W Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierNM(const String& filename);
@@ -305,14 +348,24 @@ The CNN default classifier is based in the scene text recognition method propose
 Andrew NG in [Coates11a]. The character classifier consists in a Single Layer Convolutional Neural Network and
 a linear classifier. It is applied to the input image in a sliding window fashion, providing a set of recognitions
 at each window location.
+
+@deprecated use loadOCRHMMClassifier instead
  */
 CV_EXPORTS_W Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierCNN(const String& filename);
 
+/** @brief Allows the default character classifier to be loaded implicitly when creating an OCRHMMDecoder object.
+
+ @param filename The XML or YAML file with the classifier model (e.g. OCRBeamSearch_CNN_model_data.xml.gz)
+
+ @param classifier One of the classifier_type enum values (OCR_KNN_CLASSIFIER or OCR_CNN_CLASSIFIER).
+
+ */
+CV_EXPORTS_W Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifier(const String& filename, int classifier);
 //! @}
 
 /** @brief Utility function to create a tailored language model transitions table from a given list of words (lexicon).
  *
- * @param vocabulary The language vocabulary (chars when ascii english text).
+ * @param vocabulary The language vocabulary (chars when ASCII English text).
  *
  * @param lexicon The list of words that are expected to be found in a particular image.
  *
@@ -321,7 +374,7 @@ CV_EXPORTS_W Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierCNN(cons
  * The function calculate frequency statistics of character pairs from the given lexicon and fills the output transition_probabilities_table with them. The transition_probabilities_table can be used as input in the OCRHMMDecoder::create() and OCRBeamSearchDecoder::create() methods.
  * @note
  *    -   (C++) An alternative would be to load the default generic language transition table provided in the text module samples folder (created from ispell 42869 english words list) :
- *            <https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/OCRHMM_transitions_table.xml>
+ *            <https://github.com/opencv/opencv_contrib/blob/master/modules/text/samples/OCRHMM_transitions_table.xml>
  **/
 CV_EXPORTS void createOCRHMMTransitionsTable(std::string& vocabulary, std::vector<std::string>& lexicon, OutputArray transition_probabilities_table);
 
@@ -335,7 +388,7 @@ CV_EXPORTS_W Mat createOCRHMMTransitionsTable(const String& vocabulary, std::vec
 @note
    -   (C++) An example on using OCRBeamSearchDecoder recognition combined with scene text detection can
         be found at the demo sample:
-        <https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/word_recognition.cpp>
+        <https://github.com/opencv/opencv_contrib/blob/master/modules/text/samples/word_recognition.cpp>
  */
 class CV_EXPORTS_W OCRBeamSearchDecoder : public BaseOCR
 {
@@ -346,9 +399,9 @@ class CV_EXPORTS_W OCRBeamSearchDecoder : public BaseOCR
     This way it hides the feature extractor and the classifier itself, so developers can write
     their own OCR code.
 
-    The default character classifier and feature extractor can be loaded using the utility funtion
+    The default character classifier and feature extractor can be loaded using the utility function
     loadOCRBeamSearchClassifierCNN with all its parameters provided in
-    <https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/OCRBeamSearch_CNN_model_data.xml.gz>.
+    <https://github.com/opencv/opencv_contrib/blob/master/modules/text/samples/OCRBeamSearch_CNN_model_data.xml.gz>.
      */
     class CV_EXPORTS_W ClassifierCallback
     {
@@ -392,11 +445,11 @@ class CV_EXPORTS_W OCRBeamSearchDecoder : public BaseOCR
      */
     virtual void run(Mat& image, std::string& output_text, std::vector<Rect>* component_rects=NULL,
                      std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
-                     int component_level=0);
+                     int component_level=0) CV_OVERRIDE;
 
     virtual void run(Mat& image, Mat& mask, std::string& output_text, std::vector<Rect>* component_rects=NULL,
                      std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
-                     int component_level=0);
+                     int component_level=0) CV_OVERRIDE;
 
     // aliases for scripting
     CV_WRAP String run(InputArray image, int min_confidence, int component_level=0);
@@ -407,7 +460,7 @@ class CV_EXPORTS_W OCRBeamSearchDecoder : public BaseOCR
 
     @param classifier The character classifier with built in feature extractor.
 
-    @param vocabulary The language vocabulary (chars when ascii english text). vocabulary.size()
+    @param vocabulary The language vocabulary (chars when ASCII English text). vocabulary.size()
     must be equal to the number of classes of the classifier.
 
     @param transition_probabilities_table Table with transition probabilities between character
@@ -422,25 +475,39 @@ class CV_EXPORTS_W OCRBeamSearchDecoder : public BaseOCR
     @param beam_size Size of the beam in Beam Search algorithm.
      */
     static Ptr<OCRBeamSearchDecoder> create(const Ptr<OCRBeamSearchDecoder::ClassifierCallback> classifier,// The character classifier with built in feature extractor
-                                     const std::string& vocabulary,                    // The language vocabulary (chars when ascii english text)
+                                     const std::string& vocabulary,                    // The language vocabulary (chars when ASCII English text)
                                                                                        //     size() must be equal to the number of classes
                                      InputArray transition_probabilities_table,        // Table with transition probabilities between character pairs
-                                                                                       //     cols == rows == vocabulari.size()
+                                                                                       //     cols == rows == vocabulary.size()
                                      InputArray emission_probabilities_table,          // Table with observation emission probabilities
-                                                                                       //     cols == rows == vocabulari.size()
+                                                                                       //     cols == rows == vocabulary.size()
                                      decoder_mode mode = OCR_DECODER_VITERBI,          // HMM Decoding algorithm (only Viterbi for the moment)
                                      int beam_size = 500);                              // Size of the beam in Beam Search algorithm
 
     CV_WRAP static Ptr<OCRBeamSearchDecoder> create(const Ptr<OCRBeamSearchDecoder::ClassifierCallback> classifier, // The character classifier with built in feature extractor
-                                     const String& vocabulary,                    // The language vocabulary (chars when ascii english text)
+                                     const String& vocabulary,                    // The language vocabulary (chars when ASCII English text)
                                                                                        //     size() must be equal to the number of classes
                                      InputArray transition_probabilities_table,        // Table with transition probabilities between character pairs
-                                                                                       //     cols == rows == vocabulari.size()
+                                                                                       //     cols == rows == vocabulary.size()
                                      InputArray emission_probabilities_table,          // Table with observation emission probabilities
-                                                                                       //     cols == rows == vocabulari.size()
+                                                                                       //     cols == rows == vocabulary.size()
                                      int mode = OCR_DECODER_VITERBI,          // HMM Decoding algorithm (only Viterbi for the moment)
                                      int beam_size = 500);                              // Size of the beam in Beam Search algorithm
 
+    /** @brief Creates an instance of the OCRBeamSearchDecoder class. Initializes HMMDecoder from the specified path.
+
+    @overload
+
+     */
+    CV_WRAP static Ptr<OCRBeamSearchDecoder> create(const String& filename, // The character classifier file
+                                     const String& vocabulary,                    // The language vocabulary (chars when ASCII English text)
+                                                                                       //     size() must be equal to the number of classes
+                                     InputArray transition_probabilities_table,        // Table with transition probabilities between character pairs
+                                                                                       //     cols == rows == vocabulary.size()
+                                     InputArray emission_probabilities_table,          // Table with observation emission probabilities
+                                                                                       //     cols == rows == vocabulary.size()
+                                     int mode = OCR_DECODER_VITERBI,          // HMM Decoding algorithm (only Viterbi for the moment)
+                                     int beam_size = 500);
 protected:
 
     Ptr<OCRBeamSearchDecoder::ClassifierCallback> classifier;
@@ -463,8 +530,66 @@ at each window location.
 
 CV_EXPORTS_W Ptr<OCRBeamSearchDecoder::ClassifierCallback> loadOCRBeamSearchClassifierCNN(const String& filename);
 
+
+/** @brief OCRHolisticWordRecognizer class provides the functionality of segmented word spotting.
+ * Given a predefined vocabulary, a DictNet is employed to select the most probable
+ * word given an input image.
+ *
+ * DictNet is described in detail in:
+ * Max Jaderberg et al.: Reading Text in the Wild with Convolutional Neural Networks, IJCV 2015
+ * http://arxiv.org/abs/1412.1842
+ */
+class CV_EXPORTS OCRHolisticWordRecognizer : public BaseOCR
+{
+public:
+    virtual void run(Mat& image,
+                     std::string& output_text,
+                     std::vector<Rect>* component_rects = NULL,
+                     std::vector<std::string>* component_texts = NULL,
+                     std::vector<float>* component_confidences = NULL,
+                     int component_level = OCR_LEVEL_WORD) CV_OVERRIDE = 0;
+
+    /** @brief Recognize text using a segmentation based word-spotting/classifier CNN.
+
+    Takes an image as input and returns the recognized text in the output_text parameter. Optionally
+    also provides the Rects for individual text elements found (e.g. words), and the list of those
+    text elements with their confidence values.
+
+    @param image Input image CV_8UC1 or CV_8UC3
+
+    @param mask is totally ignored and is only available for compatibility reasons
+
+    @param output_text Output text of the word spotting, always one that exists in the dictionary.
+
+    @param component_rects Not applicable for word spotting; can be NULL. If not NULL, a single element
+        will be put in the vector.
+
+    @param component_texts Not applicable for word spotting; can be NULL. If not NULL, a single element
+        will be put in the vector.
+
+    @param component_confidences Not applicable for word spotting; can be NULL. If not NULL, a single element
+        will be put in the vector.
+
+    @param component_level must be OCR_LEVEL_WORD.
+     */
+    virtual void run(Mat& image,
+                     Mat& mask,
+                     std::string& output_text,
+                     std::vector<Rect>* component_rects = NULL,
+                     std::vector<std::string>* component_texts = NULL,
+                     std::vector<float>* component_confidences = NULL,
+                     int component_level = OCR_LEVEL_WORD) CV_OVERRIDE = 0;
+
+    /** @brief Creates an instance of the OCRHolisticWordRecognizer class.
+     */
+    static Ptr<OCRHolisticWordRecognizer> create(const std::string &archFilename,
+                                                 const std::string &weightsFilename,
+                                                 const std::string &wordsFilename);
+};
+
 //! @}
 
-}
-}
+}} // cv::text::
+
+
 #endif // _OPENCV_TEXT_OCR_HPP_
diff --git a/IPL/include/opencv/opencv2/text/textDetector.hpp b/IPL/include/opencv/opencv2/text/textDetector.hpp
new file mode 100644
index 0000000..da65337
--- /dev/null
+++ b/IPL/include/opencv/opencv2/text/textDetector.hpp
@@ -0,0 +1,73 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef __OPENCV_TEXT_TEXTDETECTOR_HPP__
+#define __OPENCV_TEXT_TEXTDETECTOR_HPP__
+
+#include "ocr.hpp"
+
+namespace cv
+{
+namespace text
+{
+
+//! @addtogroup text_detect
+//! @{
+
+/** @brief An abstract class providing an interface for text detection algorithms
+ */
+class CV_EXPORTS_W TextDetector
+{
+public:
+    /**
+    @brief Method that provides a quick and simple interface to detect text inside an image
+
+    @param inputImage an image to process
+    @param Bbox a vector of Rect that will store the detected word bounding boxes
+    @param confidence a vector of float that will be updated with the confidence the classifier has for each selected bounding box
+    */
+    CV_WRAP virtual void detect(InputArray inputImage, CV_OUT std::vector<Rect>& Bbox, CV_OUT std::vector<float>& confidence) = 0;
+    virtual ~TextDetector() {} // virtual: detectors are meant to be destroyed through a TextDetector pointer
+};
+
+/** @brief TextDetectorCNN class provides the functionality of text bounding box detection.
+ This class finds bounding boxes of text words given an input image.
+ This class uses the OpenCV dnn module to load the pre-trained model described in @cite LiaoSBWL17.
+ The original repository with the modified SSD Caffe version: https://github.com/MhLiao/TextBoxes.
+ Model can be downloaded from [DropBox](https://www.dropbox.com/s/g8pjzv2de9gty8g/TextBoxes_icdar13.caffemodel?dl=0).
+ Modified .prototxt file with the model description can be found in `opencv_contrib/modules/text/samples/textbox.prototxt`.
+ */
+class CV_EXPORTS_W TextDetectorCNN : public TextDetector
+{
+public:
+    /**
+    @overload
+
+    @param inputImage an image expected to be a CV_8UC3 of any size
+    @param Bbox a vector of Rect that will store the detected word bounding boxes
+    @param confidence a vector of float that will be updated with the confidence the classifier has for each selected bounding box
+    */
+    CV_WRAP virtual void detect(InputArray inputImage, CV_OUT std::vector<Rect>& Bbox, CV_OUT std::vector<float>& confidence) CV_OVERRIDE = 0;
+
+    /** @brief Creates an instance of the TextDetectorCNN class using the provided parameters.
+
+    @param modelArchFilename the relative or absolute path to the prototxt file describing the classifier's architecture.
+    @param modelWeightsFilename the relative or absolute path to the file containing the pretrained weights of the model in caffe-binary form.
+    @param detectionSizes a list of sizes for multiscale detection. The values `[(300,300),(700,500),(700,300),(700,700),(1600,1600)]` are
+    recommended in @cite LiaoSBWL17 to achieve the best quality.
+    */
+    static Ptr<TextDetectorCNN> create(const String& modelArchFilename, const String& modelWeightsFilename,
+                                               std::vector<Size> detectionSizes);
+    /**
+      @overload
+    */
+    CV_WRAP static Ptr<TextDetectorCNN> create(const String& modelArchFilename, const String& modelWeightsFilename);
+};
+
+//! @}
+}//namespace text
+}//namespace cv
+
+
+#endif // __OPENCV_TEXT_TEXTDETECTOR_HPP__
diff --git a/IPL/include/opencv/opencv2/tracking.hpp b/IPL/include/opencv/opencv2/tracking.hpp
index 488c2db..516f5b9 100644
--- a/IPL/include/opencv/opencv2/tracking.hpp
+++ b/IPL/include/opencv/opencv2/tracking.hpp
@@ -39,8 +39,8 @@
  //
  //M*/
 
-#ifndef __OPENCV_TRACKING_LENLEN_HPP__
-#define __OPENCV_TRACKING_LENLEN_HPP__
+#ifndef __OPENCV_TRACKING_HPP__
+#define __OPENCV_TRACKING_HPP__
 
 #include "opencv2/core/cvdef.h"
 
@@ -49,12 +49,12 @@
 Long-term optical tracking API
 ------------------------------
 
-Long-term optical tracking is one of most important issue for many computer vision applications in
+Long-term optical tracking is an important issue for many computer vision applications in
 real world scenario. The development in this area is very fragmented and this API is an unique
 interface useful for plug several algorithms and compare them. This work is partially based on
 @cite AAM and @cite AMVOT .
 
-This algorithms start from a bounding box of the target and with their internal representation they
+These algorithms start from a bounding box of the target and with their internal representation they
 avoid the drift during the tracking. These long-term trackers are able to evaluate online the
 quality of the location of the target in the new frame, without ground truth.
 
@@ -62,35 +62,23 @@ There are three main components: the TrackerSampler, the TrackerFeatureSet and t
 first component is the object that computes the patches over the frame based on the last target
 location. The TrackerFeatureSet is the class that manages the Features, is possible plug many kind
 of these (HAAR, HOG, LBP, Feature2D, etc). The last component is the internal representation of the
-target, it is the appearence model. It stores all state candidates and compute the trajectory (the
+target, it is the appearance model. It stores all state candidates and computes the trajectory (the
 most likely target states). The class TrackerTargetState represents a possible state of the target.
 The TrackerSampler and the TrackerFeatureSet are the visual representation of the target, instead
 the TrackerModel is the statistical model.
 
 A recent benchmark between these algorithms can be found in @cite OOT
 
-UML design: see @ref tracking_diagrams
-
-To see how API works, try tracker demo:
-<https://github.com/lenlen/opencv/blob/tracking_api/samples/cpp/tracker.cpp>
-
-@note This Tracking API has been designed with PlantUML. If you modify this API please change UML
-in <em>modules/tracking/doc/tracking_diagrams.markdown</em>. The following reference was used in the API
-
-Creating Own Tracker
+Creating Your Own %Tracker
 --------------------
 
-If you want create a new tracker, here's what you have to do. First, decide on the name of the class
+If you want to create a new tracker, here's what you have to do. First, decide on the name of the class
 for the tracker (to meet the existing style, we suggest something with prefix "tracker", e.g.
-trackerMIL, trackerBoosting) -- we shall refer to this choice as to "classname" in subsequent. Also,
-you should decide upon the name of the tracker, is it will be known to user (the current style
-suggests using all capitals, say MIL or BOOSTING) --we'll call it a "name".
+trackerMIL, trackerBoosting) -- we shall refer to this choice as "classname" from here on.
 
--   Declare your tracker in include/opencv2/tracking/tracker.hpp. Your tracker should inherit from
+-   Declare your tracker in modules/tracking/include/opencv2/tracking/tracker.hpp. Your tracker should inherit from
     Tracker (please, see the example below). You should declare the specialized Param structure,
-    where you probably will want to put the data, needed to initialize your tracker. Also don't
-    forget to put the BOILERPLATE_CODE(name,classname) macro inside the class declaration. That
-    macro will generate static createTracker() function, which we'll talk about later. You should
+    where you probably will want to put the data, needed to initialize your tracker. You should
     get something similar to :
 @code
         class CV_EXPORTS_W TrackerMIL : public Tracker
@@ -114,20 +102,10 @@ suggests using all capitals, say MIL or BOOSTING) --we'll call it a "name".
 @endcode
     of course, you can also add any additional methods of your choice. It should be pointed out,
     however, that it is not expected to have a constructor declared, as creation should be done via
-    the corresponding createTracker() method.
--   In src/tracker.cpp file add BOILERPLATE_CODE(name,classname) line to the body of
-    Tracker::create() method you will find there, like :
-@code
-        Ptr<Tracker> Tracker::create( const String& trackerType )
-        {
-          BOILERPLATE_CODE("BOOSTING",TrackerBoosting);
-          BOILERPLATE_CODE("MIL",TrackerMIL);
-          return Ptr<Tracker>();
-        }
-@endcode
+    the corresponding create() method.
 -   Finally, you should implement the function with signature :
 @code
-        Ptr<classname> classname::createTracker(const classname::Params &parameters){
+        Ptr<classname> classname::create(const classname::Params &parameters){
             ...
         }
 @endcode
@@ -297,16 +275,10 @@ Example of creating specialized TrackerTargetState TrackerMILTargetState : :
 
     };
 @endcode
-### Try it
-
-To try your tracker you can use the demo at
-<https://github.com/lenlen/opencv/blob/tracking_api/samples/cpp/tracker.cpp>.
-
-The first argument is the name of the tracker and the second is a video source.
 
 */
 
 #include <opencv2/tracking/tracker.hpp>
 #include <opencv2/tracking/tldDataset.hpp>
 
-#endif //__OPENCV_TRACKING_LENLEN
+#endif //__OPENCV_TRACKING_HPP__
diff --git a/IPL/include/opencv/opencv2/tracking/feature.hpp b/IPL/include/opencv/opencv2/tracking/feature.hpp
index b354d62..3bcfe6e 100644
--- a/IPL/include/opencv/opencv2/tracking/feature.hpp
+++ b/IPL/include/opencv/opencv2/tracking/feature.hpp
@@ -137,17 +137,18 @@ class CvParams
 class CvFeatureParams : public CvParams
 {
  public:
-  enum
+  enum FeatureType
   {
     HAAR = 0,
     LBP = 1,
     HOG = 2
   };
+
   CvFeatureParams();
   virtual void init( const CvFeatureParams& fp );
-  virtual void write( FileStorage &fs ) const;
-  virtual bool read( const FileNode &node );
-  static Ptr<CvFeatureParams> create( int featureType );
+  virtual void write( FileStorage &fs ) const CV_OVERRIDE;
+  virtual bool read( const FileNode &node ) CV_OVERRIDE;
+  static Ptr<CvFeatureParams> create(CvFeatureParams::FeatureType featureType);
   int maxCatCount;  // 0 in case of numerical features
   int featSize;  // 1 in case of simple features (HAAR, LBP) and N_BINS(9)*N_CELLS(4) in case of Dalal's HOG features
   int numFeatures;
@@ -163,7 +164,7 @@ class CvFeatureEvaluator
   virtual void setImage( const Mat& img, uchar clsLabel, int idx );
   virtual void writeFeatures( FileStorage &fs, const Mat& featureMap ) const = 0;
   virtual float operator()( int featureIdx, int sampleIdx ) = 0;
-  static Ptr<CvFeatureEvaluator> create( int type );
+  static Ptr<CvFeatureEvaluator> create(CvFeatureParams::FeatureType type);
 
   int getNumFeatures() const
   {
@@ -201,13 +202,13 @@ class CvHaarFeatureParams : public CvFeatureParams
 
   CvHaarFeatureParams();
 
-  virtual void init( const CvFeatureParams& fp );
-  virtual void write( FileStorage &fs ) const;
-  virtual bool read( const FileNode &node );
+  virtual void init( const CvFeatureParams& fp ) CV_OVERRIDE;
+  virtual void write( FileStorage &fs ) const CV_OVERRIDE;
+  virtual bool read( const FileNode &node ) CV_OVERRIDE;
 
-  virtual void printDefaults() const;
-  virtual void printAttrs() const;
-  virtual bool scanAttr( const std::string prm, const std::string val );
+  virtual void printDefaults() const CV_OVERRIDE;
+  virtual void printAttrs() const CV_OVERRIDE;
+  virtual bool scanAttr( const std::string prm, const std::string val ) CV_OVERRIDE;
 
   bool isIntegral;
 };
@@ -251,10 +252,10 @@ class CvHaarEvaluator : public CvFeatureEvaluator
 
   };
 
-  virtual void init( const CvFeatureParams *_featureParams, int _maxSampleCount, Size _winSize );
-  virtual void setImage( const Mat& img, uchar clsLabel = 0, int idx = 1 );
-  virtual float operator()( int featureIdx, int sampleIdx );
-  virtual void writeFeatures( FileStorage &fs, const Mat& featureMap ) const;
+  virtual void init( const CvFeatureParams *_featureParams, int _maxSampleCount, Size _winSize ) CV_OVERRIDE;
+  virtual void setImage( const Mat& img, uchar clsLabel = 0, int idx = 1 ) CV_OVERRIDE;
+  virtual float operator()( int featureIdx, int sampleIdx ) CV_OVERRIDE;
+  virtual void writeFeatures( FileStorage &fs, const Mat& featureMap ) const CV_OVERRIDE;
   void writeFeature( FileStorage &fs ) const;  // for old file format
   const std::vector<CvHaarEvaluator::FeatureHaar>& getFeatures() const;
   inline CvHaarEvaluator::FeatureHaar& getFeatures( int idx )
@@ -263,7 +264,7 @@ class CvHaarEvaluator : public CvFeatureEvaluator
   }
   void setWinSize( Size patchSize );
   Size setWinSize() const;
-  virtual void generateFeatures();
+  virtual void generateFeatures() CV_OVERRIDE;
 
   /**
    * TODO new method
@@ -300,12 +301,12 @@ class CvHOGEvaluator : public CvFeatureEvaluator
   virtual ~CvHOGEvaluator()
   {
   }
-  virtual void init( const CvFeatureParams *_featureParams, int _maxSampleCount, Size _winSize );
-  virtual void setImage( const Mat& img, uchar clsLabel, int idx );
-  virtual float operator()( int varIdx, int sampleIdx );
-  virtual void writeFeatures( FileStorage &fs, const Mat& featureMap ) const;
+  virtual void init( const CvFeatureParams *_featureParams, int _maxSampleCount, Size _winSize ) CV_OVERRIDE;
+  virtual void setImage( const Mat& img, uchar clsLabel, int idx ) CV_OVERRIDE;
+  virtual float operator()( int varIdx, int sampleIdx ) CV_OVERRIDE;
+  virtual void writeFeatures( FileStorage &fs, const Mat& featureMap ) const CV_OVERRIDE;
  protected:
-  virtual void generateFeatures();
+  virtual void generateFeatures() CV_OVERRIDE;
   virtual void integralHistogram( const Mat &img, std::vector<Mat> &histogram, Mat &norm, int nbins ) const;
   class Feature
   {
@@ -364,18 +365,18 @@ struct CvLBPFeatureParams : CvFeatureParams
 class CvLBPEvaluator : public CvFeatureEvaluator
 {
  public:
-  virtual ~CvLBPEvaluator()
+  virtual ~CvLBPEvaluator() CV_OVERRIDE
   {
   }
-  virtual void init( const CvFeatureParams *_featureParams, int _maxSampleCount, Size _winSize );
-  virtual void setImage( const Mat& img, uchar clsLabel, int idx );
-  virtual float operator()( int featureIdx, int sampleIdx )
+  virtual void init( const CvFeatureParams *_featureParams, int _maxSampleCount, Size _winSize ) CV_OVERRIDE;
+  virtual void setImage( const Mat& img, uchar clsLabel, int idx ) CV_OVERRIDE;
+  virtual float operator()( int featureIdx, int sampleIdx ) CV_OVERRIDE
   {
     return (float) features[featureIdx].calc( sum, sampleIdx );
   }
-  virtual void writeFeatures( FileStorage &fs, const Mat& featureMap ) const;
+  virtual void writeFeatures( FileStorage &fs, const Mat& featureMap ) const CV_OVERRIDE;
  protected:
-  virtual void generateFeatures();
+  virtual void generateFeatures() CV_OVERRIDE;
 
   class Feature
   {
diff --git a/IPL/include/opencv/opencv2/tracking/kalman_filters.hpp b/IPL/include/opencv/opencv2/tracking/kalman_filters.hpp
index e733b22..7a89c87 100644
--- a/IPL/include/opencv/opencv2/tracking/kalman_filters.hpp
+++ b/IPL/include/opencv/opencv2/tracking/kalman_filters.hpp
@@ -62,13 +62,13 @@ class CV_EXPORTS UnscentedKalmanFilter
     * @param control - the current control vector,
     * @return the predicted estimate of the state.
     */
-    virtual Mat predict( const Mat& control = Mat() ) = 0;
+    virtual Mat predict( InputArray control = noArray() ) = 0;
 
     /** The function performs correction step of the algorithm
     * @param measurement - the current measurement vector,
     * @return the corrected estimate of the state.
     */
-    virtual Mat correct( const Mat& measurement ) = 0;
+    virtual Mat correct( InputArray measurement ) = 0;
 
     /**
     * @return the process noise cross-covariance matrix.
diff --git a/IPL/include/opencv/opencv2/tracking/onlineMIL.hpp b/IPL/include/opencv/opencv2/tracking/onlineMIL.hpp
index b6fc25b..78e1372 100644
--- a/IPL/include/opencv/opencv2/tracking/onlineMIL.hpp
+++ b/IPL/include/opencv/opencv2/tracking/onlineMIL.hpp
@@ -54,8 +54,6 @@ namespace cv
 //TODO based on the original implementation
 //http://vision.ucsd.edu/~bbabenko/project_miltrack.shtml
 
-#define  sign(s)  ((s > 0 ) ? 1 : ((s<0) ? -1 : 0))
-
 class ClfOnlineStump;
 
 class CV_EXPORTS ClfMilBoost
diff --git a/IPL/include/opencv/opencv2/tracking/tldDataset.hpp b/IPL/include/opencv/opencv2/tracking/tldDataset.hpp
index d31919a..a874255 100644
--- a/IPL/include/opencv/opencv2/tracking/tldDataset.hpp
+++ b/IPL/include/opencv/opencv2/tracking/tldDataset.hpp
@@ -42,15 +42,15 @@
 #ifndef OPENCV_TLD_DATASET
 #define OPENCV_TLD_DATASET
 
-#include "opencv2/highgui.hpp"
+#include "opencv2/core.hpp"
 
 namespace cv
 {
 	namespace tld
 	{
 		CV_EXPORTS cv::Rect2d tld_InitDataset(int videoInd, const char* rootPath = "TLD_dataset", int datasetInd = 0);
-		CV_EXPORTS cv::Mat tld_getNextDatasetFrame();
+		CV_EXPORTS cv::String tld_getNextDatasetFrame();
 	}
 }
 
-#endif
\ No newline at end of file
+#endif
diff --git a/IPL/include/opencv/opencv2/tracking/tracker.hpp b/IPL/include/opencv/opencv2/tracking/tracker.hpp
index 5fabfec..6f489f0 100644
--- a/IPL/include/opencv/opencv2/tracking/tracker.hpp
+++ b/IPL/include/opencv/opencv2/tracking/tracker.hpp
@@ -47,17 +47,11 @@
 #include "feature.hpp"
 #include "onlineMIL.hpp"
 #include "onlineBoosting.hpp"
-#include <iostream>
-
-
-#define BOILERPLATE_CODE(name,classname) \
-    static Ptr<classname> createTracker(const classname::Params &parameters=classname::Params());\
-    virtual ~classname(){};
 
 /*
  * Partially based on:
  * ====================================================================================================================
- * 	- [AAM] S. Salti, A. Cavallaro, L. Di Stefano, Adaptive Appearance Modeling for Video Tracking: Survey and Evaluation
+ *   - [AAM] S. Salti, A. Cavallaro, L. Di Stefano, Adaptive Appearance Modeling for Video Tracking: Survey and Evaluation
  *  - [AMVOT] X. Li, W. Hu, C. Shen, Z. Zhang, A. Dick, A. van den Hengel, A Survey of Appearance Models in Visual Object Tracking
  *
  * This Tracking API has been designed with PlantUML. If you modify this API please change UML files under modules/tracking/doc/uml
@@ -200,7 +194,7 @@ class CV_EXPORTS TrackerFeatureSet
   bool blockAddTrackerFeature;
 
   std::vector<std::pair<String, Ptr<TrackerFeature> > > features;  //list of features
-  std::vector<Mat> responses;				//list of response after compute
+  std::vector<Mat> responses;        //list of response after compute
 
 };
 
@@ -531,44 +525,29 @@ class CV_EXPORTS_W Tracker : public virtual Algorithm
 {
  public:
 
-  virtual ~Tracker();
+  virtual ~Tracker() CV_OVERRIDE;
 
-  /** @brief Initialize the tracker with a know bounding box that surrounding the target
+  /** @brief Initialize the tracker with a known bounding box that surrounds the target
     @param image The initial frame
-    @param boundingBox The initial boundig box
+    @param boundingBox The initial bounding box
 
     @return True if initialization went succesfully, false otherwise
      */
-  CV_WRAP bool init( const Mat& image, const Rect2d& boundingBox );
+  CV_WRAP bool init( InputArray image, const Rect2d& boundingBox );
 
   /** @brief Update the tracker, find the new most likely bounding box for the target
     @param image The current frame
-    @param boundingBox The boundig box that represent the new target location, if true was returned, not
+    @param boundingBox The bounding box that represents the new target location, if true was returned, not
     modified otherwise
 
     @return True means that target was located and false means that tracker cannot locate target in
     current frame. Note, that latter *does not* imply that tracker has failed, maybe target is indeed
     missing from the frame (say, out of sight)
      */
-  CV_WRAP bool update( const Mat& image, CV_OUT Rect2d& boundingBox );
-
-  /** @brief Creates a tracker by its name.
-    @param trackerType Tracker type
-
-    The following detector types are supported:
-
-    -   "MIL" -- TrackerMIL
-    -   "BOOSTING" -- TrackerBoosting
-     */
-  CV_WRAP static Ptr<Tracker> create( const String& trackerType );
-
-  virtual void read( const FileNode& fn )=0;
-  virtual void write( FileStorage& fs ) const=0;
+  CV_WRAP bool update( InputArray image, CV_OUT Rect2d& boundingBox );
 
-  Ptr<TrackerModel> getModel()
-  {
-	  return model;
-  }
+  virtual void read( const FileNode& fn ) CV_OVERRIDE = 0;
+  virtual void write( FileStorage& fs ) const CV_OVERRIDE = 0;
 
  protected:
 
@@ -648,8 +627,8 @@ class CV_EXPORTS TrackerStateEstimatorMILBoosting : public TrackerStateEstimator
   void setCurrentConfidenceMap( ConfidenceMap& confidenceMap );
 
  protected:
-  Ptr<TrackerTargetState> estimateImpl( const std::vector<ConfidenceMap>& confidenceMaps );
-  void updateImpl( std::vector<ConfidenceMap>& confidenceMaps );
+  Ptr<TrackerTargetState> estimateImpl( const std::vector<ConfidenceMap>& confidenceMaps ) CV_OVERRIDE;
+  void updateImpl( std::vector<ConfidenceMap>& confidenceMaps ) CV_OVERRIDE;
 
  private:
   uint max_idx( const std::vector<float> &v );
@@ -753,8 +732,8 @@ class CV_EXPORTS TrackerStateEstimatorAdaBoosting : public TrackerStateEstimator
   std::vector<int> computeSwappedClassifier();
 
  protected:
-  Ptr<TrackerTargetState> estimateImpl( const std::vector<ConfidenceMap>& confidenceMaps );
-  void updateImpl( std::vector<ConfidenceMap>& confidenceMaps );
+  Ptr<TrackerTargetState> estimateImpl( const std::vector<ConfidenceMap>& confidenceMaps ) CV_OVERRIDE;
+  void updateImpl( std::vector<ConfidenceMap>& confidenceMaps ) CV_OVERRIDE;
 
   Ptr<StrongClassifierDirectSelection> boostClassifier;
 
@@ -781,8 +760,8 @@ class CV_EXPORTS TrackerStateEstimatorSVM : public TrackerStateEstimator
   ~TrackerStateEstimatorSVM();
 
  protected:
-  Ptr<TrackerTargetState> estimateImpl( const std::vector<ConfidenceMap>& confidenceMaps );
-  void updateImpl( std::vector<ConfidenceMap>& confidenceMaps );
+  Ptr<TrackerTargetState> estimateImpl( const std::vector<ConfidenceMap>& confidenceMaps ) CV_OVERRIDE;
+  void updateImpl( std::vector<ConfidenceMap>& confidenceMaps ) CV_OVERRIDE;
 };
 
 /************************************ Specific TrackerSamplerAlgorithm Classes ************************************/
@@ -806,7 +785,7 @@ class CV_EXPORTS TrackerSamplerCSC : public TrackerSamplerAlgorithm
     Params();
     float initInRad;        //!< radius for gathering positive instances during init
     float trackInPosRad;    //!< radius for gathering positive instances during tracking
-    float searchWinSize;	//!< size of search window
+    float searchWinSize;  //!< size of search window
     int initMaxNegNum;      //!< # negative samples to use during init
     int trackMaxPosNum;     //!< # positive samples to use during training
     int trackMaxNegNum;     //!< # negative samples to use during training
@@ -834,7 +813,7 @@ class CV_EXPORTS TrackerSamplerCSC : public TrackerSamplerAlgorithm
 
  protected:
 
-  bool samplingImpl( const Mat& image, Rect boundingBox, std::vector<Mat>& sample );
+  bool samplingImpl( const Mat& image, Rect boundingBox, std::vector<Mat>& sample ) CV_OVERRIDE;
 
  private:
 
@@ -881,7 +860,7 @@ class CV_EXPORTS TrackerSamplerCS : public TrackerSamplerAlgorithm
 
   ~TrackerSamplerCS();
 
-  bool samplingImpl( const Mat& image, Rect boundingBox, std::vector<Mat>& sample );
+  bool samplingImpl( const Mat& image, Rect boundingBox, std::vector<Mat>& sample ) CV_OVERRIDE;
   Rect getROI() const;
  private:
   Rect getTrackingROI( float searchFactor );
@@ -937,7 +916,7 @@ class CV_EXPORTS TrackerSamplerPF : public TrackerSamplerAlgorithm
      */
   TrackerSamplerPF(const Mat& chosenRect,const TrackerSamplerPF::Params &parameters = TrackerSamplerPF::Params());
 protected:
-  bool samplingImpl( const Mat& image, Rect boundingBox, std::vector<Mat>& sample );
+  bool samplingImpl( const Mat& image, Rect boundingBox, std::vector<Mat>& sample ) CV_OVERRIDE;
 private:
   Params params;
   Ptr<MinProblemSolver> _solver;
@@ -960,13 +939,13 @@ class CV_EXPORTS TrackerFeatureFeature2d : public TrackerFeature
    */
   TrackerFeatureFeature2d( String detectorType, String descriptorType );
 
-  ~TrackerFeatureFeature2d();
+  ~TrackerFeatureFeature2d() CV_OVERRIDE;
 
-  void selection( Mat& response, int npoints );
+  void selection( Mat& response, int npoints ) CV_OVERRIDE;
 
  protected:
 
-  bool computeImpl( const std::vector<Mat>& images, Mat& response );
+  bool computeImpl( const std::vector<Mat>& images, Mat& response ) CV_OVERRIDE;
 
  private:
 
@@ -982,13 +961,13 @@ class CV_EXPORTS TrackerFeatureHOG : public TrackerFeature
 
   TrackerFeatureHOG();
 
-  ~TrackerFeatureHOG();
+  ~TrackerFeatureHOG() CV_OVERRIDE;
 
-  void selection( Mat& response, int npoints );
+  void selection( Mat& response, int npoints ) CV_OVERRIDE;
 
  protected:
 
-  bool computeImpl( const std::vector<Mat>& images, Mat& response );
+  bool computeImpl( const std::vector<Mat>& images, Mat& response ) CV_OVERRIDE;
 
 };
 
@@ -1011,7 +990,7 @@ class CV_EXPORTS TrackerFeatureHAAR : public TrackerFeature
      */
   TrackerFeatureHAAR( const TrackerFeatureHAAR::Params &parameters = TrackerFeatureHAAR::Params() );
 
-  ~TrackerFeatureHAAR();
+  ~TrackerFeatureHAAR() CV_OVERRIDE;
 
   /** @brief Compute the features only for the selected indices in the images collection
     @param selFeatures indices of selected features
@@ -1026,7 +1005,7 @@ class CV_EXPORTS TrackerFeatureHAAR : public TrackerFeature
 
     @note This method modifies the response parameter
      */
-  void selection( Mat& response, int npoints );
+  void selection( Mat& response, int npoints ) CV_OVERRIDE;
 
   /** @brief Swap the feature in position source with the feature in position target
   @param source The source position
@@ -1046,7 +1025,7 @@ class CV_EXPORTS TrackerFeatureHAAR : public TrackerFeature
   CvHaarEvaluator::FeatureHaar& getFeatureAt( int id );
 
  protected:
-  bool computeImpl( const std::vector<Mat>& images, Mat& response );
+  bool computeImpl( const std::vector<Mat>& images, Mat& response ) CV_OVERRIDE;
 
  private:
 
@@ -1065,11 +1044,11 @@ class CV_EXPORTS TrackerFeatureLBP : public TrackerFeature
 
   ~TrackerFeatureLBP();
 
-  void selection( Mat& response, int npoints );
+  void selection( Mat& response, int npoints ) CV_OVERRIDE;
 
  protected:
 
-  bool computeImpl( const std::vector<Mat>& images, Mat& response );
+  bool computeImpl( const std::vector<Mat>& images, Mat& response ) CV_OVERRIDE;
 
 };
 
@@ -1083,19 +1062,19 @@ based on @cite MIL .
 
 Original code can be found here <http://vision.ucsd.edu/~bbabenko/project_miltrack.shtml>
  */
-class CV_EXPORTS TrackerMIL : public Tracker
+class CV_EXPORTS_W TrackerMIL : public Tracker
 {
  public:
   struct CV_EXPORTS Params
   {
     Params();
     //parameters for sampler
-    float samplerInitInRadius;	//!< radius for gathering positive instances during init
+    float samplerInitInRadius;  //!< radius for gathering positive instances during init
     int samplerInitMaxNegNum;  //!< # negative samples to use during init
     float samplerSearchWinSize;  //!< size of search window
     float samplerTrackInRadius;  //!< radius for gathering positive instances during tracking
-    int samplerTrackMaxPosNum;	//!< # positive samples to use during tracking
-    int samplerTrackMaxNegNum;	//!< # negative samples to use during tracking
+    int samplerTrackMaxPosNum;  //!< # positive samples to use during tracking
+    int samplerTrackMaxNegNum;  //!< # negative samples to use during tracking
     int featureSetNumFeatures;  //!< # features
 
     void read( const FileNode& fn );
@@ -1105,15 +1084,20 @@ class CV_EXPORTS TrackerMIL : public Tracker
   /** @brief Constructor
     @param parameters MIL parameters TrackerMIL::Params
      */
-  BOILERPLATE_CODE("MIL",TrackerMIL);
+  static Ptr<TrackerMIL> create(const TrackerMIL::Params &parameters);
+
+  CV_WRAP static Ptr<TrackerMIL> create();
+
+  virtual ~TrackerMIL() CV_OVERRIDE {}
 };
 
-/** @brief This is a real-time object tracking based on a novel on-line version of the AdaBoost algorithm.
+/** @brief the Boosting tracker
 
+This is a real-time object tracking based on a novel on-line version of the AdaBoost algorithm.
 The classifier uses the surrounding background as negative examples in update step to avoid the
 drifting problem. The implementation is based on @cite OLB .
  */
-class CV_EXPORTS TrackerBoosting : public Tracker
+class CV_EXPORTS_W TrackerBoosting : public Tracker
 {
  public:
   struct CV_EXPORTS Params
@@ -1125,12 +1109,12 @@ class CV_EXPORTS TrackerBoosting : public Tracker
     int iterationInit;  //!<the initial iterations
     int featureSetNumFeatures;  //!< # features
     /**
-     * \brief Read parameters from file
+     * \brief Read parameters from a file
      */
     void read( const FileNode& fn );
 
     /**
-     * \brief Write parameters in a file
+     * \brief Write parameters to a file
      */
     void write( FileStorage& fs ) const;
   };
@@ -1138,10 +1122,14 @@ class CV_EXPORTS TrackerBoosting : public Tracker
   /** @brief Constructor
     @param parameters BOOSTING parameters TrackerBoosting::Params
      */
-  BOILERPLATE_CODE("BOOSTING",TrackerBoosting);
+  static Ptr<TrackerBoosting> create(const TrackerBoosting::Params &parameters);
+
+  CV_WRAP static Ptr<TrackerBoosting> create();
+
+  virtual ~TrackerBoosting() CV_OVERRIDE {}
 };
 
-/** @brief Median Flow tracker implementation.
+/** @brief the Median Flow tracker
 
 Implementation of a paper @cite MedianFlow .
 
@@ -1151,14 +1139,21 @@ by authors to outperform MIL). During the implementation period the code at
 <http://www.aonsquared.co.uk/node/5>, the courtesy of the author Arthur Amarra, was used for the
 reference purpose.
  */
-class CV_EXPORTS TrackerMedianFlow : public Tracker
+class CV_EXPORTS_W TrackerMedianFlow : public Tracker
 {
  public:
   struct CV_EXPORTS Params
   {
-    Params();
-    int pointsInGrid; //!<square root of number of keypoints used; increase it to trade
-                      //!<accurateness for speed; default value is sensible and recommended
+    Params(); //!<default constructor
+              //!<note that the default values of parameters are recommended for most use cases
+    int pointsInGrid;      //!<square root of number of keypoints used; increase it to trade
+                           //!<accurateness for speed
+    cv::Size winSize;      //!<window size parameter for Lucas-Kanade optical flow
+    int maxLevel;          //!<maximal pyramid level number for Lucas-Kanade optical flow
+    TermCriteria termCriteria; //!<termination criteria for Lucas-Kanade optical flow
+    cv::Size winSizeNCC;   //!<window size around a point for normalized cross-correlation check
+    double maxMedianLengthOfDisplacementDifference; //!<criterion for losing the tracked object
+
     void read( const FileNode& /*fn*/ );
     void write( FileStorage& /*fs*/ ) const;
   };
@@ -1166,21 +1161,27 @@ class CV_EXPORTS TrackerMedianFlow : public Tracker
   /** @brief Constructor
     @param parameters Median Flow parameters TrackerMedianFlow::Params
     */
-  BOILERPLATE_CODE("MEDIANFLOW",TrackerMedianFlow);
+  static Ptr<TrackerMedianFlow> create(const TrackerMedianFlow::Params &parameters);
+
+  CV_WRAP static Ptr<TrackerMedianFlow> create();
+
+  virtual ~TrackerMedianFlow() CV_OVERRIDE {}
 };
 
-/** @brief TLD is a novel tracking framework that explicitly decomposes the long-term tracking task into
+/** @brief the TLD (Tracking, learning and detection) tracker
+
+TLD is a novel tracking framework that explicitly decomposes the long-term tracking task into
 tracking, learning and detection.
 
 The tracker follows the object from frame to frame. The detector localizes all appearances that
-have been observed so far and corrects the tracker if necessary. The learning estimates detector’s
+have been observed so far and corrects the tracker if necessary. The learning estimates detector's
 errors and updates it to avoid these errors in the future. The implementation is based on @cite TLD .
 
 The Median Flow algorithm (see cv::TrackerMedianFlow) was chosen as a tracking component in this
-implementation, following authors. Tracker is supposed to be able to handle rapid motions, partial
+implementation, following authors. The tracker is supposed to be able to handle rapid motions, partial
 occlusions, object absence etc.
  */
-class CV_EXPORTS TrackerTLD : public Tracker
+class CV_EXPORTS_W TrackerTLD : public Tracker
 {
  public:
   struct CV_EXPORTS Params
@@ -1193,182 +1194,201 @@ class CV_EXPORTS TrackerTLD : public Tracker
   /** @brief Constructor
     @param parameters TLD parameters TrackerTLD::Params
      */
-  BOILERPLATE_CODE("TLD",TrackerTLD);
+  static Ptr<TrackerTLD> create(const TrackerTLD::Params &parameters);
+
+  CV_WRAP static Ptr<TrackerTLD> create();
+
+  virtual ~TrackerTLD() CV_OVERRIDE {}
 };
 
-/** @brief KCF is a novel tracking framework that utilizes properties of circulant matrix to enhance the processing speed.
- * This tracking method is an implementation of @cite KCF_ECCV which is extended to KFC with color-names features (@cite KCF_CN).
- * The original paper of KCF is available at <http://home.isr.uc.pt/~henriques/circulant/index.html>
+/** @brief the KCF (Kernelized Correlation Filter) tracker
+
+ * KCF is a novel tracking framework that utilizes properties of circulant matrix to enhance the processing speed.
+ * This tracking method is an implementation of @cite KCF_ECCV which is extended to KCF with color-names features (@cite KCF_CN).
+ * The original paper of KCF is available at <http://www.robots.ox.ac.uk/~joao/publications/henriques_tpami2015.pdf>
  * as well as the matlab implementation. For more information about KCF with color-names features, please refer to
  * <http://www.cvl.isy.liu.se/research/objrec/visualtracking/colvistrack/index.html>.
  */
-class CV_EXPORTS TrackerKCF : public Tracker
+class CV_EXPORTS_W TrackerKCF : public Tracker
 {
 public:
-	/**
-	* \brief Feature type to be used in the tracking grayscale, colornames, compressed color-names
-	* The modes available now:
-	-   "GRAY" -- Use grayscale values as the feature
-	-   "CN" -- Color-names feature
-	*/
-	enum MODE {
-		GRAY = (1u << 0),
-		CN = (1u << 1),
-		CUSTOM = (1u << 2)
-	};
-
-	struct CV_EXPORTS Params
-	{
-		/**
-		* \brief Constructor
-		*/
-		Params();
-
-		/**
-		* \brief Read parameters from file, currently unused
-		*/
-		void read(const FileNode& /*fn*/);
-
-		/**
-		* \brief Read parameters from file, currently unused
-		*/
-		void write(FileStorage& /*fs*/) const;
-
-		double sigma;                 //!<  gaussian kernel bandwidth
-		double lambda;                //!<  regularization
-		double interp_factor;         //!<  linear interpolation factor for adaptation
-		double output_sigma_factor;   //!<  spatial bandwidth (proportional to target)
-		double pca_learning_rate;     //!<  compression learning rate
-		bool resize;                  //!<  activate the resize feature to improve the processing speed
-		bool split_coeff;             //!<  split the training coefficients into two matrices
-		bool wrap_kernel;             //!<  wrap around the kernel values
-		bool compress_feature;        //!<  activate the pca method to compress the features
-		int max_patch_size;           //!<  threshold for the ROI size
-		int compressed_size;          //!<  feature size after compression
-		unsigned int desc_pca;        //!<  compressed descriptors of TrackerKCF::MODE
-		unsigned int desc_npca;       //!<  non-compressed descriptors of TrackerKCF::MODE
-	};
-
-	virtual void setFeatureExtractor(void(*)(const Mat, const Rect, Mat&), bool pca_func = false);
-
-	/** @brief Constructor
-	@param parameters KCF parameters TrackerKCF::Params
-	*/
-	BOILERPLATE_CODE("KCF", TrackerKCF);
+  /**
+  * \brief Feature type to be used in the tracking grayscale, colornames, compressed color-names
+  * The modes available now:
+  -   "GRAY" -- Use grayscale values as the feature
+  -   "CN" -- Color-names feature
+  */
+  enum MODE {
+    GRAY   = (1 << 0),
+    CN     = (1 << 1),
+    CUSTOM = (1 << 2)
+  };
+
+  struct CV_EXPORTS Params
+  {
+    /**
+    * \brief Constructor
+    */
+    Params();
+
+    /**
+    * \brief Read parameters from a file
+    */
+    void read(const FileNode& /*fn*/);
+
+    /**
+    * \brief Write parameters to a file
+    */
+    void write(FileStorage& /*fs*/) const;
+
+    float detect_thresh;         //!<  detection confidence threshold
+    float sigma;                 //!<  gaussian kernel bandwidth
+    float lambda;                //!<  regularization
+    float interp_factor;         //!<  linear interpolation factor for adaptation
+    float output_sigma_factor;   //!<  spatial bandwidth (proportional to target)
+    float pca_learning_rate;     //!<  compression learning rate
+    bool resize;                  //!<  activate the resize feature to improve the processing speed
+    bool split_coeff;             //!<  split the training coefficients into two matrices
+    bool wrap_kernel;             //!<  wrap around the kernel values
+    bool compress_feature;        //!<  activate the pca method to compress the features
+    int max_patch_size;           //!<  threshold for the ROI size
+    int compressed_size;          //!<  feature size after compression
+    int desc_pca;        //!<  compressed descriptors of TrackerKCF::MODE
+    int desc_npca;       //!<  non-compressed descriptors of TrackerKCF::MODE
+  };
+
+  virtual void setFeatureExtractor(void(*)(const Mat, const Rect, Mat&), bool pca_func = false) = 0;
+
+  /** @brief Constructor
+  @param parameters KCF parameters TrackerKCF::Params
+  */
+  static Ptr<TrackerKCF> create(const TrackerKCF::Params &parameters);
+
+  CV_WRAP static Ptr<TrackerKCF> create();
+
+  virtual ~TrackerKCF() CV_OVERRIDE {}
 };
 
-/************************************ MultiTracker Class ---By Laksono Kurnianggoro---) ************************************/
-/** @brief This class is used to track multiple objects using the specified tracker algorithm.
-* The MultiTracker is naive implementation of multiple object tracking.
-* It process the tracked objects independently without any optimization accross the tracked objects.
+/** @brief the GOTURN (Generic Object Tracking Using Regression Networks) tracker
+
+ *  GOTURN (@cite GOTURN) is a kind of tracker based on Convolutional Neural Networks (CNN). While retaining all the advantages of CNN trackers,
+ *  GOTURN is much faster because it is trained offline and needs no online fine-tuning.
+ *  GOTURN tracker addresses the problem of single target tracking: given a bounding box label of an object in the first frame of the video,
+ *  we track that object through the rest of the video. NOTE: Current method of GOTURN does not handle occlusions; however, it is fairly
+ *  robust to viewpoint changes, lighting changes, and deformations.
+ *  Inputs of GOTURN are two RGB patches representing Target and Search patches resized to 227x227.
+ *  Outputs of GOTURN are predicted bounding box coordinates, relative to Search patch coordinate system, in format X1,Y1,X2,Y2.
+ *  Original paper is here: <http://davheld.github.io/GOTURN/GOTURN.pdf>
+ *  As well as the original authors' implementation: <https://github.com/davheld/GOTURN#train-the-tracker>
+ *  The implementation of the training algorithm is kept separately here due to third-party dependencies:
+ *  <https://github.com/Auron-X/GOTURN_Training_Toolkit>
+ *  GOTURN architecture goturn.prototxt and trained model goturn.caffemodel are accessible on opencv_extra GitHub repository.
 */
-class CV_EXPORTS_W MultiTracker
+class CV_EXPORTS_W TrackerGOTURN : public Tracker
 {
 public:
+  struct CV_EXPORTS Params
+  {
+    Params();
+    void read(const FileNode& /*fn*/);
+    void write(FileStorage& /*fs*/) const;
+  };
 
-	/**
-	* \brief Constructor.
-	* In the case of trackerType is given, it will be set as the default algorithm for all trackers.
-	* @param trackerType the name of the tracker algorithm to be used
-	*/
-	CV_WRAP MultiTracker(const String& trackerType = "");
-
-	/**
-	* \brief Destructor
-	*/
-	~MultiTracker();
-
-	/**
-	* \brief Add a new object to be tracked.
-	* The defaultAlgorithm will be used the newly added tracker.
-	* @param image input image
-	* @param boundingBox a rectangle represents ROI of the tracked object
-	*/
-	CV_WRAP bool add(const Mat& image, const Rect2d& boundingBox);
-
-	/**
-	* \brief Add a new object to be tracked.
-	* @param trackerType the name of the tracker algorithm to be used
-	* @param image input image
-	* @param boundingBox a rectangle represents ROI of the tracked object
-	*/
-	CV_WRAP bool add(const String& trackerType, const Mat& image, const Rect2d& boundingBox);
-
-	/**
-	* \brief Add a set of objects to be tracked.
-	* @param trackerType the name of the tracker algorithm to be used
-	* @param image input image
-	* @param boundingBox list of the tracked objects
-	*/
-	CV_WRAP bool add(const String& trackerType, const Mat& image, std::vector<Rect2d> boundingBox);
-
-	/**
-	* \brief Add a set of objects to be tracked using the defaultAlgorithm tracker.
-	* @param image input image
-	* @param boundingBox list of the tracked objects
-	*/
-	CV_WRAP bool add(const Mat& image, std::vector<Rect2d> boundingBox);
-
-	/**
-	* \brief Update the current tracking status.
-	* The result will be saved in the internal storage.
-	* @param image input image
-	*/
-	bool update(const Mat& image);
-
-	//!<  storage for the tracked objects, each object corresponds to one tracker algorithm.
-	std::vector<Rect2d> objects;
-
-	/**
-	* \brief Update the current tracking status.
-	* @param image input image
-	* @param boundingBox the tracking result, represent a list of ROIs of the tracked objects.
-	*/
-	CV_WRAP bool update(const Mat& image, CV_OUT std::vector<Rect2d> & boundingBox);
+  /** @brief Constructor
+  @param parameters GOTURN parameters TrackerGOTURN::Params
+  */
+  static Ptr<TrackerGOTURN> create(const TrackerGOTURN::Params &parameters);
 
-protected:
-	//!<  storage for the tracker algorithms.
-	std::vector< Ptr<Tracker> > trackerList;
+  CV_WRAP static Ptr<TrackerGOTURN> create();
+
+  virtual ~TrackerGOTURN() CV_OVERRIDE {}
+};
+
+/** @brief the MOSSE (Minimum Output Sum of Squared %Error) tracker
+
+The implementation is based on @cite MOSSE Visual Object Tracking using Adaptive Correlation Filters
+@note this tracker works with grayscale images, if passed bgr ones, they will get converted internally.
+*/
 
-	//!<  default algorithm for the tracking method.
-	String defaultAlgorithm;
+class CV_EXPORTS_W TrackerMOSSE : public Tracker
+{
+ public:
+  /** @brief Constructor
+  */
+  CV_WRAP static Ptr<TrackerMOSSE> create();
+
+  virtual ~TrackerMOSSE() CV_OVERRIDE {}
 };
 
-class ROISelector {
+
+/************************************ MultiTracker Class ---By Laksono Kurnianggoro--- ************************************/
+/** @brief This class is used to track multiple objects using the specified tracker algorithm.
+
+* The %MultiTracker is a naive implementation of multiple object tracking.
+* It processes the tracked objects independently, without any optimization across the tracked objects.
+*/
+class CV_EXPORTS_W MultiTracker : public Algorithm
+{
 public:
-	Rect2d select(Mat img, bool fromCenter = true);
-	Rect2d select(const cv::String& windowName, Mat img, bool showCrossair = true, bool fromCenter = true);
-	void select(const cv::String& windowName, Mat img, std::vector<Rect2d> & boundingBox, bool fromCenter = true);
 
-	struct handlerT{
-		// basic parameters
-		bool isDrawing;
-		Rect2d box;
-		Mat image;
+  /**
+  * \brief Constructor.
+  */
+  CV_WRAP MultiTracker();
 
-		// parameters for drawing from the center
-		bool drawFromCenter;
-		Point2f center;
+  /**
+  * \brief Destructor
+  */
+  ~MultiTracker() CV_OVERRIDE;
 
-		// initializer list
-		handlerT() : isDrawing(false), drawFromCenter(true) {};
-	}selectorParams;
+  /**
+  * \brief Add a new object to be tracked.
+  *
+  * @param newTracker tracking algorithm to be used
+  * @param image input image
+  * @param boundingBox a rectangle representing the ROI of the tracked object
+  */
+  CV_WRAP bool add(Ptr<Tracker> newTracker, InputArray image, const Rect2d& boundingBox);
 
-	// to store the tracked objects
-	std::vector<handlerT> objects;
+  /**
+  * \brief Add a set of objects to be tracked.
+  * @param newTrackers list of tracking algorithms to be used
+  * @param image input image
+  * @param boundingBox list of the tracked objects
+  */
+  bool add(std::vector<Ptr<Tracker> > newTrackers, InputArray image, std::vector<Rect2d> boundingBox);
 
-private:
-	static void mouseHandler(int event, int x, int y, int flags, void *param);
-	void opencv_mouse_callback(int event, int x, int y, int, void *param);
+  /**
+  * \brief Update the current tracking status.
+  * The result will be saved in the internal storage.
+  * @param image input image
+  */
+  bool update(InputArray image);
 
-	// save the keypressed characted
-	int key;
-};
+  /**
+  * \brief Update the current tracking status.
+  * @param image input image
+  * @param boundingBox the tracking result: a list of ROIs of the tracked objects.
+  */
+  CV_WRAP bool update(InputArray image, CV_OUT std::vector<Rect2d> & boundingBox);
 
-Rect2d CV_EXPORTS_W selectROI(Mat img, bool fromCenter = true);
-Rect2d CV_EXPORTS_W selectROI(const cv::String& windowName, Mat img, bool showCrossair = true, bool fromCenter = true);
-void CV_EXPORTS_W selectROI(const cv::String& windowName, Mat img, std::vector<Rect2d> & boundingBox, bool fromCenter = true);
+  /**
+  * \brief Returns a reference to a storage for the tracked objects, each object corresponds to one tracker algorithm
+  */
+  CV_WRAP const std::vector<Rect2d>& getObjects() const;
 
+  /**
+  * \brief Returns a pointer to a new instance of MultiTracker
+  */
+  CV_WRAP static Ptr<MultiTracker> create();
+
+protected:
+  //! storage for the tracker algorithms.
+  std::vector< Ptr<Tracker> > trackerList;
+
+  //! storage for the tracked objects, each object corresponds to one tracker algorithm.
+  std::vector<Rect2d> objects;
+};
 
 /************************************ Multi-Tracker Classes ---By Tyan Vladimir---************************************/
 
@@ -1379,56 +1399,58 @@ void CV_EXPORTS_W selectROI(const cv::String& windowName, Mat img, std::vector<R
 class CV_EXPORTS MultiTracker_Alt
 {
 public:
-	/** @brief Constructor for Multitracker
-	*/
-	MultiTracker_Alt()
-	{
-		targetNum = 0;
-	}
-
-	/** @brief Add a new target to a tracking-list and initialize the tracker with a know bounding box that surrounding the target
-	@param image The initial frame
-	@param boundingBox The initial boundig box of target
-	@param tracker_algorithm_name Multi-tracker algorithm name
-
-	@return True if new target initialization went succesfully, false otherwise
-	*/
-	bool addTarget(const Mat& image, const Rect2d& boundingBox, String tracker_algorithm_name);
-
-	/** @brief Update all trackers from the tracking-list, find a new most likely bounding boxes for the targets
-	@param image The current frame
-
-	@return True means that all targets were located and false means that tracker couldn't locate one of the targets in
-	current frame. Note, that latter *does not* imply that tracker has failed, maybe target is indeed
-	missing from the frame (say, out of sight)
-	*/
-	bool update(const Mat& image);
-
-	/** @brief Current number of targets in tracking-list
-	*/
-	int targetNum;
-
-	/** @brief Trackers list for Multi-Object-Tracker
-	*/
-	std::vector <Ptr<Tracker> > trackers;
-
-	/** @brief Bounding Boxes list for Multi-Object-Tracker
-	*/
-	std::vector <Rect2d> boundingBoxes;
-	/** @brief List of randomly generated colors for bounding boxes display
-	*/
-	std::vector<Scalar> colors;
+  /** @brief Constructor for Multitracker
+  */
+  MultiTracker_Alt()
+  {
+    targetNum = 0;
+  }
+
+  /** @brief Add a new target to a tracking-list and initialize the tracker with a known bounding box that surrounds the target
+  @param image The initial frame
+  @param boundingBox The initial bounding box of target
+  @param tracker_algorithm Multi-tracker algorithm
+
+  @return True if new target initialization went successfully, false otherwise
+  */
+  bool addTarget(InputArray image, const Rect2d& boundingBox, Ptr<Tracker> tracker_algorithm);
+
+  /** @brief Update all trackers from the tracking-list, find the new most likely bounding boxes for the targets
+  @param image The current frame
+
+  @return True means that all targets were located and false means that the tracker couldn't locate one of the targets in
+  the current frame. Note that the latter *does not* imply that the tracker has failed; maybe the target is indeed
+  missing from the frame (say, out of sight)
+  */
+  bool update(InputArray image);
+
+  /** @brief Current number of targets in tracking-list
+  */
+  int targetNum;
+
+  /** @brief Trackers list for Multi-Object-Tracker
+  */
+  std::vector <Ptr<Tracker> > trackers;
+
+  /** @brief Bounding Boxes list for Multi-Object-Tracker
+  */
+  std::vector <Rect2d> boundingBoxes;
+  /** @brief List of randomly generated colors for bounding boxes display
+  */
+  std::vector<Scalar> colors;
 };
 
-/** @brief Multi Object Tracker for TLD. TLD is a novel tracking framework that explicitly decomposes
+/** @brief Multi Object %Tracker for TLD.
+
+TLD is a novel tracking framework that explicitly decomposes
 the long-term tracking task into tracking, learning and detection.
 
 The tracker follows the object from frame to frame. The detector localizes all appearances that
-have been observed so far and corrects the tracker if necessary. The learning estimates detector’s
+have been observed so far and corrects the tracker if necessary. The learning estimates detector's
 errors and updates it to avoid these errors in the future. The implementation is based on @cite TLD .
 
 The Median Flow algorithm (see cv::TrackerMedianFlow) was chosen as a tracking component in this
-implementation, following authors. Tracker is supposed to be able to handle rapid motions, partial
+implementation, following authors. The tracker is supposed to be able to handle rapid motions, partial
 occlusions, object absence etc.
 
 @sa Tracker, MultiTracker, TrackerTLD
@@ -1436,21 +1458,89 @@ occlusions, object absence etc.
 class CV_EXPORTS MultiTrackerTLD : public MultiTracker_Alt
 {
 public:
-	/** @brief Update all trackers from the tracking-list, find a new most likely bounding boxes for the targets by
-	optimized update method using some techniques to speedup calculations specifically for MO TLD. The only limitation
-	is that	all target bounding boxes should have approximately same aspect ratios. Speed boost is around 20%
+  /** @brief Update all trackers from the tracking-list, find the new most likely bounding boxes for the targets by
+  an optimized update method using some techniques to speed up calculations specifically for MO TLD. The only limitation
+  is that all target bounding boxes should have approximately the same aspect ratio. Speed boost is around 20%
 
-	@param image The current frame.
+  @param image The current frame.
 
-	@return True means that all targets were located and false means that tracker couldn't locate one of the targets in
-	current frame. Note, that latter *does not* imply that tracker has failed, maybe target is indeed
-	missing from the frame (say, out of sight)
-	*/
-	bool update_opt(const Mat& image);
+  @return True means that all targets were located and false means that the tracker couldn't locate one of the targets in
+  the current frame. Note that the latter *does not* imply that the tracker has failed; maybe the target is indeed
+  missing from the frame (say, out of sight)
+  */
+  bool update_opt(InputArray image);
 };
 
-//! @}
+/*********************************** CSRT ************************************/
+/** @brief the CSRT tracker
+
+The implementation is based on @cite Lukezic_IJCV2018 Discriminative Correlation Filter with Channel and Spatial Reliability
+*/
+class CV_EXPORTS_W TrackerCSRT : public Tracker
+{
+public:
+  struct CV_EXPORTS Params
+  {
+    /**
+    * \brief Constructor
+    */
+    Params();
 
+    /**
+    * \brief Read parameters from a file
+    */
+    void read(const FileNode& /*fn*/);
+
+    /**
+    * \brief Write parameters to a file
+    */
+    void write(cv::FileStorage& fs) const;
+
+    bool use_hog;
+    bool use_color_names;
+    bool use_gray;
+    bool use_rgb;
+    bool use_channel_weights;
+    bool use_segmentation;
+
+    std::string window_function; //!<  Window function: "hann", "cheb", "kaiser"
+    float kaiser_alpha;
+    float cheb_attenuation;
+
+    float template_size;
+    float gsl_sigma;
+    float hog_orientations;
+    float hog_clip;
+    float padding;
+    float filter_lr;
+    float weights_lr;
+    int num_hog_channels_used;
+    int admm_iterations;
+    int histogram_bins;
+    float histogram_lr;
+    int background_ratio;
+    int number_of_scales;
+    float scale_sigma_factor;
+    float scale_model_max_area;
+    float scale_lr;
+    float scale_step;
+
+    float psr_threshold; //!< we lost the target, if the psr is lower than this.
+  };
+
+  /** @brief Constructor
+  @param parameters CSRT parameters TrackerCSRT::Params
+  */
+  static Ptr<TrackerCSRT> create(const TrackerCSRT::Params &parameters);
+
+  CV_WRAP static Ptr<TrackerCSRT> create();
+
+  CV_WRAP virtual void setInitialMask(InputArray mask) = 0;
+
+  virtual ~TrackerCSRT() CV_OVERRIDE {}
+};
+
+//! @}
 } /* namespace cv */
 
 #endif
diff --git a/IPL/include/opencv/opencv2/tracking/tracking_by_matching.hpp b/IPL/include/opencv/opencv2/tracking/tracking_by_matching.hpp
new file mode 100644
index 0000000..b6962e2
--- /dev/null
+++ b/IPL/include/opencv/opencv2/tracking/tracking_by_matching.hpp
@@ -0,0 +1,557 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef __OPENCV_TRACKING_TRACKING_BY_MATCHING_HPP__
+#define __OPENCV_TRACKING_TRACKING_BY_MATCHING_HPP__
+
+#include <deque>
+#include <iostream>
+#include <string>
+#include <unordered_map>
+#include <vector>
+#include <memory>
+#include <map>
+#include <tuple>
+#include <set>
+
+#include "opencv2/core.hpp"
+#include "opencv2/imgproc.hpp"
+
+
+namespace cv {
+namespace tbm { //Tracking-by-Matching
+///
+/// \brief The TrackedObject struct defines properties of detected object.
+///
+struct CV_EXPORTS TrackedObject {
+    cv::Rect rect;       ///< Detected object ROI (zero area if N/A).
+    double confidence;   ///< Detection confidence level (-1 if N/A).
+    int frame_idx;       ///< Frame index where object was detected (-1 if N/A).
+    int object_id;       ///< Unique object identifier (-1 if N/A).
+    uint64_t timestamp;  ///< Timestamp in milliseconds.
+
+    ///
+    /// \brief Default constructor.
+    ///
+    TrackedObject()
+        : confidence(-1),
+        frame_idx(-1),
+        object_id(-1),
+        timestamp(0) {}
+
+    ///
+    /// \brief Constructor with parameters.
+    /// \param rect Bounding box of detected object.
+    /// \param confidence Confidence of detection.
+    /// \param frame_idx Index of frame.
+    /// \param object_id Object ID.
+    ///
+    TrackedObject(const cv::Rect &rect, float confidence, int frame_idx,
+                  int object_id)
+        : rect(rect),
+        confidence(confidence),
+        frame_idx(frame_idx),
+        object_id(object_id),
+        timestamp(0) {}
+};
+
+using TrackedObjects = std::deque<TrackedObject>;
+
+bool operator==(const TrackedObject& first, const TrackedObject& second);
+bool operator!=(const TrackedObject& first, const TrackedObject& second);
+/// (object id, detected objects) pairs collection.
+using ObjectTracks = std::unordered_map<int, TrackedObjects>;
+
+///
+/// \brief The IImageDescriptor class declares base class for image
+/// descriptor.
+///
+class CV_EXPORTS IImageDescriptor {
+public:
+    ///
+    /// \brief Descriptor size getter.
+    /// \return Descriptor size.
+    ///
+    virtual cv::Size size() const = 0;
+
+    ///
+    /// \brief Computes image descriptor.
+    /// \param[in] mat Color image.
+    /// \param[out] descr Computed descriptor.
+    ///
+    virtual void compute(const cv::Mat &mat, CV_OUT cv::Mat& descr) = 0;
+
+    ///
+    /// \brief Computes image descriptors in batches.
+    /// \param[in] mats Images of interest.
+    /// \param[out] descrs Matrices to store the computed descriptors.
+    ///
+    virtual void compute(const std::vector<cv::Mat> &mats,
+                         CV_OUT std::vector<cv::Mat>& descrs) = 0;
+
+    virtual ~IImageDescriptor() {}
+};
+
+
+///
+/// \brief Uses resized image as descriptor.
+///
+class CV_EXPORTS ResizedImageDescriptor : public IImageDescriptor {
+public:
+    ///
+    /// \brief Constructor.
+    /// \param[in] descr_size Size of the descriptor (resized image).
+    /// \param[in] interpolation Interpolation algorithm.
+    ///
+    explicit ResizedImageDescriptor(const cv::Size &descr_size,
+                                    const cv::InterpolationFlags interpolation)
+        : descr_size_(descr_size), interpolation_(interpolation) {
+            CV_Assert(descr_size.width > 0);
+            CV_Assert(descr_size.height > 0);
+        }
+
+    ///
+    /// \brief Returns descriptor size.
+    /// \return Number of elements in the descriptor.
+    ///
+    cv::Size size() const override { return descr_size_; }
+
+    ///
+    /// \brief Computes image descriptor.
+    /// \param[in] mat Frame containing the image of interest.
+    /// \param[out] descr Matrix to store the computed descriptor.
+    ///
+    void compute(const cv::Mat &mat, CV_OUT cv::Mat& descr) override {
+        CV_Assert(!mat.empty());
+        cv::resize(mat, descr, descr_size_, 0, 0, interpolation_);
+    }
+
+    ///
+    /// \brief Computes images descriptors.
+    /// \param[in] mats Frames containing images of interest.
+    /// \param[out] descrs Matrices to store the computed descriptors.
+    ///
+    void compute(const std::vector<cv::Mat> &mats,
+                 CV_OUT std::vector<cv::Mat>& descrs) override  {
+        descrs.resize(mats.size());
+        for (size_t i = 0; i < mats.size(); i++)  {
+            compute(mats[i], descrs[i]);
+        }
+    }
+
+private:
+    cv::Size descr_size_;
+
+    cv::InterpolationFlags interpolation_;
+};
+
+
+///
+/// \brief The IDescriptorDistance class declares an interface for distance
+/// computation between reidentification descriptors.
+///
+class CV_EXPORTS IDescriptorDistance {
+public:
+    ///
+    /// \brief Computes distance between two descriptors.
+    /// \param[in] descr1 First descriptor.
+    /// \param[in] descr2 Second descriptor.
+    /// \return Distance between two descriptors.
+    ///
+    virtual float compute(const cv::Mat &descr1, const cv::Mat &descr2) = 0;
+
+    ///
+    /// \brief Computes distances between two descriptors in batches.
+    /// \param[in] descrs1 Batch of first descriptors.
+    /// \param[in] descrs2 Batch of second descriptors.
+    /// \return Distances between descriptors.
+    ///
+    virtual std::vector<float> compute(const std::vector<cv::Mat> &descrs1,
+                                       const std::vector<cv::Mat> &descrs2) = 0;
+
+    virtual ~IDescriptorDistance() {}
+};
+
+///
+/// \brief The CosDistance class allows computing cosine distance between two
+/// reidentification descriptors.
+///
+class CV_EXPORTS CosDistance : public IDescriptorDistance {
+public:
+    ///
+    /// \brief CosDistance constructor.
+    /// \param[in] descriptor_size Descriptor size.
+    ///
+    explicit CosDistance(const cv::Size &descriptor_size);
+
+    ///
+    /// \brief Computes distance between two descriptors.
+    /// \param descr1 First descriptor.
+    /// \param descr2 Second descriptor.
+    /// \return Distance between two descriptors.
+    ///
+    float compute(const cv::Mat &descr1, const cv::Mat &descr2) override;
+
+    ///
+    /// \brief Computes distances between two descriptors in batches.
+    /// \param[in] descrs1 Batch of first descriptors.
+    /// \param[in] descrs2 Batch of second descriptors.
+    /// \return Distances between descriptors.
+    ///
+    std::vector<float> compute(
+        const std::vector<cv::Mat> &descrs1,
+        const std::vector<cv::Mat> &descrs2) override;
+
+private:
+    cv::Size descriptor_size_;
+};
+
+
+///
+/// \brief Computes distance between images
+///        using MatchTemplate function from OpenCV library
+///        and its cross-correlation computation method in particular.
+///
+class CV_EXPORTS MatchTemplateDistance : public IDescriptorDistance {
+public:
+    ///
+    /// \brief Constructs the distance object.
+    ///
+    /// \param[in] type Method of MatchTemplate function computation.
+    /// \param[in] scale Scale parameter for the distance.
+    ///            Final distance is computed as:
+    ///            scale * distance + offset.
+    /// \param[in] offset Offset parameter for the distance.
+    ///            Final distance is computed as:
+    ///            scale * distance + offset.
+    ///
+    MatchTemplateDistance(int type = cv::TemplateMatchModes::TM_CCORR_NORMED,
+                          float scale = -1, float offset = 1)
+        : type_(type), scale_(scale), offset_(offset) {}
+    ///
+    /// \brief Computes distance between image descriptors.
+    /// \param[in] descr1 First image descriptor.
+    /// \param[in] descr2 Second image descriptor.
+    /// \return Distance between image descriptors.
+    ///
+    float compute(const cv::Mat &descr1, const cv::Mat &descr2) override;
+    ///
+    /// \brief Computes distances between two descriptors in batches.
+    /// \param[in] descrs1 Batch of first descriptors.
+    /// \param[in] descrs2 Batch of second descriptors.
+    /// \return Distances between descriptors.
+    ///
+    std::vector<float> compute(const std::vector<cv::Mat> &descrs1,
+                               const std::vector<cv::Mat> &descrs2) override;
+    virtual ~MatchTemplateDistance() {}
+
+private:
+    int type_;      ///< Method of MatchTemplate function computation.
+    float scale_;   ///< Scale parameter for the distance. Final distance is
+                    /// computed as: scale * distance + offset.
+    float offset_;  ///< Offset parameter for the distance. Final distance is
+                    /// computed as: scale * distance + offset.
+};
+
+///
+/// \brief The TrackerParams struct stores parameters of TrackerByMatching
+///
+struct CV_EXPORTS TrackerParams {
+    size_t min_track_duration;  ///< Min track duration in milliseconds.
+
+    size_t forget_delay;  ///< Forget about track if the last bounding box in
+                          /// track was detected more than specified number of
+                          /// frames ago.
+
+    float aff_thr_fast;  ///< Affinity threshold which is used to determine if
+                         /// tracklet and detection should be combined (fast
+                         /// descriptor is used).
+
+    float aff_thr_strong;  ///< Affinity threshold which is used to determine if
+                           /// tracklet and detection should be combined (strong
+                           /// descriptor is used).
+
+    float shape_affinity_w;  ///< Shape affinity weight.
+
+    float motion_affinity_w;  ///< Motion affinity weight.
+
+    float time_affinity_w;  ///< Time affinity weight.
+
+    float min_det_conf;  ///< Min confidence of detection.
+
+    cv::Vec2f bbox_aspect_ratios_range;  ///< Bounding box aspect ratios range.
+
+    cv::Vec2f bbox_heights_range;  ///< Bounding box heights range.
+
+    int predict;  ///< How many frames are used to predict bounding box in case
+    /// of lost track.
+
+    float strong_affinity_thr;  ///< If 'fast' confidence is greater than this
+                                /// threshold then 'strong' Re-ID approach is
+                                /// used.
+
+    float reid_thr;  ///< Affinity threshold for re-identification.
+
+    bool drop_forgotten_tracks;  ///< Drop forgotten tracks. If it's enabled it
+                                 /// disables an ability to get detection log.
+
+    int max_num_objects_in_track;  ///< The number of objects in track is
+                                   /// restricted by this parameter. If it is negative or zero, the max number of
+                                   /// objects in track is not restricted.
+
+    ///
+    /// Default constructor.
+    ///
+    TrackerParams();
+};
+
+///
+/// \brief The Track class describes tracks.
+///
+class CV_EXPORTS Track {
+public:
+    ///
+    /// \brief Track constructor.
+    /// \param objs Detected objects sequence.
+    /// \param last_image Image of the last detected object in the sequence.
+    /// \param descriptor_fast Fast descriptor.
+    /// \param descriptor_strong Strong descriptor (reid embedding).
+    ///
+    Track(const TrackedObjects &objs, const cv::Mat &last_image,
+          const cv::Mat &descriptor_fast, const cv::Mat &descriptor_strong)
+        : objects(objs),
+        predicted_rect(!objs.empty() ? objs.back().rect : cv::Rect()),
+        last_image(last_image),
+        descriptor_fast(descriptor_fast),
+        descriptor_strong(descriptor_strong),
+        lost(0),
+        length(1) {
+            CV_Assert(!objs.empty());
+            first_object = objs[0];
+        }
+
+    ///
+    /// \brief empty returns whether the track contains no objects.
+    /// \return true if track does not contain objects.
+    ///
+    bool empty() const { return objects.empty(); }
+
+    ///
+    /// \brief size returns number of detected objects in a track.
+    /// \return number of detected objects in a track.
+    ///
+    size_t size() const { return objects.size(); }
+
+    ///
+    /// \brief operator [] return const reference to detected object with
+    ///        specified index.
+    /// \param i Index of object.
+    /// \return const reference to detected object with specified index.
+    ///
+    const TrackedObject &operator[](size_t i) const { return objects[i]; }
+
+    ///
+    /// \brief operator [] return non-const reference to detected object with
+    ///        specified index.
+    /// \param i Index of object.
+    /// \return non-const reference to detected object with specified index.
+    ///
+    TrackedObject &operator[](size_t i) { return objects[i]; }
+
+    ///
+    /// \brief back returns const reference to last object in track.
+    /// \return const reference to last object in track.
+    ///
+    const TrackedObject &back() const {
+        CV_Assert(!empty());
+        return objects.back();
+    }
+
+    ///
+    /// \brief back returns non-const reference to last object in track.
+    /// \return non-const reference to last object in track.
+    ///
+    TrackedObject &back() {
+        CV_Assert(!empty());
+        return objects.back();
+    }
+
+    TrackedObjects objects;   ///< Detected objects;
+    cv::Rect predicted_rect;  ///< Rectangle that represents predicted position
+                              /// and size of bounding box if track has been lost.
+    cv::Mat last_image;       ///< Image of last detected object in track.
+    cv::Mat descriptor_fast;  ///< Fast descriptor.
+    cv::Mat descriptor_strong;  ///< Strong descriptor (reid embedding).
+    size_t lost;                ///< How many frames ago track has been lost.
+
+    TrackedObject first_object;  ///< First object in track.
+    size_t length;  ///< Length of a track including number of objects that were
+                    /// removed from track in order to avoid memory usage growth.
+};
+
+///
+/// \brief Tracker-by-Matching algorithm interface.
+///
+/// This class is an implementation of a tracking-by-matching system. It uses two
+/// different appearance measures to compute affinity between bounding boxes:
+/// some fast descriptor and some strong descriptor. Each time the assignment
+/// problem is solved. The assignment problem in our case is how to establish
+/// correspondence between existing tracklets and recently detected objects.
+/// First step is to compute an affinity matrix between tracklets and
+/// detections. The affinity equals to
+///       appearance_affinity * motion_affinity * shape_affinity.
+/// Where appearance is 1 - distance(tracklet_fast_dscr, detection_fast_dscr).
+/// Second step is to solve the assignment problem using Kuhn-Munkres
+/// algorithm. If correspondence between some tracklet and detection is
+/// established with low confidence (affinity) then the strong descriptor is
+/// used to determine if there is correspondence between tracklet and detection.
+///
+class CV_EXPORTS ITrackerByMatching {
+public:
+    using Descriptor = std::shared_ptr<IImageDescriptor>;
+    using Distance = std::shared_ptr<IDescriptorDistance>;
+
+    ///
+    /// \brief Destructor for the tracker
+    ///
+    virtual ~ITrackerByMatching() {}
+
+    ///
+    /// \brief Process given frame.
+    /// \param[in] frame Colored image (CV_8UC3).
+    /// \param[in] detections Detected objects on the frame.
+    /// \param[in] timestamp Timestamp must be positive and measured in
+    /// milliseconds
+    ///
+    virtual void process(const cv::Mat &frame, const TrackedObjects &detections,
+                         uint64_t timestamp) = 0;
+
+    ///
+    /// \brief Pipeline parameters getter.
+    /// \return Parameters of pipeline.
+    ///
+    virtual const TrackerParams &params() const = 0;
+
+    ///
+    /// \brief Pipeline parameters setter.
+    /// \param[in] params Parameters of pipeline.
+    ///
+    virtual void setParams(const TrackerParams &params) = 0;
+
+    ///
+    /// \brief Fast descriptor getter.
+    /// \return Fast descriptor used in pipeline.
+    ///
+    virtual const Descriptor &descriptorFast() const = 0;
+
+    ///
+    /// \brief Fast descriptor setter.
+    /// \param[in] val Fast descriptor used in pipeline.
+    ///
+    virtual void setDescriptorFast(const Descriptor &val) = 0;
+
+    ///
+    /// \brief Strong descriptor getter.
+    /// \return Strong descriptor used in pipeline.
+    ///
+    virtual const Descriptor &descriptorStrong() const = 0;
+
+    ///
+    /// \brief Strong descriptor setter.
+    /// \param[in] val Strong descriptor used in pipeline.
+    ///
+    virtual void setDescriptorStrong(const Descriptor &val) = 0;
+
+    ///
+    /// \brief Fast distance getter.
+    /// \return Fast distance used in pipeline.
+    ///
+    virtual const Distance &distanceFast() const = 0;
+
+    ///
+    /// \brief Fast distance setter.
+    /// \param[in] val Fast distance used in pipeline.
+    ///
+    virtual void setDistanceFast(const Distance &val) = 0;
+
+    ///
+    /// \brief Strong distance getter.
+    /// \return Strong distance used in pipeline.
+    ///
+    virtual const Distance &distanceStrong() const = 0;
+
+    ///
+    /// \brief Strong distance setter.
+    /// \param[in] val Strong distance used in pipeline.
+    ///
+    virtual void setDistanceStrong(const Distance &val) = 0;
+
+    ///
+    /// \brief Returns number of counted people.
+    /// \return a number of counted people.
+    ///
+    virtual size_t count() const = 0;
+
+    ///
+    /// \brief Get active tracks to draw
+    /// \return Active tracks.
+    ///
+    virtual std::unordered_map<size_t, std::vector<cv::Point> > getActiveTracks() const = 0;
+
+    ///
+    /// \brief Get tracked detections.
+    /// \return Tracked detections.
+    ///
+    virtual TrackedObjects trackedDetections() const = 0;
+
+    ///
+    /// \brief Draws active tracks on a given frame.
+    /// \param[in] frame Colored image (CV_8UC3).
+    /// \return Colored image with drawn active tracks.
+    ///
+    virtual cv::Mat drawActiveTracks(const cv::Mat &frame) = 0;
+
+    ///
+    /// \brief isTrackForgotten returns true if track is forgotten.
+    /// \param id Track ID.
+    /// \return true if track is forgotten.
+    ///
+    virtual bool isTrackForgotten(size_t id) const = 0;
+
+    ///
+    /// \brief tracks Returns all tracks including forgotten (lost too many frames
+    /// ago).
+    /// \return Set of tracks {id, track}.
+    ///
+    virtual const std::unordered_map<size_t, Track> &tracks() const = 0;
+
+    ///
+    /// \brief isTrackValid Checks whether track is valid (duration > threshold).
+    /// \param track_id Index of checked track.
+    /// \return True if track duration exceeds some predefined value.
+    ///
+    virtual bool isTrackValid(size_t track_id) const = 0;
+
+    ///
+    /// \brief dropForgottenTracks Removes tracks from memory that were lost too
+    /// many frames ago.
+    ///
+    virtual void dropForgottenTracks() = 0;
+
+    ///
+    /// \brief dropForgottenTrack Checks that the track was lost too many frames
+    /// ago
+    /// and removes it from memory.
+    ///
+    virtual void dropForgottenTrack(size_t track_id) = 0;
+};
+
+///
+/// \brief The factory to create Tracker-by-Matching algorithm implementation.
+///
+CV_EXPORTS cv::Ptr<ITrackerByMatching> createTrackerByMatching(const TrackerParams &params = TrackerParams());
+
+} // namespace tbm
+} // namespace cv
+#endif // #ifndef __OPENCV_TRACKING_TRACKING_BY_MATCHING_HPP__
diff --git a/IPL/include/opencv/opencv2/video.hpp b/IPL/include/opencv/opencv2/video.hpp
index a593815..a3dde60 100644
--- a/IPL/include/opencv/opencv2/video.hpp
+++ b/IPL/include/opencv/opencv2/video.hpp
@@ -41,8 +41,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_VIDEO_HPP__
-#define __OPENCV_VIDEO_HPP__
+#ifndef OPENCV_VIDEO_HPP
+#define OPENCV_VIDEO_HPP
 
 /**
   @defgroup video Video Analysis
@@ -56,8 +56,4 @@
 #include "opencv2/video/tracking.hpp"
 #include "opencv2/video/background_segm.hpp"
 
-#ifndef DISABLE_OPENCV_24_COMPATIBILITY
-#include "opencv2/video/tracking_c.h"
-#endif
-
-#endif //__OPENCV_VIDEO_HPP__
+#endif //OPENCV_VIDEO_HPP
diff --git a/IPL/include/opencv/opencv2/video/background_segm.hpp b/IPL/include/opencv/opencv2/video/background_segm.hpp
index dbeccbd..e1dfa15 100644
--- a/IPL/include/opencv/opencv2/video/background_segm.hpp
+++ b/IPL/include/opencv/opencv2/video/background_segm.hpp
@@ -41,8 +41,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_BACKGROUND_SEGM_HPP__
-#define __OPENCV_BACKGROUND_SEGM_HPP__
+#ifndef OPENCV_BACKGROUND_SEGM_HPP
+#define OPENCV_BACKGROUND_SEGM_HPP
 
 #include "opencv2/core.hpp"
 
@@ -188,13 +188,24 @@ class CV_EXPORTS_W BackgroundSubtractorMOG2 : public BackgroundSubtractor
 
     A shadow is detected if pixel is a darker version of the background. The shadow threshold (Tau in
     the paper) is a threshold defining how much darker the shadow can be. Tau= 0.5 means that if a pixel
-    is more than twice darker then it is not shadow. See Prati, Mikic, Trivedi and Cucchiarra,
+    is more than twice darker then it is not shadow. See Prati, Mikic, Trivedi and Cucchiara,
     *Detecting Moving Shadows...*, IEEE PAMI,2003.
      */
     CV_WRAP virtual double getShadowThreshold() const = 0;
     /** @brief Sets the shadow threshold
     */
     CV_WRAP virtual void setShadowThreshold(double threshold) = 0;
+
+    /** @brief Computes a foreground mask.
+
+    @param image Next video frame. Floating point frame will be used without scaling and should be in range \f$[0,255]\f$.
+    @param fgmask The output foreground mask as an 8-bit binary image.
+    @param learningRate The value between 0 and 1 that indicates how fast the background model is
+    learnt. A negative parameter value makes the algorithm use some automatically chosen learning
+    rate. 0 means that the background model is not updated at all, 1 means that the background model
+    is completely reinitialized from the last frame.
+     */
+    CV_WRAP virtual void apply(InputArray image, OutputArray fgmask, double learningRate=-1) CV_OVERRIDE = 0;
 };
 
 /** @brief Creates MOG2 Background Subtractor
@@ -210,9 +221,9 @@ CV_EXPORTS_W Ptr<BackgroundSubtractorMOG2>
     createBackgroundSubtractorMOG2(int history=500, double varThreshold=16,
                                    bool detectShadows=true);
 
-/** @brief K-nearest neigbours - based Background/Foreground Segmentation Algorithm.
+/** @brief K-nearest neighbours - based Background/Foreground Segmentation Algorithm.
 
-The class implements the K-nearest neigbours background subtraction described in @cite Zivkovic2006 .
+The class implements the K-nearest neighbours background subtraction described in @cite Zivkovic2006 .
 Very efficient if number of foreground pixels is low.
  */
 class CV_EXPORTS_W BackgroundSubtractorKNN : public BackgroundSubtractor
@@ -250,7 +261,7 @@ class CV_EXPORTS_W BackgroundSubtractorKNN : public BackgroundSubtractor
     pixel is matching the kNN background model.
      */
     CV_WRAP virtual int getkNNSamples() const = 0;
-    /** @brief Sets the k in the kNN. How many nearest neigbours need to match.
+    /** @brief Sets the k in the kNN. How many nearest neighbours need to match.
     */
     CV_WRAP virtual void setkNNSamples(int _nkNN) = 0;
 
@@ -278,7 +289,7 @@ class CV_EXPORTS_W BackgroundSubtractorKNN : public BackgroundSubtractor
 
     A shadow is detected if pixel is a darker version of the background. The shadow threshold (Tau in
     the paper) is a threshold defining how much darker the shadow can be. Tau= 0.5 means that if a pixel
-    is more than twice darker then it is not shadow. See Prati, Mikic, Trivedi and Cucchiarra,
+    is more than twice darker then it is not shadow. See Prati, Mikic, Trivedi and Cucchiara,
     *Detecting Moving Shadows...*, IEEE PAMI,2003.
      */
     CV_WRAP virtual double getShadowThreshold() const = 0;
diff --git a/IPL/include/opencv/opencv2/video/legacy/constants_c.h b/IPL/include/opencv/opencv2/video/legacy/constants_c.h
new file mode 100644
index 0000000..1a98f52
--- /dev/null
+++ b/IPL/include/opencv/opencv2/video/legacy/constants_c.h
@@ -0,0 +1,16 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_VIDEO_LEGACY_CONSTANTS_H
+#define OPENCV_VIDEO_LEGACY_CONSTANTS_H
+
+enum
+{
+    CV_LKFLOW_PYR_A_READY = 1,
+    CV_LKFLOW_PYR_B_READY = 2,
+    CV_LKFLOW_INITIAL_GUESSES = 4,
+    CV_LKFLOW_GET_MIN_EIGENVALS = 8
+};
+
+#endif // OPENCV_VIDEO_LEGACY_CONSTANTS_H
diff --git a/IPL/include/opencv/opencv2/video/tracking.hpp b/IPL/include/opencv/opencv2/video/tracking.hpp
index d6954fe..e978886 100644
--- a/IPL/include/opencv/opencv2/video/tracking.hpp
+++ b/IPL/include/opencv/opencv2/video/tracking.hpp
@@ -41,8 +41,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_TRACKING_HPP__
-#define __OPENCV_TRACKING_HPP__
+#ifndef OPENCV_TRACKING_HPP
+#define OPENCV_TRACKING_HPP
 
 #include "opencv2/core.hpp"
 #include "opencv2/imgproc.hpp"
@@ -78,6 +78,9 @@ See the OpenCV sample camshiftdemo.c that tracks colored objects.
  */
 CV_EXPORTS_W RotatedRect CamShift( InputArray probImage, CV_IN_OUT Rect& window,
                                    TermCriteria criteria );
+/** @example samples/cpp/camshiftdemo.cpp
+An example using the mean-shift tracking algorithm
+*/
 
 /** @brief Finds an object on a back projection image.
 
@@ -97,8 +100,6 @@ projection and remove the noise. For example, you can do this by retrieving conn
 with findContours , throwing away contours with small area ( contourArea ), and rendering the
 remaining contours with drawContours.
 
-@note
--   A mean-shift tracking sample can be found at opencv_source_code/samples/cpp/camshiftdemo.cpp
  */
 CV_EXPORTS_W int meanShift( InputArray probImage, CV_IN_OUT Rect& window, TermCriteria criteria );
 
@@ -123,6 +124,10 @@ CV_EXPORTS_W int buildOpticalFlowPyramid( InputArray img, OutputArrayOfArrays py
                                           int derivBorder = BORDER_CONSTANT,
                                           bool tryReuseInputImage = true );
 
+/** @example samples/cpp/lkdemo.cpp
+An example using the Lucas-Kanade optical flow algorithm
+*/
+
 /** @brief Calculates an optical flow for a sparse feature set using the iterative Lucas-Kanade method with
 pyramids.
 
@@ -226,7 +231,7 @@ CV_EXPORTS_W void calcOpticalFlowFarneback( InputArray prev, InputArray next, In
 @param dst Second input 2D point set of the same size and the same type as A, or another image.
 @param fullAffine If true, the function finds an optimal affine transformation with no additional
 restrictions (6 degrees of freedom). Otherwise, the class of transformations to choose from is
-limited to combinations of translation, rotation, and uniform scaling (5 degrees of freedom).
+limited to combinations of translation, rotation, and uniform scaling (4 degrees of freedom).
 
 The function finds an optimal affine transform *[A|b]* (a 2 x 3 floating-point matrix) that
 approximates best the affine transformation between:
@@ -244,11 +249,13 @@ where src[i] and dst[i] are the i-th points in src and dst, respectively
 \f[\begin{bmatrix} a_{11} & a_{12} & b_1  \\ -a_{12} & a_{11} & b_2  \end{bmatrix}\f]
 when fullAffine=false.
 
+@deprecated Use cv::estimateAffine2D, cv::estimateAffinePartial2D instead. If you are using this function
+with images, extract points using cv::calcOpticalFlowPyrLK and then use the estimation functions.
+
 @sa
-getAffineTransform, getPerspectiveTransform, findHomography
+estimateAffine2D, estimateAffinePartial2D, getAffineTransform, getPerspectiveTransform, findHomography
  */
-CV_EXPORTS_W Mat estimateRigidTransform( InputArray src, InputArray dst, bool fullAffine );
-
+CV_DEPRECATED CV_EXPORTS Mat estimateRigidTransform( InputArray src, InputArray dst, bool fullAffine );
 
 enum
 {
@@ -258,11 +265,28 @@ enum
     MOTION_HOMOGRAPHY  = 3
 };
 
+/** @brief Computes the Enhanced Correlation Coefficient value between two images @cite EP08 .
+
+@param templateImage single-channel template image; CV_8U or CV_32F array.
+@param inputImage single-channel input image to be warped to provide an image similar to
+ templateImage, same type as templateImage.
+@param inputMask An optional mask to indicate valid values of inputImage.
+
+@sa
+findTransformECC
+ */
+
+CV_EXPORTS_W double computeECC(InputArray templateImage, InputArray inputImage, InputArray inputMask = noArray());
+
+/** @example samples/cpp/image_alignment.cpp
+An example using the image alignment ECC algorithm
+*/
+
 /** @brief Finds the geometric transform (warp) between two images in terms of the ECC criterion @cite EP08 .
 
 @param templateImage single-channel template image; CV_8U or CV_32F array.
 @param inputImage single-channel input image which should be warped with the final warpMatrix in
-order to provide an image similar to templateImage, same type as temlateImage.
+order to provide an image similar to templateImage, same type as templateImage.
 @param warpMatrix floating-point \f$2\times 3\f$ or \f$3\times 3\f$ mapping matrix (warp).
 @param motionType parameter, specifying the type of motion:
  -   **MOTION_TRANSLATION** sets a translational motion model; warpMatrix is \f$2\times 3\f$ with
@@ -279,6 +303,7 @@ criteria.epsilon defines the threshold of the increment in the correlation coeff
 iterations (a negative criteria.epsilon makes criteria.maxcount the only termination criterion).
 Default values are shown in the declaration above.
 @param inputMask An optional mask to indicate valid values of inputImage.
+@param gaussFiltSize An optional value indicating size of gaussian blur filter; (DEFAULT: 5)
 
 The function estimates the optimum transformation (warpMatrix) with respect to ECC criterion
 (@cite EP08), that is
@@ -297,7 +322,7 @@ row is ignored.
 Unlike findHomography and estimateRigidTransform, the function findTransformECC implements an
 area-based alignment that builds on intensity similarities. In essence, the function updates the
 initial transformation that roughly aligns the images. If this information is missing, the identity
-warp (unity matrix) should be given as input. Note that if images undergo strong
+warp (unity matrix) is used as an initialization. Note that if images undergo strong
 displacements/rotations, an initial transformation that roughly aligns the images is necessary
 (e.g., a simple euclidean/similarity transform that allows for the images showing the same image
 content approximately). Use inverse warping in the second image to take an image close to the first
@@ -306,32 +331,35 @@ sample image_alignment.cpp that demonstrates the use of the function. Note that
 an exception if algorithm does not converges.
 
 @sa
-estimateRigidTransform, findHomography
+computeECC, estimateAffine2D, estimateAffinePartial2D, findHomography
  */
 CV_EXPORTS_W double findTransformECC( InputArray templateImage, InputArray inputImage,
-                                      InputOutputArray warpMatrix, int motionType = MOTION_AFFINE,
-                                      TermCriteria criteria = TermCriteria(TermCriteria::COUNT+TermCriteria::EPS, 50, 0.001),
-                                      InputArray inputMask = noArray());
+                                      InputOutputArray warpMatrix, int motionType,
+                                      TermCriteria criteria,
+                                      InputArray inputMask, int gaussFiltSize);
+
+/** @overload */
+CV_EXPORTS
+double findTransformECC(InputArray templateImage, InputArray inputImage,
+    InputOutputArray warpMatrix, int motionType = MOTION_AFFINE,
+    TermCriteria criteria = TermCriteria(TermCriteria::COUNT+TermCriteria::EPS, 50, 0.001),
+    InputArray inputMask = noArray());
+
+/** @example samples/cpp/kalman.cpp
+An example using the standard Kalman filter
+*/
 
 /** @brief Kalman filter class.
 
 The class implements a standard Kalman filter <http://en.wikipedia.org/wiki/Kalman_filter>,
 @cite Welch95 . However, you can modify transitionMatrix, controlMatrix, and measurementMatrix to get
-an extended Kalman filter functionality. See the OpenCV sample kalman.cpp.
-
-@note
-
--   An example using the standard Kalman filter can be found at
-    opencv_source_code/samples/cpp/kalman.cpp
+an extended Kalman filter functionality.
+@note In C API when CvKalman\* kalmanFilter structure is not needed anymore, it should be released
+with cvReleaseKalman(&kalmanFilter)
  */
 class CV_EXPORTS_W KalmanFilter
 {
 public:
-    /** @brief The constructors.
-
-    @note In C API when CvKalman\* kalmanFilter structure is not needed anymore, it should be released
-    with cvReleaseKalman(&kalmanFilter)
-     */
     CV_WRAP KalmanFilter();
     /** @overload
     @param dynamParams Dimensionality of the state.
@@ -382,6 +410,29 @@ class CV_EXPORTS_W KalmanFilter
 };
 
 
+/** @brief Read a .flo file
+
+ @param path Path to the file to be loaded
+
+ The function readOpticalFlow loads a flow field from a file and returns it as a single matrix.
+ Resulting Mat has a type CV_32FC2 - floating-point, 2-channel. First channel corresponds to the
+ flow in the horizontal direction (u), second - vertical (v).
+ */
+CV_EXPORTS_W Mat readOpticalFlow( const String& path );
+/** @brief Write a .flo file to disk
+
+ @param path Path to the file to be written
+ @param flow Flow field to be stored
+
+ The function stores a flow field in a file, returns true on success, false otherwise.
+ The flow field must be a 2-channel, floating-point matrix (CV_32FC2). First channel corresponds
+ to the flow in the horizontal direction (u), second - vertical (v).
+ */
+CV_EXPORTS_W bool writeOpticalFlow( const String& path, InputArray flow );
+
+/**
+   Base class for dense optical flow algorithms
+*/
 class CV_EXPORTS_W DenseOpticalFlow : public Algorithm
 {
 public:
@@ -397,116 +448,262 @@ class CV_EXPORTS_W DenseOpticalFlow : public Algorithm
     CV_WRAP virtual void collectGarbage() = 0;
 };
 
-/** @brief "Dual TV L1" Optical Flow Algorithm.
+/** @brief Base interface for sparse optical flow algorithms.
+ */
+class CV_EXPORTS_W SparseOpticalFlow : public Algorithm
+{
+public:
+    /** @brief Calculates a sparse optical flow.
+
+    @param prevImg First input image.
+    @param nextImg Second input image of the same size and the same type as prevImg.
+    @param prevPts Vector of 2D points for which the flow needs to be found.
+    @param nextPts Output vector of 2D points containing the calculated new positions of input features in the second image.
+    @param status Output status vector. Each element of the vector is set to 1 if the
+                  flow for the corresponding features has been found. Otherwise, it is set to 0.
+    @param err Optional output vector that contains error response for each point (inverse confidence).
+     */
+    CV_WRAP virtual void calc(InputArray prevImg, InputArray nextImg,
+                      InputArray prevPts, InputOutputArray nextPts,
+                      OutputArray status,
+                      OutputArray err = cv::noArray()) = 0;
+};
+
+
+/** @brief Class computing a dense optical flow using Gunnar Farneback's algorithm.
+ */
+class CV_EXPORTS_W FarnebackOpticalFlow : public DenseOpticalFlow
+{
+public:
+    CV_WRAP virtual int getNumLevels() const = 0;
+    CV_WRAP virtual void setNumLevels(int numLevels) = 0;
 
-The class implements the "Dual TV L1" optical flow algorithm described in @cite Zach2007 and
-@cite Javier2012 .
-Here are important members of the class that control the algorithm, which you can set after
-constructing the class instance:
+    CV_WRAP virtual double getPyrScale() const = 0;
+    CV_WRAP virtual void setPyrScale(double pyrScale) = 0;
 
--   member double tau
-    Time step of the numerical scheme.
+    CV_WRAP virtual bool getFastPyramids() const = 0;
+    CV_WRAP virtual void setFastPyramids(bool fastPyramids) = 0;
 
--   member double lambda
-    Weight parameter for the data term, attachment parameter. This is the most relevant
-    parameter, which determines the smoothness of the output. The smaller this parameter is,
-    the smoother the solutions we obtain. It depends on the range of motions of the images, so
-    its value should be adapted to each image sequence.
+    CV_WRAP virtual int getWinSize() const = 0;
+    CV_WRAP virtual void setWinSize(int winSize) = 0;
 
--   member double theta
-    Weight parameter for (u - v)\^2, tightness parameter. It serves as a link between the
-    attachment and the regularization terms. In theory, it should have a small value in order
-    to maintain both parts in correspondence. The method is stable for a large range of values
-    of this parameter.
+    CV_WRAP virtual int getNumIters() const = 0;
+    CV_WRAP virtual void setNumIters(int numIters) = 0;
 
--   member int nscales
-    Number of scales used to create the pyramid of images.
+    CV_WRAP virtual int getPolyN() const = 0;
+    CV_WRAP virtual void setPolyN(int polyN) = 0;
 
--   member int warps
-    Number of warpings per scale. Represents the number of times that I1(x+u0) and grad(
-    I1(x+u0) ) are computed per scale. This is a parameter that assures the stability of the
-    method. It also affects the running time, so it is a compromise between speed and
-    accuracy.
+    CV_WRAP virtual double getPolySigma() const = 0;
+    CV_WRAP virtual void setPolySigma(double polySigma) = 0;
 
--   member double epsilon
-    Stopping criterion threshold used in the numerical scheme, which is a trade-off between
-    precision and running time. A small value will yield more accurate solutions at the
-    expense of a slower convergence.
+    CV_WRAP virtual int getFlags() const = 0;
+    CV_WRAP virtual void setFlags(int flags) = 0;
 
--   member int iterations
-    Stopping criterion iterations number used in the numerical scheme.
+    CV_WRAP static Ptr<FarnebackOpticalFlow> create(
+            int numLevels = 5,
+            double pyrScale = 0.5,
+            bool fastPyramids = false,
+            int winSize = 13,
+            int numIters = 10,
+            int polyN = 5,
+            double polySigma = 1.1,
+            int flags = 0);
+};
+
+/** @brief Variational optical flow refinement
 
-C. Zach, T. Pock and H. Bischof, "A Duality Based Approach for Realtime TV-L1 Optical Flow".
-Javier Sanchez, Enric Meinhardt-Llopis and Gabriele Facciolo. "TV-L1 Optical Flow Estimation".
+This class implements variational refinement of the input flow field, i.e.
+it uses input flow to initialize the minimization of the following functional:
+\f$E(U) = \int_{\Omega} \delta \Psi(E_I) + \gamma \Psi(E_G) + \alpha \Psi(E_S) \f$,
+where \f$E_I,E_G,E_S\f$ are color constancy, gradient constancy and smoothness terms
+respectively. \f$\Psi(s^2)=\sqrt{s^2+\epsilon^2}\f$ is a robust penalizer to limit the
+influence of outliers. A complete formulation and a description of the minimization
+procedure can be found in @cite Brox2004
 */
-class CV_EXPORTS_W DualTVL1OpticalFlow : public DenseOpticalFlow
+class CV_EXPORTS_W VariationalRefinement : public DenseOpticalFlow
 {
 public:
-    //! @brief Time step of the numerical scheme
-    /** @see setTau */
-    virtual double getTau() const = 0;
-    /** @copybrief getTau @see getTau */
-    virtual void setTau(double val) = 0;
-    //! @brief Weight parameter for the data term, attachment parameter
-    /** @see setLambda */
-    virtual double getLambda() const = 0;
-    /** @copybrief getLambda @see getLambda */
-    virtual void setLambda(double val) = 0;
-    //! @brief Weight parameter for (u - v)^2, tightness parameter
-    /** @see setTheta */
-    virtual double getTheta() const = 0;
-    /** @copybrief getTheta @see getTheta */
-    virtual void setTheta(double val) = 0;
-    //! @brief coefficient for additional illumination variation term
-    /** @see setGamma */
-    virtual double getGamma() const = 0;
+    /** @brief @ref calc function overload to handle separate horizontal (u) and vertical (v) flow components
+    (to avoid extra splits/merges) */
+    CV_WRAP virtual void calcUV(InputArray I0, InputArray I1, InputOutputArray flow_u, InputOutputArray flow_v) = 0;
+
+    /** @brief Number of outer (fixed-point) iterations in the minimization procedure.
+    @see setFixedPointIterations */
+    CV_WRAP virtual int getFixedPointIterations() const = 0;
+    /** @copybrief getFixedPointIterations @see getFixedPointIterations */
+    CV_WRAP virtual void setFixedPointIterations(int val) = 0;
+
+    /** @brief Number of inner successive over-relaxation (SOR) iterations
+        in the minimization procedure to solve the respective linear system.
+    @see setSorIterations */
+    CV_WRAP virtual int getSorIterations() const = 0;
+    /** @copybrief getSorIterations @see getSorIterations */
+    CV_WRAP virtual void setSorIterations(int val) = 0;
+
+    /** @brief Relaxation factor in SOR
+    @see setOmega */
+    CV_WRAP virtual float getOmega() const = 0;
+    /** @copybrief getOmega @see getOmega */
+    CV_WRAP virtual void setOmega(float val) = 0;
+
+    /** @brief Weight of the smoothness term
+    @see setAlpha */
+    CV_WRAP virtual float getAlpha() const = 0;
+    /** @copybrief getAlpha @see getAlpha */
+    CV_WRAP virtual void setAlpha(float val) = 0;
+
+    /** @brief Weight of the color constancy term
+    @see setDelta */
+    CV_WRAP virtual float getDelta() const = 0;
+    /** @copybrief getDelta @see getDelta */
+    CV_WRAP virtual void setDelta(float val) = 0;
+
+    /** @brief Weight of the gradient constancy term
+    @see setGamma */
+    CV_WRAP virtual float getGamma() const = 0;
     /** @copybrief getGamma @see getGamma */
-    virtual void setGamma(double val) = 0;
-    //! @brief Number of scales used to create the pyramid of images
-    /** @see setScalesNumber */
-    virtual int getScalesNumber() const = 0;
-    /** @copybrief getScalesNumber @see getScalesNumber */
-    virtual void setScalesNumber(int val) = 0;
-    //! @brief Number of warpings per scale
-    /** @see setWarpingsNumber */
-    virtual int getWarpingsNumber() const = 0;
-    /** @copybrief getWarpingsNumber @see getWarpingsNumber */
-    virtual void setWarpingsNumber(int val) = 0;
-    //! @brief Stopping criterion threshold used in the numerical scheme, which is a trade-off between precision and running time
-    /** @see setEpsilon */
-    virtual double getEpsilon() const = 0;
-    /** @copybrief getEpsilon @see getEpsilon */
-    virtual void setEpsilon(double val) = 0;
-    //! @brief Inner iterations (between outlier filtering) used in the numerical scheme
-    /** @see setInnerIterations */
-    virtual int getInnerIterations() const = 0;
-    /** @copybrief getInnerIterations @see getInnerIterations */
-    virtual void setInnerIterations(int val) = 0;
-    //! @brief Outer iterations (number of inner loops) used in the numerical scheme
-    /** @see setOuterIterations */
-    virtual int getOuterIterations() const = 0;
-    /** @copybrief getOuterIterations @see getOuterIterations */
-    virtual void setOuterIterations(int val) = 0;
-    //! @brief Use initial flow
-    /** @see setUseInitialFlow */
-    virtual bool getUseInitialFlow() const = 0;
-    /** @copybrief getUseInitialFlow @see getUseInitialFlow */
-    virtual void setUseInitialFlow(bool val) = 0;
-    //! @brief Step between scales (<1)
-    /** @see setScaleStep */
-    virtual double getScaleStep() const = 0;
-    /** @copybrief getScaleStep @see getScaleStep */
-    virtual void setScaleStep(double val) = 0;
-    //! @brief Median filter kernel size (1 = no filter) (3 or 5)
-    /** @see setMedianFiltering */
-    virtual int getMedianFiltering() const = 0;
-    /** @copybrief getMedianFiltering @see getMedianFiltering */
-    virtual void setMedianFiltering(int val) = 0;
+    CV_WRAP virtual void setGamma(float val) = 0;
+
+    /** @brief Creates an instance of VariationalRefinement
+    */
+    CV_WRAP static Ptr<VariationalRefinement> create();
 };
 
-/** @brief Creates instance of cv::DenseOpticalFlow
+/** @brief DIS optical flow algorithm.
+
+This class implements the Dense Inverse Search (DIS) optical flow algorithm. More
+details about the algorithm can be found at @cite Kroeger2016 . Includes three presets with preselected
+parameters to provide reasonable trade-off between speed and quality. However, even the slowest preset is
+still relatively fast, use DeepFlow if you need better quality and don't care about speed.
+
+This implementation includes several additional features compared to the algorithm described in the paper,
+including spatial propagation of flow vectors (@ref getUseSpatialPropagation), as well as an option to
+utilize an initial flow approximation passed to @ref calc (which is, essentially, temporal propagation,
+if the previous frame's flow field is passed).
 */
-CV_EXPORTS_W Ptr<DualTVL1OpticalFlow> createOptFlow_DualTVL1();
+class CV_EXPORTS_W DISOpticalFlow : public DenseOpticalFlow
+{
+public:
+    enum
+    {
+        PRESET_ULTRAFAST = 0,
+        PRESET_FAST = 1,
+        PRESET_MEDIUM = 2
+    };
+
+    /** @brief Finest level of the Gaussian pyramid on which the flow is computed (zero level
+        corresponds to the original image resolution). The final flow is obtained by bilinear upscaling.
+        @see setFinestScale */
+    CV_WRAP virtual int getFinestScale() const = 0;
+    /** @copybrief getFinestScale @see getFinestScale */
+    CV_WRAP virtual void setFinestScale(int val) = 0;
+
+    /** @brief Size of an image patch for matching (in pixels). Normally, default 8x8 patches work well
+        enough in most cases.
+        @see setPatchSize */
+    CV_WRAP virtual int getPatchSize() const = 0;
+    /** @copybrief getPatchSize @see getPatchSize */
+    CV_WRAP virtual void setPatchSize(int val) = 0;
+
+    /** @brief Stride between neighbor patches. Must be less than patch size. Lower values correspond
+        to higher flow quality.
+        @see setPatchStride */
+    CV_WRAP virtual int getPatchStride() const = 0;
+    /** @copybrief getPatchStride @see getPatchStride */
+    CV_WRAP virtual void setPatchStride(int val) = 0;
+
+    /** @brief Maximum number of gradient descent iterations in the patch inverse search stage. Higher values
+        may improve quality in some cases.
+        @see setGradientDescentIterations */
+    CV_WRAP virtual int getGradientDescentIterations() const = 0;
+    /** @copybrief getGradientDescentIterations @see getGradientDescentIterations */
+    CV_WRAP virtual void setGradientDescentIterations(int val) = 0;
+
+    /** @brief Number of fixed point iterations of variational refinement per scale. Set to zero to
+        disable variational refinement completely. Higher values will typically result in more smooth and
+        high-quality flow.
+    @see setVariationalRefinementIterations */
+    CV_WRAP virtual int getVariationalRefinementIterations() const = 0;
+    /** @copybrief getVariationalRefinementIterations @see getVariationalRefinementIterations */
+    CV_WRAP virtual void setVariationalRefinementIterations(int val) = 0;
+
+    /** @brief Weight of the smoothness term
+    @see setVariationalRefinementAlpha */
+    CV_WRAP virtual float getVariationalRefinementAlpha() const = 0;
+    /** @copybrief getVariationalRefinementAlpha @see getVariationalRefinementAlpha */
+    CV_WRAP virtual void setVariationalRefinementAlpha(float val) = 0;
+
+    /** @brief Weight of the color constancy term
+    @see setVariationalRefinementDelta */
+    CV_WRAP virtual float getVariationalRefinementDelta() const = 0;
+    /** @copybrief getVariationalRefinementDelta @see getVariationalRefinementDelta */
+    CV_WRAP virtual void setVariationalRefinementDelta(float val) = 0;
+
+    /** @brief Weight of the gradient constancy term
+    @see setVariationalRefinementGamma */
+    CV_WRAP virtual float getVariationalRefinementGamma() const = 0;
+    /** @copybrief getVariationalRefinementGamma @see getVariationalRefinementGamma */
+    CV_WRAP virtual void setVariationalRefinementGamma(float val) = 0;
+
+
+    /** @brief Whether to use mean-normalization of patches when computing patch distance. It is turned on
+        by default as it typically provides a noticeable quality boost because of increased robustness to
+        illumination variations. Turn it off if you are certain that your sequence doesn't contain any changes
+        in illumination.
+    @see setUseMeanNormalization */
+    CV_WRAP virtual bool getUseMeanNormalization() const = 0;
+    /** @copybrief getUseMeanNormalization @see getUseMeanNormalization */
+    CV_WRAP virtual void setUseMeanNormalization(bool val) = 0;
+
+    /** @brief Whether to use spatial propagation of good optical flow vectors. This option is turned on by
+        default, as it tends to work better on average and can sometimes help recover from major errors
+        introduced by the coarse-to-fine scheme employed by the DIS optical flow algorithm. Turning this
+        option off can make the output flow field a bit smoother, however.
+    @see setUseSpatialPropagation */
+    CV_WRAP virtual bool getUseSpatialPropagation() const = 0;
+    /** @copybrief getUseSpatialPropagation @see getUseSpatialPropagation */
+    CV_WRAP virtual void setUseSpatialPropagation(bool val) = 0;
+
+    /** @brief Creates an instance of DISOpticalFlow
+
+    @param preset one of PRESET_ULTRAFAST, PRESET_FAST and PRESET_MEDIUM
+    */
+    CV_WRAP static Ptr<DISOpticalFlow> create(int preset = DISOpticalFlow::PRESET_FAST);
+};
+
+/** @brief Class used for calculating a sparse optical flow.
+
+The class can calculate an optical flow for a sparse feature set using the
+iterative Lucas-Kanade method with pyramids.
+
+@sa calcOpticalFlowPyrLK
+
+*/
+class CV_EXPORTS_W SparsePyrLKOpticalFlow : public SparseOpticalFlow
+{
+public:
+    CV_WRAP virtual Size getWinSize() const = 0;
+    CV_WRAP virtual void setWinSize(Size winSize) = 0;
+
+    CV_WRAP virtual int getMaxLevel() const = 0;
+    CV_WRAP virtual void setMaxLevel(int maxLevel) = 0;
+
+    CV_WRAP virtual TermCriteria getTermCriteria() const = 0;
+    CV_WRAP virtual void setTermCriteria(TermCriteria& crit) = 0;
+
+    CV_WRAP virtual int getFlags() const = 0;
+    CV_WRAP virtual void setFlags(int flags) = 0;
+
+    CV_WRAP virtual double getMinEigThreshold() const = 0;
+    CV_WRAP virtual void setMinEigThreshold(double minEigThreshold) = 0;
+
+    CV_WRAP static Ptr<SparsePyrLKOpticalFlow> create(
+            Size winSize = Size(21, 21),
+            int maxLevel = 3, TermCriteria crit =
+            TermCriteria(TermCriteria::COUNT+TermCriteria::EPS, 30, 0.01),
+            int flags = 0,
+            double minEigThreshold = 1e-4);
+};
 
 //! @} video_track
 
diff --git a/IPL/include/opencv/opencv2/video/tracking_c.h b/IPL/include/opencv/opencv2/video/tracking_c.h
deleted file mode 100644
index b355352..0000000
--- a/IPL/include/opencv/opencv2/video/tracking_c.h
+++ /dev/null
@@ -1,232 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                          License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifndef __OPENCV_TRACKING_C_H__
-#define __OPENCV_TRACKING_C_H__
-
-#include "opencv2/imgproc/types_c.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/** @addtogroup video_c
-  @{
-*/
-
-/****************************************************************************************\
-*                                  Motion Analysis                                       *
-\****************************************************************************************/
-
-/************************************ optical flow ***************************************/
-
-#define CV_LKFLOW_PYR_A_READY       1
-#define CV_LKFLOW_PYR_B_READY       2
-#define CV_LKFLOW_INITIAL_GUESSES   4
-#define CV_LKFLOW_GET_MIN_EIGENVALS 8
-
-/* It is Lucas & Kanade method, modified to use pyramids.
-   Also it does several iterations to get optical flow for
-   every point at every pyramid level.
-   Calculates optical flow between two images for certain set of points (i.e.
-   it is a "sparse" optical flow, which is opposite to the previous 3 methods) */
-CVAPI(void)  cvCalcOpticalFlowPyrLK( const CvArr*  prev, const CvArr*  curr,
-                                     CvArr*  prev_pyr, CvArr*  curr_pyr,
-                                     const CvPoint2D32f* prev_features,
-                                     CvPoint2D32f* curr_features,
-                                     int       count,
-                                     CvSize    win_size,
-                                     int       level,
-                                     char*     status,
-                                     float*    track_error,
-                                     CvTermCriteria criteria,
-                                     int       flags );
-
-
-/* Modification of a previous sparse optical flow algorithm to calculate
-   affine flow */
-CVAPI(void)  cvCalcAffineFlowPyrLK( const CvArr*  prev, const CvArr*  curr,
-                                    CvArr*  prev_pyr, CvArr*  curr_pyr,
-                                    const CvPoint2D32f* prev_features,
-                                    CvPoint2D32f* curr_features,
-                                    float* matrices, int  count,
-                                    CvSize win_size, int  level,
-                                    char* status, float* track_error,
-                                    CvTermCriteria criteria, int flags );
-
-/* Estimate rigid transformation between 2 images or 2 point sets */
-CVAPI(int)  cvEstimateRigidTransform( const CvArr* A, const CvArr* B,
-                                      CvMat* M, int full_affine );
-
-/* Estimate optical flow for each pixel using the two-frame G. Farneback algorithm */
-CVAPI(void) cvCalcOpticalFlowFarneback( const CvArr* prev, const CvArr* next,
-                                        CvArr* flow, double pyr_scale, int levels,
-                                        int winsize, int iterations, int poly_n,
-                                        double poly_sigma, int flags );
-
-/********************************* motion templates *************************************/
-
-/****************************************************************************************\
-*        All the motion template functions work only with single channel images.         *
-*        Silhouette image must have depth IPL_DEPTH_8U or IPL_DEPTH_8S                   *
-*        Motion history image must have depth IPL_DEPTH_32F,                             *
-*        Gradient mask - IPL_DEPTH_8U or IPL_DEPTH_8S,                                   *
-*        Motion orientation image - IPL_DEPTH_32F                                        *
-*        Segmentation mask - IPL_DEPTH_32F                                               *
-*        All the angles are in degrees, all the times are in milliseconds                *
-\****************************************************************************************/
-
-/* Updates motion history image given motion silhouette */
-CVAPI(void)    cvUpdateMotionHistory( const CvArr* silhouette, CvArr* mhi,
-                                      double timestamp, double duration );
-
-/* Calculates gradient of the motion history image and fills
-   a mask indicating where the gradient is valid */
-CVAPI(void)    cvCalcMotionGradient( const CvArr* mhi, CvArr* mask, CvArr* orientation,
-                                     double delta1, double delta2,
-                                     int aperture_size CV_DEFAULT(3));
-
-/* Calculates average motion direction within a selected motion region
-   (region can be selected by setting ROIs and/or by composing a valid gradient mask
-   with the region mask) */
-CVAPI(double)  cvCalcGlobalOrientation( const CvArr* orientation, const CvArr* mask,
-                                        const CvArr* mhi, double timestamp,
-                                        double duration );
-
-/* Splits a motion history image into a few parts corresponding to separate independent motions
-   (e.g. left hand, right hand) */
-CVAPI(CvSeq*)  cvSegmentMotion( const CvArr* mhi, CvArr* seg_mask,
-                                CvMemStorage* storage,
-                                double timestamp, double seg_thresh );
-
-/****************************************************************************************\
-*                                       Tracking                                         *
-\****************************************************************************************/
-
-/* Implements CAMSHIFT algorithm - determines object position, size and orientation
-   from the object histogram back project (extension of meanshift) */
-CVAPI(int)  cvCamShift( const CvArr* prob_image, CvRect  window,
-                        CvTermCriteria criteria, CvConnectedComp* comp,
-                        CvBox2D* box CV_DEFAULT(NULL) );
-
-/* Implements MeanShift algorithm - determines object position
-   from the object histogram back project */
-CVAPI(int)  cvMeanShift( const CvArr* prob_image, CvRect  window,
-                         CvTermCriteria criteria, CvConnectedComp* comp );
-
-/*
-standard Kalman filter (in G. Welch' and G. Bishop's notation):
-
-  x(k)=A*x(k-1)+B*u(k)+w(k)  p(w)~N(0,Q)
-  z(k)=H*x(k)+v(k),   p(v)~N(0,R)
-*/
-typedef struct CvKalman
-{
-    int MP;                     /* number of measurement vector dimensions */
-    int DP;                     /* number of state vector dimensions */
-    int CP;                     /* number of control vector dimensions */
-
-    /* backward compatibility fields */
-#if 1
-    float* PosterState;         /* =state_pre->data.fl */
-    float* PriorState;          /* =state_post->data.fl */
-    float* DynamMatr;           /* =transition_matrix->data.fl */
-    float* MeasurementMatr;     /* =measurement_matrix->data.fl */
-    float* MNCovariance;        /* =measurement_noise_cov->data.fl */
-    float* PNCovariance;        /* =process_noise_cov->data.fl */
-    float* KalmGainMatr;        /* =gain->data.fl */
-    float* PriorErrorCovariance;/* =error_cov_pre->data.fl */
-    float* PosterErrorCovariance;/* =error_cov_post->data.fl */
-    float* Temp1;               /* temp1->data.fl */
-    float* Temp2;               /* temp2->data.fl */
-#endif
-
-    CvMat* state_pre;           /* predicted state (x'(k)):
-                                    x(k)=A*x(k-1)+B*u(k) */
-    CvMat* state_post;          /* corrected state (x(k)):
-                                    x(k)=x'(k)+K(k)*(z(k)-H*x'(k)) */
-    CvMat* transition_matrix;   /* state transition matrix (A) */
-    CvMat* control_matrix;      /* control matrix (B)
-                                   (it is not used if there is no control)*/
-    CvMat* measurement_matrix;  /* measurement matrix (H) */
-    CvMat* process_noise_cov;   /* process noise covariance matrix (Q) */
-    CvMat* measurement_noise_cov; /* measurement noise covariance matrix (R) */
-    CvMat* error_cov_pre;       /* priori error estimate covariance matrix (P'(k)):
-                                    P'(k)=A*P(k-1)*At + Q)*/
-    CvMat* gain;                /* Kalman gain matrix (K(k)):
-                                    K(k)=P'(k)*Ht*inv(H*P'(k)*Ht+R)*/
-    CvMat* error_cov_post;      /* posteriori error estimate covariance matrix (P(k)):
-                                    P(k)=(I-K(k)*H)*P'(k) */
-    CvMat* temp1;               /* temporary matrices */
-    CvMat* temp2;
-    CvMat* temp3;
-    CvMat* temp4;
-    CvMat* temp5;
-} CvKalman;
-
-/* Creates Kalman filter and sets A, B, Q, R and state to some initial values */
-CVAPI(CvKalman*) cvCreateKalman( int dynam_params, int measure_params,
-                                 int control_params CV_DEFAULT(0));
-
-/* Releases Kalman filter state */
-CVAPI(void)  cvReleaseKalman( CvKalman** kalman);
-
-/* Updates Kalman filter by time (predicts future state of the system) */
-CVAPI(const CvMat*)  cvKalmanPredict( CvKalman* kalman,
-                                      const CvMat* control CV_DEFAULT(NULL));
-
-/* Updates Kalman filter by measurement
-   (corrects state of the system and internal matrices) */
-CVAPI(const CvMat*)  cvKalmanCorrect( CvKalman* kalman, const CvMat* measurement );
-
-#define cvKalmanUpdateByTime  cvKalmanPredict
-#define cvKalmanUpdateByMeasurement cvKalmanCorrect
-
-/** @} video_c */
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-
-#endif // __OPENCV_TRACKING_C_H__
diff --git a/IPL/include/opencv/opencv2/videoio.hpp b/IPL/include/opencv/opencv2/videoio.hpp
index 6b3b97b..e5b74ee 100644
--- a/IPL/include/opencv/opencv2/videoio.hpp
+++ b/IPL/include/opencv/opencv2/videoio.hpp
@@ -40,17 +40,26 @@
 //
 //M*/
 
-#ifndef __OPENCV_VIDEOIO_HPP__
-#define __OPENCV_VIDEOIO_HPP__
+#ifndef OPENCV_VIDEOIO_HPP
+#define OPENCV_VIDEOIO_HPP
 
 #include "opencv2/core.hpp"
 
 /**
-  @defgroup videoio Media I/O
+  @defgroup videoio Video I/O
+
+  @brief Read and write video or image sequences with OpenCV
+
+  ### See also:
+  - @ref videoio_overview
+  - Tutorials: @ref tutorial_table_of_content_videoio
   @{
-    @defgroup videoio_c C API
-    @defgroup videoio_ios iOS glue
-    @defgroup videoio_winrt WinRT glue
+    @defgroup videoio_flags_base Flags for video I/O
+    @defgroup videoio_flags_others Additional flags for video I/O API backends
+    @defgroup videoio_c C API for video I/O
+    @defgroup videoio_ios iOS glue for video I/O
+    @defgroup videoio_winrt WinRT glue for video I/O
+    @defgroup videoio_registry Query I/O API backends registry
   @}
 */
 
@@ -65,60 +74,85 @@ namespace cv
 //! @addtogroup videoio
 //! @{
 
-// Camera API
-enum { CAP_ANY          = 0,     // autodetect
-       CAP_VFW          = 200,   // platform native
-       CAP_V4L          = 200,
-       CAP_V4L2         = CAP_V4L,
-       CAP_FIREWARE     = 300,   // IEEE 1394 drivers
-       CAP_FIREWIRE     = CAP_FIREWARE,
-       CAP_IEEE1394     = CAP_FIREWARE,
-       CAP_DC1394       = CAP_FIREWARE,
-       CAP_CMU1394      = CAP_FIREWARE,
-       CAP_QT           = 500,   // QuickTime
-       CAP_UNICAP       = 600,   // Unicap drivers
-       CAP_DSHOW        = 700,   // DirectShow (via videoInput)
-       CAP_PVAPI        = 800,   // PvAPI, Prosilica GigE SDK
-       CAP_OPENNI       = 900,   // OpenNI (for Kinect)
-       CAP_OPENNI_ASUS  = 910,   // OpenNI (for Asus Xtion)
-       CAP_ANDROID      = 1000,  // Android - not used
-       CAP_XIAPI        = 1100,  // XIMEA Camera API
-       CAP_AVFOUNDATION = 1200,  // AVFoundation framework for iOS (OS X Lion will have the same API)
-       CAP_GIGANETIX    = 1300,  // Smartek Giganetix GigEVisionSDK
-       CAP_MSMF         = 1400,  // Microsoft Media Foundation (via videoInput)
-       CAP_WINRT        = 1410,  // Microsoft Windows Runtime using Media Foundation
-       CAP_INTELPERC    = 1500,  // Intel Perceptual Computing SDK
-       CAP_OPENNI2      = 1600,  // OpenNI2 (for Kinect)
-       CAP_OPENNI2_ASUS = 1610,  // OpenNI2 (for Asus Xtion and Occipital Structure sensors)
-       CAP_GPHOTO2      = 1700,  // gPhoto2 connection
-       CAP_GSTREAMER    = 1800,  // GStreamer
-       CAP_FFMPEG       = 1900,  // FFMPEG
-       CAP_IMAGES       = 2000   // OpenCV Image Sequence (e.g. img_%02d.jpg)
+//! @addtogroup videoio_flags_base
+//! @{
+
+
+/** @brief %VideoCapture API backends identifier.
+
+Select preferred API for a capture object.
+To be used in the VideoCapture::VideoCapture() constructor or VideoCapture::open()
+
+@note Backends are available only if they have been built with your OpenCV binaries.
+See @ref videoio_overview for more information.
+*/
+enum VideoCaptureAPIs {
+       CAP_ANY          = 0,            //!< Auto detect == 0
+       CAP_VFW          = 200,          //!< Video For Windows (obsolete, removed)
+       CAP_V4L          = 200,          //!< V4L/V4L2 capturing support
+       CAP_V4L2         = CAP_V4L,      //!< Same as CAP_V4L
+       CAP_FIREWIRE     = 300,          //!< IEEE 1394 drivers
+       CAP_FIREWARE     = CAP_FIREWIRE, //!< Same value as CAP_FIREWIRE
+       CAP_IEEE1394     = CAP_FIREWIRE, //!< Same value as CAP_FIREWIRE
+       CAP_DC1394       = CAP_FIREWIRE, //!< Same value as CAP_FIREWIRE
+       CAP_CMU1394      = CAP_FIREWIRE, //!< Same value as CAP_FIREWIRE
+       CAP_QT           = 500,          //!< QuickTime (obsolete, removed)
+       CAP_UNICAP       = 600,          //!< Unicap drivers (obsolete, removed)
+       CAP_DSHOW        = 700,          //!< DirectShow (via videoInput)
+       CAP_PVAPI        = 800,          //!< PvAPI, Prosilica GigE SDK
+       CAP_OPENNI       = 900,          //!< OpenNI (for Kinect)
+       CAP_OPENNI_ASUS  = 910,          //!< OpenNI (for Asus Xtion)
+       CAP_ANDROID      = 1000,         //!< Android - not used
+       CAP_XIAPI        = 1100,         //!< XIMEA Camera API
+       CAP_AVFOUNDATION = 1200,         //!< AVFoundation framework for iOS (OS X Lion will have the same API)
+       CAP_GIGANETIX    = 1300,         //!< Smartek Giganetix GigEVisionSDK
+       CAP_MSMF         = 1400,         //!< Microsoft Media Foundation (via videoInput)
+       CAP_WINRT        = 1410,         //!< Microsoft Windows Runtime using Media Foundation
+       CAP_INTELPERC    = 1500,         //!< RealSense (former Intel Perceptual Computing SDK)
+       CAP_REALSENSE    = 1500,         //!< Synonym for CAP_INTELPERC
+       CAP_OPENNI2      = 1600,         //!< OpenNI2 (for Kinect)
+       CAP_OPENNI2_ASUS = 1610,         //!< OpenNI2 (for Asus Xtion and Occipital Structure sensors)
+       CAP_GPHOTO2      = 1700,         //!< gPhoto2 connection
+       CAP_GSTREAMER    = 1800,         //!< GStreamer
+       CAP_FFMPEG       = 1900,         //!< Open and record video file or stream using the FFMPEG library
+       CAP_IMAGES       = 2000,         //!< OpenCV Image Sequence (e.g. img_%02d.jpg)
+       CAP_ARAVIS       = 2100,         //!< Aravis SDK
+       CAP_OPENCV_MJPEG = 2200,         //!< Built-in OpenCV MotionJPEG codec
+       CAP_INTEL_MFX    = 2300,         //!< Intel MediaSDK
+       CAP_XINE         = 2400,         //!< XINE engine (Linux)
      };
 
-// generic properties (based on DC1394 properties)
-enum { CAP_PROP_POS_MSEC       =0,
-       CAP_PROP_POS_FRAMES     =1,
-       CAP_PROP_POS_AVI_RATIO  =2,
-       CAP_PROP_FRAME_WIDTH    =3,
-       CAP_PROP_FRAME_HEIGHT   =4,
-       CAP_PROP_FPS            =5,
-       CAP_PROP_FOURCC         =6,
-       CAP_PROP_FRAME_COUNT    =7,
-       CAP_PROP_FORMAT         =8,
-       CAP_PROP_MODE           =9,
-       CAP_PROP_BRIGHTNESS    =10,
-       CAP_PROP_CONTRAST      =11,
-       CAP_PROP_SATURATION    =12,
-       CAP_PROP_HUE           =13,
-       CAP_PROP_GAIN          =14,
-       CAP_PROP_EXPOSURE      =15,
-       CAP_PROP_CONVERT_RGB   =16,
-       CAP_PROP_WHITE_BALANCE_BLUE_U =17,
-       CAP_PROP_RECTIFICATION =18,
+/** @brief %VideoCapture generic properties identifier.
+
+ Reading / writing properties involves many layers. Some unexpected results might happen along this chain.
+ Effective behaviour depends on device hardware, driver and API Backend.
+ @sa videoio_flags_others, VideoCapture::get(), VideoCapture::set()
+*/
+enum VideoCaptureProperties {
+       CAP_PROP_POS_MSEC       =0, //!< Current position of the video file in milliseconds.
+       CAP_PROP_POS_FRAMES     =1, //!< 0-based index of the frame to be decoded/captured next.
+       CAP_PROP_POS_AVI_RATIO  =2, //!< Relative position of the video file: 0=start of the film, 1=end of the film.
+       CAP_PROP_FRAME_WIDTH    =3, //!< Width of the frames in the video stream.
+       CAP_PROP_FRAME_HEIGHT   =4, //!< Height of the frames in the video stream.
+       CAP_PROP_FPS            =5, //!< Frame rate.
+       CAP_PROP_FOURCC         =6, //!< 4-character code of codec. see VideoWriter::fourcc .
+       CAP_PROP_FRAME_COUNT    =7, //!< Number of frames in the video file.
+       CAP_PROP_FORMAT         =8, //!< Format of the %Mat objects (see Mat::type()) returned by VideoCapture::retrieve().
+                                   //!< Set value -1 to fetch undecoded RAW video streams (as Mat 8UC1).
+       CAP_PROP_MODE           =9, //!< Backend-specific value indicating the current capture mode.
+       CAP_PROP_BRIGHTNESS    =10, //!< Brightness of the image (only for those cameras that support it).
+       CAP_PROP_CONTRAST      =11, //!< Contrast of the image (only for cameras).
+       CAP_PROP_SATURATION    =12, //!< Saturation of the image (only for cameras).
+       CAP_PROP_HUE           =13, //!< Hue of the image (only for cameras).
+       CAP_PROP_GAIN          =14, //!< Gain of the image (only for those cameras that support it).
+       CAP_PROP_EXPOSURE      =15, //!< Exposure (only for those cameras that support it).
+       CAP_PROP_CONVERT_RGB   =16, //!< Boolean flags indicating whether images should be converted to RGB. <br/>
+                                   //!< *GStreamer note*: The flag is ignored if a custom pipeline is used. It is the user's responsibility to interpret the pipeline output.
+       CAP_PROP_WHITE_BALANCE_BLUE_U =17, //!< Currently unsupported.
+       CAP_PROP_RECTIFICATION =18, //!< Rectification flag for stereo cameras (note: only supported by DC1394 v 2.x backend currently).
        CAP_PROP_MONOCHROME    =19,
        CAP_PROP_SHARPNESS     =20,
-       CAP_PROP_AUTO_EXPOSURE =21, // DC1394: exposure control done by camera, user can adjust refernce level using this feature
+       CAP_PROP_AUTO_EXPOSURE =21, //!< DC1394: exposure control done by camera, user can adjust reference level using this feature.
        CAP_PROP_GAMMA         =22,
        CAP_PROP_TEMPERATURE   =23,
        CAP_PROP_TRIGGER       =24,
@@ -133,46 +167,72 @@ enum { CAP_PROP_POS_MSEC       =0,
        CAP_PROP_TILT          =34,
        CAP_PROP_ROLL          =35,
        CAP_PROP_IRIS          =36,
-       CAP_PROP_SETTINGS      =37,
+       CAP_PROP_SETTINGS      =37, //!< Pop up video/camera filter dialog (note: only supported by DSHOW backend currently. The property value is ignored)
        CAP_PROP_BUFFERSIZE    =38,
-       CAP_PROP_AUTOFOCUS     =39
+       CAP_PROP_AUTOFOCUS     =39,
+       CAP_PROP_SAR_NUM       =40, //!< Sample aspect ratio: num/den (num)
+       CAP_PROP_SAR_DEN       =41, //!< Sample aspect ratio: num/den (den)
+       CAP_PROP_BACKEND       =42, //!< Current backend (enum VideoCaptureAPIs). Read-only property
+       CAP_PROP_CHANNEL       =43, //!< Video input or Channel Number (only for those cameras that support it)
+       CAP_PROP_AUTO_WB       =44, //!< enable/ disable auto white-balance
+       CAP_PROP_WB_TEMPERATURE=45, //!< white-balance color temperature
+       CAP_PROP_CODEC_PIXEL_FORMAT =46,    //!< (read-only) codec's pixel format. 4-character code - see VideoWriter::fourcc . Subset of [AV_PIX_FMT_*](https://github.com/FFmpeg/FFmpeg/blob/master/libavcodec/raw.c) or -1 if unknown
+       CAP_PROP_BITRATE       =47, //!< (read-only) Video bitrate in kbits/s
+#ifndef CV_DOXYGEN
+       CV__CAP_PROP_LATEST
+#endif
      };
 
+/** @brief %VideoWriter generic properties identifier.
+ @sa VideoWriter::get(), VideoWriter::set()
+*/
+enum VideoWriterProperties {
+  VIDEOWRITER_PROP_QUALITY = 1,    //!< Current quality (0..100%) of the encoded videostream. Can be adjusted dynamically in some codecs.
+  VIDEOWRITER_PROP_FRAMEBYTES = 2, //!< (Read-only): Size of just encoded video frame. Note that the encoding order may be different from representation order.
+  VIDEOWRITER_PROP_NSTRIPES = 3    //!< Number of stripes for parallel encoding. -1 for auto detection.
+};
 
-// Generic camera output modes.
-// Currently, these are supported through the libv4l interface only.
-enum { CAP_MODE_BGR  = 0, // BGR24 (default)
-       CAP_MODE_RGB  = 1, // RGB24
-       CAP_MODE_GRAY = 2, // Y8
-       CAP_MODE_YUYV = 3  // YUYV
-     };
+//! @} videoio_flags_base
 
+//! @addtogroup videoio_flags_others
+//! @{
 
-// DC1394 only
-// modes of the controlling registers (can be: auto, manual, auto single push, absolute Latter allowed with any other mode)
-// every feature can have only one mode turned on at a time
-enum { CAP_PROP_DC1394_OFF                = -4, //turn the feature off (not controlled manually nor automatically)
-       CAP_PROP_DC1394_MODE_MANUAL        = -3, //set automatically when a value of the feature is set by the user
+/** @name IEEE 1394 drivers
+    @{
+*/
+
+/** @brief Modes of the IEEE 1394 controlling registers
+(can be: auto, manual, auto single push, absolute; the latter is allowed with any other mode)
+every feature can have only one mode turned on at a time
+*/
+enum { CAP_PROP_DC1394_OFF                = -4, //!< turn the feature off (not controlled manually nor automatically).
+       CAP_PROP_DC1394_MODE_MANUAL        = -3, //!< set automatically when a value of the feature is set by the user.
        CAP_PROP_DC1394_MODE_AUTO          = -2,
        CAP_PROP_DC1394_MODE_ONE_PUSH_AUTO = -1,
        CAP_PROP_DC1394_MAX                = 31
      };
 
+//! @} IEEE 1394 drivers
+
+/** @name OpenNI (for Kinect)
+    @{
+*/
 
-// OpenNI map generators
+//! OpenNI map generators
 enum { CAP_OPENNI_DEPTH_GENERATOR = 1 << 31,
        CAP_OPENNI_IMAGE_GENERATOR = 1 << 30,
-       CAP_OPENNI_GENERATORS_MASK = CAP_OPENNI_DEPTH_GENERATOR + CAP_OPENNI_IMAGE_GENERATOR
+       CAP_OPENNI_IR_GENERATOR    = 1 << 29,
+       CAP_OPENNI_GENERATORS_MASK = CAP_OPENNI_DEPTH_GENERATOR + CAP_OPENNI_IMAGE_GENERATOR + CAP_OPENNI_IR_GENERATOR
      };
 
-// Properties of cameras available through OpenNI interfaces
+//! Properties of cameras available through OpenNI backend
 enum { CAP_PROP_OPENNI_OUTPUT_MODE       = 100,
-       CAP_PROP_OPENNI_FRAME_MAX_DEPTH   = 101, // in mm
-       CAP_PROP_OPENNI_BASELINE          = 102, // in mm
-       CAP_PROP_OPENNI_FOCAL_LENGTH      = 103, // in pixels
-       CAP_PROP_OPENNI_REGISTRATION      = 104, // flag that synchronizes the remapping depth map to image map
-                                                // by changing depth generator's view point (if the flag is "on") or
-                                                // sets this view point to its normal one (if the flag is "off").
+       CAP_PROP_OPENNI_FRAME_MAX_DEPTH   = 101, //!< In mm
+       CAP_PROP_OPENNI_BASELINE          = 102, //!< In mm
+       CAP_PROP_OPENNI_FOCAL_LENGTH      = 103, //!< In pixels
+       CAP_PROP_OPENNI_REGISTRATION      = 104, //!< Flag that synchronizes the remapping depth map to image map
+                                                //!< by changing depth generator's view point (if the flag is "on") or
+                                                //!< sets this view point to its normal one (if the flag is "off").
        CAP_PROP_OPENNI_REGISTRATION_ON   = CAP_PROP_OPENNI_REGISTRATION,
        CAP_PROP_OPENNI_APPROX_FRAME_SYNC = 105,
        CAP_PROP_OPENNI_MAX_BUFFER_SIZE   = 106,
@@ -183,28 +243,31 @@ enum { CAP_PROP_OPENNI_OUTPUT_MODE       = 100,
        CAP_PROP_OPENNI2_MIRROR           = 111
      };
 
-// OpenNI shortcats
+//! OpenNI shortcuts
 enum { CAP_OPENNI_IMAGE_GENERATOR_PRESENT         = CAP_OPENNI_IMAGE_GENERATOR + CAP_PROP_OPENNI_GENERATOR_PRESENT,
        CAP_OPENNI_IMAGE_GENERATOR_OUTPUT_MODE     = CAP_OPENNI_IMAGE_GENERATOR + CAP_PROP_OPENNI_OUTPUT_MODE,
+       CAP_OPENNI_DEPTH_GENERATOR_PRESENT         = CAP_OPENNI_DEPTH_GENERATOR + CAP_PROP_OPENNI_GENERATOR_PRESENT,
        CAP_OPENNI_DEPTH_GENERATOR_BASELINE        = CAP_OPENNI_DEPTH_GENERATOR + CAP_PROP_OPENNI_BASELINE,
        CAP_OPENNI_DEPTH_GENERATOR_FOCAL_LENGTH    = CAP_OPENNI_DEPTH_GENERATOR + CAP_PROP_OPENNI_FOCAL_LENGTH,
        CAP_OPENNI_DEPTH_GENERATOR_REGISTRATION    = CAP_OPENNI_DEPTH_GENERATOR + CAP_PROP_OPENNI_REGISTRATION,
-       CAP_OPENNI_DEPTH_GENERATOR_REGISTRATION_ON = CAP_OPENNI_DEPTH_GENERATOR_REGISTRATION
+       CAP_OPENNI_DEPTH_GENERATOR_REGISTRATION_ON = CAP_OPENNI_DEPTH_GENERATOR_REGISTRATION,
+       CAP_OPENNI_IR_GENERATOR_PRESENT            = CAP_OPENNI_IR_GENERATOR + CAP_PROP_OPENNI_GENERATOR_PRESENT,
      };
 
-// OpenNI data given from depth generator
-enum { CAP_OPENNI_DEPTH_MAP         = 0, // Depth values in mm (CV_16UC1)
-       CAP_OPENNI_POINT_CLOUD_MAP   = 1, // XYZ in meters (CV_32FC3)
-       CAP_OPENNI_DISPARITY_MAP     = 2, // Disparity in pixels (CV_8UC1)
-       CAP_OPENNI_DISPARITY_MAP_32F = 3, // Disparity in pixels (CV_32FC1)
-       CAP_OPENNI_VALID_DEPTH_MASK  = 4, // CV_8UC1
+//! OpenNI data given from depth generator
+enum { CAP_OPENNI_DEPTH_MAP         = 0, //!< Depth values in mm (CV_16UC1)
+       CAP_OPENNI_POINT_CLOUD_MAP   = 1, //!< XYZ in meters (CV_32FC3)
+       CAP_OPENNI_DISPARITY_MAP     = 2, //!< Disparity in pixels (CV_8UC1)
+       CAP_OPENNI_DISPARITY_MAP_32F = 3, //!< Disparity in pixels (CV_32FC1)
+       CAP_OPENNI_VALID_DEPTH_MASK  = 4, //!< CV_8UC1
 
-       // Data given from RGB image generator
-       CAP_OPENNI_BGR_IMAGE         = 5,
-       CAP_OPENNI_GRAY_IMAGE        = 6
+       CAP_OPENNI_BGR_IMAGE         = 5, //!< Data given from RGB image generator
+       CAP_OPENNI_GRAY_IMAGE        = 6, //!< Data given from RGB image generator
+
+       CAP_OPENNI_IR_IMAGE          = 7  //!< Data given from IR image generator
      };
 
-// Supported output modes of OpenNI image generator
+//! Supported output modes of OpenNI image generator
 enum { CAP_OPENNI_VGA_30HZ  = 0,
        CAP_OPENNI_SXGA_15HZ = 1,
        CAP_OPENNI_SXGA_30HZ = 2,
@@ -212,203 +275,234 @@ enum { CAP_OPENNI_VGA_30HZ  = 0,
        CAP_OPENNI_QVGA_60HZ = 4
      };
 
+//! @} OpenNI
+
+/** @name GStreamer
+    @{
+*/
 
-// GStreamer
-enum { CAP_PROP_GSTREAMER_QUEUE_LENGTH = 200 // default is 1
+enum { CAP_PROP_GSTREAMER_QUEUE_LENGTH = 200 //!< Default is 1
      };
 
+//! @} GStreamer
 
-// PVAPI
-enum { CAP_PROP_PVAPI_MULTICASTIP           = 300, // ip for anable multicast master mode. 0 for disable multicast
-       CAP_PROP_PVAPI_FRAMESTARTTRIGGERMODE = 301, // FrameStartTriggerMode: Determines how a frame is initiated
-       CAP_PROP_PVAPI_DECIMATIONHORIZONTAL  = 302, // Horizontal sub-sampling of the image
-       CAP_PROP_PVAPI_DECIMATIONVERTICAL    = 303, // Vertical sub-sampling of the image
-       CAP_PROP_PVAPI_BINNINGX              = 304, // Horizontal binning factor
-       CAP_PROP_PVAPI_BINNINGY              = 305, // Vertical binning factor
-       CAP_PROP_PVAPI_PIXELFORMAT           = 306  // Pixel format
+/** @name PvAPI, Prosilica GigE SDK
+    @{
+*/
+
+//! PVAPI
+enum { CAP_PROP_PVAPI_MULTICASTIP           = 300, //!< IP to enable multicast master mode. 0 to disable multicast.
+       CAP_PROP_PVAPI_FRAMESTARTTRIGGERMODE = 301, //!< FrameStartTriggerMode: Determines how a frame is initiated.
+       CAP_PROP_PVAPI_DECIMATIONHORIZONTAL  = 302, //!< Horizontal sub-sampling of the image.
+       CAP_PROP_PVAPI_DECIMATIONVERTICAL    = 303, //!< Vertical sub-sampling of the image.
+       CAP_PROP_PVAPI_BINNINGX              = 304, //!< Horizontal binning factor.
+       CAP_PROP_PVAPI_BINNINGY              = 305, //!< Vertical binning factor.
+       CAP_PROP_PVAPI_PIXELFORMAT           = 306  //!< Pixel format.
      };
 
-// PVAPI: FrameStartTriggerMode
-enum { CAP_PVAPI_FSTRIGMODE_FREERUN     = 0,    // Freerun
-       CAP_PVAPI_FSTRIGMODE_SYNCIN1     = 1,    // SyncIn1
-       CAP_PVAPI_FSTRIGMODE_SYNCIN2     = 2,    // SyncIn2
-       CAP_PVAPI_FSTRIGMODE_FIXEDRATE   = 3,    // FixedRate
-       CAP_PVAPI_FSTRIGMODE_SOFTWARE    = 4     // Software
+//! PVAPI: FrameStartTriggerMode
+enum { CAP_PVAPI_FSTRIGMODE_FREERUN     = 0,    //!< Freerun
+       CAP_PVAPI_FSTRIGMODE_SYNCIN1     = 1,    //!< SyncIn1
+       CAP_PVAPI_FSTRIGMODE_SYNCIN2     = 2,    //!< SyncIn2
+       CAP_PVAPI_FSTRIGMODE_FIXEDRATE   = 3,    //!< FixedRate
+       CAP_PVAPI_FSTRIGMODE_SOFTWARE    = 4     //!< Software
      };
 
-// PVAPI: DecimationHorizontal, DecimationVertical
-enum { CAP_PVAPI_DECIMATION_OFF       = 1,    // Off
-       CAP_PVAPI_DECIMATION_2OUTOF4   = 2,    // 2 out of 4 decimation
-       CAP_PVAPI_DECIMATION_2OUTOF8   = 4,    // 2 out of 8 decimation
-       CAP_PVAPI_DECIMATION_2OUTOF16  = 8     // 2 out of 16 decimation
+//! PVAPI: DecimationHorizontal, DecimationVertical
+enum { CAP_PVAPI_DECIMATION_OFF       = 1,    //!< Off
+       CAP_PVAPI_DECIMATION_2OUTOF4   = 2,    //!< 2 out of 4 decimation
+       CAP_PVAPI_DECIMATION_2OUTOF8   = 4,    //!< 2 out of 8 decimation
+       CAP_PVAPI_DECIMATION_2OUTOF16  = 8     //!< 2 out of 16 decimation
      };
 
-// PVAPI: PixelFormat
-enum { CAP_PVAPI_PIXELFORMAT_MONO8    = 1,    // Mono8
-       CAP_PVAPI_PIXELFORMAT_MONO16   = 2,    // Mono16
-       CAP_PVAPI_PIXELFORMAT_BAYER8   = 3,    // Bayer8
-       CAP_PVAPI_PIXELFORMAT_BAYER16  = 4,    // Bayer16
-       CAP_PVAPI_PIXELFORMAT_RGB24    = 5,    // Rgb24
-       CAP_PVAPI_PIXELFORMAT_BGR24    = 6,    // Bgr24
-       CAP_PVAPI_PIXELFORMAT_RGBA32   = 7,    // Rgba32
-       CAP_PVAPI_PIXELFORMAT_BGRA32   = 8,    // Bgra32
+//! PVAPI: PixelFormat
+enum { CAP_PVAPI_PIXELFORMAT_MONO8    = 1,    //!< Mono8
+       CAP_PVAPI_PIXELFORMAT_MONO16   = 2,    //!< Mono16
+       CAP_PVAPI_PIXELFORMAT_BAYER8   = 3,    //!< Bayer8
+       CAP_PVAPI_PIXELFORMAT_BAYER16  = 4,    //!< Bayer16
+       CAP_PVAPI_PIXELFORMAT_RGB24    = 5,    //!< Rgb24
+       CAP_PVAPI_PIXELFORMAT_BGR24    = 6,    //!< Bgr24
+       CAP_PVAPI_PIXELFORMAT_RGBA32   = 7,    //!< Rgba32
+       CAP_PVAPI_PIXELFORMAT_BGRA32   = 8,    //!< Bgra32
      };
 
-       // Properties of cameras available through XIMEA SDK interface
-enum { CAP_PROP_XI_DOWNSAMPLING                                 = 400, // Change image resolution by binning or skipping.
-       CAP_PROP_XI_DATA_FORMAT                                  = 401, // Output data format.
-       CAP_PROP_XI_OFFSET_X                                     = 402, // Horizontal offset from the origin to the area of interest (in pixels).
-       CAP_PROP_XI_OFFSET_Y                                     = 403, // Vertical offset from the origin to the area of interest (in pixels).
-       CAP_PROP_XI_TRG_SOURCE                                   = 404, // Defines source of trigger.
-       CAP_PROP_XI_TRG_SOFTWARE                                 = 405, // Generates an internal trigger. PRM_TRG_SOURCE must be set to TRG_SOFTWARE.
-       CAP_PROP_XI_GPI_SELECTOR                                 = 406, // Selects general purpose input
-       CAP_PROP_XI_GPI_MODE                                     = 407, // Set general purpose input mode
-       CAP_PROP_XI_GPI_LEVEL                                    = 408, // Get general purpose level
-       CAP_PROP_XI_GPO_SELECTOR                                 = 409, // Selects general purpose output
-       CAP_PROP_XI_GPO_MODE                                     = 410, // Set general purpose output mode
-       CAP_PROP_XI_LED_SELECTOR                                 = 411, // Selects camera signalling LED
-       CAP_PROP_XI_LED_MODE                                     = 412, // Define camera signalling LED functionality
-       CAP_PROP_XI_MANUAL_WB                                    = 413, // Calculates White Balance(must be called during acquisition)
-       CAP_PROP_XI_AUTO_WB                                      = 414, // Automatic white balance
-       CAP_PROP_XI_AEAG                                         = 415, // Automatic exposure/gain
-       CAP_PROP_XI_EXP_PRIORITY                                 = 416, // Exposure priority (0.5 - exposure 50%, gain 50%).
-       CAP_PROP_XI_AE_MAX_LIMIT                                 = 417, // Maximum limit of exposure in AEAG procedure
-       CAP_PROP_XI_AG_MAX_LIMIT                                 = 418,  // Maximum limit of gain in AEAG procedure
-       CAP_PROP_XI_AEAG_LEVEL                                   = 419, // Average intensity of output signal AEAG should achieve(in %)
-       CAP_PROP_XI_TIMEOUT                                      = 420, // Image capture timeout in milliseconds
-       CAP_PROP_XI_EXPOSURE                                     = 421, // Exposure time in microseconds
-       CAP_PROP_XI_EXPOSURE_BURST_COUNT                         = 422, // Sets the number of times of exposure in one frame.
-       CAP_PROP_XI_GAIN_SELECTOR                                = 423, // Gain selector for parameter Gain allows to select different type of gains.
-       CAP_PROP_XI_GAIN                                         = 424, // Gain in dB
-       CAP_PROP_XI_DOWNSAMPLING_TYPE                            = 426, // Change image downsampling type.
-       CAP_PROP_XI_BINNING_SELECTOR                             = 427, // Binning engine selector.
-       CAP_PROP_XI_BINNING_VERTICAL                             = 428, // Vertical Binning - number of vertical photo-sensitive cells to combine together.
-       CAP_PROP_XI_BINNING_HORIZONTAL                           = 429, // Horizontal Binning - number of horizontal photo-sensitive cells to combine together.
-       CAP_PROP_XI_BINNING_PATTERN                              = 430, // Binning pattern type.
-       CAP_PROP_XI_DECIMATION_SELECTOR                          = 431, // Decimation engine selector.
-       CAP_PROP_XI_DECIMATION_VERTICAL                          = 432, // Vertical Decimation - vertical sub-sampling of the image - reduces the vertical resolution of the image by the specified vertical decimation factor.
-       CAP_PROP_XI_DECIMATION_HORIZONTAL                        = 433, // Horizontal Decimation - horizontal sub-sampling of the image - reduces the horizontal resolution of the image by the specified vertical decimation factor.
-       CAP_PROP_XI_DECIMATION_PATTERN                           = 434, // Decimation pattern type.
-       CAP_PROP_XI_TEST_PATTERN_GENERATOR_SELECTOR              = 587, // Selects which test pattern generator is controlled by the TestPattern feature.
-       CAP_PROP_XI_TEST_PATTERN                                 = 588, // Selects which test pattern type is generated by the selected generator.
-       CAP_PROP_XI_IMAGE_DATA_FORMAT                            = 435, // Output data format.
-       CAP_PROP_XI_SHUTTER_TYPE                                 = 436, // Change sensor shutter type(CMOS sensor).
-       CAP_PROP_XI_SENSOR_TAPS                                  = 437, // Number of taps
-       CAP_PROP_XI_AEAG_ROI_OFFSET_X                            = 439, // Automatic exposure/gain ROI offset X
-       CAP_PROP_XI_AEAG_ROI_OFFSET_Y                            = 440, // Automatic exposure/gain ROI offset Y
-       CAP_PROP_XI_AEAG_ROI_WIDTH                               = 441, // Automatic exposure/gain ROI Width
-       CAP_PROP_XI_AEAG_ROI_HEIGHT                              = 442, // Automatic exposure/gain ROI Height
-       CAP_PROP_XI_BPC                                          = 445, // Correction of bad pixels
-       CAP_PROP_XI_WB_KR                                        = 448, // White balance red coefficient
-       CAP_PROP_XI_WB_KG                                        = 449, // White balance green coefficient
-       CAP_PROP_XI_WB_KB                                        = 450, // White balance blue coefficient
-       CAP_PROP_XI_WIDTH                                        = 451, // Width of the Image provided by the device (in pixels).
-       CAP_PROP_XI_HEIGHT                                       = 452, // Height of the Image provided by the device (in pixels).
-       CAP_PROP_XI_REGION_SELECTOR                              = 589, // Selects Region in Multiple ROI which parameters are set by width, height, ... ,region mode
-       CAP_PROP_XI_REGION_MODE                                  = 595, // Activates/deactivates Region selected by Region Selector
-       CAP_PROP_XI_LIMIT_BANDWIDTH                              = 459, // Set/get bandwidth(datarate)(in Megabits)
-       CAP_PROP_XI_SENSOR_DATA_BIT_DEPTH                        = 460, // Sensor output data bit depth.
-       CAP_PROP_XI_OUTPUT_DATA_BIT_DEPTH                        = 461, // Device output data bit depth.
-       CAP_PROP_XI_IMAGE_DATA_BIT_DEPTH                         = 462, // bitdepth of data returned by function xiGetImage
-       CAP_PROP_XI_OUTPUT_DATA_PACKING                          = 463, // Device output data packing (or grouping) enabled. Packing could be enabled if output_data_bit_depth > 8 and packing capability is available.
-       CAP_PROP_XI_OUTPUT_DATA_PACKING_TYPE                     = 464, // Data packing type. Some cameras supports only specific packing type.
-       CAP_PROP_XI_IS_COOLED                                    = 465, // Returns 1 for cameras that support cooling.
-       CAP_PROP_XI_COOLING                                      = 466, // Start camera cooling.
-       CAP_PROP_XI_TARGET_TEMP                                  = 467, // Set sensor target temperature for cooling.
-       CAP_PROP_XI_CHIP_TEMP                                    = 468, // Camera sensor temperature
-       CAP_PROP_XI_HOUS_TEMP                                    = 469, // Camera housing tepmerature
-       CAP_PROP_XI_HOUS_BACK_SIDE_TEMP                          = 590, // Camera housing back side tepmerature
-       CAP_PROP_XI_SENSOR_BOARD_TEMP                            = 596, // Camera sensor board temperature
-       CAP_PROP_XI_CMS                                          = 470, // Mode of color management system.
-       CAP_PROP_XI_APPLY_CMS                                    = 471, // Enable applying of CMS profiles to xiGetImage (see XI_PRM_INPUT_CMS_PROFILE, XI_PRM_OUTPUT_CMS_PROFILE).
-       CAP_PROP_XI_IMAGE_IS_COLOR                               = 474, // Returns 1 for color cameras.
-       CAP_PROP_XI_COLOR_FILTER_ARRAY                           = 475, // Returns color filter array type of RAW data.
-       CAP_PROP_XI_GAMMAY                                       = 476, // Luminosity gamma
-       CAP_PROP_XI_GAMMAC                                       = 477, // Chromaticity gamma
-       CAP_PROP_XI_SHARPNESS                                    = 478, // Sharpness Strenght
-       CAP_PROP_XI_CC_MATRIX_00                                 = 479, // Color Correction Matrix element [0][0]
-       CAP_PROP_XI_CC_MATRIX_01                                 = 480, // Color Correction Matrix element [0][1]
-       CAP_PROP_XI_CC_MATRIX_02                                 = 481, // Color Correction Matrix element [0][2]
-       CAP_PROP_XI_CC_MATRIX_03                                 = 482, // Color Correction Matrix element [0][3]
-       CAP_PROP_XI_CC_MATRIX_10                                 = 483, // Color Correction Matrix element [1][0]
-       CAP_PROP_XI_CC_MATRIX_11                                 = 484, // Color Correction Matrix element [1][1]
-       CAP_PROP_XI_CC_MATRIX_12                                 = 485, // Color Correction Matrix element [1][2]
-       CAP_PROP_XI_CC_MATRIX_13                                 = 486, // Color Correction Matrix element [1][3]
-       CAP_PROP_XI_CC_MATRIX_20                                 = 487, // Color Correction Matrix element [2][0]
-       CAP_PROP_XI_CC_MATRIX_21                                 = 488, // Color Correction Matrix element [2][1]
-       CAP_PROP_XI_CC_MATRIX_22                                 = 489, // Color Correction Matrix element [2][2]
-       CAP_PROP_XI_CC_MATRIX_23                                 = 490, // Color Correction Matrix element [2][3]
-       CAP_PROP_XI_CC_MATRIX_30                                 = 491, // Color Correction Matrix element [3][0]
-       CAP_PROP_XI_CC_MATRIX_31                                 = 492, // Color Correction Matrix element [3][1]
-       CAP_PROP_XI_CC_MATRIX_32                                 = 493, // Color Correction Matrix element [3][2]
-       CAP_PROP_XI_CC_MATRIX_33                                 = 494, // Color Correction Matrix element [3][3]
-       CAP_PROP_XI_DEFAULT_CC_MATRIX                            = 495, // Set default Color Correction Matrix
-       CAP_PROP_XI_TRG_SELECTOR                                 = 498, // Selects the type of trigger.
-       CAP_PROP_XI_ACQ_FRAME_BURST_COUNT                        = 499, // Sets number of frames acquired by burst. This burst is used only if trigger is set to FrameBurstStart
-       CAP_PROP_XI_DEBOUNCE_EN                                  = 507, // Enable/Disable debounce to selected GPI
-       CAP_PROP_XI_DEBOUNCE_T0                                  = 508, // Debounce time (x * 10us)
-       CAP_PROP_XI_DEBOUNCE_T1                                  = 509, // Debounce time (x * 10us)
-       CAP_PROP_XI_DEBOUNCE_POL                                 = 510, // Debounce polarity (pol = 1 t0 - falling edge, t1 - rising edge)
-       CAP_PROP_XI_LENS_MODE                                    = 511, // Status of lens control interface. This shall be set to XI_ON before any Lens operations.
-       CAP_PROP_XI_LENS_APERTURE_VALUE                          = 512, // Current lens aperture value in stops. Examples: 2.8, 4, 5.6, 8, 11
-       CAP_PROP_XI_LENS_FOCUS_MOVEMENT_VALUE                    = 513, // Lens current focus movement value to be used by XI_PRM_LENS_FOCUS_MOVE in motor steps.
-       CAP_PROP_XI_LENS_FOCUS_MOVE                              = 514, // Moves lens focus motor by steps set in XI_PRM_LENS_FOCUS_MOVEMENT_VALUE.
-       CAP_PROP_XI_LENS_FOCUS_DISTANCE                          = 515, // Lens focus distance in cm.
-       CAP_PROP_XI_LENS_FOCAL_LENGTH                            = 516, // Lens focal distance in mm.
-       CAP_PROP_XI_LENS_FEATURE_SELECTOR                        = 517, // Selects the current feature which is accessible by XI_PRM_LENS_FEATURE.
-       CAP_PROP_XI_LENS_FEATURE                                 = 518, // Allows access to lens feature value currently selected by XI_PRM_LENS_FEATURE_SELECTOR.
-       CAP_PROP_XI_DEVICE_MODEL_ID                              = 521, // Return device model id
-       CAP_PROP_XI_DEVICE_SN                                    = 522, // Return device serial number
-       CAP_PROP_XI_IMAGE_DATA_FORMAT_RGB32_ALPHA                = 529, // The alpha channel of RGB32 output image format.
-       CAP_PROP_XI_IMAGE_PAYLOAD_SIZE                           = 530, // Buffer size in bytes sufficient for output image returned by xiGetImage
-       CAP_PROP_XI_TRANSPORT_PIXEL_FORMAT                       = 531, // Current format of pixels on transport layer.
-       CAP_PROP_XI_SENSOR_CLOCK_FREQ_HZ                         = 532, // Sensor clock frequency in Hz.
-       CAP_PROP_XI_SENSOR_CLOCK_FREQ_INDEX                      = 533, // Sensor clock frequency index. Sensor with selected frequencies have possibility to set the frequency only by this index.
-       CAP_PROP_XI_SENSOR_OUTPUT_CHANNEL_COUNT                  = 534, // Number of output channels from sensor used for data transfer.
-       CAP_PROP_XI_FRAMERATE                                    = 535, // Define framerate in Hz
-       CAP_PROP_XI_COUNTER_SELECTOR                             = 536, // Select counter
-       CAP_PROP_XI_COUNTER_VALUE                                = 537, // Counter status
-       CAP_PROP_XI_ACQ_TIMING_MODE                              = 538, // Type of sensor frames timing.
-       CAP_PROP_XI_AVAILABLE_BANDWIDTH                          = 539, // Calculate and return available interface bandwidth(int Megabits)
-       CAP_PROP_XI_BUFFER_POLICY                                = 540, // Data move policy
-       CAP_PROP_XI_LUT_EN                                       = 541, // Activates LUT.
-       CAP_PROP_XI_LUT_INDEX                                    = 542, // Control the index (offset) of the coefficient to access in the LUT.
-       CAP_PROP_XI_LUT_VALUE                                    = 543, // Value at entry LUTIndex of the LUT
-       CAP_PROP_XI_TRG_DELAY                                    = 544, // Specifies the delay in microseconds (us) to apply after the trigger reception before activating it.
-       CAP_PROP_XI_TS_RST_MODE                                  = 545, // Defines how time stamp reset engine will be armed
-       CAP_PROP_XI_TS_RST_SOURCE                                = 546, // Defines which source will be used for timestamp reset. Writing this parameter will trigger settings of engine (arming)
-       CAP_PROP_XI_IS_DEVICE_EXIST                              = 547, // Returns 1 if camera connected and works properly.
-       CAP_PROP_XI_ACQ_BUFFER_SIZE                              = 548, // Acquisition buffer size in buffer_size_unit. Default bytes.
-       CAP_PROP_XI_ACQ_BUFFER_SIZE_UNIT                         = 549, // Acquisition buffer size unit in bytes. Default 1. E.g. Value 1024 means that buffer_size is in KiBytes
-       CAP_PROP_XI_ACQ_TRANSPORT_BUFFER_SIZE                    = 550, // Acquisition transport buffer size in bytes
-       CAP_PROP_XI_BUFFERS_QUEUE_SIZE                           = 551, // Queue of field/frame buffers
-       CAP_PROP_XI_ACQ_TRANSPORT_BUFFER_COMMIT                  = 552, // Number of buffers to commit to low level
-       CAP_PROP_XI_RECENT_FRAME                                 = 553, // GetImage returns most recent frame
-       CAP_PROP_XI_DEVICE_RESET                                 = 554, // Resets the camera to default state.
-       CAP_PROP_XI_COLUMN_FPN_CORRECTION                        = 555, // Correction of column FPN
-       CAP_PROP_XI_ROW_FPN_CORRECTION                           = 591, // Correction of row FPN
-       CAP_PROP_XI_SENSOR_MODE                                  = 558, // Current sensor mode. Allows to select sensor mode by one integer. Setting of this parameter affects: image dimensions and downsampling.
-       CAP_PROP_XI_HDR                                          = 559, // Enable High Dynamic Range feature.
-       CAP_PROP_XI_HDR_KNEEPOINT_COUNT                          = 560, // The number of kneepoints in the PWLR.
-       CAP_PROP_XI_HDR_T1                                       = 561, // position of first kneepoint(in % of XI_PRM_EXPOSURE)
-       CAP_PROP_XI_HDR_T2                                       = 562, // position of second kneepoint (in % of XI_PRM_EXPOSURE)
-       CAP_PROP_XI_KNEEPOINT1                                   = 563, // value of first kneepoint (% of sensor saturation)
-       CAP_PROP_XI_KNEEPOINT2                                   = 564, // value of second kneepoint (% of sensor saturation)
-       CAP_PROP_XI_IMAGE_BLACK_LEVEL                            = 565, // Last image black level counts. Can be used for Offline processing to recall it.
-       CAP_PROP_XI_HW_REVISION                                  = 571, // Returns hardware revision number.
-       CAP_PROP_XI_DEBUG_LEVEL                                  = 572, // Set debug level
-       CAP_PROP_XI_AUTO_BANDWIDTH_CALCULATION                   = 573, // Automatic bandwidth calculation,
-       CAP_PROP_XI_FFS_FILE_ID                                  = 594, // File number.
-       CAP_PROP_XI_FFS_FILE_SIZE                                = 580, // Size of file.
-       CAP_PROP_XI_FREE_FFS_SIZE                                = 581, // Size of free camera FFS.
-       CAP_PROP_XI_USED_FFS_SIZE                                = 582, // Size of used camera FFS.
-       CAP_PROP_XI_FFS_ACCESS_KEY                               = 583, // Setting of key enables file operations on some cameras.
-       CAP_PROP_XI_SENSOR_FEATURE_SELECTOR                      = 585, // Selects the current feature which is accessible by XI_PRM_SENSOR_FEATURE_VALUE.
-       CAP_PROP_XI_SENSOR_FEATURE_VALUE                         = 586, // Allows access to sensor feature value currently selected by XI_PRM_SENSOR_FEATURE_SELECTOR.
+//! @} PvAPI
+
+/** @name XIMEA Camera API
+    @{
+*/
+
+//! Properties of cameras available through XIMEA SDK backend
+enum { CAP_PROP_XI_DOWNSAMPLING                                 = 400, //!< Change image resolution by binning or skipping.
+       CAP_PROP_XI_DATA_FORMAT                                  = 401, //!< Output data format.
+       CAP_PROP_XI_OFFSET_X                                     = 402, //!< Horizontal offset from the origin to the area of interest (in pixels).
+       CAP_PROP_XI_OFFSET_Y                                     = 403, //!< Vertical offset from the origin to the area of interest (in pixels).
+       CAP_PROP_XI_TRG_SOURCE                                   = 404, //!< Defines source of trigger.
+       CAP_PROP_XI_TRG_SOFTWARE                                 = 405, //!< Generates an internal trigger. PRM_TRG_SOURCE must be set to TRG_SOFTWARE.
+       CAP_PROP_XI_GPI_SELECTOR                                 = 406, //!< Selects general purpose input.
+       CAP_PROP_XI_GPI_MODE                                     = 407, //!< Set general purpose input mode.
+       CAP_PROP_XI_GPI_LEVEL                                    = 408, //!< Get general purpose level.
+       CAP_PROP_XI_GPO_SELECTOR                                 = 409, //!< Selects general purpose output.
+       CAP_PROP_XI_GPO_MODE                                     = 410, //!< Set general purpose output mode.
+       CAP_PROP_XI_LED_SELECTOR                                 = 411, //!< Selects camera signalling LED.
+       CAP_PROP_XI_LED_MODE                                     = 412, //!< Define camera signalling LED functionality.
+       CAP_PROP_XI_MANUAL_WB                                    = 413, //!< Calculates White Balance(must be called during acquisition).
+       CAP_PROP_XI_AUTO_WB                                      = 414, //!< Automatic white balance.
+       CAP_PROP_XI_AEAG                                         = 415, //!< Automatic exposure/gain.
+       CAP_PROP_XI_EXP_PRIORITY                                 = 416, //!< Exposure priority (0.5 - exposure 50%, gain 50%).
+       CAP_PROP_XI_AE_MAX_LIMIT                                 = 417, //!< Maximum limit of exposure in AEAG procedure.
+       CAP_PROP_XI_AG_MAX_LIMIT                                 = 418, //!< Maximum limit of gain in AEAG procedure.
+       CAP_PROP_XI_AEAG_LEVEL                                   = 419, //!< Average intensity of output signal AEAG should achieve(in %).
+       CAP_PROP_XI_TIMEOUT                                      = 420, //!< Image capture timeout in milliseconds.
+       CAP_PROP_XI_EXPOSURE                                     = 421, //!< Exposure time in microseconds.
+       CAP_PROP_XI_EXPOSURE_BURST_COUNT                         = 422, //!< Sets the number of times of exposure in one frame.
+       CAP_PROP_XI_GAIN_SELECTOR                                = 423, //!< Gain selector for parameter Gain; allows selecting different types of gain.
+       CAP_PROP_XI_GAIN                                         = 424, //!< Gain in dB.
+       CAP_PROP_XI_DOWNSAMPLING_TYPE                            = 426, //!< Change image downsampling type.
+       CAP_PROP_XI_BINNING_SELECTOR                             = 427, //!< Binning engine selector.
+       CAP_PROP_XI_BINNING_VERTICAL                             = 428, //!< Vertical Binning - number of vertical photo-sensitive cells to combine together.
+       CAP_PROP_XI_BINNING_HORIZONTAL                           = 429, //!< Horizontal Binning - number of horizontal photo-sensitive cells to combine together.
+       CAP_PROP_XI_BINNING_PATTERN                              = 430, //!< Binning pattern type.
+       CAP_PROP_XI_DECIMATION_SELECTOR                          = 431, //!< Decimation engine selector.
+       CAP_PROP_XI_DECIMATION_VERTICAL                          = 432, //!< Vertical Decimation - vertical sub-sampling of the image - reduces the vertical resolution of the image by the specified vertical decimation factor.
+       CAP_PROP_XI_DECIMATION_HORIZONTAL                        = 433, //!< Horizontal Decimation - horizontal sub-sampling of the image - reduces the horizontal resolution of the image by the specified horizontal decimation factor.
+       CAP_PROP_XI_DECIMATION_PATTERN                           = 434, //!< Decimation pattern type.
+       CAP_PROP_XI_TEST_PATTERN_GENERATOR_SELECTOR              = 587, //!< Selects which test pattern generator is controlled by the TestPattern feature.
+       CAP_PROP_XI_TEST_PATTERN                                 = 588, //!< Selects which test pattern type is generated by the selected generator.
+       CAP_PROP_XI_IMAGE_DATA_FORMAT                            = 435, //!< Output data format.
+       CAP_PROP_XI_SHUTTER_TYPE                                 = 436, //!< Change sensor shutter type(CMOS sensor).
+       CAP_PROP_XI_SENSOR_TAPS                                  = 437, //!< Number of taps.
+       CAP_PROP_XI_AEAG_ROI_OFFSET_X                            = 439, //!< Automatic exposure/gain ROI offset X.
+       CAP_PROP_XI_AEAG_ROI_OFFSET_Y                            = 440, //!< Automatic exposure/gain ROI offset Y.
+       CAP_PROP_XI_AEAG_ROI_WIDTH                               = 441, //!< Automatic exposure/gain ROI Width.
+       CAP_PROP_XI_AEAG_ROI_HEIGHT                              = 442, //!< Automatic exposure/gain ROI Height.
+       CAP_PROP_XI_BPC                                          = 445, //!< Correction of bad pixels.
+       CAP_PROP_XI_WB_KR                                        = 448, //!< White balance red coefficient.
+       CAP_PROP_XI_WB_KG                                        = 449, //!< White balance green coefficient.
+       CAP_PROP_XI_WB_KB                                        = 450, //!< White balance blue coefficient.
+       CAP_PROP_XI_WIDTH                                        = 451, //!< Width of the Image provided by the device (in pixels).
+       CAP_PROP_XI_HEIGHT                                       = 452, //!< Height of the Image provided by the device (in pixels).
+       CAP_PROP_XI_REGION_SELECTOR                              = 589, //!< Selects the Region in Multiple ROI whose parameters are set by width, height, ..., region mode.
+       CAP_PROP_XI_REGION_MODE                                  = 595, //!< Activates/deactivates Region selected by Region Selector.
+       CAP_PROP_XI_LIMIT_BANDWIDTH                              = 459, //!< Set/get bandwidth(datarate)(in Megabits).
+       CAP_PROP_XI_SENSOR_DATA_BIT_DEPTH                        = 460, //!< Sensor output data bit depth.
+       CAP_PROP_XI_OUTPUT_DATA_BIT_DEPTH                        = 461, //!< Device output data bit depth.
+       CAP_PROP_XI_IMAGE_DATA_BIT_DEPTH                         = 462, //!< Bit depth of data returned by function xiGetImage.
+       CAP_PROP_XI_OUTPUT_DATA_PACKING                          = 463, //!< Device output data packing (or grouping) enabled. Packing could be enabled if output_data_bit_depth > 8 and packing capability is available.
+       CAP_PROP_XI_OUTPUT_DATA_PACKING_TYPE                     = 464, //!< Data packing type. Some cameras support only a specific packing type.
+       CAP_PROP_XI_IS_COOLED                                    = 465, //!< Returns 1 for cameras that support cooling.
+       CAP_PROP_XI_COOLING                                      = 466, //!< Start camera cooling.
+       CAP_PROP_XI_TARGET_TEMP                                  = 467, //!< Set sensor target temperature for cooling.
+       CAP_PROP_XI_CHIP_TEMP                                    = 468, //!< Camera sensor temperature.
+       CAP_PROP_XI_HOUS_TEMP                                    = 469, //!< Camera housing temperature.
+       CAP_PROP_XI_HOUS_BACK_SIDE_TEMP                          = 590, //!< Camera housing back side temperature.
+       CAP_PROP_XI_SENSOR_BOARD_TEMP                            = 596, //!< Camera sensor board temperature.
+       CAP_PROP_XI_CMS                                          = 470, //!< Mode of color management system.
+       CAP_PROP_XI_APPLY_CMS                                    = 471, //!< Enable applying of CMS profiles to xiGetImage (see XI_PRM_INPUT_CMS_PROFILE, XI_PRM_OUTPUT_CMS_PROFILE).
+       CAP_PROP_XI_IMAGE_IS_COLOR                               = 474, //!< Returns 1 for color cameras.
+       CAP_PROP_XI_COLOR_FILTER_ARRAY                           = 475, //!< Returns color filter array type of RAW data.
+       CAP_PROP_XI_GAMMAY                                       = 476, //!< Luminosity gamma.
+       CAP_PROP_XI_GAMMAC                                       = 477, //!< Chromaticity gamma.
+       CAP_PROP_XI_SHARPNESS                                    = 478, //!< Sharpness Strength.
+       CAP_PROP_XI_CC_MATRIX_00                                 = 479, //!< Color Correction Matrix element [0][0].
+       CAP_PROP_XI_CC_MATRIX_01                                 = 480, //!< Color Correction Matrix element [0][1].
+       CAP_PROP_XI_CC_MATRIX_02                                 = 481, //!< Color Correction Matrix element [0][2].
+       CAP_PROP_XI_CC_MATRIX_03                                 = 482, //!< Color Correction Matrix element [0][3].
+       CAP_PROP_XI_CC_MATRIX_10                                 = 483, //!< Color Correction Matrix element [1][0].
+       CAP_PROP_XI_CC_MATRIX_11                                 = 484, //!< Color Correction Matrix element [1][1].
+       CAP_PROP_XI_CC_MATRIX_12                                 = 485, //!< Color Correction Matrix element [1][2].
+       CAP_PROP_XI_CC_MATRIX_13                                 = 486, //!< Color Correction Matrix element [1][3].
+       CAP_PROP_XI_CC_MATRIX_20                                 = 487, //!< Color Correction Matrix element [2][0].
+       CAP_PROP_XI_CC_MATRIX_21                                 = 488, //!< Color Correction Matrix element [2][1].
+       CAP_PROP_XI_CC_MATRIX_22                                 = 489, //!< Color Correction Matrix element [2][2].
+       CAP_PROP_XI_CC_MATRIX_23                                 = 490, //!< Color Correction Matrix element [2][3].
+       CAP_PROP_XI_CC_MATRIX_30                                 = 491, //!< Color Correction Matrix element [3][0].
+       CAP_PROP_XI_CC_MATRIX_31                                 = 492, //!< Color Correction Matrix element [3][1].
+       CAP_PROP_XI_CC_MATRIX_32                                 = 493, //!< Color Correction Matrix element [3][2].
+       CAP_PROP_XI_CC_MATRIX_33                                 = 494, //!< Color Correction Matrix element [3][3].
+       CAP_PROP_XI_DEFAULT_CC_MATRIX                            = 495, //!< Set default Color Correction Matrix.
+       CAP_PROP_XI_TRG_SELECTOR                                 = 498, //!< Selects the type of trigger.
+       CAP_PROP_XI_ACQ_FRAME_BURST_COUNT                        = 499, //!< Sets number of frames acquired by burst. This burst is used only if trigger is set to FrameBurstStart.
+       CAP_PROP_XI_DEBOUNCE_EN                                  = 507, //!< Enable/Disable debounce to selected GPI.
+       CAP_PROP_XI_DEBOUNCE_T0                                  = 508, //!< Debounce time (x * 10us).
+       CAP_PROP_XI_DEBOUNCE_T1                                  = 509, //!< Debounce time (x * 10us).
+       CAP_PROP_XI_DEBOUNCE_POL                                 = 510, //!< Debounce polarity (pol = 1 t0 - falling edge, t1 - rising edge).
+       CAP_PROP_XI_LENS_MODE                                    = 511, //!< Status of lens control interface. This shall be set to XI_ON before any Lens operations.
+       CAP_PROP_XI_LENS_APERTURE_VALUE                          = 512, //!< Current lens aperture value in stops. Examples: 2.8, 4, 5.6, 8, 11.
+       CAP_PROP_XI_LENS_FOCUS_MOVEMENT_VALUE                    = 513, //!< Lens current focus movement value to be used by XI_PRM_LENS_FOCUS_MOVE in motor steps.
+       CAP_PROP_XI_LENS_FOCUS_MOVE                              = 514, //!< Moves lens focus motor by steps set in XI_PRM_LENS_FOCUS_MOVEMENT_VALUE.
+       CAP_PROP_XI_LENS_FOCUS_DISTANCE                          = 515, //!< Lens focus distance in cm.
+       CAP_PROP_XI_LENS_FOCAL_LENGTH                            = 516, //!< Lens focal distance in mm.
+       CAP_PROP_XI_LENS_FEATURE_SELECTOR                        = 517, //!< Selects the current feature which is accessible by XI_PRM_LENS_FEATURE.
+       CAP_PROP_XI_LENS_FEATURE                                 = 518, //!< Allows access to lens feature value currently selected by XI_PRM_LENS_FEATURE_SELECTOR.
+       CAP_PROP_XI_DEVICE_MODEL_ID                              = 521, //!< Returns device model id.
+       CAP_PROP_XI_DEVICE_SN                                    = 522, //!< Returns device serial number.
+       CAP_PROP_XI_IMAGE_DATA_FORMAT_RGB32_ALPHA                = 529, //!< The alpha channel of RGB32 output image format.
+       CAP_PROP_XI_IMAGE_PAYLOAD_SIZE                           = 530, //!< Buffer size in bytes sufficient for output image returned by xiGetImage.
+       CAP_PROP_XI_TRANSPORT_PIXEL_FORMAT                       = 531, //!< Current format of pixels on transport layer.
+       CAP_PROP_XI_SENSOR_CLOCK_FREQ_HZ                         = 532, //!< Sensor clock frequency in Hz.
+       CAP_PROP_XI_SENSOR_CLOCK_FREQ_INDEX                      = 533, //!< Sensor clock frequency index. Sensor with selected frequencies have possibility to set the frequency only by this index.
+       CAP_PROP_XI_SENSOR_OUTPUT_CHANNEL_COUNT                  = 534, //!< Number of output channels from sensor used for data transfer.
+       CAP_PROP_XI_FRAMERATE                                    = 535, //!< Define framerate in Hz.
+       CAP_PROP_XI_COUNTER_SELECTOR                             = 536, //!< Select counter.
+       CAP_PROP_XI_COUNTER_VALUE                                = 537, //!< Counter status.
+       CAP_PROP_XI_ACQ_TIMING_MODE                              = 538, //!< Type of sensor frames timing.
+       CAP_PROP_XI_AVAILABLE_BANDWIDTH                          = 539, //!< Calculates and returns available interface bandwidth (in Megabits).
+       CAP_PROP_XI_BUFFER_POLICY                                = 540, //!< Data move policy.
+       CAP_PROP_XI_LUT_EN                                       = 541, //!< Activates LUT.
+       CAP_PROP_XI_LUT_INDEX                                    = 542, //!< Control the index (offset) of the coefficient to access in the LUT.
+       CAP_PROP_XI_LUT_VALUE                                    = 543, //!< Value at entry LUTIndex of the LUT.
+       CAP_PROP_XI_TRG_DELAY                                    = 544, //!< Specifies the delay in microseconds (us) to apply after the trigger reception before activating it.
+       CAP_PROP_XI_TS_RST_MODE                                  = 545, //!< Defines how time stamp reset engine will be armed.
+       CAP_PROP_XI_TS_RST_SOURCE                                = 546, //!< Defines which source will be used for timestamp reset. Writing this parameter will trigger settings of engine (arming).
+       CAP_PROP_XI_IS_DEVICE_EXIST                              = 547, //!< Returns 1 if camera connected and works properly.
+       CAP_PROP_XI_ACQ_BUFFER_SIZE                              = 548, //!< Acquisition buffer size in buffer_size_unit. Default bytes.
+       CAP_PROP_XI_ACQ_BUFFER_SIZE_UNIT                         = 549, //!< Acquisition buffer size unit in bytes. Default 1. E.g. Value 1024 means that buffer_size is in KiBytes.
+       CAP_PROP_XI_ACQ_TRANSPORT_BUFFER_SIZE                    = 550, //!< Acquisition transport buffer size in bytes.
+       CAP_PROP_XI_BUFFERS_QUEUE_SIZE                           = 551, //!< Queue of field/frame buffers.
+       CAP_PROP_XI_ACQ_TRANSPORT_BUFFER_COMMIT                  = 552, //!< Number of buffers to commit to low level.
+       CAP_PROP_XI_RECENT_FRAME                                 = 553, //!< GetImage returns most recent frame.
+       CAP_PROP_XI_DEVICE_RESET                                 = 554, //!< Resets the camera to default state.
+       CAP_PROP_XI_COLUMN_FPN_CORRECTION                        = 555, //!< Correction of column FPN.
+       CAP_PROP_XI_ROW_FPN_CORRECTION                           = 591, //!< Correction of row FPN.
+       CAP_PROP_XI_SENSOR_MODE                                  = 558, //!< Current sensor mode. Allows to select sensor mode by one integer. Setting of this parameter affects: image dimensions and downsampling.
+       CAP_PROP_XI_HDR                                          = 559, //!< Enable High Dynamic Range feature.
+       CAP_PROP_XI_HDR_KNEEPOINT_COUNT                          = 560, //!< The number of kneepoints in the PWLR.
+       CAP_PROP_XI_HDR_T1                                       = 561, //!< Position of first kneepoint (in % of XI_PRM_EXPOSURE).
+       CAP_PROP_XI_HDR_T2                                       = 562, //!< Position of second kneepoint (in % of XI_PRM_EXPOSURE).
+       CAP_PROP_XI_KNEEPOINT1                                   = 563, //!< Value of first kneepoint (% of sensor saturation).
+       CAP_PROP_XI_KNEEPOINT2                                   = 564, //!< Value of second kneepoint (% of sensor saturation).
+       CAP_PROP_XI_IMAGE_BLACK_LEVEL                            = 565, //!< Last image black level counts. Can be used for Offline processing to recall it.
+       CAP_PROP_XI_HW_REVISION                                  = 571, //!< Returns hardware revision number.
+       CAP_PROP_XI_DEBUG_LEVEL                                  = 572, //!< Set debug level.
+       CAP_PROP_XI_AUTO_BANDWIDTH_CALCULATION                   = 573, //!< Automatic bandwidth calculation.
+       CAP_PROP_XI_FFS_FILE_ID                                  = 594, //!< File number.
+       CAP_PROP_XI_FFS_FILE_SIZE                                = 580, //!< Size of file.
+       CAP_PROP_XI_FREE_FFS_SIZE                                = 581, //!< Size of free camera FFS.
+       CAP_PROP_XI_USED_FFS_SIZE                                = 582, //!< Size of used camera FFS.
+       CAP_PROP_XI_FFS_ACCESS_KEY                               = 583, //!< Setting of key enables file operations on some cameras.
+       CAP_PROP_XI_SENSOR_FEATURE_SELECTOR                      = 585, //!< Selects the current feature which is accessible by XI_PRM_SENSOR_FEATURE_VALUE.
+       CAP_PROP_XI_SENSOR_FEATURE_VALUE                         = 586, //!< Allows access to sensor feature value currently selected by XI_PRM_SENSOR_FEATURE_SELECTOR.
      };
 
+//! @} XIMEA
+
+/** @name ARAVIS Camera API
+*  @{
+*/
+
+//! Properties of cameras available through ARAVIS backend
+enum { CAP_PROP_ARAVIS_AUTOTRIGGER                              = 600 //!< Automatically trigger frame capture if camera is configured with software trigger
+};
+
+//! @} ARAVIS
+
+/** @name AVFoundation framework for iOS
+    OS X Lion will have the same API
+    @{
+*/
 
-// Properties of cameras available through AVFOUNDATION interface
+//! Properties of cameras available through AVFOUNDATION backend
 enum { CAP_PROP_IOS_DEVICE_FOCUS        = 9001,
        CAP_PROP_IOS_DEVICE_EXPOSURE     = 9002,
        CAP_PROP_IOS_DEVICE_FLASH        = 9003,
@@ -416,8 +510,11 @@ enum { CAP_PROP_IOS_DEVICE_FOCUS        = 9001,
        CAP_PROP_IOS_DEVICE_TORCH        = 9005
      };
 
+/** @name Smartek Giganetix GigEVisionSDK
+    @{
+*/
 
-// Properties of cameras available through Smartek Giganetix Ethernet Vision interface
+//! Properties of cameras available through Smartek Giganetix Ethernet Vision backend
 /* --- Vladimir Litvinenko (litvinenko.vladimir@gmail.com) --- */
 enum { CAP_PROP_GIGA_FRAME_OFFSET_X   = 10001,
        CAP_PROP_GIGA_FRAME_OFFSET_Y   = 10002,
@@ -427,6 +524,11 @@ enum { CAP_PROP_GIGA_FRAME_OFFSET_X   = 10001,
        CAP_PROP_GIGA_FRAME_SENS_HEIGH = 10006
      };
 
+//! @} Smartek
+
+/** @name Intel Perceptual Computing SDK
+    @{
+*/
 enum { CAP_PROP_INTELPERC_PROFILE_COUNT               = 11001,
        CAP_PROP_INTELPERC_PROFILE_IDX                 = 11002,
        CAP_PROP_INTELPERC_DEPTH_LOW_CONFIDENCE_VALUE  = 11003,
@@ -436,144 +538,162 @@ enum { CAP_PROP_INTELPERC_PROFILE_COUNT               = 11001,
        CAP_PROP_INTELPERC_DEPTH_FOCAL_LENGTH_VERT     = 11007
      };
 
-// Intel PerC streams
+//! Intel Perceptual Streams
 enum { CAP_INTELPERC_DEPTH_GENERATOR = 1 << 29,
        CAP_INTELPERC_IMAGE_GENERATOR = 1 << 28,
-       CAP_INTELPERC_GENERATORS_MASK = CAP_INTELPERC_DEPTH_GENERATOR + CAP_INTELPERC_IMAGE_GENERATOR
+       CAP_INTELPERC_IR_GENERATOR    = 1 << 27,
+       CAP_INTELPERC_GENERATORS_MASK = CAP_INTELPERC_DEPTH_GENERATOR + CAP_INTELPERC_IMAGE_GENERATOR + CAP_INTELPERC_IR_GENERATOR
      };
 
-enum { CAP_INTELPERC_DEPTH_MAP              = 0, // Each pixel is a 16-bit integer. The value indicates the distance from an object to the camera's XY plane or the Cartesian depth.
-       CAP_INTELPERC_UVDEPTH_MAP            = 1, // Each pixel contains two 32-bit floating point values in the range of 0-1, representing the mapping of depth coordinates to the color coordinates.
-       CAP_INTELPERC_IR_MAP                 = 2, // Each pixel is a 16-bit integer. The value indicates the intensity of the reflected laser beam.
+enum { CAP_INTELPERC_DEPTH_MAP              = 0, //!< Each pixel is a 16-bit integer. The value indicates the distance from an object to the camera's XY plane or the Cartesian depth.
+       CAP_INTELPERC_UVDEPTH_MAP            = 1, //!< Each pixel contains two 32-bit floating point values in the range of 0-1, representing the mapping of depth coordinates to the color coordinates.
+       CAP_INTELPERC_IR_MAP                 = 2, //!< Each pixel is a 16-bit integer. The value indicates the intensity of the reflected laser beam.
        CAP_INTELPERC_IMAGE                  = 3
      };
 
-enum { VIDEOWRITER_PROP_QUALITY = 1,    // Quality (0..100%) of the videostream encoded
-       VIDEOWRITER_PROP_FRAMEBYTES = 2, // (Read-only): Size of just encoded video frame
-       VIDEOWRITER_PROP_NSTRIPES = 3    // Number of stripes for parallel encoding. -1 for auto detection
+//! @} Intel Perceptual
+
+/** @name gPhoto2 connection
+    @{
+*/
+
+/** @brief gPhoto2 properties
+
+If `propertyId` is less than 0 then work on the widget whose camera setting ID is the __additive inverse__ of `propertyId`.
+Get IDs by using CAP_PROP_GPHOTO2_WIDGET_ENUMERATE.
+@see CvCaptureCAM_GPHOTO2 for more info
+*/
+enum { CAP_PROP_GPHOTO2_PREVIEW           = 17001, //!< Capture only preview from liveview mode.
+       CAP_PROP_GPHOTO2_WIDGET_ENUMERATE  = 17002, //!< Readonly, returns (const char *).
+       CAP_PROP_GPHOTO2_RELOAD_CONFIG     = 17003, //!< Trigger, only by set. Reload camera settings.
+       CAP_PROP_GPHOTO2_RELOAD_ON_CHANGE  = 17004, //!< Reload all settings on set.
+       CAP_PROP_GPHOTO2_COLLECT_MSGS      = 17005, //!< Collect messages with details.
+       CAP_PROP_GPHOTO2_FLUSH_MSGS        = 17006, //!< Readonly, returns (const char *).
+       CAP_PROP_SPEED                     = 17007, //!< Exposure speed. Can be readonly, depends on camera program.
+       CAP_PROP_APERTURE                  = 17008, //!< Aperture. Can be readonly, depends on camera program.
+       CAP_PROP_EXPOSUREPROGRAM           = 17009, //!< Camera exposure program.
+       CAP_PROP_VIEWFINDER                = 17010  //!< Enter liveview mode.
      };
 
-// gPhoto2 properties, if propertyId is less than 0 then work on widget with that __additive inversed__ camera setting ID
-// Get IDs by using CAP_PROP_GPHOTO2_WIDGET_ENUMERATE.
-// @see CvCaptureCAM_GPHOTO2 for more info
-enum { CAP_PROP_GPHOTO2_PREVIEW           = 17001, // Capture only preview from liveview mode.
-       CAP_PROP_GPHOTO2_WIDGET_ENUMERATE  = 17002, // Readonly, returns (const char *).
-       CAP_PROP_GPHOTO2_RELOAD_CONFIG     = 17003, // Trigger, only by set. Reload camera settings.
-       CAP_PROP_GPHOTO2_RELOAD_ON_CHANGE  = 17004, // Reload all settings on set.
-       CAP_PROP_GPHOTO2_COLLECT_MSGS      = 17005, // Collect messages with details.
-       CAP_PROP_GPHOTO2_FLUSH_MSGS        = 17006, // Readonly, returns (const char *).
-       CAP_PROP_SPEED                     = 17007, // Exposure speed. Can be readonly, depends on camera program.
-       CAP_PROP_APERTURE                  = 17008, // Aperture. Can be readonly, depends on camera program.
-       CAP_PROP_EXPOSUREPROGRAM           = 17009, // Camera exposure program.
-       CAP_PROP_VIEWFINDER                = 17010  // Enter liveview mode.
+//! @} gPhoto2
+
+
+/** @name Images backend
+    @{
+*/
+
+/** @brief Images backend properties
+
+*/
+enum { CAP_PROP_IMAGES_BASE = 18000,
+       CAP_PROP_IMAGES_LAST = 19000 // excluding
      };
 
-//enum {
+//! @} Images
+
+//! @} videoio_flags_others
+
 
 class IVideoCapture;
+//! @cond IGNORED
+namespace internal { class VideoCapturePrivateAccessor; }
+//! @endcond IGNORED
+
+/** @brief Class for video capturing from video files, image sequences or cameras.
 
-/** @brief Class for video capturing from video files, image sequences or cameras. The class provides C++ API
-for capturing video from cameras or for reading video files and image sequences. Here is how the
-class can be used: :
-@code
-    #include "opencv2/opencv.hpp"
-
-    using namespace cv;
-
-    int main(int, char**)
-    {
-        VideoCapture cap(0); // open the default camera
-        if(!cap.isOpened())  // check if we succeeded
-            return -1;
-
-        Mat edges;
-        namedWindow("edges",1);
-        for(;;)
-        {
-            Mat frame;
-            cap >> frame; // get a new frame from camera
-            cvtColor(frame, edges, COLOR_BGR2GRAY);
-            GaussianBlur(edges, edges, Size(7,7), 1.5, 1.5);
-            Canny(edges, edges, 0, 30, 3);
-            imshow("edges", edges);
-            if(waitKey(30) >= 0) break;
-        }
-        // the camera will be deinitialized automatically in VideoCapture destructor
-        return 0;
-    }
-@endcode
-@note In C API the black-box structure CvCapture is used instead of VideoCapture.
+The class provides C++ API for capturing video from cameras or for reading video files and image sequences.
 
+Here is how the class can be used:
+@include samples/cpp/videocapture_basic.cpp
+
+@note In @ref videoio_c "C API" the black-box structure `CvCapture` is used instead of %VideoCapture.
 @note
--   A basic sample on using the VideoCapture interface can be found at
-    opencv_source_code/samples/cpp/starter_video.cpp
--   Another basic video processing sample can be found at
-    opencv_source_code/samples/cpp/video_dmtx.cpp
--   (Python) A basic sample on using the VideoCapture interface can be found at
-    opencv_source_code/samples/python/video.py
--   (Python) Another basic video processing sample can be found at
-    opencv_source_code/samples/python/video_dmtx.py
+-   (C++) A basic sample on using the %VideoCapture interface can be found at
+    `OPENCV_SOURCE_CODE/samples/cpp/videocapture_starter.cpp`
+-   (Python) A basic sample on using the %VideoCapture interface can be found at
+    `OPENCV_SOURCE_CODE/samples/python/video.py`
 -   (Python) A multi threaded video processing sample can be found at
-    opencv_source_code/samples/python/video_threaded.py
+    `OPENCV_SOURCE_CODE/samples/python/video_threaded.py`
+-   (Python) %VideoCapture sample showcasing some features of the Video4Linux2 backend
+    `OPENCV_SOURCE_CODE/samples/python/video_v4l2.py`
  */
 class CV_EXPORTS_W VideoCapture
 {
 public:
-    /** @brief
-    @note In C API, when you finished working with video, release CvCapture structure with
+    /** @brief Default constructor
+    @note In @ref videoio_c "C API", when you finished working with video, release CvCapture structure with
     cvReleaseCapture(), or use Ptr\<CvCapture\> that calls cvReleaseCapture() automatically in the
     destructor.
      */
     CV_WRAP VideoCapture();
 
     /** @overload
-    @param filename name of the opened video file (eg. video.avi) or image sequence (eg.
-    img_%02d.jpg, which will read samples like img_00.jpg, img_01.jpg, img_02.jpg, ...)
+    @brief  Opens a video file or a capturing device or an IP video stream for video capturing with API Preference
+
+    @param filename it can be:
+    - name of video file (eg. `video.avi`)
+    - or image sequence (eg. `img_%02d.jpg`, which will read samples like `img_00.jpg, img_01.jpg, img_02.jpg, ...`)
+    - or URL of video stream (eg. `protocol://host:port/script_name?script_params|auth`)
+    - or GStreamer pipeline string in gst-launch tool format in case if GStreamer is used as backend
+      Note that each video stream or IP camera feed has its own URL scheme. Please refer to the
+      documentation of source stream to know the right URL.
+    @param apiPreference preferred Capture API backends to use. Can be used to enforce a specific reader
+    implementation if multiple are available: e.g. cv::CAP_FFMPEG or cv::CAP_IMAGES or cv::CAP_DSHOW.
+
+    @sa cv::VideoCaptureAPIs
     */
-    CV_WRAP VideoCapture(const String& filename);
+    CV_WRAP explicit VideoCapture(const String& filename, int apiPreference = CAP_ANY);
 
     /** @overload
-    @param filename name of the opened video file (eg. video.avi) or image sequence (eg.
-    img_%02d.jpg, which will read samples like img_00.jpg, img_01.jpg, img_02.jpg, ...)
+    @brief  Opens a camera for video capturing
 
-    @param apiPreference preferred Capture API to use. Can be used to enforce a specific reader
-    implementation if multiple are available: e.g. CAP_FFMPEG or CAP_IMAGES
-    */
-    CV_WRAP VideoCapture(const String& filename, int apiPreference);
+    @param index id of the video capturing device to open. To open default camera using default backend just pass 0.
+    (for backward compatibility, usage of camera_id + domain_offset (CAP_*) is valid when apiPreference is CAP_ANY)
+    @param apiPreference preferred Capture API backends to use. Can be used to enforce a specific reader
+    implementation if multiple are available: e.g. cv::CAP_DSHOW or cv::CAP_MSMF or cv::CAP_V4L.
 
-    /** @overload
-    @param index = camera_id + domain_offset (CAP_*). id of the video capturing device to open. If there is a single
-    camera connected, just pass 0. Advanced Usage: to open Camera 1 using the MS Media Foundation API: index = 1 + CAP_MSMF
+    @sa cv::VideoCaptureAPIs
     */
-    CV_WRAP VideoCapture(int index);
+    CV_WRAP explicit VideoCapture(int index, int apiPreference = CAP_ANY);
+
+    /** @brief Default destructor
 
+    The method first calls VideoCapture::release to close the already opened file or camera.
+    */
     virtual ~VideoCapture();
 
-    /** @brief Open video file or a capturing device for video capturing
+    /** @brief  Opens a video file or a capturing device or an IP video stream for video capturing.
+
+    @overload
 
-    @param filename name of the opened video file (eg. video.avi) or image sequence (eg.
-    img_%02d.jpg, which will read samples like img_00.jpg, img_01.jpg, img_02.jpg, ...)
+    Parameters are same as the constructor VideoCapture(const String& filename, int apiPreference = CAP_ANY)
+    @return `true` if the file has been successfully opened
 
-    The methods first call VideoCapture::release to close the already opened file or camera.
+    The method first calls VideoCapture::release to close the already opened file or camera.
      */
-    CV_WRAP virtual bool open(const String& filename);
+    CV_WRAP virtual bool open(const String& filename, int apiPreference = CAP_ANY);
 
-    /** @overload
-    @param index = camera_id + domain_offset (CAP_*). id of the video capturing device to open. If there is a single
-    camera connected, just pass 0. Advanced Usage: to open Camera 1 using the MS Media Foundation API: index = 1 + CAP_MSMF
+    /** @brief  Opens a camera for video capturing
+
+    @overload
+
+    Parameters are same as the constructor VideoCapture(int index, int apiPreference = CAP_ANY)
+    @return `true` if the camera has been successfully opened.
+
+    The method first calls VideoCapture::release to close the already opened file or camera.
     */
-    CV_WRAP virtual bool open(int index);
+    CV_WRAP virtual bool open(int index, int apiPreference = CAP_ANY);
 
     /** @brief Returns true if video capturing has been initialized already.
 
-    If the previous call to VideoCapture constructor or VideoCapture::open succeeded, the method returns
+    If the previous call to VideoCapture constructor or VideoCapture::open() succeeded, the method returns
     true.
      */
     CV_WRAP virtual bool isOpened() const;
 
     /** @brief Closes video file or capturing device.
 
-    The methods are automatically called by subsequent VideoCapture::open and by VideoCapture
+    The method is automatically called by subsequent VideoCapture::open and by VideoCapture
     destructor.
 
     The C function also deallocates memory and clears \*capture pointer.
@@ -582,7 +702,9 @@ class CV_EXPORTS_W VideoCapture
 
     /** @brief Grabs the next frame from video file or capturing device.
 
-    The methods/functions grab the next frame from video file or camera and return true (non-zero) in
+    @return `true` (non-zero) in the case of success.
+
+    The method/function grabs the next frame from video file or camera and returns true (non-zero) in
     the case of success.
 
     The primary use of the function is in multi-camera environments, especially when the cameras do not
@@ -592,128 +714,154 @@ class CV_EXPORTS_W VideoCapture
     from different cameras will be closer in time.
 
     Also, when a connected camera is multi-head (for example, a stereo camera or a Kinect device), the
-    correct way of retrieving data from it is to call VideoCapture::grab first and then call
-    VideoCapture::retrieve one or more times with different values of the channel parameter. See
-    <https://github.com/Itseez/opencv/tree/master/samples/cpp/openni_capture.cpp>
+    correct way of retrieving data from it is to call VideoCapture::grab() first and then call
+    VideoCapture::retrieve() one or more times with different values of the channel parameter.
+
+    @ref tutorial_kinect_openni
      */
     CV_WRAP virtual bool grab();
 
     /** @brief Decodes and returns the grabbed video frame.
 
-    The methods/functions decode and return the just grabbed frame. If no frames has been grabbed
-    (camera has been disconnected, or there are no more frames in video file), the methods return false
-    and the functions return NULL pointer.
+    @param [out] image the video frame is returned here. If no frames have been grabbed the image will be empty.
+    @param flag it could be a frame index or a driver specific flag
+    @return `false` if no frames have been grabbed
+
+    The method decodes and returns the just grabbed frame. If no frames have been grabbed
+    (camera has been disconnected, or there are no more frames in video file), the method returns false
+    and the function returns an empty image (with %cv::Mat, test it with Mat::empty()).
 
-    @note OpenCV 1.x functions cvRetrieveFrame and cv.RetrieveFrame return image stored inside the video
+    @sa read()
+
+    @note In @ref videoio_c "C API", functions cvRetrieveFrame() and cv.RetrieveFrame() return image stored inside the video
     capturing structure. It is not allowed to modify or release the image! You can copy the frame using
-    :ocvcvCloneImage and then do whatever you want with the copy.
+    cvCloneImage and then do whatever you want with the copy.
      */
     CV_WRAP virtual bool retrieve(OutputArray image, int flag = 0);
+
+    /** @brief Stream operator to read the next video frame.
+    @sa read()
+    */
     virtual VideoCapture& operator >> (CV_OUT Mat& image);
+
+    /** @overload
+    @sa read()
+    */
     virtual VideoCapture& operator >> (CV_OUT UMat& image);
 
     /** @brief Grabs, decodes and returns the next video frame.
 
-    The methods/functions combine VideoCapture::grab and VideoCapture::retrieve in one call. This is the
-    most convenient method for reading video files or capturing data from decode and return the just
+    @param [out] image the video frame is returned here. If no frames have been grabbed the image will be empty.
+    @return `false` if no frames have been grabbed
+
+    The method/function combines VideoCapture::grab() and VideoCapture::retrieve() in one call. This is the
+    most convenient method for reading video files or capturing data from cameras: it decodes and returns the just
     grabbed frame. If no frames has been grabbed (camera has been disconnected, or there are no more
-    frames in video file), the methods return false and the functions return NULL pointer.
+    frames in video file), the method returns false and the function returns empty image (with %cv::Mat, test it with Mat::empty()).
 
-    @note OpenCV 1.x functions cvRetrieveFrame and cv.RetrieveFrame return image stored inside the video
+    @note In @ref videoio_c "C API", functions cvRetrieveFrame() and cv.RetrieveFrame() return image stored inside the video
     capturing structure. It is not allowed to modify or release the image! You can copy the frame using
-    :ocvcvCloneImage and then do whatever you want with the copy.
+    cvCloneImage and then do whatever you want with the copy.
      */
     CV_WRAP virtual bool read(OutputArray image);
 
     /** @brief Sets a property in the VideoCapture.
 
-    @param propId Property identifier. It can be one of the following:
-     -   **CAP_PROP_POS_MSEC** Current position of the video file in milliseconds.
-     -   **CAP_PROP_POS_FRAMES** 0-based index of the frame to be decoded/captured next.
-     -   **CAP_PROP_POS_AVI_RATIO** Relative position of the video file: 0 - start of the
-         film, 1 - end of the film.
-     -   **CAP_PROP_FRAME_WIDTH** Width of the frames in the video stream.
-     -   **CAP_PROP_FRAME_HEIGHT** Height of the frames in the video stream.
-     -   **CAP_PROP_FPS** Frame rate.
-     -   **CAP_PROP_FOURCC** 4-character code of codec.
-     -   **CAP_PROP_FRAME_COUNT** Number of frames in the video file.
-     -   **CAP_PROP_FORMAT** Format of the Mat objects returned by retrieve() .
-     -   **CAP_PROP_MODE** Backend-specific value indicating the current capture mode.
-     -   **CAP_PROP_BRIGHTNESS** Brightness of the image (only for cameras).
-     -   **CAP_PROP_CONTRAST** Contrast of the image (only for cameras).
-     -   **CAP_PROP_SATURATION** Saturation of the image (only for cameras).
-     -   **CAP_PROP_HUE** Hue of the image (only for cameras).
-     -   **CAP_PROP_GAIN** Gain of the image (only for cameras).
-     -   **CAP_PROP_EXPOSURE** Exposure (only for cameras).
-     -   **CAP_PROP_CONVERT_RGB** Boolean flags indicating whether images should be converted
-         to RGB.
-     -   **CAP_PROP_WHITE_BALANCE** Currently unsupported
-     -   **CAP_PROP_RECTIFICATION** Rectification flag for stereo cameras (note: only supported
-         by DC1394 v 2.x backend currently)
+    @param propId Property identifier from cv::VideoCaptureProperties (eg. cv::CAP_PROP_POS_MSEC, cv::CAP_PROP_POS_FRAMES, ...)
+    or one from @ref videoio_flags_others
     @param value Value of the property.
+    @return `true` if the property is supported by backend used by the VideoCapture instance.
+    @note Even if it returns `true` this doesn't ensure that the property
+    value has been accepted by the capture device. See note in VideoCapture::get()
      */
     CV_WRAP virtual bool set(int propId, double value);
 
     /** @brief Returns the specified VideoCapture property
 
-    @param propId Property identifier. It can be one of the following:
-     -   **CAP_PROP_POS_MSEC** Current position of the video file in milliseconds or video
-         capture timestamp.
-     -   **CAP_PROP_POS_FRAMES** 0-based index of the frame to be decoded/captured next.
-     -   **CAP_PROP_POS_AVI_RATIO** Relative position of the video file: 0 - start of the
-         film, 1 - end of the film.
-     -   **CAP_PROP_FRAME_WIDTH** Width of the frames in the video stream.
-     -   **CAP_PROP_FRAME_HEIGHT** Height of the frames in the video stream.
-     -   **CAP_PROP_FPS** Frame rate.
-     -   **CAP_PROP_FOURCC** 4-character code of codec.
-     -   **CAP_PROP_FRAME_COUNT** Number of frames in the video file.
-     -   **CAP_PROP_FORMAT** Format of the Mat objects returned by retrieve() .
-     -   **CAP_PROP_MODE** Backend-specific value indicating the current capture mode.
-     -   **CAP_PROP_BRIGHTNESS** Brightness of the image (only for cameras).
-     -   **CAP_PROP_CONTRAST** Contrast of the image (only for cameras).
-     -   **CAP_PROP_SATURATION** Saturation of the image (only for cameras).
-     -   **CAP_PROP_HUE** Hue of the image (only for cameras).
-     -   **CAP_PROP_GAIN** Gain of the image (only for cameras).
-     -   **CAP_PROP_EXPOSURE** Exposure (only for cameras).
-     -   **CAP_PROP_CONVERT_RGB** Boolean flags indicating whether images should be converted
-         to RGB.
-     -   **CAP_PROP_WHITE_BALANCE** Currently not supported
-     -   **CAP_PROP_RECTIFICATION** Rectification flag for stereo cameras (note: only supported
-         by DC1394 v 2.x backend currently)
-
-    @note When querying a property that is not supported by the backend used by the VideoCapture
-    class, value 0 is returned.
-     */
-    CV_WRAP virtual double get(int propId) const;
+    @param propId Property identifier from cv::VideoCaptureProperties (eg. cv::CAP_PROP_POS_MSEC, cv::CAP_PROP_POS_FRAMES, ...)
+    or one from @ref videoio_flags_others
+    @return Value for the specified property. Value 0 is returned when querying a property that is
+    not supported by the backend used by the VideoCapture instance.
 
-    /** @overload
+    @note Reading / writing properties involves many layers. Some unexpected results might happen
+    along this chain.
+    @code{.txt}
+    VideoCapture -> API Backend -> Operating System -> Device Driver -> Device Hardware
+    @endcode
+    The returned value might be different from what is really used by the device, or it could be encoded
+    using device dependent rules (eg. steps or percentage). Effective behaviour depends on the device
+    driver and API Backend.
 
-    @param filename name of the opened video file (eg. video.avi) or image sequence (eg.
-    img_%02d.jpg, which will read samples like img_00.jpg, img_01.jpg, img_02.jpg, ...)
+    */
+    CV_WRAP virtual double get(int propId) const;
 
-    @param apiPreference preferred Capture API to use. Can be used to enforce a specific reader
-    implementation if multiple are available: e.g. CAP_FFMPEG or CAP_IMAGES
+    /** @brief Returns used backend API name
 
-    The methods first call VideoCapture::release to close the already opened file or camera.
+     @note Stream should be opened.
      */
-    CV_WRAP virtual bool open(const String& filename, int apiPreference);
+    CV_WRAP String getBackendName() const;
+
+    /** Switches exceptions mode
+     *
+     * methods raise exceptions if not successful instead of returning an error code
+     */
+    CV_WRAP void setExceptionMode(bool enable) { throwOnFail = enable; }
+
+    /// query if exception mode is active
+    CV_WRAP bool getExceptionMode() { return throwOnFail; }
+
+
+    /** @brief Wait for ready frames from VideoCapture.
+
+    @param streams input video streams
+    @param readyIndex stream indexes with grabbed frames (ready to use .retrieve() to fetch actual frame)
+    @param timeoutNs number of nanoseconds (0 - infinite)
+    @return `true` if readyIndex is not empty
+
+    @throws Exception %Exception on stream errors (check .isOpened() to filter out malformed streams) or VideoCapture type is not supported
+
+    The primary use of the function is in multi-camera environments.
+    The method fills the ready state vector and grabs a video frame if the camera is ready.
+
+    After this call use VideoCapture::retrieve() to decode and fetch frame data.
+    */
+    static /*CV_WRAP*/
+    bool waitAny(
+            const std::vector<VideoCapture>& streams,
+            CV_OUT std::vector<int>& readyIndex,
+            int64 timeoutNs = 0);
 
 protected:
     Ptr<CvCapture> cap;
     Ptr<IVideoCapture> icap;
+    bool throwOnFail;
+
+    friend class internal::VideoCapturePrivateAccessor;
 };
 
 class IVideoWriter;
 
+/** @example samples/cpp/tutorial_code/videoio/video-write/video-write.cpp
+Check @ref tutorial_video_write "the corresponding tutorial" for more details
+*/
+
+/** @example samples/cpp/videowriter_basic.cpp
+An example using VideoCapture and VideoWriter class
+*/
+
 /** @brief Video writer class.
- */
+
+The class provides C++ API for writing video files or image sequences.
+*/
 class CV_EXPORTS_W VideoWriter
 {
 public:
-    /** @brief VideoWriter constructors
+    /** @brief Default constructors
 
-    The constructors/functions initialize video writers. On Linux FFMPEG is used to write videos; on
-    Windows FFMPEG or VFW is used; on MacOSX QTKit is used.
+    The constructors/functions initialize video writers.
+    -   On Linux FFMPEG is used to write videos;
+    -   On Windows FFMPEG or MSMF or DSHOW is used;
+    -   On MacOSX AVFoundation is used.
      */
     CV_WRAP VideoWriter();
 
@@ -729,69 +877,112 @@ class CV_EXPORTS_W VideoWriter
     @param frameSize Size of the video frames.
     @param isColor If it is not zero, the encoder will expect and encode color frames, otherwise it
     will work with grayscale frames (the flag is currently supported on Windows only).
+
+    @b Tips:
+    - With some backends `fourcc=-1` pops up the codec selection dialog from the system.
+    - To save image sequence use a proper filename (eg. `img_%02d.jpg`) and `fourcc=0`
+      OR `fps=0`. Use uncompressed image format (eg. `img_%02d.BMP`) to save raw frames.
+    - Most codecs are lossy. If you want a lossless video file you need to use a lossless codec
+      (eg. FFMPEG FFV1, Huffman HFYU, Lagarith LAGS, etc...)
+    - If FFMPEG is enabled, using `fourcc=0; fps=0;` you can create an uncompressed (raw) video file.
     */
     CV_WRAP VideoWriter(const String& filename, int fourcc, double fps,
                 Size frameSize, bool isColor = true);
 
+    /** @overload
+    The `apiPreference` parameter allows to specify API backends to use. Can be used to enforce a specific writer implementation
+    if multiple are available: e.g. cv::CAP_FFMPEG or cv::CAP_GSTREAMER.
+     */
+    CV_WRAP VideoWriter(const String& filename, int apiPreference, int fourcc, double fps,
+                Size frameSize, bool isColor = true);
+
+    /** @brief Default destructor
+
+    The method first calls VideoWriter::release to close the already opened file.
+    */
     virtual ~VideoWriter();
 
     /** @brief Initializes or reinitializes video writer.
 
     The method opens video writer. Parameters are the same as in the constructor
     VideoWriter::VideoWriter.
+    @return `true` if video writer has been successfully initialized
+
+    The method first calls VideoWriter::release to close the already opened file.
      */
     CV_WRAP virtual bool open(const String& filename, int fourcc, double fps,
                       Size frameSize, bool isColor = true);
 
+    /** @overload
+     */
+    CV_WRAP bool open(const String& filename, int apiPreference, int fourcc, double fps,
+                      Size frameSize, bool isColor = true);
+
     /** @brief Returns true if video writer has been successfully initialized.
     */
     CV_WRAP virtual bool isOpened() const;
 
     /** @brief Closes the video writer.
 
-    The methods are automatically called by subsequent VideoWriter::open and by the VideoWriter
+    The method is automatically called by subsequent VideoWriter::open and by the VideoWriter
     destructor.
      */
     CV_WRAP virtual void release();
+
+    /** @brief Stream operator to write the next video frame.
+    @sa write
+    */
     virtual VideoWriter& operator << (const Mat& image);
 
+    /** @overload
+    @sa write
+    */
+    virtual VideoWriter& operator << (const UMat& image);
+
     /** @brief Writes the next video frame
 
-    @param image The written frame
+    @param image The written frame. In general, color images are expected in BGR format.
 
-    The functions/methods write the specified image to video file. It must have the same size as has
+    The function/method writes the specified image to video file. It must have the same size as has
     been specified when opening the video writer.
      */
-    CV_WRAP virtual void write(const Mat& image);
+    CV_WRAP virtual void write(InputArray image);
 
     /** @brief Sets a property in the VideoWriter.
 
-     @param propId Property identifier. It can be one of the following:
-     -   **VIDEOWRITER_PROP_QUALITY** Quality (0..100%) of the videostream encoded. Can be adjusted dynamically in some codecs.
-     -   **VIDEOWRITER_PROP_NSTRIPES** Number of stripes for parallel encoding
+     @param propId Property identifier from cv::VideoWriterProperties (eg. cv::VIDEOWRITER_PROP_QUALITY)
+     or one of @ref videoio_flags_others
+
      @param value Value of the property.
+     @return  `true` if the property is supported by the backend used by the VideoWriter instance.
      */
     CV_WRAP virtual bool set(int propId, double value);
 
     /** @brief Returns the specified VideoWriter property
 
-     @param propId Property identifier. It can be one of the following:
-     -   **VIDEOWRITER_PROP_QUALITY** Current quality of the encoded videostream.
-     -   **VIDEOWRITER_PROP_FRAMEBYTES** (Read-only) Size of just encoded video frame; note that the encoding order may be different from representation order.
-     -   **VIDEOWRITER_PROP_NSTRIPES** Number of stripes for parallel encoding
+     @param propId Property identifier from cv::VideoWriterProperties (eg. cv::VIDEOWRITER_PROP_QUALITY)
+     or one of @ref videoio_flags_others
 
-     @note When querying a property that is not supported by the backend used by the VideoWriter
-     class, value 0 is returned.
+     @return Value for the specified property. Value 0 is returned when querying a property that is
+     not supported by the backend used by the VideoWriter instance.
      */
     CV_WRAP virtual double get(int propId) const;
 
     /** @brief Concatenates 4 chars to a fourcc code
 
+    @return a fourcc code
+
     This static method constructs the fourcc code of the codec to be used in the constructor
     VideoWriter::VideoWriter or VideoWriter::open.
      */
     CV_WRAP static int fourcc(char c1, char c2, char c3, char c4);
 
+    /** @brief Returns used backend API name
+
+     @note Stream should be opened.
+     */
+    CV_WRAP String getBackendName() const;
+
 protected:
     Ptr<CvVideoWriter> writer;
     Ptr<IVideoWriter> iwriter;
@@ -800,11 +991,11 @@ class CV_EXPORTS_W VideoWriter
                                     Size frameSize, bool isColor = true);
 };
 
-template<> CV_EXPORTS void DefaultDeleter<CvCapture>::operator ()(CvCapture* obj) const;
-template<> CV_EXPORTS void DefaultDeleter<CvVideoWriter>::operator ()(CvVideoWriter* obj) const;
+template<> struct DefaultDeleter<CvCapture>{ CV_EXPORTS void operator ()(CvCapture* obj) const; };
+template<> struct DefaultDeleter<CvVideoWriter>{ CV_EXPORTS void operator ()(CvVideoWriter* obj) const; };
 
 //! @} videoio
 
 } // cv
 
-#endif //__OPENCV_VIDEOIO_HPP__
+#endif //OPENCV_VIDEOIO_HPP
diff --git a/IPL/include/opencv/opencv2/videoio/cap_ios.h b/IPL/include/opencv/opencv2/videoio/cap_ios.h
index 1a9875b..207ad46 100644
--- a/IPL/include/opencv/opencv2/videoio/cap_ios.h
+++ b/IPL/include/opencv/opencv2/videoio/cap_ios.h
@@ -39,33 +39,15 @@
 
 @class CvAbstractCamera;
 
-@interface CvAbstractCamera : NSObject
+CV_EXPORTS @interface CvAbstractCamera : NSObject
 {
-    AVCaptureSession* captureSession;
-    AVCaptureConnection* videoCaptureConnection;
-    AVCaptureVideoPreviewLayer *captureVideoPreviewLayer;
-
     UIDeviceOrientation currentDeviceOrientation;
 
     BOOL cameraAvailable;
-    BOOL captureSessionLoaded;
-    BOOL running;
-    BOOL useAVCaptureVideoPreviewLayer;
-
-    AVCaptureDevicePosition defaultAVCaptureDevicePosition;
-    AVCaptureVideoOrientation defaultAVCaptureVideoOrientation;
-    NSString *const defaultAVCaptureSessionPreset;
-
-    int defaultFPS;
-
-    UIView* parentView;
-
-    int imageWidth;
-    int imageHeight;
 }
 
-@property (nonatomic, retain) AVCaptureSession* captureSession;
-@property (nonatomic, retain) AVCaptureConnection* videoCaptureConnection;
+@property (nonatomic, strong) AVCaptureSession* captureSession;
+@property (nonatomic, strong) AVCaptureConnection* videoCaptureConnection;
 
 @property (nonatomic, readonly) BOOL running;
 @property (nonatomic, readonly) BOOL captureSessionLoaded;
@@ -80,24 +62,24 @@
 @property (nonatomic, assign) int imageWidth;
 @property (nonatomic, assign) int imageHeight;
 
-@property (nonatomic, retain) UIView* parentView;
+@property (nonatomic, strong) UIView* parentView;
 
-- (void)start;
-- (void)stop;
-- (void)switchCameras;
+- CV_UNUSED(start);
+- CV_UNUSED(stop);
+- CV_UNUSED(switchCameras);
 
 - (id)initWithParentView:(UIView*)parent;
 
-- (void)createCaptureOutput;
-- (void)createVideoPreviewLayer;
-- (void)updateOrientation;
+- CV_UNUSED(createCaptureOutput);
+- CV_UNUSED(createVideoPreviewLayer);
+- CV_UNUSED(updateOrientation);
 
-- (void)lockFocus;
-- (void)unlockFocus;
-- (void)lockExposure;
-- (void)unlockExposure;
-- (void)lockBalance;
-- (void)unlockBalance;
+- CV_UNUSED(lockFocus);
+- CV_UNUSED(unlockFocus);
+- CV_UNUSED(lockExposure);
+- CV_UNUSED(unlockExposure);
+- CV_UNUSED(lockBalance);
+- CV_UNUSED(unlockBalance);
 
 @end
 
@@ -105,7 +87,7 @@
 
 @class CvVideoCamera;
 
-@protocol CvVideoCameraDelegate <NSObject>
+CV_EXPORTS @protocol CvVideoCameraDelegate <NSObject>
 
 #ifdef __cplusplus
 // delegate method for processing image frames
@@ -114,37 +96,29 @@
 
 @end
 
-@interface CvVideoCamera : CvAbstractCamera<AVCaptureVideoDataOutputSampleBufferDelegate>
+CV_EXPORTS @interface CvVideoCamera : CvAbstractCamera<AVCaptureVideoDataOutputSampleBufferDelegate>
 {
     AVCaptureVideoDataOutput *videoDataOutput;
 
     dispatch_queue_t videoDataOutputQueue;
     CALayer *customPreviewLayer;
 
-    BOOL grayscaleMode;
-
-    BOOL recordVideo;
-    BOOL rotateVideo;
-    AVAssetWriterInput* recordAssetWriterInput;
-    AVAssetWriterInputPixelBufferAdaptor* recordPixelBufferAdaptor;
-    AVAssetWriter* recordAssetWriter;
-
     CMTime lastSampleTime;
 
 }
 
-@property (nonatomic, assign) id<CvVideoCameraDelegate> delegate;
+@property (nonatomic, weak) id<CvVideoCameraDelegate> delegate;
 @property (nonatomic, assign) BOOL grayscaleMode;
 
 @property (nonatomic, assign) BOOL recordVideo;
 @property (nonatomic, assign) BOOL rotateVideo;
-@property (nonatomic, retain) AVAssetWriterInput* recordAssetWriterInput;
-@property (nonatomic, retain) AVAssetWriterInputPixelBufferAdaptor* recordPixelBufferAdaptor;
-@property (nonatomic, retain) AVAssetWriter* recordAssetWriter;
+@property (nonatomic, strong) AVAssetWriterInput* recordAssetWriterInput;
+@property (nonatomic, strong) AVAssetWriterInputPixelBufferAdaptor* recordPixelBufferAdaptor;
+@property (nonatomic, strong) AVAssetWriter* recordAssetWriter;
 
 - (void)adjustLayoutToInterfaceOrientation:(UIInterfaceOrientation)interfaceOrientation;
-- (void)layoutPreviewLayer;
-- (void)saveVideo;
+- CV_UNUSED(layoutPreviewLayer);
+- CV_UNUSED(saveVideo);
 - (NSURL *)videoFileURL;
 - (NSString *)videoFileString;
 
@@ -155,21 +129,21 @@
 
 @class CvPhotoCamera;
 
-@protocol CvPhotoCameraDelegate <NSObject>
+CV_EXPORTS @protocol CvPhotoCameraDelegate <NSObject>
 
 - (void)photoCamera:(CvPhotoCamera*)photoCamera capturedImage:(UIImage *)image;
 - (void)photoCameraCancel:(CvPhotoCamera*)photoCamera;
 
 @end
 
-@interface CvPhotoCamera : CvAbstractCamera
+CV_EXPORTS @interface CvPhotoCamera : CvAbstractCamera
 {
     AVCaptureStillImageOutput *stillImageOutput;
 }
 
-@property (nonatomic, assign) id<CvPhotoCameraDelegate> delegate;
+@property (nonatomic, weak) id<CvPhotoCameraDelegate> delegate;
 
-- (void)takePicture;
+- CV_UNUSED(takePicture);
 
 @end
 
diff --git a/IPL/include/opencv/opencv2/videoio/legacy/constants_c.h b/IPL/include/opencv/opencv2/videoio/legacy/constants_c.h
new file mode 100644
index 0000000..d484353
--- /dev/null
+++ b/IPL/include/opencv/opencv2/videoio/legacy/constants_c.h
@@ -0,0 +1,434 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_VIDEOIO_LEGACY_CONSTANTS_H
+#define OPENCV_VIDEOIO_LEGACY_CONSTANTS_H
+
+enum
+{
+    CV_CAP_ANY      =0,     // autodetect
+
+    CV_CAP_MIL      =100,   // MIL proprietary drivers
+
+    CV_CAP_VFW      =200,   // platform native
+    CV_CAP_V4L      =200,
+    CV_CAP_V4L2     =200,
+
+    CV_CAP_FIREWARE =300,   // IEEE 1394 drivers
+    CV_CAP_FIREWIRE =300,
+    CV_CAP_IEEE1394 =300,
+    CV_CAP_DC1394   =300,
+    CV_CAP_CMU1394  =300,
+
+    CV_CAP_STEREO   =400,   // TYZX proprietary drivers
+    CV_CAP_TYZX     =400,
+    CV_TYZX_LEFT    =400,
+    CV_TYZX_RIGHT   =401,
+    CV_TYZX_COLOR   =402,
+    CV_TYZX_Z       =403,
+
+    CV_CAP_QT       =500,   // QuickTime
+
+    CV_CAP_UNICAP   =600,   // Unicap drivers
+
+    CV_CAP_DSHOW    =700,   // DirectShow (via videoInput)
+    CV_CAP_MSMF     =1400,  // Microsoft Media Foundation (via videoInput)
+
+    CV_CAP_PVAPI    =800,   // PvAPI, Prosilica GigE SDK
+
+    CV_CAP_OPENNI   =900,   // OpenNI (for Kinect)
+    CV_CAP_OPENNI_ASUS =910,   // OpenNI (for Asus Xtion)
+
+    CV_CAP_ANDROID  =1000,  // Android - not used
+    CV_CAP_ANDROID_BACK =CV_CAP_ANDROID+99, // Android back camera - not used
+    CV_CAP_ANDROID_FRONT =CV_CAP_ANDROID+98, // Android front camera - not used
+
+    CV_CAP_XIAPI    =1100,   // XIMEA Camera API
+
+    CV_CAP_AVFOUNDATION = 1200,  // AVFoundation framework for iOS (OS X Lion will have the same API)
+
+    CV_CAP_GIGANETIX = 1300,  // Smartek Giganetix GigEVisionSDK
+
+    CV_CAP_INTELPERC = 1500, // Intel Perceptual Computing
+
+    CV_CAP_OPENNI2 = 1600,   // OpenNI2 (for Kinect)
+    CV_CAP_GPHOTO2 = 1700,
+    CV_CAP_GSTREAMER = 1800, // GStreamer
+    CV_CAP_FFMPEG = 1900,    // FFMPEG
+    CV_CAP_IMAGES = 2000,    // OpenCV Image Sequence (e.g. img_%02d.jpg)
+
+    CV_CAP_ARAVIS = 2100     // Aravis GigE SDK
+};
+
+enum
+{
+    // modes of the controlling registers (can be: auto, manual, auto single push, absolute; the latter is allowed with any other mode)
+    // every feature can have only one mode turned on at a time
+    CV_CAP_PROP_DC1394_OFF         = -4,  //turn the feature off (not controlled manually nor automatically)
+    CV_CAP_PROP_DC1394_MODE_MANUAL = -3, //set automatically when a value of the feature is set by the user
+    CV_CAP_PROP_DC1394_MODE_AUTO = -2,
+    CV_CAP_PROP_DC1394_MODE_ONE_PUSH_AUTO = -1,
+    CV_CAP_PROP_POS_MSEC       =0,
+    CV_CAP_PROP_POS_FRAMES     =1,
+    CV_CAP_PROP_POS_AVI_RATIO  =2,
+    CV_CAP_PROP_FRAME_WIDTH    =3,
+    CV_CAP_PROP_FRAME_HEIGHT   =4,
+    CV_CAP_PROP_FPS            =5,
+    CV_CAP_PROP_FOURCC         =6,
+    CV_CAP_PROP_FRAME_COUNT    =7,
+    CV_CAP_PROP_FORMAT         =8,
+    CV_CAP_PROP_MODE           =9,
+    CV_CAP_PROP_BRIGHTNESS    =10,
+    CV_CAP_PROP_CONTRAST      =11,
+    CV_CAP_PROP_SATURATION    =12,
+    CV_CAP_PROP_HUE           =13,
+    CV_CAP_PROP_GAIN          =14,
+    CV_CAP_PROP_EXPOSURE      =15,
+    CV_CAP_PROP_CONVERT_RGB   =16,
+    CV_CAP_PROP_WHITE_BALANCE_BLUE_U =17,
+    CV_CAP_PROP_RECTIFICATION =18,
+    CV_CAP_PROP_MONOCHROME    =19,
+    CV_CAP_PROP_SHARPNESS     =20,
+    CV_CAP_PROP_AUTO_EXPOSURE =21, // exposure control done by camera,
+                                   // user can adjust reference level
+                                   // using this feature
+    CV_CAP_PROP_GAMMA         =22,
+    CV_CAP_PROP_TEMPERATURE   =23,
+    CV_CAP_PROP_TRIGGER       =24,
+    CV_CAP_PROP_TRIGGER_DELAY =25,
+    CV_CAP_PROP_WHITE_BALANCE_RED_V =26,
+    CV_CAP_PROP_ZOOM          =27,
+    CV_CAP_PROP_FOCUS         =28,
+    CV_CAP_PROP_GUID          =29,
+    CV_CAP_PROP_ISO_SPEED     =30,
+    CV_CAP_PROP_MAX_DC1394    =31,
+    CV_CAP_PROP_BACKLIGHT     =32,
+    CV_CAP_PROP_PAN           =33,
+    CV_CAP_PROP_TILT          =34,
+    CV_CAP_PROP_ROLL          =35,
+    CV_CAP_PROP_IRIS          =36,
+    CV_CAP_PROP_SETTINGS      =37,
+    CV_CAP_PROP_BUFFERSIZE    =38,
+    CV_CAP_PROP_AUTOFOCUS     =39,
+    CV_CAP_PROP_SAR_NUM       =40,
+    CV_CAP_PROP_SAR_DEN       =41,
+
+    CV_CAP_PROP_AUTOGRAB      =1024, // property for videoio class CvCapture_Android only
+    CV_CAP_PROP_SUPPORTED_PREVIEW_SIZES_STRING=1025, // readonly, tricky property, returns const char* indeed
+    CV_CAP_PROP_PREVIEW_FORMAT=1026, // readonly, tricky property, returns const char* indeed
+
+    // OpenNI map generators
+    CV_CAP_OPENNI_DEPTH_GENERATOR = 1 << 31,
+    CV_CAP_OPENNI_IMAGE_GENERATOR = 1 << 30,
+    CV_CAP_OPENNI_IR_GENERATOR    = 1 << 29,
+    CV_CAP_OPENNI_GENERATORS_MASK = CV_CAP_OPENNI_DEPTH_GENERATOR + CV_CAP_OPENNI_IMAGE_GENERATOR + CV_CAP_OPENNI_IR_GENERATOR,
+
+    // Properties of cameras available through OpenNI interfaces
+    CV_CAP_PROP_OPENNI_OUTPUT_MODE     = 100,
+    CV_CAP_PROP_OPENNI_FRAME_MAX_DEPTH = 101, // in mm
+    CV_CAP_PROP_OPENNI_BASELINE        = 102, // in mm
+    CV_CAP_PROP_OPENNI_FOCAL_LENGTH    = 103, // in pixels
+    CV_CAP_PROP_OPENNI_REGISTRATION    = 104, // flag
+    CV_CAP_PROP_OPENNI_REGISTRATION_ON = CV_CAP_PROP_OPENNI_REGISTRATION, // flag that synchronizes the remapping depth map to image map
+                                                                          // by changing depth generator's view point (if the flag is "on") or
+                                                                          // sets this view point to its normal one (if the flag is "off").
+    CV_CAP_PROP_OPENNI_APPROX_FRAME_SYNC = 105,
+    CV_CAP_PROP_OPENNI_MAX_BUFFER_SIZE   = 106,
+    CV_CAP_PROP_OPENNI_CIRCLE_BUFFER     = 107,
+    CV_CAP_PROP_OPENNI_MAX_TIME_DURATION = 108,
+
+    CV_CAP_PROP_OPENNI_GENERATOR_PRESENT = 109,
+    CV_CAP_PROP_OPENNI2_SYNC = 110,
+    CV_CAP_PROP_OPENNI2_MIRROR = 111,
+
+    CV_CAP_OPENNI_IMAGE_GENERATOR_PRESENT         = CV_CAP_OPENNI_IMAGE_GENERATOR + CV_CAP_PROP_OPENNI_GENERATOR_PRESENT,
+    CV_CAP_OPENNI_IMAGE_GENERATOR_OUTPUT_MODE     = CV_CAP_OPENNI_IMAGE_GENERATOR + CV_CAP_PROP_OPENNI_OUTPUT_MODE,
+    CV_CAP_OPENNI_DEPTH_GENERATOR_PRESENT         = CV_CAP_OPENNI_DEPTH_GENERATOR + CV_CAP_PROP_OPENNI_GENERATOR_PRESENT,
+    CV_CAP_OPENNI_DEPTH_GENERATOR_BASELINE        = CV_CAP_OPENNI_DEPTH_GENERATOR + CV_CAP_PROP_OPENNI_BASELINE,
+    CV_CAP_OPENNI_DEPTH_GENERATOR_FOCAL_LENGTH    = CV_CAP_OPENNI_DEPTH_GENERATOR + CV_CAP_PROP_OPENNI_FOCAL_LENGTH,
+    CV_CAP_OPENNI_DEPTH_GENERATOR_REGISTRATION    = CV_CAP_OPENNI_DEPTH_GENERATOR + CV_CAP_PROP_OPENNI_REGISTRATION,
+    CV_CAP_OPENNI_DEPTH_GENERATOR_REGISTRATION_ON = CV_CAP_OPENNI_DEPTH_GENERATOR_REGISTRATION,
+    CV_CAP_OPENNI_IR_GENERATOR_PRESENT            = CV_CAP_OPENNI_IR_GENERATOR + CV_CAP_PROP_OPENNI_GENERATOR_PRESENT,
+
+    // Properties of cameras available through GStreamer interface
+    CV_CAP_GSTREAMER_QUEUE_LENGTH           = 200, // default is 1
+
+    // PVAPI
+    CV_CAP_PROP_PVAPI_MULTICASTIP           = 300, // IP to enable multicast master mode; 0 to disable multicast
+    CV_CAP_PROP_PVAPI_FRAMESTARTTRIGGERMODE = 301, // FrameStartTriggerMode: Determines how a frame is initiated
+    CV_CAP_PROP_PVAPI_DECIMATIONHORIZONTAL  = 302, // Horizontal sub-sampling of the image
+    CV_CAP_PROP_PVAPI_DECIMATIONVERTICAL    = 303, // Vertical sub-sampling of the image
+    CV_CAP_PROP_PVAPI_BINNINGX              = 304, // Horizontal binning factor
+    CV_CAP_PROP_PVAPI_BINNINGY              = 305, // Vertical binning factor
+    CV_CAP_PROP_PVAPI_PIXELFORMAT           = 306, // Pixel format
+
+    // Properties of cameras available through XIMEA SDK interface
+    CV_CAP_PROP_XI_DOWNSAMPLING                                 = 400, // Change image resolution by binning or skipping.
+    CV_CAP_PROP_XI_DATA_FORMAT                                  = 401, // Output data format.
+    CV_CAP_PROP_XI_OFFSET_X                                     = 402, // Horizontal offset from the origin to the area of interest (in pixels).
+    CV_CAP_PROP_XI_OFFSET_Y                                     = 403, // Vertical offset from the origin to the area of interest (in pixels).
+    CV_CAP_PROP_XI_TRG_SOURCE                                   = 404, // Defines source of trigger.
+    CV_CAP_PROP_XI_TRG_SOFTWARE                                 = 405, // Generates an internal trigger. PRM_TRG_SOURCE must be set to TRG_SOFTWARE.
+    CV_CAP_PROP_XI_GPI_SELECTOR                                 = 406, // Selects general purpose input
+    CV_CAP_PROP_XI_GPI_MODE                                     = 407, // Set general purpose input mode
+    CV_CAP_PROP_XI_GPI_LEVEL                                    = 408, // Get general purpose level
+    CV_CAP_PROP_XI_GPO_SELECTOR                                 = 409, // Selects general purpose output
+    CV_CAP_PROP_XI_GPO_MODE                                     = 410, // Set general purpose output mode
+    CV_CAP_PROP_XI_LED_SELECTOR                                 = 411, // Selects camera signalling LED
+    CV_CAP_PROP_XI_LED_MODE                                     = 412, // Define camera signalling LED functionality
+    CV_CAP_PROP_XI_MANUAL_WB                                    = 413, // Calculates White Balance(must be called during acquisition)
+    CV_CAP_PROP_XI_AUTO_WB                                      = 414, // Automatic white balance
+    CV_CAP_PROP_XI_AEAG                                         = 415, // Automatic exposure/gain
+    CV_CAP_PROP_XI_EXP_PRIORITY                                 = 416, // Exposure priority (0.5 - exposure 50%, gain 50%).
+    CV_CAP_PROP_XI_AE_MAX_LIMIT                                 = 417, // Maximum limit of exposure in AEAG procedure
+    CV_CAP_PROP_XI_AG_MAX_LIMIT                                 = 418,  // Maximum limit of gain in AEAG procedure
+    CV_CAP_PROP_XI_AEAG_LEVEL                                   = 419, // Average intensity of output signal AEAG should achieve(in %)
+    CV_CAP_PROP_XI_TIMEOUT                                      = 420, // Image capture timeout in milliseconds
+    CV_CAP_PROP_XI_EXPOSURE                                     = 421, // Exposure time in microseconds
+    CV_CAP_PROP_XI_EXPOSURE_BURST_COUNT                         = 422, // Sets the number of times of exposure in one frame.
+    CV_CAP_PROP_XI_GAIN_SELECTOR                                = 423, // Gain selector for parameter Gain allows to select different type of gains.
+    CV_CAP_PROP_XI_GAIN                                         = 424, // Gain in dB
+    CV_CAP_PROP_XI_DOWNSAMPLING_TYPE                            = 426, // Change image downsampling type.
+    CV_CAP_PROP_XI_BINNING_SELECTOR                             = 427, // Binning engine selector.
+    CV_CAP_PROP_XI_BINNING_VERTICAL                             = 428, // Vertical Binning - number of vertical photo-sensitive cells to combine together.
+    CV_CAP_PROP_XI_BINNING_HORIZONTAL                           = 429, // Horizontal Binning - number of horizontal photo-sensitive cells to combine together.
+    CV_CAP_PROP_XI_BINNING_PATTERN                              = 430, // Binning pattern type.
+    CV_CAP_PROP_XI_DECIMATION_SELECTOR                          = 431, // Decimation engine selector.
+    CV_CAP_PROP_XI_DECIMATION_VERTICAL                          = 432, // Vertical Decimation - vertical sub-sampling of the image - reduces the vertical resolution of the image by the specified vertical decimation factor.
+    CV_CAP_PROP_XI_DECIMATION_HORIZONTAL                        = 433, // Horizontal Decimation - horizontal sub-sampling of the image - reduces the horizontal resolution of the image by the specified horizontal decimation factor.
+    CV_CAP_PROP_XI_DECIMATION_PATTERN                           = 434, // Decimation pattern type.
+    CV_CAP_PROP_XI_TEST_PATTERN_GENERATOR_SELECTOR              = 587, // Selects which test pattern generator is controlled by the TestPattern feature.
+    CV_CAP_PROP_XI_TEST_PATTERN                                 = 588, // Selects which test pattern type is generated by the selected generator.
+    CV_CAP_PROP_XI_IMAGE_DATA_FORMAT                            = 435, // Output data format.
+    CV_CAP_PROP_XI_SHUTTER_TYPE                                 = 436, // Change sensor shutter type(CMOS sensor).
+    CV_CAP_PROP_XI_SENSOR_TAPS                                  = 437, // Number of taps
+    CV_CAP_PROP_XI_AEAG_ROI_OFFSET_X                            = 439, // Automatic exposure/gain ROI offset X
+    CV_CAP_PROP_XI_AEAG_ROI_OFFSET_Y                            = 440, // Automatic exposure/gain ROI offset Y
+    CV_CAP_PROP_XI_AEAG_ROI_WIDTH                               = 441, // Automatic exposure/gain ROI Width
+    CV_CAP_PROP_XI_AEAG_ROI_HEIGHT                              = 442, // Automatic exposure/gain ROI Height
+    CV_CAP_PROP_XI_BPC                                          = 445, // Correction of bad pixels
+    CV_CAP_PROP_XI_WB_KR                                        = 448, // White balance red coefficient
+    CV_CAP_PROP_XI_WB_KG                                        = 449, // White balance green coefficient
+    CV_CAP_PROP_XI_WB_KB                                        = 450, // White balance blue coefficient
+    CV_CAP_PROP_XI_WIDTH                                        = 451, // Width of the Image provided by the device (in pixels).
+    CV_CAP_PROP_XI_HEIGHT                                       = 452, // Height of the Image provided by the device (in pixels).
+    CV_CAP_PROP_XI_REGION_SELECTOR                              = 589, // Selects Region in Multiple ROI which parameters are set by width, height, ... ,region mode
+    CV_CAP_PROP_XI_REGION_MODE                                  = 595, // Activates/deactivates Region selected by Region Selector
+    CV_CAP_PROP_XI_LIMIT_BANDWIDTH                              = 459, // Set/get bandwidth(datarate)(in Megabits)
+    CV_CAP_PROP_XI_SENSOR_DATA_BIT_DEPTH                        = 460, // Sensor output data bit depth.
+    CV_CAP_PROP_XI_OUTPUT_DATA_BIT_DEPTH                        = 461, // Device output data bit depth.
+    CV_CAP_PROP_XI_IMAGE_DATA_BIT_DEPTH                         = 462, // bitdepth of data returned by function xiGetImage
+    CV_CAP_PROP_XI_OUTPUT_DATA_PACKING                          = 463, // Device output data packing (or grouping) enabled. Packing could be enabled if output_data_bit_depth > 8 and packing capability is available.
+    CV_CAP_PROP_XI_OUTPUT_DATA_PACKING_TYPE                     = 464, // Data packing type. Some cameras supports only specific packing type.
+    CV_CAP_PROP_XI_IS_COOLED                                    = 465, // Returns 1 for cameras that support cooling.
+    CV_CAP_PROP_XI_COOLING                                      = 466, // Start camera cooling.
+    CV_CAP_PROP_XI_TARGET_TEMP                                  = 467, // Set sensor target temperature for cooling.
+    CV_CAP_PROP_XI_CHIP_TEMP                                    = 468, // Camera sensor temperature
+    CV_CAP_PROP_XI_HOUS_TEMP                                    = 469, // Camera housing temperature
+    CV_CAP_PROP_XI_HOUS_BACK_SIDE_TEMP                          = 590, // Camera housing back side temperature
+    CV_CAP_PROP_XI_SENSOR_BOARD_TEMP                            = 596, // Camera sensor board temperature
+    CV_CAP_PROP_XI_CMS                                          = 470, // Mode of color management system.
+    CV_CAP_PROP_XI_APPLY_CMS                                    = 471, // Enable applying of CMS profiles to xiGetImage (see XI_PRM_INPUT_CMS_PROFILE, XI_PRM_OUTPUT_CMS_PROFILE).
+    CV_CAP_PROP_XI_IMAGE_IS_COLOR                               = 474, // Returns 1 for color cameras.
+    CV_CAP_PROP_XI_COLOR_FILTER_ARRAY                           = 475, // Returns color filter array type of RAW data.
+    CV_CAP_PROP_XI_GAMMAY                                       = 476, // Luminosity gamma
+    CV_CAP_PROP_XI_GAMMAC                                       = 477, // Chromaticity gamma
+    CV_CAP_PROP_XI_SHARPNESS                                    = 478, // Sharpness Strength
+    CV_CAP_PROP_XI_CC_MATRIX_00                                 = 479, // Color Correction Matrix element [0][0]
+    CV_CAP_PROP_XI_CC_MATRIX_01                                 = 480, // Color Correction Matrix element [0][1]
+    CV_CAP_PROP_XI_CC_MATRIX_02                                 = 481, // Color Correction Matrix element [0][2]
+    CV_CAP_PROP_XI_CC_MATRIX_03                                 = 482, // Color Correction Matrix element [0][3]
+    CV_CAP_PROP_XI_CC_MATRIX_10                                 = 483, // Color Correction Matrix element [1][0]
+    CV_CAP_PROP_XI_CC_MATRIX_11                                 = 484, // Color Correction Matrix element [1][1]
+    CV_CAP_PROP_XI_CC_MATRIX_12                                 = 485, // Color Correction Matrix element [1][2]
+    CV_CAP_PROP_XI_CC_MATRIX_13                                 = 486, // Color Correction Matrix element [1][3]
+    CV_CAP_PROP_XI_CC_MATRIX_20                                 = 487, // Color Correction Matrix element [2][0]
+    CV_CAP_PROP_XI_CC_MATRIX_21                                 = 488, // Color Correction Matrix element [2][1]
+    CV_CAP_PROP_XI_CC_MATRIX_22                                 = 489, // Color Correction Matrix element [2][2]
+    CV_CAP_PROP_XI_CC_MATRIX_23                                 = 490, // Color Correction Matrix element [2][3]
+    CV_CAP_PROP_XI_CC_MATRIX_30                                 = 491, // Color Correction Matrix element [3][0]
+    CV_CAP_PROP_XI_CC_MATRIX_31                                 = 492, // Color Correction Matrix element [3][1]
+    CV_CAP_PROP_XI_CC_MATRIX_32                                 = 493, // Color Correction Matrix element [3][2]
+    CV_CAP_PROP_XI_CC_MATRIX_33                                 = 494, // Color Correction Matrix element [3][3]
+    CV_CAP_PROP_XI_DEFAULT_CC_MATRIX                            = 495, // Set default Color Correction Matrix
+    CV_CAP_PROP_XI_TRG_SELECTOR                                 = 498, // Selects the type of trigger.
+    CV_CAP_PROP_XI_ACQ_FRAME_BURST_COUNT                        = 499, // Sets number of frames acquired by burst. This burst is used only if trigger is set to FrameBurstStart
+    CV_CAP_PROP_XI_DEBOUNCE_EN                                  = 507, // Enable/Disable debounce to selected GPI
+    CV_CAP_PROP_XI_DEBOUNCE_T0                                  = 508, // Debounce time (x * 10us)
+    CV_CAP_PROP_XI_DEBOUNCE_T1                                  = 509, // Debounce time (x * 10us)
+    CV_CAP_PROP_XI_DEBOUNCE_POL                                 = 510, // Debounce polarity (pol = 1 t0 - falling edge, t1 - rising edge)
+    CV_CAP_PROP_XI_LENS_MODE                                    = 511, // Status of lens control interface. This shall be set to XI_ON before any Lens operations.
+    CV_CAP_PROP_XI_LENS_APERTURE_VALUE                          = 512, // Current lens aperture value in stops. Examples: 2.8, 4, 5.6, 8, 11
+    CV_CAP_PROP_XI_LENS_FOCUS_MOVEMENT_VALUE                    = 513, // Lens current focus movement value to be used by XI_PRM_LENS_FOCUS_MOVE in motor steps.
+    CV_CAP_PROP_XI_LENS_FOCUS_MOVE                              = 514, // Moves lens focus motor by steps set in XI_PRM_LENS_FOCUS_MOVEMENT_VALUE.
+    CV_CAP_PROP_XI_LENS_FOCUS_DISTANCE                          = 515, // Lens focus distance in cm.
+    CV_CAP_PROP_XI_LENS_FOCAL_LENGTH                            = 516, // Lens focal distance in mm.
+    CV_CAP_PROP_XI_LENS_FEATURE_SELECTOR                        = 517, // Selects the current feature which is accessible by XI_PRM_LENS_FEATURE.
+    CV_CAP_PROP_XI_LENS_FEATURE                                 = 518, // Allows access to lens feature value currently selected by XI_PRM_LENS_FEATURE_SELECTOR.
+    CV_CAP_PROP_XI_DEVICE_MODEL_ID                              = 521, // Return device model id
+    CV_CAP_PROP_XI_DEVICE_SN                                    = 522, // Return device serial number
+    CV_CAP_PROP_XI_IMAGE_DATA_FORMAT_RGB32_ALPHA                = 529, // The alpha channel of RGB32 output image format.
+    CV_CAP_PROP_XI_IMAGE_PAYLOAD_SIZE                           = 530, // Buffer size in bytes sufficient for output image returned by xiGetImage
+    CV_CAP_PROP_XI_TRANSPORT_PIXEL_FORMAT                       = 531, // Current format of pixels on transport layer.
+    CV_CAP_PROP_XI_SENSOR_CLOCK_FREQ_HZ                         = 532, // Sensor clock frequency in Hz.
+    CV_CAP_PROP_XI_SENSOR_CLOCK_FREQ_INDEX                      = 533, // Sensor clock frequency index. Sensors with selected frequencies can set the frequency only through this index.
+    CV_CAP_PROP_XI_SENSOR_OUTPUT_CHANNEL_COUNT                  = 534, // Number of output channels from sensor used for data transfer.
+    CV_CAP_PROP_XI_FRAMERATE                                    = 535, // Define framerate in Hz
+    CV_CAP_PROP_XI_COUNTER_SELECTOR                             = 536, // Select counter
+    CV_CAP_PROP_XI_COUNTER_VALUE                                = 537, // Counter status
+    CV_CAP_PROP_XI_ACQ_TIMING_MODE                              = 538, // Type of sensor frames timing.
+    CV_CAP_PROP_XI_AVAILABLE_BANDWIDTH                          = 539, // Calculate and return available interface bandwidth (in Megabits)
+    CV_CAP_PROP_XI_BUFFER_POLICY                                = 540, // Data move policy
+    CV_CAP_PROP_XI_LUT_EN                                       = 541, // Activates LUT.
+    CV_CAP_PROP_XI_LUT_INDEX                                    = 542, // Control the index (offset) of the coefficient to access in the LUT.
+    CV_CAP_PROP_XI_LUT_VALUE                                    = 543, // Value at entry LUTIndex of the LUT
+    CV_CAP_PROP_XI_TRG_DELAY                                    = 544, // Specifies the delay in microseconds (us) to apply after the trigger reception before activating it.
+    CV_CAP_PROP_XI_TS_RST_MODE                                  = 545, // Defines how time stamp reset engine will be armed
+    CV_CAP_PROP_XI_TS_RST_SOURCE                                = 546, // Defines which source will be used for timestamp reset. Writing this parameter will trigger settings of engine (arming)
+    CV_CAP_PROP_XI_IS_DEVICE_EXIST                              = 547, // Returns 1 if camera connected and works properly.
+    CV_CAP_PROP_XI_ACQ_BUFFER_SIZE                              = 548, // Acquisition buffer size in buffer_size_unit. Default bytes.
+    CV_CAP_PROP_XI_ACQ_BUFFER_SIZE_UNIT                         = 549, // Acquisition buffer size unit in bytes. Default 1. E.g. Value 1024 means that buffer_size is in KiBytes
+    CV_CAP_PROP_XI_ACQ_TRANSPORT_BUFFER_SIZE                    = 550, // Acquisition transport buffer size in bytes
+    CV_CAP_PROP_XI_BUFFERS_QUEUE_SIZE                           = 551, // Queue of field/frame buffers
+    CV_CAP_PROP_XI_ACQ_TRANSPORT_BUFFER_COMMIT                  = 552, // Number of buffers to commit to low level
+    CV_CAP_PROP_XI_RECENT_FRAME                                 = 553, // GetImage returns most recent frame
+    CV_CAP_PROP_XI_DEVICE_RESET                                 = 554, // Resets the camera to default state.
+    CV_CAP_PROP_XI_COLUMN_FPN_CORRECTION                        = 555, // Correction of column FPN
+    CV_CAP_PROP_XI_ROW_FPN_CORRECTION                           = 591, // Correction of row FPN
+    CV_CAP_PROP_XI_SENSOR_MODE                                  = 558, // Current sensor mode. Allows to select sensor mode by one integer. Setting of this parameter affects: image dimensions and downsampling.
+    CV_CAP_PROP_XI_HDR                                          = 559, // Enable High Dynamic Range feature.
+    CV_CAP_PROP_XI_HDR_KNEEPOINT_COUNT                          = 560, // The number of kneepoints in the PWLR.
+    CV_CAP_PROP_XI_HDR_T1                                       = 561, // position of first kneepoint(in % of XI_PRM_EXPOSURE)
+    CV_CAP_PROP_XI_HDR_T2                                       = 562, // position of second kneepoint (in % of XI_PRM_EXPOSURE)
+    CV_CAP_PROP_XI_KNEEPOINT1                                   = 563, // value of first kneepoint (% of sensor saturation)
+    CV_CAP_PROP_XI_KNEEPOINT2                                   = 564, // value of second kneepoint (% of sensor saturation)
+    CV_CAP_PROP_XI_IMAGE_BLACK_LEVEL                            = 565, // Last image black level counts. Can be used for Offline processing to recall it.
+    CV_CAP_PROP_XI_HW_REVISION                                  = 571, // Returns hardware revision number.
+    CV_CAP_PROP_XI_DEBUG_LEVEL                                  = 572, // Set debug level
+    CV_CAP_PROP_XI_AUTO_BANDWIDTH_CALCULATION                   = 573, // Automatic bandwidth calculation,
+    CV_CAP_PROP_XI_FFS_FILE_ID                                  = 594, // File number.
+    CV_CAP_PROP_XI_FFS_FILE_SIZE                                = 580, // Size of file.
+    CV_CAP_PROP_XI_FREE_FFS_SIZE                                = 581, // Size of free camera FFS.
+    CV_CAP_PROP_XI_USED_FFS_SIZE                                = 582, // Size of used camera FFS.
+    CV_CAP_PROP_XI_FFS_ACCESS_KEY                               = 583, // Setting of key enables file operations on some cameras.
+    CV_CAP_PROP_XI_SENSOR_FEATURE_SELECTOR                      = 585, // Selects the current feature which is accessible by XI_PRM_SENSOR_FEATURE_VALUE.
+    CV_CAP_PROP_XI_SENSOR_FEATURE_VALUE                         = 586, // Allows access to sensor feature value currently selected by XI_PRM_SENSOR_FEATURE_SELECTOR.
+
+
+    // Properties for Android cameras
+    CV_CAP_PROP_ANDROID_FLASH_MODE = 8001,
+    CV_CAP_PROP_ANDROID_FOCUS_MODE = 8002,
+    CV_CAP_PROP_ANDROID_WHITE_BALANCE = 8003,
+    CV_CAP_PROP_ANDROID_ANTIBANDING = 8004,
+    CV_CAP_PROP_ANDROID_FOCAL_LENGTH = 8005,
+    CV_CAP_PROP_ANDROID_FOCUS_DISTANCE_NEAR = 8006,
+    CV_CAP_PROP_ANDROID_FOCUS_DISTANCE_OPTIMAL = 8007,
+    CV_CAP_PROP_ANDROID_FOCUS_DISTANCE_FAR = 8008,
+    CV_CAP_PROP_ANDROID_EXPOSE_LOCK = 8009,
+    CV_CAP_PROP_ANDROID_WHITEBALANCE_LOCK = 8010,
+
+    // Properties of cameras available through AVFOUNDATION interface
+    CV_CAP_PROP_IOS_DEVICE_FOCUS = 9001,
+    CV_CAP_PROP_IOS_DEVICE_EXPOSURE = 9002,
+    CV_CAP_PROP_IOS_DEVICE_FLASH = 9003,
+    CV_CAP_PROP_IOS_DEVICE_WHITEBALANCE = 9004,
+    CV_CAP_PROP_IOS_DEVICE_TORCH = 9005,
+
+    // Properties of cameras available through Smartek Giganetix Ethernet Vision interface
+    /* --- Vladimir Litvinenko (litvinenko.vladimir@gmail.com) --- */
+    CV_CAP_PROP_GIGA_FRAME_OFFSET_X = 10001,
+    CV_CAP_PROP_GIGA_FRAME_OFFSET_Y = 10002,
+    CV_CAP_PROP_GIGA_FRAME_WIDTH_MAX = 10003,
+    CV_CAP_PROP_GIGA_FRAME_HEIGH_MAX = 10004,
+    CV_CAP_PROP_GIGA_FRAME_SENS_WIDTH = 10005,
+    CV_CAP_PROP_GIGA_FRAME_SENS_HEIGH = 10006,
+
+    CV_CAP_PROP_INTELPERC_PROFILE_COUNT               = 11001,
+    CV_CAP_PROP_INTELPERC_PROFILE_IDX                 = 11002,
+    CV_CAP_PROP_INTELPERC_DEPTH_LOW_CONFIDENCE_VALUE  = 11003,
+    CV_CAP_PROP_INTELPERC_DEPTH_SATURATION_VALUE      = 11004,
+    CV_CAP_PROP_INTELPERC_DEPTH_CONFIDENCE_THRESHOLD  = 11005,
+    CV_CAP_PROP_INTELPERC_DEPTH_FOCAL_LENGTH_HORZ     = 11006,
+    CV_CAP_PROP_INTELPERC_DEPTH_FOCAL_LENGTH_VERT     = 11007,
+
+    // Intel PerC streams
+    CV_CAP_INTELPERC_DEPTH_GENERATOR = 1 << 29,
+    CV_CAP_INTELPERC_IMAGE_GENERATOR = 1 << 28,
+    CV_CAP_INTELPERC_GENERATORS_MASK = CV_CAP_INTELPERC_DEPTH_GENERATOR + CV_CAP_INTELPERC_IMAGE_GENERATOR
+};
+
+enum // OpenNI output data kinds, grouped below by the generator that supplies them.
+{
+    // Data provided by the depth generator.
+    CV_CAP_OPENNI_DEPTH_MAP                 = 0, // Depth values in mm (CV_16UC1)
+    CV_CAP_OPENNI_POINT_CLOUD_MAP           = 1, // XYZ in meters (CV_32FC3)
+    CV_CAP_OPENNI_DISPARITY_MAP             = 2, // Disparity in pixels (CV_8UC1)
+    CV_CAP_OPENNI_DISPARITY_MAP_32F         = 3, // Disparity in pixels (CV_32FC1)
+    CV_CAP_OPENNI_VALID_DEPTH_MASK          = 4, // Valid-depth mask (CV_8UC1)
+
+    // Data provided by the RGB image generator.
+    CV_CAP_OPENNI_BGR_IMAGE                 = 5,
+    CV_CAP_OPENNI_GRAY_IMAGE                = 6,
+
+    // Data provided by the IR image generator.
+    CV_CAP_OPENNI_IR_IMAGE                  = 7
+};
+
+// Output modes supported by the OpenNI image generator (names presumably encode resolution class and frame rate).
+enum
+{
+    CV_CAP_OPENNI_VGA_30HZ     = 0,
+    CV_CAP_OPENNI_SXGA_15HZ    = 1,
+    CV_CAP_OPENNI_SXGA_30HZ    = 2,
+    CV_CAP_OPENNI_QVGA_30HZ    = 3,
+    CV_CAP_OPENNI_QVGA_60HZ    = 4
+};
+
+enum // Intel PerC output data kinds.
+{
+    CV_CAP_INTELPERC_DEPTH_MAP              = 0, // Each pixel is a 16-bit integer. The value indicates the distance from an object to the camera's XY plane or the Cartesian depth.
+    CV_CAP_INTELPERC_UVDEPTH_MAP            = 1, // Each pixel contains two 32-bit floating point values in the range of 0-1, representing the mapping of depth coordinates to the color coordinates.
+    CV_CAP_INTELPERC_IR_MAP                 = 2, // Each pixel is a 16-bit integer. The value indicates the intensity of the reflected laser beam.
+    CV_CAP_INTELPERC_IMAGE                  = 3
+};
+
+// gPhoto2 properties. If propertyId is less than 0, the operation is applied to the widget whose camera setting ID is the additive inverse of propertyId (i.e. -propertyId).
+// Widget IDs can be obtained by using CAP_PROP_GPHOTO2_WIDGET_ENUMERATE.
+// @see CvCaptureCAM_GPHOTO2 for more info
+enum
+{
+    CV_CAP_PROP_GPHOTO2_PREVIEW           = 17001, // Capture only the preview from liveview mode.
+    CV_CAP_PROP_GPHOTO2_WIDGET_ENUMERATE  = 17002, // Read-only, returns (const char *).
+    CV_CAP_PROP_GPHOTO2_RELOAD_CONFIG     = 17003, // Trigger, only by set. Reloads camera settings.
+    CV_CAP_PROP_GPHOTO2_RELOAD_ON_CHANGE  = 17004, // Reload all settings on set.
+    CV_CAP_PROP_GPHOTO2_COLLECT_MSGS      = 17005, // Collect messages with details.
+    CV_CAP_PROP_GPHOTO2_FLUSH_MSGS        = 17006, // Read-only, returns (const char *).
+    CV_CAP_PROP_SPEED                     = 17007, // Exposure speed. Can be read-only, depending on the camera program.
+    CV_CAP_PROP_APERTURE                  = 17008, // Aperture. Can be read-only, depending on the camera program.
+    CV_CAP_PROP_EXPOSUREPROGRAM           = 17009, // Camera exposure program.
+    CV_CAP_PROP_VIEWFINDER                = 17010  // Enter liveview mode.
+};
+
+//! Macro that packs four character codes into a fourcc value: the low 8 bits of c1 land in bits 0-7, c2 in bits 8-15, c3 in bits 16-23 and c4 in bits 24-31. Same as CV_FOURCC()
+#define CV_FOURCC_MACRO(c1, c2, c3, c4) (((c1) & 255) + (((c2) & 255) << 8) + (((c3) & 255) << 16) + (((c4) & 255) << 24))
+
+/** @brief Constructs the fourcc code of a codec
+
+Call it with the four characters of the fourcc code, e.g. `CV_FOURCC('I', 'Y', 'U', 'V')`; the result is CV_FOURCC_MACRO applied to the same four arguments.
+
+A list of codes is available on the [Video Codecs by FOURCC](http://www.fourcc.org/codecs.php) page.
+The FFMPEG backend with an MP4 container natively uses other values as the fourcc code:
+see [ObjectType](http://www.mp4ra.org/codecs.html).
+*/
+CV_INLINE int CV_FOURCC(char c1, char c2, char c3, char c4)
+{
+    return CV_FOURCC_MACRO(c1, c2, c3, c4);
+}
+
+//! (Windows only) Open the codec selection dialog
+#define CV_FOURCC_PROMPT -1
+//! (Linux only) Use the default codec for the specified filename; expands to the fourcc built from 'I', 'Y', 'U', 'V'
+#define CV_FOURCC_DEFAULT CV_FOURCC('I', 'Y', 'U', 'V')
+
+#endif // OPENCV_VIDEOIO_LEGACY_CONSTANTS_H
diff --git a/IPL/include/opencv/opencv2/videoio/registry.hpp b/IPL/include/opencv/opencv2/videoio/registry.hpp
new file mode 100644
index 0000000..89fb5a8
--- /dev/null
+++ b/IPL/include/opencv/opencv2/videoio/registry.hpp
@@ -0,0 +1,47 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_VIDEOIO_REGISTRY_HPP
+#define OPENCV_VIDEOIO_REGISTRY_HPP
+
+#include <opencv2/videoio.hpp>
+
+namespace cv { namespace videoio_registry {
+/** @addtogroup videoio_registry
+This section describes the API for querying and configuring the available Video I/O backends.
+
+Runtime configuration options:
+- enable debug mode: `OPENCV_VIDEOIO_DEBUG=1`
+- change backend priority: `OPENCV_VIDEOIO_PRIORITY_<backend>=9999`
+- disable backend: `OPENCV_VIDEOIO_PRIORITY_<backend>=0`
+- specify list of backends with high priority (>100000): `OPENCV_VIDEOIO_PRIORITY_LIST=FFMPEG,GSTREAMER`
+
+@{
+ */
+
+
+/** @brief Returns the backend API name or "UnknownVideoAPI(xxx)"
+@param api backend ID (#VideoCaptureAPIs)
+*/
+CV_EXPORTS_W cv::String getBackendName(VideoCaptureAPIs api);
+
+/** @brief Returns the list of all available backends */
+CV_EXPORTS_W std::vector<VideoCaptureAPIs> getBackends();
+
+/** @brief Returns the list of available backends that work via `cv::VideoCapture(int index)` */
+CV_EXPORTS_W std::vector<VideoCaptureAPIs> getCameraBackends();
+
+/** @brief Returns the list of available backends that work via `cv::VideoCapture(filename)` */
+CV_EXPORTS_W std::vector<VideoCaptureAPIs> getStreamBackends();
+
+/** @brief Returns the list of available backends that work via `cv::VideoWriter()` */
+CV_EXPORTS_W std::vector<VideoCaptureAPIs> getWriterBackends();
+
+/** @brief Returns true if the backend is available */
+CV_EXPORTS bool hasBackend(VideoCaptureAPIs api);
+
+//! @}
+}} // namespace cv::videoio_registry
+
+#endif // OPENCV_VIDEOIO_REGISTRY_HPP
diff --git a/IPL/include/opencv/opencv2/videoio/videoio_c.h b/IPL/include/opencv/opencv2/videoio/videoio_c.h
index 91d26ea..cf1a6d0 100644
--- a/IPL/include/opencv/opencv2/videoio/videoio_c.h
+++ b/IPL/include/opencv/opencv2/videoio/videoio_c.h
@@ -39,11 +39,13 @@
 //
 //M*/
 
-#ifndef __OPENCV_VIDEOIO_H__
-#define __OPENCV_VIDEOIO_H__
+#ifndef OPENCV_VIDEOIO_H
+#define OPENCV_VIDEOIO_H
 
 #include "opencv2/core/core_c.h"
 
+#include "opencv2/videoio/legacy/constants_c.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif /* __cplusplus */
@@ -57,486 +59,95 @@ extern "C" {
 *                         Working with Video Files and Cameras                           *
 \****************************************************************************************/
 
-/* "black box" capture structure */
+/** @brief "black box" capture structure
+
+In C++ use cv::VideoCapture
+*/
 typedef struct CvCapture CvCapture;
 
-/* start capturing frames from video file */
+/** @brief start capturing frames from video file
+*/
 CVAPI(CvCapture*) cvCreateFileCapture( const char* filename );
 
-/* start capturing frames from video file. allows specifying a preferred API to use */
+/** @brief start capturing frames from video file. allows specifying a preferred API to use
+*/
 CVAPI(CvCapture*) cvCreateFileCaptureWithPreference( const char* filename , int apiPreference);
 
-enum
-{
-    CV_CAP_ANY      =0,     // autodetect
-
-    CV_CAP_MIL      =100,   // MIL proprietary drivers
-
-    CV_CAP_VFW      =200,   // platform native
-    CV_CAP_V4L      =200,
-    CV_CAP_V4L2     =200,
-
-    CV_CAP_FIREWARE =300,   // IEEE 1394 drivers
-    CV_CAP_FIREWIRE =300,
-    CV_CAP_IEEE1394 =300,
-    CV_CAP_DC1394   =300,
-    CV_CAP_CMU1394  =300,
-
-    CV_CAP_STEREO   =400,   // TYZX proprietary drivers
-    CV_CAP_TYZX     =400,
-    CV_TYZX_LEFT    =400,
-    CV_TYZX_RIGHT   =401,
-    CV_TYZX_COLOR   =402,
-    CV_TYZX_Z       =403,
-
-    CV_CAP_QT       =500,   // QuickTime
-
-    CV_CAP_UNICAP   =600,   // Unicap drivers
-
-    CV_CAP_DSHOW    =700,   // DirectShow (via videoInput)
-    CV_CAP_MSMF     =1400,  // Microsoft Media Foundation (via videoInput)
-
-    CV_CAP_PVAPI    =800,   // PvAPI, Prosilica GigE SDK
-
-    CV_CAP_OPENNI   =900,   // OpenNI (for Kinect)
-    CV_CAP_OPENNI_ASUS =910,   // OpenNI (for Asus Xtion)
-
-    CV_CAP_ANDROID  =1000,  // Android - not used
-    CV_CAP_ANDROID_BACK =CV_CAP_ANDROID+99, // Android back camera - not used
-    CV_CAP_ANDROID_FRONT =CV_CAP_ANDROID+98, // Android front camera - not used
-
-    CV_CAP_XIAPI    =1100,   // XIMEA Camera API
-
-    CV_CAP_AVFOUNDATION = 1200,  // AVFoundation framework for iOS (OS X Lion will have the same API)
-
-    CV_CAP_GIGANETIX = 1300,  // Smartek Giganetix GigEVisionSDK
-
-    CV_CAP_INTELPERC = 1500, // Intel Perceptual Computing
-
-    CV_CAP_OPENNI2 = 1600,   // OpenNI2 (for Kinect)
-    CV_CAP_GPHOTO2 = 1700,
-    CV_CAP_GSTREAMER = 1800, // GStreamer
-    CV_CAP_FFMPEG = 1900,    // FFMPEG
-    CV_CAP_IMAGES = 2000     // OpenCV Image Sequence (e.g. img_%02d.jpg)
-};
-
-/* start capturing frames from camera: index = camera_index + domain_offset (CV_CAP_*) */
+/** @brief start capturing frames from camera: index = camera_index + domain_offset (CV_CAP_*)
+*/
 CVAPI(CvCapture*) cvCreateCameraCapture( int index );
 
-/* grab a frame, return 1 on success, 0 on fail.
-  this function is thought to be fast               */
+/** @brief grab a frame, return 1 on success, 0 on fail.
+
+  This function is intended to be fast.
+*/
 CVAPI(int) cvGrabFrame( CvCapture* capture );
 
-/* get the frame grabbed with cvGrabFrame(..)
+/** @brief get the frame grabbed with cvGrabFrame(..)
+
   This function may apply some frame processing like
   frame decompression, flipping etc.
-  !!!DO NOT RELEASE or MODIFY the retrieved frame!!! */
+  @warning !!!DO NOT RELEASE or MODIFY the retrieved frame!!!
+*/
 CVAPI(IplImage*) cvRetrieveFrame( CvCapture* capture, int streamIdx CV_DEFAULT(0) );
 
-/* Just a combination of cvGrabFrame and cvRetrieveFrame
-   !!!DO NOT RELEASE or MODIFY the retrieved frame!!!      */
+/** @brief Just a combination of cvGrabFrame and cvRetrieveFrame
+
+  @warning !!!DO NOT RELEASE or MODIFY the retrieved frame!!!
+*/
 CVAPI(IplImage*) cvQueryFrame( CvCapture* capture );
 
-/* stop capturing/reading and free resources */
+/** @brief stop capturing/reading and free resources
+*/
 CVAPI(void) cvReleaseCapture( CvCapture** capture );
 
-enum
-{
-    // modes of the controlling registers (can be: auto, manual, auto single push, absolute Latter allowed with any other mode)
-    // every feature can have only one mode turned on at a time
-    CV_CAP_PROP_DC1394_OFF         = -4,  //turn the feature off (not controlled manually nor automatically)
-    CV_CAP_PROP_DC1394_MODE_MANUAL = -3, //set automatically when a value of the feature is set by the user
-    CV_CAP_PROP_DC1394_MODE_AUTO = -2,
-    CV_CAP_PROP_DC1394_MODE_ONE_PUSH_AUTO = -1,
-    CV_CAP_PROP_POS_MSEC       =0,
-    CV_CAP_PROP_POS_FRAMES     =1,
-    CV_CAP_PROP_POS_AVI_RATIO  =2,
-    CV_CAP_PROP_FRAME_WIDTH    =3,
-    CV_CAP_PROP_FRAME_HEIGHT   =4,
-    CV_CAP_PROP_FPS            =5,
-    CV_CAP_PROP_FOURCC         =6,
-    CV_CAP_PROP_FRAME_COUNT    =7,
-    CV_CAP_PROP_FORMAT         =8,
-    CV_CAP_PROP_MODE           =9,
-    CV_CAP_PROP_BRIGHTNESS    =10,
-    CV_CAP_PROP_CONTRAST      =11,
-    CV_CAP_PROP_SATURATION    =12,
-    CV_CAP_PROP_HUE           =13,
-    CV_CAP_PROP_GAIN          =14,
-    CV_CAP_PROP_EXPOSURE      =15,
-    CV_CAP_PROP_CONVERT_RGB   =16,
-    CV_CAP_PROP_WHITE_BALANCE_BLUE_U =17,
-    CV_CAP_PROP_RECTIFICATION =18,
-    CV_CAP_PROP_MONOCHROME    =19,
-    CV_CAP_PROP_SHARPNESS     =20,
-    CV_CAP_PROP_AUTO_EXPOSURE =21, // exposure control done by camera,
-                                   // user can adjust refernce level
-                                   // using this feature
-    CV_CAP_PROP_GAMMA         =22,
-    CV_CAP_PROP_TEMPERATURE   =23,
-    CV_CAP_PROP_TRIGGER       =24,
-    CV_CAP_PROP_TRIGGER_DELAY =25,
-    CV_CAP_PROP_WHITE_BALANCE_RED_V =26,
-    CV_CAP_PROP_ZOOM          =27,
-    CV_CAP_PROP_FOCUS         =28,
-    CV_CAP_PROP_GUID          =29,
-    CV_CAP_PROP_ISO_SPEED     =30,
-    CV_CAP_PROP_MAX_DC1394    =31,
-    CV_CAP_PROP_BACKLIGHT     =32,
-    CV_CAP_PROP_PAN           =33,
-    CV_CAP_PROP_TILT          =34,
-    CV_CAP_PROP_ROLL          =35,
-    CV_CAP_PROP_IRIS          =36,
-    CV_CAP_PROP_SETTINGS      =37,
-    CV_CAP_PROP_BUFFERSIZE    =38,
-    CV_CAP_PROP_AUTOFOCUS     =39,
-    CV_CAP_PROP_SAR_NUM       =40,
-    CV_CAP_PROP_SAR_DEN       =41,
-
-    CV_CAP_PROP_AUTOGRAB      =1024, // property for videoio class CvCapture_Android only
-    CV_CAP_PROP_SUPPORTED_PREVIEW_SIZES_STRING=1025, // readonly, tricky property, returns cpnst char* indeed
-    CV_CAP_PROP_PREVIEW_FORMAT=1026, // readonly, tricky property, returns cpnst char* indeed
-
-    // OpenNI map generators
-    CV_CAP_OPENNI_DEPTH_GENERATOR = 1 << 31,
-    CV_CAP_OPENNI_IMAGE_GENERATOR = 1 << 30,
-    CV_CAP_OPENNI_GENERATORS_MASK = CV_CAP_OPENNI_DEPTH_GENERATOR + CV_CAP_OPENNI_IMAGE_GENERATOR,
-
-    // Properties of cameras available through OpenNI interfaces
-    CV_CAP_PROP_OPENNI_OUTPUT_MODE     = 100,
-    CV_CAP_PROP_OPENNI_FRAME_MAX_DEPTH = 101, // in mm
-    CV_CAP_PROP_OPENNI_BASELINE        = 102, // in mm
-    CV_CAP_PROP_OPENNI_FOCAL_LENGTH    = 103, // in pixels
-    CV_CAP_PROP_OPENNI_REGISTRATION    = 104, // flag
-    CV_CAP_PROP_OPENNI_REGISTRATION_ON = CV_CAP_PROP_OPENNI_REGISTRATION, // flag that synchronizes the remapping depth map to image map
-                                                                          // by changing depth generator's view point (if the flag is "on") or
-                                                                          // sets this view point to its normal one (if the flag is "off").
-    CV_CAP_PROP_OPENNI_APPROX_FRAME_SYNC = 105,
-    CV_CAP_PROP_OPENNI_MAX_BUFFER_SIZE   = 106,
-    CV_CAP_PROP_OPENNI_CIRCLE_BUFFER     = 107,
-    CV_CAP_PROP_OPENNI_MAX_TIME_DURATION = 108,
-
-    CV_CAP_PROP_OPENNI_GENERATOR_PRESENT = 109,
-    CV_CAP_PROP_OPENNI2_SYNC = 110,
-    CV_CAP_PROP_OPENNI2_MIRROR = 111,
-
-    CV_CAP_OPENNI_IMAGE_GENERATOR_PRESENT         = CV_CAP_OPENNI_IMAGE_GENERATOR + CV_CAP_PROP_OPENNI_GENERATOR_PRESENT,
-    CV_CAP_OPENNI_IMAGE_GENERATOR_OUTPUT_MODE     = CV_CAP_OPENNI_IMAGE_GENERATOR + CV_CAP_PROP_OPENNI_OUTPUT_MODE,
-    CV_CAP_OPENNI_DEPTH_GENERATOR_BASELINE        = CV_CAP_OPENNI_DEPTH_GENERATOR + CV_CAP_PROP_OPENNI_BASELINE,
-    CV_CAP_OPENNI_DEPTH_GENERATOR_FOCAL_LENGTH    = CV_CAP_OPENNI_DEPTH_GENERATOR + CV_CAP_PROP_OPENNI_FOCAL_LENGTH,
-    CV_CAP_OPENNI_DEPTH_GENERATOR_REGISTRATION    = CV_CAP_OPENNI_DEPTH_GENERATOR + CV_CAP_PROP_OPENNI_REGISTRATION,
-    CV_CAP_OPENNI_DEPTH_GENERATOR_REGISTRATION_ON = CV_CAP_OPENNI_DEPTH_GENERATOR_REGISTRATION,
-
-    // Properties of cameras available through GStreamer interface
-    CV_CAP_GSTREAMER_QUEUE_LENGTH           = 200, // default is 1
-
-    // PVAPI
-    CV_CAP_PROP_PVAPI_MULTICASTIP           = 300, // ip for anable multicast master mode. 0 for disable multicast
-    CV_CAP_PROP_PVAPI_FRAMESTARTTRIGGERMODE = 301, // FrameStartTriggerMode: Determines how a frame is initiated
-    CV_CAP_PROP_PVAPI_DECIMATIONHORIZONTAL  = 302, // Horizontal sub-sampling of the image
-    CV_CAP_PROP_PVAPI_DECIMATIONVERTICAL    = 303, // Vertical sub-sampling of the image
-    CV_CAP_PROP_PVAPI_BINNINGX              = 304, // Horizontal binning factor
-    CV_CAP_PROP_PVAPI_BINNINGY              = 305, // Vertical binning factor
-    CV_CAP_PROP_PVAPI_PIXELFORMAT           = 306, // Pixel format
-
-    // Properties of cameras available through XIMEA SDK interface
-    CV_CAP_PROP_XI_DOWNSAMPLING                                 = 400, // Change image resolution by binning or skipping.
-    CV_CAP_PROP_XI_DATA_FORMAT                                  = 401, // Output data format.
-    CV_CAP_PROP_XI_OFFSET_X                                     = 402, // Horizontal offset from the origin to the area of interest (in pixels).
-    CV_CAP_PROP_XI_OFFSET_Y                                     = 403, // Vertical offset from the origin to the area of interest (in pixels).
-    CV_CAP_PROP_XI_TRG_SOURCE                                   = 404, // Defines source of trigger.
-    CV_CAP_PROP_XI_TRG_SOFTWARE                                 = 405, // Generates an internal trigger. PRM_TRG_SOURCE must be set to TRG_SOFTWARE.
-    CV_CAP_PROP_XI_GPI_SELECTOR                                 = 406, // Selects general purpose input
-    CV_CAP_PROP_XI_GPI_MODE                                     = 407, // Set general purpose input mode
-    CV_CAP_PROP_XI_GPI_LEVEL                                    = 408, // Get general purpose level
-    CV_CAP_PROP_XI_GPO_SELECTOR                                 = 409, // Selects general purpose output
-    CV_CAP_PROP_XI_GPO_MODE                                     = 410, // Set general purpose output mode
-    CV_CAP_PROP_XI_LED_SELECTOR                                 = 411, // Selects camera signalling LED
-    CV_CAP_PROP_XI_LED_MODE                                     = 412, // Define camera signalling LED functionality
-    CV_CAP_PROP_XI_MANUAL_WB                                    = 413, // Calculates White Balance(must be called during acquisition)
-    CV_CAP_PROP_XI_AUTO_WB                                      = 414, // Automatic white balance
-    CV_CAP_PROP_XI_AEAG                                         = 415, // Automatic exposure/gain
-    CV_CAP_PROP_XI_EXP_PRIORITY                                 = 416, // Exposure priority (0.5 - exposure 50%, gain 50%).
-    CV_CAP_PROP_XI_AE_MAX_LIMIT                                 = 417, // Maximum limit of exposure in AEAG procedure
-    CV_CAP_PROP_XI_AG_MAX_LIMIT                                 = 418,  // Maximum limit of gain in AEAG procedure
-    CV_CAP_PROP_XI_AEAG_LEVEL                                   = 419, // Average intensity of output signal AEAG should achieve(in %)
-    CV_CAP_PROP_XI_TIMEOUT                                      = 420, // Image capture timeout in milliseconds
-    CV_CAP_PROP_XI_EXPOSURE                                     = 421, // Exposure time in microseconds
-    CV_CAP_PROP_XI_EXPOSURE_BURST_COUNT                         = 422, // Sets the number of times of exposure in one frame.
-    CV_CAP_PROP_XI_GAIN_SELECTOR                                = 423, // Gain selector for parameter Gain allows to select different type of gains.
-    CV_CAP_PROP_XI_GAIN                                         = 424, // Gain in dB
-    CV_CAP_PROP_XI_DOWNSAMPLING_TYPE                            = 426, // Change image downsampling type.
-    CV_CAP_PROP_XI_BINNING_SELECTOR                             = 427, // Binning engine selector.
-    CV_CAP_PROP_XI_BINNING_VERTICAL                             = 428, // Vertical Binning - number of vertical photo-sensitive cells to combine together.
-    CV_CAP_PROP_XI_BINNING_HORIZONTAL                           = 429, // Horizontal Binning - number of horizontal photo-sensitive cells to combine together.
-    CV_CAP_PROP_XI_BINNING_PATTERN                              = 430, // Binning pattern type.
-    CV_CAP_PROP_XI_DECIMATION_SELECTOR                          = 431, // Decimation engine selector.
-    CV_CAP_PROP_XI_DECIMATION_VERTICAL                          = 432, // Vertical Decimation - vertical sub-sampling of the image - reduces the vertical resolution of the image by the specified vertical decimation factor.
-    CV_CAP_PROP_XI_DECIMATION_HORIZONTAL                        = 433, // Horizontal Decimation - horizontal sub-sampling of the image - reduces the horizontal resolution of the image by the specified vertical decimation factor.
-    CV_CAP_PROP_XI_DECIMATION_PATTERN                           = 434, // Decimation pattern type.
-    CV_CAP_PROP_XI_TEST_PATTERN_GENERATOR_SELECTOR              = 587, // Selects which test pattern generator is controlled by the TestPattern feature.
-    CV_CAP_PROP_XI_TEST_PATTERN                                 = 588, // Selects which test pattern type is generated by the selected generator.
-    CV_CAP_PROP_XI_IMAGE_DATA_FORMAT                            = 435, // Output data format.
-    CV_CAP_PROP_XI_SHUTTER_TYPE                                 = 436, // Change sensor shutter type(CMOS sensor).
-    CV_CAP_PROP_XI_SENSOR_TAPS                                  = 437, // Number of taps
-    CV_CAP_PROP_XI_AEAG_ROI_OFFSET_X                            = 439, // Automatic exposure/gain ROI offset X
-    CV_CAP_PROP_XI_AEAG_ROI_OFFSET_Y                            = 440, // Automatic exposure/gain ROI offset Y
-    CV_CAP_PROP_XI_AEAG_ROI_WIDTH                               = 441, // Automatic exposure/gain ROI Width
-    CV_CAP_PROP_XI_AEAG_ROI_HEIGHT                              = 442, // Automatic exposure/gain ROI Height
-    CV_CAP_PROP_XI_BPC                                          = 445, // Correction of bad pixels
-    CV_CAP_PROP_XI_WB_KR                                        = 448, // White balance red coefficient
-    CV_CAP_PROP_XI_WB_KG                                        = 449, // White balance green coefficient
-    CV_CAP_PROP_XI_WB_KB                                        = 450, // White balance blue coefficient
-    CV_CAP_PROP_XI_WIDTH                                        = 451, // Width of the Image provided by the device (in pixels).
-    CV_CAP_PROP_XI_HEIGHT                                       = 452, // Height of the Image provided by the device (in pixels).
-    CV_CAP_PROP_XI_REGION_SELECTOR                              = 589, // Selects Region in Multiple ROI which parameters are set by width, height, ... ,region mode
-    CV_CAP_PROP_XI_REGION_MODE                                  = 595, // Activates/deactivates Region selected by Region Selector
-    CV_CAP_PROP_XI_LIMIT_BANDWIDTH                              = 459, // Set/get bandwidth(datarate)(in Megabits)
-    CV_CAP_PROP_XI_SENSOR_DATA_BIT_DEPTH                        = 460, // Sensor output data bit depth.
-    CV_CAP_PROP_XI_OUTPUT_DATA_BIT_DEPTH                        = 461, // Device output data bit depth.
-    CV_CAP_PROP_XI_IMAGE_DATA_BIT_DEPTH                         = 462, // bitdepth of data returned by function xiGetImage
-    CV_CAP_PROP_XI_OUTPUT_DATA_PACKING                          = 463, // Device output data packing (or grouping) enabled. Packing could be enabled if output_data_bit_depth > 8 and packing capability is available.
-    CV_CAP_PROP_XI_OUTPUT_DATA_PACKING_TYPE                     = 464, // Data packing type. Some cameras supports only specific packing type.
-    CV_CAP_PROP_XI_IS_COOLED                                    = 465, // Returns 1 for cameras that support cooling.
-    CV_CAP_PROP_XI_COOLING                                      = 466, // Start camera cooling.
-    CV_CAP_PROP_XI_TARGET_TEMP                                  = 467, // Set sensor target temperature for cooling.
-    CV_CAP_PROP_XI_CHIP_TEMP                                    = 468, // Camera sensor temperature
-    CV_CAP_PROP_XI_HOUS_TEMP                                    = 469, // Camera housing tepmerature
-    CV_CAP_PROP_XI_HOUS_BACK_SIDE_TEMP                          = 590, // Camera housing back side tepmerature
-    CV_CAP_PROP_XI_SENSOR_BOARD_TEMP                            = 596, // Camera sensor board temperature
-    CV_CAP_PROP_XI_CMS                                          = 470, // Mode of color management system.
-    CV_CAP_PROP_XI_APPLY_CMS                                    = 471, // Enable applying of CMS profiles to xiGetImage (see XI_PRM_INPUT_CMS_PROFILE, XI_PRM_OUTPUT_CMS_PROFILE).
-    CV_CAP_PROP_XI_IMAGE_IS_COLOR                               = 474, // Returns 1 for color cameras.
-    CV_CAP_PROP_XI_COLOR_FILTER_ARRAY                           = 475, // Returns color filter array type of RAW data.
-    CV_CAP_PROP_XI_GAMMAY                                       = 476, // Luminosity gamma
-    CV_CAP_PROP_XI_GAMMAC                                       = 477, // Chromaticity gamma
-    CV_CAP_PROP_XI_SHARPNESS                                    = 478, // Sharpness Strenght
-    CV_CAP_PROP_XI_CC_MATRIX_00                                 = 479, // Color Correction Matrix element [0][0]
-    CV_CAP_PROP_XI_CC_MATRIX_01                                 = 480, // Color Correction Matrix element [0][1]
-    CV_CAP_PROP_XI_CC_MATRIX_02                                 = 481, // Color Correction Matrix element [0][2]
-    CV_CAP_PROP_XI_CC_MATRIX_03                                 = 482, // Color Correction Matrix element [0][3]
-    CV_CAP_PROP_XI_CC_MATRIX_10                                 = 483, // Color Correction Matrix element [1][0]
-    CV_CAP_PROP_XI_CC_MATRIX_11                                 = 484, // Color Correction Matrix element [1][1]
-    CV_CAP_PROP_XI_CC_MATRIX_12                                 = 485, // Color Correction Matrix element [1][2]
-    CV_CAP_PROP_XI_CC_MATRIX_13                                 = 486, // Color Correction Matrix element [1][3]
-    CV_CAP_PROP_XI_CC_MATRIX_20                                 = 487, // Color Correction Matrix element [2][0]
-    CV_CAP_PROP_XI_CC_MATRIX_21                                 = 488, // Color Correction Matrix element [2][1]
-    CV_CAP_PROP_XI_CC_MATRIX_22                                 = 489, // Color Correction Matrix element [2][2]
-    CV_CAP_PROP_XI_CC_MATRIX_23                                 = 490, // Color Correction Matrix element [2][3]
-    CV_CAP_PROP_XI_CC_MATRIX_30                                 = 491, // Color Correction Matrix element [3][0]
-    CV_CAP_PROP_XI_CC_MATRIX_31                                 = 492, // Color Correction Matrix element [3][1]
-    CV_CAP_PROP_XI_CC_MATRIX_32                                 = 493, // Color Correction Matrix element [3][2]
-    CV_CAP_PROP_XI_CC_MATRIX_33                                 = 494, // Color Correction Matrix element [3][3]
-    CV_CAP_PROP_XI_DEFAULT_CC_MATRIX                            = 495, // Set default Color Correction Matrix
-    CV_CAP_PROP_XI_TRG_SELECTOR                                 = 498, // Selects the type of trigger.
-    CV_CAP_PROP_XI_ACQ_FRAME_BURST_COUNT                        = 499, // Sets number of frames acquired by burst. This burst is used only if trigger is set to FrameBurstStart
-    CV_CAP_PROP_XI_DEBOUNCE_EN                                  = 507, // Enable/Disable debounce to selected GPI
-    CV_CAP_PROP_XI_DEBOUNCE_T0                                  = 508, // Debounce time (x * 10us)
-    CV_CAP_PROP_XI_DEBOUNCE_T1                                  = 509, // Debounce time (x * 10us)
-    CV_CAP_PROP_XI_DEBOUNCE_POL                                 = 510, // Debounce polarity (pol = 1 t0 - falling edge, t1 - rising edge)
-    CV_CAP_PROP_XI_LENS_MODE                                    = 511, // Status of lens control interface. This shall be set to XI_ON before any Lens operations.
-    CV_CAP_PROP_XI_LENS_APERTURE_VALUE                          = 512, // Current lens aperture value in stops. Examples: 2.8, 4, 5.6, 8, 11
-    CV_CAP_PROP_XI_LENS_FOCUS_MOVEMENT_VALUE                    = 513, // Lens current focus movement value to be used by XI_PRM_LENS_FOCUS_MOVE in motor steps.
-    CV_CAP_PROP_XI_LENS_FOCUS_MOVE                              = 514, // Moves lens focus motor by steps set in XI_PRM_LENS_FOCUS_MOVEMENT_VALUE.
-    CV_CAP_PROP_XI_LENS_FOCUS_DISTANCE                          = 515, // Lens focus distance in cm.
-    CV_CAP_PROP_XI_LENS_FOCAL_LENGTH                            = 516, // Lens focal distance in mm.
-    CV_CAP_PROP_XI_LENS_FEATURE_SELECTOR                        = 517, // Selects the current feature which is accessible by XI_PRM_LENS_FEATURE.
-    CV_CAP_PROP_XI_LENS_FEATURE                                 = 518, // Allows access to lens feature value currently selected by XI_PRM_LENS_FEATURE_SELECTOR.
-    CV_CAP_PROP_XI_DEVICE_MODEL_ID                              = 521, // Return device model id
-    CV_CAP_PROP_XI_DEVICE_SN                                    = 522, // Return device serial number
-    CV_CAP_PROP_XI_IMAGE_DATA_FORMAT_RGB32_ALPHA                = 529, // The alpha channel of RGB32 output image format.
-    CV_CAP_PROP_XI_IMAGE_PAYLOAD_SIZE                           = 530, // Buffer size in bytes sufficient for output image returned by xiGetImage
-    CV_CAP_PROP_XI_TRANSPORT_PIXEL_FORMAT                       = 531, // Current format of pixels on transport layer.
-    CV_CAP_PROP_XI_SENSOR_CLOCK_FREQ_HZ                         = 532, // Sensor clock frequency in Hz.
-    CV_CAP_PROP_XI_SENSOR_CLOCK_FREQ_INDEX                      = 533, // Sensor clock frequency index. Sensor with selected frequencies have possibility to set the frequency only by this index.
-    CV_CAP_PROP_XI_SENSOR_OUTPUT_CHANNEL_COUNT                  = 534, // Number of output channels from sensor used for data transfer.
-    CV_CAP_PROP_XI_FRAMERATE                                    = 535, // Define framerate in Hz
-    CV_CAP_PROP_XI_COUNTER_SELECTOR                             = 536, // Select counter
-    CV_CAP_PROP_XI_COUNTER_VALUE                                = 537, // Counter status
-    CV_CAP_PROP_XI_ACQ_TIMING_MODE                              = 538, // Type of sensor frames timing.
-    CV_CAP_PROP_XI_AVAILABLE_BANDWIDTH                          = 539, // Calculate and return available interface bandwidth(int Megabits)
-    CV_CAP_PROP_XI_BUFFER_POLICY                                = 540, // Data move policy
-    CV_CAP_PROP_XI_LUT_EN                                       = 541, // Activates LUT.
-    CV_CAP_PROP_XI_LUT_INDEX                                    = 542, // Control the index (offset) of the coefficient to access in the LUT.
-    CV_CAP_PROP_XI_LUT_VALUE                                    = 543, // Value at entry LUTIndex of the LUT
-    CV_CAP_PROP_XI_TRG_DELAY                                    = 544, // Specifies the delay in microseconds (us) to apply after the trigger reception before activating it.
-    CV_CAP_PROP_XI_TS_RST_MODE                                  = 545, // Defines how time stamp reset engine will be armed
-    CV_CAP_PROP_XI_TS_RST_SOURCE                                = 546, // Defines which source will be used for timestamp reset. Writing this parameter will trigger settings of engine (arming)
-    CV_CAP_PROP_XI_IS_DEVICE_EXIST                              = 547, // Returns 1 if camera connected and works properly.
-    CV_CAP_PROP_XI_ACQ_BUFFER_SIZE                              = 548, // Acquisition buffer size in buffer_size_unit. Default bytes.
-    CV_CAP_PROP_XI_ACQ_BUFFER_SIZE_UNIT                         = 549, // Acquisition buffer size unit in bytes. Default 1. E.g. Value 1024 means that buffer_size is in KiBytes
-    CV_CAP_PROP_XI_ACQ_TRANSPORT_BUFFER_SIZE                    = 550, // Acquisition transport buffer size in bytes
-    CV_CAP_PROP_XI_BUFFERS_QUEUE_SIZE                           = 551, // Queue of field/frame buffers
-    CV_CAP_PROP_XI_ACQ_TRANSPORT_BUFFER_COMMIT                  = 552, // Number of buffers to commit to low level
-    CV_CAP_PROP_XI_RECENT_FRAME                                 = 553, // GetImage returns most recent frame
-    CV_CAP_PROP_XI_DEVICE_RESET                                 = 554, // Resets the camera to default state.
-    CV_CAP_PROP_XI_COLUMN_FPN_CORRECTION                        = 555, // Correction of column FPN
-    CV_CAP_PROP_XI_ROW_FPN_CORRECTION                           = 591, // Correction of row FPN
-    CV_CAP_PROP_XI_SENSOR_MODE                                  = 558, // Current sensor mode. Allows to select sensor mode by one integer. Setting of this parameter affects: image dimensions and downsampling.
-    CV_CAP_PROP_XI_HDR                                          = 559, // Enable High Dynamic Range feature.
-    CV_CAP_PROP_XI_HDR_KNEEPOINT_COUNT                          = 560, // The number of kneepoints in the PWLR.
-    CV_CAP_PROP_XI_HDR_T1                                       = 561, // position of first kneepoint(in % of XI_PRM_EXPOSURE)
-    CV_CAP_PROP_XI_HDR_T2                                       = 562, // position of second kneepoint (in % of XI_PRM_EXPOSURE)
-    CV_CAP_PROP_XI_KNEEPOINT1                                   = 563, // value of first kneepoint (% of sensor saturation)
-    CV_CAP_PROP_XI_KNEEPOINT2                                   = 564, // value of second kneepoint (% of sensor saturation)
-    CV_CAP_PROP_XI_IMAGE_BLACK_LEVEL                            = 565, // Last image black level counts. Can be used for Offline processing to recall it.
-    CV_CAP_PROP_XI_HW_REVISION                                  = 571, // Returns hardware revision number.
-    CV_CAP_PROP_XI_DEBUG_LEVEL                                  = 572, // Set debug level
-    CV_CAP_PROP_XI_AUTO_BANDWIDTH_CALCULATION                   = 573, // Automatic bandwidth calculation,
-    CV_CAP_PROP_XI_FFS_FILE_ID                                  = 594, // File number.
-    CV_CAP_PROP_XI_FFS_FILE_SIZE                                = 580, // Size of file.
-    CV_CAP_PROP_XI_FREE_FFS_SIZE                                = 581, // Size of free camera FFS.
-    CV_CAP_PROP_XI_USED_FFS_SIZE                                = 582, // Size of used camera FFS.
-    CV_CAP_PROP_XI_FFS_ACCESS_KEY                               = 583, // Setting of key enables file operations on some cameras.
-    CV_CAP_PROP_XI_SENSOR_FEATURE_SELECTOR                      = 585, // Selects the current feature which is accessible by XI_PRM_SENSOR_FEATURE_VALUE.
-    CV_CAP_PROP_XI_SENSOR_FEATURE_VALUE                         = 586, // Allows access to sensor feature value currently selected by XI_PRM_SENSOR_FEATURE_SELECTOR.
-
-    // Properties for Android cameras
-    CV_CAP_PROP_ANDROID_FLASH_MODE = 8001,
-    CV_CAP_PROP_ANDROID_FOCUS_MODE = 8002,
-    CV_CAP_PROP_ANDROID_WHITE_BALANCE = 8003,
-    CV_CAP_PROP_ANDROID_ANTIBANDING = 8004,
-    CV_CAP_PROP_ANDROID_FOCAL_LENGTH = 8005,
-    CV_CAP_PROP_ANDROID_FOCUS_DISTANCE_NEAR = 8006,
-    CV_CAP_PROP_ANDROID_FOCUS_DISTANCE_OPTIMAL = 8007,
-    CV_CAP_PROP_ANDROID_FOCUS_DISTANCE_FAR = 8008,
-    CV_CAP_PROP_ANDROID_EXPOSE_LOCK = 8009,
-    CV_CAP_PROP_ANDROID_WHITEBALANCE_LOCK = 8010,
-
-    // Properties of cameras available through AVFOUNDATION interface
-    CV_CAP_PROP_IOS_DEVICE_FOCUS = 9001,
-    CV_CAP_PROP_IOS_DEVICE_EXPOSURE = 9002,
-    CV_CAP_PROP_IOS_DEVICE_FLASH = 9003,
-    CV_CAP_PROP_IOS_DEVICE_WHITEBALANCE = 9004,
-    CV_CAP_PROP_IOS_DEVICE_TORCH = 9005,
-
-    // Properties of cameras available through Smartek Giganetix Ethernet Vision interface
-    /* --- Vladimir Litvinenko (litvinenko.vladimir@gmail.com) --- */
-    CV_CAP_PROP_GIGA_FRAME_OFFSET_X = 10001,
-    CV_CAP_PROP_GIGA_FRAME_OFFSET_Y = 10002,
-    CV_CAP_PROP_GIGA_FRAME_WIDTH_MAX = 10003,
-    CV_CAP_PROP_GIGA_FRAME_HEIGH_MAX = 10004,
-    CV_CAP_PROP_GIGA_FRAME_SENS_WIDTH = 10005,
-    CV_CAP_PROP_GIGA_FRAME_SENS_HEIGH = 10006,
-
-    CV_CAP_PROP_INTELPERC_PROFILE_COUNT               = 11001,
-    CV_CAP_PROP_INTELPERC_PROFILE_IDX                 = 11002,
-    CV_CAP_PROP_INTELPERC_DEPTH_LOW_CONFIDENCE_VALUE  = 11003,
-    CV_CAP_PROP_INTELPERC_DEPTH_SATURATION_VALUE      = 11004,
-    CV_CAP_PROP_INTELPERC_DEPTH_CONFIDENCE_THRESHOLD  = 11005,
-    CV_CAP_PROP_INTELPERC_DEPTH_FOCAL_LENGTH_HORZ     = 11006,
-    CV_CAP_PROP_INTELPERC_DEPTH_FOCAL_LENGTH_VERT     = 11007,
-
-    // Intel PerC streams
-    CV_CAP_INTELPERC_DEPTH_GENERATOR = 1 << 29,
-    CV_CAP_INTELPERC_IMAGE_GENERATOR = 1 << 28,
-    CV_CAP_INTELPERC_GENERATORS_MASK = CV_CAP_INTELPERC_DEPTH_GENERATOR + CV_CAP_INTELPERC_IMAGE_GENERATOR
-};
-
-// Generic camera output modes.
-// Currently, these are supported through the libv4l interface only.
-enum
-{
-    CV_CAP_MODE_BGR  = 0, // BGR24 (default)
-    CV_CAP_MODE_RGB  = 1, // RGB24
-    CV_CAP_MODE_GRAY = 2, // Y8
-    CV_CAP_MODE_YUYV = 3  // YUYV
-};
-
-enum
-{
-    // Data given from depth generator.
-    CV_CAP_OPENNI_DEPTH_MAP                 = 0, // Depth values in mm (CV_16UC1)
-    CV_CAP_OPENNI_POINT_CLOUD_MAP           = 1, // XYZ in meters (CV_32FC3)
-    CV_CAP_OPENNI_DISPARITY_MAP             = 2, // Disparity in pixels (CV_8UC1)
-    CV_CAP_OPENNI_DISPARITY_MAP_32F         = 3, // Disparity in pixels (CV_32FC1)
-    CV_CAP_OPENNI_VALID_DEPTH_MASK          = 4, // CV_8UC1
-
-    // Data given from RGB image generator.
-    CV_CAP_OPENNI_BGR_IMAGE                 = 5,
-    CV_CAP_OPENNI_GRAY_IMAGE                = 6
-};
-
-// Supported output modes of OpenNI image generator
-enum
-{
-    CV_CAP_OPENNI_VGA_30HZ     = 0,
-    CV_CAP_OPENNI_SXGA_15HZ    = 1,
-    CV_CAP_OPENNI_SXGA_30HZ    = 2,
-    CV_CAP_OPENNI_QVGA_30HZ    = 3,
-    CV_CAP_OPENNI_QVGA_60HZ    = 4
-};
-
-enum
-{
-    CV_CAP_INTELPERC_DEPTH_MAP              = 0, // Each pixel is a 16-bit integer. The value indicates the distance from an object to the camera's XY plane or the Cartesian depth.
-    CV_CAP_INTELPERC_UVDEPTH_MAP            = 1, // Each pixel contains two 32-bit floating point values in the range of 0-1, representing the mapping of depth coordinates to the color coordinates.
-    CV_CAP_INTELPERC_IR_MAP                 = 2, // Each pixel is a 16-bit integer. The value indicates the intensity of the reflected laser beam.
-    CV_CAP_INTELPERC_IMAGE                  = 3
-};
-
-// gPhoto2 properties, if propertyId is less than 0 then work on widget with that __additive inversed__ camera setting ID
-// Get IDs by using CAP_PROP_GPHOTO2_WIDGET_ENUMERATE.
-// @see CvCaptureCAM_GPHOTO2 for more info
-enum
-{
-    CV_CAP_PROP_GPHOTO2_PREVIEW           = 17001, // Capture only preview from liveview mode.
-    CV_CAP_PROP_GPHOTO2_WIDGET_ENUMERATE  = 17002, // Readonly, returns (const char *).
-    CV_CAP_PROP_GPHOTO2_RELOAD_CONFIG     = 17003, // Trigger, only by set. Reload camera settings.
-    CV_CAP_PROP_GPHOTO2_RELOAD_ON_CHANGE  = 17004, // Reload all settings on set.
-    CV_CAP_PROP_GPHOTO2_COLLECT_MSGS      = 17005, // Collect messages with details.
-    CV_CAP_PROP_GPHOTO2_FLUSH_MSGS        = 17006, // Readonly, returns (const char *).
-    CV_CAP_PROP_SPEED                     = 17007, // Exposure speed. Can be readonly, depends on camera program.
-    CV_CAP_PROP_APERTURE                  = 17008, // Aperture. Can be readonly, depends on camera program.
-    CV_CAP_PROP_EXPOSUREPROGRAM           = 17009, // Camera exposure program.
-    CV_CAP_PROP_VIEWFINDER                = 17010  // Enter liveview mode.
-};
-
-/* retrieve or set capture properties */
+/** @brief retrieve capture properties
+*/
 CVAPI(double) cvGetCaptureProperty( CvCapture* capture, int property_id );
+/** @brief set capture properties
+*/
 CVAPI(int)    cvSetCaptureProperty( CvCapture* capture, int property_id, double value );
 
-// Return the type of the capturer (eg, CV_CAP_V4W, CV_CAP_UNICAP), which is unknown if created with CV_CAP_ANY
-CVAPI(int)    cvGetCaptureDomain( CvCapture* capture);
-
-/* "black box" video file writer structure */
-typedef struct CvVideoWriter CvVideoWriter;
+/** @brief Return the type of the capturer (eg, ::CV_CAP_VFW, ::CV_CAP_UNICAP)
 
-#define CV_FOURCC_MACRO(c1, c2, c3, c4) (((c1) & 255) + (((c2) & 255) << 8) + (((c3) & 255) << 16) + (((c4) & 255) << 24))
+The type is unknown if the capture was created with ::CV_CAP_ANY
+*/
+CVAPI(int)    cvGetCaptureDomain( CvCapture* capture);
 
-CV_INLINE int CV_FOURCC(char c1, char c2, char c3, char c4)
-{
-    return CV_FOURCC_MACRO(c1, c2, c3, c4);
-}
+/** @brief "black box" video file writer structure
 
-#define CV_FOURCC_PROMPT -1  /* Open Codec Selection Dialog (Windows only) */
-#define CV_FOURCC_DEFAULT CV_FOURCC('I', 'Y', 'U', 'V') /* Use default codec for specified filename (Linux only) */
+In C++ use cv::VideoWriter
+*/
+typedef struct CvVideoWriter CvVideoWriter;
 
-/* initialize video file writer */
+/** @brief initialize video file writer
+*/
 CVAPI(CvVideoWriter*) cvCreateVideoWriter( const char* filename, int fourcc,
                                            double fps, CvSize frame_size,
                                            int is_color CV_DEFAULT(1));
 
-/* write frame to video file */
+/** @brief write frame to video file
+*/
 CVAPI(int) cvWriteFrame( CvVideoWriter* writer, const IplImage* image );
 
-/* close video file writer */
+/** @brief close video file writer
+*/
 CVAPI(void) cvReleaseVideoWriter( CvVideoWriter** writer );
 
-/****************************************************************************************\
-*                              Obsolete functions/synonyms                               *
-\****************************************************************************************/
-
-#define cvCaptureFromFile cvCreateFileCapture
-#define cvCaptureFromCAM cvCreateCameraCapture
-#define cvCaptureFromAVI cvCaptureFromFile
-#define cvCreateAVIWriter cvCreateVideoWriter
-#define cvWriteToAVI cvWriteFrame
+// ***************************************************************************************
+//! @name Obsolete functions/synonyms
+//! @{
+#define cvCaptureFromCAM cvCreateCameraCapture //!< @deprecated use cvCreateCameraCapture() instead
+#define cvCaptureFromFile cvCreateFileCapture  //!< @deprecated use cvCreateFileCapture() instead
+#define cvCaptureFromAVI cvCaptureFromFile     //!< @deprecated use cvCreateFileCapture() instead
+#define cvCreateAVIWriter cvCreateVideoWriter  //!< @deprecated use cvCreateVideoWriter() instead
+#define cvWriteToAVI cvWriteFrame              //!< @deprecated use cvWriteFrame() instead
+//!  @} Obsolete...
 
-/** @} videoio_c */
+//! @} videoio_c
 
 #ifdef __cplusplus
 }
 #endif
 
-#endif //__OPENCV_VIDEOIO_H__
+#endif //OPENCV_VIDEOIO_H
diff --git a/IPL/include/opencv/opencv2/videostab.hpp b/IPL/include/opencv/opencv2/videostab.hpp
index 17b061f..ca3f5ad 100644
--- a/IPL/include/opencv/opencv2/videostab.hpp
+++ b/IPL/include/opencv/opencv2/videostab.hpp
@@ -40,15 +40,15 @@
 //
 //M*/
 
-#ifndef __OPENCV_VIDEOSTAB_HPP__
-#define __OPENCV_VIDEOSTAB_HPP__
+#ifndef OPENCV_VIDEOSTAB_HPP
+#define OPENCV_VIDEOSTAB_HPP
 
 /**
   @defgroup videostab Video Stabilization
 
 The video stabilization module contains a set of functions and classes that can be used to solve the
-problem of video stabilization. There are a few methods implemented, most of them are descibed in
-the papers @cite OF06 and @cite G11 . However, there are some extensions and deviations from the orginal
+problem of video stabilization. There are a few methods implemented, most of them are described in
+the papers @cite OF06 and @cite G11 . However, there are some extensions and deviations from the original
 paper methods.
 
 ### References
diff --git a/IPL/include/opencv/opencv2/videostab/deblurring.hpp b/IPL/include/opencv/opencv2/videostab/deblurring.hpp
index 8028c1d..d1bd514 100644
--- a/IPL/include/opencv/opencv2/videostab/deblurring.hpp
+++ b/IPL/include/opencv/opencv2/videostab/deblurring.hpp
@@ -40,8 +40,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_VIDEOSTAB_DEBLURRING_HPP__
-#define __OPENCV_VIDEOSTAB_DEBLURRING_HPP__
+#ifndef OPENCV_VIDEOSTAB_DEBLURRING_HPP
+#define OPENCV_VIDEOSTAB_DEBLURRING_HPP
 
 #include <vector>
 #include "opencv2/core.hpp"
@@ -66,7 +66,7 @@ class CV_EXPORTS DeblurerBase
     virtual void setRadius(int val) { radius_ = val; }
     virtual int radius() const { return radius_; }
 
-    virtual void deblur(int idx, Mat &frame) = 0;
+    virtual void deblur(int idx, Mat &frame, const Range &range) = 0;
 
 
     // data from stabilizer
@@ -90,7 +90,7 @@ class CV_EXPORTS DeblurerBase
 class CV_EXPORTS NullDeblurer : public DeblurerBase
 {
 public:
-    virtual void deblur(int /*idx*/, Mat &/*frame*/) {}
+    virtual void deblur(int /*idx*/, Mat &/*frame*/, const Range &/*range*/) CV_OVERRIDE {}
 };
 
 class CV_EXPORTS WeightingDeblurer : public DeblurerBase
@@ -101,7 +101,7 @@ class CV_EXPORTS WeightingDeblurer : public DeblurerBase
     void setSensitivity(float val) { sensitivity_ = val; }
     float sensitivity() const { return sensitivity_; }
 
-    virtual void deblur(int idx, Mat &frame);
+    virtual void deblur(int idx, Mat &frame, const Range &range) CV_OVERRIDE;
 
 private:
     float sensitivity_;
diff --git a/IPL/include/opencv/opencv2/videostab/fast_marching.hpp b/IPL/include/opencv/opencv2/videostab/fast_marching.hpp
index c0c7985..43f8e4a 100644
--- a/IPL/include/opencv/opencv2/videostab/fast_marching.hpp
+++ b/IPL/include/opencv/opencv2/videostab/fast_marching.hpp
@@ -40,8 +40,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_VIDEOSTAB_FAST_MARCHING_HPP__
-#define __OPENCV_VIDEOSTAB_FAST_MARCHING_HPP__
+#ifndef OPENCV_VIDEOSTAB_FAST_MARCHING_HPP
+#define OPENCV_VIDEOSTAB_FAST_MARCHING_HPP
 
 #include <cmath>
 #include <queue>
@@ -63,7 +63,7 @@ namespace videostab
 class CV_EXPORTS FastMarchingMethod
 {
 public:
-    FastMarchingMethod() : inf_(1e6f) {}
+    FastMarchingMethod() : inf_(1e6f), size_(0) {}
 
     /** @brief Template method that runs the Fast Marching Method.
 
diff --git a/IPL/include/opencv/opencv2/videostab/fast_marching_inl.hpp b/IPL/include/opencv/opencv2/videostab/fast_marching_inl.hpp
index 6388e69..fdd488a 100644
--- a/IPL/include/opencv/opencv2/videostab/fast_marching_inl.hpp
+++ b/IPL/include/opencv/opencv2/videostab/fast_marching_inl.hpp
@@ -40,8 +40,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_VIDEOSTAB_FAST_MARCHING_INL_HPP__
-#define __OPENCV_VIDEOSTAB_FAST_MARCHING_INL_HPP__
+#ifndef OPENCV_VIDEOSTAB_FAST_MARCHING_INL_HPP
+#define OPENCV_VIDEOSTAB_FAST_MARCHING_INL_HPP
 
 #include "opencv2/videostab/fast_marching.hpp"
 
diff --git a/IPL/include/opencv/opencv2/videostab/frame_source.hpp b/IPL/include/opencv/opencv2/videostab/frame_source.hpp
index 612fbdb..36343dd 100644
--- a/IPL/include/opencv/opencv2/videostab/frame_source.hpp
+++ b/IPL/include/opencv/opencv2/videostab/frame_source.hpp
@@ -40,8 +40,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_VIDEOSTAB_FRAME_SOURCE_HPP__
-#define __OPENCV_VIDEOSTAB_FRAME_SOURCE_HPP__
+#ifndef OPENCV_VIDEOSTAB_FRAME_SOURCE_HPP
+#define OPENCV_VIDEOSTAB_FRAME_SOURCE_HPP
 
 #include <vector>
 #include "opencv2/core.hpp"
@@ -65,8 +65,8 @@ class CV_EXPORTS IFrameSource
 class CV_EXPORTS NullFrameSource : public IFrameSource
 {
 public:
-    virtual void reset() {}
-    virtual Mat nextFrame() { return Mat(); }
+    virtual void reset() CV_OVERRIDE {}
+    virtual Mat nextFrame() CV_OVERRIDE { return Mat(); }
 };
 
 class CV_EXPORTS VideoFileSource : public IFrameSource
@@ -74,8 +74,8 @@ class CV_EXPORTS VideoFileSource : public IFrameSource
 public:
     VideoFileSource(const String &path, bool volatileFrame = false);
 
-    virtual void reset();
-    virtual Mat nextFrame();
+    virtual void reset() CV_OVERRIDE;
+    virtual Mat nextFrame() CV_OVERRIDE;
 
     int width();
     int height();
@@ -86,6 +86,28 @@ class CV_EXPORTS VideoFileSource : public IFrameSource
     Ptr<IFrameSource> impl;
 };
 
+/** @brief Frame source wrapper that runs an optional user callback on every frame it returns. */
+class MaskFrameSource : public IFrameSource
+{
+public:
+    MaskFrameSource(const Ptr<IFrameSource>& source): impl(source) {}
+
+    virtual void reset() CV_OVERRIDE { impl->reset(); }
+    //! Pulls the next frame from the wrapped source and hands it to the callback, if one is set.
+    virtual Mat nextFrame() CV_OVERRIDE {
+        Mat nextFrame = impl->nextFrame();
+        if (maskCallback_) maskCallback_(nextFrame); // an unset std::function would throw on call
+        return nextFrame;
+    }
+
+    //! Sets the callback applied to each frame; passing an empty one disables the callback step.
+    void setMaskCallback(std::function<void(Mat&)> MaskCallback) { maskCallback_ = MaskCallback; }
+
+private:
+    Ptr<IFrameSource> impl;
+    std::function<void(Mat&)> maskCallback_;
+};
+
 //! @}
 
 } // namespace videostab
diff --git a/IPL/include/opencv/opencv2/videostab/global_motion.hpp b/IPL/include/opencv/opencv2/videostab/global_motion.hpp
index 5d51e42..24291c2 100644
--- a/IPL/include/opencv/opencv2/videostab/global_motion.hpp
+++ b/IPL/include/opencv/opencv2/videostab/global_motion.hpp
@@ -40,8 +40,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_VIDEOSTAB_GLOBAL_MOTION_HPP__
-#define __OPENCV_VIDEOSTAB_GLOBAL_MOTION_HPP__
+#ifndef OPENCV_VIDEOSTAB_GLOBAL_MOTION_HPP
+#define OPENCV_VIDEOSTAB_GLOBAL_MOTION_HPP
 
 #include <vector>
 #include <fstream>
@@ -139,7 +139,7 @@ class CV_EXPORTS MotionEstimatorRansacL2 : public MotionEstimatorBase
     void setMinInlierRatio(float val) { minInlierRatio_ = val; }
     float minInlierRatio() const { return minInlierRatio_; }
 
-    virtual Mat estimate(InputArray points0, InputArray points1, bool *ok = 0);
+    virtual Mat estimate(InputArray points0, InputArray points1, bool *ok = 0) CV_OVERRIDE;
 
 private:
     RansacParams ransacParams_;
@@ -155,7 +155,7 @@ class CV_EXPORTS MotionEstimatorL1 : public MotionEstimatorBase
 public:
     MotionEstimatorL1(MotionModel model = MM_AFFINE);
 
-    virtual Mat estimate(InputArray points0, InputArray points1, bool *ok = 0);
+    virtual Mat estimate(InputArray points0, InputArray points1, bool *ok = 0) CV_OVERRIDE;
 
 private:
     std::vector<double> obj_, collb_, colub_;
@@ -180,6 +180,12 @@ class CV_EXPORTS ImageMotionEstimatorBase
     virtual void setMotionModel(MotionModel val) { motionModel_ = val; }
     virtual MotionModel motionModel() const { return motionModel_; }
 
+    virtual void setFrameMask(InputArray mask) //!< base implementation: only an empty mask is accepted
+    {
+        if (!mask.empty()) // an empty mask passes through silently; anything else is reported
+            CV_Error(Error::StsNotImplemented, "Mask support is not implemented.");
+    }
+
     virtual Mat estimate(const Mat &frame0, const Mat &frame1, bool *ok = 0) = 0;
 
 protected:
@@ -194,7 +200,7 @@ class CV_EXPORTS FromFileMotionReader : public ImageMotionEstimatorBase
 public:
     FromFileMotionReader(const String &path);
 
-    virtual Mat estimate(const Mat &frame0, const Mat &frame1, bool *ok = 0);
+    virtual Mat estimate(const Mat &frame0, const Mat &frame1, bool *ok = 0) CV_OVERRIDE;
 
 private:
     std::ifstream file_;
@@ -205,10 +211,12 @@ class CV_EXPORTS ToFileMotionWriter : public ImageMotionEstimatorBase
 public:
     ToFileMotionWriter(const String &path, Ptr<ImageMotionEstimatorBase> estimator);
 
-    virtual void setMotionModel(MotionModel val) { motionEstimator_->setMotionModel(val); }
-    virtual MotionModel motionModel() const { return motionEstimator_->motionModel(); }
+    virtual void setMotionModel(MotionModel val) CV_OVERRIDE { motionEstimator_->setMotionModel(val); }
+    virtual MotionModel motionModel() const CV_OVERRIDE { return motionEstimator_->motionModel(); }
 
-    virtual Mat estimate(const Mat &frame0, const Mat &frame1, bool *ok = 0);
+    virtual void setFrameMask(InputArray mask) CV_OVERRIDE { motionEstimator_->setFrameMask(mask); } //!< forwards the mask to the wrapped estimator
+
+    virtual Mat estimate(const Mat &frame0, const Mat &frame1, bool *ok = 0) CV_OVERRIDE;
 
 private:
     std::ofstream file_;
@@ -223,8 +231,8 @@ class CV_EXPORTS KeypointBasedMotionEstimator : public ImageMotionEstimatorBase
 public:
     KeypointBasedMotionEstimator(Ptr<MotionEstimatorBase> estimator);
 
-    virtual void setMotionModel(MotionModel val) { motionEstimator_->setMotionModel(val); }
-    virtual MotionModel motionModel() const { return motionEstimator_->motionModel(); }
+    virtual void setMotionModel(MotionModel val) CV_OVERRIDE { motionEstimator_->setMotionModel(val); }
+    virtual MotionModel motionModel() const CV_OVERRIDE { return motionEstimator_->motionModel(); }
 
     void setDetector(Ptr<FeatureDetector> val) { detector_ = val; }
     Ptr<FeatureDetector> detector() const { return detector_; }
@@ -235,13 +243,17 @@ class CV_EXPORTS KeypointBasedMotionEstimator : public ImageMotionEstimatorBase
     void setOutlierRejector(Ptr<IOutlierRejector> val) { outlierRejector_ = val; }
     Ptr<IOutlierRejector> outlierRejector() const { return outlierRejector_; }
 
-    virtual Mat estimate(const Mat &frame0, const Mat &frame1, bool *ok = 0);
+    virtual void setFrameMask(InputArray mask) CV_OVERRIDE { mask_ = mask.getMat(); } //!< keeps the Mat obtained from mask in mask_
+
+    virtual Mat estimate(const Mat &frame0, const Mat &frame1, bool *ok = 0) CV_OVERRIDE;
+    Mat estimate(InputArray frame0, InputArray frame1, bool *ok = 0);
 
 private:
     Ptr<MotionEstimatorBase> motionEstimator_;
     Ptr<FeatureDetector> detector_;
     Ptr<ISparseOptFlowEstimator> optFlowEstimator_;
     Ptr<IOutlierRejector> outlierRejector_;
+    Mat mask_;
 
     std::vector<uchar> status_;
     std::vector<KeyPoint> keypointsPrev_;
@@ -256,13 +268,13 @@ class CV_EXPORTS KeypointBasedMotionEstimatorGpu : public ImageMotionEstimatorBa
 public:
     KeypointBasedMotionEstimatorGpu(Ptr<MotionEstimatorBase> estimator);
 
-    virtual void setMotionModel(MotionModel val) { motionEstimator_->setMotionModel(val); }
-    virtual MotionModel motionModel() const { return motionEstimator_->motionModel(); }
+    virtual void setMotionModel(MotionModel val) CV_OVERRIDE { motionEstimator_->setMotionModel(val); }
+    virtual MotionModel motionModel() const CV_OVERRIDE { return motionEstimator_->motionModel(); }
 
     void setOutlierRejector(Ptr<IOutlierRejector> val) { outlierRejector_ = val; }
     Ptr<IOutlierRejector> outlierRejector() const { return outlierRejector_; }
 
-    virtual Mat estimate(const Mat &frame0, const Mat &frame1, bool *ok = 0);
+    virtual Mat estimate(const Mat &frame0, const Mat &frame1, bool *ok = 0) CV_OVERRIDE;
     Mat estimate(const cuda::GpuMat &frame0, const cuda::GpuMat &frame1, bool *ok = 0);
 
 private:
@@ -287,7 +299,7 @@ class CV_EXPORTS KeypointBasedMotionEstimatorGpu : public ImageMotionEstimatorBa
 @param from Source frame index.
 @param to Destination frame index.
 @param motions Pair-wise motions. motions[i] denotes motion from the frame i to the frame i+1
-@return Motion from the frame from to the frame to.
+@return Motion from the Source frame to the Destination frame.
  */
 CV_EXPORTS Mat getMotion(int from, int to, const std::vector<Mat> &motions);
 
diff --git a/IPL/include/opencv/opencv2/videostab/inpainting.hpp b/IPL/include/opencv/opencv2/videostab/inpainting.hpp
index 844c68c..9c123f0 100644
--- a/IPL/include/opencv/opencv2/videostab/inpainting.hpp
+++ b/IPL/include/opencv/opencv2/videostab/inpainting.hpp
@@ -40,8 +40,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_VIDEOSTAB_INPAINTINT_HPP__
-#define __OPENCV_VIDEOSTAB_INPAINTINT_HPP__
+#ifndef OPENCV_VIDEOSTAB_INPAINTINT_HPP
+#define OPENCV_VIDEOSTAB_INPAINTINT_HPP
 
 #include <vector>
 #include "opencv2/core.hpp"
@@ -102,7 +102,7 @@ class CV_EXPORTS InpainterBase
 class CV_EXPORTS NullInpainter : public InpainterBase
 {
 public:
-    virtual void inpaint(int /*idx*/, Mat &/*frame*/, Mat &/*mask*/) {}
+    virtual void inpaint(int /*idx*/, Mat &/*frame*/, Mat &/*mask*/) CV_OVERRIDE {}
 };
 
 class CV_EXPORTS InpaintingPipeline : public InpainterBase
@@ -111,14 +111,14 @@ class CV_EXPORTS InpaintingPipeline : public InpainterBase
     void pushBack(Ptr<InpainterBase> inpainter) { inpainters_.push_back(inpainter); }
     bool empty() const { return inpainters_.empty(); }
 
-    virtual void setRadius(int val);
-    virtual void setMotionModel(MotionModel val);
-    virtual void setFrames(const std::vector<Mat> &val);
-    virtual void setMotions(const std::vector<Mat> &val);
-    virtual void setStabilizedFrames(const std::vector<Mat> &val);
-    virtual void setStabilizationMotions(const std::vector<Mat> &val);
+    virtual void setRadius(int val) CV_OVERRIDE;
+    virtual void setMotionModel(MotionModel val) CV_OVERRIDE;
+    virtual void setFrames(const std::vector<Mat> &val) CV_OVERRIDE;
+    virtual void setMotions(const std::vector<Mat> &val) CV_OVERRIDE;
+    virtual void setStabilizedFrames(const std::vector<Mat> &val) CV_OVERRIDE;
+    virtual void setStabilizationMotions(const std::vector<Mat> &val) CV_OVERRIDE;
 
-    virtual void inpaint(int idx, Mat &frame, Mat &mask);
+    virtual void inpaint(int idx, Mat &frame, Mat &mask) CV_OVERRIDE;
 
 private:
     std::vector<Ptr<InpainterBase> > inpainters_;
@@ -132,7 +132,7 @@ class CV_EXPORTS ConsistentMosaicInpainter : public InpainterBase
     void setStdevThresh(float val) { stdevThresh_ = val; }
     float stdevThresh() const { return stdevThresh_; }
 
-    virtual void inpaint(int idx, Mat &frame, Mat &mask);
+    virtual void inpaint(int idx, Mat &frame, Mat &mask) CV_OVERRIDE;
 
 private:
     float stdevThresh_;
@@ -155,7 +155,7 @@ class CV_EXPORTS MotionInpainter : public InpainterBase
     void setBorderMode(int val) { borderMode_ = val; }
     int borderMode() const { return borderMode_; }
 
-    virtual void inpaint(int idx, Mat &frame, Mat &mask);
+    virtual void inpaint(int idx, Mat &frame, Mat &mask) CV_OVERRIDE;
 
 private:
     FastMarchingMethod fmm_;
@@ -174,7 +174,7 @@ class CV_EXPORTS MotionInpainter : public InpainterBase
 class CV_EXPORTS ColorAverageInpainter : public InpainterBase
 {
 public:
-    virtual void inpaint(int idx, Mat &frame, Mat &mask);
+    virtual void inpaint(int idx, Mat &frame, Mat &mask) CV_OVERRIDE;
 
 private:
     FastMarchingMethod fmm_;
@@ -185,7 +185,7 @@ class CV_EXPORTS ColorInpainter : public InpainterBase
 public:
     ColorInpainter(int method = INPAINT_TELEA, double radius = 2.);
 
-    virtual void inpaint(int idx, Mat &frame, Mat &mask);
+    virtual void inpaint(int idx, Mat &frame, Mat &mask) CV_OVERRIDE;
 
 private:
     int method_;
diff --git a/IPL/include/opencv/opencv2/videostab/log.hpp b/IPL/include/opencv/opencv2/videostab/log.hpp
index 28625ed..73e7049 100644
--- a/IPL/include/opencv/opencv2/videostab/log.hpp
+++ b/IPL/include/opencv/opencv2/videostab/log.hpp
@@ -40,8 +40,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_VIDEOSTAB_LOG_HPP__
-#define __OPENCV_VIDEOSTAB_LOG_HPP__
+#ifndef OPENCV_VIDEOSTAB_LOG_HPP
+#define OPENCV_VIDEOSTAB_LOG_HPP
 
 #include "opencv2/core.hpp"
 
@@ -63,13 +63,13 @@ class CV_EXPORTS ILog
 class CV_EXPORTS NullLog : public ILog
 {
 public:
-    virtual void print(const char * /*format*/, ...) {}
+    virtual void print(const char * /*format*/, ...) CV_OVERRIDE {}
 };
 
 class CV_EXPORTS LogToStdout : public ILog
 {
 public:
-    virtual void print(const char *format, ...);
+    virtual void print(const char *format, ...) CV_OVERRIDE;
 };
 
 //! @}
diff --git a/IPL/include/opencv/opencv2/videostab/motion_core.hpp b/IPL/include/opencv/opencv2/videostab/motion_core.hpp
index 17448e3..4525cc7 100644
--- a/IPL/include/opencv/opencv2/videostab/motion_core.hpp
+++ b/IPL/include/opencv/opencv2/videostab/motion_core.hpp
@@ -40,8 +40,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_VIDEOSTAB_MOTION_CORE_HPP__
-#define __OPENCV_VIDEOSTAB_MOTION_CORE_HPP__
+#ifndef OPENCV_VIDEOSTAB_MOTION_CORE_HPP
+#define OPENCV_VIDEOSTAB_MOTION_CORE_HPP
 
 #include <cmath>
 #include "opencv2/core.hpp"
diff --git a/IPL/include/opencv/opencv2/videostab/motion_stabilizing.hpp b/IPL/include/opencv/opencv2/videostab/motion_stabilizing.hpp
index 3bdbfbd..f0dbff1 100644
--- a/IPL/include/opencv/opencv2/videostab/motion_stabilizing.hpp
+++ b/IPL/include/opencv/opencv2/videostab/motion_stabilizing.hpp
@@ -40,8 +40,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_VIDEOSTAB_MOTION_STABILIZING_HPP__
-#define __OPENCV_VIDEOSTAB_MOTION_STABILIZING_HPP__
+#ifndef OPENCV_VIDEOSTAB_MOTION_STABILIZING_HPP
+#define OPENCV_VIDEOSTAB_MOTION_STABILIZING_HPP
 
 #include <vector>
 #include <utility>
@@ -63,7 +63,7 @@ class CV_EXPORTS IMotionStabilizer
 
     //! assumes that [0, size-1) is in or equals to [range.first, range.second)
     virtual void stabilize(
-            int size, const std::vector<Mat> &motions, std::pair<int,int> range,
+            int size, const std::vector<Mat> &motions, const Range &range,
             Mat *stabilizationMotions) = 0;
 };
 
@@ -74,8 +74,8 @@ class CV_EXPORTS MotionStabilizationPipeline : public IMotionStabilizer
     bool empty() const { return stabilizers_.empty(); }
 
     virtual void stabilize(
-            int size, const std::vector<Mat> &motions, std::pair<int,int> range,
-            Mat *stabilizationMotions);
+            int size, const std::vector<Mat> &motions, const Range &range,
+            Mat *stabilizationMotions) CV_OVERRIDE;
 
 private:
     std::vector<Ptr<IMotionStabilizer> > stabilizers_;
@@ -87,11 +87,11 @@ class CV_EXPORTS MotionFilterBase : public IMotionStabilizer
     virtual ~MotionFilterBase() {}
 
     virtual Mat stabilize(
-            int idx, const std::vector<Mat> &motions, std::pair<int,int> range) = 0;
+            int idx, const std::vector<Mat> &motions, const Range &range) = 0;
 
     virtual void stabilize(
-            int size, const std::vector<Mat> &motions, std::pair<int,int> range,
-            Mat *stabilizationMotions);
+            int size, const std::vector<Mat> &motions, const Range &range,
+            Mat *stabilizationMotions) CV_OVERRIDE;
 };
 
 class CV_EXPORTS GaussianMotionFilter : public MotionFilterBase
@@ -104,7 +104,7 @@ class CV_EXPORTS GaussianMotionFilter : public MotionFilterBase
     float stdev() const { return stdev_; }
 
     virtual Mat stabilize(
-            int idx, const std::vector<Mat> &motions, std::pair<int,int> range);
+            int idx, const std::vector<Mat> &motions, const Range &range) CV_OVERRIDE;
 
 private:
     int radius_;
@@ -141,8 +141,8 @@ class CV_EXPORTS LpMotionStabilizer : public IMotionStabilizer
     float weight4() const { return w4_; }
 
     virtual void stabilize(
-            int size, const std::vector<Mat> &motions, std::pair<int,int> range,
-            Mat *stabilizationMotions);
+            int size, const std::vector<Mat> &motions, const Range &range,
+            Mat *stabilizationMotions) CV_OVERRIDE;
 
 private:
     MotionModel model_;
diff --git a/IPL/include/opencv/opencv2/videostab/optical_flow.hpp b/IPL/include/opencv/opencv2/videostab/optical_flow.hpp
index 41d1953..5e06941 100644
--- a/IPL/include/opencv/opencv2/videostab/optical_flow.hpp
+++ b/IPL/include/opencv/opencv2/videostab/optical_flow.hpp
@@ -40,8 +40,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_VIDEOSTAB_OPTICAL_FLOW_HPP__
-#define __OPENCV_VIDEOSTAB_OPTICAL_FLOW_HPP__
+#ifndef OPENCV_VIDEOSTAB_OPTICAL_FLOW_HPP
+#define OPENCV_VIDEOSTAB_OPTICAL_FLOW_HPP
 
 #include "opencv2/core.hpp"
 #include "opencv2/opencv_modules.hpp"
@@ -99,7 +99,7 @@ class CV_EXPORTS SparsePyrLkOptFlowEstimator
 public:
     virtual void run(
             InputArray frame0, InputArray frame1, InputArray points0, InputOutputArray points1,
-            OutputArray status, OutputArray errors);
+            OutputArray status, OutputArray errors) CV_OVERRIDE;
 };
 
 #ifdef HAVE_OPENCV_CUDAOPTFLOW
@@ -112,7 +112,7 @@ class CV_EXPORTS SparsePyrLkOptFlowEstimatorGpu
 
     virtual void run(
             InputArray frame0, InputArray frame1, InputArray points0, InputOutputArray points1,
-            OutputArray status, OutputArray errors);
+            OutputArray status, OutputArray errors) CV_OVERRIDE;
 
     void run(const cuda::GpuMat &frame0, const cuda::GpuMat &frame1, const cuda::GpuMat &points0, cuda::GpuMat &points1,
              cuda::GpuMat &status, cuda::GpuMat &errors);
@@ -133,7 +133,7 @@ class CV_EXPORTS DensePyrLkOptFlowEstimatorGpu
 
     virtual void run(
             InputArray frame0, InputArray frame1, InputOutputArray flowX, InputOutputArray flowY,
-            OutputArray errors);
+            OutputArray errors) CV_OVERRIDE;
 
 private:
     Ptr<cuda::DensePyrLKOpticalFlow> optFlowEstimator_;
diff --git a/IPL/include/opencv/opencv2/videostab/outlier_rejection.hpp b/IPL/include/opencv/opencv2/videostab/outlier_rejection.hpp
index 9e40f85..1d29896 100644
--- a/IPL/include/opencv/opencv2/videostab/outlier_rejection.hpp
+++ b/IPL/include/opencv/opencv2/videostab/outlier_rejection.hpp
@@ -40,8 +40,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_VIDEOSTAB_OUTLIER_REJECTION_HPP__
-#define __OPENCV_VIDEOSTAB_OUTLIER_REJECTION_HPP__
+#ifndef OPENCV_VIDEOSTAB_OUTLIER_REJECTION_HPP
+#define OPENCV_VIDEOSTAB_OUTLIER_REJECTION_HPP
 
 #include <vector>
 #include "opencv2/core.hpp"
@@ -68,7 +68,7 @@ class CV_EXPORTS NullOutlierRejector : public IOutlierRejector
 {
 public:
     virtual void process(
-            Size frameSize, InputArray points0, InputArray points1, OutputArray mask);
+            Size frameSize, InputArray points0, InputArray points1, OutputArray mask) CV_OVERRIDE;
 };
 
 class CV_EXPORTS TranslationBasedLocalOutlierRejector : public IOutlierRejector
@@ -83,7 +83,7 @@ class CV_EXPORTS TranslationBasedLocalOutlierRejector : public IOutlierRejector
     RansacParams ransacParams() const { return ransacParams_; }
 
     virtual void process(
-            Size frameSize, InputArray points0, InputArray points1, OutputArray mask);
+            Size frameSize, InputArray points0, InputArray points1, OutputArray mask) CV_OVERRIDE;
 
 private:
     Size cellSize_;
diff --git a/IPL/include/opencv/opencv2/videostab/ring_buffer.hpp b/IPL/include/opencv/opencv2/videostab/ring_buffer.hpp
index 7cc3f03..55d5244 100644
--- a/IPL/include/opencv/opencv2/videostab/ring_buffer.hpp
+++ b/IPL/include/opencv/opencv2/videostab/ring_buffer.hpp
@@ -40,8 +40,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_VIDEOSTAB_RING_BUFFER_HPP__
-#define __OPENCV_VIDEOSTAB_RING_BUFFER_HPP__
+#ifndef OPENCV_VIDEOSTAB_RING_BUFFER_HPP
+#define OPENCV_VIDEOSTAB_RING_BUFFER_HPP
 
 #include <vector>
 #include "opencv2/imgproc.hpp"
diff --git a/IPL/include/opencv/opencv2/videostab/stabilizer.hpp b/IPL/include/opencv/opencv2/videostab/stabilizer.hpp
index c18d314..28fe348 100644
--- a/IPL/include/opencv/opencv2/videostab/stabilizer.hpp
+++ b/IPL/include/opencv/opencv2/videostab/stabilizer.hpp
@@ -40,8 +40,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_VIDEOSTAB_STABILIZER_HPP__
-#define __OPENCV_VIDEOSTAB_STABILIZER_HPP__
+#ifndef OPENCV_VIDEOSTAB_STABILIZER_HPP
+#define OPENCV_VIDEOSTAB_STABILIZER_HPP
 
 #include <vector>
 #include <ctime>
@@ -77,6 +77,9 @@ class CV_EXPORTS StabilizerBase
     void setFrameSource(Ptr<IFrameSource> val) { frameSource_ = val; }
     Ptr<IFrameSource> frameSource() const { return frameSource_; }
 
+    void setMaskSource(const Ptr<IFrameSource>& val) { maskSource_ = val; }
+    Ptr<IFrameSource> maskSource() const { return maskSource_; }
+
     void setMotionEstimator(Ptr<ImageMotionEstimatorBase> val) { motionEstimator_ = val; }
     Ptr<ImageMotionEstimatorBase> motionEstimator() const { return motionEstimator_; }
 
@@ -110,6 +113,7 @@ class CV_EXPORTS StabilizerBase
 
     Ptr<ILog> log_;
     Ptr<IFrameSource> frameSource_;
+    Ptr<IFrameSource> maskSource_;
     Ptr<ImageMotionEstimatorBase> motionEstimator_;
     Ptr<DeblurerBase> deblurer_;
     Ptr<InpainterBase> inpainter_;
@@ -144,14 +148,14 @@ class CV_EXPORTS OnePassStabilizer : public StabilizerBase, public IFrameSource
     void setMotionFilter(Ptr<MotionFilterBase> val) { motionFilter_ = val; }
     Ptr<MotionFilterBase> motionFilter() const { return motionFilter_; }
 
-    virtual void reset();
-    virtual Mat nextFrame() { return nextStabilizedFrame(); }
+    virtual void reset() CV_OVERRIDE;
+    virtual Mat nextFrame() CV_OVERRIDE { return nextStabilizedFrame(); }
 
 protected:
-    virtual void setUp(const Mat &firstFrame);
-    virtual Mat estimateMotion();
-    virtual Mat estimateStabilizationMotion();
-    virtual Mat postProcessFrame(const Mat &frame);
+    virtual void setUp(const Mat &firstFrame) CV_OVERRIDE;
+    virtual Mat estimateMotion() CV_OVERRIDE;
+    virtual Mat estimateStabilizationMotion() CV_OVERRIDE;
+    virtual Mat postProcessFrame(const Mat &frame) CV_OVERRIDE;
 
     Ptr<MotionFilterBase> motionFilter_;
 };
@@ -170,16 +174,16 @@ class CV_EXPORTS TwoPassStabilizer : public StabilizerBase, public IFrameSource
     void setEstimateTrimRatio(bool val) { mustEstTrimRatio_ = val; }
     bool mustEstimateTrimaRatio() const { return mustEstTrimRatio_; }
 
-    virtual void reset();
-    virtual Mat nextFrame();
+    virtual void reset() CV_OVERRIDE;
+    virtual Mat nextFrame() CV_OVERRIDE;
 
 protected:
     void runPrePassIfNecessary();
 
-    virtual void setUp(const Mat &firstFrame);
-    virtual Mat estimateMotion();
-    virtual Mat estimateStabilizationMotion();
-    virtual Mat postProcessFrame(const Mat &frame);
+    virtual void setUp(const Mat &firstFrame) CV_OVERRIDE;
+    virtual Mat estimateMotion() CV_OVERRIDE;
+    virtual Mat estimateStabilizationMotion() CV_OVERRIDE;
+    virtual Mat postProcessFrame(const Mat &frame) CV_OVERRIDE;
 
     Ptr<IMotionStabilizer> motionStabilizer_;
     Ptr<WobbleSuppressorBase> wobbleSuppressor_;
diff --git a/IPL/include/opencv/opencv2/videostab/wobble_suppression.hpp b/IPL/include/opencv/opencv2/videostab/wobble_suppression.hpp
index 3f0a943..d60ae6d 100644
--- a/IPL/include/opencv/opencv2/videostab/wobble_suppression.hpp
+++ b/IPL/include/opencv/opencv2/videostab/wobble_suppression.hpp
@@ -40,8 +40,8 @@
 //
 //M*/
 
-#ifndef __OPENCV_VIDEOSTAB_WOBBLE_SUPPRESSION_HPP__
-#define __OPENCV_VIDEOSTAB_WOBBLE_SUPPRESSION_HPP__
+#ifndef OPENCV_VIDEOSTAB_WOBBLE_SUPPRESSION_HPP
+#define OPENCV_VIDEOSTAB_WOBBLE_SUPPRESSION_HPP
 
 #include <vector>
 #include "opencv2/core.hpp"
@@ -95,7 +95,7 @@ class CV_EXPORTS WobbleSuppressorBase
 class CV_EXPORTS NullWobbleSuppressor : public WobbleSuppressorBase
 {
 public:
-    virtual void suppress(int idx, const Mat &frame, Mat &result);
+    virtual void suppress(int idx, const Mat &frame, Mat &result) CV_OVERRIDE;
 };
 
 class CV_EXPORTS MoreAccurateMotionWobbleSuppressorBase : public WobbleSuppressorBase
@@ -113,7 +113,7 @@ class CV_EXPORTS MoreAccurateMotionWobbleSuppressorBase : public WobbleSuppresso
 class CV_EXPORTS MoreAccurateMotionWobbleSuppressor : public MoreAccurateMotionWobbleSuppressorBase
 {
 public:
-    virtual void suppress(int idx, const Mat &frame, Mat &result);
+    virtual void suppress(int idx, const Mat &frame, Mat &result) CV_OVERRIDE;
 
 private:
     Mat_<float> mapx_, mapy_;
@@ -124,7 +124,7 @@ class CV_EXPORTS MoreAccurateMotionWobbleSuppressorGpu : public MoreAccurateMoti
 {
 public:
     void suppress(int idx, const cuda::GpuMat &frame, cuda::GpuMat &result);
-    virtual void suppress(int idx, const Mat &frame, Mat &result);
+    virtual void suppress(int idx, const Mat &frame, Mat &result) CV_OVERRIDE;
 
 private:
     cuda::GpuMat frameDevice_, resultDevice_;
diff --git a/IPL/include/opencv/opencv2/xfeatures2d.hpp b/IPL/include/opencv/opencv2/xfeatures2d.hpp
index b8f62b8..26201a8 100644
--- a/IPL/include/opencv/opencv2/xfeatures2d.hpp
+++ b/IPL/include/opencv/opencv2/xfeatures2d.hpp
@@ -51,7 +51,13 @@ This section describes experimental algorithms for 2d feature detection.
     @defgroup xfeatures2d_nonfree Non-free 2D Features Algorithms
 
 This section describes two popular algorithms for 2d feature detection, SIFT and SURF, that are
-known to be patented. Use them at your own risk.
+known to be patented. You need to set the OPENCV_ENABLE_NONFREE option in cmake to use those. Use them at your own risk.
+
+    @defgroup xfeatures2d_match Experimental 2D Features Matching Algorithm
+
+This section describes the following matching strategies:
+    - GMS: Grid-based Motion Statistics, @cite Bian2017gms
+    - LOGOS: Local geometric support for high-outlier spatial verification, @cite Lowry2018LOGOSLG
 
 @}
 */
@@ -61,6 +67,39 @@ namespace cv
 namespace xfeatures2d
 {
 
+
+/** @brief Class for extracting keypoints and computing descriptors using the Scale Invariant Feature Transform
+(SIFT) algorithm by D. Lowe @cite Lowe04 .
+*/
+class CV_EXPORTS_W SIFT : public Feature2D
+{
+public:
+    /**
+    @param nfeatures The number of best features to retain. The features are ranked by their scores
+    (measured in SIFT algorithm as the local contrast)
+
+    @param nOctaveLayers The number of layers in each octave. 3 is the value used in D. Lowe paper. The
+    number of octaves is computed automatically from the image resolution.
+
+    @param contrastThreshold The contrast threshold used to filter out weak features in semi-uniform
+    (low-contrast) regions. The larger the threshold, the fewer features are produced by the detector.
+
+    @param edgeThreshold The threshold used to filter out edge-like features. Note that its meaning
+    is different from the contrastThreshold, i.e. the larger the edgeThreshold, the fewer features are
+    filtered out (more features are retained).
+
+    @param sigma The sigma of the Gaussian applied to the input image at the octave \#0. If your image
+    is captured with a weak camera with soft lenses, you might want to reduce the number.
+    */
+    CV_WRAP static Ptr<SIFT> create(int nfeatures = 0, int nOctaveLayers = 3,
+        double contrastThreshold = 0.04, double edgeThreshold = 10,
+        double sigma = 1.6);
+};
+
+typedef SIFT SiftFeatureDetector;
+typedef SIFT SiftDescriptorExtractor;
+
+
 //! @addtogroup xfeatures2d_experiment
 //! @{
 
@@ -80,10 +119,9 @@ class CV_EXPORTS_W FREAK : public Feature2D
 {
 public:
 
-    enum
-    {
-        NB_SCALES = 64, NB_PAIRS = 512, NB_ORIENPAIRS = 45
-    };
+    static const int    NB_SCALES        = 64;
+    static const int    NB_PAIRS         = 512;
+    static const int    NB_ORIENPAIRS    = 45;
 
     /**
     @param orientationNormalized Enable orientation normalization.
@@ -132,6 +170,8 @@ class CV_EXPORTS_W BriefDescriptorExtractor : public Feature2D
 
 An image descriptor that can be computed very fast, while being
 about as robust as, for example, SURF or BRIEF.
+
+@note It requires a color image as input.
  */
 class CV_EXPORTS_W LUCID : public Feature2D
 {
@@ -140,7 +180,7 @@ class CV_EXPORTS_W LUCID : public Feature2D
      * @param lucid_kernel kernel for descriptor construction, where 1=3x3, 2=5x5, 3=7x7 and so forth
      * @param blur_kernel kernel for blurring image prior to descriptor construction, where 1=3x3, 2=5x5, 3=7x7 and so forth
      */
-    CV_WRAP static Ptr<LUCID> create(const int lucid_kernel, const int blur_kernel);
+    CV_WRAP static Ptr<LUCID> create(const int lucid_kernel = 1, const int blur_kernel = 2);
 };
 
 
@@ -158,6 +198,7 @@ LATCH is a binary descriptor based on learned comparisons of triplets of image p
 * rotationInvariance - whether or not the descriptor should compansate for orientation changes.
 * half_ssd_size - the size of half of the mini-patches size. For example, if we would like to compare triplets of patches of size 7x7x
     then the half_ssd_size should be (7-1)/2 = 3.
+* sigma - sigma value for GaussianBlur smoothing of the source image. Source image will be used without smoothing in case sigma value is 0.
 
 Note: the descriptor can be coupled with any keypoint extractor. The only demand is that if you use set rotationInvariance = True then
     you will have to use an extractor which estimates the patch orientation (in degrees). Examples for such extractors are ORB and SIFT.
@@ -168,7 +209,7 @@ Note: a complete example can be found under /samples/cpp/tutorial_code/xfeatures
 class CV_EXPORTS_W LATCH : public Feature2D
 {
 public:
-	CV_WRAP static Ptr<LATCH> create(int bytes = 32, bool rotationInvariance = true, int half_ssd_size=3);
+    CV_WRAP static Ptr<LATCH> create(int bytes = 32, bool rotationInvariance = true, int half_ssd_size = 3, double sigma = 2.0);
 };
 
 /** @brief Class implementing DAISY descriptor, described in @cite Tola10
@@ -190,12 +231,12 @@ DAISY::NRM_SIFT mean that descriptors are normalized for L2 norm equal to 1.0 bu
 class CV_EXPORTS_W DAISY : public Feature2D
 {
 public:
-    enum
+    enum NormalizationType
     {
         NRM_NONE = 100, NRM_PARTIAL = 101, NRM_FULL = 102, NRM_SIFT = 103,
     };
     CV_WRAP static Ptr<DAISY> create( float radius = 15, int q_radius = 3, int q_theta = 8,
-                int q_hist = 8, int norm = DAISY::NRM_NONE, InputArray H = noArray(),
+                int q_hist = 8, DAISY::NormalizationType norm = DAISY::NRM_NONE, InputArray H = noArray(),
                 bool interpolation = true, bool use_orientation = false );
 
     /** @overload
@@ -203,11 +244,11 @@ class CV_EXPORTS_W DAISY : public Feature2D
      * @param keypoints of interest within image
      * @param descriptors resulted descriptors array
      */
-    virtual void compute( InputArray image, std::vector<KeyPoint>& keypoints, OutputArray descriptors ) = 0;
+    virtual void compute( InputArray image, std::vector<KeyPoint>& keypoints, OutputArray descriptors ) CV_OVERRIDE = 0;
 
     virtual void compute( InputArrayOfArrays images,
                           std::vector<std::vector<KeyPoint> >& keypoints,
-                          OutputArrayOfArrays descriptors );
+                          OutputArrayOfArrays descriptors ) CV_OVERRIDE;
 
     /** @overload
      * @param image image to extract descriptors
@@ -280,6 +321,723 @@ class CV_EXPORTS_W MSDDetector : public Feature2D {
             float m_scale_factor = 1.25f, int m_n_scales = -1, bool m_compute_orientation = false);
 };
 
+/** @brief Class implementing VGG (Oxford Visual Geometry Group) descriptor trained end to end
+using "Descriptor Learning Using Convex Optimisation" (DLCO) apparatus described in @cite Simonyan14.
+
+@param desc type of descriptor to use, VGG::VGG_120 is default (120 dimensions float)
+Available types are VGG::VGG_120, VGG::VGG_80, VGG::VGG_64, VGG::VGG_48
+@param isigma gaussian kernel value for image blur (default is 1.4f)
+@param img_normalize use image sample intensity normalization (enabled by default)
+@param use_scale_orientation sample patterns using keypoints orientation, enabled by default
+@param scale_factor adjust the sampling window of detected keypoints to 64.0f (VGG sampling window)
+6.25f is default and fits for KAZE, SURF detected keypoints window ratio
+6.75f should be the scale for SIFT detected keypoints window ratio
+5.00f should be the scale for AKAZE, MSD, AGAST, FAST, BRISK keypoints window ratio
+0.75f should be the scale for ORB keypoints ratio
+
+@param dsc_normalize clamp descriptors to 255 and convert to uchar CV_8UC1 (disabled by default)
+
+ */
+class CV_EXPORTS_W VGG : public Feature2D
+{
+public:
+
+    CV_WRAP enum
+    {
+        VGG_120 = 100, VGG_80 = 101, VGG_64 = 102, VGG_48 = 103,
+    };
+
+    CV_WRAP static Ptr<VGG> create( int desc = VGG::VGG_120, float isigma = 1.4f,
+                                    bool img_normalize = true, bool use_scale_orientation = true,
+                                    float scale_factor = 6.25f, bool dsc_normalize = false );
+
+    CV_WRAP virtual void setSigma(const float isigma) = 0;
+    CV_WRAP virtual float getSigma() const = 0;
+
+    CV_WRAP virtual void setUseNormalizeImage(const bool img_normalize) = 0;
+    CV_WRAP virtual bool getUseNormalizeImage() const = 0;
+
+    CV_WRAP virtual void setUseScaleOrientation(const bool use_scale_orientation) = 0;
+    CV_WRAP virtual bool getUseScaleOrientation() const = 0;
+
+    CV_WRAP virtual void setScaleFactor(const float scale_factor) = 0;
+    CV_WRAP virtual float getScaleFactor() const = 0;
+
+    CV_WRAP virtual void setUseNormalizeDescriptor(const bool dsc_normalize) = 0;
+    CV_WRAP virtual bool getUseNormalizeDescriptor() const = 0;
+};
+
+/** @brief Class implementing BoostDesc (Learning Image Descriptors with Boosting), described in
+@cite Trzcinski13a and @cite Trzcinski13b.
+
+@param desc type of descriptor to use, BoostDesc::BINBOOST_256 is default (256 bit long dimension)
+Available types are: BoostDesc::BGM, BoostDesc::BGM_HARD, BoostDesc::BGM_BILINEAR, BoostDesc::LBGM,
+BoostDesc::BINBOOST_64, BoostDesc::BINBOOST_128, BoostDesc::BINBOOST_256
+@param use_scale_orientation sample patterns using keypoints orientation, enabled by default
+@param scale_factor adjust the sampling window of detected keypoints
+6.25f is default and fits for KAZE, SURF detected keypoints window ratio
+6.75f should be the scale for SIFT detected keypoints window ratio
+5.00f should be the scale for AKAZE, MSD, AGAST, FAST, BRISK keypoints window ratio
+0.75f should be the scale for ORB keypoints ratio
+1.50f was the default in original implementation
+
+@note BGM is the base descriptor where each binary dimension is computed as the output of a single weak learner.
+BGM_HARD and BGM_BILINEAR refer to the same BGM but use different types of gradient binning. In BGM_HARD, which uses the
+ASSIGN_HARD binning type, the gradient is assigned to the nearest orientation bin. In BGM_BILINEAR, which uses the
+ASSIGN_BILINEAR binning type, the gradient is assigned to the two neighbouring bins. In BGM and all other modes, which use the
+ASSIGN_SOFT binning type, the gradient is assigned to the 8 nearest bins according to the cosine value between the gradient
+angle and the bin center. LBGM (alias FP-Boost) is the floating point extension where each dimension is computed
+as a linear combination of the weak learner responses. BINBOOST and subvariants are the binary extensions of LBGM
+where each bit is computed as a thresholded linear combination of a set of weak learners.
+BoostDesc header files (boostdesc_*.i) were exported from the original binaries with the export-boostdesc.py script from
+the samples subfolder.
+
+*/
+
+class CV_EXPORTS_W BoostDesc : public Feature2D
+{
+public:
+
+    CV_WRAP enum
+    {
+       BGM = 100, BGM_HARD = 101, BGM_BILINEAR = 102, LBGM = 200,
+       BINBOOST_64 = 300, BINBOOST_128 = 301, BINBOOST_256 = 302
+    };
+
+    CV_WRAP static Ptr<BoostDesc> create( int desc = BoostDesc::BINBOOST_256,
+                    bool use_scale_orientation = true, float scale_factor = 6.25f );
+
+    CV_WRAP virtual void setUseScaleOrientation(const bool use_scale_orientation) = 0;
+    CV_WRAP virtual bool getUseScaleOrientation() const = 0;
+
+    CV_WRAP virtual void setScaleFactor(const float scale_factor) = 0;
+    CV_WRAP virtual float getScaleFactor() const = 0;
+};
+
+
+/*
+* Position-Color-Texture signatures
+*/
+
+/**
+* @brief Class implementing PCT (position-color-texture) signature extraction
+*       as described in @cite KrulisLS16.
+*       The algorithm is divided into a feature sampler and a clusterizer.
+*       Feature sampler produces samples at given set of coordinates.
+*       Clusterizer then produces clusters of these samples using k-means algorithm.
+*       Resulting set of clusters is the signature of the input image.
+*
+*       A signature is an array of SIGNATURE_DIMENSION-dimensional points.
+*       Used dimensions are:
+*       weight, x, y position; lab color, contrast, entropy.
+* @cite KrulisLS16
+* @cite BeecksUS10
+*/
+class CV_EXPORTS_W PCTSignatures : public Algorithm
+{
+public:
+    /**
+    * @brief Lp distance function selector.
+    */
+    enum DistanceFunction
+    {
+        L0_25, L0_5, L1, L2, L2SQUARED, L5, L_INFINITY
+    };
+
+    /**
+    * @brief Point distributions supported by random point generator.
+    */
+    enum PointDistribution
+    {
+        UNIFORM,    //!< Generate numbers uniformly.
+        REGULAR,    //!< Generate points in a regular grid.
+        NORMAL      //!< Generate points with normal (gaussian) distribution.
+    };
+
+    /**
+    * @brief Similarity function selector.
+    * @see
+    *       Christian Beecks, Merih Seran Uysal, Thomas Seidl.
+    *       Signature quadratic form distance.
+    *       In Proceedings of the ACM International Conference on Image and Video Retrieval, pages 438-445.
+    *       ACM, 2010.
+    * @cite BeecksUS10
+    * @note For selected distance function: \f[ d(c_i, c_j) \f]  and parameter: \f[ \alpha \f]
+    */
+    enum SimilarityFunction
+    {
+        MINUS,      //!< \f[ -d(c_i, c_j) \f]
+        GAUSSIAN,   //!< \f[ e^{ -\alpha * d^2(c_i, c_j)} \f]
+        HEURISTIC   //!< \f[ \frac{1}{\alpha + d(c_i, c_j)} \f]
+    };
+
+
+    /**
+    * @brief Creates PCTSignatures algorithm using sample and seed count.
+    *       It generates its own sets of sampling points and clusterization seed indexes.
+    * @param initSampleCount Number of points used for image sampling.
+    * @param initSeedCount Number of initial clusterization seeds.
+    *       Must be lower than or equal to initSampleCount.
+    * @param pointDistribution Distribution of generated points. Default: UNIFORM.
+    *       Available: UNIFORM, REGULAR, NORMAL.
+    * @return Created algorithm.
+    */
+    CV_WRAP static Ptr<PCTSignatures> create(
+        const int initSampleCount = 2000,
+        const int initSeedCount = 400,
+        const int pointDistribution = 0);
+
+    /**
+    * @brief Creates PCTSignatures algorithm using pre-generated sampling points
+    *       and number of clusterization seeds. It uses the provided
+    *       sampling points and generates its own clusterization seed indexes.
+    * @param initSamplingPoints Sampling points used in image sampling.
+    * @param initSeedCount Number of initial clusterization seeds.
+    *       Must be lower than or equal to initSamplingPoints.size().
+    * @return Created algorithm.
+    */
+    CV_WRAP static Ptr<PCTSignatures> create(
+        const std::vector<Point2f>& initSamplingPoints,
+        const int initSeedCount);
+
+    /**
+    * @brief Creates PCTSignatures algorithm using pre-generated sampling points
+    *       and clusterization seeds indexes.
+    * @param initSamplingPoints Sampling points used in image sampling.
+    * @param initClusterSeedIndexes Indexes of initial clusterization seeds.
+    *       Its size must be lower than or equal to initSamplingPoints.size().
+    * @return Created algorithm.
+    */
+    CV_WRAP static Ptr<PCTSignatures> create(
+        const std::vector<Point2f>& initSamplingPoints,
+        const std::vector<int>& initClusterSeedIndexes);
+
+
+
+    /**
+    * @brief Computes the signature of the given image.
+    * @param image Input image of CV_8U type.
+    * @param signature Output array receiving the computed signature.
+    */
+    CV_WRAP virtual void computeSignature(
+        InputArray image,
+        OutputArray signature) const = 0;
+
+    /**
+    * @brief Computes signatures for multiple images in parallel.
+    * @param images Vector of input images of CV_8U type.
+    * @param signatures Output vector of computed signatures.
+    */
+    CV_WRAP virtual void computeSignatures(
+        const std::vector<Mat>& images,
+        std::vector<Mat>& signatures) const = 0;
+
+    /**
+    * @brief Draws the signature into the source image and outputs the result.
+    *       Signatures are visualized as circles
+    *       with radius based on signature weight
+    *       and color based on signature color.
+    *       Contrast and entropy are not visualized.
+    * @param source Source image.
+    * @param signature Image signature.
+    * @param result Output result.
+    * @param radiusToShorterSideRatio Determines maximal radius of signature in the output image. Default: 1/8.
+    * @param borderThickness Border thickness of the visualized signature. Default: 1.
+    */
+    CV_WRAP static void drawSignature(
+        InputArray source,
+        InputArray signature,
+        OutputArray result,
+        float radiusToShorterSideRatio = 1.0 / 8,
+        int borderThickness = 1);
+
+    /**
+    * @brief Generates initial sampling points according to the selected point distribution.
+    * @param initPoints Output vector where the generated points will be saved.
+    * @param count Number of points to generate.
+    * @param pointDistribution Point distribution selector.
+    *       Available: UNIFORM, REGULAR, NORMAL.
+    * @note Generated coordinates are in the range [0..1).
+    */
+    CV_WRAP static void generateInitPoints(
+        std::vector<Point2f>& initPoints,
+        const int count,
+        int pointDistribution);
+
+
+    /**** sampler ****/
+
+    /**
+    * @brief Returns the number of initial samples taken from the image.
+    */
+    CV_WRAP virtual int getSampleCount() const = 0;
+
+    /**
+    * @brief Returns the color resolution of the greyscale bitmap, expressed in allocated bits
+    *       (i.e., value 4 means that 16 shades of grey are used).
+    *       The greyscale bitmap is used for computing contrast and entropy values.
+    */
+    CV_WRAP virtual int getGrayscaleBits() const = 0;
+    /**
+    * @brief Sets the color resolution of the greyscale bitmap, expressed in allocated bits
+    *       (i.e., value 4 means that 16 shades of grey are used).
+    *       The greyscale bitmap is used for computing contrast and entropy values.
+    */
+    CV_WRAP virtual void setGrayscaleBits(int grayscaleBits) = 0;
+
+    /**
+    * @brief Returns the size of the texture sampling window used to compute contrast and entropy
+    *       (center of the window is always in the pixel selected by x,y coordinates
+    *       of the corresponding feature sample).
+    */
+    CV_WRAP virtual int getWindowRadius() const = 0;
+    /**
+    * @brief Sets the size of the texture sampling window used to compute contrast and entropy
+    *       (center of the window is always in the pixel selected by x,y coordinates
+    *       of the corresponding feature sample).
+    */
+    CV_WRAP virtual void setWindowRadius(int radius) = 0;
+
+
+    /**
+    * @brief Returns the weight (multiplicative constant) that linearly stretches the x axis of the feature space
+    *       (x,y = position; L,a,b = color in CIE Lab space; c = contrast, e = entropy).
+    */
+    CV_WRAP virtual float getWeightX() const = 0;
+    /**
+    * @brief Sets the weight (multiplicative constant) that linearly stretches the x axis of the feature space
+    *       (x,y = position; L,a,b = color in CIE Lab space; c = contrast, e = entropy).
+    */
+    CV_WRAP virtual void setWeightX(float weight) = 0;
+
+    /**
+    * @brief Returns the weight (multiplicative constant) that linearly stretches the y axis of the feature space
+    *       (x,y = position; L,a,b = color in CIE Lab space; c = contrast, e = entropy).
+    */
+    CV_WRAP virtual float getWeightY() const = 0;
+    /**
+    * @brief Sets the weight (multiplicative constant) that linearly stretches the y axis of the feature space
+    *       (x,y = position; L,a,b = color in CIE Lab space; c = contrast, e = entropy).
+    */
+    CV_WRAP virtual void setWeightY(float weight) = 0;
+
+    /**
+    * @brief Returns the weight (multiplicative constant) that linearly stretches the L axis of the feature space
+    *       (x,y = position; L,a,b = color in CIE Lab space; c = contrast, e = entropy).
+    */
+    CV_WRAP virtual float getWeightL() const = 0;
+    /**
+    * @brief Sets the weight (multiplicative constant) that linearly stretches the L axis of the feature space
+    *       (x,y = position; L,a,b = color in CIE Lab space; c = contrast, e = entropy).
+    */
+    CV_WRAP virtual void setWeightL(float weight) = 0;
+
+    /**
+    * @brief Returns the weight (multiplicative constant) that linearly stretches the a axis of the feature space
+    *       (x,y = position; L,a,b = color in CIE Lab space; c = contrast, e = entropy).
+    */
+    CV_WRAP virtual float getWeightA() const = 0;
+    /**
+    * @brief Sets the weight (multiplicative constant) that linearly stretches the a axis of the feature space
+    *       (x,y = position; L,a,b = color in CIE Lab space; c = contrast, e = entropy).
+    */
+    CV_WRAP virtual void setWeightA(float weight) = 0;
+
+    /**
+    * @brief Returns the weight (multiplicative constant) that linearly stretches the b axis of the feature space
+    *       (x,y = position; L,a,b = color in CIE Lab space; c = contrast, e = entropy).
+    */
+    CV_WRAP virtual float getWeightB() const = 0;
+    /**
+    * @brief Sets the weight (multiplicative constant) that linearly stretches the b axis of the feature space
+    *       (x,y = position; L,a,b = color in CIE Lab space; c = contrast, e = entropy).
+    */
+    CV_WRAP virtual void setWeightB(float weight) = 0;
+
+    /**
+    * @brief Returns the weight (multiplicative constant) that linearly stretches the contrast axis of the feature space
+    *       (x,y = position; L,a,b = color in CIE Lab space; c = contrast, e = entropy).
+    */
+    CV_WRAP virtual float getWeightContrast() const = 0;
+    /**
+    * @brief Sets the weight (multiplicative constant) that linearly stretches the contrast axis of the feature space
+    *       (x,y = position; L,a,b = color in CIE Lab space; c = contrast, e = entropy).
+    */
+    CV_WRAP virtual void setWeightContrast(float weight) = 0;
+
+    /**
+    * @brief Returns the weight (multiplicative constant) that linearly stretches the entropy axis of the feature space
+    *       (x,y = position; L,a,b = color in CIE Lab space; c = contrast, e = entropy).
+    */
+    CV_WRAP virtual float getWeightEntropy() const = 0;
+    /**
+    * @brief Sets the weight (multiplicative constant) that linearly stretches the entropy axis of the feature space
+    *       (x,y = position; L,a,b = color in CIE Lab space; c = contrast, e = entropy).
+    */
+    CV_WRAP virtual void setWeightEntropy(float weight) = 0;
+
+    /**
+    * @brief Returns the sampling points used to take the initial samples from the image.
+    *       The sampled features become the input for clustering.
+    */
+    CV_WRAP virtual std::vector<Point2f> getSamplingPoints() const = 0;
+
+
+
+    /**
+    * @brief Sets one of the weights (multiplicative constants) that linearly stretch individual axes of the feature space.
+    * @param idx ID of the weight to set (see the index constants in the note).
+    * @param value New value of the weight.
+    * @note Index constants:
+    *       WEIGHT_IDX = 0;
+    *       X_IDX = 1;
+    *       Y_IDX = 2;
+    *       L_IDX = 3;
+    *       A_IDX = 4;
+    *       B_IDX = 5;
+    *       CONTRAST_IDX = 6;
+    *       ENTROPY_IDX = 7;
+    */
+    CV_WRAP virtual void setWeight(int idx, float value) = 0;
+    /**
+    * @brief Sets all weights (multiplicative constants) that linearly stretch individual axes of the feature space.
+    * @param weights Values of all weights.
+    * @note Index constants:
+    *       WEIGHT_IDX = 0;
+    *       X_IDX = 1;
+    *       Y_IDX = 2;
+    *       L_IDX = 3;
+    *       A_IDX = 4;
+    *       B_IDX = 5;
+    *       CONTRAST_IDX = 6;
+    *       ENTROPY_IDX = 7;
+    */
+    CV_WRAP virtual void setWeights(const std::vector<float>& weights) = 0;
+
+    /**
+    * @brief Sets the translation of one of the individual axes of the feature space.
+    * @param idx ID of the translation to set (see the index constants in the note).
+    * @param value New value of the translation.
+    * @note Index constants:
+    *       WEIGHT_IDX = 0;
+    *       X_IDX = 1;
+    *       Y_IDX = 2;
+    *       L_IDX = 3;
+    *       A_IDX = 4;
+    *       B_IDX = 5;
+    *       CONTRAST_IDX = 6;
+    *       ENTROPY_IDX = 7;
+    */
+    CV_WRAP virtual void setTranslation(int idx, float value) = 0;
+    /**
+    * @brief Sets the translations of all individual axes of the feature space.
+    * @param translations Values of all translations.
+    * @note Index constants:
+    *       WEIGHT_IDX = 0;
+    *       X_IDX = 1;
+    *       Y_IDX = 2;
+    *       L_IDX = 3;
+    *       A_IDX = 4;
+    *       B_IDX = 5;
+    *       CONTRAST_IDX = 6;
+    *       ENTROPY_IDX = 7;
+    */
+    CV_WRAP virtual void setTranslations(const std::vector<float>& translations) = 0;
+
+    /**
+    * @brief Sets sampling points used to sample the input image.
+    * @param samplingPoints Vector of sampling points with coordinates in the range [0..1).
+    * @note Number of sampling points must be greater than or equal to the clusterization seed count.
+    */
+    CV_WRAP virtual void setSamplingPoints(std::vector<Point2f> samplingPoints) = 0;
+
+
+
+    /**** clusterizer ****/
+    /**
+    * @brief Returns the indexes of the initial seeds (initial clusters) for the k-means algorithm.
+    */
+    CV_WRAP virtual std::vector<int> getInitSeedIndexes() const = 0;
+    /**
+    * @brief Sets the initial seed indexes for the k-means algorithm.
+    */
+    CV_WRAP virtual void setInitSeedIndexes(std::vector<int> initSeedIndexes) = 0;
+    /**
+    * @brief Returns the number of initial seeds (initial number of clusters) for the k-means algorithm.
+    */
+    CV_WRAP virtual int getInitSeedCount() const = 0;
+
+    /**
+    * @brief Returns the number of iterations of the k-means clustering.
+    *       A fixed number of iterations is used, since the modified clustering is pruning clusters
+    *       (not iteratively refining k clusters).
+    */
+    CV_WRAP virtual int getIterationCount() const = 0;
+    /**
+    * @brief Sets the number of iterations of the k-means clustering.
+    *       A fixed number of iterations is used, since the modified clustering is pruning clusters
+    *       (not iteratively refining k clusters).
+    */
+    CV_WRAP virtual void setIterationCount(int iterationCount) = 0;
+
+    /**
+    * @brief Returns the maximal number of generated clusters. If the number is exceeded,
+    *       the clusters are sorted by their weights and the smallest clusters are cropped.
+    */
+    CV_WRAP virtual int getMaxClustersCount() const = 0;
+    /**
+    * @brief Sets the maximal number of generated clusters. If the number is exceeded,
+    *       the clusters are sorted by their weights and the smallest clusters are cropped.
+    */
+    CV_WRAP virtual void setMaxClustersCount(int maxClustersCount) = 0;
+
+    /**
+    * @brief Returns the parameter which, multiplied by the index of iteration, gives the lower limit for cluster size.
+    *       Clusters containing fewer points than specified by the limit have their centroid dismissed
+    *       and points are reassigned.
+    */
+    CV_WRAP virtual int getClusterMinSize() const = 0;
+    /**
+    * @brief Sets the parameter which, multiplied by the index of iteration, gives the lower limit for cluster size.
+    *       Clusters containing fewer points than specified by the limit have their centroid dismissed
+    *       and points are reassigned.
+    */
+    CV_WRAP virtual void setClusterMinSize(int clusterMinSize) = 0;
+
+    /**
+    * @brief Returns the threshold euclidean distance between two centroids.
+    *       If two cluster centers are closer than this distance,
+    *       one of the centroids is dismissed and points are reassigned.
+    */
+    CV_WRAP virtual float getJoiningDistance() const = 0;
+    /**
+    * @brief Sets the threshold euclidean distance between two centroids.
+    *       If two cluster centers are closer than this distance,
+    *       one of the centroids is dismissed and points are reassigned.
+    */
+    CV_WRAP virtual void setJoiningDistance(float joiningDistance) = 0;
+
+    /**
+    * @brief Returns the drop threshold: centroids in k-means whose weight is less than or equal to it are removed.
+    */
+    CV_WRAP virtual float getDropThreshold() const = 0;
+    /**
+    * @brief Sets the drop threshold: centroids in k-means whose weight is less than or equal to it are removed.
+    */
+    CV_WRAP virtual void setDropThreshold(float dropThreshold) = 0;
+
+    /**
+    * @brief Returns the distance function selector used for measuring distance between two points in k-means.
+    */
+    CV_WRAP virtual int getDistanceFunction() const = 0;
+    /**
+    * @brief Sets the distance function selector used for measuring distance between two points in k-means.
+    *       Available: L0_25, L0_5, L1, L2, L2SQUARED, L5, L_INFINITY.
+    */
+    CV_WRAP virtual void setDistanceFunction(int distanceFunction) = 0;
+
+};
+
+/**
+* @brief Class implementing Signature Quadratic Form Distance (SQFD).
+* @see Christian Beecks, Merih Seran Uysal, Thomas Seidl.
+*   Signature quadratic form distance.
+*   In Proceedings of the ACM International Conference on Image and Video Retrieval, pages 438-445.
+*   ACM, 2010.
+* @cite BeecksUS10
+*/
+class CV_EXPORTS_W PCTSignaturesSQFD : public Algorithm
+{
+public:
+
+    /**
+    * @brief Creates the algorithm instance using the selected distance function,
+    *       similarity function and similarity function parameter.
+    * @param distanceFunction Distance function selector. Default: L2 (passed as the literal 3).
+    *       Available: L0_25, L0_5, L1, L2, L2SQUARED, L5, L_INFINITY.
+    * @param similarityFunction Similarity function selector. Default: HEURISTIC (passed as the literal 2).
+    *       Available: MINUS, GAUSSIAN, HEURISTIC.
+    * @param similarityParameter Parameter of the similarity function. Default: 1.0.
+    */
+    CV_WRAP static Ptr<PCTSignaturesSQFD> create(
+        const int distanceFunction = 3,
+        const int similarityFunction = 2,
+        const float similarityParameter = 1.0f);
+
+    /**
+    * @brief Computes and returns the Signature Quadratic Form Distance of two signatures.
+    * @param _signature0 The first signature.
+    * @param _signature1 The second signature.
+    */
+    CV_WRAP virtual float computeQuadraticFormDistance(
+        InputArray _signature0,
+        InputArray _signature1) const = 0;
+
+    /**
+    * @brief Computes Signature Quadratic Form Distance between the reference signature
+    *       and each of the other image signatures.
+    * @param sourceSignature The reference signature from which the distances are measured.
+    * @param imageSignatures Vector of signatures whose distance from the source signature is measured.
+    * @param distances Output vector of measured distances.
+    */
+    CV_WRAP virtual void computeQuadraticFormDistances(
+        const Mat& sourceSignature,
+        const std::vector<Mat>& imageSignatures,
+        std::vector<float>& distances) const = 0;
+
+};
+
+/**
+* @brief Elliptic region around an interest point: a KeyPoint extended with ellipse axes, integration scale and patch transformation.
+*/
+class CV_EXPORTS Elliptic_KeyPoint : public KeyPoint
+{
+public:
+    Size_<float> axes; //!< the lengths of the major and minor ellipse axes
+    float si;  //!< the integration scale at which the parameters were estimated
+    Matx23f transf; //!< the transformation between image space and local patch space
+    Elliptic_KeyPoint(); //!< default constructor (defined outside this header)
+    Elliptic_KeyPoint(Point2f pt, float angle, Size axes, float size, float si); //!< NOTE(review): axes is taken as Size while the member is Size_<float> — confirm no precision is lost
+    virtual ~Elliptic_KeyPoint(); //!< virtual destructor
+};
+
+/**
+ * @brief Class implementing the Harris-Laplace feature detector as described in @cite Mikolajczyk2004.
+ */
+class CV_EXPORTS_W HarrisLaplaceFeatureDetector : public Feature2D
+{
+public:
+    /**
+     * @brief Creates a new implementation instance and returns a pointer to it.
+     *
+     * @param numOctaves the number of octaves in the scale-space pyramid (default 6)
+     * @param corn_thresh the threshold for the Harris cornerness measure (default 0.01)
+     * @param DOG_thresh the threshold for the Difference-of-Gaussians scale selection (default 0.01)
+     * @param maxCorners the maximum number of corners to consider (default 5000)
+     * @param num_layers the number of intermediate scales per octave (default 4)
+     */
+    CV_WRAP static Ptr<HarrisLaplaceFeatureDetector> create(
+            int numOctaves=6,
+            float corn_thresh=0.01f,
+            float DOG_thresh=0.01f,
+            int maxCorners=5000,
+            int num_layers=4);
+};
+
+/**
+ * @brief Class implementing affine adaptation for key points.
+ *
+ * A @ref FeatureDetector and a @ref DescriptorExtractor are wrapped to augment the
+ * detected points with their affine invariant elliptic region and to compute
+ * the feature descriptors on the regions after warping them into circles.
+ *
+ * The interface is equivalent to @ref Feature2D, adding operations for
+ * @ref Elliptic_KeyPoint "Elliptic_KeyPoints" instead of @ref KeyPoint "KeyPoints".
+ */
+class CV_EXPORTS AffineFeature2D : public Feature2D
+{
+public:
+    /**
+     * @brief Creates an instance wrapping the given keypoint detector and
+     * descriptor extractor, and returns a pointer to it.
+     */
+    static Ptr<AffineFeature2D> create(
+        Ptr<FeatureDetector> keypoint_detector,
+        Ptr<DescriptorExtractor> descriptor_extractor);
+
+    /**
+     * @brief Creates an instance where the same object serves as both keypoint
+     * detector and descriptor extractor.
+     */
+    static Ptr<AffineFeature2D> create(
+        Ptr<FeatureDetector> keypoint_detector)
+    {
+        return create(keypoint_detector, keypoint_detector); // forward to the two-argument overload
+    }
+
+    using Feature2D::detect; // overload, don't hide
+    /**
+     * @brief Detects keypoints in the image using the wrapped detector and performs
+     * affine adaptation; the elliptic keypoints are written to the keypoints output vector.
+     */
+    virtual void detect(
+        InputArray image,
+        CV_OUT std::vector<Elliptic_KeyPoint>& keypoints,
+        InputArray mask=noArray() ) = 0;
+
+    using Feature2D::detectAndCompute; // overload, don't hide
+    /**
+     * @brief Detects elliptic keypoints and computes descriptors for their surrounding
+     * regions, after warping them into circles. useProvidedKeypoints defaults to false.
+     */
+    virtual void detectAndCompute(
+        InputArray image,
+        InputArray mask,
+        CV_OUT std::vector<Elliptic_KeyPoint>& keypoints,
+        OutputArray descriptors,
+        bool useProvidedKeypoints=false ) = 0;
+};
+
+
+/** @brief Estimates cornerness for prespecified KeyPoints using the FAST algorithm.
+
+@param image grayscale image where keypoints (corners) are detected.
+@param keypoints input/output keypoints which should be tested to fit the FAST criteria. Keypoints
+that are not detected as corners are removed.
+@param threshold threshold on difference between intensity of the central pixel and pixels of a
+circle around this pixel.
+@param nonmaxSuppression if true (the default), non-maximum suppression is applied to detected corners
+(keypoints).
+@param type one of the three neighborhoods as defined in the paper:
+FastFeatureDetector::TYPE_9_16 (the default), FastFeatureDetector::TYPE_7_12,
+FastFeatureDetector::TYPE_5_8
+
+Detects corners using the FAST algorithm by @cite Rosten06 .
+ */
+CV_EXPORTS void FASTForPointSet( InputArray image, CV_IN_OUT std::vector<KeyPoint>& keypoints,
+                      int threshold, bool nonmaxSuppression=true, cv::FastFeatureDetector::DetectorType type=FastFeatureDetector::TYPE_9_16);
+
+
+//! @}
+
+
+//! @addtogroup xfeatures2d_match
+//! @{
+
+/** @brief GMS (Grid-based Motion Statistics) feature matching strategy described in @cite Bian2017gms .
+    @param size1 Input size of image1.
+    @param size2 Input size of image2.
+    @param keypoints1 Input keypoints of image1.
+    @param keypoints2 Input keypoints of image2.
+    @param matches1to2 Input 1-nearest neighbor matches.
+    @param matchesGMS Output matches returned by the GMS matching strategy.
+    @param withRotation Take rotation transformation into account. Default: false.
+    @param withScale Take scale transformation into account. Default: false.
+    @param thresholdFactor The higher the value, the fewer matches are returned. Default: 6.0.
+    @note
+        Since GMS works well when the number of features is large, we recommend using the ORB feature and setting FastThreshold to 0 to get as many features as possible quickly.
+        If matching results are not satisfying, please add more features. (We use 10000 for images with 640 X 480).
+        If your images have big rotation and scale changes, please set withRotation or withScale to true.
+ */
+CV_EXPORTS_W void matchGMS(const Size& size1, const Size& size2, const std::vector<KeyPoint>& keypoints1, const std::vector<KeyPoint>& keypoints2,
+                           const std::vector<DMatch>& matches1to2, CV_OUT std::vector<DMatch>& matchesGMS, const bool withRotation = false,
+                           const bool withScale = false, const double thresholdFactor = 6.0);
+
+/** @brief LOGOS (Local geometric support for high-outlier spatial verification) feature matching strategy described in @cite Lowry2018LOGOSLG .
+    @param keypoints1 Input keypoints of image1.
+    @param keypoints2 Input keypoints of image2.
+    @param nn1 Index to the closest BoW centroid for each descriptor of image1.
+    @param nn2 Index to the closest BoW centroid for each descriptor of image2.
+    @param matches1to2 Output matches returned by the LOGOS matching strategy.
+    @note
+        This matching strategy is suitable for feature matching against a large-scale database.
+        The first step consists of constructing the bag-of-words (BoW) from a representative image database.
+        Image descriptors are then represented by their closest codevector (nearest BoW centroid).
+ */
+CV_EXPORTS_W void matchLOGOS(const std::vector<KeyPoint>& keypoints1, const std::vector<KeyPoint>& keypoints2,
+                             const std::vector<int>& nn1, const std::vector<int>& nn2,
+                             std::vector<DMatch>& matches1to2); // NOTE(review): output vector is not marked CV_OUT, unlike matchesGMS in matchGMS — confirm wrappers return it
+
 //! @}
 
 }
diff --git a/IPL/include/opencv/opencv2/xfeatures2d/cuda.hpp b/IPL/include/opencv/opencv2/xfeatures2d/cuda.hpp
index 16039a5..ea4a323 100644
--- a/IPL/include/opencv/opencv2/xfeatures2d/cuda.hpp
+++ b/IPL/include/opencv/opencv2/xfeatures2d/cuda.hpp
@@ -83,7 +83,7 @@ between function calls.
         opencv_source_code/samples/gpu/surf_keypoint_matcher.cpp
 
  */
-class CV_EXPORTS SURF_CUDA
+class CV_EXPORTS_W SURF_CUDA
 {
 public:
     enum KeypointLayout
@@ -104,15 +104,28 @@ class CV_EXPORTS SURF_CUDA
     explicit SURF_CUDA(double _hessianThreshold, int _nOctaves=4,
          int _nOctaveLayers=2, bool _extended=false, float _keypointsRatio=0.01f, bool _upright = false);
 
+    /**
+    @brief Creates a SURF_CUDA instance configured with the given parameters.
+    @param _hessianThreshold Threshold for hessian keypoint detector used in SURF.
+    @param _nOctaves Number of pyramid octaves the keypoint detector will use.
+    @param _nOctaveLayers Number of octave layers within each octave.
+    @param _extended Extended descriptor flag (true - use extended 128-element descriptors; false - use 64-element descriptors).
+    @param _keypointsRatio Presumably initializes keypointsRatio, where max keypoints = min(keypointsRatio * img.size().area(), 65535) — confirm.
+    @param _upright Up-right or rotated features flag (true - do not compute orientation of features;
+    false - compute orientation).
+    */
+    CV_WRAP static Ptr<SURF_CUDA> create(double _hessianThreshold, int _nOctaves = 4,
+        int _nOctaveLayers = 2, bool _extended = false, float _keypointsRatio = 0.01f, bool _upright = false);
+
     //! returns the descriptor size in float's (64 or 128)
-    int descriptorSize() const;
+    CV_WRAP int descriptorSize() const;
     //! returns the default norm type
-    int defaultNorm() const;
+    CV_WRAP int defaultNorm() const;
 
     //! upload host keypoints to device memory
     void uploadKeypoints(const std::vector<KeyPoint>& keypoints, GpuMat& keypointsGPU);
     //! download keypoints from device to host memory
-    void downloadKeypoints(const GpuMat& keypointsGPU, std::vector<KeyPoint>& keypoints);
+    CV_WRAP void downloadKeypoints(const GpuMat& keypointsGPU, CV_OUT std::vector<KeyPoint>& keypoints);
 
     //! download descriptors from device to host memory
     void downloadDescriptors(const GpuMat& descriptorsGPU, std::vector<float>& descriptors);
@@ -133,24 +146,47 @@ class CV_EXPORTS SURF_CUDA
     void operator()(const GpuMat& img, const GpuMat& mask, GpuMat& keypoints, GpuMat& descriptors,
         bool useProvidedKeypoints = false);
 
+    /** @brief Finds the keypoints using fast hessian detector used in SURF
+
+    @param img Source image, currently supports only CV_8UC1 images.
+    @param mask A mask image of the same size as img and of type CV_8UC1.
+    @param keypoints Output detected keypoints.
+     */
+    CV_WRAP inline void detect(const GpuMat& img, const GpuMat& mask, CV_OUT GpuMat& keypoints) {
+        (*this)(img, mask, keypoints); // named wrapper that forwards to operator()
+    }
+
     void operator()(const GpuMat& img, const GpuMat& mask, std::vector<KeyPoint>& keypoints);
     void operator()(const GpuMat& img, const GpuMat& mask, std::vector<KeyPoint>& keypoints, GpuMat& descriptors,
         bool useProvidedKeypoints = false);
 
+    /** @brief Finds the keypoints and computes their descriptors using fast hessian detector used in SURF
+
+    @param img Source image, currently supports only CV_8UC1 images.
+    @param mask A mask image of the same size as img and of type CV_8UC1.
+    @param keypoints Output detected keypoints.
+    @param descriptors Output keypoint descriptors.
+    @param useProvidedKeypoints Compute descriptors for the user-provided keypoints and recompute keypoints direction. Default: false.
+     */
+    CV_WRAP inline void detectWithDescriptors(const GpuMat& img, const GpuMat& mask, CV_OUT GpuMat& keypoints, CV_OUT GpuMat& descriptors,
+        bool useProvidedKeypoints = false) {
+        (*this)(img, mask, keypoints, descriptors, useProvidedKeypoints); // named wrapper that forwards to operator()
+    }
+
     void operator()(const GpuMat& img, const GpuMat& mask, std::vector<KeyPoint>& keypoints, std::vector<float>& descriptors,
         bool useProvidedKeypoints = false);
 
     void releaseMemory();
 
     // SURF parameters
-    double hessianThreshold;
-    int nOctaves;
-    int nOctaveLayers;
-    bool extended;
-    bool upright;
+    CV_PROP double hessianThreshold;
+    CV_PROP int nOctaves;
+    CV_PROP int nOctaveLayers;
+    CV_PROP bool extended;
+    CV_PROP bool upright;
 
     //! max keypoints = min(keypointsRatio * img.size().area(), 65535)
-    float keypointsRatio;
+    CV_PROP float keypointsRatio;
 
     GpuMat sum, mask1, maskSum;
 
diff --git a/IPL/include/opencv/opencv2/xfeatures2d/nonfree.hpp b/IPL/include/opencv/opencv2/xfeatures2d/nonfree.hpp
index 9533972..472d822 100644
--- a/IPL/include/opencv/opencv2/xfeatures2d/nonfree.hpp
+++ b/IPL/include/opencv/opencv2/xfeatures2d/nonfree.hpp
@@ -50,40 +50,6 @@ namespace cv
 namespace xfeatures2d
 {
 
-//! @addtogroup xfeatures2d_nonfree
-//! @{
-
-/** @brief Class for extracting keypoints and computing descriptors using the Scale Invariant Feature Transform
-(SIFT) algorithm by D. Lowe @cite Lowe04 .
- */
-class CV_EXPORTS_W SIFT : public Feature2D
-{
-public:
-    /**
-    @param nfeatures The number of best features to retain. The features are ranked by their scores
-    (measured in SIFT algorithm as the local contrast)
-
-    @param nOctaveLayers The number of layers in each octave. 3 is the value used in D. Lowe paper. The
-    number of octaves is computed automatically from the image resolution.
-
-    @param contrastThreshold The contrast threshold used to filter out weak features in semi-uniform
-    (low-contrast) regions. The larger the threshold, the less features are produced by the detector.
-
-    @param edgeThreshold The threshold used to filter out edge-like features. Note that the its meaning
-    is different from the contrastThreshold, i.e. the larger the edgeThreshold, the less features are
-    filtered out (more features are retained).
-
-    @param sigma The sigma of the Gaussian applied to the input image at the octave \#0. If your image
-    is captured with a weak camera with soft lenses, you might want to reduce the number.
-     */
-    CV_WRAP static Ptr<SIFT> create( int nfeatures = 0, int nOctaveLayers = 3,
-                                    double contrastThreshold = 0.04, double edgeThreshold = 10,
-                                    double sigma = 1.6);
-};
-
-typedef SIFT SiftFeatureDetector;
-typedef SIFT SiftDescriptorExtractor;
-
 /** @brief Class for extracting Speeded Up Robust Features from an image @cite Bay06 .
 
 The algorithm parameters:
diff --git a/IPL/include/opencv/opencv2/ximgproc.hpp b/IPL/include/opencv/opencv2/ximgproc.hpp
index 84015b9..d5a2f6e 100644
--- a/IPL/include/opencv/opencv2/ximgproc.hpp
+++ b/IPL/include/opencv/opencv2/ximgproc.hpp
@@ -2,26 +2,26 @@
  *  By downloading, copying, installing or using the software you agree to this license.
  *  If you do not agree to this license, do not download, install,
  *  copy or use the software.
- *  
- *  
+ *
+ *
  *  License Agreement
  *  For Open Source Computer Vision Library
  *  (3 - clause BSD License)
- *  
+ *
  *  Redistribution and use in source and binary forms, with or without modification,
  *  are permitted provided that the following conditions are met :
- *  
- *  *Redistributions of source code must retain the above copyright notice,
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
  *  this list of conditions and the following disclaimer.
- *  
+ *
  *  * Redistributions in binary form must reproduce the above copyright notice,
  *  this list of conditions and the following disclaimer in the documentation
  *  and / or other materials provided with the distribution.
- *  
+ *
  *  * Neither the names of the copyright holders nor the names of the contributors
  *  may be used to endorse or promote products derived from this software
  *  without specific prior written permission.
- *  
+ *
  *  This software is provided by the copyright holders and contributors "as is" and
  *  any express or implied warranties, including, but not limited to, the implied
  *  warranties of merchantability and fitness for a particular purpose are disclaimed.
@@ -41,6 +41,7 @@
 #include "ximgproc/disparity_filter.hpp"
 #include "ximgproc/sparse_match_interpolator.hpp"
 #include "ximgproc/structured_edge_detection.hpp"
+#include "ximgproc/edgeboxes.hpp"
 #include "ximgproc/seeds.hpp"
 #include "ximgproc/segmentation.hpp"
 #include "ximgproc/fast_hough_transform.hpp"
@@ -48,6 +49,17 @@
 #include "ximgproc/weighted_median_filter.hpp"
 #include "ximgproc/slic.hpp"
 #include "ximgproc/lsc.hpp"
+#include "ximgproc/paillou_filter.hpp"
+#include "ximgproc/fast_line_detector.hpp"
+#include "ximgproc/deriche_filter.hpp"
+#include "ximgproc/peilin.hpp"
+#include "ximgproc/fourier_descriptors.hpp"
+#include "ximgproc/ridgefilter.hpp"
+#include "ximgproc/brightedges.hpp"
+#include "ximgproc/run_length_morphology.hpp"
+#include "ximgproc/edgepreserving_filter.hpp"
+#include "ximgproc/color_match.hpp"
+
 
 /** @defgroup ximgproc Extended Image Processing
   @{
@@ -56,12 +68,38 @@
 This module contains implementations of modern structured edge detection algorithms,
 i.e. algorithms which somehow takes into account pixel affinities in natural images.
 
+    @defgroup ximgproc_edgeboxes EdgeBoxes
+
     @defgroup ximgproc_filters Filters
 
     @defgroup ximgproc_superpixel Superpixels
 
     @defgroup ximgproc_segmentation Image segmentation
-  @}
+
+    @defgroup ximgproc_fast_line_detector Fast line detector
+
+    @defgroup ximgproc_fourier Fourier descriptors
+
+    @defgroup ximgproc_run_length_morphology Binary morphology on run-length encoded image
+
+    These functions support morphological operations on binary images. In order to be fast and space efficient binary images are encoded with a run-length representation.
+    This representation groups continuous horizontal sequences of "on" pixels together in a "run". A run is characterized by the column position of the first pixel in the run, the column
+    position of the last pixel in the run and the row position. This representation is very compact for binary images which contain large continuous areas of "on" and "off" pixels. A checkerboard
+    pattern would be a good example. The representation is not so suitable for binary images created from random noise images or other images where little correlation between neighboring pixels
+    exists.
+
+    The morphological operations supported here are very similar to the operations supported in the imgproc module. In general they are fast. However on several occasions they are slower than the functions
+    from imgproc. The structuring elements of cv::MORPH_RECT and cv::MORPH_CROSS have very good support from the imgproc module. Also small structuring elements are very fast in imgproc (presumably
+    due to opencl support). Therefore the functions from this module are recommended for larger structuring elements (cv::MORPH_ELLIPSE or self defined structuring elements). A sample application
+    (run_length_morphology_demo) is supplied which allows to compare the speed of some morphological operations for the functions using run-length encoding and the imgproc functions for a given image.
+
+    Run length encoded images are stored in standard opencv images. Images have a single column of cv::Point3i elements. The number of rows is the number of runs + 1. The first row contains
+    the size of the original (not encoded) image.  For the runs the following mapping is used (x: column begin, y: column end (last column), z: row).
+
+    The size of the original image is required for compatibility with the imgproc functions when the boundary handling requires that pixels outside the image boundary are
+    "on".
+
+    @}
 */
 
 namespace cv
@@ -69,10 +107,26 @@ namespace cv
 namespace ximgproc
 {
 
+enum ThinningTypes{
+    THINNING_ZHANGSUEN    = 0, //!< Thinning technique of Zhang-Suen
+    THINNING_GUOHALL      = 1  //!< Thinning technique of Guo-Hall
+};
+
+/**
+* @brief Specifies the binarization method to use in cv::ximgproc::niBlackThreshold
+*/
+enum LocalBinarizationMethods{
+	BINARIZATION_NIBLACK = 0, //!< Classic Niblack binarization. See @cite Niblack1985 .
+	BINARIZATION_SAUVOLA = 1, //!< Sauvola's technique. See @cite Sauvola1997 .
+	BINARIZATION_WOLF = 2,    //!< Wolf's technique. See @cite Wolf2004 .
+	BINARIZATION_NICK = 3     //!< NICK technique. See @cite Khurshid2009 .
+};
+
 //! @addtogroup ximgproc
 //! @{
 
-/** @brief Applies Niblack thresholding to input image.
+/** @brief Performs thresholding on input images using Niblack's technique or some of the
+popular variations it inspired.
 
 The function transforms a grayscale image to a binary image according to the formulae:
 -   **THRESH_BINARY**
@@ -81,8 +135,9 @@ The function transforms a grayscale image to a binary image according to the for
     \f[dst(x,y) =  \fork{0}{if \(src(x,y) > T(x,y)\)}{\texttt{maxValue}}{otherwise}\f]
 where \f$T(x,y)\f$ is a threshold calculated individually for each pixel.
 
-The threshold value \f$T(x, y)\f$ is the mean minus \f$ delta \f$ times standard deviation
-of \f$\texttt{blockSize} \times\texttt{blockSize}\f$ neighborhood of \f$(x, y)\f$.
+The threshold value \f$T(x, y)\f$ is determined based on the binarization method chosen. For
+classic Niblack, it is the mean minus \f$ k \f$ times standard deviation of
+\f$\texttt{blockSize} \times\texttt{blockSize}\f$ neighborhood of \f$(x, y)\f$.
 
 The function can't process the image in-place.
 
@@ -93,14 +148,51 @@ used with the THRESH_BINARY and THRESH_BINARY_INV thresholding types.
 @param type Thresholding type, see cv::ThresholdTypes.
 @param blockSize Size of a pixel neighborhood that is used to calculate a threshold value
 for the pixel: 3, 5, 7, and so on.
-@param delta Constant multiplied with the standard deviation and subtracted from the mean.
-Normally, it is taken to be a real number between 0 and 1.
-
+@param k The user-adjustable parameter used by Niblack and inspired techniques. For Niblack, this is
+normally a value between 0 and 1 that is multiplied with the standard deviation and subtracted from
+the mean.
+@param binarizationMethod Binarization method to use. By default, Niblack's technique is used.
+Other techniques can be specified, see cv::ximgproc::LocalBinarizationMethods.
+@param r The user-adjustable parameter used by Sauvola's technique. This is the dynamic range
+of standard deviation.
 @sa  threshold, adaptiveThreshold
  */
 CV_EXPORTS_W void niBlackThreshold( InputArray _src, OutputArray _dst,
                                     double maxValue, int type,
-                                    int blockSize, double delta );
+                                    int blockSize, double k, int binarizationMethod = BINARIZATION_NIBLACK,
+                                    double r = 128 );
+
+/** @brief Applies a binary blob thinning operation, to achieve a skeletonization of the input image.
+
+The function transforms a binary blob image into a skeletonized form using the algorithm selected by thinningType (Zhang-Suen by default).
+
+@param src Source 8-bit single-channel image, containing binary blobs, with blobs having 255 pixel values.
+@param dst Destination image of the same size and the same type as src. The function can work in-place.
+@param thinningType Value that defines which thinning algorithm should be used. See cv::ximgproc::ThinningTypes
+ */
+CV_EXPORTS_W void thinning( InputArray src, OutputArray dst, int thinningType = THINNING_ZHANGSUEN);
+
+/** @brief Performs anisotropic diffusion on an image.
+
+ The function applies Perona-Malik anisotropic diffusion to an image. This is the solution to the partial differential equation:
+
+ \f[{\frac  {\partial I}{\partial t}}={\mathrm  {div}}\left(c(x,y,t)\nabla I\right)=\nabla c\cdot \nabla I+c(x,y,t)\Delta I\f]
+
+ Suggested functions for c(x,y,t) are:
+
+ \f[c\left(\|\nabla I\|\right)=e^{{-\left(\|\nabla I\|/K\right)^{2}}}\f]
+
+ or
+
+ \f[ c\left(\|\nabla I\|\right)={\frac {1}{1+\left({\frac  {\|\nabla I\|}{K}}\right)^{2}}} \f]
+
+ @param src Source image with 3 channels.
+ @param dst Destination image of the same size and the same number of channels as src .
+ @param alpha The amount of time to step forward by on each iteration (normally, it's between 0 and 1).
+ @param K sensitivity to the edges
+ @param niters The number of iterations
+*/
+CV_EXPORTS_W void anisotropicDiffusion(InputArray src, OutputArray dst, float alpha, float K, int niters );
 
 //! @}
 
diff --git a/IPL/include/opencv/opencv2/ximgproc/brightedges.hpp b/IPL/include/opencv/opencv2/ximgproc/brightedges.hpp
new file mode 100644
index 0000000..fa30911
--- /dev/null
+++ b/IPL/include/opencv/opencv2/ximgproc/brightedges.hpp
@@ -0,0 +1,50 @@
+﻿/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2017, IBM Corporation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+//      Marc Fiammante marc.fiammante@fr.ibm.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of OpenCV Foundation or contributors may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the OpenCV Foundation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+#ifndef __OPENCV_BRIGHTEDGES_HPP__ // include guard: a repeated inclusion would redeclare BrightEdges' default arguments, which is ill-formed
+#define __OPENCV_BRIGHTEDGES_HPP__
+#include "opencv2/core.hpp"
+namespace cv { namespace ximgproc {
+        CV_EXPORTS_W void  BrightEdges(Mat &_original, Mat &_edgeview, int contrast = 1, int shortrange = 3, int longrange = 9);
+}} // namespace cv::ximgproc
+#endif // __OPENCV_BRIGHTEDGES_HPP__
\ No newline at end of file
diff --git a/IPL/include/opencv/opencv2/ximgproc/color_match.hpp b/IPL/include/opencv/opencv2/ximgproc/color_match.hpp
new file mode 100644
index 0000000..c18390d
--- /dev/null
+++ b/IPL/include/opencv/opencv2/ximgproc/color_match.hpp
@@ -0,0 +1,66 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef __OPENCV_COLOR_MATCH_HPP__
+#define __OPENCV_COLOR_MATCH_HPP__
+
+#include <opencv2/core.hpp>
+
+namespace cv {
+namespace ximgproc {
+
+//! @addtogroup ximgproc_filters
+//! @{
+
+/**
+* @brief   creates a quaternion image.
+*
+* @param   img         Source 8-bit, 32-bit or 64-bit image, with 3-channel image.
+* @param   qimg        result CV_64FC4 a quaternion image (4 channels: zero channel and B,G,R).
+*/
+CV_EXPORTS_W void createQuaternionImage(InputArray img, OutputArray qimg);
+
+/**
+* @brief   calculates conjugate of a quaternion image.
+*
+* @param   qimg         quaternion image.
+* @param   qcimg        conjugate of qimg
+*/
+CV_EXPORTS_W void qconj(InputArray qimg, OutputArray qcimg);
+/**
+* @brief   divides each element by its modulus.
+*
+* @param   qimg         quaternion image.
+* @param   qnimg        qimg with each element divided by its modulus.
+*/
+CV_EXPORTS_W void qunitary(InputArray qimg, OutputArray qnimg);
+/**
+* @brief   Calculates the per-element quaternion product of two arrays
+*
+* @param   src1         quaternion image.
+* @param   src2         quaternion image.
+* @param   dst        product dst(I)=src1(I) . src2(I)
+*/
+CV_EXPORTS_W void qmultiply(InputArray  	src1, InputArray  	src2, OutputArray  	dst);
+/**
+* @brief    Performs a forward or inverse Discrete quaternion Fourier transform of a 2D quaternion array.
+*
+* @param   img        quaternion image.
+* @param   qimg       quaternion image in dual space.
+* @param   flags      operation flags. Only the DFT_INVERSE flag is supported.
+* @param   sideLeft   true the hypercomplex exponential is to be multiplied on the left (false on the right ).
+*/
+CV_EXPORTS_W void qdft(InputArray img, OutputArray qimg, int  	flags, bool sideLeft);
+/**
+* @brief    Compares a color template against overlapped color image regions.
+*
+* @param   img        Image where the search is running. It must be 3 channels image
+* @param   templ       Searched template. It must be not greater than the source image and have 3 channels
+* @param   result     Map of comparison results. It must be single-channel 64-bit floating-point
+*/
+CV_EXPORTS_W void colorMatchTemplate(InputArray img, InputArray templ, OutputArray result);
+//! @}
+}
+}
+#endif
diff --git a/IPL/include/opencv/opencv2/ximgproc/deriche_filter.hpp b/IPL/include/opencv/opencv2/ximgproc/deriche_filter.hpp
new file mode 100644
index 0000000..26d3b67
--- /dev/null
+++ b/IPL/include/opencv/opencv2/ximgproc/deriche_filter.hpp
@@ -0,0 +1,77 @@
+/*
+ *  By downloading, copying, installing or using the software you agree to this license.
+ *  If you do not agree to this license, do not download, install,
+ *  copy or use the software.
+ *
+ *
+ *  License Agreement
+ *  For Open Source Computer Vision Library
+ *  (3 - clause BSD License)
+ *
+ *  Redistribution and use in source and binary forms, with or without modification,
+ *  are permitted provided that the following conditions are met :
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *  this list of conditions and the following disclaimer.
+ *
+ *  * Redistributions in binary form must reproduce the above copyright notice,
+ *  this list of conditions and the following disclaimer in the documentation
+ *  and / or other materials provided with the distribution.
+ *
+ *  * Neither the names of the copyright holders nor the names of the contributors
+ *  may be used to endorse or promote products derived from this software
+ *  without specific prior written permission.
+ *
+ *  This software is provided by the copyright holders and contributors "as is" and
+ *  any express or implied warranties, including, but not limited to, the implied
+ *  warranties of merchantability and fitness for a particular purpose are disclaimed.
+ *  In no event shall copyright holders or contributors be liable for any direct,
+ *  indirect, incidental, special, exemplary, or consequential damages
+ *  (including, but not limited to, procurement of substitute goods or services;
+ *  loss of use, data, or profits; or business interruption) however caused
+ *  and on any theory of liability, whether in contract, strict liability,
+ *  or tort(including negligence or otherwise) arising in any way out of
+ *  the use of this software, even if advised of the possibility of such damage.
+ */
+
+#ifndef __OPENCV_DERICHEFILTER_HPP__
+#define __OPENCV_DERICHEFILTER_HPP__
+#ifdef __cplusplus
+
+#include <opencv2/core.hpp>
+
+namespace cv {
+namespace ximgproc {
+
+//! @addtogroup ximgproc_filters
+//! @{
+
+/**
+* @brief   Applies Y Deriche filter to an image.
+*
+* For more details about this implementation, please see http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.476.5736&rep=rep1&type=pdf
+*
+* @param   op         Source 8-bit or 16-bit image, 1-channel or 3-channel image.
+* @param   dst        result CV_32FC image with the same number of channels as op.
+* @param   alpha double see paper
+* @param   omega   double see paper
+*
+*/
+CV_EXPORTS_W void GradientDericheY(InputArray op, OutputArray dst, double alpha,double omega);
+/**
+* @brief   Applies X Deriche filter to an image.
+*
+* For more details about this implementation, please see http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.476.5736&rep=rep1&type=pdf
+*
+* @param   op         Source 8-bit or 16-bit image, 1-channel or 3-channel image.
+* @param   dst        result CV_32FC image with the same number of channels as op.
+* @param   alpha double see paper
+* @param   omega   double see paper
+*
+*/
+CV_EXPORTS_W void GradientDericheX(InputArray op, OutputArray dst, double alpha,double omega);
+//! @}
+}
+}
+#endif
+#endif
diff --git a/IPL/include/opencv/opencv2/ximgproc/disparity_filter.hpp b/IPL/include/opencv/opencv2/ximgproc/disparity_filter.hpp
index 07bcf6d..b738436 100644
--- a/IPL/include/opencv/opencv2/ximgproc/disparity_filter.hpp
+++ b/IPL/include/opencv/opencv2/ximgproc/disparity_filter.hpp
@@ -11,7 +11,7 @@
  *  Redistribution and use in source and binary forms, with or without modification,
  *  are permitted provided that the following conditions are met :
  *
- *  *Redistributions of source code must retain the above copyright notice,
+ *  * Redistributions of source code must retain the above copyright notice,
  *  this list of conditions and the following disclaimer.
  *
  *  * Redistributions in binary form must reproduce the above copyright notice,
diff --git a/IPL/include/opencv/opencv2/ximgproc/edge_filter.hpp b/IPL/include/opencv/opencv2/ximgproc/edge_filter.hpp
index 9b722fa..82be7c7 100644
--- a/IPL/include/opencv/opencv2/ximgproc/edge_filter.hpp
+++ b/IPL/include/opencv/opencv2/ximgproc/edge_filter.hpp
@@ -11,7 +11,7 @@
  *  Redistribution and use in source and binary forms, with or without modification,
  *  are permitted provided that the following conditions are met :
  *
- *  *Redistributions of source code must retain the above copyright notice,
+ *  * Redistributions of source code must retain the above copyright notice,
  *  this list of conditions and the following disclaimer.
  *
  *  * Redistributions in binary form must reproduce the above copyright notice,
@@ -107,7 +107,7 @@ guided image then use DTFilter interface to avoid extra computations on initiali
 @param guide guided image (also called as joint image) with unsigned 8-bit or floating-point 32-bit
 depth and up to 4 channels.
 @param src filtering image with unsigned 8-bit or floating-point 32-bit depth and up to 4 channels.
-@param dst
+@param dst destination image
 @param sigmaSpatial \f${\sigma}_H\f$ parameter in the original article, it's similar to the sigma in the
 coordinate space into bilateralFilter.
 @param sigmaColor \f${\sigma}_r\f$ parameter in the original article, it's similar to the sigma in the
@@ -316,6 +316,28 @@ proportional to sigmaSpace .
 CV_EXPORTS_W
 void jointBilateralFilter(InputArray joint, InputArray src, OutputArray dst, int d, double sigmaColor, double sigmaSpace, int borderType = BORDER_DEFAULT);
 
+/** @brief Applies the bilateral texture filter to an image. It performs structure-preserving texture filter.
+For more details about this filter see @cite Cho2014.
+
+@param src Source image whose depth is 8-bit UINT or 32-bit FLOAT
+
+@param dst Destination image of the same size and type as src.
+
+@param fr Radius of kernel to be used for filtering. It should be a positive integer.
+
+@param numIter Number of iterations of the algorithm. It should be a positive integer.
+
+@param sigmaAlpha Controls the sharpness of the weight transition from edges to smooth/texture regions, where
+a bigger value means sharper transition. When the value is negative, it is automatically calculated.
+
+@param sigmaAvg Range blur parameter for texture blurring. Larger value makes result to be more blurred. When the
+value is negative, it is automatically calculated as described in the paper.
+
+@sa rollingGuidanceFilter, bilateralFilter
+*/
+CV_EXPORTS_W
+void bilateralTextureFilter(InputArray src, OutputArray dst, int fr = 3, int numIter = 1, double sigmaAlpha = -1., double sigmaAvg = -1.);
+
 //////////////////////////////////////////////////////////////////////////
 //////////////////////////////////////////////////////////////////////////
 
@@ -353,6 +375,80 @@ void rollingGuidanceFilter(InputArray src, OutputArray dst, int d = -1, double s
 //////////////////////////////////////////////////////////////////////////
 //////////////////////////////////////////////////////////////////////////
 
+/** @brief Interface for implementations of Fast Bilateral Solver.
+
+For more details about this solver see @cite BarronPoole2016 .
+*/
+class CV_EXPORTS_W FastBilateralSolverFilter : public Algorithm
+{
+public:
+    /** @brief Apply smoothing operation to the source image.
+
+    @param src source image for filtering with unsigned 8-bit or signed 16-bit or floating-point 32-bit depth and up to 3 channels.
+
+    @param confidence confidence image with unsigned 8-bit or floating-point 32-bit confidence and 1 channel.
+
+    @param dst destination image.
+
+    @note Confidence images with CV_8U depth are expected to be in [0, 255] and CV_32F in [0, 1] range.
+    */
+    CV_WRAP virtual void filter(InputArray src, InputArray confidence, OutputArray dst) = 0;
+};
+
+/** @brief Factory method, create instance of FastBilateralSolverFilter and execute the initialization routines.
+
+@param guide image serving as guide for filtering. It should have 8-bit depth and either 1 or 3 channels.
+
+@param sigma_spatial parameter, that is similar to spatial space sigma (bandwidth) in bilateralFilter.
+
+@param sigma_luma parameter, that is similar to luma space sigma (bandwidth) in bilateralFilter.
+
+@param sigma_chroma parameter, that is similar to chroma space sigma (bandwidth) in bilateralFilter.
+
+@param lambda smoothness strength parameter for solver.
+
+@param num_iter number of iterations used for solver, 25 is usually enough.
+
+@param max_tol convergence tolerance used for solver.
+
+For more details about the Fast Bilateral Solver parameters, see the original paper @cite BarronPoole2016.
+
+*/
+CV_EXPORTS_W Ptr<FastBilateralSolverFilter> createFastBilateralSolverFilter(InputArray guide, double sigma_spatial, double sigma_luma, double sigma_chroma, double lambda = 128.0, int num_iter = 25, double max_tol = 1e-5);
+
+
+
+/** @brief Simple one-line Fast Bilateral Solver filter call. If you have multiple images to filter with the same
+guide then use FastBilateralSolverFilter interface to avoid extra computations.
+
+@param guide image serving as guide for filtering. It should have 8-bit depth and either 1 or 3 channels.
+
+@param src source image for filtering with unsigned 8-bit or signed 16-bit or floating-point 32-bit depth and up to 4 channels.
+
+@param confidence confidence image with unsigned 8-bit or floating-point 32-bit confidence and 1 channel.
+
+@param dst destination image.
+
+@param sigma_spatial parameter, that is similar to spatial space sigma (bandwidth) in bilateralFilter.
+
+@param sigma_luma parameter, that is similar to luma space sigma (bandwidth) in bilateralFilter.
+
+@param sigma_chroma parameter, that is similar to chroma space sigma (bandwidth) in bilateralFilter.
+
+@param lambda smoothness strength parameter for solver.
+
+@param num_iter number of iterations used for solver, 25 is usually enough.
+
+@param max_tol convergence tolerance used for solver.
+
+For more details about the Fast Bilateral Solver parameters, see the original paper @cite BarronPoole2016.
+
+@note Confidence images with CV_8U depth are expected to be in [0, 255] and CV_32F in [0, 1] range.
+*/
+CV_EXPORTS_W void fastBilateralSolverFilter(InputArray guide, InputArray src, InputArray confidence, OutputArray dst, double sigma_spatial = 8, double sigma_luma = 8, double sigma_chroma = 8, double lambda = 128.0, int num_iter = 25, double max_tol = 1e-5);
+
+//////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////////////////////////////////////////
 
 /** @brief Interface for implementations of Fast Global Smoother filter.
 
diff --git a/IPL/include/opencv/opencv2/ximgproc/edgeboxes.hpp b/IPL/include/opencv/opencv2/ximgproc/edgeboxes.hpp
new file mode 100644
index 0000000..966f9cd
--- /dev/null
+++ b/IPL/include/opencv/opencv2/ximgproc/edgeboxes.hpp
@@ -0,0 +1,201 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef __OPENCV_EDGEBOXES_HPP__
+#define __OPENCV_EDGEBOXES_HPP__
+
+#include <opencv2/core.hpp>
+
+namespace cv
+{
+namespace ximgproc
+{
+
+//! @addtogroup ximgproc_edgeboxes
+//! @{
+
+// bounding box data structures
+typedef struct
+{
+  int x, y, w, h;
+  float score;
+} Box;
+
+typedef std::vector<Box> Boxes;
+
+/** @brief Class implementing EdgeBoxes algorithm from @cite ZitnickECCV14edgeBoxes :
+ */
+class CV_EXPORTS_W EdgeBoxes : public Algorithm
+{
+
+public:
+
+    /** @brief Returns array containing proposal boxes.
+
+    @param edge_map edge image.
+    @param orientation_map orientation map.
+    @param boxes proposal boxes.
+    @param scores scores of the proposal boxes, provided as a vector of float types.
+    */
+    CV_WRAP virtual void getBoundingBoxes(InputArray edge_map, InputArray orientation_map, CV_OUT std::vector<Rect> &boxes, OutputArray scores = noArray()) = 0;
+
+    /** @brief Returns the step size of sliding window search.
+    */
+    CV_WRAP virtual float getAlpha() const = 0;
+    /** @brief Sets the step size of sliding window search.
+    */
+    CV_WRAP virtual void setAlpha(float value) = 0;
+
+    /** @brief Returns the nms threshold for object proposals.
+    */
+    CV_WRAP virtual float getBeta() const = 0;
+    /** @brief Sets the nms threshold for object proposals.
+    */
+    CV_WRAP virtual void setBeta(float value) = 0;
+
+    /** @brief Returns adaptation rate for nms threshold.
+    */
+    CV_WRAP virtual float getEta() const = 0;
+    /** @brief Sets the adaptation rate for nms threshold.
+    */
+    CV_WRAP virtual void setEta(float value) = 0;
+
+    /** @brief Returns the min score of boxes to detect.
+    */
+    CV_WRAP virtual float getMinScore() const = 0;
+    /** @brief Sets the min score of boxes to detect.
+    */
+    CV_WRAP virtual void setMinScore(float value) = 0;
+
+    /** @brief Returns the max number of boxes to detect.
+    */
+    CV_WRAP virtual int getMaxBoxes() const = 0;
+    /** @brief Sets max number of boxes to detect.
+    */
+    CV_WRAP virtual void setMaxBoxes(int value) = 0;
+
+    /** @brief Returns the edge min magnitude.
+    */
+    CV_WRAP virtual float getEdgeMinMag() const = 0;
+    /** @brief Sets the edge min magnitude.
+    */
+    CV_WRAP virtual void setEdgeMinMag(float value) = 0;
+
+    /** @brief Returns the edge merge threshold.
+    */
+    CV_WRAP virtual float getEdgeMergeThr() const = 0;
+    /** @brief Sets the edge merge threshold.
+    */
+    CV_WRAP virtual void setEdgeMergeThr(float value) = 0;
+
+    /** @brief Returns the cluster min magnitude.
+    */
+    CV_WRAP virtual float getClusterMinMag() const = 0;
+    /** @brief Sets the cluster min magnitude.
+    */
+    CV_WRAP virtual void setClusterMinMag(float value) = 0;
+
+    /** @brief Returns the max aspect ratio of boxes.
+    */
+    CV_WRAP virtual float getMaxAspectRatio() const = 0;
+    /** @brief Sets the max aspect ratio of boxes.
+    */
+    CV_WRAP virtual void setMaxAspectRatio(float value) = 0;
+
+    /** @brief Returns the minimum area of boxes.
+    */
+    CV_WRAP virtual float getMinBoxArea() const = 0;
+    /** @brief Sets the minimum area of boxes.
+    */
+    CV_WRAP virtual void setMinBoxArea(float value) = 0;
+
+    /** @brief Returns the affinity sensitivity.
+    */
+    CV_WRAP virtual float getGamma() const = 0;
+    /** @brief Sets the affinity sensitivity
+    */
+    CV_WRAP virtual void setGamma(float value) = 0;
+
+    /** @brief Returns the scale sensitivity.
+    */
+    CV_WRAP virtual float getKappa() const = 0;
+    /** @brief Sets the scale sensitivity.
+    */
+    CV_WRAP virtual void setKappa(float value) = 0;
+
+};
+
+/** @brief Creates an EdgeBoxes instance
+
+@param alpha step size of sliding window search.
+@param beta nms threshold for object proposals.
+@param eta adaptation rate for nms threshold.
+@param minScore min score of boxes to detect.
+@param maxBoxes max number of boxes to detect.
+@param edgeMinMag edge min magnitude. Increase to trade off accuracy for speed.
+@param edgeMergeThr edge merge threshold. Increase to trade off accuracy for speed.
+@param clusterMinMag cluster min magnitude. Increase to trade off accuracy for speed.
+@param maxAspectRatio max aspect ratio of boxes.
+@param minBoxArea minimum area of boxes.
+@param gamma affinity sensitivity.
+@param kappa scale sensitivity.
+*/
+CV_EXPORTS_W Ptr<EdgeBoxes>
+createEdgeBoxes(float alpha=0.65f,
+                float beta=0.75f,
+                float eta=1,
+                float minScore=0.01f,
+                int   maxBoxes=10000,
+                float edgeMinMag=0.1f,
+                float edgeMergeThr=0.5f,
+                float clusterMinMag=0.5f,
+                float maxAspectRatio=3,
+                float minBoxArea=1000,
+                float gamma=2,
+                float kappa=1.5f);
+
+//! @}
+
+}
+}
+
+#endif /* __OPENCV_EDGEBOXES_HPP__ */
diff --git a/IPL/include/opencv/opencv2/ximgproc/edgepreserving_filter.hpp b/IPL/include/opencv/opencv2/ximgproc/edgepreserving_filter.hpp
new file mode 100644
index 0000000..f5685ce
--- /dev/null
+++ b/IPL/include/opencv/opencv2/ximgproc/edgepreserving_filter.hpp
@@ -0,0 +1,33 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef __OPENCV_EDGEPRESERVINGFILTER_HPP__
+#define __OPENCV_EDGEPRESERVINGFILTER_HPP__
+
+#include <opencv2/core.hpp>
+
+namespace cv { namespace ximgproc {
+
+//! @addtogroup ximgproc
+//! @{
+
+    /**
+    * @brief Smoothes an image using the Edge-Preserving filter.
+    *
+    * The function smoothes Gaussian noise as well as salt & pepper noise.
+    * For more details about this implementation, please see
+    * [ReiWoe18]  Reich, S. and Wörgötter, F. and Dellen, B. (2018). A Real-Time Edge-Preserving Denoising Filter. Proceedings of the 13th International Joint Conference on Computer Vision, Imaging and Computer Graphics Theory and Applications (VISIGRAPP): Visapp, 85-94, 4. DOI: 10.5220/0006509000850094.
+    *
+    * @param src Source 8-bit 3-channel image.
+    * @param dst Destination image of the same size and type as src.
+    * @param d Diameter of each pixel neighborhood that is used during filtering. Must be greater than or equal to 3.
+    * @param threshold Threshold, which distinguishes between noise, outliers, and data.
+    */
+    CV_EXPORTS_W void edgePreservingFilter( InputArray src, OutputArray dst, int d, double threshold );
+
+//! @}
+
+}} // namespace
+
+#endif
diff --git a/IPL/include/opencv/opencv2/ximgproc/fast_hough_transform.hpp b/IPL/include/opencv/opencv2/ximgproc/fast_hough_transform.hpp
index cdfb032..adfbf54 100644
--- a/IPL/include/opencv/opencv2/ximgproc/fast_hough_transform.hpp
+++ b/IPL/include/opencv/opencv2/ximgproc/fast_hough_transform.hpp
@@ -130,12 +130,12 @@ typedef enum {
 * The function calculates the fast Hough transform for full, half or quarter
 * range of angles.
 */
-CV_EXPORTS void FastHoughTransform( InputArray  src,
-                                    OutputArray dst,
-                                    int         dstMatDepth,
-                                    int         angleRange = ARO_315_135,
-                                    int         op = FHT_ADD,
-                                    int         makeSkew = HDO_DESKEW );
+CV_EXPORTS_W void FastHoughTransform( InputArray  src,
+                                      OutputArray dst,
+                                      int         dstMatDepth,
+                                      int         angleRange = ARO_315_135,
+                                      int         op = FHT_ADD,
+                                      int         makeSkew = HDO_DESKEW );
 
 /**
 * @brief   Calculates coordinates of line segment corresponded by point in Hough space.
@@ -152,11 +152,11 @@ CV_EXPORTS void FastHoughTransform( InputArray  src,
 *
 * The function calculates coordinates of line segment corresponded by point in Hough space.
 */
-CV_EXPORTS Vec4i HoughPoint2Line(const Point &houghPoint,
-                                 InputArray  srcImgInfo,
-                                 int         angleRange = ARO_315_135,
-                                 int         makeSkew = HDO_DESKEW,
-                                 int         rules = RO_IGNORE_BORDERS );
+CV_EXPORTS_W Vec4i HoughPoint2Line(const Point &houghPoint,
+                                   InputArray  srcImgInfo,
+                                   int         angleRange = ARO_315_135,
+                                   int         makeSkew = HDO_DESKEW,
+                                   int         rules = RO_IGNORE_BORDERS );
 
 } }// namespace cv::ximgproc
 
diff --git a/IPL/include/opencv/opencv2/ximgproc/fast_line_detector.hpp b/IPL/include/opencv/opencv2/ximgproc/fast_line_detector.hpp
new file mode 100644
index 0000000..1df5558
--- /dev/null
+++ b/IPL/include/opencv/opencv2/ximgproc/fast_line_detector.hpp
@@ -0,0 +1,81 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef __OPENCV_FAST_LINE_DETECTOR_HPP__
+#define __OPENCV_FAST_LINE_DETECTOR_HPP__
+
+#include <opencv2/core.hpp>
+
+namespace cv
+{
+namespace ximgproc
+{
+
+//! @addtogroup ximgproc_fast_line_detector
+//! @{
+
+/** @brief Class implementing the FLD (Fast Line Detector) algorithm described
+in @cite Lee14 .
+*/
+
+//! @include samples/fld_lines.cpp
+
+class CV_EXPORTS_W FastLineDetector : public Algorithm
+{
+public:
+    /** @example fld_lines.cpp
+      An example using the FastLineDetector
+      */
+    /** @brief Finds lines in the input image.
+      This is the output of the default parameters of the algorithm on the above
+      shown image.
+
+      ![image](pics/corridor_fld.jpg)
+
+      @param _image A grayscale (CV_8UC1) input image. If only a roi needs to be
+      selected, use: `fld_ptr-\>detect(image(roi), lines, ...);
+      lines += Scalar(roi.x, roi.y, roi.x, roi.y);`
+      @param _lines A vector of Vec4f elements specifying the beginning
+      and ending point of a line.  Where Vec4f is (x1, y1, x2, y2), point
+      1 is the start, point 2 - end. Returned lines are directed so that the
+      brighter side is on their left.
+      */
+    CV_WRAP virtual void detect(InputArray _image, OutputArray _lines) = 0;
+
+    /** @brief Draws the line segments on a given image.
+      @param _image The image on which the lines will be drawn. Should be at least
+      as large as the image in which the lines were found.
+      @param lines A vector of the lines that need to be drawn.
+      @param draw_arrow If true, arrow heads will be drawn.
+    */
+    CV_WRAP virtual void drawSegments(InputOutputArray _image, InputArray lines,
+            bool draw_arrow = false) = 0;
+
+    virtual ~FastLineDetector() { }
+};
+
+/** @brief Creates a smart pointer to a FastLineDetector object and initializes it
+
+@param _length_threshold    10         - Segments shorter than this will be discarded
+@param _distance_threshold  1.41421356 - A point placed from a hypothesis line
+                                         segment farther than this will be
+                                         regarded as an outlier
+@param _canny_th1           50         - First threshold for
+                                         hysteresis procedure in Canny()
+@param _canny_th2           50         - Second threshold for
+                                         hysteresis procedure in Canny()
+@param _canny_aperture_size 3          - Aperture size for the Sobel
+                                         operator in Canny()
+@param _do_merge            false      - If true, incremental merging of segments
+                                         will be performed
+*/
+CV_EXPORTS_W Ptr<FastLineDetector> createFastLineDetector(
+        int _length_threshold = 10, float _distance_threshold = 1.414213562f,
+        double _canny_th1 = 50.0, double _canny_th2 = 50.0, int _canny_aperture_size = 3,
+        bool _do_merge = false);
+
+//! @} ximgproc_fast_line_detector
+}
+}
+#endif
diff --git a/IPL/include/opencv/opencv2/ximgproc/fourier_descriptors.hpp b/IPL/include/opencv/opencv2/ximgproc/fourier_descriptors.hpp
new file mode 100644
index 0000000..969a33d
--- /dev/null
+++ b/IPL/include/opencv/opencv2/ximgproc/fourier_descriptors.hpp
@@ -0,0 +1,119 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef __OPENCV_FOURIERDESCRIPTORS_HPP__
+#define __OPENCV_FOURIERDESCRIPTORS_HPP__
+
+#include <opencv2/core.hpp>
+
+namespace cv {
+namespace ximgproc {
+
+    //! @addtogroup ximgproc_fourier
+    //! @{
+
+    /** @brief Class for ContourFitting algorithms.
+    ContourFitting matches two contours \f$ z_a \f$ and \f$ z_b \f$ by minimizing the distance
+    \f[ d(z_a,z_b)=\sum (a_n - s  b_n e^{j(n \alpha +\phi )})^2 \f] where \f$ a_n \f$ and \f$ b_n \f$ are Fourier descriptors of \f$ z_a \f$ and \f$ z_b \f$, s is a scaling factor, \f$ \phi \f$ is the rotation angle and \f$ \alpha \f$ is the starting point factor adjustment
+    */
+    class CV_EXPORTS_W ContourFitting : public Algorithm
+    {
+        int ctrSize;
+        int fdSize;
+        std::vector<std::complex<double> > b;
+        std::vector<std::complex<double> > a;
+        std::vector<double> frequence;
+        std::vector<double> rho, psi;
+        void frequencyInit();
+        void fAlpha(double x, double &fn, double &df);
+        double distance(std::complex<double> r, double alpha);
+        double  newtonRaphson(double x1, double x2);
+    public:
+        /** @brief Constructs a ContourFitting object with the given sizes. More details in @cite PersoonFu1977 and @cite BergerRaghunathan1998
+
+        * @param ctr number of Fourier descriptors equal to number of contour points after resampling.
+        * @param fd number of Fourier descriptors used for optimal curve matching.
+        */
+        ContourFitting(int ctr=1024,int fd=16):ctrSize(ctr),fdSize(fd){};
+        /** @brief Fit two closed curves using Fourier descriptors. More details in @cite PersoonFu1977 and @cite BergerRaghunathan1998
+
+        @param src Contour defining first shape.
+        @param dst Contour defining second shape (Target).
+        @param alphaPhiST : \f$ \alpha \f$=alphaPhiST(0,0), \f$ \phi \f$=alphaPhiST(0,1) (in radian), s=alphaPhiST(0,2), Tx=alphaPhiST(0,3), Ty=alphaPhiST(0,4) rotation center
+        @param dist distance between src and dst after matching.
+        @param fdContour if false, src and dst are contours; if true, src and dst are Fourier descriptors.
+        */
+        void estimateTransformation(InputArray src, InputArray dst, OutputArray alphaPhiST, double *dist = 0, bool fdContour = false);
+        /** @brief Fit two closed curves using Fourier descriptors. More details in @cite PersoonFu1977 and @cite BergerRaghunathan1998
+
+        @param src Contour defining first shape.
+        @param dst Contour defining second shape (Target).
+        @param alphaPhiST : \f$ \alpha \f$=alphaPhiST(0,0), \f$ \phi \f$=alphaPhiST(0,1) (in radian), s=alphaPhiST(0,2), Tx=alphaPhiST(0,3), Ty=alphaPhiST(0,4) rotation center
+        @param dist distance between src and dst after matching.
+        @param fdContour if false, src and dst are contours; if true, src and dst are Fourier descriptors.
+        */
+        CV_WRAP void estimateTransformation(InputArray src, InputArray dst, OutputArray alphaPhiST, CV_OUT double &dist , bool fdContour = false);
+        /** @brief set number of Fourier descriptors used in estimateTransformation
+
+        @param n number of Fourier descriptors equal to number of contour points after resampling.
+        */
+        CV_WRAP void setCtrSize(int n);
+        /** @brief set number of Fourier descriptors when estimateTransformation is used with vector<Point>
+
+        @param n number of Fourier descriptors used for optimal curve matching.
+        */
+        CV_WRAP void setFDSize(int n);
+        /**
+        @returns number of Fourier descriptors
+        */
+        CV_WRAP int getCtrSize() { return ctrSize; };
+        /**
+        @returns number of Fourier descriptors used for optimal curve matching
+        */
+        CV_WRAP int getFDSize() { return fdSize; };
+    };
+    /**
+    * @brief   Fourier descriptors for planar closed curves
+    *
+    * For more details about this implementation, please see @cite PersoonFu1977
+    *
+    * @param   src   contour type vector<Point> , vector<Point2f>  or vector<Point2d>
+    * @param   dst   Mat of type CV_64FC2 and nbElt rows (to be verified)
+    * @param   nbElt number of rows in dst or getOptimalDFTSize rows if nbElt=-1
+    * @param   nbFD number of FD returned in dst dst = [FD(1...nbFD/2) FD(nbFD/2-nbElt+1...:nbElt)]
+    *
+    */
+    CV_EXPORTS_W void fourierDescriptor(InputArray src, OutputArray dst, int nbElt=-1,int nbFD=-1);
+    /**
+    * @brief   Transforms a contour
+    *
+    * @param   src   contour or Fourier Descriptors if fdContour is true
+    * @param   t   transform Mat given by estimateTransformation
+    * @param   dst   Mat of type CV_64FC2 and nbElt rows
+    * @param   fdContour true if src contains Fourier Descriptors, false if src is a contour
+    *
+    */
+    CV_EXPORTS_W void transformFD(InputArray src, InputArray t,OutputArray dst, bool fdContour=true);
+    /**
+    * @brief   Contour sampling.
+    *
+    * @param   src   contour type vector<Point> , vector<Point2f>  or vector<Point2d>
+    * @param   out   Mat of type CV_64FC2 and nbElt rows
+    * @param   nbElt number of points in the out contour
+    *
+    */
+    CV_EXPORTS_W void contourSampling(InputArray src, OutputArray out, int nbElt);
+
+    /**
+    * @brief create ContourFitting algorithm object
+    *
+    * @param ctr number of Fourier descriptors equal to number of contour points after resampling.
+    * @param fd number of Fourier descriptors used for optimal curve matching.
+    */
+    CV_EXPORTS_W Ptr<ContourFitting> createContourFitting(int ctr = 1024, int fd = 16);
+
+    //! @} ximgproc_fourier
+}
+}
+#endif
diff --git a/IPL/include/opencv/opencv2/ximgproc/paillou_filter.hpp b/IPL/include/opencv/opencv2/ximgproc/paillou_filter.hpp
new file mode 100644
index 0000000..03754a1
--- /dev/null
+++ b/IPL/include/opencv/opencv2/ximgproc/paillou_filter.hpp
@@ -0,0 +1,67 @@
+/*
+ *  By downloading, copying, installing or using the software you agree to this license.
+ *  If you do not agree to this license, do not download, install,
+ *  copy or use the software.
+ *
+ *
+ *  License Agreement
+ *  For Open Source Computer Vision Library
+ *  (3 - clause BSD License)
+ *
+ *  Redistribution and use in source and binary forms, with or without modification,
+ *  are permitted provided that the following conditions are met :
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *  this list of conditions and the following disclaimer.
+ *
+ *  * Redistributions in binary form must reproduce the above copyright notice,
+ *  this list of conditions and the following disclaimer in the documentation
+ *  and / or other materials provided with the distribution.
+ *
+ *  * Neither the names of the copyright holders nor the names of the contributors
+ *  may be used to endorse or promote products derived from this software
+ *  without specific prior written permission.
+ *
+ *  This software is provided by the copyright holders and contributors "as is" and
+ *  any express or implied warranties, including, but not limited to, the implied
+ *  warranties of merchantability and fitness for a particular purpose are disclaimed.
+ *  In no event shall copyright holders or contributors be liable for any direct,
+ *  indirect, incidental, special, exemplary, or consequential damages
+ *  (including, but not limited to, procurement of substitute goods or services;
+ *  loss of use, data, or profits; or business interruption) however caused
+ *  and on any theory of liability, whether in contract, strict liability,
+ *  or tort(including negligence or otherwise) arising in any way out of
+ *  the use of this software, even if advised of the possibility of such damage.
+ */
+
+#ifndef __OPENCV_PAILLOUFILTER_HPP__
+#define __OPENCV_PAILLOUFILTER_HPP__
+#ifdef __cplusplus
+
+#include <opencv2/core.hpp>
+
+namespace cv {
+namespace ximgproc {
+
+//! @addtogroup ximgproc_filters
+//! @{
+
+/**
+* @brief   Applies Paillou filter to an image.
+*
+* For more details about this implementation, please see @cite paillou1997detecting
+*
+* @param   op          Source CV_8U(S) or CV_16U(S), 1-channel or 3-channels image.
+* @param   _dst        result CV_32F image with the same number of channels as op.
+* @param   alpha double see paper
+* @param   omega double see paper
+*
+* @sa GradientPaillouX, GradientPaillouY
+*/
+CV_EXPORTS void GradientPaillouY(InputArray op, OutputArray _dst, double alpha, double omega);
+CV_EXPORTS void GradientPaillouX(InputArray op, OutputArray _dst, double alpha, double omega);
+
+}
+}
+#endif
+#endif
diff --git a/IPL/include/opencv/opencv2/ximgproc/peilin.hpp b/IPL/include/opencv/opencv2/ximgproc/peilin.hpp
new file mode 100644
index 0000000..1b224aa
--- /dev/null
+++ b/IPL/include/opencv/opencv2/ximgproc/peilin.hpp
@@ -0,0 +1,32 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef __OPENCV_PEILIN_HPP__
+#define __OPENCV_PEILIN_HPP__
+
+#include <opencv2/core.hpp>
+
+namespace cv { namespace ximgproc {
+
+    //! @addtogroup ximgproc
+    //! @{
+
+    /**
+    * @brief   Calculates an affine transformation that normalizes the given image using Pei&Lin Normalization.
+    *
+    * Assume given image \f$I=T(\bar{I})\f$ where \f$\bar{I}\f$ is a normalized image and \f$T\f$ is an affine transformation distorting this image by translation, rotation, scaling and skew.
+    * The function returns an affine transformation matrix corresponding to the transformation \f$T^{-1}\f$ described in [PeiLin95].
+    * For more details about this implementation, please see
+    * [PeiLin95] Soo-Chang Pei and Chao-Nan Lin. Image normalization for pattern recognition. Image and Vision Computing, Vol. 13, N.10, pp. 711-723, 1995.
+    *
+    * @param I Given transformed image.
+    * @return Transformation matrix corresponding to the inverse image transformation
+    */
+    CV_EXPORTS Matx23d PeiLinNormalization ( InputArray I );
+    /** @overload */
+    CV_EXPORTS_W void PeiLinNormalization ( InputArray I, OutputArray T );
+
+}} // namespace
+
+#endif
diff --git a/IPL/include/opencv/opencv2/ximgproc/ridgefilter.hpp b/IPL/include/opencv/opencv2/ximgproc/ridgefilter.hpp
new file mode 100644
index 0000000..7f21e73
--- /dev/null
+++ b/IPL/include/opencv/opencv2/ximgproc/ridgefilter.hpp
@@ -0,0 +1,53 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+/*
+Ridge Detection Filter.
+OpenCV port by : Kushal Vyas (@kushalvyas), Venkatesh Vijaykumar(@venkateshvijaykumar)
+Adapted from Niki Estner's explanation of RidgeFilter.
+*/
+
+#ifndef __OPENCV_XIMGPROC_RIDGEFILTER_HPP__
+#define __OPENCV_XIMGPROC_RIDGEFILTER_HPP__
+
+#include <opencv2/core.hpp>
+
+namespace cv { namespace ximgproc {
+
+//! @addtogroup ximgproc_filters
+//! @{
+
+/** @brief  Applies Ridge Detection Filter to an input image.
+Implements Ridge detection similar to the one in [Mathematica](http://reference.wolfram.com/language/ref/RidgeFilter.html)
+using the eigenvalues from the Hessian Matrix of the input image using Sobel Derivatives.
+Additional refinement can be done using Skeletonization and Binarization. Adapted from @cite segleafvein and @cite M_RF
+
+*/
+class CV_EXPORTS_W RidgeDetectionFilter : public Algorithm
+{
+public:
+    /**
+    @brief Create pointer to the Ridge detection filter.
+    @param ddepth  Specifies output image depth. Default is CV_32FC1
+    @param dx Order of derivative x, default is 1
+    @param dy  Order of derivative y, default is 1
+    @param ksize Sobel kernel size, default is 3
+    @param out_dtype Converted format for output, default is CV_8UC1
+    @param scale Optional scale value for derivative values, default is 1
+    @param delta  Optional bias added to output, default is 0
+    @param borderType Pixel extrapolation method, default is BORDER_DEFAULT
+    @see Sobel, threshold, getStructuringElement, morphologyEx.( for additional refinement)
+    */
+    CV_WRAP static Ptr<RidgeDetectionFilter> create(int ddepth = CV_32FC1, int dx=1, int dy=1, int ksize = 3, int out_dtype=CV_8UC1, double scale = 1, double delta = 0, int borderType = BORDER_DEFAULT);
+    /**
+    @brief Apply Ridge detection filter on input image.
+    @param _img InputArray as supported by Sobel. img can be 1-Channel or 3-Channels.
+    @param out OutputArray of structure as RidgeDetectionFilter::ddepth. Output image with ridges.
+    */
+    CV_WRAP virtual void getRidgeFilteredImage(InputArray _img, OutputArray out) = 0;
+};
+
+//! @}
+}} // namespace
+#endif
diff --git a/IPL/include/opencv/opencv2/ximgproc/run_length_morphology.hpp b/IPL/include/opencv/opencv2/ximgproc/run_length_morphology.hpp
new file mode 100644
index 0000000..5754691
--- /dev/null
+++ b/IPL/include/opencv/opencv2/ximgproc/run_length_morphology.hpp
@@ -0,0 +1,119 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef __OPENCV_RUN_LENGTH_MORPHOLOGY_HPP__
+#define __OPENCV_RUN_LENGTH_MORPHOLOGY_HPP__
+
+#include <opencv2/core.hpp>
+
+namespace cv {
+namespace ximgproc {
+namespace rl {
+
+
+//! @addtogroup ximgproc_run_length_morphology
+//! @{
+
+/**
+* @brief   Applies a fixed-level threshold to each array element.
+*
+* The result is stored as a run-length encoded image (see rlDest).
+* @param   src         input array (single-channel).
+* @param   rlDest      resulting run length encoded image.
+* @param   thresh      threshold value.
+* @param   type        thresholding type (only cv::THRESH_BINARY and cv::THRESH_BINARY_INV are supported)
+*
+*/
+CV_EXPORTS void threshold(InputArray src, OutputArray rlDest, double thresh, int type);
+
+
+/**
+* @brief   Dilates a run-length encoded binary image by using a specific structuring element.
+*
+*
+* @param   rlSrc       input image
+* @param   rlDest      result
+* @param   rlKernel    kernel
+* @param   anchor      position of the anchor within the element; default value (0, 0)
+*                      is usually the element center.
+*
+*/
+CV_EXPORTS void dilate(InputArray rlSrc, OutputArray rlDest, InputArray rlKernel, Point anchor = Point(0, 0));
+
+/**
+* @brief   Erodes a run-length encoded binary image by using a specific structuring element.
+*
+*
+* @param   rlSrc       input image
+* @param   rlDest      result
+* @param   rlKernel    kernel
+* @param   bBoundaryOn indicates whether pixels outside the image boundary are assumed to be on
+*           (True: works in the same way as the default of cv::erode, False: is a little faster)
+* @param   anchor      position of the anchor within the element; default value (0, 0)
+*                      is usually the element center.
+*
+*/
+CV_EXPORTS void erode(InputArray rlSrc, OutputArray rlDest, InputArray rlKernel,
+    bool bBoundaryOn = true, Point anchor = Point(0, 0));
+
+/**
+* @brief   Returns a run length encoded structuring element of the specified size and shape.
+*
+*
+* @param   shape       Element shape that can be one of cv::MorphShapes
+* @param   ksize       Size of the structuring element.
+*
+*/
+CV_EXPORTS cv::Mat getStructuringElement(int shape, Size ksize);
+
+/**
+* @brief   Paints a run length encoded binary image into an image.
+*
+*
+* @param   image       image to paint into (currently only single channel images).
+* @param   rlSrc       run length encoded image
+* @param   value       all foreground pixels of the binary image are set to this value
+*
+*/
+CV_EXPORTS void paint(InputOutputArray image, InputArray rlSrc, const cv::Scalar& value);
+
+/**
+* @brief   Checks whether a custom-made structuring element can be used with run length morphological operations.
+*          (It must consist of a continuous array of single runs per row)
+*
+* @param   rlStructuringElement   mask to be tested
+*/
+CV_EXPORTS bool isRLMorphologyPossible(InputArray rlStructuringElement);
+
+/**
+* @brief   Creates a run-length encoded image from a vector of runs (column begin, column end, row)
+*
+* @param   runs   vector of runs
+* @param   res    result
+* @param   size   image size (to be used if an "on" boundary should be used in erosion, using the default
+*                  means that the size is computed from the extent of the input)
+*/
+CV_EXPORTS void createRLEImage(std::vector<cv::Point3i>& runs, OutputArray res, Size size = Size(0, 0));
+
+/**
+* @brief   Applies a morphological operation to a run-length encoded binary image.
+*
+*
+* @param   rlSrc       input image
+* @param   rlDest      result
+* @param   op          all operations supported by cv::morphologyEx (except cv::MORPH_HITMISS)
+* @param   rlKernel    kernel
+* @param   bBoundaryOnForErosion indicates whether pixels outside the image boundary are assumed
+*          to be on for erosion operations (True: works in the same way as the default of cv::erode,
+*          False: is a little faster)
+* @param   anchor      position of the anchor within the element; default value (0, 0) is usually the element center.
+*
+*/
+CV_EXPORTS void morphologyEx(InputArray rlSrc, OutputArray rlDest, int op, InputArray rlKernel,
+    bool bBoundaryOnForErosion = true, Point anchor = Point(0,0));
+
+}
+}
+}
+#endif
diff --git a/IPL/include/opencv/opencv2/ximgproc/segmentation.hpp b/IPL/include/opencv/opencv2/ximgproc/segmentation.hpp
index 02d28bf..02346aa 100644
--- a/IPL/include/opencv/opencv2/ximgproc/segmentation.hpp
+++ b/IPL/include/opencv/opencv2/ximgproc/segmentation.hpp
@@ -73,9 +73,9 @@ namespace cv {
                      */
                     class CV_EXPORTS_W SelectiveSearchSegmentationStrategy : public Algorithm {
                         public:
-                            /** @brief Set a initial image, with a segementation.
+                            /** @brief Set an initial image, with a segmentation.
                                 @param img The input image. Any number of channel can be provided
-                                @param regions A segementation of the image. The parameter must be the same size of img.
+                                @param regions A segmentation of the image. The parameter must be the same size of img.
                                 @param sizes The sizes of different regions
                                 @param image_id If not set to -1, try to cache pre-computations. If the same set og (img, regions, size) is used, the image_id need to be the same.
                             */
@@ -236,7 +236,7 @@ namespace cv {
                             /** @brief Based on all images, graph segmentations and stragies, computes all possible rects and return them
                                 @param rects The list of rects. The first ones are more relevents than the lasts ones.
                             */
-                            CV_WRAP virtual void process(std::vector<Rect>& rects) = 0;
+                            CV_WRAP virtual void process(CV_OUT std::vector<Rect>& rects) = 0;
                     };
 
                     /** @brief Create a new SelectiveSearchSegmentation class.
diff --git a/IPL/include/opencv/opencv2/ximgproc/slic.hpp b/IPL/include/opencv/opencv2/ximgproc/slic.hpp
index 4d9eb9a..ab8e4bc 100644
--- a/IPL/include/opencv/opencv2/ximgproc/slic.hpp
+++ b/IPL/include/opencv/opencv2/ximgproc/slic.hpp
@@ -61,6 +61,8 @@ namespace ximgproc
 //! @addtogroup ximgproc_superpixel
 //! @{
 
+    enum SLICType { SLIC = 100, SLICO = 101, MSLIC = 102 };
+
 /** @brief Class implementing the SLIC (Simple Linear Iterative Clustering) superpixels
 algorithm described in @cite Achanta2012.
 
@@ -68,7 +70,9 @@ SLIC (Simple Linear Iterative Clustering) clusters pixels using pixel channels a
 to efficiently generate compact, nearly uniform superpixels. The simplicity of approach makes it
 extremely easy to use a lone parameter specifies the number of superpixels and the efficiency of
 the algorithm makes it very practical.
-
+Several optimizations are available for SLIC class:
+SLICO stands for "Zero parameter SLIC" and it is an optimization of baseline SLIC described in @cite Achanta2012.
+MSLIC stands for "Manifold SLIC" and it is an optimization of baseline SLIC described in @cite Liu_2017_IEEE.
  */
 
 class CV_EXPORTS_W SuperpixelSLIC : public Algorithm
@@ -134,26 +138,25 @@ class CV_EXPORTS_W SuperpixelSLIC : public Algorithm
 
 };
 
-/** @brief Class implementing the SLIC (Simple Linear Iterative Clustering) superpixels
+/** @brief Initialize a SuperpixelSLIC object
 
 @param image Image to segment
 @param algorithm Chooses the algorithm variant to use:
-SLIC segments image using a desired region_size, and in addition
-SLICO will choose an adaptive compactness factor.
+SLIC segments image using a desired region_size, and in addition SLICO will optimize using adaptive compactness factor,
+while MSLIC will optimize using manifold methods resulting in more content-sensitive superpixels.
 @param region_size Chooses an average superpixel size measured in pixels
 @param ruler Chooses the enforcement of superpixel smoothness factor of superpixel
 
 The function initializes a SuperpixelSLIC object for the input image. It sets the parameters of choosed
 superpixel algorithm, which are: region_size and ruler. It preallocate some buffers for future
-computing iterations over the given image. An example of SLIC versus SLICO is ilustrated in the
-following picture.
+computing iterations over the given image. For enhanced results it is recommended for color images to
+preprocess the image with a slight Gaussian blur using a small 3 x 3 kernel and an additional conversion into
+CieLAB color space. An example of SLIC versus SLICO and MSLIC is illustrated in the following picture.
 
-![image](pics/slic_slico_kermit.png)
+![image](pics/superpixels_slic.png)
 
  */
 
-    enum SLIC { SLIC = 100, SLICO = 101 };
-
     CV_EXPORTS_W Ptr<SuperpixelSLIC> createSuperpixelSLIC( InputArray image, int algorithm = SLICO,
                                                            int region_size = 10, float ruler = 10.0f );
 
diff --git a/IPL/include/opencv/opencv2/ximgproc/sparse_match_interpolator.hpp b/IPL/include/opencv/opencv2/ximgproc/sparse_match_interpolator.hpp
index 4821aba..80e2057 100644
--- a/IPL/include/opencv/opencv2/ximgproc/sparse_match_interpolator.hpp
+++ b/IPL/include/opencv/opencv2/ximgproc/sparse_match_interpolator.hpp
@@ -11,7 +11,7 @@
  *  Redistribution and use in source and binary forms, with or without modification,
  *  are permitted provided that the following conditions are met :
  *
- *  *Redistributions of source code must retain the above copyright notice,
+ *  * Redistributions of source code must retain the above copyright notice,
  *  this list of conditions and the following disclaimer.
  *
  *  * Redistributions in binary form must reproduce the above copyright notice,
@@ -77,6 +77,18 @@ estimator from @cite Revaud2015 and Fast Global Smoother as post-processing filt
 class CV_EXPORTS_W EdgeAwareInterpolator : public SparseMatchInterpolator
 {
 public:
+    /** @brief Interface to provide a more elaborated cost map, i.e. edge map, for the edge-aware term.
+     *  This implementation is based on a rather simple gradient-based edge map estimation.
+     *  To use a more complex edge map estimator (e.g. StructuredEdgeDetection that has been
+     *  used in the original publication) that may lead to improved accuracies, the internal
+     *  edge map estimation can be bypassed here.
+     *  @param _costMap a type CV_32FC1 Mat is required.
+     *  @see cv::ximgproc::createSuperpixelSLIC
+    */
+    CV_WRAP virtual void setCostMap(const Mat & _costMap) = 0;
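+    /* Parameter to tune the approximate size of the superpixel used for oversegmentation
+     * (see cv::ximgproc::createSuperpixelSLIC). NOTE(review): no declaration follows this comment here; kept as a plain comment so it is not attributed to the next documented member - confirm.
+     */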
     /** @brief K is a number of nearest-neighbor matches considered, when fitting a locally affine
     model. Usually it should be around 128. However, lower values would make the interpolation
     noticeably faster.
@@ -125,6 +137,132 @@ EdgeAwareInterpolator.
 CV_EXPORTS_W
 Ptr<EdgeAwareInterpolator> createEdgeAwareInterpolator();
 
+/** @brief Sparse match interpolation algorithm based on modified piecewise locally-weighted affine
+ * estimator called Robust Interpolation method of Correspondences or RIC from @cite Hu2017 and Variational
+ * and Fast Global Smoother as post-processing filter. The RICInterpolator is an extension of the EdgeAwareInterpolator.
+ * The main concept of this extension is a piece-wise affine model based on over-segmentation via SLIC superpixel estimation.
+ * The method contains an efficient propagation mechanism to estimate among the piece-wise models.
+ */
+class CV_EXPORTS_W RICInterpolator : public SparseMatchInterpolator
+{
+public:
+    /** @brief K is the number of nearest-neighbor matches considered when fitting a locally affine
+     *  model for a superpixel segment. Lower values make the interpolation noticeably faster.
+     *  The original implementation of @cite Hu2017 uses 32.
+    */
+    CV_WRAP virtual void setK(int k = 32) = 0;
+    /** @copybrief setK
+     *  @see setK
+     */
+    CV_WRAP virtual int getK() const = 0;
+    /** @brief Interface to provide a more elaborated cost map, i.e. edge map, for the edge-aware term.
+     *  This implementation is based on a rather simple gradient-based edge map estimation.
+     *  To use a more complex edge map estimator (e.g. StructuredEdgeDetection that has been
+     *  used in the original publication) that may lead to improved accuracies, the internal
+     *  edge map estimation can be bypassed here.
+     *  @param costMap a type CV_32FC1 Mat is required.
+     *  @see cv::ximgproc::createSuperpixelSLIC
+    */
+    CV_WRAP virtual void setCostMap(const Mat & costMap) = 0;
+    /** @brief Parameter to tune the approximate size of the superpixel used for oversegmentation.
+     *  @see cv::ximgproc::createSuperpixelSLIC
+     */
+    CV_WRAP virtual void setSuperpixelSize(int spSize = 15) = 0;
+    /** @copybrief setSuperpixelSize
+     *  @see setSuperpixelSize
+     */
+    CV_WRAP virtual int getSuperpixelSize() const = 0;
+    /** @brief Parameter defines the number of nearest-neighbor matches for each superpixel considered, when fitting a locally affine
+     *model.
+    */
+    CV_WRAP virtual void setSuperpixelNNCnt(int spNN = 150) = 0;
+    /** @copybrief setSuperpixelNNCnt
+     *  @see setSuperpixelNNCnt
+    */
+    CV_WRAP virtual int getSuperpixelNNCnt() const = 0;
+    /** @brief Parameter to tune enforcement of superpixel smoothness factor used for oversegmentation.
+     *  @see cv::ximgproc::createSuperpixelSLIC
+    */
+    CV_WRAP virtual void setSuperpixelRuler(float ruler = 15.f) = 0;
+    /** @copybrief setSuperpixelRuler
+     *  @see setSuperpixelRuler
+     */
+    CV_WRAP virtual float  getSuperpixelRuler() const = 0;
+    /** @brief Parameter to choose superpixel algorithm variant to use:
+     * - cv::ximgproc::SLICType SLIC segments image using a desired region_size (value: 100)
+     * - cv::ximgproc::SLICType SLICO will optimize using adaptive compactness factor (value: 101)
+     * - cv::ximgproc::SLICType MSLIC will optimize using manifold methods resulting in more content-sensitive superpixels (value: 102).
+     *  @see cv::ximgproc::createSuperpixelSLIC
+    */
+    CV_WRAP virtual void setSuperpixelMode(int mode = 100) = 0;
+    /** @copybrief setSuperpixelMode
+     *  @see setSuperpixelMode
+     */
+    CV_WRAP virtual int getSuperpixelMode() const = 0;
+    /** @brief Alpha is a parameter defining a global weight for transforming geodesic distance into weight.
+     */
+    CV_WRAP virtual void setAlpha(float alpha = 0.7f) = 0;
+    /** @copybrief setAlpha
+     *  @see setAlpha
+     */
+    CV_WRAP virtual float getAlpha() const = 0;
+    /** @brief Parameter defining the number of iterations for piece-wise affine model estimation.
+     */
+    CV_WRAP virtual void setModelIter(int modelIter = 4) = 0;
+    /** @copybrief setModelIter
+     *  @see setModelIter
+     */
+    CV_WRAP virtual int getModelIter() const = 0;
+    /** @brief Parameter to choose whether additional refinement of the piece-wise affine models is employed.
+    */
+    CV_WRAP virtual void setRefineModels(bool refineModles = true) = 0;
+    /** @copybrief setRefineModels
+     *  @see setRefineModels
+     */
+    CV_WRAP virtual bool getRefineModels() const = 0;
+    /** @brief MaxFlow is a threshold to validate the predictions using a certain piece-wise affine model.
+     * If the prediction exceeds the threshold the translational model will be applied instead.
+    */
+    CV_WRAP virtual void setMaxFlow(float maxFlow = 250.f) = 0;
+    /** @copybrief setMaxFlow
+     *  @see setMaxFlow
+     */
+    CV_WRAP virtual float getMaxFlow() const = 0;
+    /** @brief Parameter to choose whether the VariationalRefinement post-processing is employed.
+    */
+    CV_WRAP virtual void setUseVariationalRefinement(bool use_variational_refinement = false) = 0;
+    /** @copybrief setUseVariationalRefinement
+     *  @see setUseVariationalRefinement
+     */
+    CV_WRAP virtual bool  getUseVariationalRefinement() const = 0;
+    /** @brief Sets whether the fastGlobalSmootherFilter() post-processing is employed.
+    */
+    CV_WRAP virtual void setUseGlobalSmootherFilter(bool use_FGS = true) = 0;
+    /** @copybrief setUseGlobalSmootherFilter
+     *  @see setUseGlobalSmootherFilter
+     */
+    CV_WRAP virtual bool getUseGlobalSmootherFilter() const = 0;
+    /** @brief Sets the respective fastGlobalSmootherFilter() parameter.
+     */
+    CV_WRAP virtual void  setFGSLambda(float lambda = 500.f) = 0;
+    /** @copybrief setFGSLambda
+     *  @see setFGSLambda
+     */
+    CV_WRAP virtual float getFGSLambda() const = 0;
+    /** @brief Sets the respective fastGlobalSmootherFilter() parameter.
+     */
+    CV_WRAP virtual void  setFGSSigma(float sigma = 1.5f) = 0;
+    /** @copybrief setFGSSigma
+     *  @see setFGSSigma
+     */
+    CV_WRAP virtual float getFGSSigma() const = 0;
+};
+
+/** @brief Factory method that creates an instance of the
+RICInterpolator.
+*/
+CV_EXPORTS_W
+Ptr<RICInterpolator> createRICInterpolator();
 //! @}
 }
 }
diff --git a/IPL/include/opencv/opencv2/ximgproc/structured_edge_detection.hpp b/IPL/include/opencv/opencv2/ximgproc/structured_edge_detection.hpp
index db6e906..b0eb777 100644
--- a/IPL/include/opencv/opencv2/ximgproc/structured_edge_detection.hpp
+++ b/IPL/include/opencv/opencv2/ximgproc/structured_edge_detection.hpp
@@ -102,11 +102,31 @@ class CV_EXPORTS_W StructuredEdgeDetection : public Algorithm
 
     The algorithm underlies this function is much more robust to texture presence, than common
     approaches, e.g. Sobel
-    @param src source image (RGB, float, in [0;1]) to detect edges
-    @param dst destination image (grayscale, float, in [0;1]) where edges are drawn
+    @param _src source image (RGB, float, in [0;1]) to detect edges
+    @param _dst destination image (grayscale, float, in [0;1]) where edges are drawn
     @sa Sobel, Canny
      */
-    CV_WRAP virtual void detectEdges(const Mat &src, CV_OUT Mat &dst) const = 0;
+    CV_WRAP virtual void detectEdges(cv::InputArray _src, cv::OutputArray _dst) const = 0;
+
+    /** @brief The function computes orientation from edge image.
+
+    @param _src edge image.
+    @param _dst orientation image.
+     */
+    CV_WRAP virtual void computeOrientation(cv::InputArray _src, cv::OutputArray _dst) const = 0;
+
+
+    /** @brief The function performs non-maximum suppression (NMS) on the edge image, suppressing edges where the edge is stronger in the orthogonal direction.
+
+    @param edge_image edge image from detectEdges function.
+    @param orientation_image orientation image from computeOrientation function.
+    @param _dst suppressed image (grayscale, float, in [0;1])
+    @param r radius for NMS suppression.
+    @param s radius for boundary suppression.
+    @param m multiplier for conservative suppression.
+    @param isParallel enables/disables parallel computing.
+     */
+    CV_WRAP virtual void edgesNms(cv::InputArray edge_image, cv::InputArray orientation_image, cv::OutputArray _dst, int r = 2, int s = 0, float m = 1, bool isParallel = true) const = 0;
 };
 
 /*!
diff --git a/IPL/include/opencv/opencv2/ximgproc/weighted_median_filter.hpp b/IPL/include/opencv/opencv2/ximgproc/weighted_median_filter.hpp
index 30a169c..d3da29e 100644
--- a/IPL/include/opencv/opencv2/ximgproc/weighted_median_filter.hpp
+++ b/IPL/include/opencv/opencv2/ximgproc/weighted_median_filter.hpp
@@ -63,12 +63,12 @@ namespace ximgproc
 */
 enum WMFWeightType
 {
-    WMF_EXP, //!< \f$exp(-|I1-I2|^2/(2*sigma^2))\f$
-    WMF_IV1, //!< \f$(|I1-I2|+sigma)^-1\f$
-    WMF_IV2, //!< \f$(|I1-I2|^2+sigma^2)^-1\f$
-    WMF_COS, //!< \f$dot(I1,I2)/(|I1|*|I2|)\f$
-    WMF_JAC, //!< \f$(min(r1,r2)+min(g1,g2)+min(b1,b2))/(max(r1,r2)+max(g1,g2)+max(b1,b2))\f$
-    WMF_OFF //!< unweighted
+    WMF_EXP = 1     , //!< \f$exp(-|I1-I2|^2/(2*sigma^2))\f$
+    WMF_IV1 = 1 << 1, //!< \f$(|I1-I2|+sigma)^-1\f$
+    WMF_IV2 = 1 << 2, //!< \f$(|I1-I2|^2+sigma^2)^-1\f$
+    WMF_COS = 1 << 3, //!< \f$dot(I1,I2)/(|I1|*|I2|)\f$
+    WMF_JAC = 1 << 4, //!< \f$(min(r1,r2)+min(g1,g2)+min(b1,b2))/(max(r1,r2)+max(g1,g2)+max(b1,b2))\f$
+    WMF_OFF = 1 << 5  //!< unweighted
 };
 
 /**
@@ -87,7 +87,8 @@ enum WMFWeightType
 *
 * @sa medianBlur, jointBilateralFilter
 */
-CV_EXPORTS void weightedMedianFilter(InputArray joint, InputArray src, OutputArray dst, int r, double sigma=25.5, WMFWeightType weightType=WMF_EXP, Mat mask=Mat());
+CV_EXPORTS_W void weightedMedianFilter(InputArray joint, InputArray src, OutputArray dst,
+                                       int r, double sigma = 25.5, int weightType = WMF_EXP, InputArray mask = noArray());
 }
 }
 
diff --git a/IPL/include/opencv/opencv2/xobjdetect.hpp b/IPL/include/opencv/opencv2/xobjdetect.hpp
index 45038ab..13dde11 100644
--- a/IPL/include/opencv/opencv2/xobjdetect.hpp
+++ b/IPL/include/opencv/opencv2/xobjdetect.hpp
@@ -43,7 +43,6 @@ the use of this software, even if advised of the possibility of such damage.
 #define __OPENCV_XOBJDETECT_XOBJDETECT_HPP__
 
 #include <opencv2/core.hpp>
-#include <opencv2/highgui.hpp>
 #include <vector>
 #include <string>
 
diff --git a/IPL/include/opencv/opencv2/xphoto.hpp b/IPL/include/opencv/opencv2/xphoto.hpp
index 844ef41..031fbb9 100644
--- a/IPL/include/opencv/opencv2/xphoto.hpp
+++ b/IPL/include/opencv/opencv2/xphoto.hpp
@@ -49,4 +49,8 @@
 #include "xphoto/inpainting.hpp"
 #include "xphoto/white_balance.hpp"
 #include "xphoto/dct_image_denoising.hpp"
+#include "xphoto/bm3d_image_denoising.hpp"
+#include "xphoto/oilpainting.hpp"
+#include "xphoto/tonemap.hpp"
+
 #endif
diff --git a/IPL/include/opencv/opencv2/xphoto/bm3d_image_denoising.hpp b/IPL/include/opencv/opencv2/xphoto/bm3d_image_denoising.hpp
new file mode 100644
index 0000000..5873f4c
--- /dev/null
+++ b/IPL/include/opencv/opencv2/xphoto/bm3d_image_denoising.hpp
@@ -0,0 +1,186 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef __OPENCV_BM3D_IMAGE_DENOISING_HPP__
+#define __OPENCV_BM3D_IMAGE_DENOISING_HPP__
+
+/** @file
+@date Jul 19, 2016
+@author Bartek Pawlik
+*/
+
+#include <opencv2/core.hpp>
+
+namespace cv
+{
+    namespace xphoto
+    {
+        //! @addtogroup xphoto
+        //! @{
+
+        //! BM3D transform types
+        enum TransformTypes
+        {
+            /** Un-normalized Haar transform */
+            HAAR = 0
+        };
+
+        //! BM3D algorithm steps
+        enum Bm3dSteps
+        {
+            /** Execute all steps of the algorithm */
+            BM3D_STEPALL = 0,
+            /** Execute only first step of the algorithm */
+            BM3D_STEP1 = 1,
+            /** Execute only second step of the algorithm */
+            BM3D_STEP2 = 2
+        };
+
+        /** @brief Performs image denoising using the Block-Matching and 3D-filtering algorithm
+        <http://www.cs.tut.fi/~foi/GCF-BM3D/BM3D_TIP_2007.pdf> with several computational
+        optimizations. Noise is expected to be Gaussian white noise.
+
+        @param src Input 8-bit or 16-bit 1-channel image.
+        @param dstStep1 Output image of the first step of BM3D with the same size and type as src.
+        @param dstStep2 Output image of the second step of BM3D with the same size and type as src.
+        @param h Parameter regulating filter strength. Big h value perfectly removes noise but also
+        removes image details, smaller h value preserves details but also preserves some noise.
+        @param templateWindowSize Size in pixels of the template patch that is used for block-matching.
+        Should be power of 2.
+        @param searchWindowSize Size in pixels of the window that is used to perform block-matching.
+        Affects performance linearly: the greater searchWindowSize, the greater the denoising time.
+        Must be larger than templateWindowSize.
+        @param blockMatchingStep1 Block matching threshold for the first step of BM3D (hard thresholding),
+        i.e. maximum distance for which two blocks are considered similar.
+        Value expressed in euclidean distance.
+        @param blockMatchingStep2 Block matching threshold for the second step of BM3D (Wiener filtering),
+        i.e. maximum distance for which two blocks are considered similar.
+        Value expressed in euclidean distance.
+        @param groupSize Maximum size of the 3D group for collaborative filtering.
+        @param slidingStep Sliding step to process every next reference block.
+        @param beta Kaiser window parameter that affects the sidelobe attenuation of the transform of the
+        window. Kaiser window is used in order to reduce border effects. To prevent usage of the window,
+        set beta to zero.
+        @param normType Norm used to calculate distance between blocks. L2 is slower than L1
+        but yields more accurate results.
+        @param step Step of BM3D to be executed. Possible variants are: step 1, step 2, both steps.
+        @param transformType Type of the orthogonal transform used in collaborative filtering step.
+        Currently only Haar transform is supported.
+
+        This function is expected to be applied to grayscale images. Advanced usage of this function
+        can be manual denoising of colored image in different colorspaces.
+
+        @sa
+        fastNlMeansDenoising
+        */
+        CV_EXPORTS_W void bm3dDenoising(
+            InputArray src,
+            InputOutputArray dstStep1,
+            OutputArray dstStep2,
+            float h = 1,
+            int templateWindowSize = 4,
+            int searchWindowSize = 16,
+            int blockMatchingStep1 = 2500,
+            int blockMatchingStep2 = 400,
+            int groupSize = 8,
+            int slidingStep = 1,
+            float beta = 2.0f,
+            int normType = cv::NORM_L2,
+            int step = cv::xphoto::BM3D_STEPALL,
+            int transformType = cv::xphoto::HAAR);
+
+        /** @brief Performs image denoising using the Block-Matching and 3D-filtering algorithm
+        <http://www.cs.tut.fi/~foi/GCF-BM3D/BM3D_TIP_2007.pdf> with several computational
+        optimizations. Noise is expected to be Gaussian white noise.
+
+        @param src Input 8-bit or 16-bit 1-channel image.
+        @param dst Output image with the same size and type as src.
+        @param h Parameter regulating filter strength. Big h value perfectly removes noise but also
+        removes image details, smaller h value preserves details but also preserves some noise.
+        @param templateWindowSize Size in pixels of the template patch that is used for block-matching.
+        Should be power of 2.
+        @param searchWindowSize Size in pixels of the window that is used to perform block-matching.
+        Affects performance linearly: the greater searchWindowSize, the greater the denoising time.
+        Must be larger than templateWindowSize.
+        @param blockMatchingStep1 Block matching threshold for the first step of BM3D (hard thresholding),
+        i.e. maximum distance for which two blocks are considered similar.
+        Value expressed in euclidean distance.
+        @param blockMatchingStep2 Block matching threshold for the second step of BM3D (Wiener filtering),
+        i.e. maximum distance for which two blocks are considered similar.
+        Value expressed in euclidean distance.
+        @param groupSize Maximum size of the 3D group for collaborative filtering.
+        @param slidingStep Sliding step to process every next reference block.
+        @param beta Kaiser window parameter that affects the sidelobe attenuation of the transform of the
+        window. Kaiser window is used in order to reduce border effects. To prevent usage of the window,
+        set beta to zero.
+        @param normType Norm used to calculate distance between blocks. L2 is slower than L1
+        but yields more accurate results.
+        @param step Step of BM3D to be executed. Allowed are only BM3D_STEP1 and BM3D_STEPALL.
+        BM3D_STEP2 is not allowed as it requires basic estimate to be present.
+        @param transformType Type of the orthogonal transform used in collaborative filtering step.
+        Currently only Haar transform is supported.
+
+        This function is expected to be applied to grayscale images. Advanced usage of this function
+        can be manual denoising of colored image in different colorspaces.
+
+        @sa
+        fastNlMeansDenoising
+        */
+        CV_EXPORTS_W void bm3dDenoising(
+            InputArray src,
+            OutputArray dst,
+            float h = 1,
+            int templateWindowSize = 4,
+            int searchWindowSize = 16,
+            int blockMatchingStep1 = 2500,
+            int blockMatchingStep2 = 400,
+            int groupSize = 8,
+            int slidingStep = 1,
+            float beta = 2.0f,
+            int normType = cv::NORM_L2,
+            int step = cv::xphoto::BM3D_STEPALL,
+            int transformType = cv::xphoto::HAAR);
+        //! @}
+    }
+}
+
+#endif // __OPENCV_BM3D_IMAGE_DENOISING_HPP__
diff --git a/IPL/include/opencv/opencv2/xphoto/inpainting.hpp b/IPL/include/opencv/opencv2/xphoto/inpainting.hpp
index 9c40e8c..cd71a9d 100644
--- a/IPL/include/opencv/opencv2/xphoto/inpainting.hpp
+++ b/IPL/include/opencv/opencv2/xphoto/inpainting.hpp
@@ -9,9 +9,14 @@
 //
 //                           License Agreement
 //                For Open Source Computer Vision Library
+//                       (3-clause BSD License)
 //
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2000-2019, Intel Corporation, all rights reserved.
 // Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2009-2016, NVIDIA Corporation, all rights reserved.
+// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved.
+// Copyright (C) 2015-2016, OpenCV Foundation, all rights reserved.
+// Copyright (C) 2015-2016, Itseez Inc., all rights reserved.
 // Third party copyrights are property of their respective owners.
 //
 // Redistribution and use in source and binary forms, with or without modification,
@@ -24,8 +29,9 @@
 //     this list of conditions and the following disclaimer in the documentation
 //     and/or other materials provided with the distribution.
 //
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
+//   * Neither the names of the copyright holders nor the names of the contributors
+//    may be used to endorse or promote products derived from this software
+//    without specific prior written permission.
 //
 // This software is provided by the copyright holders and contributors "as is" and
 // any express or implied warranties, including, but not limited to, the implied
@@ -58,24 +64,48 @@ namespace xphoto
 //! @addtogroup xphoto
 //! @{
 
-    //! various inpainting algorithms
+    //! @brief Various inpainting algorithms
+    //! @sa inpaint
     enum InpaintTypes
     {
         /** This algorithm searches for dominant correspondences (transformations) of
         image patches and tries to seamlessly fill-in the area to be inpainted using this
         transformations */
-        INPAINT_SHIFTMAP = 0
+        INPAINT_SHIFTMAP = 0,
+        /** Performs Frequency Selective Reconstruction (FSR).
+        One of the two quality profiles BEST and FAST can be chosen, depending on the time available for reconstruction.
+        See @cite GenserPCS2018 and @cite SeilerTIP2015 for details.
+
+        The algorithm may be utilized for the following areas of application:
+        1. %Error Concealment (Inpainting).
+           The sampling mask indicates the missing pixels of the distorted input
+           image to be reconstructed.
+        2. Non-Regular Sampling.
+           For more information on how to choose a good sampling mask, please review
+           @cite GroscheICIP2018 and @cite GroscheIST2018.
+
+        1-channel grayscale or 3-channel BGR image are accepted.
+
+        Conventional accepted ranges:
+        - 0-255 for CV_8U
+        - 0-65535 for CV_16U
+        - 0-1 for CV_32F/CV_64F.
+        */
+        INPAINT_FSR_BEST = 1,
+        INPAINT_FSR_FAST = 2                     //!< See #INPAINT_FSR_BEST
     };
 
     /** @brief The function implements different single-image inpainting algorithms.
 
-    See the original paper @cite He2012 for details.
+    See the original papers @cite He2012 (Shiftmap) or @cite GenserPCS2018 and @cite SeilerTIP2015 (FSR) for details.
 
-    @param src source image, it could be of any type and any number of channels from 1 to 4. In case of
+    @param src source image
+    - #INPAINT_SHIFTMAP: it could be of any type and any number of channels from 1 to 4. In case of
     3- and 4-channels images the function expect them in CIELab colorspace or similar one, where first
     color component shows intensity, while second and third shows colors. Nonetheless you can try any
     colorspaces.
-    @param mask mask (CV_8UC1), where non-zero pixels indicate valid image area, while zero pixels
+    - #INPAINT_FSR_BEST or #INPAINT_FSR_FAST: 1-channel grayscale or 3-channel BGR image.
+    @param mask mask (#CV_8UC1), where non-zero pixels indicate valid image area, while zero pixels
     indicate area to be inpainted
     @param dst destination image
     @param algorithmType see xphoto::InpaintTypes
diff --git a/IPL/include/opencv/opencv2/xphoto/oilpainting.hpp b/IPL/include/opencv/opencv2/xphoto/oilpainting.hpp
new file mode 100644
index 0000000..6d5c6cf
--- /dev/null
+++ b/IPL/include/opencv/opencv2/xphoto/oilpainting.hpp
@@ -0,0 +1,41 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+
+#ifndef __OPENCV_OIL_PAINTING_HPP__
+#define __OPENCV_OIL_PAINTING_HPP__
+
+#include <opencv2/core.hpp>
+#include <opencv2/imgproc.hpp>
+
+namespace cv
+{
+namespace xphoto
+{
+
+//! @addtogroup xphoto
+//! @{
+
+/** @brief oilPainting
+See the book @cite Holzmann1988 for details.
+@param src Input three-channel or one channel image (either CV_8UC3 or CV_8UC1)
+@param dst Output image of the same size and type as src.
+@param size neighbouring size is 2*size+1
+@param dynRatio image is divided by dynRatio before histogram processing
+@param code color space conversion code (see ColorConversionCodes). The histogram will use only the first plane.
+*/
+CV_EXPORTS_W void oilPainting(InputArray src, OutputArray dst, int size, int dynRatio, int code);
+/** @brief oilPainting
+See the book @cite Holzmann1988 for details.
+@param src Input three-channel or one channel image (either CV_8UC3 or CV_8UC1)
+@param dst Output image of the same size and type as src.
+@param size neighbouring size is 2*size+1
+@param dynRatio image is divided by dynRatio before histogram processing
+*/
+CV_EXPORTS_W void oilPainting(InputArray src, OutputArray dst, int size, int dynRatio);
+//! @}
+}
+}
+
+#endif // __OPENCV_OIL_PAINTING_HPP__
diff --git a/IPL/include/opencv/opencv2/xphoto/tonemap.hpp b/IPL/include/opencv/opencv2/xphoto/tonemap.hpp
new file mode 100644
index 0000000..9a69d39
--- /dev/null
+++ b/IPL/include/opencv/opencv2/xphoto/tonemap.hpp
@@ -0,0 +1,56 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_XPHOTO_TONEMAP_HPP
+#define OPENCV_XPHOTO_TONEMAP_HPP
+
+#include "opencv2/photo.hpp"
+
+namespace cv { namespace xphoto {
+
+//! @addtogroup xphoto
+//! @{
+
+/** @brief This algorithm decomposes image into two layers: base layer and detail layer using bilateral filter
+and compresses contrast of the base layer thus preserving all the details.
+
+This implementation uses regular bilateral filter from OpenCV.
+
+Saturation enhancement is possible as in cv::TonemapDrago.
+
+For more information see @cite DD02 .
+ */
+class CV_EXPORTS_W TonemapDurand : public Tonemap
+{
+public:
+
+    CV_WRAP virtual float getSaturation() const = 0;
+    CV_WRAP virtual void setSaturation(float saturation) = 0;
+
+    CV_WRAP virtual float getContrast() const = 0;
+    CV_WRAP virtual void setContrast(float contrast) = 0;
+
+    CV_WRAP virtual float getSigmaSpace() const = 0;
+    CV_WRAP virtual void setSigmaSpace(float sigma_space) = 0;
+
+    CV_WRAP virtual float getSigmaColor() const = 0;
+    CV_WRAP virtual void setSigmaColor(float sigma_color) = 0;
+};
+
+/** @brief Creates TonemapDurand object
+
+You need to set the OPENCV_ENABLE_NONFREE option in cmake to use those. Use them at your own risk.
+
+@param gamma gamma value for gamma correction. See createTonemap
+@param contrast resulting contrast on logarithmic scale, i. e. log(max / min), where max and min
+are maximum and minimum luminance values of the resulting image.
+@param saturation saturation enhancement value. See createTonemapDrago
+@param sigma_color bilateral filter sigma in color space
+@param sigma_space bilateral filter sigma in coordinate space
+ */
+CV_EXPORTS_W Ptr<TonemapDurand>
+createTonemapDurand(float gamma = 1.0f, float contrast = 4.0f, float saturation = 1.0f, float sigma_color = 2.0f, float sigma_space = 2.0f);
+
+}} // namespace
+#endif  // OPENCV_XPHOTO_TONEMAP_HPP
diff --git a/IPL/include/opencv/opencv2/xphoto/white_balance.hpp b/IPL/include/opencv/opencv2/xphoto/white_balance.hpp
index d4d68ea..1767f1f 100644
--- a/IPL/include/opencv/opencv2/xphoto/white_balance.hpp
+++ b/IPL/include/opencv/opencv2/xphoto/white_balance.hpp
@@ -58,67 +58,172 @@ namespace xphoto
 //! @addtogroup xphoto
 //! @{
 
-    //! various white balance algorithms
-    enum WhitebalanceTypes
-    {
-        /** perform smart histogram adjustments (ignoring 4% pixels with minimal and maximal
-        values) for each channel */
-        WHITE_BALANCE_SIMPLE = 0,
-        WHITE_BALANCE_GRAYWORLD = 1
-    };
-
-    /** @brief The function implements different algorithm of automatic white balance,
-
-    i.e. it tries to map image's white color to perceptual white (this can be violated due to
-    specific illumination or camera settings).
-
-    @param src
-    @param dst
-    @param algorithmType see xphoto::WhitebalanceTypes
-    @param inputMin minimum value in the input image
-    @param inputMax maximum value in the input image
-    @param outputMin minimum value in the output image
-    @param outputMax maximum value in the output image
-    @sa cvtColor, equalizeHist
-     */
-    CV_EXPORTS_W void balanceWhite(const Mat &src, Mat &dst, const int algorithmType,
-        const float inputMin  = 0.0f, const float inputMax  = 255.0f,
-        const float outputMin = 0.0f, const float outputMax = 255.0f);
-
-    /** @brief Implements a simple grayworld white balance algorithm.
-
-    The function autowbGrayworld scales the values of pixels based on a
-    gray-world assumption which states that the average of all channels
-    should result in a gray image.
-
-    This function adds a modification which thresholds pixels based on their
-    saturation value and only uses pixels below the provided threshold in
-    finding average pixel values.
-
-    Saturation is calculated using the following for a 3-channel RGB image per
-    pixel I and is in the range [0, 1]:
-
-    \f[ \texttt{Saturation} [I] = \frac{\textrm{max}(R,G,B) - \textrm{min}(R,G,B)
-    }{\textrm{max}(R,G,B)} \f]
-
-    A threshold of 1 means that all pixels are used to white-balance, while a
-    threshold of 0 means no pixels are used. Lower thresholds are useful in
-    white-balancing saturated images.
-
-    Currently only works on images of type @ref CV_8UC3.
-
-    @param src Input array.
-    @param dst Output array of the same size and type as src.
-    @param thresh Maximum saturation for a pixel to be included in the
-        gray-world assumption.
-
-    @sa balanceWhite
-     */
-    CV_EXPORTS_W void autowbGrayworld(InputArray src, OutputArray dst,
-        float thresh = 0.5f);
+/** @brief The base class for auto white balance algorithms.
+ */
+class CV_EXPORTS_W WhiteBalancer : public Algorithm
+{
+  public:
+    /** @brief Applies white balancing to the input image
 
+    @param src Input image
+    @param dst White balancing result
+    @sa cvtColor, equalizeHist
+    */
+    CV_WRAP virtual void balanceWhite(InputArray src, OutputArray dst) = 0;
+};
+
+/** @brief A simple white balance algorithm that works by independently stretching
+    each of the input image channels to the specified range. For increased robustness
+    it ignores the top and bottom \f$p\%\f$ of pixel values.
+ */
+class CV_EXPORTS_W SimpleWB : public WhiteBalancer
+{
+  public:
+    /** @brief Input image range minimum value
+    @see setInputMin */
+    CV_WRAP virtual float getInputMin() const = 0;
+    /** @copybrief getInputMin @see getInputMin */
+    CV_WRAP virtual void setInputMin(float val) = 0;
+
+    /** @brief Input image range maximum value
+    @see setInputMax */
+    CV_WRAP virtual float getInputMax() const = 0;
+    /** @copybrief getInputMax @see getInputMax */
+    CV_WRAP virtual void setInputMax(float val) = 0;
+
+    /** @brief Output image range minimum value
+    @see setOutputMin */
+    CV_WRAP virtual float getOutputMin() const = 0;
+    /** @copybrief getOutputMin @see getOutputMin */
+    CV_WRAP virtual void setOutputMin(float val) = 0;
+
+    /** @brief Output image range maximum value
+    @see setOutputMax */
+    CV_WRAP virtual float getOutputMax() const = 0;
+    /** @copybrief getOutputMax @see getOutputMax */
+    CV_WRAP virtual void setOutputMax(float val) = 0;
+
+    /** @brief Percent of top/bottom values to ignore
+    @see setP */
+    CV_WRAP virtual float getP() const = 0;
+    /** @copybrief getP @see getP */
+    CV_WRAP virtual void setP(float val) = 0;
+};
+
+/** @brief Creates an instance of SimpleWB
+ */
+CV_EXPORTS_W Ptr<SimpleWB> createSimpleWB();
+
+/** @brief Gray-world white balance algorithm
+
+This algorithm scales the values of pixels based on a
+gray-world assumption which states that the average of all channels
+should result in a gray image.
+
+It adds a modification which thresholds pixels based on their
+saturation value and only uses pixels below the provided threshold in
+finding average pixel values.
+
+Saturation is calculated using the following for a 3-channel RGB image per
+pixel I and is in the range [0, 1]:
+
+\f[ \texttt{Saturation} [I] = \frac{\textrm{max}(R,G,B) - \textrm{min}(R,G,B)
+}{\textrm{max}(R,G,B)} \f]
+
+A threshold of 1 means that all pixels are used to white-balance, while a
+threshold of 0 means no pixels are used. Lower thresholds are useful in
+white-balancing saturated images.
+
+Currently supports images of type @ref CV_8UC3 and @ref CV_16UC3.
+ */
+class CV_EXPORTS_W GrayworldWB : public WhiteBalancer
+{
+  public:
+    /** @brief Maximum saturation for a pixel to be included in the
+        gray-world assumption
+    @see setSaturationThreshold */
+    CV_WRAP virtual float getSaturationThreshold() const = 0;
+    /** @copybrief getSaturationThreshold @see getSaturationThreshold */
+    CV_WRAP virtual void setSaturationThreshold(float val) = 0;
+};
+
+/** @brief Creates an instance of GrayworldWB
+ */
+CV_EXPORTS_W Ptr<GrayworldWB> createGrayworldWB();
+
+/** @brief More sophisticated learning-based automatic white balance algorithm.
+
+Like @ref GrayworldWB, this algorithm works by applying different gains to the input
+image channels, but their computation is a bit more involved compared to the
+simple gray-world assumption. More details about the algorithm can be found in
+@cite Cheng2015 .
+
+To mask out saturated pixels this function uses only pixels that satisfy the
+following condition:
+
+\f[ \frac{\textrm{max}(R,G,B)}{\texttt{range_max_val}} < \texttt{saturation_thresh} \f]
+
+Currently supports images of type @ref CV_8UC3 and @ref CV_16UC3.
+ */
+class CV_EXPORTS_W LearningBasedWB : public WhiteBalancer
+{
+  public:
+    /** @brief Implements the feature extraction part of the algorithm.
+
+    In accordance with @cite Cheng2015 , computes the following features for the input image:
+    1. Chromaticity of an average (R,G,B) tuple
+    2. Chromaticity of the brightest (R,G,B) tuple (while ignoring saturated pixels)
+    3. Chromaticity of the dominant (R,G,B) tuple (the one that has the highest value in the RGB histogram)
+    4. Mode of the chromaticity palette, which is constructed by taking the 300 most common colors according to
+       the RGB histogram and projecting them onto the chromaticity plane. The mode is the highest-density point
+       of the palette, which is computed by a straightforward fixed-bandwidth kernel density estimator with
+       an Epanechnikov kernel function.
+
+    @param src Input three-channel image (BGR color space is assumed).
+    @param dst An array of four (r,g) chromaticity tuples corresponding to the features listed above.
+    */
+    CV_WRAP virtual void extractSimpleFeatures(InputArray src, OutputArray dst) = 0;
+
+    /** @brief Maximum possible value of the input image (e.g. 255 for 8 bit images,
+               4095 for 12 bit images)
+    @see setRangeMaxVal */
+    CV_WRAP virtual int getRangeMaxVal() const = 0;
+    /** @copybrief getRangeMaxVal @see getRangeMaxVal */
+    CV_WRAP virtual void setRangeMaxVal(int val) = 0;
+
+    /** @brief Threshold that is used to determine saturated pixels, i.e. pixels where at least one of the
+        channels exceeds \f$\texttt{saturation_threshold}\times\texttt{range_max_val}\f$ are ignored.
+    @see setSaturationThreshold */
+    CV_WRAP virtual float getSaturationThreshold() const = 0;
+    /** @copybrief getSaturationThreshold @see getSaturationThreshold */
+    CV_WRAP virtual void setSaturationThreshold(float val) = 0;
+
+    /** @brief Defines the size of one dimension of a three-dimensional RGB histogram that is used internally
+        by the algorithm. It often makes sense to increase the number of bins for images with higher bit depth
+        (e.g. 256 bins for a 12 bit image).
+    @see setHistBinNum */
+    CV_WRAP virtual int getHistBinNum() const = 0;
+    /** @copybrief getHistBinNum @see getHistBinNum */
+    CV_WRAP virtual void setHistBinNum(int val) = 0;
+};
+
+/** @brief Creates an instance of LearningBasedWB
+
+@param path_to_model Path to a .yml file with the model. If not specified, the default model is used
+ */
+CV_EXPORTS_W Ptr<LearningBasedWB> createLearningBasedWB(const String& path_to_model = String());
+
+/** @brief Implements an efficient fixed-point approximation for applying channel gains, which is
+    the last step of multiple white balance algorithms.
+
+@param src Input three-channel image in the BGR color space (either CV_8UC3 or CV_16UC3)
+@param dst Output image of the same size and type as src.
+@param gainB gain for the B channel
+@param gainG gain for the G channel
+@param gainR gain for the R channel
+*/
+CV_EXPORTS_W void applyChannelGains(InputArray src, OutputArray dst, float gainB, float gainG, float gainR);
 //! @}
-
 }
 }
 
diff --git a/IPL/include/processes/IPLUndistort.h b/IPL/include/processes/IPLUndistort.h
index 2ba3b14..ebf5c0b 100644
--- a/IPL/include/processes/IPLUndistort.h
+++ b/IPL/include/processes/IPLUndistort.h
@@ -28,6 +28,7 @@
 
 #include "opencv2/core/core.hpp"
 #include "opencv2/imgproc/imgproc.hpp"
+#include "opencv2/calib3d.hpp"
 #include "opencv2/highgui/highgui.hpp"
 
 /**
diff --git a/IPL/src/processes/IPLCamera.cpp b/IPL/src/processes/IPLCamera.cpp
index 40e3617..be7aa89 100644
--- a/IPL/src/processes/IPLCamera.cpp
+++ b/IPL/src/processes/IPLCamera.cpp
@@ -105,15 +105,15 @@ bool IPLCamera::processInputData(IPLData*, int, bool)
 
     // collect information
     std::stringstream s;
-    s << "<b>Width: </b>" << IPLCameraIO::camera()->get(CV_CAP_PROP_FRAME_WIDTH) << "\n";
-    s << "<b>Height: </b>" << IPLCameraIO::camera()->get(CV_CAP_PROP_FRAME_HEIGHT) << "\n";
-    s << "<b>Brightness: </b>" << IPLCameraIO::camera()->get(CV_CAP_PROP_BRIGHTNESS) << "\n";
-    s << "<b>Contrast: </b>" << IPLCameraIO::camera()->get(CV_CAP_PROP_CONTRAST) << "\n";
-    s << "<b>Saturation: </b>" << IPLCameraIO::camera()->get(CV_CAP_PROP_SATURATION) << "\n";
-    s << "<b>Hue: </b>" << IPLCameraIO::camera()->get(CV_CAP_PROP_HUE) << "\n";
-    s << "<b>Gain: </b>" << IPLCameraIO::camera()->get(CV_CAP_PROP_GAIN) << "\n";
-    s << "<b>Exposure: </b>" << IPLCameraIO::camera()->get(CV_CAP_PROP_EXPOSURE) << "\n";
-    s << "<b>GUID: </b>" << IPLCameraIO::camera()->get(CV_CAP_PROP_GUID) << "";
+    s << "<b>Width: </b>" << IPLCameraIO::camera()->get(cv::CAP_PROP_FRAME_WIDTH) << "\n";
+    s << "<b>Height: </b>" << IPLCameraIO::camera()->get(cv::CAP_PROP_FRAME_HEIGHT) << "\n";
+    s << "<b>Brightness: </b>" << IPLCameraIO::camera()->get(cv::CAP_PROP_BRIGHTNESS) << "\n";
+    s << "<b>Contrast: </b>" << IPLCameraIO::camera()->get(cv::CAP_PROP_CONTRAST) << "\n";
+    s << "<b>Saturation: </b>" << IPLCameraIO::camera()->get(cv::CAP_PROP_SATURATION) << "\n";
+    s << "<b>Hue: </b>" << IPLCameraIO::camera()->get(cv::CAP_PROP_HUE) << "\n";
+    s << "<b>Gain: </b>" << IPLCameraIO::camera()->get(cv::CAP_PROP_GAIN) << "\n";
+    s << "<b>Exposure: </b>" << IPLCameraIO::camera()->get(cv::CAP_PROP_EXPOSURE) << "\n";
+    s << "<b>GUID: </b>" << IPLCameraIO::camera()->get(cv::CAP_PROP_GUID) << "";
 
     addInformation(s.str());
 
diff --git a/IPL/src/processes/IPLCameraCalibration.cpp b/IPL/src/processes/IPLCameraCalibration.cpp
index 461f989..d71ffca 100644
--- a/IPL/src/processes/IPLCameraCalibration.cpp
+++ b/IPL/src/processes/IPLCameraCalibration.cpp
@@ -88,7 +88,7 @@ bool IPLCameraCalibration::processInputData(IPLData* data , int index, bool useO
     notifyProgressEventHandler(-1);
     cv::Mat input;
     cv::Mat output = _image->toCvMat();
-    cv::cvtColor(_image->toCvMat(), input, CV_BGR2GRAY);
+    cv::cvtColor(_image->toCvMat(), input, cv::COLOR_BGR2GRAY);
 
     std::vector<cv::Point2f>              pointBuf;
     cv::Size                              boardSize(targetCols, targetRows);
@@ -127,7 +127,7 @@ bool IPLCameraCalibration::processInputData(IPLData* data , int index, bool useO
         switch(targetType) // Find feature points on the input format
         {
             case 0: // CHESSBOARD
-                found = cv::findChessboardCorners(input, boardSize, pointBuf, CV_CALIB_CB_ADAPTIVE_THRESH | CV_CALIB_CB_FAST_CHECK | CV_CALIB_CB_NORMALIZE_IMAGE);
+                found = cv::findChessboardCorners(input, boardSize, pointBuf, cv::CALIB_CB_ADAPTIVE_THRESH | cv::CALIB_CB_FAST_CHECK | cv::CALIB_CB_NORMALIZE_IMAGE);
             break;
             case 1: //CIRCLES_GRID:
                 found = cv::findCirclesGrid(input, boardSize, pointBuf);
@@ -143,7 +143,7 @@ bool IPLCameraCalibration::processInputData(IPLData* data , int index, bool useO
             // improve the found corners' coordinate accuracy for chessboard
             if(targetType == 0) // CHESSBOARD
             {
-                cv::cornerSubPix(input, pointBuf, cv::Size(11,11), cv::Size(-1,-1), cv::TermCriteria( CV_TERMCRIT_EPS+CV_TERMCRIT_ITER, 30, 0.1 ));
+                cv::cornerSubPix(input, pointBuf, cv::Size(11,11), cv::Size(-1,-1), cv::TermCriteria( cv::TermCriteria::EPS+cv::TermCriteria::MAX_ITER, 30, 0.1 ));
             }
 
             _imagePoints.push_back(pointBuf);
@@ -164,7 +164,7 @@ bool IPLCameraCalibration::processInputData(IPLData* data , int index, bool useO
     if(_mode == CALIBRATION)
     {
         Pattern patternType = (Pattern) targetType;
-        bool result = this->runCalibration(_imagePoints, imageSize, boardSize, patternType, 1, 1, CV_CALIB_FIX_K4|CV_CALIB_FIX_K5, cameraMatrix,
+        bool result = this->runCalibration(_imagePoints, imageSize, boardSize, patternType, 1, 1, cv::CALIB_FIX_K4 | cv::CALIB_FIX_K5, cameraMatrix,
                                 distCoeffs, rvecs, tvecs, reprojErrs, totalAvgErr);
 
         if(result) {
diff --git a/IPL/src/processes/IPLCanny.cpp b/IPL/src/processes/IPLCanny.cpp
index f331652..0ff68ad 100644
--- a/IPL/src/processes/IPLCanny.cpp
+++ b/IPL/src/processes/IPLCanny.cpp
@@ -195,7 +195,7 @@ bool IPLCanny::processInputData(IPLData* data, int, bool useOpenCV)
         notifyProgressEventHandler(-1);
         cv::Mat input;
         cv::Mat output;
-        cvtColor(image->toCvMat(), input, CV_BGR2GRAY);
+        cvtColor(image->toCvMat(), input, cv::COLOR_BGR2GRAY);
         cv::GaussianBlur(input, input, cv::Size(window, window), sigma);
         cv::Canny(input, output, lowThreshold*255, highThreshold*255, window);
 
diff --git a/IPL/src/processes/IPLFeatureDetection.cpp b/IPL/src/processes/IPLFeatureDetection.cpp
index 7653167..a60def2 100644
--- a/IPL/src/processes/IPLFeatureDetection.cpp
+++ b/IPL/src/processes/IPLFeatureDetection.cpp
@@ -69,7 +69,7 @@ bool IPLFeatureDetection::processInputData(IPLData* data, int, bool useOpenCV)
     notifyProgressEventHandler(-1);
     cv::Mat input;
     cv::Mat output;
-    cvtColor(image->toCvMat(), input, CV_BGR2GRAY);
+    cvtColor(image->toCvMat(), input, cv::COLOR_BGR2GRAY);
     input.copyTo(output);
 
     //cv::OrbFeatureDetector detector;
diff --git a/IPL/src/processes/IPLFeatureMatcher.cpp b/IPL/src/processes/IPLFeatureMatcher.cpp
index ab05e51..9886eb6 100644
--- a/IPL/src/processes/IPLFeatureMatcher.cpp
+++ b/IPL/src/processes/IPLFeatureMatcher.cpp
@@ -105,8 +105,8 @@ bool IPLFeatureMatcher::processInputData(IPLData* data , int index, bool useOpen
         cv::Mat output;
         std::vector<cv::KeyPoint> keypoints1 = *_keypoints1->get();
         std::vector<cv::KeyPoint> keypoints2 = *_keypoints2->get();
-        cv::cvtColor(_image1->toCvMat(), input1, CV_BGR2GRAY);
-        cv::cvtColor(_image2->toCvMat(), input2, CV_BGR2GRAY);
+        cv::cvtColor(_image1->toCvMat(), input1, cv::COLOR_BGR2GRAY);
+        cv::cvtColor(_image2->toCvMat(), input2, cv::COLOR_BGR2GRAY);
 
         std::stringstream s1;
         s1 << "Number of Keypoints 1: ";
diff --git a/IPL/src/processes/IPLFloodFill.cpp b/IPL/src/processes/IPLFloodFill.cpp
index b4ae804..de24f94 100644
--- a/IPL/src/processes/IPLFloodFill.cpp
+++ b/IPL/src/processes/IPLFloodFill.cpp
@@ -74,7 +74,7 @@ bool IPLFloodFill::processInputData(IPLData* data, int, bool)
     notifyProgressEventHandler(-1);
     cv::Mat input;
     cv::Mat output;
-    cvtColor(image->toCvMat(), input, CV_BGR2GRAY);
+    cvtColor(image->toCvMat(), input, cv::COLOR_BGR2GRAY);
 
     /// Detector parameters
     int blockSize = 2;
@@ -90,7 +90,7 @@ bool IPLFloodFill::processInputData(IPLData* data, int, bool)
     cv::normalize( output, dst_norm, 0, 255, cv::NORM_MINMAX, CV_32FC1, cv::Mat() );
     cv::convertScaleAbs( dst_norm, output );
 
-    cvtColor(output, output, CV_GRAY2BGR);
+    cvtColor(output, output, cv::COLOR_GRAY2BGR);
 
     /// Drawing a circle around corners
     for( int j = 0; j < dst_norm.rows ; j++ )
diff --git a/IPL/src/processes/IPLGoodFeaturesToTrack.cpp b/IPL/src/processes/IPLGoodFeaturesToTrack.cpp
index 0e73a4d..1245efa 100644
--- a/IPL/src/processes/IPLGoodFeaturesToTrack.cpp
+++ b/IPL/src/processes/IPLGoodFeaturesToTrack.cpp
@@ -77,7 +77,7 @@ bool IPLGoodFeaturesToTrack::processInputData(IPLData* data, int, bool)
     cv::Mat overlay = image->toCvMat();
     cv::Mat result = cv::Mat(image->height(), image->width(), CV_8UC1);
     result = cv::Scalar(0);
-    cvtColor(image->toCvMat(), input, CV_BGR2GRAY);
+    cvtColor(image->toCvMat(), input, cv::COLOR_BGR2GRAY);
 
     std::vector<cv::Vec2f> corners;
     cv::goodFeaturesToTrack(input, corners, maxCorners, qualityLevel, minDistance, cv::noArray(), block_size, useHarrisDetector, k);
diff --git a/IPL/src/processes/IPLGradientOperator.cpp b/IPL/src/processes/IPLGradientOperator.cpp
index 854171a..1279cee 100644
--- a/IPL/src/processes/IPLGradientOperator.cpp
+++ b/IPL/src/processes/IPLGradientOperator.cpp
@@ -158,7 +158,7 @@ bool IPLGradientOperator::roberts(IPLImage* image)
     cv::Mat input;
     cv::Mat gX;
     cv::Mat gY;
-    cvtColor(image->toCvMat(),input,CV_BGR2GRAY);
+    cvtColor(image->toCvMat(),input,cv::COLOR_BGR2GRAY);
 
     filter2D(input,gX,CV_32F,rxKernel);
     filter2D(input,gY,CV_32F,ryKernel);
@@ -204,7 +204,7 @@ bool IPLGradientOperator::sobel(IPLImage* image)
    cv::Mat input;
    cv::Mat gX;
    cv::Mat gY;
-   cvtColor(image->toCvMat(),input,CV_BGR2GRAY);
+   cvtColor(image->toCvMat(),input,cv::COLOR_BGR2GRAY);
  
    Sobel(input,gX,CV_32F,1,0,kSize);
    Sobel(input,gY,CV_32F,0,1,kSize);
diff --git a/IPL/src/processes/IPLHarrisCorner.cpp b/IPL/src/processes/IPLHarrisCorner.cpp
index aa46d31..f90fb60 100644
--- a/IPL/src/processes/IPLHarrisCorner.cpp
+++ b/IPL/src/processes/IPLHarrisCorner.cpp
@@ -73,7 +73,7 @@ bool IPLHarrisCorner::processInputData(IPLData* data, int, bool useOpenCV)
     notifyProgressEventHandler(-1);
     cv::Mat input;
     cv::Mat output;
-    cvtColor(image->toCvMat(), input, CV_BGR2GRAY);
+    cvtColor(image->toCvMat(), input, cv::COLOR_BGR2GRAY);
 
     /// Detector parameters
     int blockSize = 2;
@@ -89,7 +89,7 @@ bool IPLHarrisCorner::processInputData(IPLData* data, int, bool useOpenCV)
     cv::normalize( output, dst_norm, 0, 255, cv::NORM_MINMAX, CV_32FC1, cv::Mat() );
     cv::convertScaleAbs( dst_norm, output );
 
-    cvtColor(output, output, CV_GRAY2BGR);
+    cvtColor(output, output, cv::COLOR_GRAY2BGR);
 
     /// Drawing a circle around corners
     for( int j = 0; j < dst_norm.rows ; j++ )
diff --git a/IPL/src/processes/IPLHoughCircles.cpp b/IPL/src/processes/IPLHoughCircles.cpp
index ad5de06..3a627e7 100644
--- a/IPL/src/processes/IPLHoughCircles.cpp
+++ b/IPL/src/processes/IPLHoughCircles.cpp
@@ -72,11 +72,11 @@ bool IPLHoughCircles::processInputData(IPLData* data, int, bool)
     cv::Mat overlay = image->toCvMat();
     cv::Mat result = cv::Mat(image->height(), image->width(), CV_8UC1);
     result = cv::Scalar(0);
-    cvtColor(image->toCvMat(), input, CV_BGR2GRAY);
+    cvtColor(image->toCvMat(), input, cv::COLOR_BGR2GRAY);
     overlay.convertTo(overlay, CV_8UC3);
 
     std::vector<cv::Vec3f> circles;
-    cv::HoughCircles(input, circles, CV_HOUGH_GRADIENT, 2, input.rows/4, thresholdCanny, thresholdCenter, minRadius, maxRadius);
+    cv::HoughCircles(input, circles, cv::HOUGH_GRADIENT, 2, input.rows/4, thresholdCanny, thresholdCenter, minRadius, maxRadius);
 
     // WARNING: cv::HoughCircles does not work in debug mode!!!
     //          destroys the std::vector<cv::Vec3f> circles;
diff --git a/IPL/src/processes/IPLHoughLineSegments.cpp b/IPL/src/processes/IPLHoughLineSegments.cpp
index ffd6210..6308784 100644
--- a/IPL/src/processes/IPLHoughLineSegments.cpp
+++ b/IPL/src/processes/IPLHoughLineSegments.cpp
@@ -78,7 +78,7 @@ bool IPLHoughLineSegments::processInputData(IPLData* data, int, bool)
     cv::Mat overlay = image->toCvMat();
     cv::Mat result = cv::Mat(image->height(), image->width(), CV_8UC1);
     result = cv::Scalar(0);
-    cvtColor(image->toCvMat(), input, CV_BGR2GRAY);
+    cvtColor(image->toCvMat(), input, cv::COLOR_BGR2GRAY);
     overlay.convertTo(overlay, CV_8UC3);
 
     std::vector<cv::Vec4i> lines;
@@ -93,10 +93,10 @@ bool IPLHoughLineSegments::processInputData(IPLData* data, int, bool)
     for(int i = 0; i < (int) lines.size(); i++ )
     {
        cv::Vec4i l = lines[i];
-       cv::line(overlay, cv::Point(l[0], l[1]), cv::Point(l[2], l[3]), cv::Scalar(0,0,255), 2, CV_AA);
+       cv::line(overlay, cv::Point(l[0], l[1]), cv::Point(l[2], l[3]), cv::Scalar(0,0,255), 2, cv::LINE_AA);
 
        // raw result
-       cv::line(result, cv::Point(l[0], l[1]), cv::Point(l[2], l[3]), cv::Scalar(255), 1, CV_AA);
+       cv::line(result, cv::Point(l[0], l[1]), cv::Point(l[2], l[3]), cv::Scalar(255), 1, cv::LINE_AA);
      }
 
     _overlay = new IPLImage(overlay);
diff --git a/IPL/src/processes/IPLHoughLines.cpp b/IPL/src/processes/IPLHoughLines.cpp
index 84783a1..f32c8cd 100644
--- a/IPL/src/processes/IPLHoughLines.cpp
+++ b/IPL/src/processes/IPLHoughLines.cpp
@@ -78,7 +78,7 @@ bool IPLHoughLines::processInputData(IPLData* data, int, bool)
     cv::Mat overlay = image->toCvMat();
     cv::Mat result = cv::Mat(image->height(), image->width(), CV_8UC1);
     result = cv::Scalar(0);
-    cvtColor(image->toCvMat(), input, CV_BGR2GRAY);
+    cvtColor(image->toCvMat(), input, cv::COLOR_BGR2GRAY);
     overlay.convertTo(overlay, CV_8UC3);
 
     std::vector<cv::Vec2f> lines;
@@ -100,10 +100,10 @@ bool IPLHoughLines::processInputData(IPLData* data, int, bool)
         pt1.y = cvRound(y0 + 1000*(a));
         pt2.x = cvRound(x0 - 1000*(-b));
         pt2.y = cvRound(y0 - 1000*(a));
-       cv::line(overlay, pt1, pt2, cv::Scalar(0,0,255), 2, CV_AA);
+       cv::line(overlay, pt1, pt2, cv::Scalar(0,0,255), 2, cv::LINE_AA);
 
        // raw result
-       cv::line(result, pt1, pt2, cv::Scalar(255), 1, CV_AA);
+       cv::line(result, pt1, pt2, cv::Scalar(255), 1, cv::LINE_AA);
      }
 
     _overlay = new IPLImage(overlay);
diff --git a/IPL/src/processes/IPLLaplacian.cpp b/IPL/src/processes/IPLLaplacian.cpp
index 1e85927..9ba0b59 100644
--- a/IPL/src/processes/IPLLaplacian.cpp
+++ b/IPL/src/processes/IPLLaplacian.cpp
@@ -74,7 +74,7 @@ bool IPLLaplacian::processInputData(IPLData* data, int, bool useOpenCV)
     notifyProgressEventHandler(-1);
     cv::Mat input;
     cv::Mat output;
-    cvtColor(image->toCvMat(), input, CV_BGR2GRAY);
+    cvtColor(image->toCvMat(), input, cv::COLOR_BGR2GRAY);
 
     /// Detector parameters
     int blockSize = 2;
@@ -90,7 +90,7 @@ bool IPLLaplacian::processInputData(IPLData* data, int, bool useOpenCV)
     cv::normalize( output, dst_norm, 0, 255, cv::NORM_MINMAX, CV_32FC1, cv::Mat() );
     cv::convertScaleAbs( dst_norm, output );
 
-    cvtColor(output, output, CV_GRAY2BGR);
+    cvtColor(output, output, cv::COLOR_GRAY2BGR);
 
     /// Drawing a circle around corners
     for( int j = 0; j < dst_norm.rows ; j++ )
diff --git a/IPL/src/processes/IPLMatchTemplate.cpp b/IPL/src/processes/IPLMatchTemplate.cpp
index 0b61ad8..e7220e6 100644
--- a/IPL/src/processes/IPLMatchTemplate.cpp
+++ b/IPL/src/processes/IPLMatchTemplate.cpp
@@ -73,10 +73,10 @@ bool IPLMatchTemplate::processInputData(IPLData* data, int, bool)
     cv::Mat overlay = image->toCvMat();
     cv::Mat result = cv::Mat(image->height(), image->width(), CV_8UC1);
     result = cv::Scalar(0);
-    cvtColor(image->toCvMat(), input, CV_BGR2GRAY);
+    cvtColor(image->toCvMat(), input, cv::COLOR_BGR2GRAY);
 
     std::vector<cv::Vec3f> circles;
-    cv::HoughCircles(input, circles, CV_HOUGH_GRADIENT, 2, input.rows/4, thresholdCanny, thresholdCenter, minRadius, maxRadius);
+    cv::HoughCircles(input, circles, cv::HOUGH_GRADIENT, 2, input.rows/4, thresholdCanny, thresholdCenter, minRadius, maxRadius);
 
     // WARNING: cv::HoughCircles does not work in debug mode!!!
     //          destroys the std::vector<cv::Vec3f> circles;
diff --git a/IPL/src/processes/IPLNormalizeIllumination.cpp b/IPL/src/processes/IPLNormalizeIllumination.cpp
index 5d44ded..882f02d 100644
--- a/IPL/src/processes/IPLNormalizeIllumination.cpp
+++ b/IPL/src/processes/IPLNormalizeIllumination.cpp
@@ -73,7 +73,7 @@ bool IPLNormalizeIllumination::processInputData(IPLData* data, int, bool)
     cv::Mat *lMat;
     image->toCvMat().convertTo(bgr32,CV_32F);
     bgr32 *= 1.f/255.f;
-    cvtColor(bgr32,labMat,CV_BGR2Lab);
+    cvtColor(bgr32,labMat,cv::COLOR_BGR2Lab);
     split(labMat,splitLabMats);
     // grab the lightness channel
     lMat = &splitLabMats[0];
@@ -119,7 +119,7 @@ bool IPLNormalizeIllumination::processInputData(IPLData* data, int, bool)
     int from_to[] = { 0,0 };
     mixChannels(&result,1,&labMat,1,from_to,1);
 
-    cvtColor(labMat,rgbResult32,CV_Lab2BGR);
+    cvtColor(labMat,rgbResult32,cv::COLOR_Lab2BGR);
     rgbResult32 *= 255.f;
     rgbResult32.convertTo(rgbResult,CV_8U);
 
diff --git a/IPL/src/processes/IPLOpticalFlow.cpp b/IPL/src/processes/IPLOpticalFlow.cpp
index 2b79921..50e7dc3 100644
--- a/IPL/src/processes/IPLOpticalFlow.cpp
+++ b/IPL/src/processes/IPLOpticalFlow.cpp
@@ -73,9 +73,9 @@ bool IPLOpticalFlow::processInputData(IPLData* data, int, bool useOpenCV)
     // Obtain first image
     if(_image_prev.cols == 0)
         _image_prev = image->toCvMat();
-        //cvtColor(image->toCvMat(), _image_prev, CV_BGR2GRAY);
+        //cvtColor(image->toCvMat(), _image_prev, cv::COLOR_BGR2GRAY);
 
-    //cvtColor(image->toCvMat(), _image_next, CV_BGR2GRAY);
+    //cvtColor(image->toCvMat(), _image_next, cv::COLOR_BGR2GRAY);
     _image_next = image->toCvMat();
 
     cv::Mat flow;
diff --git a/IPL/src/processes/IPLScharr.cpp b/IPL/src/processes/IPLScharr.cpp
index 0fcb07d..71593e0 100644
--- a/IPL/src/processes/IPLScharr.cpp
+++ b/IPL/src/processes/IPLScharr.cpp
@@ -74,7 +74,7 @@ bool IPLScharr::processInputData(IPLData* data, int, bool useOpenCV)
     notifyProgressEventHandler(-1);
     cv::Mat input;
     cv::Mat output;
-    cvtColor(image->toCvMat(), input, CV_BGR2GRAY);
+    cvtColor(image->toCvMat(), input, cv::COLOR_BGR2GRAY);
 
     /// Detector parameters
     int blockSize = 2;
@@ -90,7 +90,7 @@ bool IPLScharr::processInputData(IPLData* data, int, bool useOpenCV)
     cv::normalize( output, dst_norm, 0, 255, cv::NORM_MINMAX, CV_32FC1, cv::Mat() );
     cv::convertScaleAbs( dst_norm, output );
 
-    cvtColor(output, output, CV_GRAY2BGR);
+    cvtColor(output, output, cv::COLOR_GRAY2BGR);
 
     /// Drawing a circle around corners
     for( int j = 0; j < dst_norm.rows ; j++ )
diff --git a/IPL/src/processes/IPLSobel.cpp b/IPL/src/processes/IPLSobel.cpp
index 5af6379..1a930fc 100644
--- a/IPL/src/processes/IPLSobel.cpp
+++ b/IPL/src/processes/IPLSobel.cpp
@@ -74,7 +74,7 @@ bool IPLSobel::processInputData(IPLData* data, int, bool useOpenCV)
     notifyProgressEventHandler(-1);
     cv::Mat input;
     cv::Mat output;
-    cvtColor(image->toCvMat(), input, CV_BGR2GRAY);
+    cvtColor(image->toCvMat(), input, cv::COLOR_BGR2GRAY);
 
     /// Detector parameters
     int blockSize = 2;
@@ -90,7 +90,7 @@ bool IPLSobel::processInputData(IPLData* data, int, bool useOpenCV)
     cv::normalize( output, dst_norm, 0, 255, cv::NORM_MINMAX, CV_32FC1, cv::Mat() );
     cv::convertScaleAbs( dst_norm, output );
 
-    cvtColor(output, output, CV_GRAY2BGR);
+    cvtColor(output, output, cv::COLOR_GRAY2BGR);
 
     /// Drawing a circle around corners
     for( int j = 0; j < dst_norm.rows ; j++ )