Skip to content

Commit deb5985

Browse files
committed
[Refactor] vgg11
1 parent ab35e3e commit deb5985

File tree

8 files changed

+552
-388
lines changed

8 files changed

+552
-388
lines changed

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ project(
55
VERSION 0.1
66
LANGUAGES C CXX CUDA)
77

8-
set(TensorRT_7_8_10_TARGETS mlp lenet)
8+
set(TensorRT_7_8_10_TARGETS mlp lenet alexnet vgg)
99

1010
set(TensorRT_8_TARGETS)
1111

vgg/CMakeLists.txt

Lines changed: 36 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,43 @@
1-
cmake_minimum_required(VERSION 2.6)
1+
cmake_minimum_required(VERSION 3.14)
22

3-
project(vgg)
3+
project(
4+
vgg11
5+
VERSION 0.1
6+
LANGUAGES C CXX CUDA)
47

5-
add_definitions(-std=c++11)
8+
if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
9+
set(CMAKE_CUDA_ARCHITECTURES
10+
60
11+
70
12+
72
13+
75
14+
80
15+
86
16+
89)
17+
endif()
618

7-
option(CUDA_USE_STATIC_CUDA_RUNTIME OFF)
8-
set(CMAKE_CXX_STANDARD 11)
9-
set(CMAKE_BUILD_TYPE Debug)
19+
set(CMAKE_CXX_STANDARD 17)
20+
set(CMAKE_CXX_STANDARD_REQUIRED ON)
21+
set(CMAKE_CUDA_STANDARD 17)
22+
set(CMAKE_CUDA_STANDARD_REQUIRED ON)
23+
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
24+
set(CMAKE_INCLUDE_CURRENT_DIR TRUE)
25+
# Default to a Debug build only when no build type has been chosen, so that a
# user-supplied -DCMAKE_BUILD_TYPE=<type> is honoured instead of being
# overwritten on every configure. FORCE is still needed inside the guard
# because project() may already have created an empty cache entry.
if(NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE
      "Debug"
      CACHE STRING "Build type for this project" FORCE)
endif()
1028

11-
include_directories(${PROJECT_SOURCE_DIR}/include)
12-
# include and link dirs of cuda and tensorrt, you need adapt them if yours are different
13-
# cuda
14-
include_directories(/usr/local/cuda/include)
15-
link_directories(/usr/local/cuda/lib64)
16-
# tensorrt
17-
include_directories(/usr/include/x86_64-linux-gnu/)
18-
link_directories(/usr/lib/x86_64-linux-gnu/)
29+
option(CUDA_USE_STATIC_CUDA_RUNTIME "Use static cudaruntime library" OFF)
1930

20-
add_executable(vgg ${PROJECT_SOURCE_DIR}/vgg11.cpp)
21-
target_link_libraries(vgg nvinfer)
22-
target_link_libraries(vgg cudart)
31+
find_package(Threads REQUIRED)
32+
find_package(CUDAToolkit REQUIRED)
2333

24-
add_definitions(-O2 -pthread)
34+
if(NOT TARGET TensorRT::TensorRT)
35+
include(FindTensorRT.cmake)
36+
endif()
2537

38+
add_executable(${PROJECT_NAME} vgg11.cpp)
39+
40+
target_include_directories(${PROJECT_NAME} PUBLIC ${OpenCV_INCLUDE_DIRS})
41+
42+
target_link_libraries(${PROJECT_NAME} PUBLIC Threads::Threads CUDA::cudart
43+
TensorRT::TensorRT)

vgg/FindTensorRT.cmake

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
# This file creates an ALIAS (TensorRT::TensorRT) of a non-GLOBAL IMPORTED
# target, which CMake supports only from version 3.18 onwards.
cmake_minimum_required(VERSION 3.18)
2+
3+
set(TRT_VERSION
4+
$ENV{TRT_VERSION}
5+
CACHE STRING
6+
"TensorRT version, e.g. \"8.6.1.6\" or \"8.6.1.6+cuda12.0.1.011\"")
7+
8+
# find TensorRT include folder
9+
if(NOT TensorRT_INCLUDE_DIR)
10+
if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
11+
set(TensorRT_INCLUDE_DIR
12+
"/usr/local/cuda/targets/aarch64-linux/include"
13+
CACHE PATH "TensorRT_INCLUDE_DIR")
14+
else()
15+
set(TensorRT_INCLUDE_DIR
16+
"/usr/include/x86_64-linux-gnu"
17+
CACHE PATH "TensorRT_INCLUDE_DIR")
18+
endif()
19+
message(STATUS "TensorRT: ${TensorRT_INCLUDE_DIR}")
20+
endif()
21+
22+
# find TensorRT library folder
23+
if(NOT TensorRT_LIBRARY_DIR)
24+
if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
25+
set(TensorRT_LIBRARY_DIR
26+
"/usr/lib/aarch64-linux-gnu/tegra"
27+
CACHE PATH "TensorRT_LIBRARY_DIR")
28+
else()
29+
# x86_64 default: search for the TensorRT shared libraries under /usr/lib;
# /usr/include/x86_64-linux-gnu is the header location, not a library dir.
set(TensorRT_LIBRARY_DIR
    "/usr/lib/x86_64-linux-gnu"
    CACHE PATH "TensorRT_LIBRARY_DIR")
32+
endif()
33+
message(STATUS "TensorRT: ${TensorRT_LIBRARY_DIR}")
34+
endif()
35+
36+
set(TensorRT_LIBRARIES)
37+
38+
message(STATUS "Found TensorRT lib: ${TensorRT_LIBRARIES}")
39+
40+
# process for different TensorRT version
41+
if(DEFINED TRT_VERSION AND NOT TRT_VERSION STREQUAL "")
42+
string(REGEX MATCH "([0-9]+)" _match ${TRT_VERSION})
43+
set(TRT_MAJOR_VERSION "${_match}")
44+
set(_modules nvinfer nvinfer_plugin)
45+
46+
if(TRT_MAJOR_VERSION GREATER_EQUAL 8)
47+
list(APPEND _modules nvinfer_vc_plugin nvinfer_dispatch nvinfer_lean)
48+
endif()
49+
else()
50+
message(FATAL_ERROR "Please set a environment variable \"TRT_VERSION\"")
51+
endif()
52+
53+
# find and add all modules of TensorRT into list
54+
foreach(lib IN LISTS _modules)
55+
find_library(
56+
TensorRT_${lib}_LIBRARY
57+
NAMES ${lib}
58+
HINTS ${TensorRT_LIBRARY_DIR})
59+
# find_library() stores <VAR>-NOTFOUND when a module cannot be located.
# Appending that value would put a bogus entry on the link line, so missing
# optional modules are skipped with a warning, while a missing core nvinfer
# library stops the configure step immediately.
if(TensorRT_${lib}_LIBRARY)
  list(APPEND TensorRT_LIBRARIES "${TensorRT_${lib}_LIBRARY}")
elseif("${lib}" STREQUAL "nvinfer")
  message(
    FATAL_ERROR
      "TensorRT library \"nvinfer\" not found (hint: ${TensorRT_LIBRARY_DIR})")
else()
  message(WARNING "TensorRT module \"${lib}\" not found, skipping")
endif()
60+
endforeach()
61+
62+
# make the "TensorRT target"
63+
add_library(TensorRT IMPORTED INTERFACE)
64+
add_library(TensorRT::TensorRT ALIAS TensorRT)
65+
target_link_libraries(TensorRT INTERFACE ${TensorRT_LIBRARIES})
66+
67+
set_target_properties(
68+
TensorRT
69+
PROPERTIES C_STANDARD 17
70+
CXX_STANDARD 17
71+
POSITION_INDEPENDENT_CODE ON
72+
SKIP_BUILD_RPATH TRUE
73+
BUILD_WITH_INSTALL_RPATH TRUE
74+
INSTALL_RPATH "$\{ORIGIN\}"
75+
INTERFACE_INCLUDE_DIRECTORIES "${TensorRT_INCLUDE_DIR}")
76+
77+
unset(TRT_MAJOR_VERSION)
78+
unset(_modules)

vgg/README.md

Lines changed: 44 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,33 +1,45 @@
1-
# vgg
2-
3-
VGG 11-layer model (configuration "A") from
4-
"Very Deep Convolutional Networks For Large-Scale Image Recognition" <https://arxiv.org/pdf/1409.1556.pdf>
5-
6-
For the Pytorch implementation, you can refer to [pytorchx/vgg](https://github.com/wang-xinyu/pytorchx/tree/master/vgg)
7-
8-
VGG's architecture is simple, just some conv, relu, maxpool, and fc layers.
9-
10-
```
11-
// 1. generate vgg.wts from [pytorchx/vgg](https://github.com/wang-xinyu/pytorchx/tree/master/vgg)
12-
13-
// 2. put vgg.wts into tensorrtx/vgg
14-
15-
// 3. build and run
16-
17-
cd tensorrtx/vgg
18-
19-
mkdir build
20-
21-
cd build
22-
23-
cmake ..
24-
25-
make
26-
27-
sudo ./vgg -s // serialize model to plan file i.e. 'vgg.engine'
28-
sudo ./vgg -d // deserialize plan file and run inference
29-
30-
// 4. see if the output is same as pytorchx/vgg
1+
# VGG
2+
3+
## Introduction
4+
5+
This is a TensorRT implementation of the VGG 11-layer model (configuration "A") from [Very Deep Convolutional Networks For Large-Scale Image Recognition](https://arxiv.org/pdf/1409.1556.pdf). For the PyTorch implementation, refer to [torchvision's implementation](https://github.com/pytorch/vision/blob/main/torchvision/models/vgg.py); to generate the `.wts` file, refer to [pytorchx/vgg](https://github.com/wang-xinyu/pytorchx/tree/master/vgg).
6+
7+
VGG's architecture is simple, just some conv, relu, maxpool, and fc layers, e.g., for VGG11:
8+
9+
```txt
10+
VGG(
11+
(features): Sequential(
12+
(0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
13+
(1): ReLU(inplace=True)
14+
(2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
15+
(3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
16+
(4): ReLU(inplace=True)
17+
(5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
18+
(6): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
19+
(7): ReLU(inplace=True)
20+
(8): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
21+
(9): ReLU(inplace=True)
22+
(10): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
23+
(11): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
24+
(12): ReLU(inplace=True)
25+
(13): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
26+
(14): ReLU(inplace=True)
27+
(15): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
28+
(16): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
29+
(17): ReLU(inplace=True)
30+
(18): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
31+
(19): ReLU(inplace=True)
32+
(20): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
33+
)
34+
(avgpool): AdaptiveAvgPool2d(output_size=(7, 7))
35+
(classifier): Sequential(
36+
(0): Linear(in_features=25088, out_features=4096, bias=True)
37+
(1): ReLU(inplace=True)
38+
(2): Dropout(p=0.5, inplace=False)
39+
(3): Linear(in_features=4096, out_features=4096, bias=True)
40+
(4): ReLU(inplace=True)
41+
(5): Dropout(p=0.5, inplace=False)
42+
(6): Linear(in_features=4096, out_features=1000, bias=True)
43+
)
44+
)
3145
```
32-
33-

0 commit comments

Comments
 (0)