Skip to content

Commit c00683d

Browse files
committed
Merge branch 'github_develop' into github_master
2 parents d79973e + c758cdf commit c00683d

31 files changed

+1453
-114
lines changed

CHANGELOG.md

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,29 @@
22

33
This is a list of notable changes to Hyperscan, in reverse chronological order.
44

5+
## [5.3.0] 2020-05-15
6+
- Improvement on literal matcher "Teddy" performance, including support for
7+
Intel(R) AVX-512 Vector Byte Manipulation Instructions (Intel(R) AVX-512
8+
VBMI).
9+
- Improvement on single-byte/two-byte matching performance, including support
10+
for Intel(R) Advanced Vector Extensions 512 (Intel(R) AVX-512).
11+
- hsbench: add hyphen support for -T option.
12+
- tools/fuzz: add test scripts for synthetic pattern generation.
13+
- Bugfix for acceleration path analysis in LimEx NFA.
14+
- Bugfix for duplicate matches for Small-write engine.
15+
- Bugfix for UTF8 checking problem for hscollider.
16+
- Bugfix for issue #205: avoid crash of `hs_compile_lit_multi()` with clang and
17+
ASAN.
18+
- Bugfix for issue #211: fix error in `db_check_platform()` function.
19+
- Bugfix for issue #217: fix cmake parsing issue of CPU arch for non-English
20+
locale.
21+
- Bugfix for issue #228: avoid undefined behavior when calling `close()` after
22+
`fdopendir()` in `loadExpressions()`.
23+
- Bugfix for issue #239: fix hyperscan compile issue under gcc-10.
24+
- Add VLAN packets processing capability in pcap analysis script. (#214)
25+
- Avoid extra convert instruction for "Noodle". (#221)
26+
- Add Hyperscan version macro in `hs.h`. (#222)
27+
528
## [5.2.1] 2019-10-13
629
- Bugfix for issue #186: fix compile issue when `BUILD_SHARED_LIBS` is on in
730
release mode.

CMakeLists.txt

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@ cmake_minimum_required (VERSION 2.8.11)
22
project (hyperscan C CXX)
33

44
set (HS_MAJOR_VERSION 5)
5-
set (HS_MINOR_VERSION 2)
6-
set (HS_PATCH_VERSION 1)
5+
set (HS_MINOR_VERSION 3)
6+
set (HS_PATCH_VERSION 0)
77
set (HS_VERSION ${HS_MAJOR_VERSION}.${HS_MINOR_VERSION}.${HS_PATCH_VERSION})
88

99
set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
@@ -187,9 +187,9 @@ else()
187187
set (EXEC_ARGS ${CC_ARG1} -c -Q --help=target -march=native -mtune=native)
188188
execute_process(COMMAND ${CMAKE_C_COMPILER} ${EXEC_ARGS}
189189
OUTPUT_VARIABLE _GCC_OUTPUT)
190-
string(FIND "${_GCC_OUTPUT}" "Known" POS)
191-
string(SUBSTRING "${_GCC_OUTPUT}" 0 ${POS} _GCC_OUTPUT)
192-
string(REGEX REPLACE ".*march=[ \t]*([^ \n]*)[ \n].*" "\\1"
190+
string(FIND "${_GCC_OUTPUT}" "march" POS)
191+
string(SUBSTRING "${_GCC_OUTPUT}" ${POS} -1 _GCC_OUTPUT)
192+
string(REGEX REPLACE "march=[ \t]*([^ \n]*)[ \n].*" "\\1"
193193
GNUCC_ARCH "${_GCC_OUTPUT}")
194194

195195
# test the parsed flag
@@ -326,7 +326,7 @@ if (CMAKE_SYSTEM_NAME MATCHES "Linux")
326326
set (FAT_RUNTIME_REQUISITES TRUE)
327327
endif()
328328
endif()
329-
CMAKE_DEPENDENT_OPTION(FAT_RUNTIME "Build a library that supports multiple microarchitecures" ${RELEASE_BUILD} "FAT_RUNTIME_REQUISITES" OFF)
329+
CMAKE_DEPENDENT_OPTION(FAT_RUNTIME "Build a library that supports multiple microarchitectures" ${RELEASE_BUILD} "FAT_RUNTIME_REQUISITES" OFF)
330330
endif ()
331331

332332
include (${CMAKE_MODULE_PATH}/arch.cmake)
@@ -340,7 +340,7 @@ if (NOT WIN32)
340340
set(C_FLAGS_TO_CHECK
341341
# Variable length arrays are way bad, most especially at run time
342342
"-Wvla"
343-
# Pointer arith on void pointers is doing it wong.
343+
# Pointer arith on void pointers is doing it wrong.
344344
"-Wpointer-arith"
345345
# Build our C code with -Wstrict-prototypes -Wmissing-prototypes
346346
"-Wstrict-prototypes"
@@ -383,7 +383,7 @@ if (CC_PAREN_EQUALITY)
383383
set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -Wno-parentheses-equality")
384384
endif()
385385

386-
# clang compains about unused const vars in our Ragel-generated code.
386+
# clang complains about unused const vars in our Ragel-generated code.
387387
CHECK_CXX_COMPILER_FLAG("-Wunused-const-variable" CXX_UNUSED_CONST_VAR)
388388
if (CXX_UNUSED_CONST_VAR)
389389
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-unused-const-variable")
@@ -418,6 +418,12 @@ CHECK_CXX_COMPILER_FLAG("-Wunused-local-typedefs" CXX_UNUSED_LOCAL_TYPEDEFS)
418418
# gcc5 complains about this
419419
CHECK_CXX_COMPILER_FLAG("-Wunused-variable" CXX_WUNUSED_VARIABLE)
420420

421+
# gcc 10 complains about this
422+
CHECK_C_COMPILER_FLAG("-Wstringop-overflow" CC_STRINGOP_OVERFLOW)
423+
if(CC_STRINGOP_OVERFLOW)
424+
set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -Wno-stringop-overflow")
425+
endif()
426+
421427
endif()
422428

423429
include_directories(SYSTEM ${Boost_INCLUDE_DIRS})

cmake/arch.cmake

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,18 @@ int main(){
5858
(void)_mm512_abs_epi8(z);
5959
}" HAVE_AVX512)
6060

61+
# and now for AVX512VBMI
62+
CHECK_C_SOURCE_COMPILES("#include <${INTRIN_INC_H}>
63+
#if !defined(__AVX512VBMI__)
64+
#error no avx512vbmi
65+
#endif
66+
67+
int main(){
68+
__m512i a = _mm512_set1_epi8(0xFF);
69+
__m512i idx = _mm512_set_epi64(3ULL, 2ULL, 1ULL, 0ULL, 7ULL, 6ULL, 5ULL, 4ULL);
70+
(void)_mm512_permutexvar_epi8(idx, a);
71+
}" HAVE_AVX512VBMI)
72+
6173
if (FAT_RUNTIME)
6274
if (NOT HAVE_SSSE3)
6375
message(FATAL_ERROR "SSSE3 support required to build fat runtime")

doc/dev-reference/compilation.rst

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ Hyperscan provides support for targeting a database at a particular CPU
5555
platform; see :ref:`instr_specialization` for details.
5656

5757
=====================
58-
Compile Pure Literals
58+
Compile Pure Literals
5959
=====================
6060

6161
Pure literal is a special case of regular expression. A character sequence is
@@ -75,12 +75,12 @@ characters exist in regular grammer like ``[``, ``]``, ``(``, ``)``, ``{``,
7575
While in pure literal case, all these meta characters lost extra meanings
7676
except that they are just common ASCII codes.
7777

78-
Hyperscan is initially designed to process common regualr expressions. It is
78+
Hyperscan is initially designed to process common regular expressions. It is
7979
hence embedded with a complex parser to do comprehensive regular grammar
8080
interpretation. Particularly, the identification of the above meta characters is the
8181
basic step for the interpretation of far more complex regular grammars.
8282

83-
However in real cases, patterns may not always be regualr expressions. They
83+
However in real cases, patterns may not always be regular expressions. They
8484
could just be pure literals. Problem will come if the pure literals contain
8585
regular meta characters. Supposing fed directly into traditional Hyperscan
8686
compile API, all these meta characters will be interpreted in predefined ways,
@@ -98,7 +98,7 @@ In ``v5.2.0``, Hyperscan introduces 2 new compile APIs for pure literal patterns
9898
#. :c:func:`hs_compile_lit_multi`: compiles an array of pure literals into a
9999
pattern database. All of the supplied patterns will be scanned for
100100
concurrently at scan time, with user-supplied identifiers returned when they
101-
match.
101+
match.
102102

103103
These 2 APIs are designed for use cases where all patterns contained in the
104104
target rule set are pure literals. Users can pass the initial pure literal
@@ -110,8 +110,8 @@ Hyperscan needs to locate the end position of the input expression via clearly
110110
knowing each literal's length, not by simply identifying character ``\0`` of a
111111
string.
112112

113-
Supported flags: :c:member:`HS_FLAG_CASELESS`, :c:member:`HS_FLAG_MULTILINE`,
114-
:c:member:`HS_FLAG_SINGLEMATCH`, :c:member:`HS_FLAG_SOM_LEFTMOST`.
113+
Supported flags: :c:member:`HS_FLAG_CASELESS`, :c:member:`HS_FLAG_SINGLEMATCH`,
114+
:c:member:`HS_FLAG_SOM_LEFTMOST`.
115115

116116
.. note:: We don't support literal compilation API with :ref:`extparam`. And
117117
for runtime implementation, traditional runtime APIs can still be

doc/dev-reference/getting_started.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -260,7 +260,7 @@ instead of potentially executing illegal instructions. The API function
260260
:c:func:`hs_valid_platform` can be used by application writers to determine if
261261
the current platform is supported by Hyperscan.
262262

263-
At of this release, the variants of the runtime that are built, and the CPU
263+
As of this release, the variants of the runtime that are built, and the CPU
264264
capability that is required, are the following:
265265

266266
+----------+-------------------------------+---------------------------+

src/compiler/compiler.cpp

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2015-2019, Intel Corporation
2+
* Copyright (c) 2015-2020, Intel Corporation
33
*
44
* Redistribution and use in source and binary forms, with or without
55
* modification, are permitted provided that the following conditions are met:
@@ -125,7 +125,7 @@ ParsedLitExpression::ParsedLitExpression(unsigned index_in,
125125
: expr(index_in, false, flags & HS_FLAG_SINGLEMATCH, false, false,
126126
SOM_NONE, report, 0, MAX_OFFSET, 0, 0, 0, false) {
127127
// For pure literal expression, below 'HS_FLAG_'s are unuseful:
128-
// DOTALL/ALLOWEMPTY/UTF8/UCP/PREFILTER/COMBINATION/QUIET
128+
// DOTALL/ALLOWEMPTY/UTF8/UCP/PREFILTER/COMBINATION/QUIET/MULTILINE
129129

130130
if (flags & ~HS_FLAG_ALL) {
131131
DEBUG_PRINTF("Unrecognised flag, flags=%u.\n", flags);
@@ -402,19 +402,18 @@ void addLitExpression(NG &ng, unsigned index, const char *expression,
402402
}
403403

404404
// Ensure that our pattern isn't too long (in characters).
405-
if (strlen(expression) > cc.grey.limitPatternLength) {
405+
if (expLength > cc.grey.limitPatternLength) {
406406
throw CompileError("Pattern length exceeds limit.");
407407
}
408408

409409
// filter out flags not supported by pure literal API.
410410
u64a not_supported = HS_FLAG_DOTALL | HS_FLAG_ALLOWEMPTY | HS_FLAG_UTF8 |
411411
HS_FLAG_UCP | HS_FLAG_PREFILTER | HS_FLAG_COMBINATION |
412-
HS_FLAG_QUIET;
412+
HS_FLAG_QUIET | HS_FLAG_MULTILINE;
413413

414414
if (flags & not_supported) {
415-
throw CompileError("Only HS_FLAG_CASELESS, HS_FLAG_MULTILINE, "
416-
"HS_FLAG_SINGLEMATCH and HS_FLAG_SOM_LEFTMOST are "
417-
"supported in literal API.");
415+
throw CompileError("Only HS_FLAG_CASELESS, HS_FLAG_SINGLEMATCH and "
416+
"HS_FLAG_SOM_LEFTMOST are supported in literal API.");
418417
}
419418

420419
// This expression must be a pure literal, we can build ue2_literal

src/database.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -114,8 +114,8 @@ hs_error_t HS_CDECL hs_serialize_database(const hs_database_t *db, char **bytes,
114114
static
115115
hs_error_t db_check_platform(const u64a p) {
116116
if (p != hs_current_platform
117-
&& p != hs_current_platform_no_avx2
118-
&& p != hs_current_platform_no_avx512) {
117+
&& p != (hs_current_platform | hs_current_platform_no_avx2)
118+
&& p != (hs_current_platform | hs_current_platform_no_avx512)) {
119119
return HS_DB_PLATFORM_ERROR;
120120
}
121121
// passed all checks

0 commit comments

Comments
 (0)