@@ -77,71 +77,44 @@ public function getLexicalDiversity(string $lexicalDiversityClassName = Naive::c
77
77
* @param int $contextLength The amount of space left and right of the found needle
78
78
* @param bool $ignorecase
79
79
* @param string $position Available options: contain, begin, end, equal.
80
+ * @param bool $mark Option to mark the needle
80
81
* @return array
81
82
*/
82
- public function concordance (string $ needle , int $ contextLength = 20 , bool $ ignorecase = true , string $ position = 'contain ' ) : array
83
+ public function concordance (string $ needle , int $ contextLength = 20 , bool $ ignorecase = true , string $ position = 'contain ' , bool $ mark = false ) : array
83
84
{
84
85
// temporary solution to handle unicode chars
85
- $ this ->text = utf8_decode ($ this ->text );
86
+ $ text = utf8_decode ($ this ->text );
87
+ $ text = trim (preg_replace ('/[\s\t\n\r\s]+/ ' , ' ' , $ text ));
86
88
$ needle = utf8_decode ($ needle );
87
-
88
- $ found = [];
89
- $ text = ' ' . trim (preg_replace ('/[\s\t\n\r\s]+/ ' , ' ' , $ this ->text )) . ' ' ;
90
89
$ needleLength = strlen ($ needle );
91
- $ textLength = strlen ($ text );
92
- $ bufferLength = $ needleLength + 2 * $ contextLength ;
93
-
94
- // \p{L} or \p{Letter}: any kind of letter from any language.
95
-
96
- $ special_chars = "\/\-_\' " ;
97
- $ word_part = '\p{L} ' .$ special_chars ;
98
-
99
- switch ($ position ) {
100
- case 'equal ' :
101
- $ pattern = "/[^ $ word_part]( $ needle)[^ $ word_part]/ " ;
102
- break ;
103
- case 'begin ' :
104
- $ pattern = "/[^ $ word_part]( $ needle)[ $ special_chars]?[\p{L}]*|^( $ needle)/ " ;
105
- break ;
106
- case 'end ' :
107
- $ pattern = "/[\p{L}]*[ $ special_chars]?[\p{L}]*( $ needle)[^ $ word_part]/ " ;
108
- break ;
109
- case 'contain ' :
110
- $ pattern = "/( $ needle)/ " ;
111
- break ;
112
- default :
113
- $ pattern = "/( $ needle)/ " ;
114
- break ;
115
- }
90
+ $ found = [];
116
91
117
- $ case = $ ignorecase ? 'i ' : '' ;
118
- preg_match_all ($ pattern .$ case , $ text , $ matches , PREG_OFFSET_CAPTURE );
92
+ $ positions = $ this ->concordancePositions ($ text , $ needle , $ contextLength , $ ignorecase , $ position );
119
93
120
94
// Getting excerpts
121
- foreach ($ matches [ 1 ] as $ match ) {
122
-
123
- $ needlePosition = $ match [ 1 ] ;
124
- $ left = max ( $ needlePosition - $ contextLength , 0 ) ;
95
+ foreach ($ positions as $ needlePosition ) {
96
+ //marking the term
97
+ $ text_marked = ( $ mark ) ? Text:: markString ( $ text , $ needlePosition , $ needleLength , [ ' {{ ' , ' }} ' ]) : $ text ;
98
+ $ needleLength_marked = ( $ mark ) ? $ needleLength + 4 : $ needleLength ;
125
99
126
- if ($ needleLength + $ contextLength + $ needlePosition > $ textLength ) {
127
- $ tmp = substr ($ text , $ left );
128
- } else {
129
- $ tmp = substr ($ text , $ left , $ bufferLength );
130
- }
131
- $ found [] = utf8_encode ($ tmp );
100
+ $ found [] = utf8_encode (Text::getExcerpt ($ text_marked , $ needlePosition , $ needleLength_marked , $ contextLength ));
132
101
}
133
102
134
103
return $ found ;
135
104
}
136
105
137
- public function occurrences (string $ needle , int $ contextLength = 20 , bool $ ignorecase = true , string $ position = 'contain ' , bool $ mark = false ) : array
106
+ /**
107
+ * Return all positions of the needle in the text according to the position of the needle in a word.
108
+ * @param string $text
109
+ * @param string $needle
110
+ * @param int $contextLength The amount of space left and right of the found needle
111
+ * @param bool $ignorecase
112
+ * @param string $position Available options: contain, begin, end, equal.
113
+ * @return array
114
+ */
115
+ public function concordancePositions (string $ text , string $ needle , int $ contextLength = 20 , bool $ ignorecase = true , string $ position = 'contain ' ) : array
138
116
{
139
- // temporary solution to handle unicode chars
140
- $ text = utf8_decode ($ this ->text );
141
- $ needle = utf8_decode ($ needle );
142
-
143
117
$ found = [];
144
- $ text = trim (preg_replace ('/[\s\t\n\r\s]+/ ' , ' ' , $ text ));
145
118
$ needleLength = strlen ($ needle );
146
119
$ textLength = strlen ($ text );
147
120
$ bufferLength = $ needleLength + 2 * $ contextLength ;
@@ -173,11 +146,7 @@ public function occurrences(string $needle, int $contextLength = 20, bool $ignor
173
146
preg_match_all ($ pattern .$ case , $ text , $ matches , PREG_OFFSET_CAPTURE );
174
147
$ positions = array_column ($ matches [1 ], 1 );
175
148
176
- $ excerpts = array_map (function ($ needlePos ) use ($ needleLength , $ text , $ contextLength , $ mark ) {
177
- return $ this ->extractExcerptTerm ($ needlePos , $ needleLength , $ text , $ contextLength , $ mark );
178
- }, $ positions );
179
-
180
- return $ excerpts ;
149
+ return $ positions ;
181
150
}
182
151
183
152
/**
0 commit comments