@@ -283,13 +283,60 @@ def test_contains_nan(any_string_dtype):
283283
def test_contains_compiled_regex(any_string_dtype):
    # GH#61942
    # Exercise ``Series.str.contains`` with a pre-compiled pattern, both on its
    # own and combined with the ``case`` / ``flags`` keywords.
    flags_msg = "cannot process flags argument with a compiled pattern"
    if is_object_or_nan_string_dtype(any_string_dtype):
        bool_dtype = np.bool_
    else:
        bool_dtype = "boolean"

    data = Series(["foo", "bar", "Baz"], dtype=any_string_dtype)

    # A case-sensitive compiled pattern only finds the lower-case "bar".
    sensitive = re.compile("ba.")
    tm.assert_series_equal(
        data.str.contains(sensitive),
        Series([False, True, False], dtype=bool_dtype),
    )

    # TODO this currently works for pyarrow-backed dtypes but raises for python
    pyarrow_backed = (
        any_string_dtype == "string" and any_string_dtype.storage == "pyarrow"
    )
    if not pyarrow_backed:
        with pytest.raises(ValueError, match=flags_msg):
            data.str.contains(sensitive, case=False)
    else:
        tm.assert_series_equal(
            data.str.contains(sensitive, case=False),
            Series([False, True, True], dtype=bool_dtype),
        )

    # IGNORECASE baked into the compiled pattern is honoured: "Baz" matches too.
    insensitive = re.compile("ba.", flags=re.IGNORECASE)
    tm.assert_series_equal(
        data.str.contains(insensitive),
        Series([False, True, True], dtype=bool_dtype),
    )

    # TODO should this be supported?
    # For now, ``flags`` passed next to an already compiled pattern is rejected.
    with pytest.raises(ValueError, match=flags_msg):
        data.str.contains(insensitive, flags=re.IGNORECASE)
318+
289319
def test_contains_compiled_regex_flags(any_string_dtype):
    # ensure flags other than IGNORECASE that are baked into a compiled
    # pattern are respected
    expected_dtype = (
        np.bool_ if is_object_or_nan_string_dtype(any_string_dtype) else "boolean"
    )

    # "bar" has to start directly after the newline: in MULTILINE mode "^"
    # matches only at the very beginning of a line, so any character between
    # "\n" and "bar" would make the MULTILINE expectations below fail.
    ser = Series(["foobar", "foo\nbar", "Baz"], dtype=any_string_dtype)

    # no flags: "^" anchors at the start of the string only -> nothing matches
    pat = re.compile("^ba")
    result = ser.str.contains(pat)
    expected = Series([False, False, False], dtype=expected_dtype)
    tm.assert_series_equal(result, expected)

    # MULTILINE: "^" also matches after the newline in the second element
    pat = re.compile("^ba", flags=re.MULTILINE)
    result = ser.str.contains(pat)
    expected = Series([False, True, False], dtype=expected_dtype)
    tm.assert_series_equal(result, expected)

    # MULTILINE | IGNORECASE: additionally matches the capitalised "Baz"
    pat = re.compile("^ba", flags=re.MULTILINE | re.IGNORECASE)
    result = ser.str.contains(pat)
    expected = Series([False, True, True], dtype=expected_dtype)
    tm.assert_series_equal(result, expected)
295342
@@ -833,14 +880,36 @@ def test_match_case_kwarg(any_string_dtype):
833880
def test_match_compiled_regex(any_string_dtype):
    # GH#61952
    # Exercise ``Series.str.match`` with a pre-compiled pattern, both on its
    # own and combined with the ``case`` / ``flags`` keywords.
    flags_msg = "cannot process flags argument with a compiled pattern"
    if is_object_or_nan_string_dtype(any_string_dtype):
        bool_dtype = np.bool_
    else:
        bool_dtype = "boolean"

    values = Series(["ab", "AB", "abc", "ABC"], dtype=any_string_dtype)

    # Case-sensitive compiled pattern: only the lower-case entries match.
    tm.assert_series_equal(
        values.str.match(re.compile("ab")),
        Series([True, False, True, False], dtype=bool_dtype),
    )

    # TODO this currently works for pyarrow-backed dtypes but raises for python
    pyarrow_backed = (
        any_string_dtype == "string" and any_string_dtype.storage == "pyarrow"
    )
    if not pyarrow_backed:
        with pytest.raises(ValueError, match=flags_msg):
            values.str.match(re.compile("ab"), case=False)
    else:
        tm.assert_series_equal(
            values.str.match(re.compile("ab"), case=False),
            Series([True, True, True, True], dtype=bool_dtype),
        )

    # IGNORECASE baked into the compiled pattern is honoured.
    tm.assert_series_equal(
        values.str.match(re.compile("ab", flags=re.IGNORECASE)),
        Series([True, True, True, True], dtype=bool_dtype),
    )

    # ``flags`` passed next to an already compiled pattern is rejected.
    with pytest.raises(ValueError, match=flags_msg):
        values.str.match(re.compile("ab"), flags=re.IGNORECASE)
912+
844913
845914# --------------------------------------------------------------------------------------
846915# str.fullmatch
@@ -913,14 +982,36 @@ def test_fullmatch_case_kwarg(any_string_dtype):
913982
def test_fullmatch_compiled_regex(any_string_dtype):
    # GH#61952
    # Exercise ``Series.str.fullmatch`` with a pre-compiled pattern, both on
    # its own and combined with the ``case`` / ``flags`` keywords.
    flags_msg = "cannot process flags argument with a compiled pattern"
    if is_object_or_nan_string_dtype(any_string_dtype):
        bool_dtype = np.bool_
    else:
        bool_dtype = "boolean"

    values = Series(["ab", "AB", "abc", "ABC"], dtype=any_string_dtype)

    # Case-sensitive compiled pattern: only the exact string "ab" matches.
    tm.assert_series_equal(
        values.str.fullmatch(re.compile("ab")),
        Series([True, False, False, False], dtype=bool_dtype),
    )

    # TODO this currently works for pyarrow-backed dtypes but raises for python
    pyarrow_backed = (
        any_string_dtype == "string" and any_string_dtype.storage == "pyarrow"
    )
    if not pyarrow_backed:
        with pytest.raises(ValueError, match=flags_msg):
            values.str.fullmatch(re.compile("ab"), case=False)
    else:
        tm.assert_series_equal(
            values.str.fullmatch(re.compile("ab"), case=False),
            Series([True, True, False, False], dtype=bool_dtype),
        )

    # IGNORECASE baked into the compiled pattern is honoured: "AB" matches too.
    tm.assert_series_equal(
        values.str.fullmatch(re.compile("ab", flags=re.IGNORECASE)),
        Series([True, True, False, False], dtype=bool_dtype),
    )

    # ``flags`` passed next to an already compiled pattern is rejected.
    with pytest.raises(ValueError, match=flags_msg):
        values.str.fullmatch(re.compile("ab"), flags=re.IGNORECASE)
1014+
9241015
9251016# --------------------------------------------------------------------------------------
9261017# str.findall
0 commit comments