From 8201ecf8bd86fe427276228d4bc268253dc9840f Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Tue, 21 Jan 2025 10:17:43 +0100 Subject: [PATCH 01/26] docs: shorten docstring --- src/safeds/data/tabular/query/_math_operations.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/safeds/data/tabular/query/_math_operations.py b/src/safeds/data/tabular/query/_math_operations.py index 0c33ccd69..e3d64e75d 100644 --- a/src/safeds/data/tabular/query/_math_operations.py +++ b/src/safeds/data/tabular/query/_math_operations.py @@ -669,7 +669,7 @@ def round_to_significant_figures(self, significant_figures: int) -> Cell: @abstractmethod def sign(self) -> Cell: """ - Get the sign (-1 for negative numbers, 0 for zero, and 1 for positive numbers). + Get the sign (-1 if negative, 0 for zero, and 1 if positive). Note that IEEE 754 defines a negative zero (-0) and a positive zero (+0). This method return a negative zero for -0 and a positive zero for +0. From c029fbaa6067d3dc48b264d0ee5c1777338c5d2d Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Tue, 21 Jan 2025 11:22:33 +0100 Subject: [PATCH 02/26] feat: `reverse` --- .../tabular/query/_lazy_string_operations.py | 153 ++- .../data/tabular/query/_string_operations.py | 957 +++++++++--------- .../_lazy_string_operations/test_contains.py | 22 - .../_lazy_string_operations/test_ends_with.py | 22 - .../_lazy_string_operations/test_index_of.py | 22 - .../_lazy_string_operations/test_length.py | 26 - .../_lazy_string_operations/test_replace.py | 24 - .../_lazy_string_operations/test_reverse.py | 21 + .../test_starts_with.py | 22 - .../_lazy_string_operations/test_substring.py | 36 - .../_lazy_string_operations/test_to_date.py | 22 - .../test_to_datetime.py | 22 - .../_lazy_string_operations/test_to_int.py | 26 - .../test_to_lowercase.py | 18 - .../test_to_uppercase.py | 18 - .../_lazy_string_operations/test_trim.py | 24 - .../_lazy_string_operations/test_trim_end.py | 24 - .../test_trim_start.py | 24 - 18 files 
changed, 587 insertions(+), 896 deletions(-) delete mode 100644 tests/safeds/data/tabular/query/_lazy_string_operations/test_contains.py delete mode 100644 tests/safeds/data/tabular/query/_lazy_string_operations/test_ends_with.py delete mode 100644 tests/safeds/data/tabular/query/_lazy_string_operations/test_index_of.py delete mode 100644 tests/safeds/data/tabular/query/_lazy_string_operations/test_length.py delete mode 100644 tests/safeds/data/tabular/query/_lazy_string_operations/test_replace.py create mode 100644 tests/safeds/data/tabular/query/_lazy_string_operations/test_reverse.py delete mode 100644 tests/safeds/data/tabular/query/_lazy_string_operations/test_starts_with.py delete mode 100644 tests/safeds/data/tabular/query/_lazy_string_operations/test_substring.py delete mode 100644 tests/safeds/data/tabular/query/_lazy_string_operations/test_to_date.py delete mode 100644 tests/safeds/data/tabular/query/_lazy_string_operations/test_to_datetime.py delete mode 100644 tests/safeds/data/tabular/query/_lazy_string_operations/test_to_int.py delete mode 100644 tests/safeds/data/tabular/query/_lazy_string_operations/test_to_lowercase.py delete mode 100644 tests/safeds/data/tabular/query/_lazy_string_operations/test_to_uppercase.py delete mode 100644 tests/safeds/data/tabular/query/_lazy_string_operations/test_trim.py delete mode 100644 tests/safeds/data/tabular/query/_lazy_string_operations/test_trim_end.py delete mode 100644 tests/safeds/data/tabular/query/_lazy_string_operations/test_trim_start.py diff --git a/src/safeds/data/tabular/query/_lazy_string_operations.py b/src/safeds/data/tabular/query/_lazy_string_operations.py index 94cc4ac25..43bdc3033 100644 --- a/src/safeds/data/tabular/query/_lazy_string_operations.py +++ b/src/safeds/data/tabular/query/_lazy_string_operations.py @@ -3,17 +3,13 @@ from typing import TYPE_CHECKING from safeds._utils import _structural_hash -from safeds._validation import _check_bounds, _ClosedBound, 
_convert_and_check_datetime_format from safeds.data.tabular.containers._lazy_cell import _LazyCell from ._string_operations import StringOperations if TYPE_CHECKING: - import datetime - import polars as pl - from safeds._typing import _ConvertibleToIntCell, _ConvertibleToStringCell from safeds.data.tabular.containers._cell import Cell @@ -48,76 +44,79 @@ def __str__(self) -> str: # String operations # ------------------------------------------------------------------------------------------------------------------ - def contains(self, substring: _ConvertibleToStringCell) -> Cell[bool | None]: - return _LazyCell(self._expression.str.contains(substring, literal=True)) - - def length(self, optimize_for_ascii: bool = False) -> Cell[int | None]: - if optimize_for_ascii: - return _LazyCell(self._expression.str.len_bytes()) - else: - return _LazyCell(self._expression.str.len_chars()) - - def ends_with(self, suffix: _ConvertibleToStringCell) -> Cell[bool | None]: - return _LazyCell(self._expression.str.ends_with(suffix)) - - def index_of(self, substring: _ConvertibleToStringCell) -> Cell[int | None]: - return _LazyCell(self._expression.str.find(substring, literal=True)) - - def replace(self, old: _ConvertibleToStringCell, new: _ConvertibleToStringCell) -> Cell[str | None]: - return _LazyCell(self._expression.str.replace_all(old, new, literal=True)) - - def starts_with(self, prefix: _ConvertibleToStringCell) -> Cell[bool | None]: - return _LazyCell(self._expression.str.starts_with(prefix)) - - def substring( - self, - *, - start: _ConvertibleToIntCell = 0, - length: _ConvertibleToIntCell = None, - ) -> Cell[str | None]: - if isinstance(length, int): - _check_bounds("length", length, lower_bound=_ClosedBound(0)) - - return _LazyCell(self._expression.str.slice(start, length)) - - def to_date(self, *, format: str | None = "iso") -> Cell[datetime.date | None]: - if format == "iso": - format = "%F" # noqa: A001 - elif format is not None: - format = 
_convert_and_check_datetime_format(format, type_="date", used_for_parsing=True) # noqa: A001 - - return _LazyCell(self._expression.str.to_date(format=format, strict=False)) - - def to_datetime(self, *, format: str | None = "iso") -> Cell[datetime.datetime | None]: - if format == "iso": - format = "%+" # noqa: A001 - elif format is not None: - format = _convert_and_check_datetime_format(format, type_="datetime", used_for_parsing=True) # noqa: A001 - - return _LazyCell(self._expression.str.to_datetime(format=format, strict=False)) - - def to_int(self, *, base: _ConvertibleToIntCell = 10) -> Cell[int | None]: - return _LazyCell(self._expression.str.to_integer(base=base, strict=False)) - - def to_lowercase(self) -> Cell[str | None]: - return _LazyCell(self._expression.str.to_lowercase()) - - def to_time(self, *, format: str | None = "iso") -> Cell[datetime.time | None]: - if format == "iso": - format = "%T" # noqa: A001 - elif format is not None: - format = _convert_and_check_datetime_format(format, type_="time", used_for_parsing=True) # noqa: A001 - - return _LazyCell(self._expression.str.to_time(format=format, strict=False)) - - def to_uppercase(self) -> Cell[str | None]: - return _LazyCell(self._expression.str.to_uppercase()) - - def trim(self) -> Cell[str | None]: - return _LazyCell(self._expression.str.strip_chars()) - - def trim_end(self) -> Cell[str | None]: - return _LazyCell(self._expression.str.strip_chars_end()) - - def trim_start(self) -> Cell[str | None]: - return _LazyCell(self._expression.str.strip_chars_start()) + def reverse(self) -> Cell[str | None]: + return _LazyCell(self._expression.str.reverse()) + + # def contains(self, substring: _ConvertibleToStringCell) -> Cell[bool | None]: + # return _LazyCell(self._expression.str.contains(substring, literal=True)) + # + # def length(self, optimize_for_ascii: bool = False) -> Cell[int | None]: + # if optimize_for_ascii: + # return _LazyCell(self._expression.str.len_bytes()) + # else: + # return 
_LazyCell(self._expression.str.len_chars()) + # + # def ends_with(self, suffix: _ConvertibleToStringCell) -> Cell[bool | None]: + # return _LazyCell(self._expression.str.ends_with(suffix)) + # + # def index_of(self, substring: _ConvertibleToStringCell) -> Cell[int | None]: + # return _LazyCell(self._expression.str.find(substring, literal=True)) + # + # def replace(self, old: _ConvertibleToStringCell, new: _ConvertibleToStringCell) -> Cell[str | None]: + # return _LazyCell(self._expression.str.replace_all(old, new, literal=True)) + # + # def starts_with(self, prefix: _ConvertibleToStringCell) -> Cell[bool | None]: + # return _LazyCell(self._expression.str.starts_with(prefix)) + # + # def substring( + # self, + # *, + # start: _ConvertibleToIntCell = 0, + # length: _ConvertibleToIntCell = None, + # ) -> Cell[str | None]: + # if isinstance(length, int): + # _check_bounds("length", length, lower_bound=_ClosedBound(0)) + # + # return _LazyCell(self._expression.str.slice(start, length)) + # + # def to_date(self, *, format: str | None = "iso") -> Cell[datetime.date | None]: + # if format == "iso": + # format = "%F" + # elif format is not None: + # format = _convert_and_check_datetime_format(format, type_="date", used_for_parsing=True) + # + # return _LazyCell(self._expression.str.to_date(format=format, strict=False)) + # + # def to_datetime(self, *, format: str | None = "iso") -> Cell[datetime.datetime | None]: + # if format == "iso": + # format = "%+" + # elif format is not None: + # format = _convert_and_check_datetime_format(format, type_="datetime", used_for_parsing=True) + # + # return _LazyCell(self._expression.str.to_datetime(format=format, strict=False)) + # + # def to_int(self, *, base: _ConvertibleToIntCell = 10) -> Cell[int | None]: + # return _LazyCell(self._expression.str.to_integer(base=base, strict=False)) + # + # def to_lowercase(self) -> Cell[str | None]: + # return _LazyCell(self._expression.str.to_lowercase()) + # + # def to_time(self, *, format: str | 
None = "iso") -> Cell[datetime.time | None]: + # if format == "iso": + # format = "%T" + # elif format is not None: + # format = _convert_and_check_datetime_format(format, type_="time", used_for_parsing=True) + # + # return _LazyCell(self._expression.str.to_time(format=format, strict=False)) + # + # def to_uppercase(self) -> Cell[str | None]: + # return _LazyCell(self._expression.str.to_uppercase()) + # + # def trim(self) -> Cell[str | None]: + # return _LazyCell(self._expression.str.strip_chars()) + # + # def trim_end(self) -> Cell[str | None]: + # return _LazyCell(self._expression.str.strip_chars_end()) + # + # def trim_start(self) -> Cell[str | None]: + # return _LazyCell(self._expression.str.strip_chars_start()) diff --git a/src/safeds/data/tabular/query/_string_operations.py b/src/safeds/data/tabular/query/_string_operations.py index 6b99afe59..cf92543e0 100644 --- a/src/safeds/data/tabular/query/_string_operations.py +++ b/src/safeds/data/tabular/query/_string_operations.py @@ -4,9 +4,6 @@ from typing import TYPE_CHECKING if TYPE_CHECKING: - import datetime - - from safeds._typing import _ConvertibleToIntCell, _ConvertibleToStringCell from safeds.data.tabular.containers import Cell # TODO: examples with None @@ -62,484 +59,510 @@ def __str__(self) -> str: ... # ------------------------------------------------------------------------------------------------------------------ @abstractmethod - def contains(self, substring: _ConvertibleToStringCell) -> Cell[bool | None]: - """ - Check if the string value in the cell contains the substring. - - Parameters - ---------- - substring: - The substring to search for. - - Returns - ------- - contains: - Whether the string value contains the substring. 
- - Examples - -------- - >>> from safeds.data.tabular.containers import Column - >>> column = Column("a", ["ab", "bc", "cd", None]) - >>> column.transform(lambda cell: cell.str.contains("b")) - +-------+ - | a | - | --- | - | bool | - +=======+ - | true | - | true | - | false | - | null | - +-------+ - """ - - @abstractmethod - def ends_with(self, suffix: _ConvertibleToStringCell) -> Cell[bool | None]: - """ - Check if the string value in the cell ends with the suffix. - - Parameters - ---------- - suffix: - The suffix to search for. - - Returns - ------- - ends_with: - Whether the string value ends with the suffix. - - Examples - -------- - >>> from safeds.data.tabular.containers import Column - >>> column = Column("a", ["ab", "bc", "cd", None]) - >>> column.transform(lambda cell: cell.str.ends_with("c")) - +-------+ - | a | - | --- | - | bool | - +=======+ - | false | - | true | - | false | - | null | - +-------+ - """ - - @abstractmethod - def index_of(self, substring: _ConvertibleToStringCell) -> Cell[int | None]: - """ - Get the index of the first occurrence of the substring in the string value in the cell. - - Parameters - ---------- - substring: - The substring to search for. - - Returns - ------- - index_of: - The index of the first occurrence of the substring. If the substring is not found, None is returned. - - Examples - -------- - >>> from safeds.data.tabular.containers import Column - >>> column = Column("a", ["ab", "bc", "cd", None]) - >>> column.transform(lambda cell: cell.str.index_of("b")) - +------+ - | a | - | --- | - | u32 | - +======+ - | 1 | - | 0 | - | null | - | null | - +------+ - """ - - @abstractmethod - def length(self, *, optimize_for_ascii: bool = False) -> Cell[int | None]: - """ - Get the number of characters of the string value in the cell. - - Parameters - ---------- - optimize_for_ascii: - Greatly speed up this operation if the string is ASCII-only. 
If the string contains non-ASCII characters, - this option will return incorrect results, though. - - Returns - ------- - length: - The length of the string value. - - Examples - -------- - >>> from safeds.data.tabular.containers import Column - >>> column = Column("a", ["", "a", "abc", None]) - >>> column.transform(lambda cell: cell.str.length()) - +------+ - | a | - | --- | - | u32 | - +======+ - | 0 | - | 1 | - | 3 | - | null | - +------+ - """ - - @abstractmethod - def replace(self, old: _ConvertibleToStringCell, new: _ConvertibleToStringCell) -> Cell[str | None]: - """ - Replace occurrences of the old substring with the new substring in the string value in the cell. - - Parameters - ---------- - old: - The substring to replace. - new: - The substring to replace with. - - Returns - ------- - replaced_string: - The string value with the occurrences replaced. - - Examples - -------- - >>> from safeds.data.tabular.containers import Column - >>> column = Column("a", ["ab", "bc", "cd", None]) - >>> column.transform(lambda cell: cell.str.replace("b", "z")) - +------+ - | a | - | --- | - | str | - +======+ - | az | - | zc | - | cd | - | null | - +------+ - """ - - @abstractmethod - def starts_with(self, prefix: _ConvertibleToStringCell) -> Cell[bool | None]: - """ - Check if the string value in the cell starts with the prefix. - - Parameters - ---------- - prefix: - The prefix to search for. - - Returns - ------- - starts_with: - Whether the string value starts with the prefix. 
- - Examples - -------- - >>> from safeds.data.tabular.containers import Column - >>> column = Column("a", ["ab", "bc", "cd", None]) - >>> column.transform(lambda cell: cell.str.starts_with("a")) - +-------+ - | a | - | --- | - | bool | - +=======+ - | true | - | false | - | false | - | null | - +-------+ - """ - - @abstractmethod - def substring( - self, - *, - start: _ConvertibleToIntCell = 0, - length: _ConvertibleToIntCell = None, - ) -> Cell[str | None]: - """ - Get a substring of the string value in the cell. - - Parameters - ---------- - start: - The start index of the substring. - length: - The length of the substring. If None, the slice contains all rows starting from `start`. Must greater than - or equal to 0. - - Returns - ------- - substring: - The substring of the string value. - - Raises - ------ - OutOfBoundsError - If length is less than 0. - - Examples - -------- - >>> from safeds.data.tabular.containers import Column - >>> column = Column("a", ["abc", "def", "ghi", None]) - >>> column.transform(lambda cell: cell.str.substring(start=1, length=2)) - +------+ - | a | - | --- | - | str | - +======+ - | bc | - | ef | - | hi | - | null | - +------+ - """ - - # TODO: add format parameter + document - @abstractmethod - def to_date(self, *, format: str | None = "iso") -> Cell[datetime.date | None]: - """ - Convert the string value in the cell to a date. - - Returns - ------- - date: - The date value. If the string cannot be converted to a date, None is returned. 
- - Examples - -------- - >>> from safeds.data.tabular.containers import Column - >>> column = Column("a", ["2021-01-01", "2021-02-01", "abc", None]) - >>> column.transform(lambda cell: cell.str.to_date()) - +------------+ - | a | - | --- | - | date | - +============+ - | 2021-01-01 | - | 2021-02-01 | - | null | - | null | - +------------+ - """ - - # TODO: add format parameter + document - @abstractmethod - def to_datetime(self, *, format: str | None = "iso") -> Cell[datetime.datetime | None]: + def reverse(self) -> Cell[str | None]: """ - Convert the string value in the cell to a datetime. + Reverse the string. Returns ------- - datetime: - The datetime value. If the string cannot be converted to a datetime, None is returned. + cell: + The reversed string. Examples -------- >>> from safeds.data.tabular.containers import Column - >>> column = Column("a", ["2021-01-01T00:00:00Z", "2021-02-01T00:00:00Z", "abc", None]) - >>> column.transform(lambda cell: cell.str.to_datetime()) - +-------------------------+ - | a | - | --- | - | datetime[μs, UTC] | - +=========================+ - | 2021-01-01 00:00:00 UTC | - | 2021-02-01 00:00:00 UTC | - | null | - | null | - +-------------------------+ - """ - - # TODO: add to_time - - @abstractmethod - def to_int(self, *, base: _ConvertibleToIntCell = 10) -> Cell[int | None]: - """ - Convert the string value in the cell to an integer. - - Parameters - ---------- - base: - The base of the integer. - - Returns - ------- - int: - The integer value. If the string cannot be converted to an integer, None is returned. 
- - Examples - -------- - >>> from safeds.data.tabular.containers import Column - >>> column1 = Column("a", ["1", "2", "3", "abc", None]) - >>> column1.transform(lambda cell: cell.str.to_int()) - +------+ - | a | - | --- | - | i64 | - +======+ - | 1 | - | 2 | - | 3 | - | null | - | null | - +------+ - - >>> column2 = Column("a", ["1", "10", "11", "abc", None]) - >>> column2.transform(lambda cell: cell.str.to_int(base=2)) - +------+ - | a | - | --- | - | i64 | - +======+ - | 1 | - | 2 | - | 3 | - | null | - | null | - +------+ - """ - - @abstractmethod - def to_lowercase(self) -> Cell[str | None]: - """ - Convert the string value in the cell to lowercase. - - Returns - ------- - lowercase: - The string value in lowercase. - - Examples - -------- - >>> from safeds.data.tabular.containers import Column - >>> column = Column("a", ["AB", "BC", "CD", None]) - >>> column.transform(lambda cell: cell.str.to_lowercase()) + >>> column = Column("a", ["ab", "bc", None]) + >>> column.transform(lambda cell: cell.str.reverse()) +------+ | a | | --- | | str | +======+ - | ab | - | bc | - | cd | + | ba | + | cb | | null | +------+ """ - @abstractmethod - def to_uppercase(self) -> Cell[str | None]: - """ - Convert the string value in the cell to uppercase. - - Returns - ------- - uppercase: - The string value in uppercase. - - Examples - -------- - >>> from safeds.data.tabular.containers import Column - >>> column = Column("a", ["ab", "bc", "cd", None]) - >>> column.transform(lambda cell: cell.str.to_uppercase()) - +------+ - | a | - | --- | - | str | - +======+ - | AB | - | BC | - | CD | - | null | - +------+ - """ - - @abstractmethod - def trim(self) -> Cell[str | None]: - """ - Remove whitespace from the start and end of the string value in the cell. - - Returns - ------- - trimmed: - The string value without whitespace at the start and end. 
- - Examples - -------- - >>> from safeds.data.tabular.containers import Column - >>> column = Column("a", ["", " abc", "abc ", " abc ", None]) - >>> column.transform(lambda cell: cell.str.trim()) - +------+ - | a | - | --- | - | str | - +======+ - | | - | abc | - | abc | - | abc | - | null | - +------+ - """ - - @abstractmethod - def trim_end(self) -> Cell[str | None]: - """ - Remove whitespace from the end of the string value in the cell. - - Returns - ------- - trimmed: - The string value without whitespace at the end. - - Examples - -------- - >>> from safeds.data.tabular.containers import Column - >>> column = Column("a", ["", " abc", "abc ", " abc ", None]) - >>> column.transform(lambda cell: cell.str.trim_end()) - +------+ - | a | - | --- | - | str | - +======+ - | | - | abc | - | abc | - | abc | - | null | - +------+ - """ - - @abstractmethod - def trim_start(self) -> Cell[str | None]: - """ - Remove whitespace from the start of the string value in the cell. - - Returns - ------- - trimmed: - The string value without whitespace at the start. - - Examples - -------- - >>> from safeds.data.tabular.containers import Column - >>> column = Column("a", ["", " abc", "abc ", " abc ", None]) - >>> column.transform(lambda cell: cell.str.trim_start()) - +------+ - | a | - | --- | - | str | - +======+ - | | - | abc | - | abc | - | abc | - | null | - +------+ - """ + # @abstractmethod + # def contains(self, substring: _ConvertibleToStringCell) -> Cell[bool | None]: + # """ + # Check if the string value in the cell contains the substring. + # + # Parameters + # ---------- + # substring: + # The substring to search for. + # + # Returns + # ------- + # contains: + # Whether the string value contains the substring. 
+ # + # Examples + # -------- + # >>> from safeds.data.tabular.containers import Column + # >>> column = Column("a", ["ab", "bc", "cd", None]) + # >>> column.transform(lambda cell: cell.str.contains("b")) + # +-------+ + # | a | + # | --- | + # | bool | + # +=======+ + # | true | + # | true | + # | false | + # | null | + # +-------+ + # """ + # + # @abstractmethod + # def ends_with(self, suffix: _ConvertibleToStringCell) -> Cell[bool | None]: + # """ + # Check if the string value in the cell ends with the suffix. + # + # Parameters + # ---------- + # suffix: + # The suffix to search for. + # + # Returns + # ------- + # ends_with: + # Whether the string value ends with the suffix. + # + # Examples + # -------- + # >>> from safeds.data.tabular.containers import Column + # >>> column = Column("a", ["ab", "bc", "cd", None]) + # >>> column.transform(lambda cell: cell.str.ends_with("c")) + # +-------+ + # | a | + # | --- | + # | bool | + # +=======+ + # | false | + # | true | + # | false | + # | null | + # +-------+ + # """ + # + # @abstractmethod + # def index_of(self, substring: _ConvertibleToStringCell) -> Cell[int | None]: + # """ + # Get the index of the first occurrence of the substring in the string value in the cell. + # + # Parameters + # ---------- + # substring: + # The substring to search for. + # + # Returns + # ------- + # index_of: + # The index of the first occurrence of the substring. If the substring is not found, None is returned. + # + # Examples + # -------- + # >>> from safeds.data.tabular.containers import Column + # >>> column = Column("a", ["ab", "bc", "cd", None]) + # >>> column.transform(lambda cell: cell.str.index_of("b")) + # +------+ + # | a | + # | --- | + # | u32 | + # +======+ + # | 1 | + # | 0 | + # | null | + # | null | + # +------+ + # """ + # + # @abstractmethod + # def length(self, *, optimize_for_ascii: bool = False) -> Cell[int | None]: + # """ + # Get the number of characters of the string value in the cell. 
+ # + # Parameters + # ---------- + # optimize_for_ascii: + # Greatly speed up this operation if the string is ASCII-only. If the string contains non-ASCII characters, + # this option will return incorrect results, though. + # + # Returns + # ------- + # length: + # The length of the string value. + # + # Examples + # -------- + # >>> from safeds.data.tabular.containers import Column + # >>> column = Column("a", ["", "a", "abc", None]) + # >>> column.transform(lambda cell: cell.str.length()) + # +------+ + # | a | + # | --- | + # | u32 | + # +======+ + # | 0 | + # | 1 | + # | 3 | + # | null | + # +------+ + # """ + # + # @abstractmethod + # def replace(self, old: _ConvertibleToStringCell, new: _ConvertibleToStringCell) -> Cell[str | None]: + # """ + # Replace occurrences of the old substring with the new substring in the string value in the cell. + # + # Parameters + # ---------- + # old: + # The substring to replace. + # new: + # The substring to replace with. + # + # Returns + # ------- + # replaced_string: + # The string value with the occurrences replaced. + # + # Examples + # -------- + # >>> from safeds.data.tabular.containers import Column + # >>> column = Column("a", ["ab", "bc", "cd", None]) + # >>> column.transform(lambda cell: cell.str.replace("b", "z")) + # +------+ + # | a | + # | --- | + # | str | + # +======+ + # | az | + # | zc | + # | cd | + # | null | + # +------+ + # """ + # + # @abstractmethod + # def starts_with(self, prefix: _ConvertibleToStringCell) -> Cell[bool | None]: + # """ + # Check if the string value in the cell starts with the prefix. + # + # Parameters + # ---------- + # prefix: + # The prefix to search for. + # + # Returns + # ------- + # starts_with: + # Whether the string value starts with the prefix. 
+ # + # Examples + # -------- + # >>> from safeds.data.tabular.containers import Column + # >>> column = Column("a", ["ab", "bc", "cd", None]) + # >>> column.transform(lambda cell: cell.str.starts_with("a")) + # +-------+ + # | a | + # | --- | + # | bool | + # +=======+ + # | true | + # | false | + # | false | + # | null | + # +-------+ + # """ + # + # @abstractmethod + # def substring( + # self, + # *, + # start: _ConvertibleToIntCell = 0, + # length: _ConvertibleToIntCell = None, + # ) -> Cell[str | None]: + # """ + # Get a substring of the string value in the cell. + # + # Parameters + # ---------- + # start: + # The start index of the substring. + # length: + # The length of the substring. If None, the slice contains all rows starting from `start`. Must greater than + # or equal to 0. + # + # Returns + # ------- + # substring: + # The substring of the string value. + # + # Raises + # ------ + # OutOfBoundsError + # If length is less than 0. + # + # Examples + # -------- + # >>> from safeds.data.tabular.containers import Column + # >>> column = Column("a", ["abc", "def", "ghi", None]) + # >>> column.transform(lambda cell: cell.str.substring(start=1, length=2)) + # +------+ + # | a | + # | --- | + # | str | + # +======+ + # | bc | + # | ef | + # | hi | + # | null | + # +------+ + # """ + # + # # TODO: add format parameter + document + # @abstractmethod + # def to_date(self, *, format: str | None = "iso") -> Cell[datetime.date | None]: + # """ + # Convert the string value in the cell to a date. + # + # Returns + # ------- + # date: + # The date value. If the string cannot be converted to a date, None is returned. 
+ # + # Examples + # -------- + # >>> from safeds.data.tabular.containers import Column + # >>> column = Column("a", ["2021-01-01", "2021-02-01", "abc", None]) + # >>> column.transform(lambda cell: cell.str.to_date()) + # +------------+ + # | a | + # | --- | + # | date | + # +============+ + # | 2021-01-01 | + # | 2021-02-01 | + # | null | + # | null | + # +------------+ + # """ + # + # # TODO: add format parameter + document + # @abstractmethod + # def to_datetime(self, *, format: str | None = "iso") -> Cell[datetime.datetime | None]: + # """ + # Convert the string value in the cell to a datetime. + # + # Returns + # ------- + # datetime: + # The datetime value. If the string cannot be converted to a datetime, None is returned. + # + # Examples + # -------- + # >>> from safeds.data.tabular.containers import Column + # >>> column = Column("a", ["2021-01-01T00:00:00Z", "2021-02-01T00:00:00Z", "abc", None]) + # >>> column.transform(lambda cell: cell.str.to_datetime()) + # +-------------------------+ + # | a | + # | --- | + # | datetime[μs, UTC] | + # +=========================+ + # | 2021-01-01 00:00:00 UTC | + # | 2021-02-01 00:00:00 UTC | + # | null | + # | null | + # +-------------------------+ + # """ + # + # # TODO: add to_time + # + # @abstractmethod + # def to_int(self, *, base: _ConvertibleToIntCell = 10) -> Cell[int | None]: + # """ + # Convert the string value in the cell to an integer. + # + # Parameters + # ---------- + # base: + # The base of the integer. + # + # Returns + # ------- + # int: + # The integer value. If the string cannot be converted to an integer, None is returned. 
+ # + # Examples + # -------- + # >>> from safeds.data.tabular.containers import Column + # >>> column1 = Column("a", ["1", "2", "3", "abc", None]) + # >>> column1.transform(lambda cell: cell.str.to_int()) + # +------+ + # | a | + # | --- | + # | i64 | + # +======+ + # | 1 | + # | 2 | + # | 3 | + # | null | + # | null | + # +------+ + # + # >>> column2 = Column("a", ["1", "10", "11", "abc", None]) + # >>> column2.transform(lambda cell: cell.str.to_int(base=2)) + # +------+ + # | a | + # | --- | + # | i64 | + # +======+ + # | 1 | + # | 2 | + # | 3 | + # | null | + # | null | + # +------+ + # """ + # + # @abstractmethod + # def to_lowercase(self) -> Cell[str | None]: + # """ + # Convert the string value in the cell to lowercase. + # + # Returns + # ------- + # lowercase: + # The string value in lowercase. + # + # Examples + # -------- + # >>> from safeds.data.tabular.containers import Column + # >>> column = Column("a", ["AB", "BC", "CD", None]) + # >>> column.transform(lambda cell: cell.str.to_lowercase()) + # +------+ + # | a | + # | --- | + # | str | + # +======+ + # | ab | + # | bc | + # | cd | + # | null | + # +------+ + # """ + # + # @abstractmethod + # def to_uppercase(self) -> Cell[str | None]: + # """ + # Convert the string value in the cell to uppercase. + # + # Returns + # ------- + # uppercase: + # The string value in uppercase. + # + # Examples + # -------- + # >>> from safeds.data.tabular.containers import Column + # >>> column = Column("a", ["ab", "bc", "cd", None]) + # >>> column.transform(lambda cell: cell.str.to_uppercase()) + # +------+ + # | a | + # | --- | + # | str | + # +======+ + # | AB | + # | BC | + # | CD | + # | null | + # +------+ + # """ + # + # @abstractmethod + # def trim(self) -> Cell[str | None]: + # """ + # Remove whitespace from the start and end of the string value in the cell. + # + # Returns + # ------- + # trimmed: + # The string value without whitespace at the start and end. 
+ # + # Examples + # -------- + # >>> from safeds.data.tabular.containers import Column + # >>> column = Column("a", ["", " abc", "abc ", " abc ", None]) + # >>> column.transform(lambda cell: cell.str.trim()) + # +------+ + # | a | + # | --- | + # | str | + # +======+ + # | | + # | abc | + # | abc | + # | abc | + # | null | + # +------+ + # """ + # + # @abstractmethod + # def trim_end(self) -> Cell[str | None]: + # """ + # Remove whitespace from the end of the string value in the cell. + # + # Returns + # ------- + # trimmed: + # The string value without whitespace at the end. + # + # Examples + # -------- + # >>> from safeds.data.tabular.containers import Column + # >>> column = Column("a", ["", " abc", "abc ", " abc ", None]) + # >>> column.transform(lambda cell: cell.str.trim_end()) + # +------+ + # | a | + # | --- | + # | str | + # +======+ + # | | + # | abc | + # | abc | + # | abc | + # | null | + # +------+ + # """ + # + # @abstractmethod + # def trim_start(self) -> Cell[str | None]: + # """ + # Remove whitespace from the start of the string value in the cell. + # + # Returns + # ------- + # trimmed: + # The string value without whitespace at the start. 
+ # + # Examples + # -------- + # >>> from safeds.data.tabular.containers import Column + # >>> column = Column("a", ["", " abc", "abc ", " abc ", None]) + # >>> column.transform(lambda cell: cell.str.trim_start()) + # +------+ + # | a | + # | --- | + # | str | + # +======+ + # | | + # | abc | + # | abc | + # | abc | + # | null | + # +------+ + # """ diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_contains.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_contains.py deleted file mode 100644 index bac8e354a..000000000 --- a/tests/safeds/data/tabular/query/_lazy_string_operations/test_contains.py +++ /dev/null @@ -1,22 +0,0 @@ -import pytest - -from tests.helpers import assert_cell_operation_works - - -@pytest.mark.parametrize( - ("string", "substring", "expected"), - [ - ("", "a", False), - ("abc", "", True), - ("abc", "a", True), - ("abc", "d", False), - ], - ids=[ - "empty string", - "empty substring", - "contained", - "not contained", - ], -) -def test_should_check_whether_string_contains_substring(string: str, substring: str, expected: bool) -> None: - assert_cell_operation_works(string, lambda cell: cell.str.contains(substring), expected) diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_ends_with.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_ends_with.py deleted file mode 100644 index 78102c900..000000000 --- a/tests/safeds/data/tabular/query/_lazy_string_operations/test_ends_with.py +++ /dev/null @@ -1,22 +0,0 @@ -import pytest - -from tests.helpers import assert_cell_operation_works - - -@pytest.mark.parametrize( - ("string", "suffix", "expected"), - [ - ("", "a", False), - ("abc", "", True), - ("abc", "c", True), - ("abc", "a", False), - ], - ids=[ - "empty string", - "empty suffix", - "ends with", - "does not end with", - ], -) -def test_should_check_whether_string_ends_with_prefix(string: str, suffix: str, expected: bool) -> None: - assert_cell_operation_works(string, 
lambda cell: cell.str.ends_with(suffix), expected) diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_index_of.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_index_of.py deleted file mode 100644 index 84e79ad1b..000000000 --- a/tests/safeds/data/tabular/query/_lazy_string_operations/test_index_of.py +++ /dev/null @@ -1,22 +0,0 @@ -import pytest - -from tests.helpers import assert_cell_operation_works - - -@pytest.mark.parametrize( - ("string", "substring", "expected"), - [ - ("", "a", None), - ("abc", "", 0), - ("abc", "b", 1), - ("abc", "d", None), - ], - ids=[ - "empty string", - "empty substring", - "contained", - "not contained", - ], -) -def test_should_return_index_of_first_occurrence_of_substring(string: str, substring: str, expected: bool) -> None: - assert_cell_operation_works(string, lambda cell: cell.str.index_of(substring), expected) diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_length.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_length.py deleted file mode 100644 index 5b7f0370b..000000000 --- a/tests/safeds/data/tabular/query/_lazy_string_operations/test_length.py +++ /dev/null @@ -1,26 +0,0 @@ -import pytest - -from tests.helpers import assert_cell_operation_works - - -@pytest.mark.parametrize( - ("string", "optimize_for_ascii", "expected"), - [ - ("", False, 0), - ("", True, 0), - ("abc", False, 3), - ("abc", True, 3), - ], - ids=[ - "empty (unoptimized)", - "empty (optimized)", - "non-empty (unoptimized)", - "non-empty (optimized)", - ], -) -def test_should_return_number_of_characters(string: str, optimize_for_ascii: bool, expected: bool) -> None: - assert_cell_operation_works( - string, - lambda cell: cell.str.length(optimize_for_ascii=optimize_for_ascii), - expected, - ) diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_replace.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_replace.py deleted file mode 100644 index 
f1f32c07a..000000000 --- a/tests/safeds/data/tabular/query/_lazy_string_operations/test_replace.py +++ /dev/null @@ -1,24 +0,0 @@ -import pytest - -from tests.helpers import assert_cell_operation_works - - -@pytest.mark.parametrize( - ("string", "old", "new", "expected"), - [ - ("", "a", "b", ""), - ("abc", "", "d", "dadbdcd"), - ("abc", "a", "", "bc"), - ("abc", "d", "e", "abc"), - ("aba", "a", "d", "dbd"), - ], - ids=[ - "empty string", - "empty old", - "empty new", - "no occurrences", - "replace all occurrences", - ], -) -def test_should_replace_all_occurrences_of_old_with_new(string: str, old: str, new: str, expected: str) -> None: - assert_cell_operation_works(string, lambda cell: cell.str.replace(old, new), expected) diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_reverse.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_reverse.py new file mode 100644 index 000000000..28dcf2cca --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_string_operations/test_reverse.py @@ -0,0 +1,21 @@ +import pytest + +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("string", "expected"), + [ + ("", ""), + ("abc", "cba"), + (None, None), + ], + ids=[ + "empty", + "non-empty", + "None", + ], +) +def test_should_reverse_string(string: str | None, expected: str | None) -> None: + assert_cell_operation_works(string, lambda cell: cell.str.reverse(), expected, type_if_none=ColumnType.string()) diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_starts_with.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_starts_with.py deleted file mode 100644 index 7d402cd0b..000000000 --- a/tests/safeds/data/tabular/query/_lazy_string_operations/test_starts_with.py +++ /dev/null @@ -1,22 +0,0 @@ -import pytest - -from tests.helpers import assert_cell_operation_works - - -@pytest.mark.parametrize( - ("string", "prefix", 
"expected"), - [ - ("", "a", False), - ("abc", "", True), - ("abc", "a", True), - ("abc", "c", False), - ], - ids=[ - "empty string", - "empty prefix", - "starts with", - "does not start with", - ], -) -def test_should_check_whether_string_start_with_prefix(string: str, prefix: str, expected: bool) -> None: - assert_cell_operation_works(string, lambda cell: cell.str.starts_with(prefix), expected) diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_substring.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_substring.py deleted file mode 100644 index 8d1164a38..000000000 --- a/tests/safeds/data/tabular/query/_lazy_string_operations/test_substring.py +++ /dev/null @@ -1,36 +0,0 @@ -import pytest - -from safeds.exceptions import OutOfBoundsError -from tests.helpers import assert_cell_operation_works - - -@pytest.mark.parametrize( - ("string", "start", "length", "expected"), - [ - ("", 0, None, ""), - ("abc", 0, None, "abc"), - ("abc", 1, None, "bc"), - ("abc", 10, None, ""), - ("abc", -1, None, "c"), - ("abc", -10, None, "abc"), - ("abc", 0, 1, "a"), - ("abc", 0, 10, "abc"), - ], - ids=[ - "empty", - "full string", - "positive start in bounds", - "positive start out of bounds", - "negative start in bounds", - "negative start out of bounds", - "positive length in bounds", - "positive length out of bounds", - ], -) -def test_should_return_substring(string: str, start: int, length: int | None, expected: str) -> None: - assert_cell_operation_works(string, lambda cell: cell.str.substring(start=start, length=length), expected) - - -def test_should_raise_if_length_is_negative() -> None: - with pytest.raises(OutOfBoundsError): - assert_cell_operation_works("abc", lambda cell: cell.str.substring(length=-1), None) diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_date.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_date.py deleted file mode 100644 index 677438e0a..000000000 --- 
a/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_date.py +++ /dev/null @@ -1,22 +0,0 @@ -import datetime - -import pytest - -from tests.helpers import assert_cell_operation_works - - -@pytest.mark.parametrize( - ("string", "expected"), - [ - ("", None), - ("2022-01-09", datetime.date(2022, 1, 9)), - ("abc", None), - ], - ids=[ - "empty", - "ISO date", - "invalid string", - ], -) -def test_should_parse_date(string: str, expected: bool) -> None: - assert_cell_operation_works(string, lambda cell: cell.str.to_date(), expected) diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_datetime.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_datetime.py deleted file mode 100644 index 4c96d03d0..000000000 --- a/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_datetime.py +++ /dev/null @@ -1,22 +0,0 @@ -import datetime - -import pytest - -from tests.helpers import assert_cell_operation_works - - -@pytest.mark.parametrize( - ("string", "expected"), - [ - ("", None), - ("2022-01-09T23:29:01Z", datetime.datetime(2022, 1, 9, 23, 29, 1, tzinfo=datetime.UTC)), - ("abc", None), - ], - ids=[ - "empty", - "ISO datetime", - "invalid string", - ], -) -def test_should_parse_datetimes(string: str, expected: bool) -> None: - assert_cell_operation_works(string, lambda cell: cell.str.to_datetime(), expected) diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_int.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_int.py deleted file mode 100644 index b4b3256cc..000000000 --- a/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_int.py +++ /dev/null @@ -1,26 +0,0 @@ -import pytest - -from tests.helpers import assert_cell_operation_works - - -@pytest.mark.parametrize( - ("string", "base", "expected"), - [ - ("", 10, None), - ("11", 10, 11), - ("11", 2, 3), - ("abc", 10, None), - ], - ids=[ - "empty", - "11 base 10", - "11 base 2", - "invalid string", - ], -) -def 
test_should_parse_integer(string: str, base: int, expected: bool) -> None: - assert_cell_operation_works( - string, - lambda cell: cell.str.to_int(base=base), - expected, - ) diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_lowercase.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_lowercase.py deleted file mode 100644 index f4c880761..000000000 --- a/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_lowercase.py +++ /dev/null @@ -1,18 +0,0 @@ -import pytest - -from tests.helpers import assert_cell_operation_works - - -@pytest.mark.parametrize( - ("string", "expected"), - [ - ("", ""), - ("AbC", "abc"), - ], - ids=[ - "empty", - "non-empty", - ], -) -def test_should_lowercase_a_string(string: str, expected: str) -> None: - assert_cell_operation_works(string, lambda cell: cell.str.to_lowercase(), expected) diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_uppercase.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_uppercase.py deleted file mode 100644 index cfb14c7d2..000000000 --- a/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_uppercase.py +++ /dev/null @@ -1,18 +0,0 @@ -import pytest - -from tests.helpers import assert_cell_operation_works - - -@pytest.mark.parametrize( - ("string", "expected"), - [ - ("", ""), - ("AbC", "ABC"), - ], - ids=[ - "empty", - "non-empty", - ], -) -def test_should_uppercase_a_string(string: str, expected: str) -> None: - assert_cell_operation_works(string, lambda cell: cell.str.to_uppercase(), expected) diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_trim.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_trim.py deleted file mode 100644 index 2b2101e4e..000000000 --- a/tests/safeds/data/tabular/query/_lazy_string_operations/test_trim.py +++ /dev/null @@ -1,24 +0,0 @@ -import pytest - -from tests.helpers import assert_cell_operation_works - - -@pytest.mark.parametrize( - 
("string", "expected"), - [ - ("", ""), - ("abc", "abc"), - (" abc", "abc"), - ("abc ", "abc"), - (" abc ", "abc"), - ], - ids=[ - "empty", - "non-empty", - "whitespace start", - "whitespace end", - "whitespace start and end", - ], -) -def test_should_remove_whitespace_prefix_and_suffix(string: str, expected: str) -> None: - assert_cell_operation_works(string, lambda cell: cell.str.trim(), expected) diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_trim_end.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_trim_end.py deleted file mode 100644 index af0cd88dc..000000000 --- a/tests/safeds/data/tabular/query/_lazy_string_operations/test_trim_end.py +++ /dev/null @@ -1,24 +0,0 @@ -import pytest - -from tests.helpers import assert_cell_operation_works - - -@pytest.mark.parametrize( - ("string", "expected"), - [ - ("", ""), - ("abc", "abc"), - (" abc", " abc"), - ("abc ", "abc"), - (" abc ", " abc"), - ], - ids=[ - "empty", - "non-empty", - "whitespace start", - "whitespace end", - "whitespace start and end", - ], -) -def test_should_remove_whitespace_suffix(string: str, expected: str) -> None: - assert_cell_operation_works(string, lambda cell: cell.str.trim_end(), expected) diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_trim_start.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_trim_start.py deleted file mode 100644 index 6b487f6e7..000000000 --- a/tests/safeds/data/tabular/query/_lazy_string_operations/test_trim_start.py +++ /dev/null @@ -1,24 +0,0 @@ -import pytest - -from tests.helpers import assert_cell_operation_works - - -@pytest.mark.parametrize( - ("string", "expected"), - [ - ("", ""), - ("abc", "abc"), - (" abc", "abc"), - ("abc ", "abc "), - (" abc ", "abc "), - ], - ids=[ - "empty", - "non-empty", - "whitespace start", - "whitespace end", - "whitespace start and end", - ], -) -def test_should_remove_whitespace_prefix(string: str, expected: str) -> None: - 
assert_cell_operation_works(string, lambda cell: cell.str.trim_start(), expected) From a1daa122dc98052475b1f15220d0eb23efa0e381 Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Tue, 21 Jan 2025 11:29:11 +0100 Subject: [PATCH 03/26] feat: `to_uppercase` --- .../tabular/query/_lazy_string_operations.py | 6 +- .../data/tabular/query/_string_operations.py | 55 +++++++++---------- .../test_to_uppercase.py | 27 +++++++++ 3 files changed, 57 insertions(+), 31 deletions(-) create mode 100644 tests/safeds/data/tabular/query/_lazy_string_operations/test_to_uppercase.py diff --git a/src/safeds/data/tabular/query/_lazy_string_operations.py b/src/safeds/data/tabular/query/_lazy_string_operations.py index 43bdc3033..f1ea4b87d 100644 --- a/src/safeds/data/tabular/query/_lazy_string_operations.py +++ b/src/safeds/data/tabular/query/_lazy_string_operations.py @@ -47,6 +47,9 @@ def __str__(self) -> str: def reverse(self) -> Cell[str | None]: return _LazyCell(self._expression.str.reverse()) + def to_uppercase(self) -> Cell[str | None]: + return _LazyCell(self._expression.str.to_uppercase()) + # def contains(self, substring: _ConvertibleToStringCell) -> Cell[bool | None]: # return _LazyCell(self._expression.str.contains(substring, literal=True)) # @@ -109,9 +112,6 @@ def reverse(self) -> Cell[str | None]: # # return _LazyCell(self._expression.str.to_time(format=format, strict=False)) # - # def to_uppercase(self) -> Cell[str | None]: - # return _LazyCell(self._expression.str.to_uppercase()) - # # def trim(self) -> Cell[str | None]: # return _LazyCell(self._expression.str.strip_chars()) # diff --git a/src/safeds/data/tabular/query/_string_operations.py b/src/safeds/data/tabular/query/_string_operations.py index cf92543e0..ac02c325f 100644 --- a/src/safeds/data/tabular/query/_string_operations.py +++ b/src/safeds/data/tabular/query/_string_operations.py @@ -84,6 +84,32 @@ def reverse(self) -> Cell[str | None]: +------+ """ + @abstractmethod + def to_uppercase(self) -> Cell[str | None]: 
+ """ + Convert the string to uppercase. + + Returns + ------- + cell: + The uppercase string. + + Examples + -------- + >>> from safeds.data.tabular.containers import Column + >>> column = Column("a", ["ab", "bc", None]) + >>> column.transform(lambda cell: cell.str.to_uppercase()) + +------+ + | a | + | --- | + | str | + +======+ + | AB | + | BC | + | null | + +------+ + """ + # @abstractmethod # def contains(self, substring: _ConvertibleToStringCell) -> Cell[bool | None]: # """ @@ -455,34 +481,7 @@ def reverse(self) -> Cell[str | None]: # | null | # +------+ # """ - # - # @abstractmethod - # def to_uppercase(self) -> Cell[str | None]: - # """ - # Convert the string value in the cell to uppercase. - # - # Returns - # ------- - # uppercase: - # The string value in uppercase. - # - # Examples - # -------- - # >>> from safeds.data.tabular.containers import Column - # >>> column = Column("a", ["ab", "bc", "cd", None]) - # >>> column.transform(lambda cell: cell.str.to_uppercase()) - # +------+ - # | a | - # | --- | - # | str | - # +======+ - # | AB | - # | BC | - # | CD | - # | null | - # +------+ - # """ - # + # @abstractmethod # def trim(self) -> Cell[str | None]: # """ diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_uppercase.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_uppercase.py new file mode 100644 index 000000000..0e49f80cf --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_uppercase.py @@ -0,0 +1,27 @@ +import pytest + +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("string", "expected"), + [ + ("", ""), + ("abc", "ABC"), + ("ABC", "ABC"), + ("aBc", "ABC"), + (None, None), + ], + ids=[ + "empty", + "full lowercase", + "full uppercase", + "mixed", + "None", + ], +) +def test_should_convert_string_to_uppercase(string: str | None, expected: str | None) -> None: + 
assert_cell_operation_works( + string, lambda cell: cell.str.to_uppercase(), expected, type_if_none=ColumnType.string() + ) From 8adfbd28387ad2779937450245b3271856f18b4e Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Tue, 21 Jan 2025 11:33:02 +0100 Subject: [PATCH 04/26] feat: `to_lowercase` --- .../tabular/query/_lazy_string_operations.py | 6 +-- .../data/tabular/query/_string_operations.py | 53 +++++++++---------- .../test_to_lowercase.py | 30 +++++++++++ 3 files changed, 59 insertions(+), 30 deletions(-) create mode 100644 tests/safeds/data/tabular/query/_lazy_string_operations/test_to_lowercase.py diff --git a/src/safeds/data/tabular/query/_lazy_string_operations.py b/src/safeds/data/tabular/query/_lazy_string_operations.py index f1ea4b87d..d63826be6 100644 --- a/src/safeds/data/tabular/query/_lazy_string_operations.py +++ b/src/safeds/data/tabular/query/_lazy_string_operations.py @@ -47,6 +47,9 @@ def __str__(self) -> str: def reverse(self) -> Cell[str | None]: return _LazyCell(self._expression.str.reverse()) + def to_lowercase(self) -> Cell[str | None]: + return _LazyCell(self._expression.str.to_lowercase()) + def to_uppercase(self) -> Cell[str | None]: return _LazyCell(self._expression.str.to_uppercase()) @@ -101,9 +104,6 @@ def to_uppercase(self) -> Cell[str | None]: # def to_int(self, *, base: _ConvertibleToIntCell = 10) -> Cell[int | None]: # return _LazyCell(self._expression.str.to_integer(base=base, strict=False)) # - # def to_lowercase(self) -> Cell[str | None]: - # return _LazyCell(self._expression.str.to_lowercase()) - # # def to_time(self, *, format: str | None = "iso") -> Cell[datetime.time | None]: # if format == "iso": # format = "%T" diff --git a/src/safeds/data/tabular/query/_string_operations.py b/src/safeds/data/tabular/query/_string_operations.py index ac02c325f..2febd3dcb 100644 --- a/src/safeds/data/tabular/query/_string_operations.py +++ b/src/safeds/data/tabular/query/_string_operations.py @@ -84,6 +84,32 @@ def reverse(self) -> 
Cell[str | None]: +------+ """ + @abstractmethod + def to_lowercase(self) -> Cell[str | None]: + """ + Convert the string to lowercase. + + Returns + ------- + cell: + The lowercase string. + + Examples + -------- + >>> from safeds.data.tabular.containers import Column + >>> column = Column("a", ["AB", "BC", None]) + >>> column.transform(lambda cell: cell.str.to_lowercase()) + +------+ + | a | + | --- | + | str | + +======+ + | ab | + | bc | + | null | + +------+ + """ + @abstractmethod def to_uppercase(self) -> Cell[str | None]: """ @@ -454,33 +480,6 @@ def to_uppercase(self) -> Cell[str | None]: # | null | # +------+ # """ - # - # @abstractmethod - # def to_lowercase(self) -> Cell[str | None]: - # """ - # Convert the string value in the cell to lowercase. - # - # Returns - # ------- - # lowercase: - # The string value in lowercase. - # - # Examples - # -------- - # >>> from safeds.data.tabular.containers import Column - # >>> column = Column("a", ["AB", "BC", "CD", None]) - # >>> column.transform(lambda cell: cell.str.to_lowercase()) - # +------+ - # | a | - # | --- | - # | str | - # +======+ - # | ab | - # | bc | - # | cd | - # | null | - # +------+ - # """ # @abstractmethod # def trim(self) -> Cell[str | None]: diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_lowercase.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_lowercase.py new file mode 100644 index 000000000..95ff9cebc --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_lowercase.py @@ -0,0 +1,30 @@ +import pytest + +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("string", "expected"), + [ + ("", ""), + ("abc", "abc"), + ("ABC", "abc"), + ("aBc", "abc"), + (None, None), + ], + ids=[ + "empty", + "full lowercase", + "full uppercase", + "mixed", + "None", + ], +) +def test_should_convert_string_to_lowercase(string: str | None, expected: 
str | None) -> None: + assert_cell_operation_works( + string, + lambda cell: cell.str.to_lowercase(), + expected, + type_if_none=ColumnType.string(), + ) From 21d7da38714c40067f0a1b418697f03a0f09ba35 Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Tue, 21 Jan 2025 11:43:21 +0100 Subject: [PATCH 05/26] feat: `ends_with` --- .../tabular/query/_lazy_string_operations.py | 8 +- .../data/tabular/query/_string_operations.py | 73 +++++++++---------- .../_lazy_string_operations/test_ends_with.py | 38 ++++++++++ .../_lazy_string_operations/test_reverse.py | 6 +- .../test_to_lowercase.py | 6 +- .../test_to_uppercase.py | 9 ++- 6 files changed, 89 insertions(+), 51 deletions(-) create mode 100644 tests/safeds/data/tabular/query/_lazy_string_operations/test_ends_with.py diff --git a/src/safeds/data/tabular/query/_lazy_string_operations.py b/src/safeds/data/tabular/query/_lazy_string_operations.py index d63826be6..0249cfa0f 100644 --- a/src/safeds/data/tabular/query/_lazy_string_operations.py +++ b/src/safeds/data/tabular/query/_lazy_string_operations.py @@ -10,6 +10,7 @@ if TYPE_CHECKING: import polars as pl + from safeds._typing import _ConvertibleToStringCell from safeds.data.tabular.containers._cell import Cell @@ -44,6 +45,9 @@ def __str__(self) -> str: # String operations # ------------------------------------------------------------------------------------------------------------------ + def ends_with(self, suffix: _ConvertibleToStringCell) -> Cell[bool | None]: + return _LazyCell(self._expression.str.ends_with(suffix)) + def reverse(self) -> Cell[str | None]: return _LazyCell(self._expression.str.reverse()) @@ -61,9 +65,7 @@ def to_uppercase(self) -> Cell[str | None]: # return _LazyCell(self._expression.str.len_bytes()) # else: # return _LazyCell(self._expression.str.len_chars()) - # - # def ends_with(self, suffix: _ConvertibleToStringCell) -> Cell[bool | None]: - # return _LazyCell(self._expression.str.ends_with(suffix)) + # # def index_of(self, substring: 
_ConvertibleToStringCell) -> Cell[int | None]: # return _LazyCell(self._expression.str.find(substring, literal=True)) diff --git a/src/safeds/data/tabular/query/_string_operations.py b/src/safeds/data/tabular/query/_string_operations.py index 2febd3dcb..b61f898e8 100644 --- a/src/safeds/data/tabular/query/_string_operations.py +++ b/src/safeds/data/tabular/query/_string_operations.py @@ -4,14 +4,9 @@ from typing import TYPE_CHECKING if TYPE_CHECKING: + from safeds._typing import _ConvertibleToStringCell from safeds.data.tabular.containers import Cell -# TODO: examples with None -# TODO: add more methods -# - reverse -# - to_time -# - ... - class StringOperations(ABC): """ @@ -58,6 +53,38 @@ def __str__(self) -> str: ... # String operations # ------------------------------------------------------------------------------------------------------------------ + @abstractmethod + def ends_with(self, suffix: _ConvertibleToStringCell) -> Cell[bool | None]: + """ + Check if the string ends with the suffix. + + Parameters + ---------- + suffix: + The expected suffix. + + Returns + ------- + ends_with: + Whether the string ends with the suffix. + + Examples + -------- + >>> from safeds.data.tabular.containers import Column + >>> column = Column("a", ["ab", "bc", None]) + >>> column.transform(lambda cell: cell.str.ends_with("b")) + +-------+ + | a | + | --- | + | bool | + +=======+ + | true | + | false | + | null | + +-------+ + """ + + @abstractmethod def reverse(self) -> Cell[str | None]: """ @@ -167,39 +194,7 @@ def to_uppercase(self) -> Cell[str | None]: # | null | # +-------+ # """ - # - # @abstractmethod - # def ends_with(self, suffix: _ConvertibleToStringCell) -> Cell[bool | None]: - # """ - # Check if the string value in the cell ends with the suffix. - # - # Parameters - # ---------- - # suffix: - # The suffix to search for. - # - # Returns - # ------- - # ends_with: - # Whether the string value ends with the suffix. 
- # - # Examples - # -------- - # >>> from safeds.data.tabular.containers import Column - # >>> column = Column("a", ["ab", "bc", "cd", None]) - # >>> column.transform(lambda cell: cell.str.ends_with("c")) - # +-------+ - # | a | - # | --- | - # | bool | - # +=======+ - # | false | - # | true | - # | false | - # | null | - # +-------+ - # """ - # + # @abstractmethod # def index_of(self, substring: _ConvertibleToStringCell) -> Cell[int | None]: # """ diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_ends_with.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_ends_with.py new file mode 100644 index 000000000..d7629506c --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_string_operations/test_ends_with.py @@ -0,0 +1,38 @@ +import pytest + +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "suffix", "expected"), + [ + ("", "", True), + ("", "c", False), + ("abc", "", True), + ("abc", "c", True), + ("abc", "abc", True), + ("abc", "d", False), + (None, "", None), + ("abc", None, None), + (None, None, None), + ], + ids=[ + "empty string, empty suffix", + "empty string, non-empty suffix", + "non-empty string, empty suffix", + "correct suffix", + "suffix equal to string", + "incorrect suffix", + "None as string", + "None as suffix", + "None for both", + ], +) +def test_should_check_if_string_ends_with_suffix(value: str | None, suffix: str | None, expected: bool | None) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.ends_with(suffix), + expected, + type_if_none=ColumnType.string(), + ) diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_reverse.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_reverse.py index 28dcf2cca..30d694c90 100644 --- a/tests/safeds/data/tabular/query/_lazy_string_operations/test_reverse.py +++ 
b/tests/safeds/data/tabular/query/_lazy_string_operations/test_reverse.py @@ -5,7 +5,7 @@ @pytest.mark.parametrize( - ("string", "expected"), + ("value", "expected"), [ ("", ""), ("abc", "cba"), @@ -17,5 +17,5 @@ "None", ], ) -def test_should_reverse_string(string: str | None, expected: str | None) -> None: - assert_cell_operation_works(string, lambda cell: cell.str.reverse(), expected, type_if_none=ColumnType.string()) +def test_should_reverse_string(value: str | None, expected: str | None) -> None: + assert_cell_operation_works(value, lambda cell: cell.str.reverse(), expected, type_if_none=ColumnType.string()) diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_lowercase.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_lowercase.py index 95ff9cebc..21a9db2a3 100644 --- a/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_lowercase.py +++ b/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_lowercase.py @@ -5,7 +5,7 @@ @pytest.mark.parametrize( - ("string", "expected"), + ("value", "expected"), [ ("", ""), ("abc", "abc"), @@ -21,9 +21,9 @@ "None", ], ) -def test_should_convert_string_to_lowercase(string: str | None, expected: str | None) -> None: +def test_should_convert_string_to_lowercase(value: str | None, expected: str | None) -> None: assert_cell_operation_works( - string, + value, lambda cell: cell.str.to_lowercase(), expected, type_if_none=ColumnType.string(), diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_uppercase.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_uppercase.py index 0e49f80cf..87d05bacb 100644 --- a/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_uppercase.py +++ b/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_uppercase.py @@ -5,7 +5,7 @@ @pytest.mark.parametrize( - ("string", "expected"), + ("value", "expected"), [ ("", ""), ("abc", "ABC"), @@ -21,7 +21,10 @@ "None", ], ) -def 
test_should_convert_string_to_uppercase(string: str | None, expected: str | None) -> None: +def test_should_convert_string_to_uppercase(value: str | None, expected: str | None) -> None: assert_cell_operation_works( - string, lambda cell: cell.str.to_uppercase(), expected, type_if_none=ColumnType.string() + value, + lambda cell: cell.str.to_uppercase(), + expected, + type_if_none=ColumnType.string(), ) From 4ac1bbf2126f5ce00d3cd92b6fde57ae84506347 Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Tue, 21 Jan 2025 11:45:43 +0100 Subject: [PATCH 06/26] feat: `starts_with` --- .../tabular/query/_lazy_string_operations.py | 3 + .../data/tabular/query/_string_operations.py | 66 +++++++++---------- .../test_starts_with.py | 42 ++++++++++++ 3 files changed, 77 insertions(+), 34 deletions(-) create mode 100644 tests/safeds/data/tabular/query/_lazy_string_operations/test_starts_with.py diff --git a/src/safeds/data/tabular/query/_lazy_string_operations.py b/src/safeds/data/tabular/query/_lazy_string_operations.py index 0249cfa0f..8cc1cd90f 100644 --- a/src/safeds/data/tabular/query/_lazy_string_operations.py +++ b/src/safeds/data/tabular/query/_lazy_string_operations.py @@ -51,6 +51,9 @@ def ends_with(self, suffix: _ConvertibleToStringCell) -> Cell[bool | None]: def reverse(self) -> Cell[str | None]: return _LazyCell(self._expression.str.reverse()) + def starts_with(self, prefix: _ConvertibleToStringCell) -> Cell[bool | None]: + return _LazyCell(self._expression.str.starts_with(prefix)) + def to_lowercase(self) -> Cell[str | None]: return _LazyCell(self._expression.str.to_lowercase()) diff --git a/src/safeds/data/tabular/query/_string_operations.py b/src/safeds/data/tabular/query/_string_operations.py index b61f898e8..4a6b6be1f 100644 --- a/src/safeds/data/tabular/query/_string_operations.py +++ b/src/safeds/data/tabular/query/_string_operations.py @@ -84,7 +84,6 @@ def ends_with(self, suffix: _ConvertibleToStringCell) -> Cell[bool | None]: +-------+ """ - @abstractmethod def 
reverse(self) -> Cell[str | None]: """ @@ -111,6 +110,37 @@ def reverse(self) -> Cell[str | None]: +------+ """ + @abstractmethod + def starts_with(self, prefix: _ConvertibleToStringCell) -> Cell[bool | None]: + """ + Check if the string starts with the prefix. + + Parameters + ---------- + prefix: + The expected prefix. + + Returns + ------- + starts_with: + Whether the string starts with the prefix. + + Examples + -------- + >>> from safeds.data.tabular.containers import Column + >>> column = Column("a", ["ab", "bc", None]) + >>> column.transform(lambda cell: cell.str.starts_with("a")) + +-------+ + | a | + | --- | + | bool | + +=======+ + | true | + | false | + | null | + +-------+ + """ + @abstractmethod def to_lowercase(self) -> Cell[str | None]: """ @@ -293,39 +323,7 @@ def to_uppercase(self) -> Cell[str | None]: # | null | # +------+ # """ - # - # @abstractmethod - # def starts_with(self, prefix: _ConvertibleToStringCell) -> Cell[bool | None]: - # """ - # Check if the string value in the cell starts with the prefix. - # - # Parameters - # ---------- - # prefix: - # The prefix to search for. - # - # Returns - # ------- - # starts_with: - # Whether the string value starts with the prefix. 
- # - # Examples - # -------- - # >>> from safeds.data.tabular.containers import Column - # >>> column = Column("a", ["ab", "bc", "cd", None]) - # >>> column.transform(lambda cell: cell.str.starts_with("a")) - # +-------+ - # | a | - # | --- | - # | bool | - # +=======+ - # | true | - # | false | - # | false | - # | null | - # +-------+ - # """ - # + # @abstractmethod # def substring( # self, diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_starts_with.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_starts_with.py new file mode 100644 index 000000000..141974bef --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_string_operations/test_starts_with.py @@ -0,0 +1,42 @@ +import pytest + +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "prefix", "expected"), + [ + ("", "", True), + ("", "a", False), + ("abc", "", True), + ("abc", "a", True), + ("abc", "abc", True), + ("abc", "d", False), + (None, "", None), + ("abc", None, None), + (None, None, None), + ], + ids=[ + "empty string, empty prefix", + "empty string, non-empty prefix", + "non-empty string, empty prefix", + "correct prefix", + "prefix equal to string", + "incorrect prefix", + "None as string", + "None as prefix", + "None for both", + ], +) +def test_should_check_if_string_starts_with_prefix( + value: str | None, + prefix: str | None, + expected: bool | None, +) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.starts_with(prefix), + expected, + type_if_none=ColumnType.string(), + ) From eb123c194cfde8acbda081fbe36f1232b5657436 Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Tue, 21 Jan 2025 11:53:50 +0100 Subject: [PATCH 07/26] feat: `length` --- .../tabular/query/_lazy_string_operations.py | 11 +-- .../data/tabular/query/_string_operations.py | 71 ++++++++++--------- .../_lazy_string_operations/test_length.py | 34 +++++++++ 3 files 
changed, 76 insertions(+), 40 deletions(-) create mode 100644 tests/safeds/data/tabular/query/_lazy_string_operations/test_length.py diff --git a/src/safeds/data/tabular/query/_lazy_string_operations.py b/src/safeds/data/tabular/query/_lazy_string_operations.py index 8cc1cd90f..80e3902f0 100644 --- a/src/safeds/data/tabular/query/_lazy_string_operations.py +++ b/src/safeds/data/tabular/query/_lazy_string_operations.py @@ -48,6 +48,12 @@ def __str__(self) -> str: def ends_with(self, suffix: _ConvertibleToStringCell) -> Cell[bool | None]: return _LazyCell(self._expression.str.ends_with(suffix)) + def length(self, optimize_for_ascii: bool = False) -> Cell[int | None]: + if optimize_for_ascii: + return _LazyCell(self._expression.str.len_bytes()) + else: + return _LazyCell(self._expression.str.len_chars()) + def reverse(self) -> Cell[str | None]: return _LazyCell(self._expression.str.reverse()) @@ -63,11 +69,6 @@ def to_uppercase(self) -> Cell[str | None]: # def contains(self, substring: _ConvertibleToStringCell) -> Cell[bool | None]: # return _LazyCell(self._expression.str.contains(substring, literal=True)) # - # def length(self, optimize_for_ascii: bool = False) -> Cell[int | None]: - # if optimize_for_ascii: - # return _LazyCell(self._expression.str.len_bytes()) - # else: - # return _LazyCell(self._expression.str.len_chars()) # # def index_of(self, substring: _ConvertibleToStringCell) -> Cell[int | None]: diff --git a/src/safeds/data/tabular/query/_string_operations.py b/src/safeds/data/tabular/query/_string_operations.py index 4a6b6be1f..3e98a5514 100644 --- a/src/safeds/data/tabular/query/_string_operations.py +++ b/src/safeds/data/tabular/query/_string_operations.py @@ -65,7 +65,7 @@ def ends_with(self, suffix: _ConvertibleToStringCell) -> Cell[bool | None]: Returns ------- - ends_with: + cell: Whether the string ends with the suffix. 
Examples @@ -84,6 +84,39 @@ def ends_with(self, suffix: _ConvertibleToStringCell) -> Cell[bool | None]: +-------+ """ + @abstractmethod + def length(self, *, optimize_for_ascii: bool = False) -> Cell[int | None]: + """ + Get the number of characters. + + Parameters + ---------- + optimize_for_ascii: + Greatly speed up this operation if the string is ASCII-only. If the string contains non-ASCII characters, + this option will return incorrect results, though. + + Returns + ------- + cell: + The number of characters. + + Examples + -------- + >>> from safeds.data.tabular.containers import Column + >>> column = Column("a", ["", "a", "abc", None]) + >>> column.transform(lambda cell: cell.str.length()) + +------+ + | a | + | --- | + | u32 | + +======+ + | 0 | + | 1 | + | 3 | + | null | + +------+ + """ + @abstractmethod def reverse(self) -> Cell[str | None]: """ @@ -122,7 +155,7 @@ def starts_with(self, prefix: _ConvertibleToStringCell) -> Cell[bool | None]: Returns ------- - starts_with: + cell: Whether the string starts with the prefix. Examples @@ -257,39 +290,7 @@ def to_uppercase(self) -> Cell[str | None]: # +------+ # """ # - # @abstractmethod - # def length(self, *, optimize_for_ascii: bool = False) -> Cell[int | None]: - # """ - # Get the number of characters of the string value in the cell. - # - # Parameters - # ---------- - # optimize_for_ascii: - # Greatly speed up this operation if the string is ASCII-only. If the string contains non-ASCII characters, - # this option will return incorrect results, though. - # - # Returns - # ------- - # length: - # The length of the string value. 
- # - # Examples - # -------- - # >>> from safeds.data.tabular.containers import Column - # >>> column = Column("a", ["", "a", "abc", None]) - # >>> column.transform(lambda cell: cell.str.length()) - # +------+ - # | a | - # | --- | - # | u32 | - # +======+ - # | 0 | - # | 1 | - # | 3 | - # | null | - # +------+ - # """ - # + # @abstractmethod # def replace(self, old: _ConvertibleToStringCell, new: _ConvertibleToStringCell) -> Cell[str | None]: # """ diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_length.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_length.py new file mode 100644 index 000000000..16a492048 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_string_operations/test_length.py @@ -0,0 +1,34 @@ +import pytest + +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "optimize_for_ascii", "expected"), + [ + ("", False, 0), + ("", True, 0), + ("abc", False, 3), + ("abc", True, 3), + ("a 🪲", False, 3), + ("a 🪲", True, 6), + (None, False, None), + ], + ids=[ + "empty (not optimized)", + "empty (optimized)", + "ASCII only (not optimized)", + "ASCII only (optimized)", + "unicode (not optimized)", + "unicode (optimized)", + "None", + ], +) +def test_should_get_number_of_characters(value: str | None, optimize_for_ascii: bool, expected: str | None) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.length(optimize_for_ascii=optimize_for_ascii), + expected, + type_if_none=ColumnType.string(), + ) From d44c62cf1e3722c972a53df32a2707905df90c03 Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Tue, 21 Jan 2025 12:32:46 +0100 Subject: [PATCH 08/26] feat: `pad_start` --- .../tabular/query/_lazy_string_operations.py | 8 +++ .../data/tabular/query/_string_operations.py | 53 +++++++++++++++++ .../_lazy_string_operations/test_pad_start.py | 57 +++++++++++++++++++ 3 files changed, 118 insertions(+) create 
mode 100644 tests/safeds/data/tabular/query/_lazy_string_operations/test_pad_start.py diff --git a/src/safeds/data/tabular/query/_lazy_string_operations.py b/src/safeds/data/tabular/query/_lazy_string_operations.py index 80e3902f0..632cd1e13 100644 --- a/src/safeds/data/tabular/query/_lazy_string_operations.py +++ b/src/safeds/data/tabular/query/_lazy_string_operations.py @@ -3,6 +3,7 @@ from typing import TYPE_CHECKING from safeds._utils import _structural_hash +from safeds._validation import _check_bounds, _ClosedBound from safeds.data.tabular.containers._lazy_cell import _LazyCell from ._string_operations import StringOperations @@ -54,6 +55,13 @@ def length(self, optimize_for_ascii: bool = False) -> Cell[int | None]: else: return _LazyCell(self._expression.str.len_chars()) + def pad_start(self, length: int, *, character: str = " ") -> Cell[str | None]: + _check_bounds("length", length, lower_bound=_ClosedBound(0)) + if len(character) != 1: + raise ValueError("Can only pad with a single character.") + + return _LazyCell(self._expression.str.pad_start(length, character)) + def reverse(self) -> Cell[str | None]: return _LazyCell(self._expression.str.reverse()) diff --git a/src/safeds/data/tabular/query/_string_operations.py b/src/safeds/data/tabular/query/_string_operations.py index 3e98a5514..0bc98b50d 100644 --- a/src/safeds/data/tabular/query/_string_operations.py +++ b/src/safeds/data/tabular/query/_string_operations.py @@ -6,6 +6,7 @@ if TYPE_CHECKING: from safeds._typing import _ConvertibleToStringCell from safeds.data.tabular.containers import Cell + from safeds.exceptions import OutOfBoundsError # noqa: F401 class StringOperations(ABC): @@ -117,6 +118,58 @@ def length(self, *, optimize_for_ascii: bool = False) -> Cell[int | None]: +------+ """ + @abstractmethod + def pad_start(self, length: int, *, character: str = " ") -> Cell[str | None]: + """ + Pad the start of the string with the given character until it has the given length. 
+ + Parameters + ---------- + length: + The minimum length of the string. If the string is already at least as long, it is returned unchanged. Must + be greater than or equal to 0. + character: + How to pad the string. Must be a single character. + + Returns + ------- + cell: + The padded string. + + Raises + ------ + OutOfBoundsError + If `length` is less than 0. + ValueError + If `character` is not a single character. + + Examples + -------- + >>> from safeds.data.tabular.containers import Column + >>> column = Column("a", ["ab", "bcde", None]) + >>> column.transform(lambda cell: cell.str.pad_start(3)) + +------+ + | a | + | --- | + | str | + +======+ + | ab | + | bcde | + | null | + +------+ + + >>> column.transform(lambda cell: cell.str.pad_start(3, character="~")) + +------+ + | a | + | --- | + | str | + +======+ + | ~ab | + | bcde | + | null | + +------+ + """ + @abstractmethod def reverse(self) -> Cell[str | None]: """ diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_pad_start.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_pad_start.py new file mode 100644 index 000000000..8425c7bef --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_string_operations/test_pad_start.py @@ -0,0 +1,57 @@ +import pytest + +from safeds.data.tabular.containers import Column +from safeds.data.tabular.typing import ColumnType +from safeds.exceptions import OutOfBoundsError +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "length", "character", "expected"), + [ + ("", 0, "a", ""), + ("", 1, "a", "a"), + ("b", 2, "a", "ab"), + ("bc", 2, "a", "bc"), + ("abc", 2, "a", "abc"), + (None, 1, " ", None), + ], + ids=[ + "empty (length 0)", + "empty (length 1)", + "non-empty (shorter length)", + "non-empty (same length)", + "non-empty (longer length)", + "None", + ], +) +def test_should_pad_start(value: str | None, length: int, character: str, expected: bool | None) -> None: +
assert_cell_operation_works( + value, + lambda cell: cell.str.pad_start(length, character=character), + expected, + type_if_none=ColumnType.string(), + ) + + +def test_should_raise_if_length_is_out_of_bounds() -> None: + column = Column("col1", []) + with pytest.raises(OutOfBoundsError): + column.transform(lambda cell: cell.str.pad_start(-1)) + + +@pytest.mark.parametrize( + "character", + [ + "", + "ab", + ], + ids=[ + "empty string", + "multiple characters", + ], +) +def test_should_raise_if_char_is_not_single_character(character: str) -> None: + column = Column("col1", []) + with pytest.raises(ValueError, match=r"Can only pad with a single character\."): + column.transform(lambda cell: cell.str.pad_start(1, character=character)) From e910e350a1d5f65264e8d9f7c2fb1456a9601056 Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Tue, 21 Jan 2025 13:07:14 +0100 Subject: [PATCH 09/26] feat: `pad_end` --- .../tabular/query/_lazy_string_operations.py | 7 +++ .../data/tabular/query/_string_operations.py | 52 +++++++++++++++++ .../_lazy_string_operations/test_pad_end.py | 57 +++++++++++++++++++ 3 files changed, 116 insertions(+) create mode 100644 tests/safeds/data/tabular/query/_lazy_string_operations/test_pad_end.py diff --git a/src/safeds/data/tabular/query/_lazy_string_operations.py b/src/safeds/data/tabular/query/_lazy_string_operations.py index 632cd1e13..bd031acf1 100644 --- a/src/safeds/data/tabular/query/_lazy_string_operations.py +++ b/src/safeds/data/tabular/query/_lazy_string_operations.py @@ -55,6 +55,13 @@ def length(self, optimize_for_ascii: bool = False) -> Cell[int | None]: else: return _LazyCell(self._expression.str.len_chars()) + def pad_end(self, length: int, *, character: str = " ") -> Cell[str | None]: + _check_bounds("length", length, lower_bound=_ClosedBound(0)) + if len(character) != 1: + raise ValueError("Can only pad with a single character.") + + return _LazyCell(self._expression.str.pad_end(length, character)) + def pad_start(self, length: int, 
*, character: str = " ") -> Cell[str | None]: _check_bounds("length", length, lower_bound=_ClosedBound(0)) if len(character) != 1: diff --git a/src/safeds/data/tabular/query/_string_operations.py b/src/safeds/data/tabular/query/_string_operations.py index 0bc98b50d..7e3eabf29 100644 --- a/src/safeds/data/tabular/query/_string_operations.py +++ b/src/safeds/data/tabular/query/_string_operations.py @@ -118,6 +118,58 @@ def length(self, *, optimize_for_ascii: bool = False) -> Cell[int | None]: +------+ """ + @abstractmethod + def pad_end(self, length: int, *, character: str = " ") -> Cell[str | None]: + """ + Pad the end of the string with the given character until it has the given length. + + Parameters + ---------- + length: + The minimum length of the string. If the string is already at least as long, it is returned unchanged. Must + be greater than or equal to 0. + character: + How to pad the string. Must be a single character. + + Returns + ------- + cell: + The padded string. + + Raises + ------ + OutOfBoundsError + If `length` is less than 0. + ValueError + If `character` is not a single character.
+ + Examples + -------- + >>> from safeds.data.tabular.containers import Column + >>> column = Column("a", ["ab", "bcde", None]) + >>> column.transform(lambda cell: cell.str.pad_end(3)) + +------+ + | a | + | --- | + | str | + +======+ + | ab | + | bcde | + | null | + +------+ + + >>> column.transform(lambda cell: cell.str.pad_end(3, character="~")) + +------+ + | a | + | --- | + | str | + +======+ + | ab~ | + | bcde | + | null | + +------+ + """ + @abstractmethod def pad_start(self, length: int, *, character: str = " ") -> Cell[str | None]: """ diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_pad_end.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_pad_end.py new file mode 100644 index 000000000..ae9108597 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_string_operations/test_pad_end.py @@ -0,0 +1,57 @@ +import pytest + +from safeds.data.tabular.containers import Column +from safeds.data.tabular.typing import ColumnType +from safeds.exceptions import OutOfBoundsError +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "length", "character", "expected"), + [ + ("", 0, "a", ""), + ("", 1, "a", "a"), + ("b", 2, "a", "ba"), + ("bc", 2, "a", "bc"), + ("abc", 2, "a", "abc"), + (None, 1, " ", None), + ], + ids=[ + "empty (length 0)", + "empty (length 1)", + "non-empty (shorter length)", + "non-empty (same length)", + "non-empty (longer length)", + "None", + ], +) +def test_should_pad_end(value: str | None, length: int, character: str, expected: bool | None) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.pad_end(length, character=character), + expected, + type_if_none=ColumnType.string(), + ) + + +def test_should_raise_if_length_is_out_of_bounds() -> None: + column = Column("col1", []) + with pytest.raises(OutOfBoundsError): + column.transform(lambda cell: cell.str.pad_end(-1)) + + +@pytest.mark.parametrize( + "character", + [ + "", + "ab", + ], + 
ids=[ + "empty string", + "multiple characters", + ], +) +def test_should_raise_if_char_is_not_single_character(character: str) -> None: + column = Column("col1", []) + with pytest.raises(ValueError, match=r"Can only pad with a single character\."): + column.transform(lambda cell: cell.str.pad_end(1, character=character)) From 8654602dafb9629e209a167f82f46f38613cdbc5 Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Tue, 21 Jan 2025 13:30:08 +0100 Subject: [PATCH 10/26] feat: `to_float` --- .../tabular/query/_lazy_string_operations.py | 5 +++ .../data/tabular/query/_string_operations.py | 27 +++++++++++++++ .../_lazy_string_operations/test_to_float.py | 34 +++++++++++++++++++ 3 files changed, 66 insertions(+) create mode 100644 tests/safeds/data/tabular/query/_lazy_string_operations/test_to_float.py diff --git a/src/safeds/data/tabular/query/_lazy_string_operations.py b/src/safeds/data/tabular/query/_lazy_string_operations.py index bd031acf1..b0b91267b 100644 --- a/src/safeds/data/tabular/query/_lazy_string_operations.py +++ b/src/safeds/data/tabular/query/_lazy_string_operations.py @@ -75,6 +75,11 @@ def reverse(self) -> Cell[str | None]: def starts_with(self, prefix: _ConvertibleToStringCell) -> Cell[bool | None]: return _LazyCell(self._expression.str.starts_with(prefix)) + def to_float(self) -> Cell[float | None]: + import polars as pl + + return _LazyCell(self._expression.cast(pl.Float64(), strict=False)) + def to_lowercase(self) -> Cell[str | None]: return _LazyCell(self._expression.str.to_lowercase()) diff --git a/src/safeds/data/tabular/query/_string_operations.py b/src/safeds/data/tabular/query/_string_operations.py index 7e3eabf29..8ce1667c5 100644 --- a/src/safeds/data/tabular/query/_string_operations.py +++ b/src/safeds/data/tabular/query/_string_operations.py @@ -279,6 +279,33 @@ def starts_with(self, prefix: _ConvertibleToStringCell) -> Cell[bool | None]: +-------+ """ + @abstractmethod + def to_float(self) -> Cell[float | None]: + """ + Convert the 
string to a float. + + Returns + ------- + cell: + The float value. If the string cannot be converted to a float, None is returned. + + Examples + -------- + >>> from safeds.data.tabular.containers import Column + >>> column = Column("a", ["1", "1.5", "abc", None]) + >>> column.transform(lambda cell: cell.str.to_float()) + +------+ + | a | + | --- | + | f64 | + +======+ + | 1 | + | 1.5 | + | null | + | null | + +------+ + """ + @abstractmethod def to_lowercase(self) -> Cell[str | None]: """ diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_float.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_float.py new file mode 100644 index 000000000..5cd415b83 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_float.py @@ -0,0 +1,34 @@ +import pytest + +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + ("", None), + ("abc", None), + ("1", 1.0), + ("1.5", 1.5), + ("-1.5", -1.5), + ("1e3", 1000), + (None, None), + ], + ids=[ + "empty", + "invalid", + "int", + "positive float", + "negative float", + "exponential", + "None", + ], +) +def test_should_convert_string_to_float(value: str | None, expected: float | None) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.to_float(), + expected, + type_if_none=ColumnType.string(), + ) From 3b9c77c06fd900676febe884620987dc62a56b7e Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Tue, 21 Jan 2025 13:35:34 +0100 Subject: [PATCH 11/26] feat: `to_int` --- .../tabular/query/_lazy_string_operations.py | 9 +- .../data/tabular/query/_string_operations.py | 93 +++++++++---------- .../_lazy_string_operations/test_to_int.py | 34 +++++++ 3 files changed, 85 insertions(+), 51 deletions(-) create mode 100644 tests/safeds/data/tabular/query/_lazy_string_operations/test_to_int.py diff --git 
a/src/safeds/data/tabular/query/_lazy_string_operations.py b/src/safeds/data/tabular/query/_lazy_string_operations.py index b0b91267b..c3cf7c79e 100644 --- a/src/safeds/data/tabular/query/_lazy_string_operations.py +++ b/src/safeds/data/tabular/query/_lazy_string_operations.py @@ -11,7 +11,7 @@ if TYPE_CHECKING: import polars as pl - from safeds._typing import _ConvertibleToStringCell + from safeds._typing import _ConvertibleToIntCell, _ConvertibleToStringCell from safeds.data.tabular.containers._cell import Cell @@ -80,6 +80,9 @@ def to_float(self) -> Cell[float | None]: return _LazyCell(self._expression.cast(pl.Float64(), strict=False)) + def to_int(self, *, base: _ConvertibleToIntCell = 10) -> Cell[int | None]: + return _LazyCell(self._expression.str.to_integer(base=base, strict=False)) + def to_lowercase(self) -> Cell[str | None]: return _LazyCell(self._expression.str.to_lowercase()) @@ -127,9 +130,7 @@ def to_uppercase(self) -> Cell[str | None]: # # return _LazyCell(self._expression.str.to_datetime(format=format, strict=False)) # - # def to_int(self, *, base: _ConvertibleToIntCell = 10) -> Cell[int | None]: - # return _LazyCell(self._expression.str.to_integer(base=base, strict=False)) - # + # def to_time(self, *, format: str | None = "iso") -> Cell[datetime.time | None]: # if format == "iso": # format = "%T" diff --git a/src/safeds/data/tabular/query/_string_operations.py b/src/safeds/data/tabular/query/_string_operations.py index 8ce1667c5..9cd4be5db 100644 --- a/src/safeds/data/tabular/query/_string_operations.py +++ b/src/safeds/data/tabular/query/_string_operations.py @@ -4,7 +4,7 @@ from typing import TYPE_CHECKING if TYPE_CHECKING: - from safeds._typing import _ConvertibleToStringCell + from safeds._typing import _ConvertibleToIntCell, _ConvertibleToStringCell from safeds.data.tabular.containers import Cell from safeds.exceptions import OutOfBoundsError # noqa: F401 @@ -306,6 +306,51 @@ def to_float(self) -> Cell[float | None]: +------+ """ + 
@abstractmethod + def to_int(self, *, base: _ConvertibleToIntCell = 10) -> Cell[int | None]: + """ + Convert the string to an integer. + + Parameters + ---------- + base: + The base of the integer. + + Returns + ------- + cell: + The integer value. If the string cannot be converted to an integer, None is returned. + + Examples + -------- + >>> from safeds.data.tabular.containers import Column + >>> column1 = Column("a", ["1", "10", "abc", None]) + >>> column1.transform(lambda cell: cell.str.to_int()) + +------+ + | a | + | --- | + | i64 | + +======+ + | 1 | + | 10 | + | null | + | null | + +------+ + + >>> column2 = Column("a", ["1", "10", "abc", None]) + >>> column2.transform(lambda cell: cell.str.to_int(base=2)) + +------+ + | a | + | --- | + | i64 | + +======+ + | 1 | + | 2 | + | null | + | null | + +------+ + """ + @abstractmethod def to_lowercase(self) -> Cell[str | None]: """ @@ -560,52 +605,6 @@ def to_uppercase(self) -> Cell[str | None]: # # # TODO: add to_time # - # @abstractmethod - # def to_int(self, *, base: _ConvertibleToIntCell = 10) -> Cell[int | None]: - # """ - # Convert the string value in the cell to an integer. - # - # Parameters - # ---------- - # base: - # The base of the integer. - # - # Returns - # ------- - # int: - # The integer value. If the string cannot be converted to an integer, None is returned. 
- # - # Examples - # -------- - # >>> from safeds.data.tabular.containers import Column - # >>> column1 = Column("a", ["1", "2", "3", "abc", None]) - # >>> column1.transform(lambda cell: cell.str.to_int()) - # +------+ - # | a | - # | --- | - # | i64 | - # +======+ - # | 1 | - # | 2 | - # | 3 | - # | null | - # | null | - # +------+ - # - # >>> column2 = Column("a", ["1", "10", "11", "abc", None]) - # >>> column2.transform(lambda cell: cell.str.to_int(base=2)) - # +------+ - # | a | - # | --- | - # | i64 | - # +======+ - # | 1 | - # | 2 | - # | 3 | - # | null | - # | null | - # +------+ - # """ # @abstractmethod # def trim(self) -> Cell[str | None]: diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_int.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_int.py new file mode 100644 index 000000000..9350e1012 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_int.py @@ -0,0 +1,34 @@ +import pytest + +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "base", "expected"), + [ + ("", 10, None), + ("abc", 10, None), + ("10", 10, 10), + ("10", 2, 2), + (None, 10, None), + ("0", None, None), + (None, None, None), + ], + ids=[ + "empty", + "invalid", + "base 10", + "base 2", + "None as value", + "None as base", + "None for both", + ], +) +def test_should_convert_string_to_integer(value: str | None, base: int | None, expected: float | None) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.to_int(base=base), + expected, + type_if_none=ColumnType.string(), + ) From a822ed2c5283388f4e683f1b4e29682c07034356 Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Tue, 21 Jan 2025 14:03:27 +0100 Subject: [PATCH 12/26] feat: `repeat` --- .../tabular/query/_lazy_string_operations.py | 5 ++ .../data/tabular/query/_string_operations.py | 56 +++++++++++++++---- 
.../_lazy_string_operations/test_repeat.py | 39 +++++++++++++ 3 files changed, 90 insertions(+), 10 deletions(-) create mode 100644 tests/safeds/data/tabular/query/_lazy_string_operations/test_repeat.py diff --git a/src/safeds/data/tabular/query/_lazy_string_operations.py b/src/safeds/data/tabular/query/_lazy_string_operations.py index c3cf7c79e..c9ab74715 100644 --- a/src/safeds/data/tabular/query/_lazy_string_operations.py +++ b/src/safeds/data/tabular/query/_lazy_string_operations.py @@ -69,6 +69,11 @@ def pad_start(self, length: int, *, character: str = " ") -> Cell[str | None]: return _LazyCell(self._expression.str.pad_start(length, character)) + def repeat(self, count: _ConvertibleToIntCell) -> Cell[str | None]: + _check_bounds("count", count, lower_bound=_ClosedBound(0)) + + return _LazyCell(self._expression.repeat_by(count).list.join("", ignore_nulls=False)) + def reverse(self) -> Cell[str | None]: return _LazyCell(self._expression.str.reverse()) diff --git a/src/safeds/data/tabular/query/_string_operations.py b/src/safeds/data/tabular/query/_string_operations.py index 9cd4be5db..a8f33ebe7 100644 --- a/src/safeds/data/tabular/query/_string_operations.py +++ b/src/safeds/data/tabular/query/_string_operations.py @@ -222,6 +222,42 @@ def pad_start(self, length: int, *, character: str = " ") -> Cell[str | None]: +------+ """ + @abstractmethod + def repeat(self, count: _ConvertibleToIntCell) -> Cell[str | None]: + """ + Repeat the string a number of times. + + Parameters + ---------- + count: + The number of times to repeat the string. Must be greater than or equal to 0. + + Returns + ------- + cell: + The repeated string. + + Raises + ------ + OutOfBoundsError + If `count` is less than 0. 
+ + Examples + -------- + >>> from safeds.data.tabular.containers import Column + >>> column = Column("a", ["ab", "bc", None]) + >>> column.transform(lambda cell: cell.str.repeat(2)) + +------+ + | a | + | --- | + | str | + +======+ + | abab | + | bcbc | + | null | + +------+ + """ + @abstractmethod def reverse(self) -> Cell[str | None]: """ @@ -294,16 +330,16 @@ def to_float(self) -> Cell[float | None]: >>> from safeds.data.tabular.containers import Column >>> column = Column("a", ["1", "1.5", "abc", None]) >>> column.transform(lambda cell: cell.str.to_float()) - +------+ - | a | - | --- | - | f64 | - +======+ - | 1 | - | 1.5 | - | null | - | null | - +------+ + +---------+ + | a | + | --- | + | f64 | + +=========+ + | 1.00000 | + | 1.50000 | + | null | + | null | + +---------+ """ @abstractmethod diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_repeat.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_repeat.py new file mode 100644 index 000000000..547e6e0c4 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_string_operations/test_repeat.py @@ -0,0 +1,39 @@ +import pytest + +from safeds.data.tabular.containers import Column +from safeds.data.tabular.typing import ColumnType +from safeds.exceptions import OutOfBoundsError +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "count", "expected"), + [ + ("", 1, ""), + ("a", 0, ""), + ("a", 1, "a"), + ("a", 2, "aa"), + (None, 0, ""), + (None, 1, None), + ("", None, None), + (None, None, None), + ], + ids=[ + "empty", + "zero count", + "non-empty (count 1)", + "non-empty (count 2)", + "None as string (count 0)", + "None as string (count 1)", + "None as count", + "None for both", + ], +) +def test_should_repeat_string(value: str | None, count: int | None, expected: str | None) -> None: + assert_cell_operation_works(value, lambda cell: cell.str.repeat(count), expected, type_if_none=ColumnType.string()) + + +def 
test_should_raise_if_count_is_out_of_bounds() -> None: + column = Column("a", []) + with pytest.raises(OutOfBoundsError): + column.transform(lambda cell: cell.str.repeat(-1)) From 0540d8e8de0db7a04f4afce84528385bcc3493c7 Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Tue, 21 Jan 2025 14:25:48 +0100 Subject: [PATCH 13/26] feat: `strip`, `strip_end`, `strip_start` --- .../tabular/query/_lazy_string_operations.py | 9 + .../data/tabular/query/_string_operations.py | 210 +++++++++++------- .../_lazy_string_operations/test_strip.py | 34 +++ .../_lazy_string_operations/test_strip_end.py | 34 +++ .../test_strip_start.py | 34 +++ 5 files changed, 237 insertions(+), 84 deletions(-) create mode 100644 tests/safeds/data/tabular/query/_lazy_string_operations/test_strip.py create mode 100644 tests/safeds/data/tabular/query/_lazy_string_operations/test_strip_end.py create mode 100644 tests/safeds/data/tabular/query/_lazy_string_operations/test_strip_start.py diff --git a/src/safeds/data/tabular/query/_lazy_string_operations.py b/src/safeds/data/tabular/query/_lazy_string_operations.py index c9ab74715..975f1d936 100644 --- a/src/safeds/data/tabular/query/_lazy_string_operations.py +++ b/src/safeds/data/tabular/query/_lazy_string_operations.py @@ -80,6 +80,15 @@ def reverse(self) -> Cell[str | None]: def starts_with(self, prefix: _ConvertibleToStringCell) -> Cell[bool | None]: return _LazyCell(self._expression.str.starts_with(prefix)) + def strip(self, *, characters: _ConvertibleToStringCell = None) -> Cell[str | None]: + return _LazyCell(self._expression.str.strip_chars(characters)) + + def strip_end(self, *, characters: _ConvertibleToStringCell = None) -> Cell[str | None]: + return _LazyCell(self._expression.str.strip_chars_end(characters)) + + def strip_start(self, *, characters: _ConvertibleToStringCell = None) -> Cell[str | None]: + return _LazyCell(self._expression.str.strip_chars_start(characters)) + def to_float(self) -> Cell[float | None]: import polars as pl diff --git 
a/src/safeds/data/tabular/query/_string_operations.py b/src/safeds/data/tabular/query/_string_operations.py index a8f33ebe7..c0f4427aa 100644 --- a/src/safeds/data/tabular/query/_string_operations.py +++ b/src/safeds/data/tabular/query/_string_operations.py @@ -315,6 +315,132 @@ def starts_with(self, prefix: _ConvertibleToStringCell) -> Cell[bool | None]: +-------+ """ + @abstractmethod + def strip(self, *, characters: _ConvertibleToStringCell = None) -> Cell[str | None]: + """ + Remove leading and trailing characters. + + Parameters + ---------- + characters: + The characters to remove. If None, whitespace is removed. + + Returns + ------- + cell: + The stripped string. + + Examples + -------- + >>> from safeds.data.tabular.containers import Column + >>> column = Column("a", [" ab ", "~ bc ~", None]) + >>> column.transform(lambda cell: cell.str.strip()) + +--------+ + | a | + | --- | + | str | + +========+ + | ab | + | ~ bc ~ | + | null | + +--------+ + + >>> column.transform(lambda cell: cell.str.strip(characters=" ~")) + +------+ + | a | + | --- | + | str | + +======+ + | ab | + | bc | + | null | + +------+ + """ + + @abstractmethod + def strip_end(self, *, characters: _ConvertibleToStringCell = None) -> Cell[str | None]: + """ + Remove trailing characters. + + Parameters + ---------- + characters: + The characters to remove. If None, whitespace is removed. + + Returns + ------- + cell: + The stripped string. 
+ + Examples + -------- + >>> from safeds.data.tabular.containers import Column + >>> column = Column("a", [" ab ", "~ bc ~", None]) + >>> column.transform(lambda cell: cell.str.strip_end()) + +--------+ + | a | + | --- | + | str | + +========+ + | ab | + | ~ bc ~ | + | null | + +--------+ + + >>> column.transform(lambda cell: cell.str.strip_end(characters=" ~")) + +------+ + | a | + | --- | + | str | + +======+ + | ab | + | ~ bc | + | null | + +------+ + """ + + @abstractmethod + def strip_start(self, *, characters: _ConvertibleToStringCell = None) -> Cell[str | None]: + """ + Remove leading characters. + + Parameters + ---------- + characters: + The characters to remove. If None, whitespace is removed. + + Returns + ------- + cell: + The stripped string. + + Examples + -------- + >>> from safeds.data.tabular.containers import Column + >>> column = Column("a", [" ab ", "~ bc ~", None]) + >>> column.transform(lambda cell: cell.str.strip_start()) + +--------+ + | a | + | --- | + | str | + +========+ + | ab | + | ~ bc ~ | + | null | + +--------+ + + >>> column.transform(lambda cell: cell.str.strip_start(characters=" ~")) + +------+ + | a | + | --- | + | str | + +======+ + | ab | + | bc ~ | + | null | + +------+ + """ + @abstractmethod def to_float(self) -> Cell[float | None]: """ @@ -641,87 +767,3 @@ def to_uppercase(self) -> Cell[str | None]: # # # TODO: add to_time # - - # @abstractmethod - # def trim(self) -> Cell[str | None]: - # """ - # Remove whitespace from the start and end of the string value in the cell. - # - # Returns - # ------- - # trimmed: - # The string value without whitespace at the start and end. 
- # - # Examples - # -------- - # >>> from safeds.data.tabular.containers import Column - # >>> column = Column("a", ["", " abc", "abc ", " abc ", None]) - # >>> column.transform(lambda cell: cell.str.trim()) - # +------+ - # | a | - # | --- | - # | str | - # +======+ - # | | - # | abc | - # | abc | - # | abc | - # | null | - # +------+ - # """ - # - # @abstractmethod - # def trim_end(self) -> Cell[str | None]: - # """ - # Remove whitespace from the end of the string value in the cell. - # - # Returns - # ------- - # trimmed: - # The string value without whitespace at the end. - # - # Examples - # -------- - # >>> from safeds.data.tabular.containers import Column - # >>> column = Column("a", ["", " abc", "abc ", " abc ", None]) - # >>> column.transform(lambda cell: cell.str.trim_end()) - # +------+ - # | a | - # | --- | - # | str | - # +======+ - # | | - # | abc | - # | abc | - # | abc | - # | null | - # +------+ - # """ - # - # @abstractmethod - # def trim_start(self) -> Cell[str | None]: - # """ - # Remove whitespace from the start of the string value in the cell. - # - # Returns - # ------- - # trimmed: - # The string value without whitespace at the start. 
- # - # Examples - # -------- - # >>> from safeds.data.tabular.containers import Column - # >>> column = Column("a", ["", " abc", "abc ", " abc ", None]) - # >>> column.transform(lambda cell: cell.str.trim_start()) - # +------+ - # | a | - # | --- | - # | str | - # +======+ - # | | - # | abc | - # | abc | - # | abc | - # | null | - # +------+ - # """ diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_strip.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_strip.py new file mode 100644 index 000000000..ea5135193 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_string_operations/test_strip.py @@ -0,0 +1,34 @@ +import pytest + +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "characters", "expected"), + [ + ("", " ", ""), + ("~ a ~", "", "~ a ~"), + ("~ a ~", "~", " a "), + ("~ a ~", "~ ", "a"), + (None, " ", None), + (" \na\n ", None, "a"), + (None, None, None), + ], + ids=[ + "empty", + "non-empty (empty characters)", + "non-empty (one character)", + "non-empty (multiple characters)", + "None as string", + "None as characters", + "None as both", + ], +) +def test_should_strip(value: str | None, characters: str | None, expected: bool | None) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.strip(characters=characters), + expected, + type_if_none=ColumnType.string(), + ) diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_strip_end.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_strip_end.py new file mode 100644 index 000000000..1d70963eb --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_string_operations/test_strip_end.py @@ -0,0 +1,34 @@ +import pytest + +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "characters", "expected"), + [ + ("", " ", ""), + 
("~ a ~", "", "~ a ~"), + ("~ a ~", "~", "~ a "), + ("~ a ~", "~ ", "~ a"), + (None, " ", None), + (" \na\n ", None, " \na"), + (None, None, None), + ], + ids=[ + "empty", + "non-empty (empty characters)", + "non-empty (one character)", + "non-empty (multiple characters)", + "None as string", + "None as characters", + "None as both", + ], +) +def test_should_strip_end(value: str | None, characters: str | None, expected: bool | None) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.strip_end(characters=characters), + expected, + type_if_none=ColumnType.string(), + ) diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_strip_start.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_strip_start.py new file mode 100644 index 000000000..5e7c11f4f --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_string_operations/test_strip_start.py @@ -0,0 +1,34 @@ +import pytest + +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "characters", "expected"), + [ + ("", " ", ""), + ("~ a ~", "", "~ a ~"), + ("~ a ~", "~", " a ~"), + ("~ a ~", "~ ", "a ~"), + (None, " ", None), + (" \na\n ", None, "a\n "), + (None, None, None), + ], + ids=[ + "empty", + "non-empty (empty characters)", + "non-empty (one character)", + "non-empty (multiple characters)", + "None as string", + "None as characters", + "None as both", + ], +) +def test_should_strip_start(value: str | None, characters: str | None, expected: bool | None) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.strip_start(characters=characters), + expected, + type_if_none=ColumnType.string(), + ) From 33c07dcd19fb6661ac2ee7ca2ce13b26d955601a Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Tue, 21 Jan 2025 14:46:19 +0100 Subject: [PATCH 14/26] feat: `slice` --- .../tabular/query/_lazy_string_operations.py | 11 ++++ 
.../data/tabular/query/_string_operations.py | 56 +++++++++++++++++++ .../containers/_table/test_slice_rows.py | 8 +-- .../_lazy_string_operations/test_slice.py | 53 ++++++++++++++++++ 4 files changed, 124 insertions(+), 4 deletions(-) create mode 100644 tests/safeds/data/tabular/query/_lazy_string_operations/test_slice.py diff --git a/src/safeds/data/tabular/query/_lazy_string_operations.py b/src/safeds/data/tabular/query/_lazy_string_operations.py index 975f1d936..3462e2a2f 100644 --- a/src/safeds/data/tabular/query/_lazy_string_operations.py +++ b/src/safeds/data/tabular/query/_lazy_string_operations.py @@ -77,6 +77,17 @@ def repeat(self, count: _ConvertibleToIntCell) -> Cell[str | None]: def reverse(self) -> Cell[str | None]: return _LazyCell(self._expression.str.reverse()) + def slice( + self, + *, + start: _ConvertibleToIntCell = 0, + length: _ConvertibleToIntCell = None, + ) -> Cell[str | None]: + if isinstance(length, int): + _check_bounds("length", length, lower_bound=_ClosedBound(0)) + + return _LazyCell(self._expression.str.slice(start, length)) + def starts_with(self, prefix: _ConvertibleToStringCell) -> Cell[bool | None]: return _LazyCell(self._expression.str.starts_with(prefix)) diff --git a/src/safeds/data/tabular/query/_string_operations.py b/src/safeds/data/tabular/query/_string_operations.py index c0f4427aa..dffb692eb 100644 --- a/src/safeds/data/tabular/query/_string_operations.py +++ b/src/safeds/data/tabular/query/_string_operations.py @@ -284,6 +284,62 @@ def reverse(self) -> Cell[str | None]: +------+ """ + @abstractmethod + def slice( + self, + *, + start: _ConvertibleToIntCell = 0, + length: _ConvertibleToIntCell = None, + ) -> Cell[str | None]: + """ + Get a slice of the string. + + Parameters + ---------- + start: + The start index of the slice. Nonnegative indices are counted from the beginning (starting at 0), negative + indices from the end (starting at -1). + length: + The length of the slice. 
If None, the slice contains all characters starting from `start`. Must be greater + than or equal to 0. + + Returns + ------- + cell: + The sliced string. + + Raises + ------ + OutOfBoundsError + If `length` is less than 0. + + Examples + -------- + >>> from safeds.data.tabular.containers import Column + >>> column = Column("a", ["abc", "de", None]) + >>> column.transform(lambda cell: cell.str.slice(start=1)) + +------+ + | a | + | --- | + | str | + +======+ + | bc | + | e | + | null | + +------+ + + >>> column.transform(lambda cell: cell.str.slice(start=1, length=1)) + +------+ + | a | + | --- | + | str | + +======+ + | b | + | e | + | null | + +------+ + """ + @abstractmethod def starts_with(self, prefix: _ConvertibleToStringCell) -> Cell[bool | None]: """ diff --git a/tests/safeds/data/tabular/containers/_table/test_slice_rows.py b/tests/safeds/data/tabular/containers/_table/test_slice_rows.py index 046118da9..8fa0f8694 100644 --- a/tests/safeds/data/tabular/containers/_table/test_slice_rows.py +++ b/tests/safeds/data/tabular/containers/_table/test_slice_rows.py @@ -68,12 +68,12 @@ "empty", "no rows", "full table", - "positive start in bounds", - "positive start out of bounds", + "non-negative start in bounds", + "non-negative start out of bounds", "negative start in bounds", "negative start out of bounds", - "positive length in bounds", - "positive length out of bounds", + "non-negative length in bounds", + "non-negative length out of bounds", ], ) class TestHappyPath: diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_slice.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_slice.py new file mode 100644 index 000000000..df205ef10 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_string_operations/test_slice.py @@ -0,0 +1,53 @@ +import pytest + +from safeds.data.tabular.containers import Column +from safeds.data.tabular.typing import ColumnType +from safeds.exceptions import OutOfBoundsError +from tests.helpers import
assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "start", "length", "expected"), + [ + ("", 0, None, ""), + ("abc", 0, None, "abc"), + ("abc", 10, None, ""), + ("abc", -1, None, "c"), + ("abc", -10, None, "abc"), + ("abc", 0, 1, "a"), + ("abc", 0, 10, "abc"), + (None, 0, 1, None), + ("abc", None, 1, None), + (None, None, None, None), + ], + ids=[ + "empty", + "non-negative start in bounds", + "non-negative start out of bounds", + "negative start in bounds", + "negative start out of bounds", + "non-negative length in bounds", + "non-negative length out of bounds", + "None as string", + "None as start", + "None for all", + ], +) +def test_should_slice_characters( + value: str | None, + start: int | None, + length: int | None, + expected: bool | None, +) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.slice(start=start, length=length), + expected, + type_if_none=ColumnType.string(), + ) + + +def test_should_raise_for_negative_length() -> None: + column = Column("a", []) + with pytest.raises(OutOfBoundsError): + column.transform(lambda cell: cell.str.slice(length=-1)) From 009fd75245775eb1d533f92bb027ea3e7e2561a2 Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Tue, 21 Jan 2025 14:58:26 +0100 Subject: [PATCH 15/26] fix: guard bounds check --- src/safeds/data/tabular/query/_lazy_string_operations.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/safeds/data/tabular/query/_lazy_string_operations.py b/src/safeds/data/tabular/query/_lazy_string_operations.py index 3462e2a2f..f4243a0bc 100644 --- a/src/safeds/data/tabular/query/_lazy_string_operations.py +++ b/src/safeds/data/tabular/query/_lazy_string_operations.py @@ -70,7 +70,8 @@ def pad_start(self, length: int, *, character: str = " ") -> Cell[str | None]: return _LazyCell(self._expression.str.pad_start(length, character)) def repeat(self, count: _ConvertibleToIntCell) -> Cell[str | None]: - _check_bounds("count", count, 
lower_bound=_ClosedBound(0)) + if isinstance(count, int): + _check_bounds("count", count, lower_bound=_ClosedBound(0)) return _LazyCell(self._expression.repeat_by(count).list.join("", ignore_nulls=False)) From ac502ddb9be04024889614b6b79e45a122bd2e2d Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Tue, 21 Jan 2025 14:59:50 +0100 Subject: [PATCH 16/26] chore: remove ported code --- .../tabular/query/_lazy_string_operations.py | 24 +--------- .../data/tabular/query/_string_operations.py | 45 ------------------- 2 files changed, 1 insertion(+), 68 deletions(-) diff --git a/src/safeds/data/tabular/query/_lazy_string_operations.py b/src/safeds/data/tabular/query/_lazy_string_operations.py index f4243a0bc..bfa2dcfcc 100644 --- a/src/safeds/data/tabular/query/_lazy_string_operations.py +++ b/src/safeds/data/tabular/query/_lazy_string_operations.py @@ -126,20 +126,7 @@ def to_uppercase(self) -> Cell[str | None]: # def replace(self, old: _ConvertibleToStringCell, new: _ConvertibleToStringCell) -> Cell[str | None]: # return _LazyCell(self._expression.str.replace_all(old, new, literal=True)) # - # def starts_with(self, prefix: _ConvertibleToStringCell) -> Cell[bool | None]: - # return _LazyCell(self._expression.str.starts_with(prefix)) - # - # def substring( - # self, - # *, - # start: _ConvertibleToIntCell = 0, - # length: _ConvertibleToIntCell = None, - # ) -> Cell[str | None]: - # if isinstance(length, int): - # _check_bounds("length", length, lower_bound=_ClosedBound(0)) - # - # return _LazyCell(self._expression.str.slice(start, length)) - # + # def to_date(self, *, format: str | None = "iso") -> Cell[datetime.date | None]: # if format == "iso": # format = "%F" @@ -164,12 +151,3 @@ def to_uppercase(self) -> Cell[str | None]: # format = _convert_and_check_datetime_format(format, type_="time", used_for_parsing=True) # # return _LazyCell(self._expression.str.to_time(format=format, strict=False)) - # - # def trim(self) -> Cell[str | None]: - # return 
_LazyCell(self._expression.str.strip_chars()) - # - # def trim_end(self) -> Cell[str | None]: - # return _LazyCell(self._expression.str.strip_chars_end()) - # - # def trim_start(self) -> Cell[str | None]: - # return _LazyCell(self._expression.str.strip_chars_start()) diff --git a/src/safeds/data/tabular/query/_string_operations.py b/src/safeds/data/tabular/query/_string_operations.py index dffb692eb..012e6ff3c 100644 --- a/src/safeds/data/tabular/query/_string_operations.py +++ b/src/safeds/data/tabular/query/_string_operations.py @@ -720,51 +720,6 @@ def to_uppercase(self) -> Cell[str | None]: # +------+ # """ - # @abstractmethod - # def substring( - # self, - # *, - # start: _ConvertibleToIntCell = 0, - # length: _ConvertibleToIntCell = None, - # ) -> Cell[str | None]: - # """ - # Get a substring of the string value in the cell. - # - # Parameters - # ---------- - # start: - # The start index of the substring. - # length: - # The length of the substring. If None, the slice contains all rows starting from `start`. Must greater than - # or equal to 0. - # - # Returns - # ------- - # substring: - # The substring of the string value. - # - # Raises - # ------ - # OutOfBoundsError - # If length is less than 0. 
- # - # Examples - # -------- - # >>> from safeds.data.tabular.containers import Column - # >>> column = Column("a", ["abc", "def", "ghi", None]) - # >>> column.transform(lambda cell: cell.str.substring(start=1, length=2)) - # +------+ - # | a | - # | --- | - # | str | - # +======+ - # | bc | - # | ef | - # | hi | - # | null | - # +------+ - # """ - # # # TODO: add format parameter + document # @abstractmethod # def to_date(self, *, format: str | None = "iso") -> Cell[datetime.date | None]: From 84fc9ceac9696259eabca8f050265efb080417e9 Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Tue, 21 Jan 2025 18:53:42 +0100 Subject: [PATCH 17/26] feat: `remove_prefix` and `remove_suffix` --- .../tabular/query/_lazy_string_operations.py | 93 +++++++++++-------- .../data/tabular/query/_string_operations.py | 62 +++++++++++++ .../_lazy_string_operations/test_pad_end.py | 4 +- .../_lazy_string_operations/test_pad_start.py | 4 +- .../test_remove_prefix.py | 34 +++++++ .../test_remove_suffix.py | 34 +++++++ .../_lazy_string_operations/test_repeat.py | 2 +- .../_lazy_string_operations/test_slice.py | 2 +- 8 files changed, 191 insertions(+), 44 deletions(-) create mode 100644 tests/safeds/data/tabular/query/_lazy_string_operations/test_remove_prefix.py create mode 100644 tests/safeds/data/tabular/query/_lazy_string_operations/test_remove_suffix.py diff --git a/src/safeds/data/tabular/query/_lazy_string_operations.py b/src/safeds/data/tabular/query/_lazy_string_operations.py index bfa2dcfcc..9efc40f78 100644 --- a/src/safeds/data/tabular/query/_lazy_string_operations.py +++ b/src/safeds/data/tabular/query/_lazy_string_operations.py @@ -3,12 +3,14 @@ from typing import TYPE_CHECKING from safeds._utils import _structural_hash -from safeds._validation import _check_bounds, _ClosedBound +from safeds._validation import _check_bounds, _ClosedBound, _convert_and_check_datetime_format from safeds.data.tabular.containers._lazy_cell import _LazyCell from ._string_operations import 
StringOperations if TYPE_CHECKING: + import datetime + import polars as pl from safeds._typing import _ConvertibleToIntCell, _ConvertibleToStringCell @@ -46,9 +48,15 @@ def __str__(self) -> str: # String operations # ------------------------------------------------------------------------------------------------------------------ + def contains(self, substring: _ConvertibleToStringCell) -> Cell[bool | None]: + return _LazyCell(self._expression.str.contains(substring, literal=True)) + def ends_with(self, suffix: _ConvertibleToStringCell) -> Cell[bool | None]: return _LazyCell(self._expression.str.ends_with(suffix)) + def index_of(self, substring: _ConvertibleToStringCell) -> Cell[int | None]: + return _LazyCell(self._expression.str.find(substring, literal=True)) + def length(self, optimize_for_ascii: bool = False) -> Cell[int | None]: if optimize_for_ascii: return _LazyCell(self._expression.str.len_bytes()) @@ -75,6 +83,28 @@ def repeat(self, count: _ConvertibleToIntCell) -> Cell[str | None]: return _LazyCell(self._expression.repeat_by(count).list.join("", ignore_nulls=False)) + def remove_prefix(self, prefix: _ConvertibleToStringCell) -> Cell[str | None]: + import polars as pl + + # polars raises an error otherwise + if prefix is None: + prefix = pl.lit(None, pl.String()) + + return _LazyCell(self._expression.str.strip_prefix(prefix)) + + def remove_suffix(self, suffix: _ConvertibleToStringCell) -> Cell[str | None]: + import polars as pl + + # polars raises an error otherwise + if suffix is None: + suffix = pl.lit(None, pl.String()) + + return _LazyCell(self._expression.str.strip_suffix(suffix)) + + # TODO: regex? how many to replace? by default, one or all? 
+ def replace(self, old: _ConvertibleToStringCell, new: _ConvertibleToStringCell) -> Cell[str | None]: + return _LazyCell(self._expression.str.replace_all(old, new, literal=True)) + def reverse(self) -> Cell[str | None]: return _LazyCell(self._expression.str.reverse()) @@ -101,6 +131,22 @@ def strip_end(self, *, characters: _ConvertibleToStringCell = None) -> Cell[str def strip_start(self, *, characters: _ConvertibleToStringCell = None) -> Cell[str | None]: return _LazyCell(self._expression.str.strip_chars_start(characters)) + def to_date(self, *, format: str | None = "iso") -> Cell[datetime.date | None]: + if format == "iso": + format = "%F" # noqa: A001 + elif format is not None: + format = _convert_and_check_datetime_format(format, type_="date", used_for_parsing=True) # noqa: A001 + + return _LazyCell(self._expression.str.to_date(format=format, strict=False)) + + def to_datetime(self, *, format: str | None = "iso") -> Cell[datetime.datetime | None]: + if format == "iso": + format = "%+" # noqa: A001 + elif format is not None: + format = _convert_and_check_datetime_format(format, type_="datetime", used_for_parsing=True) # noqa: A001 + + return _LazyCell(self._expression.str.to_datetime(format=format, strict=False)) + def to_float(self) -> Cell[float | None]: import polars as pl @@ -112,42 +158,13 @@ def to_int(self, *, base: _ConvertibleToIntCell = 10) -> Cell[int | None]: def to_lowercase(self) -> Cell[str | None]: return _LazyCell(self._expression.str.to_lowercase()) + def to_time(self, *, format: str | None = "iso") -> Cell[datetime.time | None]: + if format == "iso": + format = "%T" # noqa: A001 + elif format is not None: + format = _convert_and_check_datetime_format(format, type_="time", used_for_parsing=True) # noqa: A001 + + return _LazyCell(self._expression.str.to_time(format=format, strict=False)) + def to_uppercase(self) -> Cell[str | None]: return _LazyCell(self._expression.str.to_uppercase()) - - # def contains(self, substring: 
_ConvertibleToStringCell) -> Cell[bool | None]: - # return _LazyCell(self._expression.str.contains(substring, literal=True)) - # - - # - # def index_of(self, substring: _ConvertibleToStringCell) -> Cell[int | None]: - # return _LazyCell(self._expression.str.find(substring, literal=True)) - # - # def replace(self, old: _ConvertibleToStringCell, new: _ConvertibleToStringCell) -> Cell[str | None]: - # return _LazyCell(self._expression.str.replace_all(old, new, literal=True)) - # - - # def to_date(self, *, format: str | None = "iso") -> Cell[datetime.date | None]: - # if format == "iso": - # format = "%F" - # elif format is not None: - # format = _convert_and_check_datetime_format(format, type_="date", used_for_parsing=True) - # - # return _LazyCell(self._expression.str.to_date(format=format, strict=False)) - # - # def to_datetime(self, *, format: str | None = "iso") -> Cell[datetime.datetime | None]: - # if format == "iso": - # format = "%+" - # elif format is not None: - # format = _convert_and_check_datetime_format(format, type_="datetime", used_for_parsing=True) - # - # return _LazyCell(self._expression.str.to_datetime(format=format, strict=False)) - # - - # def to_time(self, *, format: str | None = "iso") -> Cell[datetime.time | None]: - # if format == "iso": - # format = "%T" - # elif format is not None: - # format = _convert_and_check_datetime_format(format, type_="time", used_for_parsing=True) - # - # return _LazyCell(self._expression.str.to_time(format=format, strict=False)) diff --git a/src/safeds/data/tabular/query/_string_operations.py b/src/safeds/data/tabular/query/_string_operations.py index 012e6ff3c..79f7e8ed4 100644 --- a/src/safeds/data/tabular/query/_string_operations.py +++ b/src/safeds/data/tabular/query/_string_operations.py @@ -222,6 +222,68 @@ def pad_start(self, length: int, *, character: str = " ") -> Cell[str | None]: +------+ """ + @abstractmethod + def remove_prefix(self, prefix: _ConvertibleToStringCell) -> Cell[str | None]: + """ + 
Remove a prefix from the string. Strings without the prefix are not changed. + + Parameters + ---------- + prefix: + The prefix to remove. + + Returns + ------- + cell: + The string without the prefix. + + Examples + -------- + >>> from safeds.data.tabular.containers import Column + >>> column = Column("a", ["ab", "bc", None]) + >>> column.transform(lambda cell: cell.str.remove_prefix("a")) + +------+ + | a | + | --- | + | str | + +======+ + | b | + | bc | + | null | + +------+ + """ + + @abstractmethod + def remove_suffix(self, suffix: _ConvertibleToStringCell) -> Cell[str | None]: + """ + Remove a suffix from the string. Strings without the suffix are not changed. + + Parameters + ---------- + suffix: + The suffix to remove. + + Returns + ------- + cell: + The string without the suffix. + + Examples + -------- + >>> from safeds.data.tabular.containers import Column + >>> column = Column("a", ["ab", "bc", None]) + >>> column.transform(lambda cell: cell.str.remove_suffix("b")) + +------+ + | a | + | --- | + | str | + +======+ + | a | + | bc | + | null | + +------+ + """ + @abstractmethod def repeat(self, count: _ConvertibleToIntCell) -> Cell[str | None]: """ diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_pad_end.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_pad_end.py index ae9108597..28b0f38b2 100644 --- a/tests/safeds/data/tabular/query/_lazy_string_operations/test_pad_end.py +++ b/tests/safeds/data/tabular/query/_lazy_string_operations/test_pad_end.py @@ -35,7 +35,7 @@ def test_should_pad_end(value: str | None, length: int, character: str, expected def test_should_raise_if_length_is_out_of_bounds() -> None: - column = Column("col1", []) + column = Column("col1", [1]) with pytest.raises(OutOfBoundsError): column.transform(lambda cell: cell.str.pad_end(-1)) @@ -52,6 +52,6 @@ def test_should_raise_if_length_is_out_of_bounds() -> None: ], ) def test_should_raise_if_char_is_not_single_character(character: str) -> None: - 
column = Column("col1", []) + column = Column("col1", [1]) with pytest.raises(ValueError, match=r"Can only pad with a single character\."): column.transform(lambda cell: cell.str.pad_end(1, character=character)) diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_pad_start.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_pad_start.py index 8425c7bef..261055f98 100644 --- a/tests/safeds/data/tabular/query/_lazy_string_operations/test_pad_start.py +++ b/tests/safeds/data/tabular/query/_lazy_string_operations/test_pad_start.py @@ -35,7 +35,7 @@ def test_should_pad_start(value: str | None, length: int, character: str, expect def test_should_raise_if_length_is_out_of_bounds() -> None: - column = Column("col1", []) + column = Column("col1", [1]) with pytest.raises(OutOfBoundsError): column.transform(lambda cell: cell.str.pad_start(-1)) @@ -52,6 +52,6 @@ def test_should_raise_if_length_is_out_of_bounds() -> None: ], ) def test_should_raise_if_char_is_not_single_character(character: str) -> None: - column = Column("col1", []) + column = Column("col1", [1]) with pytest.raises(ValueError, match=r"Can only pad with a single character\."): column.transform(lambda cell: cell.str.pad_start(1, character=character)) diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_remove_prefix.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_remove_prefix.py new file mode 100644 index 000000000..24b14e9a8 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_string_operations/test_remove_prefix.py @@ -0,0 +1,34 @@ +import pytest + +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "prefix", "expected"), + [ + ("", " ", ""), + ("~ a ~", "", "~ a ~"), + ("~ a ~", "~ ", "a ~"), + ("~ a ~", " ~", "~ a ~"), + (None, " ", None), + ("~ a ~", None, None), + (None, None, None), + ], + ids=[ + "empty", + "empty prefix", + 
"non-empty (has prefix)", + "non-empty (does not have prefix)", + "None as string", + "None as prefix", + "None as both", + ], +) +def test_should_remove_prefix(value: str | None, prefix: str | None, expected: bool | None) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.remove_prefix(prefix), + expected, + type_if_none=ColumnType.string(), + ) diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_remove_suffix.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_remove_suffix.py new file mode 100644 index 000000000..cdaeb84a5 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_string_operations/test_remove_suffix.py @@ -0,0 +1,34 @@ +import pytest + +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "suffix", "expected"), + [ + ("", " ", ""), + ("~ a ~", "", "~ a ~"), + ("~ a ~", " ~", "~ a"), + ("~ a ~", "~ ", "~ a ~"), + (None, " ", None), + ("~ a ~", None, None), + (None, None, None), + ], + ids=[ + "empty", + "empty suffix", + "non-empty (has suffix)", + "non-empty (does not have suffix)", + "None as string", + "None as suffix", + "None as both", + ], +) +def test_should_remove_suffix(value: str | None, suffix: str | None, expected: bool | None) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.remove_suffix(suffix), + expected, + type_if_none=ColumnType.string(), + ) diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_repeat.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_repeat.py index 547e6e0c4..d7a3a2b85 100644 --- a/tests/safeds/data/tabular/query/_lazy_string_operations/test_repeat.py +++ b/tests/safeds/data/tabular/query/_lazy_string_operations/test_repeat.py @@ -34,6 +34,6 @@ def test_should_repeat_string(value: str | None, count: int | None, expected: st def test_should_raise_if_count_is_out_of_bounds() -> None: - column = 
Column("a", []) + column = Column("a", [1]) with pytest.raises(OutOfBoundsError): column.transform(lambda cell: cell.str.repeat(-1)) diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_slice.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_slice.py index df205ef10..fb3374021 100644 --- a/tests/safeds/data/tabular/query/_lazy_string_operations/test_slice.py +++ b/tests/safeds/data/tabular/query/_lazy_string_operations/test_slice.py @@ -48,6 +48,6 @@ def test_should_slice_characters( def test_should_raise_for_negative_length() -> None: - column = Column("a", []) + column = Column("a", [1]) with pytest.raises(OutOfBoundsError): column.transform(lambda cell: cell.str.slice(length=-1)) From 1c385892e5b99506fd5b8fc6d68bdbe3edad9993 Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Tue, 21 Jan 2025 19:30:45 +0100 Subject: [PATCH 18/26] fix: convert Cell to Expr --- src/safeds/data/tabular/containers/_cell.py | 36 +++++++-- .../query/_lazy_datetime_operations.py | 9 +++ .../tabular/query/_lazy_string_operations.py | 31 ++++--- .../containers/_lazy_cell/test_constant.py | 13 +-- .../_lazy_datetime_operations/test_replace.py | 80 +++++++++++++------ .../_lazy_string_operations/test_ends_with.py | 26 ++++-- .../test_remove_prefix.py | 26 ++++-- .../test_remove_suffix.py | 26 ++++-- .../_lazy_string_operations/test_repeat.py | 22 ++++- .../_lazy_string_operations/test_slice.py | 45 ++++++++--- .../test_starts_with.py | 30 ++++--- .../_lazy_string_operations/test_strip.py | 26 ++++-- .../_lazy_string_operations/test_strip_end.py | 26 ++++-- .../test_strip_start.py | 26 ++++-- .../_lazy_string_operations/test_to_int.py | 26 ++++-- 15 files changed, 327 insertions(+), 121 deletions(-) diff --git a/src/safeds/data/tabular/containers/_cell.py b/src/safeds/data/tabular/containers/_cell.py index 202af7661..dad805a0a 100644 --- a/src/safeds/data/tabular/containers/_cell.py +++ b/src/safeds/data/tabular/containers/_cell.py @@ -44,7 +44,7 @@ class 
Cell(ABC, Generic[T_co]): # ------------------------------------------------------------------------------------------------------------------ @staticmethod - def constant(value: _PythonLiteral | None) -> Cell: + def constant(value: _PythonLiteral | None, *, type: ColumnType | None = None) -> Cell: """ Create a cell with a constant value. @@ -52,6 +52,8 @@ def constant(value: _PythonLiteral | None) -> Cell: ---------- value: The value to create the cell from. + type: + The type of the cell. If None, the type is inferred from the value. Returns ------- @@ -77,7 +79,9 @@ def constant(value: _PythonLiteral | None) -> Cell: from ._lazy_cell import _LazyCell # circular import - return _LazyCell(pl.lit(value)) + dtype = type._polars_data_type if type is not None else None + + return _LazyCell(pl.lit(value, dtype=dtype)) @staticmethod def date( @@ -1453,7 +1457,7 @@ def cast(self, type: ColumnType) -> Cell: @property @abstractmethod def _polars_expression(self) -> pl.Expr: - """The Polars expression that corresponds to this cell.""" + """The polars expression that corresponds to this cell.""" @abstractmethod def _equals(self, other: object) -> bool: @@ -1464,10 +1468,32 @@ def _equals(self, other: object) -> bool: """ -def _to_polars_expression(cell_proxy: _ConvertibleToCell) -> pl.Expr: +def _to_polars_expression(cell_proxy: _ConvertibleToCell, *, type_if_none: ColumnType | None = None) -> pl.Expr: + """ + Convert a cell proxy to a polars expression. + + Parameters + ---------- + cell_proxy: + The cell proxy to convert. + type_if_none: + The type to use if `cell_proxy` is `None`. If `None`, the type is inferred from the context. + + Returns + ------- + expression: + The polars expression. 
+ """ import polars as pl + # Cell if isinstance(cell_proxy, Cell): return cell_proxy._polars_expression + + # Plain value + if cell_proxy is None and type_if_none is not None: + dtype = type_if_none._polars_data_type else: - return pl.lit(cell_proxy) + dtype = None + + return pl.lit(cell_proxy, dtype) diff --git a/src/safeds/data/tabular/query/_lazy_datetime_operations.py b/src/safeds/data/tabular/query/_lazy_datetime_operations.py index 298c8fdb4..05169c405 100644 --- a/src/safeds/data/tabular/query/_lazy_datetime_operations.py +++ b/src/safeds/data/tabular/query/_lazy_datetime_operations.py @@ -4,6 +4,7 @@ from safeds._utils import _structural_hash from safeds._validation import _convert_and_check_datetime_format +from safeds.data.tabular.containers._cell import _to_polars_expression from safeds.data.tabular.containers._lazy_cell import _LazyCell from ._datetime_operations import DatetimeOperations @@ -114,6 +115,14 @@ def replace( second: _ConvertibleToIntCell = None, microsecond: _ConvertibleToIntCell = None, ) -> Cell: + year = _to_polars_expression(year) + month = _to_polars_expression(month) + day = _to_polars_expression(day) + hour = _to_polars_expression(hour) + minute = _to_polars_expression(minute) + second = _to_polars_expression(second) + microsecond = _to_polars_expression(microsecond) + return _LazyCell( self._expression.dt.replace( year=year, diff --git a/src/safeds/data/tabular/query/_lazy_string_operations.py b/src/safeds/data/tabular/query/_lazy_string_operations.py index 9efc40f78..0027b279a 100644 --- a/src/safeds/data/tabular/query/_lazy_string_operations.py +++ b/src/safeds/data/tabular/query/_lazy_string_operations.py @@ -4,7 +4,9 @@ from safeds._utils import _structural_hash from safeds._validation import _check_bounds, _ClosedBound, _convert_and_check_datetime_format +from safeds.data.tabular.containers._cell import _to_polars_expression from safeds.data.tabular.containers._lazy_cell import _LazyCell +from safeds.data.tabular.typing 
import ColumnType from ._string_operations import StringOperations @@ -52,6 +54,8 @@ def contains(self, substring: _ConvertibleToStringCell) -> Cell[bool | None]: return _LazyCell(self._expression.str.contains(substring, literal=True)) def ends_with(self, suffix: _ConvertibleToStringCell) -> Cell[bool | None]: + suffix = _to_polars_expression(suffix) + return _LazyCell(self._expression.str.ends_with(suffix)) def index_of(self, substring: _ConvertibleToStringCell) -> Cell[int | None]: @@ -81,23 +85,17 @@ def repeat(self, count: _ConvertibleToIntCell) -> Cell[str | None]: if isinstance(count, int): _check_bounds("count", count, lower_bound=_ClosedBound(0)) + count = _to_polars_expression(count) + return _LazyCell(self._expression.repeat_by(count).list.join("", ignore_nulls=False)) def remove_prefix(self, prefix: _ConvertibleToStringCell) -> Cell[str | None]: - import polars as pl - - # polars raises an error otherwise - if prefix is None: - prefix = pl.lit(None, pl.String()) + prefix = _to_polars_expression(prefix, type_if_none=ColumnType.string()) return _LazyCell(self._expression.str.strip_prefix(prefix)) def remove_suffix(self, suffix: _ConvertibleToStringCell) -> Cell[str | None]: - import polars as pl - - # polars raises an error otherwise - if suffix is None: - suffix = pl.lit(None, pl.String()) + suffix = _to_polars_expression(suffix, type_if_none=ColumnType.string()) return _LazyCell(self._expression.str.strip_suffix(suffix)) @@ -117,18 +115,29 @@ def slice( if isinstance(length, int): _check_bounds("length", length, lower_bound=_ClosedBound(0)) + start = _to_polars_expression(start) + length = _to_polars_expression(length) + return _LazyCell(self._expression.str.slice(start, length)) def starts_with(self, prefix: _ConvertibleToStringCell) -> Cell[bool | None]: + prefix = _to_polars_expression(prefix) + return _LazyCell(self._expression.str.starts_with(prefix)) def strip(self, *, characters: _ConvertibleToStringCell = None) -> Cell[str | None]: + characters = 
_to_polars_expression(characters) + return _LazyCell(self._expression.str.strip_chars(characters)) def strip_end(self, *, characters: _ConvertibleToStringCell = None) -> Cell[str | None]: + characters = _to_polars_expression(characters) + return _LazyCell(self._expression.str.strip_chars_end(characters)) def strip_start(self, *, characters: _ConvertibleToStringCell = None) -> Cell[str | None]: + characters = _to_polars_expression(characters) + return _LazyCell(self._expression.str.strip_chars_start(characters)) def to_date(self, *, format: str | None = "iso") -> Cell[datetime.date | None]: @@ -153,6 +162,8 @@ def to_float(self) -> Cell[float | None]: return _LazyCell(self._expression.cast(pl.Float64(), strict=False)) def to_int(self, *, base: _ConvertibleToIntCell = 10) -> Cell[int | None]: + base = _to_polars_expression(base) + return _LazyCell(self._expression.str.to_integer(base=base, strict=False)) def to_lowercase(self) -> Cell[str | None]: diff --git a/tests/safeds/data/tabular/containers/_lazy_cell/test_constant.py b/tests/safeds/data/tabular/containers/_lazy_cell/test_constant.py index 1be74f34a..669404cbe 100644 --- a/tests/safeds/data/tabular/containers/_lazy_cell/test_constant.py +++ b/tests/safeds/data/tabular/containers/_lazy_cell/test_constant.py @@ -3,19 +3,22 @@ import pytest from safeds.data.tabular.containers import Cell +from safeds.data.tabular.typing import ColumnType from tests.helpers import assert_cell_operation_works @pytest.mark.parametrize( - "value", + ("value", "type_", "expected"), [ - None, - 1, + (None, None, None), + (1, None, 1), + (1, ColumnType.string(), "1"), ], ids=[ "None", "int", + "with explicit type", ], ) -def test_should_return_constant_value(value: Any) -> None: - assert_cell_operation_works(None, lambda _: Cell.constant(value), value) +def test_should_return_constant_value(value: Any, type_: ColumnType | None, expected: Any) -> None: + assert_cell_operation_works(None, lambda _: Cell.constant(value, type=type_), 
expected) diff --git a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_replace.py b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_replace.py index a201398e0..9d9196a59 100644 --- a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_replace.py +++ b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_replace.py @@ -2,6 +2,7 @@ import pytest +from safeds.data.tabular.containers import Cell from safeds.data.tabular.typing import ColumnType from tests.helpers import assert_cell_operation_works @@ -100,28 +101,57 @@ "None", ], ) -def test_should_replace_components( - value: datetime | date | None, - year: int | None, - month: int | None, - day: int | None, - hour: int | None, - minute: int | None, - second: int | None, - microsecond: int | None, - expected: int | None, -) -> None: - assert_cell_operation_works( - value, - lambda cell: cell.dt.replace( - year=year, - month=month, - day=day, - hour=hour, - minute=minute, - second=second, - microsecond=microsecond, - ), - expected, - type_if_none=ColumnType.datetime(), - ) +class TestShouldReplaceComponents: + def test_plain_arguments( + self, + value: datetime | date | None, + year: int | None, + month: int | None, + day: int | None, + hour: int | None, + minute: int | None, + second: int | None, + microsecond: int | None, + expected: int | None, + ) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.dt.replace( + year=year, + month=month, + day=day, + hour=hour, + minute=minute, + second=second, + microsecond=microsecond, + ), + expected, + type_if_none=ColumnType.datetime(), + ) + + def test_arguments_wrapped_in_cell( + self, + value: datetime | date | None, + year: int | None, + month: int | None, + day: int | None, + hour: int | None, + minute: int | None, + second: int | None, + microsecond: int | None, + expected: int | None, + ) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.dt.replace( + year=Cell.constant(year), + 
month=Cell.constant(month), + day=Cell.constant(day), + hour=Cell.constant(hour), + minute=Cell.constant(minute), + second=Cell.constant(second), + microsecond=Cell.constant(microsecond), + ), + expected, + type_if_none=ColumnType.datetime(), + ) diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_ends_with.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_ends_with.py index d7629506c..d893b31ce 100644 --- a/tests/safeds/data/tabular/query/_lazy_string_operations/test_ends_with.py +++ b/tests/safeds/data/tabular/query/_lazy_string_operations/test_ends_with.py @@ -1,5 +1,6 @@ import pytest +from safeds.data.tabular.containers import Cell from safeds.data.tabular.typing import ColumnType from tests.helpers import assert_cell_operation_works @@ -29,10 +30,21 @@ "None for both", ], ) -def test_should_check_if_string_ends_with_suffix(value: str | None, suffix: str | None, expected: bool | None) -> None: - assert_cell_operation_works( - value, - lambda cell: cell.str.ends_with(suffix), - expected, - type_if_none=ColumnType.string(), - ) +class TestShouldCheckIfStringEndsWithSuffix: + def test_plain_arguments(self, value: str | None, suffix: str | None, expected: bool | None) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.ends_with(suffix), + expected, + type_if_none=ColumnType.string(), + ) + + def test_arguments_wrapped_in_cell(self, value: str | None, suffix: str | None, expected: bool | None) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.ends_with( + Cell.constant(suffix), + ), + expected, + type_if_none=ColumnType.string(), + ) diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_remove_prefix.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_remove_prefix.py index 24b14e9a8..05479c86b 100644 --- a/tests/safeds/data/tabular/query/_lazy_string_operations/test_remove_prefix.py +++ 
b/tests/safeds/data/tabular/query/_lazy_string_operations/test_remove_prefix.py @@ -1,5 +1,6 @@ import pytest +from safeds.data.tabular.containers import Cell from safeds.data.tabular.typing import ColumnType from tests.helpers import assert_cell_operation_works @@ -25,10 +26,21 @@ "None as both", ], ) -def test_should_remove_prefix(value: str | None, prefix: str | None, expected: bool | None) -> None: - assert_cell_operation_works( - value, - lambda cell: cell.str.remove_prefix(prefix), - expected, - type_if_none=ColumnType.string(), - ) +class TestShouldRemovePrefix: + def test_plain_arguments(self, value: str | None, prefix: str | None, expected: bool | None) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.remove_prefix(prefix), + expected, + type_if_none=ColumnType.string(), + ) + + def test_arguments_wrapped_in_cell(self, value: str | None, prefix: str | None, expected: bool | None) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.remove_prefix( + Cell.constant(prefix, type=ColumnType.string()), + ), + expected, + type_if_none=ColumnType.string(), + ) diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_remove_suffix.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_remove_suffix.py index cdaeb84a5..47e1dcfac 100644 --- a/tests/safeds/data/tabular/query/_lazy_string_operations/test_remove_suffix.py +++ b/tests/safeds/data/tabular/query/_lazy_string_operations/test_remove_suffix.py @@ -1,5 +1,6 @@ import pytest +from safeds.data.tabular.containers import Cell from safeds.data.tabular.typing import ColumnType from tests.helpers import assert_cell_operation_works @@ -25,10 +26,21 @@ "None as both", ], ) -def test_should_remove_suffix(value: str | None, suffix: str | None, expected: bool | None) -> None: - assert_cell_operation_works( - value, - lambda cell: cell.str.remove_suffix(suffix), - expected, - type_if_none=ColumnType.string(), - ) +class TestShouldRemoveSuffix: + def 
test_plain_arguments(self, value: str | None, suffix: str | None, expected: bool | None) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.remove_suffix(suffix), + expected, + type_if_none=ColumnType.string(), + ) + + def test_arguments_wrapped_in_cell(self, value: str | None, suffix: str | None, expected: bool | None) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.remove_suffix( + Cell.constant(suffix, type=ColumnType.string()), + ), + expected, + type_if_none=ColumnType.string(), + ) diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_repeat.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_repeat.py index d7a3a2b85..546aab740 100644 --- a/tests/safeds/data/tabular/query/_lazy_string_operations/test_repeat.py +++ b/tests/safeds/data/tabular/query/_lazy_string_operations/test_repeat.py @@ -1,6 +1,6 @@ import pytest -from safeds.data.tabular.containers import Column +from safeds.data.tabular.containers import Cell, Column from safeds.data.tabular.typing import ColumnType from safeds.exceptions import OutOfBoundsError from tests.helpers import assert_cell_operation_works @@ -29,8 +29,24 @@ "None for both", ], ) -def test_should_repeat_string(value: str | None, count: int | None, expected: str | None) -> None: - assert_cell_operation_works(value, lambda cell: cell.str.repeat(count), expected, type_if_none=ColumnType.string()) +class TestShouldRepeatString: + def test_plain_arguments(self, value: str | None, count: int | None, expected: str | None) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.repeat(count), + expected, + type_if_none=ColumnType.string(), + ) + + def test_arguments_wrapped_in_cell(self, value: str | None, count: int | None, expected: str | None) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.repeat( + Cell.constant(count), + ), + expected, + type_if_none=ColumnType.string(), + ) def 
test_should_raise_if_count_is_out_of_bounds() -> None: diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_slice.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_slice.py index fb3374021..b7f551de9 100644 --- a/tests/safeds/data/tabular/query/_lazy_string_operations/test_slice.py +++ b/tests/safeds/data/tabular/query/_lazy_string_operations/test_slice.py @@ -1,6 +1,6 @@ import pytest -from safeds.data.tabular.containers import Column +from safeds.data.tabular.containers import Cell, Column from safeds.data.tabular.typing import ColumnType from safeds.exceptions import OutOfBoundsError from tests.helpers import assert_cell_operation_works @@ -33,18 +33,37 @@ "None for all", ], ) -def test_should_slice_characters( - value: str | None, - start: int | None, - length: int | None, - expected: bool | None, -) -> None: - assert_cell_operation_works( - value, - lambda cell: cell.str.slice(start=start, length=length), - expected, - type_if_none=ColumnType.string(), - ) +class TestShouldSliceCharacters: + def test_plain_arguments( + self, + value: str | None, + start: int | None, + length: int | None, + expected: bool | None, + ) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.slice(start=start, length=length), + expected, + type_if_none=ColumnType.string(), + ) + + def test_arguments_wrapped_in_cell( + self, + value: str | None, + start: int | None, + length: int | None, + expected: bool | None, + ) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.slice( + start=Cell.constant(start), + length=Cell.constant(length), + ), + expected, + type_if_none=ColumnType.string(), + ) def test_should_raise_for_negative_length() -> None: diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_starts_with.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_starts_with.py index 141974bef..32e322483 100644 --- 
a/tests/safeds/data/tabular/query/_lazy_string_operations/test_starts_with.py +++ b/tests/safeds/data/tabular/query/_lazy_string_operations/test_starts_with.py @@ -1,5 +1,6 @@ import pytest +from safeds.data.tabular.containers import Cell from safeds.data.tabular.typing import ColumnType from tests.helpers import assert_cell_operation_works @@ -29,14 +30,21 @@ "None for both", ], ) -def test_should_check_if_string_starts_with_prefix( - value: str | None, - prefix: str | None, - expected: bool | None, -) -> None: - assert_cell_operation_works( - value, - lambda cell: cell.str.starts_with(prefix), - expected, - type_if_none=ColumnType.string(), - ) +class TestShouldCheckIfStringStartsWithPrefix: + def test_plain_arguments(self, value: str | None, prefix: str | None, expected: bool | None) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.starts_with(prefix), + expected, + type_if_none=ColumnType.string(), + ) + + def test_arguments_wrapped_in_cell(self, value: str | None, prefix: str | None, expected: bool | None) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.starts_with( + Cell.constant(prefix), + ), + expected, + type_if_none=ColumnType.string(), + ) diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_strip.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_strip.py index ea5135193..d99890d4c 100644 --- a/tests/safeds/data/tabular/query/_lazy_string_operations/test_strip.py +++ b/tests/safeds/data/tabular/query/_lazy_string_operations/test_strip.py @@ -1,5 +1,6 @@ import pytest +from safeds.data.tabular.containers import Cell from safeds.data.tabular.typing import ColumnType from tests.helpers import assert_cell_operation_works @@ -25,10 +26,21 @@ "None as both", ], ) -def test_should_strip(value: str | None, characters: str | None, expected: bool | None) -> None: - assert_cell_operation_works( - value, - lambda cell: cell.str.strip(characters=characters), - expected, - 
type_if_none=ColumnType.string(), - ) +class TestShouldStrip: + def test_plain_arguments(self, value: str | None, characters: str | None, expected: bool | None) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.strip(characters=characters), + expected, + type_if_none=ColumnType.string(), + ) + + def test_arguments_wrapped_in_cell(self, value: str | None, characters: str | None, expected: bool | None) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.strip( + characters=Cell.constant(characters), + ), + expected, + type_if_none=ColumnType.string(), + ) diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_strip_end.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_strip_end.py index 1d70963eb..e09bdc637 100644 --- a/tests/safeds/data/tabular/query/_lazy_string_operations/test_strip_end.py +++ b/tests/safeds/data/tabular/query/_lazy_string_operations/test_strip_end.py @@ -1,5 +1,6 @@ import pytest +from safeds.data.tabular.containers import Cell from safeds.data.tabular.typing import ColumnType from tests.helpers import assert_cell_operation_works @@ -25,10 +26,21 @@ "None as both", ], ) -def test_should_strip_end(value: str | None, characters: str | None, expected: bool | None) -> None: - assert_cell_operation_works( - value, - lambda cell: cell.str.strip_end(characters=characters), - expected, - type_if_none=ColumnType.string(), - ) +class TestShouldStripEnd: + def test_plain_arguments(self, value: str | None, characters: str | None, expected: bool | None) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.strip_end(characters=characters), + expected, + type_if_none=ColumnType.string(), + ) + + def test_arguments_wrapped_in_cell(self, value: str | None, characters: str | None, expected: bool | None) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.strip_end( + characters=Cell.constant(characters), + ), + expected, + 
type_if_none=ColumnType.string(), + ) diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_strip_start.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_strip_start.py index 5e7c11f4f..cfbb1075c 100644 --- a/tests/safeds/data/tabular/query/_lazy_string_operations/test_strip_start.py +++ b/tests/safeds/data/tabular/query/_lazy_string_operations/test_strip_start.py @@ -1,5 +1,6 @@ import pytest +from safeds.data.tabular.containers import Cell from safeds.data.tabular.typing import ColumnType from tests.helpers import assert_cell_operation_works @@ -25,10 +26,21 @@ "None as both", ], ) -def test_should_strip_start(value: str | None, characters: str | None, expected: bool | None) -> None: - assert_cell_operation_works( - value, - lambda cell: cell.str.strip_start(characters=characters), - expected, - type_if_none=ColumnType.string(), - ) +class TestShouldStripStart: + def test_plain_arguments(self, value: str | None, characters: str | None, expected: bool | None) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.strip_start(characters=characters), + expected, + type_if_none=ColumnType.string(), + ) + + def test_arguments_wrapped_in_cell(self, value: str | None, characters: str | None, expected: bool | None) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.strip_start( + characters=Cell.constant(characters), + ), + expected, + type_if_none=ColumnType.string(), + ) diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_int.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_int.py index 9350e1012..9df3c1235 100644 --- a/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_int.py +++ b/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_int.py @@ -1,5 +1,6 @@ import pytest +from safeds.data.tabular.containers import Cell from safeds.data.tabular.typing import ColumnType from tests.helpers import assert_cell_operation_works @@ 
-25,10 +26,21 @@ "None for both", ], ) -def test_should_convert_string_to_integer(value: str | None, base: int | None, expected: float | None) -> None: - assert_cell_operation_works( - value, - lambda cell: cell.str.to_int(base=base), - expected, - type_if_none=ColumnType.string(), - ) +class TestShouldConvertStringToInteger: + def test_plain_arguments(self, value: str | None, base: int | None, expected: float | None) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.to_int(base=base), + expected, + type_if_none=ColumnType.string(), + ) + + def test_arguments_wrapped_in_cell(self, value: str | None, base: int | None, expected: float | None) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.to_int( + base=Cell.constant(base), + ), + expected, + type_if_none=ColumnType.string(), + ) From 0a3bad8e23d9afe4fc8d90e1dc64a254411c5140 Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Tue, 21 Jan 2025 20:10:59 +0100 Subject: [PATCH 19/26] feat: `to_time` --- .../tabular/query/_lazy_string_operations.py | 2 +- .../data/tabular/query/_string_operations.py | 82 ++++++++++- .../test_to_string.py | 2 +- .../_lazy_string_operations/test_to_time.py | 136 ++++++++++++++++++ 4 files changed, 218 insertions(+), 4 deletions(-) create mode 100644 tests/safeds/data/tabular/query/_lazy_string_operations/test_to_time.py diff --git a/src/safeds/data/tabular/query/_lazy_string_operations.py b/src/safeds/data/tabular/query/_lazy_string_operations.py index 0027b279a..28dc8b444 100644 --- a/src/safeds/data/tabular/query/_lazy_string_operations.py +++ b/src/safeds/data/tabular/query/_lazy_string_operations.py @@ -171,7 +171,7 @@ def to_lowercase(self) -> Cell[str | None]: def to_time(self, *, format: str | None = "iso") -> Cell[datetime.time | None]: if format == "iso": - format = "%T" # noqa: A001 + format = "%T%.f" # noqa: A001 elif format is not None: format = _convert_and_check_datetime_format(format, type_="time", used_for_parsing=True) # noqa: 
A001 diff --git a/src/safeds/data/tabular/query/_string_operations.py b/src/safeds/data/tabular/query/_string_operations.py index 79f7e8ed4..770e33c47 100644 --- a/src/safeds/data/tabular/query/_string_operations.py +++ b/src/safeds/data/tabular/query/_string_operations.py @@ -4,6 +4,8 @@ from typing import TYPE_CHECKING if TYPE_CHECKING: + import datetime + from safeds._typing import _ConvertibleToIntCell, _ConvertibleToStringCell from safeds.data.tabular.containers import Cell from safeds.exceptions import OutOfBoundsError # noqa: F401 @@ -657,6 +659,84 @@ def to_lowercase(self) -> Cell[str | None]: +------+ """ + @abstractmethod + def to_time(self, *, format: str | None = "iso") -> Cell[datetime.time | None]: + r""" + Convert a string to a time. + + The `format` parameter specifies the input format. It can be `"iso"` to target ISO 8601 or a custom string. The + custom string can contain fixed specifiers (see below), which match the corresponding values. The + specifiers are case-sensitive and always enclosed in curly braces. Other text must appear in the input + verbatim. To include a literal opening curly brace, use `\{`, and to include a literal backslash, use `\\`. + + The following specifiers are available: + + - `{h}`, `{_h}`, `{^h}`: Hour (zero-padded to two digits, space-padded to two digits, no padding). + - `{h12}`, `{_h12}`, `{^h12}`: Hour in 12-hour format (zero-padded to two digits, space-padded to two digits, no + padding). + - `{m}`, `{_m}`, `{^m}`: Minute (zero-padded to two digits, space-padded to two digits, no padding). + - `{s}`, `{_s}`, `{^s}`: Second (zero-padded to two digits, space-padded to two digits, no padding). + - `{.f}`: Fractional seconds with a leading decimal point. + - `{ms}`: Millisecond (zero-padded to three digits). + - `{us}`: Microsecond (zero-padded to six digits). + - `{ns}`: Nanosecond (zero-padded to nine digits). + - `{AM/PM}`: AM or PM (uppercase). + - `{am/pm}`: am or pm (lowercase). 
+ + The specifiers follow certain conventions: + + - If a component may be formatted in multiple ways, we use shorter specifiers for ISO 8601. Specifiers for + other formats have a prefix (same value with different padding, see below) or suffix (other differences). + - By default, values are zero-padded, where applicable. + - A leading underscore (`_`) means the value is space-padded. + - A leading caret (`^`) means the value has no padding (think of the caret in regular expressions). + + Parameters + ---------- + format: + The format to use. + + Returns + ------- + cell: + The parsed time. + + Raises + ------ + ValueError + If the format is invalid. + + Examples + -------- + >>> from safeds.data.tabular.containers import Column + >>> column = Column("a", ["12:34", "12:34:56", "12:34:56.789", "abc", None]) + >>> column.transform(lambda cell: cell.str.to_time()) + +--------------+ + | a | + | --- | + | time | + +==============+ + | null | + | 12:34:56 | + | 12:34:56.789 | + | null | + | null | + +--------------+ + + >>> column.transform(lambda cell: cell.str.to_time(format="{h}:{m}")) + +----------+ + | a | + | --- | + | time | + +==========+ + | 12:34:00 | + | null | + | null | + | null | + | null | + +----------+ + """ + @abstractmethod def to_uppercase(self) -> Cell[str | None]: """ @@ -838,5 +918,3 @@ def to_uppercase(self) -> Cell[str | None]: # +-------------------------+ # """ # - # # TODO: add to_time - # diff --git a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_to_string.py b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_to_string.py index a5041f376..4b11b1dec 100644 --- a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_to_string.py +++ b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_to_string.py @@ -224,7 +224,7 @@ def test_should_be_replaced_with_correct_string(self, value: datetime, format_: "tab", ], ) -def test_should_handle_escape_sequences(format_: str, expected: date | time 
| None) -> None: +def test_should_handle_escape_sequences(format_: str, expected: str) -> None: assert_cell_operation_works( DATETIME, lambda cell: cell.dt.to_string(format=format_), diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_time.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_time.py new file mode 100644 index 000000000..ea6857bc9 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_time.py @@ -0,0 +1,136 @@ +from datetime import time + +import pytest + +from safeds.data.tabular.containers import Column +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + +NO_FRACTIONAL = time(4, 5, 6) +WITH_MILLISECOND = time(4, 5, 6, 7000) +WITH_MICROSECOND = time(4, 5, 6, 7) + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + ("04:05:06", NO_FRACTIONAL), + ("04:05:06.007", WITH_MILLISECOND), + ("04:05:06.000007", WITH_MICROSECOND), + (None, None), + ], + ids=[ + "time without fractional seconds", + "time with milliseconds", + "time with microseconds", + "None", + ], +) +def test_should_handle_iso_8601(value: str | None, expected: str | None) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.to_time(format="iso"), + expected, + type_if_none=ColumnType.string(), + ) + + +@pytest.mark.parametrize( + ("value", "format_", "expected"), + [ + ("04:05:06", "{h}:{m}:{s}", NO_FRACTIONAL), + (" 4: 5: 6", "{_h}:{_m}:{_s}", NO_FRACTIONAL), + ("4:5:6", "{^h}:{^m}:{^s}", NO_FRACTIONAL), + ("04:05:06 am", "{h12}:{m}:{s} {am/pm}", NO_FRACTIONAL), + (" 4: 5: 6 AM", "{_h12}:{m}:{s} {AM/PM}", NO_FRACTIONAL), + ("4:5:6 AM", "{^h12}:{m}:{s} {AM/PM}", NO_FRACTIONAL), + ("04:05:06 .000007", "{h}:{m}:{s} {.f}", WITH_MICROSECOND), + ("04:05:06 007", "{h}:{m}:{s} {ms}", WITH_MILLISECOND), + ("04:05:06 000007", "{h}:{m}:{s} {us}", WITH_MICROSECOND), + ("04:05:06 000007000", "{h}:{m}:{s} {ns}", WITH_MICROSECOND), + ("04", "{h}", 
None), + ("05", "{m}", None), + ("04:05:06 04", "{h}:{m}:{s} {h}", NO_FRACTIONAL), + ("04:05:06 07", "{h}:{m}:{s} {h}", None), + ("04:05:06 04", "{h}:{m}:{s} {h12}", NO_FRACTIONAL), + ("04:05:06 07", "{h}:{m}:{s} {h12}", None), + ("24:00:00", "{h}:{m}:{s}", None), + ("invalid", "{h}:{m}:{s}", None), + ], + ids=[ + "{h}:{m}:{s}", + "{_h}:{_m}:{_s}", + "{^h}:{^m}:{^s}", + "{h12}:{m}:{s} {am/pm}", + "{_h12}:{m}:{s} {am/pm}", + "{^h12}:{m}:{s} {AM/PM}", + "{.f}", + "{ms}", + "{us}", + "{ns}", + "no minute", + "no hour", + "duplicate field, same value", + "duplicate field, different value", + "similar field, same value", + "similar field, different value", + "out of bounds", + "no match", + ], +) +def test_should_handle_custom_format_string(value: str, format_: str, expected: time) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.to_time(format=format_), + expected, + ) + + +@pytest.mark.parametrize( + ("value", "format_", "expected"), + [ + ("04:05\\", "{h}:{m}\\", time(4, 5)), + ("04:05\\", "{h}:{m}\\\\", time(4, 5)), + ("04:05{", "{h}:{m}\\{", time(4, 5)), + ("04:05%", "{h}:{m}%", time(4, 5)), + ("04:05\n", "{h}:{m}\n", time(4, 5)), + ("04:05\t", "{h}:{m}\t", time(4, 5)), + ], + ids=[ + "backslash at end", + "escaped backslash", + "escaped open curly brace", + "percent", + "newline", + "tab", + ], +) +def test_should_handle_escape_sequences(value: str, format_: str, expected: time) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.to_time(format=format_), + expected, + ) + + +def test_should_raise_for_unclosed_specifier() -> None: + column = Column("a", ["04:05:06"]) + with pytest.raises(ValueError, match="Unclosed specifier"): + column.transform(lambda cell: cell.str.to_time(format="{Y")) + + +@pytest.mark.parametrize( + "format_", + [ + "{invalid}", + "{Y}", + ], + ids=[ + "globally invalid", + "invalid for time", + ], +) +def test_should_raise_for_invalid_specifier(format_: str) -> None: + column = Column("a", 
["04:05:06"]) + with pytest.raises(ValueError, match="Invalid specifier"): + column.transform(lambda cell: cell.str.to_time(format=format_)) From 192b81655cf6e6a8f8e044b2239f5379a2c7d74e Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Thu, 6 Feb 2025 10:33:35 +0100 Subject: [PATCH 20/26] WIP --- src/safeds/data/tabular/query/_lazy_string_operations.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/safeds/data/tabular/query/_lazy_string_operations.py b/src/safeds/data/tabular/query/_lazy_string_operations.py index 28dc8b444..07a851422 100644 --- a/src/safeds/data/tabular/query/_lazy_string_operations.py +++ b/src/safeds/data/tabular/query/_lazy_string_operations.py @@ -50,6 +50,7 @@ def __str__(self) -> str: # String operations # ------------------------------------------------------------------------------------------------------------------ + # TODO: convert cell to expr def contains(self, substring: _ConvertibleToStringCell) -> Cell[bool | None]: return _LazyCell(self._expression.str.contains(substring, literal=True)) @@ -58,6 +59,7 @@ def ends_with(self, suffix: _ConvertibleToStringCell) -> Cell[bool | None]: return _LazyCell(self._expression.str.ends_with(suffix)) + # TODO: convert cell to expr def index_of(self, substring: _ConvertibleToStringCell) -> Cell[int | None]: return _LazyCell(self._expression.str.find(substring, literal=True)) @@ -100,6 +102,7 @@ def remove_suffix(self, suffix: _ConvertibleToStringCell) -> Cell[str | None]: return _LazyCell(self._expression.str.strip_suffix(suffix)) # TODO: regex? how many to replace? by default, one or all? 
+ # TODO: convert cell to expr def replace(self, old: _ConvertibleToStringCell, new: _ConvertibleToStringCell) -> Cell[str | None]: return _LazyCell(self._expression.str.replace_all(old, new, literal=True)) From dc6e6b3cf3dd317f8ece0f54e94f7b9ca6dd7842 Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Tue, 18 Feb 2025 11:39:47 +0100 Subject: [PATCH 21/26] docs: add missing example --- .../tabular/query/_duration_operations.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/src/safeds/data/tabular/query/_duration_operations.py b/src/safeds/data/tabular/query/_duration_operations.py index 30d032003..a03f3e31b 100644 --- a/src/safeds/data/tabular/query/_duration_operations.py +++ b/src/safeds/data/tabular/query/_duration_operations.py @@ -7,14 +7,27 @@ from safeds.data.tabular.containers import Cell -# TODO: Examples with None - - class DurationOperations(ABC): """ Namespace for operations on durations. This class cannot be instantiated directly. It can only be accessed using the `dur` attribute of a cell. 
+ + Examples + -------- + >>> from datetime import timedelta + >>> from safeds.data.tabular.containers import Column + >>> column = Column("a", [timedelta(days=-1), timedelta(days=0), timedelta(days=1)]) + >>> column.transform(lambda cell: cell.dur.abs()) + +--------------+ + | a | + | --- | + | duration[μs] | + +==============+ + | 1d | + | 0µs | + | 1d | + +--------------+ """ # ------------------------------------------------------------------------------------------------------------------ From 3121d001f8d08ddc3e33dd8f190c75cbc22c5466 Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Wed, 26 Feb 2025 15:36:42 +0100 Subject: [PATCH 22/26] feat: replace_all --- .../tabular/query/_lazy_string_operations.py | 13 ++-- .../data/tabular/query/_string_operations.py | 33 +++++++++ .../test_replace_all.py | 72 +++++++++++++++++++ 3 files changed, 113 insertions(+), 5 deletions(-) create mode 100644 tests/safeds/data/tabular/query/_lazy_string_operations/test_replace_all.py diff --git a/src/safeds/data/tabular/query/_lazy_string_operations.py b/src/safeds/data/tabular/query/_lazy_string_operations.py index 07a851422..8ef148e5d 100644 --- a/src/safeds/data/tabular/query/_lazy_string_operations.py +++ b/src/safeds/data/tabular/query/_lazy_string_operations.py @@ -50,8 +50,9 @@ def __str__(self) -> str: # String operations # ------------------------------------------------------------------------------------------------------------------ - # TODO: convert cell to expr def contains(self, substring: _ConvertibleToStringCell) -> Cell[bool | None]: + substring = _to_polars_expression(substring, type_if_none=ColumnType.string()) + return _LazyCell(self._expression.str.contains(substring, literal=True)) def ends_with(self, suffix: _ConvertibleToStringCell) -> Cell[bool | None]: @@ -59,8 +60,9 @@ def ends_with(self, suffix: _ConvertibleToStringCell) -> Cell[bool | None]: return _LazyCell(self._expression.str.ends_with(suffix)) - # TODO: convert cell to expr def index_of(self, 
substring: _ConvertibleToStringCell) -> Cell[int | None]: + substring = _to_polars_expression(substring, type_if_none=ColumnType.string()) + return _LazyCell(self._expression.str.find(substring, literal=True)) def length(self, optimize_for_ascii: bool = False) -> Cell[int | None]: @@ -101,9 +103,10 @@ def remove_suffix(self, suffix: _ConvertibleToStringCell) -> Cell[str | None]: return _LazyCell(self._expression.str.strip_suffix(suffix)) - # TODO: regex? how many to replace? by default, one or all? - # TODO: convert cell to expr - def replace(self, old: _ConvertibleToStringCell, new: _ConvertibleToStringCell) -> Cell[str | None]: + def replace_all(self, old: _ConvertibleToStringCell, new: _ConvertibleToStringCell) -> Cell[str | None]: + old = _to_polars_expression(old, type_if_none=ColumnType.string()) + new = _to_polars_expression(new, type_if_none=ColumnType.string()) + return _LazyCell(self._expression.str.replace_all(old, new, literal=True)) def reverse(self) -> Cell[str | None]: diff --git a/src/safeds/data/tabular/query/_string_operations.py b/src/safeds/data/tabular/query/_string_operations.py index 770e33c47..fabfce78f 100644 --- a/src/safeds/data/tabular/query/_string_operations.py +++ b/src/safeds/data/tabular/query/_string_operations.py @@ -322,6 +322,39 @@ def repeat(self, count: _ConvertibleToIntCell) -> Cell[str | None]: +------+ """ + @abstractmethod + def replace_all(self, old: _ConvertibleToStringCell, new: _ConvertibleToStringCell) -> Cell[str | None]: + """ + Replace all occurrences of the old substring with the new substring. + + Parameters + ---------- + old: + The substring to replace. + new: + The substring to replace with. + + Returns + ------- + cell: + The string with all occurrences replaced. 
+ + Examples + -------- + >>> from safeds.data.tabular.containers import Column + >>> column = Column("a", ["ab", "bc", None]) + >>> column.transform(lambda cell: cell.str.replace_all("b", "z")) + +------+ + | a | + | --- | + | str | + +======+ + | az | + | zc | + | null | + +------+ + """ + @abstractmethod def reverse(self) -> Cell[str | None]: """ diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_replace_all.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_replace_all.py new file mode 100644 index 000000000..5183bc7b0 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_string_operations/test_replace_all.py @@ -0,0 +1,72 @@ +import pytest + +from safeds.data.tabular.containers import Cell +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "old", "new", "expected"), + [ + # all empty + ("", "", "", ""), + # empty value + ("", "a", "z", ""), + # empty old + ("abc", "", "z", "zazbzcz"), + # empty new + ("abc", "a", "", "bc"), + # no matches + ("abc", "d", "z", "abc"), + # one match + ("abc", "a", "z", "zbc"), + # many matches + ("abcabc", "a", "z", "zbczbc"), + # full match + ("abc", "abc", "z", "z"), + # None value + (None, "a", "z", None), + # None old + pytest.param("abc", None, "z", None, marks=pytest.mark.xfail(reason="Not supported by polars.")), + # None new + pytest.param("abc", "a", None, None, marks=pytest.mark.xfail(reason="Not supported by polars.")), + ], + ids=[ + "all empty", + "empty value", + "empty old", + "empty new", + "no matches", + "one match", + "many matches", + "full match", + "None value", + "None old", + "None new", + ], +) +class TestShouldReplaceAllOccurrencesOfOldWithNew: + def test_plain_arguments(self, value: str | None, old: str | None, new: str | None, expected: bool | None) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.replace_all(old, new), + expected, + 
type_if_none=ColumnType.string(), + ) + + def test_arguments_wrapped_in_cell( + self, + value: str | None, + old: str | None, + new: str | None, + expected: bool | None, + ) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.replace_all( + Cell.constant(old), + Cell.constant(new), + ), + expected, + type_if_none=ColumnType.string(), + ) From f8b34d99517ab619de8ef3a26519b8765d3672de Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Wed, 26 Feb 2025 15:39:40 +0100 Subject: [PATCH 23/26] feat: contains --- .../data/tabular/query/_string_operations.py | 31 ++++++++++++ .../_lazy_string_operations/test_contains.py | 50 +++++++++++++++++++ 2 files changed, 81 insertions(+) create mode 100644 tests/safeds/data/tabular/query/_lazy_string_operations/test_contains.py diff --git a/src/safeds/data/tabular/query/_string_operations.py b/src/safeds/data/tabular/query/_string_operations.py index fabfce78f..6147dc697 100644 --- a/src/safeds/data/tabular/query/_string_operations.py +++ b/src/safeds/data/tabular/query/_string_operations.py @@ -56,6 +56,37 @@ def __str__(self) -> str: ... # String operations # ------------------------------------------------------------------------------------------------------------------ + @abstractmethod + def contains(self, substring: _ConvertibleToStringCell) -> Cell[bool | None]: + """ + Check if the string contains the substring. + + Parameters + ---------- + substring: + The substring to search for. + + Returns + ------- + contains: + Whether the string contains the substring. 
+ + Examples + -------- + >>> from safeds.data.tabular.containers import Column + >>> column = Column("a", ["ab", "cd", None]) + >>> column.transform(lambda cell: cell.str.contains("b")) + +-------+ + | a | + | --- | + | bool | + +=======+ + | true | + | false | + | None | + +-------+ + """ + @abstractmethod def ends_with(self, suffix: _ConvertibleToStringCell) -> Cell[bool | None]: """ diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_contains.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_contains.py new file mode 100644 index 000000000..8d5065480 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_string_operations/test_contains.py @@ -0,0 +1,50 @@ +import pytest + +from safeds.data.tabular.containers import Cell +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "substring", "expected"), + [ + ("", "", True), + ("", "c", False), + ("abc", "", True), + ("abc", "c", True), + ("abc", "abc", True), + ("abc", "d", False), + (None, "", None), + ("abc", None, None), + (None, None, None), + ], + ids=[ + "empty string, empty substring", + "empty string, non-empty substring", + "non-empty string, empty substring", + "correct substring", + "substring equal to string", + "incorrect substring", + "None as string", + "None as substring", + "None for both", + ], +) +class TestShouldCheckIfStringContainsSubstring: + def test_plain_arguments(self, value: str | None, substring: str | None, expected: bool | None) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.contains(substring), + expected, + type_if_none=ColumnType.string(), + ) + + def test_arguments_wrapped_in_cell(self, value: str | None, substring: str | None, expected: bool | None) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.contains( + Cell.constant(substring), + ), + expected, + type_if_none=ColumnType.string(), + ) From 
63650372d171d24ddec65a70a710f39dfe3c6960 Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Wed, 26 Feb 2025 15:44:28 +0100 Subject: [PATCH 24/26] feat: index_of --- .../data/tabular/query/_string_operations.py | 33 +++++++++++- .../_lazy_string_operations/test_index_of.py | 50 +++++++++++++++++++ 2 files changed, 82 insertions(+), 1 deletion(-) create mode 100644 tests/safeds/data/tabular/query/_lazy_string_operations/test_index_of.py diff --git a/src/safeds/data/tabular/query/_string_operations.py b/src/safeds/data/tabular/query/_string_operations.py index 6147dc697..296a6c55e 100644 --- a/src/safeds/data/tabular/query/_string_operations.py +++ b/src/safeds/data/tabular/query/_string_operations.py @@ -83,7 +83,7 @@ def contains(self, substring: _ConvertibleToStringCell) -> Cell[bool | None]: +=======+ | true | | false | - | None | + | null | +-------+ """ @@ -118,6 +118,37 @@ def ends_with(self, suffix: _ConvertibleToStringCell) -> Cell[bool | None]: +-------+ """ + @abstractmethod + def index_of(self, substring: _ConvertibleToStringCell) -> Cell[int | None]: + """ + Get the index of the first occurrence of the substring. + + Parameters + ---------- + substring: + The substring to search for. + + Returns + ------- + cell: + The index of the first occurrence of the substring. If the substring is not found, None is returned. 
+ + Examples + -------- + >>> from safeds.data.tabular.containers import Column + >>> column = Column("a", ["ab", "cd", None]) + >>> column.transform(lambda cell: cell.str.index_of("b")) + +------+ + | a | + | --- | + | u32 | + +======+ + | 1 | + | null | + | null | + +------+ + """ + @abstractmethod def length(self, *, optimize_for_ascii: bool = False) -> Cell[int | None]: """ diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_index_of.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_index_of.py new file mode 100644 index 000000000..3430a42ac --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_string_operations/test_index_of.py @@ -0,0 +1,50 @@ +import pytest + +from safeds.data.tabular.containers import Cell +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "substring", "expected"), + [ + ("", "", 0), + ("", "c", None), + ("abc", "", 0), + ("abc", "c", 2), + ("abc", "abc", 0), + ("abc", "d", None), + (None, "", None), + ("abc", None, None), + (None, None, None), + ], + ids=[ + "empty string, empty substring", + "empty string, non-empty substring", + "non-empty string, empty substring", + "correct substring", + "substring equal to string", + "incorrect substring", + "None as string", + "None as substring", + "None for both", + ], +) +class TestShouldGetIndexOfSubstring: + def test_plain_arguments(self, value: str | None, substring: str | None, expected: int | None) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.index_of(substring), + expected, + type_if_none=ColumnType.string(), + ) + + def test_arguments_wrapped_in_cell(self, value: str | None, substring: str | None, expected: int | None) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.index_of( + Cell.constant(substring), + ), + expected, + type_if_none=ColumnType.string(), + ) From 
ea12656f1714f85490b599f4fc398570218300c2 Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Wed, 26 Feb 2025 16:45:28 +0100 Subject: [PATCH 25/26] feat: to_date --- .../data/tabular/query/_string_operations.py | 81 ++++++++++- .../_lazy_string_operations/test_to_date.py | 132 ++++++++++++++++++ .../_lazy_string_operations/test_to_time.py | 2 +- 3 files changed, 213 insertions(+), 2 deletions(-) create mode 100644 tests/safeds/data/tabular/query/_lazy_string_operations/test_to_date.py diff --git a/src/safeds/data/tabular/query/_string_operations.py b/src/safeds/data/tabular/query/_string_operations.py index 296a6c55e..0befeeb10 100644 --- a/src/safeds/data/tabular/query/_string_operations.py +++ b/src/safeds/data/tabular/query/_string_operations.py @@ -656,6 +656,85 @@ def strip_start(self, *, characters: _ConvertibleToStringCell = None) -> Cell[st +------+ """ + @abstractmethod + def to_date(self, *, format: str | None = "iso") -> Cell[datetime.date | None]: + r""" + Convert a string to a date. + + The `format` parameter controls the presentation. It can be `"iso"` to target ISO 8601 or a custom string. The + custom string can contain fixed specifiers (see below), which are replaced with the corresponding values. The + specifiers are case-sensitive and always enclosed in curly braces. Other text is included in the output + verbatim. To include a literal opening curly brace, use `\{`, and to include a literal backslash, use `\\`. + + The following specifiers are available: + + - `{Y}`, `{_Y}`, `{^Y}`: Year (zero-padded to four digits, space-padded to four digits, no padding). + - `{Y99}`, `{_Y99}`, `{^Y99}`: Year modulo 100 (zero-padded to two digits, space-padded to two digits, no + padding). + - `{M}`, `{_M}`, `{^M}`: Month (zero-padded to two digits, space-padded to two digits, no padding). + - `{M-full}`: Full name of the month (e.g. "January"). + - `{M-short}`: Abbreviated name of the month with three letters (e.g. "Jan"). 
+ - `{W}`, `{_W}`, `{^W}`: Week number as defined by ISO 8601 (zero-padded to two digits, space-padded to two + digits, no padding). + - `{D}`, `{_D}`, `{^D}`: Day of the month (zero-padded to two digits, space-padded to two digits, no padding). + - `{DOW}`: Day of the week as defined by ISO 8601 (1 = Monday, 7 = Sunday). + - `{DOW-full}`: Full name of the day of the week (e.g. "Monday"). + - `{DOW-short}`: Abbreviated name of the day of the week with three letters (e.g. "Mon"). + - `{DOY}`, `{_DOY}`, `{^DOY}`: Day of the year, ranging from 1 to 366 (zero-padded to three digits, space-padded + to three digits, no padding). + + The specifiers follow certain conventions: + + - If a component may be formatted in multiple ways, we use shorter specifiers for ISO 8601. Specifiers for + other formats have a prefix (same value with different padding, see below) or suffix (other differences). + - By default, values are zero-padded, where applicable. + - A leading underscore (`_`) means the value is space-padded. + - A leading caret (`^`) means the value has no padding (think of the caret in regular expressions). + + Parameters + ---------- + format: + The format to use. + + Returns + ------- + cell: + The parsed date. + + Raises + ------ + ValueError + If the format is invalid. 
+ + Examples + -------- + >>> from safeds.data.tabular.containers import Column + >>> column = Column("a", ["0001-02-03", "03.02.0001", "abc", None]) + >>> column.transform(lambda cell: cell.str.to_date()) + +------------+ + | a | + | --- | + | date | + +============+ + | 0001-02-03 | + | null | + | null | + | null | + +------------+ + + >>> column.transform(lambda cell: cell.str.to_date(format="{D}.{M}.{Y}")) + +------------+ + | a | + | --- | + | date | + +============+ + | null | + | 0001-02-03 | + | null | + | null | + +------------+ + """ + @abstractmethod def to_float(self) -> Cell[float | None]: """ @@ -794,7 +873,7 @@ def to_time(self, *, format: str | None = "iso") -> Cell[datetime.time | None]: Returns ------- cell: - The string representation. + The parsed time. Raises ------ diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_date.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_date.py new file mode 100644 index 000000000..ab4b1108e --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_date.py @@ -0,0 +1,132 @@ +from datetime import date + +import pytest + +from safeds.data.tabular.containers import Column +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + +DATE = date(1, 2, 3) + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + ("0001-02-03", DATE), + (None, None), + ], + ids=[ + "date", + "None", + ], +) +def test_should_handle_iso_8601(value: str | None, expected: str | None) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.to_date(format="iso"), + expected, + type_if_none=ColumnType.string(), + ) + + +@pytest.mark.parametrize( + ("value", "format_", "expected"), + [ + ("0001-02-03", "{Y}-{M}-{D}", DATE), + (" 1- 2- 3", "{_Y}-{_M}-{_D}", DATE), + ("1-2-3", "{^Y}-{^M}-{^D}", DATE), + ("01", "{Y99}", date(2001, 1, 1)), # weird polars behavior + (" 1", "{_Y99}", None), + ("1", "{^Y99}", 
None), + ("0001-February-03", "{Y}-{M-full}-{D}", DATE), + ("0001-Feb-03", "{Y}-{M-short}-{D}", DATE), + ("0001-02-03 05| 5|5", "{Y}-{M}-{D} {W}|{_W}|{^W}", DATE), + ("0001-02-03 6|Saturday|Sat", "{Y}-{M}-{D} {DOW}|{DOW-full}|{DOW-short}", DATE), + ("0001/034", "{Y}/{DOY}", DATE), + (" 1/ 34", "{Y}/{_DOY}", DATE), + (" 1/034", "{Y}/{^DOY}", DATE), + ("0001-02-03 0001", "{Y}-{M}-{D} {Y}", DATE), + ("0001-02-03 0004", "{Y}-{M}-{D} {Y}", date(4, 2, 3)), # weird polars behavior + ("0001-02-03 01", "{Y}-{M}-{D} {Y99}", date(2001, 2, 3)), # weird polars behavior + ("0001-02-03 04", "{Y}-{M}-{D} {Y99}", date(2004, 2, 3)), # weird polars behavior + ("24:00:00", "{Y}-{M}-{D}", None), + ("invalid", "{Y}-{M}-{D}", None), + ], + ids=[ + "{Y}-{M}-{D}", + "{_Y}-{_M}-{_D}", + "{^Y}-{^M}-{^D}", + "{Y99}", + "{_Y99}", + "{^Y99}", + "{Y}-{M-full}-{D}", + "{Y}-{M-short}-{D}", + "week number", + "day of the week", + "{Y}/{DOY}", + "{_Y}/{_DOY}", + "{^Y}/{^DOY}", + "duplicate field, same value", + "duplicate field, different value", + "similar field, same value", + "similar field, different value", + "out of bounds", + "no match", + ], +) +def test_should_handle_custom_format_string(value: str, format_: str, expected: date) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.to_date(format=format_), + expected, + ) + + +@pytest.mark.parametrize( + ("value", "format_", "expected"), + [ + ("0001-02-03\\", "{Y}-{M}-{D}\\", DATE), + ("0001-02-03\\", "{Y}-{M}-{D}\\\\", DATE), + ("0001-02-03{", "{Y}-{M}-{D}\\{", DATE), + ("0001-02-03%", "{Y}-{M}-{D}%", DATE), + ("0001-02-03\n", "{Y}-{M}-{D}\n", DATE), + ("0001-02-03\t", "{Y}-{M}-{D}\t", DATE), + ], + ids=[ + "backslash at end", + "escaped backslash", + "escaped open curly brace", + "percent", + "newline", + "tab", + ], +) +def test_should_handle_escape_sequences(value: str, format_: str, expected: date) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.to_date(format=format_), + expected, + ) + + 
+def test_should_raise_for_unclosed_specifier() -> None: + column = Column("a", ["0001-02-03"]) + with pytest.raises(ValueError, match="Unclosed specifier"): + column.transform(lambda cell: cell.str.to_date(format="{Y")) + + +@pytest.mark.parametrize( + "format_", + [ + "{invalid}", + "{m}", + ], + ids=[ + "globally invalid", + "invalid for date", + ], +) +def test_should_raise_for_invalid_specifier(format_: str) -> None: + column = Column("a", ["0001-02-03"]) + with pytest.raises(ValueError, match="Invalid specifier"): + column.transform(lambda cell: cell.str.to_date(format=format_)) diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_time.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_time.py index ea6857bc9..abe4932a7 100644 --- a/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_time.py +++ b/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_time.py @@ -116,7 +116,7 @@ def test_should_handle_escape_sequences(value: str, format_: str, expected: time def test_should_raise_for_unclosed_specifier() -> None: column = Column("a", ["04:05:06"]) with pytest.raises(ValueError, match="Unclosed specifier"): - column.transform(lambda cell: cell.str.to_time(format="{Y")) + column.transform(lambda cell: cell.str.to_time(format="{m")) @pytest.mark.parametrize( From 697117e51396007f2ad342353f7b9a3118a91732 Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Wed, 26 Feb 2025 17:00:53 +0100 Subject: [PATCH 26/26] feat: to_datetime --- .../data/tabular/query/_string_operations.py | 109 +++++++++++++++++- .../test_to_datetime.py | 102 ++++++++++++++++ 2 files changed, 208 insertions(+), 3 deletions(-) create mode 100644 tests/safeds/data/tabular/query/_lazy_string_operations/test_to_datetime.py diff --git a/src/safeds/data/tabular/query/_string_operations.py b/src/safeds/data/tabular/query/_string_operations.py index 0befeeb10..450881185 100644 --- a/src/safeds/data/tabular/query/_string_operations.py +++ 
b/src/safeds/data/tabular/query/_string_operations.py @@ -709,14 +709,14 @@ def to_date(self, *, format: str | None = "iso") -> Cell[datetime.date | None]: Examples -------- >>> from safeds.data.tabular.containers import Column - >>> column = Column("a", ["0001-02-03", "03.02.0001", "abc", None]) + >>> column = Column("a", ["1999-02-03", "03.02.2001", "abc", None]) >>> column.transform(lambda cell: cell.str.to_date()) +------------+ | a | | --- | | date | +============+ - | 0001-02-03 | + | 1999-02-03 | | null | | null | | null | @@ -729,12 +729,115 @@ def to_date(self, *, format: str | None = "iso") -> Cell[datetime.date | None]: | date | +============+ | null | - | 0001-02-03 | + | 2001-02-03 | | null | | null | +------------+ """ + @abstractmethod + def to_datetime(self, *, format: str | None = "iso") -> Cell[datetime.datetime | None]: + r""" + Convert a string to a datetime. + + The `format` parameter controls the presentation. It can be `"iso"` to target ISO 8601 or a custom string. The + custom string can contain fixed specifiers (see below), which are replaced with the corresponding values. The + specifiers are case-sensitive and always enclosed in curly braces. Other text is included in the output + verbatim. To include a literal opening curly brace, use `\{`, and to include a literal backslash, use `\\`. + + The following specifiers for _date components_ are available for **datetime** and **date**: + + - `{Y}`, `{_Y}`, `{^Y}`: Year (zero-padded to four digits, space-padded to four digits, no padding). + - `{Y99}`, `{_Y99}`, `{^Y99}`: Year modulo 100 (zero-padded to two digits, space-padded to two digits, no + padding). + - `{M}`, `{_M}`, `{^M}`: Month (zero-padded to two digits, space-padded to two digits, no padding). + - `{M-full}`: Full name of the month (e.g. "January"). + - `{M-short}`: Abbreviated name of the month with three letters (e.g. "Jan"). 
+ - `{W}`, `{_W}`, `{^W}`: Week number as defined by ISO 8601 (zero-padded to two digits, space-padded to two + digits, no padding). + - `{D}`, `{_D}`, `{^D}`: Day of the month (zero-padded to two digits, space-padded to two digits, no padding). + - `{DOW}`: Day of the week as defined by ISO 8601 (1 = Monday, 7 = Sunday). + - `{DOW-full}`: Full name of the day of the week (e.g. "Monday"). + - `{DOW-short}`: Abbreviated name of the day of the week with three letters (e.g. "Mon"). + - `{DOY}`, `{_DOY}`, `{^DOY}`: Day of the year, ranging from 1 to 366 (zero-padded to three digits, space-padded + to three digits, no padding). + + The following specifiers for _time components_ are available for **datetime** and **time**: + + - `{h}`, `{_h}`, `{^h}`: Hour (zero-padded to two digits, space-padded to two digits, no padding). + - `{h12}`, `{_h12}`, `{^h12}`: Hour in 12-hour format (zero-padded to two digits, space-padded to two digits, no + padding). + - `{m}`, `{_m}`, `{^m}`: Minute (zero-padded to two digits, space-padded to two digits, no padding). + - `{s}`, `{_s}`, `{^s}`: Second (zero-padded to two digits, space-padded to two digits, no padding). + - `{.f}`: Fractional seconds with a leading decimal point. + - `{ms}`: Millisecond (zero-padded to three digits). + - `{us}`: Microsecond (zero-padded to six digits). + - `{ns}`: Nanosecond (zero-padded to nine digits). + - `{AM/PM}`: AM or PM (uppercase). + - `{am/pm}`: am or pm (lowercase). + + The following specifiers are available for **datetime** only: + + - `{z}`: Offset of the timezone from UTC without a colon (e.g. "+0000"). + - `{:z}`: Offset of the timezone from UTC with a colon (e.g. "+00:00"). + - `{u}`: The UNIX timestamp in seconds. + + The specifiers follow certain conventions: + + - Generally, date components use uppercase letters and time components use lowercase letters. + - If a component may be formatted in multiple ways, we use shorter specifiers for ISO 8601. 
Specifiers for + other formats have a prefix (same value with different padding, see below) or suffix (other differences). + - By default, values are zero-padded, where applicable. + - A leading underscore (`_`) means the value is space-padded. + - A leading caret (`^`) means the value has no padding (think of the caret in regular expressions). + + Parameters + ---------- + format: + The format to use. + + Returns + ------- + cell: + The parsed datetime. + + Raises + ------ + ValueError + If the format is invalid. + + Examples + -------- + >>> from datetime import date, datetime + >>> from safeds.data.tabular.containers import Column + >>> column1 = Column("a", ["1999-12-31T01:02:03Z", "12:30 Jan 23 2024", "abc", None]) + >>> column1.transform(lambda cell: cell.str.to_datetime()) + +-------------------------+ + | a | + | --- | + | datetime[μs, UTC] | + +=========================+ + | 1999-12-31 01:02:03 UTC | + | null | + | null | + | null | + +-------------------------+ + + >>> column1.transform(lambda cell: cell.str.to_datetime( + ... format="{h}:{m} {M-short} {D} {Y}" + ... 
)) + +---------------------+ + | a | + | --- | + | datetime[μs] | + +=====================+ + | null | + | 2024-01-23 12:30:00 | + | null | + | null | + +---------------------+ + """ + @abstractmethod def to_float(self) -> Cell[float | None]: """ diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_datetime.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_datetime.py new file mode 100644 index 000000000..89c312074 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_datetime.py @@ -0,0 +1,102 @@ +from datetime import datetime +from zoneinfo import ZoneInfo + +import pytest + +from safeds.data.tabular.containers import Column +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + +DATETIME = datetime(1, 2, 3, 4, 5, 6) # noqa: DTZ001 +DATETIME_UTC = datetime(1, 2, 3, 4, 5, 6, tzinfo=ZoneInfo("UTC")) + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + ("0001-02-03T04:05:06Z", DATETIME_UTC), + (None, None), + ], + ids=[ + "datetime", + "None", + ], +) +def test_should_handle_iso_8601(value: str | None, expected: str | None) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.to_datetime(format="iso"), + expected, + type_if_none=ColumnType.string(), + ) + + +@pytest.mark.parametrize( + ("value", "format_", "expected"), + [ + ("0001-02-03 04:05:06", "{Y}-{M}-{D} {h}:{m}:{s}", DATETIME), + (" 1- 2- 3 4: 5: 6", "{_Y}-{_M}-{_D} {_h}:{_m}:{_s}", DATETIME), + ("1-2-3 4:5:6", "{^Y}-{^M}-{^D} {^h}:{^m}:{^s}", DATETIME), + ("invalid", "{Y}-{M}-{D} {h}:{m}:{s}", None), + ], + ids=[ + "{Y}-{M}-{D} {h}:{m}:{s}", + "{_Y}-{_M}-{_D} {_h}:{_m}:{_s}", + "{^Y}-{^M}-{^D} {^h}:{^m}:{^s}", + "no match", + ], +) +def test_should_handle_custom_format_string(value: str, format_: str, expected: datetime) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.to_datetime(format=format_), + expected, + ) + + 
+@pytest.mark.parametrize( + ("value", "format_", "expected"), + [ + ("0001-02-03 04:05:06\\", "{Y}-{M}-{D} {h}:{m}:{s}\\", DATETIME), + ("0001-02-03 04:05:06\\", "{Y}-{M}-{D} {h}:{m}:{s}\\\\", DATETIME), + ("0001-02-03 04:05:06{", "{Y}-{M}-{D} {h}:{m}:{s}\\{", DATETIME), + ("0001-02-03 04:05:06%", "{Y}-{M}-{D} {h}:{m}:{s}%", DATETIME), + ("0001-02-03 04:05:06\n", "{Y}-{M}-{D} {h}:{m}:{s}\n", DATETIME), + ("0001-02-03 04:05:06\t", "{Y}-{M}-{D} {h}:{m}:{s}\t", DATETIME), + ], + ids=[ + "backslash at end", + "escaped backslash", + "escaped open curly brace", + "percent", + "newline", + "tab", + ], +) +def test_should_handle_escape_sequences(value: str, format_: str, expected: datetime) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.to_datetime(format=format_), + expected, + ) + + +def test_should_raise_for_unclosed_specifier() -> None: + column = Column("a", ["0001-02-03 04:05:06"]) + with pytest.raises(ValueError, match="Unclosed specifier"): + column.transform(lambda cell: cell.str.to_datetime(format="{Y")) + + +@pytest.mark.parametrize( + "format_", + [ + "{invalid}", + ], + ids=[ + "globally invalid", + ], +) +def test_should_raise_for_invalid_specifier(format_: str) -> None: + column = Column("a", ["0001-02-03"]) + with pytest.raises(ValueError, match="Invalid specifier"): + column.transform(lambda cell: cell.str.to_datetime(format=format_))