diff --git a/src/safeds/data/tabular/containers/_cell.py b/src/safeds/data/tabular/containers/_cell.py index 202af7661..dad805a0a 100644 --- a/src/safeds/data/tabular/containers/_cell.py +++ b/src/safeds/data/tabular/containers/_cell.py @@ -44,7 +44,7 @@ class Cell(ABC, Generic[T_co]): # ------------------------------------------------------------------------------------------------------------------ @staticmethod - def constant(value: _PythonLiteral | None) -> Cell: + def constant(value: _PythonLiteral | None, *, type: ColumnType | None = None) -> Cell: """ Create a cell with a constant value. @@ -52,6 +52,8 @@ def constant(value: _PythonLiteral | None) -> Cell: ---------- value: The value to create the cell from. + type: + The type of the cell. If None, the type is inferred from the value. Returns ------- @@ -77,7 +79,9 @@ def constant(value: _PythonLiteral | None) -> Cell: from ._lazy_cell import _LazyCell # circular import - return _LazyCell(pl.lit(value)) + dtype = type._polars_data_type if type is not None else None + + return _LazyCell(pl.lit(value, dtype=dtype)) @staticmethod def date( @@ -1453,7 +1457,7 @@ def cast(self, type: ColumnType) -> Cell: @property @abstractmethod def _polars_expression(self) -> pl.Expr: - """The Polars expression that corresponds to this cell.""" + """The polars expression that corresponds to this cell.""" @abstractmethod def _equals(self, other: object) -> bool: @@ -1464,10 +1468,32 @@ def _equals(self, other: object) -> bool: """ -def _to_polars_expression(cell_proxy: _ConvertibleToCell) -> pl.Expr: +def _to_polars_expression(cell_proxy: _ConvertibleToCell, *, type_if_none: ColumnType | None = None) -> pl.Expr: + """ + Convert a cell proxy to a polars expression. + + Parameters + ---------- + cell_proxy: + The cell proxy to convert. + type_if_none: + The type to use if `cell_proxy` is `None`. If `None`, the type is inferred from the context. + + Returns + ------- + expression: + The polars expression. 
+ """ import polars as pl + # Cell if isinstance(cell_proxy, Cell): return cell_proxy._polars_expression + + # Plain value + if cell_proxy is None and type_if_none is not None: + dtype = type_if_none._polars_data_type else: - return pl.lit(cell_proxy) + dtype = None + + return pl.lit(cell_proxy, dtype) diff --git a/src/safeds/data/tabular/query/_duration_operations.py b/src/safeds/data/tabular/query/_duration_operations.py index 30d032003..a03f3e31b 100644 --- a/src/safeds/data/tabular/query/_duration_operations.py +++ b/src/safeds/data/tabular/query/_duration_operations.py @@ -7,14 +7,27 @@ from safeds.data.tabular.containers import Cell -# TODO: Examples with None - - class DurationOperations(ABC): """ Namespace for operations on durations. This class cannot be instantiated directly. It can only be accessed using the `dur` attribute of a cell. + + Examples + -------- + >>> from datetime import timedelta + >>> from safeds.data.tabular.containers import Column + >>> column = Column("a", [timedelta(days=-1), timedelta(days=0), timedelta(days=1)]) + >>> column.transform(lambda cell: cell.dur.abs()) + +--------------+ + | a | + | --- | + | duration[μs] | + +==============+ + | 1d | + | 0µs | + | 1d | + +--------------+ """ # ------------------------------------------------------------------------------------------------------------------ diff --git a/src/safeds/data/tabular/query/_lazy_datetime_operations.py b/src/safeds/data/tabular/query/_lazy_datetime_operations.py index 298c8fdb4..05169c405 100644 --- a/src/safeds/data/tabular/query/_lazy_datetime_operations.py +++ b/src/safeds/data/tabular/query/_lazy_datetime_operations.py @@ -4,6 +4,7 @@ from safeds._utils import _structural_hash from safeds._validation import _convert_and_check_datetime_format +from safeds.data.tabular.containers._cell import _to_polars_expression from safeds.data.tabular.containers._lazy_cell import _LazyCell from ._datetime_operations import DatetimeOperations @@ -114,6 +115,14 @@ def 
replace( second: _ConvertibleToIntCell = None, microsecond: _ConvertibleToIntCell = None, ) -> Cell: + year = _to_polars_expression(year) + month = _to_polars_expression(month) + day = _to_polars_expression(day) + hour = _to_polars_expression(hour) + minute = _to_polars_expression(minute) + second = _to_polars_expression(second) + microsecond = _to_polars_expression(microsecond) + return _LazyCell( self._expression.dt.replace( year=year, diff --git a/src/safeds/data/tabular/query/_lazy_string_operations.py b/src/safeds/data/tabular/query/_lazy_string_operations.py index 94cc4ac25..8ef148e5d 100644 --- a/src/safeds/data/tabular/query/_lazy_string_operations.py +++ b/src/safeds/data/tabular/query/_lazy_string_operations.py @@ -4,7 +4,9 @@ from safeds._utils import _structural_hash from safeds._validation import _check_bounds, _ClosedBound, _convert_and_check_datetime_format +from safeds.data.tabular.containers._cell import _to_polars_expression from safeds.data.tabular.containers._lazy_cell import _LazyCell +from safeds.data.tabular.typing import ColumnType from ._string_operations import StringOperations @@ -49,27 +51,68 @@ def __str__(self) -> str: # ------------------------------------------------------------------------------------------------------------------ def contains(self, substring: _ConvertibleToStringCell) -> Cell[bool | None]: + substring = _to_polars_expression(substring, type_if_none=ColumnType.string()) + return _LazyCell(self._expression.str.contains(substring, literal=True)) + def ends_with(self, suffix: _ConvertibleToStringCell) -> Cell[bool | None]: + suffix = _to_polars_expression(suffix) + + return _LazyCell(self._expression.str.ends_with(suffix)) + + def index_of(self, substring: _ConvertibleToStringCell) -> Cell[int | None]: + substring = _to_polars_expression(substring, type_if_none=ColumnType.string()) + + return _LazyCell(self._expression.str.find(substring, literal=True)) + def length(self, optimize_for_ascii: bool = False) -> Cell[int | 
None]: if optimize_for_ascii: return _LazyCell(self._expression.str.len_bytes()) else: return _LazyCell(self._expression.str.len_chars()) - def ends_with(self, suffix: _ConvertibleToStringCell) -> Cell[bool | None]: - return _LazyCell(self._expression.str.ends_with(suffix)) + def pad_end(self, length: int, *, character: str = " ") -> Cell[str | None]: + _check_bounds("length", length, lower_bound=_ClosedBound(0)) + if len(character) != 1: + raise ValueError("Can only pad with a single character.") - def index_of(self, substring: _ConvertibleToStringCell) -> Cell[int | None]: - return _LazyCell(self._expression.str.find(substring, literal=True)) + return _LazyCell(self._expression.str.pad_end(length, character)) + + def pad_start(self, length: int, *, character: str = " ") -> Cell[str | None]: + _check_bounds("length", length, lower_bound=_ClosedBound(0)) + if len(character) != 1: + raise ValueError("Can only pad with a single character.") + + return _LazyCell(self._expression.str.pad_start(length, character)) + + def repeat(self, count: _ConvertibleToIntCell) -> Cell[str | None]: + if isinstance(count, int): + _check_bounds("count", count, lower_bound=_ClosedBound(0)) + + count = _to_polars_expression(count) + + return _LazyCell(self._expression.repeat_by(count).list.join("", ignore_nulls=False)) + + def remove_prefix(self, prefix: _ConvertibleToStringCell) -> Cell[str | None]: + prefix = _to_polars_expression(prefix, type_if_none=ColumnType.string()) + + return _LazyCell(self._expression.str.strip_prefix(prefix)) + + def remove_suffix(self, suffix: _ConvertibleToStringCell) -> Cell[str | None]: + suffix = _to_polars_expression(suffix, type_if_none=ColumnType.string()) + + return _LazyCell(self._expression.str.strip_suffix(suffix)) + + def replace_all(self, old: _ConvertibleToStringCell, new: _ConvertibleToStringCell) -> Cell[str | None]: + old = _to_polars_expression(old, type_if_none=ColumnType.string()) + new = _to_polars_expression(new, 
type_if_none=ColumnType.string()) - def replace(self, old: _ConvertibleToStringCell, new: _ConvertibleToStringCell) -> Cell[str | None]: return _LazyCell(self._expression.str.replace_all(old, new, literal=True)) - def starts_with(self, prefix: _ConvertibleToStringCell) -> Cell[bool | None]: - return _LazyCell(self._expression.str.starts_with(prefix)) + def reverse(self) -> Cell[str | None]: + return _LazyCell(self._expression.str.reverse()) - def substring( + def slice( self, *, start: _ConvertibleToIntCell = 0, @@ -78,8 +121,31 @@ def substring( if isinstance(length, int): _check_bounds("length", length, lower_bound=_ClosedBound(0)) + start = _to_polars_expression(start) + length = _to_polars_expression(length) + return _LazyCell(self._expression.str.slice(start, length)) + def starts_with(self, prefix: _ConvertibleToStringCell) -> Cell[bool | None]: + prefix = _to_polars_expression(prefix) + + return _LazyCell(self._expression.str.starts_with(prefix)) + + def strip(self, *, characters: _ConvertibleToStringCell = None) -> Cell[str | None]: + characters = _to_polars_expression(characters) + + return _LazyCell(self._expression.str.strip_chars(characters)) + + def strip_end(self, *, characters: _ConvertibleToStringCell = None) -> Cell[str | None]: + characters = _to_polars_expression(characters) + + return _LazyCell(self._expression.str.strip_chars_end(characters)) + + def strip_start(self, *, characters: _ConvertibleToStringCell = None) -> Cell[str | None]: + characters = _to_polars_expression(characters) + + return _LazyCell(self._expression.str.strip_chars_start(characters)) + def to_date(self, *, format: str | None = "iso") -> Cell[datetime.date | None]: if format == "iso": format = "%F" # noqa: A001 @@ -96,7 +162,14 @@ def to_datetime(self, *, format: str | None = "iso") -> Cell[datetime.datetime | return _LazyCell(self._expression.str.to_datetime(format=format, strict=False)) + def to_float(self) -> Cell[float | None]: + import polars as pl + + return 
_LazyCell(self._expression.cast(pl.Float64(), strict=False)) + def to_int(self, *, base: _ConvertibleToIntCell = 10) -> Cell[int | None]: + base = _to_polars_expression(base) + return _LazyCell(self._expression.str.to_integer(base=base, strict=False)) def to_lowercase(self) -> Cell[str | None]: @@ -104,7 +177,7 @@ def to_lowercase(self) -> Cell[str | None]: def to_time(self, *, format: str | None = "iso") -> Cell[datetime.time | None]: if format == "iso": - format = "%T" # noqa: A001 + format = "%T%.f" # noqa: A001 elif format is not None: format = _convert_and_check_datetime_format(format, type_="time", used_for_parsing=True) # noqa: A001 @@ -112,12 +185,3 @@ def to_time(self, *, format: str | None = "iso") -> Cell[datetime.time | None]: def to_uppercase(self) -> Cell[str | None]: return _LazyCell(self._expression.str.to_uppercase()) - - def trim(self) -> Cell[str | None]: - return _LazyCell(self._expression.str.strip_chars()) - - def trim_end(self) -> Cell[str | None]: - return _LazyCell(self._expression.str.strip_chars_end()) - - def trim_start(self) -> Cell[str | None]: - return _LazyCell(self._expression.str.strip_chars_start()) diff --git a/src/safeds/data/tabular/query/_math_operations.py b/src/safeds/data/tabular/query/_math_operations.py index 0c33ccd69..e3d64e75d 100644 --- a/src/safeds/data/tabular/query/_math_operations.py +++ b/src/safeds/data/tabular/query/_math_operations.py @@ -669,7 +669,7 @@ def round_to_significant_figures(self, significant_figures: int) -> Cell: @abstractmethod def sign(self) -> Cell: """ - Get the sign (-1 for negative numbers, 0 for zero, and 1 for positive numbers). + Get the sign (-1 if negative, 0 for zero, and 1 if positive). Note that IEEE 754 defines a negative zero (-0) and a positive zero (+0). This method return a negative zero for -0 and a positive zero for +0. 
diff --git a/src/safeds/data/tabular/query/_string_operations.py b/src/safeds/data/tabular/query/_string_operations.py index 6b99afe59..450881185 100644 --- a/src/safeds/data/tabular/query/_string_operations.py +++ b/src/safeds/data/tabular/query/_string_operations.py @@ -8,12 +8,7 @@ from safeds._typing import _ConvertibleToIntCell, _ConvertibleToStringCell from safeds.data.tabular.containers import Cell - -# TODO: examples with None -# TODO: add more methods -# - reverse -# - to_time -# - ... + from safeds.exceptions import OutOfBoundsError # noqa: F401 class StringOperations(ABC): @@ -64,7 +59,7 @@ def __str__(self) -> str: ... @abstractmethod def contains(self, substring: _ConvertibleToStringCell) -> Cell[bool | None]: """ - Check if the string value in the cell contains the substring. + Check if the string contains the substring. Parameters ---------- @@ -74,12 +69,12 @@ def contains(self, substring: _ConvertibleToStringCell) -> Cell[bool | None]: Returns ------- contains: - Whether the string value contains the substring. + Whether the string contains the substring. Examples -------- >>> from safeds.data.tabular.containers import Column - >>> column = Column("a", ["ab", "bc", "cd", None]) + >>> column = Column("a", ["ab", "cd", None]) >>> column.transform(lambda cell: cell.str.contains("b")) +-------+ | a | @@ -87,7 +82,6 @@ def contains(self, substring: _ConvertibleToStringCell) -> Cell[bool | None]: | bool | +=======+ | true | - | true | | false | | null | +-------+ @@ -96,29 +90,28 @@ def contains(self, substring: _ConvertibleToStringCell) -> Cell[bool | None]: @abstractmethod def ends_with(self, suffix: _ConvertibleToStringCell) -> Cell[bool | None]: """ - Check if the string value in the cell ends with the suffix. + Check if the string ends with the suffix. Parameters ---------- suffix: - The suffix to search for. + The expected suffix. Returns ------- - ends_with: - Whether the string value ends with the suffix. 
+ cell: + Whether the string ends with the suffix. Examples -------- >>> from safeds.data.tabular.containers import Column - >>> column = Column("a", ["ab", "bc", "cd", None]) - >>> column.transform(lambda cell: cell.str.ends_with("c")) + >>> column = Column("a", ["ab", "bc", None]) + >>> column.transform(lambda cell: cell.str.ends_with("b")) +-------+ | a | | --- | | bool | +=======+ - | false | | true | | false | | null | @@ -128,7 +121,7 @@ def ends_with(self, suffix: _ConvertibleToStringCell) -> Cell[bool | None]: @abstractmethod def index_of(self, substring: _ConvertibleToStringCell) -> Cell[int | None]: """ - Get the index of the first occurrence of the substring in the string value in the cell. + Get the index of the first occurrence of the substring. Parameters ---------- @@ -137,13 +130,13 @@ def index_of(self, substring: _ConvertibleToStringCell) -> Cell[int | None]: Returns ------- - index_of: + cell: The index of the first occurrence of the substring. If the substring is not found, None is returned. Examples -------- >>> from safeds.data.tabular.containers import Column - >>> column = Column("a", ["ab", "bc", "cd", None]) + >>> column = Column("a", ["ab", "cd", None]) >>> column.transform(lambda cell: cell.str.index_of("b")) +------+ | a | @@ -151,7 +144,6 @@ def index_of(self, substring: _ConvertibleToStringCell) -> Cell[int | None]: | u32 | +======+ | 1 | - | 0 | | null | | null | +------+ @@ -160,7 +152,7 @@ def index_of(self, substring: _ConvertibleToStringCell) -> Cell[int | None]: @abstractmethod def length(self, *, optimize_for_ascii: bool = False) -> Cell[int | None]: """ - Get the number of characters of the string value in the cell. + Get the number of characters. Parameters ---------- @@ -170,8 +162,8 @@ def length(self, *, optimize_for_ascii: bool = False) -> Cell[int | None]: Returns ------- - length: - The length of the string value. + cell: + The number of characters. 
Examples -------- @@ -191,9 +183,211 @@ def length(self, *, optimize_for_ascii: bool = False) -> Cell[int | None]: """ @abstractmethod - def replace(self, old: _ConvertibleToStringCell, new: _ConvertibleToStringCell) -> Cell[str | None]: + def pad_end(self, length: int, *, character: str = " ") -> Cell[str | None]: + """ + Pad the end of the string with the given character until it has the given length. + + Parameters + ---------- + length: + The minimum length of the string. If the string is already at least as long, it is returned unchanged. Must + be greater than or equal to 0. + character: + How to pad the string. Must be a single character. + + Returns + ------- + cell: + The padded string. + + Raises + ------ + OutOfBoundsError + If `length` is less than 0. + ValueError + If `character` is not a single character. + + Examples + -------- + >>> from safeds.data.tabular.containers import Column + >>> column = Column("a", ["ab", "bcde", None]) + >>> column.transform(lambda cell: cell.str.pad_end(3)) + +------+ + | a | + | --- | + | str | + +======+ + | ab | + | bcde | + | null | + +------+ + + >>> column.transform(lambda cell: cell.str.pad_end(3, character="~")) + +------+ + | a | + | --- | + | str | + +======+ + | ab~ | + | bcde | + | null | + +------+ + """ + + @abstractmethod + def pad_start(self, length: int, *, character: str = " ") -> Cell[str | None]: + """ + Pad the start of the string with the given character until it has the given length. + + Parameters + ---------- + length: + The minimum length of the string. If the string is already at least as long, it is returned unchanged. Must + be greater than or equal to 0. + character: + How to pad the string. Must be a single character. + + Returns + ------- + cell: + The padded string. + + Raises + ------ + OutOfBoundsError + If `length` is less than 0. + ValueError + If `character` is not a single character. 
+ + Examples + -------- + >>> from safeds.data.tabular.containers import Column + >>> column = Column("a", ["ab", "bcde", None]) + >>> column.transform(lambda cell: cell.str.pad_start(3)) + +------+ + | a | + | --- | + | str | + +======+ + | ab | + | bcde | + | null | + +------+ + + >>> column.transform(lambda cell: cell.str.pad_start(3, character="~")) + +------+ + | a | + | --- | + | str | + +======+ + | ~ab | + | bcde | + | null | + +------+ + """ + + @abstractmethod + def remove_prefix(self, prefix: _ConvertibleToStringCell) -> Cell[str | None]: + """ + Remove a prefix from the string. Strings without the prefix are not changed. + + Parameters + ---------- + prefix: + The prefix to remove. + + Returns + ------- + cell: + The string without the prefix. + + Examples + -------- + >>> from safeds.data.tabular.containers import Column + >>> column = Column("a", ["ab", "bc", None]) + >>> column.transform(lambda cell: cell.str.remove_prefix("a")) + +------+ + | a | + | --- | + | str | + +======+ + | b | + | bc | + | null | + +------+ + """ + + @abstractmethod + def remove_suffix(self, suffix: _ConvertibleToStringCell) -> Cell[str | None]: + """ + Remove a suffix from the string. Strings without the suffix are not changed. + + Parameters + ---------- + suffix: + The suffix to remove. + + Returns + ------- + cell: + The string without the suffix. + + Examples + -------- + >>> from safeds.data.tabular.containers import Column + >>> column = Column("a", ["ab", "bc", None]) + >>> column.transform(lambda cell: cell.str.remove_suffix("b")) + +------+ + | a | + | --- | + | str | + +======+ + | a | + | bc | + | null | + +------+ + """ + + @abstractmethod + def repeat(self, count: _ConvertibleToIntCell) -> Cell[str | None]: + """ + Repeat the string a number of times. + + Parameters + ---------- + count: + The number of times to repeat the string. Must be greater than or equal to 0. + + Returns + ------- + cell: + The repeated string. 
+ + Raises + ------ + OutOfBoundsError + If `count` is less than 0. + + Examples + -------- + >>> from safeds.data.tabular.containers import Column + >>> column = Column("a", ["ab", "bc", None]) + >>> column.transform(lambda cell: cell.str.repeat(2)) + +------+ + | a | + | --- | + | str | + +======+ + | abab | + | bcbc | + | null | + +------+ + """ + + @abstractmethod + def replace_all(self, old: _ConvertibleToStringCell, new: _ConvertibleToStringCell) -> Cell[str | None]: """ - Replace occurrences of the old substring with the new substring in the string value in the cell. + Replace all occurrences of the old substring with the new substring. Parameters ---------- @@ -204,14 +398,14 @@ def replace(self, old: _ConvertibleToStringCell, new: _ConvertibleToStringCell) Returns ------- - replaced_string: - The string value with the occurrences replaced. + cell: + The string with all occurrences replaced. Examples -------- >>> from safeds.data.tabular.containers import Column - >>> column = Column("a", ["ab", "bc", "cd", None]) - >>> column.transform(lambda cell: cell.str.replace("b", "z")) + >>> column = Column("a", ["ab", "bc", None]) + >>> column.transform(lambda cell: cell.str.replace_all("b", "z")) +------+ | a | | --- | @@ -219,7 +413,88 @@ def replace(self, old: _ConvertibleToStringCell, new: _ConvertibleToStringCell) +======+ | az | | zc | - | cd | + | null | + +------+ + """ + + @abstractmethod + def reverse(self) -> Cell[str | None]: + """ + Reverse the string. + + Returns + ------- + cell: + The reversed string. 
+ + Examples + -------- + >>> from safeds.data.tabular.containers import Column + >>> column = Column("a", ["ab", "bc", None]) + >>> column.transform(lambda cell: cell.str.reverse()) + +------+ + | a | + | --- | + | str | + +======+ + | ba | + | cb | + | null | + +------+ + """ + + @abstractmethod + def slice( + self, + *, + start: _ConvertibleToIntCell = 0, + length: _ConvertibleToIntCell = None, + ) -> Cell[str | None]: + """ + Get a slice of the string. + + Parameters + ---------- + start: + The start index of the slice. Nonnegative indices are counted from the beginning (starting at 0), negative + indices from the end (starting at -1). + length: + The length of the slice. If None, the slice contains all characters starting from `start`. Must be greater than + or equal to 0. + + Returns + ------- + cell: + The sliced string. + + Raises + ------ + OutOfBoundsError + If `length` is less than 0. + + Examples + -------- + >>> from safeds.data.tabular.containers import Column + >>> column = Column("a", ["abc", "de", None]) + >>> column.transform(lambda cell: cell.str.slice(start=1)) + +------+ + | a | + | --- | + | str | + +======+ + | bc | + | e | + | null | + +------+ + + >>> column.transform(lambda cell: cell.str.slice(start=1, length=1)) + +------+ + | a | + | --- | + | str | + +======+ + | b | + | e | | null | +------+ """ @@ -227,22 +502,22 @@ def replace(self, old: _ConvertibleToStringCell, new: _ConvertibleToStringCell) @abstractmethod def starts_with(self, prefix: _ConvertibleToStringCell) -> Cell[bool | None]: """ - Check if the string value in the cell starts with the prefix. + Check if the string starts with the prefix. Parameters ---------- prefix: - The prefix to search for. + The expected prefix. Returns ------- - starts_with: - Whether the string value starts with the prefix. + cell: + Whether the string starts with the prefix. 
Examples -------- >>> from safeds.data.tabular.containers import Column - >>> column = Column("a", ["ab", "bc", "cd", None]) + >>> column = Column("a", ["ab", "bc", None]) >>> column.transform(lambda cell: cell.str.starts_with("a")) +-------+ | a | @@ -251,118 +526,349 @@ def starts_with(self, prefix: _ConvertibleToStringCell) -> Cell[bool | None]: +=======+ | true | | false | - | false | | null | +-------+ """ @abstractmethod - def substring( - self, - *, - start: _ConvertibleToIntCell = 0, - length: _ConvertibleToIntCell = None, - ) -> Cell[str | None]: + def strip(self, *, characters: _ConvertibleToStringCell = None) -> Cell[str | None]: """ - Get a substring of the string value in the cell. + Remove leading and trailing characters. Parameters ---------- - start: - The start index of the substring. - length: - The length of the substring. If None, the slice contains all rows starting from `start`. Must greater than - or equal to 0. + characters: + The characters to remove. If None, whitespace is removed. Returns ------- - substring: - The substring of the string value. - - Raises - ------ - OutOfBoundsError - If length is less than 0. + cell: + The stripped string. 
Examples -------- >>> from safeds.data.tabular.containers import Column - >>> column = Column("a", ["abc", "def", "ghi", None]) - >>> column.transform(lambda cell: cell.str.substring(start=1, length=2)) + >>> column = Column("a", [" ab ", "~ bc ~", None]) + >>> column.transform(lambda cell: cell.str.strip()) + +--------+ + | a | + | --- | + | str | + +========+ + | ab | + | ~ bc ~ | + | null | + +--------+ + + >>> column.transform(lambda cell: cell.str.strip(characters=" ~")) +------+ | a | | --- | | str | +======+ + | ab | | bc | - | ef | - | hi | | null | +------+ """ - # TODO: add format parameter + document @abstractmethod - def to_date(self, *, format: str | None = "iso") -> Cell[datetime.date | None]: + def strip_end(self, *, characters: _ConvertibleToStringCell = None) -> Cell[str | None]: + """ + Remove trailing characters. + + Parameters + ---------- + characters: + The characters to remove. If None, whitespace is removed. + + Returns + ------- + cell: + The stripped string. + + Examples + -------- + >>> from safeds.data.tabular.containers import Column + >>> column = Column("a", [" ab ", "~ bc ~", None]) + >>> column.transform(lambda cell: cell.str.strip_end()) + +--------+ + | a | + | --- | + | str | + +========+ + | ab | + | ~ bc ~ | + | null | + +--------+ + + >>> column.transform(lambda cell: cell.str.strip_end(characters=" ~")) + +------+ + | a | + | --- | + | str | + +======+ + | ab | + | ~ bc | + | null | + +------+ + """ + + @abstractmethod + def strip_start(self, *, characters: _ConvertibleToStringCell = None) -> Cell[str | None]: """ - Convert the string value in the cell to a date. + Remove leading characters. + + Parameters + ---------- + characters: + The characters to remove. If None, whitespace is removed. Returns ------- - date: - The date value. If the string cannot be converted to a date, None is returned. + cell: + The stripped string. 
Examples -------- >>> from safeds.data.tabular.containers import Column - >>> column = Column("a", ["2021-01-01", "2021-02-01", "abc", None]) + >>> column = Column("a", [" ab ", "~ bc ~", None]) + >>> column.transform(lambda cell: cell.str.strip_start()) + +--------+ + | a | + | --- | + | str | + +========+ + | ab | + | ~ bc ~ | + | null | + +--------+ + + >>> column.transform(lambda cell: cell.str.strip_start(characters=" ~")) + +------+ + | a | + | --- | + | str | + +======+ + | ab | + | bc ~ | + | null | + +------+ + """ + + @abstractmethod + def to_date(self, *, format: str | None = "iso") -> Cell[datetime.date | None]: + r""" + Convert a string to a date. + + The `format` parameter controls the presentation. It can be `"iso"` to target ISO 8601 or a custom string. The + custom string can contain fixed specifiers (see below), which are replaced with the corresponding values. The + specifiers are case-sensitive and always enclosed in curly braces. Other text is included in the output + verbatim. To include a literal opening curly brace, use `\{`, and to include a literal backslash, use `\\`. + + The following specifiers are available: + + - `{Y}`, `{_Y}`, `{^Y}`: Year (zero-padded to four digits, space-padded to four digits, no padding). + - `{Y99}`, `{_Y99}`, `{^Y99}`: Year modulo 100 (zero-padded to two digits, space-padded to two digits, no + padding). + - `{M}`, `{_M}`, `{^M}`: Month (zero-padded to two digits, space-padded to two digits, no padding). + - `{M-full}`: Full name of the month (e.g. "January"). + - `{M-short}`: Abbreviated name of the month with three letters (e.g. "Jan"). + - `{W}`, `{_W}`, `{^W}`: Week number as defined by ISO 8601 (zero-padded to two digits, space-padded to two + digits, no padding). + - `{D}`, `{_D}`, `{^D}`: Day of the month (zero-padded to two digits, space-padded to two digits, no padding). + - `{DOW}`: Day of the week as defined by ISO 8601 (1 = Monday, 7 = Sunday). 
+ - `{DOW-full}`: Full name of the day of the week (e.g. "Monday"). + - `{DOW-short}`: Abbreviated name of the day of the week with three letters (e.g. "Mon"). + - `{DOY}`, `{_DOY}`, `{^DOY}`: Day of the year, ranging from 1 to 366 (zero-padded to three digits, space-padded + to three digits, no padding). + + The specifiers follow certain conventions: + + - If a component may be formatted in multiple ways, we use shorter specifiers for ISO 8601. Specifiers for + other formats have a prefix (same value with different padding, see below) or suffix (other differences). + - By default, values are zero-padded, where applicable. + - A leading underscore (`_`) means the value is space-padded. + - A leading caret (`^`) means the value has no padding (think of the caret in regular expressions). + + Parameters + ---------- + format: + The format to use. + + Returns + ------- + cell: + The parsed date. + + Raises + ------ + ValueError + If the format is invalid. + + Examples + -------- + >>> from safeds.data.tabular.containers import Column + >>> column = Column("a", ["1999-02-03", "03.02.2001", "abc", None]) >>> column.transform(lambda cell: cell.str.to_date()) +------------+ | a | | --- | | date | +============+ - | 2021-01-01 | - | 2021-02-01 | + | 1999-02-03 | + | null | + | null | + | null | + +------------+ + + >>> column.transform(lambda cell: cell.str.to_date(format="{D}.{M}.{Y}")) + +------------+ + | a | + | --- | + | date | + +============+ + | null | + | 2001-02-03 | | null | | null | +------------+ """ - # TODO: add format parameter + document @abstractmethod def to_datetime(self, *, format: str | None = "iso") -> Cell[datetime.datetime | None]: - """ - Convert the string value in the cell to a datetime. + r""" + Convert a string to a datetime. + + The `format` parameter controls the presentation. It can be `"iso"` to target ISO 8601 or a custom string. The + custom string can contain fixed specifiers (see below), which are replaced with the corresponding values. 
The + specifiers are case-sensitive and always enclosed in curly braces. Other text is included in the output + verbatim. To include a literal opening curly brace, use `\{`, and to include a literal backslash, use `\\`. + + The following specifiers for _date components_ are available for **datetime** and **date**: + + - `{Y}`, `{_Y}`, `{^Y}`: Year (zero-padded to four digits, space-padded to four digits, no padding). + - `{Y99}`, `{_Y99}`, `{^Y99}`: Year modulo 100 (zero-padded to two digits, space-padded to two digits, no + padding). + - `{M}`, `{_M}`, `{^M}`: Month (zero-padded to two digits, space-padded to two digits, no padding). + - `{M-full}`: Full name of the month (e.g. "January"). + - `{M-short}`: Abbreviated name of the month with three letters (e.g. "Jan"). + - `{W}`, `{_W}`, `{^W}`: Week number as defined by ISO 8601 (zero-padded to two digits, space-padded to two + digits, no padding). + - `{D}`, `{_D}`, `{^D}`: Day of the month (zero-padded to two digits, space-padded to two digits, no padding). + - `{DOW}`: Day of the week as defined by ISO 8601 (1 = Monday, 7 = Sunday). + - `{DOW-full}`: Full name of the day of the week (e.g. "Monday"). + - `{DOW-short}`: Abbreviated name of the day of the week with three letters (e.g. "Mon"). + - `{DOY}`, `{_DOY}`, `{^DOY}`: Day of the year, ranging from 1 to 366 (zero-padded to three digits, space-padded + to three digits, no padding). + + The following specifiers for _time components_ are available for **datetime** and **time**: + + - `{h}`, `{_h}`, `{^h}`: Hour (zero-padded to two digits, space-padded to two digits, no padding). + - `{h12}`, `{_h12}`, `{^h12}`: Hour in 12-hour format (zero-padded to two digits, space-padded to two digits, no + padding). + - `{m}`, `{_m}`, `{^m}`: Minute (zero-padded to two digits, space-padded to two digits, no padding). + - `{s}`, `{_s}`, `{^s}`: Second (zero-padded to two digits, space-padded to two digits, no padding). 
+ - `{.f}`: Fractional seconds with a leading decimal point. + - `{ms}`: Millisecond (zero-padded to three digits). + - `{us}`: Microsecond (zero-padded to six digits). + - `{ns}`: Nanosecond (zero-padded to nine digits). + - `{AM/PM}`: AM or PM (uppercase). + - `{am/pm}`: am or pm (lowercase). + + The following specifiers are available for **datetime** only: + + - `{z}`: Offset of the timezone from UTC without a colon (e.g. "+0000"). + - `{:z}`: Offset of the timezone from UTC with a colon (e.g. "+00:00"). + - `{u}`: The UNIX timestamp in seconds. + + The specifiers follow certain conventions: + + - Generally, date components use uppercase letters and time components use lowercase letters. + - If a component may be formatted in multiple ways, we use shorter specifiers for ISO 8601. Specifiers for + other formats have a prefix (same value with different padding, see below) or suffix (other differences). + - By default, values are zero-padded, where applicable. + - A leading underscore (`_`) means the value is space-padded. + - A leading caret (`^`) means the value has no padding (think of the caret in regular expressions). + + Parameters + ---------- + format: + The format to use. + + Returns + ------- + cell: + The parsed datetime. + + Raises + ------ + ValueError + If the format is invalid. 
Examples -------- + >>> from datetime import date, datetime >>> from safeds.data.tabular.containers import Column - >>> column = Column("a", ["2021-01-01T00:00:00Z", "2021-02-01T00:00:00Z", "abc", None]) - >>> column.transform(lambda cell: cell.str.to_datetime()) + >>> column1 = Column("a", ["1999-12-31T01:02:03Z", "12:30 Jan 23 2024", "abc", None]) + >>> column1.transform(lambda cell: cell.str.to_datetime()) +-------------------------+ | a | | --- | | datetime[μs, UTC] | +=========================+ - | 2021-01-01 00:00:00 UTC | - | 2021-02-01 00:00:00 UTC | + | 1999-12-31 01:02:03 UTC | + | null | | null | | null | +-------------------------+ + + >>> column1.transform(lambda cell: cell.str.to_datetime( + ... format="{h}:{m} {M-short} {D} {Y}" + ... )) + +---------------------+ + | a | + | --- | + | datetime[μs] | + +=====================+ + | null | + | 2024-01-23 12:30:00 | + | null | + | null | + +---------------------+ """ - # TODO: add to_time + @abstractmethod + def to_float(self) -> Cell[float | None]: + """ + Convert the string to a float. + + Returns + ------- + cell: + The float value. If the string cannot be converted to a float, None is returned. + + Examples + -------- + >>> from safeds.data.tabular.containers import Column + >>> column = Column("a", ["1", "1.5", "abc", None]) + >>> column.transform(lambda cell: cell.str.to_float()) + +---------+ + | a | + | --- | + | f64 | + +=========+ + | 1.00000 | + | 1.50000 | + | null | + | null | + +---------+ + """ @abstractmethod def to_int(self, *, base: _ConvertibleToIntCell = 10) -> Cell[int | None]: """ - Convert the string value in the cell to an integer. + Convert the string to an integer. Parameters ---------- @@ -371,13 +877,13 @@ def to_int(self, *, base: _ConvertibleToIntCell = 10) -> Cell[int | None]: Returns ------- - int: + cell: The integer value. If the string cannot be converted to an integer, None is returned. 
Examples -------- >>> from safeds.data.tabular.containers import Column - >>> column1 = Column("a", ["1", "2", "3", "abc", None]) + >>> column1 = Column("a", ["1", "10", "abc", None]) >>> column1.transform(lambda cell: cell.str.to_int()) +------+ | a | @@ -385,13 +891,12 @@ def to_int(self, *, base: _ConvertibleToIntCell = 10) -> Cell[int | None]: | i64 | +======+ | 1 | - | 2 | - | 3 | + | 10 | | null | | null | +------+ - >>> column2 = Column("a", ["1", "10", "11", "abc", None]) + >>> column2 = Column("a", ["1", "10", "abc", None]) >>> column2.transform(lambda cell: cell.str.to_int(base=2)) +------+ | a | @@ -400,7 +905,6 @@ def to_int(self, *, base: _ConvertibleToIntCell = 10) -> Cell[int | None]: +======+ | 1 | | 2 | - | 3 | | null | | null | +------+ @@ -409,17 +913,17 @@ def to_int(self, *, base: _ConvertibleToIntCell = 10) -> Cell[int | None]: @abstractmethod def to_lowercase(self) -> Cell[str | None]: """ - Convert the string value in the cell to lowercase. + Convert the string to lowercase. Returns ------- - lowercase: - The string value in lowercase. + cell: + The lowercase string. Examples -------- >>> from safeds.data.tabular.containers import Column - >>> column = Column("a", ["AB", "BC", "CD", None]) + >>> column = Column("a", ["AB", "BC", None]) >>> column.transform(lambda cell: cell.str.to_lowercase()) +------+ | a | @@ -428,118 +932,266 @@ def to_lowercase(self) -> Cell[str | None]: +======+ | ab | | bc | - | cd | | null | +------+ """ @abstractmethod - def to_uppercase(self) -> Cell[str | None]: - """ - Convert the string value in the cell to uppercase. - - Returns - ------- - uppercase: - The string value in uppercase. + def to_time(self, *, format: str | None = "iso") -> Cell[datetime.time | None]: + r""" + Convert a string to a time. + + The `format` parameter controls the presentation. It can be `"iso"` to target ISO 8601 or a custom string. 
The + custom string can contain fixed specifiers (see below), which are replaced with the corresponding values. The + specifiers are case-sensitive and always enclosed in curly braces. Other text is included in the output + verbatim. To include a literal opening curly brace, use `\{`, and to include a literal backslash, use `\\`. + + The following specifiers are available: + + - `{h}`, `{_h}`, `{^h}`: Hour (zero-padded to two digits, space-padded to two digits, no padding). + - `{h12}`, `{_h12}`, `{^h12}`: Hour in 12-hour format (zero-padded to two digits, space-padded to two digits, no + padding). + - `{m}`, `{_m}`, `{^m}`: Minute (zero-padded to two digits, space-padded to two digits, no padding). + - `{s}`, `{_s}`, `{^s}`: Second (zero-padded to two digits, space-padded to two digits, no padding). + - `{.f}`: Fractional seconds with a leading decimal point. + - `{ms}`: Millisecond (zero-padded to three digits). + - `{us}`: Microsecond (zero-padded to six digits). + - `{ns}`: Nanosecond (zero-padded to nine digits). + - `{AM/PM}`: AM or PM (uppercase). + - `{am/pm}`: am or pm (lowercase). + + The specifiers follow certain conventions: + + - If a component may be formatted in multiple ways, we use shorter specifiers for ISO 8601. Specifiers for + other formats have a prefix (same value with different padding, see below) or suffix (other differences). + - By default, values are zero-padded, where applicable. + - A leading underscore (`_`) means the value is space-padded. + - A leading caret (`^`) means the value has no padding (think of the caret in regular expressions). 
- Examples - -------- - >>> from safeds.data.tabular.containers import Column - >>> column = Column("a", ["ab", "bc", "cd", None]) - >>> column.transform(lambda cell: cell.str.to_uppercase()) - +------+ - | a | - | --- | - | str | - +======+ - | AB | - | BC | - | CD | - | null | - +------+ - """ - - @abstractmethod - def trim(self) -> Cell[str | None]: - """ - Remove whitespace from the start and end of the string value in the cell. + Parameters + ---------- + format: + The format to use. Returns ------- - trimmed: - The string value without whitespace at the start and end. - - Examples - -------- - >>> from safeds.data.tabular.containers import Column - >>> column = Column("a", ["", " abc", "abc ", " abc ", None]) - >>> column.transform(lambda cell: cell.str.trim()) - +------+ - | a | - | --- | - | str | - +======+ - | | - | abc | - | abc | - | abc | - | null | - +------+ - """ - - @abstractmethod - def trim_end(self) -> Cell[str | None]: - """ - Remove whitespace from the end of the string value in the cell. + cell: + The parsed time. - Returns - ------- - trimmed: - The string value without whitespace at the end. + Raises + ------ + ValueError + If the format is invalid. 
Examples -------- >>> from safeds.data.tabular.containers import Column - >>> column = Column("a", ["", " abc", "abc ", " abc ", None]) - >>> column.transform(lambda cell: cell.str.trim_end()) - +------+ - | a | - | --- | - | str | - +======+ - | | - | abc | - | abc | - | abc | - | null | - +------+ + >>> column = Column("a", ["12:34", "12:34:56", "12:34:56.789", "abc", None]) + >>> column.transform(lambda cell: cell.str.to_time()) + +--------------+ + | a | + | --- | + | time | + +==============+ + | null | + | 12:34:56 | + | 12:34:56.789 | + | null | + | null | + +--------------+ + + >>> column.transform(lambda cell: cell.str.to_time(format="{h}:{m}")) + +----------+ + | a | + | --- | + | time | + +==========+ + | 12:34:00 | + | null | + | null | + | null | + | null | + +----------+ """ @abstractmethod - def trim_start(self) -> Cell[str | None]: + def to_uppercase(self) -> Cell[str | None]: """ - Remove whitespace from the start of the string value in the cell. + Convert the string to uppercase. Returns ------- - trimmed: - The string value without whitespace at the start. + cell: + The uppercase string. Examples -------- >>> from safeds.data.tabular.containers import Column - >>> column = Column("a", ["", " abc", "abc ", " abc ", None]) - >>> column.transform(lambda cell: cell.str.trim_start()) + >>> column = Column("a", ["ab", "bc", None]) + >>> column.transform(lambda cell: cell.str.to_uppercase()) +------+ | a | | --- | | str | +======+ - | | - | abc | - | abc | - | abc | + | AB | + | BC | | null | +------+ """ + + # @abstractmethod + # def contains(self, substring: _ConvertibleToStringCell) -> Cell[bool | None]: + # """ + # Check if the string value in the cell contains the substring. + # + # Parameters + # ---------- + # substring: + # The substring to search for. + # + # Returns + # ------- + # contains: + # Whether the string value contains the substring. 
+ # + # Examples + # -------- + # >>> from safeds.data.tabular.containers import Column + # >>> column = Column("a", ["ab", "bc", "cd", None]) + # >>> column.transform(lambda cell: cell.str.contains("b")) + # +-------+ + # | a | + # | --- | + # | bool | + # +=======+ + # | true | + # | true | + # | false | + # | null | + # +-------+ + # """ + + # @abstractmethod + # def index_of(self, substring: _ConvertibleToStringCell) -> Cell[int | None]: + # """ + # Get the index of the first occurrence of the substring in the string value in the cell. + # + # Parameters + # ---------- + # substring: + # The substring to search for. + # + # Returns + # ------- + # index_of: + # The index of the first occurrence of the substring. If the substring is not found, None is returned. + # + # Examples + # -------- + # >>> from safeds.data.tabular.containers import Column + # >>> column = Column("a", ["ab", "bc", "cd", None]) + # >>> column.transform(lambda cell: cell.str.index_of("b")) + # +------+ + # | a | + # | --- | + # | u32 | + # +======+ + # | 1 | + # | 0 | + # | null | + # | null | + # +------+ + # """ + # + + # @abstractmethod + # def replace(self, old: _ConvertibleToStringCell, new: _ConvertibleToStringCell) -> Cell[str | None]: + # """ + # Replace occurrences of the old substring with the new substring in the string value in the cell. + # + # Parameters + # ---------- + # old: + # The substring to replace. + # new: + # The substring to replace with. + # + # Returns + # ------- + # replaced_string: + # The string value with the occurrences replaced. 
+ # + # Examples + # -------- + # >>> from safeds.data.tabular.containers import Column + # >>> column = Column("a", ["ab", "bc", "cd", None]) + # >>> column.transform(lambda cell: cell.str.replace("b", "z")) + # +------+ + # | a | + # | --- | + # | str | + # +======+ + # | az | + # | zc | + # | cd | + # | null | + # +------+ + # """ + + # # TODO: add format parameter + document + # @abstractmethod + # def to_date(self, *, format: str | None = "iso") -> Cell[datetime.date | None]: + # """ + # Convert the string value in the cell to a date. + # + # Returns + # ------- + # date: + # The date value. If the string cannot be converted to a date, None is returned. + # + # Examples + # -------- + # >>> from safeds.data.tabular.containers import Column + # >>> column = Column("a", ["2021-01-01", "2021-02-01", "abc", None]) + # >>> column.transform(lambda cell: cell.str.to_date()) + # +------------+ + # | a | + # | --- | + # | date | + # +============+ + # | 2021-01-01 | + # | 2021-02-01 | + # | null | + # | null | + # +------------+ + # """ + # + # # TODO: add format parameter + document + # @abstractmethod + # def to_datetime(self, *, format: str | None = "iso") -> Cell[datetime.datetime | None]: + # """ + # Convert the string value in the cell to a datetime. + # + # Returns + # ------- + # datetime: + # The datetime value. If the string cannot be converted to a datetime, None is returned. 
+ # + # Examples + # -------- + # >>> from safeds.data.tabular.containers import Column + # >>> column = Column("a", ["2021-01-01T00:00:00Z", "2021-02-01T00:00:00Z", "abc", None]) + # >>> column.transform(lambda cell: cell.str.to_datetime()) + # +-------------------------+ + # | a | + # | --- | + # | datetime[μs, UTC] | + # +=========================+ + # | 2021-01-01 00:00:00 UTC | + # | 2021-02-01 00:00:00 UTC | + # | null | + # | null | + # +-------------------------+ + # """ + # diff --git a/tests/safeds/data/tabular/containers/_lazy_cell/test_constant.py b/tests/safeds/data/tabular/containers/_lazy_cell/test_constant.py index 1be74f34a..669404cbe 100644 --- a/tests/safeds/data/tabular/containers/_lazy_cell/test_constant.py +++ b/tests/safeds/data/tabular/containers/_lazy_cell/test_constant.py @@ -3,19 +3,22 @@ import pytest from safeds.data.tabular.containers import Cell +from safeds.data.tabular.typing import ColumnType from tests.helpers import assert_cell_operation_works @pytest.mark.parametrize( - "value", + ("value", "type_", "expected"), [ - None, - 1, + (None, None, None), + (1, None, 1), + (1, ColumnType.string(), "1"), ], ids=[ "None", "int", + "with explicit type", ], ) -def test_should_return_constant_value(value: Any) -> None: - assert_cell_operation_works(None, lambda _: Cell.constant(value), value) +def test_should_return_constant_value(value: Any, type_: ColumnType | None, expected: Any) -> None: + assert_cell_operation_works(None, lambda _: Cell.constant(value, type=type_), expected) diff --git a/tests/safeds/data/tabular/containers/_table/test_slice_rows.py b/tests/safeds/data/tabular/containers/_table/test_slice_rows.py index 046118da9..8fa0f8694 100644 --- a/tests/safeds/data/tabular/containers/_table/test_slice_rows.py +++ b/tests/safeds/data/tabular/containers/_table/test_slice_rows.py @@ -68,12 +68,12 @@ "empty", "no rows", "full table", - "positive start in bounds", - "positive start out of bounds", + "non-negative start in bounds", + 
"non-negative start out of bounds", "negative start in bounds", "negative start out of bounds", - "positive length in bounds", - "positive length out of bounds", + "non-negative length in bounds", + "non-negative length out of bounds", ], ) class TestHappyPath: diff --git a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_replace.py b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_replace.py index a201398e0..9d9196a59 100644 --- a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_replace.py +++ b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_replace.py @@ -2,6 +2,7 @@ import pytest +from safeds.data.tabular.containers import Cell from safeds.data.tabular.typing import ColumnType from tests.helpers import assert_cell_operation_works @@ -100,28 +101,57 @@ "None", ], ) -def test_should_replace_components( - value: datetime | date | None, - year: int | None, - month: int | None, - day: int | None, - hour: int | None, - minute: int | None, - second: int | None, - microsecond: int | None, - expected: int | None, -) -> None: - assert_cell_operation_works( - value, - lambda cell: cell.dt.replace( - year=year, - month=month, - day=day, - hour=hour, - minute=minute, - second=second, - microsecond=microsecond, - ), - expected, - type_if_none=ColumnType.datetime(), - ) +class TestShouldReplaceComponents: + def test_plain_arguments( + self, + value: datetime | date | None, + year: int | None, + month: int | None, + day: int | None, + hour: int | None, + minute: int | None, + second: int | None, + microsecond: int | None, + expected: int | None, + ) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.dt.replace( + year=year, + month=month, + day=day, + hour=hour, + minute=minute, + second=second, + microsecond=microsecond, + ), + expected, + type_if_none=ColumnType.datetime(), + ) + + def test_arguments_wrapped_in_cell( + self, + value: datetime | date | None, + year: int | None, + month: int | None, + 
day: int | None, + hour: int | None, + minute: int | None, + second: int | None, + microsecond: int | None, + expected: int | None, + ) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.dt.replace( + year=Cell.constant(year), + month=Cell.constant(month), + day=Cell.constant(day), + hour=Cell.constant(hour), + minute=Cell.constant(minute), + second=Cell.constant(second), + microsecond=Cell.constant(microsecond), + ), + expected, + type_if_none=ColumnType.datetime(), + ) diff --git a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_to_string.py b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_to_string.py index a5041f376..4b11b1dec 100644 --- a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_to_string.py +++ b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_to_string.py @@ -224,7 +224,7 @@ def test_should_be_replaced_with_correct_string(self, value: datetime, format_: "tab", ], ) -def test_should_handle_escape_sequences(format_: str, expected: date | time | None) -> None: +def test_should_handle_escape_sequences(format_: str, expected: str) -> None: assert_cell_operation_works( DATETIME, lambda cell: cell.dt.to_string(format=format_), diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_contains.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_contains.py index bac8e354a..8d5065480 100644 --- a/tests/safeds/data/tabular/query/_lazy_string_operations/test_contains.py +++ b/tests/safeds/data/tabular/query/_lazy_string_operations/test_contains.py @@ -1,22 +1,50 @@ import pytest +from safeds.data.tabular.containers import Cell +from safeds.data.tabular.typing import ColumnType from tests.helpers import assert_cell_operation_works @pytest.mark.parametrize( - ("string", "substring", "expected"), + ("value", "substring", "expected"), [ - ("", "a", False), + ("", "", True), + ("", "c", False), ("abc", "", True), - ("abc", "a", True), + ("abc", "c", True), + 
("abc", "abc", True), ("abc", "d", False), + (None, "", None), + ("abc", None, None), + (None, None, None), ], ids=[ - "empty string", - "empty substring", - "contained", - "not contained", + "empty string, empty substring", + "empty string, non-empty substring", + "non-empty string, empty substring", + "correct substring", + "substring equal to string", + "incorrect substring", + "None as string", + "None as substring", + "None for both", ], ) -def test_should_check_whether_string_contains_substring(string: str, substring: str, expected: bool) -> None: - assert_cell_operation_works(string, lambda cell: cell.str.contains(substring), expected) +class TestShouldCheckIfStringContainsSubstring: + def test_plain_arguments(self, value: str | None, substring: str | None, expected: bool | None) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.contains(substring), + expected, + type_if_none=ColumnType.string(), + ) + + def test_arguments_wrapped_in_cell(self, value: str | None, substring: str | None, expected: bool | None) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.contains( + Cell.constant(substring), + ), + expected, + type_if_none=ColumnType.string(), + ) diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_ends_with.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_ends_with.py index 78102c900..d893b31ce 100644 --- a/tests/safeds/data/tabular/query/_lazy_string_operations/test_ends_with.py +++ b/tests/safeds/data/tabular/query/_lazy_string_operations/test_ends_with.py @@ -1,22 +1,50 @@ import pytest +from safeds.data.tabular.containers import Cell +from safeds.data.tabular.typing import ColumnType from tests.helpers import assert_cell_operation_works @pytest.mark.parametrize( - ("string", "suffix", "expected"), + ("value", "suffix", "expected"), [ - ("", "a", False), + ("", "", True), + ("", "c", False), ("abc", "", True), ("abc", "c", True), - ("abc", "a", False), + ("abc", 
"abc", True), + ("abc", "d", False), + (None, "", None), + ("abc", None, None), + (None, None, None), ], ids=[ - "empty string", - "empty suffix", - "ends with", - "does not end with", + "empty string, empty suffix", + "empty string, non-empty suffix", + "non-empty string, empty suffix", + "correct suffix", + "suffix equal to string", + "incorrect suffix", + "None as string", + "None as suffix", + "None for both", ], ) -def test_should_check_whether_string_ends_with_prefix(string: str, suffix: str, expected: bool) -> None: - assert_cell_operation_works(string, lambda cell: cell.str.ends_with(suffix), expected) +class TestShouldCheckIfStringEndsWithSuffix: + def test_plain_arguments(self, value: str | None, suffix: str | None, expected: bool | None) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.ends_with(suffix), + expected, + type_if_none=ColumnType.string(), + ) + + def test_arguments_wrapped_in_cell(self, value: str | None, suffix: str | None, expected: bool | None) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.ends_with( + Cell.constant(suffix), + ), + expected, + type_if_none=ColumnType.string(), + ) diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_index_of.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_index_of.py index 84e79ad1b..3430a42ac 100644 --- a/tests/safeds/data/tabular/query/_lazy_string_operations/test_index_of.py +++ b/tests/safeds/data/tabular/query/_lazy_string_operations/test_index_of.py @@ -1,22 +1,50 @@ import pytest +from safeds.data.tabular.containers import Cell +from safeds.data.tabular.typing import ColumnType from tests.helpers import assert_cell_operation_works @pytest.mark.parametrize( - ("string", "substring", "expected"), + ("value", "substring", "expected"), [ - ("", "a", None), + ("", "", 0), + ("", "c", None), ("abc", "", 0), - ("abc", "b", 1), + ("abc", "c", 2), + ("abc", "abc", 0), ("abc", "d", None), + (None, "", None), + ("abc", 
None, None), + (None, None, None), ], ids=[ - "empty string", - "empty substring", - "contained", - "not contained", + "empty string, empty substring", + "empty string, non-empty substring", + "non-empty string, empty substring", + "correct substring", + "substring equal to string", + "incorrect substring", + "None as string", + "None as substring", + "None for both", ], ) -def test_should_return_index_of_first_occurrence_of_substring(string: str, substring: str, expected: bool) -> None: - assert_cell_operation_works(string, lambda cell: cell.str.index_of(substring), expected) +class TestShouldGetIndexOfSubstring: + def test_plain_arguments(self, value: str | None, substring: str | None, expected: int | None) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.index_of(substring), + expected, + type_if_none=ColumnType.string(), + ) + + def test_arguments_wrapped_in_cell(self, value: str | None, substring: str | None, expected: int | None) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.index_of( + Cell.constant(substring), + ), + expected, + type_if_none=ColumnType.string(), + ) diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_length.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_length.py index 5b7f0370b..16a492048 100644 --- a/tests/safeds/data/tabular/query/_lazy_string_operations/test_length.py +++ b/tests/safeds/data/tabular/query/_lazy_string_operations/test_length.py @@ -1,26 +1,34 @@ import pytest +from safeds.data.tabular.typing import ColumnType from tests.helpers import assert_cell_operation_works @pytest.mark.parametrize( - ("string", "optimize_for_ascii", "expected"), + ("value", "optimize_for_ascii", "expected"), [ ("", False, 0), ("", True, 0), ("abc", False, 3), ("abc", True, 3), + ("a 🪲", False, 3), + ("a 🪲", True, 6), + (None, False, None), ], ids=[ - "empty (unoptimized)", + "empty (not optimized)", "empty (optimized)", - "non-empty (unoptimized)", - 
"non-empty (optimized)", + "ASCII only (not optimized)", + "ASCII only (optimized)", + "unicode (not optimized)", + "unicode (optimized)", + "None", ], ) -def test_should_return_number_of_characters(string: str, optimize_for_ascii: bool, expected: bool) -> None: +def test_should_get_number_of_characters(value: str | None, optimize_for_ascii: bool, expected: str | None) -> None: assert_cell_operation_works( - string, + value, lambda cell: cell.str.length(optimize_for_ascii=optimize_for_ascii), expected, + type_if_none=ColumnType.string(), ) diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_pad_end.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_pad_end.py new file mode 100644 index 000000000..28b0f38b2 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_string_operations/test_pad_end.py @@ -0,0 +1,57 @@ +import pytest + +from safeds.data.tabular.containers import Column +from safeds.data.tabular.typing import ColumnType +from safeds.exceptions import OutOfBoundsError +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "length", "character", "expected"), + [ + ("", 0, "a", ""), + ("", 1, "a", "a"), + ("b", 2, "a", "ba"), + ("bc", 2, "a", "bc"), + ("abc", 2, "a", "abc"), + (None, 1, " ", None), + ], + ids=[ + "empty (length 0)", + "empty (length 1)", + "non-empty (shorter length)", + "non-empty (same length)", + "non-empty (longer length)", + "None", + ], +) +def test_should_pad_end(value: str | None, length: int, character: str, expected: bool | None) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.pad_end(length, character=character), + expected, + type_if_none=ColumnType.string(), + ) + + +def test_should_raise_if_length_is_out_of_bounds() -> None: + column = Column("col1", [1]) + with pytest.raises(OutOfBoundsError): + column.transform(lambda cell: cell.str.pad_end(-1)) + + +@pytest.mark.parametrize( + "character", + [ + "", + "ab", + ], + ids=[ + 
"empty string", + "multiple characters", + ], +) +def test_should_raise_if_char_is_not_single_character(character: str) -> None: + column = Column("col1", [1]) + with pytest.raises(ValueError, match=r"Can only pad with a single character\."): + column.transform(lambda cell: cell.str.pad_end(1, character=character)) diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_pad_start.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_pad_start.py new file mode 100644 index 000000000..261055f98 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_string_operations/test_pad_start.py @@ -0,0 +1,57 @@ +import pytest + +from safeds.data.tabular.containers import Column +from safeds.data.tabular.typing import ColumnType +from safeds.exceptions import OutOfBoundsError +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "length", "character", "expected"), + [ + ("", 0, "a", ""), + ("", 1, "a", "a"), + ("b", 2, "a", "ab"), + ("bc", 2, "a", "bc"), + ("abc", 2, "a", "abc"), + (None, 1, " ", None), + ], + ids=[ + "empty (length 0)", + "empty (length 1)", + "non-empty (shorter length)", + "non-empty (same length)", + "non-empty (longer length)", + "None", + ], +) +def test_should_pad_start(value: str | None, length: int, character: str, expected: bool | None) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.pad_start(length, character=character), + expected, + type_if_none=ColumnType.string(), + ) + + +def test_should_raise_if_length_is_out_of_bounds() -> None: + column = Column("col1", [1]) + with pytest.raises(OutOfBoundsError): + column.transform(lambda cell: cell.str.pad_start(-1)) + + +@pytest.mark.parametrize( + "character", + [ + "", + "ab", + ], + ids=[ + "empty string", + "multiple characters", + ], +) +def test_should_raise_if_char_is_not_single_character(character: str) -> None: + column = Column("col1", [1]) + with pytest.raises(ValueError, match=r"Can only pad 
with a single character\."): + column.transform(lambda cell: cell.str.pad_start(1, character=character)) diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_remove_prefix.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_remove_prefix.py new file mode 100644 index 000000000..05479c86b --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_string_operations/test_remove_prefix.py @@ -0,0 +1,46 @@ +import pytest + +from safeds.data.tabular.containers import Cell +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "prefix", "expected"), + [ + ("", " ", ""), + ("~ a ~", "", "~ a ~"), + ("~ a ~", "~ ", "a ~"), + ("~ a ~", " ~", "~ a ~"), + (None, " ", None), + ("~ a ~", None, None), + (None, None, None), + ], + ids=[ + "empty", + "empty prefix", + "non-empty (has prefix)", + "non-empty (does not have prefix)", + "None as string", + "None as prefix", + "None as both", + ], +) +class TestShouldRemovePrefix: + def test_plain_arguments(self, value: str | None, prefix: str | None, expected: bool | None) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.remove_prefix(prefix), + expected, + type_if_none=ColumnType.string(), + ) + + def test_arguments_wrapped_in_cell(self, value: str | None, prefix: str | None, expected: bool | None) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.remove_prefix( + Cell.constant(prefix, type=ColumnType.string()), + ), + expected, + type_if_none=ColumnType.string(), + ) diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_remove_suffix.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_remove_suffix.py new file mode 100644 index 000000000..47e1dcfac --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_string_operations/test_remove_suffix.py @@ -0,0 +1,46 @@ +import pytest + +from safeds.data.tabular.containers import Cell 
+from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "suffix", "expected"), + [ + ("", " ", ""), + ("~ a ~", "", "~ a ~"), + ("~ a ~", " ~", "~ a"), + ("~ a ~", "~ ", "~ a ~"), + (None, " ", None), + ("~ a ~", None, None), + (None, None, None), + ], + ids=[ + "empty", + "empty suffix", + "non-empty (has suffix)", + "non-empty (does not have suffix)", + "None as string", + "None as suffix", + "None as both", + ], +) +class TestShouldRemoveSuffix: + def test_plain_arguments(self, value: str | None, suffix: str | None, expected: bool | None) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.remove_suffix(suffix), + expected, + type_if_none=ColumnType.string(), + ) + + def test_arguments_wrapped_in_cell(self, value: str | None, suffix: str | None, expected: bool | None) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.remove_suffix( + Cell.constant(suffix, type=ColumnType.string()), + ), + expected, + type_if_none=ColumnType.string(), + ) diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_repeat.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_repeat.py new file mode 100644 index 000000000..546aab740 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_string_operations/test_repeat.py @@ -0,0 +1,55 @@ +import pytest + +from safeds.data.tabular.containers import Cell, Column +from safeds.data.tabular.typing import ColumnType +from safeds.exceptions import OutOfBoundsError +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "count", "expected"), + [ + ("", 1, ""), + ("a", 0, ""), + ("a", 1, "a"), + ("a", 2, "aa"), + (None, 0, ""), + (None, 1, None), + ("", None, None), + (None, None, None), + ], + ids=[ + "empty", + "zero count", + "non-empty (count 1)", + "non-empty (count 2)", + "None as string (count 0)", + "None as string (count 1)", + 
"None as count", + "None for both", + ], +) +class TestShouldRepeatString: + def test_plain_arguments(self, value: str | None, count: int | None, expected: str | None) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.repeat(count), + expected, + type_if_none=ColumnType.string(), + ) + + def test_arguments_wrapped_in_cell(self, value: str | None, count: int | None, expected: str | None) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.repeat( + Cell.constant(count), + ), + expected, + type_if_none=ColumnType.string(), + ) + + +def test_should_raise_if_count_is_out_of_bounds() -> None: + column = Column("a", [1]) + with pytest.raises(OutOfBoundsError): + column.transform(lambda cell: cell.str.repeat(-1)) diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_replace.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_replace.py deleted file mode 100644 index f1f32c07a..000000000 --- a/tests/safeds/data/tabular/query/_lazy_string_operations/test_replace.py +++ /dev/null @@ -1,24 +0,0 @@ -import pytest - -from tests.helpers import assert_cell_operation_works - - -@pytest.mark.parametrize( - ("string", "old", "new", "expected"), - [ - ("", "a", "b", ""), - ("abc", "", "d", "dadbdcd"), - ("abc", "a", "", "bc"), - ("abc", "d", "e", "abc"), - ("aba", "a", "d", "dbd"), - ], - ids=[ - "empty string", - "empty old", - "empty new", - "no occurrences", - "replace all occurrences", - ], -) -def test_should_replace_all_occurrences_of_old_with_new(string: str, old: str, new: str, expected: str) -> None: - assert_cell_operation_works(string, lambda cell: cell.str.replace(old, new), expected) diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_replace_all.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_replace_all.py new file mode 100644 index 000000000..5183bc7b0 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_string_operations/test_replace_all.py @@ 
-0,0 +1,72 @@ +import pytest + +from safeds.data.tabular.containers import Cell +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "old", "new", "expected"), + [ + # all empty + ("", "", "", ""), + # empty value + ("", "a", "z", ""), + # empty old + ("abc", "", "z", "zazbzcz"), + # empty new + ("abc", "a", "", "bc"), + # no matches + ("abc", "d", "z", "abc"), + # one match + ("abc", "a", "z", "zbc"), + # many matches + ("abcabc", "a", "z", "zbczbc"), + # full match + ("abc", "abc", "z", "z"), + # None value + (None, "a", "z", None), + # None old + pytest.param("abc", None, "z", None, marks=pytest.mark.xfail(reason="Not supported by polars.")), + # None new + pytest.param("abc", "a", None, None, marks=pytest.mark.xfail(reason="Not supported by polars.")), + ], + ids=[ + "all empty", + "empty value", + "empty old", + "empty new", + "no matches", + "one match", + "many matches", + "full match", + "None value", + "None old", + "None new", + ], +) +class TestShouldReplaceAllOccurrencesOfOldWithNew: + def test_plain_arguments(self, value: str | None, old: str | None, new: str | None, expected: bool | None) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.replace_all(old, new), + expected, + type_if_none=ColumnType.string(), + ) + + def test_arguments_wrapped_in_cell( + self, + value: str | None, + old: str | None, + new: str | None, + expected: bool | None, + ) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.replace_all( + Cell.constant(old), + Cell.constant(new), + ), + expected, + type_if_none=ColumnType.string(), + ) diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_reverse.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_reverse.py new file mode 100644 index 000000000..30d694c90 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_string_operations/test_reverse.py @@ -0,0 +1,21 
@@ +import pytest + +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + ("", ""), + ("abc", "cba"), + (None, None), + ], + ids=[ + "empty", + "non-empty", + "None", + ], +) +def test_should_reverse_string(value: str | None, expected: str | None) -> None: + assert_cell_operation_works(value, lambda cell: cell.str.reverse(), expected, type_if_none=ColumnType.string()) diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_slice.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_slice.py new file mode 100644 index 000000000..b7f551de9 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_string_operations/test_slice.py @@ -0,0 +1,72 @@ +import pytest + +from safeds.data.tabular.containers import Cell, Column +from safeds.data.tabular.typing import ColumnType +from safeds.exceptions import OutOfBoundsError +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "start", "length", "expected"), + [ + ("", 0, None, ""), + ("abc", 0, None, "abc"), + ("abc", 10, None, ""), + ("abc", -1, None, "c"), + ("abc", -10, None, "abc"), + ("abc", 0, 1, "a"), + ("abc", 0, 10, "abc"), + (None, 0, 1, None), + ("abc", None, 1, None), + (None, None, None, None), + ], + ids=[ + "empty", + "non-negative start in bounds", + "non-negative start out of bounds", + "negative start in bounds", + "negative start out of bounds", + "non-negative length in bounds", + "non-negative length out of bounds", + "None as string", + "None as start", + "None for all", + ], +) +class TestShouldSliceCharacters: + def test_plain_arguments( + self, + value: str | None, + start: int | None, + length: int | None, + expected: bool | None, + ) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.slice(start=start, length=length), + expected, + type_if_none=ColumnType.string(), + ) + + def 
test_arguments_wrapped_in_cell( + self, + value: str | None, + start: int | None, + length: int | None, + expected: bool | None, + ) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.slice( + start=Cell.constant(start), + length=Cell.constant(length), + ), + expected, + type_if_none=ColumnType.string(), + ) + + +def test_should_raise_for_negative_length() -> None: + column = Column("a", [1]) + with pytest.raises(OutOfBoundsError): + column.transform(lambda cell: cell.str.slice(length=-1)) diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_starts_with.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_starts_with.py index 7d402cd0b..32e322483 100644 --- a/tests/safeds/data/tabular/query/_lazy_string_operations/test_starts_with.py +++ b/tests/safeds/data/tabular/query/_lazy_string_operations/test_starts_with.py @@ -1,22 +1,50 @@ import pytest +from safeds.data.tabular.containers import Cell +from safeds.data.tabular.typing import ColumnType from tests.helpers import assert_cell_operation_works @pytest.mark.parametrize( - ("string", "prefix", "expected"), + ("value", "prefix", "expected"), [ + ("", "", True), ("", "a", False), ("abc", "", True), ("abc", "a", True), - ("abc", "c", False), + ("abc", "abc", True), + ("abc", "d", False), + (None, "", None), + ("abc", None, None), + (None, None, None), ], ids=[ - "empty string", - "empty prefix", - "starts with", - "does not start with", + "empty string, empty prefix", + "empty string, non-empty prefix", + "non-empty string, empty prefix", + "correct prefix", + "prefix equal to string", + "incorrect prefix", + "None as string", + "None as prefix", + "None for both", ], ) -def test_should_check_whether_string_start_with_prefix(string: str, prefix: str, expected: bool) -> None: - assert_cell_operation_works(string, lambda cell: cell.str.starts_with(prefix), expected) +class TestShouldCheckIfStringStartsWithPrefix: + def test_plain_arguments(self, value: str | None, 
prefix: str | None, expected: bool | None) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.starts_with(prefix), + expected, + type_if_none=ColumnType.string(), + ) + + def test_arguments_wrapped_in_cell(self, value: str | None, prefix: str | None, expected: bool | None) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.starts_with( + Cell.constant(prefix), + ), + expected, + type_if_none=ColumnType.string(), + ) diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_strip.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_strip.py new file mode 100644 index 000000000..d99890d4c --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_string_operations/test_strip.py @@ -0,0 +1,46 @@ +import pytest + +from safeds.data.tabular.containers import Cell +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "characters", "expected"), + [ + ("", " ", ""), + ("~ a ~", "", "~ a ~"), + ("~ a ~", "~", " a "), + ("~ a ~", "~ ", "a"), + (None, " ", None), + (" \na\n ", None, "a"), + (None, None, None), + ], + ids=[ + "empty", + "non-empty (empty characters)", + "non-empty (one character)", + "non-empty (multiple characters)", + "None as string", + "None as characters", + "None as both", + ], +) +class TestShouldStrip: + def test_plain_arguments(self, value: str | None, characters: str | None, expected: bool | None) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.strip(characters=characters), + expected, + type_if_none=ColumnType.string(), + ) + + def test_arguments_wrapped_in_cell(self, value: str | None, characters: str | None, expected: bool | None) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.strip( + characters=Cell.constant(characters), + ), + expected, + type_if_none=ColumnType.string(), + ) diff --git 
a/tests/safeds/data/tabular/query/_lazy_string_operations/test_strip_end.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_strip_end.py new file mode 100644 index 000000000..e09bdc637 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_string_operations/test_strip_end.py @@ -0,0 +1,46 @@ +import pytest + +from safeds.data.tabular.containers import Cell +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "characters", "expected"), + [ + ("", " ", ""), + ("~ a ~", "", "~ a ~"), + ("~ a ~", "~", "~ a "), + ("~ a ~", "~ ", "~ a"), + (None, " ", None), + (" \na\n ", None, " \na"), + (None, None, None), + ], + ids=[ + "empty", + "non-empty (empty characters)", + "non-empty (one character)", + "non-empty (multiple characters)", + "None as string", + "None as characters", + "None as both", + ], +) +class TestShouldStripEnd: + def test_plain_arguments(self, value: str | None, characters: str | None, expected: bool | None) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.strip_end(characters=characters), + expected, + type_if_none=ColumnType.string(), + ) + + def test_arguments_wrapped_in_cell(self, value: str | None, characters: str | None, expected: bool | None) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.strip_end( + characters=Cell.constant(characters), + ), + expected, + type_if_none=ColumnType.string(), + ) diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_strip_start.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_strip_start.py new file mode 100644 index 000000000..cfbb1075c --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_string_operations/test_strip_start.py @@ -0,0 +1,46 @@ +import pytest + +from safeds.data.tabular.containers import Cell +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + 
+ +@pytest.mark.parametrize( + ("value", "characters", "expected"), + [ + ("", " ", ""), + ("~ a ~", "", "~ a ~"), + ("~ a ~", "~", " a ~"), + ("~ a ~", "~ ", "a ~"), + (None, " ", None), + (" \na\n ", None, "a\n "), + (None, None, None), + ], + ids=[ + "empty", + "non-empty (empty characters)", + "non-empty (one character)", + "non-empty (multiple characters)", + "None as string", + "None as characters", + "None as both", + ], +) +class TestShouldStripStart: + def test_plain_arguments(self, value: str | None, characters: str | None, expected: bool | None) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.strip_start(characters=characters), + expected, + type_if_none=ColumnType.string(), + ) + + def test_arguments_wrapped_in_cell(self, value: str | None, characters: str | None, expected: bool | None) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.strip_start( + characters=Cell.constant(characters), + ), + expected, + type_if_none=ColumnType.string(), + ) diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_substring.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_substring.py deleted file mode 100644 index 8d1164a38..000000000 --- a/tests/safeds/data/tabular/query/_lazy_string_operations/test_substring.py +++ /dev/null @@ -1,36 +0,0 @@ -import pytest - -from safeds.exceptions import OutOfBoundsError -from tests.helpers import assert_cell_operation_works - - -@pytest.mark.parametrize( - ("string", "start", "length", "expected"), - [ - ("", 0, None, ""), - ("abc", 0, None, "abc"), - ("abc", 1, None, "bc"), - ("abc", 10, None, ""), - ("abc", -1, None, "c"), - ("abc", -10, None, "abc"), - ("abc", 0, 1, "a"), - ("abc", 0, 10, "abc"), - ], - ids=[ - "empty", - "full string", - "positive start in bounds", - "positive start out of bounds", - "negative start in bounds", - "negative start out of bounds", - "positive length in bounds", - "positive length out of bounds", - ], -) -def 
test_should_return_substring(string: str, start: int, length: int | None, expected: str) -> None: - assert_cell_operation_works(string, lambda cell: cell.str.substring(start=start, length=length), expected) - - -def test_should_raise_if_length_is_negative() -> None: - with pytest.raises(OutOfBoundsError): - assert_cell_operation_works("abc", lambda cell: cell.str.substring(length=-1), None) diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_date.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_date.py index 677438e0a..ab4b1108e 100644 --- a/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_date.py +++ b/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_date.py @@ -1,22 +1,132 @@ -import datetime +from datetime import date import pytest +from safeds.data.tabular.containers import Column +from safeds.data.tabular.typing import ColumnType from tests.helpers import assert_cell_operation_works +DATE = date(1, 2, 3) + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + ("0001-02-03", DATE), + (None, None), + ], + ids=[ + "date", + "None", + ], +) +def test_should_handle_iso_8601(value: str | None, expected: str | None) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.to_date(format="iso"), + expected, + type_if_none=ColumnType.string(), + ) + + +@pytest.mark.parametrize( + ("value", "format_", "expected"), + [ + ("0001-02-03", "{Y}-{M}-{D}", DATE), + (" 1- 2- 3", "{_Y}-{_M}-{_D}", DATE), + ("1-2-3", "{^Y}-{^M}-{^D}", DATE), + ("01", "{Y99}", date(2001, 1, 1)), # weird polars behavior + (" 1", "{_Y99}", None), + ("1", "{^Y99}", None), + ("0001-February-03", "{Y}-{M-full}-{D}", DATE), + ("0001-Feb-03", "{Y}-{M-short}-{D}", DATE), + ("0001-02-03 05| 5|5", "{Y}-{M}-{D} {W}|{_W}|{^W}", DATE), + ("0001-02-03 6|Saturday|Sat", "{Y}-{M}-{D} {DOW}|{DOW-full}|{DOW-short}", DATE), + ("0001/034", "{Y}/{DOY}", DATE), + (" 1/ 34", "{Y}/{_DOY}", DATE), + (" 1/034", "{Y}/{^DOY}", 
DATE), + ("0001-02-03 0001", "{Y}-{M}-{D} {Y}", DATE), + ("0001-02-03 0004", "{Y}-{M}-{D} {Y}", date(4, 2, 3)), # weird polars behavior + ("0001-02-03 01", "{Y}-{M}-{D} {Y99}", date(2001, 2, 3)), # weird polars behavior + ("0001-02-03 04", "{Y}-{M}-{D} {Y99}", date(2004, 2, 3)), # weird polars behavior + ("24:00:00", "{Y}-{M}-{D}", None), + ("invalid", "{Y}-{M}-{D}", None), + ], + ids=[ + "{Y}-{M}-{D}", + "{_Y}-{_M}-{_D}", + "{^Y}-{^M}-{^D}", + "{Y99}", + "{_Y99}", + "{^Y99}", + "{Y}-{M-full}-{D}", + "{Y}-{M-short}-{D}", + "week number", + "day of the week", + "{Y}/{DOY}", + "{_Y}/{_DOY}", + "{^Y}/{^DOY}", + "duplicate field, same value", + "duplicate field, different value", + "similar field, same value", + "similar field, different value", + "out of bounds", + "no match", + ], +) +def test_should_handle_custom_format_string(value: str, format_: str, expected: date) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.to_date(format=format_), + expected, + ) + + +@pytest.mark.parametrize( + ("value", "format_", "expected"), + [ + ("0001-02-03\\", "{Y}-{M}-{D}\\", DATE), + ("0001-02-03\\", "{Y}-{M}-{D}\\\\", DATE), + ("0001-02-03{", "{Y}-{M}-{D}\\{", DATE), + ("0001-02-03%", "{Y}-{M}-{D}%", DATE), + ("0001-02-03\n", "{Y}-{M}-{D}\n", DATE), + ("0001-02-03\t", "{Y}-{M}-{D}\t", DATE), + ], + ids=[ + "backslash at end", + "escaped backslash", + "escaped open curly brace", + "percent", + "newline", + "tab", + ], +) +def test_should_handle_escape_sequences(value: str, format_: str, expected: date) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.to_date(format=format_), + expected, + ) + + +def test_should_raise_for_unclosed_specifier() -> None: + column = Column("a", ["0001-02-03"]) + with pytest.raises(ValueError, match="Unclosed specifier"): + column.transform(lambda cell: cell.str.to_date(format="{Y")) + @pytest.mark.parametrize( - ("string", "expected"), + "format_", [ - ("", None), - ("2022-01-09", datetime.date(2022, 1, 
9)), - ("abc", None), + "{invalid}", + "{m}", ], ids=[ - "empty", - "ISO date", - "invalid string", + "globally invalid", + "invalid for date", ], ) -def test_should_parse_date(string: str, expected: bool) -> None: - assert_cell_operation_works(string, lambda cell: cell.str.to_date(), expected) +def test_should_raise_for_invalid_specifier(format_: str) -> None: + column = Column("a", ["0001-02-03"]) + with pytest.raises(ValueError, match="Invalid specifier"): + column.transform(lambda cell: cell.str.to_date(format=format_)) diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_datetime.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_datetime.py index 4c96d03d0..89c312074 100644 --- a/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_datetime.py +++ b/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_datetime.py @@ -1,22 +1,102 @@ -import datetime +from datetime import datetime +from zoneinfo import ZoneInfo import pytest +from safeds.data.tabular.containers import Column +from safeds.data.tabular.typing import ColumnType from tests.helpers import assert_cell_operation_works +DATETIME = datetime(1, 2, 3, 4, 5, 6) # noqa: DTZ001 +DATETIME_UTC = datetime(1, 2, 3, 4, 5, 6, tzinfo=ZoneInfo("UTC")) + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + ("0001-02-03T04:05:06Z", DATETIME_UTC), + (None, None), + ], + ids=[ + "datetime", + "None", + ], +) +def test_should_handle_iso_8601(value: str | None, expected: str | None) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.to_datetime(format="iso"), + expected, + type_if_none=ColumnType.string(), + ) + + +@pytest.mark.parametrize( + ("value", "format_", "expected"), + [ + ("0001-02-03 04:05:06", "{Y}-{M}-{D} {h}:{m}:{s}", DATETIME), + (" 1- 2- 3 4: 5: 6", "{_Y}-{_M}-{_D} {_h}:{_m}:{_s}", DATETIME), + ("1-2-3 4:5:6", "{^Y}-{^M}-{^D} {^h}:{^m}:{^s}", DATETIME), + ("invalid", "{Y}-{M}-{D} {h}:{m}:{s}", None), + ], + ids=[ + 
"{Y}-{M}-{D} {h}:{m}:{s}", + "{_Y}-{_M}-{_D} {_h}:{_m}:{_s}", + "{^Y}-{^M}-{^D} {^h}:{^m}:{^s}", + "no match", + ], +) +def test_should_handle_custom_format_string(value: str, format_: str, expected: datetime) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.to_datetime(format=format_), + expected, + ) + + +@pytest.mark.parametrize( + ("value", "format_", "expected"), + [ + ("0001-02-03 04:05:06\\", "{Y}-{M}-{D} {h}:{m}:{s}\\", DATETIME), + ("0001-02-03 04:05:06\\", "{Y}-{M}-{D} {h}:{m}:{s}\\\\", DATETIME), + ("0001-02-03 04:05:06{", "{Y}-{M}-{D} {h}:{m}:{s}\\{", DATETIME), + ("0001-02-03 04:05:06%", "{Y}-{M}-{D} {h}:{m}:{s}%", DATETIME), + ("0001-02-03 04:05:06\n", "{Y}-{M}-{D} {h}:{m}:{s}\n", DATETIME), + ("0001-02-03 04:05:06\t", "{Y}-{M}-{D} {h}:{m}:{s}\t", DATETIME), + ], + ids=[ + "backslash at end", + "escaped backslash", + "escaped open curly brace", + "percent", + "newline", + "tab", + ], +) +def test_should_handle_escape_sequences(value: str, format_: str, expected: datetime) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.to_datetime(format=format_), + expected, + ) + + +def test_should_raise_for_unclosed_specifier() -> None: + column = Column("a", ["0001-02-03 04:05:06"]) + with pytest.raises(ValueError, match="Unclosed specifier"): + column.transform(lambda cell: cell.str.to_datetime(format="{Y")) + @pytest.mark.parametrize( - ("string", "expected"), + "format_", [ - ("", None), - ("2022-01-09T23:29:01Z", datetime.datetime(2022, 1, 9, 23, 29, 1, tzinfo=datetime.UTC)), - ("abc", None), + "{invalid}", ], ids=[ - "empty", - "ISO datetime", - "invalid string", + "globally invalid", ], ) -def test_should_parse_datetimes(string: str, expected: bool) -> None: - assert_cell_operation_works(string, lambda cell: cell.str.to_datetime(), expected) +def test_should_raise_for_invalid_specifier(format_: str) -> None: + column = Column("a", ["0001-02-03"]) + with pytest.raises(ValueError, match="Invalid specifier"): + 
column.transform(lambda cell: cell.str.to_datetime(format=format_)) diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_float.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_float.py new file mode 100644 index 000000000..5cd415b83 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_float.py @@ -0,0 +1,34 @@ +import pytest + +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + ("", None), + ("abc", None), + ("1", 1.0), + ("1.5", 1.5), + ("-1.5", -1.5), + ("1e3", 1000), + (None, None), + ], + ids=[ + "empty", + "invalid", + "int", + "positive float", + "negative float", + "exponential", + "None", + ], +) +def test_should_convert_string_to_float(value: str | None, expected: float | None) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.to_float(), + expected, + type_if_none=ColumnType.string(), + ) diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_int.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_int.py index b4b3256cc..9df3c1235 100644 --- a/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_int.py +++ b/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_int.py @@ -1,26 +1,46 @@ import pytest +from safeds.data.tabular.containers import Cell +from safeds.data.tabular.typing import ColumnType from tests.helpers import assert_cell_operation_works @pytest.mark.parametrize( - ("string", "base", "expected"), + ("value", "base", "expected"), [ ("", 10, None), - ("11", 10, 11), - ("11", 2, 3), ("abc", 10, None), + ("10", 10, 10), + ("10", 2, 2), + (None, 10, None), + ("0", None, None), + (None, None, None), ], ids=[ "empty", - "11 base 10", - "11 base 2", - "invalid string", + "invalid", + "base 10", + "base 2", + "None as value", + "None as base", + "None for both", ], ) -def 
test_should_parse_integer(string: str, base: int, expected: bool) -> None: - assert_cell_operation_works( - string, - lambda cell: cell.str.to_int(base=base), - expected, - ) +class TestShouldConvertStringToInteger: + def test_plain_arguments(self, value: str | None, base: int | None, expected: float | None) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.to_int(base=base), + expected, + type_if_none=ColumnType.string(), + ) + + def test_arguments_wrapped_in_cell(self, value: str | None, base: int | None, expected: float | None) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.to_int( + base=Cell.constant(base), + ), + expected, + type_if_none=ColumnType.string(), + ) diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_lowercase.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_lowercase.py index f4c880761..21a9db2a3 100644 --- a/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_lowercase.py +++ b/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_lowercase.py @@ -1,18 +1,30 @@ import pytest +from safeds.data.tabular.typing import ColumnType from tests.helpers import assert_cell_operation_works @pytest.mark.parametrize( - ("string", "expected"), + ("value", "expected"), [ ("", ""), - ("AbC", "abc"), + ("abc", "abc"), + ("ABC", "abc"), + ("aBc", "abc"), + (None, None), ], ids=[ "empty", - "non-empty", + "full lowercase", + "full uppercase", + "mixed", + "None", ], ) -def test_should_lowercase_a_string(string: str, expected: str) -> None: - assert_cell_operation_works(string, lambda cell: cell.str.to_lowercase(), expected) +def test_should_convert_string_to_lowercase(value: str | None, expected: str | None) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.to_lowercase(), + expected, + type_if_none=ColumnType.string(), + ) diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_time.py 
b/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_time.py new file mode 100644 index 000000000..abe4932a7 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_time.py @@ -0,0 +1,136 @@ +from datetime import time + +import pytest + +from safeds.data.tabular.containers import Column +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + +NO_FRACTIONAL = time(4, 5, 6) +WITH_MILLISECOND = time(4, 5, 6, 7000) +WITH_MICROSECOND = time(4, 5, 6, 7) + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + ("04:05:06", NO_FRACTIONAL), + ("04:05:06.007", WITH_MILLISECOND), + ("04:05:06.000007", WITH_MICROSECOND), + (None, None), + ], + ids=[ + "time without fractional seconds", + "time with milliseconds", + "time with microseconds", + "None", + ], +) +def test_should_handle_iso_8601(value: str | None, expected: str | None) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.to_time(format="iso"), + expected, + type_if_none=ColumnType.string(), + ) + + +@pytest.mark.parametrize( + ("value", "format_", "expected"), + [ + ("04:05:06", "{h}:{m}:{s}", NO_FRACTIONAL), + (" 4: 5: 6", "{_h}:{_m}:{_s}", NO_FRACTIONAL), + ("4:5:6", "{^h}:{^m}:{^s}", NO_FRACTIONAL), + ("04:05:06 am", "{h12}:{m}:{s} {am/pm}", NO_FRACTIONAL), + (" 4: 5: 6 AM", "{_h12}:{m}:{s} {AM/PM}", NO_FRACTIONAL), + ("4:5:6 AM", "{^h12}:{m}:{s} {AM/PM}", NO_FRACTIONAL), + ("04:05:06 .000007", "{h}:{m}:{s} {.f}", WITH_MICROSECOND), + ("04:05:06 007", "{h}:{m}:{s} {ms}", WITH_MILLISECOND), + ("04:05:06 000007", "{h}:{m}:{s} {us}", WITH_MICROSECOND), + ("04:05:06 000007000", "{h}:{m}:{s} {ns}", WITH_MICROSECOND), + ("04", "{h}", None), + ("05", "{m}", None), + ("04:05:06 04", "{h}:{m}:{s} {h}", NO_FRACTIONAL), + ("04:05:06 07", "{h}:{m}:{s} {h}", None), + ("04:05:06 04", "{h}:{m}:{s} {h12}", NO_FRACTIONAL), + ("04:05:06 07", "{h}:{m}:{s} {h12}", None), + ("24:00:00", "{h}:{m}:{s}", None), + 
("invalid", "{h}:{m}:{s}", None), + ], + ids=[ + "{h}:{m}:{s}", + "{_h}:{_m}:{_s}", + "{^h}:{^m}:{^s}", + "{h12}:{m}:{s} {am/pm}", + "{_h12}:{m}:{s} {am/pm}", + "{^h12}:{m}:{s} {AM/PM}", + "{.f}", + "{ms}", + "{us}", + "{ns}", + "no minute", + "no hour", + "duplicate field, same value", + "duplicate field, different value", + "similar field, same value", + "similar field, different value", + "out of bounds", + "no match", + ], +) +def test_should_handle_custom_format_string(value: str, format_: str, expected: time) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.to_time(format=format_), + expected, + ) + + +@pytest.mark.parametrize( + ("value", "format_", "expected"), + [ + ("04:05\\", "{h}:{m}\\", time(4, 5)), + ("04:05\\", "{h}:{m}\\\\", time(4, 5)), + ("04:05{", "{h}:{m}\\{", time(4, 5)), + ("04:05%", "{h}:{m}%", time(4, 5)), + ("04:05\n", "{h}:{m}\n", time(4, 5)), + ("04:05\t", "{h}:{m}\t", time(4, 5)), + ], + ids=[ + "backslash at end", + "escaped backslash", + "escaped open curly brace", + "percent", + "newline", + "tab", + ], +) +def test_should_handle_escape_sequences(value: str, format_: str, expected: time) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.to_time(format=format_), + expected, + ) + + +def test_should_raise_for_unclosed_specifier() -> None: + column = Column("a", ["04:05:06"]) + with pytest.raises(ValueError, match="Unclosed specifier"): + column.transform(lambda cell: cell.str.to_time(format="{m")) + + +@pytest.mark.parametrize( + "format_", + [ + "{invalid}", + "{Y}", + ], + ids=[ + "globally invalid", + "invalid for time", + ], +) +def test_should_raise_for_invalid_specifier(format_: str) -> None: + column = Column("a", ["04:05:06"]) + with pytest.raises(ValueError, match="Invalid specifier"): + column.transform(lambda cell: cell.str.to_time(format=format_)) diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_uppercase.py 
b/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_uppercase.py index cfb14c7d2..87d05bacb 100644 --- a/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_uppercase.py +++ b/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_uppercase.py @@ -1,18 +1,30 @@ import pytest +from safeds.data.tabular.typing import ColumnType from tests.helpers import assert_cell_operation_works @pytest.mark.parametrize( - ("string", "expected"), + ("value", "expected"), [ ("", ""), - ("AbC", "ABC"), + ("abc", "ABC"), + ("ABC", "ABC"), + ("aBc", "ABC"), + (None, None), ], ids=[ "empty", - "non-empty", + "full lowercase", + "full uppercase", + "mixed", + "None", ], ) -def test_should_uppercase_a_string(string: str, expected: str) -> None: - assert_cell_operation_works(string, lambda cell: cell.str.to_uppercase(), expected) +def test_should_convert_string_to_uppercase(value: str | None, expected: str | None) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.str.to_uppercase(), + expected, + type_if_none=ColumnType.string(), + ) diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_trim.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_trim.py deleted file mode 100644 index 2b2101e4e..000000000 --- a/tests/safeds/data/tabular/query/_lazy_string_operations/test_trim.py +++ /dev/null @@ -1,24 +0,0 @@ -import pytest - -from tests.helpers import assert_cell_operation_works - - -@pytest.mark.parametrize( - ("string", "expected"), - [ - ("", ""), - ("abc", "abc"), - (" abc", "abc"), - ("abc ", "abc"), - (" abc ", "abc"), - ], - ids=[ - "empty", - "non-empty", - "whitespace start", - "whitespace end", - "whitespace start and end", - ], -) -def test_should_remove_whitespace_prefix_and_suffix(string: str, expected: str) -> None: - assert_cell_operation_works(string, lambda cell: cell.str.trim(), expected) diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_trim_end.py 
b/tests/safeds/data/tabular/query/_lazy_string_operations/test_trim_end.py deleted file mode 100644 index af0cd88dc..000000000 --- a/tests/safeds/data/tabular/query/_lazy_string_operations/test_trim_end.py +++ /dev/null @@ -1,24 +0,0 @@ -import pytest - -from tests.helpers import assert_cell_operation_works - - -@pytest.mark.parametrize( - ("string", "expected"), - [ - ("", ""), - ("abc", "abc"), - (" abc", " abc"), - ("abc ", "abc"), - (" abc ", " abc"), - ], - ids=[ - "empty", - "non-empty", - "whitespace start", - "whitespace end", - "whitespace start and end", - ], -) -def test_should_remove_whitespace_suffix(string: str, expected: str) -> None: - assert_cell_operation_works(string, lambda cell: cell.str.trim_end(), expected) diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_trim_start.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_trim_start.py deleted file mode 100644 index 6b487f6e7..000000000 --- a/tests/safeds/data/tabular/query/_lazy_string_operations/test_trim_start.py +++ /dev/null @@ -1,24 +0,0 @@ -import pytest - -from tests.helpers import assert_cell_operation_works - - -@pytest.mark.parametrize( - ("string", "expected"), - [ - ("", ""), - ("abc", "abc"), - (" abc", "abc"), - ("abc ", "abc "), - (" abc ", "abc "), - ], - ids=[ - "empty", - "non-empty", - "whitespace start", - "whitespace end", - "whitespace start and end", - ], -) -def test_should_remove_whitespace_prefix(string: str, expected: str) -> None: - assert_cell_operation_works(string, lambda cell: cell.str.trim_start(), expected)