From 66289d4659dbe741162a14ec7b20906d61ff6533 Mon Sep 17 00:00:00 2001 From: hadware Date: Mon, 9 Dec 2019 04:03:45 +0100 Subject: [PATCH 01/30] Type annotations for annotation.py and part of timeline.py --- pyannote/core/annotation.py | 114 ++++++++++++++++++++---------------- pyannote/core/timeline.py | 43 +++++++------- 2 files changed, 87 insertions(+), 70 deletions(-) diff --git a/pyannote/core/annotation.py b/pyannote/core/annotation.py index c1f18d8..42f0ecb 100755 --- a/pyannote/core/annotation.py +++ b/pyannote/core/annotation.py @@ -108,7 +108,10 @@ """ import itertools +from typing import Optional, Dict, Union, Iterable, Generator, Tuple, List, Set + import numpy as np +import pandas as pd from . import PYANNOTE_URI, PYANNOTE_MODALITY, \ PYANNOTE_SEGMENT, PYANNOTE_TRACK, PYANNOTE_LABEL @@ -118,8 +121,11 @@ from .json import PYANNOTE_JSON, PYANNOTE_JSON_CONTENT from .utils.generators import string_generator, int_generator +# TODO : Make sure this is true, it may just be "Hashable" +Label = Union[str, int] + -class Annotation(object): +class Annotation: """Annotation Parameters @@ -137,7 +143,7 @@ class Annotation(object): """ @classmethod - def from_df(cls, df, uri=None, modality=None): + def from_df(cls, df: pd.DataFrame, uri=None, modality=None): df = df[[PYANNOTE_SEGMENT, PYANNOTE_TRACK, PYANNOTE_LABEL]] @@ -158,32 +164,35 @@ def from_df(cls, df, uri=None, modality=None): return annotation - def __init__(self, uri=None, modality=None): + def __init__(self, uri: Optional[str] = None, modality: Optional[str] = None): - super(Annotation, self).__init__() + super().__init__() - self._uri = uri - self.modality = modality + self._uri: Optional[str] = uri + self.modality: Optional[str] = modality # sorted dictionary # keys: annotated segments # values: {track: label} dictionary - self._tracks = SortedDict() + # TODO : check the type is good for track values + self._tracks: Dict[Segment, Dict[str, Label]] = SortedDict() # dictionary # key: label # value: timeline - self._labels = {} - self._labelNeedsUpdate = {} + self._labels: Dict[Label, Timeline] = {} + self._labelNeedsUpdate: [Label, bool] = {} # timeline meant to store all annotated segments - self._timeline = None - self._timelineNeedsUpdate = True + self._timeline: Timeline = None + self._timelineNeedsUpdate: bool = True - def _get_uri(self): + @property + def uri(self): return self._uri - def _set_uri(self, uri): + @uri.setter + def uri(self, uri: str): # update uri for all internal timelines for label in self.labels(): timeline = self.label_timeline(label, copy=False) @@ -192,8 +201,6 @@ def _set_uri(self, uri): timeline.uri = uri self._uri = uri - uri = property(_get_uri, fset=_set_uri, doc="Resource identifier") - def _updateLabels(self): # list of labels that needs to be updated @@ -249,7 +256,7 @@ def itersegments(self): """ return iter(self._tracks) - def itertracks(self, yield_label=False): + def itertracks(self, yield_label: bool = False): """Iterate over tracks (in chronological order) Parameters @@ -281,7 +288,7 @@ def _updateTimeline(self): self._timeline = Timeline(segments=self._tracks, uri=self.uri) self._timelineNeedsUpdate = False - def get_timeline(self, copy=True): + def get_timeline(self, copy: bool = True) -> Timeline: """Get timeline made of all annotated segments Parameters @@ -308,7 +315,7 @@ def get_timeline(self, copy=True): return self._timeline.copy() return self._timeline - def __eq__(self, other): + def __eq__(self, other: 'Annotation'): """Equality >>> annotation == other @@ -321,7 +328,7 @@ def __eq__(self, other): other.itertracks(yield_label=True)) return all(t1 == t2 for t1, t2 in pairOfTracks) - def __ne__(self, other): + def __ne__(self, other: 'Annotation'): """Inequality""" pairOfTracks = itertools.zip_longest( self.itertracks(yield_label=True), @@ -329,7 +336,7 @@ def __ne__(self, other): return any(t1 != t2 for t1, t2 in pairOfTracks) - def __contains__(self, included): + def __contains__(self, included: Union[Segment, Timeline]): """Inclusion Check whether every segment of `included` does exist in annotation. @@ -348,7 +355,8 @@ def __contains__(self, included): """ return included in self.get_timeline(copy=False) - def crop(self, support, mode='intersection'): + def crop(self, support: Union[Segment, Timeline], + mode: str = 'intersection'): """Crop annotation to new support Parameters @@ -391,8 +399,7 @@ def crop(self, support, mode='intersection'): _labels = set([]) for segment, _ in \ - self.get_timeline(copy=False).co_iter(support): - + self.get_timeline(copy=False).co_iter(support): tracks = dict(self._tracks[segment]) _tracks[segment] = tracks _labels.update(tracks.values()) @@ -448,8 +455,7 @@ def crop(self, support, mode='intersection'): else: raise NotImplementedError("unsupported mode: '%s'" % mode) - - def get_tracks(self, segment): + def get_tracks(self, segment: Segment): """Query tracks by segment Parameters @@ -468,7 +474,7 @@ def get_tracks(self, segment): """ return set(self._tracks.get(segment, {})) - def has_track(self, segment, track): + def has_track(self, segment: Segment, track: str) -> bool: """Check whether a given track exists Parameters @@ -485,7 +491,7 @@ def has_track(self, segment, track): """ return track in self._tracks.get(segment, {}) - def copy(self): + def copy(self) -> 'Annotation': """Get a copy of the annotation Returns @@ -513,7 +519,9 @@ def copy(self): return copied - def new_track(self, segment, candidate=None, prefix=None): + def new_track(self, segment: Segment, + candidate: Optional[str] = None, + prefix: Optional[str] = None) -> str: """Generate a new track name for given segment Ensures that the returned track name does not already @@ -564,7 +572,7 @@ def __str__(self): return "\n".join(["%s %s %s" % (s, t, l) for s, t, l in self.itertracks(yield_label=True)]) - def __delitem__(self, key): + def __delitem__(self, key: Union[Segment, Tuple[Segment, str]]): """Delete one track >>> del annotation[segment, track] @@ -616,7 +624,7 @@ def __delitem__(self, key): 'Deletion only works with Segment or (Segment, track) keys.') # label = annotation[segment, track] - def __getitem__(self, key): + def __getitem__(self, key: Union[Segment, Tuple[Segment, str]]): """Get track label >>> label = annotation[segment, track] @@ -633,7 +641,9 @@ def __getitem__(self, key): return self._tracks[key[0]][key[1]] # annotation[segment, track] = label - def __setitem__(self, key, label): + def __setitem__(self, + key: Union[Segment, Tuple[Segment, str]], + label: Label): """Add new or update existing track >>> annotation[segment, track] = label @@ -675,7 +685,7 @@ def __setitem__(self, key, label): self._tracks[segment][track] = label self._labelNeedsUpdate[label] = True - def empty(self): + def empty(self) -> 'Annotation': """Return an empty copy Returns @@ -686,7 +696,7 @@ def empty(self): """ return self.__class__(uri=self.uri, modality=self.modality) - def labels(self): + def labels(self) -> List[Label]: """Get sorted list of labels Returns @@ -698,7 +708,7 @@ def labels(self): self._updateLabels() return sorted(self._labels, key=str) - def get_labels(self, segment, unique=True): + def get_labels(self, segment: Segment, unique: bool = True) -> Set[Label]: """Query labels by segment Parameters @@ -734,7 +744,7 @@ def get_labels(self, segment, unique=True): return labels - def subset(self, labels, invert=False): + def subset(self, labels: Iterable[Label], invert: bool = False): """Filter annotation by labels Parameters @@ -762,7 +772,7 @@ def subset(self, labels, invert=False): _tracks, _labels = {}, set([]) for segment, tracks in self._tracks.items(): sub_tracks = {track: label for track, label in tracks.items() - if label in labels} + if label in labels} if sub_tracks: _tracks[segment] = sub_tracks _labels.update(sub_tracks.values()) @@ -777,7 +787,7 @@ def subset(self, labels, invert=False): return sub - def update(self, annotation, copy=False): + def update(self, annotation: 'Annotation', copy: bool = False): """Add every track of an existing annotation (in place) Parameters @@ -806,7 +816,7 @@ def update(self, annotation, copy=False): return result - def label_timeline(self, label, copy=True): + def label_timeline(self, label: Label, copy: bool=True): """Query segments by label Parameters @@ -845,7 +855,7 @@ def label_timeline(self, label, copy=True): return self._labels[label] - def label_support(self, label): + def label_support(self, label: Label): """Label support Equivalent to ``Annotation.label_timeline(label).support()`` @@ -868,7 +878,7 @@ def label_support(self, label): """ return self.label_timeline(label, copy=False).support() - def label_duration(self, label): + def label_duration(self, label: Label): """Label duration Equivalent to ``Annotation.label_timeline(label).duration()`` @@ -892,7 +902,7 @@ def label_duration(self, label): return self.label_timeline(label, copy=False).duration() - def chart(self, percent=False): + def chart(self, percent: bool = False): """Get labels chart (from longest to shortest duration) Parameters @@ -916,7 +926,7 @@ def chart(self, percent=False): return chart - def argmax(self, support=None): + def argmax(self, support: Optional[Union[Segment, Timeline]] = None): """Get label with longest duration Parameters @@ -954,7 +964,7 @@ def argmax(self, support=None): return max(((_, cropped.label_duration(_)) for _ in cropped.labels()), key=lambda x: x[1])[0] - def rename_tracks(self, generator='string'): + def rename_tracks(self, generator: Union[str, Iterable[str]] = 'string'): """Rename all tracks Parameters @@ -1000,7 +1010,10 @@ def rename_tracks(self, generator='string'): renamed[s, next(generator)] = label return renamed - def rename_labels(self, mapping=None, generator='string', copy=True): + def rename_labels(self, + mapping: Optional[Dict] = None, + generator: Union[str, Generator[str]] = 'string', + copy: bool = True) -> 'Annotation': """Rename labels Parameters @@ -1050,7 +1063,8 @@ def rename_labels(self, mapping=None, generator='string', copy=True): return renamed - def relabel_tracks(self, generator='string'): + def relabel_tracks(self, generator: Union[str, Iterable[str]] = 'string')\ + -> 'Annotation': """Relabel tracks Create a new annotation where each track has a unique label. @@ -1078,7 +1092,7 @@ def relabel_tracks(self, generator='string'): return relabeled - def support(self, collar=0.): + def support(self, collar: float = 0.) -> 'Annotation': """Annotation support The support of an annotation is an annotation where contiguous tracks @@ -1137,7 +1151,7 @@ def support(self, collar=0.): return support - def co_iter(self, other): + def co_iter(self, other: 'Annotation'): """Iterate over pairs of intersecting tracks Parameters @@ -1164,7 +1178,7 @@ def co_iter(self, other): for t, T in itertools.product(tracks, other_tracks): yield (s, t), (S, T) - def __mul__(self, other): + def __mul__(self, other: 'Annotation') -> np.ndarray: """Cooccurrence (or confusion) matrix >>> matrix = annotation * other @@ -1203,7 +1217,7 @@ def __mul__(self, other): return matrix - def for_json(self): + def for_json(self) -> Dict: """Serialization See also @@ -1227,7 +1241,7 @@ def for_json(self): return data @classmethod - def from_json(cls, data): + def from_json(cls, data: Dict) -> 'Annotation': """Deserialization See also diff --git a/pyannote/core/timeline.py b/pyannote/core/timeline.py index ee1a265..7d949bb 100755 --- a/pyannote/core/timeline.py +++ b/pyannote/core/timeline.py @@ -88,11 +88,14 @@ See :class:`pyannote.core.Timeline` for the complete reference. """ +from typing import Optional, Iterable -from .segment import Segment +import pandas as pd from sortedcontainers import SortedList + from . import PYANNOTE_URI, PYANNOTE_SEGMENT from .json import PYANNOTE_JSON, PYANNOTE_JSON_CONTENT +from .segment import Segment # ===================================================================== @@ -100,7 +103,7 @@ # ===================================================================== -class Timeline(object): +class Timeline: """ Ordered set of segments. @@ -122,14 +125,14 @@ class Timeline(object): """ @classmethod - def from_df(cls, df, uri=None): + def from_df(cls, df: pd.DataFrame, uri: Optional[str] = None): segments = list(df[PYANNOTE_SEGMENT]) timeline = cls(segments=segments, uri=uri) return timeline - def __init__(self, segments=None, uri=None): + def __init__(self, segments: Optional[Iterable[Segment]] = None, uri=None): - super(Timeline, self).__init__() + super().__init__() if segments is None: segments = () @@ -150,7 +153,7 @@ def __init__(self, segments=None, uri=None): self.segments_boundaries_ = SortedList(boundaries) # path to (or any identifier of) segmented resource - self.uri = uri + self.uri: str = uri def __len__(self): """Number of segments @@ -173,7 +176,7 @@ def __bool__(self): """ return len(self.segments_set_) > 0 - def __iter__(self): + def __iter__(self) -> Iterable[Segment]: """Iterate over segments (in chronological order) >>> for segment in timeline: @@ -185,7 +188,7 @@ def __iter__(self): """ return iter(self.segments_list_) - def __getitem__(self, k): + def __getitem__(self, k: int) -> Segment: """Get segment by index (in chronological order) >>> first_segment = timeline[0] @@ -193,7 +196,7 @@ def __getitem__(self, k): """ return self.segments_list_[k] - def __eq__(self, other): + def __eq__(self, other: 'Timeline'): """Equality Two timelines are equal if and only if their segments are equal. @@ -208,11 +211,11 @@ def __eq__(self, other): """ return self.segments_set_ == other.segments_set_ - def __ne__(self, other): + def __ne__(self, other: 'Timeline'): """Inequality""" return self.segments_set_ != other.segments_set_ - def index(self, segment): + def index(self, segment: Segment) -> int: """Get index of (existing) segment Parameters @@ -231,7 +234,7 @@ def index(self, segment): """ return self.segments_list_.index(segment) - def add(self, segment): + def add(self, segment: Segment) -> 'Timeline': """Add a segment (in place) Parameters @@ -267,7 +270,7 @@ def add(self, segment): return self - def remove(self, segment): + def remove(self, segment: Segment): """Remove a segment (in place) Parameters @@ -299,7 +302,7 @@ def remove(self, segment): return self - def discard(self, segment): + def discard(self, segment: Segment): """Same as `remove` See also @@ -308,10 +311,10 @@ def discard(self, segment): """ return self.remove(segment) - def __ior__(self, timeline): + def __ior__(self, timeline: 'Timeline'): return self.update(timeline) - def update(self, timeline): + def update(self, timeline: Segment): """Add every segments of an existing timeline (in place) Parameters @@ -344,10 +347,10 @@ def update(self, timeline): return self - def __or__(self, timeline): + def __or__(self, timeline: 'Timeline') -> 'Timeline': return self.union(timeline) - def union(self, timeline): + def union(self, timeline: 'Timeline') -> 'Timeline': """Create new timeline made of union of segments Parameters @@ -368,7 +371,7 @@ def union(self, timeline): segments = self.segments_set_ | timeline.segments_set_ return Timeline(segments=segments, uri=self.uri) - def co_iter(self, other): + def co_iter(self, other: 'Timeline'): """Iterate over pairs of intersecting segments >>> timeline1 = Timeline([Segment(0, 2), Segment(1, 2), Segment(3, 4)]) @@ -548,7 +551,7 @@ def __str__(self): string = "[" for i, segment in enumerate(self.segments_list_): string += str(segment) - string += "\n " if i+1 < n else "" + string += "\n " if i + 1 < n else "" string += "]" return string From ed1d2d689ee82584ec60d364a6f84a1bcb69c45a Mon Sep 17 00:00:00 2001 From: hadware Date: Mon, 9 Dec 2019 20:56:40 +0100 Subject: [PATCH 02/30] Type hinting for timelines is mostly done. --- pyannote/core/annotation.py | 8 +++----- pyannote/core/scores.py | 6 ++---- pyannote/core/timeline.py | 40 +++++++++++++++++++------------------ 3 files changed, 26 insertions(+), 28 deletions(-) diff --git a/pyannote/core/annotation.py b/pyannote/core/annotation.py index 42f0ecb..c442e87 100755 --- a/pyannote/core/annotation.py +++ b/pyannote/core/annotation.py @@ -108,7 +108,7 @@ """ import itertools -from typing import Optional, Dict, Union, Iterable, Generator, Tuple, List, Set +from typing import Optional, Dict, Union, Iterable, Generator, Tuple, List, Set, Hashable import numpy as np import pandas as pd @@ -122,7 +122,7 @@ from .utils.generators import string_generator, int_generator # TODO : Make sure this is true, it may just be "Hashable" -Label = Union[str, int] +Label = Hashable class Annotation: @@ -166,8 +166,6 @@ def from_df(cls, df: pd.DataFrame, uri=None, modality=None): def __init__(self, uri: Optional[str] = None, modality: Optional[str] = None): - super().__init__() - self._uri: Optional[str] = uri self.modality: Optional[str] = modality @@ -964,7 +962,7 @@ def argmax(self, support: Optional[Union[Segment, Timeline]] = None): return max(((_, cropped.label_duration(_)) for _ in cropped.labels()), key=lambda x: x[1])[0] - def rename_tracks(self, generator: Union[str, Iterable[str]] = 'string'): + def rename_tracks(self, generator: Union[str, Iterable[Label]] = 'string'): """Rename all tracks Parameters diff --git a/pyannote/core/scores.py b/pyannote/core/scores.py index a8a071b..08d3b45 100644 --- a/pyannote/core/scores.py +++ b/pyannote/core/scores.py @@ -35,7 +35,7 @@ from .timeline import Timeline -class Unknown(object): +class Unknown: nextID = 0 @@ -49,7 +49,6 @@ def getNewID(cls): return cls.nextID def __init__(self, format='#{id:d}'): - super(Unknown, self).__init__() self.ID = Unknown.getNewID() self._format = format @@ -78,8 +77,7 @@ def __gt__(self, other): return True - -class Scores(object): +class Scores: """ Parameters diff --git a/pyannote/core/timeline.py b/pyannote/core/timeline.py index 7d949bb..9d34f2a 100755 --- a/pyannote/core/timeline.py +++ b/pyannote/core/timeline.py @@ -88,7 +88,7 @@ See :class:`pyannote.core.Timeline` for the complete reference. """ -from typing import Optional, Iterable +from typing import Optional, Iterable, List, Generator, Union, Callable import pandas as pd from sortedcontainers import SortedList @@ -131,9 +131,6 @@ def from_df(cls, df: pd.DataFrame, uri: Optional[str] = None): return timeline def __init__(self, segments: Optional[Iterable[Segment]] = None, uri=None): - - super().__init__() - if segments is None: segments = () @@ -509,7 +506,7 @@ def crop(self, support, mode='intersection', returns_mapping=False): return Timeline(segments=self.crop_iter(support, mode=mode), uri=self.uri) - def overlapping(self, t): + def overlapping(self, t: float) -> List[Segment]: """Get list of segments overlapping `t` Parameters @@ -524,7 +521,7 @@ def overlapping(self, t): """ return list(self.overlapping_iter(t)) - def overlapping_iter(self, t): + def overlapping_iter(self, t: float) -> Generator[Segment]: """Like `overlapping` but returns a segment iterator instead See also @@ -532,6 +529,7 @@ def overlapping_iter(self, t): :func:`pyannote.core.Timeline.overlapping` """ segment = Segment(start=t, end=t) + # TODO: maybe this line should be removed? iterable = self.segments_list_.irange(maximum=segment) for segment in self.segments_list_.irange(maximum=segment): if segment.overlaps(t): @@ -566,7 +564,7 @@ def __repr__(self): return "" % (self.uri, list(self.segments_list_)) - def __contains__(self, included): + def __contains__(self, included: Union[Segment, 'Timeline']): """Inclusion Check whether every segment of `included` does exist in timeline. @@ -598,14 +596,14 @@ def __contains__(self, included): return included in self.segments_set_ elif isinstance(included, Timeline): - return self.segments_set_.issuperset(included._segments) + return self.segments_set_.issuperset(included.segments_set_) else: raise TypeError( 'Checking for inclusion only supports Segment and ' 'Timeline instances') - def empty(self): + def empty(self) -> 'Timeline': """Return an empty copy Returns @@ -616,7 +614,8 @@ def empty(self): """ return Timeline(uri=self.uri) - def copy(self, segment_func=None): + def copy(self, segment_func: Optional[Callable[[Segment], Segment]] = None) \ + -> 'Timeline': """Get a copy of the timeline If `segment_func` is provided, it is applied to each segment first. @@ -644,7 +643,7 @@ def copy(self, segment_func=None): return Timeline(segments=[segment_func(s) for s in self.segments_list_], uri=self.uri) - def extent(self): + def extent(self) -> Segment: """Extent The extent of a timeline is the segment of minimum duration that @@ -681,8 +680,8 @@ def extent(self): import numpy as np return Segment(start=np.inf, end=-np.inf) - def support_iter(self): - """Like `support` but returns a segment iterator instead + def support_iter(self) -> Generator[Segment]: + """Like `support` but returns a segment generator instead See also -------- @@ -723,7 +722,7 @@ def support_iter(self): # Add new segment to the timeline support yield new_segment - def support(self): + def support(self) -> 'Timeline': """Timeline support The support of a timeline is the timeline with the minimum number of @@ -763,8 +762,9 @@ def duration(self): # of the segments in the timeline support. return sum(s.duration for s in self.support_iter()) - def gaps_iter(self, support=None): - """Like `gaps` but returns a segment iterator instead + def gaps_iter(self, support: Optional[Union[Segment, 'Timeline']] = None) \ + -> Generator[Segment]: + """Like `gaps` but returns a segment generator instead See also -------- @@ -811,7 +811,8 @@ def gaps_iter(self, support=None): for gap in self.gaps_iter(support=segment): yield gap - def gaps(self, support=None): + def gaps(self, support: Optional[Union[Segment, 'Timeline']] = None) \ + -> 'Timeline': """Gaps A picture is worth a thousand words:: @@ -842,7 +843,7 @@ def gaps(self, support=None): return Timeline(segments=self.gaps_iter(support=support), uri=self.uri) - def segmentation(self): + def segmentation(self) -> 'Timeline': """Segmentation Create the unique timeline with same support and same set of segment @@ -901,7 +902,8 @@ def segmentation(self): return Timeline(segments=segments, uri=self.uri) - def to_annotation(self, generator='string', modality=None): + def to_annotation(self, generator: Union[str, Iterable['Label']] = 'string', + modality: Optional[str] = None): """Turn timeline into an annotation Each segment is labeled by a unique label. From aa96d5bb27772194b7280f796fef507e01ef5ef3 Mon Sep 17 00:00:00 2001 From: hadware Date: Tue, 10 Dec 2019 02:59:16 +0100 Subject: [PATCH 03/30] Added a "typedef" file for commonly used types in the library. Most type hints should be good for the API objects. --- pyannote/core/annotation.py | 28 +++++----- pyannote/core/feature.py | 33 +++++++----- pyannote/core/json.py | 6 ++- pyannote/core/scores.py | 69 +++++++++++++----------- pyannote/core/segment.py | 99 +++++++++++++++++++---------------- pyannote/core/timeline.py | 16 +++--- pyannote/core/utils/helper.py | 2 +- pyannote/core/utils/types.py | 6 +++ setup.py | 1 + 9 files changed, 149 insertions(+), 111 deletions(-) create mode 100644 pyannote/core/utils/types.py diff --git a/pyannote/core/annotation.py b/pyannote/core/annotation.py index c442e87..06f5fc6 100755 --- a/pyannote/core/annotation.py +++ b/pyannote/core/annotation.py @@ -120,9 +120,7 @@ from .timeline import Timeline from .json import PYANNOTE_JSON, PYANNOTE_JSON_CONTENT from .utils.generators import string_generator, int_generator - -# TODO : Make sure this is true, it may just be "Hashable" -Label = Hashable +from .utils.types import Label, Key, Support, LabelGenerator class Annotation: @@ -143,7 +141,10 @@ class Annotation: """ @classmethod - def from_df(cls, df: pd.DataFrame, uri=None, modality=None): + def from_df(cls, + df: pd.DataFrame, + uri: Optional[str] = None, + modality: Optional[str] = None): df = df[[PYANNOTE_SEGMENT, PYANNOTE_TRACK, PYANNOTE_LABEL]] @@ -353,8 +354,7 @@ def __contains__(self, included: Union[Segment, Timeline]): """ return included in self.get_timeline(copy=False) - def crop(self, support: Union[Segment, Timeline], - mode: str = 'intersection'): + def crop(self, support: Support, mode: str = 'intersection'): """Crop annotation to new support Parameters @@ -570,7 +570,7 @@ def __str__(self): return "\n".join(["%s %s %s" % (s, t, l) for s, t, l in self.itertracks(yield_label=True)]) - def __delitem__(self, key: Union[Segment, Tuple[Segment, str]]): + def __delitem__(self, key: Key): """Delete one track >>> del annotation[segment, track] @@ -622,7 +622,7 @@ def __delitem__(self, key: Union[Segment, Tuple[Segment, str]]): 'Deletion only works with Segment or (Segment, track) keys.') # label = annotation[segment, track] - def __getitem__(self, key: Union[Segment, Tuple[Segment, str]]): + def __getitem__(self, key: Key): """Get track label >>> label = annotation[segment, track] @@ -640,7 +640,7 @@ def __getitem__(self, key: Union[Segment, Tuple[Segment, str]]): # annotation[segment, track] = label def __setitem__(self, - key: Union[Segment, Tuple[Segment, str]], + key: Key, label: Label): """Add new or update existing track @@ -814,7 +814,7 @@ def update(self, annotation: 'Annotation', copy: bool = False): return result - def label_timeline(self, label: Label, copy: bool=True): + def label_timeline(self, label: Label, copy: bool = True): """Query segments by label Parameters @@ -924,7 +924,7 @@ def chart(self, percent: bool = False): return chart - def argmax(self, support: Optional[Union[Segment, Timeline]] = None): + def argmax(self, support: Optional[Support] = None): """Get label with longest duration Parameters @@ -962,7 +962,7 @@ def argmax(self, support: Optional[Union[Segment, Timeline]] = None): return max(((_, cropped.label_duration(_)) for _ in cropped.labels()), key=lambda x: x[1])[0] - def rename_tracks(self, generator: Union[str, Iterable[Label]] = 'string'): + def rename_tracks(self, generator: LabelGenerator = 'string'): """Rename all tracks Parameters @@ -1010,7 +1010,7 @@ def rename_tracks(self, generator: Union[str, Iterable[Label]] = 'string'): def rename_labels(self, mapping: Optional[Dict] = None, - generator: Union[str, Generator[str]] = 'string', + generator: LabelGenerator = 'string', copy: bool = True) -> 'Annotation': """Rename labels @@ -1061,7 +1061,7 @@ def rename_labels(self, return renamed - def relabel_tracks(self, generator: Union[str, Iterable[str]] = 'string')\ + def relabel_tracks(self, generator: LabelGenerator = 'string') \ -> 'Annotation': """Relabel tracks diff --git a/pyannote/core/feature.py b/pyannote/core/feature.py index abc494b..9975083 100755 --- a/pyannote/core/feature.py +++ b/pyannote/core/feature.py @@ -34,6 +34,7 @@ See :class:`pyannote.core.SlidingWindowFeature` for the complete reference. """ +from typing import Any, Tuple, Optional, Union import numpy as np from .segment import Segment @@ -41,8 +42,7 @@ from .timeline import Timeline -class SlidingWindowFeature(object): - +class SlidingWindowFeature: """Periodic feature vectors Parameters @@ -54,19 +54,20 @@ class SlidingWindowFeature(object): """ - def __init__(self, data, sliding_window): - super(SlidingWindowFeature, self).__init__() - self.sliding_window = sliding_window + def __init__(self, data: Any, sliding_window: SlidingWindow): + self.sliding_window: SlidingWindow = sliding_window self.data = data - self.__i = -1 + self.__i: int = -1 def __len__(self): return self.data.shape[0] + # TODO : name is not pep8 def getNumber(self): """Number of feature vectors""" return self.data.shape[0] + # TODO : name is not pep8 def getDimension(self): """Dimension of feature vectors""" return self.data.shape[1] @@ -74,7 +75,7 @@ def getDimension(self): def getExtent(self): return self.sliding_window.rangeToSegment(0, self.getNumber()) - def __getitem__(self, i): + def __getitem__(self, i: int): """Get ith feature vector""" return self.data[i] @@ -82,7 +83,7 @@ def __iter__(self): self.__i = -1 return self - def __next__(self): + def __next__(self) -> Tuple[Segment, Any]: self.__i += 1 try: return self.sliding_window[self.__i], self.data[self.__i] @@ -92,7 +93,7 @@ def __next__(self): def next(self): return self.__next__() - def iterfeatures(self, window=False): + def iterfeatures(self, window: Optional[bool] = False): """Feature vector iterator Parameters @@ -109,7 +110,12 @@ def iterfeatures(self, window=False): else: yield self.data[i] - def crop(self, focus, mode='loose', fixed=None, return_data=True): + def crop(self, + focus: Union[Segment, Timeline], + mode: str = 'loose', + fixed: Optional[float] = None, + return_data: bool = True) \ + -> Union[np.ndarray, 'SlidingWindowFeature']: """Extract frames Parameters @@ -174,7 +180,7 @@ def crop(self, focus, mode='loose', fixed=None, return_data=True): [self.data[start: end, :] for start, end in clipped_ranges]) else: # if all ranges are out of bounds, just return empty data - shape = (0, ) + self.data.shape[1:] + shape = (0,) + self.data.shape[1:] data = np.empty(shape) # corner case when 'fixed' duration cropping is requested: @@ -182,11 +188,11 @@ def crop(self, focus, mode='loose', fixed=None, return_data=True): if fixed is not None: data = np.vstack([ # repeat first sample as many times as needed - np.tile(self.data[0], (repeat_first, ) + (1,) * n_dimensions), + np.tile(self.data[0], (repeat_first,) + (1,) * n_dimensions), data, # repeat last sample as many times as needed np.tile(self.data[n_samples - 1], - (repeat_last,) + (1, ) * n_dimensions)]) + (repeat_last,) + (1,) * n_dimensions)]) # return data if return_data: @@ -206,4 +212,5 @@ def _repr_png_(self): if __name__ == "__main__": import doctest + doctest.testmod() diff --git a/pyannote/core/json.py b/pyannote/core/json.py index 5581b2b..6ce2320 100644 --- a/pyannote/core/json.py +++ b/pyannote/core/json.py @@ -25,6 +25,8 @@ # AUTHORS # Hervé BREDIN - http://herve.niderb.fr +from pathlib import Path +from typing import Union import simplejson as json @@ -78,12 +80,12 @@ def loads(s): return json.loads(s, encoding='utf-8', object_hook=object_hook) -def load_from(path): +def load_from(path: Union[str, Path]): """Deserialize Parameters ---------- - path : string + path : string or Path Path to file containing serialized `pyannote.core` data structure Returns diff --git a/pyannote/core/scores.py b/pyannote/core/scores.py index 08d3b45..281adef 100644 --- a/pyannote/core/scores.py +++ b/pyannote/core/scores.py @@ -25,6 +25,7 @@ # AUTHORS # Hervé BREDIN - http://herve.niderb.fr +from typing import Optional, Callable, Iterable, Hashable, List, Set import numpy as np from pandas import Index, MultiIndex, DataFrame, pivot_table @@ -33,22 +34,24 @@ from .annotation import Annotation from .segment import Segment from .timeline import Timeline +from .utils.types import Key, Label, LabelGenerator, Support class Unknown: + # TODO : document this class nextID = 0 @classmethod def reset(cls): - cls.nextID = 0 + cls.nextID: int = 0 @classmethod - def getNewID(cls): + def getNewID(cls) -> int: cls.nextID += 1 return cls.nextID - def __init__(self, format='#{id:d}'): + def __init__(self, format: str = '#{id:d}'): self.ID = Unknown.getNewID() self._format = format @@ -105,10 +108,13 @@ class Scores: >>> s[Segment(2,3), 's1', 'C'] = 0.3 """ + @classmethod def from_df( - cls, df, - uri=None, modality=None, aggfunc=np.mean + cls, df: DataFrame, + uri: Optional[str] = None, + modality: Optional[str] = None, + aggfunc: Callable = np.mean ): """ @@ -147,11 +153,13 @@ def from_df( annotation=annotation, labels=labels, values=dataframe.values) - def __init__(self, uri=None, modality=None, - annotation=None, labels=None, - values=None, dtype=None): - - super(Scores, self).__init__() + def __init__(self, + uri: Optional[str] = None, + modality: Optional[str] = None, + annotation: Optional[Annotation] = None, + labels: Iterable[Hashable] = None, + values: Optional[np.ndarray] = None, + dtype=None): # TODO maybe this should get removed names = [PYANNOTE_SEGMENT + '_' + field for field in Segment._fields] + [PYANNOTE_TRACK] @@ -159,7 +167,7 @@ def __init__(self, uri=None, modality=None, if annotation: annotation = annotation.copy() index = Index( - [s + (t, ) for s, t in annotation.itertracks()], + [s + (t,) for s, t in annotation.itertracks()], name=names) else: @@ -181,7 +189,7 @@ def __init__(self, uri=None, modality=None, self.modality = modality self.uri = uri - def copy(self): + def copy(self) -> 'Scores': self._reindexIfNeeded() copied = self.__class__(uri=self.uri, modality=self.modality) copied.dataframe_ = self.dataframe_.copy() @@ -192,7 +200,7 @@ def copy(self): # del scores[segment] # del scores[segment, :] # del scores[segment, track] - def __delitem__(self, key): + def __delitem__(self, key: Key): if isinstance(key, Segment): segment = key @@ -202,7 +210,7 @@ def __delitem__(self, key): elif isinstance(key, tuple) and len(key) == 2: segment, track = key - self.dataframe_.drop(tuple(segment) + (track, ), + self.dataframe_.drop(tuple(segment) + (track,), axis=0, inplace=True) del self.annotation_[segment, track] self.hasChanged_ = True @@ -217,7 +225,7 @@ def __getitem__(self, key): key = (key[0], '_', key[1]) segment, track, label = key - return self.dataframe_.at[tuple(segment) + (track, ), label] + return self.dataframe_.at[tuple(segment) + (track,), label] # scores[segment, track, label] = value # scores[segment, label] ==== scores[segment, '_', label] @@ -272,7 +280,7 @@ def __reversed__(self): def itersegments(self): return iter(self) - def tracks(self, segment): + def tracks(self, segment: Segment): """Set of tracks for query segment Parameters @@ -287,7 +295,7 @@ def tracks(self, segment): """ return self.annotation_.get_tracks(segment) - def has_track(self, segment, track): + def has_track(self, segment: Segment, track): """Check whether a given track exists Parameters @@ -358,7 +366,7 @@ def itervalues(self): if not np.isnan(value): yield segment, track, label, value - def get_track_scores(self, segment, track): + def get_track_scores(self, segment: Segment, track): """Get all scores for a given track. Parameters @@ -372,9 +380,9 @@ def get_track_scores(self, segment, track): scores : dict {label: score} dictionary """ - return dict(self.dataframe_.xs(tuple(segment) + (track, ))) + return dict(self.dataframe_.xs(tuple(segment) + (track,))) - def labels(self): + def labels(self) -> List[Label]: """List of labels Returns @@ -397,7 +405,7 @@ def _reindexIfNeeded(self): for field in Segment._fields] + [PYANNOTE_TRACK] new_index = Index( - [s + (t, ) for s, t in self.annotation_.itertracks()], + [s + (t,) for s, t in self.annotation_.itertracks()], name=names) self.dataframe_ = self.dataframe_.reindex(new_index) @@ -406,7 +414,7 @@ def _reindexIfNeeded(self): return - def rename_tracks(self, generator='int'): + def rename_tracks(self, generator: LabelGenerator = 'int'): """Rename tracks""" self._reindexIfNeeded() @@ -418,13 +426,13 @@ def rename_tracks(self, generator='int'): names = [PYANNOTE_SEGMENT + '_' + field for field in Segment._fields] + [PYANNOTE_TRACK] new_index = Index( - [s + (t, ) for s, t in annotation.itertracks()], + [s + (t,) for s, t in annotation.itertracks()], name=names) retracked.dataframe_.index = new_index return retracked - def apply(self, func, axis=0): + def apply(self, func: Callable, axis=0): applied = self.copy() applied.dataframe_ = self.dataframe_.apply(func, axis=axis) @@ -432,7 +440,7 @@ def apply(self, func, axis=0): return applied - def rank(self, ascending=False): + def rank(self, ascending: bool = False): """ Parameters @@ -452,7 +460,7 @@ def rank(self, ascending=False): ranked.hasChanged_ = True return ranked - def nbest(self, n, ascending=False): + def nbest(self, n: int, ascending: bool = False): """ Parameters @@ -476,7 +484,7 @@ def nbest(self, n, ascending=False): filtered.hasChanged_ = True return filtered - def subset(self, labels, invert=False): + def subset(self, labels: Set[Label], invert: bool = False): """Scores subset Extract scores subset based on labels @@ -510,7 +518,7 @@ def subset(self, labels, invert=False): return subset - def to_annotation(self, threshold=-np.inf, posterior=False): + def to_annotation(self, threshold: float = -np.inf, posterior: bool = False): """ Parameters @@ -556,7 +564,7 @@ def to_annotation(self, threshold=-np.inf, posterior=False): return annotation - def map(self, func): + def map(self, func: Callable): """Apply function to all values""" mapped = self.copy() @@ -564,7 +572,7 @@ def map(self, func): mapped.hasChanged_ = True return mapped - def crop(self, focus, mode='strict'): + def crop(self, focus: Support, mode: str = 'strict') -> Support: """Crop on focus Parameters @@ -651,4 +659,5 @@ def _repr_png_(self): if __name__ == "__main__": import doctest + doctest.testmod() diff --git a/pyannote/core/segment.py b/pyannote/core/segment.py index ec2f061..aa0f28c 100755 --- a/pyannote/core/segment.py +++ b/pyannote/core/segment.py @@ -67,13 +67,18 @@ """ from collections import namedtuple +from typing import Union, Optional, Tuple + import numpy as np +from dataclasses import dataclass + # 1 μs (one microsecond) SEGMENT_PRECISION = 1e-6 -class Segment(namedtuple('Segment', ['start', 'end'])): +@dataclass +class Segment: """ Time interval @@ -106,9 +111,8 @@ class Segment(namedtuple('Segment', ['start', 'end'])): - `segment.start == other_segment.start` and `segment.end < other_segment.end` """ - - def __new__(cls, start=0., end=0.): - return super(Segment, cls).__new__(cls, float(start), float(end)) + start: float = 0.0 + end: float = 0.0 def __bool__(self): """Emptiness @@ -125,25 +129,26 @@ def __bool__(self): """ return (self.end - self.start) > SEGMENT_PRECISION - def _get_duration(self): + @property + def duration(self): + """Segment duration (read-only)""" return self.end - self.start if self else 0. - duration = property(fget=_get_duration) - """Segment duration (read-only)""" - def _get_middle(self): + @property + def middle(self): + """Segment mid-time (read-only)""" return .5 * (self.start + self.end) - middle = property(fget=_get_middle) - """Segment mid-time (read-only)""" def __iter__(self): """Unpack segment boundaries >>> segment = Segment(start, end) >>> start, end = segment """ + # todo: is there a reason for not returning a tuple? yield self.start yield self.end - def copy(self): + def copy(self) -> 'Segment': """Get a copy of the segment Returns @@ -157,7 +162,7 @@ def copy(self): # Inclusion (in), intersection (&), union (|) and gap (^) # # ------------------------------------------------------- # - def __contains__(self, other): + def __contains__(self, other: 'Segment'): """Inclusion >>> segment = Segment(start=0, end=10) @@ -190,7 +195,7 @@ def __and__(self, other): end = min(self.end, other.end) return Segment(start=start, end=end) - def intersects(self, other): + def intersects(self, other: 'Segment'): """Check whether two segments intersect each other Parameters @@ -210,7 +215,7 @@ def intersects(self, other): self.start < other.end - SEGMENT_PRECISION) or \ (self.start == other.start) - def overlaps(self, t): + def overlaps(self, t: float): """Check if segment overlaps a given time Parameters @@ -225,7 +230,7 @@ def overlaps(self, t): """ return self.start <= t and self.end >= t - def __or__(self, other): + def __or__(self, other: 'Segment'): """Union >>> segment = Segment(0, 10) @@ -255,7 +260,7 @@ def __or__(self, other): end = max(self.end, other.end) return Segment(start=start, end=end) - def __xor__(self, other): + def __xor__(self, other: 'Segment'): """Gap >>> segment = Segment(0, 10) @@ -283,7 +288,7 @@ def __xor__(self, other): end = max(self.start, other.start) return Segment(start=start, end=end) - def _str_helper(self, seconds): + def _str_helper(self, seconds: float): from datetime import timedelta negative = seconds < 0 seconds = abs(seconds) @@ -312,6 +317,7 @@ def __str__(self): return '[]' def pretty(self): + # TODO: where does warning come from? warnings.warn( '"pretty" has been replaced by "__str__"', DeprecationWarning) @@ -355,7 +361,7 @@ def _repr_png_(self): return repr_segment(self) -class SlidingWindow(object): +class SlidingWindow: """Sliding window Parameters @@ -389,7 +395,6 @@ class SlidingWindow(object): """ def __init__(self, duration=0.030, step=0.010, start=0.000, end=None): - super(SlidingWindow, self).__init__() # duration must be a float > 0 if duration <= 0: @@ -416,27 +421,27 @@ def __init__(self, duration=0.030, step=0.010, start=0.000, end=None): # current index of iterator self.__i = -1 - def __get_start(self): + @property + def start(self): + """Sliding window start time in seconds.""" return self.__start - start = property(fget=__get_start) - """Sliding window start time in seconds.""" - def __get_end(self): + @property + def end(self): + """Sliding window end time in seconds.""" return self.__end - end = property(fget=__get_end) - """Sliding window end time in seconds.""" - def __get_step(self): + @property + def step(self): + """Sliding window step in seconds.""" return self.__step - step = property(fget=__get_step) - """Sliding window step in seconds.""" - def __get_duration(self): + @property + def duration(self): + """Sliding window duration in seconds.""" return self.__duration - duration = property(fget=__get_duration) - """Sliding window duration in seconds.""" - def closest_frame(self, t): + def closest_frame(self, t: float) -> int: """Closest frame to timestamp. Parameters @@ -454,7 +459,7 @@ def closest_frame(self, t): (t - self.__start - .5 * self.__duration) / self.__step )) - def samples(self, from_duration, mode='strict'): + def samples(self, from_duration: float, mode: str = 'strict') -> int: """Number of frames Parameters @@ -481,7 +486,10 @@ def samples(self, from_duration, mode='strict'): elif mode == 'center': return int(np.rint((from_duration / self.step))) - def crop(self, focus, mode='loose', fixed=None, return_ranges=False): + def crop(self, focus: Union[Segment, 'Timeline'], + mode: str = 'loose', + fixed: Optional[float] = None, + return_ranges: Optional[bool] = False) -> np.ndarray: """Crop sliding window Parameters @@ -604,7 +612,8 @@ def crop(self, focus, mode='loose', fixed=None, return_ranges=False): return np.array(range(*rng), dtype=np.int64) - def segmentToRange(self, segment): + # TODO : this is not PEP8, should be segment_to_range + def segmentToRange(self, segment: Segment) -> Tuple[int, int]: """Convert segment to 0-indexed frame range Parameters @@ -634,7 +643,8 @@ def segmentToRange(self, segment): return i0, n - def rangeToSegment(self, i0, n): + # TODO : this is not PEP8, should be range_to_segment + def rangeToSegment(self, i0: int, n: int) -> Segment: """Convert 0-indexed frame range to segment Each frame represents a unique segment of duration 'step', centered on @@ -679,16 +689,17 @@ def rangeToSegment(self, i0, n): return Segment(start, end) - def samplesToDuration(self, nSamples): + # TODO : this is not PEP8, should be samples_to_duration + def samplesToDuration(self, nSamples: int) -> float: """Returns duration of samples""" return self.rangeToSegment(0, nSamples).duration - - def durationToSamples(self, duration): + # TODO : this is not PEP8, should be duration_to_samples + def durationToSamples(self, duration: float) -> int: """Returns samples in duration""" return self.segmentToRange(Segment(0, duration))[1] - def __getitem__(self, i): + def __getitem__(self, i: int) -> Segment: """ Parameters ---------- @@ -712,7 +723,7 @@ def __getitem__(self, i): return Segment(start=start, end=start + self.__duration) - def next(self): + def next(self) -> Segment: return self.__next__() def __next__(self): @@ -770,19 +781,19 @@ def __len__(self): # based on frame closest to the end i = self.closest_frame(self.__end) - while(self[i]): + while (self[i]): i += 1 length = i return length - def copy(self): + def copy(self) -> 'SlidingWindow': """Duplicate sliding window""" duration = self.duration step = self.step start = self.start end = self.end - sliding_window = SlidingWindow( + sliding_window = self.__class__( duration=duration, step=step, start=start, end=end ) return sliding_window diff --git a/pyannote/core/timeline.py b/pyannote/core/timeline.py index 9d34f2a..9bc664c 100755 --- a/pyannote/core/timeline.py +++ b/pyannote/core/timeline.py @@ -96,6 +96,7 @@ from . import PYANNOTE_URI, PYANNOTE_SEGMENT from .json import PYANNOTE_JSON, PYANNOTE_JSON_CONTENT from .segment import Segment +from .utils.types import Support, Label # ===================================================================== @@ -521,7 +522,7 @@ def overlapping(self, t: float) -> List[Segment]: """ return list(self.overlapping_iter(t)) - def overlapping_iter(self, t: float) -> Generator[Segment]: + def overlapping_iter(self, t: float) -> Generator[Segment, None, None]: """Like `overlapping` but returns a segment iterator instead See also @@ -680,7 +681,7 @@ def extent(self) -> Segment: import numpy as np return Segment(start=np.inf, end=-np.inf) - def support_iter(self) -> Generator[Segment]: + def support_iter(self) -> Generator[Segment, None, None]: """Like `support` but returns a segment generator instead See also @@ -746,7 +747,7 @@ def support(self) -> 'Timeline': """ return Timeline(segments=self.support_iter(), uri=self.uri) - def duration(self): + def duration(self) -> float: """Timeline duration The timeline duration is the sum of the durations of the segments @@ -762,8 +763,8 @@ def duration(self): # of the segments in the timeline support. return sum(s.duration for s in self.support_iter()) - def gaps_iter(self, support: Optional[Union[Segment, 'Timeline']] = None) \ - -> Generator[Segment]: + def gaps_iter(self, support: Optional[Support] = None) \ + -> Generator[Segment, None, None]: """Like `gaps` but returns a segment generator instead See also @@ -811,7 +812,7 @@ def gaps_iter(self, support: Optional[Union[Segment, 'Timeline']] = None) \ for gap in self.gaps_iter(support=segment): yield gap - def gaps(self, support: Optional[Union[Segment, 'Timeline']] = None) \ + def gaps(self, support: Optional[Support] = None) \ -> 'Timeline': """Gaps @@ -902,7 +903,8 @@ def segmentation(self) -> 'Timeline': return Timeline(segments=segments, uri=self.uri) - def to_annotation(self, generator: Union[str, Iterable['Label']] = 'string', + def to_annotation(self, + generator: Union[str, Iterable[Label], None, None] = 'string', modality: Optional[str] = None): """Turn timeline into an annotation diff --git a/pyannote/core/utils/helper.py b/pyannote/core/utils/helper.py index ed63cc5..ddd37c2 100644 --- a/pyannote/core/utils/helper.py +++ b/pyannote/core/utils/helper.py @@ -31,7 +31,7 @@ def get_class_by_name(class_name: str, - default_module_name: Optional[str] = None) -> type : + default_module_name: Optional[str] = None) -> type: """Load class by its name Parameters diff --git a/pyannote/core/utils/types.py b/pyannote/core/utils/types.py new file mode 100644 index 0000000..a316875 --- /dev/null +++ b/pyannote/core/utils/types.py @@ -0,0 +1,6 @@ +from typing import Hashable, Union, Tuple, Iterable, Generator + +Label = Hashable +Support = Union['Segment', 'Timeline'] +Key = Union['Segment', Tuple['Segment', str]] +LabelGenerator = Union[str, Generator[Label, None, None]] diff --git a/setup.py b/setup.py index d2094f1..b537aba 100755 --- a/setup.py +++ b/setup.py @@ -42,6 +42,7 @@ 'pandas >= 0.17.1', 'simplejson >= 3.8.1', 'matplotlib >= 2.0.0', + 'dataclasses >= 0.7' ], # versioneer version=versioneer.get_version(), From eae045ee33bde12a0d1c91eb4e99180601da7e32 Mon Sep 17 00:00:00 2001 From: hadware Date: Wed, 11 Dec 2019 14:33:15 +0100 Subject: [PATCH 04/30] Fixed tests to accomodate for the namedtuples -> dataclass conversion of `Segment`. --- pyannote/core/scores.py | 17 +++++++++-------- pyannote/core/segment.py | 4 ++-- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/pyannote/core/scores.py b/pyannote/core/scores.py index 281adef..6f5dc4c 100644 --- a/pyannote/core/scores.py +++ b/pyannote/core/scores.py @@ -28,6 +28,7 @@ from typing import Optional, Callable, Iterable, Hashable, List, Set import numpy as np +from dataclasses import fields, astuple from pandas import Index, MultiIndex, DataFrame, pivot_table from . import PYANNOTE_SEGMENT, PYANNOTE_TRACK, PYANNOTE_LABEL, PYANNOTE_SCORE @@ -161,8 +162,8 @@ def __init__(self, values: Optional[np.ndarray] = None, dtype=None): # TODO maybe this should get removed - names = [PYANNOTE_SEGMENT + '_' + field - for field in Segment._fields] + [PYANNOTE_TRACK] + names = [PYANNOTE_SEGMENT + '_' + field.name + for field in fields(Segment)] + [PYANNOTE_TRACK] if annotation: annotation = annotation.copy() @@ -401,11 +402,11 @@ def _reindexIfNeeded(self): if not self.hasChanged_: return - names = [PYANNOTE_SEGMENT + '_' + field - for field in Segment._fields] + [PYANNOTE_TRACK] + names = [PYANNOTE_SEGMENT + '_' + field.name + for field in fields(Segment)] + [PYANNOTE_TRACK] new_index = Index( - [s + (t,) for s, t in self.annotation_.itertracks()], + [astuple(s) + (t,) for s, t in self.annotation_.itertracks()], name=names) self.dataframe_ = self.dataframe_.reindex(new_index) @@ -423,10 +424,10 @@ def rename_tracks(self, generator: LabelGenerator = 'int'): annotation = self.annotation_.rename_tracks(generator=generator) retracked.annotation_ = annotation - names = [PYANNOTE_SEGMENT + '_' + field - for field in Segment._fields] + [PYANNOTE_TRACK] + names = [PYANNOTE_SEGMENT + '_' + field.name + for field in fields(Segment)] + [PYANNOTE_TRACK] new_index = Index( - [s + (t,) for s, t in annotation.itertracks()], + [astuple(s) + (t,) for s, t in annotation.itertracks()], name=names) retracked.dataframe_.index = new_index diff --git a/pyannote/core/segment.py b/pyannote/core/segment.py index 38bc46f..66e4869 100755 --- a/pyannote/core/segment.py +++ b/pyannote/core/segment.py @@ -77,7 +77,8 @@ SEGMENT_PRECISION = 1e-6 -@dataclass +# setting 'frozen' to True makes it hashable and immutable +@dataclass(frozen=True, order=True) class Segment: """ Time interval @@ -316,7 +317,6 @@ def __str__(self): self._str_helper(self.end)) return '[]' - def __repr__(self): """Computer-readable representation From d55e2322e6e51f9fe29461bd79434c8104c78fe8 Mon Sep 17 00:00:00 2001 From: hadware Date: Wed, 11 Dec 2019 15:13:16 +0100 Subject: [PATCH 05/30] fixing dataclasses install on 3.7. --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index b537aba..78da886 100755 --- a/setup.py +++ b/setup.py @@ -42,7 +42,7 @@ 'pandas >= 0.17.1', 'simplejson >= 3.8.1', 'matplotlib >= 2.0.0', - 'dataclasses >= 0.7' + "dataclasses >= 0.7; python_version <'3.7'" ], # versioneer version=versioneer.get_version(), From 4497d40212dd1a05f58b5dc65df9f4a9039efe65 Mon Sep 17 00:00:00 2001 From: hadware Date: Wed, 11 Dec 2019 16:53:45 +0100 Subject: [PATCH 06/30] Added hints for notebook.py and a couple of helpers. Fixed some incorrect type hints. --- pyannote/core/annotation.py | 25 ++++----- pyannote/core/feature.py | 6 +-- pyannote/core/json.py | 19 +++---- pyannote/core/notebook.py | 90 ++++++++++++++++--------------- pyannote/core/scores.py | 15 +++--- pyannote/core/utils/distance.py | 2 +- pyannote/core/utils/generators.py | 11 ++-- pyannote/core/utils/types.py | 7 ++- 8 files changed, 97 insertions(+), 78 deletions(-) diff --git a/pyannote/core/annotation.py b/pyannote/core/annotation.py index 0afec1c..3a83a51 100755 --- a/pyannote/core/annotation.py +++ b/pyannote/core/annotation.py @@ -108,7 +108,7 @@ """ import itertools -from typing import Optional, Dict, Union, Iterable, List, Set, TextIO +from typing import Optional, Dict, Union, Iterable, List, Set, TextIO, Tuple import numpy as np import pandas as pd @@ -120,7 +120,7 @@ from .segment import Segment from .timeline import Timeline from .utils.generators import string_generator, int_generator -from .utils.types import Label, Key, Support, LabelGenerator +from .utils.types import Label, Key, Support, LabelGenerator, TrackName class Annotation: @@ -174,7 +174,7 @@ def __init__(self, uri: Optional[str] = None, modality: Optional[str] = None): # keys: annotated segments # values: {track: label} dictionary # TODO : check the type is good for track values - self._tracks: Dict[Segment, Dict[str, Label]] = SortedDict() + self._tracks: Dict[Segment, Dict[TrackName, Label]] = SortedDict() # dictionary # key: label @@ -494,7 +494,7 @@ def get_tracks(self, segment: Segment): """ return set(self._tracks.get(segment, {})) - def has_track(self, segment: Segment, track: str) -> bool: + def has_track(self, segment: Segment, track: TrackName) -> bool: """Check whether a given track exists Parameters @@ -540,8 +540,8 @@ def copy(self) -> 'Annotation': return copied def new_track(self, segment: Segment, - candidate: Optional[str] = None, - prefix: Optional[str] = None) -> str: + candidate: Optional[TrackName] = None, + prefix: Optional[str] = None) -> TrackName: """Generate a new track name for given segment Ensures that the returned track name does not already @@ -644,7 +644,7 @@ def __delitem__(self, key: Key): 'Deletion only works with Segment or (Segment, track) keys.') # label = annotation[segment, track] - def __getitem__(self, key: Key): + def __getitem__(self, key: Key) -> TrackName: """Get track label >>> label = annotation[segment, track] @@ -764,7 +764,8 @@ def get_labels(self, segment: Segment, unique: bool = True) -> Set[Label]: return labels - def subset(self, labels: Iterable[Label], invert: bool = False): + def subset(self, labels: Iterable[Label], invert: bool = False) \ + -> 'Annotation': """Filter annotation by labels Parameters @@ -836,7 +837,7 @@ def update(self, annotation: 'Annotation', copy: bool = False): return result - def label_timeline(self, label: Label, copy: bool = True): + def label_timeline(self, label: Label, copy: bool = True) -> Timeline: """Query segments by label Parameters @@ -875,7 +876,7 @@ def label_timeline(self, label: Label, copy: bool = True): return self._labels[label] - def label_support(self, label: Label): + def label_support(self, label: Label) -> Timeline: """Label support Equivalent to ``Annotation.label_timeline(label).support()`` @@ -922,7 +923,7 @@ def label_duration(self, label: Label): return self.label_timeline(label, copy=False).duration() - def chart(self, percent: bool = False): + def chart(self, percent: bool = False) -> List[Tuple[Label, float]]: """Get labels chart (from longest to shortest duration) Parameters @@ -946,7 +947,7 @@ def chart(self, percent: bool = False): return chart - def argmax(self, support: Optional[Support] = None): + def argmax(self, support: Optional[Support] = None) -> Optional[Label]: """Get label with longest duration Parameters diff --git a/pyannote/core/feature.py b/pyannote/core/feature.py index 8b8ce67..08e9994 100755 --- a/pyannote/core/feature.py +++ b/pyannote/core/feature.py @@ -55,7 +55,7 @@ class SlidingWindowFeature(np.lib.mixins.NDArrayOperatorsMixin): """ - def __init__(self, data: Any, sliding_window: SlidingWindow): + def __init__(self, data: np.ndarray, sliding_window: SlidingWindow): self.sliding_window: SlidingWindow = sliding_window self.data = data self.__i: int = -1 @@ -104,8 +104,8 @@ def iterfeatures(self, window: Optional[bool] = False): Default is to only yield feature vector """ - nSamples = self.data.shape[0] - for i in range(nSamples): + n_samples = self.data.shape[0] + for i in range(n_samples): if window: yield self.data[i], self.sliding_window[i] else: diff --git a/pyannote/core/json.py b/pyannote/core/json.py index 6ce2320..05def99 100644 --- a/pyannote/core/json.py +++ b/pyannote/core/json.py @@ -26,9 +26,10 @@ # AUTHORS # Hervé BREDIN - http://herve.niderb.fr from pathlib import Path -from typing import Union +from typing import Union, TextIO import simplejson as json +from .utils.types import Resource PYANNOTE_JSON = 'pyannote' PYANNOTE_JSON_CONTENT = 'content' @@ -49,7 +50,7 @@ def object_hook(d): return d -def load(fp): +def load(fp: TextIO) -> Resource: """Deserialize Parameters @@ -65,7 +66,7 @@ def load(fp): return json.load(fp, encoding='utf-8', object_hook=object_hook) -def loads(s): +def loads(s: str) -> Resource: """Deserialize Parameters @@ -80,7 +81,7 @@ def loads(s): return json.loads(s, encoding='utf-8', object_hook=object_hook) -def load_from(path: Union[str, Path]): +def load_from(path: Union[str, Path]) -> Resource: """Deserialize Parameters @@ -97,13 +98,13 @@ def load_from(path: Union[str, Path]): return load(fp) -def dump(resource, fp): +def dump(resource: Resource, fp: TextIO): """Serialize Parameters ---------- resource : `pyannote.core` data structure - Resource to deserialize + Resource to serialize fp : file File in which `resource` serialization is written """ @@ -111,7 +112,7 @@ def dump(resource, fp): json.dump(resource, fp, encoding='utf-8', for_json=True) -def dumps(resource): +def dumps(resource: Resource) -> str: """Serialize to string Parameters @@ -126,13 +127,13 @@ def dumps(resource): return json.dumps(resource, encoding='utf-8', for_json=True) -def dump_to(resource, path): +def dump_to(resource: Resource, path: Union[str, Path]): """Serialize Parameters ---------- resource : `pyannote.core` data structure - Resource to deserialize + Resource to serialize path : string Path to file in which `resource` serialization is written """ diff --git a/pyannote/core/notebook.py b/pyannote/core/notebook.py index f255ae0..bbd4603 100644 --- a/pyannote/core/notebook.py +++ b/pyannote/core/notebook.py @@ -31,6 +31,7 @@ Visualization ############# """ +from typing import Iterable, Union try: from IPython.core.pylabtools import print_figure @@ -46,10 +47,9 @@ from .feature import SlidingWindowFeature -class Notebook(object): +class Notebook: def __init__(self): - super(Notebook, self).__init__() self.reset() def reset(self): @@ -64,29 +64,33 @@ def reset(self): del self.crop del self.width - def crop(): - doc = "The crop property." - def fget(self): - return self._crop - def fset(self, segment): - self._crop = segment - def fdel(self): - self._crop = None - return locals() - crop = property(**crop()) - - def width(): - doc = "The width property." - def fget(self): - return self._width - def fset(self, value): - self._width = value - def fdel(self): - self._width = 20 - return locals() - width = property(**width()) - - def __getitem__(self, label): + @property + def crop(self): + """The crop property.""" + return self._crop + + @crop.setter + def crop(self, segment: Segment): + self._crop = segment + + @crop.deleter + def crop(self): + self._crop = None + + @property + def width(self): + """The width property""" + return self._width + + @width.setter + def width(self, value: int): + self._width = value + + @width.deleter + def width(self): + self._width = 20 + + def __getitem__(self, label: str): if label not in self._style: self._style[label] = next(self._style_generator) return self._style[label] @@ -104,7 +108,7 @@ def setup(self, ax=None, ylim=(0, 1), yaxis=False, time=True): ax.axes.get_yaxis().set_visible(yaxis) return ax - def draw_segment(self, ax, segment, y, label=None, boundaries=True): + def draw_segment(self, ax, segment: Segment, y, label=None, boundaries=True): # do nothing if segment is empty if not segment: @@ -114,7 +118,7 @@ def draw_segment(self, ax, segment, y, label=None, boundaries=True): # draw segment ax.hlines(y, segment.start, segment.end, color, - linewidth=linewidth, linestyle=linestyle, label=label) + linewidth=linewidth, linestyle=linestyle, label=label) if boundaries: ax.vlines(segment.start, y + 0.05, y - 0.05, color, linewidth=1, linestyle='solid') @@ -124,13 +128,13 @@ def draw_segment(self, ax, segment, y, label=None, boundaries=True): if label is None: return - def get_y(self, segments): + def get_y(self, segments: Iterable[Segment]) -> np.ndarray: """ Parameters ---------- - segments : iterator - `Segment` iterator (sorted) + segments : Iterable + `Segment` iterable (sorted) Returns ------- @@ -169,8 +173,9 @@ def get_y(self, segments): return y - - def __call__(self, resource, time=True, legend=True): + def __call__(self, resource: Union[Segment, Timeline, Annotation, Scores], + time: bool = True, + legend: bool = True): if isinstance(resource, Segment): self.plot_segment(resource, time=time) @@ -184,7 +189,6 @@ def __call__(self, resource, time=True, legend=True): elif isinstance(resource, Scores): self.plot_scores(resource, time=time, legend=legend) - def plot_segment(self, segment, ax=None, time=True): if not self.crop: @@ -193,7 +197,7 @@ def plot_segment(self, segment, ax=None, time=True): ax = self.setup(ax=ax, time=time) self.draw_segment(ax, segment, 0.5) - def plot_timeline(self, timeline, ax=None, time=True): + def plot_timeline(self, timeline: Timeline, ax=None, time=True): if not self.crop and timeline: self.crop = timeline.extent() @@ -207,7 +211,7 @@ def plot_timeline(self, timeline, ax=None, time=True): # ax.set_aspect(3. / self.crop.duration) - def plot_annotation(self, annotation, ax=None, time=True, legend=True): + def plot_annotation(self, annotation: Annotation, ax=None, time=True, legend=True): if not self.crop: self.crop = annotation.get_timeline(copy=False).extent() @@ -233,7 +237,7 @@ def plot_annotation(self, annotation, ax=None, time=True, legend=True): ax.legend(H, L, bbox_to_anchor=(0, 1), loc=3, ncol=5, borderaxespad=0., frameon=False) - def plot_scores(self, scores, ax=None, time=True, legend=True): + def plot_scores(self, scores: Scores, ax=None, time=True, legend=True): if not self.crop: self.crop = scores.to_annotation().get_timeline(copy=False).extent() @@ -264,7 +268,8 @@ def plot_scores(self, scores, ax=None, time=True, legend=True): ax.legend(H, L, bbox_to_anchor=(0, 1), loc=3, ncol=5, borderaxespad=0., frameon=False) - def plot_feature(self, feature, ax=None, time=True, ylim=None): + def plot_feature(self, feature: SlidingWindowFeature, + ax=None, time=True, ylim=None): if not self.crop: self.crop = feature.getExtent() @@ -289,10 +294,11 @@ def plot_feature(self, feature, ax=None, time=True, ylim=None): ax.plot(t, data) ax.set_xlim(xlim) + notebook = Notebook() -def repr_segment(segment): +def repr_segment(segment: Segment): """Get `png` data for `segment`""" import matplotlib.pyplot as plt figsize = plt.rcParams['figure.figsize'] @@ -305,7 +311,7 @@ def repr_segment(segment): return data -def repr_timeline(timeline): +def repr_timeline(timeline: Timeline): """Get `png` data for `timeline`""" import matplotlib.pyplot as plt figsize = plt.rcParams['figure.figsize'] @@ -318,7 +324,7 @@ def repr_timeline(timeline): return data -def repr_annotation(annotation): +def repr_annotation(annotation: Annotation): """Get `png` data for `annotation`""" import matplotlib.pyplot as plt figsize = plt.rcParams['figure.figsize'] @@ -331,7 +337,7 @@ def repr_annotation(annotation): return data -def repr_scores(scores): +def repr_scores(scores: Scores): """Get `png` data for `scores`""" import matplotlib.pyplot as plt figsize = plt.rcParams['figure.figsize'] @@ -344,7 +350,7 @@ def repr_scores(scores): return data -def repr_feature(feature): +def repr_feature(feature: SlidingWindowFeature): """Get `png` data for `feature`""" import matplotlib.pyplot as plt figsize = plt.rcParams['figure.figsize'] diff --git a/pyannote/core/scores.py b/pyannote/core/scores.py index 6f5dc4c..241558a 100644 --- a/pyannote/core/scores.py +++ b/pyannote/core/scores.py @@ -25,7 +25,7 @@ # AUTHORS # Hervé BREDIN - http://herve.niderb.fr -from typing import Optional, Callable, Iterable, Hashable, List, Set +from typing import Optional, Callable, Iterable, Hashable, List, Set, Tuple import numpy as np from dataclasses import fields, astuple @@ -35,7 +35,7 @@ from .annotation import Annotation from .segment import Segment from .timeline import Timeline -from .utils.types import Key, Label, LabelGenerator, Support +from .utils.types import Key, Label, LabelGenerator, Support, TrackName class Unknown: @@ -313,7 +313,7 @@ def has_track(self, segment: Segment, track): """ return self.annotation_.has_track(segment, track) - def get_track_by_name(self, track): + def get_track_by_name(self, track: TrackName) -> List[Tuple[Segment]]: """Get all tracks with given name Parameters @@ -326,17 +326,20 @@ def get_track_by_name(self, track): tracks : list List of (segment, track) tuples """ + # WARNING: this doesn't call a valid class return self.annotation_.get_track_by_name(track) - def new_track(self, segment, candidate=None, prefix=None): + def new_track(self, + segment: Segment, + candidate: Optional[TrackName]=None, + prefix: Optional[str]=None): """Track name generator Parameters ---------- segment : Segment - prefix : str, optional candidate : any valid track name - + prefix : str, optional Returns ------- diff --git a/pyannote/core/utils/distance.py b/pyannote/core/utils/distance.py index 73aa48c..f3971fa 100644 --- a/pyannote/core/utils/distance.py +++ b/pyannote/core/utils/distance.py @@ -31,7 +31,7 @@ import scipy.cluster.hierarchy -def l2_normalize(X): +def l2_normalize(X: np.ndarray): """L2 normalize vectors Parameters diff --git a/pyannote/core/utils/generators.py b/pyannote/core/utils/generators.py index de8be59..54b5934 100644 --- a/pyannote/core/utils/generators.py +++ b/pyannote/core/utils/generators.py @@ -28,16 +28,18 @@ import itertools from string import ascii_uppercase +from typing import Iterable, Union, List, Set, Optional, Generator -def pairwise(iterable): - "s -> (s0,s1), (s1,s2), (s2, s3), ..." +def pairwise(iterable: Iterable): + """s -> (s0,s1), (s1,s2), (s2, s3), ...""" a, b = itertools.tee(iterable) next(b, None) return zip(a, b) -def string_generator(skip=[]): +def string_generator(skip: Optional[Union[List, Set]] = None) \ + -> Generator[str, None, None]: """Label generator Parameters @@ -61,6 +63,8 @@ def string_generator(skip=[]): next(t) -> 'AAA' # then 3-letters labels ... # (you get the idea) """ + if skip is None: + skip = list() # label length r = 1 @@ -77,6 +81,7 @@ def string_generator(skip=[]): # increment label length when all possibilities are exhausted r = r + 1 + def int_generator(): i = 0 while True: diff --git a/pyannote/core/utils/types.py b/pyannote/core/utils/types.py index a316875..da5181e 100644 --- a/pyannote/core/utils/types.py +++ b/pyannote/core/utils/types.py @@ -1,6 +1,9 @@ -from typing import Hashable, Union, Tuple, Iterable, Generator +from typing import Hashable, Union, Tuple, Generator Label = Hashable Support = Union['Segment', 'Timeline'] -Key = Union['Segment', Tuple['Segment', str]] LabelGenerator = Union[str, Generator[Label, None, None]] +TrackName = Union[str, int] +Key = Union['Segment', Tuple['Segment', TrackName]] +Resource = Union['Segment', 'Timeline', 'Score', 'SlidingWindowFeature'] + From d19be1aef68942b8304b2cd6ce40afcf6c799098 Mon Sep 17 00:00:00 2001 From: hadware Date: Thu, 12 Dec 2019 18:24:43 +0100 Subject: [PATCH 07/30] Added typing_extension dependency, correct annotation.py as per comments on the WIP Pull request. --- pyannote/core/annotation.py | 38 +++++++++++++++++++++--------------- pyannote/core/utils/types.py | 4 +++- setup.py | 3 ++- 3 files changed, 27 insertions(+), 18 deletions(-) diff --git a/pyannote/core/annotation.py b/pyannote/core/annotation.py index 3a83a51..fbf89ae 100755 --- a/pyannote/core/annotation.py +++ b/pyannote/core/annotation.py @@ -108,7 +108,7 @@ """ import itertools -from typing import Optional, Dict, Union, Iterable, List, Set, TextIO, Tuple +from typing import Optional, Dict, Union, Iterable, List, Set, TextIO, Tuple, Generator import numpy as np import pandas as pd @@ -120,7 +120,7 @@ from .segment import Segment from .timeline import Timeline from .utils.generators import string_generator, int_generator -from .utils.types import Label, Key, Support, LabelGenerator, TrackName +from .utils.types import Label, Key, Support, LabelGenerator, TrackName, CropMode class Annotation: @@ -144,7 +144,7 @@ class Annotation: def from_df(cls, df: pd.DataFrame, uri: Optional[str] = None, - modality: Optional[str] = None): + modality: Optional[str] = None) -> 'Annotation': df = df[[PYANNOTE_SEGMENT, PYANNOTE_TRACK, PYANNOTE_LABEL]] @@ -173,7 +173,6 @@ def __init__(self, uri: Optional[str] = None, modality: Optional[str] = None): # sorted dictionary # keys: annotated segments # values: {track: label} dictionary - # TODO : check the type is good for track values self._tracks: Dict[Segment, Dict[TrackName, Label]] = SortedDict() # dictionary @@ -255,7 +254,12 @@ def itersegments(self): """ return iter(self._tracks) - def itertracks(self, yield_label: bool = False): + def itertracks(self, yield_label: bool = False) \ + -> Generator[Union[ + Tuple[Segment, TrackName], + Tuple[Segment, TrackName, Label] + ], + None, None]: """Iterate over tracks (in chronological order) Parameters @@ -376,7 +380,7 @@ def write_rttm(self, file: TextIO): ) file.write(line) - def crop(self, support: Support, mode: str = 'intersection'): + def crop(self, support: Support, mode: CropMode = 'intersection'): """Crop annotation to new support Parameters @@ -475,7 +479,7 @@ def crop(self, support: Support, mode: str = 'intersection'): else: raise NotImplementedError("unsupported mode: '%s'" % mode) - def get_tracks(self, segment: Segment): + def get_tracks(self, segment: Segment) -> Set[TrackName]: """Query tracks by segment Parameters @@ -492,7 +496,7 @@ def get_tracks(self, segment: Segment): ---- This will return an empty set if segment does not exist. """ - return set(self._tracks.get(segment, {})) + return set(self._tracks.get(segment, {}).keys()) def has_track(self, segment: Segment, track: TrackName) -> bool: """Check whether a given track exists @@ -644,7 +648,7 @@ def __delitem__(self, key: Key): 'Deletion only works with Segment or (Segment, track) keys.') # label = annotation[segment, track] - def __getitem__(self, key: Key) -> TrackName: + def __getitem__(self, key: Key) -> Label: """Get track label >>> label = annotation[segment, track] @@ -661,9 +665,7 @@ def __getitem__(self, key: Key) -> TrackName: return self._tracks[key[0]][key[1]] # annotation[segment, track] = label - def __setitem__(self, - key: Key, - label: Label): + def __setitem__(self, key: Key, label: Label): """Add new or update existing track >>> annotation[segment, track] = label @@ -808,7 +810,8 @@ def subset(self, labels: Iterable[Label], invert: bool = False) \ return sub - def update(self, annotation: 'Annotation', copy: bool = False): + def update(self, annotation: 'Annotation', copy: bool = False) \ + -> 'Annotation': """Add every track of an existing annotation (in place) Parameters @@ -899,7 +902,7 @@ def label_support(self, label: Label) -> Timeline: """ return self.label_timeline(label, copy=False).support() - def label_duration(self, label: Label): + def label_duration(self, label: Label) -> float: """Label duration Equivalent to ``Annotation.label_timeline(label).duration()`` @@ -1172,7 +1175,10 @@ def support(self, collar: float = 0.) -> 'Annotation': return support - def co_iter(self, other: 'Annotation'): + def co_iter(self, other: 'Annotation') -> \ + Generator[Tuple[Tuple[Segment, TrackName], + Tuple[Segment, TrackName]], + None, None]: """Iterate over pairs of intersecting tracks Parameters @@ -1183,7 +1189,7 @@ def co_iter(self, other: 'Annotation'): Returns ------- iterable : (Segment, object), (Segment, object) iterable - Yields pairs of intersectins tracks, in chronological (then + Yields pairs of intersecting tracks, in chronological (then alphabetical) order. See also diff --git a/pyannote/core/utils/types.py b/pyannote/core/utils/types.py index da5181e..a0a7cb2 100644 --- a/pyannote/core/utils/types.py +++ b/pyannote/core/utils/types.py @@ -1,9 +1,11 @@ from typing import Hashable, Union, Tuple, Generator +from typing_extensions import Literal + Label = Hashable Support = Union['Segment', 'Timeline'] LabelGenerator = Union[str, Generator[Label, None, None]] TrackName = Union[str, int] Key = Union['Segment', Tuple['Segment', TrackName]] Resource = Union['Segment', 'Timeline', 'Score', 'SlidingWindowFeature'] - +CropMode = Literal['intersection', 'loose', 'strict'] \ No newline at end of file diff --git a/setup.py b/setup.py index 78da886..01e259c 100755 --- a/setup.py +++ b/setup.py @@ -42,7 +42,8 @@ 'pandas >= 0.17.1', 'simplejson >= 3.8.1', 'matplotlib >= 2.0.0', - "dataclasses >= 0.7; python_version <'3.7'" + "dataclasses >= 0.7; python_version <'3.7'", + 'typing-extensions >= 3.7.4.1' ], # versioneer version=versioneer.get_version(), From 6042969363309a7e52b1c5f94bde30924e66a042 Mon Sep 17 00:00:00 2001 From: hadware Date: Thu, 12 Dec 2019 19:14:25 +0100 Subject: [PATCH 08/30] Corrected typing for segment.py and feature.py. Deprecated non-PEP8 methods. --- pyannote/core/feature.py | 42 ++++++++++++++----- pyannote/core/segment.py | 81 ++++++++++++++++++++++-------------- pyannote/core/utils/types.py | 6 ++- 3 files changed, 84 insertions(+), 45 deletions(-) diff --git a/pyannote/core/feature.py b/pyannote/core/feature.py index 08e9994..b90e8f2 100755 --- a/pyannote/core/feature.py +++ b/pyannote/core/feature.py @@ -34,10 +34,14 @@ See :class:`pyannote.core.SlidingWindowFeature` for the complete reference. """ -from typing import Any, Tuple, Optional, Union +import warnings +from logging import warning +from typing import Any, Tuple, Optional, Union, Generator import numpy as np import numbers + +from pyannote.core.utils.types import CropMode from .segment import Segment from .segment import SlidingWindow from .timeline import Timeline @@ -61,22 +65,35 @@ def __init__(self, data: np.ndarray, sliding_window: SlidingWindow): self.__i: int = -1 def __len__(self): + """Number of feature vectors""" return self.data.shape[0] - # TODO : name is not pep8 + @property + def extent(self): + return self.sliding_window.range_to_segment(0, len(self)) + + @property + def dimension(self): + """Dimension of feature vectors""" + return self.data.shape[1] + def getNumber(self): - """Number of feature vectors""" + warnings.warn("This is deprecated in favor of `__len__`", + DeprecationWarning) return self.data.shape[0] # TODO : name is not pep8 def getDimension(self): - """Dimension of feature vectors""" - return self.data.shape[1] + warnings.warn("This is deprecated in favor of `dimension` property", + DeprecationWarning) + return self.dimension def getExtent(self): - return self.sliding_window.rangeToSegment(0, self.getNumber()) + warnings.warn("This is deprecated in favor of `extent` property", + DeprecationWarning) + return self.extent - def __getitem__(self, i: int): + def __getitem__(self, i: int) -> np.ndarray: """Get ith feature vector""" return self.data[i] @@ -84,7 +101,7 @@ def __iter__(self): self.__i = -1 return self - def __next__(self) -> Tuple[Segment, Any]: + def __next__(self) -> Tuple[Segment, np.ndarray]: self.__i += 1 try: return self.sliding_window[self.__i], self.data[self.__i] @@ -94,7 +111,10 @@ def __next__(self) -> Tuple[Segment, Any]: def next(self): return self.__next__() - def iterfeatures(self, window: Optional[bool] = False): + def iterfeatures(self, window: Optional[bool] = False) -> \ + Generator[Union[Tuple[np.ndarray, Segment], + np.ndarray], + None, None]: """Feature vector iterator Parameters @@ -113,7 +133,7 @@ def iterfeatures(self, window: Optional[bool] = False): def crop(self, focus: Union[Segment, Timeline], - mode: str = 'loose', + mode: CropMode = 'loose', fixed: Optional[float] = None, return_data: bool = True) \ -> Union[np.ndarray, 'SlidingWindowFeature']: @@ -212,7 +232,7 @@ def _repr_png_(self): _HANDLED_TYPES = (np.ndarray, numbers.Number) - def __array__(self): + def __array__(self) -> np.ndarray: return self.data def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): diff --git a/pyannote/core/segment.py b/pyannote/core/segment.py index 66e4869..da72338 100755 --- a/pyannote/core/segment.py +++ b/pyannote/core/segment.py @@ -65,15 +65,15 @@ See :class:`pyannote.core.Segment` for the complete reference. """ - -from collections import namedtuple -from typing import Union, Optional, Tuple +import warnings +from typing import Union, Optional, Tuple, List import numpy as np - from dataclasses import dataclass # 1 μs (one microsecond) +from pyannote.core.utils.types import SegmentCropMode + SEGMENT_PRECISION = 1e-6 @@ -131,12 +131,12 @@ def __bool__(self): return (self.end - self.start) > SEGMENT_PRECISION @property - def duration(self): + def duration(self) -> float: """Segment duration (read-only)""" return self.end - self.start if self else 0. @property - def middle(self): + def middle(self) -> float: """Segment mid-time (read-only)""" return .5 * (self.start + self.end) @@ -196,7 +196,7 @@ def __and__(self, other): end = min(self.end, other.end) return Segment(start=start, end=end) - def intersects(self, other: 'Segment'): + def intersects(self, other: 'Segment') -> bool: """Check whether two segments intersect each other Parameters @@ -216,7 +216,7 @@ def intersects(self, other: 'Segment'): self.start < other.end - SEGMENT_PRECISION) or \ (self.start == other.start) - def overlaps(self, t: float): + def overlaps(self, t: float) -> bool: """Check if segment overlaps a given time Parameters @@ -231,7 +231,7 @@ def overlaps(self, t: float): """ return self.start <= t and self.end >= t - def __or__(self, other: 'Segment'): + def __or__(self, other: 'Segment') -> 'Segment': """Union >>> segment = Segment(0, 10) @@ -289,7 +289,7 @@ def __xor__(self, other: 'Segment'): end = max(self.start, other.start) return Segment(start=start, end=end) - def _str_helper(self, seconds: float): + def _str_helper(self, seconds: float) -> str: from datetime import timedelta negative = seconds < 0 seconds = abs(seconds) @@ -398,40 +398,40 @@ def __init__(self, duration=0.030, step=0.010, start=0.000, end=None): # step must be a float > 0 if step <= 0: raise ValueError("'step' must be a float > 0.") - self.__step = step + self.__step: float = step # start must be a float. - self.__start = start + self.__start: float = start # if end is not provided, set it to infinity if end is None: - self.__end = np.inf + self.__end: float = np.inf else: # end must be greater than start if end <= start: raise ValueError("'end' must be greater than 'start'.") - self.__end = end + self.__end: float = end # current index of iterator - self.__i = -1 + self.__i: int = -1 @property - def start(self): + def start(self) -> float: """Sliding window start time in seconds.""" return self.__start @property - def end(self): + def end(self) -> float: """Sliding window end time in seconds.""" return self.__end @property - def step(self): + def step(self) -> float: """Sliding window step in seconds.""" return self.__step @property - def duration(self): + def duration(self) -> float: """Sliding window duration in seconds.""" return self.__duration @@ -453,7 +453,7 @@ def closest_frame(self, t: float) -> int: (t - self.__start - .5 * self.__duration) / self.__step )) - def samples(self, from_duration: float, mode: str = 'strict') -> int: + def samples(self, from_duration: float, mode: SegmentCropMode = 'strict') -> int: """Number of frames Parameters @@ -481,9 +481,10 @@ def samples(self, from_duration: float, mode: str = 'strict') -> int: return int(np.rint((from_duration / self.step))) def crop(self, focus: Union[Segment, 'Timeline'], - mode: str = 'loose', + mode: SegmentCropMode = 'loose', fixed: Optional[float] = None, - return_ranges: Optional[bool] = False) -> np.ndarray: + return_ranges: Optional[bool] = False) -> \ + Union[np.ndarray, List[List[int]]]: """Crop sliding window Parameters @@ -606,8 +607,12 @@ def crop(self, focus: Union[Segment, 'Timeline'], return np.array(range(*rng), dtype=np.int64) - # TODO : this is not PEP8, should be segment_to_range def segmentToRange(self, segment: Segment) -> Tuple[int, int]: + warnings.warn("Deprecated in favor of `segment_to_range`", + DeprecationWarning) + return self.segment_to_range(segment) + + def segment_to_range(self, segment: Segment) -> Tuple[int, int]: """Convert segment to 0-indexed frame range Parameters @@ -625,7 +630,7 @@ def segmentToRange(self, segment: Segment) -> Tuple[int, int]: -------- >>> window = SlidingWindow() - >>> print window.segmentToRange(Segment(10, 15)) + >>> print window.segment_to_range(Segment(10, 15)) i0, n """ @@ -637,8 +642,12 @@ def segmentToRange(self, segment: Segment) -> Tuple[int, int]: return i0, n - # TODO : this is not PEP8, should be range_to_segment def rangeToSegment(self, i0: int, n: int) -> Segment: + warnings.warn("This is deprecated in favor of `range_to_segment`", + DeprecationWarning) + return self.range_to_segment(i0, n) + + def range_to_segment(self, i0: int, n: int) -> Segment: """Convert 0-indexed frame range to segment Each frame represents a unique segment of duration 'step', centered on @@ -662,7 +671,7 @@ def rangeToSegment(self, i0: int, n: int) -> Segment: -------- >>> window = SlidingWindow() - >>> print window.rangeToSegment(3, 2) + >>> print window.range_to_segment(3, 2) [ --> ] """ @@ -683,15 +692,23 @@ def rangeToSegment(self, i0: int, n: int) -> Segment: return Segment(start, end) - # TODO : this is not PEP8, should be samples_to_duration def samplesToDuration(self, nSamples: int) -> float: + warnings.warn("This is deprecated in favor of `samples_to_duration`", + DeprecationWarning) + return self.samples_to_duration(nSamples) + + def samples_to_duration(self, n_samples: int) -> float: """Returns duration of samples""" - return self.rangeToSegment(0, nSamples).duration + return self.range_to_segment(0, n_samples).duration - # TODO : this is not PEP8, should be duration_to_samples def durationToSamples(self, duration: float) -> int: + warnings.warn("This is deprecated in favor of `duration_to_samples`", + DeprecationWarning) + return self.duration_to_samples(duration) + + def duration_to_samples(self, duration: float) -> int: """Returns samples in duration""" - return self.segmentToRange(Segment(0, duration))[1] + return self.segment_to_range(Segment(0, duration))[1] def __getitem__(self, i: int) -> Segment: """ @@ -720,7 +737,7 @@ def __getitem__(self, i: int) -> Segment: def next(self) -> Segment: return self.__next__() - def __next__(self): + def __next__(self) -> Segment: self.__i += 1 window = self[self.__i] @@ -756,7 +773,7 @@ def __iter__(self): self.__i = -1 return self - def __len__(self): + def __len__(self) -> int: """Number of positions Equivalent to len([segment for segment in window]) diff --git a/pyannote/core/utils/types.py b/pyannote/core/utils/types.py index a0a7cb2..f37e98d 100644 --- a/pyannote/core/utils/types.py +++ b/pyannote/core/utils/types.py @@ -7,5 +7,7 @@ LabelGenerator = Union[str, Generator[Label, None, None]] TrackName = Union[str, int] Key = Union['Segment', Tuple['Segment', TrackName]] -Resource = Union['Segment', 'Timeline', 'Score', 'SlidingWindowFeature'] -CropMode = Literal['intersection', 'loose', 'strict'] \ No newline at end of file +Resource = Union['Segment', 'Timeline', 'Score', 'SlidingWindowFeature', + 'Annotation'] +CropMode = Literal['intersection', 'loose', 'strict'] +SegmentCropMode = Literal['center', 'loose', 'strict'] From ac552b4b474fd34bcb18b177cc7b6ad82c8c59e1 Mon Sep 17 00:00:00 2001 From: hadware Date: Thu, 12 Dec 2019 19:34:08 +0100 Subject: [PATCH 09/30] Corrected timeline.py and notebook.py type hinting (and some other one that I forgot) --- pyannote/core/annotation.py | 3 ++- pyannote/core/feature.py | 1 - pyannote/core/notebook.py | 16 ++++++++++++---- pyannote/core/segment.py | 5 ++--- pyannote/core/timeline.py | 28 +++++++++++++++------------- pyannote/core/utils/types.py | 1 + 6 files changed, 32 insertions(+), 22 deletions(-) diff --git a/pyannote/core/annotation.py b/pyannote/core/annotation.py index fbf89ae..2ae1265 100755 --- a/pyannote/core/annotation.py +++ b/pyannote/core/annotation.py @@ -988,7 +988,8 @@ def argmax(self, support: Optional[Support] = None) -> Optional[Label]: return max(((_, cropped.label_duration(_)) for _ in cropped.labels()), key=lambda x: x[1])[0] - def rename_tracks(self, generator: LabelGenerator = 'string'): + def rename_tracks(self, generator: LabelGenerator = 'string') \ + -> 'Annotation': """Rename all tracks Parameters diff --git a/pyannote/core/feature.py b/pyannote/core/feature.py index b90e8f2..8714a09 100755 --- a/pyannote/core/feature.py +++ b/pyannote/core/feature.py @@ -82,7 +82,6 @@ def getNumber(self): DeprecationWarning) return self.data.shape[0] - # TODO : name is not pep8 def getDimension(self): warnings.warn("This is deprecated in favor of `dimension` property", DeprecationWarning) diff --git a/pyannote/core/notebook.py b/pyannote/core/notebook.py index bbd4603..47d3ed5 100644 --- a/pyannote/core/notebook.py +++ b/pyannote/core/notebook.py @@ -31,7 +31,9 @@ Visualization ############# """ -from typing import Iterable, Union +from typing import Iterable, Union, Dict, Optional + +from .utils.types import Label, LabelStyle, Resource try: from IPython.core.pylabtools import print_figure @@ -60,7 +62,9 @@ def reset(self): colors = [cm(1. * i / 8) for i in range(9)] self._style_generator = cycle(product(linestyle, linewidth, colors)) - self._style = {None: ('solid', 1, (0.0, 0.0, 0.0))} + self._style: Dict[Optional[Label], LabelStyle] = { + None: ('solid', 1, (0.0, 0.0, 0.0)) + } del self.crop del self.width @@ -90,7 +94,7 @@ def width(self, value: int): def width(self): self._width = 20 - def __getitem__(self, label: str): + def __getitem__(self, label: Label) -> LabelStyle: if label not in self._style: self._style[label] = next(self._style_generator) return self._style[label] @@ -173,7 +177,7 @@ def get_y(self, segments: Iterable[Segment]) -> np.ndarray: return y - def __call__(self, resource: Union[Segment, Timeline, Annotation, Scores], + def __call__(self, resource: Resource, time: bool = True, legend: bool = True): @@ -189,6 +193,10 @@ def __call__(self, resource: Union[Segment, Timeline, Annotation, Scores], elif isinstance(resource, Scores): self.plot_scores(resource, time=time, legend=legend) + elif isinstance(resource, SlidingWindowFeature): + # TODO : check this + self.plot_feature(resource, time=time) + def plot_segment(self, segment, ax=None, time=True): if not self.crop: diff --git a/pyannote/core/segment.py b/pyannote/core/segment.py index da72338..a05bdce 100755 --- a/pyannote/core/segment.py +++ b/pyannote/core/segment.py @@ -145,7 +145,6 @@ def __iter__(self): >>> segment = Segment(start, end) >>> start, end = segment """ - # todo: is there a reason for not returning a tuple? yield self.start yield self.end @@ -261,7 +260,7 @@ def __or__(self, other: 'Segment') -> 'Segment': end = max(self.end, other.end) return Segment(start=start, end=end) - def __xor__(self, other: 'Segment'): + def __xor__(self, other: 'Segment') -> 'Segment': """Gap >>> segment = Segment(0, 10) @@ -746,7 +745,7 @@ def __next__(self) -> Segment: else: raise StopIteration() - def __iter__(self): + def __iter__(self) -> 'SlidingWindow': """Sliding window iterator Use expression 'for segment in sliding_window' diff --git a/pyannote/core/timeline.py b/pyannote/core/timeline.py index 31b11e6..c800927 100755 --- a/pyannote/core/timeline.py +++ b/pyannote/core/timeline.py @@ -88,7 +88,7 @@ See :class:`pyannote.core.Timeline` for the complete reference. """ -from typing import Optional, Iterable, List, Generator, Union, Callable, TextIO +from typing import Optional, Iterable, List, Generator, Union, Callable, TextIO, Tuple import pandas as pd from sortedcontainers import SortedList @@ -126,12 +126,14 @@ class Timeline: """ @classmethod - def from_df(cls, df: pd.DataFrame, uri: Optional[str] = None): + def from_df(cls, df: pd.DataFrame, uri: Optional[str] = None) -> 'Timeline': segments = list(df[PYANNOTE_SEGMENT]) timeline = cls(segments=segments, uri=uri) return timeline - def __init__(self, segments: Optional[Iterable[Segment]] = None, uri=None): + def __init__(self, + segments: Optional[Iterable[Segment]] = None, + uri: str = None): if segments is None: segments = () @@ -268,7 +270,7 @@ def add(self, segment: Segment) -> 'Timeline': return self - def remove(self, segment: Segment): + def remove(self, segment: Segment) -> 'Timeline': """Remove a segment (in place) Parameters @@ -300,7 +302,7 @@ def remove(self, segment: Segment): return self - def discard(self, segment: Segment): + def discard(self, segment: Segment) -> 'Timeline': """Same as `remove` See also @@ -309,10 +311,10 @@ def discard(self, segment: Segment): """ return self.remove(segment) - def __ior__(self, timeline: 'Timeline'): + def __ior__(self, timeline: 'Timeline') -> 'Timeline': return self.update(timeline) - def update(self, timeline: Segment): + def update(self, timeline: Segment) -> 'Timeline': """Add every segments of an existing timeline (in place) Parameters @@ -369,7 +371,8 @@ def union(self, timeline: 'Timeline') -> 'Timeline': segments = self.segments_set_ | timeline.segments_set_ return Timeline(segments=segments, uri=self.uri) - def co_iter(self, other: 'Timeline'): + def co_iter(self, other: 'Timeline') \ + -> Generator[Tuple[Segment, Segment], None, None]: """Iterate over pairs of intersecting segments >>> timeline1 = Timeline([Segment(0, 2), Segment(1, 2), Segment(3, 4)]) @@ -530,8 +533,6 @@ def overlapping_iter(self, t: float) -> Generator[Segment, None, None]: :func:`pyannote.core.Timeline.overlapping` """ segment = Segment(start=t, end=t) - # TODO: maybe this line should be removed? - iterable = self.segments_list_.irange(maximum=segment) for segment in self.segments_list_.irange(maximum=segment): if segment.overlaps(t): yield segment @@ -905,7 +906,8 @@ def segmentation(self) -> 'Timeline': def to_annotation(self, generator: Union[str, Iterable[Label], None, None] = 'string', - modality: Optional[str] = None): + modality: Optional[str] = None) \ + -> 'Annotation': """Turn timeline into an annotation Each segment is labeled by a unique label. @@ -937,7 +939,7 @@ def to_annotation(self, return annotation - def write_uem(self,file: TextIO): + def write_uem(self, file: TextIO): """Dump timeline to file using UEM format Parameters @@ -951,7 +953,7 @@ def write_uem(self,file: TextIO): """ uri = self.uri if self.uri else "" - + for segment in self: line = f"{uri} 1 {segment.start:.3f} {segment.end:.3f}\n" file.write(line) diff --git a/pyannote/core/utils/types.py b/pyannote/core/utils/types.py index f37e98d..f594d69 100644 --- a/pyannote/core/utils/types.py +++ b/pyannote/core/utils/types.py @@ -11,3 +11,4 @@ 'Annotation'] CropMode = Literal['intersection', 'loose', 'strict'] SegmentCropMode = Literal['center', 'loose', 'strict'] +LabelStyle = Tuple[str, int, Tuple[float, float, float]] From 7a94a487c835c0d924cb96e2ad52fc6a4a799119 Mon Sep 17 00:00:00 2001 From: hadware Date: Thu, 12 Dec 2019 20:08:30 +0100 Subject: [PATCH 10/30] Switched Generators to Iterators types hints. Fixed the Flake8 test failing in timeline.py. Added a type for LabelGenerator modes. --- pyannote/core/annotation.py | 15 +++++++-------- pyannote/core/feature.py | 11 ++++------- pyannote/core/segment.py | 4 ++-- pyannote/core/timeline.py | 19 ++++++++++++------- pyannote/core/utils/generators.py | 6 +++--- pyannote/core/utils/types.py | 5 +++-- 6 files changed, 31 insertions(+), 29 deletions(-) diff --git a/pyannote/core/annotation.py b/pyannote/core/annotation.py index 2ae1265..c1c62c1 100755 --- a/pyannote/core/annotation.py +++ b/pyannote/core/annotation.py @@ -108,7 +108,7 @@ """ import itertools -from typing import Optional, Dict, Union, Iterable, List, Set, TextIO, Tuple, Generator +from typing import Optional, Dict, Union, Iterable, List, Set, TextIO, Tuple, Iterator import numpy as np import pandas as pd @@ -255,11 +255,10 @@ def itersegments(self): return iter(self._tracks) def itertracks(self, yield_label: bool = False) \ - -> Generator[Union[ + -> Iterator[Union[ Tuple[Segment, TrackName], Tuple[Segment, TrackName, Label] - ], - None, None]: + ]]: """Iterate over tracks (in chronological order) Parameters @@ -1176,10 +1175,10 @@ def support(self, collar: float = 0.) -> 'Annotation': return support - def co_iter(self, other: 'Annotation') -> \ - Generator[Tuple[Tuple[Segment, TrackName], - Tuple[Segment, TrackName]], - None, None]: + def co_iter(self, other: 'Annotation') \ + -> Iterator[Tuple[Tuple[Segment, TrackName], + Tuple[Segment, TrackName]] + ]: """Iterate over pairs of intersecting tracks Parameters diff --git a/pyannote/core/feature.py b/pyannote/core/feature.py index 8714a09..d008ada 100755 --- a/pyannote/core/feature.py +++ b/pyannote/core/feature.py @@ -34,12 +34,11 @@ See :class:`pyannote.core.SlidingWindowFeature` for the complete reference. """ +import numbers import warnings -from logging import warning -from typing import Any, Tuple, Optional, Union, Generator +from typing import Tuple, Optional, Union, Iterator import numpy as np -import numbers from pyannote.core.utils.types import CropMode from .segment import Segment @@ -110,10 +109,8 @@ def __next__(self) -> Tuple[Segment, np.ndarray]: def next(self): return self.__next__() - def iterfeatures(self, window: Optional[bool] = False) -> \ - Generator[Union[Tuple[np.ndarray, Segment], - np.ndarray], - None, None]: + def iterfeatures(self, window: Optional[bool] = False) \ + -> Iterator[Union[Tuple[np.ndarray, Segment], np.ndarray]]: """Feature vector iterator Parameters diff --git a/pyannote/core/segment.py b/pyannote/core/segment.py index a05bdce..13b719d 100755 --- a/pyannote/core/segment.py +++ b/pyannote/core/segment.py @@ -66,7 +66,7 @@ See :class:`pyannote.core.Segment` for the complete reference. """ import warnings -from typing import Union, Optional, Tuple, List +from typing import Union, Optional, Tuple, List, Iterator import numpy as np from dataclasses import dataclass @@ -140,7 +140,7 @@ def middle(self) -> float: """Segment mid-time (read-only)""" return .5 * (self.start + self.end) - def __iter__(self): + def __iter__(self) -> Iterator[float]: """Unpack segment boundaries >>> segment = Segment(start, end) >>> start, end = segment diff --git a/pyannote/core/timeline.py b/pyannote/core/timeline.py index c800927..88ed6ad 100755 --- a/pyannote/core/timeline.py +++ b/pyannote/core/timeline.py @@ -88,7 +88,8 @@ See :class:`pyannote.core.Timeline` for the complete reference. """ -from typing import Optional, Iterable, List, Generator, Union, Callable, TextIO, Tuple +from typing import (Optional, Iterable, List, Union, Callable, + TextIO, Tuple, TYPE_CHECKING, Iterator) import pandas as pd from sortedcontainers import SortedList @@ -98,6 +99,12 @@ from .segment import Segment from .utils.types import Support, Label +#  this is a moderately ugly way to import `Annotation` to the namespace +# without causing some circular imports : +# https://stackoverflow.com/questions/39740632/python-type-hinting-without-cyclic-imports +if TYPE_CHECKING: + from .annotation import Annotation + # ===================================================================== # Timeline class @@ -371,8 +378,7 @@ def union(self, timeline: 'Timeline') -> 'Timeline': segments = self.segments_set_ | timeline.segments_set_ return Timeline(segments=segments, uri=self.uri) - def co_iter(self, other: 'Timeline') \ - -> Generator[Tuple[Segment, Segment], None, None]: + def co_iter(self, other: 'Timeline') -> Iterator[Tuple[Segment, Segment]]: """Iterate over pairs of intersecting segments >>> timeline1 = Timeline([Segment(0, 2), Segment(1, 2), Segment(3, 4)]) @@ -525,7 +531,7 @@ def overlapping(self, t: float) -> List[Segment]: """ return list(self.overlapping_iter(t)) - def overlapping_iter(self, t: float) -> Generator[Segment, None, None]: + def overlapping_iter(self, t: float) -> Iterator[Segment]: """Like `overlapping` but returns a segment iterator instead See also @@ -682,7 +688,7 @@ def extent(self) -> Segment: import numpy as np return Segment(start=np.inf, end=-np.inf) - def support_iter(self) -> Generator[Segment, None, None]: + def support_iter(self) -> Iterator[Segment]: """Like `support` but returns a segment generator instead See also @@ -764,8 +770,7 @@ def duration(self) -> float: # of the segments in the timeline support. return sum(s.duration for s in self.support_iter()) - def gaps_iter(self, support: Optional[Support] = None) \ - -> Generator[Segment, None, None]: + def gaps_iter(self, support: Optional[Support] = None) -> Iterator[Segment]: """Like `gaps` but returns a segment generator instead See also diff --git a/pyannote/core/utils/generators.py b/pyannote/core/utils/generators.py index 54b5934..00a9600 100644 --- a/pyannote/core/utils/generators.py +++ b/pyannote/core/utils/generators.py @@ -28,7 +28,7 @@ import itertools from string import ascii_uppercase -from typing import Iterable, Union, List, Set, Optional, Generator +from typing import Iterable, Union, List, Set, Optional, Iterator def pairwise(iterable: Iterable): @@ -39,7 +39,7 @@ def pairwise(iterable: Iterable): def string_generator(skip: Optional[Union[List, Set]] = None) \ - -> Generator[str, None, None]: + -> Iterator[str]: """Label generator Parameters @@ -82,7 +82,7 @@ def string_generator(skip: Optional[Union[List, Set]] = None) \ r = r + 1 -def int_generator(): +def int_generator() -> Iterator[int]: i = 0 while True: yield i diff --git a/pyannote/core/utils/types.py b/pyannote/core/utils/types.py index f594d69..7624a0d 100644 --- a/pyannote/core/utils/types.py +++ b/pyannote/core/utils/types.py @@ -1,10 +1,11 @@ -from typing import Hashable, Union, Tuple, Generator +from typing import Hashable, Union, Tuple, Iterator from typing_extensions import Literal Label = Hashable Support = Union['Segment', 'Timeline'] -LabelGenerator = Union[str, Generator[Label, None, None]] +LabelGeneratorMode = Literal['int', 'string'] +LabelGenerator = Union[LabelGeneratorMode, Iterator[Label]] TrackName = Union[str, int] Key = Union['Segment', Tuple['Segment', TrackName]] Resource = Union['Segment', 'Timeline', 'Score', 'SlidingWindowFeature', From acb581cda9fe2fe52d51babe23221bd4acfc512f Mon Sep 17 00:00:00 2001 From: hadware Date: Fri, 13 Dec 2019 01:09:25 +0100 Subject: [PATCH 11/30] Changed `SegmentCropMode` to `Alignment` --- pyannote/core/notebook.py | 1 - pyannote/core/segment.py | 6 +++--- pyannote/core/utils/types.py | 2 +- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/pyannote/core/notebook.py b/pyannote/core/notebook.py index 47d3ed5..27ee264 100644 --- a/pyannote/core/notebook.py +++ b/pyannote/core/notebook.py @@ -194,7 +194,6 @@ def __call__(self, resource: Resource, self.plot_scores(resource, time=time, legend=legend) elif isinstance(resource, SlidingWindowFeature): - # TODO : check this self.plot_feature(resource, time=time) def plot_segment(self, segment, ax=None, time=True): diff --git a/pyannote/core/segment.py b/pyannote/core/segment.py index 13b719d..d5258ae 100755 --- a/pyannote/core/segment.py +++ b/pyannote/core/segment.py @@ -72,7 +72,7 @@ from dataclasses import dataclass # 1 μs (one microsecond) -from pyannote.core.utils.types import SegmentCropMode +from pyannote.core.utils.types import Alignment SEGMENT_PRECISION = 1e-6 @@ -452,7 +452,7 @@ def closest_frame(self, t: float) -> int: (t - self.__start - .5 * self.__duration) / self.__step )) - def samples(self, from_duration: float, mode: SegmentCropMode = 'strict') -> int: + def samples(self, from_duration: float, mode: Alignment = 'strict') -> int: """Number of frames Parameters @@ -480,7 +480,7 @@ def samples(self, from_duration: float, mode: SegmentCropMode = 'strict') -> int return int(np.rint((from_duration / self.step))) def crop(self, focus: Union[Segment, 'Timeline'], - mode: SegmentCropMode = 'loose', + mode: Alignment = 'loose', fixed: Optional[float] = None, return_ranges: Optional[bool] = False) -> \ Union[np.ndarray, List[List[int]]]: diff --git a/pyannote/core/utils/types.py b/pyannote/core/utils/types.py index 7624a0d..218e03e 100644 --- a/pyannote/core/utils/types.py +++ b/pyannote/core/utils/types.py @@ -11,5 +11,5 @@ Resource = Union['Segment', 'Timeline', 'Score', 'SlidingWindowFeature', 'Annotation'] CropMode = Literal['intersection', 'loose', 'strict'] -SegmentCropMode = Literal['center', 'loose', 'strict'] +Alignment = Literal['center', 'loose', 'strict'] LabelStyle = Tuple[str, int, Tuple[float, float, float]] From 4444ddc271440bf02c5483a167fdbf923fe78868 Mon Sep 17 00:00:00 2001 From: hadware Date: Fri, 13 Dec 2019 02:01:20 +0100 Subject: [PATCH 12/30] For some reason the timeline cropping functions slipped through the cracks. --- pyannote/core/timeline.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/pyannote/core/timeline.py b/pyannote/core/timeline.py index 88ed6ad..614e19f 100755 --- a/pyannote/core/timeline.py +++ b/pyannote/core/timeline.py @@ -97,7 +97,7 @@ from . import PYANNOTE_URI, PYANNOTE_SEGMENT from .json import PYANNOTE_JSON, PYANNOTE_JSON_CONTENT from .segment import Segment -from .utils.types import Support, Label +from .utils.types import Support, Label, CropMode #  this is a moderately ugly way to import `Annotation` to the namespace # without causing some circular imports : @@ -408,7 +408,11 @@ def co_iter(self, other: 'Timeline') -> Iterator[Tuple[Segment, Segment]]: if segment.intersects(other_segment): yield segment, other_segment - def crop_iter(self, support, mode='intersection', returns_mapping=False): + def crop_iter(self, + support: Support, + mode: CropMode = 'intersection', + returns_mapping: bool = False) \ + -> Iterator[Union[Segment, Segment], Segment]: """Like `crop` but returns a segment iterator instead See also @@ -459,7 +463,10 @@ def crop_iter(self, support, mode='intersection', returns_mapping=False): else: yield mapped_to - def crop(self, support, mode='intersection', returns_mapping=False): + def crop(self, + support: Support, + mode: CropMode = 'intersection', + returns_mapping: bool = False) -> 'Timeline': """Crop timeline to new support Parameters From 234d00f617d94c53f0273b32c0d68d818669757b Mon Sep 17 00:00:00 2001 From: hadware Date: Fri, 13 Dec 2019 02:11:43 +0100 Subject: [PATCH 13/30] Tweaking the TL cropping type hinting. --- pyannote/core/timeline.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pyannote/core/timeline.py b/pyannote/core/timeline.py index 614e19f..61b3cb8 100755 --- a/pyannote/core/timeline.py +++ b/pyannote/core/timeline.py @@ -89,7 +89,7 @@ See :class:`pyannote.core.Timeline` for the complete reference. """ from typing import (Optional, Iterable, List, Union, Callable, - TextIO, Tuple, TYPE_CHECKING, Iterator) + TextIO, Tuple, TYPE_CHECKING, Iterator, Dict) import pandas as pd from sortedcontainers import SortedList @@ -466,7 +466,8 @@ def crop_iter(self, def crop(self, support: Support, mode: CropMode = 'intersection', - returns_mapping: bool = False) -> 'Timeline': + returns_mapping: bool = False) \ + -> Union['Timeline', Tuple['Timeline', Dict[Segment, Segment]]]: """Crop timeline to new support Parameters From bbac3a304cf3f4c9aaa1ac23806574fb15f43ca9 Mon Sep 17 00:00:00 2001 From: hadware Date: Fri, 13 Dec 2019 03:33:47 +0100 Subject: [PATCH 14/30] Bugfix on the type hint. --- pyannote/core/timeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyannote/core/timeline.py b/pyannote/core/timeline.py index 61b3cb8..313f527 100755 --- a/pyannote/core/timeline.py +++ b/pyannote/core/timeline.py @@ -412,7 +412,7 @@ def crop_iter(self, support: Support, mode: CropMode = 'intersection', returns_mapping: bool = False) \ - -> Iterator[Union[Segment, Segment], Segment]: + -> Iterator[Union[Tuple[Segment, Segment], Segment]]: """Like `crop` but returns a segment iterator instead See also From e436aa58497387fd2fa53746a4d3445473231caf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Herv=C3=A9=20BREDIN?= Date: Fri, 13 Dec 2019 11:13:10 +0100 Subject: [PATCH 15/30] chore: use relative import for Alignment --- pyannote/core/segment.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pyannote/core/segment.py b/pyannote/core/segment.py index d5258ae..2bf5e1d 100755 --- a/pyannote/core/segment.py +++ b/pyannote/core/segment.py @@ -65,18 +65,17 @@ See :class:`pyannote.core.Segment` for the complete reference. """ + import warnings from typing import Union, Optional, Tuple, List, Iterator +from .utils.types import Alignment import numpy as np from dataclasses import dataclass # 1 μs (one microsecond) -from pyannote.core.utils.types import Alignment - SEGMENT_PRECISION = 1e-6 - # setting 'frozen' to True makes it hashable and immutable @dataclass(frozen=True, order=True) class Segment: From f73836a26a6d781fc043b403bd2d780fd6564ac4 Mon Sep 17 00:00:00 2001 From: hadware Date: Tue, 29 Sep 2020 23:10:42 +0200 Subject: [PATCH 16/30] Started implementing the textgrid classes from an annotation and timeline base. --- pyannote/core/textgrid.py | 1380 ++++++++++++++++++++++++++++++++++ pyannote/core/utils/types.py | 2 + 2 files changed, 1382 insertions(+) create mode 100644 pyannote/core/textgrid.py diff --git a/pyannote/core/textgrid.py b/pyannote/core/textgrid.py new file mode 100644 index 0000000..6f3e37c --- /dev/null +++ b/pyannote/core/textgrid.py @@ -0,0 +1,1380 @@ +#!/usr/bin/env python +# encoding: utf-8 + +# The MIT License (MIT) + +# Copyright (c) 2014-2020 CNRS + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +# AUTHORS +# Hervé BREDIN - http://herve.niderb.fr +# Paul LERNER + +""" +########## +Annotation +########## + +.. plot:: pyplots/annotation.py + +:class:`pyannote.core.Annotation` instances are ordered sets of non-empty +tracks: + + - ordered, because segments are sorted by start time (and end time in case of tie) + - set, because one cannot add twice the same track + - non-empty, because one cannot add empty track + +A track is a (support, name) pair where `support` is a Segment instance, +and `name` is an additional identifier so that it is possible to add multiple +tracks with the same support. + +To define the annotation depicted above: + +.. code-block:: ipython + + In [1]: from pyannote.core import Annotation, Segment + + In [6]: annotation = Annotation() + ...: annotation[Segment(1, 5)] = 'Carol' + ...: annotation[Segment(6, 8)] = 'Bob' + ...: annotation[Segment(12, 18)] = 'Carol' + ...: annotation[Segment(7, 20)] = 'Alice' + ...: + +which is actually a shortcut for + +.. code-block:: ipython + + In [6]: annotation = Annotation() + ...: annotation[Segment(1, 5), '_'] = 'Carol' + ...: annotation[Segment(6, 8), '_'] = 'Bob' + ...: annotation[Segment(12, 18), '_'] = 'Carol' + ...: annotation[Segment(7, 20), '_'] = 'Alice' + ...: + +where all tracks share the same (default) name ``'_'``. + +In case two tracks share the same support, use a different track name: + +.. code-block:: ipython + + In [6]: annotation = Annotation(uri='my_video_file', modality='speaker') + ...: annotation[Segment(1, 5), 1] = 'Carol' # track name = 1 + ...: annotation[Segment(1, 5), 2] = 'Bob' # track name = 2 + ...: annotation[Segment(12, 18)] = 'Carol' + ...: + +The track name does not have to be unique over the whole set of tracks. + +.. note:: + + The optional *uri* and *modality* keywords argument can be used to remember + which document and modality (e.g. speaker or face) it describes. + +Several convenient methods are available. Here are a few examples: + +.. code-block:: ipython + + In [9]: annotation.labels() # sorted list of labels + Out[9]: ['Bob', 'Carol'] + + In [10]: annotation.chart() # label duration chart + Out[10]: [('Carol', 10), ('Bob', 4)] + + In [11]: list(annotation.itertracks()) + Out[11]: [(, 1), (, 2), (, u'_')] + + In [12]: annotation.label_timeline('Carol') + Out[12]: , ])> + +See :class:`pyannote.core.Annotation` for the complete reference. +""" +import itertools +from collections import defaultdict +from pathlib import Path +from typing import Optional, Dict, Union, Iterable, List, Set, TextIO, Tuple, Iterator, Text + +import numpy as np +from sortedcontainers import SortedDict + +from pyannote.core import Annotation +from . import PYANNOTE_URI, PYANNOTE_MODALITY, \ + PYANNOTE_SEGMENT, PYANNOTE_TRACK, PYANNOTE_LABEL +from .json import PYANNOTE_JSON, PYANNOTE_JSON_CONTENT +from .segment import Segment +from .timeline import Timeline +from .utils.generators import string_generator, int_generator +from .utils.types import Label, Key, Support, LabelGenerator, TierName, CropMode + + +class PraatTier: + + def __init__(self, name: str = None, uri: str = None): + self.name = name + self.uri = uri + self._segments = SortedDict() + + self._needs_labels_update = False + self._labels_index: Dict[str, Timeline] = dict() + + self._needs_timeline_update = False + self._timeline = Timeline() + + def __setitem__(self, key: Segment, value: str): + self._segments.irange() + self._needs_timeline_update = True + self._needs_labels_update = True + + def __getitem__(self, item: Segment) -> str: + return self._segments[item] + + def __delitem__(self, item: Segment): + del self._segments[item] + self._needs_timeline_update = True + self._needs_labels_update = True + + def get_index(self, k: int) -> Tuple[Segment, str]: + """ + # TODO + :param k: + :return: + """ + return self._segments.peekitem(k) + + def get_timeline(self, copy: bool = False) -> Timeline: + pass # TODO + + def update(self, tier: 'PraatTier') -> 'PraatTier': + """Add every segments of an existing tier (in place) + + Parameters + ---------- + tier : PraatTier + Tier whose segments and their annotations are being added + + Returns + ------- + self : PraatTier + Updated tier + + Note + ---- + Only segments that do not already exist will be added, as a timeline is + meant to be a **set** of segments (not a list). + + """ + pass # TODO + + def __len__(self): + """Number of segments in the tier + + >>> len(tier) # tier contains three segments + 3 + """ + return len(self._segments) + + def __nonzero__(self): + return self.__bool__() + + def __bool__(self): + """Emptiness + + >>> if tier: + ... # timeline is empty + ... else: + ... # timeline is not empty + """ + return len(self._segments) > 0 + + def __iter__(self) -> Iterable[Segment, str]: + """Iterate over segments (in chronological order) + + >>> for segment, annotation in tier: + ... # do something with the segment + + See also + -------- + :class:`pyannote.core.Segment` describes how segments are sorted. + """ + return iter(self._segments.items()) + + def __eq__(self, other: 'PraatTier'): + """Equality + + Two PraatTiers are equal if and only if their segments and their annotations are equal. + + # TODO : doc + >>> timeline1 = Timeline([Segment(0, 1), Segment(2, 3)]) + >>> timeline2 = Timeline([Segment(2, 3), Segment(0, 1)]) + >>> timeline3 = Timeline([Segment(2, 3)]) + >>> timeline1 == timeline2 + True + >>> timeline1 == timeline3 + False + """ + return self._segments == other._segments + + def __ne__(self, other: 'PraatTier'): + """Inequality""" + return self._segments != other._segments + + def __or__(self, timeline: 'Timeline') -> 'Timeline': + return self.union(timeline) + + def co_iter(self, other: 'Timeline') -> Iterator[Tuple[Segment, Segment]]: + """Iterate over pairs of intersecting segments + + >>> timeline1 = Timeline([Segment(0, 2), Segment(1, 2), Segment(3, 4)]) + >>> timeline2 = Timeline([Segment(1, 3), Segment(3, 5)]) + >>> for segment1, segment2 in timeline1.co_iter(timeline2): + ... print(segment1, segment2) + (, ) + (, ) + (, ) + + Parameters + ---------- + other : Timeline + Second timeline + + Returns + ------- + iterable : (Segment, Segment) iterable + Yields pairs of intersecting segments in chronological order. + """ + + for segment in self.segments_list_: + + # iterate over segments that starts before 'segment' ends + temp = Segment(start=segment.end, end=segment.end) + for other_segment in other.segments_list_.irange(maximum=temp): + if segment.intersects(other_segment): + yield segment, other_segment + + def crop_iter(self, + support: Support, + mode: CropMode = 'intersection', + returns_mapping: bool = False) \ + -> Iterator[Union[Tuple[Segment, Segment], Segment]]: + """Like `crop` but returns a segment iterator instead + + See also + -------- + :func:`pyannote.core.Timeline.crop` + """ + + if mode not in {'loose', 'strict', 'intersection'}: + raise ValueError("Mode must be one of 'loose', 'strict', or " + "'intersection'.") + + if not isinstance(support, (Segment, Timeline)): + raise TypeError("Support must be a Segment or a Timeline.") + + if isinstance(support, Segment): + # corner case where "support" is empty + if support: + segments = [support] + else: + segments = [] + + support = Timeline(segments=segments, uri=self.uri) + for yielded in self.crop_iter(support, mode=mode, + returns_mapping=returns_mapping): + yield yielded + return + + # if 'support' is a `Timeline`, we use its support + support = support.support() + + # loose mode + if mode == 'loose': + for segment, _ in self.co_iter(support): + yield segment + return + + # strict mode + if mode == 'strict': + for segment, other_segment in self.co_iter(support): + if segment in other_segment: + yield segment + return + + # intersection mode + for segment, other_segment in self.co_iter(support): + mapped_to = segment & other_segment + if not mapped_to: + continue + if returns_mapping: + yield segment, mapped_to + else: + yield mapped_to + + def crop(self, + support: Support, + mode: CropMode = 'intersection', + returns_mapping: bool = False) \ + -> 'PraatTier': + """Crop timeline to new support + + Parameters + ---------- + support : Segment or Timeline + If `support` is a `Timeline`, its support is used. + mode : {'strict', 'loose', 'intersection'}, optional + Controls how segments that are not fully included in `support` are + handled. 'strict' mode only keeps fully included segments. 'loose' + mode keeps any intersecting segment. 'intersection' mode keeps any + intersecting segment but replace them by their actual intersection. + returns_mapping : bool, optional + In 'intersection' mode, return a dictionary whose keys are segments + of the cropped timeline, and values are list of the original + segments that were cropped. Defaults to False. + + Returns + ------- + cropped : Timeline + Cropped timeline + mapping : dict + When 'returns_mapping' is True, dictionary whose keys are segments + of 'cropped', and values are lists of corresponding original + segments. + + Examples + -------- + + >>> timeline = Timeline([Segment(0, 2), Segment(1, 2), Segment(3, 4)]) + >>> timeline.crop(Segment(1, 3)) + ])> + + >>> timeline.crop(Segment(1, 3), mode='loose') + , ])> + + >>> timeline.crop(Segment(1, 3), mode='strict') + ])> + + >>> cropped, mapping = timeline.crop(Segment(1, 3), returns_mapping=True) + >>> print(mapping) + {: [, ]} + + """ + + if mode == 'intersection' and returns_mapping: + segments, mapping = [], {} + for segment, mapped_to in self.crop_iter(support, + mode='intersection', + returns_mapping=True): + segments.append(mapped_to) + mapping[mapped_to] = mapping.get(mapped_to, list()) + [segment] + return Timeline(segments=segments, uri=self.uri), mapping + + return Timeline(segments=self.crop_iter(support, mode=mode), + uri=self.uri) + + def overlapping(self, t: float) -> List[Segment]: + """Get list of segments overlapping `t` + + Parameters + ---------- + t : float + Timestamp, in seconds. + + Returns + ------- + segments : list + List of all segments of timeline containing time t + """ + return list(self.overlapping_iter(t)) + + def overlapping_iter(self, t: float) -> Iterator[Segment]: + """Like `overlapping` but returns a segment iterator instead + + See also + -------- + :func:`pyannote.core.Timeline.overlapping` + """ + segment = Segment(start=t, end=t) + for segment in self.segments_list_.irange(maximum=segment): + if segment.overlaps(t): + yield segment + + def __str__(self): + """Human-readable representation + + >>> timeline = Timeline(segments=[Segment(0, 10), Segment(1, 13.37)]) + >>> print(timeline) + [[ 00:00:00.000 --> 00:00:10.000] + [ 00:00:01.000 --> 00:00:13.370]] + + """ + + n = len(self.segments_list_) + string = "[" + for i, segment in enumerate(self.segments_list_): + string += str(segment) + string += "\n " if i + 1 < n else "" + string += "]" + return string + + def __repr__(self): + """Computer-readable representation + + >>> Timeline(segments=[Segment(0, 10), Segment(1, 13.37)]) + , ])> + + """ + + return "" % (self.uri, + list(self.segments_list_)) + + def __contains__(self, included: Union[Segment, 'Timeline']): + """Inclusion + + Check whether every segment of `included` does exist in timeline. + + Parameters + ---------- + included : Segment or Timeline + Segment or timeline being checked for inclusion + + Returns + ------- + contains : bool + True if every segment in `included` exists in timeline, + False otherwise + + Examples + -------- + >>> timeline1 = Timeline(segments=[Segment(0, 10), Segment(1, 13.37)]) + >>> timeline2 = Timeline(segments=[Segment(0, 10)]) + >>> timeline1 in timeline2 + False + >>> timeline2 in timeline1 + >>> Segment(1, 13.37) in timeline1 + True + + """ + + if isinstance(included, Segment): + return included in self.segments_set_ + + elif isinstance(included, Timeline): + return self.segments_set_.issuperset(included.segments_set_) + + else: + raise TypeError( + 'Checking for inclusion only supports Segment and ' + 'Timeline instances') + + def empty(self) -> 'PraatTier': + """Return an empty copy + + Returns + ------- + empty : PraatTier + Empty timeline using the same 'uri' attribute. + + """ + return PraatTier(self.name, uri=self.uri) + + def covers(self, other: Union[Timeline, 'PraatTier']) -> bool: + """Check whether other timeline is fully covered by the timeline + + Parameter + --------- + other : Timeline + Second timeline + + Returns + ------- + covers : bool + True if timeline covers "other" timeline entirely. False if at least + one segment of "other" is not fully covered by timeline + """ + + # compute gaps within "other" extent + # this is where we should look for possible faulty segments + gaps = self.gaps(support=other.extent()) + + # if at least one gap intersects with a segment from "other", + # "self" does not cover "other" entirely --> return False + for _ in gaps.co_iter(other): + return False + + # if no gap intersects with a segment from "other", + # "self" covers "other" entirely --> return True + return True + + def copy(self, segment_func: Optional[Callable[[Segment], Segment]] = None) \ + -> 'Timeline': + """Get a copy of the timeline + + If `segment_func` is provided, it is applied to each segment first. + + Parameters + ---------- + segment_func : callable, optional + Callable that takes a segment as input, and returns a segment. + Defaults to identity function (segment_func(segment) = segment) + + Returns + ------- + timeline : Timeline + Copy of the timeline + + """ + + # if segment_func is not provided + # just add every segment + if segment_func is None: + return Timeline(segments=self.segments_list_, uri=self.uri) + + # if is provided + # apply it to each segment before adding them + return Timeline(segments=[segment_func(s) for s in self.segments_list_], + uri=self.uri) + + def extent(self) -> Segment: + """Extent + + The extent of a timeline is the segment of minimum duration that + contains every segments of the timeline. It is unique, by definition. + The extent of an empty timeline is an empty segment. + + A picture is worth a thousand words:: + + timeline + |------| |------| |----| + |--| |-----| |----------| + + timeline.extent() + |--------------------------------| + + Returns + ------- + extent : Segment + Timeline extent + + Examples + -------- + >>> timeline = Timeline(segments=[Segment(0, 1), Segment(9, 10)]) + >>> timeline.extent() + + + """ + if self.segments_set_: + segments_boundaries_ = self.segments_boundaries_ + start = segments_boundaries_[0] + end = segments_boundaries_[-1] + return Segment(start=start, end=end) + else: + import numpy as np + return Segment(start=np.inf, end=-np.inf) + + def support_iter(self, collar: float = 0.) -> Iterator[Segment]: + """Like `support` but returns a segment generator instead + + See also + -------- + :func:`pyannote.core.Timeline.support` + """ + + # The support of an empty timeline is an empty timeline. + if not self: + return + + # Principle: + # * gather all segments with no gap between them + # * add one segment per resulting group (their union |) + # Note: + # Since segments are kept sorted internally, + # there is no need to perform an exhaustive segment clustering. + # We just have to consider them in their natural order. + + # Initialize new support segment + # as very first segment of the timeline + new_segment = self.segments_list_[0] + + for segment in self: + + # If there is no gap between new support segment and next segment + # OR there is a gap with duration < collar seconds, + possible_gap = segment ^ new_segment + if not possible_gap or possible_gap.duration < collar: + # Extend new support segment using next segment + new_segment |= segment + + # If there actually is a gap and the gap duration >= collar + # seconds, + else: + yield new_segment + + # Initialize new support segment as next segment + # (right after the gap) + new_segment = segment + + # Add new segment to the timeline support + yield new_segment + + def support(self, collar: float = 0.) -> 'Timeline': + """Timeline support + + The support of a timeline is the timeline with the minimum number of + segments with exactly the same time span as the original timeline. It + is (by definition) unique and does not contain any overlapping + segments. + + A picture is worth a thousand words:: + + collar + |---| + + timeline + |------| |------| |----| + |--| |-----| |----------| + + timeline.support() + |------| |--------| |----------| + + timeline.support(collar) + |------------------| |----------| + + Parameters + ---------- + collar : float, optional + Merge separated by less than `collar` seconds. This is why there + are only two segments in the final timeline in the above figure. + Defaults to 0. + + Returns + ------- + support : Timeline + Timeline support + """ + return Timeline(segments=self.support_iter(collar), uri=self.uri) + + def duration(self) -> float: + """Timeline duration + + The timeline duration is the sum of the durations of the segments + in the timeline support. + + Returns + ------- + duration : float + Duration of timeline support, in seconds. + """ + + # The timeline duration is the sum of the durations + # of the segments in the timeline support. + return sum(s.duration for s in self.support_iter()) + + def gaps_iter(self, support: Optional[Support] = None) -> Iterator[Segment]: + """Like `gaps` but returns a segment generator instead + + See also + -------- + :func:`pyannote.core.Timeline.gaps` + + """ + + if support is None: + support = self.extent() + + if not isinstance(support, (Segment, Timeline)): + raise TypeError("unsupported operand type(s) for -':" + "%s and Timeline." % type(support).__name__) + + # segment support + if isinstance(support, Segment): + + # `end` is meant to store the end time of former segment + # initialize it with beginning of provided segment `support` + end = support.start + + # support on the intersection of timeline and provided segment + for segment in self.crop(support, mode='intersection').support(): + + # add gap between each pair of consecutive segments + # if there is no gap, segment is empty, therefore not added + gap = Segment(start=end, end=segment.start) + if gap: + yield gap + + # keep track of the end of former segment + end = segment.end + + # add final gap (if not empty) + gap = Segment(start=end, end=support.end) + if gap: + yield gap + + # timeline support + elif isinstance(support, Timeline): + + # yield gaps for every segment in support of provided timeline + for segment in support.support(): + for gap in self.gaps_iter(support=segment): + yield gap + + def gaps(self, support: Optional[Support] = None) -> 'Timeline': + """Gaps + + A picture is worth a thousand words:: + + tier + |------| |------| |----| + + timeline.gaps() + |--| |----| + + Parameters + ---------- + support : None, Segment or Timeline + Support in which gaps are looked for. Defaults to timeline extent + + Returns + ------- + gaps : Timeline + Timeline made of all gaps from original timeline, and delimited + by provided support + + See also + -------- + :func:`pyannote.core.Timeline.extent` + + """ + return Timeline(segments=self.gaps_iter(support=support), + uri=self.uri) + + def segmentation(self) -> 'Timeline': + """Segmentation + + Create the unique timeline with same support and same set of segment + boundaries as original timeline, but with no overlapping segments. + + A picture is worth a thousand words:: + + timeline + |------| |------| |----| + |--| |-----| |----------| + + timeline.segmentation() + |-|--|-| |-|---|--| |--|----|--| + + Returns + ------- + timeline : Timeline + (unique) timeline with same support and same set of segment + boundaries as original timeline, but with no overlapping segments. + """ + # COMPLEXITY: O(n) + support = self.support() + + # COMPLEXITY: O(n.log n) + # get all boundaries (sorted) + # |------| |------| |----| + # |--| |-----| |----------| + # becomes + # | | | | | | | | | | | | + timestamps = set([]) + for (start, end) in self: + timestamps.add(start) + timestamps.add(end) + timestamps = sorted(timestamps) + + # create new partition timeline + # | | | | | | | | | | | | + # becomes + # |-|--|-| |-|---|--| |--|----|--| + + # start with an empty copy + timeline = Timeline(uri=self.uri) + + if len(timestamps) == 0: + return Timeline(uri=self.uri) + + segments = [] + start = timestamps[0] + for end in timestamps[1:]: + # only add segments that are covered by original timeline + segment = Segment(start=start, end=end) + if segment and support.overlapping(segment.middle): + segments.append(segment) + # next segment... + start = end + + return Timeline(segments=segments, uri=self.uri) + + def to_annotation(self, modality: Optional[str] = None) -> 'Annotation': + """Turn tier into an annotation + + Each segment is labeled by a unique label. + + Parameters + ---------- + generator : 'string', 'int', or iterable, optional + If 'string' (default) generate string labels. If 'int', generate + integer labels. If iterable, use it to generate labels. + modality : str, optional + + Returns + ------- + annotation : Annotation + Annotation + """ + + from .annotation import Annotation + annotation = Annotation(uri=self.uri, modality=modality) + if generator == 'string': + from .utils.generators import string_generator + generator = string_generator() + elif generator == 'int': + from .utils.generators import int_generator + generator = int_generator() + + for segment in self: + annotation[segment] = next(generator) + + return annotation + + +class PraatTextGrid: + """Tiered Annotation. Implementation of Praat's TextGrid file structure + + Parameters + ---------- + uri : string, optional + name of annotated resource (e.g. audio or video file) + modality : string, optional + name of annotated modality + + Returns + ------- + annotation : Annotation + New annotation + + """ + + def __init__(self, uri: Optional[str] = None): + + self._uri: Optional[str] = uri + self.modality: Optional[str] = modality + + # sorted dictionary + # values: {tiername: tier} dictionary + self._tiers: Dict[TierName, PraatTier] = SortedDict() + + # timeline meant to store all annotated segments + self._timeline: Timeline = None + self._timelineNeedsUpdate: bool = True + + @property + def uri(self): + return self._uri + + @uri.setter + def uri(self, uri: str): + # update uri for all internal timelines + timeline = self.get_timeline(copy=False) + timeline.uri = uri + self._uri = uri + + @property + def tiers(self) -> List[TierName]: + return list(self._tiers.keys()) + + @property + def tiers_nb(self): + return len(self._tiers) + + def __len__(self): + """Number of segments + + >>> len(textgrid) # textgrid contains 10 segments + 10 + """ + return sum(len(tier) for tier in self._tiers.values()) + + def __nonzero__(self): + return self.__bool__() + + def __bool__(self): + """Emptiness + + >>> if annotation: + ... # annotation is empty + ... else: + ... # annotation is not empty + """ + return len(self) > 0 + + def itersegments(self): + """Iterate over segments (in chronological order) + + >>> for segment in annotation.itersegments(): + ... # do something with the segment + + See also + -------- + :class:`pyannote.core.Segment` describes how segments are sorted. + """ + return iter(self._timeline) + + def __iter__(self): + return iter(self._tiers.items()) + + def _update_timeline(self): + segments = list(itertools.chain.from_iterable(self._tiers.keys())) + self._timeline = Timeline(segments=segments, uri=self.uri) + self._timelineNeedsUpdate = False + + def get_timeline(self, copy: bool = True) -> Timeline: + """Get timeline made of all annotated segments + + Parameters + ---------- + copy : bool, optional + Defaults (True) to returning a copy of the internal timeline. + Set to False to return the actual internal timeline (faster). + + Returns + ------- + timeline : Timeline + Timeline made of all annotated segments. + + Note + ---- + In case copy is set to False, be careful **not** to modify the returned + timeline, as it may lead to weird subsequent behavior of the annotation + instance. + + """ + if self._timelineNeedsUpdate: + self._update_timeline() + if copy: + return self._timeline.copy() + return self._timeline + + def __eq__(self, other: 'PraatTextGrid'): + """Equality + + >>> annotation == other + + Two annotations are equal if and only if their tracks and associated + labels are equal. + """ + pairOfTracks = itertools.zip_longest( + self.itertracks(yield_label=True), + other.itertracks(yield_label=True)) + return all(t1 == t2 for t1, t2 in pairOfTracks) + + def __ne__(self, other: 'PraatTextGrid'): + """Inequality""" + pairOfTracks = itertools.zip_longest( + self.itertracks(yield_label=True), + other.itertracks(yield_label=True)) + + return any(t1 != t2 for t1, t2 in pairOfTracks) + + def __contains__(self, included: Union[Segment, Timeline]): + """Inclusion + + Check whether every segment of `included` does exist in annotation. + + Parameters + ---------- + included : Segment or Timeline + Segment or timeline being checked for inclusion + + Returns + ------- + contains : bool + True if every segment in `included` exists in timeline, + False otherwise + + """ + return included in self.get_timeline(copy=False) + + def to_textgrid(self, file: Union[str, Path, TextIO]): + pass + + def to_annotation(self, modality: Optional[str] = None) -> Annotation: + """Convert to an annotation object. The new annotation's labels + are the tier names of each segments. In short, the segment's + # TODO : visual example + + Parameters + ---------- + modality: optional str + + Returns + ------- + annotation : Annotation + A new Annotation Object + + Note + ---- + If you want to convert part of a `PraatTextGrid` to an `Annotation` object + while keeping the segment's labels, you can use the tier's + :func:`~pyannote.textgrid.PraatTier.to_annotation` + """ + annotation = Annotation(uri=self.uri, modality=modality) + for tier_name, tier in self._tiers.items(): + for segment, _ in tier: + annotation[segment] = tier_name + return annotation + + def crop(self, support: Support, mode: CropMode = 'intersection') \ + -> 'PraatTextGrid': + """Crop textgrid to new support + + Parameters + ---------- + support : Segment or Timeline + If `support` is a `Timeline`, its support is used. + mode : {'strict', 'loose', 'intersection'}, optional + Controls how segments that are not fully included in `support` are + handled. 'strict' mode only keeps fully included segments. 'loose' + mode keeps any intersecting segment. 'intersection' mode keeps any + intersecting segment but replace them by their actual intersection. + + Returns + ------- + cropped : PraatTextGrid + Cropped textgrid + """ + new_tg = PraatTextGrid(self.uri) + for tier_name, tier in self._tiers.items(): + new_tg._tiers[tier_name] = tier.crop(support) + return new_tg + + def copy(self) -> 'PraatTextGrid': + """Get a copy of the annotation + + Returns + ------- + annotation : PraatTextGrid + Copy of the textgrid + """ + + # create new empty annotation + pass + + def __str__(self): + """Human-friendly representation""" + # TODO: use pandas.DataFrame + return "\n".join(["%s %s %s" % (s, t, l) + for s, t, l in self.itertracks(yield_label=True)]) + + def __delitem__(self, key: TierName): + """Delete a tier + # TODO : doc + """ + del self._tiers[key] + + def __getitem__(self, key: TierName) -> PraatTier: + """Get a tier + + >>> praat_tier = annotation[tiername] + + """ + + return self._tiers[key] + + def __setitem__(self, key: Key, label: Label): + """Add new or update existing track + + >>> annotation[segment, track] = label + + If (segment, track) does not exist, it is added. + If (segment, track) already exists, it is updated. + + Note + ---- + ``annotation[segment] = label`` is equivalent to ``annotation[segment, '_'] = label`` + + Note + ---- + If `segment` is empty, it does nothing. + """ + + if isinstance(key, Segment): + key = (key, '_') + + segment, track = key + + # do not add empty track + if not segment: + return + + # in case we create a new segment + # mark timeline as modified + if segment not in self._tiers: + self._tiers[segment] = {} + self._timelineNeedsUpdate = True + + # in case we modify an existing track + # mark old label as modified + if track in self._tiers[segment]: + old_label = self._tiers[segment][track] + self._labelNeedsUpdate[old_label] = True + + # mark new label as modified + self._tiers[segment][track] = label + self._labelNeedsUpdate[label] = True + + def empty(self) -> 'Annotation': + """Return an empty copy + + Returns + ------- + empty : Annotation + Empty annotation using the same 'uri' and 'modality' attributes. + + """ + return self.__class__(uri=self.uri, modality=self.modality) + + def update(self, textgrid: 'PraatTextGrid', copy: bool = False) \ + -> 'PraatTextGrid': + """Add every track of an existing annotation (in place) + + Parameters + ---------- + annotation : Annotation + Annotation whose tracks are being added + copy : bool, optional + Return a copy of the annotation. Defaults to updating the + annotation in-place. + + Returns + ------- + self : Annotation + Updated annotation + + Note + ---- + Existing tracks are updated with the new label. + """ + + result = self.copy() if copy else self + + # TODO speed things up by working directly with annotation internals + for segment, track, label in annotation.itertracks(yield_label=True): + result[segment, track] = label + + return result + + def chart(self, percent: bool = False) -> List[Tuple[Label, float]]: + """Get labels chart (from longest to shortest duration) + + Parameters + ---------- + percent : bool, optional + Return list of (label, percentage) tuples. + Defaults to returning list of (label, duration) tuples. + + Returns + ------- + chart : list + List of (label, duration), sorted by duration in decreasing order. + """ + + chart = sorted(((L, self.label_duration(L)) for L in self.labels()), + key=lambda x: x[1], reverse=True) + + if percent: + total = np.sum([duration for _, duration in chart]) + chart = [(label, duration / total) for (label, duration) in chart] + + return chart + + def argmax(self, support: Optional[Support] = None) -> Optional[Label]: + """Get label with longest duration + + Parameters + ---------- + support : Segment or Timeline, optional + Find label with longest duration within provided support. + Defaults to whole extent. + + Returns + ------- + label : any existing label or None + Label with longest intersection + + Examples + -------- + >>> annotation = Annotation(modality='speaker') + >>> annotation[Segment(0, 10), 'speaker1'] = 'Alice' + >>> annotation[Segment(8, 20), 'speaker1'] = 'Bob' + >>> print "%s is such a talker!" % annotation.argmax() + Bob is such a talker! + >>> segment = Segment(22, 23) + >>> if not annotation.argmax(support): + ... print "No label intersecting %s" % segment + No label intersection [22 --> 23] + + """ + + cropped = self + if support is not None: + cropped = cropped.crop(support, mode='intersection') + + if not cropped: + return None + + return max(((_, cropped.label_duration(_)) for _ in cropped.labels()), + key=lambda x: x[1])[0] + + def support(self, collar: float = 0.) -> 'PraatTextGrid': + # TODO + """Annotation support + + The support of an annotation is an annotation where contiguous tracks + with same label are merged into one unique covering track. + + A picture is worth a thousand words:: + + collar + |---| + + annotation + |--A--| |--A--| |-B-| + |-B-| |--C--| |----B-----| + + annotation.support(collar) + |------A------| |------B------| + |-B-| |--C--| + + Parameters + ---------- + collar : float, optional + Merge tracks with same label and separated by less than `collar` + seconds. This is why 'A' tracks are merged in above figure. + Defaults to 0. + + Returns + ------- + support : Annotation + Annotation support + + Note + ---- + Track names are lost in the process. + """ + + generator = string_generator() + + # initialize an empty annotation + # with same uri and modality as original + support = self.empty() + for label in self.labels(): + + # get timeline for current label + timeline = self.label_timeline(label, copy=True) + + # fill the gaps shorter than collar + timeline = timeline.support(collar) + + # reconstruct annotation with merged tracks + for segment in timeline.support(): + support[segment, next(generator)] = label + + return support + + def co_iter(self, other: 'Annotation') \ + -> Iterator[Tuple[Tuple[Segment, TierName], + Tuple[Segment, TierName]] + ]: + """Iterate over pairs of intersecting tracks + + Parameters + ---------- + other : Annotation + Second annotation + + Returns + ------- + iterable : (Segment, object), (Segment, object) iterable + Yields pairs of intersecting tracks, in chronological (then + alphabetical) order. + + See also + -------- + :func:`~pyannote.core.Timeline.co_iter` + + """ + timeline = self.get_timeline(copy=False) + other_timeline = other.get_timeline(copy=False) + for s, S in timeline.co_iter(other_timeline): + tracks = sorted(self.get_tracks(s), key=str) + other_tracks = sorted(other.get_tracks(S), key=str) + for t, T in itertools.product(tracks, other_tracks): + yield (s, t), (S, T) + + def __mul__(self, other: 'Annotation') -> np.ndarray: + """Cooccurrence (or confusion) matrix + + >>> matrix = annotation * other + + Parameters + ---------- + other : Annotation + Second annotation + + Returns + ------- + cooccurrence : (n_self, n_other) np.ndarray + Cooccurrence matrix where `n_self` (resp. `n_other`) is the number + of labels in `self` (resp. `other`). + """ + + if not isinstance(other, Annotation): + raise TypeError( + 'computing cooccurrence matrix only works with Annotation ' + 'instances.') + + i_labels = self.labels() + j_labels = other.labels() + + I = {label: i for i, label in enumerate(i_labels)} + J = {label: j for j, label in enumerate(j_labels)} + + matrix = np.zeros((len(I), len(J))) + + # iterate over intersecting tracks and accumulate durations + for (segment, track), (other_segment, other_track) in self.co_iter(other): + i = I[self[segment, track]] + j = J[other[other_segment, other_track]] + duration = (segment & other_segment).duration + matrix[i, j] += duration + + return matrix + + def _repr_png(self): + """IPython notebook support + + See also + -------- + :mod:`pyannote.core.notebook` + """ + + from .notebook import repr_annotation + return repr_annotation(self) diff --git a/pyannote/core/utils/types.py b/pyannote/core/utils/types.py index 0105a16..8507861 100644 --- a/pyannote/core/utils/types.py +++ b/pyannote/core/utils/types.py @@ -8,6 +8,8 @@ LabelGenerator = Union[LabelGeneratorMode, Iterator[Label]] TrackName = Union[str, int] Key = Union['Segment', Tuple['Segment', TrackName]] +TierName = str +TierKey = Tuple[TierName, 'Segment'] Resource = Union['Segment', 'Timeline', 'SlidingWindowFeature', 'Annotation'] CropMode = Literal['intersection', 'loose', 'strict'] From 0b5092d98a8f7a88ff21b6574db4d9ecd9d46447 Mon Sep 17 00:00:00 2001 From: hadware Date: Wed, 23 Jun 2021 02:10:39 +0200 Subject: [PATCH 17/30] pre-checkout commit --- pyannote/core/textgrid.py | 113 ++++---------------------------------- 1 file changed, 11 insertions(+), 102 deletions(-) diff --git a/pyannote/core/textgrid.py b/pyannote/core/textgrid.py index 6f3e37c..3a72d5d 100644 --- a/pyannote/core/textgrid.py +++ b/pyannote/core/textgrid.py @@ -124,6 +124,8 @@ from .utils.types import Label, Key, Support, LabelGenerator, TierName, CropMode +# TODO: add JSON dumping/loading + class PraatTier: def __init__(self, name: str = None, uri: str = None): @@ -159,7 +161,7 @@ def get_index(self, k: int) -> Tuple[Segment, str]: return self._segments.peekitem(k) def get_timeline(self, copy: bool = False) -> Timeline: - pass # TODO + pass # TODO def update(self, tier: 'PraatTier') -> 'PraatTier': """Add every segments of an existing tier (in place) @@ -763,65 +765,6 @@ def gaps(self, support: Optional[Support] = None) -> 'Timeline': return Timeline(segments=self.gaps_iter(support=support), uri=self.uri) - def segmentation(self) -> 'Timeline': - """Segmentation - - Create the unique timeline with same support and same set of segment - boundaries as original timeline, but with no overlapping segments. - - A picture is worth a thousand words:: - - timeline - |------| |------| |----| - |--| |-----| |----------| - - timeline.segmentation() - |-|--|-| |-|---|--| |--|----|--| - - Returns - ------- - timeline : Timeline - (unique) timeline with same support and same set of segment - boundaries as original timeline, but with no overlapping segments. - """ - # COMPLEXITY: O(n) - support = self.support() - - # COMPLEXITY: O(n.log n) - # get all boundaries (sorted) - # |------| |------| |----| - # |--| |-----| |----------| - # becomes - # | | | | | | | | | | | | - timestamps = set([]) - for (start, end) in self: - timestamps.add(start) - timestamps.add(end) - timestamps = sorted(timestamps) - - # create new partition timeline - # | | | | | | | | | | | | - # becomes - # |-|--|-| |-|---|--| |--|----|--| - - # start with an empty copy - timeline = Timeline(uri=self.uri) - - if len(timestamps) == 0: - return Timeline(uri=self.uri) - - segments = [] - start = timestamps[0] - for end in timestamps[1:]: - # only add segments that are covered by original timeline - segment = Segment(start=start, end=end) - if segment and support.overlapping(segment.middle): - segments.append(segment) - # next segment... - start = end - - return Timeline(segments=segments, uri=self.uri) - def to_annotation(self, modality: Optional[str] = None) -> 'Annotation': """Turn tier into an annotation @@ -829,9 +772,6 @@ def to_annotation(self, modality: Optional[str] = None) -> 'Annotation': Parameters ---------- - generator : 'string', 'int', or iterable, optional - If 'string' (default) generate string labels. If 'int', generate - integer labels. If iterable, use it to generate labels. modality : str, optional Returns @@ -842,16 +782,7 @@ def to_annotation(self, modality: Optional[str] = None) -> 'Annotation': from .annotation import Annotation annotation = Annotation(uri=self.uri, modality=modality) - if generator == 'string': - from .utils.generators import string_generator - generator = string_generator() - elif generator == 'int': - from .utils.generators import int_generator - generator = int_generator() - - for segment in self: - annotation[segment] = next(generator) - + # TODO return annotation @@ -897,7 +828,11 @@ def uri(self, uri: str): self._uri = uri @property - def tiers(self) -> List[TierName]: + def tiers(self) -> List[PraatTier]: + return list(self._tiers.values()) + + @property + def tiers_names(self) -> List[TierName]: return list(self._tiers.keys()) @property @@ -1175,43 +1110,17 @@ def update(self, textgrid: 'PraatTextGrid', copy: bool = False) \ result = self.copy() if copy else self - # TODO speed things up by working directly with annotation internals - for segment, track, label in annotation.itertracks(yield_label=True): - result[segment, track] = label + # TODO return result - def chart(self, percent: bool = False) -> List[Tuple[Label, float]]: - """Get labels chart (from longest to shortest duration) - - Parameters - ---------- - percent : bool, optional - Return list of (label, percentage) tuples. - Defaults to returning list of (label, duration) tuples. - - Returns - ------- - chart : list - List of (label, duration), sorted by duration in decreasing order. - """ - - chart = sorted(((L, self.label_duration(L)) for L in self.labels()), - key=lambda x: x[1], reverse=True) - - if percent: - total = np.sum([duration for _, duration in chart]) - chart = [(label, duration / total) for (label, duration) in chart] - - return chart - def argmax(self, support: Optional[Support] = None) -> Optional[Label]: """Get label with longest duration Parameters ---------- support : Segment or Timeline, optional - Find label with longest duration within provided support. + Find label with longest duration within provided support.p Defaults to whole extent. Returns From 40a190a41617ab2902f8170292a8c5b507f7efb2 Mon Sep 17 00:00:00 2001 From: hadware Date: Wed, 24 Aug 2022 14:59:34 +0200 Subject: [PATCH 18/30] Changed class names, added textgrid-parser dependency --- pyannote/core/textgrid.py | 50 +++++++++++++++++++-------------------- setup.py | 3 +++ 2 files changed, 28 insertions(+), 25 deletions(-) diff --git a/pyannote/core/textgrid.py b/pyannote/core/textgrid.py index 3a72d5d..0aee9c9 100644 --- a/pyannote/core/textgrid.py +++ b/pyannote/core/textgrid.py @@ -126,7 +126,7 @@ # TODO: add JSON dumping/loading -class PraatTier: +class Tier: def __init__(self, name: str = None, uri: str = None): self.name = name @@ -163,17 +163,17 @@ def get_index(self, k: int) -> Tuple[Segment, str]: def get_timeline(self, copy: bool = False) -> Timeline: pass # TODO - def update(self, tier: 'PraatTier') -> 'PraatTier': + def update(self, tier: 'Tier') -> 'Tier': """Add every segments of an existing tier (in place) Parameters ---------- - tier : PraatTier + tier : Tier Tier whose segments and their annotations are being added Returns ------- - self : PraatTier + self : Tier Updated tier Note @@ -217,7 +217,7 @@ def __iter__(self) -> Iterable[Segment, str]: """ return iter(self._segments.items()) - def __eq__(self, other: 'PraatTier'): + def __eq__(self, other: 'Tier'): """Equality Two PraatTiers are equal if and only if their segments and their annotations are equal. @@ -233,7 +233,7 @@ def __eq__(self, other: 'PraatTier'): """ return self._segments == other._segments - def __ne__(self, other: 'PraatTier'): + def __ne__(self, other: 'Tier'): """Inequality""" return self._segments != other._segments @@ -332,7 +332,7 @@ def crop(self, support: Support, mode: CropMode = 'intersection', returns_mapping: bool = False) \ - -> 'PraatTier': + -> 'Tier': """Crop timeline to new support Parameters @@ -484,18 +484,18 @@ def __contains__(self, included: Union[Segment, 'Timeline']): 'Checking for inclusion only supports Segment and ' 'Timeline instances') - def empty(self) -> 'PraatTier': + def empty(self) -> 'Tier': """Return an empty copy Returns ------- - empty : PraatTier + empty : Tier Empty timeline using the same 'uri' attribute. """ - return PraatTier(self.name, uri=self.uri) + return Tier(self.name, uri=self.uri) - def covers(self, other: Union[Timeline, 'PraatTier']) -> bool: + def covers(self, other: Union[Timeline, 'Tier']) -> bool: """Check whether other timeline is fully covered by the timeline Parameter @@ -786,7 +786,7 @@ def to_annotation(self, modality: Optional[str] = None) -> 'Annotation': return annotation -class PraatTextGrid: +class TieredAnnotation: """Tiered Annotation. Implementation of Praat's TextGrid file structure Parameters @@ -810,7 +810,7 @@ def __init__(self, uri: Optional[str] = None): # sorted dictionary # values: {tiername: tier} dictionary - self._tiers: Dict[TierName, PraatTier] = SortedDict() + self._tiers: Dict[TierName, Tier] = SortedDict() # timeline meant to store all annotated segments self._timeline: Timeline = None @@ -828,7 +828,7 @@ def uri(self, uri: str): self._uri = uri @property - def tiers(self) -> List[PraatTier]: + def tiers(self) -> List[Tier]: return list(self._tiers.values()) @property @@ -907,7 +907,7 @@ def get_timeline(self, copy: bool = True) -> Timeline: return self._timeline.copy() return self._timeline - def __eq__(self, other: 'PraatTextGrid'): + def __eq__(self, other: 'TieredAnnotation'): """Equality >>> annotation == other @@ -920,7 +920,7 @@ def __eq__(self, other: 'PraatTextGrid'): other.itertracks(yield_label=True)) return all(t1 == t2 for t1, t2 in pairOfTracks) - def __ne__(self, other: 'PraatTextGrid'): + def __ne__(self, other: 'TieredAnnotation'): """Inequality""" pairOfTracks = itertools.zip_longest( self.itertracks(yield_label=True), @@ -977,7 +977,7 @@ def to_annotation(self, modality: Optional[str] = None) -> Annotation: return annotation def crop(self, support: Support, mode: CropMode = 'intersection') \ - -> 'PraatTextGrid': + -> 'TieredAnnotation': """Crop textgrid to new support Parameters @@ -992,20 +992,20 @@ def crop(self, support: Support, mode: CropMode = 'intersection') \ Returns ------- - cropped : PraatTextGrid + cropped : TieredAnnotation Cropped textgrid """ - new_tg = PraatTextGrid(self.uri) + new_tg = TieredAnnotation(self.uri) for tier_name, tier in self._tiers.items(): new_tg._tiers[tier_name] = tier.crop(support) return new_tg - def copy(self) -> 'PraatTextGrid': + def copy(self) -> 'TieredAnnotation': """Get a copy of the annotation Returns ------- - annotation : PraatTextGrid + annotation : TieredAnnotation Copy of the textgrid """ @@ -1024,7 +1024,7 @@ def __delitem__(self, key: TierName): """ del self._tiers[key] - def __getitem__(self, key: TierName) -> PraatTier: + def __getitem__(self, key: TierName) -> Tier: """Get a tier >>> praat_tier = annotation[tiername] @@ -1086,8 +1086,8 @@ def empty(self) -> 'Annotation': """ return self.__class__(uri=self.uri, modality=self.modality) - def update(self, textgrid: 'PraatTextGrid', copy: bool = False) \ - -> 'PraatTextGrid': + def update(self, textgrid: 'TieredAnnotation', copy: bool = False) \ + -> 'TieredAnnotation': """Add every track of an existing annotation (in place) Parameters @@ -1152,7 +1152,7 @@ def argmax(self, support: Optional[Support] = None) -> Optional[Label]: return max(((_, cropped.label_duration(_)) for _ in cropped.labels()), key=lambda x: x[1])[0] - def support(self, collar: float = 0.) -> 'PraatTextGrid': + def support(self, collar: float = 0.) -> 'TieredAnnotation': # TODO """Annotation support diff --git a/setup.py b/setup.py index f10a834..e55bbbe 100755 --- a/setup.py +++ b/setup.py @@ -65,6 +65,9 @@ "Topic :: Scientific/Engineering" ], extras_require={ + 'textgrid': [ + "git+ssh://git@github.com/hadware/textgrid-parser.git" + ], 'testing': ['pytest', 'pandas >= 0.17.1', 'flake8==3.7.9'], From b6046374c5a3b57bceb043b242d96df2c81fa0e1 Mon Sep 17 00:00:00 2001 From: hadware Date: Fri, 26 Aug 2022 01:45:43 +0200 Subject: [PATCH 19/30] Switching to a timeline-based tier system --- pyannote/core/textgrid.py | 316 +++++++++++++++----------------------- pyannote/core/timeline.py | 17 +- 2 files changed, 134 insertions(+), 199 deletions(-) diff --git a/pyannote/core/textgrid.py b/pyannote/core/textgrid.py index 0aee9c9..1459e3a 100644 --- a/pyannote/core/textgrid.py +++ b/pyannote/core/textgrid.py @@ -108,8 +108,9 @@ """ import itertools from collections import defaultdict +from numbers import Number from pathlib import Path -from typing import Optional, Dict, Union, Iterable, List, Set, TextIO, Tuple, Iterator, Text +from typing import Optional, Dict, Union, Iterable, List, Set, TextIO, Tuple, Iterator, Text, Callable import numpy as np from sortedcontainers import SortedDict @@ -123,48 +124,69 @@ from .utils.generators import string_generator, int_generator from .utils.types import Label, Key, Support, LabelGenerator, TierName, CropMode - # TODO: add JSON dumping/loading +# TODO: QUESTIONS: +# - iterator for the TieredAnnotation + +# TODO: IDEA: use a timeline in the Tier to do all the cropping/etc/ operations +# and just make this class a thin wrapper for it +TierLabel = Union[Text, Number] +TierValuePair = Tuple[Segment, TierLabel] + class Tier: + """A set of chronologically-ordered, non-overlapping and annotated segments""" - def __init__(self, name: str = None, uri: str = None): + def __init__(self, name: str = None, + uri: str = None, + allow_overlap: bool = True): self.name = name self.uri = uri - self._segments = SortedDict() - - self._needs_labels_update = False - self._labels_index: Dict[str, Timeline] = dict() - - self._needs_timeline_update = False + self.allow_overlap = allow_overlap + self._segments: Dict[Segment, TierLabel] = dict() self._timeline = Timeline() - def __setitem__(self, key: Segment, value: str): - self._segments.irange() - self._needs_timeline_update = True - self._needs_labels_update = True + def __setitem__(self, segment: Segment, label: str): + if not self.allow_overlap: + for seg, _ in self._timeline.crop_iter(segment, mode="intersection"): + raise ValueError(f"Segment overlaps with {seg}") - def __getitem__(self, item: Segment) -> str: - return self._segments[item] + self._timeline.add(segment) + self._segments[segment] = label - def __delitem__(self, item: Segment): - del self._segments[item] - self._needs_timeline_update = True - self._needs_labels_update = True + def __getitem__(self, segment: Segment) -> str: + return self._segments[segment] - def get_index(self, k: int) -> Tuple[Segment, str]: - """ + def __delitem__(self, segment: Segment): + del self._segments[segment] + self._timeline.remove(segment) + + def __contains__(self, included: Union[Segment, Timeline]): # TODO - :param k: - :return: + """Inclusion + + Check whether every segment of `included` does exist in annotation. + + Parameters + ---------- + included : Segment or Timeline + Segment or timeline being checked for inclusion + + Returns + ------- + contains : bool + True if every segment in `included` exists in timeline, + False otherwise + """ - return self._segments.peekitem(k) + return included in self._timeline def get_timeline(self, copy: bool = False) -> Timeline: - pass # TODO + return self._timeline def update(self, tier: 'Tier') -> 'Tier': - """Add every segments of an existing tier (in place) + # TODO : Doc + """Add every segment of an existing tier (in place) Parameters ---------- @@ -182,7 +204,10 @@ def update(self, tier: 'Tier') -> 'Tier': meant to be a **set** of segments (not a list). """ - pass # TODO + if not self.allow_overlap and \ + any(True for _ in self._timeline.crop_iter(tier.get_timeline(), + mode="intersection")): + raise ValueError("Segments in a tier cannot overlap") def __len__(self): """Number of segments in the tier @@ -240,7 +265,8 @@ def __ne__(self, other: 'Tier'): def __or__(self, timeline: 'Timeline') -> 'Timeline': return self.union(timeline) - def co_iter(self, other: 'Timeline') -> Iterator[Tuple[Segment, Segment]]: + def co_iter(self, other: Union[Timeline, Segment]) -> Iterator[Tuple[Segment, Segment]]: + # TODO : Doc """Iterate over pairs of intersecting segments >>> timeline1 = Timeline([Segment(0, 2), Segment(1, 2), Segment(3, 4)]) @@ -262,13 +288,7 @@ def co_iter(self, other: 'Timeline') -> Iterator[Tuple[Segment, Segment]]: Yields pairs of intersecting segments in chronological order. """ - for segment in self.segments_list_: - - # iterate over segments that starts before 'segment' ends - temp = Segment(start=segment.end, end=segment.end) - for other_segment in other.segments_list_.irange(maximum=temp): - if segment.intersects(other_segment): - yield segment, other_segment + yield from self._timeline.co_iter(other) def crop_iter(self, support: Support, @@ -377,6 +397,8 @@ def crop(self, """ + # TODO + if mode == 'intersection' and returns_mapping: segments, mapping = [], {} for segment, mapped_to in self.crop_iter(support, @@ -402,7 +424,7 @@ def overlapping(self, t: float) -> List[Segment]: segments : list List of all segments of timeline containing time t """ - return list(self.overlapping_iter(t)) + return self._timeline.overlapping(t) def overlapping_iter(self, t: float) -> Iterator[Segment]: """Like `overlapping` but returns a segment iterator instead @@ -509,6 +531,7 @@ def covers(self, other: Union[Timeline, 'Tier']) -> bool: True if timeline covers "other" timeline entirely. False if at least one segment of "other" is not fully covered by timeline """ + # TODO # compute gaps within "other" extent # this is where we should look for possible faulty segments @@ -525,6 +548,7 @@ def covers(self, other: Union[Timeline, 'Tier']) -> bool: def copy(self, segment_func: Optional[Callable[[Segment], Segment]] = None) \ -> 'Timeline': + # TODO """Get a copy of the timeline If `segment_func` is provided, it is applied to each segment first. @@ -580,14 +604,7 @@ def extent(self) -> Segment: """ - if self.segments_set_: - segments_boundaries_ = self.segments_boundaries_ - start = segments_boundaries_[0] - end = segments_boundaries_[-1] - return Segment(start=start, end=end) - else: - import numpy as np - return Segment(start=np.inf, end=-np.inf) + return self._timeline.extent() def support_iter(self, collar: float = 0.) -> Iterator[Segment]: """Like `support` but returns a segment generator instead @@ -597,44 +614,10 @@ def support_iter(self, collar: float = 0.) -> Iterator[Segment]: :func:`pyannote.core.Timeline.support` """ - # The support of an empty timeline is an empty timeline. - if not self: - return - - # Principle: - # * gather all segments with no gap between them - # * add one segment per resulting group (their union |) - # Note: - # Since segments are kept sorted internally, - # there is no need to perform an exhaustive segment clustering. - # We just have to consider them in their natural order. - - # Initialize new support segment - # as very first segment of the timeline - new_segment = self.segments_list_[0] - - for segment in self: - - # If there is no gap between new support segment and next segment - # OR there is a gap with duration < collar seconds, - possible_gap = segment ^ new_segment - if not possible_gap or possible_gap.duration < collar: - # Extend new support segment using next segment - new_segment |= segment - - # If there actually is a gap and the gap duration >= collar - # seconds, - else: - yield new_segment - - # Initialize new support segment as next segment - # (right after the gap) - new_segment = segment - - # Add new segment to the timeline support - yield new_segment + yield from self._timeline.support_iter(collar) def support(self, collar: float = 0.) -> 'Timeline': + # TODO: doc """Timeline support The support of a timeline is the timeline with the minimum number of @@ -669,7 +652,7 @@ def support(self, collar: float = 0.) -> 'Timeline': support : Timeline Timeline support """ - return Timeline(segments=self.support_iter(collar), uri=self.uri) + return self._timeline.support(collar) def duration(self) -> float: """Timeline duration @@ -685,7 +668,7 @@ def duration(self) -> float: # The timeline duration is the sum of the durations # of the segments in the timeline support. - return sum(s.duration for s in self.support_iter()) + return self._timeline.duration() def gaps_iter(self, support: Optional[Support] = None) -> Iterator[Segment]: """Like `gaps` but returns a segment generator instead @@ -765,6 +748,44 @@ def gaps(self, support: Optional[Support] = None) -> 'Timeline': return Timeline(segments=self.gaps_iter(support=support), uri=self.uri) + def argmax(self, support: Optional[Support] = None) -> Optional[Label]: + """Get label with longest duration + + Parameters + ---------- + support : Segment or Timeline, optional + Find label with longest duration within provided support. + Defaults to whole extent. + + Returns + ------- + label : any existing label or None + Label with longest intersection + + Examples + -------- + >>> annotation = Annotation(modality='speaker') + >>> annotation[Segment(0, 10), 'speaker1'] = 'Alice' + >>> annotation[Segment(8, 20), 'speaker1'] = 'Bob' + >>> print "%s is such a talker!" % annotation.argmax() + Bob is such a talker! + >>> segment = Segment(22, 23) + >>> if not annotation.argmax(support): + ... print "No label intersecting %s" % segment + No label intersection [22 --> 23] + + """ + + cropped = self + if support is not None: + cropped = cropped.crop(support, mode='intersection') + + if not cropped: + return None + + return max(((_, cropped.label_duration(_)) for _ in cropped.labels()), + key=lambda x: x[1])[0] + def to_annotation(self, modality: Optional[str] = None) -> 'Annotation': """Turn tier into an annotation @@ -787,7 +808,7 @@ def to_annotation(self, modality: Optional[str] = None) -> 'Annotation': class TieredAnnotation: - """Tiered Annotation. Implementation of Praat's TextGrid file structure + """Tiered Annotation. Parameters ---------- @@ -806,7 +827,6 @@ class TieredAnnotation: def __init__(self, uri: Optional[str] = None): self._uri: Optional[str] = uri - self.modality: Optional[str] = modality # sorted dictionary # values: {tiername: tier} dictionary @@ -816,6 +836,18 @@ def __init__(self, uri: Optional[str] = None): self._timeline: Timeline = None self._timelineNeedsUpdate: bool = True + @classmethod + def from_textgrid(cls, textgrid: Union[str, Path, TextIO], + textgrid_format: str = "full"): + try: + from textgrid_parser import parse_textgrid + except ImportError: + raise ImportError("The dependencies used to parse TextGrid file cannot be found. " + "Please install using pyannote.core[textgrid]") + # TODO : check for tiers with duplicate names + + return parse_textgrid(textgrid, textgrid_format=textgrid_format) + @property def uri(self): return self._uri @@ -836,7 +868,7 @@ def tiers_names(self) -> List[TierName]: return list(self._tiers.keys()) @property - def tiers_nb(self): + def tiers_count(self): return len(self._tiers) def __len__(self): @@ -852,7 +884,7 @@ def __nonzero__(self): def __bool__(self): """Emptiness - + # TODO : docfix >>> if annotation: ... # annotation is empty ... else: @@ -872,7 +904,7 @@ def itersegments(self): """ return iter(self._timeline) - def __iter__(self): + def __iter__(self) -> Iterable[Tuple[Segment, str]]: return iter(self._tiers.items()) def _update_timeline(self): @@ -915,6 +947,7 @@ def __eq__(self, other: 'TieredAnnotation'): Two annotations are equal if and only if their tracks and associated labels are equal. """ + # TODO pairOfTracks = itertools.zip_longest( self.itertracks(yield_label=True), other.itertracks(yield_label=True)) @@ -922,6 +955,7 @@ def __eq__(self, other: 'TieredAnnotation'): def __ne__(self, other: 'TieredAnnotation'): """Inequality""" + # TODO pairOfTracks = itertools.zip_longest( self.itertracks(yield_label=True), other.itertracks(yield_label=True)) @@ -1010,6 +1044,7 @@ def copy(self) -> 'TieredAnnotation': """ # create new empty annotation + # TODO pass def __str__(self): @@ -1114,44 +1149,6 @@ def update(self, textgrid: 'TieredAnnotation', copy: bool = False) \ return result - def argmax(self, support: Optional[Support] = None) -> Optional[Label]: - """Get label with longest duration - - Parameters - ---------- - support : Segment or Timeline, optional - Find label with longest duration within provided support.p - Defaults to whole extent. - - Returns - ------- - label : any existing label or None - Label with longest intersection - - Examples - -------- - >>> annotation = Annotation(modality='speaker') - >>> annotation[Segment(0, 10), 'speaker1'] = 'Alice' - >>> annotation[Segment(8, 20), 'speaker1'] = 'Bob' - >>> print "%s is such a talker!" % annotation.argmax() - Bob is such a talker! - >>> segment = Segment(22, 23) - >>> if not annotation.argmax(support): - ... print "No label intersecting %s" % segment - No label intersection [22 --> 23] - - """ - - cropped = self - if support is not None: - cropped = cropped.crop(support, mode='intersection') - - if not cropped: - return None - - return max(((_, cropped.label_duration(_)) for _ in cropped.labels()), - key=lambda x: x[1])[0] - def support(self, collar: float = 0.) -> 'TieredAnnotation': # TODO """Annotation support @@ -1208,75 +1205,6 @@ def support(self, collar: float = 0.) -> 'TieredAnnotation': return support - def co_iter(self, other: 'Annotation') \ - -> Iterator[Tuple[Tuple[Segment, TierName], - Tuple[Segment, TierName]] - ]: - """Iterate over pairs of intersecting tracks - - Parameters - ---------- - other : Annotation - Second annotation - - Returns - ------- - iterable : (Segment, object), (Segment, object) iterable - Yields pairs of intersecting tracks, in chronological (then - alphabetical) order. - - See also - -------- - :func:`~pyannote.core.Timeline.co_iter` - - """ - timeline = self.get_timeline(copy=False) - other_timeline = other.get_timeline(copy=False) - for s, S in timeline.co_iter(other_timeline): - tracks = sorted(self.get_tracks(s), key=str) - other_tracks = sorted(other.get_tracks(S), key=str) - for t, T in itertools.product(tracks, other_tracks): - yield (s, t), (S, T) - - def __mul__(self, other: 'Annotation') -> np.ndarray: - """Cooccurrence (or confusion) matrix - - >>> matrix = annotation * other - - Parameters - ---------- - other : Annotation - Second annotation - - Returns - ------- - cooccurrence : (n_self, n_other) np.ndarray - Cooccurrence matrix where `n_self` (resp. `n_other`) is the number - of labels in `self` (resp. `other`). - """ - - if not isinstance(other, Annotation): - raise TypeError( - 'computing cooccurrence matrix only works with Annotation ' - 'instances.') - - i_labels = self.labels() - j_labels = other.labels() - - I = {label: i for i, label in enumerate(i_labels)} - J = {label: j for j, label in enumerate(j_labels)} - - matrix = np.zeros((len(I), len(J))) - - # iterate over intersecting tracks and accumulate durations - for (segment, track), (other_segment, other_track) in self.co_iter(other): - i = I[self[segment, track]] - j = J[other[other_segment, other_track]] - duration = (segment & other_segment).duration - matrix[i, j] += duration - - return matrix - def _repr_png(self): """IPython notebook support diff --git a/pyannote/core/timeline.py b/pyannote/core/timeline.py index 57114be..efa442a 100755 --- a/pyannote/core/timeline.py +++ b/pyannote/core/timeline.py @@ -99,7 +99,6 @@ from .segment import Segment from .utils.types import Support, Label, CropMode - # this is a moderately ugly way to import `Annotation` to the namespace # without causing some circular imports : # https://stackoverflow.com/questions/39740632/python-type-hinting-without-cyclic-imports @@ -321,7 +320,7 @@ def discard(self, segment: Segment) -> 'Timeline': def __ior__(self, timeline: 'Timeline') -> 'Timeline': return self.update(timeline) - def update(self, timeline: Segment) -> 'Timeline': + def update(self, timeline: 'Timeline') -> 'Timeline': """Add every segments of an existing timeline (in place) Parameters @@ -354,10 +353,10 @@ def update(self, timeline: Segment) -> 'Timeline': return self - def __or__(self, timeline: 'Timeline') -> 'Timeline': + def __or__(self, timeline: Union['Timeline', Segment]) -> 'Timeline': return self.union(timeline) - def union(self, timeline: 'Timeline') -> 'Timeline': + def union(self, timeline: Union['Timeline', Segment]) -> 'Timeline': """Create new timeline made of union of segments Parameters @@ -375,10 +374,16 @@ def union(self, timeline: 'Timeline') -> 'Timeline': This does the same as timeline.update(...) except it returns a new timeline, and the original one is not modified. """ + if isinstance(timeline, Segment): + timeline = Timeline([timeline]) + segments = self.segments_set_ | timeline.segments_set_ return Timeline(segments=segments, uri=self.uri) - def co_iter(self, other: 'Timeline') -> Iterator[Tuple[Segment, Segment]]: + def __and__(self, timeline: Union['Timeline', Segment]) -> 'Timeline': + return self.crop(timeline, mode="intersection") + + def co_iter(self, other: Union['Timeline', Segment]) -> Iterator[Tuple[Segment, Segment]]: """Iterate over pairs of intersecting segments >>> timeline1 = Timeline([Segment(0, 2), Segment(1, 2), Segment(3, 4)]) @@ -399,6 +404,8 @@ def co_iter(self, other: 'Timeline') -> Iterator[Tuple[Segment, Segment]]: iterable : (Segment, Segment) iterable Yields pairs of intersecting segments in chronological order. """ + if isinstance(other, Segment): + other = Timeline([other]) for segment in self.segments_list_: From c458895ab7839c517b2f03a7b8fc1664959d448f Mon Sep 17 00:00:00 2001 From: hadware Date: Wed, 7 Sep 2022 01:58:23 +0200 Subject: [PATCH 20/30] Added abstract class for all segmentation classes --- pyannote/core/annotation.py | 2 +- pyannote/core/base.py | 126 ++++++++++++++++++++++++++++++++++++ pyannote/core/textgrid.py | 59 +++++------------ 3 files changed, 142 insertions(+), 45 deletions(-) create mode 100644 pyannote/core/base.py diff --git a/pyannote/core/annotation.py b/pyannote/core/annotation.py index 1a934b7..20a182f 100755 --- a/pyannote/core/annotation.py +++ b/pyannote/core/annotation.py @@ -634,7 +634,7 @@ def get_overlap(self, labels: Optional[Iterable[Label]] = None) -> "Timeline": annotation.get_overlap() |------| |-----| |--------| - annotation.get_overlap(for_labels=["A", "B"]) + annotation.get_overlap(labels=["A", "B"]) |--| |--| |----| Parameters diff --git a/pyannote/core/base.py b/pyannote/core/base.py new file mode 100644 index 0000000..7ceef96 --- /dev/null +++ b/pyannote/core/base.py @@ -0,0 +1,126 @@ +from abc import ABCMeta, abstractmethod +from typing import Optional, Iterable, Iterator, Tuple, Union, Dict, TYPE_CHECKING, Callable + +from typing_extensions import Self + +from pyannote.core import Segment +from pyannote.core.utils.types import Support, CropMode + +if TYPE_CHECKING: + from .timeline import Timeline + + +class BaseSegmentation(metaclass=ABCMeta): + """Abstract base class for all segmented annotations""" + + @abstractmethod + def __len__(self) -> int: + pass + + @abstractmethod + def __nonzero__(self): + pass + + @abstractmethod + def __bool__(self): + pass + + @abstractmethod + def __eq__(self, other: Self): + pass + + @abstractmethod + def __ne__(self, other: Self): + pass + + @abstractmethod + def update(self, other: Self) -> Self: + pass + + @abstractmethod + def co_iter(self, other: 'BaseSegmentation') -> Iterator[Tuple[Segment, Segment]]: + pass + + @abstractmethod + def crop_iter(self, + support: Support, + mode: CropMode = 'intersection', + returns_mapping: bool = False) \ + -> Iterator[Union[Tuple[Segment, Segment], Segment]]: + pass + + @abstractmethod + def crop(self, + support: Support, + mode: CropMode = 'intersection', + returns_mapping: bool = False) \ + -> Union['Timeline', Tuple['Timeline', Dict[Segment, Segment]]]: + pass + + @abstractmethod + def get_overlap(self) -> 'Timeline': + pass + + @abstractmethod + def extrude(self, + removed: Support, + mode: CropMode = 'intersection') -> 'Timeline': + pass + + @abstractmethod + def __str__(self): + pass + + @abstractmethod + def __repr__(self): + pass + + @abstractmethod + def __contains__(self, included: Union[Segment, 'Timeline']): + pass + + @abstractmethod + def empty(self) -> 'Timeline': + pass + + @abstractmethod + def copy(self, segment_func: Optional[Callable[[Segment], Segment]] = None) \ + -> Self: + pass + + @abstractmethod + def extent(self) -> Segment: + pass + + @abstractmethod + def support_iter(self, collar: float = 0.0) -> Iterator[Segment]: + pass + + @abstractmethod + def support(self, collar: float = 0.) -> 'Timeline': + pass + + @abstractmethod + def duration(self) -> float: + pass + + @abstractmethod + def gaps_iter(self, support: Optional[Support] = None) -> Iterator[Segment]: + pass + + @abstractmethod + def gaps(self, support: Optional[Support] = None) -> 'Timeline': + pass + + @abstractmethod + def for_json(self): + pass + + @classmethod + # TODO + def from_json(cls, data): + pass + + @abstractmethod + def _repr_png_(self): + pass diff --git a/pyannote/core/textgrid.py b/pyannote/core/textgrid.py index 1459e3a..ac70fe0 100644 --- a/pyannote/core/textgrid.py +++ b/pyannote/core/textgrid.py @@ -128,6 +128,8 @@ # TODO: QUESTIONS: # - iterator for the TieredAnnotation +# TODO: add segmentation abstract class + # TODO: IDEA: use a timeline in the Tier to do all the cropping/etc/ operations # and just make this class a thin wrapper for it TierLabel = Union[Text, Number] @@ -135,7 +137,8 @@ class Tier: - """A set of chronologically-ordered, non-overlapping and annotated segments""" + """A set of chronologically-ordered, optionally non-overlapping + and annotated segments""" def __init__(self, name: str = None, uri: str = None, @@ -154,12 +157,17 @@ def __setitem__(self, segment: Segment, label: str): self._timeline.add(segment) self._segments[segment] = label - def __getitem__(self, segment: Segment) -> str: - return self._segments[segment] + def __getitem__(self, key: Union[Segment, int]) -> str: + if isinstance(key, int): + key = self._timeline.__getitem__(key) + return self._segments[key] + + def __delitem__(self, key: Union[Segment, int]): + if isinstance(key, int): + key = self._timeline.__getitem__(key) - def __delitem__(self, segment: Segment): - del self._segments[segment] - self._timeline.remove(segment) + del self._segments[key] + self._timeline.remove(key) def __contains__(self, included: Union[Segment, Timeline]): # TODO @@ -679,44 +687,7 @@ def gaps_iter(self, support: Optional[Support] = None) -> Iterator[Segment]: """ - if support is None: - support = self.extent() - - if not isinstance(support, (Segment, Timeline)): - raise TypeError("unsupported operand type(s) for -':" - "%s and Timeline." % type(support).__name__) - - # segment support - if isinstance(support, Segment): - - # `end` is meant to store the end time of former segment - # initialize it with beginning of provided segment `support` - end = support.start - - # support on the intersection of timeline and provided segment - for segment in self.crop(support, mode='intersection').support(): - - # add gap between each pair of consecutive segments - # if there is no gap, segment is empty, therefore not added - gap = Segment(start=end, end=segment.start) - if gap: - yield gap - - # keep track of the end of former segment - end = segment.end - - # add final gap (if not empty) - gap = Segment(start=end, end=support.end) - if gap: - yield gap - - # timeline support - elif isinstance(support, Timeline): - - # yield gaps for every segment in support of provided timeline - for segment in support.support(): - for gap in self.gaps_iter(support=segment): - yield gap + yield from self._timeline.gaps_iter(support) def gaps(self, support: Optional[Support] = None) -> 'Timeline': """Gaps From 8eadcfd57dfc033f5327c69415ef200bff194679 Mon Sep 17 00:00:00 2001 From: hadware Date: Mon, 30 Jan 2023 15:28:43 +0100 Subject: [PATCH 21/30] Attempts at creating abstract classes for all annotations --- pyannote/core/base.py | 61 +- pyannote/core/partition.py | 1046 ++++++++++++++++++++++++++++++++ pyannote/core/timeline.py | 5 +- pyannote/core/utils/loaders.py | 214 +++++++ 4 files changed, 1295 insertions(+), 31 deletions(-) create mode 100755 pyannote/core/partition.py create mode 100644 pyannote/core/utils/loaders.py diff --git a/pyannote/core/base.py b/pyannote/core/base.py index 7ceef96..bc8f166 100644 --- a/pyannote/core/base.py +++ b/pyannote/core/base.py @@ -41,32 +41,10 @@ def update(self, other: Self) -> Self: def co_iter(self, other: 'BaseSegmentation') -> Iterator[Tuple[Segment, Segment]]: pass - @abstractmethod - def crop_iter(self, - support: Support, - mode: CropMode = 'intersection', - returns_mapping: bool = False) \ - -> Iterator[Union[Tuple[Segment, Segment], Segment]]: - pass - - @abstractmethod - def crop(self, - support: Support, - mode: CropMode = 'intersection', - returns_mapping: bool = False) \ - -> Union['Timeline', Tuple['Timeline', Dict[Segment, Segment]]]: - pass - @abstractmethod def get_overlap(self) -> 'Timeline': pass - @abstractmethod - def extrude(self, - removed: Support, - mode: CropMode = 'intersection') -> 'Timeline': - pass - @abstractmethod def __str__(self): pass @@ -105,22 +83,47 @@ def duration(self) -> float: pass @abstractmethod - def gaps_iter(self, support: Optional[Support] = None) -> Iterator[Segment]: + def for_json(self): + pass + + @classmethod + # TODO + def from_json(cls, data): pass @abstractmethod - def gaps(self, support: Optional[Support] = None) -> 'Timeline': + def _repr_png_(self): pass + +class GappedAnnotationMixin(metaclass=ABCMeta): + @abstractmethod - def for_json(self): + def crop_iter(self, + support: Support, + mode: CropMode = 'intersection', + returns_mapping: bool = False) \ + -> Iterator[Union[Tuple[Segment, Segment], Segment]]: pass - @classmethod - # TODO - def from_json(cls, data): + @abstractmethod + def crop(self, + support: Support, + mode: CropMode = 'intersection', + returns_mapping: bool = False) \ + -> Union['Timeline', Tuple['Timeline', Dict[Segment, Segment]]]: pass @abstractmethod - def _repr_png_(self): + def gaps_iter(self, support: Optional[Support] = None) -> Iterator[Segment]: + pass + + @abstractmethod + def gaps(self, support: Optional[Support] = None) -> 'Timeline': + pass + + @abstractmethod + def extrude(self, + removed: Support, + mode: CropMode = 'intersection') -> 'Timeline': pass diff --git a/pyannote/core/partition.py b/pyannote/core/partition.py new file mode 100755 index 0000000..83f8e5e --- /dev/null +++ b/pyannote/core/partition.py @@ -0,0 +1,1046 @@ +#!/usr/bin/env python +# encoding: utf-8 + +# The MIT License (MIT) + +# Copyright (c) 2014-2020 CNRS + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +# AUTHORS +# Hervé BREDIN - http://herve.niderb.fr +# Grant JENKS - http://www.grantjenks.com/ +# Paul LERNER + +""" +######## +Timeline +######## + +.. plot:: pyplots/timeline.py + +:class:`pyannote.core.Timeline` instances are ordered sets of non-empty +segments: + + - ordered, because segments are sorted by start time (and end time in case of tie) + - set, because one cannot add twice the same segment + - non-empty, because one cannot add empty segments (*i.e.* start >= end) + +There are two ways to define the timeline depicted above: + +.. code-block:: ipython + + In [25]: from pyannote.core import Timeline, Segment + + In [26]: timeline = Timeline() + ....: timeline.add(Segment(1, 5)) + ....: timeline.add(Segment(6, 8)) + ....: timeline.add(Segment(12, 18)) + ....: timeline.add(Segment(7, 20)) + ....: + + In [27]: segments = [Segment(1, 5), Segment(6, 8), Segment(12, 18), Segment(7, 20)] + ....: timeline = Timeline(segments=segments, uri='my_audio_file') # faster + ....: + + In [9]: for segment in timeline: + ...: print(segment) + ...: + [ 00:00:01.000 --> 00:00:05.000] + [ 00:00:06.000 --> 00:00:08.000] + [ 00:00:07.000 --> 00:00:20.000] + [ 00:00:12.000 --> 00:00:18.000] + + +.. note:: + + The optional *uri* keyword argument can be used to remember which document it describes. + +Several convenient methods are available. Here are a few examples: + +.. code-block:: ipython + + In [3]: timeline.extent() # extent + Out[3]: + + In [5]: timeline.support() # support + Out[5]: , ])> + + In [6]: timeline.duration() # support duration + Out[6]: 18 + + +See :class:`pyannote.core.Timeline` for the complete reference. +""" +import warnings +from typing import (Optional, Iterable, List, Union, Callable, + TextIO, Tuple, TYPE_CHECKING, Iterator, Dict, Text) + +from sortedcontainers import SortedList + +from . import PYANNOTE_URI, PYANNOTE_SEGMENT, Timeline +from .json import PYANNOTE_JSON, PYANNOTE_JSON_CONTENT +from .segment import Segment +from .utils.types import Support, Label, CropMode + +if TYPE_CHECKING: + from .annotation import Annotation + + +# ===================================================================== +# Partition class +# ===================================================================== + +# TODO: Questions: +# - "autofill" the partition if the initialized segments aren't filling? +# - partition empty if only one segment? + +class Partition(Timeline): + """ + Ordered set of segments. + + A timeline can be seen as an ordered set of non-empty segments (Segment). + Segments can overlap -- though adding an already exisiting segment to a + timeline does nothing. + + Parameters + ---------- + segments : Segment iterator, optional + initial set of (non-empty) segments + uri : string, optional + name of segmented resource + + Returns + ------- + timeline : Timeline + New timeline + """ + + def __init__(self, + segments: Optional[Iterable[Segment]] = None, + start: float = 0.0, + end: float = None, + uri: str = None): + segments = list(segments) + if segments is None and end is None: + raise ValueError("Cannot initialize an empty timeline without and end boundary") + elif end is None: + end = max(seg.end for seg in segments) + elif not segments: + segments = Segment(start, end) + + self.start = start + self.end = end + super().__init__(segments, uri) + + # TODO: check "filling"? autofill if not valid? + self.update(self.gaps(support=self.extent())) + if self[0].start < self.start or self[-1].end > self.end: + raise ValueError(f"Segments have to be within ({start, end}) bounds") + + def __nonzero__(self): + # TODO + return self.__bool__() + + def __bool__(self): + # TODO + """Emptiness + + >>> if timeline: + ... # timeline is not empty + ... else: + ... # timeline is empty + """ + return len(self.segments_set_) > 0 + + def bisect(self, at: float): + pass + + def add(self, segment: Segment) -> 'Timeline': + """Add a segment (in place) + + Parameters + ---------- + segment : Segment + Segment that is being added + + Returns + ------- + self : Timeline + Updated timeline. + + Note + ---- + If the timeline already contains this segment, it will not be added + again, as a timeline is meant to be a **set** of segments (not a list). + + If the segment is empty, it will not be added either, as a timeline + only contains non-empty segments. + """ + + segments_set_ = self.segments_set_ + if segment in segments_set_ or not segment: + return self + + segments_set_.add(segment) + + self.segments_list_.add(segment) + + segments_boundaries_ = self.segments_boundaries_ + segments_boundaries_.add(segment.start) + segments_boundaries_.add(segment.end) + + return self + + + + def remove(self, segment: Segment) -> 'Timeline': + """Remove a segment (in place) + + Parameters + ---------- + segment : Segment + Segment that is being removed + + Returns + ------- + self : Timeline + Updated timeline. + + Note + ---- + If the timeline does not contain this segment, this does nothing + """ + + segments_set_ = self.segments_set_ + if segment not in segments_set_: + return self + + segments_set_.remove(segment) + + self.segments_list_.remove(segment) + + segments_boundaries_ = self.segments_boundaries_ + segments_boundaries_.remove(segment.start) + segments_boundaries_.remove(segment.end) + + return self + + def discard(self, segment: Segment) -> 'Timeline': + """Same as `remove` + + See also + -------- + :func:`pyannote.core.Timeline.remove` + """ + return self.remove(segment) + + def __ior__(self, timeline: 'Timeline') -> 'Timeline': + return self.update(timeline) + + def update(self, timeline: 'Timeline') -> 'Timeline': + """Add every segments of an existing timeline (in place) + + Parameters + ---------- + timeline : Timeline + Timeline whose segments are being added + + Returns + ------- + self : Timeline + Updated timeline + + Note + ---- + Only segments that do not already exist will be added, as a timeline is + meant to be a **set** of segments (not a list). + + """ + + segments_set = self.segments_set_ + + segments_set |= timeline.segments_set_ + + # sorted list of segments (used for sorted iteration) + self.segments_list_ = SortedList(segments_set) + + # sorted list of (possibly redundant) segment boundaries + boundaries = (boundary for segment in segments_set for boundary in segment) + self.segments_boundaries_ = SortedList(boundaries) + + return self + + def __or__(self, timeline: Union['Timeline', Segment]) -> 'Timeline': + return self.union(timeline) + + def union(self, timeline: Union['Timeline', Segment]) -> 'Timeline': + """Create new timeline made of union of segments + + Parameters + ---------- + timeline : Timeline + Timeline whose segments are being added + + Returns + ------- + union : Timeline + New timeline containing the union of both timelines. + + Note + ---- + This does the same as timeline.update(...) except it returns a new + timeline, and the original one is not modified. + """ + if isinstance(timeline, Segment): + timeline = Timeline([timeline]) + + segments = self.segments_set_ | timeline.segments_set_ + return Timeline(segments=segments, uri=self.uri) + + def __and__(self, timeline: Union['Timeline', Segment]) -> 'Timeline': + return self.crop(timeline, mode="intersection") + + def co_iter(self, other: Union['Timeline', Segment]) -> Iterator[Tuple[Segment, Segment]]: + """Iterate over pairs of intersecting segments + + >>> timeline1 = Timeline([Segment(0, 2), Segment(1, 2), Segment(3, 4)]) + >>> timeline2 = Timeline([Segment(1, 3), Segment(3, 5)]) + >>> for segment1, segment2 in timeline1.co_iter(timeline2): + ... print(segment1, segment2) + (, ) + (, ) + (, ) + + Parameters + ---------- + other : Timeline + Second timeline + + Returns + ------- + iterable : (Segment, Segment) iterable + Yields pairs of intersecting segments in chronological order. + """ + if isinstance(other, Segment): + other = Timeline([other]) + + for segment in self.segments_list_: + + # iterate over segments that starts before 'segment' ends + temp = Segment(start=segment.end, end=segment.end) + for other_segment in other.segments_list_.irange(maximum=temp): + if segment.intersects(other_segment): + yield segment, other_segment + + def crop_iter(self, + support: Support, + mode: CropMode = 'intersection', + returns_mapping: bool = False) \ + -> Iterator[Union[Tuple[Segment, Segment], Segment]]: + """Like `crop` but returns a segment iterator instead + + See also + -------- + :func:`pyannote.core.Timeline.crop` + """ + + if mode not in {'loose', 'strict', 'intersection'}: + raise ValueError("Mode must be one of 'loose', 'strict', or " + "'intersection'.") + + if not isinstance(support, (Segment, Timeline)): + raise TypeError("Support must be a Segment or a Timeline.") + + if isinstance(support, Segment): + # corner case where "support" is empty + if support: + segments = [support] + else: + segments = [] + + support = Timeline(segments=segments, uri=self.uri) + for yielded in self.crop_iter(support, mode=mode, + returns_mapping=returns_mapping): + yield yielded + return + + # if 'support' is a `Timeline`, we use its support + support = support.support() + + # loose mode + if mode == 'loose': + for segment, _ in self.co_iter(support): + yield segment + return + + # strict mode + if mode == 'strict': + for segment, other_segment in self.co_iter(support): + if segment in other_segment: + yield segment + return + + # intersection mode + for segment, other_segment in self.co_iter(support): + mapped_to = segment & other_segment + if not mapped_to: + continue + if returns_mapping: + yield segment, mapped_to + else: + yield mapped_to + + def crop(self, + support: Support, + mode: CropMode = 'intersection', + returns_mapping: bool = False) \ + -> Union['Timeline', Tuple['Timeline', Dict[Segment, Segment]]]: + """Crop timeline to new support + + Parameters + ---------- + support : Segment or Timeline + If `support` is a `Timeline`, its support is used. + mode : {'strict', 'loose', 'intersection'}, optional + Controls how segments that are not fully included in `support` are + handled. 'strict' mode only keeps fully included segments. 'loose' + mode keeps any intersecting segment. 'intersection' mode keeps any + intersecting segment but replace them by their actual intersection. + returns_mapping : bool, optional + In 'intersection' mode, return a dictionary whose keys are segments + of the cropped timeline, and values are list of the original + segments that were cropped. Defaults to False. + + Returns + ------- + cropped : Timeline + Cropped timeline + mapping : dict + When 'returns_mapping' is True, dictionary whose keys are segments + of 'cropped', and values are lists of corresponding original + segments. + + Examples + -------- + + >>> timeline = Timeline([Segment(0, 2), Segment(1, 2), Segment(3, 4)]) + >>> timeline.crop(Segment(1, 3)) + ])> + + >>> timeline.crop(Segment(1, 3), mode='loose') + , ])> + + >>> timeline.crop(Segment(1, 3), mode='strict') + ])> + + >>> cropped, mapping = timeline.crop(Segment(1, 3), returns_mapping=True) + >>> print(mapping) + {: [, ]} + + """ + + if mode == 'intersection' and returns_mapping: + segments, mapping = [], {} + for segment, mapped_to in self.crop_iter(support, + mode='intersection', + returns_mapping=True): + segments.append(mapped_to) + mapping[mapped_to] = mapping.get(mapped_to, list()) + [segment] + return Timeline(segments=segments, uri=self.uri), mapping + + return Timeline(segments=self.crop_iter(support, mode=mode), + uri=self.uri) + + def overlapping(self, t: float) -> List[Segment]: + """Get list of segments overlapping `t` + + Parameters + ---------- + t : float + Timestamp, in seconds. + + Returns + ------- + segments : list + List of all segments of timeline containing time t + """ + return list(self.overlapping_iter(t)) + + def overlapping_iter(self, t: float) -> Iterator[Segment]: + """Like `overlapping` but returns a segment iterator instead + + See also + -------- + :func:`pyannote.core.Timeline.overlapping` + """ + segment = Segment(start=t, end=t) + for segment in self.segments_list_.irange(maximum=segment): + if segment.overlaps(t): + yield segment + + def get_overlap(self) -> 'Timeline': + """Get overlapping parts of the timeline. + + A simple illustration: + + timeline + |------| |------| |----| + |--| |-----| |----------| + + timeline.get_overlap() + |--| |---| |----| + + + Returns + ------- + overlap : `pyannote.core.Timeline` + Timeline of the overlaps. + """ + overlaps_tl = Timeline(uri=self.uri) + for s1, s2 in self.co_iter(self): + if s1 == s2: + continue + overlaps_tl.add(s1 & s2) + return overlaps_tl.support() + + def extrude(self, + removed: Support, + mode: CropMode = 'strict') -> 'Timeline': + """Remove segments that overlap `removed` support. + + Parameters + ---------- + removed : Segment or Timeline + If `support` is a `Timeline`, its support is used. + mode : {'strict', 'loose'}, optional + Controls how segments that are not fully included in `removed` are + handled. 'strict' mode only removes fully included segments. 'loose' + mode removes any intersecting segment. 'intersection' mode removes + the overlapping part of any intersecting segment. + + Returns + ------- + extruded : Timeline + Extruded timeline + + Examples + -------- + + >>> timeline = Timeline([Segment(0, 2), Segment(1, 2), Segment(3, 5)]) + >>> timeline.extrude(Segment(1, 2)) + , ])> + + >>> timeline.extrude(Segment(1, 3), mode='loose') + ])> + + >>> timeline.extrude(Segment(1, 3), mode='strict') + , ])> + + """ + if isinstance(removed, Segment): + removed = Timeline([removed]) + + extent_tl = Timeline([self.extent()], uri=self.uri) + truncating_support = removed.gaps(support=extent_tl) + # loose for truncate means strict for crop and vice-versa + if mode == "loose": + mode = "strict" + elif mode == "strict": + mode = "loose" + return self.crop(truncating_support, mode=mode) + + + def __repr__(self): + """Computer-readable representation + + >>> Timeline(segments=[Segment(0, 10), Segment(1, 13.37)]) + , ])> + + """ + + return "" % (self.uri, + list(self.segments_list_)) + + def __contains__(self, included: Union[Segment, 'Timeline']): + """Inclusion + + Check whether every segment of `included` does exist in timeline. + + Parameters + ---------- + included : Segment or Timeline + Segment or timeline being checked for inclusion + + Returns + ------- + contains : bool + True if every segment in `included` exists in timeline, + False otherwise + + Examples + -------- + >>> timeline1 = Timeline(segments=[Segment(0, 10), Segment(1, 13.37)]) + >>> timeline2 = Timeline(segments=[Segment(0, 10)]) + >>> timeline1 in timeline2 + False + >>> timeline2 in timeline1 + >>> Segment(1, 13.37) in timeline1 + True + + """ + + if isinstance(included, Segment): + return included in self.segments_set_ + + elif isinstance(included, Timeline): + return self.segments_set_.issuperset(included.segments_set_) + + else: + raise TypeError( + 'Checking for inclusion only supports Segment and ' + 'Timeline instances') + + def empty(self) -> 'Timeline': + """Return an empty copy + + Returns + ------- + empty : Timeline + Empty timeline using the same 'uri' attribute. + + """ + return Timeline(uri=self.uri) + + def covers(self, other: 'Timeline') -> bool: + """Check whether other timeline is fully covered by the timeline + + Parameter + --------- + other : Timeline + Second timeline + + Returns + ------- + covers : bool + True if timeline covers "other" timeline entirely. False if at least + one segment of "other" is not fully covered by timeline + """ + + # compute gaps within "other" extent + # this is where we should look for possible faulty segments + gaps = self.gaps(support=other.extent()) + + # if at least one gap intersects with a segment from "other", + # "self" does not cover "other" entirely --> return False + for _ in gaps.co_iter(other): + return False + + # if no gap intersects with a segment from "other", + # "self" covers "other" entirely --> return True + return True + + def copy(self, segment_func: Optional[Callable[[Segment], Segment]] = None) \ + -> 'Timeline': + """Get a copy of the timeline + + If `segment_func` is provided, it is applied to each segment first. + + Parameters + ---------- + segment_func : callable, optional + Callable that takes a segment as input, and returns a segment. + Defaults to identity function (segment_func(segment) = segment) + + Returns + ------- + timeline : Timeline + Copy of the timeline + + """ + + # if segment_func is not provided + # just add every segment + if segment_func is None: + return Timeline(segments=self.segments_list_, uri=self.uri) + + # if is provided + # apply it to each segment before adding them + return Timeline(segments=[segment_func(s) for s in self.segments_list_], + uri=self.uri) + + def extent(self) -> Segment: + return Segment(start=self.start, end=self.end) + + def support_iter(self, collar: float = 0.0) -> Iterator[Segment]: + """Like `support` but returns a segment generator instead + + See also + -------- + :func:`pyannote.core.Timeline.support` + """ + + # The support of an empty timeline is an empty timeline. + if not self: + return + + # Principle: + # * gather all segments with no gap between them + # * add one segment per resulting group (their union |) + # Note: + # Since segments are kept sorted internally, + # there is no need to perform an exhaustive segment clustering. + # We just have to consider them in their natural order. + + # Initialize new support segment + # as very first segment of the timeline + new_segment = self.segments_list_[0] + + for segment in self: + + # If there is no gap between new support segment and next segment + # OR there is a gap with duration < collar seconds, + possible_gap = segment ^ new_segment + if not possible_gap or possible_gap.duration < collar: + # Extend new support segment using next segment + new_segment |= segment + + # If there actually is a gap and the gap duration >= collar + # seconds, + else: + yield new_segment + + # Initialize new support segment as next segment + # (right after the gap) + new_segment = segment + + # Add new segment to the timeline support + yield new_segment + + def support(self, collar: float = 0.) -> 'Timeline': + """Timeline support + + The support of a timeline is the timeline with the minimum number of + segments with exactly the same time span as the original timeline. It + is (by definition) unique and does not contain any overlapping + segments. + + A picture is worth a thousand words:: + + collar + |---| + + timeline + |------| |------| |----| + |--| |-----| |----------| + + timeline.support() + |------| |--------| |----------| + + timeline.support(collar) + |------------------| |----------| + + Parameters + ---------- + collar : float, optional + Merge separated by less than `collar` seconds. This is why there + are only two segments in the final timeline in the above figure. + Defaults to 0. + + Returns + ------- + support : Timeline + Timeline support + """ + return Timeline(segments=self.support_iter(collar), uri=self.uri) + + def duration(self) -> float: + """Timeline duration + + The timeline duration is the sum of the durations of the segments + in the timeline support. + + Returns + ------- + duration : float + Duration of timeline support, in seconds. + """ + + # The timeline duration is the sum of the durations + # of the segments in the timeline support. + return sum(s.duration for s in self.support_iter()) + + def gaps_iter(self, support: Optional[Support] = None) -> Iterator[Segment]: + """Like `gaps` but returns a segment generator instead + + See also + -------- + :func:`pyannote.core.Timeline.gaps` + + """ + + if support is None: + support = self.extent() + + if not isinstance(support, (Segment, Timeline)): + raise TypeError("unsupported operand type(s) for -':" + "%s and Timeline." % type(support).__name__) + + # segment support + if isinstance(support, Segment): + + # `end` is meant to store the end time of former segment + # initialize it with beginning of provided segment `support` + end = support.start + + # support on the intersection of timeline and provided segment + for segment in self.crop(support, mode='intersection').support(): + + # add gap between each pair of consecutive segments + # if there is no gap, segment is empty, therefore not added + gap = Segment(start=end, end=segment.start) + if gap: + yield gap + + # keep track of the end of former segment + end = segment.end + + # add final gap (if not empty) + gap = Segment(start=end, end=support.end) + if gap: + yield gap + + # timeline support + elif isinstance(support, Timeline): + + # yield gaps for every segment in support of provided timeline + for segment in support.support(): + for gap in self.gaps_iter(support=segment): + yield gap + + def gaps(self, support: Optional[Support] = None) \ + -> 'Timeline': + """Gaps + + A picture is worth a thousand words:: + + timeline + |------| |------| |----| + |--| |-----| |----------| + + timeline.gaps() + |--| |--| + + Parameters + ---------- + support : None, Segment or Timeline + Support in which gaps are looked for. Defaults to timeline extent + + Returns + ------- + gaps : Timeline + Timeline made of all gaps from original timeline, and delimited + by provided support + + See also + -------- + :func:`pyannote.core.Timeline.extent` + + """ + return Timeline(segments=self.gaps_iter(support=support), + uri=self.uri) + + def segmentation(self) -> 'Timeline': + """Segmentation + + Create the unique timeline with same support and same set of segment + boundaries as original timeline, but with no overlapping segments. + + A picture is worth a thousand words:: + + timeline + |------| |------| |----| + |--| |-----| |----------| + + timeline.segmentation() + |-|--|-| |-|---|--| |--|----|--| + + Returns + ------- + timeline : Timeline + (unique) timeline with same support and same set of segment + boundaries as original timeline, but with no overlapping segments. + """ + # COMPLEXITY: O(n) + support = self.support() + + # COMPLEXITY: O(n.log n) + # get all boundaries (sorted) + # |------| |------| |----| + # |--| |-----| |----------| + # becomes + # | | | | | | | | | | | | + timestamps = set([]) + for (start, end) in self: + timestamps.add(start) + timestamps.add(end) + timestamps = sorted(timestamps) + + # create new partition timeline + # | | | | | | | | | | | | + # becomes + # |-|--|-| |-|---|--| |--|----|--| + + # start with an empty copy + timeline = Timeline(uri=self.uri) + + if len(timestamps) == 0: + return Timeline(uri=self.uri) + + segments = [] + start = timestamps[0] + for end in timestamps[1:]: + # only add segments that are covered by original timeline + segment = Segment(start=start, end=end) + if segment and support.overlapping(segment.middle): + segments.append(segment) + # next segment... + start = end + + return Timeline(segments=segments, uri=self.uri) + + def to_annotation(self, + generator: Union[str, Iterable[Label], None, None] = 'string', + modality: Optional[str] = None) \ + -> 'Annotation': + """Turn timeline into an annotation + + Each segment is labeled by a unique label. + + Parameters + ---------- + generator : 'string', 'int', or iterable, optional + If 'string' (default) generate string labels. If 'int', generate + integer labels. If iterable, use it to generate labels. + modality : str, optional + + Returns + ------- + annotation : Annotation + Annotation + """ + + from .annotation import Annotation + annotation = Annotation(uri=self.uri, modality=modality) + if generator == 'string': + from .utils.generators import string_generator + generator = string_generator() + elif generator == 'int': + from .utils.generators import int_generator + generator = int_generator() + + for segment in self: + annotation[segment] = next(generator) + + return annotation + + def _iter_uem(self) -> Iterator[Text]: + """Generate lines for a UEM file for this timeline + + Returns + ------- + iterator: Iterator[str] + An iterator over UEM text lines + """ + uri = self.uri if self.uri else "" + if isinstance(uri, Text) and ' ' in uri: + msg = (f'Space-separated UEM file format does not allow file URIs ' + f'containing spaces (got: "{uri}").') + raise ValueError(msg) + for segment in self: + yield f"{uri} 1 {segment.start:.3f} {segment.end:.3f}\n" + + def to_uem(self) -> Text: + """Serialize timeline as a string using UEM format + + Returns + ------- + serialized: str + UEM string + """ + return "".join([line for line in self._iter_uem()]) + + def write_uem(self, file: TextIO): + """Dump timeline to file using UEM format + + Parameters + ---------- + file : file object + + Usage + ----- + >>> with open('file.uem', 'w') as file: + ... timeline.write_uem(file) + """ + for line in self._iter_uem(): + file.write(line) + + def for_json(self): + """Serialization + + See also + -------- + :mod:`pyannote.core.json` + """ + + data = {PYANNOTE_JSON: self.__class__.__name__} + data[PYANNOTE_JSON_CONTENT] = [s.for_json() for s in self] + + if self.uri: + data[PYANNOTE_URI] = self.uri + + return data + + @classmethod + def from_json(cls, data): + """Deserialization + + See also + -------- + :mod:`pyannote.core.json` + """ + + uri = data.get(PYANNOTE_URI, None) + segments = [Segment.from_json(s) for s in data[PYANNOTE_JSON_CONTENT]] + return cls(segments=segments, uri=uri) + + def _repr_png_(self): + """IPython notebook support + + See also + -------- + :mod:`pyannote.core.notebook` + """ + from .notebook import MATPLOTLIB_IS_AVAILABLE, MATPLOTLIB_WARNING + if not MATPLOTLIB_IS_AVAILABLE: + warnings.warn(MATPLOTLIB_WARNING.format(klass=self.__class__.__name__)) + return None + + from .notebook import repr_timeline + return repr_timeline(self) diff --git a/pyannote/core/timeline.py b/pyannote/core/timeline.py index efa442a..1a80dda 100755 --- a/pyannote/core/timeline.py +++ b/pyannote/core/timeline.py @@ -93,6 +93,7 @@ TextIO, Tuple, TYPE_CHECKING, Iterator, Dict, Text) from sortedcontainers import SortedList +from typing_extensions import Self from . import PYANNOTE_URI, PYANNOTE_SEGMENT from .json import PYANNOTE_JSON, PYANNOTE_JSON_CONTENT @@ -202,7 +203,7 @@ def __getitem__(self, k: int) -> Segment: """ return self.segments_list_[k] - def __eq__(self, other: 'Timeline'): + def __eq__(self, other: Self): """Equality Two timelines are equal if and only if their segments are equal. @@ -217,7 +218,7 @@ def __eq__(self, other: 'Timeline'): """ return self.segments_set_ == other.segments_set_ - def __ne__(self, other: 'Timeline'): + def __ne__(self, other: Self): """Inequality""" return self.segments_set_ != other.segments_set_ diff --git a/pyannote/core/utils/loaders.py b/pyannote/core/utils/loaders.py new file mode 100644 index 0000000..7abdb35 --- /dev/null +++ b/pyannote/core/utils/loaders.py @@ -0,0 +1,214 @@ +#!/usr/bin/env python +# encoding: utf-8 + +# The MIT License (MIT) + +# Copyright (c) 2016-2020 CNRS + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +# AUTHORS +# Hervé BREDIN - http://herve.niderb.fr + +from typing import Text + +import pandas as pd + +from pyannote.core import Segment, Timeline, Annotation + +DatabaseName = Text +PathTemplate = Text + + +def load_rttm(file_rttm): + """Load RTTM file + + Parameter + --------- + file_rttm : `str` + Path to RTTM file. + + Returns + ------- + annotations : `dict` + Speaker diarization as a {uri: pyannote.core.Annotation} dictionary. + """ + + names = [ + "NA1", + "uri", + "NA2", + "start", + "duration", + "NA3", + "NA4", + "speaker", + "NA5", + "NA6", + ] + dtype = {"uri": str, "start": float, "duration": float, "speaker": str} + data = pd.read_csv( + file_rttm, + names=names, + dtype=dtype, + delim_whitespace=True, + keep_default_na=False, + ) + + annotations = dict() + for uri, turns in data.groupby("uri"): + annotation = Annotation(uri=uri) + for i, turn in turns.iterrows(): + segment = Segment(turn.start, turn.start + turn.duration) + annotation[segment, i] = turn.speaker + annotations[uri] = annotation + + return annotations + + +def load_mdtm(file_mdtm): + """Load MDTM file + + Parameter + --------- + file_mdtm : `str` + Path to MDTM file. + + Returns + ------- + annotations : `dict` + Speaker diarization as a {uri: pyannote.core.Annotation} dictionary. + """ + + names = ["uri", "NA1", "start", "duration", "NA2", "NA3", "NA4", "speaker"] + dtype = {"uri": str, "start": float, "duration": float, "speaker": str} + data = pd.read_csv( + file_mdtm, + names=names, + dtype=dtype, + delim_whitespace=True, + keep_default_na=False, + ) + + annotations = dict() + for uri, turns in data.groupby("uri"): + annotation = Annotation(uri=uri) + for i, turn in turns.iterrows(): + segment = Segment(turn.start, turn.start + turn.duration) + annotation[segment, i] = turn.speaker + annotations[uri] = annotation + + return annotations + + +def load_uem(file_uem): + """Load UEM file + + Parameter + --------- + file_uem : `str` + Path to UEM file. + + Returns + ------- + timelines : `dict` + Evaluation map as a {uri: pyannote.core.Timeline} dictionary. + """ + + names = ["uri", "NA1", "start", "end"] + dtype = {"uri": str, "start": float, "end": float} + data = pd.read_csv(file_uem, names=names, dtype=dtype, delim_whitespace=True) + + timelines = dict() + for uri, parts in data.groupby("uri"): + segments = [Segment(part.start, part.end) for i, part in parts.iterrows()] + timelines[uri] = Timeline(segments=segments, uri=uri) + + return timelines + + +def load_lab(path, uri: str = None) -> Annotation: + """Load LAB file + + Parameter + --------- + file_lab : `str` + Path to LAB file + + Returns + ------- + data : `pyannote.core.Annotation` + """ + + names = ["start", "end", "label"] + dtype = {"start": float, "end": float, "label": str} + data = pd.read_csv(path, names=names, dtype=dtype, delim_whitespace=True) + + annotation = Annotation(uri=uri) + for i, turn in data.iterrows(): + segment = Segment(turn.start, turn.end) + annotation[segment, i] = turn.label + + return annotation + + +def load_lst(file_lst): + """Load LST file + + LST files provide a list of URIs (one line per URI) + + Parameter + --------- + file_lst : `str` + Path to LST file. + + Returns + ------- + uris : `list` + List or uris + """ + + with open(file_lst, mode="r") as fp: + lines = fp.readlines() + return [line.strip() for line in lines] + + +def load_mapping(mapping_txt): + """Load mapping file + + Parameter + --------- + mapping_txt : `str` + Path to mapping file + + Returns + ------- + mapping : `dict` + {1st field: 2nd field} dictionary + """ + + with open(mapping_txt, mode="r") as fp: + lines = fp.readlines() + + mapping = dict() + for line in lines: + key, value, *left = line.strip().split() + mapping[key] = value + + return mapping From 226ec8f96d98e9f4e312aa55034e3ebf3e53aa9f Mon Sep 17 00:00:00 2001 From: hadware Date: Fri, 10 Feb 2023 20:03:26 +0100 Subject: [PATCH 22/30] Set up the base classes, their abstract methods and the inheritance hierarchy for the new Partition and Tiers classes --- pyannote/core/annotation.py | 23 +- pyannote/core/base.py | 84 +++- pyannote/core/partition.py | 923 +++-------------------------------- pyannote/core/textgrid.py | 135 ++--- pyannote/core/timeline.py | 6 +- pyannote/core/utils/types.py | 2 +- 6 files changed, 181 insertions(+), 992 deletions(-) diff --git a/pyannote/core/annotation.py b/pyannote/core/annotation.py index 20a182f..cdbce04 100755 --- a/pyannote/core/annotation.py +++ b/pyannote/core/annotation.py @@ -134,6 +134,7 @@ PYANNOTE_TRACK, PYANNOTE_LABEL, ) +from .base import BaseSegmentation, GappedAnnotationMixin from .json import PYANNOTE_JSON, PYANNOTE_JSON_CONTENT from .segment import Segment, SlidingWindow from .timeline import Timeline @@ -145,7 +146,7 @@ import pandas as pd -class Annotation: +class Annotation(GappedAnnotationMixin, BaseSegmentation): """Annotation Parameters @@ -175,7 +176,7 @@ def from_df( def __init__(self, uri: Optional[str] = None, modality: Optional[str] = None): - self._uri: Optional[str] = uri + super().__init__(uri) self.modality: Optional[str] = modality # sorted dictionary @@ -207,7 +208,7 @@ def uri(self, uri: str): timeline.uri = uri self._uri = uri - def _updateLabels(self): + def _update_labels(self): # list of labels that needs to be updated update = set( @@ -293,7 +294,7 @@ def itertracks( else: yield segment, track - def _updateTimeline(self): + def _update_timeline(self): self._timeline = Timeline(segments=self._tracks, uri=self.uri) self._timelineNeedsUpdate = False @@ -319,7 +320,7 @@ def get_timeline(self, copy: bool = True) -> Timeline: """ if self._timelineNeedsUpdate: - self._updateTimeline() + self._update_timeline() if copy: return self._timeline.copy() return self._timeline @@ -332,18 +333,18 @@ def __eq__(self, other: "Annotation"): Two annotations are equal if and only if their tracks and associated labels are equal. """ - pairOfTracks = itertools.zip_longest( + pair_of_tracks = itertools.zip_longest( self.itertracks(yield_label=True), other.itertracks(yield_label=True) ) - return all(t1 == t2 for t1, t2 in pairOfTracks) + return all(t1 == t2 for t1, t2 in pair_of_tracks) def __ne__(self, other: "Annotation"): """Inequality""" - pairOfTracks = itertools.zip_longest( + pair_of_tracks = itertools.zip_longest( self.itertracks(yield_label=True), other.itertracks(yield_label=True) ) - return any(t1 != t2 for t1, t2 in pairOfTracks) + return any(t1 != t2 for t1, t2 in pair_of_tracks) def __contains__(self, included: Union[Segment, Timeline]): """Inclusion @@ -913,7 +914,7 @@ def labels(self) -> List[Label]: Sorted list of labels """ if any([lnu for lnu in self._labelNeedsUpdate.values()]): - self._updateLabels() + self._update_labels() return sorted(self._labels, key=str) def get_labels( @@ -1060,7 +1061,7 @@ def label_timeline(self, label: Label, copy: bool = True) -> Timeline: return Timeline(uri=self.uri) if self._labelNeedsUpdate[label]: - self._updateLabels() + self._update_labels() if copy: return self._labels[label].copy() diff --git a/pyannote/core/base.py b/pyannote/core/base.py index bc8f166..4b24943 100644 --- a/pyannote/core/base.py +++ b/pyannote/core/base.py @@ -1,5 +1,5 @@ from abc import ABCMeta, abstractmethod -from typing import Optional, Iterable, Iterator, Tuple, Union, Dict, TYPE_CHECKING, Callable +from typing import Optional, Iterator, Tuple, Union, Dict, TYPE_CHECKING, Callable, List from typing_extensions import Self @@ -13,6 +13,18 @@ class BaseSegmentation(metaclass=ABCMeta): """Abstract base class for all segmented annotations""" + def __init__(self, uri: Optional[str] = None): + # path to (or any identifier of) segmented resource + self._uri: Optional[str] = uri + + @property + def uri(self): + return self._uri + + @uri.setter + def uri(self, uri: str): + self._uri = uri + @abstractmethod def __len__(self) -> int: pass @@ -23,6 +35,8 @@ def __nonzero__(self): @abstractmethod def __bool__(self): + """Truthiness of the segmentation. Truthy means that it contains something + False means it's empty.""" pass @abstractmethod @@ -33,6 +47,14 @@ def __eq__(self, other: Self): def __ne__(self, other: Self): pass + @abstractmethod + def itersegments(self): + pass + + @abstractmethod + def get_timeline(self) -> 'Timeline': + pass + @abstractmethod def update(self, other: Self) -> Self: pass @@ -54,11 +76,11 @@ def __repr__(self): pass @abstractmethod - def __contains__(self, included: Union[Segment, 'Timeline']): + def __contains__(self, included: Union[Segment, 'Timeline']) -> bool: pass @abstractmethod - def empty(self) -> 'Timeline': + def empty(self) -> Self: pass @abstractmethod @@ -71,11 +93,11 @@ def extent(self) -> Segment: pass @abstractmethod - def support_iter(self, collar: float = 0.0) -> Iterator[Segment]: - pass - - @abstractmethod - def support(self, collar: float = 0.) -> 'Timeline': + def crop_iter(self, + support: Support, + mode: CropMode = 'intersection', + returns_mapping: bool = False) \ + -> Iterator[Union[Tuple[Segment, Segment], Segment]]: pass @abstractmethod @@ -83,47 +105,55 @@ def duration(self) -> float: pass @abstractmethod - def for_json(self): + def _repr_png_(self): pass - @classmethod - # TODO - def from_json(cls, data): - pass + +class GappedAnnotationMixin(metaclass=ABCMeta): @abstractmethod - def _repr_png_(self): + def gaps_iter(self, support: Optional[Support] = None) -> Iterator[Segment]: pass - -class GappedAnnotationMixin(metaclass=ABCMeta): + @abstractmethod + def gaps(self, support: Optional[Support] = None) -> 'Timeline': + pass @abstractmethod - def crop_iter(self, - support: Support, - mode: CropMode = 'intersection', - returns_mapping: bool = False) \ - -> Iterator[Union[Tuple[Segment, Segment], Segment]]: + def extrude(self, + removed: Support, + mode: CropMode = 'intersection') -> Self: pass + # TODO : maybe put inside the base seg class, add conditions for + # partition @abstractmethod def crop(self, support: Support, mode: CropMode = 'intersection', returns_mapping: bool = False) \ - -> Union['Timeline', Tuple['Timeline', Dict[Segment, Segment]]]: + -> Union[Self, Tuple[Self, Dict[Segment, Segment]]]: pass @abstractmethod - def gaps_iter(self, support: Optional[Support] = None) -> Iterator[Segment]: + def support(self, collar: float = 0.) -> Self: pass + +class SegmentSet(metaclass=ABCMeta): + @abstractmethod - def gaps(self, support: Optional[Support] = None) -> 'Timeline': + def add(self, segment: Segment): pass @abstractmethod - def extrude(self, - removed: Support, - mode: CropMode = 'intersection') -> 'Timeline': + def remove(self, segment: Segment): + pass + + @abstractmethod + def index(self, segment: Segment) -> int: + pass + + @abstractmethod + def overlapping(self, t: float) -> List[Segment]: pass diff --git a/pyannote/core/partition.py b/pyannote/core/partition.py index 83f8e5e..18555cd 100755 --- a/pyannote/core/partition.py +++ b/pyannote/core/partition.py @@ -91,10 +91,12 @@ import warnings from typing import (Optional, Iterable, List, Union, Callable, TextIO, Tuple, TYPE_CHECKING, Iterator, Dict, Text) +from typing_extensions import Self from sortedcontainers import SortedList from . import PYANNOTE_URI, PYANNOTE_SEGMENT, Timeline +from .base import BaseSegmentation, SegmentSet from .json import PYANNOTE_JSON, PYANNOTE_JSON_CONTENT from .segment import Segment from .utils.types import Support, Label, CropMode @@ -110,19 +112,23 @@ # TODO: Questions: # - "autofill" the partition if the initialized segments aren't filling? # - partition empty if only one segment? +# - truthiness? -class Partition(Timeline): +class Partition(SegmentSet, BaseSegmentation): """ - Ordered set of segments. + Ordered set of segments that are all contiguous. - A timeline can be seen as an ordered set of non-empty segments (Segment). - Segments can overlap -- though adding an already exisiting segment to a - timeline does nothing. + It has a start and a end boundary, and its segments form a continuum + between those two boundaries. Segments can be created by bisecting + the partition at certain points, and removing a segment amounts to + removing the bisections. Parameters ---------- segments : Segment iterator, optional initial set of (non-empty) segments + start: float TODO + end: float TODO uri : string, optional name of segmented resource @@ -147,900 +153,87 @@ def __init__(self, self.start = start self.end = end - super().__init__(segments, uri) + self._boundaries = None # TODO: figure out if needed + super().__init__(uri) # TODO: check "filling"? autofill if not valid? self.update(self.gaps(support=self.extent())) if self[0].start < self.start or self[-1].end > self.end: raise ValueError(f"Segments have to be within ({start, end}) bounds") + def __len__(self) -> int: + pass + def __nonzero__(self): - # TODO - return self.__bool__() + pass def __bool__(self): - # TODO - """Emptiness - - >>> if timeline: - ... # timeline is not empty - ... else: - ... # timeline is empty - """ - return len(self.segments_set_) > 0 - - def bisect(self, at: float): pass - def add(self, segment: Segment) -> 'Timeline': - """Add a segment (in place) - - Parameters - ---------- - segment : Segment - Segment that is being added - - Returns - ------- - self : Timeline - Updated timeline. - - Note - ---- - If the timeline already contains this segment, it will not be added - again, as a timeline is meant to be a **set** of segments (not a list). - - If the segment is empty, it will not be added either, as a timeline - only contains non-empty segments. - """ - - segments_set_ = self.segments_set_ - if segment in segments_set_ or not segment: - return self - - segments_set_.add(segment) - - self.segments_list_.add(segment) - - segments_boundaries_ = self.segments_boundaries_ - segments_boundaries_.add(segment.start) - segments_boundaries_.add(segment.end) - - return self - - - - def remove(self, segment: Segment) -> 'Timeline': - """Remove a segment (in place) - - Parameters - ---------- - segment : Segment - Segment that is being removed - - Returns - ------- - self : Timeline - Updated timeline. - - Note - ---- - If the timeline does not contain this segment, this does nothing - """ - - segments_set_ = self.segments_set_ - if segment not in segments_set_: - return self - - segments_set_.remove(segment) - - self.segments_list_.remove(segment) - - segments_boundaries_ = self.segments_boundaries_ - segments_boundaries_.remove(segment.start) - segments_boundaries_.remove(segment.end) - - return self - - def discard(self, segment: Segment) -> 'Timeline': - """Same as `remove` - - See also - -------- - :func:`pyannote.core.Timeline.remove` - """ - return self.remove(segment) - - def __ior__(self, timeline: 'Timeline') -> 'Timeline': - return self.update(timeline) - - def update(self, timeline: 'Timeline') -> 'Timeline': - """Add every segments of an existing timeline (in place) - - Parameters - ---------- - timeline : Timeline - Timeline whose segments are being added - - Returns - ------- - self : Timeline - Updated timeline - - Note - ---- - Only segments that do not already exist will be added, as a timeline is - meant to be a **set** of segments (not a list). - - """ - - segments_set = self.segments_set_ + def __eq__(self, other: Self): + pass - segments_set |= timeline.segments_set_ + def __ne__(self, other: Self): + pass - # sorted list of segments (used for sorted iteration) - self.segments_list_ = SortedList(segments_set) + def bisect(self, at: float): + pass - # sorted list of (possibly redundant) segment boundaries - boundaries = (boundary for segment in segments_set for boundary in segment) - self.segments_boundaries_ = SortedList(boundaries) + def add(self, segment: Segment): + pass - return self + def remove(self, segment: Segment): + pass - def __or__(self, timeline: Union['Timeline', Segment]) -> 'Timeline': - return self.union(timeline) + def itersegments(self): + pass - def union(self, timeline: Union['Timeline', Segment]) -> 'Timeline': - """Create new timeline made of union of segments + def get_timeline(self) -> 'Timeline': + pass - Parameters - ---------- - timeline : Timeline - Timeline whose segments are being added + def update(self, other: Self) -> Self: + pass - Returns - ------- - union : Timeline - New timeline containing the union of both timelines. - - Note - ---- - This does the same as timeline.update(...) except it returns a new - timeline, and the original one is not modified. - """ - if isinstance(timeline, Segment): - timeline = Timeline([timeline]) - - segments = self.segments_set_ | timeline.segments_set_ - return Timeline(segments=segments, uri=self.uri) - - def __and__(self, timeline: Union['Timeline', Segment]) -> 'Timeline': - return self.crop(timeline, mode="intersection") - - def co_iter(self, other: Union['Timeline', Segment]) -> Iterator[Tuple[Segment, Segment]]: - """Iterate over pairs of intersecting segments - - >>> timeline1 = Timeline([Segment(0, 2), Segment(1, 2), Segment(3, 4)]) - >>> timeline2 = Timeline([Segment(1, 3), Segment(3, 5)]) - >>> for segment1, segment2 in timeline1.co_iter(timeline2): - ... print(segment1, segment2) - (, ) - (, ) - (, ) - - Parameters - ---------- - other : Timeline - Second timeline - - Returns - ------- - iterable : (Segment, Segment) iterable - Yields pairs of intersecting segments in chronological order. - """ - if isinstance(other, Segment): - other = Timeline([other]) - - for segment in self.segments_list_: - - # iterate over segments that starts before 'segment' ends - temp = Segment(start=segment.end, end=segment.end) - for other_segment in other.segments_list_.irange(maximum=temp): - if segment.intersects(other_segment): - yield segment, other_segment - - def crop_iter(self, - support: Support, - mode: CropMode = 'intersection', - returns_mapping: bool = False) \ - -> Iterator[Union[Tuple[Segment, Segment], Segment]]: - """Like `crop` but returns a segment iterator instead - - See also - -------- - :func:`pyannote.core.Timeline.crop` - """ - - if mode not in {'loose', 'strict', 'intersection'}: - raise ValueError("Mode must be one of 'loose', 'strict', or " - "'intersection'.") - - if not isinstance(support, (Segment, Timeline)): - raise TypeError("Support must be a Segment or a Timeline.") - - if isinstance(support, Segment): - # corner case where "support" is empty - if support: - segments = [support] - else: - segments = [] - - support = Timeline(segments=segments, uri=self.uri) - for yielded in self.crop_iter(support, mode=mode, - returns_mapping=returns_mapping): - yield yielded - return - - # if 'support' is a `Timeline`, we use its support - support = support.support() - - # loose mode - if mode == 'loose': - for segment, _ in self.co_iter(support): - yield segment - return - - # strict mode - if mode == 'strict': - for segment, other_segment in self.co_iter(support): - if segment in other_segment: - yield segment - return - - # intersection mode - for segment, other_segment in self.co_iter(support): - mapped_to = segment & other_segment - if not mapped_to: - continue - if returns_mapping: - yield segment, mapped_to - else: - yield mapped_to - - def crop(self, - support: Support, - mode: CropMode = 'intersection', - returns_mapping: bool = False) \ - -> Union['Timeline', Tuple['Timeline', Dict[Segment, Segment]]]: - """Crop timeline to new support - - Parameters - ---------- - support : Segment or Timeline - If `support` is a `Timeline`, its support is used. - mode : {'strict', 'loose', 'intersection'}, optional - Controls how segments that are not fully included in `support` are - handled. 'strict' mode only keeps fully included segments. 'loose' - mode keeps any intersecting segment. 'intersection' mode keeps any - intersecting segment but replace them by their actual intersection. - returns_mapping : bool, optional - In 'intersection' mode, return a dictionary whose keys are segments - of the cropped timeline, and values are list of the original - segments that were cropped. Defaults to False. - - Returns - ------- - cropped : Timeline - Cropped timeline - mapping : dict - When 'returns_mapping' is True, dictionary whose keys are segments - of 'cropped', and values are lists of corresponding original - segments. - - Examples - -------- - - >>> timeline = Timeline([Segment(0, 2), Segment(1, 2), Segment(3, 4)]) - >>> timeline.crop(Segment(1, 3)) - ])> - - >>> timeline.crop(Segment(1, 3), mode='loose') - , ])> - - >>> timeline.crop(Segment(1, 3), mode='strict') - ])> - - >>> cropped, mapping = timeline.crop(Segment(1, 3), returns_mapping=True) - >>> print(mapping) - {: [, ]} - - """ - - if mode == 'intersection' and returns_mapping: - segments, mapping = [], {} - for segment, mapped_to in self.crop_iter(support, - mode='intersection', - returns_mapping=True): - segments.append(mapped_to) - mapping[mapped_to] = mapping.get(mapped_to, list()) + [segment] - return Timeline(segments=segments, uri=self.uri), mapping - - return Timeline(segments=self.crop_iter(support, mode=mode), - uri=self.uri) - - def overlapping(self, t: float) -> List[Segment]: - """Get list of segments overlapping `t` - - Parameters - ---------- - t : float - Timestamp, in seconds. - - Returns - ------- - segments : list - List of all segments of timeline containing time t - """ - return list(self.overlapping_iter(t)) - - def overlapping_iter(self, t: float) -> Iterator[Segment]: - """Like `overlapping` but returns a segment iterator instead - - See also - -------- - :func:`pyannote.core.Timeline.overlapping` - """ - segment = Segment(start=t, end=t) - for segment in self.segments_list_.irange(maximum=segment): - if segment.overlaps(t): - yield segment + def co_iter(self, other: 'BaseSegmentation') -> Iterator[Tuple[Segment, Segment]]: + pass def get_overlap(self) -> 'Timeline': - """Get overlapping parts of the timeline. - - A simple illustration: - - timeline - |------| |------| |----| - |--| |-----| |----------| - - timeline.get_overlap() - |--| |---| |----| - - - Returns - ------- - overlap : `pyannote.core.Timeline` - Timeline of the overlaps. - """ - overlaps_tl = Timeline(uri=self.uri) - for s1, s2 in self.co_iter(self): - if s1 == s2: - continue - overlaps_tl.add(s1 & s2) - return overlaps_tl.support() - - def extrude(self, - removed: Support, - mode: CropMode = 'strict') -> 'Timeline': - """Remove segments that overlap `removed` support. - - Parameters - ---------- - removed : Segment or Timeline - If `support` is a `Timeline`, its support is used. - mode : {'strict', 'loose'}, optional - Controls how segments that are not fully included in `removed` are - handled. 'strict' mode only removes fully included segments. 'loose' - mode removes any intersecting segment. 'intersection' mode removes - the overlapping part of any intersecting segment. - - Returns - ------- - extruded : Timeline - Extruded timeline - - Examples - -------- - - >>> timeline = Timeline([Segment(0, 2), Segment(1, 2), Segment(3, 5)]) - >>> timeline.extrude(Segment(1, 2)) - , ])> - - >>> timeline.extrude(Segment(1, 3), mode='loose') - ])> - - >>> timeline.extrude(Segment(1, 3), mode='strict') - , ])> - - """ - if isinstance(removed, Segment): - removed = Timeline([removed]) - - extent_tl = Timeline([self.extent()], uri=self.uri) - truncating_support = removed.gaps(support=extent_tl) - # loose for truncate means strict for crop and vice-versa - if mode == "loose": - mode = "strict" - elif mode == "strict": - mode = "loose" - return self.crop(truncating_support, mode=mode) + pass + def __str__(self): + pass def __repr__(self): - """Computer-readable representation - - >>> Timeline(segments=[Segment(0, 10), Segment(1, 13.37)]) - , ])> - - """ + pass - return "" % (self.uri, - list(self.segments_list_)) - - def __contains__(self, included: Union[Segment, 'Timeline']): - """Inclusion - - Check whether every segment of `included` does exist in timeline. - - Parameters - ---------- - included : Segment or Timeline - Segment or timeline being checked for inclusion - - Returns - ------- - contains : bool - True if every segment in `included` exists in timeline, - False otherwise - - Examples - -------- - >>> timeline1 = Timeline(segments=[Segment(0, 10), Segment(1, 13.37)]) - >>> timeline2 = Timeline(segments=[Segment(0, 10)]) - >>> timeline1 in timeline2 - False - >>> timeline2 in timeline1 - >>> Segment(1, 13.37) in timeline1 - True - - """ - - if isinstance(included, Segment): - return included in self.segments_set_ - - elif isinstance(included, Timeline): - return self.segments_set_.issuperset(included.segments_set_) - - else: - raise TypeError( - 'Checking for inclusion only supports Segment and ' - 'Timeline instances') - - def empty(self) -> 'Timeline': - """Return an empty copy - - Returns - ------- - empty : Timeline - Empty timeline using the same 'uri' attribute. - - """ - return Timeline(uri=self.uri) - - def covers(self, other: 'Timeline') -> bool: - """Check whether other timeline is fully covered by the timeline - - Parameter - --------- - other : Timeline - Second timeline - - Returns - ------- - covers : bool - True if timeline covers "other" timeline entirely. False if at least - one segment of "other" is not fully covered by timeline - """ - - # compute gaps within "other" extent - # this is where we should look for possible faulty segments - gaps = self.gaps(support=other.extent()) - - # if at least one gap intersects with a segment from "other", - # "self" does not cover "other" entirely --> return False - for _ in gaps.co_iter(other): - return False - - # if no gap intersects with a segment from "other", - # "self" covers "other" entirely --> return True - return True - - def copy(self, segment_func: Optional[Callable[[Segment], Segment]] = None) \ - -> 'Timeline': - """Get a copy of the timeline - - If `segment_func` is provided, it is applied to each segment first. - - Parameters - ---------- - segment_func : callable, optional - Callable that takes a segment as input, and returns a segment. - Defaults to identity function (segment_func(segment) = segment) - - Returns - ------- - timeline : Timeline - Copy of the timeline - - """ - - # if segment_func is not provided - # just add every segment - if segment_func is None: - return Timeline(segments=self.segments_list_, uri=self.uri) - - # if is provided - # apply it to each segment before adding them - return Timeline(segments=[segment_func(s) for s in self.segments_list_], - uri=self.uri) + def __contains__(self, included: Union[Segment, 'Timeline']) -> bool: + pass + + def empty(self) -> Self: + pass + + def copy(self, segment_func: Optional[Callable[[Segment], Segment]] = None) -> Self: + pass def extent(self) -> Segment: - return Segment(start=self.start, end=self.end) + pass def support_iter(self, collar: float = 0.0) -> Iterator[Segment]: - """Like `support` but returns a segment generator instead - - See also - -------- - :func:`pyannote.core.Timeline.support` - """ - - # The support of an empty timeline is an empty timeline. - if not self: - return - - # Principle: - # * gather all segments with no gap between them - # * add one segment per resulting group (their union |) - # Note: - # Since segments are kept sorted internally, - # there is no need to perform an exhaustive segment clustering. - # We just have to consider them in their natural order. - - # Initialize new support segment - # as very first segment of the timeline - new_segment = self.segments_list_[0] - - for segment in self: - - # If there is no gap between new support segment and next segment - # OR there is a gap with duration < collar seconds, - possible_gap = segment ^ new_segment - if not possible_gap or possible_gap.duration < collar: - # Extend new support segment using next segment - new_segment |= segment - - # If there actually is a gap and the gap duration >= collar - # seconds, - else: - yield new_segment - - # Initialize new support segment as next segment - # (right after the gap) - new_segment = segment - - # Add new segment to the timeline support - yield new_segment + pass def support(self, collar: float = 0.) -> 'Timeline': - """Timeline support - - The support of a timeline is the timeline with the minimum number of - segments with exactly the same time span as the original timeline. It - is (by definition) unique and does not contain any overlapping - segments. - - A picture is worth a thousand words:: - - collar - |---| - - timeline - |------| |------| |----| - |--| |-----| |----------| - - timeline.support() - |------| |--------| |----------| - - timeline.support(collar) - |------------------| |----------| + pass - Parameters - ---------- - collar : float, optional - Merge separated by less than `collar` seconds. This is why there - are only two segments in the final timeline in the above figure. - Defaults to 0. + def crop_iter(self, support: Support, mode: CropMode = 'intersection', returns_mapping: bool = False) -> Iterator[ + Union[Tuple[Segment, Segment], Segment]]: + pass - Returns - ------- - support : Timeline - Timeline support - """ - return Timeline(segments=self.support_iter(collar), uri=self.uri) + def crop(self, support: Support, mode: CropMode = 'intersection', returns_mapping: bool = False) -> Union[ + Self, Tuple[Self, Dict[Segment, Segment]]]: + pass def duration(self) -> float: - """Timeline duration - - The timeline duration is the sum of the durations of the segments - in the timeline support. - - Returns - ------- - duration : float - Duration of timeline support, in seconds. - """ - - # The timeline duration is the sum of the durations - # of the segments in the timeline support. - return sum(s.duration for s in self.support_iter()) - - def gaps_iter(self, support: Optional[Support] = None) -> Iterator[Segment]: - """Like `gaps` but returns a segment generator instead - - See also - -------- - :func:`pyannote.core.Timeline.gaps` - - """ - - if support is None: - support = self.extent() - - if not isinstance(support, (Segment, Timeline)): - raise TypeError("unsupported operand type(s) for -':" - "%s and Timeline." % type(support).__name__) - - # segment support - if isinstance(support, Segment): - - # `end` is meant to store the end time of former segment - # initialize it with beginning of provided segment `support` - end = support.start - - # support on the intersection of timeline and provided segment - for segment in self.crop(support, mode='intersection').support(): - - # add gap between each pair of consecutive segments - # if there is no gap, segment is empty, therefore not added - gap = Segment(start=end, end=segment.start) - if gap: - yield gap - - # keep track of the end of former segment - end = segment.end - - # add final gap (if not empty) - gap = Segment(start=end, end=support.end) - if gap: - yield gap - - # timeline support - elif isinstance(support, Timeline): - - # yield gaps for every segment in support of provided timeline - for segment in support.support(): - for gap in self.gaps_iter(support=segment): - yield gap - - def gaps(self, support: Optional[Support] = None) \ - -> 'Timeline': - """Gaps - - A picture is worth a thousand words:: - - timeline - |------| |------| |----| - |--| |-----| |----------| - - timeline.gaps() - |--| |--| - - Parameters - ---------- - support : None, Segment or Timeline - Support in which gaps are looked for. Defaults to timeline extent - - Returns - ------- - gaps : Timeline - Timeline made of all gaps from original timeline, and delimited - by provided support - - See also - -------- - :func:`pyannote.core.Timeline.extent` - - """ - return Timeline(segments=self.gaps_iter(support=support), - uri=self.uri) - - def segmentation(self) -> 'Timeline': - """Segmentation - - Create the unique timeline with same support and same set of segment - boundaries as original timeline, but with no overlapping segments. - - A picture is worth a thousand words:: - - timeline - |------| |------| |----| - |--| |-----| |----------| - - timeline.segmentation() - |-|--|-| |-|---|--| |--|----|--| - - Returns - ------- - timeline : Timeline - (unique) timeline with same support and same set of segment - boundaries as original timeline, but with no overlapping segments. - """ - # COMPLEXITY: O(n) - support = self.support() - - # COMPLEXITY: O(n.log n) - # get all boundaries (sorted) - # |------| |------| |----| - # |--| |-----| |----------| - # becomes - # | | | | | | | | | | | | - timestamps = set([]) - for (start, end) in self: - timestamps.add(start) - timestamps.add(end) - timestamps = sorted(timestamps) - - # create new partition timeline - # | | | | | | | | | | | | - # becomes - # |-|--|-| |-|---|--| |--|----|--| - - # start with an empty copy - timeline = Timeline(uri=self.uri) - - if len(timestamps) == 0: - return Timeline(uri=self.uri) - - segments = [] - start = timestamps[0] - for end in timestamps[1:]: - # only add segments that are covered by original timeline - segment = Segment(start=start, end=end) - if segment and support.overlapping(segment.middle): - segments.append(segment) - # next segment... - start = end - - return Timeline(segments=segments, uri=self.uri) - - def to_annotation(self, - generator: Union[str, Iterable[Label], None, None] = 'string', - modality: Optional[str] = None) \ - -> 'Annotation': - """Turn timeline into an annotation - - Each segment is labeled by a unique label. - - Parameters - ---------- - generator : 'string', 'int', or iterable, optional - If 'string' (default) generate string labels. If 'int', generate - integer labels. If iterable, use it to generate labels. - modality : str, optional - - Returns - ------- - annotation : Annotation - Annotation - """ - - from .annotation import Annotation - annotation = Annotation(uri=self.uri, modality=modality) - if generator == 'string': - from .utils.generators import string_generator - generator = string_generator() - elif generator == 'int': - from .utils.generators import int_generator - generator = int_generator() - - for segment in self: - annotation[segment] = next(generator) - - return annotation - - def _iter_uem(self) -> Iterator[Text]: - """Generate lines for a UEM file for this timeline - - Returns - ------- - iterator: Iterator[str] - An iterator over UEM text lines - """ - uri = self.uri if self.uri else "" - if isinstance(uri, Text) and ' ' in uri: - msg = (f'Space-separated UEM file format does not allow file URIs ' - f'containing spaces (got: "{uri}").') - raise ValueError(msg) - for segment in self: - yield f"{uri} 1 {segment.start:.3f} {segment.end:.3f}\n" - - def to_uem(self) -> Text: - """Serialize timeline as a string using UEM format - - Returns - ------- - serialized: str - UEM string - """ - return "".join([line for line in self._iter_uem()]) - - def write_uem(self, file: TextIO): - """Dump timeline to file using UEM format - - Parameters - ---------- - file : file object - - Usage - ----- - >>> with open('file.uem', 'w') as file: - ... timeline.write_uem(file) - """ - for line in self._iter_uem(): - file.write(line) - - def for_json(self): - """Serialization - - See also - -------- - :mod:`pyannote.core.json` - """ - - data = {PYANNOTE_JSON: self.__class__.__name__} - data[PYANNOTE_JSON_CONTENT] = [s.for_json() for s in self] - - if self.uri: - data[PYANNOTE_URI] = self.uri - - return data - - @classmethod - def from_json(cls, data): - """Deserialization - - See also - -------- - :mod:`pyannote.core.json` - """ - - uri = data.get(PYANNOTE_URI, None) - segments = [Segment.from_json(s) for s in data[PYANNOTE_JSON_CONTENT]] - return cls(segments=segments, uri=uri) + pass def _repr_png_(self): - """IPython notebook support - - See also - -------- - :mod:`pyannote.core.notebook` - """ - from .notebook import MATPLOTLIB_IS_AVAILABLE, MATPLOTLIB_WARNING - if not MATPLOTLIB_IS_AVAILABLE: - warnings.warn(MATPLOTLIB_WARNING.format(klass=self.__class__.__name__)) - return None - - from .notebook import repr_timeline - return repr_timeline(self) + pass diff --git a/pyannote/core/textgrid.py b/pyannote/core/textgrid.py index ac70fe0..a660e9a 100644 --- a/pyannote/core/textgrid.py +++ b/pyannote/core/textgrid.py @@ -110,7 +110,8 @@ from collections import defaultdict from numbers import Number from pathlib import Path -from typing import Optional, Dict, Union, Iterable, List, Set, TextIO, Tuple, Iterator, Text, Callable +from typing import Optional, Dict, Union, Iterable, List, Set, TextIO, Tuple, Iterator, Text, Callable, Type, Generic, \ + TypeVar import numpy as np from sortedcontainers import SortedDict @@ -118,7 +119,9 @@ from pyannote.core import Annotation from . import PYANNOTE_URI, PYANNOTE_MODALITY, \ PYANNOTE_SEGMENT, PYANNOTE_TRACK, PYANNOTE_LABEL +from .base import BaseSegmentation, GappedAnnotationMixin from .json import PYANNOTE_JSON, PYANNOTE_JSON_CONTENT +from .partition import Partition from .segment import Segment from .timeline import Timeline from .utils.generators import string_generator, int_generator @@ -135,39 +138,66 @@ TierLabel = Union[Text, Number] TierValuePair = Tuple[Segment, TierLabel] +T = Type[Union[Partition, Timeline]] -class Tier: - """A set of chronologically-ordered, optionally non-overlapping - and annotated segments""" - def __init__(self, name: str = None, - uri: str = None, - allow_overlap: bool = True): +class BaseTier(BaseSegmentation, Generic[T]): + _segmentation_type: T + + def __init__(self, name: str = None, uri: str = None): + super().__init__(uri) self.name = name - self.uri = uri - self.allow_overlap = allow_overlap + + self._segmentation = self._segmentation_type() self._segments: Dict[Segment, TierLabel] = dict() - self._timeline = Timeline() def __setitem__(self, segment: Segment, label: str): - if not self.allow_overlap: - for seg, _ in self._timeline.crop_iter(segment, mode="intersection"): - raise ValueError(f"Segment overlaps with {seg}") - - self._timeline.add(segment) + # TODO: check + self._segmentation.add(segment) self._segments[segment] = label def __getitem__(self, key: Union[Segment, int]) -> str: + # TODO: check if isinstance(key, int): - key = self._timeline.__getitem__(key) + key = self._segmentation.__getitem__(key) return self._segments[key] def __delitem__(self, key: Union[Segment, int]): + # TODO: check if isinstance(key, int): - key = self._timeline.__getitem__(key) + key = self._segmentation.__getitem__(key) del self._segments[key] - self._timeline.remove(key) + self._segmentation.remove(key) + + def __iter__(self) -> Iterable[TierValuePair]: + """Return segments with their annotation, in chronological order""" + for segment in self._segmentation.itersegments(): + yield segment, self._segments[segment] + + def __len__(self): + """Number of segments in the tier + + >>> len(tier) # tier contains three segments + 3 + """ + return len(self._segments) + + def empty(self) -> 'Tier': + """Return an empty copy + + Returns + ------- + empty : Tier + Empty timeline using the same 'uri' attribute. + + """ + return Tier(self.name, uri=self.uri) + +class Tier(GappedAnnotationMixin, BaseTier[Timeline]): + _segmentation_type = Timeline + """A set of chronologically-ordered, optionally non-overlapping + and annotated segments""" def __contains__(self, included: Union[Segment, Timeline]): # TODO @@ -187,7 +217,7 @@ def __contains__(self, included: Union[Segment, Timeline]): False otherwise """ - return included in self._timeline + return included in self._segmentation def get_timeline(self, copy: bool = False) -> Timeline: return self._timeline @@ -217,13 +247,7 @@ def update(self, tier: 'Tier') -> 'Tier': mode="intersection")): raise ValueError("Segments in a tier cannot overlap") - def __len__(self): - """Number of segments in the tier - >>> len(tier) # tier contains three segments - 3 - """ - return len(self._segments) def __nonzero__(self): return self.__bool__() @@ -236,19 +260,7 @@ def __bool__(self): ... else: ... # timeline is not empty """ - return len(self._segments) > 0 - - def __iter__(self) -> Iterable[Segment, str]: - """Iterate over segments (in chronological order) - - >>> for segment, annotation in tier: - ... # do something with the segment - - See also - -------- - :class:`pyannote.core.Segment` describes how segments are sorted. - """ - return iter(self._segments.items()) + return bool(self._segments) def __eq__(self, other: 'Tier'): """Equality @@ -475,55 +487,8 @@ def __repr__(self): return "" % (self.uri, list(self.segments_list_)) - def __contains__(self, included: Union[Segment, 'Timeline']): - """Inclusion - Check whether every segment of `included` does exist in timeline. - Parameters - ---------- - included : Segment or Timeline - Segment or timeline being checked for inclusion - - Returns - ------- - contains : bool - True if every segment in `included` exists in timeline, - False otherwise - - Examples - -------- - >>> timeline1 = Timeline(segments=[Segment(0, 10), Segment(1, 13.37)]) - >>> timeline2 = Timeline(segments=[Segment(0, 10)]) - >>> timeline1 in timeline2 - False - >>> timeline2 in timeline1 - >>> Segment(1, 13.37) in timeline1 - True - - """ - - if isinstance(included, Segment): - return included in self.segments_set_ - - elif isinstance(included, Timeline): - return self.segments_set_.issuperset(included.segments_set_) - - else: - raise TypeError( - 'Checking for inclusion only supports Segment and ' - 'Timeline instances') - - def empty(self) -> 'Tier': - """Return an empty copy - - Returns - ------- - empty : Tier - Empty timeline using the same 'uri' attribute. - - """ - return Tier(self.name, uri=self.uri) def covers(self, other: Union[Timeline, 'Tier']) -> bool: """Check whether other timeline is fully covered by the timeline diff --git a/pyannote/core/timeline.py b/pyannote/core/timeline.py index 1a80dda..ec775a0 100755 --- a/pyannote/core/timeline.py +++ b/pyannote/core/timeline.py @@ -96,6 +96,7 @@ from typing_extensions import Self from . import PYANNOTE_URI, PYANNOTE_SEGMENT +from .base import GappedAnnotationMixin, BaseSegmentation from .json import PYANNOTE_JSON, PYANNOTE_JSON_CONTENT from .segment import Segment from .utils.types import Support, Label, CropMode @@ -113,7 +114,7 @@ # ===================================================================== -class Timeline: +class Timeline(GappedAnnotationMixin, BaseSegmentation): """ Ordered set of segments. @@ -143,6 +144,7 @@ def from_df(cls, df: 'pd.DataFrame', uri: Optional[str] = None) -> 'Timeline': def __init__(self, segments: Optional[Iterable[Segment]] = None, uri: str = None): + super().__init__(uri) if segments is None: segments = () @@ -159,8 +161,6 @@ def __init__(self, boundaries = (boundary for segment in segments_set for boundary in segment) self.segments_boundaries_ = SortedList(boundaries) - # path to (or any identifier of) segmented resource - self.uri: str = uri def __len__(self): """Number of segments diff --git a/pyannote/core/utils/types.py b/pyannote/core/utils/types.py index 8507861..6e6b41b 100644 --- a/pyannote/core/utils/types.py +++ b/pyannote/core/utils/types.py @@ -3,7 +3,7 @@ from typing_extensions import Literal Label = Hashable -Support = Union['Segment', 'Timeline'] +Support = Union['Segment', 'SegmentSet'] LabelGeneratorMode = Literal['int', 'string'] LabelGenerator = Union[LabelGeneratorMode, Iterator[Label]] TrackName = Union[str, int] From 0cea04a9b7887db52b0a4066957ee643e51eb5b9 Mon Sep 17 00:00:00 2001 From: hadware Date: Tue, 14 Feb 2023 13:54:42 +0100 Subject: [PATCH 23/30] Starting implem of base functions and setting up classes --- pyannote/core/base.py | 40 ++++++++++++++++++++++++++++-------- pyannote/core/partition.py | 10 ++++----- pyannote/core/textgrid.py | 28 +++++-------------------- pyannote/core/utils/types.py | 1 + 4 files changed, 42 insertions(+), 37 deletions(-) diff --git a/pyannote/core/base.py b/pyannote/core/base.py index 4b24943..1654370 100644 --- a/pyannote/core/base.py +++ b/pyannote/core/base.py @@ -1,10 +1,11 @@ from abc import ABCMeta, abstractmethod from typing import Optional, Iterator, Tuple, Union, Dict, TYPE_CHECKING, Callable, List +from sortedcontainers import SortedList from typing_extensions import Self from pyannote.core import Segment -from pyannote.core.utils.types import Support, CropMode +from pyannote.core.utils.types import Support, CropMode, ContiguousSupport if TYPE_CHECKING: from .timeline import Timeline @@ -51,17 +52,27 @@ def __ne__(self, other: Self): def itersegments(self): pass - @abstractmethod def get_timeline(self) -> 'Timeline': - pass + from .timeline import Timeline + return Timeline(self.itersegments()) @abstractmethod def update(self, other: Self) -> Self: pass - @abstractmethod - def co_iter(self, other: 'BaseSegmentation') -> Iterator[Tuple[Segment, Segment]]: - pass + def co_iter(self, other: Union['BaseSegmentation', Segment]) -> Iterator[Tuple[Segment, Segment]]: + if isinstance(other, Segment): + other_segments = SortedList([other]) + else: + other_segments = SortedList(other.itersegments()) + + for segment in self.itersegments(): + + # iterate over segments that starts before 'segment' ends + temp = Segment(start=segment.end, end=segment.end) + for other_segment in other_segments.irange(maximum=temp): + if segment.intersects(other_segment): + yield segment, other_segment @abstractmethod def get_overlap(self) -> 'Timeline': @@ -125,8 +136,7 @@ def extrude(self, mode: CropMode = 'intersection') -> Self: pass - # TODO : maybe put inside the base seg class, add conditions for - # partition + @abstractmethod def crop(self, support: Support, @@ -140,7 +150,19 @@ def support(self, collar: float = 0.) -> Self: pass -class SegmentSet(metaclass=ABCMeta): +class ContiguousAnnotationMixin(metaclass=ABCMeta): + # TODO : figure out if the return mapping still makes sense + # (propably not) + @abstractmethod + def crop(self, + support: ContiguousSupport, + mode: CropMode = 'intersection', + returns_mapping: bool = False) \ + -> Union[Self, Tuple[Self, Dict[Segment, Segment]]]: + pass + + +class SegmentSetMixin(metaclass=ABCMeta): @abstractmethod def add(self, segment: Segment): diff --git a/pyannote/core/partition.py b/pyannote/core/partition.py index 18555cd..243011e 100755 --- a/pyannote/core/partition.py +++ b/pyannote/core/partition.py @@ -96,7 +96,7 @@ from sortedcontainers import SortedList from . import PYANNOTE_URI, PYANNOTE_SEGMENT, Timeline -from .base import BaseSegmentation, SegmentSet +from .base import BaseSegmentation, SegmentSetMixin from .json import PYANNOTE_JSON, PYANNOTE_JSON_CONTENT from .segment import Segment from .utils.types import Support, Label, CropMode @@ -114,7 +114,7 @@ # - partition empty if only one segment? # - truthiness? -class Partition(SegmentSet, BaseSegmentation): +class Partition(SegmentSetMixin, BaseSegmentation): """ Ordered set of segments that are all contiguous. @@ -165,10 +165,10 @@ def __len__(self) -> int: pass def __nonzero__(self): - pass + return True def __bool__(self): - pass + return True def __eq__(self, other: Self): pass @@ -216,7 +216,7 @@ def copy(self, segment_func: Optional[Callable[[Segment], Segment]] = None) -> S pass def extent(self) -> Segment: - pass + return Segment(self.start, self.end) def support_iter(self, collar: float = 0.0) -> Iterator[Segment]: pass diff --git a/pyannote/core/textgrid.py b/pyannote/core/textgrid.py index a660e9a..4860515 100644 --- a/pyannote/core/textgrid.py +++ b/pyannote/core/textgrid.py @@ -144,7 +144,7 @@ class BaseTier(BaseSegmentation, Generic[T]): _segmentation_type: T - def __init__(self, name: str = None, uri: str = None): + def __init__(self, name: str = None, uri: str = None): super().__init__(uri) self.name = name @@ -194,6 +194,7 @@ def empty(self) -> 'Tier': """ return Tier(self.name, uri=self.uri) + class Tier(GappedAnnotationMixin, BaseTier[Timeline]): _segmentation_type = Timeline """A set of chronologically-ordered, optionally non-overlapping @@ -247,8 +248,6 @@ def update(self, tier: 'Tier') -> 'Tier': mode="intersection")): raise ValueError("Segments in a tier cannot overlap") - - def __nonzero__(self): return self.__bool__() @@ -487,9 +486,6 @@ def __repr__(self): return "" % (self.uri, list(self.segments_list_)) - - - def covers(self, other: Union[Timeline, 'Tier']) -> bool: """Check whether other timeline is fully covered by the timeline @@ -743,22 +739,7 @@ def to_annotation(self, modality: Optional[str] = None) -> 'Annotation': return annotation -class TieredAnnotation: - """Tiered Annotation. - - Parameters - ---------- - uri : string, optional - name of annotated resource (e.g. audio or video file) - modality : string, optional - name of annotated modality - - Returns - ------- - annotation : Annotation - New annotation - - """ +class TieredAnnotation(GappedAnnotationMixin, BaseSegmentation): def __init__(self, uri: Optional[str] = None): @@ -838,7 +819,8 @@ def itersegments(self): -------- :class:`pyannote.core.Segment` describes how segments are sorted. """ - return iter(self._timeline) + for tier in self.tiers: + yield from tier def __iter__(self) -> Iterable[Tuple[Segment, str]]: return iter(self._tiers.items()) diff --git a/pyannote/core/utils/types.py b/pyannote/core/utils/types.py index 6e6b41b..7e07cff 100644 --- a/pyannote/core/utils/types.py +++ b/pyannote/core/utils/types.py @@ -4,6 +4,7 @@ Label = Hashable Support = Union['Segment', 'SegmentSet'] +ContiguousSupport = Union['Segment', 'ContiguousAnnotationMixin'] LabelGeneratorMode = Literal['int', 'string'] LabelGenerator = Union[LabelGeneratorMode, Iterator[Label]] TrackName = Union[str, int] From d6f8c7c32ef6b5798e65e6c39957e28ede3dc700 Mon Sep 17 00:00:00 2001 From: hadware Date: Wed, 22 Feb 2023 15:59:52 +0100 Subject: [PATCH 24/30] Some work on the implemention of methods for tiers. --- pyannote/core/base.py | 44 +- pyannote/core/partition.py | 123 ++--- pyannote/core/{textgrid.py => tiered.py} | 555 +++-------------------- pyannote/core/timeline.py | 22 +- 4 files changed, 165 insertions(+), 579 deletions(-) rename pyannote/core/{textgrid.py => tiered.py} (56%) diff --git a/pyannote/core/base.py b/pyannote/core/base.py index 1654370..1e69b5e 100644 --- a/pyannote/core/base.py +++ b/pyannote/core/base.py @@ -1,5 +1,5 @@ from abc import ABCMeta, abstractmethod -from typing import Optional, Iterator, Tuple, Union, Dict, TYPE_CHECKING, Callable, List +from typing import Optional, Iterator, Tuple, Union, Dict, TYPE_CHECKING, Callable, List, Set from sortedcontainers import SortedList from typing_extensions import Self @@ -30,9 +30,8 @@ def uri(self, uri: str): def __len__(self) -> int: pass - @abstractmethod def __nonzero__(self): - pass + return self.__bool__() @abstractmethod def __bool__(self): @@ -52,6 +51,10 @@ def __ne__(self, other: Self): def itersegments(self): pass + def segments_set(self) -> Set[Segment]: + # default implementation, may be overriden for better performance + return set(self.itersegments()) + def get_timeline(self) -> 'Timeline': from .timeline import Timeline return Timeline(self.itersegments()) @@ -74,10 +77,6 @@ def co_iter(self, other: Union['BaseSegmentation', Segment]) -> Iterator[Tuple[S if segment.intersects(other_segment): yield segment, other_segment - @abstractmethod - def get_overlap(self) -> 'Timeline': - pass - @abstractmethod def __str__(self): pass @@ -86,17 +85,22 @@ def __str__(self): def __repr__(self): pass - @abstractmethod - def __contains__(self, included: Union[Segment, 'Timeline']) -> bool: - pass + def __contains__(self, included: Union[Segment, 'BaseSegmentation']) -> bool: + # Base implementation, may be overloaded for better performance + seg_set = self.segments_set() + if isinstance(included, Segment): + return included in seg_set + elif isinstance(included, BaseSegmentation): + return seg_set.issuperset(included.segments_set()) + else: + raise ValueError("") @abstractmethod def empty(self) -> Self: pass @abstractmethod - def copy(self, segment_func: Optional[Callable[[Segment], Segment]] = None) \ - -> Self: + def copy(self) -> Self: pass @abstractmethod @@ -136,7 +140,6 @@ def extrude(self, mode: CropMode = 'intersection') -> Self: pass - @abstractmethod def crop(self, support: Support, @@ -149,6 +152,10 @@ def crop(self, def support(self, collar: float = 0.) -> Self: pass + @abstractmethod + def get_overlap(self) -> 'Timeline': + pass + class ContiguousAnnotationMixin(metaclass=ABCMeta): # TODO : figure out if the return mapping still makes sense @@ -161,8 +168,15 @@ def crop(self, -> Union[Self, Tuple[Self, Dict[Segment, Segment]]]: pass + @abstractmethod + def bisect(self, at: float): + pass + -class SegmentSetMixin(metaclass=ABCMeta): +class PureSegmentationMixin(metaclass=ABCMeta): + """A segmentation containing _only_ segments""" + + # TODO: add __and__ (defaults to crop intersection, not in place), that only takes objects of Self type? @abstractmethod def add(self, segment: Segment): @@ -178,4 +192,4 @@ def index(self, segment: Segment) -> int: @abstractmethod def overlapping(self, t: float) -> List[Segment]: - pass + pass \ No newline at end of file diff --git a/pyannote/core/partition.py b/pyannote/core/partition.py index 243011e..5e81ad4 100755 --- a/pyannote/core/partition.py +++ b/pyannote/core/partition.py @@ -88,21 +88,19 @@ See :class:`pyannote.core.Timeline` for the complete reference. """ -import warnings -from typing import (Optional, Iterable, List, Union, Callable, - TextIO, Tuple, TYPE_CHECKING, Iterator, Dict, Text) -from typing_extensions import Self +from typing import (Optional, Iterable, Union, Callable, + Tuple, TYPE_CHECKING, Iterator, Dict, List) -from sortedcontainers import SortedList +from sortedcontainers import SortedDict, SortedList, SortedSet +from typing_extensions import Self -from . import PYANNOTE_URI, PYANNOTE_SEGMENT, Timeline -from .base import BaseSegmentation, SegmentSetMixin -from .json import PYANNOTE_JSON, PYANNOTE_JSON_CONTENT +from . import Timeline +from .base import BaseSegmentation, PureSegmentationMixin from .segment import Segment -from .utils.types import Support, Label, CropMode +from .utils.types import Support, CropMode if TYPE_CHECKING: - from .annotation import Annotation + pass # ===================================================================== @@ -114,7 +112,13 @@ # - partition empty if only one segment? # - truthiness? -class Partition(SegmentSetMixin, BaseSegmentation): + +def pairwise(iterable): + "s -> (s0, s1), (s2, s3), (s4, s5), ..." + a = iter(iterable) + return zip(a, a) + +class Partition(PureSegmentationMixin, BaseSegmentation): """ Ordered set of segments that are all contiguous. @@ -138,31 +142,36 @@ class Partition(SegmentSetMixin, BaseSegmentation): New timeline """ + + def __init__(self, segments: Optional[Iterable[Segment]] = None, start: float = 0.0, end: float = None, uri: str = None): - segments = list(segments) + segments = list(segments) if segments else [] if segments is None and end is None: raise ValueError("Cannot initialize an empty timeline without and end boundary") elif end is None: end = max(seg.end for seg in segments) elif not segments: segments = Segment(start, end) - - self.start = start - self.end = end - self._boundaries = None # TODO: figure out if needed super().__init__(uri) - - # TODO: check "filling"? autofill if not valid? - self.update(self.gaps(support=self.extent())) - if self[0].start < self.start or self[-1].end > self.end: + self.boundaries = Segment(start, end) + timeline = Timeline(segments) + if timeline.extent() not in self.boundaries: raise ValueError(f"Segments have to be within ({start, end}) bounds") + # automatically filling in the gaps in the segments + # TODO: ask about behavior? + timeline.add(self.boundaries) + self._segments_bounds_set = SortedSet() + for (start, end) in timeline: + self._segments_bounds_set.update(start, end) + + def __len__(self) -> int: - pass + return len(self._segments_bounds_set) - 1 def __nonzero__(self): return True @@ -170,35 +179,43 @@ def __nonzero__(self): def __bool__(self): return True - def __eq__(self, other: Self): - pass + def __eq__(self, other: 'Partition'): + return isinstance(other, Partition) and self._segments_set == other._segments_set - def __ne__(self, other: Self): - pass + def __ne__(self, other: 'Partition'): + return not other == self + + def index(self, segment: Segment) -> int: + return self._segments_bounds_set.index(segment.start) def bisect(self, at: float): - pass + if not self.boundaries.overlaps(at): + raise ValueError("Cannot bisect outside of partition boundaries") + + self._segments_bounds_set.add(at) def add(self, segment: Segment): - pass + # TODO: ask about this behavior + if len(list(self.co_iter(segment))) > 1: + raise ValueError("Segment overlaps a boundary") + self.bisect(segment.start) + self.bisect(segment.end) def remove(self, segment: Segment): - pass + if not (set(segment) & self._segments_bounds_set): + raise KeyError(f"Segment {segment} not in partition") + self._segments_bounds_set.difference_update(segment) def itersegments(self): - pass + for (start, end) in pairwise(self._segments_bounds_set): + yield Segment(start, end) def get_timeline(self) -> 'Timeline': - pass - - def update(self, other: Self) -> Self: - pass - - def co_iter(self, other: 'BaseSegmentation') -> Iterator[Tuple[Segment, Segment]]: - pass + return Timeline(self.itersegments(), uri=self.uri) - def get_overlap(self) -> 'Timeline': - pass + def update(self, other: 'Partition') -> 'Partition': + assert other.boundaries in self.boundaries + self._segments_bounds_set |= other._segments_bounds_set def __str__(self): pass @@ -206,23 +223,27 @@ def __str__(self): def __repr__(self): pass - def __contains__(self, included: Union[Segment, 'Timeline']) -> bool: - pass - def empty(self) -> Self: - pass + return Partition(None, + start=self.boundaries.start, + end=self.boundaries.end, + uri=self.uri) - def copy(self, segment_func: Optional[Callable[[Segment], Segment]] = None) -> Self: - pass + def copy(self) -> Self: + return Partition(self.itersegments(), + start=self.boundaries.start, + end=self.boundaries.end, + uri=self.uri) def extent(self) -> Segment: - return Segment(self.start, self.end) + return self.boundaries - def support_iter(self, collar: float = 0.0) -> Iterator[Segment]: - pass - - def support(self, collar: float = 0.) -> 'Timeline': - pass + def overlapping(self, t: float) -> List[Segment]: + assert self.boundaries.overlaps(t) + end = next(self._segments_bounds_set.irange(mininum=t)) + end_idx = self._segments_bounds_set.index(end) + start = self._segments_bounds_set[end_idx - 1] + return [Segment(start, end)] def crop_iter(self, support: Support, mode: CropMode = 'intersection', returns_mapping: bool = False) -> Iterator[ Union[Tuple[Segment, Segment], Segment]]: @@ -233,7 +254,7 @@ def crop(self, support: Support, mode: CropMode = 'intersection', returns_mappin pass def duration(self) -> float: - pass + return self.extent().duration def _repr_png_(self): pass diff --git a/pyannote/core/textgrid.py b/pyannote/core/tiered.py similarity index 56% rename from pyannote/core/textgrid.py rename to pyannote/core/tiered.py index 4860515..06d6388 100644 --- a/pyannote/core/textgrid.py +++ b/pyannote/core/tiered.py @@ -112,6 +112,7 @@ from pathlib import Path from typing import Optional, Dict, Union, Iterable, List, Set, TextIO, Tuple, Iterator, Text, Callable, Type, Generic, \ TypeVar +from typing_extensions import Self import numpy as np from sortedcontainers import SortedDict @@ -119,7 +120,7 @@ from pyannote.core import Annotation from . import PYANNOTE_URI, PYANNOTE_MODALITY, \ PYANNOTE_SEGMENT, PYANNOTE_TRACK, PYANNOTE_LABEL -from .base import BaseSegmentation, GappedAnnotationMixin +from .base import BaseSegmentation, GappedAnnotationMixin, ContiguousAnnotationMixin from .json import PYANNOTE_JSON, PYANNOTE_JSON_CONTENT from .partition import Partition from .segment import Segment @@ -143,6 +144,11 @@ class BaseTier(BaseSegmentation, Generic[T]): _segmentation_type: T + # TODO: handle segment sets changes for + # - add (partition) + # - bisect (partition) + # - crop (partition/timeline) + # - for extrusion, should be based on cropping def __init__(self, name: str = None, uri: str = None): super().__init__(uri) @@ -183,85 +189,17 @@ def __len__(self): """ return len(self._segments) - def empty(self) -> 'Tier': - """Return an empty copy - - Returns - ------- - empty : Tier - Empty timeline using the same 'uri' attribute. - - """ - return Tier(self.name, uri=self.uri) - - -class Tier(GappedAnnotationMixin, BaseTier[Timeline]): - _segmentation_type = Timeline - """A set of chronologically-ordered, optionally non-overlapping - and annotated segments""" - - def __contains__(self, included: Union[Segment, Timeline]): - # TODO - """Inclusion - - Check whether every segment of `included` does exist in annotation. - - Parameters - ---------- - included : Segment or Timeline - Segment or timeline being checked for inclusion - - Returns - ------- - contains : bool - True if every segment in `included` exists in timeline, - False otherwise - - """ - return included in self._segmentation - - def get_timeline(self, copy: bool = False) -> Timeline: - return self._timeline - - def update(self, tier: 'Tier') -> 'Tier': - # TODO : Doc - """Add every segment of an existing tier (in place) - - Parameters - ---------- - tier : Tier - Tier whose segments and their annotations are being added - - Returns - ------- - self : Tier - Updated tier - - Note - ---- - Only segments that do not already exist will be added, as a timeline is - meant to be a **set** of segments (not a list). - - """ - if not self.allow_overlap and \ - any(True for _ in self._timeline.crop_iter(tier.get_timeline(), - mode="intersection")): - raise ValueError("Segments in a tier cannot overlap") - - def __nonzero__(self): - return self.__bool__() - def __bool__(self): """Emptiness >>> if tier: - ... # timeline is empty + ... # tier is empty ... else: - ... # timeline is not empty + ... # tier is not empty """ return bool(self._segments) - def __eq__(self, other: 'Tier'): + def __eq__(self, other: 'BaseTier'): """Equality Two PraatTiers are equal if and only if their segments and their annotations are equal. @@ -277,471 +215,102 @@ def __eq__(self, other: 'Tier'): """ return self._segments == other._segments - def __ne__(self, other: 'Tier'): + def __ne__(self, other: 'BaseTier'): """Inequality""" return self._segments != other._segments - def __or__(self, timeline: 'Timeline') -> 'Timeline': - return self.union(timeline) - - def co_iter(self, other: Union[Timeline, Segment]) -> Iterator[Tuple[Segment, Segment]]: - # TODO : Doc - """Iterate over pairs of intersecting segments - - >>> timeline1 = Timeline([Segment(0, 2), Segment(1, 2), Segment(3, 4)]) - >>> timeline2 = Timeline([Segment(1, 3), Segment(3, 5)]) - >>> for segment1, segment2 in timeline1.co_iter(timeline2): - ... print(segment1, segment2) - (, ) - (, ) - (, ) - - Parameters - ---------- - other : Timeline - Second timeline - - Returns - ------- - iterable : (Segment, Segment) iterable - Yields pairs of intersecting segments in chronological order. - """ - - yield from self._timeline.co_iter(other) - - def crop_iter(self, - support: Support, - mode: CropMode = 'intersection', - returns_mapping: bool = False) \ - -> Iterator[Union[Tuple[Segment, Segment], Segment]]: - """Like `crop` but returns a segment iterator instead - - See also - -------- - :func:`pyannote.core.Timeline.crop` - """ - - if mode not in {'loose', 'strict', 'intersection'}: - raise ValueError("Mode must be one of 'loose', 'strict', or " - "'intersection'.") - - if not isinstance(support, (Segment, Timeline)): - raise TypeError("Support must be a Segment or a Timeline.") - - if isinstance(support, Segment): - # corner case where "support" is empty - if support: - segments = [support] - else: - segments = [] - - support = Timeline(segments=segments, uri=self.uri) - for yielded in self.crop_iter(support, mode=mode, - returns_mapping=returns_mapping): - yield yielded - return - - # if 'support' is a `Timeline`, we use its support - support = support.support() - - # loose mode - if mode == 'loose': - for segment, _ in self.co_iter(support): - yield segment - return - - # strict mode - if mode == 'strict': - for segment, other_segment in self.co_iter(support): - if segment in other_segment: - yield segment - return - - # intersection mode - for segment, other_segment in self.co_iter(support): - mapped_to = segment & other_segment - if not mapped_to: - continue - if returns_mapping: - yield segment, mapped_to - else: - yield mapped_to - - def crop(self, - support: Support, - mode: CropMode = 'intersection', - returns_mapping: bool = False) \ - -> 'Tier': - """Crop timeline to new support - - Parameters - ---------- - support : Segment or Timeline - If `support` is a `Timeline`, its support is used. - mode : {'strict', 'loose', 'intersection'}, optional - Controls how segments that are not fully included in `support` are - handled. 'strict' mode only keeps fully included segments. 'loose' - mode keeps any intersecting segment. 'intersection' mode keeps any - intersecting segment but replace them by their actual intersection. - returns_mapping : bool, optional - In 'intersection' mode, return a dictionary whose keys are segments - of the cropped timeline, and values are list of the original - segments that were cropped. Defaults to False. - - Returns - ------- - cropped : Timeline - Cropped timeline - mapping : dict - When 'returns_mapping' is True, dictionary whose keys are segments - of 'cropped', and values are lists of corresponding original - segments. - - Examples - -------- - - >>> timeline = Timeline([Segment(0, 2), Segment(1, 2), Segment(3, 4)]) - >>> timeline.crop(Segment(1, 3)) - ])> - - >>> timeline.crop(Segment(1, 3), mode='loose') - , ])> - - >>> timeline.crop(Segment(1, 3), mode='strict') - ])> - - >>> cropped, mapping = timeline.crop(Segment(1, 3), returns_mapping=True) - >>> print(mapping) - {: [, ]} - - """ - - # TODO - - if mode == 'intersection' and returns_mapping: - segments, mapping = [], {} - for segment, mapped_to in self.crop_iter(support, - mode='intersection', - returns_mapping=True): - segments.append(mapped_to) - mapping[mapped_to] = mapping.get(mapped_to, list()) + [segment] - return Timeline(segments=segments, uri=self.uri), mapping - - return Timeline(segments=self.crop_iter(support, mode=mode), - uri=self.uri) - - def overlapping(self, t: float) -> List[Segment]: - """Get list of segments overlapping `t` - - Parameters - ---------- - t : float - Timestamp, in seconds. - - Returns - ------- - segments : list - List of all segments of timeline containing time t - """ - return self._timeline.overlapping(t) - - def overlapping_iter(self, t: float) -> Iterator[Segment]: - """Like `overlapping` but returns a segment iterator instead - - See also - -------- - :func:`pyannote.core.Timeline.overlapping` - """ - segment = Segment(start=t, end=t) - for segment in self.segments_list_.irange(maximum=segment): - if segment.overlaps(t): - yield segment - - def __str__(self): - """Human-readable representation - - >>> timeline = Timeline(segments=[Segment(0, 10), Segment(1, 13.37)]) - >>> print(timeline) - [[ 00:00:00.000 --> 00:00:10.000] - [ 00:00:01.000 --> 00:00:13.370]] - - """ - - n = len(self.segments_list_) - string = "[" - for i, segment in enumerate(self.segments_list_): - string += str(segment) - string += "\n " if i + 1 < n else "" - string += "]" - return string - - def __repr__(self): - """Computer-readable representation - - >>> Timeline(segments=[Segment(0, 10), Segment(1, 13.37)]) - , ])> - - """ - - return "" % (self.uri, - list(self.segments_list_)) - - def covers(self, other: Union[Timeline, 'Tier']) -> bool: - """Check whether other timeline is fully covered by the timeline - - Parameter - --------- - other : Timeline - Second timeline - - Returns - ------- - covers : bool - True if timeline covers "other" timeline entirely. False if at least - one segment of "other" is not fully covered by timeline - """ - # TODO - - # compute gaps within "other" extent - # this is where we should look for possible faulty segments - gaps = self.gaps(support=other.extent()) - - # if at least one gap intersects with a segment from "other", - # "self" does not cover "other" entirely --> return False - for _ in gaps.co_iter(other): - return False - - # if no gap intersects with a segment from "other", - # "self" covers "other" entirely --> return True - return True - - def copy(self, segment_func: Optional[Callable[[Segment], Segment]] = None) \ - -> 'Timeline': - # TODO - """Get a copy of the timeline - - If `segment_func` is provided, it is applied to each segment first. + def itersegments(self): + return self._segmentation.itersegments() - Parameters - ---------- - segment_func : callable, optional - Callable that takes a segment as input, and returns a segment. - Defaults to identity function (segment_func(segment) = segment) + def empty(self) -> 'BaseTier': + """Return an empty copy Returns ------- - timeline : Timeline - Copy of the timeline + empty : Tier + Empty timeline using the same 'uri' attribute. """ + return self.__class__(self.name, uri=self.uri) - # if segment_func is not provided - # just add every segment - if segment_func is None: - return Timeline(segments=self.segments_list_, uri=self.uri) + def update(self, tier: 'BaseTier') -> 'BaseTier': + pass # TODO - # if is provided - # apply it to each segment before adding them - return Timeline(segments=[segment_func(s) for s in self.segments_list_], - uri=self.uri) + def copy(self, segment_func: Optional[Callable[[Segment], Segment]] = None) -> Self: + pass # TODO def extent(self) -> Segment: - """Extent - - The extent of a timeline is the segment of minimum duration that - contains every segments of the timeline. It is unique, by definition. - The extent of an empty timeline is an empty segment. - - A picture is worth a thousand words:: - - timeline - |------| |------| |----| - |--| |-----| |----------| - - timeline.extent() - |--------------------------------| - - Returns - ------- - extent : Segment - Timeline extent - - Examples - -------- - >>> timeline = Timeline(segments=[Segment(0, 1), Segment(9, 10)]) - >>> timeline.extent() - - - """ - return self._timeline.extent() - - def support_iter(self, collar: float = 0.) -> Iterator[Segment]: - """Like `support` but returns a segment generator instead - - See also - -------- - :func:`pyannote.core.Timeline.support` - """ - - yield from self._timeline.support_iter(collar) - - def support(self, collar: float = 0.) -> 'Timeline': - # TODO: doc - """Timeline support - - The support of a timeline is the timeline with the minimum number of - segments with exactly the same time span as the original timeline. It - is (by definition) unique and does not contain any overlapping - segments. - - A picture is worth a thousand words:: - - collar - |---| - - timeline - |------| |------| |----| - |--| |-----| |----------| - - timeline.support() - |------| |--------| |----------| - - timeline.support(collar) - |------------------| |----------| - - Parameters - ---------- - collar : float, optional - Merge separated by less than `collar` seconds. This is why there - are only two segments in the final timeline in the above figure. - Defaults to 0. + return self._segmentation.extent() - Returns - ------- - support : Timeline - Timeline support - """ - return self._timeline.support(collar) + def crop_iter(self, support: Support, mode: CropMode = 'intersection', returns_mapping: bool = False) -> Iterator[ + Union[Tuple[Segment, Segment], Segment]]: + pass def duration(self) -> float: - """Timeline duration + return self._segmentation.duration() - The timeline duration is the sum of the durations of the segments - in the timeline support. + def _repr_png_(self): + pass - Returns - ------- - duration : float - Duration of timeline support, in seconds. - """ - # The timeline duration is the sum of the durations - # of the segments in the timeline support. - return self._timeline.duration() +class Tier(GappedAnnotationMixin, BaseTier[Timeline]): + _segmentation_type = Timeline + """A set of chronologically-ordered, optionally non-overlapping + and annotated segments""" def gaps_iter(self, support: Optional[Support] = None) -> Iterator[Segment]: - """Like `gaps` but returns a segment generator instead - - See also - -------- - :func:`pyannote.core.Timeline.gaps` - - """ - - yield from self._timeline.gaps_iter(support) + return self._segmentation.gaps_iter(support) def gaps(self, support: Optional[Support] = None) -> 'Timeline': - """Gaps - - A picture is worth a thousand words:: - - tier - |------| |------| |----| + return self._segmentation.gaps(support) - timeline.gaps() - |--| |----| + def extrude(self, removed: Support, mode: CropMode = 'intersection') -> Self: + return self._segmentation.extrude(removed, mode) - Parameters - ---------- - support : None, Segment or Timeline - Support in which gaps are looked for. Defaults to timeline extent + def crop(self, support: Support, mode: CropMode = 'intersection', returns_mapping: bool = False) -> Union[ + Self, Tuple[Self, Dict[Segment, Segment]]]: + return self._segmentation.crop(support, mode, returns_mapping) - Returns - ------- - gaps : Timeline - Timeline made of all gaps from original timeline, and delimited - by provided support + def support(self, collar: float = 0.) -> Timeline: + return self._segmentation.support(collar) - See also - -------- - :func:`pyannote.core.Timeline.extent` + def get_overlap(self) -> 'Timeline': + return self._segmentation.get_overlap() - """ - return Timeline(segments=self.gaps_iter(support=support), - uri=self.uri) + def co_iter(self, other: Union[Timeline, Segment]) -> Iterator[Tuple[Segment, Segment]]: + # TODO : Doc + """Iterate over pairs of intersecting segments - def argmax(self, support: Optional[Support] = None) -> Optional[Label]: - """Get label with longest duration + >>> timeline1 = Timeline([Segment(0, 2), Segment(1, 2), Segment(3, 4)]) + >>> timeline2 = Timeline([Segment(1, 3), Segment(3, 5)]) + >>> for segment1, segment2 in timeline1.co_iter(timeline2): + ... print(segment1, segment2) + (, ) + (, ) + (, ) Parameters ---------- - support : Segment or Timeline, optional - Find label with longest duration within provided support. - Defaults to whole extent. + other : Timeline + Second timeline Returns ------- - label : any existing label or None - Label with longest intersection - - Examples - -------- - >>> annotation = Annotation(modality='speaker') - >>> annotation[Segment(0, 10), 'speaker1'] = 'Alice' - >>> annotation[Segment(8, 20), 'speaker1'] = 'Bob' - >>> print "%s is such a talker!" % annotation.argmax() - Bob is such a talker! - >>> segment = Segment(22, 23) - >>> if not annotation.argmax(support): - ... print "No label intersecting %s" % segment - No label intersection [22 --> 23] - + iterable : (Segment, Segment) iterable + Yields pairs of intersecting segments in chronological order. """ - cropped = self - if support is not None: - cropped = cropped.crop(support, mode='intersection') - - if not cropped: - return None - - return max(((_, cropped.label_duration(_)) for _ in cropped.labels()), - key=lambda x: x[1])[0] - - def to_annotation(self, modality: Optional[str] = None) -> 'Annotation': - """Turn tier into an annotation - - Each segment is labeled by a unique label. - - Parameters - ---------- - modality : str, optional + yield from self._timeline.co_iter(other) - Returns - ------- - annotation : Annotation - Annotation - """ - from .annotation import Annotation - annotation = Annotation(uri=self.uri, modality=modality) - # TODO - return annotation +class PartitionTier(ContiguousAnnotationMixin, BaseTier[Partition]): + _segmentation_type = Partition class TieredAnnotation(GappedAnnotationMixin, BaseSegmentation): def __init__(self, uri: Optional[str] = None): + super().__init__(uri) self._uri: Optional[str] = uri diff --git a/pyannote/core/timeline.py b/pyannote/core/timeline.py index ec775a0..a9bdfce 100755 --- a/pyannote/core/timeline.py +++ b/pyannote/core/timeline.py @@ -157,10 +157,6 @@ def __init__(self, # sorted list of segments (used for sorted iteration) self.segments_list_ = SortedList(segments_set) - # sorted list of (possibly redundant) segment boundaries - boundaries = (boundary for segment in segments_set for boundary in segment) - self.segments_boundaries_ = SortedList(boundaries) - def __len__(self): """Number of segments @@ -270,11 +266,6 @@ def add(self, segment: Segment) -> 'Timeline': segments_set_.add(segment) self.segments_list_.add(segment) - - segments_boundaries_ = self.segments_boundaries_ - segments_boundaries_.add(segment.start) - segments_boundaries_.add(segment.end) - return self def remove(self, segment: Segment) -> 'Timeline': @@ -303,10 +294,6 @@ def remove(self, segment: Segment) -> 'Timeline': self.segments_list_.remove(segment) - segments_boundaries_ = self.segments_boundaries_ - segments_boundaries_.remove(segment.start) - segments_boundaries_.remove(segment.end) - return self def discard(self, segment: Segment) -> 'Timeline': @@ -348,10 +335,6 @@ def update(self, timeline: 'Timeline') -> 'Timeline': # sorted list of segments (used for sorted iteration) self.segments_list_ = SortedList(segments_set) - # sorted list of (possibly redundant) segment boundaries - boundaries = (boundary for segment in segments_set for boundary in segment) - self.segments_boundaries_ = SortedList(boundaries) - return self def __or__(self, timeline: Union['Timeline', Segment]) -> 'Timeline': @@ -798,9 +781,8 @@ def extent(self) -> Segment: """ if self.segments_set_: - segments_boundaries_ = self.segments_boundaries_ - start = segments_boundaries_[0] - end = segments_boundaries_[-1] + start = self.segments_list_[0].start + end = self.segments_list_[-1].end return Segment(start=start, end=end) return Segment(start=0.0, end=0.0) From 08a03e485a0e4a79d8bf3bf8a5853e771a73de21 Mon Sep 17 00:00:00 2001 From: hadware Date: Thu, 2 Mar 2023 18:31:38 +0100 Subject: [PATCH 25/30] Attempts at creating abstract classes for all annotations --- pyannote/core/tiered.py | 36 ++++++++++++++++++++---------------- pyannote/core/utils/types.py | 2 +- 2 files changed, 21 insertions(+), 17 deletions(-) diff --git a/pyannote/core/tiered.py b/pyannote/core/tiered.py index 06d6388..6027216 100644 --- a/pyannote/core/tiered.py +++ b/pyannote/core/tiered.py @@ -107,26 +107,19 @@ See :class:`pyannote.core.Annotation` for the complete reference. """ import itertools -from collections import defaultdict -from numbers import Number from pathlib import Path -from typing import Optional, Dict, Union, Iterable, List, Set, TextIO, Tuple, Iterator, Text, Callable, Type, Generic, \ - TypeVar -from typing_extensions import Self +from typing import Optional, Dict, Union, Iterable, List, TextIO, Tuple, Iterator, Callable, Type, Generic -import numpy as np from sortedcontainers import SortedDict +from typing_extensions import Self from pyannote.core import Annotation -from . import PYANNOTE_URI, PYANNOTE_MODALITY, \ - PYANNOTE_SEGMENT, PYANNOTE_TRACK, PYANNOTE_LABEL from .base import BaseSegmentation, GappedAnnotationMixin, ContiguousAnnotationMixin -from .json import PYANNOTE_JSON, PYANNOTE_JSON_CONTENT from .partition import Partition from .segment import Segment from .timeline import Timeline -from .utils.generators import string_generator, int_generator -from .utils.types import Label, Key, Support, LabelGenerator, TierName, CropMode +from .utils.generators import string_generator +from .utils.types import Label, Key, Support, TierName, CropMode, ContiguousSupport, TierItemPair # TODO: add JSON dumping/loading # TODO: QUESTIONS: @@ -136,14 +129,12 @@ # TODO: IDEA: use a timeline in the Tier to do all the cropping/etc/ operations # and just make this class a thin wrapper for it -TierLabel = Union[Text, Number] -TierValuePair = Tuple[Segment, TierLabel] - T = Type[Union[Partition, Timeline]] class BaseTier(BaseSegmentation, Generic[T]): _segmentation_type: T + # TODO: handle segment sets changes for # - add (partition) # - bisect (partition) @@ -176,7 +167,7 @@ def __delitem__(self, key: Union[Segment, int]): del self._segments[key] self._segmentation.remove(key) - def __iter__(self) -> Iterable[TierValuePair]: + def __iter__(self) -> Iterable[TierItemPair]: """Return segments with their annotation, in chronological order""" for segment in self._segmentation.itersegments(): yield segment, self._segments[segment] @@ -269,6 +260,9 @@ def extrude(self, removed: Support, mode: CropMode = 'intersection') -> Self: def crop(self, support: Support, mode: CropMode = 'intersection', returns_mapping: bool = False) -> Union[ Self, Tuple[Self, Dict[Segment, Segment]]]: + # TODO (for segments mapping): + # - if loose/strict, use segment_set of cropped segmentation to find deleted segments + # - if intersection, use return_mapping to replace sliced segments return self._segmentation.crop(support, mode, returns_mapping) def support(self, collar: float = 0.) -> Timeline: @@ -300,11 +294,21 @@ def co_iter(self, other: Union[Timeline, Segment]) -> Iterator[Tuple[Segment, Se Yields pairs of intersecting segments in chronological order. """ - yield from self._timeline.co_iter(other) + yield from self._segmentation.co_iter(other) class PartitionTier(ContiguousAnnotationMixin, BaseTier[Partition]): _segmentation_type = Partition + # TODO: + # - __iter__ should also yield empty segments, with a None annotation + + def crop(self, + support: ContiguousSupport, + mode: CropMode = 'intersection', + returns_mapping: bool = False) -> Union[Self, Tuple[Self, Dict[Segment, Segment]]]: + # TODO: + # - think about using crop_iter first + pass class TieredAnnotation(GappedAnnotationMixin, BaseSegmentation): diff --git a/pyannote/core/utils/types.py b/pyannote/core/utils/types.py index 7e07cff..e10f8f2 100644 --- a/pyannote/core/utils/types.py +++ b/pyannote/core/utils/types.py @@ -10,7 +10,7 @@ TrackName = Union[str, int] Key = Union['Segment', Tuple['Segment', TrackName]] TierName = str -TierKey = Tuple[TierName, 'Segment'] +TierItemPair = Tuple[TierName, 'Segment'] Resource = Union['Segment', 'Timeline', 'SlidingWindowFeature', 'Annotation'] CropMode = Literal['intersection', 'loose', 'strict'] From f94d65151d2ea7d465d8829c60e1d151f1b4e025 Mon Sep 17 00:00:00 2001 From: hadware Date: Sun, 26 Mar 2023 12:32:11 +0200 Subject: [PATCH 26/30] Post discussion commit --- pyannote/core/base.py | 6 ++++-- pyannote/core/segment.py | 4 +++- pyannote/core/tiered.py | 16 +++++++++++----- pyannote/core/timeline.py | 3 --- 4 files changed, 18 insertions(+), 11 deletions(-) diff --git a/pyannote/core/base.py b/pyannote/core/base.py index 1e69b5e..a6a57e7 100644 --- a/pyannote/core/base.py +++ b/pyannote/core/base.py @@ -123,7 +123,7 @@ def duration(self) -> float: def _repr_png_(self): pass - +# TODO: rename to SegmentSet? class GappedAnnotationMixin(metaclass=ABCMeta): @abstractmethod @@ -166,6 +166,7 @@ def crop(self, mode: CropMode = 'intersection', returns_mapping: bool = False) \ -> Union[Self, Tuple[Self, Dict[Segment, Segment]]]: + # TODO: add errors messages explaining why the support isn't of the right type pass @abstractmethod @@ -178,6 +179,7 @@ class PureSegmentationMixin(metaclass=ABCMeta): # TODO: add __and__ (defaults to crop intersection, not in place), that only takes objects of Self type? + # TODO: can actually take any BaseSegmentation for add & remove @abstractmethod def add(self, segment: Segment): pass @@ -192,4 +194,4 @@ def index(self, segment: Segment) -> int: @abstractmethod def overlapping(self, t: float) -> List[Segment]: - pass \ No newline at end of file + pass diff --git a/pyannote/core/segment.py b/pyannote/core/segment.py index d659ebb..a71514b 100755 --- a/pyannote/core/segment.py +++ b/pyannote/core/segment.py @@ -170,6 +170,7 @@ def __bool__(self): def __post_init__(self): """Round start and end up to SEGMENT_PRECISION precision (when required)""" + # TODO: check for start < end ? if AUTO_ROUND_TIME: object.__setattr__(self, 'start', int(self.start / SEGMENT_PRECISION + 0.5) * SEGMENT_PRECISION) object.__setattr__(self, 'end', int(self.end / SEGMENT_PRECISION + 0.5) * SEGMENT_PRECISION) @@ -217,7 +218,7 @@ def __contains__(self, other: 'Segment'): """ return (self.start <= other.start) and (self.end >= other.end) - def __and__(self, other): + def __and__(self, other: 'Segment'): """Intersection >>> segment = Segment(0, 10) @@ -239,6 +240,7 @@ def __and__(self, other): end = min(self.end, other.end) return Segment(start=start, end=end) + def intersects(self, other: 'Segment') -> bool: """Check whether two segments intersect each other diff --git a/pyannote/core/tiered.py b/pyannote/core/tiered.py index 6027216..b6606d3 100644 --- a/pyannote/core/tiered.py +++ b/pyannote/core/tiered.py @@ -111,7 +111,7 @@ from typing import Optional, Dict, Union, Iterable, List, TextIO, Tuple, Iterator, Callable, Type, Generic from sortedcontainers import SortedDict -from typing_extensions import Self +from typing_extensions import Self, Any from pyannote.core import Annotation from .base import BaseSegmentation, GappedAnnotationMixin, ContiguousAnnotationMixin @@ -148,12 +148,12 @@ def __init__(self, name: str = None, uri: str = None): self._segmentation = self._segmentation_type() self._segments: Dict[Segment, TierLabel] = dict() - def __setitem__(self, segment: Segment, label: str): + def __setitem__(self, segment: Segment, label: Any): # TODO: check self._segmentation.add(segment) self._segments[segment] = label - def __getitem__(self, key: Union[Segment, int]) -> str: + def __getitem__(self, key: Union[Segment, int]) -> Any: # TODO: check if isinstance(key, int): key = self._segmentation.__getitem__(key) @@ -258,6 +258,10 @@ def gaps(self, support: Optional[Support] = None) -> 'Timeline': def extrude(self, removed: Support, mode: CropMode = 'intersection') -> Self: return self._segmentation.extrude(removed, mode) + def crop_iter(self, support: Support, mode: CropMode = 'intersection', returns_mapping: bool = False) -> Iterator[ + Union[Tuple[Segment, Segment], Segment]]: + pass + def crop(self, support: Support, mode: CropMode = 'intersection', returns_mapping: bool = False) -> Union[ Self, Tuple[Self, Dict[Segment, Segment]]]: # TODO (for segments mapping): @@ -272,7 +276,6 @@ def get_overlap(self) -> 'Timeline': return self._segmentation.get_overlap() def co_iter(self, other: Union[Timeline, Segment]) -> Iterator[Tuple[Segment, Segment]]: - # TODO : Doc """Iterate over pairs of intersecting segments >>> timeline1 = Timeline([Segment(0, 2), Segment(1, 2), Segment(3, 4)]) @@ -310,6 +313,9 @@ def crop(self, # - think about using crop_iter first pass + # |------A-|-----C------|--B---| + # |-----C------| + class TieredAnnotation(GappedAnnotationMixin, BaseSegmentation): @@ -329,7 +335,7 @@ def __init__(self, uri: Optional[str] = None): @classmethod def from_textgrid(cls, textgrid: Union[str, Path, TextIO], textgrid_format: str = "full"): - try: + try:Pos from textgrid_parser import parse_textgrid except ImportError: raise ImportError("The dependencies used to parse TextGrid file cannot be found. " diff --git a/pyannote/core/timeline.py b/pyannote/core/timeline.py index a9bdfce..e4e8d75 100755 --- a/pyannote/core/timeline.py +++ b/pyannote/core/timeline.py @@ -364,9 +364,6 @@ def union(self, timeline: Union['Timeline', Segment]) -> 'Timeline': segments = self.segments_set_ | timeline.segments_set_ return Timeline(segments=segments, uri=self.uri) - def __and__(self, timeline: Union['Timeline', Segment]) -> 'Timeline': - return self.crop(timeline, mode="intersection") - def co_iter(self, other: Union['Timeline', Segment]) -> Iterator[Tuple[Segment, Segment]]: """Iterate over pairs of intersecting segments From ede6d840d9a4ba02dac194c85c107038103cd33f Mon Sep 17 00:00:00 2001 From: hadware Date: Sun, 26 Mar 2023 12:32:23 +0200 Subject: [PATCH 27/30] Woops --- pyannote/core/tiered.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyannote/core/tiered.py b/pyannote/core/tiered.py index b6606d3..16ae770 100644 --- a/pyannote/core/tiered.py +++ b/pyannote/core/tiered.py @@ -335,7 +335,7 @@ def __init__(self, uri: Optional[str] = None): @classmethod def from_textgrid(cls, textgrid: Union[str, Path, TextIO], textgrid_format: str = "full"): - try:Pos + try: from textgrid_parser import parse_textgrid except ImportError: raise ImportError("The dependencies used to parse TextGrid file cannot be found. " From 8978b110016494e18c3dde2d63ab0cdacf9c3225 Mon Sep 17 00:00:00 2001 From: hadware Date: Thu, 6 Apr 2023 17:18:20 +0200 Subject: [PATCH 28/30] Implemented methods for tiered objects. --- pyannote/core/base.py | 61 ++++++++++++++++++++++++++++++++--- pyannote/core/partition.py | 13 ++++---- pyannote/core/segment.py | 5 +++ pyannote/core/tiered.py | 65 +++++++++++++++++++------------------- 4 files changed, 101 insertions(+), 43 deletions(-) diff --git a/pyannote/core/base.py b/pyannote/core/base.py index a6a57e7..45bc203 100644 --- a/pyannote/core/base.py +++ b/pyannote/core/base.py @@ -1,11 +1,11 @@ from abc import ABCMeta, abstractmethod -from typing import Optional, Iterator, Tuple, Union, Dict, TYPE_CHECKING, Callable, List, Set +from typing import Optional, Iterator, Tuple, Union, Dict, TYPE_CHECKING, Callable, List, Set, Iterable from sortedcontainers import SortedList from typing_extensions import Self from pyannote.core import Segment -from pyannote.core.utils.types import Support, CropMode, ContiguousSupport +from pyannote.core.utils.types import Support, CropMode, ContiguousSupport, Label if TYPE_CHECKING: from .timeline import Timeline @@ -134,11 +134,53 @@ def gaps_iter(self, support: Optional[Support] = None) -> Iterator[Segment]: def gaps(self, support: Optional[Support] = None) -> 'Timeline': pass - @abstractmethod def extrude(self, removed: Support, mode: CropMode = 'intersection') -> Self: - pass + """Remove segments that overlap `removed` support. + + Parameters + ---------- + removed : Segment or Timeline + If `support` is a `Timeline`, its support is used. + mode : {'strict', 'loose', 'intersection'}, optional + Controls how segments that are not fully included in `removed` are + handled. 'strict' mode only removes fully included segments. 'loose' + mode removes any intersecting segment. 'intersection' mode removes + the overlapping part of any intersecting segment. + + Returns + ------- + extruded : Timeline + Extruded timeline + + Examples + -------- + + >>> timeline = Timeline([Segment(0, 2), Segment(1, 2), Segment(3, 5)]) + >>> timeline.extrude(Segment(1, 2)) + , ])> + + >>> timeline.extrude(Segment(1, 3), mode='loose') + ])> + + >>> timeline.extrude(Segment(1, 3), mode='strict') + , ])> + + """ + if isinstance(removed, Segment): + removed = Timeline([removed]) + else: + removed = removed.get_timeline() + + extent_tl = Timeline([self.extent()], uri=self.uri) + truncating_support = removed.gaps(support=extent_tl) + # loose for truncate means strict for crop and vice-versa + if mode == "loose": + mode = "strict" + elif mode == "strict": + mode = "loose" + return self.crop(truncating_support, mode=mode) @abstractmethod def crop(self, @@ -195,3 +237,14 @@ def index(self, segment: Segment) -> int: @abstractmethod def overlapping(self, t: float) -> List[Segment]: pass + + @abstractmethod + def __iter__(self) -> Iterable[Segment]: + pass + + +class AnnotatedSegmentationMixin(metaclass=ABCMeta): + + @abstractmethod + def __iter__(self) -> Iterable[Tuple[Segment, Label]]: + pass \ No newline at end of file diff --git a/pyannote/core/partition.py b/pyannote/core/partition.py index 5e81ad4..8a77883 100755 --- a/pyannote/core/partition.py +++ b/pyannote/core/partition.py @@ -107,17 +107,13 @@ # Partition class # ===================================================================== -# TODO: Questions: -# - "autofill" the partition if the initialized segments aren't filling? -# - partition empty if only one segment? -# - truthiness? - def pairwise(iterable): "s -> (s0, s1), (s2, s3), (s4, s5), ..." a = iter(iterable) return zip(a, a) + class Partition(PureSegmentationMixin, BaseSegmentation): """ Ordered set of segments that are all contiguous. @@ -142,7 +138,8 @@ class Partition(PureSegmentationMixin, BaseSegmentation): New timeline """ - + # TODO: need to reimplement the co_iter function to make it much faster (the whole point of having partitions) + # -> if co_iter with another partition, even faster (need to distinguish the case) def __init__(self, segments: Optional[Iterable[Segment]] = None, @@ -169,7 +166,6 @@ def __init__(self, for (start, end) in timeline: self._segments_bounds_set.update(start, end) - def __len__(self) -> int: return len(self._segments_bounds_set) - 1 @@ -185,6 +181,9 @@ def __eq__(self, other: 'Partition'): def __ne__(self, other: 'Partition'): return not other == self + def __iter__(self) -> Iterable[Segment]: + return self.itersegments() + def index(self, segment: Segment) -> int: return self._segments_bounds_set.index(segment.start) diff --git a/pyannote/core/segment.py b/pyannote/core/segment.py index a71514b..72aad6f 100755 --- a/pyannote/core/segment.py +++ b/pyannote/core/segment.py @@ -276,6 +276,11 @@ def overlaps(self, t: float) -> bool: """ return self.start <= t and self.end >= t + def bisect(self, at: float) -> Tuple['Segment', 'Segment']: + if not self.overlaps(at): + raise RuntimeError("bisection time isn't contained in the segment") + return Segment(self.start, at), Segment(at, self.end) + def __or__(self, other: 'Segment') -> 'Segment': """Union diff --git a/pyannote/core/tiered.py b/pyannote/core/tiered.py index 16ae770..e2e5bb1 100644 --- a/pyannote/core/tiered.py +++ b/pyannote/core/tiered.py @@ -107,6 +107,7 @@ See :class:`pyannote.core.Annotation` for the complete reference. """ import itertools +from abc import abstractmethod from pathlib import Path from typing import Optional, Dict, Union, Iterable, List, TextIO, Tuple, Iterator, Callable, Type, Generic @@ -136,7 +137,6 @@ class BaseTier(BaseSegmentation, Generic[T]): _segmentation_type: T # TODO: handle segment sets changes for - # - add (partition) # - bisect (partition) # - crop (partition/timeline) # - for extrusion, should be based on cropping @@ -148,13 +148,11 @@ def __init__(self, name: str = None, uri: str = None): self._segmentation = self._segmentation_type() self._segments: Dict[Segment, TierLabel] = dict() + @abstractmethod def __setitem__(self, segment: Segment, label: Any): - # TODO: check - self._segmentation.add(segment) - self._segments[segment] = label + pass def __getitem__(self, key: Union[Segment, int]) -> Any: - # TODO: check if isinstance(key, int): key = self._segmentation.__getitem__(key) return self._segments[key] @@ -246,8 +244,13 @@ def _repr_png_(self): class Tier(GappedAnnotationMixin, BaseTier[Timeline]): _segmentation_type = Timeline - """A set of chronologically-ordered, optionally non-overlapping - and annotated segments""" + _segmentation: Timeline + """A set of chronologically-ordered and annotated segments""" + + def __setitem__(self, segment: Segment, label: Any): + # TODO: check + self._segmentation.add(segment) + self._segments[segment] = label def gaps_iter(self, support: Optional[Support] = None) -> Iterator[Segment]: return self._segmentation.gaps_iter(support) @@ -256,7 +259,17 @@ def gaps(self, support: Optional[Support] = None) -> 'Timeline': return self._segmentation.gaps(support) def extrude(self, removed: Support, mode: CropMode = 'intersection') -> Self: - return self._segmentation.extrude(removed, mode) + if isinstance(removed, Segment): + removed = Timeline([removed]) + + extent_tl = Timeline([self.get_timeline().extent()], uri=self.uri) + truncating_support = removed.gaps(support=extent_tl) + # loose for truncate means strict for crop and vice-versa + if mode == "loose": + mode = "strict" + elif mode == "strict": + mode = "loose" + return self.crop(truncating_support, mode=mode) def crop_iter(self, support: Support, mode: CropMode = 'intersection', returns_mapping: bool = False) -> Iterator[ Union[Tuple[Segment, Segment], Segment]]: @@ -276,39 +289,27 @@ def get_overlap(self) -> 'Timeline': return self._segmentation.get_overlap() def co_iter(self, other: Union[Timeline, Segment]) -> Iterator[Tuple[Segment, Segment]]: - """Iterate over pairs of intersecting segments - - >>> timeline1 = Timeline([Segment(0, 2), Segment(1, 2), Segment(3, 4)]) - >>> timeline2 = Timeline([Segment(1, 3), Segment(3, 5)]) - >>> for segment1, segment2 in timeline1.co_iter(timeline2): - ... print(segment1, segment2) - (, ) - (, ) - (, ) - - Parameters - ---------- - other : Timeline - Second timeline - - Returns - ------- - iterable : (Segment, Segment) iterable - Yields pairs of intersecting segments in chronological order. - """ - yield from self._segmentation.co_iter(other) class PartitionTier(ContiguousAnnotationMixin, BaseTier[Partition]): + """A set of chronologically-ordered, contiguous and non-overlapping annotated segments""" _segmentation_type = Partition + _segmentation: Partition + # TODO: - # - __iter__ should also yield empty segments, with a None annotation + # - look into praat's way of dealing with segments insertions, and match its behavior + # - probably just allow bisect to create new segments, then use __setitem__ to set the segment's annotation + def bisect(self, at: float): + self._segmentation.bisect(at) + bisected_segment = self._segmentation.overlapping(at)[0] + annot = self._segments[bisected_segment] + del self._segments[bisected_segment] + self._segments.update({seg: annot for seg in bisected_segment.bisect(at)}) def crop(self, support: ContiguousSupport, - mode: CropMode = 'intersection', - returns_mapping: bool = False) -> Union[Self, Tuple[Self, Dict[Segment, Segment]]]: + mode: CropMode = 'intersection') -> Union[Self, Tuple[Self, Dict[Segment, Segment]]]: # TODO: # - think about using crop_iter first pass From e166f3f9673839b037037249f885df1b94200df3 Mon Sep 17 00:00:00 2001 From: hadware Date: Tue, 25 Apr 2023 02:31:53 +0200 Subject: [PATCH 29/30] Crop method for partitions. --- pyannote/core/annotation.py | 1 + pyannote/core/base.py | 59 +++++-- pyannote/core/partition.py | 107 ++++++++---- pyannote/core/tiered.py | 316 ++++++++++++++++++------------------ pyannote/core/timeline.py | 2 + 5 files changed, 285 insertions(+), 200 deletions(-) diff --git a/pyannote/core/annotation.py b/pyannote/core/annotation.py index cdbce04..f729e45 100755 --- a/pyannote/core/annotation.py +++ b/pyannote/core/annotation.py @@ -560,6 +560,7 @@ def crop(self, support: Support, mode: CropMode = "intersection") -> "Annotation else: raise NotImplementedError("unsupported mode: '%s'" % mode) + # TODO: remove (already in base class) def extrude( self, removed: Support, mode: CropMode = "intersection" ) -> "Annotation": diff --git a/pyannote/core/base.py b/pyannote/core/base.py index 45bc203..9ba658b 100644 --- a/pyannote/core/base.py +++ b/pyannote/core/base.py @@ -47,6 +47,9 @@ def __eq__(self, other: Self): def __ne__(self, other: Self): pass + def __matmul__(self, other: Union['BaseSegmentation', Segment]): + return self.co_iter(other) + @abstractmethod def itersegments(self): pass @@ -69,6 +72,7 @@ def co_iter(self, other: Union['BaseSegmentation', Segment]) -> Iterator[Tuple[S else: other_segments = SortedList(other.itersegments()) + # TODO maybe wrap self.itersegs in a sortedlist as well? for segment in self.itersegments(): # iterate over segments that starts before 'segment' ends @@ -107,13 +111,7 @@ def copy(self) -> Self: def extent(self) -> Segment: pass - @abstractmethod - def crop_iter(self, - support: Support, - mode: CropMode = 'intersection', - returns_mapping: bool = False) \ - -> Iterator[Union[Tuple[Segment, Segment], Segment]]: - pass + @abstractmethod def duration(self) -> float: @@ -123,8 +121,9 @@ def duration(self) -> float: def _repr_png_(self): pass + # TODO: rename to SegmentSet? -class GappedAnnotationMixin(metaclass=ABCMeta): +class GappedAnnotationMixin(BaseSegmentation): @abstractmethod def gaps_iter(self, support: Optional[Support] = None) -> Iterator[Segment]: @@ -199,9 +198,34 @@ def get_overlap(self) -> 'Timeline': pass -class ContiguousAnnotationMixin(metaclass=ABCMeta): +class ContiguousAnnotationMixin(BaseSegmentation): # TODO : figure out if the return mapping still makes sense # (propably not) + + + def co_iter(self, other: Union['BaseSegmentation', Segment]) -> Iterator[Tuple[Segment, Segment]]: + if not isinstance(other, (ContiguousAnnotationMixin, Segment)): + return super().co_iter(other) + + # we're dealing with another contiguous segmentation, things can be much quicker + if isinstance(other, Segment): + other_segments = SortedList([other]) + else: + other_segments = SortedList(other.itersegments()) + my_segments = SortedList(self.itersegments()) + try: + seg_a: Segment = my_segments.pop(0) + seg_b: Segment = other_segments.pop(0) + while True: + if seg_a.intersects(seg_b): + yield seg_a, seg_b + if seg_b.end < seg_a.end: + seg_b = other_segments.pop(0) + else: + seg_a = other_segments.pop(0) + except IndexError: # exhausting any of the stacks: yielding nothing and ending + yield from () + @abstractmethod def crop(self, support: ContiguousSupport, @@ -215,6 +239,10 @@ def crop(self, def bisect(self, at: float): pass + @abstractmethod + def fuse(self, at: float): + pass + class PureSegmentationMixin(metaclass=ABCMeta): """A segmentation containing _only_ segments""" @@ -222,6 +250,15 @@ class PureSegmentationMixin(metaclass=ABCMeta): # TODO: add __and__ (defaults to crop intersection, not in place), that only takes objects of Self type? # TODO: can actually take any BaseSegmentation for add & remove + + @abstractmethod + def crop_iter(self, + support: Support, + mode: CropMode = 'intersection', + returns_mapping: bool = False) \ + -> Iterator[Union[Tuple[Segment, Segment], Segment]]: + pass + @abstractmethod def add(self, segment: Segment): pass @@ -230,10 +267,12 @@ def add(self, segment: Segment): def remove(self, segment: Segment): pass + # TODO: maybe could be in BaseSegmentation @abstractmethod def index(self, segment: Segment) -> int: pass + # TODO: maybe could be in BaseSegmentation @abstractmethod def overlapping(self, t: float) -> List[Segment]: pass @@ -247,4 +286,4 @@ class AnnotatedSegmentationMixin(metaclass=ABCMeta): @abstractmethod def __iter__(self) -> Iterable[Tuple[Segment, Label]]: - pass \ No newline at end of file + pass diff --git a/pyannote/core/partition.py b/pyannote/core/partition.py index 8a77883..e8aa388 100755 --- a/pyannote/core/partition.py +++ b/pyannote/core/partition.py @@ -95,9 +95,9 @@ from typing_extensions import Self from . import Timeline -from .base import BaseSegmentation, PureSegmentationMixin +from .base import BaseSegmentation, PureSegmentationMixin, ContiguousAnnotationMixin from .segment import Segment -from .utils.types import Support, CropMode +from .utils.types import Support, CropMode, ContiguousSupport if TYPE_CHECKING: pass @@ -114,7 +114,7 @@ def pairwise(iterable): return zip(a, a) -class Partition(PureSegmentationMixin, BaseSegmentation): +class Partition(PureSegmentationMixin, ContiguousAnnotationMixin, BaseSegmentation): """ Ordered set of segments that are all contiguous. @@ -127,8 +127,7 @@ class Partition(PureSegmentationMixin, BaseSegmentation): ---------- segments : Segment iterator, optional initial set of (non-empty) segments - start: float TODO - end: float TODO + boundaries: Segment, optional uri : string, optional name of segmented resource @@ -143,24 +142,18 @@ class Partition(PureSegmentationMixin, BaseSegmentation): def __init__(self, segments: Optional[Iterable[Segment]] = None, - start: float = 0.0, - end: float = None, + boundaries: Optional[Segment] = None, uri: str = None): segments = list(segments) if segments else [] - if segments is None and end is None: - raise ValueError("Cannot initialize an empty timeline without and end boundary") - elif end is None: - end = max(seg.end for seg in segments) - elif not segments: - segments = Segment(start, end) + if not segments and boundaries is None: + raise ValueError("Cannot initialize an empty Partition without definin boundaries") super().__init__(uri) - self.boundaries = Segment(start, end) + self.boundaries = boundaries timeline = Timeline(segments) if timeline.extent() not in self.boundaries: - raise ValueError(f"Segments have to be within ({start, end}) bounds") + raise ValueError(f"Segments have to be within {boundaries}") # automatically filling in the gaps in the segments - # TODO: ask about behavior? timeline.add(self.boundaries) self._segments_bounds_set = SortedSet() for (start, end) in timeline: @@ -193,8 +186,14 @@ def bisect(self, at: float): self._segments_bounds_set.add(at) + def fuse(self, at: float): + try: + self._segments_bounds_set.remove(at) + except KeyError: + raise RuntimeError("Cannot fuse non-existing boundary") + def add(self, segment: Segment): - # TODO: ask about this behavior + # TODO: fix (check for segment inclusion) if len(list(self.co_iter(segment))) > 1: raise ValueError("Segment overlaps a boundary") self.bisect(segment.start) @@ -224,14 +223,12 @@ def __repr__(self): def empty(self) -> Self: return Partition(None, - start=self.boundaries.start, - end=self.boundaries.end, + boundaries=self.boundaries, uri=self.uri) def copy(self) -> Self: return Partition(self.itersegments(), - start=self.boundaries.start, - end=self.boundaries.end, + boundaries=self.boundaries, uri=self.uri) def extent(self) -> Segment: @@ -244,13 +241,67 @@ def overlapping(self, t: float) -> List[Segment]: start = self._segments_bounds_set[end_idx - 1] return [Segment(start, end)] - def crop_iter(self, support: Support, mode: CropMode = 'intersection', returns_mapping: bool = False) -> Iterator[ - Union[Tuple[Segment, Segment], Segment]]: - pass - - def crop(self, support: Support, mode: CropMode = 'intersection', returns_mapping: bool = False) -> Union[ - Self, Tuple[Self, Dict[Segment, Segment]]]: - pass + def crop_iter(self, support: ContiguousSupport, mode: CropMode = 'intersection', returns_mapping: bool = False) \ + -> Iterator[Union[Tuple[Segment, Segment], Segment]]: + # TODO: check algo when boundaries match + if not isinstance(support, (Segment, ContiguousAnnotationMixin)): + raise ValueError(f"Only contiguous supports are allowed for cropping a {self.__class__.__name__}.") + + if not isinstance(support, Segment): + support = support.extent() + if self.extent() in support: + return self.itersegments() + + cropped_boundaries = SortedSet(self._segments_bounds_set.irange(minimum=support.start, + maximum=support.end, + inclusive=(False, False))) + + # first, yielding the first "cut" segment depending on mode + if support.start > self.extent().start: + idx_start = self._segments_bounds_set.index(cropped_boundaries[0]) + first_seg = Segment(start=self._segments_bounds_set[idx_start - 1], end=self._segments_bounds_set[idx_start]) + if mode == "intersection": + mapped_to = Segment(start=support.start, end=first_seg.end) + if returns_mapping: + yield first_seg, mapped_to + else: + yield mapped_to + elif mode == "loose": + yield first_seg + + # then, yielding "untouched" segments + for (start, end) in pairwise(cropped_boundaries): + seg = Segment(start, end) + if returns_mapping: + yield seg, seg + else: + yield seg + + # finally, yielding the last "cut" segment depending on mode + if support.end < self.extent().end: + idx_end = self._segments_bounds_set.index(cropped_boundaries[0]) + last_seg = Segment(start=self._segments_bounds_set[idx_end], end=self._segments_bounds_set[idx_end + 1]) + if mode == "intersection": + mapped_to = Segment(start=last_seg.start, end=support.end) + if returns_mapping: + yield last_seg, mapped_to + else: + yield last_seg + elif mode == "loose": + yield last_seg + + def crop(self, support: ContiguousSupport, mode: CropMode = 'intersection', returns_mapping: bool = False) \ + -> Union[Self, Tuple[Self, Dict[Segment, Segment]]]: + if mode == 'intersection' and returns_mapping: + segments, mapping = [], {} + for segment, mapped_to in self.crop_iter(support, + mode='intersection', + returns_mapping=True): + segments.append(segment) + mapping[mapped_to] = [segment] + return Partition(segments=segments, uri=self.uri), mapping + + return Partition(segments=self.crop_iter(support, mode=mode), uri=self.uri) def duration(self) -> float: return self.extent().duration diff --git a/pyannote/core/tiered.py b/pyannote/core/tiered.py index e2e5bb1..bfb3373 100644 --- a/pyannote/core/tiered.py +++ b/pyannote/core/tiered.py @@ -106,7 +106,6 @@ See :class:`pyannote.core.Annotation` for the complete reference. """ -import itertools from abc import abstractmethod from pathlib import Path from typing import Optional, Dict, Union, Iterable, List, TextIO, Tuple, Iterator, Callable, Type, Generic @@ -115,14 +114,13 @@ from typing_extensions import Self, Any from pyannote.core import Annotation -from .base import BaseSegmentation, GappedAnnotationMixin, ContiguousAnnotationMixin +from .base import BaseSegmentation, GappedAnnotationMixin, ContiguousAnnotationMixin, AnnotatedSegmentationMixin from .partition import Partition from .segment import Segment from .timeline import Timeline from .utils.generators import string_generator from .utils.types import Label, Key, Support, TierName, CropMode, ContiguousSupport, TierItemPair -# TODO: add JSON dumping/loading # TODO: QUESTIONS: # - iterator for the TieredAnnotation @@ -133,7 +131,7 @@ T = Type[Union[Partition, Timeline]] -class BaseTier(BaseSegmentation, Generic[T]): +class BaseTier(BaseSegmentation, AnnotatedSegmentationMixin, Generic[T]): _segmentation_type: T # TODO: handle segment sets changes for @@ -168,7 +166,7 @@ def __delitem__(self, key: Union[Segment, int]): def __iter__(self) -> Iterable[TierItemPair]: """Return segments with their annotation, in chronological order""" for segment in self._segmentation.itersegments(): - yield segment, self._segments[segment] + yield segment, self._segments.get(segment, None) def __len__(self): """Number of segments in the tier @@ -191,7 +189,7 @@ def __bool__(self): def __eq__(self, other: 'BaseTier'): """Equality - Two PraatTiers are equal if and only if their segments and their annotations are equal. + Two Tiers are equal if and only if their segments and their annotations are equal. # TODO : doc >>> timeline1 = Timeline([Segment(0, 1), Segment(2, 3)]) @@ -222,18 +220,15 @@ def empty(self) -> 'BaseTier': """ return self.__class__(self.name, uri=self.uri) - def update(self, tier: 'BaseTier') -> 'BaseTier': - pass # TODO - def copy(self, segment_func: Optional[Callable[[Segment], Segment]] = None) -> Self: - pass # TODO + copy = self.__class__(name=self.name, uri=self.uri) + copy._segmentation = self._segmentation.copy() + copy._segments = self._segments.copy() + return copy def extent(self) -> Segment: return self._segmentation.extent() - def crop_iter(self, support: Support, mode: CropMode = 'intersection', returns_mapping: bool = False) -> Iterator[ - Union[Tuple[Segment, Segment], Segment]]: - pass def duration(self) -> float: return self._segmentation.duration() @@ -258,23 +253,6 @@ def gaps_iter(self, support: Optional[Support] = None) -> Iterator[Segment]: def gaps(self, support: Optional[Support] = None) -> 'Timeline': return self._segmentation.gaps(support) - def extrude(self, removed: Support, mode: CropMode = 'intersection') -> Self: - if isinstance(removed, Segment): - removed = Timeline([removed]) - - extent_tl = Timeline([self.get_timeline().extent()], uri=self.uri) - truncating_support = removed.gaps(support=extent_tl) - # loose for truncate means strict for crop and vice-versa - if mode == "loose": - mode = "strict" - elif mode == "strict": - mode = "loose" - return self.crop(truncating_support, mode=mode) - - def crop_iter(self, support: Support, mode: CropMode = 'intersection', returns_mapping: bool = False) -> Iterator[ - Union[Tuple[Segment, Segment], Segment]]: - pass - def crop(self, support: Support, mode: CropMode = 'intersection', returns_mapping: bool = False) -> Union[ Self, Tuple[Self, Dict[Segment, Segment]]]: # TODO (for segments mapping): @@ -291,15 +269,27 @@ def get_overlap(self) -> 'Timeline': def co_iter(self, other: Union[Timeline, Segment]) -> Iterator[Tuple[Segment, Segment]]: yield from self._segmentation.co_iter(other) + def update(self, tier: 'Tier') -> 'Tier': + self._segmentation.update(tier._segmentation) + self._segments.update(tier._segments) + class PartitionTier(ContiguousAnnotationMixin, BaseTier[Partition]): """A set of chronologically-ordered, contiguous and non-overlapping annotated segments""" _segmentation_type = Partition _segmentation: Partition - # TODO: - # - look into praat's way of dealing with segments insertions, and match its behavior - # - probably just allow bisect to create new segments, then use __setitem__ to set the segment's annotation + def __setitem__(self, segment: Segment, label: Any): + if not segment in self._segmentation: + raise RuntimeError(f"Segment {segment} not contained in the tier's partition") + self._segments[segment] = label + + def __str__(self): + pass + + def __repr__(self): + pass + def bisect(self, at: float): self._segmentation.bisect(at) bisected_segment = self._segmentation.overlapping(at)[0] @@ -307,15 +297,35 @@ def bisect(self, at: float): del self._segments[bisected_segment] self._segments.update({seg: annot for seg in bisected_segment.bisect(at)}) + def fuse(self, at: float): + # To know if segments can be fused, check segment before fuse and after fuse + # if they have matching annotations, allow fuse + pass + def crop(self, support: ContiguousSupport, mode: CropMode = 'intersection') -> Union[Self, Tuple[Self, Dict[Segment, Segment]]]: + seg_set = self.segments_set().copy() + if mode in {"loose", "strict"}: + cropped_seg = self._segmentation.crop(support, mode=mode) + annotated_segments = {seg: self._segments[seg] for seg in cropped_seg} + else: # it's "intersection" + cropped_seg, mapping = self._segmentation.crop(support, mode="intersection", returns_mapping=True) + annotated_segments = {} + # TODO: for tiers based on timelines, figure out what to do when cropped segment maps to several + # annotations. Use (segment, annot) pairs maybe? Raise an error? + for seg, mapped_to in mapping.items(): + annotated_segments.update({ + seg: self._segments[mapped_seg] for mapped_seg in mapped_to + }) + # TODO: - # - think about using crop_iter first + # - if "intersection", use the return mapping to remove segments + # - if loose or strict, find missing segments and remove them pass - # |------A-|-----C------|--B---| - # |-----C------| + def update(self, tier: 'BaseTier') -> 'BaseTier': + raise RuntimeError(f"A {self.__class__.__name__} cannot be updated.") class TieredAnnotation(GappedAnnotationMixin, BaseSegmentation): @@ -389,53 +399,12 @@ def __bool__(self): """ return len(self) > 0 - def itersegments(self): - """Iterate over segments (in chronological order) - >>> for segment in annotation.itersegments(): - ... # do something with the segment - - See also - -------- - :class:`pyannote.core.Segment` describes how segments are sorted. - """ - for tier in self.tiers: - yield from tier def __iter__(self) -> Iterable[Tuple[Segment, str]]: - return iter(self._tiers.items()) - - def _update_timeline(self): - segments = list(itertools.chain.from_iterable(self._tiers.keys())) - self._timeline = Timeline(segments=segments, uri=self.uri) - self._timelineNeedsUpdate = False - - def get_timeline(self, copy: bool = True) -> Timeline: - """Get timeline made of all annotated segments - - Parameters - ---------- - copy : bool, optional - Defaults (True) to returning a copy of the internal timeline. - Set to False to return the actual internal timeline (faster). - - Returns - ------- - timeline : Timeline - Timeline made of all annotated segments. - - Note - ---- - In case copy is set to False, be careful **not** to modify the returned - timeline, as it may lead to weird subsequent behavior of the annotation - instance. + # TODO + pass - """ - if self._timelineNeedsUpdate: - self._update_timeline() - if copy: - return self._timeline.copy() - return self._timeline def __eq__(self, other: 'TieredAnnotation'): """Equality @@ -446,19 +415,12 @@ def __eq__(self, other: 'TieredAnnotation'): labels are equal. """ # TODO - pairOfTracks = itertools.zip_longest( - self.itertracks(yield_label=True), - other.itertracks(yield_label=True)) - return all(t1 == t2 for t1, t2 in pairOfTracks) + pass def __ne__(self, other: 'TieredAnnotation'): """Inequality""" # TODO - pairOfTracks = itertools.zip_longest( - self.itertracks(yield_label=True), - other.itertracks(yield_label=True)) - - return any(t1 != t2 for t1, t2 in pairOfTracks) + pass def __contains__(self, included: Union[Segment, Timeline]): """Inclusion @@ -479,9 +441,64 @@ def __contains__(self, included: Union[Segment, Timeline]): """ return included in self.get_timeline(copy=False) + + def __delitem__(self, key: TierName): + """Delete a tier + # TODO : doc + """ + del self._tiers[key] + + def __getitem__(self, key: TierName) -> Tier: + """Get a tier + + >>> praat_tier = annotation[tiername] + + """ + + return self._tiers[key] + + + def __setitem__(self, key: Key, label: Label): + pass # TODO : set a tier + + def __repr__(self): + pass # TODO + + def __str__(self): + """Human-friendly representation""" + # TODO: use pandas.DataFrame + return "\n".join(["%s %s %s" % (s, t, l) + for s, t, l in self.itertracks(yield_label=True)]) + + def empty(self) -> 'Annotation': + """Return an empty copy + + Returns + ------- + empty : Annotation + Empty annotation using the same 'uri' and 'modality' attributes. + + """ + return self.__class__(uri=self.uri, modality=self.modality) + + def itersegments(self): + """Iterate over segments (in chronological order) + + >>> for segment in annotation.itersegments(): + ... # do something with the segment + + See also + -------- + :class:`pyannote.core.Segment` describes how segments are sorted. + """ + for tier in self.tiers: + yield from tier + def to_textgrid(self, file: Union[str, Path, TextIO]): pass + + def to_annotation(self, modality: Optional[str] = None) -> Annotation: """Convert to an annotation object. The new annotation's labels are the tier names of each segments. In short, the segment's @@ -508,6 +525,33 @@ def to_annotation(self, modality: Optional[str] = None) -> Annotation: annotation[segment] = tier_name return annotation + def get_timeline(self, copy: bool = True) -> Timeline: + """Get timeline made of all annotated segments + + Parameters + ---------- + copy : bool, optional + Defaults (True) to returning a copy of the internal timeline. + Set to False to return the actual internal timeline (faster). + + Returns + ------- + timeline : Timeline + Timeline made of all annotated segments. + + Note + ---- + In case copy is set to False, be careful **not** to modify the returned + timeline, as it may lead to weird subsequent behavior of the annotation + instance. + + """ + if self._timelineNeedsUpdate: + self._update_timeline() + if copy: + return self._timeline.copy() + return self._timeline + def crop(self, support: Support, mode: CropMode = 'intersection') \ -> 'TieredAnnotation': """Crop textgrid to new support @@ -545,80 +589,6 @@ def copy(self) -> 'TieredAnnotation': # TODO pass - def __str__(self): - """Human-friendly representation""" - # TODO: use pandas.DataFrame - return "\n".join(["%s %s %s" % (s, t, l) - for s, t, l in self.itertracks(yield_label=True)]) - - def __delitem__(self, key: TierName): - """Delete a tier - # TODO : doc - """ - del self._tiers[key] - - def __getitem__(self, key: TierName) -> Tier: - """Get a tier - - >>> praat_tier = annotation[tiername] - - """ - - return self._tiers[key] - - def __setitem__(self, key: Key, label: Label): - """Add new or update existing track - - >>> annotation[segment, track] = label - - If (segment, track) does not exist, it is added. - If (segment, track) already exists, it is updated. - - Note - ---- - ``annotation[segment] = label`` is equivalent to ``annotation[segment, '_'] = label`` - - Note - ---- - If `segment` is empty, it does nothing. - """ - - if isinstance(key, Segment): - key = (key, '_') - - segment, track = key - - # do not add empty track - if not segment: - return - - # in case we create a new segment - # mark timeline as modified - if segment not in self._tiers: - self._tiers[segment] = {} - self._timelineNeedsUpdate = True - - # in case we modify an existing track - # mark old label as modified - if track in self._tiers[segment]: - old_label = self._tiers[segment][track] - self._labelNeedsUpdate[old_label] = True - - # mark new label as modified - self._tiers[segment][track] = label - self._labelNeedsUpdate[label] = True - - def empty(self) -> 'Annotation': - """Return an empty copy - - Returns - ------- - empty : Annotation - Empty annotation using the same 'uri' and 'modality' attributes. - - """ - return self.__class__(uri=self.uri, modality=self.modality) - def update(self, textgrid: 'TieredAnnotation', copy: bool = False) \ -> 'TieredAnnotation': """Add every track of an existing annotation (in place) @@ -703,6 +673,28 @@ def support(self, collar: float = 0.) -> 'TieredAnnotation': return support + def gaps_iter(self, support: Optional[Support] = None) -> Iterator[Segment]: + pass + + def gaps(self, support: Optional[Support] = None) -> 'Timeline': + pass + + def get_overlap(self) -> 'Timeline': + pass + + def extent(self) -> Segment: + pass + + def crop_iter(self, support: Support, mode: CropMode = 'intersection', returns_mapping: bool = False) -> Iterator[ + Union[Tuple[Segment, Segment], Segment]]: + pass + + def duration(self) -> float: + pass + + def _repr_png_(self): + pass + def _repr_png(self): """IPython notebook support diff --git a/pyannote/core/timeline.py b/pyannote/core/timeline.py index e4e8d75..8aeb093 100755 --- a/pyannote/core/timeline.py +++ b/pyannote/core/timeline.py @@ -412,6 +412,7 @@ def crop_iter(self, raise ValueError("Mode must be one of 'loose', 'strict', or " "'intersection'.") + # TODO: make more generic using SegmentSet if not isinstance(support, (Segment, Timeline)): raise TypeError("Support must be a Segment or a Timeline.") @@ -567,6 +568,7 @@ def get_overlap(self) -> 'Timeline': overlaps_tl.add(s1 & s2) return overlaps_tl.support() + # TODO: remove (already in base class) def extrude(self, removed: Support, mode: CropMode = 'intersection') -> 'Timeline': From fc3baf3cd498d20e666f017a12215ff20d230612 Mon Sep 17 00:00:00 2001 From: hadware Date: Thu, 27 Apr 2023 02:17:55 +0200 Subject: [PATCH 30/30] Implemented crop for all tier types. Restricted regular tiers to non-overlapping to prevent errors while cropping. --- pyannote/core/base.py | 7 --- pyannote/core/partition.py | 5 +- pyannote/core/tiered.py | 104 +++++++++++++++---------------------- 3 files changed, 44 insertions(+), 72 deletions(-) diff --git a/pyannote/core/base.py b/pyannote/core/base.py index 9ba658b..230235d 100644 --- a/pyannote/core/base.py +++ b/pyannote/core/base.py @@ -111,8 +111,6 @@ def copy(self) -> Self: def extent(self) -> Segment: pass - - @abstractmethod def duration(self) -> float: pass @@ -202,7 +200,6 @@ class ContiguousAnnotationMixin(BaseSegmentation): # TODO : figure out if the return mapping still makes sense # (propably not) - def co_iter(self, other: Union['BaseSegmentation', Segment]) -> Iterator[Tuple[Segment, Segment]]: if not isinstance(other, (ContiguousAnnotationMixin, Segment)): return super().co_iter(other) @@ -239,10 +236,6 @@ def crop(self, def bisect(self, at: float): pass - @abstractmethod - def fuse(self, at: float): - pass - class PureSegmentationMixin(metaclass=ABCMeta): """A segmentation containing _only_ segments""" diff --git a/pyannote/core/partition.py b/pyannote/core/partition.py index e8aa388..37d0a49 100755 --- a/pyannote/core/partition.py +++ b/pyannote/core/partition.py @@ -187,15 +187,14 @@ def bisect(self, at: float): self._segments_bounds_set.add(at) def fuse(self, at: float): + if at in set(self.boundaries): + raise RuntimeError("Cannot fuse start or end boundary of the partition") try: self._segments_bounds_set.remove(at) except KeyError: raise RuntimeError("Cannot fuse non-existing boundary") def add(self, segment: Segment): - # TODO: fix (check for segment inclusion) - if len(list(self.co_iter(segment))) > 1: - raise ValueError("Segment overlaps a boundary") self.bisect(segment.start) self.bisect(segment.end) diff --git a/pyannote/core/tiered.py b/pyannote/core/tiered.py index bfb3373..de6b19e 100644 --- a/pyannote/core/tiered.py +++ b/pyannote/core/tiered.py @@ -124,26 +124,15 @@ # TODO: QUESTIONS: # - iterator for the TieredAnnotation -# TODO: add segmentation abstract class - -# TODO: IDEA: use a timeline in the Tier to do all the cropping/etc/ operations -# and just make this class a thin wrapper for it T = Type[Union[Partition, Timeline]] class BaseTier(BaseSegmentation, AnnotatedSegmentationMixin, Generic[T]): - _segmentation_type: T - - # TODO: handle segment sets changes for - # - bisect (partition) - # - crop (partition/timeline) - # - for extrusion, should be based on cropping + _segmentation: Union[Partition, Timeline] def __init__(self, name: str = None, uri: str = None): super().__init__(uri) self.name = name - - self._segmentation = self._segmentation_type() self._segments: Dict[Segment, TierLabel] = dict() @abstractmethod @@ -151,12 +140,13 @@ def __setitem__(self, segment: Segment, label: Any): pass def __getitem__(self, key: Union[Segment, int]) -> Any: + # TODO: check int key if isinstance(key, int): key = self._segmentation.__getitem__(key) return self._segments[key] def __delitem__(self, key: Union[Segment, int]): - # TODO: check + # TODO: check int key if isinstance(key, int): key = self._segmentation.__getitem__(key) @@ -229,21 +219,49 @@ def copy(self, segment_func: Optional[Callable[[Segment], Segment]] = None) -> S def extent(self) -> Segment: return self._segmentation.extent() - def duration(self) -> float: return self._segmentation.duration() + def crop(self, + support: ContiguousSupport, + mode: CropMode = 'intersection', + returns_mapping: bool = False) \ + -> Union[Self, Tuple[Self, Dict[Segment, Segment]]]: + if mode in {"loose", "strict"}: + cropped_seg = self._segmentation.crop(support, mode=mode) + annotated_segments = {seg: self._segments[seg] for seg in cropped_seg} + else: # it's "intersection" + cropped_seg, mapping = self._segmentation.crop(support, mode="intersection", returns_mapping=True) + annotated_segments = {} + for seg, mapped_to in mapping.items(): + annotated_segments.update({ + seg: self._segments[mapped_seg] for mapped_seg in mapped_to + }) + + new_tier = self.__class__(self.name, uri=self.uri) + new_tier._segmentation = cropped_seg + new_tier._segments = annotated_segments + if returns_mapping and mode == "intersection": + return new_tier, mapping # noqa + else: + return new_tier + def _repr_png_(self): pass class Tier(GappedAnnotationMixin, BaseTier[Timeline]): - _segmentation_type = Timeline - _segmentation: Timeline - """A set of chronologically-ordered and annotated segments""" + """A set of chronologically-ordered and non-overlapping annotated segments""" + + # TODO: crop-safe or non-overlapping? + + def __init__(self, name: str = None, uri: str = None): + super().__init__(name, uri) + self._segmentation = Timeline() def __setitem__(self, segment: Segment, label: Any): - # TODO: check + if list(self._segmentation.co_iter(segment)): + raise RuntimeError("Cannot add a segment that overlaps a pre-existing segment") self._segmentation.add(segment) self._segments[segment] = label @@ -253,13 +271,6 @@ def gaps_iter(self, support: Optional[Support] = None) -> Iterator[Segment]: def gaps(self, support: Optional[Support] = None) -> 'Timeline': return self._segmentation.gaps(support) - def crop(self, support: Support, mode: CropMode = 'intersection', returns_mapping: bool = False) -> Union[ - Self, Tuple[Self, Dict[Segment, Segment]]]: - # TODO (for segments mapping): - # - if loose/strict, use segment_set of cropped segmentation to find deleted segments - # - if intersection, use return_mapping to replace sliced segments - return self._segmentation.crop(support, mode, returns_mapping) - def support(self, collar: float = 0.) -> Timeline: return self._segmentation.support(collar) @@ -276,10 +287,13 @@ def update(self, tier: 'Tier') -> 'Tier': class PartitionTier(ContiguousAnnotationMixin, BaseTier[Partition]): """A set of chronologically-ordered, contiguous and non-overlapping annotated segments""" - _segmentation_type = Partition - _segmentation: Partition + + def __init__(self, boundaries: Segment, name: str = None, uri: str = None): + super().__init__(name, uri) + self._segmentation = Partition(boundaries=boundaries) def __setitem__(self, segment: Segment, label: Any): + # TODO: maybe allow segment setting for segments that are not yet annotated? if not segment in self._segmentation: raise RuntimeError(f"Segment {segment} not contained in the tier's partition") self._segments[segment] = label @@ -297,33 +311,6 @@ def bisect(self, at: float): del self._segments[bisected_segment] self._segments.update({seg: annot for seg in bisected_segment.bisect(at)}) - def fuse(self, at: float): - # To know if segments can be fused, check segment before fuse and after fuse - # if they have matching annotations, allow fuse - pass - - def crop(self, - support: ContiguousSupport, - mode: CropMode = 'intersection') -> Union[Self, Tuple[Self, Dict[Segment, Segment]]]: - seg_set = self.segments_set().copy() - if mode in {"loose", "strict"}: - cropped_seg = self._segmentation.crop(support, mode=mode) - annotated_segments = {seg: self._segments[seg] for seg in cropped_seg} - else: # it's "intersection" - cropped_seg, mapping = self._segmentation.crop(support, mode="intersection", returns_mapping=True) - annotated_segments = {} - # TODO: for tiers based on timelines, figure out what to do when cropped segment maps to several - # annotations. Use (segment, annot) pairs maybe? Raise an error? - for seg, mapped_to in mapping.items(): - annotated_segments.update({ - seg: self._segments[mapped_seg] for mapped_seg in mapped_to - }) - - # TODO: - # - if "intersection", use the return mapping to remove segments - # - if loose or strict, find missing segments and remove them - pass - def update(self, tier: 'BaseTier') -> 'BaseTier': raise RuntimeError(f"A {self.__class__.__name__} cannot be updated.") @@ -399,13 +386,10 @@ def __bool__(self): """ return len(self) > 0 - - def __iter__(self) -> Iterable[Tuple[Segment, str]]: # TODO pass - def __eq__(self, other: 'TieredAnnotation'): """Equality @@ -441,7 +425,6 @@ def __contains__(self, included: Union[Segment, Timeline]): """ return included in self.get_timeline(copy=False) - def __delitem__(self, key: TierName): """Delete a tier # TODO : doc @@ -457,7 +440,6 @@ def __getitem__(self, key: TierName) -> Tier: return self._tiers[key] - def __setitem__(self, key: Key, label: Label): pass # TODO : set a tier @@ -479,7 +461,7 @@ def empty(self) -> 'Annotation': Empty annotation using the same 'uri' and 'modality' attributes. """ - return self.__class__(uri=self.uri, modality=self.modality) + return self.__class__(uri=self.uri) def itersegments(self): """Iterate over segments (in chronological order) @@ -497,8 +479,6 @@ def itersegments(self): def to_textgrid(self, file: Union[str, Path, TextIO]): pass - - def to_annotation(self, modality: Optional[str] = None) -> Annotation: """Convert to an annotation object. The new annotation's labels are the tier names of each segments. In short, the segment's