diff --git a/build.py b/build.py index a0162fcd..6c9550db 100644 --- a/build.py +++ b/build.py @@ -341,11 +341,15 @@ def _add_definition(self) -> None: def get_identity_row(identity_url): return TABLE_ROW([tag('td', text='Also known as'), tag('td', dtrs=[HREF(identity_url, identity_url)])]) if self.clams_type['version'] == 'v1': - patches = [0, 1] - if self.clams_type['name'] != 'Annotation': - patches.append(2) - for patch in patches: - children.append(get_identity_row(f'https://mmif.clams.ai/0.4.{patch}/vocabulary/{self.clams_type["name"]}/')) + # old lapps vocabs + if self.clams_type['name'] in 'Token Sentence Paragraph Markable NamedEntity NounChunk VerbChunk'.split(): + children.append(get_identity_row(f'http://vocab.lappsgrid.org/{self.clams_type["name"]}')) + else: + patches = [0, 1] + if self.clams_type['name'] != 'Annotation': + patches.append(2) + for patch in patches: + children.append(get_identity_row(f'https://mmif.clams.ai/0.4.{patch}/vocabulary/{self.clams_type["name"]}/')) elif self.clams_type['version'] == 'v2' and self.clams_type['name'] == 'Annotation': children.append( get_identity_row(f'https://mmif.clams.ai/0.4.2/vocabulary/{self.clams_type["name"]}/')) @@ -359,23 +363,25 @@ def _add_metadata(self) -> None: self.main_content.append(H1('Metadata')) if self.metadata: self._add_properties_aux(self.metadata) - self._add_properties_from_chain('metadata') + print('found meta:', self.metadata) + self._add_properties_from_chain('metadata', shadowed_names=self.metadata.keys() if self.metadata else {}) def _add_properties(self) -> None: self.main_content.append(H1('Properties')) if self.properties: self._add_properties_aux(self.properties) - self._add_properties_from_chain('properties') + print('found prop:', self.properties) + self._add_properties_from_chain('properties', shadowed_names=self.properties.keys() if self.properties else {}) - def _add_properties_from_chain(self, proptype) -> None: + def _add_properties_from_chain(self, proptype, 
shadowed_names={}) -> None: for n in self._chain_to_top(): properties = n.get(proptype, None) if properties is not None: h2 = H2("%s from %s" % (proptype.capitalize(), n['name'])) self.main_content.append(h2) - self._add_properties_aux(properties) + self._add_properties_aux(properties, shadowed_names) - def _add_properties_aux(self, properties) -> None: + def _add_properties_aux(self, properties, shadowed_names={}) -> None: if properties: th1 = tag('th', {'class': 'fixed'}, text='Property') th2 = tag('th', {'class': 'fixed'}, text='Type') @@ -391,7 +397,7 @@ def _add_properties_aux(self, properties) -> None: if prop_required: req = tag('span', {'class': 'required'}, text='[Required]') description_cell.append(req) - row = TABLE_ROW([tag('td', {}, prop), + row = TABLE_ROW([tag('td', {}, prop if prop not in shadowed_names else f'{prop} (shadowed)'), tag('td', {}, prop_type), description_cell]) table.append(row) @@ -548,7 +554,13 @@ def how_different(type1, type2): updated = collections.defaultdict(lambda: False) def propagate_version_changes(node, parent_changed=False): - if parent_changed: + if node['name'] not in old_types: + # a newly added type, don't propagate to its children + updated[node['name']] = False + for child in node['childNodes']: + propagate_version_changes(child, False) + + elif parent_changed: updated[node['name']] = True for child in node['childNodes']: propagate_version_changes(child, True) diff --git a/vocabulary/clams.vocabulary.yaml b/vocabulary/clams.vocabulary.yaml index 2074b2b2..bb3524f9 100644 --- a/vocabulary/clams.vocabulary.yaml +++ b/vocabulary/clams.vocabulary.yaml @@ -72,11 +72,25 @@ properties: label: type: String description: >- - A label given to this object by a classifier. The value must be a simple - string value of the label and must be one of the values defined in the - labelset or labelsetUri annotation metadata. + A label given to this object by classification. 
The value must be a + simple string value of the label and must be one of the values + defined in the labelset or labelsetUri + annotation metadata.

- [Note] Annotations from a classifier app must have this property. + For example, for the Sentence subtype, this could + be used to indicate the type of sentence, such as "declarative", + "interrogative", "exclamatory", etc. For the NamedEntity + subtype, this could be used to indicate the type of named entity, + such as "PER", "ORG", "LOC", "MISC" (following the CoNLL-2003 labels). +

+ For non-linguistic annotations, for example for TimeFrame, + this could be used to indicate the type of the time frame, such as + "speech", "music", "noise", "bars-and-tones", etc., for acoustic + classification, or "slate", "lower-third", "credits" for visual + classification of video frames. +

+ [Note] Annotations from a type of classifier model must have this + property. classifications: type: Map from String to Number description: >- @@ -174,6 +188,115 @@ description: >- pointing directly into primary data (by using start and end offsets) or by linking to one or more other Annotations with the targets property. +properties: + text: + type: String + description: The surface string in the primary data covered by this span. +--- + +name: Token +parent: Span + +similarTo: + - http://vocab.lappsgrid.org/Token + +description: >- + A string of one or more characters that serves as an indivisible unit + for the purposes of morpho-syntactic labeling (part of speech tagging). + +properties: + pos: + type: String or URI + description: Part-of-speech tag associated with the token. + lemma: + type: String or URI + description: >- + The root (base) form associated with the token. URI may point to a + lexicon entry. + orth: + type: String or URI + description: >- + Orthographic properties of the token such as LowerCase, UpperCase, + UpperInitial, etc. Ideally a URI referencing a pre-defined descriptor. + +--- + +name: Sentence +parent: Span + +similarTo: + - http://vocab.lappsgrid.org/Sentence + +description: > + A sequence of words capable of standing alone to make an assertion, ask a + question, or give a command, usually consisting of a subject and a predicate + containing a finite verb. + +--- + +name: Paragraph +parent: Span + +similarTo: + - http://vocab.lappsgrid.org/Paragraph + +description: >- + A division of a piece of writing, usually dealing with a single theme and + indicated by a new line, indentation, and/or numbering. + +--- + +name: NamedEntity +parent: Span + +similarTo: + - http://vocab.lappsgrid.org/NamedEntity + +description: >- + A phrase that clearly identifies an individual from others that have similar + attributes, such as the name of a person, organization, location, artifact, + etc. as well as temporal expressions. 
+ +--- + +name: NounChunk +parent: Span + +similarTo: + - http://vocab.lappsgrid.org/NounChunk + +description: >- + The initial portion of a non-recursive noun phrase up to the head, including + determiners but not including postmodifying prepositional phrases or clauses. + +--- + +name: VerbChunk +parent: Span + +similarTo: + - http://vocab.lappsgrid.org/VerbChunk + +description: >- + Non-recursive verb groups, which include modals, auxiliary verbs, and medial + adverbs, and end at the head verb or predicate adjective. + +properties: + tense: + type: String or URI + description: >- + Provides tense information for the verb. Example values include BeVBG, + BeVBN, FutCon, HaveVBN, Pas, PasCon, PasPer, PasPerCon, Per, Pre, PreCon, + PrePer, PrePerCon, SimFut, SimPas, SimPre, none + voice: + type: String or URI + description: >- + Indicates if the verb group is active or passive. Possible values include + ACTIVE, PASSIVE, or NONE + neg: + type: String or URI + description: Indicates whether or not the verb is negated. Values include YES, NO. + --- name: TimeFrame @@ -195,7 +318,6 @@ properties: No longer encouraged to use, instead label property should replace this property. - --- name: Chapter