diff --git a/build.py b/build.py
index a0162fcd..6c9550db 100644
--- a/build.py
+++ b/build.py
@@ -341,11 +341,15 @@ def _add_definition(self) -> None:
def get_identity_row(identity_url):
return TABLE_ROW([tag('td', text='Also known as'), tag('td', dtrs=[HREF(identity_url, identity_url)])])
if self.clams_type['version'] == 'v1':
- patches = [0, 1]
- if self.clams_type['name'] != 'Annotation':
- patches.append(2)
- for patch in patches:
- children.append(get_identity_row(f'https://mmif.clams.ai/0.4.{patch}/vocabulary/{self.clams_type["name"]}/'))
+ # old lapps vocabs
+ if self.clams_type['name'] in 'Token Sentence Paragraph Markable NamedEntity NounChunk VerbChunk'.split():
+ children.append(get_identity_row(f'http://vocab.lappsgrid.org/{self.clams_type["name"]}'))
+ else:
+ patches = [0, 1]
+ if self.clams_type['name'] != 'Annotation':
+ patches.append(2)
+ for patch in patches:
+ children.append(get_identity_row(f'https://mmif.clams.ai/0.4.{patch}/vocabulary/{self.clams_type["name"]}/'))
elif self.clams_type['version'] == 'v2' and self.clams_type['name'] == 'Annotation':
children.append(
get_identity_row(f'https://mmif.clams.ai/0.4.2/vocabulary/{self.clams_type["name"]}/'))
@@ -359,23 +363,25 @@ def _add_metadata(self) -> None:
self.main_content.append(H1('Metadata'))
if self.metadata:
self._add_properties_aux(self.metadata)
- self._add_properties_from_chain('metadata')
+ print('found meta:', self.metadata)
+ self._add_properties_from_chain('metadata', shadowed_names=self.metadata.keys() if self.metadata else {})
def _add_properties(self) -> None:
self.main_content.append(H1('Properties'))
if self.properties:
self._add_properties_aux(self.properties)
- self._add_properties_from_chain('properties')
+ print('found prop:', self.properties)
+ self._add_properties_from_chain('properties', shadowed_names=self.properties.keys() if self.properties else {})
- def _add_properties_from_chain(self, proptype) -> None:
+ def _add_properties_from_chain(self, proptype, shadowed_names={}) -> None:
for n in self._chain_to_top():
properties = n.get(proptype, None)
if properties is not None:
h2 = H2("%s from %s" % (proptype.capitalize(), n['name']))
self.main_content.append(h2)
- self._add_properties_aux(properties)
+ self._add_properties_aux(properties, shadowed_names)
- def _add_properties_aux(self, properties) -> None:
+ def _add_properties_aux(self, properties, shadowed_names={}) -> None:
if properties:
th1 = tag('th', {'class': 'fixed'}, text='Property')
th2 = tag('th', {'class': 'fixed'}, text='Type')
@@ -391,7 +397,7 @@ def _add_properties_aux(self, properties) -> None:
if prop_required:
req = tag('span', {'class': 'required'}, text='[Required]')
description_cell.append(req)
- row = TABLE_ROW([tag('td', {}, prop),
+ row = TABLE_ROW([tag('td', {}, prop if prop not in shadowed_names else f'{prop} (shadowed)'),
tag('td', {}, prop_type),
description_cell])
table.append(row)
@@ -548,7 +554,13 @@ def how_different(type1, type2):
updated = collections.defaultdict(lambda: False)
def propagate_version_changes(node, parent_changed=False):
- if parent_changed:
+ if node['name'] not in old_types:
+ # a newly added type, don't propagate to its children
+ updated[node['name']] = False
+ for child in node['childNodes']:
+ propagate_version_changes(child, False)
+
+ elif parent_changed:
updated[node['name']] = True
for child in node['childNodes']:
propagate_version_changes(child, True)
diff --git a/vocabulary/clams.vocabulary.yaml b/vocabulary/clams.vocabulary.yaml
index 2074b2b2..bb3524f9 100644
--- a/vocabulary/clams.vocabulary.yaml
+++ b/vocabulary/clams.vocabulary.yaml
@@ -72,11 +72,25 @@ properties:
label:
type: String
description: >-
- A label given to this object by a classifier. The value must be a simple
- string value of the label and must be one of the values defined in the
- labelset or labelsetUri annotation metadata.
+ A label given to this object by classification. The value must be a
+ simple string value of the label and must be one of the values
+ defined in the labelset or labelsetUri
+ annotation metadata.
- [Note] Annotations from a classifier app must have this property.
+ For example, for the Sentence subtype, this could
+ be used to indicate the type of sentence, such as "declarative",
+ "interrogative", "exclamatory", etc. For NamedEntity
+ subtype, this could be used to indicate the type of named entity,
+ such as "PER", "ORG", "LOC", "MISC" (following the CoNLL-2003 labels).
+
+ For non-linguistic annotations, for example for TimeFrame,
+ this could be used to indicate the type of the time frame, such as
+ "speech", "music", "noise", "bars-and-tones", etc., for acoustic
+ classification, or "slate", "lower-third", "credits" for visual
+ classification of video frames.
+
+ [Note] Annotations generated by a classifier model must have this
+ property.
classifications:
type: Map from String to Number
description: >-
@@ -174,6 +188,115 @@ description: >-
pointing directly into primary data (by using start and end offsets) or
by linking to one or more other Annotations with the targets property.
+properties:
+ text:
+ type: String
+ description: The surface string in the primary data covered by this span.
+---
+
+name: Token
+parent: Span
+
+similarTo:
+ - http://vocab.lappsgrid.org/Token
+
+description: >-
+ A string of one or more characters that serves as an indivisible unit
+ for the purposes of morpho-syntactic labeling (part of speech tagging).
+
+properties:
+ pos:
+ type: String or URI
+ description: Part-of-speech tag associated with the token.
+ lemma:
+ type: String or URI
+ description: >-
+ The root (base) form associated with the token. URI may point to a
+ lexicon entry.
+ orth:
+ type: String or URI
+ description: >-
+ Orthographic properties of the token such as LowerCase, UpperCase,
+ UpperInitial, etc. Ideally a URI referencing a pre-defined descriptor.
+
+---
+
+name: Sentence
+parent: Span
+
+similarTo:
+ - http://vocab.lappsgrid.org/Sentence
+
+description: >
+ A sequence of words capable of standing alone to make an assertion, ask a
+ question, or give a command, usually consisting of a subject and a predicate
+ containing a finite verb.
+
+---
+
+name: Paragraph
+parent: Span
+
+similarTo:
+ - http://vocab.lappsgrid.org/Paragraph
+
+description: >-
+ A division of a piece of writing, usually dealing with a single theme and
+ indicated by a new line, indentation, and/or numbering.
+
+---
+
+name: NamedEntity
+parent: Span
+
+similarTo:
+ - http://vocab.lappsgrid.org/NamedEntity
+
+description: >-
+ A phrase that clearly identifies an individual from others that have similar
+ attributes, such as the name of a person, organization, location, artifact,
+ etc. as well as temporal expressions.
+
+---
+
+name: NounChunk
+parent: Span
+
+similarTo:
+ - http://vocab.lappsgrid.org/NounChunk
+
+description: >-
+ The initial portion of a non-recursive noun phrase up to the head, including
+ determiners but not including postmodifying prepositional phrases or clauses.
+
+---
+
+name: VerbChunk
+parent: Span
+
+similarTo:
+ - http://vocab.lappsgrid.org/VerbChunk
+
+description: >-
+ Non-recursive verb groups, which include modals, auxiliary verbs, and medial
+ adverbs, and end at the head verb or predicate adjective.
+
+properties:
+ tense:
+ type: String or URI
+ description: >-
+ Provides tense information for the verb. Example values include BeVBG,
+ BeVBN, FutCon, HaveVBN, Pas, PasCon, PasPer, PasPerCon, Per, Pre, PreCon,
+ PrePer, PrePerCon, SimFut, SimPas, SimPre, none.
+ voice:
+ type: String or URI
+ description: >-
+ Indicates if the verb group is active or passive. Possible values include
+ ACTIVE, PASSIVE, or NONE.
+ neg:
+ type: String or URI
+ description: Indicates whether or not the verb is negated. Values include YES, NO.
+
---
name: TimeFrame
@@ -195,7 +318,6 @@ properties:
No longer encouraged to use, instead label property
should replace this property.
-
---
name: Chapter