Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 24 additions & 12 deletions build.py
Original file line number Diff line number Diff line change
Expand Up @@ -341,11 +341,15 @@ def _add_definition(self) -> None:
def get_identity_row(identity_url):
return TABLE_ROW([tag('td', text='Also known as'), tag('td', dtrs=[HREF(identity_url, identity_url)])])
if self.clams_type['version'] == 'v1':
patches = [0, 1]
if self.clams_type['name'] != 'Annotation':
patches.append(2)
for patch in patches:
children.append(get_identity_row(f'https://mmif.clams.ai/0.4.{patch}/vocabulary/{self.clams_type["name"]}/'))
# old lapps vocabs
if self.clams_type['name'] in 'Token Sentence Paragraph Markable NamedEntity NounChunk VerbChunk'.split():
children.append(get_identity_row(f'http://vocab.lappsgrid.org/{self.clams_type["name"]}'))
else:
patches = [0, 1]
if self.clams_type['name'] != 'Annotation':
patches.append(2)
for patch in patches:
children.append(get_identity_row(f'https://mmif.clams.ai/0.4.{patch}/vocabulary/{self.clams_type["name"]}/'))
elif self.clams_type['version'] == 'v2' and self.clams_type['name'] == 'Annotation':
children.append(
get_identity_row(f'https://mmif.clams.ai/0.4.2/vocabulary/{self.clams_type["name"]}/'))
Expand All @@ -359,23 +363,25 @@ def _add_metadata(self) -> None:
self.main_content.append(H1('Metadata'))
if self.metadata:
self._add_properties_aux(self.metadata)
self._add_properties_from_chain('metadata')
print('found meta:', self.metadata)
self._add_properties_from_chain('metadata', shadowed_names=self.metadata.keys() if self.metadata else {})

def _add_properties(self) -> None:
self.main_content.append(H1('Properties'))
if self.properties:
self._add_properties_aux(self.properties)
self._add_properties_from_chain('properties')
print('found prop:', self.properties)
self._add_properties_from_chain('properties', shadowed_names=self.properties.keys() if self.properties else {})

def _add_properties_from_chain(self, proptype) -> None:
def _add_properties_from_chain(self, proptype, shadowed_names={}) -> None:
for n in self._chain_to_top():
properties = n.get(proptype, None)
if properties is not None:
h2 = H2("%s from %s" % (proptype.capitalize(), n['name']))
self.main_content.append(h2)
self._add_properties_aux(properties)
self._add_properties_aux(properties, shadowed_names)

def _add_properties_aux(self, properties) -> None:
def _add_properties_aux(self, properties, shadowed_names={}) -> None:
if properties:
th1 = tag('th', {'class': 'fixed'}, text='Property')
th2 = tag('th', {'class': 'fixed'}, text='Type')
Expand All @@ -391,7 +397,7 @@ def _add_properties_aux(self, properties) -> None:
if prop_required:
req = tag('span', {'class': 'required'}, text='[Required]')
description_cell.append(req)
row = TABLE_ROW([tag('td', {}, prop),
row = TABLE_ROW([tag('td', {}, prop if prop not in shadowed_names else f'<s>{prop}</s> (shadowed)'),
tag('td', {}, prop_type),
description_cell])
table.append(row)
Expand Down Expand Up @@ -548,7 +554,13 @@ def how_different(type1, type2):
updated = collections.defaultdict(lambda: False)

def propagate_version_changes(node, parent_changed=False):
if parent_changed:
if node['name'] not in old_types:
# a newly added type, don't propagate to its children
updated[node['name']] = False
for child in node['childNodes']:
propagate_version_changes(child, False)

elif parent_changed:
updated[node['name']] = True
for child in node['childNodes']:
propagate_version_changes(child, True)
Expand Down
132 changes: 127 additions & 5 deletions vocabulary/clams.vocabulary.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -72,11 +72,25 @@ properties:
label:
type: String
description: >-
A label given to this object by a classifier. The value must be a simple
string value of the label and must be one of the values defined in the
<code>labelset</code> or <code>labelsetUri</code> annotation metadata.
A label given to this object by classification. The value must be a
simple string value of the label and must be one of the values
defined in the <code>labelset</code> or <code>labelsetUri</code>
annotation metadata.
<br><br>
[Note] Annotations from a classifier app must have this property.
For example, for the <code>Sentence</code> subtype, this could
be used to indicate the type of sentence, such as "declarative",
"interrogative", "exclamatory", etc. For the <code>NamedEntity</code>
subtype, this could be used to indicate the type of named entity,
such as "PER", "ORG", "LOC", "MISC" (following the CoNLL-2003 labels).
<br><br>
For non-linguistic annotations, for example for <code>TimeFrame</code>,
this could be used to indicate the type of the time frame, such as
"speech", "music", "noise", "bars-and-tones", etc., for acoustic
classification, or "slate", "lower-third", "credits" for visual
classification of video frames.
<br><br>
[Note] Annotations generated by any type of classifier model must have
this property.
classifications:
type: Map from String to Number
description: >-
Expand Down Expand Up @@ -174,6 +188,115 @@ description: >-
pointing directly into primary data (by using start and end offsets) or
by linking to one or more other Annotations with the targets property.

properties:
text:
type: String
description: The surface string in the primary data covered by this span.
---

name: Token
parent: Span

similarTo:
- http://vocab.lappsgrid.org/Token

description: >-
A string of one or more characters that serves as an indivisible unit
for the purposes of morpho-syntactic labeling (part of speech tagging).

properties:
pos:
type: String or URI
description: Part-of-speech tag associated with the token.
lemma:
type: String or URI
description: >-
The root (base) form associated with the token. URI may point to a
lexicon entry.
orth:
type: String or URI
description: >-
Orthographic properties of the token such as LowerCase, UpperCase,
UpperInitial, etc. Ideally a URI referencing a pre-defined descriptor.

---

name: Sentence
parent: Span

similarTo:
- http://vocab.lappsgrid.org/Sentence

description: >
A sequence of words capable of standing alone to make an assertion, ask a
question, or give a command, usually consisting of a subject and a predicate
containing a finite verb.

---

name: Paragraph
parent: Span

similarTo:
- http://vocab.lappsgrid.org/Paragraph

description: >-
A division of a piece of writing, usually dealing with a single theme and
indicated by a new line, indentation, and/or numbering.

---

name: NamedEntity
parent: Span

similarTo:
- http://vocab.lappsgrid.org/NamedEntity

description: >-
A phrase that clearly identifies an individual from others that have similar
attributes, such as the name of a person, organization, location, artifact,
etc. as well as temporal expressions.

---

name: NounChunk
parent: Span

similarTo:
- http://vocab.lappsgrid.org/NounChunk

description: >-
The initial portion of a non-recursive noun phrase up to the head, including
determiners but not including postmodifying prepositional phrases or clauses.

---

name: VerbChunk
parent: Span

similarTo:
- http://vocab.lappsgrid.org/VerbChunk

description: >-
Non-recursive verb groups, which include modals, auxiliary verbs, and medial
adverbs, and end at the head verb or predicate adjective.

properties:
tense:
type: String or URI
description: >-
Provides tense information for the verb. Example values include BeVBG,
BeVBN, FutCon, HaveVBN, Pas, PasCon, PasPer, PasPerCon, Per, Pre, PreCon,
PrePer, PrePerCon, SimFut, SimPas, SimPre, none
voice:
type: String or URI
description: >-
Indicates if the verb group is active or passive. Possible values include
ACTIVE, PASSIVE, or NONE
neg:
type: String or URI
description: Indicates whether or not the verb is negated. Values include YES, NO.

---

name: TimeFrame
Expand All @@ -195,7 +318,6 @@ properties:
Use of this property is no longer encouraged; the <code>label</code>
property should be used instead.


---

name: Chapter
Expand Down
Loading