diff --git a/.travis.yml b/.travis.yml index 23c7489..83541dc 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,6 +1,6 @@ language: python python: -- '2.7' +- '3.6' before_script: - yes | python setup.py install - git clone https://github.com/uc-cdis/dictionaryutils; cd dictionaryutils diff --git a/gdcdictionary/schemas/_terms.yaml b/gdcdictionary/schemas/_terms.yaml index 0e6c976..1cd7c1a 100644 --- a/gdcdictionary/schemas/_terms.yaml +++ b/gdcdictionary/schemas/_terms.yaml @@ -46,7 +46,7 @@ adverse_event: age_at_event_days: description: > - Age at the time of diagnosis expressed. + Participant’s age in days of the Outcome event age_at_diagnosis_days: description: > diff --git a/gdcdictionary/schemas/demographic.yaml b/gdcdictionary/schemas/demographic.yaml deleted file mode 100644 index 761845f..0000000 --- a/gdcdictionary/schemas/demographic.yaml +++ /dev/null @@ -1,114 +0,0 @@ -$schema: "http://json-schema.org/draft-04/schema#" - -id: "demographic" -title: Demographic -type: object -namespace: https://dcf-interop.kidsfirstdrc.org/ -category: clinical -program: '*' -project: '*' -description: > - Data for the characterization of the patient by means of segementing the population (e.g., - characterization by age, sex, or race). -additionalProperties: false -submittable: true -validators: null - -systemProperties: - - id - - project_id - - state - - created_datetime - - updated_datetime - -links: - - name: participants - backref: demographics - label: describes - target_type: participant - multiplicity: one_to_one - required: true - -required: - - type - - submitter_id - - participants - - race - - ethnicity - - gender - -preferred: - - year_of_death - -uniqueKeys: - #unclear if want submitter ID for clinical - - [id] - - [project_id, submitter_id] - -properties: - $ref_ubiq: "_definitions.yaml#/ubiquitous_properties" - - cause_of_death: - term: - $ref: "_terms.yaml#/cause_of_death" - enum: - - Disease Related - - Not Disease Related - - Not Reported - - Unknown - - age_at_last_follow_up_days: - term: - $ref: "_terms.yaml#/age_at_last_follow_up_days" - type: - - integer - - "null" - maximum: 32872 - minimum: 0 - - gender: - term: - $ref: "_terms.yaml#/gender" - enum: - - female - - male - - unknown - - other - - unspecified - - not reported - - race: - term: - $ref: "_terms.yaml#/race" - enum: - - White - - American Indian or Alaska Native - - Black or African American - - Asian - - Native Hawaiian or Other Pacific Islander - - Other - - Unknown - - Not Reported - - Not allowed to collect - - ethnicity: - term: - $ref: "_terms.yaml#/ethnicity" - enum: - - hispanic or latino - - not hispanic or latino - - unknown - - not reported - - not allowed to collect - - vital_status: - term: - $ref: "_terms.yaml#/vital_status" - enum: - - Alive - - Dead - - Unknown - - Not Reported - - participants: - $ref: "_definitions.yaml#/to_one" diff --git a/gdcdictionary/schemas/diagnosis.yaml b/gdcdictionary/schemas/diagnosis.yaml index ab61b26..ceafb6d 100644 --- a/gdcdictionary/schemas/diagnosis.yaml +++ b/gdcdictionary/schemas/diagnosis.yaml @@ -43,69 +43,66 @@ uniqueKeys: properties: $ref_ubiq: "_definitions.yaml#/ubiquitous_properties" - age_at_event_days: - term: - $ref: "_terms.yaml#/age_at_event_days" - type: - - integer - - "null" - maximum: 32872 - minimum: 0 + external_id: + description: "external id used by contributor" + type: string + diagnosis: + description: | + A calculated rollup of a participant’s diagnoses. If the participant has only Cancer diagnoses, the participant’s value is Cancer. If the participant has only Structural Birth Defect diagnoses, the value is Structural Birth Defects. + type: string + + diagnosis_source_text: + description: | + Analysis, and recognition of the presence and nature of disease, condition, or injury from expressed signs and symptoms; also, the scientific determination of any kind; the concise results of such a study/investigation. + type: string + diagnosis_category: - description: "High level diagnosis categorization." + description: | + An overarching classification based on the Diagnosis Source Text to aide in quick searching over Cancer and Structural Birth Defects. enum: - Structural Birth Defect - Cancer - - Other - - tumor_location: - description: "Location of the tumor." - type: string + - Not Reported - primary_diagnosis: - term: - $ref: "_terms.yaml#/primary_diagnosis" + icd_id_diagnosis: + description: | + ICD10 code for the diagnosis. type: string - external_id: - description: "external id used by contributor" + mondo_id_diagnosis: + description: | + The Mondo ID associated with the Diagnosis (Source Text) value. Derived by matching the Diagnosis Source Text value with Mondo ID lookups. type: string - - source_text_diagnosis: - description: "Pathological diagnosis" + + ncit_id_diagnosis: + description: | + The National Cancer Institute Thesaurus (NCIt) ID associated with the Diagnosis (Source Text) value. Derived by matching the Diagnosis Source Text value with NCIt lookups. type: string - source_text_tumor_location: - description: "Location of the tumor" + spatial_descriptor: + description: | + Term to indicate precise, relative anatomical position from where the biospecimen was obtained. type: string - mondo_id_diagnosis: + tumor_location_source_text: description: | - The ID of the term from the Monary Disease Ontology - which represents a harmonized diagnosis + Text term from the investigator that describes the anatomic site of the tumor. type: string - icd_id_diagnosis: + source_text_tumor_location: description: | - The ID of the term from the International Classification of Diseases - which represents harmonized diagnosis + TBD type: string uberon_id_tumor_location: - term: - $ref: "_terms.yaml#/age_at_event_days" - type: string - - ncit_id_diagnosis: description: | - The ID term from the National Cancer Institute Thesaurus which represents a - harmonized diagnosis + TBD type: string spatial_descriptor: description: | - Ontology term that harmonizes the spatial concepts from Biological Spatial Ontology + TBD type: string participants: diff --git a/gdcdictionary/schemas/family.yaml b/gdcdictionary/schemas/family.yaml index 63bf2f5..df928ba 100644 --- a/gdcdictionary/schemas/family.yaml +++ b/gdcdictionary/schemas/family.yaml @@ -45,5 +45,14 @@ properties: description: "Name given to the family by contributor" type: string + family_composition: + description: "A calculated value based on the family members present with genomic data within a participant’s pedigree." + enum: + - "proband_only" + - "duo" + - "duo_plus" + - "trio" + - "trio_plus" + projects: $ref: "_definitions.yaml#/to_one_project" diff --git a/gdcdictionary/schemas/family_relationship.yaml b/gdcdictionary/schemas/family_relationship.yaml index a1fd345..7f5d33e 100644 --- a/gdcdictionary/schemas/family_relationship.yaml +++ b/gdcdictionary/schemas/family_relationship.yaml @@ -43,10 +43,108 @@ properties: relative_to_participant_relation: description: "Text describing the nature of the relationship (i.e. father, mother, sister, brother)" - type: string + enum: + - Aunt + - Brother + - Brother-in-law + - Brother-Monozygotic Twin + - Child + - Cousin + - Daughter + - Father + - First cousin once removed + - Grandfather + - Grandmother + - Married in aunt + - Married in Husband + - Married in-Spouse + - Maternal aunt + - Maternal cousin + - Maternal grandfather + - Maternal grandmother + - Maternal Great Grandmother + - Maternal Great Uncle + - Maternal half-sister + - Maternal uncle + - Mother + - Nephew + - Other + - Paternal aunt + - Paternal cousin + - Paternal grandfather + - Paternal grandmother + - Paternal uncle + - Sibling + - Sister + - Son + - Spouse + - Twin Brother + - Twin Sister + - Uncle + - Uncle-married in + - Wife + - Proband + - Maternal Granddaughter participant_to_relative_relation: description: "Text describing the nature of the relationship (i.e. father, mother, sister, brother)" + enum: + - Aunt + - Brother + - Brother-in-law + - Brother-Monozygotic Twin + - Child + - Cousin + - Daughter + - Father + - First cousin once removed + - Grandfather + - Grandmother + - Married in aunt + - Married in Husband + - Married in-Spouse + - Maternal aunt + - Maternal cousin + - Maternal grandfather + - Maternal grandmother + - Maternal Great Grandmother + - Maternal Great Uncle + - Maternal half-sister + - Maternal uncle + - Mother + - Nephew + - Other + - Paternal aunt + - Paternal cousin + - Paternal grandfather + - Paternal grandmother + - Paternal uncle + - Sibling + - Sister + - Son + - Spouse + - Twin Brother + - Twin Sister + - Uncle + - Uncle-married in + - Wife + - Proband + - Maternal Granddaughter + + participant1_id: + description: "Participant1 ID" + type: string + + participant2_id: + description: "Participant2 ID" + type: string + + participant1_to_participant2_relation: + description: "Participant1 to Participant2 Relation" + type: string + + participant2_to_participant1_relation: + description: "Participant2 to Participant1 Relation" type: string participants: diff --git a/gdcdictionary/schemas/outcome.yaml b/gdcdictionary/schemas/outcome.yaml index f7cefcb..9acab98 100644 --- a/gdcdictionary/schemas/outcome.yaml +++ b/gdcdictionary/schemas/outcome.yaml @@ -52,22 +52,22 @@ properties: disease_related: description: | - Whether Dead and cause of death was disease related (Yes) - or Dead and cause of death was not disease related (No) - or Not Reported + Text value describing whether or not the participant’s outcome is related to their disease. For example, whether their deceased status was due to their disease. enum: - "Yes" - "No" - - Not Reported - - Reported Unknown - - Not Applicable + - "Not Reported" + - "Reported Unknown" + - "Not Applicable" vital_status: term: $ref: "_terms.yaml#/age_at_event_days" enum: - - Dead - - Alive + - "Deceased" + - "Alive" + - "Reported Unknown" + - "Not Reported" participants: $ref: "_definitions.yaml#/to_one" diff --git a/gdcdictionary/schemas/participant.yaml b/gdcdictionary/schemas/participant.yaml index aba72b2..aa573a9 100644 --- a/gdcdictionary/schemas/participant.yaml +++ b/gdcdictionary/schemas/participant.yaml @@ -47,44 +47,60 @@ uniqueKeys: # Case properties properties: $ref_ubiq: "_definitions.yaml#/ubiquitous_properties" - days_to_lost_to_followup: - term: - $ref: "_terms.yaml#/days_to_lost_to_followup" - type: integer - disease_type: - term: - $ref: "_terms.yaml#/disease_type" + + family_id: + description: "Id for the participants grouped by family" type: string - index_date: - term: - $ref: "_terms.yaml#/index_date" + + alias_group: + description: "For potential future use, currently not populated with data" + type: string + + available_data_types: + description: "The File data types available for the participants." + type: string + + ethnicity: + description: "An individual’s self-described social and cultural grouping, specifically whether an individual describes themselves as Hispanic or Latino." enum: - - Diagnosis - - First Patient Visit - - Study Enrollment - lost_to_followup: - term: - $ref: "_terms.yaml#/lost_to_followup" + - "Not Hispanic or Latino" + - "Hispanic or Latino" + - "Not Reported" + - "Reported Unknown" + + gender: + description: "Text designations that identify gender. Gender is described as the assemblage of properties that distinguish people on the basis of their societal roles. This value is self-reported and may come from a form, questionnaire, interview, etc." enum: - - "Yes" - - "No" - primary_site: - term: - $ref: "_terms.yaml#/primary_site" - type: string + - "Female" + - "Male" + - "Reported Unknown" + - "Not Reported" + is_proband: - description: "Denotes whether participant is proband of study" + description: "Denotes whether participant is proband of study. The participant serving as the starting point for enrollment into study, often the first family member seeking medical attention." type: boolean - external_id: - term: - $ref: "_terms.yaml#/external_id" - type: string - family_id: - description: "Id for the participants grouped by family" - type: string - consent_type: - description: "Type of the consent participant belongs to" - type: string + + race: + description: "An arbitrary classification of a taxonomic group that is a division of a species. It is characterized by shared hereditary, physical attributes and behavior, and in the case of humans, by common history, nationality, or geographic distribution." + enum: + - "White" + - "American Indian or Alaska Native" + - "Black or African American" + - "Asian" + - "Native Hawaiian or Other Pacific Islander" + - "Other" + - "More Than One Race" + - "Reported Unknown" + - "Not Reported" + + species: + description: "species" + type: string + + relationship_to_proband: + description: "Relation to proband" + type: string + projects: $ref: "_definitions.yaml#/to_one_project" families: diff --git a/gdcdictionary/schemas/phenotype.yaml b/gdcdictionary/schemas/phenotype.yaml index 835bfbc..b74d20a 100644 --- a/gdcdictionary/schemas/phenotype.yaml +++ b/gdcdictionary/schemas/phenotype.yaml @@ -31,7 +31,7 @@ links: required: - type - participants - - phenotype + - source_text_phenotype - observed - submitter_id @@ -43,20 +43,14 @@ uniqueKeys: properties: $ref_ubiq: "_definitions.yaml#/ubiquitous_properties" - hpo_id: - description: | - The ID of the term from Human Phenotype Ontology - which represents a harmonized phenotype - type: string - snomed_id_phenotype: description: | The ID of the term from Systematized Nomenclature of Medicine - Clinical Terms which encodes clinical terminology type: string - phenotype: - description: "Name given to Phenotype by contributor" + source_text_phenotype: + description: "The observable characteristics in a participant resulting from the expression of genes, environment factors, and their interactions. Name given to Phenotype by contributor" enum: - "2 vessel cord" - "2 vessel umbilical cord" @@ -2539,7 +2533,7 @@ properties: version_date: observed: - description: "Whether phenotype is negative or positive" + description: "Files for which the HPO ID was positively observed. Whether phenotype is negative or positive" enum: - Positive - Negative @@ -2556,5 +2550,45 @@ properties: maximum: 32872 minimum: 0 + ancestral_hpo_id: + description: | + The Human Phenotype Ontology value associated with the Participant Phenotype (Source Text) value. Derived by matching the Phenotype Source Text value with HPO lookups. + type: string + + external_id: + description: | + External ID provided by the investigator of the original study for the Phenotype observation. + type: string + + hpo_id_phenotype: + description: | + TBD + type: string + + snomed_id_phenotype: + description: | + TBD + type: string + + participant_phenotype_source_text: + description: | + The observable characteristics in a participant resulting from the expression of genes, environment factors, and their interactions. + type: string + + hpo_phenotype_not_observed: + description: | + Files for which the HPO ID was negatively observed. + type: string + + snomed_phenotype_not_observed: + description: | + Files for which the Snomed value associated with the Participant Phenotype (Source Text) was negatively observed. Derived by matching the Phenotype Source Text value with Snomed lookups. + type: string + + snomed_phenotype_observed: + description: | + Files for which the Snomed value associated with the Participant Phenotype (Source Text) was positively observed. Derived by matching the Phenotype Source Text value with Snomed lookups. + type: string + participants: $ref: "_definitions.yaml#/to_one" diff --git a/gdcdictionary/schemas/project.yaml b/gdcdictionary/schemas/project.yaml index 36b4e41..e679224 100644 --- a/gdcdictionary/schemas/project.yaml +++ b/gdcdictionary/schemas/project.yaml @@ -134,7 +134,7 @@ properties: type: boolean release_status: - description: "Release status of the study." + description: "The status of the study within its Data Access Authority." enum: - Pending - Waiting diff --git a/gdcdictionary/schemas/study_file.yaml b/gdcdictionary/schemas/study_file.yaml index 7c895bb..dcd00d5 100644 --- a/gdcdictionary/schemas/study_file.yaml +++ b/gdcdictionary/schemas/study_file.yaml @@ -43,6 +43,11 @@ required: - type - submitter_id - file_name + - file_size + - md5sum + - data_type + - data_format + - data_category uniqueKeys: - [ id ]