From b95da0b39a93c6e98fe37f1c9f3351a41c19225f Mon Sep 17 00:00:00 2001 From: Christina Diaz Date: Thu, 12 Feb 2026 13:19:06 -0500 Subject: [PATCH 01/11] =?UTF-8?q?=F0=9F=90=9B=20Fix=20int=20out=20of=20ran?= =?UTF-8?q?ge=20error?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- dbt_project/seeds/_seeds.yml | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/dbt_project/seeds/_seeds.yml b/dbt_project/seeds/_seeds.yml index d98ab0f..a7883b3 100644 --- a/dbt_project/seeds/_seeds.yml +++ b/dbt_project/seeds/_seeds.yml @@ -15,6 +15,7 @@ seeds: config: column_types: Size: bigint + LastModified: varchar - name: sample config: @@ -26,9 +27,13 @@ seeds: pf_hq_aligned_q20_bases: bigint genome_territory: bigint library-1_estimated_library_size: bigint - # pf_reads: bigint - # pf_reads_aligned: bigint - # reads_aligned_in_pairs: bigint - # pf_hq_aligned_reads: bigint - # library-1_read_pairs: bigint - # pf_reads_improper_pairs: bigint \ No newline at end of file + pf_reads: bigint + pf_reads_aligned: bigint + reads_aligned_in_pairs: bigint + pf_hq_aligned_reads: bigint + library-1_read_pairs: bigint + pf_reads_improper_pairs: bigint + - name: ds_ses + config: + column_types: + total_reads: bigint \ No newline at end of file From 12106e5efb778e6c8cafa1e1814953d73167e811 Mon Sep 17 00:00:00 2001 From: Christina Diaz Date: Fri, 13 Feb 2026 14:02:25 -0500 Subject: [PATCH 02/11] =?UTF-8?q?=E2=9C=A8=20Add=20src=20models?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../dataservice_studies/src/kf_ds_src_biospecimen.sql | 6 ++++++ .../kids_first/dataservice_studies/src/kf_ds_src_bsgf.sql | 6 ++++++ .../dataservice_studies/src/kf_ds_src_diagnosis.sql | 6 ++++++ .../kids_first/dataservice_studies/src/kf_ds_src_family.sql | 6 ++++++ .../src/kf_ds_src_family_relationship.sql | 6 ++++++ .../dataservice_studies/src/kf_ds_src_genomic_file.sql | 6 ++++++ 
.../dataservice_studies/src/kf_ds_src_investigator.sql | 6 ++++++ .../dataservice_studies/src/kf_ds_src_outcome.sql | 6 ++++++ .../dataservice_studies/src/kf_ds_src_participant.sql | 6 ++++++ .../dataservice_studies/src/kf_ds_src_phenotype.sql | 6 ++++++ .../kids_first/dataservice_studies/src/kf_ds_src_sample.sql | 6 ++++++ .../kids_first/dataservice_studies/src/kf_ds_src_segf.sql | 6 ++++++ .../dataservice_studies/src/kf_ds_src_sequencing_center.sql | 6 ++++++ .../src/kf_ds_src_sequencing_experiment.sql | 6 ++++++ .../kids_first/dataservice_studies/src/kf_ds_src_study.sql | 6 ++++++ 15 files changed, 90 insertions(+) create mode 100644 dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_biospecimen.sql create mode 100644 dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_bsgf.sql create mode 100644 dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_diagnosis.sql create mode 100644 dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_family.sql create mode 100644 dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_family_relationship.sql create mode 100644 dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_genomic_file.sql create mode 100644 dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_investigator.sql create mode 100644 dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_outcome.sql create mode 100644 dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_participant.sql create mode 100644 dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_phenotype.sql create mode 100644 dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_sample.sql create mode 100644 dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_segf.sql create mode 100644 dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_sequencing_center.sql create mode 100644 
dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_sequencing_experiment.sql create mode 100644 dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_study.sql diff --git a/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_biospecimen.sql b/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_biospecimen.sql new file mode 100644 index 0000000..f52c2b2 --- /dev/null +++ b/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_biospecimen.sql @@ -0,0 +1,6 @@ +{{ config( + schema='src' +) }} + +select * +from {{ ref('ds_specimens') }} diff --git a/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_bsgf.sql b/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_bsgf.sql new file mode 100644 index 0000000..47af19c --- /dev/null +++ b/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_bsgf.sql @@ -0,0 +1,6 @@ +{{ config( + schema='src' +) }} + +select * +from {{ ref('ds_bsgfs') }} diff --git a/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_diagnosis.sql b/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_diagnosis.sql new file mode 100644 index 0000000..2b6f8dd --- /dev/null +++ b/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_diagnosis.sql @@ -0,0 +1,6 @@ +{{ config( + schema='src' +) }} + +select * +from {{ ref('ds_diagnoses') }} diff --git a/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_family.sql b/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_family.sql new file mode 100644 index 0000000..8af9054 --- /dev/null +++ b/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_family.sql @@ -0,0 +1,6 @@ +{{ config( + schema='src' +) }} + +select * +from {{ ref('ds_families') }} diff --git a/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_family_relationship.sql b/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_family_relationship.sql new file mode 100644 index
0000000..a69754b --- /dev/null +++ b/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_family_relationship.sql @@ -0,0 +1,6 @@ +{{ config( + schema='src' +) }} + +select * +from {{ ref('ds_family_relationships') }} diff --git a/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_genomic_file.sql b/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_genomic_file.sql new file mode 100644 index 0000000..4e25cf8 --- /dev/null +++ b/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_genomic_file.sql @@ -0,0 +1,6 @@ +{{ config( + schema='src' +) }} + +select * +from {{ ref('ds_genomic_files') }} diff --git a/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_investigator.sql b/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_investigator.sql new file mode 100644 index 0000000..cc5fef2 --- /dev/null +++ b/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_investigator.sql @@ -0,0 +1,6 @@ +{{ config( + schema='src' +) }} + +select * +from {{ ref('ds_investigators') }} diff --git a/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_outcome.sql b/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_outcome.sql new file mode 100644 index 0000000..e361023 --- /dev/null +++ b/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_outcome.sql @@ -0,0 +1,6 @@ +{{ config( + schema='src' +) }} + +select * +from {{ ref('ds_outcomes') }} diff --git a/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_participant.sql b/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_participant.sql new file mode 100644 index 0000000..f32b383 --- /dev/null +++ b/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_participant.sql @@ -0,0 +1,6 @@ +{{ config( + schema='src' +) }} + +select * +from {{ ref('ds_participants') }} diff --git a/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_phenotype.sql
b/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_phenotype.sql new file mode 100644 index 0000000..d6f51de --- /dev/null +++ b/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_phenotype.sql @@ -0,0 +1,6 @@ +{{ config( + schema='src' +) }} + +select * +from {{ ref('ds_phenotypes') }} diff --git a/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_sample.sql b/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_sample.sql new file mode 100644 index 0000000..337c270 --- /dev/null +++ b/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_sample.sql @@ -0,0 +1,6 @@ +{{ config( + schema='src' +) }} + +select * +from {{ ref('ds_samples') }} diff --git a/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_segf.sql b/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_segf.sql new file mode 100644 index 0000000..924356a --- /dev/null +++ b/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_segf.sql @@ -0,0 +1,6 @@ +{{ config( + schema='src' +) }} + +select * +from {{ ref('ds_segfs') }} diff --git a/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_sequencing_center.sql b/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_sequencing_center.sql new file mode 100644 index 0000000..30d74f0 --- /dev/null +++ b/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_sequencing_center.sql @@ -0,0 +1,6 @@ +{{ config( + schema='src' +) }} + +select * +from {{ ref('ds_sequencing_centers') }} diff --git a/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_sequencing_experiment.sql b/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_sequencing_experiment.sql new file mode 100644 index 0000000..0cb6986 --- /dev/null +++ b/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_sequencing_experiment.sql @@ -0,0 +1,6 @@ +{{ config( + schema='src' +) }} + +select * +from {{ ref('ds_ses') }} diff --git
a/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_study.sql b/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_study.sql new file mode 100644 index 0000000..241d216 --- /dev/null +++ b/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_study.sql @@ -0,0 +1,6 @@ +{{ config( + schema='src' +) }} + +select * +from {{ ref('ds_studies') }} From 059f0efd070c89c9028ed7b1c56275b6450953a8 Mon Sep 17 00:00:00 2001 From: Christina Diaz Date: Fri, 13 Feb 2026 14:10:05 -0500 Subject: [PATCH 03/11] =?UTF-8?q?=E2=9C=A8=20Add=20int=20models?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../int/kf_ds_int_biospecimen.sql | 46 +++++++++++++++ .../int/kf_ds_int_bsgf.sql | 19 +++++++ .../int/kf_ds_int_diagnosis.sql | 22 +++++++ .../int/kf_ds_int_family.sql | 16 ++++++ .../int/kf_ds_int_family_relationship.sql | 19 +++++++ .../int/kf_ds_int_genomic_file.sql | 34 +++++++++++ .../int/kf_ds_int_investigator.sql | 17 ++++++ .../int/kf_ds_int_outcome.sql | 18 ++++++ .../int/kf_ds_int_participant.sql | 25 ++++++++ .../int/kf_ds_int_phenotype.sql | 16 ++++++ .../int/kf_ds_int_sample.sql | 27 +++++++++ .../int/kf_ds_int_segf.sql | 16 ++++++ .../int/kf_ds_int_sequencing_center.sql | 17 ++++++ .../int/kf_ds_int_sequencing_experiment.sql | 57 +++++++++++++++++++ .../int/kf_ds_int_study.sql | 34 +++++++++++ 15 files changed, 383 insertions(+) create mode 100644 dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_biospecimen.sql create mode 100644 dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_bsgf.sql create mode 100644 dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_diagnosis.sql create mode 100644 dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_family.sql create mode 100644 dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_family_relationship.sql create mode 100644
dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_genomic_file.sql create mode 100644 dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_investigator.sql create mode 100644 dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_outcome.sql create mode 100644 dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_participant.sql create mode 100644 dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_phenotype.sql create mode 100644 dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_sample.sql create mode 100644 dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_segf.sql create mode 100644 dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_sequencing_center.sql create mode 100644 dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_sequencing_experiment.sql create mode 100644 dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_study.sql diff --git a/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_biospecimen.sql b/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_biospecimen.sql new file mode 100644 index 0000000..061bba3 --- /dev/null +++ b/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_biospecimen.sql @@ -0,0 +1,46 @@ +{{ config( + schema='int' +) }} + +-- need to review all fields in ds and which ones are needed +-- for now, extracting the main ones we use +select distinct + uuid, -- we can leave this out; it's not used, + created_at, + modified_at, + kf_id as specimen_id, + lower(replace(kf_id, '_', '-')) as dewrangle_specimen_id, + external_sample_id, + external_aliquot_id, + source_text_tissue_type, -- can rename without source text prefix? + composition, + source_text_anatomical_site, -- can rename without source text prefix? + age_at_event_days, + source_text_tumor_descriptor, -- can rename without source text prefix?
+ analyte_type, + participant_id, + sequencing_center_id, + dbgap_consent_code, -- maybe this should be on the pt level? + consent_type, -- maybe this should be on the pt level? + method_of_sample_procurement, + sample_id, -- is this needed? haven't exported 'sample' table for kf + specimen_status, -- should this be set as a constant for all kf studies ? + has_matched_normal_sample, + visible, + visibility_reason, + visibility_comment, + + -- these are fields we should discuss if needed + shipment_origin, + concentration_mg_per_ml, + volume_ul, + shipment_date, + uberon_id_anatomical_site, + ncit_id_tissue_type, + ncit_id_anatomical_site, + spatial_descriptor, + preservation_method, + amount, + amount_units, + cell_entity +from {{ref('kf_ds_src_biospecimen')}} diff --git a/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_bsgf.sql b/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_bsgf.sql new file mode 100644 index 0000000..17c7b13 --- /dev/null +++ b/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_bsgf.sql @@ -0,0 +1,19 @@ +{{ config( + schema='int' +) }} + +-- we currently use this table in DS to link specimens/gfs +-- not sure if we would need it here, since we can easily add specimen ids to a files model + +select distinct + uuid, -- we can leave this out, it's not used + created_at, + modified_at, + genomic_file_id, + biospecimen_id, + kf_id as bsgf_id, + visible, + external_id, -- i think we can leave this out - it's rarely populated/used, + visibility_reason, + visibility_comment +from {{ref('kf_ds_src_bsgf')}} diff --git a/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_diagnosis.sql b/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_diagnosis.sql new file mode 100644 index 0000000..b28c694 --- /dev/null +++ b/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_diagnosis.sql @@ -0,0 +1,22 @@ +{{ config( + schema='int' +) }} + +select distinct + uuid, -- we can leave 
this out; it's not used, + kf_id as diagnosis_id, + lower(replace(kf_id, 'DG_', 'cn-')) as dewrangle_diagnosis_id, + participant_id, + source_text_diagnosis, + age_at_event_days, + mondo_id_diagnosis, + icd_id_diagnosis, + 'Positive' as observed, -- implied by presence in diagnosis table + + -- unsure if necessary to include these fields + diagnosis_category, + external_id, + source_text_tumor_location, + uberon_id_tumor_location, + spatial_descriptor +from {{ref('kf_ds_src_diagnosis')}} diff --git a/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_family.sql b/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_family.sql new file mode 100644 index 0000000..6a32e56 --- /dev/null +++ b/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_family.sql @@ -0,0 +1,16 @@ +{{ config( + schema='int' +) }} + +select distinct + uuid, -- we can leave this out; isn't used, + created_at, + modified_at, + kf_id as family_id, + lower(replace(kf_id, '_', '-')) as dewrangle_family_id, + external_id, + family_type, -- not historically populated but it should be; can use logic rules to calculate + visible, + visibility_reason, + visibility_comment +from {{ref('kf_ds_src_family')}} diff --git a/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_family_relationship.sql b/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_family_relationship.sql new file mode 100644 index 0000000..53996ff --- /dev/null +++ b/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_family_relationship.sql @@ -0,0 +1,19 @@ +{{ config( + schema='int' +) }} + +select distinct + uuid, -- we can leave this out, it's not used + created_at, + modified_at, + external_id, -- i think we can leave this out - it's rarely populated/used, + participant1_id, + participant2_id, + participant1_to_participant2_relation, + participant2_to_participant1_relation, + kf_id as relationship_id, + visible, + visibility_reason, + visibility_comment, +
source_text_notes -- don't think we need this? we never populate it +from {{ref('kf_ds_src_family_relationship')}} diff --git a/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_genomic_file.sql b/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_genomic_file.sql new file mode 100644 index 0000000..86378e7 --- /dev/null +++ b/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_genomic_file.sql @@ -0,0 +1,34 @@ +{{ config( + schema='int' +) }} + +select distinct + uuid, -- we can leave this out; it's not used, + kf_id as genomic_file_id, + lower(replace(kf_id, 'GF_', 'dr-')) as dewrangle_genomic_file_id, -- swap the whole GF_ prefix for dr- (replacing only '_' produced 'gfdr-...') + created_at, -- think these could be useful when answering questions about changes over time + modified_at, -- ^^^ + external_id, + is_harmonized, + reference_genome, + controlled_access, -- would be nice to incorporate mapping logic for this field based on file type, location and harmonization + availability, + paired_end, + visible, + visibility_reason, + visibility_comment, + + -- should we discuss with bix about standardizing these values?
I know ingest mapping logic has changed over time + data_type, + file_format, + data_category, + workflow_tool, + workflow_type, + workflow_version, + workflow_endpoint, + file_version_descriptor, -- should discuss with bix about reliability of these values in dataservice currently + + -- could be useful after delivery but would be null during source load + cavatica_file_id, + cavatica_volume +from {{ref('kf_ds_src_genomic_file')}} diff --git a/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_investigator.sql b/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_investigator.sql new file mode 100644 index 0000000..02bc1ad --- /dev/null +++ b/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_investigator.sql @@ -0,0 +1,17 @@ +{{ config( + schema='int' +) }} + +select distinct + uuid, -- we can leave this out; isn't used, + created_at, + modified_at, + kf_id as investigator_id, + lower(replace(kf_id, '_', '-')) as dewrangle_investigator_id, + external_id, + name, + institution, + visible, + visibility_reason, + visibility_comment +from {{ref('kf_ds_src_investigator')}} diff --git a/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_outcome.sql b/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_outcome.sql new file mode 100644 index 0000000..89f5b1c --- /dev/null +++ b/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_outcome.sql @@ -0,0 +1,18 @@ +{{ config( + schema='int' +) }} + +select distinct + uuid, -- we can leave this out, it's not used + created_at, + modified_at, + external_id, -- i think we can leave this out - it's rarely populated/used, + vital_status, + disease_related, + age_at_event_days, + participant_id, + kf_id, + visible, + visibility_reason, + visibility_comment +from {{ref('kf_ds_src_outcome')}} diff --git a/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_participant.sql 
b/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_participant.sql new file mode 100644 index 0000000..1ca1bcd --- /dev/null +++ b/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_participant.sql @@ -0,0 +1,25 @@ +{{ config( + schema='int' +) }} + +select distinct + uuid, -- we can leave this out ; it's not used, + created_at, + modified_at, + alias_group_id, -- we can leave this out; it's not used + study_id, + lower(replace(study_id, '_', '-')) as dewrangle_study_id, + kf_id as participant_id, + lower(replace(kf_id, '_', '-')) as dewrangle_participant_id, + external_id, + family_id, + is_proband, + race, + ethnicity, + gender, + affected_status, + species, + visible, + visibility_reason, -- can we standardize this a bit more? maybe release status instead of reason? and try to standardize more? + visibility_comment +from {{ref('kf_ds_src_participant')}} diff --git a/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_phenotype.sql b/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_phenotype.sql new file mode 100644 index 0000000..cc968f1 --- /dev/null +++ b/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_phenotype.sql @@ -0,0 +1,16 @@ +{{ config(schema='int') }} + +select distinct + uuid, -- we can leave this out; it's not used, + kf_id as phenotype_id, + lower(replace(kf_id, 'PH_', 'cn-')) as dewrangle_phenotype_id, -- swap the whole PH_ prefix for cn- (replacing only '_' produced 'phcn-...') + participant_id, + source_text_phenotype, + age_at_event_days, + hpo_id_phenotype, + observed, + + -- additional fields that may be included + snomed_id_phenotype, + external_id +from {{ref('kf_ds_src_phenotype')}} diff --git a/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_sample.sql b/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_sample.sql new file mode 100644 index 0000000..6a6be80 --- /dev/null +++ b/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_sample.sql @@ -0,0 +1,27 @@ +{{ config( + schema='int' +) }} + +select distinct + uuid, -- we can leave
this out, it's not used + created_at, + modified_at, + external_id, + kf_id, + age_at_event_days, + sample_event_key, + tissue_type, + sample_type, + anatomical_location, + volume_ul, + method_of_sample_procurement, + preservation_method, + participant_id, + external_collection_id, + has_matched_normal_sample, + amount, + amount_units, + visible, + visibility_reason, + visibility_comment +from {{ref('kf_ds_src_sample')}} diff --git a/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_segf.sql b/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_segf.sql new file mode 100644 index 0000000..39b2820 --- /dev/null +++ b/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_segf.sql @@ -0,0 +1,16 @@ +{{ config( + schema='int' +) }} + +select distinct + created_at, + modified_at, + uuid, -- we can leave this out, it's not used + visible, + sequencing_experiment_id, + genomic_file_id, + external_id, -- this is rarely populated, it could be left out + kf_id as segf_id, + visibility_reason, + visibility_comment +from {{ref('kf_ds_src_segf')}} diff --git a/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_sequencing_center.sql b/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_sequencing_center.sql new file mode 100644 index 0000000..a3872b6 --- /dev/null +++ b/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_sequencing_center.sql @@ -0,0 +1,17 @@ +{{ config( + schema='int' +) }} + +-- this entity as a whole may not be needed - a simple "seq center name" may be all that's needed in the seq exp table + +select distinct + uuid, -- we can leave this out, it's not used + created_at, + modified_at, + external_id, + kf_id, + name as sequencing_center_name, + visible, + visibility_reason, + visibility_comment +from {{ref('kf_ds_src_sequencing_center')}} diff --git a/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_sequencing_experiment.sql 
b/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_sequencing_experiment.sql new file mode 100644 index 0000000..588e823 --- /dev/null +++ b/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_sequencing_experiment.sql @@ -0,0 +1,57 @@ +{{ config( + schema='int' +) }} + +select distinct + uuid, -- we can leave this out; it's not used, + kf_id as sequencing_experiment_id, + lower(replace(kf_id, '_', '-')) as dewrangle_sequencing_experiment_id, + sequencing_center_id, + external_id, + experiment_date, + experiment_strategy, + is_paired_end, + platform, + instrument_model, + visible, + visibility_reason, + visibility_comment, + + -- I think these should be included? talk with BIX + library_name, + library_strand, + library_prep, + library_selection, + max_insert_size, + mean_insert_size, + mean_depth, + total_reads, + mean_read_length, + adapter_sequencing, + is_adapter_trimmed, + read_pair_number, + target_capture_kit, + acquisition_type, + cdna_read, + cdna_read_offset, + cell_barcode_offset, + cell_barcode_read, + cell_barcode_size, + chromatography_approach, + end_bias, + enrichment_approach, + fraction_number, + fractionation_approach, + ion_fragmentation, + library_construction, + mass_spec_rawfile_conversion, + proteomics_experiment, + quantification_label_id, + quantification_labeling_method, + quantification_technique, + sequencing_mode, + target_cell_number, + umi_barcode_offset, + umi_barcode_read, + umi_barcode_size +from {{ref('kf_ds_src_sequencing_experiment')}} diff --git a/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_study.sql b/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_study.sql new file mode 100644 index 0000000..f5a3075 --- /dev/null +++ b/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_study.sql @@ -0,0 +1,34 @@ +{{ config( + schema='int' +) }} + +select distinct + created_at, + modified_at, + uuid, -- we can leave this out, it's not used + data_access_authority, -- 
usually value is dbgap here + external_id,-- usually we make this the phs number - would be worth renaming to reflect that + version, -- seems useful, but is rarely ever used. + name, -- this is the full name of the study; might be worth renaming + short_name, -- do we need name and short name? + attribution, -- not exactly sure what would go here, we rarely populate it + release_status, -- like the idea of this, but it's not consistently updated + investigator_id, -- like the idea of this, but again not consistently populated, should it be? + kf_id as study_id, + lower(replace(kf_id, '_', '-')) as dewrangle_study_id, + visible, + short_code, + domain, -- this is research domain e.g., CANCER vs SBD + program, + visibility_reason, + visibility_comment, + parent_study_id, + biobank_email, -- has been NA for kids first, only used for CBTN, should we keep? + biobank_name, -- has been NA for kids first, only used for CBTN, should we keep? + biobank_request_instructions, -- has been NA for kids first, only used for CBTN, should we keep? + biobank_request_link -- has been NA for kids first, only used for CBTN, should we keep? 
+from {{ref('kf_ds_src_study')}} +where lower(program) in ( + 'kids first', + 'kf/include' +) \ No newline at end of file From 3eb7696abba0fa0aaf79be7131b8b979dd6adfab Mon Sep 17 00:00:00 2001 From: Christina Diaz Date: Fri, 13 Feb 2026 14:10:45 -0500 Subject: [PATCH 04/11] =?UTF-8?q?=E2=9C=A8=20Add=20stable=20models?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../stable/kf_ds_stable_biospecimen.sql | 46 +++++++++++++++ .../stable/kf_ds_stable_bsgf.sql | 19 +++++++ .../stable/kf_ds_stable_diagnosis.sql | 22 +++++++ .../stable/kf_ds_stable_family.sql | 16 ++++++ .../kf_ds_stable_family_relationship.sql | 19 +++++++ .../stable/kf_ds_stable_genomic_file.sql | 34 +++++++++++ .../stable/kf_ds_stable_investigator.sql | 17 ++++++ .../stable/kf_ds_stable_outcome.sql | 18 ++++++ .../stable/kf_ds_stable_participant.sql | 23 ++++++++ .../stable/kf_ds_stable_phenotype.sql | 16 ++++++ .../stable/kf_ds_stable_sample.sql | 27 +++++++++ .../stable/kf_ds_stable_segf.sql | 16 ++++++ .../stable/kf_ds_stable_sequencing_center.sql | 16 ++++++ .../kf_ds_stable_sequencing_experiment.sql | 57 +++++++++++++++++++ .../stable/kf_ds_stable_study.sql | 30 ++++++++++ 15 files changed, 376 insertions(+) create mode 100644 dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_biospecimen.sql create mode 100644 dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_bsgf.sql create mode 100644 dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_diagnosis.sql create mode 100644 dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_family.sql create mode 100644 dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_family_relationship.sql create mode 100644 dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_genomic_file.sql create mode 100644 dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_investigator.sql create mode 100644 
dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_outcome.sql create mode 100644 dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_participant.sql create mode 100644 dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_phenotype.sql create mode 100644 dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_sample.sql create mode 100644 dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_segf.sql create mode 100644 dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_sequencing_center.sql create mode 100644 dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_sequencing_experiment.sql create mode 100644 dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_study.sql diff --git a/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_biospecimen.sql b/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_biospecimen.sql new file mode 100644 index 0000000..3dece5f --- /dev/null +++ b/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_biospecimen.sql @@ -0,0 +1,46 @@ +{{ config( + schema='stable' +) }} + +-- need to review all fields in ds and which ones are needed +-- for now, extracting the main ones we use +select distinct + uuid, -- we can leave this out; it's not used, + created_at, + modified_at, + specimen_id, + dewrangle_specimen_id, + external_sample_id, + external_aliquot_id, + source_text_tissue_type, -- can rename without source text prefix? + composition, + source_text_anatomical_site, -- can rename without source text prefix? + age_at_event_days, + source_text_tumor_descriptor, -- can rename without source text prefix? + analyte_type, + participant_id, + sequencing_center_id, + dbgap_consent_code, -- maybe this should be on the pt level? + consent_type, -- maybe this should be on the pt level? + method_of_sample_procurement, + sample_id, -- is this needed?
haven't exported 'sample' table for kf + specimen_status, -- should this be set as a constant for all kf studies ? + has_matched_normal_sample, + visible, + visibility_reason, + visibility_comment, + + -- these are fields we should discuss if needed + shipment_origin, + concentration_mg_per_ml, + volume_ul, + shipment_date, + uberon_id_anatomical_site, + ncit_id_tissue_type, + ncit_id_anatomical_site, + spatial_descriptor, + preservation_method, + amount, + amount_units, + cell_entity +from {{ref('kf_ds_int_biospecimen')}} \ No newline at end of file diff --git a/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_bsgf.sql b/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_bsgf.sql new file mode 100644 index 0000000..23e3ff4 --- /dev/null +++ b/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_bsgf.sql @@ -0,0 +1,19 @@ +{{ config( + schema='stable' +) }} + +-- we currently use this table in DS to link specimens/gfs +-- not sure if we would need it here, since we can easily add specimen ids to a files model + +select distinct + uuid, -- we can leave this out, it's not used + created_at, + modified_at, + genomic_file_id, + biospecimen_id, + bsgf_id, + visible, + external_id, -- i think we can leave this out - it's rarely populated/used, + visibility_reason, + visibility_comment +from {{ref('kf_ds_int_bsgf')}} \ No newline at end of file diff --git a/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_diagnosis.sql b/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_diagnosis.sql new file mode 100644 index 0000000..ada485b --- /dev/null +++ b/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_diagnosis.sql @@ -0,0 +1,22 @@ +{{ config( + schema='stable' +) }} + +select distinct + uuid, -- we can leave this out; it's not used, + diagnosis_id, + dewrangle_diagnosis_id, + participant_id, + source_text_diagnosis, + age_at_event_days, + mondo_id_diagnosis, + 
icd_id_diagnosis, + observed, -- implied by presence in diagnosis table + + -- unsure if necessary to include these fields + diagnosis_category, + external_id, + source_text_tumor_location, + uberon_id_tumor_location, + spatial_descriptor +from {{ref('kf_ds_int_diagnosis')}} \ No newline at end of file diff --git a/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_family.sql b/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_family.sql new file mode 100644 index 0000000..f42143a --- /dev/null +++ b/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_family.sql @@ -0,0 +1,16 @@ +{{ config( + schema='stable' +) }} + +select distinct + uuid, -- we can leave this out; isn't used, + created_at, + modified_at, + family_id, + dewrangle_family_id, + external_id, + family_type, -- not historically populated but it should be; can use logic rules to calculate + visible, + visibility_reason, + visibility_comment +from {{ref('kf_ds_int_family')}} \ No newline at end of file diff --git a/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_family_relationship.sql b/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_family_relationship.sql new file mode 100644 index 0000000..b4df682 --- /dev/null +++ b/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_family_relationship.sql @@ -0,0 +1,19 @@ +{{ config( + schema='stable' +) }} + +select distinct + uuid, -- we can leave this out, it's not used + created_at, + modified_at, + external_id, -- i think we can leave this out - it's rarely populated/used, + participant1_id, + participant2_id, + participant1_to_participant2_relation, + participant2_to_participant1_relation, + relationship_id, + visible, + visibility_reason, + visibility_comment, + source_text_notes -- don't think we need this?
we never populate it +from {{ref('kf_ds_int_family_relationship')}} \ No newline at end of file diff --git a/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_genomic_file.sql b/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_genomic_file.sql new file mode 100644 index 0000000..e24bebf --- /dev/null +++ b/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_genomic_file.sql @@ -0,0 +1,34 @@ +{{ config( + schema='stable' +) }} + +select distinct + uuid, -- we can leave this out; it's not used, + genomic_file_id, + dewrangle_genomic_file_id, + created_at, -- think these could be useful when answering questions about changes over time + modified_at, -- ^^^ + external_id, + is_harmonized, + reference_genome, + controlled_access, -- would be nice to incorporate mapping logic for this field based on file type, location and harmonization + availability, + paired_end, + visible, + visibility_reason, + visibility_comment, + + -- should we discuss with bix about standardizing these values?
I know ingest mapping logic has changed over time + data_type, + file_format, + data_category, + workflow_tool, + workflow_type, + workflow_version, + workflow_endpoint, + file_version_descriptor, -- should discuss with bix about reliability of these values in dataservice currently + + -- could be useful after delivery but would be null during source load + cavatica_file_id, + cavatica_volume +from {{ref('kf_ds_int_genomic_file')}} \ No newline at end of file diff --git a/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_investigator.sql b/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_investigator.sql new file mode 100644 index 0000000..94d6ebc --- /dev/null +++ b/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_investigator.sql @@ -0,0 +1,17 @@ +{{ config( + schema='stable' +) }} + +select distinct + uuid, -- we can leave this out; isn't used, + created_at, + modified_at, + investigator_id, + dewrangle_investigator_id, + external_id, + name, + institution, + visible, + visibility_reason, + visibility_comment +from {{ref('kf_ds_int_investigator')}} \ No newline at end of file diff --git a/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_outcome.sql b/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_outcome.sql new file mode 100644 index 0000000..de2751d --- /dev/null +++ b/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_outcome.sql @@ -0,0 +1,18 @@ +{{ config( + schema='stable' +) }} + +select distinct + uuid, -- we can leave this out, it's not used + created_at, + modified_at, + external_id, -- i think we can leave this out - it's rarely populated/used, + vital_status, + disease_related, + age_at_event_days, + participant_id, + kf_id, + visible, + visibility_reason, + visibility_comment +from {{ref('kf_ds_int_outcome')}} \ No newline at end of file diff --git 
a/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_participant.sql b/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_participant.sql new file mode 100644 index 0000000..ac1d31a --- /dev/null +++ b/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_participant.sql @@ -0,0 +1,23 @@ + + +select distinct + uuid, -- we can leave this out ; it's not used, + created_at, + modified_at, + alias_group_id, -- we can leave this out; it's not used + study_id, + dewrangle_study_id, + participant_id, + dewrangle_participant_id, + external_id, + family_id, + is_proband, + race, + ethnicity, + gender, + affected_status, + species, + visible, + visibility_reason, -- can we standardize this a bit more? maybe release status instead of reason? and try to standardize more? + visibility_comment +from {{ref('kf_ds_int_participant')}} \ No newline at end of file diff --git a/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_phenotype.sql b/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_phenotype.sql new file mode 100644 index 0000000..26bbbde --- /dev/null +++ b/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_phenotype.sql @@ -0,0 +1,16 @@ + + +select distinct + uuid, -- we can leave this out; it's not used, + phenotype_id, + dewrangle_phenotype_id, + participant_id, + source_text_phenotype, + age_at_event_days, + hpo_id_phenotype, + observed, + + -- additional fields that may be included + snomed_id_phenotype, + external_id +from {{ref('kf_ds_int_phenotype')}} diff --git a/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_sample.sql b/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_sample.sql new file mode 100644 index 0000000..39a24d5 --- /dev/null +++ b/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_sample.sql @@ -0,0 +1,27 @@ +{{ config( + schema='stable' +) }} + +select distinct + uuid, -- we can 
leave this out, it's not used + created_at, + modified_at, + external_id, + kf_id, + age_at_event_days, + sample_event_key, + tissue_type, + sample_type, + anatomical_location, + volume_ul, + method_of_sample_procurement, + preservation_method, + participant_id, + external_collection_id, + has_matched_normal_sample, + amount, + amount_units, + visible, + visibility_reason, + visibility_comment +from {{ref('kf_ds_int_sample')}} \ No newline at end of file diff --git a/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_segf.sql b/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_segf.sql new file mode 100644 index 0000000..aa9fa1f --- /dev/null +++ b/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_segf.sql @@ -0,0 +1,16 @@ +{{ config( + schema='stable' +) }} + +select distinct + created_at, + modified_at, + uuid, -- we can leave this out, it's not used + visible, + sequencing_experiment_id, + genomic_file_id, + external_id, -- this is rarely populated, it could be left out + segf_id, + visibility_reason, + visibility_comment +from {{ref('kf_ds_int_segf')}} \ No newline at end of file diff --git a/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_sequencing_center.sql b/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_sequencing_center.sql new file mode 100644 index 0000000..91410ed --- /dev/null +++ b/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_sequencing_center.sql @@ -0,0 +1,16 @@ +{{ config( + schema='stable' +) }} + + +select distinct + uuid, -- we can leave this out, it's not used + created_at, + modified_at, + external_id, + kf_id, + sequencing_center_name, + visible, + visibility_reason, + visibility_comment +from {{ref('kf_ds_int_sequencing_center')}} \ No newline at end of file diff --git a/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_sequencing_experiment.sql 
b/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_sequencing_experiment.sql new file mode 100644 index 0000000..79e937b --- /dev/null +++ b/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_sequencing_experiment.sql @@ -0,0 +1,57 @@ +{{ config( + schema='stable' +) }} + +select distinct + uuid, -- we can leave this out; it's not used, + sequencing_experiment_id, + dewrangle_sequencing_experiment_id, + sequencing_center_id, + external_id, + experiment_date, + experiment_strategy, + is_paired_end, + platform, + instrument_model, + visible, + visibility_reason, + visibility_comment, + + -- I think these should be included? talk with BIX + library_name, + library_strand, + library_prep, + library_selection, + max_insert_size, + mean_insert_size, + mean_depth, + total_reads, + mean_read_length, + adapter_sequencing, + is_adapter_trimmed, + read_pair_number, + target_capture_kit, + acquisition_type, + cdna_read, + cdna_read_offset, + cell_barcode_offset, + cell_barcode_read, + cell_barcode_size, + chromatography_approach, + end_bias, + enrichment_approach, + fraction_number, + fractionation_approach, + ion_fragmentation, + library_construction, + mass_spec_rawfile_conversion, + proteomics_experiment, + quantification_label_id, + quantification_labeling_method, + quantification_technique, + sequencing_mode, + target_cell_number, + umi_barcode_offset, + umi_barcode_read, + umi_barcode_size +from {{ref('kf_ds_int_sequencing_experiment')}} \ No newline at end of file diff --git a/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_study.sql b/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_study.sql new file mode 100644 index 0000000..0bee6e7 --- /dev/null +++ b/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_study.sql @@ -0,0 +1,30 @@ +{{ config( + schema='stable' +) }} + +select distinct + created_at, + modified_at, + uuid, -- we can leave this out, it's not used + 
data_access_authority, -- usually value is dbgap here + external_id,-- usually we make this the phs number - would be worth renaming to reflect that + version, -- seems useful, but is rarely ever used. + name, -- this is the full name of the study; might be worth renaming + short_name, -- do we need name and short name? + attribution, -- not exactly sure what would go here, we rarely populate it + release_status, -- like the idea of this, but it's not consistently updated + investigator_id, -- like the idea of this, but again not consistently populated, should it be? + study_id, + dewrangle_study_id, + visible, + short_code, + domain, -- this is research domain e.g., CANCER vs SBD + program, + visibility_reason, + visibility_comment, + parent_study_id, + biobank_email, -- has been NA for kids first, only used for CBTN, should we keep? + biobank_name, -- has been NA for kids first, only used for CBTN, should we keep? + biobank_request_instructions, -- has been NA for kids first, only used for CBTN, should we keep? + biobank_request_link -- has been NA for kids first, only used for CBTN, should we keep? 
+from {{ref('kf_ds_int_study')}} \ No newline at end of file From e8c6cf389f0f5929b24e67c85b2a3bfdd67ec7ee Mon Sep 17 00:00:00 2001 From: Christina Diaz Date: Fri, 13 Feb 2026 15:07:08 -0500 Subject: [PATCH 05/11] =?UTF-8?q?=E2=9C=A8=20Src=20yaml?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../dataservice_studies/src/kf_ds_src.yaml | 655 ++++++++++++++++++ 1 file changed, 655 insertions(+) create mode 100644 dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src.yaml diff --git a/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src.yaml b/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src.yaml new file mode 100644 index 0000000..425f33a --- /dev/null +++ b/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src.yaml @@ -0,0 +1,655 @@ +version: 2 + +models: + +- name: kf_ds_src_bsgf + description: '{{ doc("kf_ds_src_bsgf") }}' + config: + meta: + study: kf_dataservice_study + columns: + - name: uuid + description: '{{ doc("uuid") }}' + - name: kf_id + description: '{{ doc("kf_id") }}' + - name: created_at + description: '{{ doc("created_at") }}' + - name: modified_at + description: '{{ doc("modified_at") }}' + - name: external_id + description: '{{ doc("external_id") }}' + - name: visible + description: '{{ doc("visible") }}' + - name: visibility_reason + description: '{{ doc("visibility_reason") }}' + - name: visibility_comment + description: '{{ doc("visibility_comment") }}' + - name: genomic_file_id + description: '{{ doc("genomic_file_id") }}' + - name: biospecimen_id + description: '{{ doc("specimen_id") }}' + +- name: kf_ds_src_diagnosis + description: '{{ doc("kf_ds_src_diagnosis") }}' + config: + meta: + study: kf_dataservice_study + columns: + - name: uuid + description: '{{ doc("uuid") }}' + - name: kf_id + description: '{{ doc("kf_id") }}' + - name: created_at + description: '{{ doc("created_at") }}' + - name: modified_at + description: '{{ doc("modified_at") }}' 
+ - name: external_id + description: '{{ doc("external_id") }}' + - name: visible + description: '{{ doc("visible") }}' + - name: visibility_reason + description: '{{ doc("visibility_reason") }}' + - name: visibility_comment + description: '{{ doc("visibility_comment") }}' + - name: participant_id + description: '{{ doc("participant_id") }}' + - name: source_text_diagnosis + description: '{{ doc("source_text_diagnosis") }}' + - name: age_at_event_days + description: '{{ doc("age_at_event_days") }}' + - name: mondo_id_diagnosis + description: '{{ doc("mondo_id_diagnosis") }}' + - name: icd_id_diagnosis + description: '{{ doc("icd_id_diagnosis") }}' + - name: diagnosis_category + description: '{{ doc("diagnosis_category") }}' + - name: source_text_tumor_location + description: '{{ doc("source_text_tumor_location") }}' + - name: uberon_id_tumor_location + description: '{{ doc("uberon_id_tumor_location") }}' + - name: spatial_descriptor + description: '{{ doc("spatial_descriptor") }}' + +- name: kf_ds_src_family_relationship + description: '{{ doc("kf_ds_src_family_relationship") }}' + config: + meta: + study: kf_dataservice_study + columns: + - name: uuid + description: '{{ doc("uuid") }}' + - name: kf_id + description: '{{ doc("kf_id") }}' + - name: created_at + description: '{{ doc("created_at") }}' + - name: modified_at + description: '{{ doc("modified_at") }}' + - name: external_id + description: '{{ doc("external_id") }}' + - name: visible + description: '{{ doc("visible") }}' + - name: visibility_reason + description: '{{ doc("visibility_reason") }}' + - name: visibility_comment + description: '{{ doc("visibility_comment") }}' + - name: participant1_id + description: '{{ doc("participant1_id") }}' + - name: participant2_id + description: '{{ doc("participant2_id") }}' + - name: participant1_to_participant2_relation + description: '{{ doc("participant1_to_participant2_relation") }}' + - name: participant2_to_participant1_relation + description: '{{ 
doc("participant2_to_participant1_relation") }}' + - name: source_text_notes + description: '{{ doc("source_text_notes") }}' + +- name: kf_ds_src_family + description: '{{ doc("kf_ds_src_family") }}' + config: + meta: + study: kf_dataservice_study + columns: + - name: uuid + description: '{{ doc("uuid") }}' + - name: kf_id + description: '{{ doc("kf_id") }}' + - name: created_at + description: '{{ doc("created_at") }}' + - name: modified_at + description: '{{ doc("modified_at") }}' + - name: external_id + description: '{{ doc("external_id") }}' + - name: visible + description: '{{ doc("visible") }}' + - name: visibility_reason + description: '{{ doc("visibility_reason") }}' + - name: visibility_comment + description: '{{ doc("visibility_comment") }}' + - name: family_type + description: '{{ doc("family_type") }}' + +- name: kf_ds_src_genomic_file + description: '{{ doc("kf_ds_src_genomic_file") }}' + config: + meta: + study: kf_dataservice_study + columns: + - name: uuid + description: '{{ doc("uuid") }}' + - name: kf_id + description: '{{ doc("kf_id") }}' + - name: created_at + description: '{{ doc("created_at") }}' + - name: modified_at + description: '{{ doc("modified_at") }}' + - name: external_id + description: '{{ doc("external_id") }}' + - name: visible + description: '{{ doc("visible") }}' + - name: visibility_reason + description: '{{ doc("visibility_reason") }}' + - name: visibility_comment + description: '{{ doc("visibility_comment") }}' + - name: is_harmonized + description: '{{ doc("is_harmonized") }}' + - name: reference_genome + description: '{{ doc("reference_genome") }}' + - name: controlled_access + description: '{{ doc("controlled_access") }}' + - name: availability + description: '{{ doc("availability") }}' + - name: paired_end + description: '{{ doc("paired_end") }}' + - name: data_type + description: '{{ doc("data_type") }}' + - name: file_format + description: '{{ doc("file_format") }}' + - name: data_category + description: '{{ 
doc("data_category") }}' + - name: workflow_tool + description: '{{ doc("workflow_tool") }}' + - name: workflow_type + description: '{{ doc("workflow_type") }}' + - name: workflow_version + description: '{{ doc("workflow_version") }}' + - name: workflow_endpoint + description: '{{ doc("workflow_endpoint") }}' + - name: file_version_descriptor + description: '{{ doc("file_version_descriptor") }}' + - name: cavatica_file_id + description: '{{ doc("cavatica_file_id") }}' + - name: cavatica_volume + description: '{{ doc("cavatica_volume") }}' + +- name: kf_ds_src_investigator + description: '{{ doc("kf_ds_src_investigator") }}' + config: + meta: + study: kf_dataservice_study + columns: + - name: uuid + description: '{{ doc("uuid") }}' + - name: kf_id + description: '{{ doc("kf_id") }}' + - name: created_at + description: '{{ doc("created_at") }}' + - name: modified_at + description: '{{ doc("modified_at") }}' + - name: external_id + description: '{{ doc("external_id") }}' + - name: visible + description: '{{ doc("visible") }}' + - name: visibility_reason + description: '{{ doc("visibility_reason") }}' + - name: visibility_comment + description: '{{ doc("visibility_comment") }}' + - name: name + description: '{{ doc("name") }}' + - name: institution + description: '{{ doc("institution") }}' + +- name: kf_ds_src_outcome + description: '{{ doc("kf_ds_src_outcome") }}' + config: + meta: + study: kf_dataservice_study + columns: + - name: uuid + description: '{{ doc("uuid") }}' + - name: kf_id + description: '{{ doc("kf_id") }}' + - name: created_at + description: '{{ doc("created_at") }}' + - name: modified_at + description: '{{ doc("modified_at") }}' + - name: external_id + description: '{{ doc("external_id") }}' + - name: visible + description: '{{ doc("visible") }}' + - name: visibility_reason + description: '{{ doc("visibility_reason") }}' + - name: visibility_comment + description: '{{ doc("visibility_comment") }}' + - name: vital_status + description: '{{ 
doc("vital_status") }}' + - name: disease_related + description: '{{ doc("disease_related") }}' + - name: age_at_event_days + description: '{{ doc("age_at_event_days") }}' + +- name: kf_ds_src_participant + description: '{{ doc("kf_ds_src_participant") }}' + config: + meta: + study: kf_dataservice_study + columns: + - name: uuid + description: '{{ doc("uuid") }}' + - name: kf_id + description: '{{ doc("kf_id") }}' + - name: created_at + description: '{{ doc("created_at") }}' + - name: modified_at + description: '{{ doc("modified_at") }}' + - name: external_id + description: '{{ doc("external_id") }}' + - name: visible + description: '{{ doc("visible") }}' + - name: visibility_reason + description: '{{ doc("visibility_reason") }}' + - name: visibility_comment + description: '{{ doc("visibility_comment") }}' + - name: alias_group_id + description: '{{ doc("alias_group_id") }}' + - name: study_id + description: '{{ doc("study_id") }}' + - name: family_id + description: '{{ doc("family_id") }}' + - name: is_proband + description: '{{ doc("is_proband") }}' + - name: race + description: '{{ doc("race") }}' + - name: ethnicity + description: '{{ doc("ethnicity") }}' + - name: gender + description: '{{ doc("gender") }}' + - name: affected_status + description: '{{ doc("affected_status") }}' + - name: species + description: '{{ doc("species") }}' + +- name: kf_ds_src_phenotype + description: '{{ doc("kf_ds_src_phenotype") }}' + config: + meta: + study: kf_dataservice_study + columns: + - name: uuid + description: '{{ doc("uuid") }}' + - name: kf_id + description: '{{ doc("kf_id") }}' + - name: created_at + description: '{{ doc("created_at") }}' + - name: modified_at + description: '{{ doc("modified_at") }}' + - name: external_id + description: '{{ doc("external_id") }}' + - name: visible + description: '{{ doc("visible") }}' + - name: visibility_reason + description: '{{ doc("visibility_reason") }}' + - name: visibility_comment + description: '{{ doc("visibility_comment") 
}}' + - name: participant_id + description: '{{ doc("participant_id") }}' + - name: source_text_phenotype + description: '{{ doc("source_text_phenotype") }}' + - name: age_at_event_days + description: '{{ doc("age_at_event_days") }}' + - name: hpo_id_phenotype + description: '{{ doc("hpo_id_phenotype") }}' + - name: observed + description: '{{ doc("observed") }}' + - name: snomed_id_phenotype + description: '{{ doc("snomed_id_phenotype") }}' + +- name: kf_ds_src_sample + description: '{{ doc("kf_ds_src_sample") }}' + config: + meta: + study: kf_dataservice_study + columns: + - name: uuid + description: '{{ doc("uuid") }}' + - name: kf_id + description: '{{ doc("kf_id") }}' + - name: created_at + description: '{{ doc("created_at") }}' + - name: modified_at + description: '{{ doc("modified_at") }}' + - name: external_id + description: '{{ doc("external_id") }}' + - name: visible + description: '{{ doc("visible") }}' + - name: visibility_reason + description: '{{ doc("visibility_reason") }}' + - name: visibility_comment + description: '{{ doc("visibility_comment") }}' + - name: external_sample_id + description: '{{ doc("sample_event_key") }}' + - name: tissue_type + description: '{{ doc("tissue_type") }}' + - name: composition + description: '{{ doc("sample_type") }}' + - name: anatomical_location + description: '{{ doc("anatomical_location") }}' + - name: age_at_event_days + description: '{{ doc("age_at_event_days") }}' + - name: participant_id + description: '{{ doc("participant_id") }}' + - name: method_of_sample_procurement + description: '{{ doc("method_of_sample_procurement") }}' + - name: has_matched_normal_sample + description: '{{ doc("has_matched_normal_sample") }}' + - name: external_collection_id + description: '{{ doc("external_collection_id") }}' + - name: volume_ul + description: '{{ doc("volume_ul") }}' + - name: preservation_method + description: '{{ doc("preservation_method") }}' + - name: amount + description: '{{ doc("amount") }}' + - name: 
amount_units + description: '{{ doc("amount_units") }}' + + +- name: kf_ds_src_segf + description: '{{ doc("kf_ds_src_segf") }}' + config: + meta: + study: kf_dataservice_study + columns: + - name: uuid + description: '{{ doc("uuid") }}' + - name: kf_id + description: '{{ doc("kf_id") }}' + - name: created_at + description: '{{ doc("created_at") }}' + - name: modified_at + description: '{{ doc("modified_at") }}' + - name: external_id + description: '{{ doc("external_id") }}' + - name: visible + description: '{{ doc("visible") }}' + - name: visibility_reason + description: '{{ doc("visibility_reason") }}' + - name: visibility_comment + description: '{{ doc("visibility_comment") }}' + - name: sequencing_experiment_id + description: '{{ doc("sequencing_experiment_id") }}' + - name: genomic_file_id + description: '{{ doc("genomic_file_id") }}' + +- name: kf_ds_src_sequencing_center + description: '{{ doc("kf_ds_src_sequencing_center") }}' + config: + meta: + study: kf_dataservice_study + columns: + - name: uuid + description: '{{ doc("uuid") }}' + - name: kf_id + description: '{{ doc("kf_id") }}' + - name: created_at + description: '{{ doc("created_at") }}' + - name: modified_at + description: '{{ doc("modified_at") }}' + - name: external_id + description: '{{ doc("external_id") }}' + - name: visible + description: '{{ doc("visible") }}' + - name: visibility_reason + description: '{{ doc("visibility_reason") }}' + - name: visibility_comment + description: '{{ doc("visibility_comment") }}' + - name: name + description: '{{ doc("name") }}' + +- name: kf_ds_src_sequencing_experiment + description: '{{ doc("kf_ds_src_sequencing_experiment") }}' + config: + meta: + study: kf_dataservice_study + columns: + - name: uuid + description: '{{ doc("uuid") }}' + - name: kf_id + description: '{{ doc("kf_id") }}' + - name: created_at + description: '{{ doc("created_at") }}' + - name: modified_at + description: '{{ doc("modified_at") }}' + - name: external_id + description: '{{ 
doc("external_id") }}' + - name: visible + description: '{{ doc("visible") }}' + - name: visibility_reason + description: '{{ doc("visibility_reason") }}' + - name: visibility_comment + description: '{{ doc("visibility_comment") }}' + - name: sequencing_center_id + description: '{{ doc("sequencing_center_id") }}' + - name: experiment_date + description: '{{ doc("experiment_date") }}' + - name: experiment_strategy + description: '{{ doc("experiment_strategy") }}' + - name: is_paired_end + description: '{{ doc("is_paired_end") }}' + - name: platform + description: '{{ doc("platform") }}' + - name: instrument_model + description: '{{ doc("instrument_model") }}' + - name: library_name + description: '{{ doc("library_name") }}' + - name: library_strand + description: '{{ doc("library_strand") }}' + - name: library_prep + description: '{{ doc("library_prep") }}' + - name: library_selection + description: '{{ doc("library_selection") }}' + - name: max_insert_size + description: '{{ doc("max_insert_size") }}' + - name: mean_insert_size + description: '{{ doc("mean_insert_size") }}' + - name: mean_depth + description: '{{ doc("mean_depth") }}' + - name: total_reads + description: '{{ doc("total_reads") }}' + - name: mean_read_length + description: '{{ doc("mean_read_length") }}' + - name: adapter_sequencing + description: '{{ doc("adapter_sequencing") }}' + - name: is_adapter_trimmed + description: '{{ doc("is_adapter_trimmed") }}' + - name: read_pair_number + description: '{{ doc("read_pair_number") }}' + - name: target_capture_kit + description: '{{ doc("target_capture_kit") }}' + - name: acquisition_type + description: '{{ doc("acquisition_type") }}' + - name: cdna_read + description: '{{ doc("cdna_read") }}' + - name: cdna_read_offset + description: '{{ doc("cdna_read_offset") }}' + - name: cell_barcode_offset + description: '{{ doc("cell_barcode_offset") }}' + - name: cell_barcode_read + description: '{{ doc("cell_barcode_read") }}' + - name: cell_barcode_size + 
description: '{{ doc("cell_barcode_size") }}' + - name: chromatography_approach + description: '{{ doc("chromatography_approach") }}' + - name: end_bias + description: '{{ doc("end_bias") }}' + - name: enrichment_approach + description: '{{ doc("enrichment_approach") }}' + - name: fraction_number + description: '{{ doc("fraction_number") }}' + - name: fractionation_approach + description: '{{ doc("fractionation_approach") }}' + - name: ion_fragmentation + description: '{{ doc("ion_fragmentation") }}' + - name: library_construction + description: '{{ doc("library_construction") }}' + - name: mass_spec_rawfile_conversion + description: '{{ doc("mass_spec_rawfile_conversion") }}' + - name: proteomics_experiment + description: '{{ doc("proteomics_experiment") }}' + - name: quantification_label_id + description: '{{ doc("quantification_label_id") }}' + - name: quantification_labeling_method + description: '{{ doc("quantification_labeling_method") }}' + - name: quantification_technique + description: '{{ doc("quantification_technique") }}' + - name: sequencing_mode + description: '{{ doc("sequencing_mode") }}' + - name: target_cell_number + description: '{{ doc("target_cell_number") }}' + - name: umi_barcode_offset + description: '{{ doc("umi_barcode_offset") }}' + - name: umi_barcode_read + description: '{{ doc("umi_barcode_read") }}' + - name: umi_barcode_size + description: '{{ doc("umi_barcode_size") }}' + +- name: kf_ds_src_biospecimen + description: '{{ doc("kf_ds_src_biospecimen") }}' + config: + meta: + study: kf_dataservice_study + columns: + - name: uuid + description: '{{ doc("uuid") }}' + - name: kf_id + description: '{{ doc("kf_id") }}' + - name: created_at + description: '{{ doc("created_at") }}' + - name: modified_at + description: '{{ doc("modified_at") }}' + - name: external_id + description: '{{ doc("external_id") }}' + - name: visible + description: '{{ doc("visible") }}' + - name: visibility_reason + description: '{{ doc("visibility_reason") }}' + - 
name: visibility_comment + description: '{{ doc("visibility_comment") }}' + - name: external_sample_id + description: '{{ doc("external_sample_id") }}' + - name: external_aliquot_id + description: '{{ doc("external_aliquot_id") }}' + - name: source_text_tissue_type + description: '{{ doc("source_text_tissue_type") }}' + - name: composition + description: '{{ doc("composition") }}' + - name: source_text_anatomical_site + description: '{{ doc("source_text_anatomical_site") }}' + - name: age_at_event_days + description: '{{ doc("age_at_event_days") }}' + - name: source_text_tumor_descriptor + description: '{{ doc("source_text_tumor_descriptor") }}' + - name: analyte_type + description: '{{ doc("analyte_type") }}' + - name: participant_id + description: '{{ doc("participant_id") }}' + - name: sequencing_center_id + description: '{{ doc("sequencing_center_id") }}' + - name: dbgap_consent_code + description: '{{ doc("dbgap_consent_code") }}' + - name: consent_type + description: '{{ doc("consent_type") }}' + - name: method_of_sample_procurement + description: '{{ doc("method_of_sample_procurement") }}' + - name: sample_id + description: '{{ doc("sample_id") }}' + - name: specimen_status + description: '{{ doc("specimen_status") }}' + - name: has_matched_normal_sample + description: '{{ doc("has_matched_normal_sample") }}' + - name: shipment_origin + description: '{{ doc("shipment_origin") }}' + - name: concentration_mg_per_ml + description: '{{ doc("concentration_mg_per_ml") }}' + - name: volume_ul + description: '{{ doc("volume_ul") }}' + - name: shipment_date + description: '{{ doc("shipment_date") }}' + - name: uberon_id_anatomical_site + description: '{{ doc("uberon_id_anatomical_site") }}' + - name: ncit_id_tissue_type + description: '{{ doc("ncit_id_tissue_type") }}' + - name: ncit_id_anatomical_site + description: '{{ doc("ncit_id_anatomical_site") }}' + - name: spatial_descriptor + description: '{{ doc("spatial_descriptor") }}' + - name: preservation_method + 
description: '{{ doc("preservation_method") }}' + - name: amount + description: '{{ doc("amount") }}' + - name: amount_units + description: '{{ doc("amount_units") }}' + - name: cell_entity + description: '{{ doc("cell_entity") }}' + +- name: kf_ds_src_study + description: '{{ doc("kf_ds_src_study") }}' + config: + meta: + study: kf_dataservice_study + columns: + - name: uuid + description: '{{ doc("uuid") }}' + - name: kf_id + description: '{{ doc("kf_id") }}' + - name: created_at + description: '{{ doc("created_at") }}' + - name: modified_at + description: '{{ doc("modified_at") }}' + - name: external_id + description: '{{ doc("external_id") }}' + - name: visible + description: '{{ doc("visible") }}' + - name: visibility_reason + description: '{{ doc("visibility_reason") }}' + - name: visibility_comment + description: '{{ doc("visibility_comment") }}' + - name: data_access_authority + description: '{{ doc("data_access_authority") }}' + - name: version + description: '{{ doc("version") }}' + - name: name + description: '{{ doc("name") }}' + - name: short_name + description: '{{ doc("short_name") }}' + - name: attribution + description: '{{ doc("attribution") }}' + - name: release_status + description: '{{ doc("release_status") }}' + - name: investigator_id + description: '{{ doc("investigator_id") }}' + - name: short_code + description: '{{ doc("short_code") }}' + - name: domain + description: '{{ doc("domain") }}' + - name: program + description: '{{ doc("program") }}' + - name: parent_study_id + description: '{{ doc("parent_study_id") }}' + - name: biobank_email + description: '{{ doc("biobank_email") }}' + - name: biobank_name + description: '{{ doc("biobank_name") }}' + - name: biobank_request_instructions + description: '{{ doc("biobank_request_instructions") }}' + - name: biobank_request_link + description: '{{ doc("biobank_request_link") }}' \ No newline at end of file From cd938c76c9ee120cfe10444a0f133fcf0c69d6fb Mon Sep 17 00:00:00 2001 From: Christina 
Diaz Date: Fri, 13 Feb 2026 15:07:32 -0500 Subject: [PATCH 06/11] =?UTF-8?q?=E2=9C=A8=20Int=20yaml?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../dataservice_studies/int/kf_ds_int.yaml | 673 ++++++++++++++++++ 1 file changed, 673 insertions(+) create mode 100644 dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int.yaml diff --git a/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int.yaml b/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int.yaml new file mode 100644 index 0000000..0feb58c --- /dev/null +++ b/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int.yaml @@ -0,0 +1,673 @@ +version: 2 + +models: + +- name: kf_ds_int_bsgf + description: '{{ doc("kf_ds_int_bsgf") }}' + config: + meta: + study: kf_dataservice_study + columns: + - name: uuid + description: '{{ doc("uuid") }}' + - name: bsgf_id + description: '{{ doc("bsgf_id") }}' + - name: created_at + description: '{{ doc("created_at") }}' + - name: modified_at + description: '{{ doc("modified_at") }}' + - name: external_id + description: '{{ doc("external_id") }}' + - name: visible + description: '{{ doc("visible") }}' + - name: visibility_reason + description: '{{ doc("visibility_reason") }}' + - name: visibility_comment + description: '{{ doc("visibility_comment") }}' + - name: genomic_file_id + description: '{{ doc("genomic_file_id") }}' + - name: biospecimen_id + description: '{{ doc("specimen_id") }}' + + +- name: kf_ds_int_diagnosis + description: '{{ doc("kf_ds_int_diagnosis") }}' + config: + meta: + study: kf_dataservice_study + columns: + - name: uuid + description: '{{ doc("uuid") }}' + - name: diagnosis_id + description: '{{ doc("diagnosis_id") }}' + - name: dewrangle_diagnosis_id + description: '{{ doc("dewrangle_diagnosis_id") }}' + - name: created_at + description: '{{ doc("created_at") }}' + - name: modified_at + description: '{{ doc("modified_at") }}' + - name: external_id + description: '{{ 
doc("external_id") }}' + - name: visible + description: '{{ doc("visible") }}' + - name: visibility_reason + description: '{{ doc("visibility_reason") }}' + - name: visibility_comment + description: '{{ doc("visibility_comment") }}' + - name: participant_id + description: '{{ doc("participant_id") }}' + - name: source_text_diagnosis + description: '{{ doc("source_text_diagnosis") }}' + - name: age_at_event_days + description: '{{ doc("age_at_event_days") }}' + - name: mondo_id_diagnosis + description: '{{ doc("mondo_id_diagnosis") }}' + - name: icd_id_diagnosis + description: '{{ doc("icd_id_diagnosis") }}' + - name: diagnosis_category + description: '{{ doc("diagnosis_category") }}' + - name: source_text_tumor_location + description: '{{ doc("source_text_tumor_location") }}' + - name: uberon_id_tumor_location + description: '{{ doc("uberon_id_tumor_location") }}' + - name: spatial_descriptor + description: '{{ doc("spatial_descriptor") }}' + - name: observed + description: '{{ doc("observed") }}' + +- name: kf_ds_int_family_relationship + description: '{{ doc("kf_ds_int_family_relationship") }}' + config: + meta: + study: kf_dataservice_study + columns: + - name: uuid + description: '{{ doc("uuid") }}' + - name: relationship_id + description: '{{ doc("relationship_id") }}' + - name: created_at + description: '{{ doc("created_at") }}' + - name: modified_at + description: '{{ doc("modified_at") }}' + - name: external_id + description: '{{ doc("external_id") }}' + - name: visible + description: '{{ doc("visible") }}' + - name: visibility_reason + description: '{{ doc("visibility_reason") }}' + - name: visibility_comment + description: '{{ doc("visibility_comment") }}' + - name: participant1_id + description: '{{ doc("participant1_id") }}' + - name: participant2_id + description: '{{ doc("participant2_id") }}' + - name: participant1_to_participant2_relation + description: '{{ doc("participant1_to_participant2_relation") }}' + - name: 
participant2_to_participant1_relation + description: '{{ doc("participant2_to_participant1_relation") }}' + - name: source_text_notes + description: '{{ doc("source_text_notes") }}' + +- name: kf_ds_int_family + description: '{{ doc("kf_ds_int_family") }}' + config: + meta: + study: kf_dataservice_study + columns: + - name: uuid + description: '{{ doc("uuid") }}' + - name: family_id + description: '{{ doc("family_id") }}' + - name: dewrangle_family_id + description: '{{ doc("dewrangle_family_id") }}' + - name: created_at + description: '{{ doc("created_at") }}' + - name: modified_at + description: '{{ doc("modified_at") }}' + - name: external_id + description: '{{ doc("external_id") }}' + - name: visible + description: '{{ doc("visible") }}' + - name: visibility_reason + description: '{{ doc("visibility_reason") }}' + - name: visibility_comment + description: '{{ doc("visibility_comment") }}' + - name: family_type + description: '{{ doc("family_type") }}' + +- name: kf_ds_int_genomic_file + description: '{{ doc("kf_ds_int_genomic_file") }}' + config: + meta: + study: kf_dataservice_study + columns: + - name: uuid + description: '{{ doc("uuid") }}' + - name: genomic_file_id + description: '{{ doc("genomic_file_id") }}' + - name: dewrangle_genomic_file_id + description: '{{ doc("dewrangle_genomic_file_id") }}' + - name: created_at + description: '{{ doc("created_at") }}' + - name: modified_at + description: '{{ doc("modified_at") }}' + - name: external_id + description: '{{ doc("external_id") }}' + - name: visible + description: '{{ doc("visible") }}' + - name: visibility_reason + description: '{{ doc("visibility_reason") }}' + - name: visibility_comment + description: '{{ doc("visibility_comment") }}' + - name: is_harmonized + description: '{{ doc("is_harmonized") }}' + - name: reference_genome + description: '{{ doc("reference_genome") }}' + - name: controlled_access + description: '{{ doc("controlled_access") }}' + - name: availability + description: '{{ 
doc("availability") }}' + - name: paired_end + description: '{{ doc("paired_end") }}' + - name: data_type + description: '{{ doc("data_type") }}' + - name: file_format + description: '{{ doc("file_format") }}' + - name: data_category + description: '{{ doc("data_category") }}' + - name: workflow_tool + description: '{{ doc("workflow_tool") }}' + - name: workflow_type + description: '{{ doc("workflow_type") }}' + - name: workflow_version + description: '{{ doc("workflow_version") }}' + - name: workflow_endpoint + description: '{{ doc("workflow_endpoint") }}' + - name: file_version_descriptor + description: '{{ doc("file_version_descriptor") }}' + - name: cavatica_file_id + description: '{{ doc("cavatica_file_id") }}' + - name: cavatica_volume + description: '{{ doc("cavatica_volume") }}' +- name: kf_ds_int_investigator + description: '{{ doc("kf_ds_int_investigator") }}' + config: + meta: + study: kf_dataservice_study + columns: + - name: uuid + description: '{{ doc("uuid") }}' + - name: investigator_id + description: '{{ doc("investigator_id") }}' + - name: dewrangle_investigator_id + description: '{{ doc("dewrangle_investigator_id") }}' + - name: created_at + description: '{{ doc("created_at") }}' + - name: modified_at + description: '{{ doc("modified_at") }}' + - name: external_id + description: '{{ doc("external_id") }}' + - name: visible + description: '{{ doc("visible") }}' + - name: visibility_reason + description: '{{ doc("visibility_reason") }}' + - name: visibility_comment + description: '{{ doc("visibility_comment") }}' + - name: name + description: '{{ doc("name") }}' + - name: institution + description: '{{ doc("institution") }}' + +- name: kf_ds_int_outcome + description: '{{ doc("kf_ds_int_outcome") }}' + config: + meta: + study: kf_dataservice_study + columns: + - name: uuid + description: '{{ doc("uuid") }}' + - name: kf_id + description: '{{ doc("kf_id") }}' + - name: created_at + description: '{{ doc("created_at") }}' + - name: modified_at + 
description: '{{ doc("modified_at") }}' + - name: external_id + description: '{{ doc("external_id") }}' + - name: visible + description: '{{ doc("visible") }}' + - name: visibility_reason + description: '{{ doc("visibility_reason") }}' + - name: visibility_comment + description: '{{ doc("visibility_comment") }}' + - name: vital_status + description: '{{ doc("vital_status") }}' + - name: disease_related + description: '{{ doc("disease_related") }}' + - name: age_at_event_days + description: '{{ doc("age_at_event_days") }}' + +- name: kf_ds_int_participant + description: '{{ doc("kf_ds_int_participant") }}' + config: + meta: + study: kf_dataservice_study + columns: + - name: uuid + description: '{{ doc("uuid") }}' + - name: participant_id + description: '{{ doc("participant_id") }}' + - name: dewrangle_participant_id + description: '{{ doc("dewrangle_participant_id") }}' + - name: created_at + description: '{{ doc("created_at") }}' + - name: modified_at + description: '{{ doc("modified_at") }}' + - name: external_id + description: '{{ doc("external_id") }}' + - name: visible + description: '{{ doc("visible") }}' + - name: visibility_reason + description: '{{ doc("visibility_reason") }}' + - name: visibility_comment + description: '{{ doc("visibility_comment") }}' + - name: alias_group_id + description: '{{ doc("alias_group_id") }}' + - name: study_id + description: '{{ doc("study_id") }}' + - name: dewrangle_study_id + description: '{{ doc("dewrangle_study_id") }}' + - name: family_id + description: '{{ doc("family_id") }}' + - name: is_proband + description: '{{ doc("is_proband") }}' + - name: race + description: '{{ doc("race") }}' + - name: ethnicity + description: '{{ doc("ethnicity") }}' + - name: gender + description: '{{ doc("gender") }}' + - name: affected_status + description: '{{ doc("affected_status") }}' + - name: species + description: '{{ doc("species") }}' +- name: kf_ds_int_phenotype + description: '{{ doc("kf_ds_int_phenotype") }}' + config: + meta: 
+ study: kf_dataservice_study + columns: + - name: uuid + description: '{{ doc("uuid") }}' + - name: phenotype_id + description: '{{ doc("phenotype_id") }}' + - name: dewrangle_phenotype_id + description: '{{ doc("dewrangle_phenotype_id") }}' + - name: created_at + description: '{{ doc("created_at") }}' + - name: modified_at + description: '{{ doc("modified_at") }}' + - name: external_id + description: '{{ doc("external_id") }}' + - name: visible + description: '{{ doc("visible") }}' + - name: visibility_reason + description: '{{ doc("visibility_reason") }}' + - name: visibility_comment + description: '{{ doc("visibility_comment") }}' + - name: participant_id + description: '{{ doc("participant_id") }}' + - name: source_text_phenotype + description: '{{ doc("source_text_phenotype") }}' + - name: age_at_event_days + description: '{{ doc("age_at_event_days") }}' + - name: hpo_id_phenotype + description: '{{ doc("hpo_id_phenotype") }}' + - name: observed + description: '{{ doc("observed") }}' + - name: snomed_id_phenotype + description: '{{ doc("snomed_id_phenotype") }}' + +- name: kf_ds_int_sample + description: '{{ doc("kf_ds_src_sample") }}' + config: + meta: + study: kf_dataservice_study + columns: + - name: uuid + description: '{{ doc("uuid") }}' + - name: kf_id + description: '{{ doc("kf_id") }}' + - name: created_at + description: '{{ doc("created_at") }}' + - name: modified_at + description: '{{ doc("modified_at") }}' + - name: external_id + description: '{{ doc("external_id") }}' + - name: visible + description: '{{ doc("visible") }}' + - name: visibility_reason + description: '{{ doc("visibility_reason") }}' + - name: visibility_comment + description: '{{ doc("visibility_comment") }}' + - name: external_sample_id + description: '{{ doc("sample_event_key") }}' + - name: tissue_type + description: '{{ doc("tissue_type") }}' + - name: composition + description: '{{ doc("sample_type") }}' + - name: anatomical_location + description: '{{ 
doc("anatomical_location") }}' + - name: age_at_event_days + description: '{{ doc("age_at_event_days") }}' + - name: participant_id + description: '{{ doc("participant_id") }}' + - name: method_of_sample_procurement + description: '{{ doc("method_of_sample_procurement") }}' + - name: has_matched_normal_sample + description: '{{ doc("has_matched_normal_sample") }}' + - name: external_collection_id + description: '{{ doc("external_collection_id") }}' + - name: volume_ul + description: '{{ doc("volume_ul") }}' + - name: preservation_method + description: '{{ doc("preservation_method") }}' + - name: amount + description: '{{ doc("amount") }}' + - name: amount_units + description: '{{ doc("amount_units") }}' + +- name: kf_ds_int_sequencing_center + description: '{{ doc("kf_ds_int_sequencing_center") }}' + config: + meta: + study: kf_dataservice_study + columns: + - name: uuid + description: '{{ doc("uuid") }}' + - name: kf_id + description: '{{ doc("kf_id") }}' + - name: created_at + description: '{{ doc("created_at") }}' + - name: modified_at + description: '{{ doc("modified_at") }}' + - name: external_id + description: '{{ doc("external_id") }}' + - name: visible + description: '{{ doc("visible") }}' + - name: visibility_reason + description: '{{ doc("visibility_reason") }}' + - name: visibility_comment + description: '{{ doc("visibility_comment") }}' + - name: sequencing_center_name + description: '{{ doc("sequencing_center_name") }}' + +- name: kf_ds_int_segf + description: '{{ doc("kf_ds_int_segf") }}' + config: + meta: + study: kf_dataservice_study + columns: + - name: uuid + description: '{{ doc("uuid") }}' + - name: segf_id + description: '{{ doc("kf_id") }}' + - name: created_at + description: '{{ doc("created_at") }}' + - name: modified_at + description: '{{ doc("modified_at") }}' + - name: external_id + description: '{{ doc("external_id") }}' + - name: visible + description: '{{ doc("visible") }}' + - name: visibility_reason + description: '{{ 
doc("visibility_reason") }}' + - name: visibility_comment + description: '{{ doc("visibility_comment") }}' + - name: sequencing_experiment_id + description: '{{ doc("sequencing_experiment_id") }}' + - name: genomic_file_id + description: '{{ doc("genomic_file_id") }}' +- name: kf_ds_int_sequencing_experiment + description: '{{ doc("kf_ds_int_sequencing_experiment") }}' + config: + meta: + study: kf_dataservice_study + columns: + - name: uuid + description: '{{ doc("uuid") }}' + - name: sequencing_experiment_id + description: '{{ doc("sequencing_experiment_id") }}' + - name: dewrangle_sequencing_experiment_id + description: '{{ doc("dewrangle_sequencing_experiment_id") }}' + - name: created_at + description: '{{ doc("created_at") }}' + - name: modified_at + description: '{{ doc("modified_at") }}' + - name: external_id + description: '{{ doc("external_id") }}' + - name: visible + description: '{{ doc("visible") }}' + - name: visibility_reason + description: '{{ doc("visibility_reason") }}' + - name: visibility_comment + description: '{{ doc("visibility_comment") }}' + - name: sequencing_center_id + description: '{{ doc("sequencing_center_id") }}' + - name: experiment_date + description: '{{ doc("experiment_date") }}' + - name: experiment_strategy + description: '{{ doc("experiment_strategy") }}' + - name: is_paired_end + description: '{{ doc("is_paired_end") }}' + - name: platform + description: '{{ doc("platform") }}' + - name: instrument_model + description: '{{ doc("instrument_model") }}' + - name: library_name + description: '{{ doc("library_name") }}' + - name: library_strand + description: '{{ doc("library_strand") }}' + - name: library_prep + description: '{{ doc("library_prep") }}' + - name: library_selection + description: '{{ doc("library_selection") }}' + - name: max_insert_size + description: '{{ doc("max_insert_size") }}' + - name: mean_insert_size + description: '{{ doc("mean_insert_size") }}' + - name: mean_depth + description: '{{ doc("mean_depth") 
}}' + - name: total_reads + description: '{{ doc("total_reads") }}' + - name: mean_read_length + description: '{{ doc("mean_read_length") }}' + - name: adapter_sequencing + description: '{{ doc("adapter_sequencing") }}' + - name: is_adapter_trimmed + description: '{{ doc("is_adapter_trimmed") }}' + - name: read_pair_number + description: '{{ doc("read_pair_number") }}' + - name: target_capture_kit + description: '{{ doc("target_capture_kit") }}' + - name: acquisition_type + description: '{{ doc("acquisition_type") }}' + - name: cdna_read + description: '{{ doc("cdna_read") }}' + - name: cdna_read_offset + description: '{{ doc("cdna_read_offset") }}' + - name: cell_barcode_offset + description: '{{ doc("cell_barcode_offset") }}' + - name: cell_barcode_read + description: '{{ doc("cell_barcode_read") }}' + - name: cell_barcode_size + description: '{{ doc("cell_barcode_size") }}' + - name: chromatography_approach + description: '{{ doc("chromatography_approach") }}' + - name: end_bias + description: '{{ doc("end_bias") }}' + - name: enrichment_approach + description: '{{ doc("enrichment_approach") }}' + - name: fraction_number + description: '{{ doc("fraction_number") }}' + - name: fractionation_approach + description: '{{ doc("fractionation_approach") }}' + - name: ion_fragmentation + description: '{{ doc("ion_fragmentation") }}' + - name: library_construction + description: '{{ doc("library_construction") }}' + - name: mass_spec_rawfile_conversion + description: '{{ doc("mass_spec_rawfile_conversion") }}' + - name: proteomics_experiment + description: '{{ doc("proteomics_experiment") }}' + - name: quantification_label_id + description: '{{ doc("quantification_label_id") }}' + - name: quantification_labeling_method + description: '{{ doc("quantification_labeling_method") }}' + - name: quantification_technique + description: '{{ doc("quantification_technique") }}' + - name: sequencing_mode + description: '{{ doc("sequencing_mode") }}' + - name: target_cell_number + 
description: '{{ doc("target_cell_number") }}' + - name: umi_barcode_offset + description: '{{ doc("umi_barcode_offset") }}' + - name: umi_barcode_read + description: '{{ doc("umi_barcode_read") }}' + - name: umi_barcode_size + description: '{{ doc("umi_barcode_size") }}' + +- name: kf_ds_int_biospecimen + description: '{{ doc("kf_ds_int_biospecimen") }}' + config: + meta: + study: kf_dataservice_study + columns: + - name: uuid + description: '{{ doc("uuid") }}' + - name: specimen_id + description: '{{ doc("specimen_id") }}' + - name: dewrangle_specimen_id + description: '{{ doc("dewrangle_specimen_id") }}' + - name: created_at + description: '{{ doc("created_at") }}' + - name: modified_at + description: '{{ doc("modified_at") }}' + - name: external_id + description: '{{ doc("external_id") }}' + - name: visible + description: '{{ doc("visible") }}' + - name: visibility_reason + description: '{{ doc("visibility_reason") }}' + - name: visibility_comment + description: '{{ doc("visibility_comment") }}' + - name: external_sample_id + description: '{{ doc("external_sample_id") }}' + - name: external_aliquot_id + description: '{{ doc("external_aliquot_id") }}' + - name: source_text_tissue_type + description: '{{ doc("source_text_tissue_type") }}' + - name: composition + description: '{{ doc("composition") }}' + - name: source_text_anatomical_site + description: '{{ doc("source_text_anatomical_site") }}' + - name: age_at_event_days + description: '{{ doc("age_at_event_days") }}' + - name: source_text_tumor_descriptor + description: '{{ doc("source_text_tumor_descriptor") }}' + - name: analyte_type + description: '{{ doc("analyte_type") }}' + - name: participant_id + description: '{{ doc("participant_id") }}' + - name: sequencing_center_id + description: '{{ doc("sequencing_center_id") }}' + - name: dbgap_consent_code + description: '{{ doc("dbgap_consent_code") }}' + - name: consent_type + description: '{{ doc("consent_type") }}' + - name: method_of_sample_procurement + 
description: '{{ doc("method_of_sample_procurement") }}' + - name: sample_id + description: '{{ doc("sample_id") }}' + - name: specimen_status + description: '{{ doc("specimen_status") }}' + - name: has_matched_normal_sample + description: '{{ doc("has_matched_normal_sample") }}' + - name: shipment_origin + description: '{{ doc("shipment_origin") }}' + - name: concentration_mg_per_ml + description: '{{ doc("concentration_mg_per_ml") }}' + - name: volume_ul + description: '{{ doc("volume_ul") }}' + - name: shipment_date + description: '{{ doc("shipment_date") }}' + - name: uberon_id_anatomical_site + description: '{{ doc("uberon_id_anatomical_site") }}' + - name: ncit_id_tissue_type + description: '{{ doc("ncit_id_tissue_type") }}' + - name: ncit_id_anatomical_site + description: '{{ doc("ncit_id_anatomical_site") }}' + - name: spatial_descriptor + description: '{{ doc("spatial_descriptor") }}' + - name: preservation_method + description: '{{ doc("preservation_method") }}' + - name: amount + description: '{{ doc("amount") }}' + - name: amount_units + description: '{{ doc("amount_units") }}' + - name: cell_entity + description: '{{ doc("cell_entity") }}' +- name: kf_ds_int_study + description: '{{ doc("kf_ds_int_study") }}' + config: + meta: + study: kf_dataservice_study + columns: + - name: uuid + description: '{{ doc("uuid") }}' + - name: study_id + description: '{{ doc("study_id") }}' + - name: dewrangle_study_id + description: '{{ doc("dewrangle_study_id") }}' + - name: created_at + description: '{{ doc("created_at") }}' + - name: modified_at + description: '{{ doc("modified_at") }}' + - name: external_id + description: '{{ doc("external_id") }}' + - name: visible + description: '{{ doc("visible") }}' + - name: visibility_reason + description: '{{ doc("visibility_reason") }}' + - name: visibility_comment + description: '{{ doc("visibility_comment") }}' + - name: data_access_authority + description: '{{ doc("data_access_authority") }}' + - name: version + 
description: '{{ doc("version") }}' + - name: name + description: '{{ doc("name") }}' + - name: short_name + description: '{{ doc("short_name") }}' + - name: attribution + description: '{{ doc("attribution") }}' + - name: release_status + description: '{{ doc("release_status") }}' + - name: investigator_id + description: '{{ doc("investigator_id") }}' + - name: short_code + description: '{{ doc("short_code") }}' + - name: domain + description: '{{ doc("domain") }}' + - name: program + description: '{{ doc("program") }}' + - name: parent_study_id + description: '{{ doc("parent_study_id") }}' + - name: biobank_email + description: '{{ doc("biobank_email") }}' + - name: biobank_name + description: '{{ doc("biobank_name") }}' + - name: biobank_request_instructions + description: '{{ doc("biobank_request_instructions") }}' + - name: biobank_request_link + description: '{{ doc("biobank_request_link") }}' \ No newline at end of file From 0499a1ddec86a81918885e8ba99813334b7a9211 Mon Sep 17 00:00:00 2001 From: Christina Diaz Date: Fri, 13 Feb 2026 15:07:56 -0500 Subject: [PATCH 07/11] =?UTF-8?q?=F0=9F=9A=A8=20Sqlfluff?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../dataservice_studies/int/kf_ds_int_biospecimen.sql | 2 +- .../kids_first/dataservice_studies/int/kf_ds_int_bsgf.sql | 2 +- .../dataservice_studies/int/kf_ds_int_diagnosis.sql | 2 +- .../kids_first/dataservice_studies/int/kf_ds_int_family.sql | 2 +- .../dataservice_studies/int/kf_ds_int_family_relationship.sql | 2 +- .../dataservice_studies/int/kf_ds_int_genomic_file.sql | 2 +- .../dataservice_studies/int/kf_ds_int_investigator.sql | 2 +- .../kids_first/dataservice_studies/int/kf_ds_int_outcome.sql | 2 +- .../dataservice_studies/int/kf_ds_int_participant.sql | 2 +- .../dataservice_studies/int/kf_ds_int_phenotype.sql | 4 +--- .../kids_first/dataservice_studies/int/kf_ds_int_sample.sql | 2 +- .../kids_first/dataservice_studies/int/kf_ds_int_segf.sql | 2 +- 
.../dataservice_studies/int/kf_ds_int_sequencing_center.sql | 2 +- .../int/kf_ds_int_sequencing_experiment.sql | 2 +- .../kids_first/dataservice_studies/int/kf_ds_int_study.sql | 2 +- .../kids_first/dataservice_studies/src/kf_ds_src_bsgf.sql | 2 +- .../dataservice_studies/src/kf_ds_src_diagnosis.sql | 2 +- .../kids_first/dataservice_studies/src/kf_ds_src_family.sql | 2 +- .../dataservice_studies/src/kf_ds_src_family_relationship.sql | 2 +- .../dataservice_studies/src/kf_ds_src_genomic_file.sql | 2 +- .../dataservice_studies/src/kf_ds_src_investigator.sql | 2 +- .../kids_first/dataservice_studies/src/kf_ds_src_outcome.sql | 2 +- .../dataservice_studies/src/kf_ds_src_participant.sql | 2 +- .../dataservice_studies/src/kf_ds_src_phenotype.sql | 2 +- .../kids_first/dataservice_studies/src/kf_ds_src_sample.sql | 2 +- .../kids_first/dataservice_studies/src/kf_ds_src_segf.sql | 2 +- .../dataservice_studies/src/kf_ds_src_sequencing_center.sql | 2 +- .../src/kf_ds_src_sequencing_experiment.sql | 2 +- .../kids_first/dataservice_studies/src/kf_ds_src_study.sql | 2 +- .../dataservice_studies/stable/kf_ds_stable_biospecimen.sql | 2 +- .../dataservice_studies/stable/kf_ds_stable_bsgf.sql | 2 +- .../dataservice_studies/stable/kf_ds_stable_diagnosis.sql | 2 +- .../dataservice_studies/stable/kf_ds_stable_family.sql | 2 +- .../stable/kf_ds_stable_family_relationship.sql | 2 +- .../dataservice_studies/stable/kf_ds_stable_genomic_file.sql | 2 +- .../dataservice_studies/stable/kf_ds_stable_investigator.sql | 2 +- .../dataservice_studies/stable/kf_ds_stable_outcome.sql | 2 +- .../dataservice_studies/stable/kf_ds_stable_participant.sql | 4 +--- .../dataservice_studies/stable/kf_ds_stable_phenotype.sql | 4 +--- .../dataservice_studies/stable/kf_ds_stable_sample.sql | 2 +- .../dataservice_studies/stable/kf_ds_stable_segf.sql | 2 +- .../stable/kf_ds_stable_sequencing_center.sql | 2 +- .../stable/kf_ds_stable_sequencing_experiment.sql | 2 +- 
.../dataservice_studies/stable/kf_ds_stable_study.sql | 2 +- 44 files changed, 44 insertions(+), 50 deletions(-) diff --git a/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_biospecimen.sql b/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_biospecimen.sql index 061bba3..781ea0b 100644 --- a/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_biospecimen.sql +++ b/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_biospecimen.sql @@ -43,4 +43,4 @@ select distinct amount, amount_units, cell_entity -from {{ref('kf_ds_src_biospecimen')}} +from {{ ref('kf_ds_src_biospecimen') }} diff --git a/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_bsgf.sql b/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_bsgf.sql index 17c7b13..d8274a1 100644 --- a/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_bsgf.sql +++ b/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_bsgf.sql @@ -16,4 +16,4 @@ select distinct external_id, -- i think we can leave this out - it's rarely populated/used, visibility_reason, visibility_comment -from {{ref('kf_ds_src_bsgf')}} +from {{ ref('kf_ds_src_bsgf') }} diff --git a/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_diagnosis.sql b/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_diagnosis.sql index b28c694..f38ab70 100644 --- a/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_diagnosis.sql +++ b/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_diagnosis.sql @@ -19,4 +19,4 @@ select distinct source_text_tumor_location, uberon_id_tumor_location, spatial_descriptor -from {{ref('kf_ds_src_diagnosis')}} +from {{ ref('kf_ds_src_diagnosis') }} diff --git a/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_family.sql b/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_family.sql index 6a32e56..315773b 100644 --- 
a/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_family.sql +++ b/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_family.sql @@ -13,4 +13,4 @@ select distinct visible, visibility_reason, visibility_comment -from {{ref('kf_ds_src_family')}} +from {{ ref('kf_ds_src_family') }} diff --git a/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_family_relationship.sql b/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_family_relationship.sql index 53996ff..7457ce7 100644 --- a/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_family_relationship.sql +++ b/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_family_relationship.sql @@ -16,4 +16,4 @@ select distinct visibility_reason, visibility_comment, source_text_notes -- don't think we need this? we never populate it -from {{ref('kf_ds_src_family_relationship')}} +from {{ ref('kf_ds_src_family_relationship') }} diff --git a/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_genomic_file.sql b/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_genomic_file.sql index 86378e7..fa751ba 100644 --- a/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_genomic_file.sql +++ b/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_genomic_file.sql @@ -31,4 +31,4 @@ select distinct -- could be useful after delivery but would be null during source load cavatica_file_id, cavatica_volume -from {{ref('kf_ds_src_genomic_file')}} +from {{ ref('kf_ds_src_genomic_file') }} diff --git a/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_investigator.sql b/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_investigator.sql index 02bc1ad..3f496b7 100644 --- a/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_investigator.sql +++ b/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_investigator.sql @@ -14,4 +14,4 @@ select distinct visible, visibility_reason, 
visibility_comment -from {{ref('kf_ds_src_investigator')}} +from {{ ref('kf_ds_src_investigator') }} diff --git a/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_outcome.sql b/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_outcome.sql index 89f5b1c..d7085c8 100644 --- a/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_outcome.sql +++ b/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_outcome.sql @@ -15,4 +15,4 @@ select distinct visible, visibility_reason, visibility_comment -from {{ref('kf_ds_src_outcome')}} +from {{ ref('kf_ds_src_outcome') }} diff --git a/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_participant.sql b/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_participant.sql index 1ca1bcd..8902c9e 100644 --- a/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_participant.sql +++ b/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_participant.sql @@ -22,4 +22,4 @@ select distinct visible, visibility_reason, -- can we standardize this a bit more? maybe release status instead of reason? and try to standardize more? 
visibility_comment -from {{ref('kf_ds_src_participant')}} +from {{ ref('kf_ds_src_participant') }} diff --git a/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_phenotype.sql b/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_phenotype.sql index cc968f1..b44a446 100644 --- a/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_phenotype.sql +++ b/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_phenotype.sql @@ -1,5 +1,3 @@ - - select distinct uuid, -- we can leave this out; it's not used, kf_id as phenotype_id, @@ -13,4 +11,4 @@ select distinct -- additional fields that may be included snomed_id_phenotype, external_id -from {{ref('kf_ds_src_phenotype')}} +from {{ ref('kf_ds_src_phenotype') }} diff --git a/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_sample.sql b/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_sample.sql index 6a6be80..4f58c84 100644 --- a/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_sample.sql +++ b/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_sample.sql @@ -24,4 +24,4 @@ select distinct visible, visibility_reason, visibility_comment -from {{ref('kf_ds_src_sample')}} +from {{ ref('kf_ds_src_sample') }} diff --git a/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_segf.sql b/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_segf.sql index 39b2820..9b8fc4c 100644 --- a/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_segf.sql +++ b/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_segf.sql @@ -13,4 +13,4 @@ select distinct kf_id as segf_id, visibility_reason, visibility_comment -from {{ref('kf_ds_src_segf')}} +from {{ ref('kf_ds_src_segf') }} diff --git a/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_sequencing_center.sql b/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_sequencing_center.sql index a3872b6..5558404 100644 --- 
a/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_sequencing_center.sql +++ b/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_sequencing_center.sql @@ -14,4 +14,4 @@ select distinct visible, visibility_reason, visibility_comment -from {{ref('kf_ds_src_sequencing_center')}} +from {{ ref('kf_ds_src_sequencing_center') }} diff --git a/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_sequencing_experiment.sql b/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_sequencing_experiment.sql index 588e823..7b82514 100644 --- a/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_sequencing_experiment.sql +++ b/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_sequencing_experiment.sql @@ -54,4 +54,4 @@ select distinct umi_barcode_offset, umi_barcode_read, umi_barcode_size -from {{ref('kf_ds_src_sequencing_experiment')}} +from {{ ref('kf_ds_src_sequencing_experiment') }} diff --git a/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_study.sql b/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_study.sql index f5a3075..2835ce5 100644 --- a/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_study.sql +++ b/dbt_project/models/kids_first/dataservice_studies/int/kf_ds_int_study.sql @@ -27,7 +27,7 @@ select distinct biobank_name, -- has been NA for kids first, only used for CBTN, should we keep? biobank_request_instructions, -- has been NA for kids first, only used for CBTN, should we keep? biobank_request_link -- has been NA for kids first, only used for CBTN, should we keep? 
-from {{ref('kf_ds_src_study')}} +from {{ ref('kf_ds_src_study') }} where lower(program) in ( 'kids first', 'kf/include' diff --git a/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_bsgf.sql b/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_bsgf.sql index 47af19c..089ca60 100644 --- a/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_bsgf.sql +++ b/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_bsgf.sql @@ -3,4 +3,4 @@ ) }} select * -from {{ ref('ds_bsgfs')}} +from {{ ref('ds_bsgfs') }} diff --git a/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_diagnosis.sql b/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_diagnosis.sql index 2b6f8dd..e7824f2 100644 --- a/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_diagnosis.sql +++ b/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_diagnosis.sql @@ -3,4 +3,4 @@ ) }} select * -from{{ ref('ds_diagnoses')}} +from{{ ref('ds_diagnoses') }} diff --git a/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_family.sql b/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_family.sql index 8af9054..052d446 100644 --- a/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_family.sql +++ b/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_family.sql @@ -3,4 +3,4 @@ ) }} select * -from{{ref('ds_families')}} +from{{ ref('ds_families') }} diff --git a/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_family_relationship.sql b/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_family_relationship.sql index a69754b..ec75898 100644 --- a/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_family_relationship.sql +++ b/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_family_relationship.sql @@ -3,4 +3,4 @@ ) }} select * -from{{ref('ds_family_relationships')}} +from{{ ref('ds_family_relationships') }} diff --git 
a/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_genomic_file.sql b/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_genomic_file.sql index 4e25cf8..855f6b8 100644 --- a/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_genomic_file.sql +++ b/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_genomic_file.sql @@ -3,4 +3,4 @@ ) }} select * -from{{ref('ds_genomic_files')}} +from{{ ref('ds_genomic_files') }} diff --git a/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_investigator.sql b/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_investigator.sql index cc5fef2..2024eeb 100644 --- a/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_investigator.sql +++ b/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_investigator.sql @@ -3,4 +3,4 @@ ) }} select * -from{{ref('ds_investigators')}} +from{{ ref('ds_investigators') }} diff --git a/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_outcome.sql b/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_outcome.sql index e361023..f3c3c72 100644 --- a/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_outcome.sql +++ b/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_outcome.sql @@ -3,4 +3,4 @@ ) }} select * -from{{ref('ds_outcomes')}} +from{{ ref('ds_outcomes') }} diff --git a/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_participant.sql b/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_participant.sql index f32b383..d188eb5 100644 --- a/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_participant.sql +++ b/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_participant.sql @@ -3,4 +3,4 @@ ) }} select * -from{{ref('ds_participants')}} +from{{ ref('ds_participants') }} diff --git a/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_phenotype.sql 
b/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_phenotype.sql index d6f51de..89baf40 100644 --- a/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_phenotype.sql +++ b/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_phenotype.sql @@ -3,4 +3,4 @@ ) }} select * -from{{ref('ds_phenotypes')}} +from{{ ref('ds_phenotypes') }} diff --git a/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_sample.sql b/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_sample.sql index 337c270..1d2e8dd 100644 --- a/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_sample.sql +++ b/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_sample.sql @@ -3,4 +3,4 @@ ) }} select * -from{{ref('ds_samples')}} +from{{ ref('ds_samples') }} diff --git a/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_segf.sql b/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_segf.sql index 924356a..507e45a 100644 --- a/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_segf.sql +++ b/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_segf.sql @@ -3,4 +3,4 @@ ) }} select * -from{{ref('ds_segfs')}} +from{{ ref('ds_segfs') }} diff --git a/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_sequencing_center.sql b/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_sequencing_center.sql index 30d74f0..07baf06 100644 --- a/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_sequencing_center.sql +++ b/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_sequencing_center.sql @@ -3,4 +3,4 @@ ) }} select * -from{{ref('ds_sequencing_centers')}} +from{{ ref('ds_sequencing_centers') }} diff --git a/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_sequencing_experiment.sql b/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_sequencing_experiment.sql index 0cb6986..0a44ca7 100644 --- 
a/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_sequencing_experiment.sql +++ b/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_sequencing_experiment.sql @@ -3,4 +3,4 @@ ) }} select * -from{{ref('ds_ses')}} +from{{ ref('ds_ses') }} diff --git a/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_study.sql b/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_study.sql index 241d216..35a5c46 100644 --- a/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_study.sql +++ b/dbt_project/models/kids_first/dataservice_studies/src/kf_ds_src_study.sql @@ -3,4 +3,4 @@ ) }} select * -from{{ref('ds_studies')}} +from{{ ref('ds_studies') }} diff --git a/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_biospecimen.sql b/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_biospecimen.sql index 3dece5f..5bee206 100644 --- a/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_biospecimen.sql +++ b/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_biospecimen.sql @@ -43,4 +43,4 @@ select distinct amount, amount_units, cell_entity -from {{ref('kf_ds_int_biospecimen')}} \ No newline at end of file +from {{ ref('kf_ds_int_biospecimen') }} \ No newline at end of file diff --git a/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_bsgf.sql b/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_bsgf.sql index 23e3ff4..0f42bc8 100644 --- a/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_bsgf.sql +++ b/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_bsgf.sql @@ -16,4 +16,4 @@ select distinct external_id, -- i think we can leave this out - it's rarely populated/used, visibility_reason, visibility_comment -from {{ref('kf_ds_int_bsgf')}} \ No newline at end of file +from {{ ref('kf_ds_int_bsgf') }} \ No newline at end of file diff --git 
a/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_diagnosis.sql b/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_diagnosis.sql index ada485b..dc16b27 100644 --- a/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_diagnosis.sql +++ b/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_diagnosis.sql @@ -19,4 +19,4 @@ select distinct source_text_tumor_location, uberon_id_tumor_location, spatial_descriptor -from {{ref('kf_ds_int_diagnosis')}} \ No newline at end of file +from {{ ref('kf_ds_int_diagnosis') }} \ No newline at end of file diff --git a/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_family.sql b/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_family.sql index f42143a..15ce859 100644 --- a/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_family.sql +++ b/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_family.sql @@ -13,4 +13,4 @@ select distinct visible, visibility_reason, visibility_comment -from {{ref('kf_ds_int_family')}} \ No newline at end of file +from {{ ref('kf_ds_int_family') }} \ No newline at end of file diff --git a/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_family_relationship.sql b/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_family_relationship.sql index b4df682..381ae64 100644 --- a/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_family_relationship.sql +++ b/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_family_relationship.sql @@ -16,4 +16,4 @@ select distinct visibility_reason, visibility_comment, source_text_notes -- don't think we need this? 
we never populate it -from {{ref('kf_ds_int_family_relationship')}} \ No newline at end of file +from {{ ref('kf_ds_int_family_relationship') }} \ No newline at end of file diff --git a/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_genomic_file.sql b/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_genomic_file.sql index e24bebf..7683746 100644 --- a/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_genomic_file.sql +++ b/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_genomic_file.sql @@ -31,4 +31,4 @@ select distinct -- could be useful after delivery but would be null during source load cavatica_file_id, cavatica_volume -from {{ref('kf_ds_int_genomic_file')}} \ No newline at end of file +from {{ ref('kf_ds_int_genomic_file') }} \ No newline at end of file diff --git a/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_investigator.sql b/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_investigator.sql index 94d6ebc..9f7d5f1 100644 --- a/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_investigator.sql +++ b/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_investigator.sql @@ -14,4 +14,4 @@ select distinct visible, visibility_reason, visibility_comment -from {{ref('kf_ds_int_investigator')}} \ No newline at end of file +from {{ ref('kf_ds_int_investigator') }} \ No newline at end of file diff --git a/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_outcome.sql b/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_outcome.sql index de2751d..81c0572 100644 --- a/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_outcome.sql +++ b/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_outcome.sql @@ -15,4 +15,4 @@ select distinct visible, visibility_reason, visibility_comment -from {{ref('kf_ds_int_outcome')}} \ No newline at end 
of file +from {{ ref('kf_ds_int_outcome') }} \ No newline at end of file diff --git a/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_participant.sql b/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_participant.sql index ac1d31a..ce54158 100644 --- a/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_participant.sql +++ b/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_participant.sql @@ -1,5 +1,3 @@ - - select distinct uuid, -- we can leave this out ; it's not used, created_at, @@ -20,4 +18,4 @@ select distinct visible, visibility_reason, -- can we standardize this a bit more? maybe release status instead of reason? and try to standardize more? visibility_comment -from {{ref('kf_ds_int_participant')}} \ No newline at end of file +from {{ ref('kf_ds_int_participant') }} \ No newline at end of file diff --git a/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_phenotype.sql b/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_phenotype.sql index 26bbbde..8d19f98 100644 --- a/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_phenotype.sql +++ b/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_phenotype.sql @@ -1,5 +1,3 @@ - - select distinct uuid, -- we can leave this out; it's not used, phenotype_id, @@ -13,4 +11,4 @@ select distinct -- additional fields that may be included snomed_id_phenotype, external_id -from {{ref('kf_ds_int_phenotype')}} +from {{ ref('kf_ds_int_phenotype') }} diff --git a/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_sample.sql b/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_sample.sql index 39a24d5..2d62d94 100644 --- a/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_sample.sql +++ b/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_sample.sql @@ -24,4 +24,4 @@ select distinct visible, 
visibility_reason, visibility_comment -from {{ref('kf_ds_int_sample')}} \ No newline at end of file +from {{ ref('kf_ds_int_sample') }} \ No newline at end of file diff --git a/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_segf.sql b/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_segf.sql index aa9fa1f..0ff00cd 100644 --- a/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_segf.sql +++ b/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_segf.sql @@ -13,4 +13,4 @@ select distinct segf_id, visibility_reason, visibility_comment -from {{ref('kf_ds_int_segf')}} \ No newline at end of file +from {{ ref('kf_ds_int_segf') }} \ No newline at end of file diff --git a/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_sequencing_center.sql b/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_sequencing_center.sql index 91410ed..166d3d2 100644 --- a/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_sequencing_center.sql +++ b/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_sequencing_center.sql @@ -13,4 +13,4 @@ select distinct visible, visibility_reason, visibility_comment -from {{ref('kf_ds_int_sequencing_center')}} \ No newline at end of file +from {{ ref('kf_ds_int_sequencing_center') }} \ No newline at end of file diff --git a/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_sequencing_experiment.sql b/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_sequencing_experiment.sql index 79e937b..f80b2e4 100644 --- a/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_sequencing_experiment.sql +++ b/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_sequencing_experiment.sql @@ -54,4 +54,4 @@ select distinct umi_barcode_offset, umi_barcode_read, umi_barcode_size -from {{ref('kf_ds_int_sequencing_experiment')}} \ No newline at end of 
file +from {{ ref('kf_ds_int_sequencing_experiment') }} \ No newline at end of file diff --git a/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_study.sql b/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_study.sql index 0bee6e7..7095b0b 100644 --- a/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_study.sql +++ b/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable_study.sql @@ -27,4 +27,4 @@ select distinct biobank_name, -- has been NA for kids first, only used for CBTN, should we keep? biobank_request_instructions, -- has been NA for kids first, only used for CBTN, should we keep? biobank_request_link -- has been NA for kids first, only used for CBTN, should we keep? -from {{ref('kf_ds_int_study')}} \ No newline at end of file +from {{ ref('kf_ds_int_study') }} \ No newline at end of file From 13654a717853bccf8ac8c5e1388bf0ebe5c78b51 Mon Sep 17 00:00:00 2001 From: Christina Diaz Date: Fri, 13 Feb 2026 15:08:20 -0500 Subject: [PATCH 08/11] =?UTF-8?q?=E2=9C=A8=20Add=20ds=20tables?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../docs_tables.md | 164 +++++++++++------- 1 file changed, 106 insertions(+), 58 deletions(-) diff --git a/dbt_project/models/_metadata_description_files/docs_tables.md b/dbt_project/models/_metadata_description_files/docs_tables.md index d254b48..ff03c6f 100644 --- a/dbt_project/models/_metadata_description_files/docs_tables.md +++ b/dbt_project/models/_metadata_description_files/docs_tables.md @@ -2,140 +2,188 @@ ## Kids First Dataservice Tables - Source Stage -{% docs src_bsgf %} +{% docs kf_ds_src_bsgf %} Kids First Dataservivce source table for linking specimens to genomic files. One file may be linked to many specimens. {% enddocs %} -{% docs src_diagnosis %} +{% docs kf_ds_src_diagnosis %} Kids First Dataservice source table for harmonized conditions curated to MONDO codes at the patient level. 
All conditions in this table are implied to be observed in patients. Each row represents one condition per patient. {% enddocs %} -{% docs src_family %} -Kids First Dataservice source table that holds family ids for each participant. This table can be joined to src_participants to obtain participant to family id mappings. +{% docs kf_ds_src_family %} +Kids First Dataservice source table that holds family ids for each participant. This table can be joined to kf_ds_src_participant to obtain participant to family id mappings. {% enddocs %} -{% docs src_genomic_files %} -Kids First Dataservice source table that holds raw and harmonized genomic file outputs. This table provides file and bioinformatic workflow metadata for each file. Must be joined to src_bsgf to obtain specimen to file mappings. +{% docs kf_ds_src_family_relationship %} +Kids First Dataservice source table that holds family relationships for each participant. Usually only reports relationships for duos, trios, or trios+. {% enddocs %} -{% docs src_investigator %} +{% docs kf_ds_src_genomic_file %} +Kids First Dataservice source table that holds raw and harmonized genomic file outputs. This table provides file and bioinformatic workflow metadata for each file. Must be joined to kf_ds_src_bsgf to obtain specimen to file mappings. +{% enddocs %} + +{% docs kf_ds_src_investigator %} Kids First Dataservice source table for investigator information. Only contains minimal contact information for the Principle Investigator of a study. One investigator may be associated to multiple study ids. {% enddocs %} -{% docs src_participant %} -Kids First Dataservice source table for participant demographic information. Also contains information regarding a participant's affected status. Links each participant to an assigned family id from src_family and an assigned study id from src_study. +{% docs kf_ds_src_outcome %} +Kids First Dataservice source table for outcome information.
Reports the vital status of patients and whether or not death was disease related. +{% enddocs %} + +{% docs kf_ds_src_participant %} +Kids First Dataservice source table for participant demographic information. Also contains information regarding a participant's affected status. Links each participant to an assigned family id from kf_ds_src_family and an assigned study id from kf_ds_src_study. {% enddocs %} -{% docs src_phenotype %} +{% docs kf_ds_src_phenotype %} Kids First Dataservice source table for harmonized conditions curated to HPO codes at the patient level. Conditions can be observed or not observed in a patient. Each row represents one condition and observation status per patient. {% enddocs %} -{% docs src_segf %} +{% docs kf_ds_src_sample %} +Kids First Dataservice source table for samples. +{% enddocs %} + +{% docs kf_ds_src_segf %} Kids First Dataservice source table for linking sequencing experiments to genomic files. Multiple files can be linked to one sequencing experiment. {% enddocs %} -{% docs src_sequencing_experiments %} +{% docs kf_ds_src_sequencing_center %} +Kids First Dataservice source table for sequencing center information. +{% enddocs %} + +{% docs kf_ds_src_sequencing_experiment %} Kids First Dataservice source table for sequencing experiments that holds sequencing metadata. {% enddocs %} -{% docs src_specimens %} +{% docs kf_ds_src_biospecimen %} Kids First Dataservice source table for biospecimen information. Contains specimen collection information and specimen material information, as well as VBR specific entities to support CBTN VBR fields. Each row represents one aliquot per participant. {% enddocs %} -{% docs src_study %} +{% docs kf_ds_src_study %} Kids First Dataservice source table for study metadata. Contains full and short study names, study codes, study program, and dbgap phs numbers. {% enddocs %} ## Kids First Dataservice Tables - Int Stage -{% docs int_bsgf %} -Intermediate table for src_bsgf.
Transforms dataservice entities for better usability and clarity. Excludes certain entites that are not needed. +{% docs kf_ds_int_bsgf %} +Intermediate table for kf_ds_src_bsgf. Transforms dataservice entities for better usability and clarity. Excludes certain entities that are not needed. +{% enddocs %} + +{% docs kf_ds_int_diagnosis %} +Intermediate table for kf_ds_src_diagnosis. Transforms dataservice entities for better usability and clarity. Excludes certain entities that are not needed. +{% enddocs %} + +{% docs kf_ds_int_family %} +Intermediate table for kf_ds_src_family. Transforms dataservice entities for better usability and clarity. Excludes certain entities that are not needed. {% enddocs %} -{% docs int_diagnosis %} -Intermediate table for src_diagnosis. Transforms dataservice entities for better usability and clarity. Excludes certain entites that are not needed. +{% docs kf_ds_int_family_relationship %} +Intermediate table for kf_ds_src_family_relationship. Transforms dataservice entities for better usability and clarity. Excludes certain entities that are not needed. {% enddocs %} -{% docs int_family %} -Intermediate table for src_family. Transforms dataservice entities for better usability and clarity. Excludes certain entites that are not needed. +{% docs kf_ds_int_genomic_file %} +Intermediate table for kf_ds_src_genomic_file. Transforms dataservice entities for better usability and clarity. Excludes certain entities that are not needed. {% enddocs %} -{% docs int_genomic_files %} -Intermediate table for src_genomic_files. Transforms dataservice entities for better usability and clarity. Excludes certain entites that are not needed. +{% docs kf_ds_int_investigator %} +Intermediate table for kf_ds_src_investigator. Transforms dataservice entities for better usability and clarity. Excludes certain entities that are not needed. {% enddocs %} -{% docs int_investigator %} -Intermediate table for src_investigator.
Transforms dataservice entities for better usability and clarity. Excludes certain entites that are not needed. +{% docs kf_ds_int_outcome %} +Intermediate table for kf_ds_src_outcome. Transforms dataservice entities for better usability and clarity. Excludes certain entities that are not needed. {% enddocs %} -{% docs int_participant %} -Intermediate table for src_participant. Transforms dataservice entities for better usability and clarity. Excludes certain entites that are not needed. +{% docs kf_ds_int_participant %} +Intermediate table for kf_ds_src_participant. Transforms dataservice entities for better usability and clarity. Excludes certain entities that are not needed. {% enddocs %} -{% docs int_phenotype %} -Intermediate table for src_phenotype. Transforms dataservice entities for better usability and clarity. Excludes certain entites that are not needed. +{% docs kf_ds_int_phenotype %} +Intermediate table for kf_ds_src_phenotype. Transforms dataservice entities for better usability and clarity. Excludes certain entities that are not needed. {% enddocs %} -{% docs int_segf %} -Intermediate table for src_segf. Transforms dataservice entities for better usability and clarity. Excludes certain entites that are not needed. +{% docs kf_ds_int_sample %} +Intermediate table for kf_ds_src_sample. Transforms dataservice entities for better usability and clarity. Excludes certain entities that are not needed. {% enddocs %} -{% docs int_sequencing_experiment %} -Intermediate table for src_sequencing_experiments. Transforms dataservice entities for better usability and clarity. Excludes certain entites that are not needed. +{% docs kf_ds_int_sequencing_center %} +Intermediate table for kf_ds_src_sequencing_center. Transforms dataservice entities for better usability and clarity. Excludes certain entities that are not needed. {% enddocs %} -{% docs int_specimens %} -Intermediate table for src_specimens. Transforms dataservice entities for better usability and clarity.
Excludes certain entites that are not needed. +{% docs kf_ds_int_segf %} +Intermediate table for kf_ds_src_segf. Transforms dataservice entities for better usability and clarity. Excludes certain entities that are not needed. {% enddocs %} -{% docs int_study %} -Intermediate table for src_study. Transforms dataservice entities for better usability and clarity. Excludes certain entites that are not needed. +{% docs kf_ds_int_sequencing_experiment %} +Intermediate table for kf_ds_src_sequencing_experiment. Transforms dataservice entities for better usability and clarity. Excludes certain entities that are not needed. +{% enddocs %} + +{% docs kf_ds_int_biospecimen %} +Intermediate table for kf_ds_src_biospecimen. Transforms dataservice entities for better usability and clarity. Excludes certain entities that are not needed. +{% enddocs %} + +{% docs kf_ds_int_study %} +Intermediate table for kf_ds_src_study. Transforms dataservice entities for better usability and clarity. Excludes certain entities that are not needed. {% enddocs %} ## Kids First Dataservice Tables - Stable Stage -{% docs stable_bsgf %} -Stable table for int_bsgf. Finalized mapping of transformed dataservice entities that are ready to be brought into the access layer. +{% docs kf_ds_stable_bsgf %} +Stable table for kf_ds_int_bsgf. Finalized mapping of transformed dataservice entities that are ready to be brought into the access layer. +{% enddocs %} + +{% docs kf_ds_stable_diagnosis %} +Stable table for kf_ds_int_diagnosis. Finalized mapping of transformed dataservice entities that are ready to be brought into the access layer. +{% enddocs %} + +{% docs kf_ds_stable_family %} +Stable table for kf_ds_int_family. Finalized mapping of transformed dataservice entities that are ready to be brought into the access layer. +{% enddocs %} + +{% docs kf_ds_stable_family_relationship %} +Stable table for kf_ds_int_family_relationship.
Finalized mapping of transformed dataservice entities that are ready to be brought into the access layer. +{% enddocs %} + +{% docs kf_ds_stable_genomic_file %} +Stable table for kf_ds_int_genomic_file. Finalized mapping of transformed dataservice entities that are ready to be brought into the access layer. {% enddocs %} -{% docs stable_diagnosis %} -Stable table for int_diagnosis. Finalized mapping of transformed dataservice entities that are ready to be brought into the access layer. +{% docs kf_ds_stable_investigator %} +Stable table for kf_ds_int_investigator. Finalized mapping of transformed dataservice entities that are ready to be brought into the access layer. {% enddocs %} -{% docs stable_family %} -Stable table for int_family. Finalized mapping of transformed dataservice entities that are ready to be brought into the access layer. +{% docs kf_ds_stable_outcome %} +Stable table for kf_ds_int_outcome. Finalized mapping of transformed dataservice entities that are ready to be brought into the access layer. {% enddocs %} -{% docs stable_genomic_file %} -Stable table for int_families. Finalized mapping of transformed dataservice entities that are ready to be brought into the access layer. +{% docs kf_ds_stable_participant %} +Stable table for kf_ds_int_participant. Finalized mapping of transformed dataservice entities that are ready to be brought into the access layer. {% enddocs %} -{% docs stable_investigator %} -Stable table for int_investigator. Finalized mapping of transformed dataservice entities that are ready to be brought into the access layer. +{% docs kf_ds_stable_phenotype %} +Stable table for kf_ds_int_phenotype. Finalized mapping of transformed dataservice entities that are ready to be brought into the access layer. {% enddocs %} -{% docs stable_participant %} -Stable table for int_participant. Finalized mapping of transformed dataservice entities that are ready to be brought into the access layer.
+{% docs kf_ds_stable_sample %} +Stable table for kf_ds_int_sample. Finalized mapping of transformed dataservice entities that are ready to be brought into the access layer. {% enddocs %} -{% docs stable_phenotype %} -Stable table for int_phenotype. Finalized mapping of transformed dataservice entities that are ready to be brought into the access layer. +{% docs kf_ds_stable_sequencing_center %} +Stable table for kf_ds_int_sequencing_center. Finalized mapping of transformed dataservice entities that are ready to be brought into the access layer. {% enddocs %} -{% docs stable_segf %} -Stable table for int_segf. Finalized mapping of transformed dataservice entities that are ready to be brought into the access layer. +{% docs kf_ds_stable_segf %} +Stable table for kf_ds_int_segf. Finalized mapping of transformed dataservice entities that are ready to be brought into the access layer. {% enddocs %} -{% docs stable_sequencing_experiment %} -Stable table for int_sequencing_experiment. Finalized mapping of transformed dataservice entities that are ready to be brought into the access layer. +{% docs kf_ds_stable_sequencing_experiment %} +Stable table for kf_ds_int_sequencing_experiment. Finalized mapping of transformed dataservice entities that are ready to be brought into the access layer. {% enddocs %} -{% docs stable_specimens %} -Stable table for int_specimens. Finalized mapping of transformed dataservice entities that are ready to be brought into the access layer. +{% docs kf_ds_stable_biospecimen %} +Stable table for kf_ds_int_biospecimen. Finalized mapping of transformed dataservice entities that are ready to be brought into the access layer. {% enddocs %} -{% docs stable_study %} -Stable table for int_study. Finalized mapping of transformed dataservice entities that are ready to be brought into the access layer. +{% docs kf_ds_stable_study %} +Stable table for kf_ds_int_study. 
Finalized mapping of transformed dataservice entities that are ready to be brought into the access layer. {% enddocs %} From f88e8b7acc4bb9ebd6cf054c590ff73b2e1234ed Mon Sep 17 00:00:00 2001 From: Christina Diaz Date: Fri, 13 Feb 2026 15:08:34 -0500 Subject: [PATCH 09/11] =?UTF-8?q?=E2=9C=A8=20Add=20fields?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../docs_fields.md | 68 +++++++++++++++++-- 1 file changed, 64 insertions(+), 4 deletions(-) diff --git a/dbt_project/models/_metadata_description_files/docs_fields.md b/dbt_project/models/_metadata_description_files/docs_fields.md index a5cd9aa..aee4563 100644 --- a/dbt_project/models/_metadata_description_files/docs_fields.md +++ b/dbt_project/models/_metadata_description_files/docs_fields.md @@ -128,6 +128,32 @@ The dewrangle generated id for a family. This id is a lower-cased version of the Denotes type of family using a set of enums, such as proband only or trio. Not currently populated in Kids First dataservie, but is calculcated by the portal etl and displayed on the Kids First portal. {% enddocs %} +### family relationship fields + +{% docs participant1_id %} +The kf id of one person in the family relationship. +{% enddocs %} + +{% docs participant2_id %} +The kf id of the second person in the family relationship. +{% enddocs %} + +{% docs participant1_to_participant2_relation %} +A descriptor that indicates person 1's genetic relationship to person 2. Is typically mother, father, child, or sibling. +{% enddocs %} + +{% docs participant2_to_participant1_relation %} +A descriptor that indicates person 2's genetic relationship to person 1. Is typically null, mother, father, son/daughter, brother/sister. +{% enddocs %} + +{% docs relationship_id %} +The Kids First assigned kf id that represents a genetic relationship between two participants. 
In the format "FR_XXXXXXXX". +{% enddocs %} + +{% docs source_text_notes %} +Additional text notes from source describing the relationship. Not typically populated. +{% enddocs %} + ### genomic_file fields {% docs dewrangle_genomic_file_id %} @@ -204,6 +230,16 @@ The dewrangle generated id for an investigator. This id is a lower-cased version The name of the investigator's institution. {% enddocs %} +### outcome fields + +{% docs vital_status %} +The patient's reported state of being alive or deceased. +{% enddocs %} + +{% docs disease_related %} +A yes or no field indicating whether a patient's deceased vital status is a result of the disease. +{% enddocs %} + ### participant fields {% docs alias_group_id %} @@ -260,6 +296,34 @@ Denotes whether a phenotype is negative or positive The ID of the term from Systematized Nomenclature of Medicine --Clinical Terms which encodes clinical terminology. Not actively populated. {% enddocs %} +### sample fields + +{% docs sample_event_key %} +Identifier for the event when the sample was first drawn. +{% enddocs %} + +{% docs tissue_type %} +Description of the kind of tissue collected if it's a tissue-type sample. +{% enddocs %} + +{% docs sample_type %} +The kind of material of the sample. +{% enddocs %} + +{% docs anatomical_location %} +The anatomical location of collection. +{% enddocs %} + +{% docs external_collection_id %} +Identifier for the collection event. +{% enddocs %} + +### sequencing center fields + +{% docs sequencing_center_name %} +The official name of the sequencing center used to generate source genomic file outputs. 
+{% enddocs %} + ### sequencing experiment fields {% docs dewrangle_sequencing_experiment_id %} @@ -634,10 +698,6 @@ Sex of pariticipant Age of participant when phenotype was asserted {% enddocs %} -{% docs vital_status %} -Vital status of participant -{% enddocs %} - ### Broad Manifest From 77192d5739a7d65d6f737905f8f58c75dd01084c Mon Sep 17 00:00:00 2001 From: Christina Diaz Date: Fri, 13 Feb 2026 15:10:13 -0500 Subject: [PATCH 10/11] =?UTF-8?q?=E2=9C=A8=20Add=20stable=20yml?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../stable/kf_ds_stable.yaml | 677 ++++++++++++++++++ 1 file changed, 677 insertions(+) create mode 100644 dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable.yaml diff --git a/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable.yaml b/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable.yaml new file mode 100644 index 0000000..f68eac0 --- /dev/null +++ b/dbt_project/models/kids_first/dataservice_studies/stable/kf_ds_stable.yaml @@ -0,0 +1,677 @@ +version: 2 + +models: + +- name: kf_ds_stable_bsgf + description: '{{ doc("kf_ds_stable_bsgf") }}' + config: + meta: + study: kf_dataservice_study + columns: + - name: uuid + description: '{{ doc("uuid") }}' + - name: bsgf_id + description: '{{ doc("bsgf_id") }}' + - name: created_at + description: '{{ doc("created_at") }}' + - name: modified_at + description: '{{ doc("modified_at") }}' + - name: external_id + description: '{{ doc("external_id") }}' + - name: visible + description: '{{ doc("visible") }}' + - name: visibility_reason + description: '{{ doc("visibility_reason") }}' + - name: visibility_comment + description: '{{ doc("visibility_comment") }}' + - name: genomic_file_id + description: '{{ doc("genomic_file_id") }}' + - name: biospecimen_id + description: '{{ doc("specimen_id") }}' + +- name: kf_ds_stable_diagnosis + description: '{{ doc("kf_ds_stable_diagnosis") }}' + config: + meta: + study: 
kf_dataservice_study + columns: + - name: uuid + description: '{{ doc("uuid") }}' + - name: diagnosis_id + description: '{{ doc("diagnosis_id") }}' + - name: dewrangle_diagnosis_id + description: '{{ doc("dewrangle_diagnosis_id") }}' + - name: created_at + description: '{{ doc("created_at") }}' + - name: modified_at + description: '{{ doc("modified_at") }}' + - name: external_id + description: '{{ doc("external_id") }}' + - name: visible + description: '{{ doc("visible") }}' + - name: visibility_reason + description: '{{ doc("visibility_reason") }}' + - name: visibility_comment + description: '{{ doc("visibility_comment") }}' + - name: participant_id + description: '{{ doc("participant_id") }}' + - name: source_text_diagnosis + description: '{{ doc("source_text_diagnosis") }}' + - name: age_at_event_days + description: '{{ doc("age_at_event_days") }}' + - name: mondo_id_diagnosis + description: '{{ doc("mondo_id_diagnosis") }}' + - name: icd_id_diagnosis + description: '{{ doc("icd_id_diagnosis") }}' + - name: diagnosis_category + description: '{{ doc("diagnosis_category") }}' + - name: source_text_tumor_location + description: '{{ doc("source_text_tumor_location") }}' + - name: uberon_id_tumor_location + description: '{{ doc("uberon_id_tumor_location") }}' + - name: spatial_descriptor + description: '{{ doc("spatial_descriptor") }}' + - name: observed + description: '{{ doc("observed") }}' + +- name: kf_ds_stable_family_relationship + description: '{{ doc("kf_ds_stable_family_relationship") }}' + config: + meta: + study: kf_dataservice_study + columns: + - name: uuid + description: '{{ doc("uuid") }}' + - name: relationship_id + description: '{{ doc("relationship_id") }}' + - name: created_at + description: '{{ doc("created_at") }}' + - name: modified_at + description: '{{ doc("modified_at") }}' + - name: external_id + description: '{{ doc("external_id") }}' + - name: visible + description: '{{ doc("visible") }}' + - name: visibility_reason + description: '{{ 
doc("visibility_reason") }}' + - name: visibility_comment + description: '{{ doc("visibility_comment") }}' + - name: participant1_id + description: '{{ doc("participant1_id") }}' + - name: participant2_id + description: '{{ doc("participant2_id") }}' + - name: participant1_to_participant2_relation + description: '{{ doc("participant1_to_participant2_relation") }}' + - name: participant2_to_participant1_relation + description: '{{ doc("participant2_to_participant1_relation") }}' + - name: source_text_notes + description: '{{ doc("source_text_notes") }}' + + +- name: kf_ds_stable_family + description: '{{ doc("kf_ds_stable_family") }}' + config: + meta: + study: kf_dataservice_study + columns: + - name: uuid + description: '{{ doc("uuid") }}' + - name: family_id + description: '{{ doc("family_id") }}' + - name: dewrangle_family_id + description: '{{ doc("dewrangle_family_id") }}' + - name: created_at + description: '{{ doc("created_at") }}' + - name: modified_at + description: '{{ doc("modified_at") }}' + - name: external_id + description: '{{ doc("external_id") }}' + - name: visible + description: '{{ doc("visible") }}' + - name: visibility_reason + description: '{{ doc("visibility_reason") }}' + - name: visibility_comment + description: '{{ doc("visibility_comment") }}' + - name: family_type + description: '{{ doc("family_type") }}' + +- name: kf_ds_stable_genomic_file + description: '{{ doc("kf_ds_stable_genomic_file") }}' + config: + meta: + study: kf_dataservice_study + columns: + - name: uuid + description: '{{ doc("uuid") }}' + - name: genomic_file_id + description: '{{ doc("genomic_file_id") }}' + - name: dewrangle_genomic_file_id + description: '{{ doc("dewrangle_genomic_file_id") }}' + - name: created_at + description: '{{ doc("created_at") }}' + - name: modified_at + description: '{{ doc("modified_at") }}' + - name: external_id + description: '{{ doc("external_id") }}' + - name: visible + description: '{{ doc("visible") }}' + - name: visibility_reason + 
description: '{{ doc("visibility_reason") }}' + - name: visibility_comment + description: '{{ doc("visibility_comment") }}' + - name: is_harmonized + description: '{{ doc("is_harmonized") }}' + - name: reference_genome + description: '{{ doc("reference_genome") }}' + - name: controlled_access + description: '{{ doc("controlled_access") }}' + - name: availability + description: '{{ doc("availability") }}' + - name: paired_end + description: '{{ doc("paired_end") }}' + - name: data_type + description: '{{ doc("data_type") }}' + - name: file_format + description: '{{ doc("file_format") }}' + - name: data_category + description: '{{ doc("data_category") }}' + - name: workflow_tool + description: '{{ doc("workflow_tool") }}' + - name: workflow_type + description: '{{ doc("workflow_type") }}' + - name: workflow_version + description: '{{ doc("workflow_version") }}' + - name: workflow_endpoint + description: '{{ doc("workflow_endpoint") }}' + - name: file_version_descriptor + description: '{{ doc("file_version_descriptor") }}' + - name: cavatica_file_id + description: '{{ doc("cavatica_file_id") }}' + - name: cavatica_volume + description: '{{ doc("cavatica_volume") }}' + +- name: kf_ds_stable_investigator + description: '{{ doc("kf_ds_stable_investigator") }}' + config: + meta: + study: kf_dataservice_study + columns: + - name: uuid + description: '{{ doc("uuid") }}' + - name: investigator_id + description: '{{ doc("investigator_id") }}' + - name: dewrangle_investigator_id + description: '{{ doc("dewrangle_investigator_id") }}' + - name: created_at + description: '{{ doc("created_at") }}' + - name: modified_at + description: '{{ doc("modified_at") }}' + - name: external_id + description: '{{ doc("external_id") }}' + - name: visible + description: '{{ doc("visible") }}' + - name: visibility_reason + description: '{{ doc("visibility_reason") }}' + - name: visibility_comment + description: '{{ doc("visibility_comment") }}' + - name: name + description: '{{ doc("name") }}' + 
- name: institution + description: '{{ doc("institution") }}' + +- name: kf_ds_stable_outcome + description: '{{ doc("kf_ds_stable_outcome") }}' + config: + meta: + study: kf_dataservice_study + columns: + - name: uuid + description: '{{ doc("uuid") }}' + - name: kf_id + description: '{{ doc("kf_id") }}' + - name: created_at + description: '{{ doc("created_at") }}' + - name: modified_at + description: '{{ doc("modified_at") }}' + - name: external_id + description: '{{ doc("external_id") }}' + - name: visible + description: '{{ doc("visible") }}' + - name: visibility_reason + description: '{{ doc("visibility_reason") }}' + - name: visibility_comment + description: '{{ doc("visibility_comment") }}' + - name: vital_status + description: '{{ doc("vital_status") }}' + - name: disease_related + description: '{{ doc("disease_related") }}' + - name: age_at_event_days + description: '{{ doc("age_at_event_days") }}' + +- name: kf_ds_stable_participant + description: '{{ doc("kf_ds_stable_participant") }}' + config: + meta: + study: kf_dataservice_study + columns: + - name: uuid + description: '{{ doc("uuid") }}' + - name: participant_id + description: '{{ doc("participant_id") }}' + - name: dewrangle_participant_id + description: '{{ doc("dewrangle_participant_id") }}' + - name: created_at + description: '{{ doc("created_at") }}' + - name: modified_at + description: '{{ doc("modified_at") }}' + - name: external_id + description: '{{ doc("external_id") }}' + - name: visible + description: '{{ doc("visible") }}' + - name: visibility_reason + description: '{{ doc("visibility_reason") }}' + - name: visibility_comment + description: '{{ doc("visibility_comment") }}' + - name: alias_group_id + description: '{{ doc("alias_group_id") }}' + - name: study_id + description: '{{ doc("study_id") }}' + - name: dewrangle_study_id + description: '{{ doc("dewrangle_study_id") }}' + - name: family_id + description: '{{ doc("family_id") }}' + - name: is_proband + description: '{{ 
doc("is_proband") }}' + - name: race + description: '{{ doc("race") }}' + - name: ethnicity + description: '{{ doc("ethnicity") }}' + - name: gender + description: '{{ doc("gender") }}' + - name: affected_status + description: '{{ doc("affected_status") }}' + - name: species + description: '{{ doc("species") }}' + +- name: kf_ds_stable_phenotype + description: '{{ doc("kf_ds_stable_phenotype") }}' + config: + meta: + study: kf_dataservice_study + columns: + - name: uuid + description: '{{ doc("uuid") }}' + - name: phenotype_id + description: '{{ doc("phenotype_id") }}' + - name: dewrangle_phenotype_id + description: '{{ doc("dewrangle_phenotype_id") }}' + - name: created_at + description: '{{ doc("created_at") }}' + - name: modified_at + description: '{{ doc("modified_at") }}' + - name: external_id + description: '{{ doc("external_id") }}' + - name: visible + description: '{{ doc("visible") }}' + - name: visibility_reason + description: '{{ doc("visibility_reason") }}' + - name: visibility_comment + description: '{{ doc("visibility_comment") }}' + - name: participant_id + description: '{{ doc("participant_id") }}' + - name: source_text_phenotype + description: '{{ doc("source_text_phenotype") }}' + - name: age_at_event_days + description: '{{ doc("age_at_event_days") }}' + - name: hpo_id_phenotype + description: '{{ doc("hpo_id_phenotype") }}' + - name: observed + description: '{{ doc("observed") }}' + - name: snomed_id_phenotype + description: '{{ doc("snomed_id_phenotype") }}' + +- name: kf_ds_stable_sample + description: '{{ doc("kf_ds_stable_sample") }}' + config: + meta: + study: kf_dataservice_study + columns: + - name: uuid + description: '{{ doc("uuid") }}' + - name: kf_id + description: '{{ doc("kf_id") }}' + - name: created_at + description: '{{ doc("created_at") }}' + - name: modified_at + description: '{{ doc("modified_at") }}' + - name: external_id + description: '{{ doc("external_id") }}' + - name: visible + description: '{{ doc("visible") }}' + - name: 
visibility_reason + description: '{{ doc("visibility_reason") }}' + - name: visibility_comment + description: '{{ doc("visibility_comment") }}' + - name: external_sample_id + description: '{{ doc("sample_event_key") }}' + - name: tissue_type + description: '{{ doc("tissue_type") }}' + - name: composition + description: '{{ doc("sample_type") }}' + - name: anatomical_location + description: '{{ doc("anatomical_location") }}' + - name: age_at_event_days + description: '{{ doc("age_at_event_days") }}' + - name: participant_id + description: '{{ doc("participant_id") }}' + - name: method_of_sample_procurement + description: '{{ doc("method_of_sample_procurement") }}' + - name: has_matched_normal_sample + description: '{{ doc("has_matched_normal_sample") }}' + - name: external_collection_id + description: '{{ doc("external_collection_id") }}' + - name: volume_ul + description: '{{ doc("volume_ul") }}' + - name: preservation_method + description: '{{ doc("preservation_method") }}' + - name: amount + description: '{{ doc("amount") }}' + - name: amount_units + description: '{{ doc("amount_units") }}' + +- name: kf_ds_stable_sequencing_center + description: '{{ doc("kf_ds_stable_sequencing_center") }}' + config: + meta: + study: kf_dataservice_study + columns: + - name: uuid + description: '{{ doc("uuid") }}' + - name: kf_id + description: '{{ doc("kf_id") }}' + - name: created_at + description: '{{ doc("created_at") }}' + - name: modified_at + description: '{{ doc("modified_at") }}' + - name: external_id + description: '{{ doc("external_id") }}' + - name: visible + description: '{{ doc("visible") }}' + - name: visibility_reason + description: '{{ doc("visibility_reason") }}' + - name: visibility_comment + description: '{{ doc("visibility_comment") }}' + - name: sequencing_center_name + description: '{{ doc("sequencing_center_name") }}' + +- name: kf_ds_stable_segf + description: '{{ doc("kf_ds_stable_segf") }}' + config: + meta: + study: kf_dataservice_study + columns: + - 
name: uuid + description: '{{ doc("uuid") }}' + - name: segf_id + description: '{{ doc("segf_id") }}' + - name: created_at + description: '{{ doc("created_at") }}' + - name: modified_at + description: '{{ doc("modified_at") }}' + - name: external_id + description: '{{ doc("external_id") }}' + - name: visible + description: '{{ doc("visible") }}' + - name: visibility_reason + description: '{{ doc("visibility_reason") }}' + - name: visibility_comment + description: '{{ doc("visibility_comment") }}' + - name: sequencing_experiment_id + description: '{{ doc("sequencing_experiment_id") }}' + - name: genomic_file_id + description: '{{ doc("genomic_file_id") }}' + +- name: kf_ds_stable_sequencing_experiment + description: '{{ doc("kf_ds_stable_sequencing_experiment") }}' + config: + meta: + study: kf_dataservice_study + columns: + - name: uuid + description: '{{ doc("uuid") }}' + - name: sequencing_experiment_id + description: '{{ doc("sequencing_experiment_id") }}' + - name: dewrangle_sequencing_experiment_id + description: '{{ doc("dewrangle_sequencing_experiment_id") }}' + - name: created_at + description: '{{ doc("created_at") }}' + - name: modified_at + description: '{{ doc("modified_at") }}' + - name: external_id + description: '{{ doc("external_id") }}' + - name: visible + description: '{{ doc("visible") }}' + - name: visibility_reason + description: '{{ doc("visibility_reason") }}' + - name: visibility_comment + description: '{{ doc("visibility_comment") }}' + - name: sequencing_center_id + description: '{{ doc("sequencing_center_id") }}' + - name: experiment_date + description: '{{ doc("experiment_date") }}' + - name: experiment_strategy + description: '{{ doc("experiment_strategy") }}' + - name: is_paired_end + description: '{{ doc("is_paired_end") }}' + - name: platform + description: '{{ doc("platform") }}' + - name: instrument_model + description: '{{ doc("instrument_model") }}' + - name: library_name + description: '{{ doc("library_name") }}' + - name: 
library_strand + description: '{{ doc("library_strand") }}' + - name: library_prep + description: '{{ doc("library_prep") }}' + - name: library_selection + description: '{{ doc("library_selection") }}' + - name: max_insert_size + description: '{{ doc("max_insert_size") }}' + - name: mean_insert_size + description: '{{ doc("mean_insert_size") }}' + - name: mean_depth + description: '{{ doc("mean_depth") }}' + - name: total_reads + description: '{{ doc("total_reads") }}' + - name: mean_read_length + description: '{{ doc("mean_read_length") }}' + - name: adapter_sequencing + description: '{{ doc("adapter_sequencing") }}' + - name: is_adapter_trimmed + description: '{{ doc("is_adapter_trimmed") }}' + - name: read_pair_number + description: '{{ doc("read_pair_number") }}' + - name: target_capture_kit + description: '{{ doc("target_capture_kit") }}' + - name: acquisition_type + description: '{{ doc("acquisition_type") }}' + - name: cdna_read + description: '{{ doc("cdna_read") }}' + - name: cdna_read_offset + description: '{{ doc("cdna_read_offset") }}' + - name: cell_barcode_offset + description: '{{ doc("cell_barcode_offset") }}' + - name: cell_barcode_read + description: '{{ doc("cell_barcode_read") }}' + - name: cell_barcode_size + description: '{{ doc("cell_barcode_size") }}' + - name: chromatography_approach + description: '{{ doc("chromatography_approach") }}' + - name: end_bias + description: '{{ doc("end_bias") }}' + - name: enrichment_approach + description: '{{ doc("enrichment_approach") }}' + - name: fraction_number + description: '{{ doc("fraction_number") }}' + - name: fractionation_approach + description: '{{ doc("fractionation_approach") }}' + - name: ion_fragmentation + description: '{{ doc("ion_fragmentation") }}' + - name: library_construction + description: '{{ doc("library_construction") }}' + - name: mass_spec_rawfile_conversion + description: '{{ doc("mass_spec_rawfile_conversion") }}' + - name: proteomics_experiment + description: '{{ 
doc("proteomics_experiment") }}' + - name: quantification_label_id + description: '{{ doc("quantification_label_id") }}' + - name: quantification_labeling_method + description: '{{ doc("quantification_labeling_method") }}' + - name: quantification_technique + description: '{{ doc("quantification_technique") }}' + - name: sequencing_mode + description: '{{ doc("sequencing_mode") }}' + - name: target_cell_number + description: '{{ doc("target_cell_number") }}' + - name: umi_barcode_offset + description: '{{ doc("umi_barcode_offset") }}' + - name: umi_barcode_read + description: '{{ doc("umi_barcode_read") }}' + - name: umi_barcode_size + description: '{{ doc("umi_barcode_size") }}' + +- name: kf_ds_stable_biospecimen + description: '{{ doc("kf_ds_stable_biospecimen") }}' + config: + meta: + study: kf_dataservice_study + columns: + - name: uuid + description: '{{ doc("uuid") }}' + - name: specimen_id + description: '{{ doc("specimen_id") }}' + - name: dewrangle_specimen_id + description: '{{ doc("dewrangle_specimen_id") }}' + - name: created_at + description: '{{ doc("created_at") }}' + - name: modified_at + description: '{{ doc("modified_at") }}' + - name: external_id + description: '{{ doc("external_id") }}' + - name: visible + description: '{{ doc("visible") }}' + - name: visibility_reason + description: '{{ doc("visibility_reason") }}' + - name: visibility_comment + description: '{{ doc("visibility_comment") }}' + - name: external_sample_id + description: '{{ doc("external_sample_id") }}' + - name: external_aliquot_id + description: '{{ doc("external_aliquot_id") }}' + - name: source_text_tissue_type + description: '{{ doc("source_text_tissue_type") }}' + - name: composition + description: '{{ doc("composition") }}' + - name: source_text_anatomical_site + description: '{{ doc("source_text_anatomical_site") }}' + - name: age_at_event_days + description: '{{ doc("age_at_event_days") }}' + - name: source_text_tumor_descriptor + description: '{{ 
doc("source_text_tumor_descriptor") }}' + - name: analyte_type + description: '{{ doc("analyte_type") }}' + - name: participant_id + description: '{{ doc("participant_id") }}' + - name: sequencing_center_id + description: '{{ doc("sequencing_center_id") }}' + - name: dbgap_consent_code + description: '{{ doc("dbgap_consent_code") }}' + - name: consent_type + description: '{{ doc("consent_type") }}' + - name: method_of_sample_procurement + description: '{{ doc("method_of_sample_procurement") }}' + - name: sample_id + description: '{{ doc("sample_id") }}' + - name: specimen_status + description: '{{ doc("specimen_status") }}' + - name: has_matched_normal_sample + description: '{{ doc("has_matched_normal_sample") }}' + - name: shipment_origin + description: '{{ doc("shipment_origin") }}' + - name: concentration_mg_per_ml + description: '{{ doc("concentration_mg_per_ml") }}' + - name: volume_ul + description: '{{ doc("volume_ul") }}' + - name: shipment_date + description: '{{ doc("shipment_date") }}' + - name: uberon_id_anatomical_site + description: '{{ doc("uberon_id_anatomical_site") }}' + - name: ncit_id_tissue_type + description: '{{ doc("ncit_id_tissue_type") }}' + - name: ncit_id_anatomical_site + description: '{{ doc("ncit_id_anatomical_site") }}' + - name: spatial_descriptor + description: '{{ doc("spatial_descriptor") }}' + - name: preservation_method + description: '{{ doc("preservation_method") }}' + - name: amount + description: '{{ doc("amount") }}' + - name: amount_units + description: '{{ doc("amount_units") }}' + - name: cell_entity + description: '{{ doc("cell_entity") }}' + +- name: kf_ds_stable_study + description: '{{ doc("kf_ds_stable_study") }}' + config: + meta: + study: kf_dataservice_study + columns: + - name: uuid + description: '{{ doc("uuid") }}' + - name: study_id + description: '{{ doc("study_id") }}' + - name: dewrangle_study_id + description: '{{ doc("dewrangle_study_id") }}' + - name: created_at + description: '{{ doc("created_at") }}' 
+ - name: modified_at + description: '{{ doc("modified_at") }}' + - name: external_id + description: '{{ doc("external_id") }}' + - name: visible + description: '{{ doc("visible") }}' + - name: visibility_reason + description: '{{ doc("visibility_reason") }}' + - name: visibility_comment + description: '{{ doc("visibility_comment") }}' + - name: data_access_authority + description: '{{ doc("data_access_authority") }}' + - name: version + description: '{{ doc("version") }}' + - name: name + description: '{{ doc("name") }}' + - name: short_name + description: '{{ doc("short_name") }}' + - name: attribution + description: '{{ doc("attribution") }}' + - name: release_status + description: '{{ doc("release_status") }}' + - name: investigator_id + description: '{{ doc("investigator_id") }}' + - name: short_code + description: '{{ doc("short_code") }}' + - name: domain + description: '{{ doc("domain") }}' + - name: program + description: '{{ doc("program") }}' + - name: parent_study_id + description: '{{ doc("parent_study_id") }}' + - name: biobank_email + description: '{{ doc("biobank_email") }}' + - name: biobank_name + description: '{{ doc("biobank_name") }}' + - name: biobank_request_instructions + description: '{{ doc("biobank_request_instructions") }}' + - name: biobank_request_link + description: '{{ doc("biobank_request_link") }}' \ No newline at end of file From 368dcd4649a1dcb9eb2cfe45375ea7d0a81fa8b3 Mon Sep 17 00:00:00 2001 From: Christina Diaz Date: Fri, 13 Feb 2026 15:12:28 -0500 Subject: [PATCH 11/11] =?UTF-8?q?=E2=9C=A8=20Add=20dags?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- dags/kids_first/dataservice_studies.py | 36 ++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 dags/kids_first/dataservice_studies.py diff --git a/dags/kids_first/dataservice_studies.py b/dags/kids_first/dataservice_studies.py new file mode 100644 index 0000000..3c3a1d5 --- /dev/null +++ 
b/dags/kids_first/dataservice_studies.py @@ -0,0 +1,36 @@ +from airflow.sdk import Variable + +from cosmos import ( + DbtDag, + ProjectConfig, + ProfileConfig, + ExecutionConfig, + RenderConfig, +) +from cosmos.profiles import PostgresUserPasswordProfileMapping + +profile_config = ProfileConfig( + # make sure target_name and profile_mapping align + profile_name=Variable.get("DBT_PROFILE_NAME"), + target_name="prd", + profile_mapping=PostgresUserPasswordProfileMapping( + conn_id="postgres_prd_svc", + profile_args={"schema": "prd"}, + ), +) + +example_study_dag = DbtDag( + project_config=ProjectConfig( + Variable.get("DBT_PROJECT_DIR"), + install_dbt_deps=True, + ), + profile_config=profile_config, + execution_config=ExecutionConfig( + dbt_executable_path=Variable.get("DBT_EXECUTABLE_PATH"), + ), + render_config=RenderConfig(select=["config.meta.study:kf_dataservice_study"]), + # normal dag parameters + schedule="@daily", + dag_id="kf_dataservice_studies", + tags=["POC", "Kids First"], +) \ No newline at end of file