From 78729f600e2928ebf92cebebbdf9e0cb06bc142c Mon Sep 17 00:00:00 2001
From: Amanda Warkow <warkowa@chop.edu>
Date: Thu, 22 Jan 2026 19:12:10 -0500
Subject: [PATCH 01/12] =?UTF-8?q?=F0=9F=8E=89=20add=20source=20models?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 dbt_project/models/access/demographics.sql    | 15 -------
 dbt_project/models/access/investigator.sql    | 11 -----
 dbt_project/models/access/study.sql           | 41 -------------------
 dbt_project/models/access/subject.sql         | 10 -----
 .../models/access/subject_assertation.sql     | 40 ------------------
 .../src/kf_sd_1nns3k8v_src_clinical.sql       |  6 +++
 .../sd_1nns3k8v/src/kf_sd_1nns3k8v_src_s3.sql |  6 +++
 .../src/kf_sd_1nns3k8v_src_sequencing.sql     |  6 +++
 8 files changed, 18 insertions(+), 117 deletions(-)
 delete mode 100644 dbt_project/models/access/demographics.sql
 delete mode 100644 dbt_project/models/access/investigator.sql
 delete mode 100644 dbt_project/models/access/study.sql
 delete mode 100644 dbt_project/models/access/subject.sql
 delete mode 100644 dbt_project/models/access/subject_assertation.sql
 create mode 100644 dbt_project/models/kids_first/sd_1nns3k8v/src/kf_sd_1nns3k8v_src_clinical.sql
 create mode 100644 dbt_project/models/kids_first/sd_1nns3k8v/src/kf_sd_1nns3k8v_src_s3.sql
 create mode 100644 dbt_project/models/kids_first/sd_1nns3k8v/src/kf_sd_1nns3k8v_src_sequencing.sql

diff --git a/dbt_project/models/access/demographics.sql b/dbt_project/models/access/demographics.sql
deleted file mode 100644
index 07c2711..0000000
--- a/dbt_project/models/access/demographics.sql
+++ /dev/null
@@ -1,15 +0,0 @@
-{{ config(
-    schema='access'
-) }}
-
-select distinct 
-  participant_id as subject_id,
-  gender as sex,
-  race,
-  ethnicity,
-  'Not Applicable' as down_syndrome_status,
-  'Not Reported' as age_at_last_vital_status, -- ideally needs to be pulled from outcome data
-  'Not Reported' as vital_status,
-  'Not Reported' as age_at_first_engagement,
-  external_id
-from {{ ref('stable_participant') }}
\ No newline at end of file
diff --git a/dbt_project/models/access/investigator.sql b/dbt_project/models/access/investigator.sql
deleted file mode 100644
index 2e76303..0000000
--- a/dbt_project/models/access/investigator.sql
+++ /dev/null
@@ -1,11 +0,0 @@
-{{ config(
-    schema='access'
-) }}
-
-select distinct 
-  name,
-  institution, 
-  'Principle Investigator' as investigator_title,
-  'Not Reported' as email,
-  external_id
-from {{ ref('stable_investigator') }}
\ No newline at end of file
diff --git a/dbt_project/models/access/study.sql b/dbt_project/models/access/study.sql
deleted file mode 100644
index 8cf8518..0000000
--- a/dbt_project/models/access/study.sql
+++ /dev/null
@@ -1,41 +0,0 @@
-{{ config(
-    schema='access'
-) }}
-
--- WIP
--- should some of the constants be assigned upstream in the stable tables? 
--- it would make it easier in the access model when bringing in every study
--- these fields are not explictly provided in kf 
-
-select distinct 
-  s.study_id, -- do we want to use the KF study id or the dewrangle one? 
-  'X01' as funding_source, -- is this the funding source for every kf study? 
-  s.name as study_title, -- is study title the full study name? 
-  s.short_code as study_code,
-  s.short_name as study_short_name,
-  'KF' as program, -- see enum list; should this always be a constant? or should we bother mapping what we have in study.program? 
-  'Not Reported' as study_description, -- we have this just not stored in any db
-  s.domain as research_domain, -- current linkml enums don't quite match
-  'Pediatric' as participant_lifespan_age, -- all kf studies should be set to pediatric
-  'Not Reported' as selection_criteria, -- we may have this info in intake sheets, but not stored in any db for kf
-  'Longitudinal' as study_design, -- not stored in a db, but might be in intake sheets ; otherwise not explicitly provided for kf
-  'Unknown' as clinical_data_source_type,
-  'Genomics' as data_category, -- this should be derived from available seq types in DS; need to review best apporach
-  'Not Reported' as website, 
-  count(distinct p.participant_id) as expected_number_of_participants, -- should this be a count of every pt in ds? 
-  count(case when p.visible='true' then participant_id end) as actual_number_of_participants, -- should this be a count of only vis pt in ds? 
-  'Not Reported' as acknowledgements,
-  'Not Reported' as citation_statement, 
-  'Not Reported' as doi,
-  i.name as contact, 
-  s.parent_study_id as parent_study,
-  i.name as principle_investigator,
-  s.external_id -- using phs number for now
-from {{ ref('stable_investigator') }} as i 
-inner join {{ ref('stable_study') }} as s on i.investigator_id=s.investigator_id-- think about how this would be adjusted to account for all stable_study tables in KF
-inner join {{ ref('stable_participant') }} as p on s.study_id=p.study_id 
-group by
-  s.study_id, funding_source, study_title, study_code, study_short_name, 
-  program, study_description, research_domain, participant_lifespan_age, selection_criteria,
-  study_design, clinical_data_source_type, data_category, website, acknowledgements, citation_statement, doi, s.external_id,
-  contact, parent_study, principle_investigator
\ No newline at end of file
diff --git a/dbt_project/models/access/subject.sql b/dbt_project/models/access/subject.sql
deleted file mode 100644
index 1d1c71d..0000000
--- a/dbt_project/models/access/subject.sql
+++ /dev/null
@@ -1,10 +0,0 @@
-{{ config(
-    schema='access'
-) }}
-
-select distinct
-  participant_id as subject_id,
-  'KF participant' as subject_type, -- adding in KF as a source identifier
-  external_id
--- organism_type -- not sure if this is needed. could be set to 'human' if needed
-from {{ ref('stable_participant') }}
\ No newline at end of file
diff --git a/dbt_project/models/access/subject_assertation.sql b/dbt_project/models/access/subject_assertation.sql
deleted file mode 100644
index cd98781..0000000
--- a/dbt_project/models/access/subject_assertation.sql
+++ /dev/null
@@ -1,40 +0,0 @@
-{{ config(
-    schema='access'
-) }}
-
-(
-  select distinct 
-    diagnosis_id as assertion_id, 
-    participant_id as subject_id, 
-    -- assertion_provenance, -- don't think we have this available in KF
-    age_at_event_days as age_at_assertion, -- could also use age_at_event
-    -- age_at_event,
-    -- age_at_resolution,
-    mondo_id_diagnosis as concept_source, -- using mondo as the primary ontology for diagnoses
-    -- value_number,
-    source_text_diagnosis as value_source -- not sure this is right place
-  -- value_units,
-  -- value_units_source
-  from {{ ref('stable_diagnosis') }}
-
-)
-
-union all
-
-(
-  select distinct 
-    phenotype_id as assertion_id, 
-    participant_id as subject_id, 
-    -- assertion_provenance, -- don't think we have this available in KF
-    age_at_event_days as age_at_assertion, -- could also use age_at_event
-    -- age_at_event,
-    -- age_at_resolution,
-    hpo_id_phenotype as concept_source, -- using hpo as the primary ontology for phenotypes
-    -- value_number,
-    source_text_phenotype as value_source -- not sure this is right place
-  -- value_units,
-  -- value_units_source
-  from {{ ref('stable_phenotype') }}
-)
-
-
diff --git a/dbt_project/models/kids_first/sd_1nns3k8v/src/kf_sd_1nns3k8v_src_clinical.sql b/dbt_project/models/kids_first/sd_1nns3k8v/src/kf_sd_1nns3k8v_src_clinical.sql
new file mode 100644
index 0000000..0efe7e1
--- /dev/null
+++ b/dbt_project/models/kids_first/sd_1nns3k8v/src/kf_sd_1nns3k8v_src_clinical.sql
@@ -0,0 +1,6 @@
+{{ config(
+    schema='src'
+) }}
+
+select * -- pass-through: exposes every column of kids_first_update unchanged; renaming happens in kf_sd_1nns3k8v_int_clinical
+from {{ ref('kids_first_update') }}
\ No newline at end of file
diff --git a/dbt_project/models/kids_first/sd_1nns3k8v/src/kf_sd_1nns3k8v_src_s3.sql b/dbt_project/models/kids_first/sd_1nns3k8v/src/kf_sd_1nns3k8v_src_s3.sql
new file mode 100644
index 0000000..e06c9a4
--- /dev/null
+++ b/dbt_project/models/kids_first/sd_1nns3k8v/src/kf_sd_1nns3k8v_src_s3.sql
@@ -0,0 +1,6 @@
+{{ config(
+    schema='src'
+) }}
+
+select * -- pass-through: exposes every column of s3_scrape_cody unchanged
+from {{ ref('s3_scrape_cody') }}
\ No newline at end of file
diff --git a/dbt_project/models/kids_first/sd_1nns3k8v/src/kf_sd_1nns3k8v_src_sequencing.sql b/dbt_project/models/kids_first/sd_1nns3k8v/src/kf_sd_1nns3k8v_src_sequencing.sql
new file mode 100644
index 0000000..a0ddfdd
--- /dev/null
+++ b/dbt_project/models/kids_first/sd_1nns3k8v/src/kf_sd_1nns3k8v_src_sequencing.sql
@@ -0,0 +1,6 @@
+{{ config(
+    schema='src'
+) }}
+
+select * -- pass-through: exposes every column of sample unchanged
+from {{ ref('sample') }}
\ No newline at end of file

From 4e2b0fd2dd9b9663cdef7c37672016fed574e226 Mon Sep 17 00:00:00 2001
From: Amanda Warkow <warkowa@chop.edu>
Date: Thu, 22 Jan 2026 19:13:08 -0500
Subject: [PATCH 02/12] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20fix=20mismatched=20f?=
 =?UTF-8?q?ilenames?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../sd_1nns3k8v/int/kf_sd_1nns3k8v_int_clinical.sql         | 6 ++++++
 .../kids_first/sd_1nns3k8v/int/kf_sd_1nns3k8v_int_s3.sql    | 6 ++++++
 .../sd_1nns3k8v/int/kf_sd_1nns3k8v_int_sequencing.sql       | 6 ++++++
 .../sd_1nns3k8v_test/int/kf_sd_1nns3k8v_int_bsgf.sql        | 2 +-
 .../sd_1nns3k8v_test/int/kf_sd_1nns3k8v_int_diagnosis.sql   | 2 +-
 .../sd_1nns3k8v_test/int/kf_sd_1nns3k8v_int_family.sql      | 2 +-
 .../int/kf_sd_1nns3k8v_int_genomic_files.sql                | 2 +-
 .../int/kf_sd_1nns3k8v_int_investigator.sql                 | 2 +-
 .../sd_1nns3k8v_test/int/kf_sd_1nns3k8v_int_participant.sql | 2 +-
 .../sd_1nns3k8v_test/int/kf_sd_1nns3k8v_int_phenotype.sql   | 2 +-
 .../sd_1nns3k8v_test/int/kf_sd_1nns3k8v_int_segf.sql        | 2 +-
 .../int/kf_sd_1nns3k8v_int_sequencing_experiment.sql        | 2 +-
 .../sd_1nns3k8v_test/int/kf_sd_1nns3k8v_int_specimens.sql   | 2 +-
 .../sd_1nns3k8v_test/int/kf_sd_1nns3k8v_int_study.sql       | 2 +-
 .../sd_1nns3k8v_test/stable/kf_sd_1nns3k8v_stable_bsgf.sql  | 2 +-
 .../stable/kf_sd_1nns3k8v_stable_diagnosis.sql              | 2 +-
 .../stable/kf_sd_1nns3k8v_stable_family.sql                 | 2 +-
 .../stable/kf_sd_1nns3k8v_stable_genomic_file.sql           | 2 +-
 .../stable/kf_sd_1nns3k8v_stable_investigator.sql           | 2 +-
 .../stable/kf_sd_1nns3k8v_stable_participant.sql            | 2 +-
 .../stable/kf_sd_1nns3k8v_stable_phenotype.sql              | 2 +-
 .../sd_1nns3k8v_test/stable/kf_sd_1nns3k8v_stable_segf.sql  | 2 +-
 .../stable/kf_sd_1nns3k8v_stable_sequencing_experiment.sql  | 2 +-
 .../stable/kf_sd_1nns3k8v_stable_specimens.sql              | 2 +-
 .../sd_1nns3k8v_test/stable/kf_sd_1nns3k8v_stable_study.sql | 2 +-
 25 files changed, 40 insertions(+), 22 deletions(-)
 create mode 100644 dbt_project/models/kids_first/sd_1nns3k8v/int/kf_sd_1nns3k8v_int_clinical.sql
 create mode 100644 dbt_project/models/kids_first/sd_1nns3k8v/int/kf_sd_1nns3k8v_int_s3.sql
 create mode 100644 dbt_project/models/kids_first/sd_1nns3k8v/int/kf_sd_1nns3k8v_int_sequencing.sql

diff --git a/dbt_project/models/kids_first/sd_1nns3k8v/int/kf_sd_1nns3k8v_int_clinical.sql b/dbt_project/models/kids_first/sd_1nns3k8v/int/kf_sd_1nns3k8v_int_clinical.sql
new file mode 100644
index 0000000..0efe7e1
--- /dev/null
+++ b/dbt_project/models/kids_first/sd_1nns3k8v/int/kf_sd_1nns3k8v_int_clinical.sql
@@ -0,0 +1,6 @@
+{{ config(
+    schema='src'
+) }}
+
+select * 
+from {{ ref('kids_first_update') }}
\ No newline at end of file
diff --git a/dbt_project/models/kids_first/sd_1nns3k8v/int/kf_sd_1nns3k8v_int_s3.sql b/dbt_project/models/kids_first/sd_1nns3k8v/int/kf_sd_1nns3k8v_int_s3.sql
new file mode 100644
index 0000000..e06c9a4
--- /dev/null
+++ b/dbt_project/models/kids_first/sd_1nns3k8v/int/kf_sd_1nns3k8v_int_s3.sql
@@ -0,0 +1,6 @@
+{{ config(
+    schema='src'
+) }}
+
+select * 
+from {{ ref('s3_scrape_cody') }}
\ No newline at end of file
diff --git a/dbt_project/models/kids_first/sd_1nns3k8v/int/kf_sd_1nns3k8v_int_sequencing.sql b/dbt_project/models/kids_first/sd_1nns3k8v/int/kf_sd_1nns3k8v_int_sequencing.sql
new file mode 100644
index 0000000..a0ddfdd
--- /dev/null
+++ b/dbt_project/models/kids_first/sd_1nns3k8v/int/kf_sd_1nns3k8v_int_sequencing.sql
@@ -0,0 +1,6 @@
+{{ config(
+    schema='src'
+) }}
+
+select * 
+from {{ ref('sample') }}
\ No newline at end of file
diff --git a/dbt_project/models/kids_first/sd_1nns3k8v_test/int/kf_sd_1nns3k8v_int_bsgf.sql b/dbt_project/models/kids_first/sd_1nns3k8v_test/int/kf_sd_1nns3k8v_int_bsgf.sql
index 1880852..903b922 100644
--- a/dbt_project/models/kids_first/sd_1nns3k8v_test/int/kf_sd_1nns3k8v_int_bsgf.sql
+++ b/dbt_project/models/kids_first/sd_1nns3k8v_test/int/kf_sd_1nns3k8v_int_bsgf.sql
@@ -16,5 +16,5 @@ select distinct
   external_id, -- i think we can leave this out - it's rarely populated/used, 
   visibility_reason,
   visibility_comment
-from {{ ref('src_bsgf') }}
+from {{ ref('kf_sd_1nns3k8v_src_bsgf') }}
 
diff --git a/dbt_project/models/kids_first/sd_1nns3k8v_test/int/kf_sd_1nns3k8v_int_diagnosis.sql b/dbt_project/models/kids_first/sd_1nns3k8v_test/int/kf_sd_1nns3k8v_int_diagnosis.sql
index c1f0f61..493eae3 100644
--- a/dbt_project/models/kids_first/sd_1nns3k8v_test/int/kf_sd_1nns3k8v_int_diagnosis.sql
+++ b/dbt_project/models/kids_first/sd_1nns3k8v_test/int/kf_sd_1nns3k8v_int_diagnosis.sql
@@ -20,4 +20,4 @@ select distinct
   uberon_id_tumor_location,
   spatial_descriptor
    
-from {{ ref('src_diagnosis') }}
\ No newline at end of file
+from {{ ref('kf_sd_1nns3k8v_src_diagnosis') }}
\ No newline at end of file
diff --git a/dbt_project/models/kids_first/sd_1nns3k8v_test/int/kf_sd_1nns3k8v_int_family.sql b/dbt_project/models/kids_first/sd_1nns3k8v_test/int/kf_sd_1nns3k8v_int_family.sql
index cbf2906..42c91ee 100644
--- a/dbt_project/models/kids_first/sd_1nns3k8v_test/int/kf_sd_1nns3k8v_int_family.sql
+++ b/dbt_project/models/kids_first/sd_1nns3k8v_test/int/kf_sd_1nns3k8v_int_family.sql
@@ -13,4 +13,4 @@ select distinct
   visible,
   visibility_reason,
   visibility_comment
-from {{ ref('src_family') }}
\ No newline at end of file
+from {{ ref('kf_sd_1nns3k8v_src_family') }}
\ No newline at end of file
diff --git a/dbt_project/models/kids_first/sd_1nns3k8v_test/int/kf_sd_1nns3k8v_int_genomic_files.sql b/dbt_project/models/kids_first/sd_1nns3k8v_test/int/kf_sd_1nns3k8v_int_genomic_files.sql
index f34cf82..e223ec9 100644
--- a/dbt_project/models/kids_first/sd_1nns3k8v_test/int/kf_sd_1nns3k8v_int_genomic_files.sql
+++ b/dbt_project/models/kids_first/sd_1nns3k8v_test/int/kf_sd_1nns3k8v_int_genomic_files.sql
@@ -32,4 +32,4 @@ select distinct
   cavatica_file_id, 
   cavatica_volume
 
-from {{ ref('src_genomic_files') }}
\ No newline at end of file
+from {{ ref('kf_sd_1nns3k8v_src_genomic_files') }}
\ No newline at end of file
diff --git a/dbt_project/models/kids_first/sd_1nns3k8v_test/int/kf_sd_1nns3k8v_int_investigator.sql b/dbt_project/models/kids_first/sd_1nns3k8v_test/int/kf_sd_1nns3k8v_int_investigator.sql
index f0719b9..60fc967 100644
--- a/dbt_project/models/kids_first/sd_1nns3k8v_test/int/kf_sd_1nns3k8v_int_investigator.sql
+++ b/dbt_project/models/kids_first/sd_1nns3k8v_test/int/kf_sd_1nns3k8v_int_investigator.sql
@@ -14,4 +14,4 @@ select distinct
   visible,
   visibility_reason,
   visibility_comment
-from {{ ref('src_investigator') }}
\ No newline at end of file
+from {{ ref('kf_sd_1nns3k8v_src_investigator') }}
\ No newline at end of file
diff --git a/dbt_project/models/kids_first/sd_1nns3k8v_test/int/kf_sd_1nns3k8v_int_participant.sql b/dbt_project/models/kids_first/sd_1nns3k8v_test/int/kf_sd_1nns3k8v_int_participant.sql
index 7327230..51c2866 100644
--- a/dbt_project/models/kids_first/sd_1nns3k8v_test/int/kf_sd_1nns3k8v_int_participant.sql
+++ b/dbt_project/models/kids_first/sd_1nns3k8v_test/int/kf_sd_1nns3k8v_int_participant.sql
@@ -22,4 +22,4 @@ select distinct
   visible,
   visibility_reason, -- can we standardize this a bit more? maybe release status instead of reason? and try to standardize more? 
   visibility_comment
-from {{ ref('src_participant') }}
\ No newline at end of file
+from {{ ref('kf_sd_1nns3k8v_src_participant') }}
\ No newline at end of file
diff --git a/dbt_project/models/kids_first/sd_1nns3k8v_test/int/kf_sd_1nns3k8v_int_phenotype.sql b/dbt_project/models/kids_first/sd_1nns3k8v_test/int/kf_sd_1nns3k8v_int_phenotype.sql
index b755230..f7613d4 100644
--- a/dbt_project/models/kids_first/sd_1nns3k8v_test/int/kf_sd_1nns3k8v_int_phenotype.sql
+++ b/dbt_project/models/kids_first/sd_1nns3k8v_test/int/kf_sd_1nns3k8v_int_phenotype.sql
@@ -16,4 +16,4 @@ select distinct
   snomed_id_phenotype,
   external_id
     
-from {{ ref('src_phenotype') }}
+from {{ ref('kf_sd_1nns3k8v_src_phenotype') }}
diff --git a/dbt_project/models/kids_first/sd_1nns3k8v_test/int/kf_sd_1nns3k8v_int_segf.sql b/dbt_project/models/kids_first/sd_1nns3k8v_test/int/kf_sd_1nns3k8v_int_segf.sql
index 05f30ff..446d4f5 100644
--- a/dbt_project/models/kids_first/sd_1nns3k8v_test/int/kf_sd_1nns3k8v_int_segf.sql
+++ b/dbt_project/models/kids_first/sd_1nns3k8v_test/int/kf_sd_1nns3k8v_int_segf.sql
@@ -13,4 +13,4 @@ select distinct
   kf_id as segf_id,
   visibility_reason,
   visibility_comment 
-from {{ ref ('src_segf') }}
\ No newline at end of file
+from {{ ref ('kf_sd_1nns3k8v_src_segf') }}
\ No newline at end of file
diff --git a/dbt_project/models/kids_first/sd_1nns3k8v_test/int/kf_sd_1nns3k8v_int_sequencing_experiment.sql b/dbt_project/models/kids_first/sd_1nns3k8v_test/int/kf_sd_1nns3k8v_int_sequencing_experiment.sql
index 749e045..42e4060 100644
--- a/dbt_project/models/kids_first/sd_1nns3k8v_test/int/kf_sd_1nns3k8v_int_sequencing_experiment.sql
+++ b/dbt_project/models/kids_first/sd_1nns3k8v_test/int/kf_sd_1nns3k8v_int_sequencing_experiment.sql
@@ -55,4 +55,4 @@ select distinct
   umi_barcode_read,
   umi_barcode_size
 
-from {{ ref('src_sequencing_experiments') }}
\ No newline at end of file
+from {{ ref('kf_sd_1nns3k8v_src_sequencing_experiments') }}
\ No newline at end of file
diff --git a/dbt_project/models/kids_first/sd_1nns3k8v_test/int/kf_sd_1nns3k8v_int_specimens.sql b/dbt_project/models/kids_first/sd_1nns3k8v_test/int/kf_sd_1nns3k8v_int_specimens.sql
index 7ec9c69..f7dc671 100644
--- a/dbt_project/models/kids_first/sd_1nns3k8v_test/int/kf_sd_1nns3k8v_int_specimens.sql
+++ b/dbt_project/models/kids_first/sd_1nns3k8v_test/int/kf_sd_1nns3k8v_int_specimens.sql
@@ -43,4 +43,4 @@ select distinct
   amount,
   amount_units,
   cell_entity 
-from {{ ref('src_specimens') }}
\ No newline at end of file
+from {{ ref('kf_sd_1nns3k8v_src_specimens') }}
\ No newline at end of file
diff --git a/dbt_project/models/kids_first/sd_1nns3k8v_test/int/kf_sd_1nns3k8v_int_study.sql b/dbt_project/models/kids_first/sd_1nns3k8v_test/int/kf_sd_1nns3k8v_int_study.sql
index 57f527f..7645f82 100644
--- a/dbt_project/models/kids_first/sd_1nns3k8v_test/int/kf_sd_1nns3k8v_int_study.sql
+++ b/dbt_project/models/kids_first/sd_1nns3k8v_test/int/kf_sd_1nns3k8v_int_study.sql
@@ -27,4 +27,4 @@ select distinct
   biobank_name, -- has been NA for kids first, only used for CBTN, should we keep? 
   biobank_request_instructions, -- has been NA for kids first, only used for CBTN, should we keep? 
   biobank_request_link -- has been NA for kids first, only used for CBTN, should we keep? 
-from {{ ref('src_study') }}
\ No newline at end of file
+from {{ ref('kf_sd_1nns3k8v_src_study') }}
\ No newline at end of file
diff --git a/dbt_project/models/kids_first/sd_1nns3k8v_test/stable/kf_sd_1nns3k8v_stable_bsgf.sql b/dbt_project/models/kids_first/sd_1nns3k8v_test/stable/kf_sd_1nns3k8v_stable_bsgf.sql
index 77691a7..5efcb54 100644
--- a/dbt_project/models/kids_first/sd_1nns3k8v_test/stable/kf_sd_1nns3k8v_stable_bsgf.sql
+++ b/dbt_project/models/kids_first/sd_1nns3k8v_test/stable/kf_sd_1nns3k8v_stable_bsgf.sql
@@ -16,4 +16,4 @@ select distinct
   external_id, -- i think we can leave this out - it's rarely populated/used, 
   visibility_reason,
   visibility_comment
-from {{ ref('int_bsgf') }}
\ No newline at end of file
+from {{ ref('kf_sd_1nns3k8v_int_bsgf') }}
\ No newline at end of file
diff --git a/dbt_project/models/kids_first/sd_1nns3k8v_test/stable/kf_sd_1nns3k8v_stable_diagnosis.sql b/dbt_project/models/kids_first/sd_1nns3k8v_test/stable/kf_sd_1nns3k8v_stable_diagnosis.sql
index 95a8076..cf672d0 100644
--- a/dbt_project/models/kids_first/sd_1nns3k8v_test/stable/kf_sd_1nns3k8v_stable_diagnosis.sql
+++ b/dbt_project/models/kids_first/sd_1nns3k8v_test/stable/kf_sd_1nns3k8v_stable_diagnosis.sql
@@ -19,4 +19,4 @@ select distinct
   uberon_id_tumor_location,
   spatial_descriptor
    
-from {{ ref('int_diagnosis') }}
\ No newline at end of file
+from {{ ref('kf_sd_1nns3k8v_int_diagnosis') }}
\ No newline at end of file
diff --git a/dbt_project/models/kids_first/sd_1nns3k8v_test/stable/kf_sd_1nns3k8v_stable_family.sql b/dbt_project/models/kids_first/sd_1nns3k8v_test/stable/kf_sd_1nns3k8v_stable_family.sql
index 9c68b90..f64c0ea 100644
--- a/dbt_project/models/kids_first/sd_1nns3k8v_test/stable/kf_sd_1nns3k8v_stable_family.sql
+++ b/dbt_project/models/kids_first/sd_1nns3k8v_test/stable/kf_sd_1nns3k8v_stable_family.sql
@@ -10,6 +10,6 @@ select distinct
   visible,
   visibility_reason,
   visibility_comment
-from {{ ref('int_family') }}
+from {{ ref('kf_sd_1nns3k8v_int_family') }}
 
 
diff --git a/dbt_project/models/kids_first/sd_1nns3k8v_test/stable/kf_sd_1nns3k8v_stable_genomic_file.sql b/dbt_project/models/kids_first/sd_1nns3k8v_test/stable/kf_sd_1nns3k8v_stable_genomic_file.sql
index 48ce216..e40e1b1 100644
--- a/dbt_project/models/kids_first/sd_1nns3k8v_test/stable/kf_sd_1nns3k8v_stable_genomic_file.sql
+++ b/dbt_project/models/kids_first/sd_1nns3k8v_test/stable/kf_sd_1nns3k8v_stable_genomic_file.sql
@@ -27,4 +27,4 @@ select distinct
   file_version_descriptor,
   cavatica_volume
 
-from {{ ref('int_genomic_files') }}
\ No newline at end of file
+from {{ ref('kf_sd_1nns3k8v_int_genomic_files') }}
\ No newline at end of file
diff --git a/dbt_project/models/kids_first/sd_1nns3k8v_test/stable/kf_sd_1nns3k8v_stable_investigator.sql b/dbt_project/models/kids_first/sd_1nns3k8v_test/stable/kf_sd_1nns3k8v_stable_investigator.sql
index 0a3151a..0a8a032 100644
--- a/dbt_project/models/kids_first/sd_1nns3k8v_test/stable/kf_sd_1nns3k8v_stable_investigator.sql
+++ b/dbt_project/models/kids_first/sd_1nns3k8v_test/stable/kf_sd_1nns3k8v_stable_investigator.sql
@@ -14,4 +14,4 @@ select distinct
   visible,
   visibility_reason,
   visibility_comment
-from {{ ref('int_investigator') }}
\ No newline at end of file
+from {{ ref('kf_sd_1nns3k8v_int_investigator') }}
\ No newline at end of file
diff --git a/dbt_project/models/kids_first/sd_1nns3k8v_test/stable/kf_sd_1nns3k8v_stable_participant.sql b/dbt_project/models/kids_first/sd_1nns3k8v_test/stable/kf_sd_1nns3k8v_stable_participant.sql
index a10b326..d283148 100644
--- a/dbt_project/models/kids_first/sd_1nns3k8v_test/stable/kf_sd_1nns3k8v_stable_participant.sql
+++ b/dbt_project/models/kids_first/sd_1nns3k8v_test/stable/kf_sd_1nns3k8v_stable_participant.sql
@@ -21,4 +21,4 @@ select distinct
   visible,
   visibility_reason, 
   visibility_comment
-from {{ ref('int_participant') }}
\ No newline at end of file
+from {{ ref('kf_sd_1nns3k8v_int_participant') }}
\ No newline at end of file
diff --git a/dbt_project/models/kids_first/sd_1nns3k8v_test/stable/kf_sd_1nns3k8v_stable_phenotype.sql b/dbt_project/models/kids_first/sd_1nns3k8v_test/stable/kf_sd_1nns3k8v_stable_phenotype.sql
index 057de7b..b38262b 100644
--- a/dbt_project/models/kids_first/sd_1nns3k8v_test/stable/kf_sd_1nns3k8v_stable_phenotype.sql
+++ b/dbt_project/models/kids_first/sd_1nns3k8v_test/stable/kf_sd_1nns3k8v_stable_phenotype.sql
@@ -13,4 +13,4 @@ select distinct
   observed,
   snomed_id_phenotype,
   external_id
-from {{ ref('int_phenotype') }}
+from {{ ref('kf_sd_1nns3k8v_int_phenotype') }}
diff --git a/dbt_project/models/kids_first/sd_1nns3k8v_test/stable/kf_sd_1nns3k8v_stable_segf.sql b/dbt_project/models/kids_first/sd_1nns3k8v_test/stable/kf_sd_1nns3k8v_stable_segf.sql
index 7bf0a44..2677a68 100644
--- a/dbt_project/models/kids_first/sd_1nns3k8v_test/stable/kf_sd_1nns3k8v_stable_segf.sql
+++ b/dbt_project/models/kids_first/sd_1nns3k8v_test/stable/kf_sd_1nns3k8v_stable_segf.sql
@@ -13,4 +13,4 @@ select distinct
   segf_id,
   visibility_reason,
   visibility_comment 
-from {{ ref ('int_segf') }}
\ No newline at end of file
+from {{ ref ('kf_sd_1nns3k8v_int_segf') }}
\ No newline at end of file
diff --git a/dbt_project/models/kids_first/sd_1nns3k8v_test/stable/kf_sd_1nns3k8v_stable_sequencing_experiment.sql b/dbt_project/models/kids_first/sd_1nns3k8v_test/stable/kf_sd_1nns3k8v_stable_sequencing_experiment.sql
index ba14384..74fdaab 100644
--- a/dbt_project/models/kids_first/sd_1nns3k8v_test/stable/kf_sd_1nns3k8v_stable_sequencing_experiment.sql
+++ b/dbt_project/models/kids_first/sd_1nns3k8v_test/stable/kf_sd_1nns3k8v_stable_sequencing_experiment.sql
@@ -53,4 +53,4 @@ select distinct
   umi_barcode_offset,
   umi_barcode_read,
   umi_barcode_size
-from {{ ref('int_sequencing_experiment') }}
+from {{ ref('kf_sd_1nns3k8v_int_sequencing_experiment') }}
diff --git a/dbt_project/models/kids_first/sd_1nns3k8v_test/stable/kf_sd_1nns3k8v_stable_specimens.sql b/dbt_project/models/kids_first/sd_1nns3k8v_test/stable/kf_sd_1nns3k8v_stable_specimens.sql
index 9a28948..d651227 100644
--- a/dbt_project/models/kids_first/sd_1nns3k8v_test/stable/kf_sd_1nns3k8v_stable_specimens.sql
+++ b/dbt_project/models/kids_first/sd_1nns3k8v_test/stable/kf_sd_1nns3k8v_stable_specimens.sql
@@ -40,4 +40,4 @@ select distinct
   amount,
   amount_units,
   cell_entity 
-from {{ ref('int_specimens') }}
\ No newline at end of file
+from {{ ref('kf_sd_1nns3k8v_int_specimens') }}
\ No newline at end of file
diff --git a/dbt_project/models/kids_first/sd_1nns3k8v_test/stable/kf_sd_1nns3k8v_stable_study.sql b/dbt_project/models/kids_first/sd_1nns3k8v_test/stable/kf_sd_1nns3k8v_stable_study.sql
index 615fbc1..77d94a9 100644
--- a/dbt_project/models/kids_first/sd_1nns3k8v_test/stable/kf_sd_1nns3k8v_stable_study.sql
+++ b/dbt_project/models/kids_first/sd_1nns3k8v_test/stable/kf_sd_1nns3k8v_stable_study.sql
@@ -27,4 +27,4 @@ select distinct
   biobank_name, -- has been NA for kids first, only used for CBTN, should we keep? 
   biobank_request_instructions, -- has been NA for kids first, only used for CBTN, should we keep? 
   biobank_request_link -- has been NA for kids first, only used for CBTN, should we keep? 
-from {{ ref('int_study') }}
\ No newline at end of file
+from {{ ref('kf_sd_1nns3k8v_int_study') }}
\ No newline at end of file

From 22e1de3ebd3030dcce973eabd553e6c0312df828 Mon Sep 17 00:00:00 2001
From: Amanda Warkow <warkowa@chop.edu>
Date: Fri, 23 Jan 2026 12:57:36 -0500
Subject: [PATCH 03/12] =?UTF-8?q?=E2=9C=A8=20add=20int=20models?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../int/kf_sd_1nns3k8v_int_clinical.sql       | 21 ++++++++++--
 .../sd_1nns3k8v/int/kf_sd_1nns3k8v_int_s3.sql | 10 ++++--
 .../int/kf_sd_1nns3k8v_int_sequencing.sql     | 32 +++++++++++++++++--
 3 files changed, 55 insertions(+), 8 deletions(-)

diff --git a/dbt_project/models/kids_first/sd_1nns3k8v/int/kf_sd_1nns3k8v_int_clinical.sql b/dbt_project/models/kids_first/sd_1nns3k8v/int/kf_sd_1nns3k8v_int_clinical.sql
index 0efe7e1..1cacf62 100644
--- a/dbt_project/models/kids_first/sd_1nns3k8v/int/kf_sd_1nns3k8v_int_clinical.sql
+++ b/dbt_project/models/kids_first/sd_1nns3k8v/int/kf_sd_1nns3k8v_int_clinical.sql
@@ -1,6 +1,21 @@
 {{ config(
-    schema='src'
+    schema='int'
 ) }}
 
-select * 
-from {{ ref('kids_first_update') }}
\ No newline at end of file
+select distinct -- rename the quoted source headers to snake_case and drop exact duplicate rows
+    "Family ID" as family_id,
+    "Participant ID" as participant_id,
+    "Family Relationship Target Participant ID" as family_relationship_target_participant_id,
+    "alliquot_id" as aliquot_id, -- NOTE(review): source column appears to be spelled "alliquot_id" upstream; the alias corrects it - confirm
+    "Consent Group" as consent_group,
+    "Age at Sample" as age_at_sample,
+    "Sample Composition" as sample_composition,
+    "Sample Tissue Type" as sample_tissue_type,
+    "Race" as race,
+    "Sex" as sex,
+    "Ethnicity" as ethnicity,
+    "Analyte Type" as analyte_type,
+    "Age at Phenotype" as age_at_phenotype,
+    "Affected Status" as affected_status, 
+    "Vital Status" as vital_status
+from {{ ref('kf_sd_1nns3k8v_src_clinical') }}
\ No newline at end of file
diff --git a/dbt_project/models/kids_first/sd_1nns3k8v/int/kf_sd_1nns3k8v_int_s3.sql b/dbt_project/models/kids_first/sd_1nns3k8v/int/kf_sd_1nns3k8v_int_s3.sql
index e06c9a4..4873529 100644
--- a/dbt_project/models/kids_first/sd_1nns3k8v/int/kf_sd_1nns3k8v_int_s3.sql
+++ b/dbt_project/models/kids_first/sd_1nns3k8v/int/kf_sd_1nns3k8v_int_s3.sql
@@ -1,6 +1,12 @@
 {{ config(
-    schema='src'
+    schema='int'
 ) }}
 
-select * 
+select distinct -- rename the quoted S3 listing headers to snake_case and drop exact duplicate rows
+    "Bucket" as bucket,
+    "Key" as key,
+    "LastModified" as last_modified,
+    "ETag" as etag,
+    "Size" as size,
+    "StorageClass" as storage_class
-from {{ ref('s3_scrape_cody') }}
\ No newline at end of file
+from {{ ref('kf_sd_1nns3k8v_src_s3') }} -- read through the study's src model (as int_clinical does) so src_s3 stays in the lineage
\ No newline at end of file
diff --git a/dbt_project/models/kids_first/sd_1nns3k8v/int/kf_sd_1nns3k8v_int_sequencing.sql b/dbt_project/models/kids_first/sd_1nns3k8v/int/kf_sd_1nns3k8v_int_sequencing.sql
index a0ddfdd..ecccf19 100644
--- a/dbt_project/models/kids_first/sd_1nns3k8v/int/kf_sd_1nns3k8v_int_sequencing.sql
+++ b/dbt_project/models/kids_first/sd_1nns3k8v/int/kf_sd_1nns3k8v_int_sequencing.sql
@@ -1,6 +1,32 @@
 {{ config(
-    schema='src'
+    schema='int'
 ) }}
 
-select * 
-from {{ ref('sample') }}
\ No newline at end of file
+select distinct
+    "entity:sample_id" as sample_id,
+    "project" as project,
+    "collaborator_sample_id" as collaborator_sample_id,
+    -- "version" as version,
+    "data_type" as data_type,
+    -- "pdo" as pdo,
+    -- "release_date" as release_date,
+    -- "reference_sequence_name" as reference_sequence_name,
+    "collaborator_participant_id" as collaborator_participant_id,
+    "cram_path" as cram_path,
+    "crai_path" as crai_path,
+    "md5_path" as md5_path,
+    "root_sample_id" as root_sample_id,
+    "mean_coverage" as mean_coverage,
+    "adapter_rate" as adapter_rate,
+    -- "pf_hq_aligned_q20_bases" as pf_hq_aligned_q20_bases,
+    -- "pf_hq_aligned_reads" as pf_hq_aligned_reads,
+    -- "pf_mismatch_rate" as pf_mismatch_rate,
+    -- "pf_noise_reads" as pf_noise_reads,
+    -- "pf_reads" as pf_reads,
+    -- "pf_reads_aligned" as pf_reads_aligned,
+    "total_reads" as total_reads,
+    "mean_read_length" as mean_read_length,
+    -- "pf_reads_rate" as pf_reads_rate,
+    -- "pf_reads_aligned_rate" as pf_reads_aligned_rate,
+    -- "pf_aligned_bases" as pf_aligned_bases,
+    -- "reads_aligned_in_pairs" as reads_aligned_in_pairs,
\ No newline at end of file

From e53c69a920e2024147e3772a67c879169382ffc9 Mon Sep 17 00:00:00 2001
From: Amanda Warkow <warkowa@chop.edu>
Date: Fri, 23 Jan 2026 13:33:33 -0500
Subject: [PATCH 04/12] =?UTF-8?q?=F0=9F=93=9D=20add=20docs?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../sd_1nns3k8v/int/kf_sd_1nns3k8v_int.yml    | 91 +++++++++++++++++++
 .../int/kf_sd_1nns3k8v_int_sequencing.sql     |  2 +-
 2 files changed, 92 insertions(+), 1 deletion(-)
 create mode 100644 dbt_project/models/kids_first/sd_1nns3k8v/int/kf_sd_1nns3k8v_int.yml

diff --git a/dbt_project/models/kids_first/sd_1nns3k8v/int/kf_sd_1nns3k8v_int.yml b/dbt_project/models/kids_first/sd_1nns3k8v/int/kf_sd_1nns3k8v_int.yml
new file mode 100644
index 0000000..ab4bd47
--- /dev/null
+++ b/dbt_project/models/kids_first/sd_1nns3k8v/int/kf_sd_1nns3k8v_int.yml
@@ -0,0 +1,91 @@
+version: 2
+
+models: 
+
+- name: kf_sd_1nns3k8v_int_clinical
+  description: '{{ doc("int_clinical") }}'
+  config:
+    meta:
+      study: kf_sd_1nns3k8v
+  columns: 
+    - name: family_id
+      description: '{{ doc("family_id") }}'
+    - name: participant_id
+      description: '{{ doc("participant_id") }}'
+    - name: family_relationship_target_participant_id
+      description: '{{ doc("family_relationship_target_participant_id") }}'
+    - name: aliquot_id
+      description: '{{ doc("aliquot_id") }}'
+    - name: consent_group
+      description: '{{ doc("consent_group") }}'
+    - name: age_at_sample
+      description: '{{ doc("age_at_sample") }}'
+    - name: sample_composition
+      description: '{{ doc("sample_composition") }}'
+    - name: sample_tissue_type
+      description: '{{ doc("sample_tissue_type") }}'
+    - name: race
+      description: '{{ doc("race") }}'
+    - name: sex
+      description: '{{ doc("sex") }}'
+    - name: ethnicity
+      description: '{{ doc("ethnicity") }}'
+    - name: analyte_type
+      description: '{{ doc("analyte_type") }}'
+    - name: age_at_phenotype
+      description: '{{ doc("age_at_phenotype") }}'
+    - name: affected_status
+      description: '{{ doc("affected_status") }}'
+    - name: vital_status
+      description: '{{ doc("vital_status") }}'
+
+- name: kf_sd_1nns3k8v_int_sequencing
+  description: '{{ doc("int_sequencing") }}'
+  config:
+    meta:
+      study: kf_sd_1nns3k8v
+  columns: 
+    - name: sample_id
+      description: '{{ doc("sample_id") }}'
+    - name: project
+      description: '{{ doc("project") }}'
+    - name: collaborator_sample_id
+      description: '{{ doc("collaborator_sample_id") }}'
+    - name: data_type
+      description: '{{ doc("data_type") }}'
+    - name: collaborator_participant_id
+      description: '{{ doc("collaborator_participant_id") }}'
+    - name: cram_path
+      description: '{{ doc("cram_path") }}'
+    - name: crai_path
+      description: '{{ doc("crai_path") }}'
+    - name: md5_path
+      description: '{{ doc("md5_path") }}'
+    - name: root_sample_id
+      description: '{{ doc("root_sample_id") }}'
+    - name: mean_coverage
+      description: '{{ doc("mean_coverage") }}'
+    - name: total_reads
+      description: '{{ doc("total_reads") }}'
+    - name: mean_read_length
+      description: '{{ doc("mean_read_length") }}'
+    
+
+- name: kf_sd_1nns3k8v_int_s3
+  description: '{{ doc("int_s3") }}'
+  config:
+    meta:
+      study: kf_sd_1nns3k8v
+  columns: 
+    - name: bucket
+      description: '{{ doc("bucket") }}'
+    - name: key
+      description: '{{ doc("key") }}'
+    - name: last_modified
+      description: '{{ doc("last_modified") }}'
+    - name: etag
+      description: '{{ doc("etag") }}'
+    - name: size
+      description: '{{ doc("size") }}'
+    - name: storage_class
+      description: '{{ doc("storage_class") }}'
\ No newline at end of file
diff --git a/dbt_project/models/kids_first/sd_1nns3k8v/int/kf_sd_1nns3k8v_int_sequencing.sql b/dbt_project/models/kids_first/sd_1nns3k8v/int/kf_sd_1nns3k8v_int_sequencing.sql
index ecccf19..33a4b26 100644
--- a/dbt_project/models/kids_first/sd_1nns3k8v/int/kf_sd_1nns3k8v_int_sequencing.sql
+++ b/dbt_project/models/kids_first/sd_1nns3k8v/int/kf_sd_1nns3k8v_int_sequencing.sql
@@ -17,7 +17,7 @@ select distinct
     "md5_path" as md5_path,
     "root_sample_id" as root_sample_id,
     "mean_coverage" as mean_coverage,
-    "adapter_rate" as adapter_rate,
+    -- "adapter_rate" as adapter_rate,
     -- "pf_hq_aligned_q20_bases" as pf_hq_aligned_q20_bases,
     -- "pf_hq_aligned_reads" as pf_hq_aligned_reads,
     -- "pf_mismatch_rate" as pf_mismatch_rate,

From a3098653a3aff2b58a45300e7765487fc344c038 Mon Sep 17 00:00:00 2001
From: Amanda Warkow <warkowa@chop.edu>
Date: Fri, 23 Jan 2026 13:33:52 -0500
Subject: [PATCH 05/12] =?UTF-8?q?=E2=9C=A8=20add=20DAG?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 dags/kids_first/kf_sd_1nns3k8v.py | 36 +++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)
 create mode 100644 dags/kids_first/kf_sd_1nns3k8v.py

diff --git a/dags/kids_first/kf_sd_1nns3k8v.py b/dags/kids_first/kf_sd_1nns3k8v.py
new file mode 100644
index 0000000..4d86a13
--- /dev/null
+++ b/dags/kids_first/kf_sd_1nns3k8v.py
@@ -0,0 +1,36 @@
+from airflow.sdk import Variable
+
+from cosmos import (
+    DbtDag,
+    ProjectConfig,
+    ProfileConfig,
+    ExecutionConfig,
+    RenderConfig,
+)
+from cosmos.profiles import PostgresUserPasswordProfileMapping
+
+profile_config = ProfileConfig(
+    # make sure target_name and profile_mapping align
+    profile_name=Variable.get("DBT_PROFILE_NAME"),  # read from an Airflow Variable when this module is imported
+    target_name="prd",
+    profile_mapping=PostgresUserPasswordProfileMapping(
+        conn_id="postgres_prd_svc",  # Airflow connection id handed to the profile mapping
+        profile_args={"schema": "prd"},
+    ),
+)
+
+example_study_dag = DbtDag(  # NOTE(review): variable name looks carried over from the example study; dag_id below is study-specific
+    project_config=ProjectConfig(
+        Variable.get("DBT_PROJECT_DIR"),  # dbt project path, read from an Airflow Variable
+        install_dbt_deps=True,
+    ),
+    profile_config=profile_config,
+    execution_config=ExecutionConfig(
+        dbt_executable_path=Variable.get("DBT_EXECUTABLE_PATH"),  # dbt binary path, read from an Airflow Variable
+    ),
+    render_config=RenderConfig(select=["config.meta.study:kf_sd_1nns3k8v"]),  # matches the config.meta.study value set in this study's model YAML
+    # normal dag parameters
+    schedule="@daily",
+    dag_id="kf_sd_1nns3k8v_dbt_dag",
+    tags=["POC", "Kids First"],
+)
\ No newline at end of file

From 4f70d82b1bfcc75614469facabb56b336ffce780 Mon Sep 17 00:00:00 2001
From: Amanda Warkow <warkowa@chop.edu>
Date: Fri, 23 Jan 2026 13:43:26 -0500
Subject: [PATCH 06/12] =?UTF-8?q?=F0=9F=93=9D=20add=20missing=20doc?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../sd_1nns3k8v/src/kf_sd_1nns3k8v_src.yml    | 91 +++++++++++++++++++
 1 file changed, 91 insertions(+)
 create mode 100644 dbt_project/models/kids_first/sd_1nns3k8v/src/kf_sd_1nns3k8v_src.yml

diff --git a/dbt_project/models/kids_first/sd_1nns3k8v/src/kf_sd_1nns3k8v_src.yml b/dbt_project/models/kids_first/sd_1nns3k8v/src/kf_sd_1nns3k8v_src.yml
new file mode 100644
index 0000000..7cb7c73
--- /dev/null
+++ b/dbt_project/models/kids_first/sd_1nns3k8v/src/kf_sd_1nns3k8v_src.yml
@@ -0,0 +1,91 @@
+version: 2
+
+models: 
+
+- name: kf_sd_1nns3k8v_src_clinical
+  description: '{{ doc("src_clinical") }}'
+  config:
+    meta:
+      study: kf_sd_1nns3k8v
+  columns: 
+    - name: family_id
+      description: '{{ doc("family_id") }}'
+    - name: participant_id
+      description: '{{ doc("participant_id") }}'
+    - name: family_relationship_target_participant_id
+      description: '{{ doc("family_relationship_target_participant_id") }}'
+    - name: aliquot_id
+      description: '{{ doc("aliquot_id") }}'
+    - name: consent_group
+      description: '{{ doc("consent_group") }}'
+    - name: age_at_sample
+      description: '{{ doc("age_at_sample") }}'
+    - name: sample_composition
+      description: '{{ doc("sample_composition") }}'
+    - name: sample_tissue_type
+      description: '{{ doc("sample_tissue_type") }}'
+    - name: race
+      description: '{{ doc("race") }}'
+    - name: sex
+      description: '{{ doc("sex") }}'
+    - name: ethnicity
+      description: '{{ doc("ethnicity") }}'
+    - name: analyte_type
+      description: '{{ doc("analyte_type") }}'
+    - name: age_at_phenotype
+      description: '{{ doc("age_at_phenotype") }}'
+    - name: affected_status
+      description: '{{ doc("affected_status") }}'
+    - name: vital_status
+      description: '{{ doc("vital_status") }}'
+
+- name: kf_sd_1nns3k8v_src_sequencing
+  description: '{{ doc("src_sequencing") }}'
+  config:
+    meta:
+      study: kf_sd_1nns3k8v
+  columns: 
+    - name: sample_id
+      description: '{{ doc("sample_id") }}'
+    - name: project
+      description: '{{ doc("project") }}'
+    - name: collaborator_sample_id
+      description: '{{ doc("collaborator_sample_id") }}'
+    - name: data_type
+      description: '{{ doc("data_type") }}'
+    - name: collaborator_participant_id
+      description: '{{ doc("collaborator_participant_id") }}'
+    - name: cram_path
+      description: '{{ doc("cram_path") }}'
+    - name: crai_path
+      description: '{{ doc("crai_path") }}'
+    - name: md5_path
+      description: '{{ doc("md5_path") }}'
+    - name: root_sample_id
+      description: '{{ doc("root_sample_id") }}'
+    - name: mean_coverage
+      description: '{{ doc("mean_coverage") }}'
+    - name: total_reads
+      description: '{{ doc("total_reads") }}'
+    - name: mean_read_length
+      description: '{{ doc("mean_read_length") }}'
+    
+
+- name: kf_sd_1nns3k8v_src_s3
+  description: '{{ doc("src_s3") }}'
+  config:
+    meta:
+      study: kf_sd_1nns3k8v
+  columns: 
+    - name: bucket
+      description: '{{ doc("bucket") }}'
+    - name: key
+      description: '{{ doc("key") }}'
+    - name: last_modified
+      description: '{{ doc("last_modified") }}'
+    - name: etag
+      description: '{{ doc("etag") }}'
+    - name: size
+      description: '{{ doc("size") }}'
+    - name: storage_class
+      description: '{{ doc("storage_class") }}'
\ No newline at end of file

From e7be05e802e07579feeb2aba6eb8b6c4d29ca95d Mon Sep 17 00:00:00 2001
From: Amanda Warkow <warkowa@chop.edu>
Date: Fri, 23 Jan 2026 13:46:09 -0500
Subject: [PATCH 07/12] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20add=20back=20mistake?=
 =?UTF-8?q?nly=20removed=20models?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 dbt_project/models/access/demographics.sql    | 15 +++++++
 dbt_project/models/access/investigator.sql    | 11 +++++
 dbt_project/models/access/study.sql           | 41 +++++++++++++++++++
 dbt_project/models/access/subject.sql         | 10 +++++
 .../models/access/subject_assertation.sql     | 40 ++++++++++++++++++
 5 files changed, 117 insertions(+)
 create mode 100644 dbt_project/models/access/demographics.sql
 create mode 100644 dbt_project/models/access/investigator.sql
 create mode 100644 dbt_project/models/access/study.sql
 create mode 100644 dbt_project/models/access/subject.sql
 create mode 100644 dbt_project/models/access/subject_assertation.sql

diff --git a/dbt_project/models/access/demographics.sql b/dbt_project/models/access/demographics.sql
new file mode 100644
index 0000000..07c2711
--- /dev/null
+++ b/dbt_project/models/access/demographics.sql
@@ -0,0 +1,15 @@
+{{ config(
+    schema='access'
+) }}
+
+select distinct -- distinct collapses identical output rows from stable_participant
+  participant_id as subject_id, -- participant_id is exposed as subject_id
+  gender as sex, -- the gender column is exposed under the name sex
+  race,
+  ethnicity,
+  'Not Applicable' as down_syndrome_status, -- constant, same value on every row
+  'Not Reported' as age_at_last_vital_status, -- ideally needs to be pulled from outcome data
+  'Not Reported' as vital_status, -- constant placeholder, not read from the source
+  'Not Reported' as age_at_first_engagement, -- constant placeholder, not read from the source
+  external_id
+from {{ ref('stable_participant') }}
\ No newline at end of file
diff --git a/dbt_project/models/access/investigator.sql b/dbt_project/models/access/investigator.sql
new file mode 100644
index 0000000..2e76303
--- /dev/null
+++ b/dbt_project/models/access/investigator.sql
@@ -0,0 +1,11 @@
+{{ config(
+    schema='access'
+) }}
+
+select distinct -- distinct collapses identical output rows from stable_investigator
+  name,
+  institution,
+  'Principal Investigator' as investigator_title, -- constant applied to every row; spelling corrected from 'Principle'
+  'Not Reported' as email, -- constant placeholder, not read from the source
+  external_id
+from {{ ref('stable_investigator') }}
\ No newline at end of file
diff --git a/dbt_project/models/access/study.sql b/dbt_project/models/access/study.sql
new file mode 100644
index 0000000..8cf8518
--- /dev/null
+++ b/dbt_project/models/access/study.sql
@@ -0,0 +1,41 @@
+{{ config(
+    schema='access'
+) }}
+
+-- WIP: one output row per study/investigator pairing, with participant counts
+-- should some of the constants be assigned upstream in the stable tables?
+-- it would make it easier in the access model when bringing in every study
+-- these fields are not explicitly provided in kf
+
+select distinct
+  s.study_id, -- do we want to use the KF study id or the dewrangle one?
+  'X01' as funding_source, -- is this the funding source for every kf study?
+  s.name as study_title, -- is study title the full study name?
+  s.short_code as study_code,
+  s.short_name as study_short_name,
+  'KF' as program, -- see enum list; should this always be a constant? or should we bother mapping what we have in study.program?
+  'Not Reported' as study_description, -- we have this just not stored in any db
+  s.domain as research_domain, -- current linkml enums don't quite match
+  'Pediatric' as participant_lifespan_age, -- all kf studies should be set to pediatric
+  'Not Reported' as selection_criteria, -- we may have this info in intake sheets, but not stored in any db for kf
+  'Longitudinal' as study_design, -- not stored in a db, but might be in intake sheets ; otherwise not explicitly provided for kf
+  'Unknown' as clinical_data_source_type,
+  'Genomics' as data_category, -- this should be derived from available seq types in DS; need to review best approach
+  'Not Reported' as website,
+  count(distinct p.participant_id) as expected_number_of_participants, -- should this be a count of every pt in ds?
+  count(distinct case when p.visible='true' then p.participant_id end) as actual_number_of_participants, -- distinct + qualified to mirror the expected count; should this be a count of only vis pt in ds?
+  'Not Reported' as acknowledgements,
+  'Not Reported' as citation_statement,
+  'Not Reported' as doi,
+  i.name as contact, -- same source column as principle_investigator below
+  s.parent_study_id as parent_study,
+  i.name as principle_investigator, -- NOTE(review): alias is spelled "principle"; renaming it would change the output column
+  s.external_id -- using phs number for now
+from {{ ref('stable_investigator') }} as i
+inner join {{ ref('stable_study') }} as s on i.investigator_id=s.investigator_id -- think about how this would be adjusted to account for all stable_study tables in KF
+inner join {{ ref('stable_participant') }} as p on s.study_id=p.study_id
+group by -- every non-aggregated select-list entry is listed below, mostly by its output alias
+  s.study_id, funding_source, study_title, study_code, study_short_name, 
+  program, study_description, research_domain, participant_lifespan_age, selection_criteria,
+  study_design, clinical_data_source_type, data_category, website, acknowledgements, citation_statement, doi, s.external_id,
+  contact, parent_study, principle_investigator
\ No newline at end of file
diff --git a/dbt_project/models/access/subject.sql b/dbt_project/models/access/subject.sql
new file mode 100644
index 0000000..1d1c71d
--- /dev/null
+++ b/dbt_project/models/access/subject.sql
@@ -0,0 +1,10 @@
+{{ config(
+    schema='access'
+) }}
+
+select distinct -- subject_type is constant, so this yields one row per distinct (participant_id, external_id)
+  participant_id as subject_id, -- participant_id is exposed as subject_id
+  'KF participant' as subject_type, -- adding in KF as a source identifier
+  external_id
+-- organism_type -- not sure if this is needed. could be set to 'human' if needed
+from {{ ref('stable_participant') }}
\ No newline at end of file
diff --git a/dbt_project/models/access/subject_assertation.sql b/dbt_project/models/access/subject_assertation.sql
new file mode 100644
index 0000000..cd98781
--- /dev/null
+++ b/dbt_project/models/access/subject_assertation.sql
@@ -0,0 +1,40 @@
+{{ config(
+    schema='access'
+) }}
+
+(
+  select distinct 
+    diagnosis_id as assertion_id, 
+    participant_id as subject_id, 
+    -- assertion_provenance, -- don't think we have this available in KF
+    age_at_event_days as age_at_assertion, -- could also use age_at_event
+    -- age_at_event,
+    -- age_at_resolution,
+    mondo_id_diagnosis as concept_source, -- using mondo as the primary ontology for diagnoses
+    -- value_number,
+    source_text_diagnosis as value_source -- not sure this is right place
+  -- value_units,
+  -- value_units_source
+  from {{ ref('stable_diagnosis') }}
+
+)
+
+union all
+
+(
+  select distinct 
+    phenotype_id as assertion_id, 
+    participant_id as subject_id, 
+    -- assertion_provenance, -- don't think we have this available in KF
+    age_at_event_days as age_at_assertion, -- could also use age_at_event
+    -- age_at_event,
+    -- age_at_resolution,
+    hpo_id_phenotype as concept_source, -- using hpo as the primary ontology for phenotypes
+    -- value_number,
+    source_text_phenotype as value_source -- not sure this is right place
+  -- value_units,
+  -- value_units_source
+  from {{ ref('stable_phenotype') }}
+)
+
+

From 4b19372958994138da2b47454bff85398859a158 Mon Sep 17 00:00:00 2001
From: Amanda Warkow <warkowa@chop.edu>
Date: Fri, 23 Jan 2026 14:31:50 -0500
Subject: [PATCH 08/12] =?UTF-8?q?=F0=9F=93=9D=20fix=20doc=20errors?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../docs_fields.md                            | 88 +++++++++++++++++++
 .../docs_tables.md                            | 26 ++++++
 .../models/access/subject_assertation.sql     | 40 ---------
 .../sd_1nns3k8v/int/kf_sd_1nns3k8v_int.yml    | 10 +--
 .../int/kf_sd_1nns3k8v_int_sequencing.sql     |  4 +-
 .../sd_1nns3k8v/src/kf_sd_1nns3k8v_src.yml    | 12 +--
 6 files changed, 123 insertions(+), 57 deletions(-)
 delete mode 100644 dbt_project/models/access/subject_assertation.sql

diff --git a/dbt_project/models/_metadata_description_files/docs_fields.md b/dbt_project/models/_metadata_description_files/docs_fields.md
index 06b7cba..4c2ccf5 100644
--- a/dbt_project/models/_metadata_description_files/docs_fields.md
+++ b/dbt_project/models/_metadata_description_files/docs_fields.md
@@ -600,3 +600,91 @@ Link to request specimens from study biobank.
 
 
 
+## Cody Fields
+{% docs family_relationship_target_participant_id %}
+Participant ID of Proband of Family
+{% enddocs %}
+
+{% docs aliquot_id %}
+Aliquot ID from sequencing manifest
+{% enddocs %}
+
+{% docs consent_group %}
+Consent group of study
+{% enddocs %}
+
+{% docs age_at_sample %}
+Age at which the sample was collected, from the sequencing manifest
+{% enddocs %}
+
+{% docs sample_composition %}
+Composition of specimen
+{% enddocs %}
+
+{% docs sample_tissue_type %}
+Tissue type of collected specimen
+{% enddocs %}
+
+{% docs sex %}
+Sex of participant
+{% enddocs %}
+
+{% docs age_at_phenotype %}
+Age of participant when phenotype was asserted
+{% enddocs %}
+
+{% docs vital_status %}
+Vital status of participant
+{% enddocs %}
+
+{% docs collaborator_sample_id %}
+Sample ID submitted by PI to sequencing center 
+{% enddocs %}
+
+{% docs collaborator_participant_id %}
+Participant ID submitted by PI to sequencing center 
+{% enddocs %}
+
+{% docs cram_path %}
+path of cram file from sequencing manifest
+{% enddocs %}
+
+{% docs crai_path %}
+path of crai file from sequencing manifest
+{% enddocs %}
+
+{% docs md5_path %}
+path of md5 file from sequencing manifest
+{% enddocs %}
+
+{% docs root_sample_id %}
+Specimen sample ID from sequencing manifest
+{% enddocs %}
+
+{% docs mean_coverage %}
+mean coverage value from sequencing manifest
+{% enddocs %}
+
+{% docs bucket %}
+s3 bucket for file
+{% enddocs %}
+
+{% docs key %}
+s3 key for file
+{% enddocs %}
+
+{% docs last_modified %}
+last time s3 file was modified
+{% enddocs %}
+
+{% docs etag %}
+etag hash from s3 manifest
+{% enddocs %}
+
+{% docs size %}
+size of file from s3 manifest
+{% enddocs %}
+
+{% docs storage_class %}
+s3 storage class for file
+{% enddocs %}
\ No newline at end of file
diff --git a/dbt_project/models/_metadata_description_files/docs_tables.md b/dbt_project/models/_metadata_description_files/docs_tables.md
index 5d03390..d254b48 100644
--- a/dbt_project/models/_metadata_description_files/docs_tables.md
+++ b/dbt_project/models/_metadata_description_files/docs_tables.md
@@ -136,4 +136,30 @@ Stable table for int_specimens. Finalized mapping of transformed dataservice ent
 
 {% docs stable_study %}
 Stable table for int_study. Finalized mapping of transformed dataservice entities that are ready to be brought into the access layer.
+{% enddocs %}
+
+
+## Kids First Study SD_1NNS3K8V 
+{% docs kf_sd_1nns3k8v_src_clinical %}
+Source table for Cody study source clinical data 
+{% enddocs %}
+
+{% docs kf_sd_1nns3k8v_src_sequencing %}
+Source table for Cody study source sequencing data 
+{% enddocs %}
+
+{% docs kf_sd_1nns3k8v_src_s3 %}
+Source table for Cody study s3 file manifest
+{% enddocs %}
+
+{% docs kf_sd_1nns3k8v_int_clinical %}
+Intermediate table for src_clinical. Transforms dataservice entities for better usability and clarity. Excludes certain entities that are not needed.
+{% enddocs %}
+
+{% docs kf_sd_1nns3k8v_int_sequencing %}
+Intermediate table for src_sequencing. Transforms dataservice entities for better usability and clarity. Excludes certain entities that are not needed.
+{% enddocs %}
+
+{% docs kf_sd_1nns3k8v_int_s3 %}
+Intermediate table for src_s3. Transforms dataservice entities for better usability and clarity. Excludes certain entities that are not needed.
 {% enddocs %}
\ No newline at end of file
diff --git a/dbt_project/models/access/subject_assertation.sql b/dbt_project/models/access/subject_assertation.sql
deleted file mode 100644
index cd98781..0000000
--- a/dbt_project/models/access/subject_assertation.sql
+++ /dev/null
@@ -1,40 +0,0 @@
-{{ config(
-    schema='access'
-) }}
-
-(
-  select distinct 
-    diagnosis_id as assertion_id, 
-    participant_id as subject_id, 
-    -- assertion_provenance, -- don't think we have this available in KF
-    age_at_event_days as age_at_assertion, -- could also use age_at_event
-    -- age_at_event,
-    -- age_at_resolution,
-    mondo_id_diagnosis as concept_source, -- using mondo as the primary ontology for diagnoses
-    -- value_number,
-    source_text_diagnosis as value_source -- not sure this is right place
-  -- value_units,
-  -- value_units_source
-  from {{ ref('stable_diagnosis') }}
-
-)
-
-union all
-
-(
-  select distinct 
-    phenotype_id as assertion_id, 
-    participant_id as subject_id, 
-    -- assertion_provenance, -- don't think we have this available in KF
-    age_at_event_days as age_at_assertion, -- could also use age_at_event
-    -- age_at_event,
-    -- age_at_resolution,
-    hpo_id_phenotype as concept_source, -- using hpo as the primary ontology for phenotypes
-    -- value_number,
-    source_text_phenotype as value_source -- not sure this is right place
-  -- value_units,
-  -- value_units_source
-  from {{ ref('stable_phenotype') }}
-)
-
-
diff --git a/dbt_project/models/kids_first/sd_1nns3k8v/int/kf_sd_1nns3k8v_int.yml b/dbt_project/models/kids_first/sd_1nns3k8v/int/kf_sd_1nns3k8v_int.yml
index ab4bd47..ec4f3ea 100644
--- a/dbt_project/models/kids_first/sd_1nns3k8v/int/kf_sd_1nns3k8v_int.yml
+++ b/dbt_project/models/kids_first/sd_1nns3k8v/int/kf_sd_1nns3k8v_int.yml
@@ -3,7 +3,7 @@ version: 2
 models: 
 
 - name: kf_sd_1nns3k8v_int_clinical
-  description: '{{ doc("int_clinical") }}'
+  description: '{{ doc("kf_sd_1nns3k8v_int_clinical") }}'
   config:
     meta:
       study: kf_sd_1nns3k8v
@@ -40,19 +40,15 @@ models:
       description: '{{ doc("vital_status") }}'
 
 - name: kf_sd_1nns3k8v_int_sequencing
-  description: '{{ doc("int_sequencing") }}'
+  description: '{{ doc("kf_sd_1nns3k8v_int_sequencing") }}'
   config:
     meta:
       study: kf_sd_1nns3k8v
   columns: 
     - name: sample_id
       description: '{{ doc("sample_id") }}'
-    - name: project
-      description: '{{ doc("project") }}'
     - name: collaborator_sample_id
       description: '{{ doc("collaborator_sample_id") }}'
-    - name: data_type
-      description: '{{ doc("data_type") }}'
     - name: collaborator_participant_id
       description: '{{ doc("collaborator_participant_id") }}'
     - name: cram_path
@@ -72,7 +68,7 @@ models:
     
 
 - name: kf_sd_1nns3k8v_int_s3
-  description: '{{ doc("int_s3") }}'
+  description: '{{ doc("kf_sd_1nns3k8v_int_s3") }}'
   config:
     meta:
       study: kf_sd_1nns3k8v
diff --git a/dbt_project/models/kids_first/sd_1nns3k8v/int/kf_sd_1nns3k8v_int_sequencing.sql b/dbt_project/models/kids_first/sd_1nns3k8v/int/kf_sd_1nns3k8v_int_sequencing.sql
index 33a4b26..69d3794 100644
--- a/dbt_project/models/kids_first/sd_1nns3k8v/int/kf_sd_1nns3k8v_int_sequencing.sql
+++ b/dbt_project/models/kids_first/sd_1nns3k8v/int/kf_sd_1nns3k8v_int_sequencing.sql
@@ -4,10 +4,10 @@
 
 select distinct
     "entity:sample_id" as sample_id,
-    "project" as project,
+    -- "project" as project,
     "collaborator_sample_id" as collaborator_sample_id,
     -- "version" as version,
-    "data_type" as data_type,
+    -- "data_type" as data_type,
     -- "pdo" as pdo,
     -- "release_date" as release_date,
     -- "reference_sequence_name" as reference_sequence_name,
diff --git a/dbt_project/models/kids_first/sd_1nns3k8v/src/kf_sd_1nns3k8v_src.yml b/dbt_project/models/kids_first/sd_1nns3k8v/src/kf_sd_1nns3k8v_src.yml
index 7cb7c73..d4ae549 100644
--- a/dbt_project/models/kids_first/sd_1nns3k8v/src/kf_sd_1nns3k8v_src.yml
+++ b/dbt_project/models/kids_first/sd_1nns3k8v/src/kf_sd_1nns3k8v_src.yml
@@ -3,12 +3,12 @@ version: 2
 models: 
 
 - name: kf_sd_1nns3k8v_src_clinical
-  description: '{{ doc("src_clinical") }}'
+  description: '{{ doc("kf_sd_1nns3k8v_src_clinical") }}'
   config:
     meta:
       study: kf_sd_1nns3k8v
   columns: 
-    - name: family_id
+    - name: "family_id"
       description: '{{ doc("family_id") }}'
     - name: participant_id
       description: '{{ doc("participant_id") }}'
@@ -40,19 +40,15 @@ models:
       description: '{{ doc("vital_status") }}'
 
 - name: kf_sd_1nns3k8v_src_sequencing
-  description: '{{ doc("src_sequencing") }}'
+  description: '{{ doc("kf_sd_1nns3k8v_src_sequencing") }}'
   config:
     meta:
       study: kf_sd_1nns3k8v
   columns: 
     - name: sample_id
       description: '{{ doc("sample_id") }}'
-    - name: project
-      description: '{{ doc("project") }}'
     - name: collaborator_sample_id
       description: '{{ doc("collaborator_sample_id") }}'
-    - name: data_type
-      description: '{{ doc("data_type") }}'
     - name: collaborator_participant_id
       description: '{{ doc("collaborator_participant_id") }}'
     - name: cram_path
@@ -72,7 +68,7 @@ models:
     
 
 - name: kf_sd_1nns3k8v_src_s3
-  description: '{{ doc("src_s3") }}'
+  description: '{{ doc("kf_sd_1nns3k8v_src_s3") }}'
   config:
     meta:
       study: kf_sd_1nns3k8v

From 5028c13b027c700adad6ea850f8467ae4d0ad110 Mon Sep 17 00:00:00 2001
From: Amanda Warkow <warkowa@chop.edu>
Date: Tue, 27 Jan 2026 15:05:48 -0500
Subject: [PATCH 09/12] =?UTF-8?q?=E2=9C=85=20resolve=20seed=20errors?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../int/kf_sd_1nns3k8v_int_sequencing.sql     |  6 +++--
 dbt_project/seeds/_seeds.yml                  | 23 ++++++++++++++++++-
 2 files changed, 26 insertions(+), 3 deletions(-)

diff --git a/dbt_project/models/kids_first/sd_1nns3k8v/int/kf_sd_1nns3k8v_int_sequencing.sql b/dbt_project/models/kids_first/sd_1nns3k8v/int/kf_sd_1nns3k8v_int_sequencing.sql
index 69d3794..33b1689 100644
--- a/dbt_project/models/kids_first/sd_1nns3k8v/int/kf_sd_1nns3k8v_int_sequencing.sql
+++ b/dbt_project/models/kids_first/sd_1nns3k8v/int/kf_sd_1nns3k8v_int_sequencing.sql
@@ -25,8 +25,10 @@ select distinct
     -- "pf_reads" as pf_reads,
     -- "pf_reads_aligned" as pf_reads_aligned,
     "total_reads" as total_reads,
-    "mean_read_length" as mean_read_length,
+    "mean_read_length" as mean_read_length
     -- "pf_reads_rate" as pf_reads_rate,
     -- "pf_reads_aligned_rate" as pf_reads_aligned_rate,
     -- "pf_aligned_bases" as pf_aligned_bases,
-    -- "reads_aligned_in_pairs" as reads_aligned_in_pairs,
\ No newline at end of file
+    -- "reads_aligned_in_pairs" as reads_aligned_in_pairs,
+
+from {{ ref('kf_sd_1nns3k8v_src_sequencing') }}
\ No newline at end of file
diff --git a/dbt_project/seeds/_seeds.yml b/dbt_project/seeds/_seeds.yml
index 3ea41e7..09a2d9f 100644
--- a/dbt_project/seeds/_seeds.yml
+++ b/dbt_project/seeds/_seeds.yml
@@ -9,4 +9,25 @@ seeds:
         - name: letter
           description: A letter of the alphabet
         - name: letter_grouping
-          description: The grouping that the letter belongs to
\ No newline at end of file
+          description: The grouping that the letter belongs to
+
+    - name: s3_scrape_cody
+      config:
+        column_types:
+          Size: bigint
+
+    - name: sample
+      config:
+        column_types:
+          total_reads: bigint
+          pf_aligned_bases: bigint
+          pf_hq_aligned_bases: bigint
+          pf_hq_aligned_q20_bases: bigint
+          genome_territory: bigint
+          library-1_estimated_library_size: bigint
+          # pf_reads: bigint
+          # pf_reads_aligned: bigint
+          # reads_aligned_in_pairs: bigint
+          # pf_hq_aligned_reads: bigint
+          # library-1_read_pairs: bigint
+          # pf_reads_improper_pairs: bigint
\ No newline at end of file

From 3d1eaf1d7235deae88cf086edacb0485b7994c67 Mon Sep 17 00:00:00 2001
From: Amanda Warkow <warkowa@chop.edu>
Date: Tue, 27 Jan 2026 15:18:02 -0500
Subject: [PATCH 10/12] =?UTF-8?q?=E2=9C=8F=EF=B8=8F=20lint=20code?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../example_study/my_first_dbt_model.sql      |  7 ++-
 .../int/kf_sd_1nns3k8v_int_clinical.sql       | 30 +++++------
 .../sd_1nns3k8v/int/kf_sd_1nns3k8v_int_s3.sql | 12 ++---
 .../int/kf_sd_1nns3k8v_int_sequencing.sql     | 54 +++++++++----------
 4 files changed, 51 insertions(+), 52 deletions(-)

diff --git a/dbt_project/models/kids_first/example_study/my_first_dbt_model.sql b/dbt_project/models/kids_first/example_study/my_first_dbt_model.sql
index 5b51873..279006b 100644
--- a/dbt_project/models/kids_first/example_study/my_first_dbt_model.sql
+++ b/dbt_project/models/kids_first/example_study/my_first_dbt_model.sql
@@ -1,4 +1,3 @@
-
 /*
     Welcome to your first dbt model!
     Did you know that you can also configure models directly within SQL files?
@@ -11,9 +10,9 @@
 
 with source_data as (
 
-    select 1 as id
-    union all
-    select null as id
+  select 1 as id
+  union all
+  select null as id
 
 )
 
diff --git a/dbt_project/models/kids_first/sd_1nns3k8v/int/kf_sd_1nns3k8v_int_clinical.sql b/dbt_project/models/kids_first/sd_1nns3k8v/int/kf_sd_1nns3k8v_int_clinical.sql
index 1cacf62..404613a 100644
--- a/dbt_project/models/kids_first/sd_1nns3k8v/int/kf_sd_1nns3k8v_int_clinical.sql
+++ b/dbt_project/models/kids_first/sd_1nns3k8v/int/kf_sd_1nns3k8v_int_clinical.sql
@@ -3,19 +3,19 @@
 ) }}
 
 select distinct
-    "Family ID" as family_id,
-    "Participant ID" as participant_id,
-    "Family Relationship Target Participant ID" as family_relationship_target_participant_id,
-    "alliquot_id" as aliquot_id,
-    "Consent Group" as consent_group,
-    "Age at Sample" as age_at_sample,
-    "Sample Composition" as sample_composition,
-    "Sample Tissue Type" as sample_tissue_type,
-    "Race" as race,
-    "Sex" as sex,
-    "Ethnicity" as ethnicity,
-    "Analyte Type" as analyte_type,
-    "Age at Phenotype" as age_at_phenotype,
-    "Affected Status" as affected_status, 
-    "Vital Status" as vital_status
+  "Family ID" as family_id, -- upstream names contain spaces and capitals, hence the double quotes; aliases are snake_case
+  "Participant ID" as participant_id,
+  "Family Relationship Target Participant ID" as family_relationship_target_participant_id,
+  alliquot_id as aliquot_id, -- upstream column is spelled "alliquot_id" (sic); the alias exposes the corrected spelling
+  "Consent Group" as consent_group,
+  "Age at Sample" as age_at_sample,
+  "Sample Composition" as sample_composition,
+  "Sample Tissue Type" as sample_tissue_type,
+  "Race" as race,
+  "Sex" as sex,
+  "Ethnicity" as ethnicity,
+  "Analyte Type" as analyte_type,
+  "Age at Phenotype" as age_at_phenotype,
+  "Affected Status" as affected_status,
+  "Vital Status" as vital_status
 from {{ ref('kf_sd_1nns3k8v_src_clinical') }}
\ No newline at end of file
diff --git a/dbt_project/models/kids_first/sd_1nns3k8v/int/kf_sd_1nns3k8v_int_s3.sql b/dbt_project/models/kids_first/sd_1nns3k8v/int/kf_sd_1nns3k8v_int_s3.sql
index 4873529..bb15b5e 100644
--- a/dbt_project/models/kids_first/sd_1nns3k8v/int/kf_sd_1nns3k8v_int_s3.sql
+++ b/dbt_project/models/kids_first/sd_1nns3k8v/int/kf_sd_1nns3k8v_int_s3.sql
@@ -3,10 +3,10 @@
 ) }}
 
 select distinct
-    "Bucket" as bucket,
-    "Key" as key,
-    "LastModified" as last_modified,
-    "ETag" as etag,
-    "Size" as size,
-    "StorageClass" as storage_class
+  "Bucket" as bucket,
+  "Key" as key,
+  "LastModified" as last_modified,
+  "ETag" as etag,
+  "Size" as size,
+  "StorageClass" as storage_class
 from {{ ref('s3_scrape_cody') }}
\ No newline at end of file
diff --git a/dbt_project/models/kids_first/sd_1nns3k8v/int/kf_sd_1nns3k8v_int_sequencing.sql b/dbt_project/models/kids_first/sd_1nns3k8v/int/kf_sd_1nns3k8v_int_sequencing.sql
index 33b1689..8ae9f7a 100644
--- a/dbt_project/models/kids_first/sd_1nns3k8v/int/kf_sd_1nns3k8v_int_sequencing.sql
+++ b/dbt_project/models/kids_first/sd_1nns3k8v/int/kf_sd_1nns3k8v_int_sequencing.sql
@@ -3,32 +3,32 @@
 ) }}
 
 select distinct
-    "entity:sample_id" as sample_id,
-    -- "project" as project,
-    "collaborator_sample_id" as collaborator_sample_id,
-    -- "version" as version,
-    -- "data_type" as data_type,
-    -- "pdo" as pdo,
-    -- "release_date" as release_date,
-    -- "reference_sequence_name" as reference_sequence_name,
-    "collaborator_participant_id" as collaborator_participant_id,
-    "cram_path" as cram_path,
-    "crai_path" as crai_path,
-    "md5_path" as md5_path,
-    "root_sample_id" as root_sample_id,
-    "mean_coverage" as mean_coverage,
-    -- "adapter_rate" as adapter_rate,
-    -- "pf_hq_aligned_q20_bases" as pf_hq_aligned_q20_bases,
-    -- "pf_hq_aligned_reads" as pf_hq_aligned_reads,
-    -- "pf_mismatch_rate" as pf_mismatch_rate,
-    -- "pf_noise_reads" as pf_noise_reads,
-    -- "pf_reads" as pf_reads,
-    -- "pf_reads_aligned" as pf_reads_aligned,
-    "total_reads" as total_reads,
-    "mean_read_length" as mean_read_length
-    -- "pf_reads_rate" as pf_reads_rate,
-    -- "pf_reads_aligned_rate" as pf_reads_aligned_rate,
-    -- "pf_aligned_bases" as pf_aligned_bases,
-    -- "reads_aligned_in_pairs" as reads_aligned_in_pairs,
+  "entity:sample_id" as sample_id,
+  -- "project" as project,
+  collaborator_sample_id,
+  -- "version" as version,
+  -- "data_type" as data_type,
+  -- "pdo" as pdo,
+  -- "release_date" as release_date,
+  -- "reference_sequence_name" as reference_sequence_name,
+  collaborator_participant_id,
+  cram_path,
+  crai_path,
+  md5_path,
+  root_sample_id,
+  mean_coverage,
+  -- "adapter_rate" as adapter_rate,
+  -- "pf_hq_aligned_q20_bases" as pf_hq_aligned_q20_bases,
+  -- "pf_hq_aligned_reads" as pf_hq_aligned_reads,
+  -- "pf_mismatch_rate" as pf_mismatch_rate,
+  -- "pf_noise_reads" as pf_noise_reads,
+  -- "pf_reads" as pf_reads,
+  -- "pf_reads_aligned" as pf_reads_aligned,
+  total_reads,
+  mean_read_length
+-- "pf_reads_rate" as pf_reads_rate,
+-- "pf_reads_aligned_rate" as pf_reads_aligned_rate,
+-- "pf_aligned_bases" as pf_aligned_bases,
+-- "reads_aligned_in_pairs" as reads_aligned_in_pairs,
 
 from {{ ref('kf_sd_1nns3k8v_src_sequencing') }}
\ No newline at end of file

From 5271e2d29cfacfd8c2fc3f46e9bc7211e4ae8b21 Mon Sep 17 00:00:00 2001
From: Amanda Warkow <warkowa@chop.edu>
Date: Tue, 27 Jan 2026 19:01:57 -0500
Subject: [PATCH 11/12] =?UTF-8?q?=F0=9F=93=9D=20fix=20documentation?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../docs_fields.md                            | 39 ++++++++++++++++++-
 dbt_project/seeds/_seeds.yml                  |  1 +
 2 files changed, 38 insertions(+), 2 deletions(-)

diff --git a/dbt_project/models/_metadata_description_files/docs_fields.md b/dbt_project/models/_metadata_description_files/docs_fields.md
index 4c2ccf5..e027cc2 100644
--- a/dbt_project/models/_metadata_description_files/docs_fields.md
+++ b/dbt_project/models/_metadata_description_files/docs_fields.md
@@ -598,9 +598,10 @@ Link to request specimens from study biobank.
 {% enddocs %}
 
 
+## Kids First Source Data Fields
 
+### Clinical Data
 
-## Cody Fields
 {% docs family_relationship_target_participant_id %}
 Participant ID of Proband of Family
 {% enddocs %}
@@ -637,6 +638,35 @@ Age of participant when phenotype was asserted
 Vital status of participant
 {% enddocs %}
 
+
+### Broad Manifest
+
+{% docs collaborator_sample_id %}
+Sample ID submitted by PI to sequencing center 
+{% enddocs %}
+
+{% docs collaborator_participant_id %}
+Participant ID submitted by PI to sequencing center 
+{% enddocs %}
+
+{% docs cram_path %}
+Path of CRAM file from sequencing manifest
+{% enddocs %}
+
+{% docs crai_path %}
+Path of CRAI file from sequencing manifest
+{% enddocs %}
+
+{% docs md5_path %}
+Path of MD5 file from sequencing manifest
+{% enddocs %}
+
+{% docs root_sample_id %}
+Specimen sample ID from sequencing manifest
+{% enddocs %}
+
+### S3 Scrape
+
 {% docs collaborator_sample_id %}
 Sample ID submitted by PI to sequencing center 
 {% enddocs %}
@@ -687,4 +717,9 @@ size of file from s3 manifest
 
 {% docs storage_class %}
 s3 storage class for file
-{% enddocs %}
\ No newline at end of file
+{% enddocs %}
+
+
+## Study Specific Fields
+
+### SD_1NNS3K8V Fields
\ No newline at end of file
diff --git a/dbt_project/seeds/_seeds.yml b/dbt_project/seeds/_seeds.yml
index 09a2d9f..d98ab0f 100644
--- a/dbt_project/seeds/_seeds.yml
+++ b/dbt_project/seeds/_seeds.yml
@@ -19,6 +19,7 @@ seeds:
     - name: sample
       config:
         column_types:
+          # These columns hold the largest integer values; the list was narrowed down to the ones that need bigint for the load to pass.
           total_reads: bigint
           pf_aligned_bases: bigint
           pf_hq_aligned_bases: bigint

From 25e1bea62a6ddee55d180e9697d98f2f5ad632e7 Mon Sep 17 00:00:00 2001
From: Amanda Warkow <warkowa@chop.edu>
Date: Wed, 4 Feb 2026 13:36:10 -0500
Subject: [PATCH 12/12] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20make=20seed=20files?=
 =?UTF-8?q?=20prd=20sources?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .sqlfluff                                     |  2 +-
 .../docs_fields.md                            | 24 -------------------
 .../sd_1nns3k8v/src/kf_sd_1nns3k8v_src.yml    | 10 ++++++++
 .../src/kf_sd_1nns3k8v_src_clinical.sql       |  2 +-
 .../sd_1nns3k8v/src/kf_sd_1nns3k8v_src_s3.sql |  2 +-
 .../src/kf_sd_1nns3k8v_src_sequencing.sql     |  2 +-
 dbt_project/profiles.yml                      |  2 +-
 7 files changed, 15 insertions(+), 29 deletions(-)

diff --git a/.sqlfluff b/.sqlfluff
index 995d6dc..7b88f5a 100644
--- a/.sqlfluff
+++ b/.sqlfluff
@@ -10,7 +10,7 @@ project_dir = dbt_project
 target = dev
 # If needed, uncomment the line below to specify the directory where your
 # profiles.yml file is located
-profiles_dir = ~/.dbt/include
+profiles_dir = ~/.dbt/
 
 [sqlfluff:indentation]
 tab_space_size = 2
diff --git a/dbt_project/models/_metadata_description_files/docs_fields.md b/dbt_project/models/_metadata_description_files/docs_fields.md
index e027cc2..a5cd9aa 100644
--- a/dbt_project/models/_metadata_description_files/docs_fields.md
+++ b/dbt_project/models/_metadata_description_files/docs_fields.md
@@ -667,30 +667,6 @@ Specimen sample ID from sequencing manifest
 
 ### S3 Scrape
 
-{% docs collaborator_sample_id %}
-Sample ID submitted by PI to sequencing center 
-{% enddocs %}
-
-{% docs collaborator_participant_id %}
-Participant ID submitted by PI to sequencing center 
-{% enddocs %}
-
-{% docs cram_path %}
-path of cram file from sequencing manifest
-{% enddocs %}
-
-{% docs crai_path %}
-path of crai file from sequencing manifest
-{% enddocs %}
-
-{% docs md5_path %}
-path of md5 file from sequencing manifest
-{% enddocs %}
-
-{% docs root_sample_id %}
-Specimen sample ID from sequencing manifest
-{% enddocs %}
-
 {% docs mean_coverage %}
 mean coverage value from sequencing manifest
 {% enddocs %}
diff --git a/dbt_project/models/kids_first/sd_1nns3k8v/src/kf_sd_1nns3k8v_src.yml b/dbt_project/models/kids_first/sd_1nns3k8v/src/kf_sd_1nns3k8v_src.yml
index d4ae549..3daf953 100644
--- a/dbt_project/models/kids_first/sd_1nns3k8v/src/kf_sd_1nns3k8v_src.yml
+++ b/dbt_project/models/kids_first/sd_1nns3k8v/src/kf_sd_1nns3k8v_src.yml
@@ -1,5 +1,15 @@
 version: 2
 
+sources:
+  - name: kf_sd_1nns3k8v_src
+    database: includewarehouse
+    schema: prd_import
+    tables:
+      - name: kids_first_update
+      - name: sample
+      - name: s3_scrape_cody
+
+
 models: 
 
 - name: kf_sd_1nns3k8v_src_clinical
diff --git a/dbt_project/models/kids_first/sd_1nns3k8v/src/kf_sd_1nns3k8v_src_clinical.sql b/dbt_project/models/kids_first/sd_1nns3k8v/src/kf_sd_1nns3k8v_src_clinical.sql
index 0efe7e1..83f5178 100644
--- a/dbt_project/models/kids_first/sd_1nns3k8v/src/kf_sd_1nns3k8v_src_clinical.sql
+++ b/dbt_project/models/kids_first/sd_1nns3k8v/src/kf_sd_1nns3k8v_src_clinical.sql
@@ -3,4 +3,4 @@
 ) }}
 
 select * 
-from {{ ref('kids_first_update') }}
\ No newline at end of file
+from {{ source('kf_sd_1nns3k8v_src', 'kids_first_update') }}
\ No newline at end of file
diff --git a/dbt_project/models/kids_first/sd_1nns3k8v/src/kf_sd_1nns3k8v_src_s3.sql b/dbt_project/models/kids_first/sd_1nns3k8v/src/kf_sd_1nns3k8v_src_s3.sql
index e06c9a4..d05fff0 100644
--- a/dbt_project/models/kids_first/sd_1nns3k8v/src/kf_sd_1nns3k8v_src_s3.sql
+++ b/dbt_project/models/kids_first/sd_1nns3k8v/src/kf_sd_1nns3k8v_src_s3.sql
@@ -3,4 +3,4 @@
 ) }}
 
 select * 
-from {{ ref('s3_scrape_cody') }}
\ No newline at end of file
+from {{ source('kf_sd_1nns3k8v_src', 's3_scrape_cody') }}
\ No newline at end of file
diff --git a/dbt_project/models/kids_first/sd_1nns3k8v/src/kf_sd_1nns3k8v_src_sequencing.sql b/dbt_project/models/kids_first/sd_1nns3k8v/src/kf_sd_1nns3k8v_src_sequencing.sql
index a0ddfdd..12ecfce 100644
--- a/dbt_project/models/kids_first/sd_1nns3k8v/src/kf_sd_1nns3k8v_src_sequencing.sql
+++ b/dbt_project/models/kids_first/sd_1nns3k8v/src/kf_sd_1nns3k8v_src_sequencing.sql
@@ -3,4 +3,4 @@
 ) }}
 
 select * 
-from {{ ref('sample') }}
\ No newline at end of file
+from {{ source('kf_sd_1nns3k8v_src', 'sample') }}
\ No newline at end of file
diff --git a/dbt_project/profiles.yml b/dbt_project/profiles.yml
index 6269949..5388117 100644
--- a/dbt_project/profiles.yml
+++ b/dbt_project/profiles.yml
@@ -26,7 +26,7 @@ include_dbt_sandbox:
       user: "{{ env_var('INCLUDEWAREHOUSE_SCV_USERNAME') }}"
       password: "{{ env_var('INCLUDEWAREHOUSE_SCV_PASSWORD') }}"
       port: 5432
-      dbname: postgres
+      dbname: includewarehouse
       schema: prd
       threads: 4