diff --git a/.Rbuildignore b/.Rbuildignore
index b08190c..475c93d 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -3,3 +3,4 @@
 ^README\.Rmd$
 ^LICENSE\.md$
 ^\.github$
+^data-raw$
diff --git a/DESCRIPTION b/DESCRIPTION
index 1dfc1a5..10510ef 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -46,3 +46,6 @@ Suggests:
     tidyr,
     tidyselect
 Roxygen: list(markdown = TRUE)
+Depends: 
+    R (>= 3.5)
+LazyData: true
diff --git a/R/raw_counts.R b/R/raw_counts.R
new file mode 100644
index 0000000..205423e
--- /dev/null
+++ b/R/raw_counts.R
@@ -0,0 +1,16 @@
+#' GDC TCGA Lung Adenocarcinoma (LUAD) - Raw STAR counts
+#'
+#' A subset of TCGA-LUAD RNA-seq gene-level counts generated by STAR and distributed via UCSC Xena.
+#' Data originate from GDC; only selected samples are included.
+#' Gene identifiers are Ensembl IDs with the version suffix removed (e.g., "ENSG00000141510.15" → "ENSG00000141510").
+#' Sample barcodes were compacted and written with dots instead of dashes (e.g., "TCGA-38-4627-11A" → "TCGA.38.4627.11A").
+#'
+#' @format ## `raw_counts`
+#' A data frame with 60,660 rows and 32 columns:
+#' \describe{
+#'   \item{rownames}{Ensembl gene IDs (GRCh38) with version suffix stripped (no ".##").}
+#'   \item{columns}{TCGA sample IDs written with dots instead of dashes, e.g., `TCGA.38.4627.11A`.}
+#'   \item{values}{Raw counts from STAR gene quantification, back-transformed from the source file's log2(count + 1) values (2^x - 1).}
+#' }
+#' @source <https://gdc-hub.s3.us-east-1.amazonaws.com/download/TCGA-LUAD.star_counts.tsv.gz>
+"raw_counts"
diff --git a/R/sampledata.R b/R/sampledata.R
new file mode 100644
index 0000000..0ed9f7e
--- /dev/null
+++ b/R/sampledata.R
@@ -0,0 +1,24 @@
+#' GDC TCGA Lung Adenocarcinoma (LUAD) - Metadata
+#'
+#' Sample information for the subset of TCGA-LUAD RNA-seq gene-level counts generated by STAR and distributed via UCSC Xena.
+#' Data originate from GDC; only selected samples are included.
+#' Sample barcodes were compacted and written with dots instead of dashes (e.g., "TCGA-38-4627-11A" → "TCGA.38.4627.11A").
+#'
+#' @format ## `sampledata`
+#' A data frame with 32 rows and 12 columns:
+#' \describe{
+#' \item{patient_id}{TCGA sample ID written with dots instead of dashes, e.g., \code{TCGA.38.4627.11A}.}
+#' \item{sample_type}{Sample category, e.g., \code{"tumor"} or \code{"normal"}.}
+#' \item{age}{Age (years) at diagnosis/collection.}
+#' \item{race_demographic}{Self-reported race/ethnicity (e.g., \code{"white"}).}
+#' \item{sex}{Biological sex (\code{"male"} / \code{"female"}).}
+#' \item{status}{Vital status at last follow-up (\code{"Alive"} / \code{"Dead"}).}
+#' \item{pathologic_stage}{Overall AJCC pathologic stage, e.g., \code{"Stage IA"}, \code{"Stage IIIA"}.}
+#' \item{pathologic_t}{Primary tumor (T) category, e.g., \code{"T1b"}, \code{"T2"}.}
+#' \item{smoking_status}{Smoking history from TCGA clinical (free text), e.g., \code{"Lifelong Non-smoker"}.}
+#' \item{agents}{Therapeutic agents administered (free text; may be empty).}
+#' \item{treatment_response}{Clinical response to therapy (free text; may be empty).}
+#' \item{treatment_type}{Type of therapy (free text; may be empty).}
+#' }
+#' @source <https://gdc-hub.s3.us-east-1.amazonaws.com/download/TCGA-LUAD.star_counts.tsv.gz>
+"sampledata"
diff --git a/data-raw/raw_counts.R b/data-raw/raw_counts.R
new file mode 100644
index 0000000..4c6ab2d
--- /dev/null
+++ b/data-raw/raw_counts.R
@@ -0,0 +1,20 @@
+# STAR Counts - TCGA LUAD -------------------------
+# August, 2025
+# Source: https://gdc-hub.s3.us-east-1.amazonaws.com/download/TCGA-LUAD.star_counts.tsv.gz
+
+raw_counts <- read.delim("../TCGA-LUAD.star_counts.tsv.gz", header = TRUE)
+sampledata <- read.delim("../TCGA-LUAD.samples.reduced.tsv", header = TRUE)
+sampledata$patient_id <- gsub("-", ".", sampledata$patient_id, fixed = TRUE)
+
+# Stop with a readable message if a selected sample has no counts column.
+missing_ids <- setdiff(sampledata$patient_id, colnames(raw_counts))
+if (length(missing_ids) > 0) {
+  stop("Samples missing from counts: ", toString(missing_ids), call. = FALSE)
+}
+
+gids <- sub("\\.\\d+$", "", raw_counts$Ensembl_ID)
+raw_counts <- raw_counts[, sampledata$patient_id, drop = FALSE]
+# Source values are log2(count + 1): invert, then round off float error.
+raw_counts <- round(2^raw_counts - 1)
+rownames(raw_counts) <- gids
+usethis::use_data(raw_counts, compress = "xz", overwrite = TRUE)
diff --git a/data-raw/sampledata.R b/data-raw/sampledata.R
new file mode 100644
index 0000000..4c7a941
--- /dev/null
+++ b/data-raw/sampledata.R
@@ -0,0 +1,9 @@
+# Metadata - TCGA LUAD -------------------------
+
+# August, 2025
+# Source: https://gdc-hub.s3.us-east-1.amazonaws.com/download/TCGA-LUAD.star_counts.tsv.gz
+
+# Tab-separated sample sheet; barcodes get "." in place of "-".
+sampledata <- read.delim("../TCGA-LUAD.samples.reduced.tsv", header = TRUE)
+sampledata[["patient_id"]] <- gsub("-", ".", sampledata[["patient_id"]], fixed = TRUE)
+usethis::use_data(sampledata, compress = "xz", overwrite = TRUE)
diff --git a/data/raw_counts.rda b/data/raw_counts.rda
new file mode 100644
index 0000000..ea791d6
Binary files /dev/null and b/data/raw_counts.rda differ
diff --git a/data/sampledata.rda b/data/sampledata.rda
new file mode 100644
index 0000000..6e956c6
Binary files /dev/null and b/data/sampledata.rda differ
diff --git a/man/raw_counts.Rd b/man/raw_counts.Rd
new file mode 100644
index 0000000..2880349
--- /dev/null
+++ b/man/raw_counts.Rd
@@ -0,0 +1,30 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/raw_counts.R
+\docType{data}
+\name{raw_counts}
+\alias{raw_counts}
+\title{GDC TCGA Lung Adenocarcinoma (LUAD) - Raw STAR counts}
+\format{
+\subsection{\code{raw_counts}}{
+
+A data frame with 60,660 rows and 32 columns:
+\describe{
+\item{rownames}{Ensembl gene IDs (GRCh38) with version suffix stripped (no ".##").}
+\item{columns}{TCGA sample IDs written with dots instead of dashes, e.g., \code{TCGA.38.4627.11A}.}
+\item{values}{Raw counts from STAR gene quantification, back-transformed from the source file's log2(count + 1) values (2^x - 1).}
+}
+}
+}
+\source{
+\url{https://gdc-hub.s3.us-east-1.amazonaws.com/download/TCGA-LUAD.star_counts.tsv.gz}
+}
+\usage{
+raw_counts
+}
+\description{
+A subset of TCGA-LUAD RNA-seq gene-level counts generated by STAR and distributed via UCSC Xena.
+Data originate from GDC; only selected samples are included.
+Gene identifiers are Ensembl IDs with the version suffix removed (e.g., "ENSG00000141510.15" → "ENSG00000141510").
+Sample barcodes were compacted and written with dots instead of dashes (e.g., "TCGA-38-4627-11A" → "TCGA.38.4627.11A").
+}
+\keyword{datasets}
diff --git a/man/sampledata.Rd b/man/sampledata.Rd
new file mode 100644
index 0000000..fd22613
--- /dev/null
+++ b/man/sampledata.Rd
@@ -0,0 +1,38 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/sampledata.R
+\docType{data}
+\name{sampledata}
+\alias{sampledata}
+\title{GDC TCGA Lung Adenocarcinoma (LUAD) - Metadata}
+\format{
+\subsection{\code{sampledata}}{
+
+A data frame with 32 rows and 12 columns:
+\describe{
+\item{patient_id}{TCGA sample ID written with dots instead of dashes, e.g., \code{TCGA.38.4627.11A}.}
+\item{sample_type}{Sample category, e.g., \code{"tumor"} or \code{"normal"}.}
+\item{age}{Age (years) at diagnosis/collection.}
+\item{race_demographic}{Self-reported race/ethnicity (e.g., \code{"white"}).}
+\item{sex}{Biological sex (\code{"male"} / \code{"female"}).}
+\item{status}{Vital status at last follow-up (\code{"Alive"} / \code{"Dead"}).}
+\item{pathologic_stage}{Overall AJCC pathologic stage, e.g., \code{"Stage IA"}, \code{"Stage IIIA"}.}
+\item{pathologic_t}{Primary tumor (T) category, e.g., \code{"T1b"}, \code{"T2"}.}
+\item{smoking_status}{Smoking history from TCGA clinical (free text), e.g., \code{"Lifelong Non-smoker"}.}
+\item{agents}{Therapeutic agents administered (free text; may be empty).}
+\item{treatment_response}{Clinical response to therapy (free text; may be empty).}
+\item{treatment_type}{Type of therapy (free text; may be empty).}
+}
+}
+}
+\source{
+\url{https://gdc-hub.s3.us-east-1.amazonaws.com/download/TCGA-LUAD.star_counts.tsv.gz}
+}
+\usage{
+sampledata
+}
+\description{
+Sample information for the subset of TCGA-LUAD RNA-seq gene-level counts generated by STAR and distributed via UCSC Xena.
+Data originate from GDC; only selected samples are included.
+Sample barcodes were compacted and written with dots instead of dashes (e.g., "TCGA-38-4627-11A" → "TCGA.38.4627.11A").
+}
+\keyword{datasets}