Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 64 additions & 0 deletions src/modules/Neighborhood.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
import shiny #Only needed if you are planning to use it later
import scanpy as sc
import umap

adata = sc.read("C:/Users/pc/.ipython/HW3/Hw3covid_Data_AllCells.h5ad")

#computing
def compute(adata, n_neighbors=10, n_pcs=40):
"""_summary_

Args:
Hw3covid_Data_AllCells (_type_): _description_
n_neighbors (int, optional): _description_. Defaults to 10.
n_pcs (int, optional): _description_. Defaults to 40.
"""
try:
sc.pp.neighbors(adata, n_neighbors=n_neighbors, n_pcs=n_pcs)
print("Neighbors computed successfully.")
except Exception as e:
print(f"Error in computing neighbors: {e}")

#embedding
def embed(adata, color=['CST3', 'NKG7', 'PPBP']):
"""_summary_

Args:
adata (_type_): _description_
color (list, optional): _description_. Defaults to ['CST3', 'NKG7', 'PPBP'].
"""
try:
sc.pl.umap(adata, color=color)
sc.pl.umap(adata, color=color) # Note: This plots twice; check if you really want this
print("Embedding and plotting successful.")
except Exception as e:
print(f"Error in embedding: {e}")

#clustering
def cluster(adata, color=['leiden', 'CST3', 'NKG7']):
"""_summary_

Args:
adata (_type_): _description_
color (list, optional): _description_. Defaults to ['leiden', 'CST3', 'NKG7'].
"""
try:
sc.tl.leiden(adata)
sc.pl.umap(adata, color=color)
print("Clustering and plotting successful.")
except Exception as e:
print(f"Error in clustering: {e}")

#saving
def save(results_file, adata):
"""_summary_

Args:
results_file (_type_): _description_
adata (_type_): _description_
"""
try:
adata.write(results_file)
print(f"Data saved successfully to {results_file}.")
except Exception as e:
print(f"Error in saving data: {e}")
30 changes: 30 additions & 0 deletions src/modules/Neighborhood_demo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import scanpy as sc
import Neighborhood as nb # Your functions are here!
import umap

#Load the dataset
print("Loading dataset...")
adata = sc.datasets.pbmc3k()
print(f"Dataset loaded! Number of cells: {adata.n_obs}, Number of genes: {adata.n_vars}\n")

#Compute the neighbor graph
print("Computing the neighborhood graph...")
nb.compute(adata, n_neighbors=10, n_pcs=40)

#Compute UMAP
print("Computing UMAP embedding...")
sc.tl.umap(adata)

#Visualize the embedding
print("Plotting UMAP...")
genes_of_interest = ['CD3D', 'MS4A1', 'GNLY']
nb.embed(adata, color=genes_of_interest)

#Perform clustering
print("Performing Leiden clustering...")
nb.cluster(adata, color=['leiden'] + genes_of_interest)

#Save the results
results_file = "pbmc3k_final_results.h5ad"
print(f"Saving results to {results_file}...")
nb.save(results_file, adata)
91 changes: 91 additions & 0 deletions src/modules/visualization.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
# Import libraries
import scanpy as sc
import matplotlib.pyplot as plt
import tempfile

# PCA plot function
def pca_plot(adata, log=True, color='CST3'):
"""_summary_

Args:
adata (_type_): _description_æ
log (bool, optional): _description_. Defaults to True.
color (str, optional): _description_. Defaults to 'CST3'.

Raises:
ValueError: _description_
"""
if adata is None:
raise ValueError("Input AnnData object (adata) is None.")
sc.pl.pca_variance_ratio(adata, log=log)
sc.pl.pca(adata, color=color)

# Highly variable genes plot
def highvarGen_pl(adata, min_mean=0.0125, max_mean=3, min_disp=0.5):
"""_summary_

Args:
adata (_type_): _description_
min_mean (float, optional): _description_. Defaults to 0.0125.
max_mean (int, optional): _description_. Defaults to 3.
min_disp (float, optional): _description_. Defaults to 0.5.

Raises:
ValueError: _description_
ValueError: _description_
"""
if adata is None:
raise ValueError("Input AnnData object (adata) is None.")
if "Cell type" not in adata.obs:
raise ValueError("'Cell type' not found in adata.obs. Please check your annotations.")

sc.pl.umap(adata, color="Cell type", show=False)
sc.pl.highly_variable_genes(adata)

# Violin plot function
def violin_pl(adata):
"""_summary_

Args:
adata (_type_): _description_

Raises:
ValueError: _description_
ValueError: _description_
"""
if adata is None:
raise ValueError("Input AnnData object (adata) is None.")
if "Cell type" not in adata.obs:
raise ValueError("'Cell type' not found in adata.obs. Please check your annotations.")

sc.pl.umap(adata, color="Cell type", show=False)
sc.pl.violin(
adata,
['n_genes_by_counts', 'total_counts', 'pct_counts_mt'],
jitter=0.4,
multi_panel=True
)

# Scatter plot function
def scatter_plotting(adata, x='total_counts', y='pct_counts_mt'):
"""_summary_

Args:
adata (_type_): _description_
x (str, optional): _description_. Defaults to 'total_counts'.
y (str, optional): _description_. Defaults to 'pct_counts_mt'.

Raises:
ValueError: _description_
ValueError: _description_
"""
if adata is None:
raise ValueError("Input AnnData object (adata) is None.")
if "Cell type" not in adata.obs:
raise ValueError("'Cell type' not found in adata.obs. Please check your annotations.")

sc.pl.umap(adata, color="Cell type", show=False)
sc.pl.scatter(adata, x='total_counts', y='pct_counts_mt')
sc.pl.scatter(adata, x='total_counts', y='n_genes_by_counts')


132 changes: 132 additions & 0 deletions src/modules/visualization_demo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
import scanpy as sc
import matplotlib.pyplot as plt
import matplotlib
import os

# === Plot helper ===
def save_and_show(filename, folder='plots') -> None:
"""
Saves the current matplotlib plot to a file and displays it.

Parameters:
filename (str): Name of the output image file
folder (str): Folder where the image will be saved (default is "plots").
"""
if not os.path.exists(folder):
os.makedirs(folder)
plt.savefig(f"{folder}/{filename}")
plt.show()

def load_file_demo(file_path: str) -> sc.AnnData:
"""
Loads a .h5ad file, applies basic quality control preprocessing,
and plots the top expressed genes.

Steps:
- Reads file and saves a copy
- Plots highest expressed genes
- Filters cells and genes
- Annotates mitochondrial genes
- Calculates QC metrics
- Normalizes and log-transforms data
- Identifies highly variable genes

Parameters:
file_path (str): Path to the .h5ad file

Returns:
AnnData: Preprocessed AnnData object
"""
adata = sc.read_h5ad(file_path)
adata.write("adata_demo.h5ad")
print("Data saved as adata_demo.h5ad")

print("Plotting highest expressed genes...")
sc.pl.highest_expr_genes(adata, n_top=20, show=False)
save_and_show("highest_expr_genes.png")

print("Filtering cells and genes...")
sc.pp.filter_cells(adata, min_genes=200)
sc.pp.filter_genes(adata, min_cells=3)

print("Annotating mitochondrial genes...")
adata.var['mt'] = adata.var_names.str.startswith('MT-')

print("Calculating QC metrics...")
sc.pp.calculate_qc_metrics(adata, qc_vars=['mt'], percent_top=None, log1p=False, inplace=True)

print("Normalizing and log-transforming...")
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)

print("Finding highly variable genes...")
sc.pp.highly_variable_genes(adata, min_mean=0.0125, max_mean=3, min_disp=0.5)

print("Preprocessing done.")
return adata

def pca_plot(adata: sc.AnnData, log: bool = True, color: str = 'CST3') -> None:
"""
Performs PCA on the dataset and visualizes results.

Parameters:
adata (AnnData): Preprocessed AnnData object
log (bool): Whether to log scale the variance ratio plot
color (str): Gene or metadata field to color PCA by
"""
sc.tl.pca(adata, svd_solver='arpack')

sc.pl.pca_variance_ratio(adata, log=log, show=False)
save_and_show("pca_variance_ratio.png")

sc.pl.pca(adata, color=color, show=False)
save_and_show("pca_plot.png")

def highvarGen_pl(adata: sc.AnnData) -> None:
"""
Plots various visualizations related to highly variable genes and quality metrics.

- UMAP by cell type (if available)
- Highly variable genes plot
- Violin plots for quality control stats
"""
if "Cell type" in adata.obs:
sc.pl.umap(adata, color="Cell type", show=False)
save_and_show("umap_cell_type.png")
else:
print("Error: 'Cell type' variable couldn't find in adata.obs.")

sc.pl.highly_variable_genes(adata, show=False)
save_and_show("highly_variable_genes.png")

sc.pl.violin(adata, ['n_genes_by_counts', 'total_counts'], jitter=0.4, multi_panel=True, show=False)
save_and_show("violin_qc.png")

def scatter_plotting(adata: sc.AnnData) -> None:
"""
Generates scatter plots for common QC visualizations:

- UMAP by cell type (if available)
- Total counts vs. mitochondrial percentage
- Total counts vs. number of genes
"""
if "Cell type" in adata.obs:
sc.pl.umap(adata, color="Cell type", show=False)
save_and_show("umap_cell_type_scatter.png")
else:
print("Error: 'Cell type' variable couldn't find in adata.obs.")

sc.pl.scatter(adata, x='total_counts', y='pct_counts_mt', show=False)
save_and_show("scatter_total_vs_pct_mt.png")

sc.pl.scatter(adata, x='total_counts', y='n_genes_by_counts', show=False)
save_and_show("scatter_total_vs_n_genes.png")

# === MAIN RUN ===
file_path = r"C:\Users\gediz\Downloads\Hw3covid_Data_AllCells.h5ad"

adata = load_file_demo(file_path)

pca_plot(adata)
highvarGen_pl(adata)
scatter_plotting(adata)