IEEE-Ege · serayyucetin · Apr 4, 2025 · Apr 27, 2025 · Apr 27, 2025 · Apr 27, 2025
diff --git a/src/modules/Neighborhood.py b/src/modules/Neighborhood.py
@@ -0,0 +1,64 @@
+import shiny  #Only needed if you are planning to use it later
+import scanpy as sc
+import umap
+
+adata = sc.read("C:/Users/pc/.ipython/HW3/Hw3covid_Data_AllCells.h5ad")
+
+#computing
+def compute(adata, n_neighbors=10, n_pcs=40):
+    """_summary_
+
+    Args:
+        Hw3covid_Data_AllCells (_type_): _description_
+        n_neighbors (int, optional): _description_. Defaults to 10.
+        n_pcs (int, optional): _description_. Defaults to 40.
+    """
+    try:
+        sc.pp.neighbors(adata, n_neighbors=n_neighbors, n_pcs=n_pcs)
+        print("Neighbors computed successfully.")
+    except Exception as e:
+        print(f"Error in computing neighbors: {e}")
+
+#embedding
+def embed(adata, color=['CST3', 'NKG7', 'PPBP']):
+    """_summary_
+
+    Args:
+        adata (_type_): _description_
+        color (list, optional): _description_. Defaults to ['CST3', 'NKG7', 'PPBP'].
+    """
+    try:
+        sc.pl.umap(adata, color=color)
+        sc.pl.umap(adata, color=color)  # Note: This plots twice; check if you really want this
+        print("Embedding and plotting successful.")
+    except Exception as e:
+        print(f"Error in embedding: {e}")
+
+#clustering
+def cluster(adata, color=['leiden', 'CST3', 'NKG7']):
+    """_summary_
+
+    Args:
+        adata (_type_): _description_
+        color (list, optional): _description_. Defaults to ['leiden', 'CST3', 'NKG7'].
+    """
+    try:
+        sc.tl.leiden(adata)
+        sc.pl.umap(adata, color=color)
+        print("Clustering and plotting successful.")
+    except Exception as e:
+        print(f"Error in clustering: {e}")
+
+#saving
+def save(results_file, adata):
+    """_summary_
+
+    Args:
+        results_file (_type_): _description_
+        adata (_type_): _description_
+    """
+    try:
+        adata.write(results_file)
+        print(f"Data saved successfully to {results_file}.")
+    except Exception as e:
+        print(f"Error in saving data: {e}")
diff --git a/src/modules/Neighborhood_demo.py b/src/modules/Neighborhood_demo.py
@@ -0,0 +1,30 @@
+import scanpy as sc
+import Neighborhood as nb  # Your functions are here!
+import umap
+
+#Load the dataset
+print("Loading dataset...")
+adata = sc.datasets.pbmc3k()
+print(f"Dataset loaded! Number of cells: {adata.n_obs}, Number of genes: {adata.n_vars}\n")
+
+#Compute the neighbor graph
+print("Computing the neighborhood graph...")
+nb.compute(adata, n_neighbors=10, n_pcs=40)
+
+#Compute UMAP
+print("Computing UMAP embedding...")
+sc.tl.umap(adata)
+
+#Visualize the embedding
+print("Plotting UMAP...")
+genes_of_interest = ['CD3D', 'MS4A1', 'GNLY']
+nb.embed(adata, color=genes_of_interest)
+
+#Perform clustering
+print("Performing Leiden clustering...")
+nb.cluster(adata, color=['leiden'] + genes_of_interest)
+
+#Save the results
+results_file = "pbmc3k_final_results.h5ad"
+print(f"Saving results to {results_file}...")
+nb.save(results_file, adata)
diff --git a/src/modules/visualization.py b/src/modules/visualization.py
@@ -0,0 +1,91 @@
+# Import libraries
+import scanpy as sc
+import matplotlib.pyplot as plt
+import tempfile
+
+# PCA plot function
+def pca_plot(adata, log=True, color='CST3'):
+    """_summary_
+
+    Args:
+        adata (_type_): _description_æ
+        log (bool, optional): _description_. Defaults to True.
+        color (str, optional): _description_. Defaults to 'CST3'.
+
+    Raises:
+        ValueError: _description_
+    """
+    if adata is None:
+        raise ValueError("Input AnnData object (adata) is None.")
+    sc.pl.pca_variance_ratio(adata, log=log)
+    sc.pl.pca(adata, color=color)
+
+# Highly variable genes plot
+def highvarGen_pl(adata, min_mean=0.0125, max_mean=3, min_disp=0.5):
+    """_summary_
+
+    Args:
+        adata (_type_): _description_
+        min_mean (float, optional): _description_. Defaults to 0.0125.
+        max_mean (int, optional): _description_. Defaults to 3.
+        min_disp (float, optional): _description_. Defaults to 0.5.
+
+    Raises:
+        ValueError: _description_
+        ValueError: _description_
+    """
+    if adata is None:
+        raise ValueError("Input AnnData object (adata) is None.")
+    if "Cell type" not in adata.obs:
+        raise ValueError("'Cell type' not found in adata.obs. Please check your annotations.")
+
+    sc.pl.umap(adata, color="Cell type", show=False)
+    sc.pl.highly_variable_genes(adata)
+
+# Violin plot function
+def violin_pl(adata):
+    """_summary_
+
+    Args:
+        adata (_type_): _description_
+
+    Raises:
+        ValueError: _description_
+        ValueError: _description_
+    """
+    if adata is None:
+        raise ValueError("Input AnnData object (adata) is None.")
+    if "Cell type" not in adata.obs:
+        raise ValueError("'Cell type' not found in adata.obs. Please check your annotations.")
+
+    sc.pl.umap(adata, color="Cell type", show=False)
+    sc.pl.violin(
+        adata,
+        ['n_genes_by_counts', 'total_counts', 'pct_counts_mt'],
+        jitter=0.4,
+        multi_panel=True
+    )
+
+# Scatter plot function
+def scatter_plotting(adata, x='total_counts', y='pct_counts_mt'):
+    """_summary_
+
+    Args:
+        adata (_type_): _description_
+        x (str, optional): _description_. Defaults to 'total_counts'.
+        y (str, optional): _description_. Defaults to 'pct_counts_mt'.
+
+    Raises:
+        ValueError: _description_
+        ValueError: _description_
+    """
+    if adata is None:
+        raise ValueError("Input AnnData object (adata) is None.")
+    if "Cell type" not in adata.obs:
+        raise ValueError("'Cell type' not found in adata.obs. Please check your annotations.")
+
+    sc.pl.umap(adata, color="Cell type", show=False)
+    sc.pl.scatter(adata, x='total_counts', y='pct_counts_mt')
+    sc.pl.scatter(adata, x='total_counts', y='n_genes_by_counts')
+
+
diff --git a/src/modules/visualization_demo.py b/src/modules/visualization_demo.py
@@ -0,0 +1,132 @@
+import scanpy as sc
+import matplotlib.pyplot as plt
+import matplotlib
+import os
+
+# === Plot helper ===
+def save_and_show(filename, folder='plots') -> None:
+    """
+    Saves the current matplotlib plot to a file and displays it.
+
+    Parameters:
+    filename (str): Name of the output image file 
+    folder (str): Folder where the image will be saved (default is "plots").
+    """
+    if not os.path.exists(folder):
+        os.makedirs(folder)
+    plt.savefig(f"{folder}/{filename}")
+    plt.show()
+
+def load_file_demo(file_path: str) -> sc.AnnData:
+    """
+    Loads a .h5ad file, applies basic quality control preprocessing,
+    and plots the top expressed genes.
+
+    Steps:
+    - Reads file and saves a copy
+    - Plots highest expressed genes
+    - Filters cells and genes
+    - Annotates mitochondrial genes
+    - Calculates QC metrics
+    - Normalizes and log-transforms data
+    - Identifies highly variable genes
+
+    Parameters:
+    file_path (str): Path to the .h5ad file
+
+    Returns:
+    AnnData: Preprocessed AnnData object
+    """
+    adata = sc.read_h5ad(file_path)
+    adata.write("adata_demo.h5ad")
+    print("Data saved as adata_demo.h5ad")
+
+    print("Plotting highest expressed genes...")
+    sc.pl.highest_expr_genes(adata, n_top=20, show=False)
+    save_and_show("highest_expr_genes.png")
+
+    print("Filtering cells and genes...")
+    sc.pp.filter_cells(adata, min_genes=200)
+    sc.pp.filter_genes(adata, min_cells=3)
+
+    print("Annotating mitochondrial genes...")
+    adata.var['mt'] = adata.var_names.str.startswith('MT-')
+
+    print("Calculating QC metrics...")
+    sc.pp.calculate_qc_metrics(adata, qc_vars=['mt'], percent_top=None, log1p=False, inplace=True)
+
+    print("Normalizing and log-transforming...")
+    sc.pp.normalize_total(adata, target_sum=1e4)
+    sc.pp.log1p(adata)
+
+    print("Finding highly variable genes...")
+    sc.pp.highly_variable_genes(adata, min_mean=0.0125, max_mean=3, min_disp=0.5)
+
+    print("Preprocessing done.")
+    return adata
+
+def pca_plot(adata: sc.AnnData, log: bool = True, color: str = 'CST3') -> None:
+    """
+    Performs PCA on the dataset and visualizes results.
+
+    Parameters:
+    adata (AnnData): Preprocessed AnnData object
+    log (bool): Whether to log scale the variance ratio plot
+    color (str): Gene or metadata field to color PCA by
+    """
+    sc.tl.pca(adata, svd_solver='arpack')
+
+    sc.pl.pca_variance_ratio(adata, log=log, show=False)
+    save_and_show("pca_variance_ratio.png")
+
+    sc.pl.pca(adata, color=color, show=False)
+    save_and_show("pca_plot.png")
+
+def highvarGen_pl(adata: sc.AnnData) -> None:
+    """
+    Plots various visualizations related to highly variable genes and quality metrics.
+
+    - UMAP by cell type (if available)
+    - Highly variable genes plot
+    - Violin plots for quality control stats
+    """
+    if "Cell type" in adata.obs:
+        sc.pl.umap(adata, color="Cell type", show=False)
+        save_and_show("umap_cell_type.png")
+    else:
+        print("Error: 'Cell type' variable couldn't find in adata.obs.")
+
+    sc.pl.highly_variable_genes(adata, show=False)
+    save_and_show("highly_variable_genes.png")
+
+    sc.pl.violin(adata, ['n_genes_by_counts', 'total_counts'], jitter=0.4, multi_panel=True, show=False)
+    save_and_show("violin_qc.png")
+
+def scatter_plotting(adata: sc.AnnData) -> None:
+    """
+    Generates scatter plots for common QC visualizations:
+
+    - UMAP by cell type (if available)
+    - Total counts vs. mitochondrial percentage
+    - Total counts vs. number of genes
+    """
+    if "Cell type" in adata.obs:
+        sc.pl.umap(adata, color="Cell type", show=False)
+        save_and_show("umap_cell_type_scatter.png")
+    else:
+        print("Error: 'Cell type' variable couldn't find in adata.obs.")
+
+    sc.pl.scatter(adata, x='total_counts', y='pct_counts_mt', show=False)
+    save_and_show("scatter_total_vs_pct_mt.png")
+
+    sc.pl.scatter(adata, x='total_counts', y='n_genes_by_counts', show=False)
+    save_and_show("scatter_total_vs_n_genes.png")
+
+# === MAIN RUN ===
+file_path = r"C:\Users\gediz\Downloads\Hw3covid_Data_AllCells.h5ad"
+
+adata = load_file_demo(file_path)
+
+pca_plot(adata)
+highvarGen_pl(adata)
+scatter_plotting(adata)