IEEE-Ege · unsalangulce · Apr 2, 2025 · Apr 4, 2025 · Apr 4, 2025 · Apr 4, 2025
diff --git a/.gitignore b/.gitignore
@@ -172,3 +172,4 @@ cython_debug/
 
 # PyPI configuration file
 .pypirc
+oturum.txt
diff --git a/README.md b/README.md
@@ -1,2 +1,15 @@
 # SingleCellWebApp
 Bioinformatic technique team project  
+Here are the outputs we got from running our code.
+
+![PHOTO-2025-05-23-22-18-02](https://github.com/user-attachments/assets/052b7fae-295a-41b5-8f4c-9efb99e6365b)
+Above is the output from the rank genes function.
+
+![PHOTO-2025-05-23-22-32-16](https://github.com/user-attachments/assets/f7fe0897-e9b4-4412-be4a-c63479aa43b9)
+This one is the violin plot output.
+
+![PHOTO-2025-05-23-22-52-52](https://github.com/user-attachments/assets/62fcd331-dd72-4e4a-930a-e8698a6f8d01)
+This one is from the marker genes code.
+
+![PHOTO-2025-05-23-21-32-06](https://github.com/user-attachments/assets/30361fa4-fdc4-43cf-83a1-15c57454cbc1)
+Lastly, this is the output we got from the get scores p-values function.
diff --git a/SingleCellWebApp b/SingleCellWebApp
diff --git a/app_final_demo.py b/app_final_demo.py
diff --git a/data/pbmc3k_raw.h5ad b/data/pbmc3k_raw.h5ad
diff --git a/pcaprocess/demo_pcanlysis.py b/pcaprocess/demo_pcanlysis.py
@@ -0,0 +1,93 @@
+import scanpy as sc
+import numpy as np
+import matplotlib.pyplot as plt
+
+# The PCA helpers are defined further down in this same script; the import
+# below is kept commented out for the case where they live in a separate module.
+# from pca_functions import run_pca, plot_pca, plot_variance, save_results, get_adata
+
+# Step 1: Create random data
+n_cells, n_genes = 100, 2000
+# Fixed seed so the random matrix below is the same on every run
+np.random.seed(42)
+
+# Generate random gene expression data, one row per cell and one column per gene
+data = np.random.rand(n_cells, n_genes)
+
+# Create an AnnData object
+adata = sc.AnnData(X=data)
+
+# Adding cell types (obs): the first 50 cells are 'type1', the rest 'type2';
+# every gene gets a synthetic identifier (var)
+adata.obs['cell_type'] = ['type1' if i < 50 else 'type2' for i in range(n_cells)]
+adata.var['gene_id'] = [f"gene{i}" for i in range(n_genes)]
+
+# Step 2: Apply PCA
+def run_pca(adata, n_comps=50, svd_solver='arpack'):
+    """Preprocess ``adata`` and compute its principal components.
+
+    The scanpy calls below normalize (``target_sum=1e4``), log1p-transform
+    and scale the data before ``sc.tl.pca`` is run on the same object.
+    Nothing is returned.
+
+    Args:
+        adata: AnnData object handed to the scanpy preprocessing and PCA calls.
+        n_comps: Number of components requested from ``sc.tl.pca``.
+        svd_solver: Solver name forwarded to ``sc.tl.pca``.
+
+    Raises:
+        Exception: Any error raised inside the function is printed and then
+            re-raised unchanged.
+    """
+    try:
+        has_previous_pca = 'X_pca' in adata.obsm
+        if has_previous_pca:
+            print("PCA already computed. Overwriting previous results...")
+
+        start_message = f"Running PCA with {n_comps} components using {svd_solver} solver..."
+        print(start_message)
+
+        # Preprocessing chain; the order matters: normalize -> log1p -> scale.
+        sc.pp.normalize_total(adata, target_sum=1e4)
+        sc.pp.log1p(adata)
+        sc.pp.scale(adata)
+
+        sc.tl.pca(adata, svd_solver=svd_solver, n_comps=n_comps)
+
+        print("PCA completed.")
+    except Exception as exc:
+        print(f"Error during PCA: {exc}")
+        raise
+
+run_pca(adata, n_comps=10, svd_solver='arpack')
+
+# Step 3: Plot PCA graph
+def plot_pca(adata, color=None):
+    """Show the PCA scatter plot, optionally colored by an annotation key.
+
+    Args:
+        adata: AnnData object passed to ``sc.pl.pca``.
+        color: Value forwarded as ``color`` to ``sc.pl.pca``; a falsy value is
+            reported as 'default' in the printed message and the title.
+
+    Raises:
+        Exception: Errors other than ``KeyError`` are printed and re-raised.
+            On ``KeyError`` a warning is printed and the plot is drawn again
+            without a ``color`` argument.
+    """
+    try:
+        shown_as = color or 'default'
+        print(f"Plotting PCA, color by: {shown_as}")
+        sc.pl.pca(adata, color=color, show=False)
+        plt.title(f"PCA - Colored by {shown_as}")
+        plt.show()
+    except KeyError:
+        # Fallback path: retry the plot without any coloring key.
+        print(f"Warning: '{color}' not found. Using default coloring.")
+        sc.pl.pca(adata, show=False)
+        plt.title("PCA - Default Coloring")
+        plt.show()
+    except Exception as exc:
+        print(f"Error in PCA plot: {exc}")
+        raise
+
+plot_pca(adata, color='cell_type')
+
+# Step 4: Plot explained variance
+def plot_variance(adata, log=True):
+    """Show the variance ratio plot of the PCA components.
+
+    Args:
+        adata: AnnData object passed to ``sc.pl.pca_variance_ratio``.
+        log: Forwarded as the ``log`` argument of the scanpy plot call.
+
+    Raises:
+        Exception: Any error is printed and then re-raised unchanged.
+    """
+    try:
+        print("Plotting explained variance...")
+        sc.pl.pca_variance_ratio(adata, show=False, log=log)
+        plt.title("PCA: Explained Variance")
+        plt.show()
+    except Exception as exc:
+        print(f"Error in variance plot: {exc}")
+        raise
+
+plot_variance(adata)
+
+# Step 5: Save PCA results
+def save_results(adata, results_file="pca_results.h5ad"):
+    """Write ``adata`` to ``results_file`` using ``adata.write``.
+
+    Args:
+        adata: AnnData object to write.
+        results_file: Destination path handed to ``adata.write``.
+
+    Raises:
+        Exception: Any error is printed and then re-raised unchanged.
+    """
+    try:
+        announcement = f"Saving results to {results_file}..."
+        print(announcement)
+        adata.write(results_file)
+        print("Save successful.")
+    except Exception as exc:
+        print(f"Error saving results: {exc}")
+        raise
+
+save_results(adata, "pca_results.h5ad")
+
+# Step 6: Retrieve processed AnnData object
+def get_adata(adata):
+    """Hand back the object that was passed in, unchanged.
+
+    Args:
+        adata: The AnnData object to return.
+
+    Returns:
+        The very same object received as ``adata``.
+    """
+    processed = adata
+    return processed
+
+processed_adata = get_adata(adata)
diff --git a/rank_genes_violin_demo.py b/rank_genes_violin_demo.py
@@ -0,0 +1,17 @@
+from rank_genes_violin import get_rank_genes_groups_violin
+import scanpy as sc
+
+# Sample dataset: scanpy's pbmc3k example data
+adata = sc.datasets.pbmc3k()
+
+# Calculating neighborhoods; this runs before the Leiden clustering below
+sc.pp.neighbors(adata)
+
+# Leiden clustering; the 'leiden' key is what the next step groups by
+sc.tl.leiden(adata, resolution=1.0)
+
+# Differential expression analysis (with t-test)
+sc.tl.rank_genes_groups(adata, groupby='leiden', method='t-test')
+
+
+# Calling the function and visualizing group '0' with n_genes=8
+# NOTE(review): get_rank_genes_groups_violin is defined in rank_genes_violin,
+# which is not shown here; presumably it draws violin plots -- confirm.
+get_rank_genes_groups_violin(adata, groups='0', n_genes=8)
diff --git a/src/demo_pcanlysis.py b/src/demo_pcanlysis.py
@@ -0,0 +1,93 @@
+import scanpy as sc
+import numpy as np
+import matplotlib.pyplot as plt
+
+# The PCA helpers are defined further down in this same script; the import
+# below is kept commented out for the case where they live in a separate module.
+# from pca_functions import run_pca, plot_pca, plot_variance, save_results, get_adata
+
+# Step 1: Create random data
+n_cells, n_genes = 100, 2000
+# Fixed seed so the random matrix below is the same on every run
+np.random.seed(42)
+
+# Generate random gene expression data, one row per cell and one column per gene
+data = np.random.rand(n_cells, n_genes)
+
+# Create an AnnData object
+adata = sc.AnnData(X=data)
+
+# Adding cell types (obs): the first 50 cells are 'type1', the rest 'type2';
+# every gene gets a synthetic identifier (var)
+adata.obs['cell_type'] = ['type1' if i < 50 else 'type2' for i in range(n_cells)]
+adata.var['gene_id'] = [f"gene{i}" for i in range(n_genes)]
+
+# Step 2: Apply PCA
+def run_pca(adata, n_comps=50, svd_solver='arpack'):
+    """Preprocess ``adata`` and compute its principal components.
+
+    The scanpy calls below normalize (``target_sum=1e4``), log1p-transform
+    and scale the data before ``sc.tl.pca`` is run on the same object.
+    Nothing is returned.
+
+    Args:
+        adata: AnnData object handed to the scanpy preprocessing and PCA calls.
+        n_comps: Number of components requested from ``sc.tl.pca``.
+        svd_solver: Solver name forwarded to ``sc.tl.pca``.
+
+    Raises:
+        Exception: Any error raised inside the function is printed and then
+            re-raised unchanged.
+    """
+    try:
+        has_previous_pca = 'X_pca' in adata.obsm
+        if has_previous_pca:
+            print("PCA already computed. Overwriting previous results...")
+
+        start_message = f"Running PCA with {n_comps} components using {svd_solver} solver..."
+        print(start_message)
+
+        # Preprocessing chain; the order matters: normalize -> log1p -> scale.
+        sc.pp.normalize_total(adata, target_sum=1e4)
+        sc.pp.log1p(adata)
+        sc.pp.scale(adata)
+
+        sc.tl.pca(adata, svd_solver=svd_solver, n_comps=n_comps)
+
+        print("PCA completed.")
+    except Exception as exc:
+        print(f"Error during PCA: {exc}")
+        raise
+
+run_pca(adata, n_comps=10, svd_solver='arpack')
+
+# Step 3: Plot PCA graph
+def plot_pca(adata, color=None):
+    """Show the PCA scatter plot, optionally colored by an annotation key.
+
+    Args:
+        adata: AnnData object passed to ``sc.pl.pca``.
+        color: Value forwarded as ``color`` to ``sc.pl.pca``; a falsy value is
+            reported as 'default' in the printed message and the title.
+
+    Raises:
+        Exception: Errors other than ``KeyError`` are printed and re-raised.
+            On ``KeyError`` a warning is printed and the plot is drawn again
+            without a ``color`` argument.
+    """
+    try:
+        shown_as = color or 'default'
+        print(f"Plotting PCA, color by: {shown_as}")
+        sc.pl.pca(adata, color=color, show=False)
+        plt.title(f"PCA - Colored by {shown_as}")
+        plt.show()
+    except KeyError:
+        # Fallback path: retry the plot without any coloring key.
+        print(f"Warning: '{color}' not found. Using default coloring.")
+        sc.pl.pca(adata, show=False)
+        plt.title("PCA - Default Coloring")
+        plt.show()
+    except Exception as exc:
+        print(f"Error in PCA plot: {exc}")
+        raise
+
+plot_pca(adata, color='cell_type')
+
+# Step 4: Plot explained variance
+def plot_variance(adata, log=True):
+    """Show the variance ratio plot of the PCA components.
+
+    Args:
+        adata: AnnData object passed to ``sc.pl.pca_variance_ratio``.
+        log: Forwarded as the ``log`` argument of the scanpy plot call.
+
+    Raises:
+        Exception: Any error is printed and then re-raised unchanged.
+    """
+    try:
+        print("Plotting explained variance...")
+        sc.pl.pca_variance_ratio(adata, show=False, log=log)
+        plt.title("PCA: Explained Variance")
+        plt.show()
+    except Exception as exc:
+        print(f"Error in variance plot: {exc}")
+        raise
+
+plot_variance(adata)
+
+# Step 5: Save PCA results
+def save_results(adata, results_file="pca_results.h5ad"):
+    """Write ``adata`` to ``results_file`` using ``adata.write``.
+
+    Args:
+        adata: AnnData object to write.
+        results_file: Destination path handed to ``adata.write``.
+
+    Raises:
+        Exception: Any error is printed and then re-raised unchanged.
+    """
+    try:
+        announcement = f"Saving results to {results_file}..."
+        print(announcement)
+        adata.write(results_file)
+        print("Save successful.")
+    except Exception as exc:
+        print(f"Error saving results: {exc}")
+        raise
+
+save_results(adata, "pca_results.h5ad")
+
+# Step 6: Retrieve processed AnnData object
+def get_adata(adata):
+    """Hand back the object that was passed in, unchanged.
+
+    Args:
+        adata: The AnnData object to return.
+
+    Returns:
+        The very same object received as ``adata``.
+    """
+    processed = adata
+    return processed
+
+processed_adata = get_adata(adata)
diff --git a/src/modules/Neighborhood.py b/src/modules/Neighborhood.py
@@ -0,0 +1,64 @@
+import shiny  #Only needed if you are planning to use it later
+import scanpy as sc
+import umap
+
+# NOTE(review): this read runs at import time and uses a machine-specific
+# absolute path. Any script that imports this module (Neighborhood_demo.py
+# does) will presumably fail on a machine without this file -- consider
+# moving the load into the caller or under a __main__ guard.
+adata = sc.read("C:/Users/pc/.ipython/HW3/Hw3covid_Data_AllCells.h5ad")
+
+#computing
+def compute(adata, n_neighbors=10, n_pcs=40):
+    """Compute the neighborhood graph of ``adata`` with ``sc.pp.neighbors``.
+
+    Args:
+        adata: AnnData object passed to ``sc.pp.neighbors``.
+        n_neighbors (int, optional): Forwarded to ``sc.pp.neighbors``.
+            Defaults to 10.
+        n_pcs (int, optional): Forwarded to ``sc.pp.neighbors``.
+            Defaults to 40.
+
+    Errors are not propagated: any exception is caught and reported on
+    stdout, and the function returns ``None`` either way.
+    """
+    try:
+        sc.pp.neighbors(adata, n_pcs=n_pcs, n_neighbors=n_neighbors)
+        print("Neighbors computed successfully.")
+    except Exception as exc:
+        print(f"Error in computing neighbors: {exc}")
+
+#embedding
+def embed(adata, color=('CST3', 'NKG7', 'PPBP')):
+    """Plot the UMAP embedding of ``adata`` once, colored by ``color``.
+
+    This function only plots; it does not compute the embedding itself.
+
+    Args:
+        adata: AnnData object passed to ``sc.pl.umap``.
+        color (str or sequence of str, optional): Key(s) forwarded as
+            ``color`` to ``sc.pl.umap``. Defaults to
+            ('CST3', 'NKG7', 'PPBP').
+
+    Errors are not propagated: any exception is caught and reported on
+    stdout, and the function returns ``None`` either way.
+    """
+    try:
+        # Lists and tuples are copied into a fresh list so the shared default
+        # is never handed to scanpy; any other value is forwarded untouched.
+        keys = list(color) if isinstance(color, (list, tuple)) else color
+        # Plot a single time; the identical figure used to be drawn twice.
+        sc.pl.umap(adata, color=keys)
+        print("Embedding and plotting successful.")
+    except Exception as exc:
+        print(f"Error in embedding: {exc}")
+
+#clustering
+def cluster(adata, color=('leiden', 'CST3', 'NKG7')):
+    """Run Leiden clustering on ``adata`` and plot the UMAP colored by ``color``.
+
+    Args:
+        adata: AnnData object passed to ``sc.tl.leiden`` and ``sc.pl.umap``.
+        color (str or sequence of str, optional): Key(s) forwarded as
+            ``color`` to ``sc.pl.umap``. Defaults to
+            ('leiden', 'CST3', 'NKG7').
+
+    Errors are not propagated: any exception is caught and reported on
+    stdout, and the function returns ``None`` either way.
+    """
+    try:
+        sc.tl.leiden(adata)
+        # Lists and tuples are copied into a fresh list so the shared default
+        # is never handed to scanpy; any other value is forwarded untouched.
+        keys = list(color) if isinstance(color, (list, tuple)) else color
+        sc.pl.umap(adata, color=keys)
+        print("Clustering and plotting successful.")
+    except Exception as exc:
+        print(f"Error in clustering: {exc}")
+
+#saving
+def save(results_file, adata):
+    """Write ``adata`` to ``results_file`` using ``adata.write``.
+
+    Args:
+        results_file: Destination path handed to ``adata.write``.
+        adata: AnnData object to write.
+
+    Errors are not propagated: any exception is caught and reported on
+    stdout, and the function returns ``None`` either way.
+    """
+    try:
+        adata.write(results_file)
+        success_message = f"Data saved successfully to {results_file}."
+        print(success_message)
+    except Exception as exc:
+        print(f"Error in saving data: {exc}")
diff --git a/src/modules/Neighborhood_demo.py b/src/modules/Neighborhood_demo.py
@@ -0,0 +1,30 @@
+import scanpy as sc
+import Neighborhood as nb  # Your functions are here!
+import umap
+
+# Load the dataset (scanpy's pbmc3k example data)
+print("Loading dataset...")
+adata = sc.datasets.pbmc3k()
+print(f"Dataset loaded! Number of cells: {adata.n_obs}, Number of genes: {adata.n_vars}\n")
+
+# Compute the neighbor graph
+# NOTE(review): nb.compute prints and swallows any exception, so a failure
+# here would only surface in a later step.
+print("Computing the neighborhood graph...")
+nb.compute(adata, n_neighbors=10, n_pcs=40)
+
+# Compute UMAP; nb.embed and nb.cluster below only plot the embedding
+print("Computing UMAP embedding...")
+sc.tl.umap(adata)
+
+# Visualize the embedding, colored by the selected genes
+print("Plotting UMAP...")
+genes_of_interest = ['CD3D', 'MS4A1', 'GNLY']
+nb.embed(adata, color=genes_of_interest)
+
+# Perform clustering and plot the clusters next to the same genes
+print("Performing Leiden clustering...")
+nb.cluster(adata, color=['leiden'] + genes_of_interest)
+
+# Save the results
+results_file = "pbmc3k_final_results.h5ad"
+print(f"Saving results to {results_file}...")
+nb.save(results_file, adata)
diff --git a/src/modules/SingleCellWebApp b/src/modules/SingleCellWebApp
diff --git a/src/modules/data/pbmc3k_raw.h5ad b/src/modules/data/pbmc3k_raw.h5ad
Original file line number	Diff line number	Diff line change
Expand Up		@@ -172,3 +172,4 @@ cython_debug/

		# PyPI configuration file
		.pypirc
		oturum.txt