diff --git a/.gitignore b/.gitignore index 0a19790..ff67cbc 100644 --- a/.gitignore +++ b/.gitignore @@ -172,3 +172,4 @@ cython_debug/ # PyPI configuration file .pypirc +oturum.txt diff --git a/README.md b/README.md index b393bbe..6bb402c 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,15 @@ # SingleCellWebApp Bioinformatic technique team project +Here are the outputs we got from running our codes. + +![PHOTO-2025-05-23-22-18-02](https://github.com/user-attachments/assets/052b7fae-295a-41b5-8f4c-9efb99e6365b) +Above is the output from rank genes function + +![PHOTO-2025-05-23-22-32-16](https://github.com/user-attachments/assets/f7fe0897-e9b4-4412-be4a-c63479aa43b9) +This one is the violin plot output. + +![PHOTO-2025-05-23-22-52-52](https://github.com/user-attachments/assets/62fcd331-dd72-4e4a-930a-e8698a6f8d01) +This one is from marker genes code. + +![PHOTO-2025-05-23-21-32-06](https://github.com/user-attachments/assets/30361fa4-fdc4-43cf-83a1-15c57454cbc1) +Lastly, this is the output we got from the get scores p-values function. diff --git a/SingleCellWebApp b/SingleCellWebApp new file mode 160000 index 0000000..a9ae786 --- /dev/null +++ b/SingleCellWebApp @@ -0,0 +1 @@ +Subproject commit a9ae7869927e2230f9e97f2990892d72ad2bb246 diff --git a/app_final_demo.py b/app_final_demo.py new file mode 100644 index 0000000..8b2a762 --- /dev/null +++ b/app_final_demo.py @@ -0,0 +1,539 @@ +from shiny import App, ui, reactive, render +from sqlalchemy import create_engine, Column, Integer, String +from sqlalchemy.orm import sessionmaker, declarative_base +import bcrypt +import jwt +import datetime + +# Database Configuration +# IMPORTANT: Ensure your PostgreSQL server is running and the 'postgres' user +# with password '1234' has access to the 'AZRA_SCA_DEMO' database. +DATABASE_URL = "postgresql://postgres:1234@localhost:5432/AZRA_SCA_DEMO" +engine = create_engine(DATABASE_URL) +SessionLocal = sessionmaker(bind=engine) +Base = declarative_base() + +# Define the User model for SQLAlchemy +class User(Base): + __tablename__ = "users" + id = Column(Integer, primary_key=True, index=True) + username = Column(String, unique=True, index=True, nullable=False) + email = Column(String, unique=True, nullable=False) + password_hash = Column(String, nullable=False) + +# Create database tables if they don't exist +Base.metadata.create_all(bind=engine) + +# JWT Secret Key +# IMPORTANT: In a real application, use a strong, randomly generated +# key stored securely (e.g., in environment variables), not hardcoded. +SECRET_KEY = "supersecretkey123" + +# Function to create a JWT token +def create_jwt_token(username): + # Payload for the JWT token + payload = { + "user": username, + "exp": datetime.datetime.utcnow() + datetime.timedelta(hours=1), # Token expires in 1 hour + } + # Encode the payload with the secret key and HS256 algorithm + return jwt.encode(payload, SECRET_KEY, algorithm="HS256") + +# Define the User Interface (UI) for the Shiny app +app_ui = ui.page_fluid( + # Custom CSS styling for the app's appearance + ui.tags.style(""" + /* General body styling for a softer background and font */ + body { + background-color: #eef2f7; /* Light blue-grey background */ + font-family: 'Inter', 'Segoe UI', Roboto, Helvetica, Arial, sans-serif; + color: #333; + line-height: 1.6; + margin: 0; + padding: 0; + display: flex; + justify-content: center; + align-items: center; + min-height: 100vh; /* Full viewport height */ + } + /* Container styling for a modern card-like look */ + .container { + max-width: 450px; /* Slightly wider for better spacing */ + width: 90%; /* Responsive width */ + padding: 30px 35px; + border-radius: 12px; /* More rounded corners */ + background-color: #ffffff; /* Pure white background */ + box-shadow: 0 8px 25px rgba(0,0,0,0.12); /* Stronger, softer shadow */ + box-sizing: border-box; /* Include padding in element's total width and height */ + } + /* Title styling */ + h2 { + text-align: center; + color: #2c3e50; /* Darker title color */ + margin-bottom: 30px; + font-size: 2.2rem; /* Larger title */ + font-weight: 700; /* Bolder font */ + font-family: 'Lucida Console', 'Consolas', monospace; /* Lucida Console font for bold text */ + } + /* Form headings styling */ + h3 { + color: #34495e; + margin-bottom: 25px; /* More space below heading */ + text-align: center; + font-size: 1.6rem; /* Larger form headings */ + font-weight: 600; + font-family: 'Lucida Console', 'Consolas', monospace; /* Lucida Console font for bold text */ + } + /* Input field styling */ + input[type="text"], input[type="password"], input[type="email"] { + width: 100%; + padding: 12px 15px; /* More padding */ + margin-bottom: 20px; /* More space between inputs */ + border: 1px solid #dcdcdc; /* Lighter border */ + border-radius: 8px; /* More rounded input fields */ + font-size: 1.05rem; /* Slightly larger text in inputs */ + box-sizing: border-box; + transition: border-color 0.3s ease, box-shadow 0.3s ease; + } + /* Input focus effect */ + input[type="text"]:focus, input[type="password"]:focus, input[type="email"]:focus { + border-color: #f7a6b5; /* Brighter pastel pink on focus */ + outline: none; + box-shadow: 0 0 0 3px rgba(247, 166, 181, 0.5); /* Glow effect with pastel pink */ + } + /* Button styling */ + button, .btn { + background-color: #f7baba; /* Pastel pink color */ + color: white; + padding: 12px 0; /* More vertical padding */ + width: 100%; + font-size: 1.1rem; /* Larger font for buttons */ + font-weight: 600; /* Bolder button text */ + border: none; + border-radius: 8px; /* More rounded buttons */ + cursor: pointer; + transition: background-color 0.3s ease, transform 0.2s ease; + letter-spacing: 0.5px; /* Slightly spaced letters */ + } + /* Button hover effect */ + button:hover, .btn:hover { + background-color: #f7a6b5; /* Darker pastel pink on hover */ + transform: translateY(-2px); /* Slight lift effect */ + } + button:active, .btn:active { + transform: translateY(0); /* Press down effect */ + } + /* Link styling for switch between forms */ + a.form-link { + display: block; + text-align: center; + margin-top: 20px; + color: #f7baba; /* Pastel pink for links */ + cursor: pointer; + text-decoration: none; + font-size: 0.95rem; + transition: color 0.3s ease; + } + /* Link hover effect */ + a.form-link:hover { + text-decoration: underline; + color: #f7a6b5; + } + /* Message area styling (for error/success messages) */ + #message_text { + text-align: center; + font-weight: 600; + margin-top: 15px; /* Adjusted margin */ + margin-bottom: 25px; /* Adjusted margin */ + color: #dc3545; /* Red for error messages */ + font-size: 0.95rem; + } + /* Styling for success messages */ + .message-success { + color: #28a745 !important; /* Green for success */ + } + /* Horizontal rule styling */ + hr { + border: 0; + height: 1px; + background: #eee; /* Lighter divider */ + margin: 30px 0; + } + /* Protected content styling */ + #protected_content { + background-color: #f8f9fa; + border: 1px solid #e9ecef; + border-radius: 8px; + padding: 20px; + margin-top: 25px; + text-align: center; + color: #495057; + } + #protected_content h4 { + color: #f7baba; /* Pastel pink for protected content heading */ + margin-bottom: 15px; + font-size: 1.3rem; + font-family: 'Lucida Console', 'Consolas', monospace; /* Lucida Console font for bold text */ + } + /* JWT Token display styling */ + .token-display { + white-space: pre-wrap; + word-wrap: break-word; + max-height: 150px; + overflow-y: auto; + background-color: #e9ecef; /* Slightly darker background for token */ + padding: 15px; + border: 1px solid #ced4da; + border-radius: 8px; + font-family: 'Fira Code', 'Cascadia Code', monospace; /* Modern monospace font */ + font-size: 0.85rem; + margin-top: 15px; + color: #495057; + } + """), + # Main application div container + ui.div( + ui.h2("SCA-Web"), # Application title + ui.output_ui("message_text"), # Displays messages to the user + ui.output_ui("main_ui"), # UI for login/register/welcome screen + ui.hr(), # Horizontal rule + ui.output_ui("protected_content"), # Content visible only when logged in + class_="container" # Apply the CSS container class + ), + # Add JavaScript for "Enter to continue" functionality on form submissions + ui.tags.script(""" + $(document).on('keypress', function(e) { + if(e.which == 13) { // Enter key + if ($('#main_ui').find('#btn_login').is(':visible')) { + $('#btn_login').click(); + } else if ($('#main_ui').find('#btn_register').is(':visible')) { + $('#btn_register').click(); + } else if ($('#main_ui').find('#btn_reset_password_initiate').is(':visible')) { + $('#btn_reset_password_initiate').click(); + } else if ($('#main_ui').find('#btn_reset_password_final').is(':visible')) { + $('#btn_reset_password_final').click(); + } + } + }); + """) +) + +# Define the Server logic for the Shiny app +def server(input, output, session): + # Reactive values to manage application state + logged_in = reactive.Value(False) + current_user = reactive.Value(None) + jwt_token = reactive.Value(None) + message = reactive.Value("") + message_type = reactive.Value("error") + # Default page state is "register" + page_state = reactive.Value("register") # Can be "register", "login", "forgot_password_initiate", "forgot_password_reset" + show_token = reactive.Value(False) + + # Store temporary user info for password reset flow + reset_username = reactive.Value(None) + reset_email = reactive.Value(None) + + # Render reactive message text to the UI with dynamic styling + @output + @render.ui + def message_text(): + if message_type() == "success": + return ui.tags.div(message(), class_="message-success") + return ui.tags.div(message()) + + # Dynamically render the main UI based on login state and page state + @output + @render.ui + def main_ui(): + if logged_in(): + return ui.div( + ui.h3(f"Welcome, {current_user()}!"), + ui.p("You are successfully logged in and can now access protected content."), + ui.input_action_button("btn_toggle_token", "Toggle JWT Token Display", class_="btn"), + ui.output_ui("token_text"), + ui.input_action_button("btn_logout", "Log out", class_="btn"), + ) + elif page_state() == "register": + return ui.div( + ui.h3("Register New Account"), + ui.input_text("reg_username", "Username", placeholder="Enter your desired username"), + ui.input_text("reg_email", "Email", placeholder="Enter your email address"), + ui.input_password("reg_password", "Password", placeholder="Create a strong password"), + ui.input_action_button("btn_register", "Register", class_="btn"), + ui.a( + "Already have an account? Log in here.", + href="#", + onclick="Shiny.setInputValue('go_to_login', Math.random())", + class_="form-link" + ), + ) + elif page_state() == "login": + return ui.div( + ui.h3("Log In to Your Account"), + ui.input_text("login_username", "Username", placeholder="Enter your username"), + ui.input_password("login_password", "Password", placeholder="Enter your password"), + ui.input_action_button("btn_login", "Log In", class_="btn"), + ui.a( + "Don't have an account? Register here.", + href="#", + onclick="Shiny.setInputValue('go_to_register', Math.random())", + class_="form-link" + ), + ui.a( + "Forgot Password?", + href="#", + onclick="Shiny.setInputValue('go_to_forgot_password_initiate', Math.random())", + class_="form-link" + ) + ) + elif page_state() == "forgot_password_initiate": + return ui.div( + ui.h3("Reset Your Password"), + ui.p("Please enter your username and email to proceed."), + ui.input_text("reset_username_input", "Username", placeholder="Enter your username"), + ui.input_text("reset_email_input", "Email", placeholder="Enter your email address"), + ui.input_action_button("btn_reset_password_initiate", "Continue", class_="btn"), + ui.a( + "Back to Login", + href="#", + onclick="Shiny.setInputValue('go_to_login', Math.random())", + class_="form-link" + ) + ) + elif page_state() == "forgot_password_reset": + return ui.div( + ui.h3("Set New Password"), + ui.p(f"Setting new password for {reset_username()}."), + ui.input_password("new_password", "New Password", placeholder="Enter your new strong password"), + ui.input_password("confirm_new_password", "Confirm New Password", placeholder="Confirm your new password"), + ui.input_action_button("btn_reset_password_final", "Reset Password", class_="btn"), + ui.a( + "Back to Login", + href="#", + onclick="Shiny.setInputValue('go_to_login', Math.random())", + class_="form-link" + ) + ) + + # Render protected content based on JWT token presence + @output + @render.ui + def protected_content(): + if jwt_token(): + return ui.div( + ui.h4("🔒 Protected Application Dashboard"), + ui.p("Welcome to your secure dashboard! This area is only accessible after successful authentication."), + ui.p("You can integrate your secure analytics, personalized reports, or confidential data visualizations here."), + ui.tags.ul( + ui.tags.li("View real-time data analytics."), + ui.tags.li("Manage user settings."), + ui.tags.li("Access exclusive features."), + ), + ) + else: + return ui.div( + ui.h4("🚫 Access Restricted"), + ui.p("Please log in to view the protected content and features."), + ui.p("Register if you don't have an account yet.") + ) + + # Render the JWT token text with styling for readability + @output + @render.ui + def token_text(): + if show_token() and jwt_token(): + return ui.div( + jwt_token(), + class_="token-display" + ) + return None + + # Reactive effect to toggle the visibility of the JWT token + @reactive.Effect + @reactive.event(input.btn_toggle_token) + def toggle_token(): + show_token.set(not show_token()) + + # Reactive effect to switch to the login page + @reactive.Effect + @reactive.event(input.go_to_login) + def go_to_login_event(): + message.set("") + message_type.set("error") + page_state.set("login") + + # Reactive effect to switch to the register page + @reactive.Effect + @reactive.event(input.go_to_register) + def go_to_register_event(): + message.set("") + message_type.set("error") + page_state.set("register") + + # Reactive effect to switch to the forgot password initiation page + @reactive.Effect + @reactive.event(input.go_to_forgot_password_initiate) + def go_to_forgot_password_initiate_event(): + message.set("") + message_type.set("error") + page_state.set("forgot_password_initiate") + + # Reactive effect to handle user registration + @reactive.Effect + @reactive.event(input.btn_register) + def register(): + username = input.reg_username() + email = input.reg_email() + password = input.reg_password() + + if not username or not email or not password: + message_type.set("error") + message.set("Please fill in all registration fields.") + return + + db = SessionLocal() + try: + if db.query(User).filter((User.username == username) | (User.email == email)).first(): + message_type.set("error") + message.set("Username or Email already registered. Please log in or use different credentials.") + page_state.set("login") + return + + hashed = bcrypt.hashpw(password.encode(), bcrypt.gensalt()).decode() + new_user = User(username=username, email=email, password_hash=hashed) + db.add(new_user) + db.commit() + message_type.set("success") + message.set("Registration successful! You can now log in.") + page_state.set("login") + except Exception as e: + db.rollback() + message_type.set("error") + message.set(f"An error occurred during registration: {e}") + finally: + db.close() + + # Reactive effect to handle user login + @reactive.Effect + @reactive.event(input.btn_login) + def login(): + username = input.login_username() + password = input.login_password() + + if not username or not password: + message_type.set("error") + message.set("Please enter both username and password.") + return + + db = SessionLocal() + user = db.query(User).filter(User.username == username).first() + db.close() + + if not user: + message_type.set("error") + message.set("User not found or incorrect username.") + return + + if bcrypt.checkpw(password.encode(), user.password_hash.encode()): + logged_in.set(True) + current_user.set(username) + token = create_jwt_token(username) + jwt_token.set(token) + message_type.set("success") + message.set("Login successful! Welcome.") + else: + message_type.set("error") + message.set("Incorrect password.") + + # Reactive effect to handle initiation of password reset + @reactive.Effect + @reactive.event(input.btn_reset_password_initiate) + def reset_password_initiate(): + username = input.reset_username_input() + email = input.reset_email_input() + + if not username or not email: + message_type.set("error") + message.set("Please provide both username and email.") + return + + db = SessionLocal() + user = db.query(User).filter(User.username == username, User.email == email).first() + db.close() + + if user: + reset_username.set(username) + reset_email.set(email) + message_type.set("success") + message.set("Username and email verified. Please set your new password.") + page_state.set("forgot_password_reset") + else: + message_type.set("error") + message.set("Username or email is incorrect. Please try again.") + + # Reactive effect to handle final password reset + @reactive.Effect + @reactive.event(input.btn_reset_password_final) + def reset_password_final(): + new_password = input.new_password() + confirm_new_password = input.confirm_new_password() + + if not new_password or not confirm_new_password: + message_type.set("error") + message.set("Please enter and confirm your new password.") + return + + if new_password != confirm_new_password: + message_type.set("error") + message.set("Passwords do not match. Please try again.") + return + + if not reset_username() or not reset_email(): + message_type.set("error") + message.set("Error: User information not found for password reset. Please start again.") + page_state.set("forgot_password_initiate") + return + + db = SessionLocal() + try: + user = db.query(User).filter(User.username == reset_username(), User.email == reset_email()).first() + if user: + hashed = bcrypt.hashpw(new_password.encode(), bcrypt.gensalt()).decode() + user.password_hash = hashed + db.commit() + message_type.set("success") + message.set("Your password has been successfully reset. You can now log in with your new password.") + page_state.set("login") + # Clear reset state + reset_username.set(None) + reset_email.set(None) + else: + message_type.set("error") + message.set("User not found during password reset. Please try again.") + page_state.set("forgot_password_initiate") + except Exception as e: + db.rollback() + message_type.set("error") + message.set(f"An error occurred during password reset: {e}") + finally: + db.close() + + # Reactive effect to handle user logout + @reactive.Effect + @reactive.event(input.btn_logout) + def logout(): + logged_in.set(False) + current_user.set(None) + jwt_token.set(None) + show_token.set(False) + page_state.set("login") + message_type.set("success") + message.set("You have been successfully logged out.") + +# Create the Shiny App instance +app = App(app_ui, server) + +# This line starts the Shiny web server. +if __name__ == "__main__": + app.run() diff --git a/data/pbmc3k_raw.h5ad b/data/pbmc3k_raw.h5ad new file mode 100644 index 0000000..460ee0b Binary files /dev/null and b/data/pbmc3k_raw.h5ad differ diff --git a/pcaprocess/demo_pcanlysis.py b/pcaprocess/demo_pcanlysis.py new file mode 100644 index 0000000..10a0607 --- /dev/null +++ b/pcaprocess/demo_pcanlysis.py @@ -0,0 +1,93 @@ +import scanpy as sc +import numpy as np +import matplotlib.pyplot as plt + +# Define the PCA functions in the same file or import them correctly +# from pca_functions import run_pca, plot_pca, plot_variance, save_results, get_adata + +# Step 1: Create random data +n_cells, n_genes = 100, 2000 +np.random.seed(42) + +# Generate random gene expression data +data = np.random.rand(n_cells, n_genes) + +# Create an AnnData object +adata = sc.AnnData(X=data) + +# Adding cell types (obs) +adata.obs['cell_type'] = ['type1' if i < 50 else 'type2' for i in range(n_cells)] +adata.var['gene_id'] = [f"gene{i}" for i in range(n_genes)] + +# Step 2: Apply PCA +def run_pca(adata, n_comps=50, svd_solver='arpack'): + """Runs PCA and stores the computed components.""" + try: + if 'X_pca' in adata.obsm: + print("PCA already computed. Overwriting previous results...") + + print(f"Running PCA with {n_comps} components using {svd_solver} solver...") + sc.pp.normalize_total(adata, target_sum=1e4) # Normalization + sc.pp.log1p(adata) # Log transformation + sc.pp.scale(adata) # Scaling + sc.tl.pca(adata, n_comps=n_comps, svd_solver=svd_solver) + + print("PCA completed.") + except Exception as e: + print(f"Error during PCA: {e}") + raise + +run_pca(adata, n_comps=10, svd_solver='arpack') + +# Step 3: Plot PCA graph +def plot_pca(adata, color=None): + """Plots PCA results, colored by a specified attribute (if provided).""" + try: + print(f"Plotting PCA, color by: {color or 'default'}") + sc.pl.pca(adata, color=color, show=False) + plt.title(f"PCA - Colored by {color if color else 'default'}") + plt.show() + except KeyError: + print(f"Warning: '{color}' not found. Using default coloring.") + sc.pl.pca(adata, show=False) + plt.title("PCA - Default Coloring") + plt.show() + except Exception as e: + print(f"Error in PCA plot: {e}") + raise + +plot_pca(adata, color='cell_type') + +# Step 4: Plot explained variance +def plot_variance(adata, log=True): + """Plots the variance explained by PCA components.""" + try: + print("Plotting explained variance...") + sc.pl.pca_variance_ratio(adata, log=log, show=False) + plt.title("PCA: Explained Variance") + plt.show() + except Exception as e: + print(f"Error in variance plot: {e}") + raise + +plot_variance(adata) + +# Step 5: Save PCA results +def save_results(adata, results_file="pca_results.h5ad"): + """Saves the PCA results to an H5AD file.""" + try: + print(f"Saving results to {results_file}...") + adata.write(results_file) + print("Save successful.") + except Exception as e: + print(f"Error saving results: {e}") + raise + +save_results(adata, "pca_results.h5ad") + +# Step 6: Retrieve processed AnnData object +def get_adata(adata): + """Returns the processed AnnData object.""" + return adata + +processed_adata = get_adata(adata) diff --git a/rank_genes_violin_demo.py b/rank_genes_violin_demo.py new file mode 100644 index 0000000..879a1f0 --- /dev/null +++ b/rank_genes_violin_demo.py @@ -0,0 +1,17 @@ +from rank_genes_violin import get_rank_genes_groups_violin +import scanpy as sc + +#sample dataset +adata = sc.datasets.pbmc3k() + +# calculating neighborhoods +sc.pp.neighbors(adata) + +sc.tl.leiden(adata, resolution=1.0) + +# Differential expression analysis (with t-test) +sc.tl.rank_genes_groups(adata, groupby='leiden', method='t-test') + + +# calling the function and visualizing +get_rank_genes_groups_violin(adata, groups='0', n_genes=8) diff --git a/src/demo_pcanlysis.py b/src/demo_pcanlysis.py new file mode 100644 index 0000000..10a0607 --- /dev/null +++ b/src/demo_pcanlysis.py @@ -0,0 +1,93 @@ +import scanpy as sc +import numpy as np +import matplotlib.pyplot as plt + +# Define the PCA functions in the same file or import them correctly +# from pca_functions import run_pca, plot_pca, plot_variance, save_results, get_adata + +# Step 1: Create random data +n_cells, n_genes = 100, 2000 +np.random.seed(42) + +# Generate random gene expression data +data = np.random.rand(n_cells, n_genes) + +# Create an AnnData object +adata = sc.AnnData(X=data) + +# Adding cell types (obs) +adata.obs['cell_type'] = ['type1' if i < 50 else 'type2' for i in range(n_cells)] +adata.var['gene_id'] = [f"gene{i}" for i in range(n_genes)] + +# Step 2: Apply PCA +def run_pca(adata, n_comps=50, svd_solver='arpack'): + """Runs PCA and stores the computed components.""" + try: + if 'X_pca' in adata.obsm: + print("PCA already computed. Overwriting previous results...") + + print(f"Running PCA with {n_comps} components using {svd_solver} solver...") + sc.pp.normalize_total(adata, target_sum=1e4) # Normalization + sc.pp.log1p(adata) # Log transformation + sc.pp.scale(adata) # Scaling + sc.tl.pca(adata, n_comps=n_comps, svd_solver=svd_solver) + + print("PCA completed.") + except Exception as e: + print(f"Error during PCA: {e}") + raise + +run_pca(adata, n_comps=10, svd_solver='arpack') + +# Step 3: Plot PCA graph +def plot_pca(adata, color=None): + """Plots PCA results, colored by a specified attribute (if provided).""" + try: + print(f"Plotting PCA, color by: {color or 'default'}") + sc.pl.pca(adata, color=color, show=False) + plt.title(f"PCA - Colored by {color if color else 'default'}") + plt.show() + except KeyError: + print(f"Warning: '{color}' not found. Using default coloring.") + sc.pl.pca(adata, show=False) + plt.title("PCA - Default Coloring") + plt.show() + except Exception as e: + print(f"Error in PCA plot: {e}") + raise + +plot_pca(adata, color='cell_type') + +# Step 4: Plot explained variance +def plot_variance(adata, log=True): + """Plots the variance explained by PCA components.""" + try: + print("Plotting explained variance...") + sc.pl.pca_variance_ratio(adata, log=log, show=False) + plt.title("PCA: Explained Variance") + plt.show() + except Exception as e: + print(f"Error in variance plot: {e}") + raise + +plot_variance(adata) + +# Step 5: Save PCA results +def save_results(adata, results_file="pca_results.h5ad"): + """Saves the PCA results to an H5AD file.""" + try: + print(f"Saving results to {results_file}...") + adata.write(results_file) + print("Save successful.") + except Exception as e: + print(f"Error saving results: {e}") + raise + +save_results(adata, "pca_results.h5ad") + +# Step 6: Retrieve processed AnnData object +def get_adata(adata): + """Returns the processed AnnData object.""" + return adata + +processed_adata = get_adata(adata) diff --git a/src/modules/Neighborhood.py b/src/modules/Neighborhood.py new file mode 100644 index 0000000..7e25636 --- /dev/null +++ b/src/modules/Neighborhood.py @@ -0,0 +1,64 @@ +import shiny #Only needed if you are planning to use it later +import scanpy as sc +import umap + +adata = sc.read("C:/Users/pc/.ipython/HW3/Hw3covid_Data_AllCells.h5ad") + +#computing +def compute(adata, n_neighbors=10, n_pcs=40): + """_summary_ + + Args: + Hw3covid_Data_AllCells (_type_): _description_ + n_neighbors (int, optional): _description_. Defaults to 10. + n_pcs (int, optional): _description_. Defaults to 40. + """ + try: + sc.pp.neighbors(adata, n_neighbors=n_neighbors, n_pcs=n_pcs) + print("Neighbors computed successfully.") + except Exception as e: + print(f"Error in computing neighbors: {e}") + +#embedding +def embed(adata, color=['CST3', 'NKG7', 'PPBP']): + """_summary_ + + Args: + adata (_type_): _description_ + color (list, optional): _description_. Defaults to ['CST3', 'NKG7', 'PPBP']. + """ + try: + sc.pl.umap(adata, color=color) + sc.pl.umap(adata, color=color) # Note: This plots twice; check if you really want this + print("Embedding and plotting successful.") + except Exception as e: + print(f"Error in embedding: {e}") + +#clustering +def cluster(adata, color=['leiden', 'CST3', 'NKG7']): + """_summary_ + + Args: + adata (_type_): _description_ + color (list, optional): _description_. Defaults to ['leiden', 'CST3', 'NKG7']. + """ + try: + sc.tl.leiden(adata) + sc.pl.umap(adata, color=color) + print("Clustering and plotting successful.") + except Exception as e: + print(f"Error in clustering: {e}") + +#saving +def save(results_file, adata): + """_summary_ + + Args: + results_file (_type_): _description_ + adata (_type_): _description_ + """ + try: + adata.write(results_file) + print(f"Data saved successfully to {results_file}.") + except Exception as e: + print(f"Error in saving data: {e}") diff --git a/src/modules/Neighborhood_demo.py b/src/modules/Neighborhood_demo.py new file mode 100644 index 0000000..28ff284 --- /dev/null +++ b/src/modules/Neighborhood_demo.py @@ -0,0 +1,30 @@ +import scanpy as sc +import Neighborhood as nb # Your functions are here! +import umap + +#Load the dataset +print("Loading dataset...") +adata = sc.datasets.pbmc3k() +print(f"Dataset loaded! Number of cells: {adata.n_obs}, Number of genes: {adata.n_vars}\n") + +#Compute the neighbor graph +print("Computing the neighborhood graph...") +nb.compute(adata, n_neighbors=10, n_pcs=40) + +#Compute UMAP +print("Computing UMAP embedding...") +sc.tl.umap(adata) + +#Visualize the embedding +print("Plotting UMAP...") +genes_of_interest = ['CD3D', 'MS4A1', 'GNLY'] +nb.embed(adata, color=genes_of_interest) + +#Perform clustering +print("Performing Leiden clustering...") +nb.cluster(adata, color=['leiden'] + genes_of_interest) + +#Save the results +results_file = "pbmc3k_final_results.h5ad" +print(f"Saving results to {results_file}...") +nb.save(results_file, adata) diff --git a/src/modules/SingleCellWebApp b/src/modules/SingleCellWebApp new file mode 160000 index 0000000..655ed2d --- /dev/null +++ b/src/modules/SingleCellWebApp @@ -0,0 +1 @@ +Subproject commit 655ed2d37d10c4c3830633da388db467fe845571 diff --git a/src/modules/data/pbmc3k_raw.h5ad b/src/modules/data/pbmc3k_raw.h5ad new file mode 100644 index 0000000..460ee0b Binary files /dev/null and b/src/modules/data/pbmc3k_raw.h5ad differ diff --git a/src/modules/demo_pcanlysis.py b/src/modules/demo_pcanlysis.py new file mode 100644 index 0000000..2e16cf3 --- /dev/null +++ b/src/modules/demo_pcanlysis.py @@ -0,0 +1,78 @@ +import scanpy as sc +import numpy as np +import matplotlib.pyplot as plt + +# Step 1: Create random data +n_cells, n_genes = 100, 2000 +np.random.seed(42) + +data = np.random.rand(n_cells, n_genes) +adata = sc.AnnData(X=data) + +adata.obs['cell_type'] = ['type1' if i < 50 else 'type2' for i in range(n_cells)] +adata.var['gene_id'] = [f"gene{i}" for i in range(n_genes)] + +# Step 2: Apply PCA +def run_pca(adata, n_comps=50, svd_solver='arpack'): + """Runs PCA and stores the computed components.""" + if 'X_pca' in adata.obsm: + print("PCA already computed. Overwriting previous results...") + + try: + print(f"Running PCA with {n_comps} components using {svd_solver} solver...") + sc.pp.normalize_total(adata, target_sum=1e4) + sc.pp.log1p(adata) + sc.pp.scale(adata) + sc.tl.pca(adata, n_comps=n_comps, svd_solver=svd_solver) + print("PCA completed.") + except Exception as e: + raise Exception(f"PCA sırasında hata oluştu: {e}") + +run_pca(adata, n_comps=10, svd_solver='arpack') + +# Step 3: Plot PCA +def plot_pca(adata, color=None): + """Plots PCA results, colored by a specified attribute (if provided).""" + try: + print(f"Plotting PCA, color by: {color or 'default'}") + sc.pl.pca(adata, color=color, show=False) + plt.title(f"PCA - Colored by {color if color else 'default'}") + plt.show() + except KeyError: + raise Exception(f"'{color}' özelliği bulunamadı, renklemek için geçersiz.") + except Exception as e: + raise Exception(f"PCA görselleştirmesinde hata oluştu: {e}") + +plot_pca(adata, color='cell_type') + +# Step 4: Plot explained variance +def plot_variance(adata, log=True): + """Plots the variance explained by PCA components.""" + try: + print("Plotting explained variance...") + sc.pl.pca_variance_ratio(adata, log=log, show=False) + plt.title("PCA: Explained Variance") + plt.show() + except Exception as e: + raise Exception(f"Varyans grafiğinde hata oluştu: {e}") + +plot_variance(adata) + +# Step 5: Save PCA results +def save_results(adata, results_file="pca_results.h5ad"): + """Saves the PCA results to an H5AD file.""" + try: + print(f"Saving results to {results_file}...") + adata.write(results_file) + print("Save successful.") + except Exception as e: + raise Exception(f"Sonuçları kaydederken hata oluştu: {e}") + +save_results(adata, "pca_results.h5ad") + +# Step 6: Retrieve processed AnnData object +def get_adata(adata): + """Returns the processed AnnData object.""" + return adata + +processed_adata = get_adata(adata) diff --git a/src/modules/functions/10xformat_to_anndata.py b/src/modules/functions/10xformat_to_anndata.py new file mode 100644 index 0000000..0b4e68a --- /dev/null +++ b/src/modules/functions/10xformat_to_anndata.py @@ -0,0 +1,43 @@ +import scanpy as sc +import anndata +import os + +def read_10x_matrix_folder(matrix_dir: str, sample_name: str = None) -> anndata.AnnData: + """ + Loads a 10x Genomics dataset (already extracted folder) using Scanpy. + + Parameters: + matrix_dir (str): Path to the directory containing matrix.mtx, barcodes.tsv, and features.tsv or genes.tsv + sample_name (str, optional): If given, adds this as a 'source' column in adata.obs + + Returns: + anndata.AnnData: The loaded AnnData object + """ + if not os.path.isdir(matrix_dir): + raise FileNotFoundError(f"Directory not found: {matrix_dir}") + + adata = sc.read_10x_mtx(matrix_dir, var_names="gene_symbols", cache=True) + + if sample_name: + adata.obs["source"] = sample_name + + return adata + +if __name__ == "__main__": + test_matrix_dir = "/Users/eceyigit/Desktop/filtered_gene_bc_matrices 2/hg19" # Replace with actual path + test_sample_name = "test_sample" + + print(f"Attempting to load data from: {test_matrix_dir}") + + try: + adata = read_10x_matrix_folder(test_matrix_dir, test_sample_name) + print("\nSuccessfully loaded data!") + print(f"AnnData object shape: {adata.shape}") + print(f"Sample name: {test_sample_name}") + print("\nFirst few observations:") + print(adata.obs.head()) + except Exception as e: + print(f"\nError occurred: {str(e)}") + print("Please check:") + print(f"- Does the directory exist? {os.path.exists(test_matrix_dir)}") + print(f"- Does it contain matrix.mtx, barcodes.tsv, and features.tsv files?") \ No newline at end of file diff --git a/src/modules/functions/fetching_data_from_census.py b/src/modules/functions/fetching_data_from_census.py new file mode 100644 index 0000000..16a0476 --- /dev/null +++ b/src/modules/functions/fetching_data_from_census.py @@ -0,0 +1,59 @@ +!pip install cellxgene_census +import cellxgene_census +from typing import List, Optional +import pandas as pd +import anndata + +def census_function( + organism: str = "Homo sapiens", + cell_filter: Optional[str] = None, + gene_filter: Optional[str] = None, + metadata_columns: Optional[List[str]] = None, + return_anndata: bool = False, + census_version: str = "stable" +) -> pd.DataFrame | anndata.AnnData: + + if metadata_columns is None: # if there are no parameters set from the user just get the parameters below. + metadata_columns = [ + "dataset_id","assay","suspension_type", + "sex","tissue_general","tissue","cell_type" + ] + + with cellxgene_census.open_soma(census_version=census_version) as census: #connecting to the census dataset and census version is to decide which census we are fetching the data + if return_anndata: # if true the get_anndata function is called + adata = cellxgene_census.get_anndata( + census=census, + organism=organism, + var_value_filter=gene_filter, + obs_value_filter=cell_filter, + column_names={"obs": metadata_columns}, + ) + return adata + else: + obs = census["census_data"][organism.lower().replace(" ", "_")].obs # we are getting the cell informations about the given organism like "homo sapiens" + table = obs.read( + value_filter=cell_filter, + column_names=metadata_columns + ) + data = table.concat().to_pandas() + return data # the data is like an excel table + +#testing the function +if __name__ == "__main__": + # example usage + print("Test is running...") + + # test with assumed parameters + default_result = census_function() + print("\nResult with default parameters (DataFrame):") + print(default_result.head()) + + adata = census_function( + organism="Homo sapiens", + cell_filter="tissue == 'brain' and sex == 'male'", + gene_filter="feature_id in ['ENSG00000161798','ENSG00000188229']", + metadata_columns=["cell_type","tissue"], + return_anndata=True + ) + print("\nAnndata object:") + print(adata) diff --git a/src/modules/functions/reading_tarfile_and_extract10x_to_anndata.py b/src/modules/functions/reading_tarfile_and_extract10x_to_anndata.py new file mode 100644 index 0000000..c4080c8 --- /dev/null +++ b/src/modules/functions/reading_tarfile_and_extract10x_to_anndata.py @@ -0,0 +1,75 @@ +import tarfile +import os +import pandas as pd +import scipy.io +import scanpy as sc +import anndata +from scipy import sparse +def extract_10x_tar_to_anndata(tar_path: str, extract_dir: str = "./temp") -> anndata.AnnData: + os.makedirs(extract_dir, exist_ok=True) + + with tarfile.open(tar_path, "r") as tar: + tar.extractall(path=extract_dir) + + matrix_file = None + barcodes_file = None + genes_file = None + + for root, _, files in os.walk(extract_dir): + for file in files: + path = os.path.join(root, file) + if file.endswith("matrix.mtx"): + matrix_file = path + elif file.endswith("barcodes.tsv"): # More specific check for barcodes file + barcodes_file = path + elif file.endswith("genes.tsv") or file.endswith("features.tsv"): # Check for both genes.tsv and features.tsv + genes_file = path + + if not all([matrix_file, barcodes_file, genes_file]): # checking for all the necessary files if not foun an error will be raised. + + # Construct the missing list with descriptive strings instead of None + missing = [] + if matrix_file is None: + missing.append(".mtx matrix file") + if barcodes_file is None: + missing.append(".tsv file") + if genes_file is None: + missing.append(".tsv file") + + raise FileNotFoundError(f"Required file(s) not found in tar file: {', '.join(missing)}") + + matrix = scipy.io.mmread(matrix_file) + barcodes = pd.read_csv(barcodes_file, header=None)[0].tolist() + + genes_df = pd.read_csv(genes_file, header=None) + + if genes_df.shape[1] >= 2: # if there are 2 or more columns in genes file, this code takes the first column as gene id and the second as gene names + genes = genes_df[1].tolist() + gene_ids = genes_df[0].tolist() + else: + genes = genes_df[0].tolist() + gene_ids = genes_df[0].tolist() + + matrix_transposed = matrix.transpose().tocsc() # transposing the matrice because anndata format is obsxvar + + adata = anndata.AnnData(X=matrix_transposed) + adata.obs_names = barcodes + adata.var_names = genes + if gene_ids and gene_ids != genes: + + adata.var['gene_id'] = gene_ids +# this code checks if there's a list called gene_ids and is it's different from the existing genes list if both are true it adds these gene_ids as a new column named 'gene_id' to the AnnData object's gene-specific information (adata.var). + + return adata +#testing +if __name__ == "__main__": + test_file = "/pbmc3k_filtered_gene_bc_matrices.tar.gz" + try: + adata = extract_10x_tar_to_anndata(test_file) + print("test is succesfull, AnnData :") + print(adata) + except FileNotFoundError: + print(f" Error: {test_file} not found! Please try another file path.") + + + diff --git a/src/modules/get_marker_genes_demo.py b/src/modules/get_marker_genes_demo.py new file mode 100644 index 0000000..2ff2dbb --- /dev/null +++ b/src/modules/get_marker_genes_demo.py @@ -0,0 +1,18 @@ +import scanpy as sc +import pandas as pd +from markedgenes import get_marker_genes # defined function in main code + +# Demo dataset +adata = sc.datasets.pbmc3k() + +#preprocessing and clustering (same with the main code) +sc.pp.pca(adata, svd_solver='arpack') +sc.pp.neighbors(adata) +sc.tl.leiden(adata) # Leiden clustering yap + +# ranking genes between groups +sc.tl.rank_genes_groups(adata, 'leiden', method='t-test') + +# recalling the function +scores_and_pvals = get_scores_and_pvals(adata) +print(scores_and_pvals) diff --git a/src/modules/get_rank_genes_demo.py b/src/modules/get_rank_genes_demo.py new file mode 100644 index 0000000..45380a1 --- /dev/null +++ b/src/modules/get_rank_genes_demo.py @@ -0,0 +1,23 @@ +from rank_genes_group import rank_genes_groups_custom +import scanpy as sc + +sc.datasets.pbmc3k() +sc.pp.normalize_total(adata, target_sum=1e4) +sc.pp.log1p(adata) +sc.pp.highly_variable_genes(adata, min_mean=0.0125, max_mean=3, min_disp=0.5) +adata = adata[:, adata.var.highly_variable] +sc.pp.scale(adata, max_value=10) + +# PCA and neighborhoods +sc.pp.pca(adata, svd_solver='arpack') # Bu satır uyarıyı çözer +sc.pp.neighbors(adata, n_pcs=40) + +# Clustering (Leiden) +sc.tl.leiden(adata, resolution=1.0) + +sc.settings.verbosity = 2 + +#to save results +results_file = "pbmc3k_rank_genes.h5ad" +rank_genes_groups_custom(adata, groupby='leiden' , method='t-test' , n_genes=25 , sharey=False) +adata.write(results_file) \ No newline at end of file diff --git a/src/modules/get_scors_and_pvals.py b/src/modules/get_scors_and_pvals.py new file mode 100644 index 0000000..f079087 --- /dev/null +++ b/src/modules/get_scors_and_pvals.py @@ -0,0 +1,36 @@ +import scanpy as sc +import pandas as pd +from rank_genes_group import rank_genes_groups + +# Load the dataset +adata = sc.datasets.pbmc3k() + +# Apply PCA to reduce dimensionality (to avoid high-dimension warning) +sc.pp.pca(adata, svd_solver='arpack') + +# Calculate neighbors on PCA-reduced data +sc.pp.neighbors(adata) + +# Run Leiden algorithm (with future-proofing for 'igraph' backend) +sc.tl.leiden(adata, resolution=1.0, flavor='igraph', directed=False, n_iterations=2) + +# Run differential expression analysis to rank genes +sc.tl.rank_genes_groups(adata, groupby='leiden', method='t-test') + + +# Reduce verbosity for cleaner output +sc.settings.verbosity = 2 + +# Define a function to get gene names and p-values from rank_genes_groups +def get_scores_and_pvals(adata): + result = adata.uns['rank_genes_groups'] + groups = result['names'].dtype.names + df = pd.DataFrame( + {group + '_' + key[:1]: result[key][group] + for group in groups for key in ['names', 'pvals']}).head(5) + return df + +# Run the function to get the results +scores_and_pvals = get_scores_and_pvals(adata) +print(scores_and_pvals) + diff --git a/src/modules/get_scors_and_pvals_demo.py b/src/modules/get_scors_and_pvals_demo.py new file mode 100644 index 0000000..87429ce --- /dev/null +++ b/src/modules/get_scors_and_pvals_demo.py @@ -0,0 +1,17 @@ +from get_scors_and_pvals import get_scores_and_pvals +import scanpy as sc +import pandas as pd + +# Demo dataset +adata = sc.datasets.pbmc3k() + +# ranking genes between groups +sc.pp.pca(adata, svd_solver='arpack') +sc.pp.neighbors(adata) +sc.tl.leiden(adata) +sc.tl.rank_genes_groups(adata, 'leiden', method='t-test') + +# recalling the function +scores_and_pvals = get_scores_and_pvals(adata) +print(scores_and_pvals) + \ No newline at end of file diff --git a/src/modules/preproccesing2-demo.py b/src/modules/preproccesing2-demo.py new file mode 100644 index 0000000..deae8d8 --- /dev/null +++ b/src/modules/preproccesing2-demo.py @@ -0,0 +1,57 @@ +import scanpy as sc +import numpy as np +import pandas as pd +from preprocessing import filter_data, calculate_qc_metrics, select_highly_variable_genes + +def load_file_demo(file_path): + adata = sc.read_h5ad(file_path) + adata.write("adata_demo.h5ad") + print("Data saved as adata_demo.h5ad") + +file_path = "C:/Users/pc/OneDrive/Masaüstü/bioinformatic/Hw3covid_Data_AllCells.h5ad" +adata = load_file_demo(file_path) + +# Step 1: Create example data (random counts matrix) +np.random.seed(42) # For reproducibility + +n_cells = 1000 # Number of cells +n_genes = 5000 # Number of genes + +# Create random gene expression data (Poisson distribution) +counts = np.random.poisson(lam=1.0, size=(n_cells, n_genes)) # Shape: (n_cells, n_genes) + +# Create an AnnData object +adata = sc.AnnData(counts) + +# Assign gene and cell names +adata.var_names = [f"Gene_{i}" for i in range(n_genes)] # Gene names: Gene_0, Gene_1, ..., Gene_4999 +adata.obs_names = [f"Cell_{i}" for i in range(n_cells)] # Cell names: Cell_0, Cell_1, ..., Cell_999 + +# Before adding 'MT-', note that var_names is not a list but an Index object. +# So we need to convert var_names to a list before making changes. +var_names_list = adata.var_names.tolist() + +# Add some mitochondrial genes for demonstration (every 10th gene is mitochondrial) +for i in range(0, n_genes, 10): + var_names_list[i] = f"MT-{var_names_list[i]}" # Rename every 10th gene to start with "MT-" + +# We assign the modified list back as an Index object +adata.var_names = pd.Index(var_names_list) + +# Step 2: Apply filter_data to filter cells and genes +adata_filtered = filter_data(adata, min_genes=200, min_cells=3) +print(f"Filtered AnnData: {adata_filtered.shape} (cells, genes)") + +# Step 3: Calculate QC metrics +adata_qc = calculate_qc_metrics(adata_filtered) +print("\nQC Metrics (first few rows of obs):") +print(adata_qc.obs.head()) # View the QC metrics such as 'pct_counts_mt' (percentage of mitochondrial genes) + +# Step 4: Select highly variable genes +adata_hvg = select_highly_variable_genes(adata_qc, min_mean=0.0125, max_mean=3, min_disp=0.5) +print("\nHighly Variable Genes (first few rows of 'highly_variable' column):") +print(adata_hvg.var[['highly_variable']].head()) # Display which genes are highly variable + +# Step 5: Count how many genes are highly variable +num_highly_variable_genes = adata_hvg.var['highly_variable'].sum() +print(f"\nNumber of highly variable genes selected: {num_highly_variable_genes}") diff --git a/src/modules/preprocessing.py b/src/modules/preprocessing.py index e69de29..f62fc37 100644 --- a/src/modules/preprocessing.py +++ b/src/modules/preprocessing.py @@ -0,0 +1,77 @@ +import scanpy as sc +import pandas as pd +from anndata import AnnData + +def filter_data( + adata: AnnData, + min_genes: int = 200, + min_cells: int = 3 +) -> AnnData: + """ + Filters the dataset. + + Args: + adata (AnnData): The single-cell dataset. + min_genes (int): Minimum number of genes expressed per cell. Default is 200. + min_cells (int): Minimum number of cells expressing a gene. Default is 3. + + Returns: + AnnData: Filtered dataset. + """ + sc.pp.filter_cells(adata, min_genes=min_genes) + sc.pp.filter_genes(adata, min_cells=min_cells) + return adata + +def calculate_qc_metrics(adata: AnnData) -> AnnData: + """ + Computes quality control (QC) metrics and adds them to the dataset. + Checks for mitochondrial genes with both uppercase and lowercase 'mt-' prefix. + + Args: + adata (AnnData): The filtered dataset. + + Returns: + AnnData: Dataset with QC metrics added. + """ + if adata.var_names.str.startswith("MT-").any(): + adata.var["mt"] = adata.var_names.str.startswith("MT-") + elif adata.var_names.str.startswith("mt-").any(): + adata.var["mt"] = adata.var_names.str.startswith("mt-") + else: + # In case neither is found, check in a case-insensitive way just to be sure + adata.var["mt"] = adata.var_names.str.upper().str.startswith("MT-") + + sc.pp.calculate_qc_metrics( + adata, + qc_vars=["mt"], + percent_top=None, + log1p=False, + inplace=True + ) + return adata + +def select_highly_variable_genes( + adata: AnnData, + min_mean: float = 0.0125, + max_mean: int = 3, + min_disp: float = 0.5 + ) -> AnnData: + """ + Identifies highly variable genes in the dataset. + + Args: + adata (AnnData): The dataset with QC metrics. + min_mean (float): Minimum mean expression threshold. Default is 0.0125. + max_mean (float): Maximum mean expression threshold. Default is 3. + min_disp (float): Minimum dispersion threshold. Default is 0.5. + + Returns: + AnnData: Dataset with highly variable gene information. + """ + sc.pp.highly_variable_genes( + adata, + min_mean=min_mean, + max_mean=max_mean, + min_disp=min_disp + ) + return adata diff --git a/src/modules/pvals_demo.py b/src/modules/pvals_demo.py new file mode 100644 index 0000000..3445c66 --- /dev/null +++ b/src/modules/pvals_demo.py @@ -0,0 +1,16 @@ +from get_scors_and_pvals import get_scores_and_pvals +import scanpy as sc +import pandas as pd + +# Demo dataset +adata = sc.datasets.pbmc3k() + +# ranking genes between groups +sc.pp.pca(adata, svd_solver='arpack') +sc.pp.neighbors(adata) +sc.tl.leiden(adata) +sc.tl.rank_genes_groups(adata, 'leiden', method='t-test') + +# recalling the function +scores_and_pvals = get_scores_and_pvals(adata) +print(scores_and_pvals) \ No newline at end of file diff --git a/src/modules/qc.py b/src/modules/qc.py index e69de29..bca2052 100644 --- a/src/modules/qc.py +++ b/src/modules/qc.py @@ -0,0 +1,52 @@ +import scanpy as sc +import matplotlib.pyplot as plt + +def run_pca(adata, n_comps=50, svd_solver='arpack'): + """Runs PCA and stores the computed components.""" + if 'X_pca' in adata.obsm: + print("PCA already computed. Overwriting previous results...") + + try: + print(f"Running PCA with {n_comps} components using {svd_solver} solver...") + sc.pp.normalize_total(adata, target_sum=1e4) + sc.pp.log1p(adata) + sc.pp.scale(adata) + sc.tl.pca(adata, n_comps=n_comps, svd_solver=svd_solver) + print("PCA completed.") + except Exception as e: + raise Exception(f"PCA işleminde hata oluştu: {e}") + +def plot_pca(adata, color=None): + """Plots PCA results, colored by a specified attribute (if provided).""" + try: + print(f"Plotting PCA, color by: {color or 'default'}") + sc.pl.pca(adata, color=color, show=False) + plt.title(f"PCA - Colored by {color if color else 'default'}") + plt.show() + except KeyError: + raise Exception(f"Belirtilen renk özelliği '{color}' bulunamadı.") + except Exception as e: + raise Exception(f"PCA grafiğinde hata oluştu: {e}") + +def plot_variance(adata, log=True): + """Plots the variance explained by PCA components.""" + try: + print("Plotting explained variance...") + sc.pl.pca_variance_ratio(adata, log=log, show=False) + plt.title("PCA: Explained Variance") + plt.show() + except Exception as e: + raise Exception(f"Varyans grafiğinde hata oluştu: {e}") + +def save_results(adata, results_file="pca_results.h5ad"): + """Saves the PCA results to an H5AD file.""" + try: + print(f"Saving results to {results_file}...") + adata.write(results_file) + print("Save successful.") + except Exception as e: + raise Exception(f"Sonuçlar kaydedilirken hata oluştu: {e}") + +def get_adata(adata): + """Returns the processed AnnData object.""" + return adata \ No newline at end of file diff --git a/src/modules/rank_genes_group_demo.py b/src/modules/rank_genes_group_demo.py new file mode 100644 index 0000000..1b409d3 --- /dev/null +++ b/src/modules/rank_genes_group_demo.py @@ -0,0 +1,24 @@ +from rankgenesgroup import rank_genes_groups +import scanpy as sc + +adata = sc.datasets.pbmc3k() + +sc.pp.normalize_total(adata, target_sum=1e4) +sc.pp.log1p(adata) +sc.pp.highly_variable_genes(adata, min_mean=0.0125, max_mean=3, min_disp=0.5) +adata = adata[:, adata.var.highly_variable] +sc.pp.scale(adata, max_value=10) + +# PCA and neighborhoods +sc.pp.pca(adata, svd_solver='arpack') # Bu satır uyarıyı çözer +sc.pp.neighbors(adata, n_pcs=40) + +# Clustering (Leiden) +sc.tl.leiden(adata, resolution=1.0) + +sc.settings.verbosity = 2 + +#to save results +results_file = "pbmc3k_rank_genes.h5ad" +sc.tl.rank_genes_groups(adata, groupby='leiden' , method='t-test' , n_genes=25 , sharey=False) +adata.write(results_file) diff --git a/src/modules/rank_genes_violin.py b/src/modules/rank_genes_violin.py new file mode 100644 index 0000000..022747d --- /dev/null +++ b/src/modules/rank_genes_violin.py @@ -0,0 +1,18 @@ +import scanpy as sc +adata = sc.datasets.pbmc3k() +sc.pp.neighbors(adata) +sc.tl.leiden(adata, resolution=1.0) +sc.settings.verbosity = 2 # reduce the verbosity + +def get_rank_genes_groups_violin(adata, groups='0', n_genes=8): + """ + -Plots a violin plot for the top ranked genes in specified groups (clusters). + + Parameters: + - adata: AnnData object with results from rank_genes_groups + - groups: List or string specifying the groups (clusters) to plot (default is all groups) + - n_genes: Number of top genes to display in the plot (default is 8) + + """ + sc.pl.rank_genes_groups_violin(adata, groups=groups, n_genes=n_genes) + return adata diff --git a/src/modules/rank_genes_violin_demo.py b/src/modules/rank_genes_violin_demo.py new file mode 100644 index 0000000..879a1f0 --- /dev/null +++ b/src/modules/rank_genes_violin_demo.py @@ -0,0 +1,17 @@ +from rank_genes_violin import get_rank_genes_groups_violin +import scanpy as sc + +#sample dataset +adata = sc.datasets.pbmc3k() + +# calculating neighborhoods +sc.pp.neighbors(adata) + +sc.tl.leiden(adata, resolution=1.0) + +# Differential expression analysis (with t-test) +sc.tl.rank_genes_groups(adata, groupby='leiden', method='t-test') + + +# calling the function and visualizing +get_rank_genes_groups_violin(adata, groups='0', n_genes=8) diff --git a/src/modules/rankgenesgroup.py b/src/modules/rankgenesgroup.py new file mode 100644 index 0000000..795fd94 --- /dev/null +++ b/src/modules/rankgenesgroup.py @@ -0,0 +1,36 @@ +import scanpy as sc + +# Örnek veri setini yükle +adata = sc.datasets.pbmc3k() + +# Temel ön işleme +sc.pp.normalize_total(adata, target_sum=1e4) +sc.pp.log1p(adata) +sc.pp.highly_variable_genes(adata, min_mean=0.0125, max_mean=3, min_disp=0.5) +adata = adata[:, adata.var.highly_variable] +sc.pp.scale(adata, max_value=10) + +# PCA ve komşuluk hesaplama +sc.pp.pca(adata, svd_solver='arpack') # Bu satır uyarıyı çözer +sc.pp.neighbors(adata, n_pcs=40) + +# Clustering (Leiden) +sc.tl.leiden(adata, resolution=1.0) + +# Verbosity ayarı +sc.settings.verbosity = 2 + +# Sonuçları kaydetmek için bir dosya ismi belirle +results_file = "pbmc3k_rank_genes.h5ad" + +# Gen sıralama ve görselleştirme fonksiyonu +def rank_genes_groups(adata, method='t-test', n_genes=25, sharey=False): + """ + - Perform differential expression analysis and plot top marker genes. + """ + sc.tl.rank_genes_groups(adata, groupby='leiden', method=method) + sc.pl.rank_genes_groups(adata, n_genes=n_genes, sharey=sharey) + adata.write(results_file) + +# Fonksiyonu çağır +rank_genes_groups(adata) diff --git a/src/pca_analysis.py b/src/pca_analysis.py new file mode 100644 index 0000000..4760724 --- /dev/null +++ b/src/pca_analysis.py @@ -0,0 +1,60 @@ +import scanpy as sc +import matplotlib.pyplot as plt + +def run_pca(adata, n_comps=50, svd_solver='arpack'): + """Runs PCA and stores the computed components.""" + try: + if 'X_pca' in adata.obsm: + print("PCA already computed. Overwriting previous results...") + + print(f"Running PCA with {n_comps} components using {svd_solver} solver...") + sc.pp.normalize_total(adata, target_sum=1e4) # Normalization + sc.pp.log1p(adata) # Log transformation + sc.pp.scale(adata) # Scaling + sc.tl.pca(adata, n_comps=n_comps, svd_solver=svd_solver) + + print("PCA completed.") + except Exception as e: + print(f"Error during PCA: {e}") + raise + +def plot_pca(adata, color=None): + """Plots PCA results, colored by a specified attribute (if provided).""" + try: + print(f"Plotting PCA, color by: {color or 'default'}") + sc.pl.pca(adata, color=color, show=False) + plt.title(f"PCA - Colored by {color if color else 'default'}") + plt.show() + except KeyError: + print(f"Warning: '{color}' not found. Using default coloring.") + sc.pl.pca(adata, show=False) + plt.title("PCA - Default Coloring") + plt.show() + except Exception as e: + print(f"Error in PCA plot: {e}") + raise + +def plot_variance(adata, log=True): + """Plots the variance explained by PCA components.""" + try: + print("Plotting explained variance...") + sc.pl.pca_variance_ratio(adata, log=log, show=False) + plt.title("PCA: Explained Variance") + plt.show() + except Exception as e: + print(f"Error in variance plot: {e}") + raise + +def save_results(adata, results_file="pca_results.h5ad"): + """Saves the PCA results to an H5AD file.""" + try: + print(f"Saving results to {results_file}...") + adata.write(results_file) + print("Save successful.") + except Exception as e: + print(f"Error saving results: {e}") + raise + +def get_adata(adata): + """Returns the processed AnnData object.""" + return adata