forked from liuyigh/PyHRM
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathPyHRM.py
More file actions
102 lines (62 loc) · 2.08 KB
/
PyHRM.py
File metadata and controls
102 lines (62 loc) · 2.08 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
# coding: utf-8
# ## Introduction
# Please read the very nice introduction provided by Kapa BioSystems to understand, prepare, and troubleshoot high-resolution melt (HRM) experiments:
#
# http://www.kapabiosystems.com/document/introduction-high-resolution-melt-analysis-guide/
#
# ### Import Python modules for analysis
# In[ ]:
# Turn on inline matplotlib rendering for the notebook session.
# `run_line_magic(name, line)` is the supported replacement for the deprecated
# `InteractiveShell.magic()` helper used by older notebook exports.
# NOTE: `get_ipython` is only defined inside an IPython/Jupyter session.
get_ipython().run_line_magic('matplotlib', 'inline')
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# ### Read and Plot Melting Data
# In[ ]:
# Load the raw HRM table; the path is relative to the current working directory.
df = pd.read_csv('Sample-HRM-p50-genotyping.csv')

# Column 0 is used as the x axis (presumably temperature -- confirm against the
# CSV header); every remaining column is drawn as its own curve.
raw_x_axis = df.iloc[:, [0]]
raw_curves = df.iloc[:, 1:]
plt.plot(raw_x_axis, raw_curves)
plt.show()
# ### Select melting range
# In[ ]:
# Keep only the rows whose first-column value lies strictly between 75 and 89
# (presumably degrees Celsius -- confirm against the CSV header).
first_column = df.iloc[:, 0]
above_lower_bound = first_column > 75
below_upper_bound = first_column < 89
df_melt = df.loc[above_lower_bound & below_upper_bound]

# Every column after the first is treated as a curve to analyse and plot.
df_data = df_melt.iloc[:, 1:]
melt_x_axis = df_melt.iloc[:, [0]]
plt.plot(melt_x_axis, df_data)
plt.show()
# ### Normalizing
# In[ ]:
# Min-max rescale each column independently onto a 0-100 scale.
# NOTE(review): a column that is constant over the selected range has zero
# span, so the division below yields NaN for that column.
column_floor = df_data.min()
column_span = df_data.max() - column_floor
df_norm = (df_data - column_floor) / column_span * 100

plt.plot(df_melt.iloc[:, [0]], df_norm)
plt.show()
# ### Calculate and Show Diff Plot
# In[ ]:
# Use column 'J14' as the reference curve and subtract it from every
# normalized curve; axis=0 aligns the reference on the row index, so the
# subtraction is done row by row.
reference_curve = df_norm['J14']
dfdif = df_norm.sub(reference_curve, axis=0)

plt.plot(df_melt.iloc[:, [0]], dfdif)
plt.show()
# ### Clustering
# Use the KMeans module from scikit-learn to cluster your samples into three groups (WT, KO, HET). Be careful: your samples may form fewer than three groups, so always check the diff plots first.
# In[ ]:
import sklearn.cluster as sc
from IPython.display import display
# In[ ]:
# Build the feature matrix: after the transpose, each row is one sample's
# difference curve. `.values` is used because `DataFrame.as_matrix()` was
# removed in pandas 1.0 and raises AttributeError there.
mat = dfdif.T.values

# Partition the samples into three clusters.
hc = sc.KMeans(n_clusters=3)
hc.fit(mat)
labels = hc.labels_

# Two-row table: row 0 holds the sample names, row 1 the cluster label
# assigned to each of them.
results = pd.DataFrame([dfdif.T.index, labels])

# Show the sample names belonging to each cluster in turn; `.loc[:0, ...]`
# keeps only row 0 (the names) for the columns whose label matches.
for cluster_id in range(hc.n_clusters):
    display(results.loc[:0, results.iloc[1] == cluster_id])
# My controls are
# * WT: I12, J12
# * KO: I13, J13
# * HET: I14, J14
#
# So you can identify your genotyping results by checking which control each sample clusters with.
# Plotting with plot.ly, so you can look at individual lines for better pattern recognition.
# In[ ]:
import plotly.plotly as py
import cufflinks as cf
import plotly.graph_objs as go
# Configure cufflinks: not offline, world-readable, 'ggplot' theme.
cf.set_config_file(theme='ggplot', world_readable=True, offline=False)

# Re-index the difference curves by the first column of the selected melting
# range so that column becomes the x axis of the interactive plot.
melt_index = df_melt.iloc[:, 0]
dfpy = dfdif.set_index(melt_index)

# Plot and embed in the IPython notebook.
dfpy.iplot(filename='pyHRM', kind='scatter')