PyHRM/PyHRM.py at master · markus2929/PyHRM · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100

# coding: utf-8

# ## Introduction

# Please read the very nice introduction provided by Kapa BioSystems to understand, prepare, and troubleshoot high-resolution melt (HRM) experiments:
#
# http://www.kapabiosystems.com/document/introduction-high-resolution-melt-analysis-guide/
#

# ### Import Python modules for analysis

# In[ ]:

get_ipython().magic(u'matplotlib inline')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


# ### Read and Plot Melting Data

# In[ ]:

# Load the melt-curve table from CSV. The first column is used as the x axis
# (presumably temperature -- confirm against the data file) and every
# remaining column is drawn as its own curve.
df = pd.read_csv('Sample-HRM-p50-genotyping.csv')
raw_x = df.iloc[:, [0]]
raw_curves = df.iloc[:, 1:]
plt.plot(raw_x, raw_curves)
plt.show()


# ### Select melting range

# In[ ]:

# Keep only the rows whose first-column value lies strictly between 75 and 89
# (presumably the melting temperature window -- confirm units with the data).
first_col = df.iloc[:, 0]
inside_window = (first_col > 75) & (first_col < 89)
df_melt = df.loc[inside_window]

# Every column after the first holds the per-sample signal for those rows.
df_data = df_melt.iloc[:, 1:]

plt.plot(df_melt.iloc[:, [0]], df_data)
plt.show()


# ### Normalizing

# In[ ]:

# Min-max scale each column independently onto a 0-100 range:
# the column minimum maps to 0 and the column maximum maps to 100.
column_floor = df_data.min()
column_span = df_data.max() - column_floor
df_norm = (df_data - column_floor) / column_span * 100

plt.plot(df_melt.iloc[:, [0]], df_norm)
plt.show()


# ### Calculate and Show Diff Plot

# In[ ]:

# Subtract the 'J14' column from every normalized column, row by row, so each
# curve is expressed as its difference from that reference sample.
reference_curve = df_norm['J14']
dfdif = df_norm.sub(reference_curve, axis='index')

plt.plot(df_melt.iloc[:, [0]], dfdif)
plt.show()


# ### Clustering

# Use the KMeans module from SciKit-Learn to cluster your samples into three groups (WT, KO, HET). Be careful: your samples may have fewer than three groups, so always check the diff plots first.

# In[ ]:

import sklearn.cluster as sc
from IPython.display import display


# In[ ]:

# Cluster the difference curves with k-means. Transposing makes each sample
# (one column of dfdif) a single row of the feature matrix.
# `.values` replaces `DataFrame.as_matrix()`, which was deprecated in pandas
# 0.23 and removed in pandas 1.0; `.values` works on both old and new pandas.
mat = dfdif.T.values
hc = sc.KMeans(n_clusters=3)
hc.fit(mat)

# Row 0 of `results` holds the sample names, row 1 the assigned cluster label.
# NOTE: no random_state is set, so the label numbers given to each group may
# differ from run to run; identify groups by which controls they contain.
labels = hc.labels_
results = pd.DataFrame([dfdif.T.index,labels])

# Show the sample names that fall in each cluster, one table per cluster.
for cluster_id in range(hc.n_clusters):
    display(results.loc[:0, results.iloc[1] == cluster_id])


# My controls are
# * WT: I12, J12
# * KO: I13, J13
# * HET: I14, J14
#
# So you can identify each sample's genotype by checking which control it clusters with.

# Plotting with plot.ly, so you can look at individual lines for better pattern recognition
# In[ ]:
import plotly.plotly as py
import cufflinks as cf
import plotly.graph_objs as go

# Configure cufflinks: online mode (offline=False), world-readable plots, and
# the 'ggplot' theme.
cf.set_config_file(theme='ggplot', world_readable=True, offline=False)

# Index the difference curves by the first column of the melt-range table so
# that column becomes the x axis of the interactive plot.
melt_axis = df_melt.iloc[:, 0]
dfpy = dfdif.set_index(melt_axis)

# Plot and embed in the IPython notebook.
dfpy.iplot(filename='pyHRM', kind='scatter')