-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathReadingNotes.tex
More file actions
1452 lines (1093 loc) · 82.1 KB
/
ReadingNotes.tex
File metadata and controls
1452 lines (1093 loc) · 82.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
% ---------------------------------------------------------------------------
% Document class and engine-dependent font setup.
% The \PassOptionsToPackage lines must come before the packages they target
% are loaded; hyperref is pulled in further down (directly or via bookmark).
% ---------------------------------------------------------------------------
% Options for packages loaded elsewhere
\PassOptionsToPackage{unicode}{hyperref}
\PassOptionsToPackage{hyphens}{url}
%
% Standard book class with no class options.
\documentclass[
]{book}
\usepackage{amsmath,amssymb}
\usepackage{lmodern}
% iftex provides the \ifPDFTeX / \ifLuaTeX engine tests used in this preamble.
\usepackage{iftex}
\ifPDFTeX
% pdfTeX branch: 8-bit engine, so set font and input encodings explicitly.
\usepackage[T1]{fontenc}
\usepackage[utf8]{inputenc}
\usepackage{textcomp} % provide euro and other symbols
\else % if luatex or xetex
% Unicode-engine branch: unicode-math handles fonts; the fontenc/inputenc
% packages above are deliberately not loaded here.
\usepackage{unicode-math}
\defaultfontfeatures{Scale=MatchLowercase}
\defaultfontfeatures[\rmfamily]{Ligatures=TeX,Scale=1}
\fi
% ---------------------------------------------------------------------------
% Optional typography packages. Each one is loaded only if its .sty file is
% found, so the document still compiles on installations that lack them.
% ---------------------------------------------------------------------------
% Use upquote if available, for straight quotes in verbatim environments
\IfFileExists{upquote.sty}{\usepackage{upquote}}{}
\IfFileExists{microtype.sty}{% use microtype if available
\usepackage[]{microtype}
\UseMicrotypeSet[protrusion]{basicmath} % disable protrusion for tt fonts
}{}
% Paragraph spacing: no indent, vertical space between paragraphs.
% Three cases: non-KOMA class with parskip.sty, non-KOMA class without it
% (set the lengths by hand), and KOMA classes (use the class option instead).
% \makeatletter is needed because \@ifundefined has @ in its name.
\makeatletter
\@ifundefined{KOMAClassName}{% if non-KOMA class
\IfFileExists{parskip.sty}{%
\usepackage{parskip}
}{% else
\setlength{\parindent}{0pt}
\setlength{\parskip}{6pt plus 2pt minus 1pt}}
}{% if KOMA class
\KOMAoptions{parskip=half}}
\makeatother
% ---------------------------------------------------------------------------
% Environments for syntax-highlighted code chunks.
% The document body wraps each chunk as
%   \begin{Shaded}\begin{Highlighting}[] ... \end{Highlighting}\end{Shaded}
% ---------------------------------------------------------------------------
\usepackage{xcolor}
% NOTE(review): color is loaded after xcolor; this looks redundant, but it is
% kept as generated -- confirm before removing.
\usepackage{color}
\usepackage{fancyvrb}
\newcommand{\VerbBar}{|}
% Verbatim variants in which \, { and } stay active, so the *Tok colour
% macros can be used inside otherwise verbatim text.
\newcommand{\VERB}{\Verb[commandchars=\\\{\}]}
\DefineVerbatimEnvironment{Highlighting}{Verbatim}{commandchars=\\\{\}}
% Add ',fontsize=\small' for more characters per line
\usepackage{framed}
% Light grey background (RGB 248,248,248) used by the snugshade box.
\definecolor{shadecolor}{RGB}{248,248,248}
\newenvironment{Shaded}{\begin{snugshade}}{\end{snugshade}}
% ---------------------------------------------------------------------------
% Syntax-highlighting token macros, one per token class.
% Each takes the token text as #1 and typesets it in a fixed colour and/or
% weight; macros whose body is just #1 leave the token unstyled.
% They are used inside the Highlighting environment in the document body.
% ---------------------------------------------------------------------------
\newcommand{\AlertTok}[1]{\textcolor[rgb]{0.94,0.16,0.16}{#1}}
\newcommand{\AnnotationTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{\textbf{\textit{#1}}}}
\newcommand{\AttributeTok}[1]{\textcolor[rgb]{0.77,0.63,0.00}{#1}}
\newcommand{\BaseNTok}[1]{\textcolor[rgb]{0.00,0.00,0.81}{#1}}
\newcommand{\BuiltInTok}[1]{#1}
\newcommand{\CharTok}[1]{\textcolor[rgb]{0.31,0.60,0.02}{#1}}
\newcommand{\CommentTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{\textit{#1}}}
\newcommand{\CommentVarTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{\textbf{\textit{#1}}}}
\newcommand{\ConstantTok}[1]{\textcolor[rgb]{0.00,0.00,0.00}{#1}}
\newcommand{\ControlFlowTok}[1]{\textcolor[rgb]{0.13,0.29,0.53}{\textbf{#1}}}
\newcommand{\DataTypeTok}[1]{\textcolor[rgb]{0.13,0.29,0.53}{#1}}
\newcommand{\DecValTok}[1]{\textcolor[rgb]{0.00,0.00,0.81}{#1}}
\newcommand{\DocumentationTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{\textbf{\textit{#1}}}}
\newcommand{\ErrorTok}[1]{\textcolor[rgb]{0.64,0.00,0.00}{\textbf{#1}}}
\newcommand{\ExtensionTok}[1]{#1}
\newcommand{\FloatTok}[1]{\textcolor[rgb]{0.00,0.00,0.81}{#1}}
\newcommand{\FunctionTok}[1]{\textcolor[rgb]{0.00,0.00,0.00}{#1}}
\newcommand{\ImportTok}[1]{#1}
\newcommand{\InformationTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{\textbf{\textit{#1}}}}
\newcommand{\KeywordTok}[1]{\textcolor[rgb]{0.13,0.29,0.53}{\textbf{#1}}}
\newcommand{\NormalTok}[1]{#1}
\newcommand{\OperatorTok}[1]{\textcolor[rgb]{0.81,0.36,0.00}{\textbf{#1}}}
\newcommand{\OtherTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{#1}}
\newcommand{\PreprocessorTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{\textit{#1}}}
\newcommand{\RegionMarkerTok}[1]{#1}
\newcommand{\SpecialCharTok}[1]{\textcolor[rgb]{0.00,0.00,0.00}{#1}}
\newcommand{\SpecialStringTok}[1]{\textcolor[rgb]{0.31,0.60,0.02}{#1}}
\newcommand{\StringTok}[1]{\textcolor[rgb]{0.31,0.60,0.02}{#1}}
\newcommand{\VariableTok}[1]{\textcolor[rgb]{0.00,0.00,0.00}{#1}}
\newcommand{\VerbatimStringTok}[1]{\textcolor[rgb]{0.31,0.60,0.02}{#1}}
\newcommand{\WarningTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{\textbf{\textit{#1}}}}
% ---------------------------------------------------------------------------
% Table support: multi-page tables (longtable), rule commands (booktabs)
% and extended column specifications (array).
% ---------------------------------------------------------------------------
\usepackage{longtable,booktabs,array}
\usepackage{calc} % for calculating minipage widths
% Correct order of tables after \paragraph or \subparagraph
% (etoolbox supplies \patchcmd; the patch inserts an empty \mbox before the
% first \par in \longtable whenever \if@noskipsec is true.)
\usepackage{etoolbox}
\makeatletter
\patchcmd\longtable{\par}{\if@noskipsec\mbox{}\fi\par}{}{}
\makeatother
% Allow footnotes in longtable head/foot
% Prefer footnotehyper when installed, otherwise fall back to footnote;
% \makesavenoteenv is presumably provided by whichever of the two is loaded.
\IfFileExists{footnotehyper.sty}{\usepackage{footnotehyper}}{\usepackage{footnote}}
\makesavenoteenv{longtable}
% ---------------------------------------------------------------------------
% Graphics defaults.
% \maxwidth and \maxheight expand to the image's natural size, capped at
% \linewidth and \textheight respectively. They read graphicx internals
% (\Gin@nat@width, \Gin@nat@height), hence the \makeatletter wrapper.
% ---------------------------------------------------------------------------
\usepackage{graphicx}
\makeatletter
\def\maxwidth{\ifdim\Gin@nat@width>\linewidth\linewidth\else\Gin@nat@width\fi}
\def\maxheight{\ifdim\Gin@nat@height>\textheight\textheight\else\Gin@nat@height\fi}
\makeatother
% Scale images if necessary, so that they will not overflow the page
% margins by default, and it is still possible to overwrite the defaults
% using explicit options in \includegraphics[width, height, ...]{}
\setkeys{Gin}{width=\maxwidth,height=\maxheight,keepaspectratio}
% Set default figure placement to htbp
% (\fps@figure is redefined with \def, which overwrites without checking.)
\makeatletter
\def\fps@figure{htbp}
\makeatother
% ---------------------------------------------------------------------------
% Remaining preamble: line-breaking tolerance, list spacing, section
% numbering, bibliography, hyperlinks and title metadata.
% ---------------------------------------------------------------------------
\setlength{\emergencystretch}{3em} % prevent overfull lines
% \tightlist is called at the top of compact lists in the body; it zeroes the
% space between items. \providecommand defines it only if not already defined.
\providecommand{\tightlist}{%
\setlength{\itemsep}{0pt}\setlength{\parskip}{0pt}}
% Number sectioning levels down to depth 5.
\setcounter{secnumdepth}{5}
% NOTE(review): booktabs is already loaded together with longtable and array
% earlier in this preamble, so this second load looks redundant -- confirm
% where it comes from before removing it.
\usepackage{booktabs}
\ifLuaTeX
\usepackage{selnolig} % disable illegal ligatures
\fi
% Bibliography: natbib provides the \citep commands used in the body.
\usepackage[]{natbib}
\bibliographystyle{apalike}
% Hyperlinks: load bookmark when it is installed, otherwise hyperref directly.
\IfFileExists{bookmark.sty}{\usepackage{bookmark}}{\usepackage{hyperref}}
\IfFileExists{xurl.sty}{\usepackage{xurl}}{} % add URL line breaks if available
\urlstyle{same} % disable monospaced font for URLs
% PDF metadata; hidelinks turns off link colouring and borders.
\hypersetup{
pdftitle={ReadingNotes: Have you read today's papers?},
pdfauthor={Rongting Huang rthuang@connect.hku.hk},
hidelinks,
pdfcreator={LaTeX via pandoc}}
% Title-page data consumed by \maketitle.
\title{ReadingNotes: Have you read today's papers?}
\author{Rongting Huang \href{mailto:rthuang@connect.hku.hk}{\nolinkurl{rthuang@connect.hku.hk}}}
\date{2025-02-26}
\begin{document}
\maketitle
{
\setcounter{tocdepth}{1}
\tableofcontents
}
\hypertarget{preface}{%
\chapter*{Preface}\label{preface}}
\addcontentsline{toc}{chapter}{Preface}
This book is for Rongting's daily reading notes.
\hypertarget{about-the-author}{%
\chapter*{About the author}\label{about-the-author}}
\addcontentsline{toc}{chapter}{About the author}
\begin{figure}
\centering
\includegraphics{./figs/Rongting/IMG-5393.jpg}
\caption{1}
\end{figure}
\begin{figure}
\centering
\includegraphics{./figs/Rongting/IMG-6316.PNG}
\caption{2}
\end{figure}
\begin{figure}
\centering
\includegraphics{./figs/Rongting/IMG-7418.JPG}
\caption{3}
\end{figure}
\begin{figure}
\centering
\includegraphics{./figs/Rongting/IMG-8010.JPG}
\caption{Keep moving}
\end{figure}
\hypertarget{research-topics}{%
\chapter*{Research topics}\label{research-topics}}
\addcontentsline{toc}{chapter}{Research topics}
\hypertarget{about-the-book}{%
\chapter*{About the book}\label{about-the-book}}
\addcontentsline{toc}{chapter}{About the book}
\textbf{Note}: to build this book, use the following script in R and follow the \href{https://bookdown.org/yihui/bookdown/}{bookdown manual}:
\begin{verbatim}
bookdown::render_book("index.Rmd", "bookdown::gitbook")
\end{verbatim}
\hypertarget{intro}{%
\chapter{Introduction}\label{intro}}
You can label chapter and section titles using \texttt{\{\#label\}} after them, e.g., we can reference Chapter \ref{intro}. If you do not manually label them, there will be automatic labels anyway, e.g., Chapter \ref{methods}.
Figures and tables with captions will be placed in \texttt{figure} and \texttt{table} environments, respectively.
\begin{Shaded}
\begin{Highlighting}[]
\FunctionTok{par}\NormalTok{(}\AttributeTok{mar =} \FunctionTok{c}\NormalTok{(}\DecValTok{4}\NormalTok{, }\DecValTok{4}\NormalTok{, .}\DecValTok{1}\NormalTok{, .}\DecValTok{1}\NormalTok{))}
\FunctionTok{plot}\NormalTok{(pressure, }\AttributeTok{type =} \StringTok{\textquotesingle{}b\textquotesingle{}}\NormalTok{, }\AttributeTok{pch =} \DecValTok{19}\NormalTok{)}
\end{Highlighting}
\end{Shaded}
\begin{figure}
{\centering \includegraphics[width=0.8\linewidth]{ReadingNotes_files/figure-latex/nice-fig-1}
}
\caption{Here is a nice figure!}\label{fig:nice-fig}
\end{figure}
Reference a figure by its code chunk label with the \texttt{fig:} prefix, e.g., see Figure \ref{fig:nice-fig}. Similarly, you can reference tables generated from \texttt{knitr::kable()}, e.g., see Table \ref{tab:nice-tab}.
\begin{Shaded}
\begin{Highlighting}[]
\NormalTok{knitr}\SpecialCharTok{::}\FunctionTok{kable}\NormalTok{(}
\FunctionTok{head}\NormalTok{(iris, }\DecValTok{20}\NormalTok{), }\AttributeTok{caption =} \StringTok{\textquotesingle{}Here is a nice table!\textquotesingle{}}\NormalTok{,}
\AttributeTok{booktabs =} \ConstantTok{TRUE}
\NormalTok{)}
\end{Highlighting}
\end{Shaded}
\begin{table}
\caption{\label{tab:nice-tab}Here is a nice table!}
\centering
\begin{tabular}[t]{rrrrl}
\toprule
Sepal.Length & Sepal.Width & Petal.Length & Petal.Width & Species\\
\midrule
5.1 & 3.5 & 1.4 & 0.2 & setosa\\
4.9 & 3.0 & 1.4 & 0.2 & setosa\\
4.7 & 3.2 & 1.3 & 0.2 & setosa\\
4.6 & 3.1 & 1.5 & 0.2 & setosa\\
5.0 & 3.6 & 1.4 & 0.2 & setosa\\
\addlinespace
5.4 & 3.9 & 1.7 & 0.4 & setosa\\
4.6 & 3.4 & 1.4 & 0.3 & setosa\\
5.0 & 3.4 & 1.5 & 0.2 & setosa\\
4.4 & 2.9 & 1.4 & 0.2 & setosa\\
4.9 & 3.1 & 1.5 & 0.1 & setosa\\
\addlinespace
5.4 & 3.7 & 1.5 & 0.2 & setosa\\
4.8 & 3.4 & 1.6 & 0.2 & setosa\\
4.8 & 3.0 & 1.4 & 0.1 & setosa\\
4.3 & 3.0 & 1.1 & 0.1 & setosa\\
5.8 & 4.0 & 1.2 & 0.2 & setosa\\
\addlinespace
5.7 & 4.4 & 1.5 & 0.4 & setosa\\
5.4 & 3.9 & 1.3 & 0.4 & setosa\\
5.1 & 3.5 & 1.4 & 0.3 & setosa\\
5.7 & 3.8 & 1.7 & 0.3 & setosa\\
5.1 & 3.8 & 1.5 & 0.3 & setosa\\
\bottomrule
\end{tabular}
\end{table}
You can write citations, too. For example, we are using the \textbf{bookdown} package \citep{R-bookdown} in this sample book, which was built on top of R Markdown and \textbf{knitr} \citep{xie2015}.
\hypertarget{sgcell}{%
\chapter{SingleCell}\label{sgcell}}
\hypertarget{technology}{%
\section{Technology}\label{technology}}
\hypertarget{single-cell}{%
\subsection{single cell}\label{single-cell}}
\begin{itemize}
\tightlist
\item
scDNA
\item
scRNA
\item
scATAC
\end{itemize}
\hypertarget{spatial-single-cell}{%
\subsection{spatial single cell}\label{spatial-single-cell}}
\hypertarget{review}{%
\section{Review}\label{review}}
\hypertarget{review-2021-06}{%
\subsection{Review-2021-06}\label{review-2021-06}}
\href{https://www.nature.com/articles/s41592-021-01171-x}{The triumphs and limitations of computational methods for scRNA-seq}\citep{kharchenko2021triumphs}
Though computational approaches vary, most formulate (1) a statistical model of the measurement, (2) a representation of the data in reduced dimensions, and (3) an approximation of the expression manifold (Box 2), with a set of discrete transcriptional subpopulations being the simplest and the most common approximation. The problems motivating these steps, and the specific solutions and their assumptions, are the subject of this review.
\includegraphics{./figs/singleCell/preprocessing_review1.jpg}
\href{https://www.nature.com/articles/s41592-021-01171-x/figures/1}{Key preprocessing steps in single-cell RNA-seq analysis}
\begin{figure}
\centering
\includegraphics{./figs/singleCell/key_analysis_review2.jpg}
\caption{Key analysis steps in single-cell RNA-seq analysis}
\end{figure}
\href{https://www.nature.com/articles/s41592-021-01171-x/figures/2}{Key analysis steps in single-cell RNA-seq analysis}
\textbf{Box 1 Select software tools}
Tools for alignment, barcode correction, count matrix estimation, and quality control include:
\begin{itemize}
\item
CellRanger (\url{https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/installation}): supports 10x Chromium datasets (commercial product)
\item
dropEst (\url{https://github.com/hms-dbmi/dropEst}): supports multiple droplet-based protocols
\item
STAR (\url{https://github.com/alexdobin/STAR}): aligner (used internally by CellRanger and dropEst), also has built-in options for count matrix estimation
\item
Optimus (\url{https://data.humancellatlas.org/pipelines/optimus-workflow}): supports 10x Chromium v2 and v3 datasets, designed for Human Cell Atlas
\item
Kallisto/bustools (\url{https://www.kallistobus.tools}): fast processing using pseudoalignment
\end{itemize}
Cell filter and doublet identification tools include:
\begin{itemize}
\item
EmptyDrops (\url{https://rdrr.io/github/MarioniLab/DropletUtils/man/emptyDrops.html}): uses a classifier to distinguish `empty' cells that look like the low-end tail of the cell size distribution
\item
Scrublet (\url{https://github.com/AllonKleinLab/scrublet}): python-based, doublet simulation and doublet scoring
\item
doubletFinder (\url{https://github.com/chris-mcginnis-ucsf/DoubletFinder}): R-based, doublet simulation and doublet scoring
\item
scds (\url{https://github.com/kostkalab/scds}): fast doublet scoring implementation
\end{itemize}
Tools for normalization, dimensionality reduction, and clustering and differential expression include:
\begin{itemize}
\item
Seurat (\url{https://satijalab.org/seurat/}): the most popular analysis toolkit, R-based
\item
scanpy (\url{https://github.com/theislab/scanpy}): the most popular python-based toolkit
\item
scVI (\url{https://github.com/YosefLab/scVI}): latent space identification using variational neural net
\item
pagoda2 (\url{https://github.com/hms-dbmi/pagoda2}): fast, R-based processing
\item
SAUCIE (\url{https://www.krishnaswamylab.org/projects/saucie}): a neural-net-based dimensionality reduction, using maximal mean discrepancy penalty
\end{itemize}
Tools for trajectory fitting include:
\begin{itemize}
\item
Monocle3 (\url{https://cole-trapnell-lab.github.io/monocle3/}): third iteration of the Monocle package, including updated tree utilities
\item
Slingshot (\url{https://github.com/kstreet13/slingshot}): tree fitting with improved pseudotime estimation
\item
PAGA (\url{https://github.com/theislab/paga}): tree/graph fitting approach combined with cell aggregation, also supports cluster-based velocity estimates
\item
Wishbone (\url{https://dpeerlab.github.io/dpeerlab-website/wishbone.html}): a bifurcation analysis method
\item
Destiny, DPT (\url{https://github.com/theislab/destiny/}): dimensionality reduction and trajectory fitting using diffusion maps {[}82{]}
\end{itemize}
Tools for velocity estimation include:
\begin{itemize}
\item
velocyto (\url{http://velocyto.org/}): reference python/R implementation
\item
scVelo (\url{https://scvelo.readthedocs.io/}): new implementation using curve-based phase portrait fit
\end{itemize}
\hypertarget{statistical-view-of-a-cell}{%
\subsubsection{Statistical view of a cell}\label{statistical-view-of-a-cell}}
\hypertarget{comparing-transcriptional-states}{%
\subsubsection{Comparing transcriptional states}\label{comparing-transcriptional-states}}
\hypertarget{the-quest-for-reduced-dimensions}{%
\subsubsection{The quest for reduced dimensions}\label{the-quest-for-reduced-dimensions}}
\hypertarget{section}{%
\subsubsection{}\label{section}}
\begin{figure}
\centering
\includegraphics{./figs/singleCell/scRNA-seq basics.jpg}
\caption{scRNA-seq basics}
\end{figure}
\href{https://www.nature.com/articles/s41592-021-01171-x/figures/3}{scRNA-seq basics}
a, Beating Moore's law. The number of cells measured by landmark scRNA-seq datasets over years (red), compared with the increase in the CPU transistor counts (black). The set of all published scRNA-seq studies {[}83{]} is shown with small red dots. The estimated number of cells in a human body is shown by a green dashed line.
b, Shallow coverage of each cell can be compensated for by measuring more cells. The ability to distinguish two cell populations, assessed by the area under the receiver operating characteristic curve (ROC AUC) measure, is shown as a function of the number of measured cells (x axis) and the mean cell depth (y axis). Examples of three different simulations (1-3) within different parts of this design parameter space are shown on PCA projections.
c, Probabilistic view of scRNA-seq estimates. Posterior probability of IL32 gene expression magnitude is shown for five cells from two different CD8+ T cell populations (red and blue, thin lines). Joint posteriors assessing the mean expression magnitude within each subpopulation are shown by thick dashed lines.
d, Comparing CD4+ T cells and CD14+ monocytes, the plot shows the number (y axis, left) and the fraction (y axis, right) of the genes passing a 1\% statistical significance threshold for differential expression (DE) as a function of the number of cells compared from each population (x axis).
e, The scatter plot shows for each gene (dots) the mean (x axis) and variance (y axis) of the normalized UMI counts (CPM, counts per million) in CD4+ T cells. The Poisson expected value is shown in green, with a quadratic-based negative binomial fit shown in red. f-i, Variance normalization and most variable genes.
f, A t-SNE embedding of a primary peripheral blood mononuclear cell (PBMC) dataset with cell annotations. NK, natural killer, separated into CD56 bright and dim subsets. pDC, plasmacytoid dendritic cell.
g, Mean-variance relationship of different genes (dots) in the PBMC dataset is shown for log-transformed expression estimates. The genome-wide relationship, as captured by smoothed regression, is shown by the blue line. Genes whose variance is significantly higher than the genome-wide trend are shown as red dots.
h, Residual variance is shown for the top 5,000 overdispersed genes, ordered by the statistical significance (x axis).
i, Expression pattern of several example genes, with circles highlighting the subpopulations distinguished by the genes.
j, Distribution of normalized expression magnitudes (CPM) for the CTSH gene across all CD14+ monocytes is shown on the linear scale (top) and after log transformation (bottom) with a pseudocount.
\hypertarget{review-2021-07}{%
\subsection{Review-2021-07}\label{review-2021-07}}
\href{https://www.nature.com/articles/s41587-021-00895-7}{Computational principles and challenges in single-cell data integration}\citep{argelaguet2021computational}
\hypertarget{review-2021-10}{%
\subsection{Review-2021-10}\label{review-2021-10}}
\href{https://genomebiology.biomedcentral.com/articles/10.1186/s13059-021-02519-4}{Over 1000 tools reveal trends in the single-cell RNA-seq analysis landscape}\citep{zappia2021over}
Single-cell gene expression measurements are cell type-specific (unlike DNA), more easily interpretable (compared to epigenetic modalities), and scalable to thousands of features (unlike antibody-based protein measurements) and thousands of cells. These features mean that scRNA-seq can be used as an anchor, often measured in parallel and used to link other modalities.
\href{https://github.com/Rongtingting/awesome-single-cell}{The Awesome Single Cell repository} is a community-curated list of software packages, resources, researchers, and publications for various single-cell technologies and \href{https://docs.google.com/spreadsheets/d/1IPe2ozb1Mny8sLvJaSE57RJr3oruiBoSudAVhSH-O8M/edit?pli=1\#gid=237186399}{Albert Villela's SingleCell Omics spreadsheet} tracks a range of information including technologies, companies, and software tools.
The scRNA-tools database focuses specifically on the cataloging and manual curation of software tools for analyzing scRNA-seq data {[}23{]}. When tools become available (usually through a bioRxiv preprint), we classify them according to the analysis tasks they can be used for and record information such as associated preprints and publications, software licenses, code location, software repositories, and a short description. Most tools are added to the database within 30 days of the first preprint or publication (Additional file 1: Figure S1). All the recorded information is publicly available in an interactive format at \url{https://www.scrna-tools.org/} {[}24{]}. As the number of tools in the database has moved past 1000, we have taken this opportunity to provide an update on the current state of the database and explore trends in scRNA-seq analysis across the past 5 years. We find that the focus of tool developers has moved on from continuous ordering of cells to methods for integrating samples and classifying cells. The database also shows us that more new tools are built using Python while the relative usage of R is declining. We also examine the role of open science in the development of the field and find that open source practices lead to increased citations. While the scRNA-tools database does not record every scRNA-seq analysis tool, the large proportion it does include over the history of what is still a young field make these analyses possible and a reasonable estimate of trends across all tools.
\hypertarget{data-processing}{%
\section{Data processing}\label{data-processing}}
\href{https://www.nature.com/articles/s41587-021-00875-x}{Bayesian inference of gene expression states from single-cell RNA-seq data}\citep{breda2021bayesian}
\href{https://www.nature.com/articles/s41587-021-00875-x/figures/1}{summary of sanity approach}
\begin{figure}
\centering
\includegraphics{./figs/singleCell/summary_Sanity.jpg}
\caption{summary of sanity approach}
\end{figure}
\begin{figure}
\centering
\includegraphics{./figs/singleCell/sanity_notes.png}
\caption{sanity notes}
\end{figure}
\hypertarget{clustering-methods}{%
\section{Clustering methods}\label{clustering-methods}}
\hypertarget{benchmarking}{%
\section{Benchmarking}\label{benchmarking}}
\href{https://www.nature.com/articles/s41592-021-01336-8}{Benchmarking atlas-level data integration in single-cell genomics}\citep{luecken2021benchmarking}
\begin{figure}
\centering
\includegraphics{./figs/singleCell/benchmarking1.jpg}
\caption{Design of single-cell integration benchmarking (scIB).}
\end{figure}
\begin{figure}
\centering
\includegraphics{./figs/singleCell/benchmarking2.jpg}
\caption{Benchmarking results for the human immune cell task.}
\end{figure}
\hypertarget{technology-related}{%
\chapter{Technology Related}\label{technology-related}}
\hypertarget{scrna-seq-modelling}{%
\section{scRNA-seq modelling}\label{scrna-seq-modelling}}
\hypertarget{nb-models-for-scrna-seq}{%
\subsection{NB models for scRNA-seq}\label{nb-models-for-scrna-seq}}
KEY WORDS: \textbf{scRNA-seq counts}
\href{http://web.stanford.edu/class/bios221/book/Chap-CountData.html}{TUTORIAL-High-Throughput Count Data}
DEseq2, scTransform, BASICS
\hypertarget{deseq2}{%
\subsubsection{DEseq2}\label{deseq2}}
\textbf{\href{https://genomebiology.biomedcentral.com/articles/10.1186/s13059-014-0550-8}{Moderated estimation of fold change and dispersion for RNA-seq data with DESeq2}\citep{love2014moderated}}
\begin{figure}
\centering
\includegraphics{./figs/RNAseqCounts/DEseq2.png}
\caption{DEseq2}
\end{figure}
\begin{figure}
\centering
\includegraphics{./figs/RNAseqCounts/DEseq2_Fig1.png}
\caption{DEseq2-fig1}
\end{figure}
\textbf{Shrinkage estimation of dispersion.} Plot of dispersion estimates over the average expression strength (A) for the Bottomly et al.~{[}16{]} dataset with six samples across two groups and (B) for five samples from the Pickrell et al.~{[}17{]} dataset, fitting only an intercept term. First, gene-wise MLEs are obtained using only the respective gene's data (black dots). Then, a curve (red) is fit to the MLEs to capture the overall trend of dispersion-mean dependence. This fit is used as a prior mean for a second estimation round, which results in the final MAP estimates of dispersion (arrow heads). This can be understood as a shrinkage (along the blue arrows) of the noisy gene-wise estimates toward the consensus represented by the red line. The black points circled in blue are detected as dispersion outliers and not shrunk toward the prior (shrinkage would follow the dotted line). For clarity, only a subset of genes is shown, which is enriched for dispersion outliers. Additional file 1: Figure S1 displays the same data but with dispersions of all genes shown. MAP, maximum a posteriori; MLE, maximum-likelihood estimate.
\hypertarget{sctransform}{%
\subsubsection{scTransform}\label{sctransform}}
\texttt{Our\ procedure\ is\ broadly\ applicable\ for\ any\ UMI-based\ scRNA-seq\ dataset\ and\ is\ freely\ available\ to\ users\ through\ the\ open-source\ R\ package\ sctransform\ (github.com/ChristophH/sctransform),\ with\ a\ direct\ interface\ to\ our\ single-cell\ toolkit\ Seurat.}
\texttt{\textbf{Pearson\ residuals}\ from\ "regularized\ negative\ binomial\ regression,"\ help\ remove\ the\ influence\ of\ technical\ characteristics\ from\ downstream\ analyses\ while\ preserving\ biological\ heterogeneity}
\texttt{unconstrained\ negative\ binomial\ model\ may\ overfit\ scRNA-seq\ data,\ and\ overcome\ this\ by\ pooling\ information\ across\ genes\ with\ similar\ abundances\ to\ obtain\ stable\ parameter\ estimates.}
\textbf{\href{https://genomebiology.biomedcentral.com/articles/10.1186/s13059-019-1874-1}{Normalization and variance stabilization of single-cell RNA-seq data using regularized negative binomial regression}\citep{hafemeister2019normalization}}
\url{https://github.com/ChristophH/sctransform/}
\begin{figure}
\centering
\includegraphics{./figs/RNAseqCounts/sctransform.png}
\caption{SCTransform}
\end{figure}
We propose that the Pearson residuals from regularized negative binomial regression, where cellular sequencing depth is utilized as a covariate in a generalized linear model, successfully remove the influence of technical characteristics from downstream analyses while preserving biological heterogeneity.
\begin{itemize}
\tightlist
\item
UMI-based scRNA-seq dataset
\end{itemize}
\begin{figure}
\centering
\includegraphics{./figs/RNAseqCounts/sctransform-intro.png}
\caption{SCTransform-intro}
\end{figure}
\texttt{observed\ \textbf{sequencing\ depth}\ (number\ of\ genes\ or\ molecules\ detected\ per\ cell)\ can\ vary\ significantly\ between\ cells,\ with\ variation\ in\ molecular\ counts\ potentially\ spanning\ an\ order\ of\ magnitude,\ even\ within\ the\ same\ cell\ type}
\texttt{while\ the\ now\ widespread\ use\ of\ unique\ molecular\ identifiers\ (UMI)\ in\ scRNA-seq\ removes\ technical\ variation\ associated\ with\ PCR,\ differences\ in\ cell\ lysis,\ reverse\ transcription\ efficiency,\ and\ stochastic\ molecular\ sampling\ during\ sequencing\ also\ contribute\ significantly,\ necessitating\ technical\ correction}
\includegraphics{./figs/RNAseqCounts/background.png}
\includegraphics{./figs/RNAseqCounts/sctrans_intro.png}
\textbf{Results}
\begin{itemize}
\item
A single scaling factor does not effectively normalize both lowly and highly expressed genes
\item
Modeling single-cell data with a negative binomial distribution leads to overfitting
\item
Applying regularized negative binomial regression for single-cell normalization
\item
Pearson residuals effectively normalize technical differences, while retaining biological variation
\item
Downstream analytical tasks are not biased by sequencing depth
\end{itemize}
\begin{figure}
\centering
\includegraphics{./figs/RNAseqCounts/sctransform-sequencing depth.png}
\caption{.}
\end{figure}
\includegraphics{./figs/RNAseqCounts/sctransform-fig1.png}
\includegraphics{./figs/RNAseqCounts/sctransform-figS1.png}
\textbf{Methods}
\begin{itemize}
\tightlist
\item
Regularized negative binomial regression
\end{itemize}
\hypertarget{basics}{%
\subsubsection{BASiCS}\label{basics}}
\textbf{\href{https://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1004333}{BASiCS: Bayesian Analysis of Single-Cell Sequencing Data}\citep{vallejos2015basics}}
\includegraphics{./figs/RNAseqCounts/BASICS.png}
\includegraphics{./figs/RNAseqCounts/BASICS2.png}
\textbf{INTRODUCTION}
\begin{itemize}
\tightlist
\item
Cell-specific normalization
\end{itemize}
For instance, in Fig 1(a), each gene has the same expression rate in both cells, yet the expression counts in the first cell will be roughly twice as much as those from the second cell. In the same spirit, if different sequencing depths (the number of times a single nucleotide is read during the sequencing) are applied to these cells, the scale of expression counts will also be affected. Thus, normalisation is a crucial issue in this context.
\begin{itemize}
\tightlist
\item
gene specific variation
\end{itemize}
Another fundamental problem for interpreting single-cell sequencing is the presence of high levels of unexplained technical noise (unrelated to sequencing depth and other amplification biases) {[}5{]}. This creates new challenges for identifying genes that show genuine biological cell-to-cell heterogeneity---beyond that induced by technical variation---and motivates the systematic inclusion of spike-in genes in single-cell experiments.
\begin{itemize}
\tightlist
\item
UMI
\end{itemize}
Recently, the introduction of Unique Molecular Identifiers (UMI) attached to each cDNA molecule during reverse transcription has substantially reduced the levels of unexplained technical noise and eliminated the effect of sequencing depth changes and other amplification biases in single-cell experiments.
Nevertheless, our analysis of a mouse Embryonic Stem Cells (ESC) dataset suggests that unexplained technical variability cannot be completely removed by using UMIs (see Results section) and that an accurate quantification of technical variability still remains important.
\textbf{HIGHLIGHT}
In BASiCS (Bayesian Analysis of Single-Cell Sequencing data), a joint model of biological and spike-in genes is formulated to simultaneously quantify unexplained technical noise and cell-to-cell biological heterogeneity using the complete set of data, borrowing information between both sets of genes (spike-in and biological) through common parameters in a hierarchical structure. Additionally, BASiCS incorporates an automated normalisation method, where normalising constants are treated as model parameters.
\includegraphics{./figs/RNAseqCounts/pcbi.1004333.g001.png}
\includegraphics{./figs/RNAseqCounts/pcbi.1004333.g002.png}
\includegraphics{./figs/RNAseqCounts/BASiCS-FIG2.png}
\subsubsection{edgeR}
\textbf{\href{https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2796818/}{edgeR: a Bioconductor package for differential expression analysis of digital gene expression data}\citep{robinson2010edger}}
\textbf{\href{https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3378882/}{Differential expression analysis of multifactor RNA-Seq experiments with respect to biological variation}\citep{mccarthy2012differential}}
\hypertarget{sanity}{%
\subsubsection{Sanity}\label{sanity}}
\textbf{\href{https://www.nature.com/articles/s41587-021-00875-x}{Bayesian inference of gene expression states from single-cell RNA-seq data}\citep{breda2021bayesian}}
\hypertarget{batch-effect-correction-methods-for-scrna-seq}{%
\subsection{Batch-effect correction methods for scRNA-seq}\label{batch-effect-correction-methods-for-scrna-seq}}
\textbf{\href{https://link.springer.com/article/10.1186/s13059-019-1850-9}{A benchmark of batch-effect correction methods for single-cell RNA sequencing data}\citep{tran2020benchmark}}
\begin{itemize}
\tightlist
\item
Abstract
\textbf{Background}: Large-scale single-cell transcriptomic datasets generated using different technologies contain batch-specific
systematic variations that present a challenge to batch-effect removal and data integration. With continued
growth expected in scRNA-seq data, achieving effective batch integration with available computational resources is
crucial. Here, we perform an in-depth benchmark study on available batch correction methods to determine the
most suitable method for batch-effect removal.
\end{itemize}
\textbf{Results}: We compare 14 methods in terms of computational runtime, the ability to handle large datasets, and
batch-effect correction efficacy while preserving cell type purity. Five scenarios are designed for the study: identical
cell types with different technologies, non-identical cell types, multiple batches, big data, and simulated data.
Performance is evaluated using four benchmarking metrics including kBET, LISI, ASW, and ARI. We also investigate
the use of batch-corrected data to study differential gene expression.
\textbf{Conclusion}: Based on our results, Harmony, LIGER, and Seurat 3 are the recommended methods for batch
integration. Due to its significantly shorter runtime, Harmony is recommended as the first method to try, with the
other methods as viable alternatives.
\textbf{Keywords}: Single-cell RNA-seq, Batch correction, Batch effect, Integration, Differential gene expression
\hypertarget{hi-seq}{%
\section{Hi-Seq}\label{hi-seq}}
\textbf{\href{https://www.nature.com/articles/s41587-021-00981-w}{Enhanced detection of minimal residual disease by targeted sequencing of phased variants in circulating tumor DNA}\citep{Kurtz2021}}
KEY WORDS: \textbf{phased variants}
\hypertarget{languages-and-compilers}{%
\section{languages and compilers}\label{languages-and-compilers}}
\begin{itemize}
\tightlist
\item
Seq
\end{itemize}
\href{https://www.nature.com/articles/s41587-021-00985-6}{A Python-based programming language for high-performance computational genomics}\citep{shajii2021python}
\begin{figure}
\centering
\includegraphics{./figs/computationalBio/The Seq programming language.jpg}
\caption{The Seq programming language.}
\end{figure}
a, Conceptual comparison of Seq, Python and C++. Seq combines the high performance of C++ with the programming ease and clarity of Python, by virtue of domain-specific compiler optimizations that are hidden from the user. b, Example Seq code for a simple k-mer-based read mapper. c, Schematic of standard genomics pipeline and those state-of-the-art tools compared to Seq.
To demonstrate Seq's versatility, we reimplemented eight popular genomics tools in Seq, spanning key tasks in the genomics analysis pipeline (Fig. 1c and Supplementary Note 2), such as the finding of super-maximal exact matches, or SMEMs (BWA-MEM\textsuperscript{13}), genome homology table construction (CORA\textsuperscript{14}), Hamming distance-based all-mapping (mrsFAST\textsuperscript{15}), long-read alignment (minimap2\textsuperscript{16}), \textbf{single-cell data preprocessing (UMI-tools\textsuperscript{17})}, SAM/BAM post-processing (GATK\textsuperscript{18}), global sequence alignment (AVID\textsuperscript{19}) and \textbf{haplotype phasing (HapTree-X\textsuperscript{20,21})}.
\href{https://github.com/seq-lang/seq-benchmarks/tree/master/seq-nbt\#haptree-x-haplotype-phasing}{HapTree-X}
\hypertarget{singlecell-analysis-tools}{%
\section{singlecell analysis tools}\label{singlecell-analysis-tools}}
\begin{itemize}
\item
scanpy
\href{https://genomebiology.biomedcentral.com/articles/10.1186/s13059-017-1382-0}{SCANPY: large-scale single-cell gene expression data analysis}
\item
seurat
\end{itemize}
\hypertarget{singlecell-analysis-sources}{%
\section{singlecell analysis sources}\label{singlecell-analysis-sources}}
\hypertarget{spatial-technology}{%
\chapter{Spatial Technology}\label{spatial-technology}}
\hypertarget{sequencing-based-st}{%
\section{Sequencing based ST}\label{sequencing-based-st}}
\hypertarget{statmethods}{%
\chapter{StatMethods}\label{statmethods}}
\hypertarget{hmm-based-methods}{%
\section{HMM based methods}\label{hmm-based-methods}}
\hypertarget{em-based-methods}{%
\section{EM based methods}\label{em-based-methods}}
\hypertarget{vb-based-methods}{%
\section{VB based methods}\label{vb-based-methods}}
\hypertarget{glm}{%
\section{GLM}\label{glm}}
\href{https://www.statsmodels.org/stable/glm.html\#links}{statsmodels GLM}
\href{https://www.statsmodels.org/stable/generated/statsmodels.discrete.discrete_model.NegativeBinomial.html}{statsmodels.discrete.discrete\_model.NegativeBinomial}
\hypertarget{phd-research-topic-based}{%
\chapter{PhD Research topic based}\label{phd-research-topic-based}}
\hypertarget{cnv-calling}{%
\section{CNV calling}\label{cnv-calling}}
\hypertarget{breaking-point-detection}{%
\subsection{breaking point detection}\label{breaking-point-detection}}
\begin{itemize}
\tightlist
\item
4 CNV breakpoint detection methods (2021-07-17 Group meeting)
\end{itemize}
\begin{enumerate}
\def\labelenumi{\arabic{enumi}.}
\tightlist
\item
CHISEL: \url{https://www.nature.com/articles/s41587-020-0661-6\#Sec8} (see global clustering subsection)
\end{enumerate}
\begin{itemize}
\tightlist
\item
seemingly no breakpoint detection, but rather a global clustering (i.e.~entry-wise for a bin-by-cell matrix), thus the resolution of CNV is the bin size (5 Mb)
\end{itemize}
\begin{enumerate}
\def\labelenumi{\arabic{enumi}.}
\setcounter{enumi}{1}
\tightlist
\item
Alleloscope: \url{https://www.nature.com/articles/s41587-021-00911-w\#Sec10} (see segmentation subsection)
\end{enumerate}
\begin{itemize}
\tightlist
\item
HMM on pooled cells (pseudo-bulk?) with pre-defined Gaussian means and variance for each state
\end{itemize}
\begin{enumerate}
\def\labelenumi{\arabic{enumi}.}
\setcounter{enumi}{2}
\tightlist
\item
InferCNV: \url{https://github.com/broadinstitute/inferCNV/wiki/inferCNV-HMM-based-CNV-Prediction-Methods}
\end{enumerate}
\begin{itemize}
\tightlist
\item
i6-HMM generates in silico spike-in; seemingly define CNV region (segment) on cluster instead of cell, but using noise model on each cell (not quite sure from the doc).
\end{itemize}
\begin{enumerate}
\def\labelenumi{\arabic{enumi}.}
\setcounter{enumi}{3}
\tightlist
\item
CopyKat: \url{https://www.nature.com/articles/s41587-020-00795-2\#Sec9}
\end{enumerate}
\begin{itemize}
\tightlist
\item
KS test for whether two neighbouring bins should be joined, using samples from the Gamma-Poisson posterior. Seemingly using noise model on each cell within a cluster
\end{itemize}
FALCON
\url{https://academic.oup.com/nar/article/43/4/e23/2410993}
\begin{figure}
\centering
\includegraphics{./figs/CNV/HATCHet.jpg}
\caption{Overview of HATCHet algorithm}
\end{figure}
\url{https://www.nature.com/articles/s41467-020-17967-y/figures/1}
\textbf{a} HATCHet takes in input DNA sequencing data from multiple bulk tumor samples of the same patient and has five steps.
\textbf{b} First, HATCHet calculates the RDRs and BAFs in bins of the reference genome (black squares). Here, we show two tumor samples p and q.
\textbf{c} Second, HATCHet clusters the bins based on RDRs and BAFs globally along the entire genome and jointly across samples p and q. Each cluster (color) includes bins with the same copy-number state within each clone present in p or q.
\textbf{d} Third, HATCHet estimates two values for the fractional copy number of each cluster by scaling RDRs. If there is no WGD, the identification of the cluster (magenta) with copy-number state (1, 1) is sufficient and RDRs are scaled correspondingly.
If a WGD occurs, HATCHet identifies an additional cluster with identical copy-number state in all tumor clones. Dashed black horizontal lines in the scaled BAF-RDR plot represent values of fractional copy numbers that correspond to clonal CNAs.
\textbf{e} Fourth, HATCHet factors the allele-specific fractional copy numbers FA, FB into the allele-specific copy numbers A, B, respectively, and the clone proportions U. Here, there is a normal clone and 3 tumor clones.
\textbf{f} Last, HATCHet's model-selection criterion identifies the matrices A, B, and U in the factorization while evaluating the fit according to both the inferred number of clones and presence/absence of a WGD.
\textbf{g} HATCHet outputs allele- and clone-specific copy numbers (with the color of the corresponding clone) and clone proportions (in the top right part of each plot) for each sample.
Clusters are classified according to the inference of unique/different copy-number states in each sample (sample-clonal/subclonal) and across all tumor clones (tumor-clonal/subclonal).
\begin{figure}
\centering
\includegraphics{./figs/CNV/chisel.jpg}
\caption{Overview of chisel algorithm}
\end{figure}
\url{https://www.nature.com/articles/s41587-020-0661-6/figures/1}
\textbf{a}, CHISEL computes RDRs and BAFs in low-coverage (\textless0.05× per cell) single-cell DNA sequencing data (top left). Read counts from 2,000 individual cells (rows) in 5-Mb genomic bins (columns) across three chromosomes (gray rectangles in first row) are shown.
For each bin in each cell, CHISEL computes the RDR (top) by normalizing the observed read counts. CHISEL computes the BAF in each bin and cell (bottom) by first performing referenced-based phasing of germline SNPs in 50-kb haplotype blocks (magenta and green) and then phasing all these blocks jointly across all cells.
\textbf{b}, CHISEL clusters RDRs and BAFs globally along the genome and jointly across all cells resulting here in five clusters of genomic bins (red, blue, purple, yellow and gray) with distinct copy-number states.
\textbf{c}, CHISEL infers a pair \(\{\hat{c}_t, \check{c}_t\}\) of allele-specific copy numbers for each cluster by determining whether the allele-specific copy numbers of the largest balanced (BAF of \textasciitilde0.5) cluster are equal to \{1, 1\} (diploid), \{2, 2\} (tetraploid) or are higher ploidy.
\textbf{d}, CHISEL infers haplotype-specific copy numbers \((a_t, b_t)\) by phasing the allele-specific copy numbers \(\{\hat{c}_t, \check{c}_t\}\) consistently across all cells.
\textbf{e}, CHISEL clusters tumor cells into clones according to their haplotype-specific copy numbers. Here, a diploid clone (light gray) and two tumor clones (red and blue) are obtained.
A phylogenetic tree describes the evolution of these clones. Somatic SNVs are derived from pseudo-bulk samples and placed on the branches of the tree.
\begin{figure}
\centering
\includegraphics{./figs/CNV/TITAN.jpg}
\caption{Overview of TITAN local clustering for segmentation}
\end{figure}
\begin{figure}
\centering
\includegraphics{./figs/CNV/infercnv_i3HMM_model.png}
\caption{Overview of inferCNV i3HMM}
\end{figure}
\begin{figure}
\centering
\includegraphics{./figs/CNV/infercnv_i6HMM_model.png}
\caption{Overview of inferCNV i6HMM}
\end{figure}
\begin{figure}
\centering
\includegraphics{./figs/CNV/HoneyBADGER.png}
\caption{Overview of HoneyBADGER method}
\end{figure}
\begin{figure}
\centering
\includegraphics{./figs/CNV/Alleloscope.jpg}
\caption{Overview of Alleloscope algorithm}
\end{figure}
\begin{figure}
\centering
\includegraphics{./figs/CNV/copykat.jpg}
\caption{Overview of copykat algorithm}
\end{figure}
\hypertarget{related-research}{%
\subsection{Related research}\label{related-research}}
\textbf{clonealign}: statistical integration of independent single-cell RNA and DNA sequencing data from human cancers
However, independently sampled single-cell measurements introduce a new analytical challenge of how to associate cells across each modality. Assuming a population structure with a fixed number of clones, this can be expressed as a mapping problem, whereby cells measured with transcriptome assays must be aligned to those measured with a genome assay.
\includegraphics{./figs/CNV/clonealign.jpg}
In order to relate the independent measurements, we assume that an increase in the copy number of a gene will result in a corresponding increase in that gene's expression and vice versa (Fig. 1b),
a relationship previously observed in joint RNA-DNA assays in bulk tissues {[}12{]} and at the single-cell level {[}9, 10, 13{]}.
\textbf{CHISEL}
\textbf{inferCNV}
\textbf{copyKAT}
\textbf{CaSpER}
\textbf{HoneyBADGER}
\hypertarget{smoothing-strategies-in-rdr}{%
\subsection{Smoothing strategies in RDR}\label{smoothing-strategies-in-rdr}}
-TODO
\hypertarget{pubmon}{%
\subsection{PUBMON}\label{pubmon}}
\href{https://www.biorxiv.org/content/10.1101/2021.11.25.469995v1}{Precise identification of cancer cells from allelic imbalances in single cell transcriptomes}
this paper used BAF information to identify cancer cells, seems quite relevant.
\textbf{2022-03-09}
\href{https://www.biorxiv.org/content/10.1101/2021.06.04.447031v1.full}{Evolutionary tracking of cancer haplotypes at single-cell resolution}
\href{https://www.biorxiv.org/content/10.1101/2022.02.07.479314v1.full}{Haplotype-enhanced inference of somatic copy number profiles from single-cell transcriptomes}
\begin{itemize}
\item
\textbf{Numbat}
\item
introduction
\end{itemize}
Existing approaches for CNV detection from scRNA-seq do not utilize the prior knowledge of haplotypes, or the individual-specific configuration of variant alleles on the two homologous chromosomes, which can enable more sensitive detection of allelic imbalance.
The utility of phasing in detecting CNV signals from scRNA-based assays, however, has not been explored.
We therefore developed a computational method, Numbat, which integrates expression, allele, and haplotype information derived from population-based phasing to comprehensively characterize the CNV landscape in single-cell transcriptomes.
Numbat does not require sample-matched DNA data or a priori genotyping, and is widely applicable to a wide range of experimental settings and cancer types.
\begin{itemize}
\tightlist
\item
Results
\end{itemize}
\textbf{Enhanced detection of subclonal allelic imbalances using population-based haplotype phasing}
Prior phasing information can effectively amplify weak allelic imbalance signals of individual SNPs induced by the CNV, by exposing joint behavior of entire haplotype sequences and thereby increasing the statistical power.
The ability to infer phasing between genes is particularly useful for CNV inference, as it provides means to overcome stochastic allele-specific expression effects which give rise to bursts of gene-specific allelic imbalances in individual cells.
The differential phasing accuracy from within and between genes reflects the fact that the strength of genetic linkage decays with increasing distance (Supplementary Figure 1a).
To reflect the decay in phasing strength over longer genetic distances, we introduced site-specific transition probabilities between haplotype states in the Numbat allele HMM (see Methods).
\textbf{Accurate copy number inference from single-cell transcriptomes}
To increase robustness, Numbat models gene expression as integer read counts using a discrete Poisson Lognormal mixture distribution, and accounts for excess variance in the allele frequency (e.g.~due to allele-specific detection or transcriptional bursts) using a Beta-Binomial distribution.
\textbf{Iterative strategy to decompose tumor clonal architecture}
\textbf{Reliable identification of cancer cells in the tumor microenvironment}
\textbf{Allele-specific CNV analysis reveals additional subclonal complexity}
\textbf{Unraveling the interplay between genetic and transcriptional heterogeneity in tumor evolution}
\begin{itemize}
\item
Discussion
\item
Methods
\end{itemize}
\hypertarget{clonal-tree}{%
\section{Clonal Tree}\label{clonal-tree}}
\hypertarget{deconvolution}{%
\section{Deconvolution}\label{deconvolution}}
\hypertarget{deconvolution-of-bulk-tissue-and-spatial-transcriptomic-data}{%
\subsection{Deconvolution of bulk tissue and spatial transcriptomic data}\label{deconvolution-of-bulk-tissue-and-spatial-transcriptomic-data}}
Related papers are as follows:
\begin{itemize}
\tightlist
\item
\href{https://www.nature.com/articles/s41587-019-0114-2}{CIBERSORTx}
\item
\href{https://www.nature.com/articles/s41587-021-00830-w}{RCTD}
\item
\href{https://academic.oup.com/nar/article/49/9/e50/6129341}{SPOTlight}
\item
\href{https://genomebiology.biomedcentral.com/articles/10.1186/s13059-021-02362-7}{SpatialDWLS}
\end{itemize}
From xiunan's pre
\href{https://www.biorxiv.org/content/10.1101/2021.06.15.448381v1}{Reference-free cell-type deconvolution of pixel-resolution spatially resolved transcriptomics data}
Yuanhua add
\hypertarget{omics-integration}{%
\section{Omics integration}\label{omics-integration}}
\hypertarget{spatial-transcriptomics}{%
\section{Spatial transcriptomics}\label{spatial-transcriptomics}}
\begin{itemize}
\tightlist
\item
202109 Read in depth (led by Xianjie)
\end{itemize}
\textbf{\href{https://www.biorxiv.org/content/10.1101/2021.07.12.452018v1.full}{The spatial landscape of clonal somatic mutations in benign and malignant tissue}\citep{erickson2021spatial}}
Keywords: CNV
\textbf{\href{https://www.sciencedirect.com/science/article/pii/S0092867420306723}{Multimodal analysis of composition and spatial architecture in human squamous cell carcinoma}\citep{zhang2021supergnova}}
Keywords: alignment of scRNA AND ST
\begin{itemize}
\tightlist
\item
2022 spatial and CNV
\end{itemize}
\textbf{\href{https://genomebiology.biomedcentral.com/articles/10.1186/s13059-022-02653-7}{Statistical and machine learning methods for spatially resolved transcriptomics data analysis}\citep{zeng2022statistical}}
\textbf{\href{https://pubmed.ncbi.nlm.nih.gov/33022659/}{STARCH: copy number and clone inference from spatial transcriptomics data}\citep{elyanow2021starch}}
\textbf{\href{https://www.biorxiv.org/content/10.1101/2021.07.12.452018v1.abstract}{The spatial landscape of clonal somatic mutations in benign and malignant tissue}\citep{erickson2021spatial}}
Keywords: spatial; CNV
\begin{itemize}
\tightlist
\item
202409 Review
\end{itemize}