NumDL-CourseNotes/NumDNN.bib at master · IPAIopen/NumDL-CourseNotes · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
% Journal article: Nature 521(7553), 2015.
@article{lecun2015deep,
  author    = {LeCun, Yann and Bengio, Yoshua and Hinton, Geoffrey},
  title     = {Deep learning},
  journal   = {Nature},
  year      = {2015},
  volume    = {521},
  number    = {7553},
  pages     = {436--444},
  publisher = {Nature Research}
}
% Conference paper in the NIPS proceedings. The author list is given in full
% (the entry previously stopped after the third author), and the title is no
% longer wrapped in a second brace pair so styles can apply their own casing.
@inproceedings{LeCun1990,
  author    = {LeCun, Y and Boser, B E and Denker, J S and Henderson, D and Howard, R E and Hubbard, W and Jackel, L D},
  title     = {Handwritten digit recognition with a back-propagation network},
  booktitle = {Advances in Neural Information Processing Systems},
  year      = {1990},
  pages     = {396--404}
}

% AISTATS 2010 conference paper. Entered as inproceedings so the venue goes
% into booktitle; the previous entry had a hostname in `journal` and no year.
@inproceedings{GlorotBengio2010,
  author    = {Glorot, Xavier and Bengio, Yoshua},
  title     = {Understanding the difficulty of training deep feedforward neural networks},
  booktitle = {Proceedings of the Thirteenth International Conference on Artificial Intelligence and Statistics},
  series    = {Proceedings of Machine Learning Research},
  volume    = {9},
  pages     = {249--256},
  year      = {2010},
  url       = {http://proceedings.mlr.press/v9/glorot10a.html}
}


% Journal article: Psychological Review 65(6), 1958.
% The title carries no trailing period: the bibliography style adds its own
% punctuation, and a period inside the protected title can double up with it.
@article{Rosenblatt1958,
  author    = {Rosenblatt, F},
  title     = {{The perceptron: A probabilistic model for information storage and organization in the brain}},
  journal   = {Psychological Review},
  year      = {1958},
  volume    = {65},
  number    = {6},
  pages     = {386--408},
  publisher = {American Psychological Association},
  doi       = {10.1037/h0042519},
  url       = {http://psycnet.apa.org/journals/rev/65/6/386.html}
}
% Journal article: Nature 323(6088), 1986.
% Names use the "Last, First" form with exactly one comma each; a second comma
% (as in "Williams, R,J.") makes BibTeX read the middle token as a Jr part.
@article{Rumelhart1986,
  author  = {Rumelhart, David E. and Hinton, Geoffrey E. and Williams, Ronald J.},
  title   = {Learning representations by back-propagating errors},
  journal = {Nature},
  volume  = {323},
  number  = {6088},
  pages   = {533--536},
  year    = {1986}
}

% Monograph-length journal article (Foundations and Trends in Machine Learning).
% Single author: no "and others", which styles would render as "et al.".
@article{bengio2009learning,
  author    = {Bengio, Yoshua},
  title     = {Learning deep architectures for {AI}},
  journal   = {Foundations and Trends{\textregistered} in Machine Learning},
  volume    = {2},
  number    = {1},
  pages     = {1--127},
  year      = {2009},
  publisher = {Now Publishers, Inc.}
}

% NIPS 2012 proceedings paper, so the venue is a booktitle rather than a journal.
% The page range uses the ASCII "--" form (not a Unicode en-dash, which classic
% BibTeX cannot handle); volume is the proceedings volume.
@inproceedings{KrizhevskySutskeverHinton2012,
  author    = {Krizhevsky, Alex and Sutskever, Ilya and Hinton, Geoffrey E.},
  title     = {{ImageNet} classification with deep convolutional neural networks},
  booktitle = {Advances in Neural Information Processing Systems},
  volume    = {25},
  pages     = {1097--1105},
  year      = {2012}
}

% Journal article: Journal of Machine Learning Research, volume 12, 2011.
@article{CollobertEtAl2011,
  author  = {Collobert, Ronan and Weston, Jason and Bottou, L{\'e}on and Karlen, Michael and Kavukcuoglu, Koray and Kuksa, Pavel},
  title   = {{Natural Language Processing (Almost) from Scratch}},
  journal = {Journal of Machine Learning Research},
  volume  = {12},
  pages   = {2493--2537},
  year    = {2011},
  url     = {http://www.jmlr.org/papers/v12/collobert11a.html}
}


% arXiv preprint 1406.3676 (cs.CL), 2014.
@article{BordesEtAl2014,
  author      = {Bordes, Antoine and Chopra, Sumit and Weston, Jason},
  title       = {{Question Answering with Subgraph Embeddings}},
  journal     = {arXiv preprint arXiv:1406.3676},
  year        = {2014},
  eprinttype  = {arxiv},
  eprint      = {1406.3676v3},
  eprintclass = {cs.CL},
  url         = {http://arxiv.org/abs/1406.3676v3}
}

% arXiv preprint 1412.2007 (cs.CL), 2014.
@article{JeanEtAl2014,
  author      = {Jean, S{\'e}bastien and Cho, Kyunghyun and Memisevic, Roland and Bengio, Yoshua},
  title       = {{On Using Very Large Target Vocabulary for Neural Machine Translation}},
  journal     = {arXiv preprint arXiv:1412.2007},
  year        = {2014},
  eprinttype  = {arxiv},
  eprint      = {1412.2007v2},
  eprintclass = {cs.CL},
  url         = {http://arxiv.org/abs/1412.2007v2}
}


% ICML 2009 conference paper (ACM proceedings).
% booktitle holds the full proceedings title; the exported value was cut off
% after "International Conference".
@inproceedings{RainaEtAl2009,
  author    = {Raina, Rajat and Madhavan, Anand and Ng, Andrew Y},
  title     = {{Large-scale deep unsupervised learning using graphics processors}},
  booktitle = {Proceedings of the 26th Annual International Conference on Machine Learning},
  year      = {2009},
  month     = jun,
  pages     = {873--880},
  publisher = {ACM},
  doi       = {10.1145/1553374.1553486},
  url       = {http://portal.acm.org/citation.cfm?doid=1553374.1553486}
}

% Journal article: IEEE Signal Processing Magazine 29(6), 2012.
% The author list ends in "and others", which styles render as "et al.".
@article{hinton2012deep,
  author    = {Hinton, Geoffrey and Deng, Li and Yu, Dong and Dahl, George E and Mohamed, Abdel-rahman and Jaitly, Navdeep and Senior, Andrew and Vanhoucke, Vincent and Nguyen, Patrick and Sainath, Tara N and others},
  title     = {Deep neural networks for acoustic modeling in speech recognition: The shared views of four research groups},
  journal   = {IEEE Signal Processing Magazine},
  year      = {2012},
  volume    = {29},
  number    = {6},
  pages     = {82--97},
  publisher = {IEEE}
}


% Textbook published by SIAM. A formally published book belongs in a book
% entry with a publisher field; booklet/howpublished is for works without a
% named publisher.
@book{AscherGreif2011,
  author    = {Ascher, U M and Greif, C},
  title     = {{A First Course in Numerical Methods}},
  publisher = {SIAM},
  address   = {Philadelphia},
  year      = {2011}
}

% Book in the Springer Series in Operations Research and Financial Engineering, 2006.
@book{NocedalWright2006,
  author    = {Nocedal, Jorge and Wright, Stephen},
  title     = {{Numerical Optimization}},
  series    = {Springer Series in Operations Research and Financial Engineering},
  publisher = {Springer Science {\&} Business Media},
  address   = {New York},
  year      = {2006},
  month     = dec
}

% SIAM book, second edition. The edition goes in the `edition` field (as an
% ordinal word for classic BibTeX styles), not in `series`.
@book{Saad2003,
  author    = {Saad, Yousef},
  title     = {{Iterative Methods for Sparse Linear Systems}},
  edition   = {Second},
  publisher = {SIAM},
  address   = {Philadelphia},
  year      = {2003},
  month     = apr
}

% Book published by SIAM, Philadelphia, 2002.
@book{Vogel2002,
  author    = {Vogel, Curtis R},
  title     = {{Computational Methods for Inverse Problems}},
  year      = {2002},
  publisher = {SIAM},
  address   = {Philadelphia}
}


% Book, volume 7 of the SIAM "Fundamentals of Algorithms" series.
% Publisher name and publisher city are kept in separate fields so styles can
% format them independently.
@book{Hansen2010,
  author    = {Hansen, Per Christian},
  title     = {{Discrete inverse problems}},
  series    = {Fundamentals of Algorithms},
  volume    = {7},
  publisher = {Society for Industrial and Applied Mathematics (SIAM)},
  address   = {Philadelphia, PA},
  year      = {2010}
}

% Book in the "SIAM Monographs on Mathematical Modeling and Computation" series.
% Publisher name and publisher city are kept in separate fields so styles can
% format them independently.
@book{Hansen1998,
  author    = {Hansen, Per Christian},
  title     = {{Rank-deficient and discrete ill-posed problems}},
  series    = {SIAM Monographs on Mathematical Modeling and Computation},
  publisher = {Society for Industrial and Applied Mathematics (SIAM)},
  address   = {Philadelphia, PA},
  year      = {1998}
}


% Book published by Cambridge University Press, 2004.
@book{BoydVandenberghe2004,
  author    = {Boyd, Stephen P and Vandenberghe, Lieven},
  title     = {{Convex Optimization}},
  year      = {2004},
  month     = mar,
  publisher = {Cambridge University Press}
}


% SIAM book. "Theory, Algorithms, and Applications with MATLAB" is the book's
% subtitle, so it belongs in the title; `series` names the book series.
@book{Beck2014,
  author    = {Beck, Amir},
  title     = {{Introduction to Nonlinear Optimization: Theory, Algorithms, and Applications with MATLAB}},
  series    = {MOS-SIAM Series on Optimization},
  publisher = {SIAM},
  address   = {Philadelphia},
  year      = {2014},
  month     = oct
}


% Journal article: Neural Networks 2(5), 1989.
@article{HornikEtAl1989,
  author  = {Hornik, K and Stinchcombe, M and White, H},
  title   = {{Multilayer feedforward networks are universal approximators}},
  journal = {Neural Networks},
  volume  = {2},
  number  = {5},
  pages   = {359--366},
  year    = {1989}
}

% Journal article: Neurocomputing 70(1--3), 2006.
% The combined issue number is a range, written with "--" like page ranges.
@article{HuangEtAl2006,
  author  = {Huang, Guang-Bin and Zhu, Qin-Yu and Siew, Chee-Kheong},
  title   = {{Extreme learning machine: Theory and applications}},
  journal = {Neurocomputing},
  year    = {2006},
  month   = dec,
  volume  = {70},
  number  = {1--3},
  pages   = {489--501}
}


% Journal article, 1919. The journal name is spelled out in full and the
% author's given name restored (the scraped export had "Go A" / "JUS Artillery").
@article{bliss1919,
  author  = {Bliss, Gilbert A.},
  title   = {The use of adjoint systems in the problem of differential corrections for trajectories},
  journal = {Journal of the United States Artillery},
  volume  = {51},
  pages   = {296--311},
  year    = {1919}
}

% SIAM book. Publisher and city are separate fields, and `series` accompanies
% `volume` so the volume number is printed with its series name.
@book{BorzSchulz2012,
  author    = {Borz{\`\i}, Alfio and Schulz, Volker},
  title     = {{Computational optimization of systems governed by partial differential equations}},
  series    = {Computational Science \& Engineering},
  volume    = {8},
  publisher = {SIAM},
  address   = {Philadelphia, PA},
  year      = {2012},
  keywords  = {PDE-ConstrainedOpti},
  isbn      = {978-1-611972-04-7},
  url       = {http://www.ams.org/mathscinet-getitem?mr=MR2895881}
}


% arXiv preprint 1712.09913, 2017.
% `journal` is a required field for article entries; it follows the
% "arXiv preprint arXiv:<id>" form used by the other preprints in this file.
@article{LiEtAl2017,
  author     = {Li, Hao and Xu, Zheng and Taylor, Gavin and Goldstein, Tom},
  title      = {{Visualizing the Loss Landscape of Neural Nets}},
  journal    = {arXiv preprint arXiv:1712.09913},
  year       = {2017},
  eprint     = {1712.09913},
  eprinttype = {arxiv}
}

% Journal article: Communications in Mathematics and Statistics 5(1), 2017.
@article{E2017,
  author  = {E, Weinan},
  title   = {{A Proposal on Machine Learning via Dynamical Systems}},
  journal = {Communications in Mathematics and Statistics},
  volume  = {5},
  number  = {1},
  pages   = {1--11},
  year    = {2017},
  month   = mar
}
% Journal article: Inverse Problems 34(1), article number 014004.
% The issue goes in `number`, the field standard BibTeX styles read; an
% `issue` field is silently ignored by them.
% NOTE(review): volume 34 may correspond to 2018 -- confirm `year` against the
% publisher record.
@article{HaberRuthotto2017,
  author  = {Haber, Eldad and Ruthotto, Lars},
  title   = {Stable Architectures for Deep Neural Networks},
  journal = {Inverse Problems},
  volume  = {34},
  number  = {1},
  pages   = {014004},
  year    = {2017},
  url     = {http://arxiv.org/abs/1705.03341}
}
% Conference paper: IEEE CVPR proceedings, 2016.
@inproceedings{he2016deep,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Deep residual learning for image recognition},
  booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
  year      = {2016},
  pages     = {770--778}
}


% Conference paper: European Conference on Computer Vision, 2016.
% Springer is the publisher of the proceedings, so it is recorded in
% `publisher`; `organization` is meant for the sponsoring body.
@inproceedings{he2016identity,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Identity mappings in deep residual networks},
  booktitle = {European Conference on Computer Vision},
  pages     = {630--645},
  year      = {2016},
  publisher = {Springer}
}


% Book published by SIAM, Philadelphia.
% NOTE(review): the SIAM edition of this title may be dated 2008 rather than
% 2010 -- confirm the year against the publisher record.
@book{Ascher2010,
  author    = {U.M. Ascher},
  title     = {Numerical methods for Evolutionary Differential Equations},
  publisher = {SIAM},
  address   = {Philadelphia},
  year      = {2010}
}
% Book published by SIAM, Philadelphia, PA, 1998.
@book{AscherPetzold1998,
  author    = {U. Ascher and L. Petzold},
  title     = {Computer Methods for Ordinary Differential Equations and Differential-Algebraic Equations},
  publisher = {SIAM},
  address   = {Philadelphia, PA},
  year      = {1998}
}

% arXiv preprint 1606.04838 (stat.ML), 2016.
% The identifier, version and class live in the eprint fields, and `journal`
% follows the "arXiv preprint arXiv:<id>" form used elsewhere in this file.
% NOTE(review): a journal version appears to have been published later (SIAM
% Review) -- consider citing that instead; confirm details before switching.
@article{bottou2016optimization,
  author      = {Bottou, L{\'e}on and Curtis, Frank E and Nocedal, Jorge},
  title       = {{Optimization Methods for Large-Scale Machine Learning}},
  journal     = {arXiv preprint arXiv:1606.04838},
  year        = {2016},
  eprint      = {1606.04838v1},
  eprinttype  = {arxiv},
  eprintclass = {stat.ML}
}

% Journal article: The Annals of Mathematical Statistics 22(3), 1951.
% Journal names are not re-cased by standard styles, so the name is stored
% with its proper capitalisation.
@article{RobbinsMonro1951,
  author  = {Robbins, H and Monro, S},
  title   = {{A Stochastic Approximation Method}},
  journal = {The Annals of Mathematical Statistics},
  year    = {1951},
  volume  = {22},
  number  = {3},
  pages   = {400--407}
}


% SIAM book. `series` names the book series ("Fundamentals of Algorithms", as
% for Hansen2010 in this file); the subtitle belongs only in the title.
@book{HansenNagyOLeary2006,
  author    = {Hansen, P C and Nagy, J G and O'Leary, D P},
  title     = {{Deblurring Images: Matrices, Spectra, and Filtering}},
  series    = {Fundamentals of Algorithms},
  publisher = {Society for Industrial and Applied Mathematics (SIAM)},
  address   = {Philadelphia, PA},
  year      = {2006}
}


% Journal article: Inverse Problems, volume 19, 2003.
@article{GoPe03,
  author  = {G.~Golub and V.~Pereyra},
  title   = {Separable nonlinear least squares: the variable projection method and its applications},
  journal = {Inverse Problems},
  volume  = {19},
  pages   = {R1--R26},
  year    = {2003}
}

% Journal article: SIAM Journal on Numerical Analysis 10(2), 1973.
@article{GoPe1973,
  author  = {Golub, G H and Pereyra, V},
  title   = {{The differentiation of pseudo-inverses and nonlinear least squares problems whose variables separate}},
  journal = {SIAM Journal on Numerical Analysis},
  volume  = {10},
  number  = {2},
  pages   = {413--432},
  year    = {1973}
}

% Journal article: Computational Optimization and Applications 54(3), 2013.
@article{OLearyRust2013,
  author  = {O'Leary, Dianne P and Rust, Bert W},
  title   = {{Variable projection for nonlinear least squares problems}},
  journal = {Computational Optimization and Applications. An International Journal},
  volume  = {54},
  number  = {3},
  pages   = {579--593},
  year    = {2013}
}

% arXiv preprint 1502.03167 (cs.LG), 2015.
% The identifier, version and class live in the eprint fields, and `journal`
% follows the "arXiv preprint arXiv:<id>" form used elsewhere in this file.
@article{IoffeSzegedy2015,
  author      = {Ioffe, Sergey and Szegedy, Christian},
  title       = {{Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift}},
  journal     = {arXiv preprint arXiv:1502.03167},
  year        = {2015},
  eprint      = {1502.03167v3},
  eprinttype  = {arxiv},
  eprintclass = {cs.LG}
}

% Journal article: Physica D: Nonlinear Phenomena 60(1--4), 1992.
% The combined issue number is a range, written with "--" like page ranges.
@article{RudinOsherFatemi1992,
  author  = {Rudin, Leonid I and Osher, Stanley and Fatemi, Emad},
  title   = {{Nonlinear total variation based noise removal algorithms}},
  journal = {Physica D: Nonlinear Phenomena},
  year    = {1992},
  volume  = {60},
  number  = {1--4},
  pages   = {259--268}
}

% arXiv preprint 1607.08022 (cs.CV), 2016.
% The identifier, version and class live in the eprint fields, and `journal`
% follows the "arXiv preprint arXiv:<id>" form used elsewhere in this file.
@article{UlyanovEtAl2016,
  author      = {Ulyanov, Dmitry and Vedaldi, Andrea and Lempitsky, Victor},
  title       = {{Instance Normalization: The Missing Ingredient for Fast Stylization}},
  journal     = {arXiv preprint arXiv:1607.08022},
  year        = {2016},
  eprint      = {1607.08022v3},
  eprinttype  = {arxiv},
  eprintclass = {cs.CV}
}

% Journal article: IEEE Transactions on Pattern Analysis and Machine Intelligence 39(6), 2017.
% The title carries no trailing period: the bibliography style adds its own
% punctuation, and a period inside the protected title can double up with it.
@article{ChenPock2017,
  author  = {Chen, Yunjin and Pock, Thomas},
  title   = {{Trainable Nonlinear Reaction Diffusion: A Flexible Framework for Fast and Effective Image Restoration}},
  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  year    = {2017},
  month   = jun,
  volume  = {39},
  number  = {6},
  pages   = {1256--1272}
}

% arXiv preprint 1507.01030 (cs.SY), 2015.
% The identifier, version and class live in the eprint fields, and `journal`
% follows the "arXiv preprint arXiv:<id>" form used elsewhere in this file
% (the previous journal string had a misplaced bracket and trailing space).
@article{Bertsekas2015,
  author      = {Bertsekas, Dimitri P},
  title       = {{Incremental Gradient, Subgradient, and Proximal Methods for Convex Optimization: A Survey}},
  journal     = {arXiv preprint arXiv:1507.01030},
  year        = {2015},
  eprint      = {1507.01030v1},
  eprinttype  = {arxiv},
  eprintclass = {cs.SY}
}


% Chapter in an edited book, so it is an incollection entry with the book
% title in `booktitle` (it was entered as an article with the book as journal).
@incollection{Bottou2012,
  author    = {Bottou, L{\'e}on},
  title     = {{Stochastic gradient descent tricks}},
  booktitle = {Neural Networks: Tricks of the Trade},
  edition   = {Second},
  series    = {Lecture Notes in Computer Science},
  volume    = {7700},
  pages     = {421--436},
  publisher = {Springer},
  year      = {2012}
}


% arXiv preprint 1312.5851, 2013.
% The identifier lives in the eprint fields, and `journal` follows the
% "arXiv preprint arXiv:<id>" form used elsewhere in this file.
@article{Mathieu:2013wa,
  author     = {Mathieu, Michael and Henaff, Mikael and LeCun, Yann},
  title      = {{Fast Training of Convolutional Networks through FFTs}},
  journal    = {arXiv preprint arXiv:1312.5851},
  year       = {2013},
  eprint     = {1312.5851},
  eprinttype = {arxiv}
}

% arXiv preprint 1412.7580 (cs.LG), 2014.
% The identifier, version and class live in the eprint fields, and `journal`
% follows the "arXiv preprint arXiv:<id>" form used elsewhere in this file.
@article{Vasilache:2014wh,
  author      = {Vasilache, Nicolas and Johnson, Jeff and Mathieu, Michael and Chintala, Soumith and Piantino, Serkan and LeCun, Yann},
  title       = {{Fast Convolutional Nets With fbfft: A GPU Performance Evaluation}},
  journal     = {arXiv preprint arXiv:1412.7580},
  year        = {2014},
  eprint      = {1412.7580v3},
  eprinttype  = {arxiv},
  eprintclass = {cs.LG}
}

% arXiv preprint 1810.01367, 2018.
% `journal` is a required field for article entries; it follows the
% "arXiv preprint arXiv:<id>" form used by the other preprints in this file.
@article{Grathwohl:2018vf,
  author     = {Grathwohl, Will and Chen, Ricky T Q and Bettencourt, Jesse and Sutskever, Ilya and Duvenaud, David},
  title      = {{FFJORD: Free-form Continuous Dynamics for Scalable Reversible Generative Models}},
  journal    = {arXiv preprint arXiv:1810.01367},
  year       = {2018},
  month      = oct,
  eprint     = {1810.01367},
  eprinttype = {arxiv}
}

% NeurIPS 2018 proceedings paper. booktitle holds the full proceedings title
% (matching the other NeurIPS entries in this file) instead of the bare
% acronym, and the month is that of the conference.
@inproceedings{ChenEtAl2018,
  author    = {Chen, Tian Qi and Rubanova, Yulia and Bettencourt, Jesse and Duvenaud, David},
  title     = {{Neural Ordinary Differential Equations}},
  booktitle = {Advances in Neural Information Processing Systems},
  volume    = {31},
  year      = {2018},
  month     = dec
}
% arXiv preprint 1902.10298 (cs.LG), 2019.
@article{GholamiEtAl2019,
  author      = {Gholami, Amir and Keutzer, Kurt and Biros, George},
  title       = {{ANODE: Unconditionally Accurate Memory-Efficient Gradients for Neural ODEs}},
  journal     = {arXiv.org},
  year        = {2019},
  month       = feb,
  eprinttype  = {arxiv},
  eprint      = {1902.10298v1},
  eprintclass = {cs.LG}
}

% arXiv preprint 1809.10188 (cs.LG), 2018.
@article{Zhang:2018th,
  author      = {Zhang, Linfeng and E, Weinan and Wang, Lei},
  title       = {{Monge-Amp{\`e}re Flow for Generative Modeling}},
  journal     = {arXiv.org},
  year        = {2018},
  month       = sep,
  eprinttype  = {arxiv},
  eprint      = {1809.10188v1},
  eprintclass = {cs.LG}
}


% arXiv preprint 1801.05894, 2018.
@article{HighamHigham2018,
  author     = {Higham, Catherine F and Higham, Desmond J},
  title      = {{Deep Learning: An Introduction for Applied Mathematicians}},
  journal    = {arXiv.org},
  year       = {2018},
  month      = jan,
  eprinttype = {arxiv},
  eprint     = {1801.05894v1}
}


% Journal article: SIAM Journal on Matrix Analysis and Applications 11(3), 1990
% (it was listed under SIAM Review). The third author's surname is Liou; the
% exported "Paul Liu, Kang-Pu" garbled a middle name into the surname.
@article{PothenEtAl1990,
  author  = {Pothen, Alex and Simon, Horst D. and Liou, Kang-Pu},
  title   = {{Partitioning sparse matrices with eigenvectors of graphs}},
  journal = {SIAM Journal on Matrix Analysis and Applications},
  year    = {1990},
  month   = jul,
  volume  = {11},
  number  = {3},
  pages   = {430--452}
}

% NIPS proceedings paper, so it is an inproceedings entry with the proceedings
% title in `booktitle` (the journal field held a hostname with an embedded
% line break).
@inproceedings{NgEtAl2002,
  author    = {Ng, Andrew Y. and Jordan, Michael I. and Weiss, Yair},
  title     = {{On spectral clustering: Analysis and an algorithm}},
  booktitle = {Advances in Neural Information Processing Systems},
  volume    = {14},
  pages     = {849--856},
  year      = {2002}
}

% Journal article: Statistics and Computing 17(4), 2007.
@article{vonLuxburg2007,
  author  = {von Luxburg, Ulrike},
  title   = {{A tutorial on spectral clustering}},
  journal = {Statistics and Computing},
  volume  = {17},
  number  = {4},
  pages   = {395--416},
  year    = {2007},
  month   = aug
}

	% arXiv preprint 1901.09450, 2019.
	% `journal` is a required field for article entries; it follows the
	% "arXiv preprint arXiv:<id>" form used by the other preprints in this file.
	% NOTE(review): the first author's surname is entered as the compound
	% "Wu Fung", matching the citation key -- confirm with the authors.
	@article{WuFungEtAl2019SoftmaxADMM,
	author     = {Wu Fung, Samy and Tyrv{\"a}inen, Sanna and Ruthotto, Lars and Haber, Eldad},
	title      = {{Large-Scale Classification using Multinomial Regression and ADMM}},
	journal    = {arXiv preprint arXiv:1901.09450},
	year       = {2019},
	month      = jan,
	eprint     = {1901.09450},
	eprinttype = {arxiv},
	annote     = {Supported by NSF DMS 1522599 and 1751636}
	}


% Journal article: Acta Numerica, volume 8, 1999.
% The volume is included so the reference can be located from the entry.
@article{Pinkus1999,
  author  = {Pinkus, Allan},
  title   = {{Approximation theory of the MLP model in neural networks}},
  journal = {Acta Numerica},
  year    = {1999},
  month   = jan,
  volume  = {8},
  pages   = {143--195}
}


% Journal article: Mathematics of Control, Signals and Systems 2(4), 1989.
@article{Cybenko1989,
  author  = {Cybenko, G},
  title   = {{Approximation by superpositions of a sigmoidal function}},
  journal = {Mathematics of Control, Signals and Systems},
  volume  = {2},
  number  = {4},
  pages   = {303--314},
  year    = {1989}
}


% NeurIPS 2018 proceedings paper, so the venue is a booktitle rather than a
% journal. "ResNet" is brace-protected so sentence-casing styles keep its
% capitalisation.
@inproceedings{lin2018resnet,
  author    = {Lin, Hongzhou and Jegelka, Stefanie},
  title     = {{ResNet} with one-neuron hidden layers is a universal approximator},
  booktitle = {Advances in Neural Information Processing Systems},
  volume    = {31},
  pages     = {6169--6178},
  year      = {2018}
}

% arXiv preprint 1910.03193, 2019.
% "DeepONet" is brace-protected so styles keep its capitalisation; eprint
% fields carry the identifier like the other preprints in this file.
@article{lu2019deeponet,
  author     = {Lu, Lu and Jin, Pengzhan and Karniadakis, George Em},
  title      = {{DeepONet}: Learning nonlinear operators for identifying differential equations based on the universal approximation theorem of operators},
  journal    = {arXiv preprint arXiv:1910.03193},
  year       = {2019},
  eprint     = {1910.03193},
  eprinttype = {arxiv}
}

% Conference paper: Conference on Learning Theory, 2020.
% PMLR is the publisher of the proceedings, so it is recorded in `publisher`;
% `organization` is meant for the sponsoring body.
@inproceedings{kidger2020universal,
  author    = {Kidger, Patrick and Lyons, Terry},
  title     = {Universal approximation with deep narrow networks},
  booktitle = {Conference on Learning Theory},
  pages     = {2306--2327},
  year      = {2020},
  publisher = {PMLR}
}

% Journal article: Neural Networks 3(5), 1990.
% The journal name is capitalised the same way as in HornikEtAl1989 so both
% references print the venue identically.
@article{hornik1990universal,
  author    = {Hornik, Kurt and Stinchcombe, Maxwell and White, Halbert},
  title     = {Universal approximation of an unknown mapping and its derivatives using multilayer feedforward networks},
  journal   = {Neural Networks},
  volume    = {3},
  number    = {5},
  pages     = {551--560},
  year      = {1990},
  publisher = {Elsevier}
}

% NIPS proceedings paper, so it is an inproceedings entry with the proceedings
% title in `booktitle`; the previous entry had a conference name in `journal`
% and no year.
% NOTE(review): volume 20 is the 2007 conference; some databases date the
% printed proceedings 2008 -- confirm which year the document should cite.
@inproceedings{Rahimi:vq,
  author    = {Rahimi, Ali and Recht, Benjamin},
  title     = {{Random features for large-scale kernel machines}},
  booktitle = {Advances in Neural Information Processing Systems},
  volume    = {20},
  pages     = {1177--1184},
  year      = {2007}
}


% arXiv preprint 2007.13171 (cs.LG), 2020.
@article{NewmanEtAl2020,
  author      = {Newman, Elizabeth and Ruthotto, Lars and Hart, Joseph and van Bloemen Waanders, Bart},
  title       = {{Train Like a (Var)Pro: Efficient Training of Neural Networks with Variable Projection}},
  journal     = {arXiv.org},
  year        = {2020},
  month       = jul,
  eprinttype  = {arxiv},
  eprint      = {2007.13171v1},
  eprintclass = {cs.LG}
}

% arXiv preprint 2005.13420 (cs.LG), 2020.
@article{Onken2020DO,
  author      = {Onken, Derek and Ruthotto, Lars},
  title       = {{Discretize-Optimize vs. Optimize-Discretize for Time-Series Regression and Continuous Normalizing Flows}},
  journal     = {arXiv.org},
  year        = {2020},
  month       = may,
  eprinttype  = {arxiv},
  eprint      = {2005.13420v1},
  eprintclass = {cs.LG}
}

% Journal article: Journal of Mathematical Imaging and Vision 62(3).
% The citation key says 2018 while the year field is 2020; the key is kept
% unchanged so existing citations still resolve.
@article{RuthottoHaber2018,
  author  = {Ruthotto, Lars and Haber, Eldad},
  title   = {{Deep neural networks motivated by partial differential equations}},
  journal = {Journal of Mathematical Imaging and Vision},
  volume  = {62},
  number  = {3},
  pages   = {352--364},
  year    = {2020}
}

% Conference paper: Thirty-Second AAAI Conference on Artificial Intelligence.
% The citation key says 2017 while the year field is 2018; the key is kept
% unchanged so existing citations still resolve.
@inproceedings{ChangEtAl2017Reversible,
  author    = {Chang, Bo and Meng, Lili and Haber, Eldad and Ruthotto, Lars and Begert, David and Holtham, Elliot},
  title     = {{Reversible architectures for arbitrarily deep residual neural networks}},
  booktitle = {Thirty-Second AAAI Conference on Artificial Intelligence},
  pages     = {1--8},
  year      = {2018}
}