-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathreferences.bib
More file actions
489 lines (446 loc) · 19.8 KB
/
references.bib
File metadata and controls
489 lines (446 loc) · 19.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
@book{alberts2002,
  author    = {Alberts, Bruce and Johnson, Alexander and Lewis, Julian and Raff, Martin and Roberts, Keith and Walter, Peter},
  title     = {Molecular Biology of the Cell},
  chapter   = {The Shape and Structure of Proteins},
  edition   = {4th},
  publisher = {Garland Science},
  address   = {New York},
  year      = {2002},
  url       = {https://www.ncbi.nlm.nih.gov/books/NBK26830/}
}
@article{adhikari2016,
  author  = {Adhikari, B. and Cheng, J.},
  title   = {Protein Residue Contacts and Prediction Methods},
  journal = {Methods in Molecular Biology},
  volume  = {1415},
  pages   = {463--476},
  year    = {2016},
  doi     = {10.1007/978-1-4939-3572-7_27}
}
@article{mfDCA,
  author  = {Morcos, Faruck and Pagnani, Andrea and Lunt, Bryan and Bertolino, Arianna and Marks, Debora S. and Sander, Chris and Zecchina, Riccardo and Onuchic, Jos{\'e} N. and Hwa, Terence and Weigt, Martin},
  title   = {Direct-coupling analysis of residue coevolution captures native contacts across many protein families},
  journal = {Proceedings of the National Academy of Sciences},
  volume  = {108},
  number  = {49},
  pages   = {E1293--E1301},
  year    = {2011},
  doi     = {10.1073/pnas.1111471108},
  url     = {https://www.pnas.org/doi/abs/10.1073/pnas.1111471108}
}
@article{weigt2009,
  author  = {Weigt, M. and White, R. A. and Szurmant, H. and Hoch, J. A. and Hwa, T.},
  title   = {Identification of direct residue contacts in protein-protein interaction by message passing},
  journal = {Proceedings of the National Academy of Sciences},
  volume  = {106},
  pages   = {67--72},
  year    = {2009},
  doi     = {10.1073/pnas.0805923106}
}
@online{ebi_protein_families,
  author = {{European Bioinformatics Institute (EMBL-EBI)}},
  title  = {What are protein families?},
  url    = {https://www.ebi.ac.uk/training/online/courses/protein-classification-intro-ebi-resources/protein-classification/what-are-protein-families/},
  year   = {2024},
  note   = {Accessed: 2025-08-20}
}
@incollection{WILTGEN201938,
  author    = {Wiltgen, Marco},
  title     = {Algorithms for Structure Comparison and Analysis: Homology Modelling of Proteins},
  editor    = {Ranganathan, Shoba and Gribskov, Michael and Nakai, Kenta and Sch{\"o}nbach, Christian},
  booktitle = {Encyclopedia of Bioinformatics and Computational Biology},
  publisher = {Academic Press},
  pages     = {38--61},
  year      = {2019},
  isbn      = {978-0-12-811432-2},
  doi       = {10.1016/B978-0-12-809633-8.20484-6},
  url       = {https://www.sciencedirect.com/science/article/pii/B9780128096338204846}
}
@article{Jaynes1957a,
  author  = {E. T. Jaynes},
  title   = {Information Theory and Statistical Mechanics},
  journal = {Physical Review},
  year    = {1957},
  month   = may,
  volume  = {106},
  number  = {4},
  pages   = {620--630},
  doi     = {10.1103/PhysRev.106.620}
}
@article{Jaynes1957b,
  author  = {Jaynes, E. T.},
  title   = {Information Theory and Statistical Mechanics. II},
  journal = {Physical Review},
  year    = {1957},
  volume  = {108},
  number  = {2},
  pages   = {171--190},
  doi     = {10.1103/PhysRev.108.171}
}
@misc{weigt2020,
  author       = {Weigt, Martin},
  title        = {Coevolutionary Analysis of Protein-Protein Interactions},
  year         = {2020},
  howpublished = {Lecture video, 3rd Course on Multiscale Integration in Biological Systems},
  url          = {https://www.youtube.com/watch?v=IYA8WEsUcG0},
  note         = {YouTube, uploaded by ICTP}
}
@article{dietler2023,
  author  = {Dietler, Nicola and Lupo, Umberto and Bitbol, Anne-Florence},
  title   = {Impact of phylogeny on structural contact inference from protein sequence data},
  journal = {Journal of the Royal Society Interface},
  volume  = {20},
  number  = {199},
  year    = {2023},
  doi     = {10.1098/rsif.2022.0707}
}
% Author uses the three-part form: Last, Jr, First.
@misc{MITinfent,
  author       = {Penfield, Jr., Paul},
  title        = {Lecture Notes for Information and Entropy},
  howpublished = {\url{https://mtlsites.mit.edu/Courses/6.050/notes/index.html}},
  institution  = {Massachusetts Institute of Technology},
  year         = {2003}
}
@article{Shannon1948,
  author  = {Claude E. Shannon},
  title   = {A Mathematical Theory of Communication},
  journal = {Bell System Technical Journal},
  volume  = {27},
  pages   = {379--423, 623--656},
  year    = {1948}
}
@article{fastplmDCA,
  author  = {Ekeberg, Magnus and Hartonen, Tuomo and Aurell, Erik},
  title   = {Fast pseudolikelihood maximization for direct-coupling analysis of protein structure from many homologous amino-acid sequences},
  journal = {Journal of Computational Physics},
  volume  = {276},
  pages   = {341--356},
  year    = {2014},
  issn    = {0021-9991},
  doi     = {10.1016/j.jcp.2014.07.024},
  url     = {https://www.sciencedirect.com/science/article/pii/S0021999114005178}
}
% pages holds the article identifier; numpages is the page count.
@article{plmDCA,
  author    = {Ekeberg, Magnus and L{\"o}vkvist, Cecilia and Lan, Yueheng and Weigt, Martin and Aurell, Erik},
  title     = {Improved contact prediction in proteins: Using pseudolikelihoods to infer {Potts} models},
  journal   = {Physical Review E},
  volume    = {87},
  number    = {1},
  pages     = {012707},
  numpages  = {16},
  year      = {2013},
  month     = jan,
  publisher = {American Physical Society},
  doi       = {10.1103/PhysRevE.87.012707},
  url       = {https://link.aps.org/doi/10.1103/PhysRevE.87.012707}
}
@article{mh-algorithm,
  author    = {Hastings, W. K.},
  title     = {{Monte} {Carlo} Sampling Methods Using {Markov} Chains and Their Applications},
  journal   = {Biometrika},
  volume    = {57},
  number    = {1},
  pages     = {97--109},
  year      = {1970},
  publisher = {Oxford University Press, Biometrika Trust},
  issn      = {0006-3444, 1464-3510},
  url       = {http://www.jstor.org/stable/2334940}
}
@article{gibbs-algorithm,
  author   = {Geman, Stuart and Geman, Donald},
  title    = {Stochastic Relaxation, {Gibbs} Distributions, and the {Bayesian} Restoration of Images},
  journal  = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  volume   = {PAMI-6},
  number   = {6},
  pages    = {721--741},
  year     = {1984},
  keywords = {Stochastic processes;Bayesian methods;Image restoration;Degradation;Markov random fields;Additive noise;Deformable models;Temperature distribution;Energy states;Annealing;Annealing;Gibbs distribution;image restoration;line process;MAP estimate;Markov random field;relaxation;scene modeling;spatial degradation},
  doi      = {10.1109/TPAMI.1984.4767596}
}
% 528 is the article number within volume 22, so it is stored in pages
% rather than as an issue number.
@article{adabmDCA,
  author  = {Muntoni, Andrea P. and Pagnani, Andrea and Weigt, Martin and Zamponi, Francesco},
  title   = {{adabmDCA}: adaptive {Boltzmann} machine learning for biological sequences},
  journal = {BMC Bioinformatics},
  volume  = {22},
  pages   = {528},
  year    = {2021},
  doi     = {10.1186/s12859-021-04441-9},
  url     = {https://doi.org/10.1186/s12859-021-04441-9}
}
@article{alphafold,
  author    = {Jumper, John and Evans, Richard and Pritzel, Alexander and Green, Tim and Figurnov, Michael and Ronneberger, Olaf and Tunyasuvunakool, Kathryn and Bates, Russ and {\v{Z}}{\'\i}dek, Augustin and Potapenko, Anna and Bridgland, Alex and Meyer, Clemens and Kohl, Simon A. A. and Ballard, Andrew J. and Cowie, Andrew and Romera-Paredes, Bernardino and Nikolov, Stanislav and Jain, Rishub and Adler, Jonas and Back, Trevor and Petersen, Stig and Reiman, David and Clancy, Ellen and Zielinski, Michal and Steinegger, Martin and Pacholska, Marta and Berghammer, Tamas and Bodenstein, Sebastian and Silver, David and Vinyals, Oriol and Senior, Andrew W. and Kavukcuoglu, Koray and Kohli, Pushmeet and Hassabis, Demis},
  title     = {Highly accurate protein structure prediction with {AlphaFold}},
  journal   = {Nature},
  volume    = {596},
  number    = {7873},
  pages     = {583--589},
  year      = {2021},
  publisher = {Nature Publishing Group},
  doi       = {10.1038/s41586-021-03819-2}
}
@article{ising1925,
  author    = {Ising, Ernst},
  title     = {Beitrag zur Theorie des Ferromagnetismus},
  journal   = {Zeitschrift f{\"u}r Physik},
  year      = {1925},
  volume    = {31},
  number    = {1},
  pages     = {253--258},
  publisher = {Springer},
  doi       = {10.1007/BF02980577}
}
@article{potts1952,
  author    = {Potts, Renfrey B.},
  title     = {Some generalized order-disorder transformations},
  journal   = {Mathematical Proceedings of the Cambridge Philosophical Society},
  year      = {1952},
  volume    = {48},
  number    = {1},
  pages     = {106--109},
  publisher = {Cambridge University Press},
  doi       = {10.1017/S0305004100027419}
}
% pages repeats the article number so that standard styles print a locator;
% article-number is kept as exported.
@article{biom14121531,
  author         = {Zhang, Chenyue and Wang, Qinxin and Li, Yiyang and Teng, Anqi and Hu, Gang and Wuyun, Qiqige and Zheng, Wei},
  title          = {The Historical Evolution and Significance of Multiple Sequence Alignment in Molecular Structure and Function Prediction},
  journal        = {Biomolecules},
  volume         = {14},
  number         = {12},
  pages          = {1531},
  article-number = {1531},
  year           = {2024},
  issn           = {2218-273X},
  doi            = {10.3390/biom14121531},
  url            = {https://www.mdpi.com/2218-273X/14/12/1531}
}
@article{plefka1982,
  author    = {Plefka, T.},
  title     = {Convergence condition of the {TAP} equation for the infinite-ranged {Ising} spin glass model},
  journal   = {Journal of Physics A: Mathematical and General},
  volume    = {15},
  number    = {6},
  pages     = {1971--1978},
  year      = {1982},
  publisher = {IOP Publishing},
  doi       = {10.1088/0305-4470/15/6/035}
}
@article{georges1991,
  author    = {Georges, A. and Yedidia, J. S.},
  title     = {How to expand around mean-field theory using high-temperature expansions},
  journal   = {Journal of Physics A: Mathematical and General},
  year      = {1991},
  volume    = {24},
  number    = {9},
  pages     = {2173--2192},
  publisher = {IOP Publishing},
  doi       = {10.1088/0305-4470/24/9/018}
}
@article{jones2012psicov,
  author  = {Jones, David T. and Buchan, Daniel W. A. and Cozzetto, Domenico and Pontil, Massimiliano},
  title   = {{PSICOV}: precise structural contact prediction using sparse inverse covariance estimation on large multiple sequence alignments},
  journal = {Bioinformatics},
  volume  = {28},
  number  = {2},
  pages   = {184--190},
  year    = {2012},
  issn    = {1367-4811},
  doi     = {10.1093/bioinformatics/btr638},
  pmid    = {22101153},
  note    = {Epub 2011 Nov 17}
}
@article{Metropolis1953,
  author  = {Metropolis, N. and Rosenbluth, A. W. and Rosenbluth, M. N. and Teller, A. H. and Teller, E.},
  title   = {Equation of State Calculations by Fast Computing Machines},
  journal = {The Journal of Chemical Physics},
  volume  = {21},
  number  = {6},
  pages   = {1087--1092},
  year    = {1953},
  doi     = {10.1063/1.1699114}
}
@article{bmDCA,
  author    = {Figliuzzi, Matteo and Barrat-Charlaix, Pierre and Weigt, Martin},
  title     = {How Pairwise Coevolutionary Models Capture the Collective Residue Variability in Proteins},
  journal   = {Molecular Biology and Evolution},
  publisher = {Oxford University Press},
  year      = {2018},
  month     = apr,
  volume    = {35},
  number    = {4},
  pages     = {1018--1027},
  issn      = {0737-4038},
  doi       = {10.1093/molbev/msy007},
  url       = {https://doi.org/10.1093/molbev/msy007}
}
% Corporate author: the extra braces keep the name from being split into
% first and last parts.
@misc{AlphaFold_pLDDT,
  author       = {{EMBL-EBI Training}},
  title        = {{pLDDT}: Understanding local confidence},
  howpublished = {Online tutorial, AlphaFold course, EMBL-EBI},
  year         = {2025},
  url          = {https://www.ebi.ac.uk/training/online/courses/alphafold/inputs-and-outputs/evaluating-alphafolds-predicted-structures-using-confidence-scores/plddt-understanding-local-confidence/},
  organization = {EMBL-EBI}
}
% Corporate author: the extra braces keep the name from being split into
% first and last parts.
@misc{EMBL-EBI_Phylogenetics,
  author       = {{EMBL-EBI Training}},
  title        = {What is phylogenetics?},
  howpublished = {Online tutorial},
  year         = {2025},
  url          = {https://www.ebi.ac.uk/training/online/courses/introduction-to-phylogenetics/what-is-phylogenetics/},
  organization = {EMBL-EBI}
}
% pages holds the article identifier, taken from the last DOI component.
@article{Haldane_2019,
  author    = {Haldane, Allan and Levy, Ronald M.},
  title     = {Influence of multiple-sequence-alignment depth on {Potts} statistical models of protein covariation},
  journal   = {Physical Review E},
  volume    = {99},
  number    = {3},
  pages     = {032405},
  year      = {2019},
  month     = mar,
  publisher = {American Physical Society (APS)},
  issn      = {2470-0053},
  doi       = {10.1103/PhysRevE.99.032405},
  url       = {http://dx.doi.org/10.1103/PhysRevE.99.032405}
}
@article{Trinquier2021,
  author    = {Trinquier, Julien and Uguzzoni, Guido and Pagnani, Andrea and Zamponi, Francesco and Weigt, Martin},
  title     = {Efficient generative modeling of protein sequences using simple autoregressive models},
  journal   = {Nature Communications},
  publisher = {Springer Nature},
  volume    = {12},
  number    = {1},
  pages     = {5800},
  year      = {2021},
  doi       = {10.1038/s41467-021-25756-4},
  url       = {https://doi.org/10.1038/s41467-021-25756-4},
  received  = {2021-02-24},
  accepted  = {2021-08-23},
  published = {2021-10-04}
}
@book{hastie2009elements,
  author    = {Hastie, Trevor and Tibshirani, Robert and Friedman, Jerome},
  title     = {The Elements of Statistical Learning: Data Mining, Inference, and Prediction},
  edition   = {2nd},
  publisher = {Springer Science \& Business Media},
  address   = {New York},
  year      = {2009},
  isbn      = {9780387848570}
}
@article{wu2019solving,
  author    = {Wu, Dian and Wang, Lei and Zhang, Pan},
  title     = {Solving statistical mechanics using variational autoregressive networks},
  journal   = {Physical Review Letters},
  publisher = {American Physical Society},
  year      = {2019},
  volume    = {122},
  number    = {8},
  pages     = {080602},
  doi       = {10.1103/PhysRevLett.122.080602}
}
@article{sharir2020deep,
  author    = {Sharir, Or and Levine, Yoav and Wies, Noam and Carleo, Giuseppe and Shashua, Amnon},
  title     = {Deep autoregressive models for the efficient variational simulation of many-body quantum systems},
  journal   = {Physical Review Letters},
  publisher = {American Physical Society},
  year      = {2020},
  volume    = {124},
  number    = {2},
  pages     = {020503},
  doi       = {10.1103/PhysRevLett.124.020503}
}
@article{balakrishnan2011learning,
  author    = {Balakrishnan, Sivaraman and Kamisetty, Hiranmay and Carbonell, Jaime G. and Lee, Su-In and Langmead, Christopher J.},
  title     = {Learning generative models for protein fold families},
  journal   = {Proteins: Structure, Function, and Bioinformatics},
  publisher = {Wiley},
  year      = {2011},
  volume    = {79},
  number    = {4},
  pages     = {1061--1078},
  doi       = {10.1002/prot.22934}
}
@article{perplexity,
  author     = {Ciprian Chelba and Tom{\'{a}}s Mikolov and Mike Schuster and Qi Ge and Thorsten Brants and Phillipp Koehn},
  title      = {One Billion Word Benchmark for Measuring Progress in Statistical Language Modeling},
  journal    = {CoRR},
  volume     = {abs/1312.3005},
  year       = {2013},
  eprinttype = {arXiv},
  eprint     = {1312.3005},
  url        = {http://arxiv.org/abs/1312.3005},
  timestamp  = {Mon, 28 Dec 2020 11:31:02 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/ChelbaMSGBK13.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{deepsequence,
  author    = {Riesselman, Adam J and Ingraham, John B and Marks, Debora S},
  title     = {Deep generative models of genetic variation capture the effects of mutations},
  journal   = {Nature Methods},
  publisher = {Nature Publishing Group},
  year      = {2018},
  volume    = {15},
  number    = {10},
  pages     = {816--822}
}
@misc{moshi4_pyMSAviz_2024,
  author       = {moshi4},
  title        = {{pyMSAviz}: {MSA} (Multiple Sequence Alignment) visualization {Python} package},
  howpublished = {\url{https://github.com/moshi4/pyMSAviz}},
  note         = {Version 0.5.0, released 14 September 2024},
  year         = {2024},
  lastchecked  = {2025-09-20}
}
% No author or editor is recorded, so key gives styles something to
% sort and label this entry by.
@misc{PredictionCenter,
  key          = {Prediction Center},
  title        = {Prediction Center},
  howpublished = {\url{https://predictioncenter.org/}}
}
@article{Mirdita2022_ColabFold,
  author  = {Mirdita, Milot and Sch{\"u}tze, Konstantin and Moriwaki, Yoshitaka and Heo, Lim and Ovchinnikov, Sergey and Steinegger, Martin},
  title   = {{ColabFold}: making protein folding accessible to all},
  journal = {Nature Methods},
  volume  = {19},
  number  = {6},
  pages   = {679--682},
  year    = {2022},
  doi     = {10.1038/s41592-022-01488-1}
}
@article{LiuNocedal1989,
  author    = {Liu, Dong C. and Nocedal, Jorge},
  title     = {On the Limited Memory {BFGS} Method for Large Scale Optimization},
  journal   = {Mathematical Programming},
  series    = {Series B},
  volume    = {45},
  number    = {1--3},
  pages     = {503--528},
  year      = {1989},
  publisher = {Springer},
  doi       = {10.1007/BF01589116}
}
@article{AdamW,
  author     = {Ilya Loshchilov and Frank Hutter},
  title      = {Fixing Weight Decay Regularization in {Adam}},
  journal    = {CoRR},
  volume     = {abs/1711.05101},
  year       = {2017},
  eprinttype = {arXiv},
  eprint     = {1711.05101},
  url        = {http://arxiv.org/abs/1711.05101},
  timestamp  = {Mon, 13 Aug 2018 16:48:18 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1711-05101.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{Caredda2024.02.06.579080,
  author       = {Caredda, Francesco and Pagnani, Andrea},
  title        = {Direct Coupling Analysis and the Attention Mechanism},
  journal      = {bioRxiv},
  publisher    = {Cold Spring Harbor Laboratory},
  year         = {2024},
  elocation-id = {2024.02.06.579080},
  doi          = {10.1101/2024.02.06.579080},
  url          = {https://www.biorxiv.org/content/early/2024/02/08/2024.02.06.579080},
  abstract     = {Proteins serve as the foundation for nearly all biological functions within cells, encompassing roles in transport, signaling, enzymatic activity, and more. Their functionalities hinge significantly on their intricate three-dimensional structures, often posing challenges in terms of difficulty, time, and expense for accurate determination. The introduction of AlphaFold 2 marked a groundbreaking solution to the enduring challenge of predicting a protein{\textquoteright}s tertiary structure from its amino acid sequence. However, the inherent complexity of AlphaFold{\textquoteright}s architecture presents obstacles in deciphering its learning process and understanding the decision-making that ultimately shapes the protein{\textquoteright}s final structure. In this study, we introduce a shallow, unsupervised model designed to understand the self-attention layer within the Evoformer block of AlphaFold. We establish a method based on Direct Coupling Analysis (DCA), wherein the interaction tensor undergoes decomposition, leveraging the same structure employed in Transformer architectures. The model{\textquoteright}s parameters, notably fewer than those in standard DCA, are interpretable through an examination of the resulting attention matrices. These matrices enable the extraction of contact information, subsequently utilized for constructing the contact map of a protein family. Additionally, the self-attention decomposition in the DCA Hamiltonian form adopted here facilitates the definition of multifamily learning architecture, enabling the inference of parameter sets shared across diverse protein families. Finally, an autoregressive generative version of the model is implemented, capable of efficiently generating new proteins in silico. This generative model reproduces the summary statistics of the original protein family while concurrently inferring direct contacts in the tertiary structure of the protein. The effectiveness of our Attention-Based DCA architecture is evaluated using Multiple Sequence Alignments (MSAs) of varying lengths and depths, with structural data sourced from the Pfam database. Competing Interest Statement: The authors have declared no competing interest.}
}