-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathfold.h
More file actions
604 lines (553 loc) · 21.5 KB
/
fold.h
File metadata and controls
604 lines (553 loc) · 21.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
#ifndef __VIENNA_RNA_PACKAGE_FOLD_H__
#define __VIENNA_RNA_PACKAGE_FOLD_H__
#include "data_structures.h"
/**
* \brief Mark a declaration as deprecated
*
* When __GNUC__ is defined, the wrapped declaration \a func is emitted with
* __attribute__ ((deprecated)) appended to it; otherwise the macro expands to
* \a func unchanged.
*/
#ifdef __GNUC__
#define DEPRECATED(func) func __attribute__ ((deprecated))
#else
#define DEPRECATED(func) func
#endif
/**
* \addtogroup mfe_fold
* \ingroup folding_routines
* \brief This section covers all functions and variables related to the calculation
* of minimum free energy (MFE) structures.
*
* The library provides a fast dynamic programming minimum free energy
* folding algorithm as described in \cite zuker:1981.
* All relevant parts that directly implement the "Zuker & Stiegler" algorithm for single
* sequences are described in this section.
*
* Folding of circular RNA sequences is handled as a post-processing step of the forward
* recursions. See \cite hofacker:2006 for further details.
*
* Nevertheless, the RNAlib also
* provides interfaces for the prediction of consensus MFE structures of sequence alignments,
* MFE structure for two hybridized sequences, local optimal structures and many more. For
* those more specialized variants of MFE folding routines, please consult the appropriate
* subsections (Modules) as listed above.
*
* \file fold.h
* \brief MFE calculations and energy evaluations for single RNA sequences
*
* This file includes (almost) all function declarations within the RNAlib that are related to
* MFE folding...
*/
/**
* \defgroup eval Energy evaluation
* @{
* \brief This module contains all functions and variables related to energy evaluation
* of sequence/structure pairs.
*
*
* @}
*/
/**
* \defgroup mfe_fold Calculating Minimum Free Energy Structures
* @{
* \brief This module contains all functions and variables related to the calculation
* of global minimum free energy structures for single sequences.
*
* The library provides a fast dynamic programming minimum free energy
* folding algorithm as described by \ref zuker_81 "Zuker & Stiegler (1981)".
* @}
*/
/** \brief if nonzero use logarithmic ML energy in energy_of_struct */
extern int logML;
/** \brief do ML decomposition uniquely (for subopt) */
extern int uniq_ML;
/** \brief set to first pos of second seq for cofolding */
extern int cut_point;
/**
* \brief verbose info from energy_of_struct
* \ingroup eval
*/
extern int eos_debug;
/**
* \brief Compute minimum free energy and an appropriate secondary
* structure of an RNA sequence
*
* The first parameter given, the RNA sequence, must be \a uppercase and should only contain
* an alphabet \f$\Sigma\f$ that is understood by the RNAlib\n
* (e.g. \f$ \Sigma = \{A,U,C,G\} \f$)\n
*
* The second parameter, \a structure, must always point to an allocated
* block of memory with a size of at least \f$\mathrm{strlen}(\mathrm{sequence})+1\f$
*
* If the third parameter is NULL, global model detail settings are assumed for the folding
* recursions. Otherwise, the provided parameters are used.
*
* The fourth parameter indicates whether a secondary structure constraint in enhanced dot-bracket
* notation is passed through the structure parameter or not. If so, the characters " | x < > " are
* recognized to mark bases that are paired, unpaired, paired upstream, or downstream, respectively.
* Matching brackets " ( ) " denote base pairs, dots "." are used for unconstrained bases.
*
* To indicate that the RNA sequence is circular and thus has to be post-processed, set the last
* parameter to non-zero
*
* After a successful call of fold_par(), a backtracked secondary structure (in dot-bracket notation)
* that exhibits the minimum of free energy will be written to the memory \a structure is pointing to.
* The function returns the minimum of free energy for any fold of the sequence given.
*
* \note OpenMP: Passing NULL to the 'parameters' argument involves access to several global model
* detail variables and thus is not to be considered threadsafe
*
* \ingroup mfe_fold
*
* \see fold(), circfold(), #model_detailsT, set_energy_model(), get_scaled_parameters()
*
* \param sequence RNA sequence
* \param structure A pointer to the character array where the
* secondary structure in dot-bracket notation will be written to
* \param parameters A data structure containing the prescaled energy contributions
* and the model details. (NULL may be passed, see OpenMP notes above)
* \param is_constrained Switch to indicate that a structure constraint is passed via the structure argument (0==off)
* \param is_circular Switch to (de-)activate postprocessing steps in case RNA sequence is circular (0==off)
*
* \return the minimum free energy (MFE) in kcal/mol
*/
float fold_par( const char *sequence,
char *structure,
paramT *parameters,
int is_constrained,
int is_circular);
/**
* \brief Compute minimum free energy and an appropriate secondary structure of an RNA sequence
*
* This function essentially does the same thing as fold_par(). However, it takes its model details,
* i.e. #temperature, #dangles, #tetra_loop, #noGU, #no_closingGU, #fold_constrained, #noLonelyPairs
* from the current global settings within the library
*
* Use fold_par() for a completely threadsafe variant
*
* \ingroup mfe_fold
*
* \see fold_par(), circfold()
*
* \param sequence RNA sequence
* \param structure A pointer to the character array where the
* secondary structure in dot-bracket notation will be written to
* \return the minimum free energy (MFE) in kcal/mol
*/
float fold( const char *sequence,
char *structure);
/**
* \brief Compute minimum free energy and an appropriate secondary structure of a circular RNA sequence
*
* This function essentially does the same thing as fold_par(). However, it takes its model details,
* i.e. #temperature, #dangles, #tetra_loop, #noGU, #no_closingGU, #fold_constrained, #noLonelyPairs
* from the current global settings within the library
*
* Use fold_par() for a completely threadsafe variant
*
* \ingroup mfe_fold
*
* \see fold_par(), fold()
*
* \param sequence RNA sequence
* \param structure A pointer to the character array where the
* secondary structure in dot-bracket notation will be written to
* \return the minimum free energy (MFE) in kcal/mol
*/
float circfold( const char *sequence,
char *structure);
/**
* \addtogroup eval Energy evaluation
* \ingroup folding_routines
* @{
* \brief This module contains all functions and variables related to energy evaluation
* of sequence/structure pairs.
* @}
*/
/**
* \brief Calculate the free energy of an already folded RNA using global model detail settings
*
* If verbosity level is set to a value >0, energies of structure elements are printed to stdout
*
* \note OpenMP: This function relies on several global model settings variables and thus is
* not to be considered threadsafe. See energy_of_struct_par() for a completely threadsafe
* implementation.
*
* \ingroup eval
*
* \see energy_of_struct_par(), energy_of_circ_structure()
*
* \param string RNA sequence
* \param structure secondary structure in dot-bracket notation
* \param verbosity_level a flag to turn verbose output on/off
* \return the free energy of the input structure given the input sequence in kcal/mol
*/
float energy_of_structure(const char *string,
const char *structure,
int verbosity_level);
/**
* \brief Calculate the free energy of an already folded RNA
*
* If verbosity level is set to a value >0, energies of structure elements are printed to stdout
*
* \ingroup eval
*
* \see energy_of_circ_structure(), energy_of_structure_pt(), get_scaled_parameters()
*
* \param string RNA sequence in uppercase letters
* \param structure Secondary structure in dot-bracket notation
* \param parameters A data structure containing the prescaled energy contributions and the model details.
* \param verbosity_level A flag to turn verbose output on/off
* \return The free energy of the input structure given the input sequence in kcal/mol
*/
float energy_of_struct_par( const char *string,
const char *structure,
paramT *parameters,
int verbosity_level);
/**
* \brief Calculate the free energy of an already folded circular RNA
*
* \note OpenMP: This function relies on several global model settings variables and thus is
* not to be considered threadsafe. See energy_of_circ_struct_par() for a completely threadsafe
* implementation.
*
* If verbosity level is set to a value >0, energies of structure elements are printed to stdout
*
* \ingroup eval
*
* \see energy_of_circ_struct_par(), energy_of_struct_par()
*
* \param string RNA sequence
* \param structure Secondary structure in dot-bracket notation
* \param verbosity_level A flag to turn verbose output on/off
* \return The free energy of the input structure given the input sequence in kcal/mol
*/
float energy_of_circ_structure( const char *string,
const char *structure,
int verbosity_level);
/**
* \brief Calculate the free energy of an already folded circular RNA
*
* If verbosity level is set to a value >0, energies of structure elements are printed to stdout
*
* \ingroup eval
*
* \see energy_of_struct_par(), get_scaled_parameters()
*
* \param string RNA sequence
* \param structure Secondary structure in dot-bracket notation
* \param parameters A data structure containing the prescaled energy contributions and the model details.
* \param verbosity_level A flag to turn verbose output on/off
* \return The free energy of the input structure given the input sequence in kcal/mol
*/
float energy_of_circ_struct_par(const char *string,
const char *structure,
paramT *parameters,
int verbosity_level);
/**
* \brief Calculate the free energy of an already folded RNA (G-quadruplex variant)
*
* \note NOTE(review): this function is not documented in this header. Its parameter list is
* identical to that of energy_of_structure(); judging by the name it is presumably the
* counterpart of that function for structures containing G-quadruplexes. Confirm against
* the implementation before relying on this description.
*
* \see energy_of_structure(), energy_of_gquad_struct_par()
*
* \param string RNA sequence (by analogy with energy_of_structure())
* \param structure Secondary structure string (exact notation for G-quadruplexes: TODO confirm)
* \param verbosity_level Presumably a flag to turn verbose output on/off, as in energy_of_structure()
* \return Presumably the free energy in kcal/mol, as for the other float-returning
* energy evaluation functions in this header - TODO confirm
*/
float energy_of_gquad_structure(const char *string,
const char *structure,
int verbosity_level);
/**
* \brief Calculate the free energy of an already folded RNA (G-quadruplex variant)
* with explicitly provided energy parameters
*
* \note NOTE(review): this function is not documented in this header. Its parameter list is
* identical to that of energy_of_struct_par(); judging by the name it is presumably the
* counterpart of that function for structures containing G-quadruplexes. Confirm against
* the implementation before relying on this description.
*
* \see energy_of_struct_par(), energy_of_gquad_structure()
*
* \param string RNA sequence (by analogy with energy_of_struct_par())
* \param structure Secondary structure string (exact notation for G-quadruplexes: TODO confirm)
* \param parameters Presumably the prescaled energy contributions and model details,
* as in energy_of_struct_par()
* \param verbosity_level Presumably a flag to turn verbose output on/off
* \return Presumably the free energy in kcal/mol, as for the other float-returning
* energy evaluation functions in this header - TODO confirm
*/
float energy_of_gquad_struct_par( const char *string,
const char *structure,
paramT *parameters,
int verbosity_level);
/**
* \brief Calculate the free energy of an already folded RNA
*
* If verbosity level is set to a value >0, energies of structure elements are printed to stdout
*
* \note OpenMP: This function relies on several global model settings variables and thus is
* not to be considered threadsafe. See energy_of_struct_pt_par() for a completely threadsafe
* implementation.
*
* \ingroup eval
*
* \see make_pair_table(), energy_of_struct_pt_par()
*
* \param string RNA sequence
* \param ptable the pair table of the secondary structure
* \param s encoded RNA sequence
* \param s1 encoded RNA sequence
* \param verbosity_level a flag to turn verbose output on/off
* \return the free energy of the input structure given the input sequence in 10cal/mol
*/
int energy_of_structure_pt( const char *string,
short *ptable,
short *s,
short *s1,
int verbosity_level);
/**
* \brief Calculate the free energy of an already folded RNA
*
* If verbosity level is set to a value >0, energies of structure elements are printed to stdout
*
* \ingroup eval
*
* \see make_pair_table(), energy_of_struct_par(), get_scaled_parameters()
*
* \param string RNA sequence in uppercase letters
* \param ptable The pair table of the secondary structure
* \param s Encoded RNA sequence
* \param s1 Encoded RNA sequence
* \param parameters A data structure containing the prescaled energy contributions and the model details.
* \param verbosity_level A flag to turn verbose output on/off
* \return The free energy of the input structure given the input sequence in 10cal/mol
*/
int energy_of_struct_pt_par(const char *string,
short *ptable,
short *s,
short *s1,
paramT *parameters,
int verbosity_level);
/**
* \brief Free arrays for mfe folding
*
* \ingroup mfe_fold
*
*/
void free_arrays(void);
/**
* \brief Create a dot-bracket/parenthesis structure from backtracking stack
*
* \note This function is threadsafe
*
* \param structure Character array that receives the structure string
* (required size presumably length+1 - TODO confirm)
* \param bp The backtracking stack
* \param length Presumably the length of the structure to be written - TODO confirm
*/
void parenthesis_structure(char *structure,
bondT *bp,
int length);
/**
* \brief Create a dot-bracket/parenthesis structure from backtracking stack
* obtained by zuker suboptimal calculation in cofold.c
*
* \note This function is threadsafe
*
* \param structure Character array that receives the structure string
* (required size presumably length+1 - TODO confirm)
* \param bp The backtracking stack
* \param length Presumably the length of the structure to be written - TODO confirm
*/
void parenthesis_zuker( char *structure,
bondT *bp,
int length);
/**
* \note NOTE(review): this function is not documented in this header. Its parameter list is
* identical to that of parenthesis_structure(), so it presumably also derives a structure
* string from a backtracking stack, using a letter-based notation instead of parentheses.
* Confirm the exact output format against the implementation.
*
* \param structure Presumably the character array that receives the result - TODO confirm
* \param bp Presumably the backtracking stack, as in parenthesis_structure()
* \param length Presumably the length of the structure - TODO confirm
*/
void letter_structure(char *structure,
bondT *bp,
int length);
/**
* \brief Recalculate energy parameters
*
* \ingroup mfe_fold
*/
void update_fold_params(void);
/**
* \brief Recalculate energy parameters from an explicitly provided parameter set
*
* \note NOTE(review): description inferred from the name and from update_fold_params();
* confirm against the implementation (in particular whether \a parameters is copied
* and whether NULL is accepted).
*
* \ingroup mfe_fold
*
* \see update_fold_params()
*
* \param parameters Presumably the prescaled energy contributions and model details,
* as passed to fold_par()
*/
void update_fold_params_par(paramT *parameters);
/**
* \note NOTE(review): this function is not documented in this header. Judging by its name it
* presumably backtracks a secondary structure starting from the base pair (i,j), using
* the results of a preceding MFE folding run, and returns it as a string. Ownership of
* the returned pointer (who frees it) is not visible here - confirm against the
* implementation.
*
* \ingroup mfe_fold
*
* \param sequence RNA sequence (presumably the one that was folded before - TODO confirm)
* \param i Presumably the first position of the base pair to start backtracking from
* \param j Presumably the second position of the base pair to start backtracking from
* \return Presumably the backtracked structure in dot-bracket notation - TODO confirm
*/
char *backtrack_fold_from_pair(char *sequence,
int i,
int j);
/**
* \brief Calculate energy of a move (closing or opening of a base pair)
*
* If the parameters m1 and m2 are negative, it is deletion (opening)
* of a base pair, otherwise it is insertion (closing).
*
* \see make_pair_table(), energy_of_move_pt()
* \param string RNA sequence
* \param structure secondary structure in dot-bracket notation
* \param m1 first coordinate of base pair
* \param m2 second coordinate of base pair
* \returns energy change of the move in kcal/mol
*/
float energy_of_move( const char *string,
const char *structure,
int m1,
int m2);
/**
*
* \brief Calculate energy of a move (closing or opening of a base pair)
*
* If the parameters m1 and m2 are negative, it is deletion (opening)
* of a base pair, otherwise it is insertion (closing).
*
* \see make_pair_table(), energy_of_move()
* \param pt the pair table of the secondary structure
* \param s encoded RNA sequence
* \param s1 encoded RNA sequence
* \param m1 first coordinate of base pair
* \param m2 second coordinate of base pair
* \returns energy change of the move in 10cal/mol
*/
int energy_of_move_pt(short *pt,
short *s,
short *s1,
int m1,
int m2);
/**
* \brief Calculate energy of a loop
*
* \param ptable the pair table of the secondary structure
* \param s encoded RNA sequence
* \param s1 encoded RNA sequence
* \param i position of covering base pair
* \returns free energy of the loop in 10cal/mol
*/
int loop_energy(short *ptable,
short *s,
short *s1,
int i);
/**
* \brief Export the arrays of the MFE folding recursions
*
* \note NOTE(review): this function is not documented in this header. Every argument is a
* pointer to a pointer, so the function presumably stores the addresses of the library's
* internal arrays in the caller's variables rather than copying them. The meaning of the
* individual arrays can only be guessed from the parameter names (f5, c, fML, fM1, indx,
* ptype) - confirm against the implementation, including lifetime and ownership.
*
* \ingroup mfe_fold
*
* \see export_fold_arrays_par(), export_circfold_arrays()
*/
void export_fold_arrays(int **f5_p,
int **c_p,
int **fML_p,
int **fM1_p,
int **indx_p,
char **ptype_p);
/**
* \brief Export the arrays of the MFE folding recursions together with the energy parameters
*
* \note NOTE(review): this function is not documented in this header. Its parameter list equals
* that of export_fold_arrays() plus a trailing paramT** argument, which presumably receives
* a pointer to the energy parameter set in use. Confirm against the implementation,
* including lifetime and ownership of everything handed out.
*
* \ingroup mfe_fold
*
* \see export_fold_arrays()
*/
void export_fold_arrays_par(int **f5_p,
int **c_p,
int **fML_p,
int **fM1_p,
int **indx_p,
char **ptype_p,
paramT **P_p);
/**
* \brief Export the arrays of the MFE folding recursions for circular RNA
*
* \note NOTE(review): this function is not documented in this header. The first four arguments
* are plain int pointers and so presumably receive single values (Fc, FcH, FcI, FcM),
* while the remaining arguments are pointers to pointers and presumably receive the
* addresses of internal arrays, as in export_fold_arrays(). Confirm the meaning of each
* value, and lifetime and ownership, against the implementation.
*
* \ingroup mfe_fold
*
* \see export_fold_arrays(), export_circfold_arrays_par()
*/
void export_circfold_arrays(int *Fc_p,
int *FcH_p,
int *FcI_p,
int *FcM_p,
int **fM2_p,
int **f5_p,
int **c_p,
int **fML_p,
int **fM1_p,
int **indx_p,
char **ptype_p);
/**
* \brief Export the arrays of the MFE folding recursions for circular RNA together with
* the energy parameters
*
* \note NOTE(review): this function is not documented in this header. Its parameter list equals
* that of export_circfold_arrays() plus a trailing paramT** argument, which presumably
* receives a pointer to the energy parameter set in use. Confirm against the
* implementation, including lifetime and ownership of everything handed out.
*
* \ingroup mfe_fold
*
* \see export_circfold_arrays()
*/
void export_circfold_arrays_par(int *Fc_p,
int *FcH_p,
int *FcI_p,
int *FcM_p,
int **fM2_p,
int **f5_p,
int **c_p,
int **fML_p,
int **fM1_p,
int **indx_p,
char **ptype_p,
paramT **P_p);
/**
* \brief Create a plist from a dot-bracket string
*
* The dot-bracket string is parsed and for each base pair an
* entry in the plist is created. The probability of each pair in
* the list is set by a function parameter.
*
* The end of the plist is marked by sequence positions i as well as j
* equal to 0. This condition should be used to stop looping over its
* entries
*
* This function is threadsafe
*
* \param pl A pointer to the plist that is to be created
* \param struc The secondary structure in dot-bracket notation
* \param pr The probability for each base pair
*/
void assign_plist_from_db(plist **pl,
const char *struc,
float pr);
/* finally moved the loop energy function declarations to this header... */
/* BUT: The functions only exist for backward compatibility reasons! */
/* You better include "loop_energies.h" and call the functions: */
/* E_Hairpin() and E_IntLoop() which are (almost) threadsafe as they get */
/* a pointer to the energy parameter datastructure as additional argument */
/**
* \deprecated {This function is deprecated and will be removed soon.
* Use \ref E_IntLoop() instead!}
*/
DEPRECATED(int LoopEnergy(int n1,
int n2,
int type,
int type_2,
int si1,
int sj1,
int sp1,
int sq1));
/**
* \deprecated {This function is deprecated and will be removed soon.
* Use \ref E_Hairpin() instead!}
*/
DEPRECATED(int HairpinE(int size,
int type,
int si1,
int sj1,
const char *string));
/**
* Allocate arrays for folding\n
* \deprecated {This function is deprecated and will be removed soon!}
*
*/
DEPRECATED(void initialize_fold(int length));
/**
* \brief Calculate the free energy of an already folded RNA
*
* \note This function is not entirely threadsafe! Depending on the state of the global
* variable \ref eos_debug it prints energy information to stdout or not...\n
*
* \deprecated This function is deprecated and should not be used in future programs!
* Use \ref energy_of_structure() instead!
*
* \see energy_of_structure(), energy_of_circ_struct(), energy_of_struct_pt()
* \param string RNA sequence
* \param structure secondary structure in dot-bracket notation
* \return the free energy of the input structure given the input sequence in kcal/mol
*/
DEPRECATED(float energy_of_struct(const char *string,
const char *structure));
/**
* \brief Calculate the free energy of an already folded RNA
*
* \note This function is not entirely threadsafe! Depending on the state of the global
* variable \ref eos_debug it prints energy information to stdout or not...\n
*
* \deprecated This function is deprecated and should not be used in future programs!
* Use \ref energy_of_structure_pt() instead!
*
* \see make_pair_table(), energy_of_structure()
* \param string RNA sequence
* \param ptable the pair table of the secondary structure
* \param s encoded RNA sequence
* \param s1 encoded RNA sequence
* \return the free energy of the input structure given the input sequence in 10cal/mol
*/
DEPRECATED(int energy_of_struct_pt( const char *string,
short *ptable,
short *s,
short *s1));
/**
* Calculate the free energy of an already folded circular RNA
*
* \note This function is not entirely threadsafe! Depending on the state of the global
* variable \ref eos_debug it prints energy information to stdout or not...\n
*
* \deprecated This function is deprecated and should not be used in future programs
* Use \ref energy_of_circ_structure() instead!
*
* \see energy_of_circ_structure(), energy_of_struct(), energy_of_struct_pt()
* \param string RNA sequence
* \param structure secondary structure in dot-bracket notation
* \return the free energy of the input structure given the input sequence in kcal/mol
*/
DEPRECATED(float energy_of_circ_struct( const char *string,
const char *structure));
#endif