% Chapter by Ben-Gal et al. (pp. 155--183) in the edited volume
% "Plant genomics and bioinformatics" (2008).
% Typed as incollection because the chapter authors differ from the volume editors.
% NOTE(review): `series` repeats `booktitle` verbatim -- possibly an export artifact; confirm before removing.
@incollection{9c47b60c4f62439ca518ae057ff34a46,
  author    = {Ben-Gal, I. and Shani, A. and Gohr, A. and Grau, J. and Arviv, S. and Shmilovici, A. and Posch, S. and Grosse, I.},
  title     = {Recognition of transcription factor binding sites with Variable-order {Bayesian} networks},
  editor    = {Rao, G. P. and Wagner, C. and Singh, R. K. and Sharma, M. L.},
  booktitle = {Plant genomics and bioinformatics},
  series    = {Plant genomics and bioinformatics},
  publisher = {Studium press LLC},
  year      = {2008},
  pages     = {155--183},
  isbn      = {1933699396},
  language  = {English},
  abstract  = {We propose Variable-order Bayesian network models for the recognition of transcription factor binding sites (TFBSs). These models extend the widely-used position weight matrix models, Markov models, and Bayesian network models. In contrast to the latter conventional models, where for each position a fixed subset of the remaining positions is used to model statistical dependencies, in Variable-order Bayesian network models these subsets may vary based on the specific nucleotides observed, which are called the context. This flexibility turns out to be of advantage for the classification and analysis of TFBSs, as statistical dependencies between nucleotides in different TFBS positions (not necessarily adjacent) may be taken into account efficiently - in a position-specific and context-specific manner. We apply the Variable-order Bayesian network model to a dataset of 238 experimentally verified sigma-70 binding sites in E. coli, and find that this model can distinguish those 238 sites from a dataset of 472 intergenic 'non-promoter' sequences with higher accuracy than fixed-order Markov models or Bayesian networks. We use 106 fold replicated stratified-holdout sampling experiments fixing the true-negative rate at 99.9\%, and find that for a foreground inhomogeneous Variable-order Bayesian network model of order 1 and a background homogeneous Variable-order Markov model of order 5 the obtained mean true-positive rate is 47.56\%. In comparison, the best mean true-positive rate for the conventional models is 44.39\%, obtained by a foreground position weight matrix model and a background fixed-order Markov model of order 2. As the standard deviation of the estimated mean true-positive rate is approximately 0.01\%, this improvement is statistically significant. This chapter is based on work published in Ben-Gal et al. (2005).
All datasets and a web server for utilizing Variable-order Bayesian network models and Variable-order Markov models are available at http://www.bic-gh.de/vombat/.},
}