% \newcommand{\etal}{{\em et. al.}}
@ARTICLE{MacKay92a,
KEY ="MacKay",
AUTHOR ="D.J.C. MacKay",
TITLE ="{B}ayesian Interpolation",
JOURNAL ="Neural Computation",
YEAR ="1992",
VOLUME ="4",
NUMBER ="3",
PAGES ="415--447"}
@ARTICLE{MacKay92b,
KEY ="MacKay",
AUTHOR ="D.J.C. MacKay",
TITLE ="A Practical {B}ayesian Framework for Backpropagation Networks",
JOURNAL ="Neural Computation",
YEAR ="1992",
VOLUME ="4",
NUMBER ="3",
PAGES ="448--472"}
@ARTICLE{MacKay92c,
KEY ="MacKay",
AUTHOR ="D.J.C. MacKay",
TITLE ="Information Based Objective Functions for Active Data Selection",
JOURNAL ="Neural Computation",
YEAR ="1992",
VOLUME ="4",
NUMBER ="4",
PAGES ="589--603"}
@ARTICLE{MacKay92d,
KEY ="MacKay",
AUTHOR ="D.J.C. MacKay",
TITLE ="The Evidence Framework Applied to Classification Networks",
JOURNAL ="Neural Computation",
YEAR ="1992",
VOLUME ="4",
NUMBER ="5",
PAGES ="698--714"}
% -----------------------------------
% INDEX:
% -----------------------------------
% SPIN GLASS PAPERS
% STATISTICS AND NEURAL NETS
% LUTTRELL
% BM'S, MEAN FIELD THEORY
% TSP
% BASIC NEURAL NET REFS
% HEBBIAN, LINSKER
% NUMERICAL
% GULL, SKILLING, OCCAM, MAXENT, MDL
% NEURAL NETS OPTIMISATION OF number parameters, regularisers, etc.
% OTHER PAPERS ON OCCAM
% -----------------------------------
% SPIN GLASS PAPERS
@TECHREPORT{Yau.tr,
KEY ="Yau and Wallace",
AUTHOR ="H.W. Yau and D.J. Wallace",
TITLE ="Basins of Attraction in Sparse Neural Network Models with Persistent Inputs",
YEAR ="1990",
NOTE ="In preparation",
INSTITUTION ="Edinburgh University"}
@ARTICLE{Yau,
AUTHOR = "H W Yau and D J Wallace",
TITLE = "Enlarging the Attractor Basins of Neural Networks with Noisy
External Fields",
JOURNAL = "Journal of Physics A: Maths and General",
YEAR = "1991",
VOLUME = "24",
PAGES = "5639--5650"}
@ARTICLE{BDS,
KEY ="Buhmann et. al.",
AUTHOR ="J. Buhmann and R. Divko and K. Schulten",
TITLE ="Associative memory with high information content",
JOURNAL ="preprint",
YEAR ="1988"}
% Papers on hop
@ARTICLE{Hopfield82,
KEY ="Hopfield",
AUTHOR ="J.J. Hopfield",
TITLE ="Neural Networks and physical systems with emergent collective computational abilities",
JOURNAL ="Proc. Natl. Acad. Sci. USA",
YEAR ="1982",
VOLUME ="79",
PAGES ="2554--8"}
@ARTICLE{Hopfield84,
KEY ="Hopfield",
AUTHOR ="J.J. Hopfield",
TITLE ="Neurons with graded response properties have collective computational properties like those of two-state Neurons",
JOURNAL ="Proc. Natl. Acad. Sci. USA",
YEAR ="1984",
VOLUME ="81",
PAGES ="3088--92"}
@ARTICLE{Hopfield87,
KEY ="Hopfield",
AUTHOR ="J.J. Hopfield",
TITLE ="Learning algorithms and probability distributions in feed-forward and feed-back networks",
JOURNAL ="Proc. Natl. Acad. Sci. USA",
YEAR ="1987",
VOLUME ="84",
PAGES ="8429--33"}
% Discussion of introduction of biases or low levels of activity: see Amit in Network1
% Best ref:
@ARTICLE{RS89,
KEY ="Rubin and Sompolinsky",
AUTHOR ="N. Rubin and H. Sompolinsky",
TITLE ="Neural Networks with low local firing rates",
JOURNAL ="Europhys. Lett.",
YEAR ="1989",
VOLUME ="8",
NUMBER ="",
PAGES ="465"}
% they study thetas that look linear in average background but may not be..
% Applied field references: see 0.14 below. But the first good paper is:
@ARTICLE{EES89,
KEY ="Engel et. al.",
AUTHOR ="A. Engel and H. English and A. Schutte",
TITLE ="Improved retrieval in Neural Networks with external fields",
JOURNAL ="Europhys. Lett.",
YEAR ="1989",
VOLUME ="8",
PAGES ="393"}
@ARTICLE{Amit87b,
KEY ="Amit et. al.",
AUTHOR ="D.J. Amit and H. Gutfreund and H. Sompolinsky",
TITLE ="
Information storage in Neural Networks with low levels of activity",
JOURNAL ="Phys. Rev. A",
YEAR ="1987",
VOLUME ="35",
PAGES ="2293"}
% Hop capacity:
@ARTICLE{Amit85,
KEY ="Amit et. al.",
AUTHOR ="D.J. Amit and H. Gutfreund and H. Sompolinsky",
TITLE ="Spin glass models of Neural Networks",
JOURNAL ="Phys. Rev. A",
YEAR ="1985",
VOLUME ="32",
PAGES ="1007"}
% the above only discusses the case alpha -> 0, constant P, N-> infty.
% It derives T = 0.46 Tc for hopfield prescription
% p=0.14N derived in
@ARTICLE{Amit85b,
KEY ="Amit et. al.",
AUTHOR ="D.J. Amit and H. Gutfreund and H. Sompolinsky",
TITLE ="Storing infinite numbers of patterns in a spin glass model of Neural Networks",
JOURNAL ="Phys. Rev. Lett.",
YEAR ="1985",
VOLUME ="55",
PAGES ="1530"}
@ARTICLE{Amit87,
KEY ="Amit et. al.",
AUTHOR ="D.J. Amit and H. Gutfreund and H. Sompolinsky",
TITLE ="Statistical mechanics of Neural Networks near saturation",
JOURNAL ="Ann. Phys. (New York)",
YEAR ="1987",
VOLUME ="173",
PAGES ="30"}
% ^^This one is prob the best 0.14 ref, and
% has a lot more in it too. It even discusses applied fields and dismisses
% them because they imagine the field being fixed, regardless of the cue vector.
% Blackout is discussed in
% J-P Nadal, G. Toulouse, J.P Changeux, and S. Dehaene, 1986, Networks of formal
% Neurons and memory palimpsests. Europhys Lett 1 535
% Blackout = loss of memories due to overload. Their paper suggests weight decay [?]
% So as to not go above capacity.
% Pseudoinverse refs can be found referred to in Gardner 1987 below. They get cap =1.
% alpha =2 is derived in
@ARTICLE{Gardner,
KEY ="Gardner",
AUTHOR ="E.J. Gardner",
TITLE ="Maximum storage capacity
of Neural Networks",
JOURNAL ="Europhys. Lett.",
YEAR ="1987",
VOLUME ="4",
PAGES ="481"}
% Other Gardner refs:
% B. Derrida and E.J. Gardner and A. Zippelius, Europhys Lett 4 1987 167
% E.J. Gardner B. Derrida and Mottishaw, J. Phys (paris) 48 1987 441
% E.J. Gardner J. Phys A 19 1986 L 1047
% A. Bruce, E.J. Gardner and D.J. Wallace, J. Phys A 20 1987 A 2909
% `Dynamics and Statistical Mechanics of the Hopfield Model'
% The latter two are meant to include derivation of 0.14-like results.
%
@ARTICLE{Amit87b,
KEY ="Amit et. al.",
AUTHOR ="D.J. Amit and H. Gutfreund and H. Sompolinsky",
TITLE ="Statistical mechanics of Neural Networks near saturation",
JOURNAL ="Ann. Phys. (New York)",
YEAR ="1987",
VOLUME ="173",
PAGES ="30"}
@ARTICLE{Amit90,
KEY ="Amit et. al.",
AUTHOR ="D.J. Amit and G. Parisi and S. Nicolis",
TITLE ="Neural Potentials as stimuli for attractor Neural Networks",
JOURNAL ="Network",
YEAR ="1990",
VOLUME ="1",
NUMBER ="1",
PAGES ="75--88"}
@INPROCEEDINGS{MM89:nips,
KEY ="MacKay and Miller",
AUTHOR ="D.J.C. MacKay and K.D. Miller",
TITLE ="Analysis of Linsker's simulations of Hebbian rules",
BOOKTITLE ="Advances in Neural Information Processing Systems II",
EDITOR ="D. Touretzky",
PAGES ="694--701",
YEAR ="1989"}
@ARTICLE{MM90:nc,
KEY ="MacKay and Miller",
AUTHOR ="D.J.C. MacKay and K.D. Miller",
TITLE ="Analysis of Linsker's simulations of Hebbian rules",
JOURNAL ="Neural Computation",
VOLUME ="2",
NUMBER ="2",
PAGES ="173--187",
YEAR ="1990"}
@ARTICLE{MM90:network,
KEY ="MacKay and Miller",
AUTHOR ="D.J.C. MacKay and K.D. Miller",
TITLE ="Analysis of Linsker's application of Hebbian rules to linear networks",
JOURNAL ="Network",
VOLUME ="1",
NUMBER ="3",
PAGES ="257--297",
YEAR ="1990"}
@ARTICLE{MM94:nc,
KEY ="Miller and MacKay",
AUTHOR ="K.D. Miller and D.J.C. MacKay",
TITLE ="The role of constraints in Hebbian learning",
JOURNAL ="Neural Computation",
VOLUME ="4",
NUMBER ="1",
PAGES ="98--124",
YEAR ="1994"}
@INCOLLECTION{Bridle,
KEY ="Bridle",
AUTHOR ="J.S. Bridle",
TITLE ="Probabilistic interpretation of feedforward classification Network outputs, with relationships to statistical pattern recognition",
BOOKTITLE ="Neuro-computing: algorithms, architectures and applications",
YEAR ="1989",
EDITOR ="F. Fogelman-Souli{\'e} and J. H{\'e}rault",
PUBLISHER ="Springer-Verlag"}
@TECHREPORT{Discrim,
KEY ="Bridle",
AUTHOR ="J.S. Bridle",
TITLE ="Discriminative training of hidden {M}arkov models for speech recognition",
YEAR ="1988",
NUMBER ="RSRE Memo",
INSTITUTION ="RSRE",
NOTE ="Year uncertain"}
@ARTICLE{alphanets,
KEY ="Bridle",
AUTHOR ="J.S. Bridle",
TITLE ="Alpha-Nets: A recurrent `neural' network architecture with a hidden {M}arkov model interpretation",
JOURNAL ="Speech Communication",
VOLUME ="9",
NUMBER ="1",
YEAR ="1990",
PAGES ="83--92",
}
% John S Bridle
% Speech Communication 9 (1990) 83-92.
% That's Volume 9, No.1, February 1990.
% ISSN 0167-6393
% Publisher: North Holland.
% A more recent version of the AlphaNet stuff, with CSR and better notation,
% is
% An AlphaNet approach to optimising input transformations
% for continuous speech recognition
% J S Bridle and L Dodd,
% Proc ICASSP91 (Toronto)
@TECHREPORT{Fantargs1,
KEY ="Bridle",
AUTHOR ="J.S. Bridle",
TITLE ="The phantom target
cluster Network: a peculiar relative of (unsupervised)
maximum likelihood stochastic modelling and (supervised)
error backpropagation",
YEAR ="1988",
NUMBER ="SP4: 66",
INSTITUTION ="RSRE"}
@INPROCEEDINGS{phnips,
KEY ="Bridle, Heading and MacKay",
AUTHOR ="J.S. Bridle and Anthony J.R. Heading and D.J.C. MacKay",
TITLE ="Unsupervised Classifiers, Mutual Information and `Phantom targets'",
BOOKTITLE ="Advances in Neural Information Processing Systems 4",
EDITOR ="J.E. Moody and S.J. Hanson and R.P. Lippmann",
PUBLISHER ="Morgan Kaufmann",
ADDRESS ="San Mateo, California",
YEAR ="1992",
PAGES ="1096--1101"}
@INPROCEEDINGS{Moody,
KEY ="Moody",
AUTHOR ="J.E. Moody",
TITLE ="Note on generalization, regularization and architecture selection in nonlinear learning systems",
BOOKTITLE ="First IEEE--SP Workshop on neural networks for signal processing",
PUBLISHER ="IEEE Computer society press",
YEAR ="1991",
PAGES ="847--854"
}
@INPROCEEDINGS{Moody.nips4,
KEY ="Moody",
AUTHOR ="J.E. Moody",
TITLE ="The {\it Effective} Number of Parameters: An Analysis of Generalization and Regularization in Nonlinear Learning Systems",
BOOKTITLE ="Advances in Neural Information Processing Systems 4",
EDITOR ="J.E. Moody and S.J. Hanson and R.P. Lippmann",
PUBLISHER ="Morgan Kaufmann",
ADDRESS ="San Mateo, California",
YEAR ="1992",
PAGES ="847--854"}
@INPROCEEDINGS{Guyon.nips4,
KEY ="Guyon et. al.",
AUTHOR ="I. Guyon and V.N. Vapnik and B.E. Boser and L.Y. Bottou and S.A. Solla",
TITLE ="Structural risk minimization for character recognition",
BOOKTITLE ="Advances in Neural Information Processing Systems 4",
EDITOR ="J.E. Moody and S.J. Hanson and R.P. Lippmann",
PUBLISHER ="Morgan Kaufmann",
ADDRESS ="San Mateo, California",
YEAR ="1992",
PAGES ="471--479"}
@INPROCEEDINGS{MacKay.nips4,
KEY ="MacKay",
AUTHOR ="D.J.C. MacKay",
TITLE ="{B}ayesian Model Comparison and Backprop Nets",
BOOKTITLE ="Advances in Neural Information Processing Systems 4",
EDITOR ="J.E. Moody and S.J. Hanson and R.P. Lippmann",
PUBLISHER ="Morgan Kaufmann",
ADDRESS ="San Mateo, California",
YEAR ="1992",
PAGES ="839--846"}
@BOOK{Bayes.Kalman,
KEY ="Bar-Shalom and Fortmann",
AUTHOR ="Bar-Shalom, Y. and Fortmann, T.E.",
TITLE ="Tracking and Data Association",
PUBLISHER ="Academic Press",
YEAR ="1988"}
% Bayesian model comparison for Kalman filter models
% STATISTICS AND NEURAL NETS
@ARTICLE{Solla,
KEY ="Solla",
AUTHOR ="S.A. Solla and E. Levin and M. Fleisher",
TITLE ="Accelerated learning in layered Neural Networks",
JOURNAL ="Complex systems",
YEAR ="1988",
VOLUME ="2",
NUMBER ="",
PAGES ="625--640"}
@INPROCEEDINGS{HintonSej,
KEY ="Hinton and Sejnowski",
AUTHOR ="G.E. Hinton and T.J. Sejnowski",
TITLE ="Optimal Perceptual Inference",
BOOKTITLE ="Proc. IEEE Conference on Computer Vision and Pattern Recognition",
YEAR ="1983",
PAGES ="448--453"}
@INCOLLECTION{Brain_Damage,
KEY ="LeCun \etal",
AUTHOR ="LeCun, Y. and J.S. Denker and S.A. Solla",
TITLE ="Optimal Brain Damage",
BOOKTITLE ="Advances in Neural Information Processing Systems 2",
YEAR ="1990",
EDITOR ="D.S. Touretzky",
PAGES ="598--605",
PUBLISHER ="Morgan Kaufmann"}
@INPROCEEDINGS{Luttrell,
KEY ="Luttrell",
AUTHOR ="S.P. Luttrell",
TITLE ="Hierarchical Self-organising Networks",
BOOKTITLE ="Proc. 1st {IEE} Conf on Artificial Neural Networks, {L}ondon",
YEAR ="1989",
PAGES ="2--6"}
% Luttrell 1989c, `Self-organisation: a derivation from first principles
% of a class of learning algorithms' presented at IJCNN 1989, Washington
@INPROCEEDINGS{Luttrell_Maxent,
KEY ="Luttrell",
AUTHOR ="S.P. Luttrell",
TITLE ="The use of
{B}ayesian and entropic methods in Neural Network theory",
BOOKTITLE ="Maximum Entropy and {B}ayesian Methods, {C}ambridge 1988",
EDITOR ="J. Skilling",
PUBLISHER ="Kluwer",
ADDRESS ="Dordrecht",
YEAR ="1989",
PAGES ="363--370"}
@ARTICLE{SL.transinformation,
KEY ="Luttrell",
AUTHOR ="S.P. Luttrell",
TITLE ="The use of transinformation in the design of data sampling schemes for inverse problems",
JOURNAL ="Inverse Problems",
VOLUME ="1",
PAGES ="199--218",
YEAR ="1985"}
@ARTICLE{WillshawDayan,
KEY ="Willshaw and Dayan",
AUTHOR ="D. Willshaw and P. Dayan",
TITLE ="Optimal Plasticity from Matrix Memories: what goes up must come down",
JOURNAL ="Neural Computation",
YEAR ="1990",
VOLUME ="2",
NUMBER ="1",
PAGES ="85--93"}
@INPROCEEDINGS{Solla_generalisation,
KEY ="Tishby \etal",
AUTHOR ="N. Tishby and E. Levin and S.A. Solla",
TITLE ="Consistent inference of probabilities
in layered Networks: predictions and generalization",
BOOKTITLE ="Proc. {IJCNN}, {W}ashington",
YEAR ="1989",
PAGES =""}
@INPROCEEDINGS{LevinTishbySolla,
KEY ="Levin \etal",
AUTHOR ="E. Levin and N. Tishby and S.A. Solla",
TITLE ="A statistical approach to learning and generalization
in layered Neural Networks",
BOOKTITLE ="{COLT} '89: 2nd workshop on computational learning theory",
YEAR ="1989",
PAGES ="245--260"}
%more details in Buntine paper
@ARTICLE{Buntine_Weigend,
KEY ="Buntine and Weigend",
AUTHOR ="W.L. Buntine and A.S. Weigend",
TITLE ="{B}ayesian Back--propagation",
JOURNAL ="Complex Systems",
YEAR ="1991",
VOLUME ="5",
PAGES ="603--643"}
@TECHREPORT{Wolpert_rig,
KEY ="Wolpert",
AUTHOR ="D.H. Wolpert",
TITLE ="A rigorous investigation of
`evidence' and `{O}ccam factors' in {B}ayesian reasoning'",
YEAR ="1992",
NUMBER ="T.R. 92-03-013",
INSTITUTION ="Santa Fe Inst."}
@TECHREPORT{Buntine2,
KEY ="Buntine",
AUTHOR ="W.L. Buntine",
TITLE ="Theory refinement on {B}ayesian Networks",
YEAR ="1991",
INSTITUTION =""}
@ARTICLE{Buntine:trees,
KEY ="Buntine",
AUTHOR ="W.L. Buntine",
TITLE ="Learning classification trees",
YEAR ="1992",
JOURNAL ="Statistics and Computing",
VOLUME ="2",
PAGES ="63--73"}
@ARTICLE{Bishop,
KEY ="Bishop",
AUTHOR ="C.M. Bishop",
TITLE ="Exact calculation of the {H}essian matrix for the multilayer perceptron",
JOURNAL ="Neural Computation",
YEAR ="1992",
VOLUME ="4",
NUMBER ="4",
PAGES ="494--501"}
@UNPUBLISHED{Peto,
AUTHOR ="L.~Peto",
TITLE ="Language modelling",
YEAR ="1994",
NOTE ="In preparation"}
@UNPUBLISHED{Buntine3,
KEY ="Buntine",
AUTHOR ="W.L. Buntine and A.S. Weigend",
TITLE ="Calculating second derivatives on feed-forward Networks",
NOTE ="Submitted to IEEE Trans. on Neural Networks",
YEAR ="1991"}
@INCOLLECTION{Denker2,
KEY ="Denker and LeCun",
AUTHOR ="J.S. Denker and LeCun, Y.",
TITLE ="Transforming Neural-net output levels
to probability distributions",
BOOKTITLE ="Advances in Neural Information Processing Systems 3",
YEAR ="1991",
EDITOR ="R.P. Lippmann",
PAGES ="853--859",
ADDRESS ="San Mateo, California",
PUBLISHER ="Morgan Kaufmann"}
@INCOLLECTION{Becker_Le_Cun,
KEY ="Becker and LeCun",
AUTHOR ="S. Becker and LeCun, Y.",
TITLE ="Improving the convergence of back-propagation learning with second order methods",
BOOKTITLE ="Proc. of the connectionist models Summer school",
YEAR ="1988",
EDITOR ="D.S. Touretzky and others",
PAGES ="29",
ADDRESS ="San Mateo, California",
PUBLISHER ="Morgan Kaufmann"}
% LUTTRELL
@ARTICLE{Luttrell_IEEE90,
KEY ="Luttrell",
AUTHOR ="S.P. Luttrell",
TITLE ="Derivation of a
class of training algorithms",
JOURNAL ="IEEE
Trans. on Neural Networks",
YEAR ="1990",
VOLUME ="1",
NUMBER ="2",
PAGES ="229--232"}
% BM'S, MEAN FIELD THEORY
@ARTICLE{mean-field,
KEY ="Peterson and Anderson",
AUTHOR ="C. Peterson and J.R. Anderson",
TITLE ="A Mean Field Theory Learning Algorithm for Neural Networks",
JOURNAL ="Complex Systems",
YEAR ="1987",
VOLUME ="1",
PAGES ="995--1019"}
@INPROCEEDINGS{Sej,
KEY ="Sejnowski",
AUTHOR ="T.J. Sejnowski",
TITLE ="Higher order Boltzmann machines",
BOOKTITLE ="Neural networks for computing",
EDITOR ="J.S. Denker",
PAGES ="398--403",
ADDRESS ="New York",
PUBLISHER ="American Institute of Physics",
YEAR ="1986"
}
@INPROCEEDINGS{MaxentCons,
KEY ="MacKay",
AUTHOR ="D.J.C. MacKay",
TITLE ="Maximum Entropy Connections: Neural Networks",
BOOKTITLE ="Maximum Entropy and {B}ayesian Methods, {L}aramie, 1990",
YEAR ="1991",
EDITOR ="W.T. Grandy and L. Schick",
PUBLISHER ="Kluwer",
ADDRESS ="Dordrecht",
PAGES =""
}
% TSP
% Other Neural Nets
@ARTICLE{DurbWill,
KEY ="Durbin and Willshaw",
AUTHOR ="R. Durbin and D. Willshaw",
TITLE ="An
analogue approach to the travelling salesman problem using an elastic Net
method",
JOURNAL ="Nature",
YEAR ="1987",
VOLUME ="326",
NUMBER ="",
PAGES ="689--91"}
@ARTICLE{Aiyer,
KEY ="Aiyer et. al.",
AUTHOR ="S.V.B. Aiyer and M. Niranjan and F. Fallside",
TITLE ="
A Theoretical investigation into the performance of the Hopfield model",
JOURNAL ="IEEE
Trans. on Neural Networks",
YEAR ="1990",
VOLUME ="1",
NUMBER ="2",
PAGES ="204--215"}
% BASIC NEURAL NET REFS
@BOOK{PDP,
KEY ="Rumelhart \etal",
AUTHOR ="Rumelhart \etal",
TITLE ="Parallel Distributed Processing",
PUBLISHER ="MIT Press",
YEAR ="1986"}
@ARTICLE{backprop,
KEY ="Rumelhart \etal",
AUTHOR ="D.E. Rumelhart and G.E. Hinton and R.J. Williams",
TITLE ="Learning representations by back-propagating errors",
JOURNAL ="Nature",
YEAR ="1986",
VOLUME ="323",
PAGES ="533--536"}
% in the pdp book this is 318--362
@ARTICLE{Pineda,
KEY ="Pineda",
AUTHOR ="F.J. Pineda",
TITLE ="Recurrent back-propagation and the dynamical approach to adaptive Neural computation",
JOURNAL ="Neural Computation",
YEAR ="1989",
VOLUME ="1",
PAGES ="161--172"}
% initial of Heil?
@ARTICLE{Baldi,
KEY ="Baldi",
AUTHOR ="P. Baldi and W. Heiligenberg",
TITLE ="How sensory maps could enhance resolution through ordered arrangement of broadly tuned receivers",
JOURNAL ="Biol. Cyb.",
VOLUME ="59",
PAGES ="313--318",
YEAR ="1988"}
% NUMERICAL
@BOOK{NR,
KEY ="Press \etal",
AUTHOR ="W.H. Press and B.P. Flannery and S.A. Teukolsky and W.T. Vetterling",
TITLE ="Numerical Recipes in {C}",
PUBLISHER ="Cambridge",
YEAR ="1988"}
% {B}ayes
@BOOK{Berger_Wolpert,
KEY ="Berger and Wolpert",
AUTHOR ="J.O. Berger and R.L. Wolpert",
TITLE ="The Likelihood Principle",
PUBLISHER ="Institute of Mathematical Statistics",
ADDRESS ="Hayward, California",
YEAR ="1984"}
% Nice quote: [from savage originally] `Indeed to many {B}ayesians, belief
% in the LP is the big difference between {B}ayesians and frequentists,
% not the desire to involve prior information'
@BOOK{Berger,
KEY ="Berger",
AUTHOR ="J. Berger",
TITLE ="Statistical Decision theory and {B}ayesian
Analysis",
PUBLISHER ="Springer",
YEAR ="1985"}
@BOOK{Zellner,
KEY ="Zellner",
AUTHOR ="A. Zellner",
TITLE ="Basic issues in econometrics",
PUBLISHER ="University of Chicago Press",
ADDRESS ="Chicago",
YEAR ="1984"}
@BOOK{Duda_Hart,
KEY ="Duda and Hart",
AUTHOR ="R. Duda and P. Hart",
TITLE ="Pattern Classification and Scene Analysis",
PUBLISHER ="Wiley",
YEAR ="1973"}
@BOOK{Good,
KEY ="Osteyee and Good",
AUTHOR ="D.B. Osteyee and I.J. Good",
TITLE ="Information, weight of
evidence, the singularity between probability measures and
signal detection",
PUBLISHER ="Springer",
YEAR ="1974"}
@BOOK{Meyer_Collier,
KEY ="Meyer and Collier",
EDITOR ="D.L. Meyer and R.O. Collier",
TITLE ="{B}ayesian statistics",
PUBLISHER ="Peacock publishers",
YEAR ="1970"}
@INCOLLECTION{Lindley-philosophy,
KEY ="Lindley",
AUTHOR ="D.V. Lindley",
TITLE ="{B}ayesian analysis in regression problems",
BOOKTITLE ="{B}ayesian statistics",
YEAR ="1970",
EDITOR ="D.L. Meyer and R.O. Collier",
PUBLISHER ="Peacock publishers"}
% History
@ARTICLE{laplace,
KEY ="Stigler",
AUTHOR ="S.M. Stigler",
TITLE ="Laplace's 1774 memoir on inverse
probability",
JOURNAL ="Stat. Sci.",
YEAR ="1986",
VOLUME ="1",
NUMBER ="3",
PAGES ="359--378"}
@ARTICLE{cox,
KEY ="Cox",
AUTHOR ="R.T. Cox",
TITLE ="Probability, frequency, and reasonable expectation.",
JOURNAL ="Am. J. Physics",
YEAR ="1946",
VOLUME ="14",
NUMBER ="",
PAGES ="1--13"}
@ARTICLE{Akaike,
KEY ="Akaike",
AUTHOR ="H. Akaike",
TITLE ="Statistical predictor identification",
JOURNAL ="Ann. Inst. Statist. Math.",
YEAR ="1970",
VOLUME ="22",
NUMBER ="",
PAGES ="203--217"}
% CLT
@ARTICLE{clt,
KEY ="Walker",
AUTHOR ="A.M. Walker",
TITLE ="On the asymptotic behaviour of posterior
distributions",
JOURNAL ="J. R. Stat. Soc. B",
YEAR ="1967",
VOLUME ="31",
NUMBER ="",
PAGES ="80--88"}
% GULL, SKILLING, OCCAM, MAXENT, MDL
@ARTICLE{Smith_and_Spiegelhalter,
KEY ="Smith and Spiegelhalter",
AUTHOR ="A.F.M. Smith and D.J. Spiegelhalter",
TITLE ="{B}ayes factors and choice criteria for linear models",
JOURNAL ="Journal of the Royal Statistical Society B",
YEAR ="1980",
VOLUME ="42",
NUMBER ="2",
PAGES ="213--220"}
@ARTICLE{Jefferys_and_Berger,
KEY ="Jefferys and Berger",
AUTHOR ="W.H. Jefferys and J.O. Berger",
TITLE ="{O}ckham's razor and {B}ayesian analysis",
JOURNAL ="American Scientist",
YEAR ="1992",
VOLUME ="80",
PAGES ="64--72"}
% Has good examples including fitting a high polynomial to data,
% detecting plagiarism, detecting that a coin has two heads,
% Newton / GR, also they give bounds on the min Occam factor that
% a model can suffer.
@ARTICLE{Mark_and_Miller,
KEY ="Mark and Miller",
AUTHOR ="K.E. Mark and M.I. Miller",
TITLE ="{B}ayesian model selection and minimum description length estimation of auditory--nerve discharge rates",
JOURNAL ="J. Acoust. Soc. Am.",
YEAR ="1992",
VOLUME ="91",
NUMBER ="2",
PAGES ="989--1002"}
% {B}ayes and Regularisation
% Iversen's {B}ayes Booklet: has several useful simple results, and typical lame philosophy.
@BOOK{Iversen,
KEY ="Iversen",
AUTHOR ="G.R. Iversen",
TITLE ="{B}ayesian statistical inference",
PUBLISHER ="Sage publications, Beverly Hills",
YEAR ="1984"}
% He refers to Box and Tiao as containing inferences concerning robust models'
% parameters. Berger also discusses robustness, but I suspect not the inference
% of those params.
@BOOK{Box_and_Tiao_text,
KEY ="Box and Tiao",
AUTHOR ="G.E.P. Box and G.C. Tiao",
TITLE ="{B}ayesian inference in statistical analysis",
PUBLISHER ="Addison--Wesley",
YEAR ="1973"}
@ARTICLE{Box1,
KEY ="Box and Tiao",
AUTHOR ="G.E.P. Box and G.C. Tiao",
TITLE ="A further look at robustness via {B}ayes' theorem",
JOURNAL ="Biometrika",
YEAR ="1962",
VOLUME ="49",
NUMBER ="",
PAGES ="419--432"}
@ARTICLE{Box2a,
KEY ="Box and Tiao",
AUTHOR ="G.E.P. Box and G.C. Tiao",
TITLE ="A {B}ayesian approach
to the importance of assumptions applied to the comparison of variances",
JOURNAL ="Biometrika",
YEAR ="1964",
VOLUME ="51",
NUMBER ="",
PAGES ="153--167"}
@ARTICLE{Box2b,
KEY ="Box and Tiao",
AUTHOR ="G.E.P. Box and G.C. Tiao",
TITLE ="A note on criterion robustness and inference robustness",
JOURNAL ="Biometrika",
YEAR ="1964",
VOLUME ="51",
PAGES ="169--173"}
@ARTICLE{Box3,
KEY ="Box and Tiao",
AUTHOR ="G.E.P. Box and G.C. Tiao",
TITLE ="A {B}ayesian approach to some outlier problems",
JOURNAL ="Biometrika",
YEAR ="1968",
VOLUME ="55",
PAGES ="119--129"}
% Lindley booklet: has strong detailed and simple arguments showing that
% Fisher is incoherent. He wrote this after lecturing for Dan Brunk!
@BOOK{Lindley-booklet,
KEY ="Lindley",
AUTHOR ="D.V. Lindley",
TITLE ="{B}ayesian statistics, a review",
PUBLISHER ="Society for Industrial and Applied Mathematics, Philadelphia",
YEAR ="1972"}
% p.3: Unlike common procedure of proposing a procedure
% and investigating its properties, we instead ask
% what properties are required and then find procedures that have these properties. I like it!
% Lindley reviews Ramsey's gambling scenario that proves that you have
% to have a utility function and a prob dist. Savage later did a rigorous
% version of the same.
% Assume that lotteries can be ordered. The ordering is transitive.
% Lindley also mentions Wald. He knocks Dempster-Schafer.
% He then states that any coherent inferences/decisions
% must be interpretable in terms of a prior. That prob distbn is a
% subjective prob possessed by the decision maker.
% `Objections to this attitude are numerous but none that I am aware of
% have gone to the axioms and criticised those. Indeed, it is hard to see how
% such criticism could be sustained since the requirements imposed by coherence
% are so modest.'
% If the scientific community makes decisions, it must have a prior
% and a utility. In half a line he mentions the result in game theory that
% e baum takes 50 pages to prove.
% Lindley then distinguishes inference and decision theory nicely.
% Then he attacks sampling theory for incoherence by showing counter-
% examples. Likelihood p.
% The requirement of unbiasedness violates the l.p.
% eg, If sample r/n binomial, theta = r/n.
% But if sample n for fixed r, theta = r-1/n-1 is the unbiased estimator.
% A statistic t(x) is called ancillary if its P does not depend on theta.
% [That is , for example, it is the deviations of the samples from x_bar]
% Some crap sampling theory dicks base their methods on ancill stats. They
% are wrong of course. counterexamples on p.11-12.
% Maximum likelihood counterexample: mixture model has singularity
% when sigma-> 0 with one component of the mixture on top of a particular
% data point. Similarly sigma N-1 can be generalised to give examples
% that don't converge.
% Significance tests counterexamples.
% Minimax counterexamples.
% Examples where a rejected hypothesis has probability close to 1.
% Examples are cited of confidence intervals where the larger interval
% doesn't include the smaller!
% Another example of a ludicrous unbiased estimator.
% Later on p.42 he cites Edwards et al 1963 as the definitive (but long) paper on
% the robustness of {B}ayesian inferences to the prior.
% He distinguishes [Box and Tiao 62 64a 64b] Criterion robustness and inference
% robustness. The first is robustness of a fixed procedure to the distribution
% being different from the assumptions. The latter is the {B}ayesian attitude.
% [p.43]
% p.44 reviews the non-normal model studied by Box. He says more work is needed
% here.
%
% p. 46 -> outliers. Box 1968b uses a mixture model, same mean, two gaussians.
% The outlier problem is discussed by Hartigan by seeing how influential each
% individual datum is.
@INCOLLECTION{Cheeseman_on_Occam,
KEY ="Cheeseman",
AUTHOR ="Peter Cheeseman",
TITLE ="On finding the most probable model",
BOOKTITLE ="Computational models of scientific discovery and theory formation",
YEAR ="1990",
EDITOR ="J. Shrager and P. Langley",
PAGES ="73--95",
PUBLISHER ="Morgan Kaufmann"}
% Quite a nice strongly-worded review of Occam, but with quite a lot of
% alternative free talk as well, I think.
@ARTICLE{Titterington1,
KEY ="Titterington",
AUTHOR ="D. Titterington",
TITLE ="General structure of regularization procedures in image reconstruction",
JOURNAL ="Astron. Astrophys.",
YEAR ="1985",
VOLUME ="144",
PAGES ="381--387"}
@ARTICLE{Titterington2,
KEY ="Titterington",
AUTHOR ="D. Titterington",
TITLE ="
Common structure of smoothing techniques in statistics",
JOURNAL ="Int. Statist. Rev.",
YEAR ="1985",
VOLUME ="53",
PAGES ="141--170"}
% these two papers are pretty similar, neither is that deep, or perhaps
% I just don't understand. The int stat rev one is longer .
% Both papers mention over-smoothing.
@TECHREPORT{Poggio3,
KEY ="Poggio and Girosi",
AUTHOR ="T. Poggio and F. Girosi",
TITLE ="A theory of Networks for approximation and learning",
YEAR ="1989",
INSTITUTION ="M.I.T.",
NUMBER ="A.I. 1140"}
@ARTICLE{Poggio1,
KEY ="Poggio et. al.",
AUTHOR ="T. Poggio and V. Torre and C. Koch",
TITLE ="Computational vision and regularization theory",
JOURNAL ="Nature",
YEAR ="1985",
VOLUME ="317",
NUMBER ="6035",
PAGES ="314--319"}
@ARTICLE{CrossVal,
KEY ="Davies and Anderssen",
AUTHOR ="A.R. Davies and R.S. Anderssen",
TITLE ="Optimization in the regularization of ill--posed problems",
JOURNAL ="J. Austral. Mat. Soc. Ser. B",
YEAR ="1986",
VOLUME ="28",
PAGES ="114--133"}
% This paper proves properties of alternative choices of alpha,
% including I think that cross val is best. **
@BOOK{Eubank,
KEY ="Eubank",
AUTHOR ="R.L. Eubank",
TITLE ="Spline smoothing and non--parametric
regression",
PUBLISHER ="Marcel Dekker",
YEAR ="1988"}
% In this book they call GCV `the method of choice' p.255
@INPROCEEDINGS{Jaynes,
KEY ="Jaynes",
AUTHOR ="E.T. Jaynes",
TITLE ="{B}ayesian methods: general background",
BOOKTITLE ="Maximum Entropy and {B}ayesian Methods in
applied statistics",
EDITOR ="J.H. Justice",
PUBLISHER ="C.U.P.",
YEAR ="1986",
PAGES ="1--25"}
@INCOLLECTION{Jaynes.intervals,
KEY ="Jaynes",
AUTHOR ="E.T. Jaynes",
TITLE ="{B}ayesian Intervals versus Confidence Intervals",
BOOKTITLE ="{E.T. Jaynes}. Papers on Probability,
Statistics and Statistical Physics",
EDITOR ="R.D. Rosencrantz",
PUBLISHER ="Kluwer",
YEAR ="1983",
PAGES ="151"}
% PUBLISHER ="Kluwer Academic Publishers",
% reprinted in paperback 1989,
% I just read utterly the best Jaynes essay ever. It is SO good; so even
% handed and confrontational; rubbing the noses of the opposition in the
% examples he gives, using the opponents of Galileo as analogy -- some of
% his opponents refused to look through his telescope to see Jupiter's
% moons, because they `already knew'. It's a very pragmatic argument he uses,
% not philosophical -- just look at the results of the two approaches
% and see where they give different answers, then magnify those differences
% and ask your common sense which answer makes sense.
@INPROCEEDINGS{Bryan,
KEY ="Bryan",
AUTHOR ="Bryan, R.K.",
TITLE ="Solving Oversampled Data Problems by {M}aximum {E}ntropy",
BOOKTITLE ="Maximum Entropy and {B}ayesian Methods, {D}artmouth, {U.S.A.}, 1989",
EDITOR ="P. Fougere",
PUBLISHER ="Kluwer",
YEAR ="1990",
PAGES ="221--232"}
@INPROCEEDINGS{Loredo,
KEY ="Loredo",
AUTHOR ="T.J. Loredo",
TITLE ="From {L}aplace to Supernova {SN} {1987A}: {B}ayesian Inference
in Astrophysics",
BOOKTITLE ="Maximum Entropy and {B}ayesian Methods, {D}artmouth, {U.S.A.}, 1989",
EDITOR ="P. Fougere",
PUBLISHER ="Kluwer",
YEAR ="1990",
PAGES ="81--142"}
@INPROCEEDINGS{Gregory_Loredo,
KEY ="Gregory and Loredo",
AUTHOR ="P.C. Gregory and T.J. Loredo",
TITLE ="A New Method for the Detection of a Periodic Signal of Unknown Shape and Period",
BOOKTITLE ="Maximum Entropy and {B}ayesian Methods",
EDITOR ="G.J. Erickson and C.R. Smith",
PUBLISHER ="Kluwer",
YEAR ="1992",
NOTE ="also in The Astrophysical Journal, Oct 10, 1992"}
@INPROCEEDINGS{GS1,
KEY ="Skilling",
AUTHOR ="J. Skilling",
TITLE ="Classic Maximum Entropy",
BOOKTITLE ="Maximum Entropy and {B}ayesian Methods, {C}ambridge 1988",
EDITOR ="J. Skilling",
PUBLISHER ="Kluwer",
ADDRESS ="Dordrecht",
YEAR ="1989",
PAGES =""}
@ARTICLE{Gull.nature,
KEY ="Gull",
AUTHOR ="S.F. Gull and G.J.~Daniell",
TITLE ="Image reconstruction from incomplete and noisy data",
JOURNAL ="Nature",
VOLUME ="272",
YEAR ="1978",
PAGES ="686--690"}
@INPROCEEDINGS{GS2,
KEY ="Gull",
AUTHOR ="S.F. Gull",
TITLE ="Developments in Maximum entropy data analysis",
BOOKTITLE ="Maximum Entropy and {B}ayesian Methods, {C}ambridge 1988",
EDITOR ="J. Skilling",
PUBLISHER ="Kluwer",
ADDRESS ="Dordrecht",
YEAR ="1989",
PAGES ="53--71"}
@INPROCEEDINGS{Skilling2,
KEY ="Skilling",
AUTHOR ="J. Skilling",
TITLE ="The eigenvalues of mega--dimensional matrices",
BOOKTITLE ="Maximum Entropy and {B}ayesian Methods, {C}ambridge 1988",
EDITOR ="J. Skilling",
PUBLISHER ="Kluwer",
ADDRESS ="Dordrecht",
YEAR ="1989",
PAGES ="455--466"}
@INPROCEEDINGS{G1,
KEY ="Gull",
AUTHOR ="S.F. Gull",
TITLE ="{B}ayesian inductive inference and
maximum entropy",
BOOKTITLE =" Maximum Entropy and {B}ayesian Methods in
Science and Engineering, vol. 1: Foundations",
EDITOR ="G.J. Erickson and C.R. Smith",
PUBLISHER ="Kluwer",
ADDRESS ="Dordrecht",
YEAR ="1988",
PAGES ="53-74"}
@INPROCEEDINGS{Gull88,
KEY ="Gull",
AUTHOR ="S.F. Gull",
TITLE ="{B}ayesian data analysis: straight-line fitting",
BOOKTITLE ="Maximum Entropy and {B}ayesian Methods, {C}ambridge 1988",
EDITOR ="J. Skilling",
PUBLISHER ="Kluwer",
ADDRESS ="Dordrecht",
YEAR ="1989",
PAGES ="511--518"}
@INPROCEEDINGS{Sibisi1,
KEY ="Sibisi",
AUTHOR ="S. Sibisi",
TITLE ="Regularization and inverse problems",
BOOKTITLE ="Maximum Entropy and {B}ayesian Methods, {C}ambridge 1988",
EDITOR ="J. Skilling",
PUBLISHER ="Kluwer",
ADDRESS ="Dordrecht",
YEAR ="1989",
PAGES ="389--396"}
% A comparison of cross val with {B}ayes choice of alpha. Not at all
% conclusive. ** above is far more thorough.
@INPROCEEDINGS{Skilling1,
KEY ="Skilling",
AUTHOR ="J. Skilling",
TITLE ="On parameter estimation and quantified MaxEnt",
BOOKTITLE ="Maximum Entropy and {B}ayesian Methods, {L}aramie, 1990",
EDITOR ="W.T. Grandy and L. Schick",
PUBLISHER ="Kluwer",
ADDRESS ="Dordrecht",
YEAR ="1991",
PAGES ="267--273"}
@INPROCEEDINGS{BubblingSusie,
KEY ="Skilling et. al.",
AUTHOR ="J. Skilling and D.R.T. Robinson and
S.F. Gull",
TITLE ="Probabilistic displays",
BOOKTITLE ="Maximum Entropy and {B}ayesian Methods, {L}aramie, 1990",
YEAR ="1991",
EDITOR ="W.T. Grandy and L. Schick",
PUBLISHER ="Kluwer",
ADDRESS ="Dordrecht",
PAGES ="365--368"}
@INPROCEEDINGS{Charter,
KEY ="Charter",
AUTHOR ="M.K. Charter",
TITLE ="Quantifying drug absorption",
BOOKTITLE ="Maximum Entropy and {B}ayesian Methods, {L}aramie, 1990",
EDITOR ="W.T. Grandy and L. Schick",
PUBLISHER ="Kluwer",
ADDRESS ="Dordrecht",
YEAR ="1991",
PAGES ="245--252"}
@INPROCEEDINGS{JaynesME90,
KEY ="Jaynes",
AUTHOR ="E.T. Jaynes",
TITLE ="",
BOOKTITLE ="Maximum Entropy and {B}ayesian Methods, {L}aramie, 1990",
YEAR ="1991",
EDITOR ="W.T. Grandy and L. Schick",
PUBLISHER ="Kluwer",
ADDRESS ="Dordrecht",
PAGES =""}
@INPROCEEDINGS{Image.contest,
KEY ="Bontekoe",
AUTHOR ="T.R. Bontekoe",
TITLE ="The image reconstruction contest",
BOOKTITLE ="Maximum Entropy and {B}ayesian Methods, {L}aramie, 1990",
YEAR ="1991",
EDITOR ="W.T. Grandy and L. Schick",
PUBLISHER ="Kluwer",
ADDRESS ="Dordrecht"
}
@ARTICLE{Rubin84,
KEY ="Rubin",
AUTHOR ="D.B. Rubin",
TITLE ="{B}ayesianly justifiable and relevant frequency
calculations for the applied statistician",
JOURNAL ="Ann. Stat.",
YEAR ="1984",
VOLUME ="12",
NUMBER ="4",
PAGES ="1151--1172"}
@INPROCEEDINGS{Sibisi2,
KEY ="Sibisi",
AUTHOR ="S. Sibisi",
TITLE ="{B}ayesian interpolation",
BOOKTITLE ="Maximum Entropy and {B}ayesian Methods, {L}aramie, 1990",
YEAR ="1991",
EDITOR ="W.T. Grandy and L. Schick",
PUBLISHER ="Kluwer",
ADDRESS ="Dordrecht",
PAGES ="349--355"}
% Studies non-noisy interpolation
@TECHREPORT{Skilling_and_Sibisi,
KEY ="Skilling and Sibisi",
AUTHOR ="J. Skilling and S. Sibisi",
TITLE ="Maximum Entropy Data Analysis",
YEAR ="1990",
INSTITUTION ="University of Cambridge"}
@TECHREPORT{G1.tr,
KEY ="Gull",
AUTHOR ="S.F. Gull",
TITLE ="{B}ayesian inductive inference and maximum entropy",
YEAR ="1985",
INSTITUTION ="University of Cambridge Dept. of Physics",
NUMBER ="1326"}
@MANUAL{GS3,
KEY ="Gull and Skilling",
AUTHOR ="S.F. Gull and J. Skilling",
TITLE ="Quantified Maximum Entropy. \verb+MemSys5+ User's manual",
ORGANIZATION ="M.E.D.C.",
ADDRESS ="33 North End, Royston, SG8 6NR, England",
YEAR ="1991"}
@BOOK{Maxent90,
KEY ="Grandy and Schick",
EDITOR ="Grandy, Jr., W.T. and Schick, L.H.",
TITLE ="Maximum Entropy and {B}ayesian Methods, {L}aramie 1990",
PUBLISHER ="Kluwer",
YEAR ="1991"}
@BOOK{Maxent88,
KEY ="Skilling",
EDITOR ="J. Skilling",
TITLE ="Maximum Entropy and {B}ayesian Methods, {C}ambridge 1988",
PUBLISHER ="Kluwer",
YEAR ="1989"}
@INPROCEEDINGS{Skilling_clouds,
KEY ="Skilling",
AUTHOR ="J. Skilling",
TITLE ="{B}ayesian numerical analysis",
BOOKTITLE ="Physics and Probability",
EDITOR ="W.T. Grandy Jr. and P. Milonni",
PUBLISHER ="C.U.P.",
ADDRESS ="Cambridge",
YEAR ="1993"}
@TECHREPORT{Radford.mixture,
KEY ="Neal",
AUTHOR ="R.M. Neal",
TITLE ="{B}ayesian mixture modelling by
{M}onte {C}arlo simulation",
YEAR ="1991",
NUMBER ="CRG--TR--91--2",
INSTITUTION ="Dept. of Computer Science, University
of Toronto"}
@TECHREPORT{Neal_92,
KEY ="Neal",
AUTHOR ="R.M. Neal",
TITLE ="{B}ayesian Training of Backpropagation
Networks by the Hybrid {M}onte {C}arlo method",
YEAR ="1992",
NUMBER ="CRG--TR--92--1",
INSTITUTION ="Dept. of Computer Science, University
of Toronto"}
% better to ref Neal_nips5
@TECHREPORT{Neal_dop,
KEY ="Neal",
AUTHOR ="R.M. Neal",
TITLE ="Probabilistic Inference using
{M}arkov Chain {M}onte {C}arlo Methods",
YEAR ="1993",
NUMBER ="CRG--TR--93--1",
INSTITUTION ="Dept. of Computer Science, University of Toronto"}
@TECHREPORT{AutoClass,
KEY ="Hanson, Stutz and Cheeseman",
AUTHOR ="R. Hanson and J. Stutz and Peter Cheeseman",
TITLE ="{B}ayesian classification theory",
YEAR ="1991",
NUMBER ="FIA--90-12-7-01",
INSTITUTION ="NASA Ames"}
@INPROCEEDINGS{NW91,
KEY ="Weir",
AUTHOR ="N. Weir",
TITLE ="Applications of maximum entropy techniques to {HST} data",
BOOKTITLE ="Proceedings of the {ESO/ST--ECF Data} Analysis Workshop, {A}pril 1991",
YEAR ="1991"}
@ARTICLE{Kashyap,
KEY ="Kashyap",
AUTHOR ="R.L. Kashyap",
TITLE ="A {B}ayesian comparison of different classes of dynamic
models using empirical data",
JOURNAL ="IEEE Transactions on Automatic Control",
YEAR ="1977",
VOLUME ="AC-22",
NUMBER ="5",
PAGES ="715--727"}
% This paper includes a rediscovery of {B}ayesian model comparison and the fact
% that it embodies Occam's razor. - In the context of models for time series.
% It also includes a thorough discussion of how this is different from `Hypothesis
% testing'. At a few points I disagree with his statements but nearly all of it
% gets full marks from me.
@BOOK{Lempers,
KEY ="Lempers",
AUTHOR ="F.B. Lempers",
TITLE ="Posterior probabilities of alternative linear models",
PUBLISHER ="Rotterdam University Press",
YEAR ="1971"}
% Has a lot of discussion of conjugate priors. No mention of Occam's razor.
% Looks readable in parts.
% Active learning
% Experimental design
@ARTICLE{Lindley,
KEY ="Lindley",
AUTHOR ="D.V. Lindley",
TITLE ="On a measure of the information provided
by an experiment",
JOURNAL ="Ann. Math. Statist.",
YEAR ="1956",
VOLUME ="27",
PAGES ="986--1005"}
@INPROCEEDINGS{Skilling92,
KEY ="Skilling",
AUTHOR ="J. Skilling",
TITLE ="{B}ayesian solution of ordinary differential equations",
BOOKTITLE ="Maximum Entropy and {B}ayesian Methods, {S}eattle 1991",
EDITOR ="C.R. Smith and G.J. Erickson and P.O. Neudorfer",
PUBLISHER ="Kluwer",
ADDRESS ="Dordrecht",
YEAR ="1992",
PAGES ="23--37"}
@BOOK{Fedorov,
KEY ="Fedorov",
AUTHOR ="V.V. Fedorov",
TITLE ="Theory of optimal experiments",
PUBLISHER ="Academic press",
YEAR ="1972"}
@BOOK{Fukunaga,
KEY ="Fukunaga",
AUTHOR ="K. Fukunaga",
TITLE ="Introduction to statistical pattern recognition",
PUBLISHER ="Academic press",
YEAR ="1972"}
@INPROCEEDINGS{El-Gamal,
KEY ="El-Gamal",
AUTHOR ="M.A. El-Gamal",
TITLE ="The role of priors in active {B}ayesian learning in the
sequential statistical decision framework",
BOOKTITLE ="Maximum Entropy and {B}ayesian Methods, {L}aramie, 1990",
YEAR ="1991",
EDITOR ="W.T. Grandy and L. Schick",
PUBLISHER ="Kluwer",
ADDRESS ="Dordrecht",
PAGES ="33--38"}
@ARTICLE{Baum,
KEY ="Baum",
AUTHOR ="E.B. Baum",
TITLE ="Neural Net algorithms that learn in
polynomial time from examples and queries",
JOURNAL ="IEEE Trans. on neural
networks",
YEAR ="1991",
VOLUME ="2",
NUMBER ="1",
PAGES ="5--19"}
@ARTICLE{Query91,
KEY ="Hwang \etal",
AUTHOR ="J-N. Hwang and J.J. Choi and S. Oh and R.J. Marks II",
TITLE ="Query--based learning applied to partially trained
multilayer perceptrons",
JOURNAL ="IEEE Trans. on Neural
networks",
YEAR ="1991",
VOLUME ="2",
NUMBER ="1",
PAGES ="131--136"}
@TECHREPORT{Plutowski_White,
KEY ="Plutowski and White",
AUTHOR ="M. Plutowski and H. White",
TITLE ="Active selection of training examples for Network learning
in noiseless environments",
YEAR ="1991",
NUMBER ="TR 90-011",
INSTITUTION ="Dept. Computer Science, UCSD"}
% MDL
All three of these make clear that MDL = {B}ayes
@ARTICLE{Wallace_Freeman,
KEY ="Wallace and Freeman",
AUTHOR ="C.S. Wallace and P.R. Freeman",
TITLE ="Estimation and Inference by Compact Coding",
JOURNAL ="J.R. Statist. Soc. B",
YEAR ="1987",
VOLUME ="49",
NUMBER ="3",
PAGES ="240--265"}
@INCOLLECTION{Patrick_Wallace,
KEY ="Patrick and Wallace",
AUTHOR ="J.D. Patrick and C.S. Wallace",
TITLE ="Stone circle geometries: an information
theory approach",
BOOKTITLE ="Archaeoastronomy in the {O}ld {W}orld",
YEAR ="1982",
EDITOR ="D.C. Heggie",
PAGES ="231--264",
PUBLISHER ="Cambridge Univ. Press"}
@ARTICLE{Schwarz,
KEY ="Schwarz",
AUTHOR ="G. Schwarz",
TITLE ="Estimating the dimension of a model",
JOURNAL ="Ann. Stat.",
YEAR ="1978",
VOLUME ="6 ",
NUMBER ="2",
PAGES ="461--464"}
@ARTICLE{WB,
KEY ="Wallace and Boulton",
AUTHOR ="C.S. Wallace and D.M. Boulton",
TITLE ="An information measure for classification",
JOURNAL ="Comput. J.",
YEAR ="1968",
VOLUME ="11 ",
NUMBER ="2",
PAGES ="185--194"}
% Marginalization
@ARTICLE{Spiegelhalter,
KEY ="Spiegelhalter and Lauritzen",
AUTHOR ="D.J. Spiegelhalter and S.L. Lauritzen",
TITLE ="Sequential updating of conditional probabilities on
directed graphical structures",
JOURNAL ="Networks",
YEAR ="1990",
VOLUME ="20",
NUMBER ="",
PAGES ="579--605"}
% neural net algorithm for MaxEnt:
@ARTICLE{MP2,
KEY ="Marrian and Peckerar",
AUTHOR ="C.R.K. Marrian and M.C. Peckerar",
TITLE ="Electronic Neural Net Algorithm for Maximum Entropy Solutions of
Ill-Posed Problems",
JOURNAL ="IEEE Trans. Circ. Sys.",
YEAR ="1989",
VOLUME ="36",
NUMBER ="",
PAGES ="288--294"}
@INPROCEEDINGS{MP1,
KEY ="Marrian and Peckerar",
AUTHOR ="C.R.K. Marrian and M.C. Peckerar",
TITLE ="Electronic Neural Net Algorithm for Maximum Entropy Solutions of
Ill-Posed Problems",
BOOKTITLE ="Maximum Entropy and {B}ayesian Methods, {C}ambridge 1988",
EDITOR ="J. Skilling",
PUBLISHER ="Kluwer",
ADDRESS ="Dordrecht",
YEAR ="1989"}
% NEURAL NETS OPTIMISATION OF number parameters, regularisers, etc.
@INCOLLECTION{Weigend,
KEY ="Weigend et. al.",
AUTHOR ="A.S. Weigend and D.E. Rumelhart
and B.A. Huberman",
TITLE ="Generalization by weight--elimination
with applications to forecasting",
BOOKTITLE ="Advances in Neural Information Processing Systems 3",
YEAR ="1991",
EDITOR ="R.P. Lippmann et. al.",
PAGES ="875--882",
PUBLISHER ="Morgan Kaufmann"}
@PHDTHESIS{Nowlan,
KEY ="Nowlan",
AUTHOR ="S.J. Nowlan",
TITLE ="Soft competitive adaptation:
neural Network learning algorithms based on fitting statistical mixtures",
YEAR ="1991",
NOTE ="CS--91--126",
SCHOOL ="Carnegie Mellon University"}
@INCOLLECTION{BM,
KEY ="Hinton and Sejnowski",
AUTHOR ="G.E. Hinton and T.J. Sejnowski",
TITLE ="Learning and relearning in Boltzmann machines",
BOOKTITLE ="Parallel Distributed Processing",
YEAR ="1986",
EDITOR ="Rumelhart \etal",
PAGES ="282--317",
PUBLISHER ="MIT Press"}
@ARTICLE{Ji,
KEY ="Ji \etal",
AUTHOR ="C. Ji and R.R. Snapp and D. Psaltis",
TITLE ="Generalizing smoothness constraints from discrete samples",
JOURNAL ="Neural Computation",
YEAR ="1990",
VOLUME ="2 ",
NUMBER ="2",
PAGES ="188--197"}
@TECHREPORT{LT,
KEY ="Lee and Tenorio",
AUTHOR ="W.T. Lee and M.F. Tenorio",
TITLE ="On Optimal Adaptive Classifier Design Criterion ---
How many hidden units are Necessary for an optimal Neural
network classifier?",
YEAR ="1991",
NUMBER ="TR-EE-91-5",
INSTITUTION ="Purdue University"}
@ARTICLE{Abu1,
KEY ="Abu-Mostafa",
AUTHOR ="Y.S. Abu-Mostafa",
TITLE ="The {V}apnik--{C}hervonenkis
dimension: information versus complexity in learning",
JOURNAL ="Neural Computation",
YEAR ="1990",
VOLUME ="1 ",
NUMBER ="3",
PAGES ="312--317"}
@ARTICLE{Abu,
KEY ="Abu-Mostafa",
AUTHOR ="Y.S. Abu-Mostafa",
TITLE ="Learning from hints in Neural Networks",
JOURNAL ="J. Complexity",
YEAR ="1990",
VOLUME ="6",
NUMBER ="",
PAGES ="192--198"}
% includes an example of a regulariser `hint'
@INPROCEEDINGS{Haussler,
KEY ="Haussler \etal",
AUTHOR ="D. Haussler and M. Kearns and R. Schapire",
TITLE ="Bounds on the sample complexity of {B}ayesian learning using information
theory and the {VC} dimension",
BOOKTITLE ="Proceedings of the fourth {COLT} workshop",
PUBLISHER ="Morgan Kaufmann",
ADDRESS ="San Mateo, California",
YEAR ="1991"
}
@INPROCEEDINGS{MacKay92am,
KEY ="MacKay",
AUTHOR ="D.J.C. MacKay",
TITLE ="{B}ayesian interpolation",
BOOKTITLE ="Maximum Entropy and {B}ayesian Methods, {S}eattle 1991",
EDITOR ="C.R. Smith and G.J. Erickson and P.O. Neudorfer",
PUBLISHER ="Kluwer",
ADDRESS ="Dordrecht",
YEAR ="1992",
PAGES ="39--66"}
@INPROCEEDINGS{MacKay92bm,
KEY ="MacKay",
AUTHOR ="D.J.C. MacKay",
TITLE ="The evidence for neural networks",
BOOKTITLE ="Maximum Entropy and {B}ayesian Methods, {S}eattle 1991",
EDITOR ="C.R. Smith and G.J. Erickson and P.O. Neudorfer",
PUBLISHER ="Kluwer",
ADDRESS ="Dordrecht",
YEAR ="1992",
PAGES ="165--183"}
% OTHER PAPERS ON OCCAM
@INPROCEEDINGS{Ponting,
KEY ="Ponting",
AUTHOR ="K.M. Ponting",
TITLE ="A statistical approach to the determination of hidden {M}arkov model structure",
BOOKTITLE ="7th {FASE} Symposium",
YEAR ="1988"}
%---------------------------------
@ARTICLE{Angel,
KEY ="Angel \etal",
AUTHOR ="J.R.P. Angel and P. Wizinowich and M. Lloyd-Hart and D. Sandler",
TITLE ="Adaptive optics for array telescopes using Neural-network techniques",
JOURNAL ="Nature",
YEAR ="1990",
VOLUME ="348",
NUMBER ="",
PAGES ="221--224"}
% Nov 1990
% J.R.P.Angel
% Steward Observatory
% University of Arizona
% Tucson
% AZ 85721
% USA
@ARTICLE{Bayes,
KEY ="Bayes",
AUTHOR ="T. Bayes",
TITLE ="An essay towards solving a
problem in the doctrine of chances",
JOURNAL ="Philos. Trans. R. Soc.
London",
YEAR ="1763",
VOLUME ="53",
NUMBER ="",
PAGES ="370--418"}
% , reprinted in {\em Biometrika} (1958) {\bf 45}, 293--315
@BOOK{Bretthorst,
KEY ="Bretthorst",
AUTHOR ="G.L. Bretthorst",
TITLE ="{B}ayesian spectrum analysis
and parameter estimation",
PUBLISHER ="Springer",
YEAR ="1988"}
@ARTICLE{Bretthorst.JMR,
KEY ="Bretthorst",
AUTHOR ="G.L. Bretthorst",
TITLE ="{B}ayesian Analysis.
I. Parameter Estimation Using Quadrature NMR Models.
II. Signal Detection and Model Selection.
III. Applications to NMR.",
JOURNAL ="J. Magnetic Resonance",
YEAR ="1990",
VOLUME ="88 ",
NUMBER ="3",
PAGES ="533--595"}
@INPROCEEDINGS{Gull:nonparam,
KEY ="Gull and Fielden",
AUTHOR ="S.F. Gull and J. Fielden",
TITLE ="{B}ayesian Non-Parametric Statistics",
BOOKTITLE ="Maximum Entropy and {B}ayesian Methods in
Applied Statistics",
EDITOR ="J.H. Justice",
PUBLISHER ="C.U.P.",
ADDRESS ="Cambridge",
YEAR ="1986",
PAGES ="85--94"}
@BOOK{Jeffreys,
KEY ="Jeffreys",
AUTHOR ="H. Jeffreys",
TITLE ="Theory of Probability",
PUBLISHER ="Oxford Univ. Press",
YEAR ="1939"}
@TECHREPORT{Radford,
KEY ="Neal",
AUTHOR ="R.M. Neal",
TITLE ="{B}ayesian mixture modeling by
{M}onte {C}arlo simulation",
YEAR ="1991",
NUMBER ="Preprint",
INSTITUTION ="Dept. of Computer Science, University of Toronto"}
@ARTICLE{Rissanen1,
KEY ="Rissanen",
AUTHOR ="J. Rissanen",
TITLE ="Modeling by shortest data description",
JOURNAL ="Automatica",
YEAR ="1978",
VOLUME ="14",
NUMBER ="",
PAGES ="465--471"}
@TECHREPORT{Seung,
KEY ="Seung \etal",
AUTHOR ="H.S. Seung and H. Sompolinsky and N. Tishby",
TITLE ="Statistical mechanics of learning from examples",
YEAR ="1991",
NUMBER ="",
INSTITUTION ="preprint"}
% Disabled: duplicate of the Maxent88 entry defined earlier in this file
% (repeated keys are an error); it also carried an invalid YEAR ="1989a".
% BOOK{Maxent88,
% KEY ="Skilling",
% AUTHOR ="J. Skilling, editor",
% TITLE ="Maximum Entropy and {B}ayesian Methods, Cambridge 1988",
% PUBLISHER ="Kluwer",
% YEAR ="1989a"}
@BOOK{Szeliski,
KEY ="Szeliski",
AUTHOR ="R. Szeliski",
TITLE ="{B}ayesian modeling of uncertainty in low level vision",
PUBLISHER ="Kluwer",
YEAR ="1989"}
% From haussler
% We just did a journal version as an invited paper to the special
% issue of MAchine Learning, on the COLT `91 conference. However, that
% paper is still being reviewed. This long version will appear also as
% tech rep UCSC-CRL-91-44. Right now though, the best references to this
% and related work are:
@inproceedings{OH.colt,
author= "Opper, M. and D. Haussler",
title= "Calculation of the learning curve of {B}ayes Optimal
classification algorithm for learning a perceptron with noise",
booktitle= "Computational Learning Theory: Proceedings of the
Fourth Annual Workshop",
publisher= "Morgan Kaufmann",
pages= "75--87",
year= 1991
}
@inproceedings{HKS,
author= "Haussler, D. and M. Kearns and R. Schapire",
title= "Bounds on the sample complexity of {B}ayesian learning
using information theory and the {VC} dimension",
booktitle= "Proceedings of the Fourth Workshop on Computational
Learning Theory",
pages= "61--74",
year= 1991
}
@article{OH.prl,
author= "Opper, M. and D. Haussler",
title= "Generalization performance of {B}ayes Optimal classification
algorithm for learning a perceptron",
journal= "Physical Review Letters",
year= 1991,
volume= 66,
number= 20,
month= may,
pages= "2677--2680"
}
@ARTICLE{dirichlet,
KEY ="Zabell",
AUTHOR ="S.L. Zabell",
TITLE ="{W.E. Johnson}'s ``sufficientness''
postulate",
JOURNAL ="Annals of Statistics",
YEAR ="1982",
VOLUME ="10 ",
NUMBER ="4",
PAGES ="1091--1099"}
% Arithmetic Encoding:
@ARTICLE{arith_coding,
KEY ="Witten \etal",
AUTHOR ="I.H. Witten and R.M. Neal and J.G. Cleary",
TITLE ="Arithmetic encoding for data compression",
JOURNAL ="Communications of the ACM",
YEAR ="1987",
VOLUME ="30",
NUMBER ="6",
PAGES ="520--540"}
% Abstract:
% The state of the art in data comrpession is arithmetic encoding, not the
% better known Huffman method. Arithmetic encoding gives greater compression,
% is faster for adaptive models, and clearly separates the model from the
% channel encoding.
% ``Smoothing'' a la IBM
@ARTICLE{Bahl,
KEY ="Bahl et. al.",
AUTHOR ="L.R. Bahl and F. Jelinek and R.L. Mercer",
TITLE ="A maximum likelihood approach to continuous speech
recognition",
JOURNAL ="IEEE Trans",
YEAR ="1983",
VOLUME ="PAMI--5 ",
NUMBER ="2",
PAGES ="179--190"}
@INPROCEEDINGS{Jelinek_Mercer,
KEY ="Jelinek and Mercer",
AUTHOR ="F. Jelinek and R.L. Mercer",
TITLE ="Interpolated estimation of {M}arkov source parameters
from sparse data",
BOOKTITLE ="Pattern recognition in practice",
EDITOR ="E.S. Gelsema and L.N. Kanal",
PUBLISHER ="North--Holland publishing company",
YEAR ="1980",
PAGES ="381--402"}
@ARTICLE{Nadas,
KEY ="Nadas",
AUTHOR ="A. Nadas",
TITLE ="Estimation of probabilities in the language model of the
{IBM} speech recognition system",
JOURNAL ="IEEE Trans",
YEAR ="1984",
VOLUME ="ASSP--32 ",
NUMBER ="4",
PAGES ="859--861"}
@ARTICLE{Copas:83,
KEY ="Copas",
AUTHOR ="J.B. Copas",
TITLE ="Regression, Prediction and Shrinkage (with Discussion)",
JOURNAL ="J.R.Statist.Soc B",
YEAR ="1983",
VOLUME ="45",
NUMBER ="3",
PAGES ="311--354"}
% This discusses ``Preshrunk predictors''.
% It does also give the Bayesian answer for one case, but then rambles
% off again to terrible
% all-predictions-fudged-by-factor-k
% methods.
% This one is about including the possibility of incorrect binary labels
@ARTICLE{Copas:88,
KEY ="Copas",
AUTHOR ="J.B. Copas",
TITLE ="Binary Regression Models for Contaminated Data
(with Discussion)",
JOURNAL ="J.R.Statist.Soc B",
YEAR ="1988",
VOLUME ="50",
NUMBER ="2",
PAGES ="225--265"}
@INPROCEEDINGS{Nowlan.sunspot,
KEY ="Nowlan and Hinton",
AUTHOR ="S.J. Nowlan and G.E. Hinton",
TITLE ="Adaptive Soft Weight Tying using {G}aussian Mixtures",
BOOKTITLE ="Advances in Neural Information Processing Systems 4",
EDITOR ="J.E. Moody and S.J. Hanson and R.P. Lippmann",
PUBLISHER ="Morgan Kaufmann",
ADDRESS ="San Mateo, California",
YEAR ="1992",
PAGES ="993--1000"}
@BOOK{Reif,
KEY ="Reif",
AUTHOR ="F. Reif",
TITLE ="Fundamentals of statistical and thermal physics",
PUBLISHER ="McGraw--Hill",
YEAR ="1965"}
@INPROCEEDINGS{Brain_Surgeon,
KEY ="Hassibi and Stork",
AUTHOR ="B. Hassibi and D.G. Stork",
TITLE ="Second Order Derivatives for Network Pruning:
Optimal Brain Surgeon",
BOOKTITLE ="Advances in Neural Information Processing Systems 5",
EDITOR ="C.L. Giles and S.J. Hanson and J.D. Cowan",
PUBLISHER ="Morgan Kaufmann",
ADDRESS ="San Mateo, California",
YEAR ="1993",
PAGES ="164--171"}
% They use a cute iterative procedure for calculating the inverse
% (Hessian+alpha I) in a single pass through the data, with a large number of
% matrix multiplications.
@INPROCEEDINGS{LSP:hessian,
KEY ="LeCun et. al.",
AUTHOR ="LeCun, Y. and P.Y. Simard and B. Pearlmutter",
TITLE ="Automatic Learning Rate Maximization by On-line
Estimation of the Hessian's Eigenvectors",
BOOKTITLE ="Advances in Neural Information Processing Systems 5",
EDITOR ="C.L. Giles and S.J. Hanson and J.D. Cowan",
PUBLISHER ="Morgan Kaufmann",
ADDRESS ="San Mateo, California",
YEAR ="1993",
PAGES ="156--163"}
@INPROCEEDINGS{SLD:nips5,
KEY ="Simard, LeCun and Denker",
AUTHOR ="P. Simard and LeCun, Y. and J. Denker",
TITLE ="Efficient Pattern Recognition Using a New
Transformation Distance",
BOOKTITLE ="Advances in Neural Information Processing Systems 5",
EDITOR ="C.L. Giles and S.J. Hanson and J.D. Cowan",
PUBLISHER ="Morgan Kaufmann",
ADDRESS ="San Mateo, California",
YEAR ="1993",
PAGES ="50--58"}
@INPROCEEDINGS{nips6,
KEY ="",
AUTHOR ="",
TITLE ="",
BOOKTITLE ="Advances in Neural Information Processing Systems 6",
EDITOR ="J.D. Cowan and G. Tesauro and J. Alspector",
PUBLISHER ="Morgan Kaufmann",
ADDRESS ="San Mateo, California",
YEAR ="1994",
PAGES =""}
@INPROCEEDINGS{boosting,
KEY ="",
AUTHOR ="H. Drucker and R. Schapire and P. Simard",
TITLE ="Improving Performance in Neural Networks Using a
Boosting Algorithm",
BOOKTITLE ="Advances in Neural Information Processing Systems 5",
EDITOR ="C.L. Giles and S.J. Hanson and J.D. Cowan",
PUBLISHER ="Morgan Kaufmann",
ADDRESS ="San Mateo, California",
YEAR ="1993",
PAGES ="42--49"}
% I would call this a data selection procedure, plus a funky modelling
% rule. It depends theoretically on the assumption that the model can
% do better than 50% on any sub-ensemble from the training set.
@INPROCEEDINGS{nips5,
KEY ="",
AUTHOR ="",
TITLE ="",
BOOKTITLE ="Advances in Neural Information Processing Systems 5",
EDITOR ="C.L. Giles and S.J. Hanson and J.D. Cowan",
PUBLISHER ="Morgan Kaufmann",
ADDRESS ="San Mateo, California",
YEAR ="1993",
PAGES =""}
@INPROCEEDINGS{Wolpert_nips,
KEY ="Wolpert",
AUTHOR ="D.H. Wolpert",
TITLE ="On the use of evidence in Neural Networks",
BOOKTITLE ="Advances in Neural Information Processing Systems 5",
EDITOR ="C.L. Giles and S.J. Hanson and J.D. Cowan",
PUBLISHER ="Morgan Kaufmann",
ADDRESS ="San Mateo, California",
YEAR ="1993",
PAGES ="539--546"}
@INPROCEEDINGS{Neal_nips5,
KEY ="Neal",
AUTHOR ="R.M. Neal",
TITLE ="{B}ayesian learning via stochastic dynamics",
BOOKTITLE ="Advances in Neural Information Processing Systems 5",
EDITOR ="C.L. Giles and S.J. Hanson and J.D. Cowan",
PUBLISHER ="Morgan Kaufmann",
ADDRESS ="San Mateo, California",
YEAR ="1993",
PAGES ="475--482"}
@TECHREPORT{Thodberg,
KEY ="Thodberg",
AUTHOR ="H.H. Thodberg",
TITLE ="Ace of {B}ayes: application
of Neural Networks with pruning",
YEAR ="1993",
NUMBER ="1132 E",
INSTITUTION ="Danish meat research institute"}
@BOOK{HKP,
KEY ="Hertz \etal",
AUTHOR ="J. Hertz and A. Krogh and R.G. Palmer",
TITLE ="Introduction to the Theory of Neural Computation",
PUBLISHER ="Addison-Wesley",
YEAR ="1991"}
@INPROCEEDINGS{strauss,
KEY ="Strauss \etal",
AUTHOR ="C.E.M. Strauss and D.H. Wolpert and D.R. Wolf",
TITLE ="Alpha, Evidence, and the Entropic Prior",
BOOKTITLE ="Maximum Entropy and {B}ayesian Methods, {P}aris 1992",
EDITOR ="A. Mohammed-Djafari",
PUBLISHER ="Kluwer",
ADDRESS ="Dordrecht",
YEAR ="1993"}
@INPROCEEDINGS{MacKay94:alpha,
KEY ="MacKay",
AUTHOR ="D.J.C. MacKay",
TITLE ="Hyperparameters: Optimise, or Integrate out?",
BOOKTITLE ="Maximum Entropy and {B}ayesian Methods, {S}anta {B}arbara 1993",
EDITOR ="G. Heidbreder",
PUBLISHER ="Kluwer",
ADDRESS ="Dordrecht",
YEAR ="1994"}
@INCOLLECTION{MacKay93:review,
KEY ="MacKay",
AUTHOR ="D.J.C. MacKay",
TITLE ="Bayesian Methods for Backpropagation Networks",
BOOKTITLE ="Models of Neural Networks II",
EDITOR ="van Hemmen, J.L. and E.~Domany and K.~Schulten",
PUBLISHER ="Springer-Verlag",
ADDRESS ="New York",
YEAR ="1993"}
@TECHREPORT{MacKay93:alpha,
KEY ="MacKay",
AUTHOR ="D.J.C. MacKay",
TITLE ="Hyperparameters: Optimise, or Integrate out?",
YEAR ="1993",
NUMBER ="in preparation",
INSTITUTION ="University of Cambridge"}
@TECHREPORT{MacKay93:pred,
KEY ="MacKay",
AUTHOR ="D.J.C. MacKay",
TITLE ="Bayesian non-linear modelling for the 1993 energy prediction
competition",
YEAR ="1993",
NUMBER ="in preparation",
INSTITUTION ="University of Cambridge"}
@INPROCEEDINGS{MacKay94:pred,
KEY ="MacKay",
AUTHOR ="D.J.C. MacKay",
TITLE ="Bayesian non-linear modelling for the 1993 energy prediction
competition",
BOOKTITLE ="Maximum Entropy and {B}ayesian Methods, {S}anta {B}arbara 1993",
EDITOR ="G. Heidbreder",
PUBLISHER ="Kluwer",
ADDRESS ="Dordrecht",
YEAR ="1994"}
@TECHREPORT{breiman,
KEY ="Breiman",
AUTHOR ="L. Breiman",
TITLE ="Stacked regressions",
YEAR ="1992",
NUMBER ="367",
INSTITUTION ="Dept. of Stat., Univ. of Cal. Berkeley"}
@TECHREPORT{ARD,
KEY ="MacKay and Neal",
AUTHOR ="D.J.C. MacKay and R.M. Neal",
TITLE ="Automatic relevance determination for Neural Networks",
YEAR ="1993",
NUMBER ="in preparation",
INSTITUTION ="Cambridge University"}
@TECHREPORT{Radford_infinite_nets,
KEY ="Neal",
AUTHOR ="R.M. Neal",
TITLE ="Priors for infinite Networks",
YEAR ="1993",
NUMBER ="in preparation",
INSTITUTION ="Univ. of Toronto"}
@PHDTHESIS{MacKay91,
AUTHOR ="D.J.C. MacKay",
TITLE ="Bayesian Methods for Adaptive Models",
YEAR ="1991",
SCHOOL ="California Institute of Technology"}
@ARTICLE{Pearlmutter,
KEY ="Pearlmutter",
AUTHOR ="Pearlmutter, B.",
TITLE ="To Appear",
JOURNAL ="Neural Computation",
YEAR ="1993",
VOLUME ="",
NUMBER ="",
PAGES =""}
@ARTICLE{mollon92,
KEY ="Mollon and Bowmaker",
AUTHOR ="Mollon, J.D. and Bowmaker, J.K.",
TITLE ="The Spatial Arrangement of Cones in the Primate Fovea",
JOURNAL ="Nature",
YEAR ="1992",
VOLUME ="360",
NUMBER ="",
PAGES ="677--679"}
@UNPUBLISHED{Hinton_bb,
AUTHOR ="Hinton, G.E. and van Camp, D.",
TITLE ="Keeping Neural Networks Simple by Minimizing the
Description Length of the Weights",
NOTE ="To appear in: {\it Proceedings of COLT-93}",
YEAR ="1993"}
@BOOK{Draper,
KEY ="Draper, Norman Richard",
AUTHOR ="Draper, N.R. and H. Smith",
TITLE ="Applied regression analysis",
PUBLISHER ="Wiley",
ADDRESS ="New York",
YEAR ="1966"
}