% \VignetteIndexEntry{Bayesian Hierarchical Clustering}
\documentclass[a4paper]{article}

\title{Bayesian Hierarchical Clustering}
\author{Rich Savage}

\SweaveOpts{echo=true}
\usepackage{a4wide}

\begin{document}

\maketitle

This is a simple Sweave test of the Bayesian Hierarchical Clustering method,
as implemented in the R package \emph{BHC}. It runs the example code as given
in the R help file and generates a resulting dendrogram plot, to show the sort
of results one can expect.

%% Code to generate the BHC dendrograms.
%% NOTE(review): the chunk labels/options of this vignette were not recoverable
%% from the reviewed copy; the labels below are new -- confirm against the
%% package sources.
<<clustering>>=
library(BHC)

## Fix the RNG seed so that the noisy data used for the discretised run below
## (and therefore hc3) are the same on every build of the vignette.
set.seed(1)

## Build sample data and labels: a 15 x 10 matrix made of three groups of five
## identical rows (values 1, 2 and 3), labelled "a", "b" and "c".
data <- matrix(0, 15, 10)
itemLabels <- vector("character", 15)
data[1:5, ] <- 1
itemLabels[1:5] <- "a"
data[6:10, ] <- 2
itemLabels[6:10] <- "b"
data[11:15, ] <- 3
itemLabels[11:15] <- "c"
timePoints <- 1:10 # for the time-course case

## Data dimensions (used below to size the added noise).
nDataItems <- nrow(data)
nFeatures <- ncol(data)

## Run multinomial clustering.
hc1 <- bhc(data, itemLabels, verbose = TRUE)

## Run time-course clustering.
hc2 <- bhc(data, itemLabels, 0, timePoints, "time-course",
           numReps = 1, noiseMode = 0, numThreads = 1, verbose = TRUE)

## Output cluster labels to file.
WriteOutClusterLabels(hc1, "labels.txt", verbose = TRUE)

## For the multinomial case, the data can be discretised.
## One Gaussian noise value (mean 0, sd 0.1) is added per matrix entry.
newData <- data + rnorm(nDataItems * nFeatures, mean = 0, sd = 0.1)
percentiles <- FindOptimalBinning(newData, itemLabels,
                                  transposeData = TRUE, verbose = TRUE)
## DiscretiseData is handed the transposed matrix, so transpose its result
## back before clustering with the same item labels.
discreteData <- DiscretiseData(t(newData), percentiles = percentiles)
discreteData <- t(discreteData)
hc3 <- bhc(discreteData, itemLabels, verbose = TRUE)
@

%% Embed the plot in the LaTeX document.
%% NOTE(review): fig=TRUE (and the width/height) are assumed here; the original
%% chunk options were lost -- confirm.
\begin{center}
<<dendrograms, fig=TRUE, width=9, height=4>>=
## Sweave processes a single figure per chunk, so the three dendrograms are
## drawn as side-by-side panels of one figure instead of three separate plots.
oldPar <- par(mfrow = c(1, 3))
plot(hc1, axes = FALSE)
plot(hc2, axes = FALSE)
plot(hc3, axes = FALSE)
par(oldPar) # restore the previous graphics settings
@
\end{center}

The figure shows the simple example dendrograms for the three runs (from left
to right: multinomial, time-course and discretised multinomial). Note that the
structure is quite distinctive; this may be the result of discretising and
analysing a small data-set.

\end{document}