From eebf197a5d49125ddca53fd9b78cb0d860058d70 Mon Sep 17 00:00:00 2001 From: root Date: Sun, 4 Sep 2016 17:44:49 -0700 Subject: [PATCH 1/4] test-pr2 --- pom.xml | 14 +++++----- .../scala/com/neilferguson/PopStrat.scala | 27 +++++++++++++------ 2 files changed, 26 insertions(+), 15 deletions(-) diff --git a/pom.xml b/pom.xml index 5e3ce0b..d189b68 100644 --- a/pom.xml +++ b/pom.xml @@ -7,10 +7,10 @@ popstrat 0.1-SNAPSHOT - 1.2.0 - 3.0.0.8 - 1.2.5 - 0.16.0 + 1.6.1 + 3.8.2.6 + 1.6.5 + 0.19.0 @@ -27,12 +27,12 @@ org.bdgenomics.adam - adam-core + adam-core_2.10 ${adam.version} org.bdgenomics.adam - adam-apis + adam-apis_2.10 ${adam.version} @@ -87,7 +87,7 @@ org.scala-tools maven-scala-plugin - 2.14.1 + 2.15.2 diff --git a/src/main/scala/com/neilferguson/PopStrat.scala b/src/main/scala/com/neilferguson/PopStrat.scala index 286fb39..cf07583 100644 --- a/src/main/scala/com/neilferguson/PopStrat.scala +++ b/src/main/scala/com/neilferguson/PopStrat.scala @@ -17,6 +17,15 @@ import scala.collection.JavaConverters._ import scala.collection.immutable.Range.inclusive import scala.io.Source +import org.apache.spark.sql.types.DataTypes +import hex._ +import water.fvec._ +import water.support._ +import _root_.hex.Distribution.Family +import _root_.hex.deeplearning.DeepLearningModel +import _root_.hex.tree.gbm.GBMModel +import _root_.hex.{Model, ModelMetricsBinomial} + object PopStrat { def main(args: Array[String]): Unit = { @@ -96,8 +105,8 @@ object PopStrat { case (sampleId, variants) => (sampleId, variants.toArray.sortBy(_.variantId)) } - val header = StructType(Array(StructField("Region", StringType)) ++ - sortedVariantsBySampleId.first()._2.map(variant => {StructField(variant.variantId.toString, IntegerType)})) + val header = DataTypes.createStructType(Array(DataTypes.createStructField("Region", DataTypes.StringType,false)) ++ + sortedVariantsBySampleId.first()._2.map(variant => {DataTypes.createStructField(variant.variantId.toString,DataTypes.IntegerType,false)})) val rowRDD: RDD[Row] = sortedVariantsBySampleId.map { case (sampleId, sortedVariants) => val region: Array[String] = Array(panel.getOrElse(sampleId, "Unknown")) @@ -107,13 +116,15 @@ object PopStrat { // Create the SchemaRDD from the header and rows and convert the SchemaRDD into a H2O dataframe val sqlContext = new org.apache.spark.sql.SQLContext(sc) + //val dataFrame=sqlContext.createDataFrame(rowRDD, header) val schemaRDD = sqlContext.applySchema(rowRDD, header) val h2oContext = new H2OContext(sc).start() - import h2oContext._ - val dataFrame = h2oContext.toDataFrame(schemaRDD) + import h2oContext._ + val dataFrame1 =h2oContext.asH2OFrame(schemaRDD) + val dataFrame=H2OFrameSupport.allStringVecToCategorical(dataFrame1) // Split the dataframe into 50% training, 30% test, and 20% validation data - val frameSplitter = new FrameSplitter(dataFrame, Array(.5, .3), Array("training", "test", "validation").map(Key.make), null) + val frameSplitter =new FrameSplitter(dataFrame, Array(.5, .3), Array("training", "test", "validation").map(Key.make[Frame](_)), null) water.H2O.submitTask(frameSplitter) val splits = frameSplitter.getResult val training = splits(0) @@ -121,8 +132,8 @@ object PopStrat { // Set the parameters for our deep learning model. val deepLearningParameters = new DeepLearningParameters() - deepLearningParameters._train = training - deepLearningParameters._valid = validation + deepLearningParameters._train = training._key + deepLearningParameters._valid = validation._key deepLearningParameters._response_column = "Region" deepLearningParameters._epochs = 10 deepLearningParameters._activation = Activation.RectifierWithDropout @@ -134,7 +145,7 @@ object PopStrat { // Score the model against the entire dataset (training, test, and validation data) // This causes the confusion matrix to be printed - deepLearningModel.score(dataFrame)('predict) + deepLearningModel.score(dataFrame) } From f534bcd9272a71af032bedb558bcdd44922c518b Mon Sep 17 00:00:00 2001 From: ZhipengCheng <1024521227@qq.com> Date: Mon, 5 Sep 2016 08:55:43 +0800 Subject: [PATCH 2/4] Update pom.xml --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index d189b68..0e8ee4b 100644 --- a/pom.xml +++ b/pom.xml @@ -1,7 +1,7 @@ + http://maven.apache.org/maven-v4_0_0.xsd"> 4.0.0 com.neilferguson popstrat From f8aa603025ab7e551b273be3707c2700ace4b25d Mon Sep 17 00:00:00 2001 From: ZhipengCheng <1024521227@qq.com> Date: Mon, 5 Sep 2016 09:05:52 +0800 Subject: [PATCH 3/4] Update PopStrat.scala --- src/main/scala/com/neilferguson/PopStrat.scala | 1 - 1 file changed, 1 deletion(-) diff --git a/src/main/scala/com/neilferguson/PopStrat.scala b/src/main/scala/com/neilferguson/PopStrat.scala index cf07583..ad5c0e1 100644 --- a/src/main/scala/com/neilferguson/PopStrat.scala +++ b/src/main/scala/com/neilferguson/PopStrat.scala @@ -25,7 +25,6 @@ import _root_.hex.Distribution.Family import _root_.hex.deeplearning.DeepLearningModel import _root_.hex.tree.gbm.GBMModel import _root_.hex.{Model, ModelMetricsBinomial} - object PopStrat { def main(args: Array[String]): Unit = { From 7eb180c6ea7e321cc22a90cd67727f27f1b1de9a Mon Sep 17 00:00:00 2001 From: ZhipengCheng <1024521227@qq.com> Date: Mon, 5 Sep 2016 09:07:19 +0800 Subject: [PATCH 4/4] Update PopStrat.scala --- src/main/scala/com/neilferguson/PopStrat.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/scala/com/neilferguson/PopStrat.scala b/src/main/scala/com/neilferguson/PopStrat.scala index ad5c0e1..9fb9eed 100644 --- a/src/main/scala/com/neilferguson/PopStrat.scala +++ b/src/main/scala/com/neilferguson/PopStrat.scala @@ -123,7 +123,7 @@ object PopStrat { val dataFrame=H2OFrameSupport.allStringVecToCategorical(dataFrame1) // Split the dataframe into 50% training, 30% test, and 20% validation data - val frameSplitter =new FrameSplitter(dataFrame, Array(.5, .3), Array("training", "test", "validation").map(Key.make[Frame](_)), null) + val frameSplitter = new FrameSplitter(dataFrame, Array(.5, .3), Array("training", "test", "validation").map(Key.make[Frame](_)), null) water.H2O.submitTask(frameSplitter) val splits = frameSplitter.getResult val training = splits(0)