From 086bb695ef3e7f8dee9dc3f2b7a0b1de638197b3 Mon Sep 17 00:00:00 2001 From: gclaramunt Date: Mon, 21 Jan 2013 13:23:49 -0200 Subject: [PATCH 01/12] add one-jar plugin --- tools/import/mongodb/build.sbt | 26 ++++ tools/import/mongodb/project/plugins.sbt | 3 + .../tools/importers/mongo/ImportMongo.scala | 128 ++++++++++++++++++ 3 files changed, 157 insertions(+) create mode 100644 tools/import/mongodb/build.sbt create mode 100644 tools/import/mongodb/project/plugins.sbt create mode 100644 tools/import/mongodb/src/main/scala/com/precog/tools/importers/mongo/ImportMongo.scala diff --git a/tools/import/mongodb/build.sbt b/tools/import/mongodb/build.sbt new file mode 100644 index 0000000..ec51c8b --- /dev/null +++ b/tools/import/mongodb/build.sbt @@ -0,0 +1,26 @@ +name := "import-mongodb" + +version := "0.1" + +scalaVersion := "2.9.2" + +resolvers ++= Seq( + "ReportGrid (public)" at "http://nexus.reportgrid.com/content/repositories/public-releases", + "Sonatype" at "http://oss.sonatype.org/content/repositories/public", + "Typesafe" at "http://repo.typesafe.com/typesafe/releases/", + "Typesafe-snapshots" at "http://repo.typesafe.com/typesafe/snapshots/", + "Scala Tools" at "http://scala-tools.org/repo-snapshots/" +) + +scalacOptions ++= Seq("-unchecked", "-deprecation") + +seq(com.github.retronym.SbtOneJar.oneJarSettings: _*) + +libraryDependencies ++= Seq( + "com.reportgrid" %% "blueeyes-core" % "1.0.0-M6", + "com.reportgrid" %% "blueeyes-json" % "1.0.0-M6", + "com.reportgrid" %% "blueeyes-mongo" % "1.0.0-M6", + "org.mongodb" %% "casbah" % "2.3.0", + "org.scalaz" %% "scalaz-core" % "7.0.0-M3" , + "org.specs2" %% "specs2" % "1.12.2" % "test" +) diff --git a/tools/import/mongodb/project/plugins.sbt b/tools/import/mongodb/project/plugins.sbt new file mode 100644 index 0000000..50d00c8 --- /dev/null +++ b/tools/import/mongodb/project/plugins.sbt @@ -0,0 +1,3 @@ + +addSbtPlugin("com.github.retronym" % "sbt-onejar" % "0.8") + diff --git 
a/tools/import/mongodb/src/main/scala/com/precog/tools/importers/mongo/ImportMongo.scala b/tools/import/mongodb/src/main/scala/com/precog/tools/importers/mongo/ImportMongo.scala new file mode 100644 index 0000000..9166f2c --- /dev/null +++ b/tools/import/mongodb/src/main/scala/com/precog/tools/importers/mongo/ImportMongo.scala @@ -0,0 +1,128 @@ +package com.precog.tools.importers.mongo + +import com.mongodb.casbah.Imports._ +import blueeyes.persistence.mongo.json.BijectionsMongoJson._ +import blueeyes.persistence.mongo.json.BijectionsMongoJson.MongoToJson._ +import com.mongodb.casbah.commons.TypeImports.ObjectId +import scalaz.{Monad, StreamT} +import akka.dispatch.{Await, Future} +import blueeyes.json.{JObject, JString, JParser, JValue} +import blueeyes.core.service.engines.HttpClientXLightWeb +import blueeyes.bkka.AkkaDefaults._ +import blueeyes.core.data.DefaultBijections._ +import blueeyes.bkka.AkkaDefaults.defaultFutureDispatch +import blueeyes.bkka.FutureMonad +import blueeyes.core.data.ByteChunk +import java.nio.ByteBuffer +import akka.util.Duration +import blueeyes.core.http.HttpResponse +import blueeyes.core.service._ +import annotation.tailrec + +/** + * User: gabriel + * Date: 1/17/13 + */ +object ImportMongo { + + implicit val as=actorSystem + implicit val executionContext = defaultFutureDispatch + implicit val M: Monad[Future] = new FutureMonad(executionContext) + + def parseInt(s : String) : Option[Int] = try { + Some(s.toInt) + } catch { + case _ : java.lang.NumberFormatException => None + } + + def main(args:Array[String]){ + + if (args.length != 5) { + println("Wrong number of parameters.") + println("Usage: ImportMongo mongo_host mongo_port precog_host precog_ingest_path precog_apiKey") + actorSystem.shutdown() + sys.exit(1) + } + + val mongoHost=args(0) + val mongoPort=parseInt(args(1)).get + + val precogHost=args(2) + val basePath=args(3) + val apiKey=args(4) + + implicit val mongoConn= MongoConnection(mongoHost,mongoPort) + + @tailrec + def 
readConfigLine(acc:List[String]):List[String]={ + val line=readLine() + if (line != null && line != ""){ + if (line.startsWith("#")) readConfigLine(acc) //skip lines starting with # + else readConfigLine(line::acc) + } else acc + } + println("# enter json import descriptors, EOF or empty line to continue") + println("""# format: { "database":"", "collection":"" } or { "database":"", "collection":"", "lastId":"" } """) + val jsonImputs=readConfigLine(Nil) + val fresults=jsonImputs.map(JParser.parseFromString(_).map(importCollection(precogHost,basePath,apiKey,_))).flatMap(_.toList) + + val continueJson=Await.result(Future.sequence(fresults), Duration("24 hours")) + println("#################################################################") + println("# to continue ingestion from last point, use the following imput:") + println(continueJson.mkString("\n")) + + actorSystem.shutdown() + + } + + def importCollection(precogHost:String, basePath:String, apiKey:String, jparams: JValue) (implicit mongoConn: MongoConnection):Future[String]={ + def strValue(jv: JValue) = (jv --> classOf[JString]).value + val dbName = strValue(jparams \ "database") + val collName = strValue(jparams \ "collection") + val lastId = (jparams \? 
"lastId").map(strValue(_)) getOrElse ("000000000000000000000000") + + val fdsid = Future { + readFromMongo(mongoConn, dbName, collName, lastId) + } + val (fds, fmaxId) = (fdsid map (_._1), fdsid map (_._2)) + val fjsons = fds.map(_.flatMap(MongoToJson(_).toStream)) + val fullPath = "%s/ingest/v1/sync/fs%s/%s".format(precogHost, basePath, collName) + val data = StreamT.fromStream[Future, JValue](fjsons) + val fresult = M.lift2((a: HttpResponse[ByteChunk], b: ObjectId) => (a, b))(sendToPrecog(fullPath, apiKey, data), fmaxId) + + fresult.map(r => { + val (result, oid) = r + result match { + case HttpResponse(_, _, Some(Left(buffer)), _) => { + println("### result from precog: %s".format(new String(buffer.array(), "UTF-8"))) + } + case _ => println("### error: %s".format(result.toString())) + } + """{ "database":"%s", "collection":"%s" "lastId":"%s" }""".format(dbName, collName, oid) + }) + } + + def readFromMongo(mongoConn: MongoConnection, dbName: String, colName: String, oid:String):(Stream[DBObject],ObjectId)={ + val mongoDB = mongoConn(dbName) + val mongoColl = mongoDB(colName) + val q = "_id" $gt (new ObjectId(oid)) + val rStrm=mongoColl.find(q).toStream //.view ? 
+ val (oids,dataStrm)=rStrm.map(m=>(m.get("_id").asInstanceOf[ObjectId],m)).unzip + (dataStrm,oids.max) + } + + def sendToPrecog(fullPath:String, apiKey:String, dataStream:StreamT[Future,JValue]): Future[HttpResponse[ByteChunk]] = { + + val httpClient = new HttpClientXLightWeb()(defaultFutureDispatch) + + val byteStream: StreamT[Future, ByteBuffer] = dataStream.map(jv => ByteBuffer.wrap({ + val js = "%s\n".format(jv.renderCompact) + print("# %s".format(js)) + js + }.getBytes("UTF-8"))) + + //get the last/biggest id + val byteChunks: ByteChunk = Right(byteStream) + httpClient.parameters('apiKey -> apiKey).post(fullPath)(byteChunks) + } +} From 53b2c98996bb6aef81d1770623448d22f6a5a16a Mon Sep 17 00:00:00 2001 From: gclaramunt Date: Tue, 22 Jan 2013 16:05:08 -0200 Subject: [PATCH 02/12] Fix bug in Json building Change collections to use streamTs --- tools/import/jdbc/build.sbt | 4 + .../tools/importers/jdbc/DbAccess.scala | 6 +- .../tools/importers/jdbc/DbAnalysis.scala | 2 +- .../tools/importers/jdbc/ImportJdbc.scala | 131 +++++++++++------- .../importers/jdbc/ImportJdbcConsole.scala | 104 +++++++++++--- .../importers/jdbc/ImportJdbcService.scala | 13 +- .../tools/importers/jdbc/DbAnalysisTest.scala | 14 ++ .../jdbc/ImportJdbcServiceTest.scala | 15 +- .../tools/importers/jdbc/ImportJdbcTest.scala | 88 +++++++++--- .../precog/tools/importers/jdbc/package.scala | 6 +- 10 files changed, 272 insertions(+), 111 deletions(-) diff --git a/tools/import/jdbc/build.sbt b/tools/import/jdbc/build.sbt index 625d5c4..1efb4a3 100644 --- a/tools/import/jdbc/build.sbt +++ b/tools/import/jdbc/build.sbt @@ -17,9 +17,13 @@ resolvers ++= Seq( scalacOptions ++= Seq("-unchecked", "-deprecation") +seq(com.github.retronym.SbtOneJar.oneJarSettings: _*) + libraryDependencies ++= Seq( "com.reportgrid" % "blueeyes-core_2.9.2" % "1.0.0-M6", "com.reportgrid" % "blueeyes-json_2.9.2" % "1.0.0-M6", + "org.scalaz" % "scalaz-core_2.9.2" % "7.0.0-M3" , "org.specs2" %% "specs2" % "1.12.2" , + 
//sbtVersion(v => "com.github.siasia" %% "xsbt-proguard-plugin" % (v+"-0.1.1")), "com.h2database" % "h2" % "1.2.134" % "test" ) diff --git a/tools/import/jdbc/src/main/scala/com/precog/tools/importers/jdbc/DbAccess.scala b/tools/import/jdbc/src/main/scala/com/precog/tools/importers/jdbc/DbAccess.scala index c4d0922..618b00c 100644 --- a/tools/import/jdbc/src/main/scala/com/precog/tools/importers/jdbc/DbAccess.scala +++ b/tools/import/jdbc/src/main/scala/com/precog/tools/importers/jdbc/DbAccess.scala @@ -2,6 +2,7 @@ package com.precog.tools.importers.jdbc import java.sql._ import Datatypes._ +import scalaz.{StreamT, Id} /** * User: gabriel @@ -26,11 +27,14 @@ object DbAccess { for ( i <- 1 to count) yield Column(tblMetaData.getColumnName(i),Table(tblMetaData.getTableName(i))) } - def rsIterator[T](rs:ResultSet)(f:ResultSet => T) = new Iterator[T] { + //don't use! + private def rsIterator[T](rs:ResultSet)(f:ResultSet => T) = new Iterator[T] { def hasNext = rs.next() def next():T = f(rs) } + def rsStreamT[T](rs:ResultSet)(f:ResultSet => T)=StreamT.unfold(rs)( (rs:ResultSet) => if (rs.next()) { Some(f(rs),rs)} else None ) + def oneColumnRs(rs:ResultSet) = rsIterator(rs)(rs=> rs.getString(1)) def tables(rs:ResultSet) = rsIterator(rs)(rs=> Table(rs.getString("TABLE_NAME"))) def columns(rs:ResultSet) = rsIterator(rs)(rs=> Column(rs.getString("COLUMN_NAME"), Table(rs.getString("TABLE_NAME")))) diff --git a/tools/import/jdbc/src/main/scala/com/precog/tools/importers/jdbc/DbAnalysis.scala b/tools/import/jdbc/src/main/scala/com/precog/tools/importers/jdbc/DbAnalysis.scala index 41af715..c6e1018 100644 --- a/tools/import/jdbc/src/main/scala/com/precog/tools/importers/jdbc/DbAnalysis.scala +++ b/tools/import/jdbc/src/main/scala/com/precog/tools/importers/jdbc/DbAnalysis.scala @@ -18,7 +18,7 @@ object DbAnalysis{ def findTables(metadata: DatabaseMetaData, oCat: Option[String], tableName: => Option[String]): Array[Table] = { - val cat= toNullUppercase(oCat) + val cat= 
null//toNullUppercase(oCat) val tableNm= tableName.map(_.toUpperCase).getOrElse(null) tables(metadata.getTables(cat, null, tableNm, Array("TABLE"))).toArray } diff --git a/tools/import/jdbc/src/main/scala/com/precog/tools/importers/jdbc/ImportJdbc.scala b/tools/import/jdbc/src/main/scala/com/precog/tools/importers/jdbc/ImportJdbc.scala index 8ff1bd5..00c5048 100644 --- a/tools/import/jdbc/src/main/scala/com/precog/tools/importers/jdbc/ImportJdbc.scala +++ b/tools/import/jdbc/src/main/scala/com/precog/tools/importers/jdbc/ImportJdbc.scala @@ -8,6 +8,17 @@ import blueeyes.bkka.AkkaDefaults.defaultFutureDispatch import scala.Some import blueeyes.core.service.engines.HttpClientXLightWeb import Datatypes._ +import blueeyes.bkka.FutureMonad +import scalaz.{Hoist, StreamT, ~>} +import akka.dispatch.{Await, ExecutionContext, Future} +import java.nio.ByteBuffer +import scalaz.Id._ +import annotation.tailrec +import akka.util.Duration +import blueeyes.core.http.HttpResponse +import blueeyes.core.data.ByteChunk +import akka.dispatch.Future + /** * User: gabriel @@ -17,6 +28,8 @@ object ImportJdbc { import DbAccess._ + val httpClient=new HttpClientXLightWeb()(defaultFutureDispatch) + case class ImportTable(name:String, columns:Seq[String], baseOrJoin:Either[Table,Join]){ val isCollection = baseOrJoin.right.toOption.map(_.exported).getOrElse(false) } case class IngestInfo(tables:Seq[ImportTable]) @@ -34,53 +47,63 @@ object ImportJdbc { def buildSort(ingestInfo:IngestInfo) =ingestInfo.tables.flatMap( t => t.columns.map("%s.%s".format(t.name,_)) ) + def getElements(o:Option[JValue]):List[JValue]= o match { + case Some(l:JArray) => l.elements + case _ => Nil + } + + def toJObject(o:JValue):JObject= o match { + case j:JObject => j + case _ => sys.error("base value is not jobject!") + } - def mkPartialJson(baseName:String, ingestInfo:IngestInfo, s: Seq[String], prevMap:Map[String,JValue]=Map())= { + def buildField( nm: (String,String)) =Option(nm._2).map( 
s=>JField(nm._1,JString(s))) - def getElements(o:Option[JValue]):List[JValue]= o match { - case Some(l:JArray) => l.elements - case _ => Nil - } - def toJObject(o:JValue):JObject= o match { - case j:JObject => j - case _ => sys.error("base value is not jobject!") - } + type StrJVMap= Map[String,JValue] - def buildJValues( ms:(Map[String,JValue],Seq[String]), tblDesc: ImportTable ):(Option[(String,JValue)],Seq[String])={ - val (m,s)=ms - val (tblColValues,rest)=s.splitAt(tblDesc.columns.length) - val objValues =(tblDesc.columns.zip(tblColValues)).flatMap(buildField(_) ).toList - val tblName = tblDesc.name - val keyValue=if (objValues.isEmpty) if (tblDesc.isCollection) Some(tblName->JArray.empty) else None + def buildJValues( map:StrJVMap, s:Seq[String], tblDesc: ImportTable ):(Option[(String,JValue)],Seq[String])={ + val (tblColValues,rest)=s.splitAt(tblDesc.columns.length) + val objValues =(tblDesc.columns.zip(tblColValues)).flatMap(buildField(_) ).toList + val tblName = tblDesc.name.toUpperCase + val keyValue= + if (objValues.isEmpty) if (tblDesc.isCollection) Some(tblName->JArray.empty) else None else { val data=JObject(objValues) - val obj= if (tblDesc.isCollection) JArray(data:: getElements(m.get(tblName)) ) else data + val obj= if (tblDesc.isCollection) JArray(getElements(map.get(tblName)):+data ) else data Some(tblName->obj) } - (keyValue,rest) + (keyValue,rest) + } + + @tailrec + def mkPartialJson(baseName:String, ingestInfo:IngestInfo, dataStream: StreamT[Id,Seq[String]], prevMap:StrJVMap=Map()):Option[(JValue,StreamT[Id,Seq[String]])] = + if (dataStream.isEmpty) None + else { + val s=dataStream.head + val tail=dataStream.tail + val jsonMap=buildJsonObjMap(ingestInfo, prevMap, s) + val baseNameUC=baseName.toUpperCase + //peek into the stream + val nextJsonMap:StrJVMap=if (tail.isEmpty) Map() else buildJsonObjMap(ingestInfo, Map(), tail.head) + if ( !nextJsonMap.isEmpty && (jsonMap.get(baseNameUC) == nextJsonMap.get(baseNameUC)) ) { + //if next row is the 
same object, keep building + mkPartialJson(baseNameUC,ingestInfo,tail,jsonMap) + } else { + val base= toJObject(jsonMap(baseNameUC)) + val values = (jsonMap-baseNameUC).map(nv => JField(nv._1, nv._2)).toList + Some(JObject(base.fields ++ values),tail) + } } - def buildField( nm: (String,String)) =Option(nm._2).map( s=>JField(nm._1,JString(s))) - - val jsonMap:Map[String,JValue]=ingestInfo.tables.foldLeft( (prevMap,s) )( - (ms,v) =>{ - val (opt,r)= buildJValues(ms,v) - val (m,_)=ms - opt.map( (kobj)=>{ - val (k,obj) =kobj - if (k!=baseName) - (m+(kobj),r) - else if (prevMap.isEmpty || prevMap(k)!= obj) - (Map(kobj),r) - else (m,r) - }).getOrElse((m,r)) - } )._1 - - val base:JObject = toJObject(jsonMap(baseName)) - val values:List[JField] = (jsonMap-baseName).map(nv => JField(nv._1, nv._2)).toList - (JObject(base.fields ++ values),jsonMap) - } + def buildJsonObjMap(ingestInfo: ImportJdbc.IngestInfo, prevMap: ImportJdbc.StrJVMap, s: Seq[String]): StrJVMap = { + ingestInfo.tables.foldLeft((prevMap, s))( + (ms, v) => { + val (m,seq)=ms + val (opt, r): (Option[(String, JValue)], Seq[String]) = buildJValues(m, seq, v) //build a json object from the seq values + opt.map(kv => (m + kv, r)).getOrElse((m, r)) + })._1 + } def names(cs:Seq[Column])=cs.map(_.name) @@ -92,33 +115,39 @@ object ImportJdbc { "select %s from %s order by %s".format(colSelect,join,sort) } - def executeQuery(connDb: Connection, query: String ): (Iterator[IndexedSeq[String]],IndexedSeq[Column]) = { + def executeQuery(connDb: Connection, query: String ): (StreamT[Id,IndexedSeq[String]],IndexedSeq[Column]) = { val stmt = connDb.prepareStatement(query) val columns = getColumns(stmt) val rs = stmt.executeQuery() - (rsIterator(rs)(row => for (i <- 1 to columns.size) yield row.getString(i)),columns) + (rsStreamT(rs)(row => for (i <- 1 to columns.size) yield row.getString(i)),columns) } - def getConnection(dbUrl: String, user: String, password: String): Connection = { - DriverManager.getConnection(dbUrl, user, 
password) + def getConnection(dbUrl: String, user: String, password: String, database:Option[String]): Connection = { + val uri= database.map( dbName=>if (dbUrl.endsWith(dbName)) dbUrl else "%s%s".format(dbUrl,dbName)).getOrElse(dbUrl) + DriverManager.getConnection(uri, user, password) } - def ingest(connDb: Connection, objName:String, query: String, oTblDesc:Option[IngestInfo], ingestPath: =>String, host: =>String, apiKey: =>String) = { + def ingest(connDb: Connection, objName:String, query: String, oTblDesc:Option[IngestInfo], ingestPath: =>String, host: =>String, apiKey: =>String)(implicit executor: ExecutionContext):Future[HttpResponse[ByteChunk]] = { + implicit val M = new FutureMonad(executor) val (data,columns) = executeQuery(connDb, query) val tblDesc= oTblDesc.getOrElse(IngestInfo(Seq(ImportTable(objName,names(columns),Left(Table("base")))))) - val body = buildBody(data, objName, tblDesc) + + val dataStream:StreamT[Future,ByteBuffer] =buildBody(data, objName, tblDesc) + .map(jv=>ByteBuffer.wrap({val js="%s\n".format(jv.renderCompact); print(js); js}.getBytes("UTF-8"))) + + val body:ByteChunk= Right(dataStream) val fullPath = "%s/ingest/v1/sync/fs%s/%s".format(host, ingestPath,objName) - val httpClient=new HttpClientXLightWeb()(defaultFutureDispatch) //TODO add owner account id - httpClient.parameters('apiKey -> apiKey).post(fullPath)(jvalueToChunk(body)) + println("sending to ingest: path=%s query=%s".format(fullPath,query)) + httpClient.parameters('apiKey -> apiKey).post(fullPath)(body) } - def buildBody(data: Iterator[IndexedSeq[String]], baseTable: String, i: IngestInfo): JArray = - JArray(data.foldLeft((List[JValue](), Map[String, JValue]()))((lm, r) => { - val (l, m) = lm - val (values, map) = mkPartialJson(baseTable, i, r, m) - (values :: l, map) - })._1) + def buildBody(data: StreamT[Id,Seq[String]], baseTable: String, i: IngestInfo)(implicit executor: ExecutionContext, m:FutureMonad): StreamT[Future,JValue] = + 
StreamT.unfoldM[Future,JValue,StreamT[Id,Seq[String]]](data)(ds=> + if (ds.isEmpty) Future(None) + else Future(mkPartialJson(baseTable,i,ds))) + + } diff --git a/tools/import/jdbc/src/main/scala/com/precog/tools/importers/jdbc/ImportJdbcConsole.scala b/tools/import/jdbc/src/main/scala/com/precog/tools/importers/jdbc/ImportJdbcConsole.scala index 9e6a55e..f0e0245 100644 --- a/tools/import/jdbc/src/main/scala/com/precog/tools/importers/jdbc/ImportJdbcConsole.scala +++ b/tools/import/jdbc/src/main/scala/com/precog/tools/importers/jdbc/ImportJdbcConsole.scala @@ -1,11 +1,22 @@ package com.precog.tools.importers.jdbc import annotation.tailrec -import java.sql.{Connection, DatabaseMetaData, DriverManager} +import java.sql.{Connection, DatabaseMetaData} import DbAccess._ import DbAnalysis._ import ImportJdbc._ -import Datatypes._ +import blueeyes.bkka.AkkaDefaults._ +import blueeyes.core.http.HttpResponse +import blueeyes.core.data._ +import scala.Left +import com.precog.tools.importers.jdbc.Datatypes.Join +import com.precog.tools.importers.jdbc.ImportJdbc.IngestInfo +import scala.Some +import scala.Right +import com.precog.tools.importers.jdbc.ImportJdbc.ImportTable +import com.precog.tools.importers.jdbc.Datatypes.Table +import akka.dispatch.{Future, Await} +import akka.util.Duration /** * User: gabriel @@ -13,59 +24,93 @@ import Datatypes._ */ object ImportJdbcConsole { + implicit val as=actorSystem + def main(args:Array[String]){ println("Welcome to Precog JDBC import wizard") lazy val dbUrl=readLine("Enter database URL:") lazy val user=readLine("User:") lazy val password = readLine("Password:") // use api key and dispatch to call ingest - lazy val host="http://beta.precog.com" //readLine("ingestion host") //TODO move to trait ? 
+ lazy val host=readLine("Precog ingestion host") lazy val apiKey=readLine("API KEY for ingestion") lazy val basePath=readLine("Base ingestion path ( /{userId}/....)") - importJdbc(dbUrl,user,password, host, apiKey, basePath) + + + /*lazy val dbUrl="jdbc:mysql://localhost/" //readLine("Enter database URL:") + lazy val user="root" //readLine("User:") + lazy val password = "root" //readLine("Password:") + // use api key and dispatch to call ingest + lazy val host="https://beta.precog.com" //readLine("Precog ingestion host") // https://beta.precog.com + lazy val apiKey="43AB865E-BB86-4F74-A57E-7E8BBD77F2B5" //readLine("API KEY for ingestion") + lazy val basePath="/0000000457/import" //readLine("Base ingestion path ( /{userId}/....)")*/ + val fresult=importJdbc(dbUrl,user,password, host, apiKey, basePath) + + Await.result(Future.sequence(fresult),Duration("24 hours")).map( + result => result match { + case HttpResponse(_ ,_,Some(Left(buffer)),_) => { println(new String(buffer.array(), "UTF-8"))} + case _ => "error %s".format(result.toString()) + } + ) + as.shutdown() } - def importJdbc(dbUrl: =>String, user: =>String, password: =>String, host: =>String, apiKey: =>String, basePath: =>String) { + def importJdbc(dbUrl: =>String, user: =>String, password: =>String, host: =>String, apiKey: =>String, basePath: =>String)={ - val conn= getConnection(dbUrl, user, password) - val metadata= conn.getMetaData - val cat= getCatalogs(metadata) + val catConn= getConnection(dbUrl, user, password,None) + val cat= getCatalogs(catConn.getMetaData) //for querying tables, the connection must be specific to a database - val connDb= DriverManager.getConnection("%s%s".format(dbUrl,cat),user,password) - val tqs = getQuery(connDb, metadata, cat) + val connDb= getConnection(dbUrl, user, password,Some(cat)) + val tqs = getQuery(connDb, cat) tqs.map( tqs => { val (table,tDesc,q) = tqs val path= "%s/%s".format(basePath, table) - println(ingest(connDb,table, q, tDesc, path, host, apiKey)) + 
println("importing %s".format(table)) + ingest(connDb,table, q, tDesc, path, host, apiKey).onComplete { + case Right(result) => callSucceded(result) + case Left(failure) => println("Failed to import %s, error: %s".format(table,failure.getMessage)) + } }) } + def callSucceded(response:HttpResponse[ByteChunk]){ + response match { + case HttpResponse(_ ,_,Some(Left(buffer)),_) => println("Result: %s".format(new String(buffer.array(), "UTF-8"))) + case _ => println("Unexpected stream in %s".format(response)) + } + } + def getCatalogs(metadata: DatabaseMetaData): String = { println("Catalogs:") val catalogs = oneColumnRs(metadata.getCatalogs).toArray - println(present(catalogs)) - catalogs({println("Select a catalog: ");readInt()-1}) + selectOne("Catalog/Database",catalogs).getOrElse("") } def selectColumns(connDb: Connection, table: Table): List[String] = { val labels = names(getColumns(connDb, "select * from %s".format(table.name))) //column selection - println("table: %s".format(table.name)) + println("Table: %s".format(table.name.toUpperCase)) selectSet("column", labels).toList } - def getQuery(connDb: Connection, metadata: DatabaseMetaData, cat: String): Seq[(String,Option[IngestInfo],String)] = { + def getQuery(connDb: Connection, cat: String): Seq[(String,Option[IngestInfo],String)] = { if (readLine("Do you have a SQL query to select the data? (y/N)").toLowerCase == "y") { List((readLine("table/object name: "),None,readLine("Query="))) - } else createQueries(connDb, metadata, cat, selectedTables(findTables(metadata, Some(cat), readTableName())), readLine("Denormalize related tables? (y/n)").toLowerCase == "y") + } else { + val tblName=readTableName() + val metadata= connDb.getMetaData + val tables=findTables(metadata, Some(cat), tblName) + createQueries(connDb, metadata, cat, selectedTables(tables), readLine("Denormalize related tables? 
(y/n)").toLowerCase == "y") + } } def createQueries(conn:Connection, metadata: DatabaseMetaData, cat: String, selected: Seq[Table],denormalize: => Boolean): Seq[(String,Option[IngestInfo],String)] = { selected.map( table =>{ - val allRelationships = relationships( conn, metadata, Some(cat),table).toSeq + val allRelationships = relationships( conn, metadata, None,table).toSeq + println(allRelationships) val relations= selectSet("relation",allRelationships).toList val tblDesc=buildIngestInfo(table, conn, relations) @@ -94,10 +139,6 @@ object ImportJdbcConsole { } - //case class ImportTable(name:String, columns:Seq[String], baseOrJoin:Either[Table,Join]){ val isCollection = baseOrJoin.right.toOption.map(_.exported).getOrElse(false) } - //case class IngestInfo(tables:Seq[ImportTable]) - - def selectedTables(tablesList: Array[Table]): Seq[Table] = { selectSet("table", tablesList) } @@ -124,7 +165,7 @@ object ImportJdbcConsole { val elem:T = availArray(x - 1) selectSet(label,available.filterNot(_==elem), selected:+elem) } - case s:String if (available.exists(_.toString == s)) => { + case s if (available.exists(_.toString == s)) => { val elem:T =availArray.find(_.toString == s).get selectSet(label,available.filterNot(_==elem), selected:+elem) } @@ -132,6 +173,25 @@ object ImportJdbcConsole { } } + @tailrec + private def selectOne[T](label:String, available: Seq[T] )(implicit arg0: ClassManifest[T]): Option[T] = + if (available.isEmpty) None + else { + val availArray=available.toArray + + println("Select a %s:".format(label)) + println(present(availArray)) + + println("Select a number/enter the name: ") + + val selIdx = readLine() + selIdx match { + case ParseInt(x) if (x<=available.size) => Option(availArray(x - 1)) + case s if (available.exists(_.toString == s)) => availArray.find(_.toString == s) + case _ => selectOne(label,available) + } + } + def present[T](arr:Seq[T])= (1 to arr.length).zip(arr).map(x=>x._1 +":"+ x._2).mkString(", ") def show(baseTable:Table,set: 
Set[Join])= set.map( r=> " %s with %s on %s=%s".format(baseTable.name, r.refKey.table, r.baseColName,r.refKey.columnName )).mkString(", ") diff --git a/tools/import/jdbc/src/main/scala/com/precog/tools/importers/jdbc/ImportJdbcService.scala b/tools/import/jdbc/src/main/scala/com/precog/tools/importers/jdbc/ImportJdbcService.scala index 605d38f..6a5010b 100644 --- a/tools/import/jdbc/src/main/scala/com/precog/tools/importers/jdbc/ImportJdbcService.scala +++ b/tools/import/jdbc/src/main/scala/com/precog/tools/importers/jdbc/ImportJdbcService.scala @@ -23,7 +23,7 @@ import Datatypes._ trait ImportJdbcService extends BlueEyesServiceBuilder { - val host="http://beta.precog.com" //TODO move to trait + val host="https://beta.precog.com" // "https://devapi.precog.com" //TODO move to trait def handleRequest[T](f: HttpRequest[T]=> Future[HttpResponse[T]])= (request: HttpRequest[T]) => @@ -35,11 +35,10 @@ trait ImportJdbcService extends BlueEyesServiceBuilder { def withConnectionFromRequest[T](r:HttpRequest[T])(f: (Connection,HttpRequest[T])=> Future[HttpResponse[T]])= { val dbUrl = r.parameters('dbUrl) - val database= r.parameters.get('database).getOrElse("") + val database= r.parameters.get('database) val user = r.parameters.get('user).getOrElse(null) val pwd = r.parameters.get('password).getOrElse(null) - val uri= if (dbUrl.endsWith(database)) dbUrl else "%s%s".format(dbUrl,database) - val c=getConnection(uri, user, pwd) + val c=getConnection(dbUrl, user, pwd,database) try { f(c,r) } finally { @@ -150,15 +149,17 @@ trait ImportJdbcService extends BlueEyesServiceBuilder { }~ path('database / "table" / 'table / "config") { post { - handleRequestWithConnection( (conn:Connection,request:HttpRequest[ByteChunk]) => { + handleRequest( (request:HttpRequest[ByteChunk]) => { val apiKey= request.parameters('apiKey) val path= request.parameters('path) val table= Table(request.parameters('table)) val cToJ=chunkToFutureJValue request.content.map(cToJ(_)).map(_.flatMap( ingestInfo =>{ + 
withConnectionFromRequest(request)( (conn:Connection,_)=>{ val query = buildQuery(ingestInfo) ingest(conn,table.name, query, Some(ingestInfo), path, host, apiKey) - })).get + }) + })).getOrElse(Future{ HttpResponse[ByteChunk](content = None) }) }) } } diff --git a/tools/import/jdbc/src/test/scala/com/precog/tools/importers/jdbc/DbAnalysisTest.scala b/tools/import/jdbc/src/test/scala/com/precog/tools/importers/jdbc/DbAnalysisTest.scala index 5789fdb..9d69118 100644 --- a/tools/import/jdbc/src/test/scala/com/precog/tools/importers/jdbc/DbAnalysisTest.scala +++ b/tools/import/jdbc/src/test/scala/com/precog/tools/importers/jdbc/DbAnalysisTest.scala @@ -3,6 +3,9 @@ package com.precog.tools.importers.jdbc import org.specs2.mutable.Specification import DbAnalysis._ import Datatypes._ +import com.precog.tools.importers.jdbc.ImportJdbc._ +import com.precog.tools.importers.jdbc.Datatypes.Table +import com.precog.tools.importers.jdbc.Datatypes.Join /** * User: gabriel @@ -10,6 +13,17 @@ import Datatypes._ */ class DbAnalysisTest extends Specification { + + "find tables" should { + + + "find all tables" in new Conn { val dbName="tables" + tblA; tblB; tblC; tblD + findTables(conn.getMetaData,None,None) must_== Array(Table("A"),Table("B"),Table("C"),Table("D")) + } + + } + "declared relations" should { "identify one to many" in new Conn{ val dbName ="onemany" diff --git a/tools/import/jdbc/src/test/scala/com/precog/tools/importers/jdbc/ImportJdbcServiceTest.scala b/tools/import/jdbc/src/test/scala/com/precog/tools/importers/jdbc/ImportJdbcServiceTest.scala index 07eafb0..372bbb1 100644 --- a/tools/import/jdbc/src/test/scala/com/precog/tools/importers/jdbc/ImportJdbcServiceTest.scala +++ b/tools/import/jdbc/src/test/scala/com/precog/tools/importers/jdbc/ImportJdbcServiceTest.scala @@ -115,7 +115,7 @@ class ImportJdbcServiceTest extends BlueEyesServiceSpecification with ImportJdbc 'dbUrl-> dbUrl(dbName), 'denormalize->"y", 'apiKey->apiKey, - 'path -> basePath //path('database / 
"table" / 'table / "auto") { + 'path -> basePath ).post[ByteChunk]("/ingest/%s/table/%s/auto".format(dbName,"A"))(Array.empty[Byte]) Await.result(r,1 minute) must beLike { case HttpResponse(_ ,_,Some(Left(buffer)),_) => new String(buffer.array(), "UTF-8") must_== @@ -123,19 +123,20 @@ class ImportJdbcServiceTest extends BlueEyesServiceSpecification with ImportJdbc } } - //TODO: fix the closed connection issue with the test - /*"ingest with config" in new Conn{ val dbName ="iwcfg" + "ingest with config" in new Conn{ val dbName ="iwcfg" + import DefaultBijections.jvalueToChunk tblA;tblB; dataA; dataB + val r=client.parameters( 'dbUrl-> dbUrl(dbName), 'apiKey->apiKey, - 'path -> basePath //path('database / "table" / 'table / "auto") { - ).post[ByteChunk]("/ingest/%s/table/%s/config".format(dbName,"A"))(JValueToByteArray(tblABDesc)) - Await.result(r,1 minute) must beLike { + 'path -> basePath + ).post[ByteChunk]("/ingest/%s/table/%s/config".format(dbName,"A"))(ingestInfo2Json(tblABDesc)) + Await.result(r,2 minute) must beLike { case HttpResponse(_ ,_,Some(Left(buffer)),_) => new String(buffer.array(), "UTF-8") must_== """{"failed":0,"skipped":0,"errors":[],"total":1,"ingested":1}""" } - }*/ + } } } diff --git a/tools/import/jdbc/src/test/scala/com/precog/tools/importers/jdbc/ImportJdbcTest.scala b/tools/import/jdbc/src/test/scala/com/precog/tools/importers/jdbc/ImportJdbcTest.scala index 053aa31..b0d05f8 100644 --- a/tools/import/jdbc/src/test/scala/com/precog/tools/importers/jdbc/ImportJdbcTest.scala +++ b/tools/import/jdbc/src/test/scala/com/precog/tools/importers/jdbc/ImportJdbcTest.scala @@ -18,6 +18,8 @@ import com.precog.tools.importers.jdbc.ImportJdbc.ImportTable import blueeyes.bkka.AkkaDefaults._ import blueeyes.core.http.{HttpStatus, HttpResponse} import blueeyes.core.http.HttpStatusCodes.OK +import scalaz.StreamT +import blueeyes.bkka.FutureMonad /** * User: gabriel @@ -49,50 +51,97 @@ class ImportJdbcTest extends Specification with FutureMatchers with 
HttpRequestM } } + + implicit def toStreamElem[T](l:List[T])=l.toSeq::StreamT.empty + "Json build from data" should { "build a simple Json" in { - ImportJdbc.mkPartialJson("a",tblADesc,aData)._1 must_== jA + ImportJdbc.mkPartialJson("a",tblADesc,aData).get._1 must_== jA } "build a composite Json" in { - ImportJdbc.mkPartialJson("a",tblABDesc,aData++bData)._1 must_== jAB + ImportJdbc.mkPartialJson("a",tblABDesc,aData++bData).get._1 must_== jAB } "build a relation Json" in { - ImportJdbc.mkPartialJson("c",tblCABDesc,cData++aData++bData)._1 must_== jC + ImportJdbc.mkPartialJson("c",tblCABDesc,cData++aData++bData).get._1 must_== jC } - "build a JArray for multiple values" in { - val tblDesc = IngestInfo(List(ImportTable("parent",List("ID","name"), Left(Table("Parent"))),ImportTable("child",List("ID","name","P_ID"), Right(Join("id",Key(Table("child"),"parent_id"),ExportedKey))))) - val dataChld1 = List("1","parent","1","child1","1") - val dataNoChld = List("1","parent",null,null,null) - val dataChld2 = List("1","parent","2","child2","1") - val dataParent3 = List("3","parent3","2","child2","1") + val tblDesc = IngestInfo(List(ImportTable("parent",List("ID","name"), Left(Table("Parent"))),ImportTable("child",List("ID","name","P_ID"), Right(Join("id",Key(Table("child"),"parent_id"),ExportedKey))))) + val dataChld1 = List("1","parent","1","child1","1") + val dataNoChld = List("1","parent",null,null,null) + val dataChld2 = List("1","parent","2","child2","1") + val dataParent3 = List("3","parent3","2","child2","1") - val (emptyChildJson,_)=ImportJdbc.mkPartialJson("parent",tblDesc,dataNoChld) + + "build Jobjects for multiple values" in { + + val Some((emptyChildJson,_))=ImportJdbc.mkPartialJson("parent",tblDesc,dataNoChld) emptyChildJson must_== - JObject(JField("ID",JString("1"))::JField("name",JString("parent"))::JField("child",JArray(Nil))::Nil) + JObject(JField("ID",JString("1"))::JField("name",JString("parent"))::JField("CHILD",JArray(Nil))::Nil) - val 
(partJson,m)=ImportJdbc.mkPartialJson("parent",tblDesc,dataChld2) + val Some((partJson,_))=ImportJdbc.mkPartialJson("parent",tblDesc,dataChld2) partJson must_== JObject(JField("ID",JString("1"))::JField("name",JString("parent")):: - JField("child",JArray( + JField("CHILD",JArray( JObject(JField("ID",JString("2"))::JField("name",JString("child2"))::JField("P_ID",JString("1"))::Nil)::Nil) )::Nil) - val (d1Json,m1)=ImportJdbc.mkPartialJson("parent",tblDesc,dataChld1,m) + + ImportJdbc.mkPartialJson("parent",tblDesc,dataParent3).get._1 must_== JObject(JField("ID",JString("3"))::JField("name",JString("parent3")):: + JField("CHILD",JArray( + JObject(JField("ID",JString("2"))::JField("name",JString("child2"))::JField("P_ID",JString("1"))::Nil)::Nil) + )::Nil) + } + + "build a composite object" in { + val ds=dataChld1.toSeq::dataChld2.toSeq::StreamT.empty + val Some((d1Json,_))=ImportJdbc.mkPartialJson("parent",tblDesc,ds) d1Json must_== JObject(JField("ID",JString("1"))::JField("name",JString("parent")):: - JField("child",JArray( + JField("CHILD",JArray( JObject(JField("ID",JString("1"))::JField("name",JString("child1"))::JField("P_ID",JString("1"))::Nil):: JObject(JField("ID",JString("2"))::JField("name",JString("child2"))::JField("P_ID",JString("1"))::Nil)::Nil) )::Nil) + } - ImportJdbc.mkPartialJson("parent",tblDesc,dataParent3,m1)._1 must_== JObject(JField("ID",JString("3"))::JField("name",JString("parent3")):: - JField("child",JArray( + "objects must be uppercase" in { + val ds=dataChld1.toSeq::dataChld2.toSeq::StreamT.empty + val Some((d1Json,_))=ImportJdbc.mkPartialJson("parent",tblDesc,ds) + d1Json must_== + JObject(JField("ID",JString("1"))::JField("name",JString("parent")):: + JField("CHILD",JArray( + JObject(JField("ID",JString("1"))::JField("name",JString("child1"))::JField("P_ID",JString("1"))::Nil):: + JObject(JField("ID",JString("2"))::JField("name",JString("child2"))::JField("P_ID",JString("1"))::Nil)::Nil) + )::Nil) + } + + "buildBody for multiple values" 
in { + val tblDesc = IngestInfo(List(ImportTable("parent",List("ID","name"), Left(Table("Parent"))),ImportTable("child",List("ID","name","P_ID"), Right(Join("id",Key(Table("child"),"parent_id"),ExportedKey))))) + val dataChld1 = List("1","parent1","1","child1","1") + val dataChld2 = List("1","parent1","2","child2","1") + val dataNoChld = List("2","parent2",null,null,null) + val dataParent3 = List("3","parent3","2","child2","1") + + val allData= StreamT.fromIterable((dataChld1::dataChld2::dataNoChld::dataParent3::Nil).reverse.map( _.toIndexedSeq).toIterable) + + implicit val executionContext = defaultFutureDispatch + implicit val futureMonad= new FutureMonad(executionContext) + + val r= ImportJdbc.buildBody(allData,"parent",tblDesc) + Await.result(r.toStream,1 minute).toList must_==( + JObject(JField("ID",JString("1"))::JField("name",JString("parent1")):: + JField("CHILD",JArray( + JObject(JField("ID",JString("2"))::JField("name",JString("child2"))::JField("P_ID",JString("1"))::Nil):: + JObject(JField("ID",JString("1"))::JField("name",JString("child1"))::JField("P_ID",JString("1"))::Nil):: + Nil) + )::Nil):: + JObject(JField("ID",JString("2"))::JField("name",JString("parent2"))::JField("CHILD",JArray(Nil))::Nil):: + JObject(JField("ID",JString("3"))::JField("name",JString("parent3")):: + JField("CHILD",JArray( JObject(JField("ID",JString("2"))::JField("name",JString("child2"))::JField("P_ID",JString("1"))::Nil)::Nil) - )::Nil) + )::Nil)::Nil).reverse } } @@ -121,5 +170,4 @@ class ImportJdbcTest extends Specification with FutureMatchers with HttpRequestM } } } -} - +} \ No newline at end of file diff --git a/tools/import/jdbc/src/test/scala/com/precog/tools/importers/jdbc/package.scala b/tools/import/jdbc/src/test/scala/com/precog/tools/importers/jdbc/package.scala index 0499cf3..f3f58d0 100644 --- a/tools/import/jdbc/src/test/scala/com/precog/tools/importers/jdbc/package.scala +++ b/tools/import/jdbc/src/test/scala/com/precog/tools/importers/jdbc/package.scala @@ 
-18,7 +18,7 @@ package object jdbc { Class.forName("org.h2.Driver") // use api key and dispatch to call ingest - val host="http://beta.precog.com" + val host="https://beta.precog.com" val apiKey="43AB865E-BB86-4F74-A57E-7E8BBD77F2B5" val basePath="/0000000457/data" @@ -77,8 +77,8 @@ package object jdbc { val jA = JObject(JField("ID",JString("1"))::JField("name",JString("aaa"))::Nil) val jB =JObject(JField("ID",JString("2"))::JField("A_ID",JString("1"))::JField("name",JString("bbb"))::Nil) - val jAB = JObject(JField("ID",JString("1"))::JField("name",JString("aaa"))::JField("b",JArray(jB::Nil))::Nil) - val jC = JObject(JField("A_ID",JString("1"))::JField("B_ID",JString("2"))::JField("name",JString("ccc"))::JField("a",jA)::JField("b",JObject(JField("ID",JString("2"))::JField("A_ID",JString("1"))::JField("name",JString("bbb"))::Nil))::Nil) + val jAB = JObject(JField("ID",JString("1"))::JField("name",JString("aaa"))::JField("B",JArray(jB::Nil))::Nil) + val jC = JObject(JField("A_ID",JString("1"))::JField("B_ID",JString("2"))::JField("name",JString("ccc"))::JField("A",jA)::JField("B",JObject(JField("ID",JString("2"))::JField("A_ID",JString("1"))::JField("name",JString("bbb"))::Nil))::Nil) //def getConn(db:String)=DriverManager.getConnection("jdbc:h2:~/%s".format(db)) From e01200915722e5f8146c391267a16fb8d91bc889 Mon Sep 17 00:00:00 2001 From: gclaramunt Date: Tue, 22 Jan 2013 19:03:24 -0200 Subject: [PATCH 03/12] add database name to precog ingest path --- .../scala/com/precog/tools/importers/mongo/ImportMongo.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/import/mongodb/src/main/scala/com/precog/tools/importers/mongo/ImportMongo.scala b/tools/import/mongodb/src/main/scala/com/precog/tools/importers/mongo/ImportMongo.scala index 9166f2c..9a5d027 100644 --- a/tools/import/mongodb/src/main/scala/com/precog/tools/importers/mongo/ImportMongo.scala +++ b/tools/import/mongodb/src/main/scala/com/precog/tools/importers/mongo/ImportMongo.scala @@ 
-86,7 +86,7 @@ object ImportMongo { } val (fds, fmaxId) = (fdsid map (_._1), fdsid map (_._2)) val fjsons = fds.map(_.flatMap(MongoToJson(_).toStream)) - val fullPath = "%s/ingest/v1/sync/fs%s/%s".format(precogHost, basePath, collName) + val fullPath = "%s/ingest/v1/sync/fs%s/%s/%s".format(precogHost, basePath, dbName, collName) val data = StreamT.fromStream[Future, JValue](fjsons) val fresult = M.lift2((a: HttpResponse[ByteChunk], b: ObjectId) => (a, b))(sendToPrecog(fullPath, apiKey, data), fmaxId) From 8c1b3bbb83d323d5b476a18675c7f027edb8f1b7 Mon Sep 17 00:00:00 2001 From: gclaramunt Date: Sun, 27 Jan 2013 16:13:22 -0200 Subject: [PATCH 04/12] create common package add configuration via mongodb --- .../tools/importers/common/ConsoleUtils.scala | 44 ++++++ .../tools/importers/common/package.scala | 18 +++ tools/import/mongodb/build.sbt | 10 +- .../tools/importers/mongo/ImportMongo.scala | 147 ++++++++++++------ 4 files changed, 171 insertions(+), 48 deletions(-) create mode 100644 tools/import/common/src/main/scala/com/precog/tools/importers/common/ConsoleUtils.scala create mode 100644 tools/import/common/src/main/scala/com/precog/tools/importers/common/package.scala diff --git a/tools/import/common/src/main/scala/com/precog/tools/importers/common/ConsoleUtils.scala b/tools/import/common/src/main/scala/com/precog/tools/importers/common/ConsoleUtils.scala new file mode 100644 index 0000000..b9c82ad --- /dev/null +++ b/tools/import/common/src/main/scala/com/precog/tools/importers/common/ConsoleUtils.scala @@ -0,0 +1,44 @@ +package com.precog.tools.importers.common + +import annotation.tailrec + +/** + * User: gabriel + * Date: 1/25/13 + */ +object ConsoleUtils { + + @tailrec + def selectSet[T](label:String, available: Seq[T], selected: Seq[T]=List())(implicit arg0: ClassManifest[T]): Seq[T] = + if (available.isEmpty) selected + else { + val availArray=available.toArray + + println("Available %ss:".format(label)) + println(present(availArray)) + + println("Selected 
%ss:".format(label)) + println(present(selected)) + + println("Select a number/enter the name, 0 to select all, or enter to continue: ") + + val selIdx = readLine() + selIdx match { + case "" => selected + case ParseInt(0) => available + case ParseInt(x) if (x<=available.size) => { + val elem:T = availArray(x - 1) + selectSet(label,available.filterNot(_==elem), selected:+elem) + } + case s:String if (available.exists(_.toString == s)) => { + val elem:T =availArray.find(_.toString == s).get + selectSet(label,available.filterNot(_==elem), selected:+elem) + } + case _ => selectSet(label,available, selected) + } + } + + def present[T](arr:Seq[T])= (1 to arr.length).zip(arr).map(x=>x._1 +":"+ x._2).mkString(", ") + + +} diff --git a/tools/import/common/src/main/scala/com/precog/tools/importers/common/package.scala b/tools/import/common/src/main/scala/com/precog/tools/importers/common/package.scala new file mode 100644 index 0000000..e97bdc4 --- /dev/null +++ b/tools/import/common/src/main/scala/com/precog/tools/importers/common/package.scala @@ -0,0 +1,18 @@ +package com.precog.tools.importers + +/** + * User: gabriel + * Date: 1/25/13 + */ +package object common { + + object ParseInt{ + def unapply(s : String) : Option[Int] = try { + Some(s.toInt) + } catch { + case _ : java.lang.NumberFormatException => None + } + } + + +} diff --git a/tools/import/mongodb/build.sbt b/tools/import/mongodb/build.sbt index ec51c8b..1b96fe6 100644 --- a/tools/import/mongodb/build.sbt +++ b/tools/import/mongodb/build.sbt @@ -1,5 +1,7 @@ name := "import-mongodb" +organization := "org.precog" + version := "0.1" scalaVersion := "2.9.2" @@ -14,12 +16,12 @@ resolvers ++= Seq( scalacOptions ++= Seq("-unchecked", "-deprecation") -seq(com.github.retronym.SbtOneJar.oneJarSettings: _*) +//seq(com.github.retronym.SbtOneJar.oneJarSettings: _*) libraryDependencies ++= Seq( - "com.reportgrid" %% "blueeyes-core" % "1.0.0-M6", - "com.reportgrid" %% "blueeyes-json" % "1.0.0-M6", - "com.reportgrid" %% 
"blueeyes-mongo" % "1.0.0-M6", + "com.reportgrid" %% "blueeyes-core" % "latest.milestone", //"1.0.0-M6", + "com.reportgrid" %% "blueeyes-json" % "latest.milestone", //"1.0.0-M6", + "com.reportgrid" %% "blueeyes-mongo" % "latest.milestone", //"1.0.0-M6", "org.mongodb" %% "casbah" % "2.3.0", "org.scalaz" %% "scalaz-core" % "7.0.0-M3" , "org.specs2" %% "specs2" % "1.12.2" % "test" diff --git a/tools/import/mongodb/src/main/scala/com/precog/tools/importers/mongo/ImportMongo.scala b/tools/import/mongodb/src/main/scala/com/precog/tools/importers/mongo/ImportMongo.scala index 9a5d027..08da77c 100644 --- a/tools/import/mongodb/src/main/scala/com/precog/tools/importers/mongo/ImportMongo.scala +++ b/tools/import/mongodb/src/main/scala/com/precog/tools/importers/mongo/ImportMongo.scala @@ -6,7 +6,7 @@ import blueeyes.persistence.mongo.json.BijectionsMongoJson.MongoToJson._ import com.mongodb.casbah.commons.TypeImports.ObjectId import scalaz.{Monad, StreamT} import akka.dispatch.{Await, Future} -import blueeyes.json.{JObject, JString, JParser, JValue} +import blueeyes.json._ import blueeyes.core.service.engines.HttpClientXLightWeb import blueeyes.bkka.AkkaDefaults._ import blueeyes.core.data.DefaultBijections._ @@ -17,7 +17,17 @@ import java.nio.ByteBuffer import akka.util.Duration import blueeyes.core.http.HttpResponse import blueeyes.core.service._ -import annotation.tailrec +import com.precog.tools.importers.common._ +import ConsoleUtils._ +import scala.Left +import scala.Some +import scala.Right +import scala.Left +import scala.Some +import scala.Right +import com.mongodb +import collection.JavaConversions.SeqWrapper + /** * User: gabriel @@ -29,12 +39,50 @@ object ImportMongo { implicit val executionContext = defaultFutureDispatch implicit val M: Monad[Future] = new FutureMonad(executionContext) + val configDb="_precog_mongo_importer" + val collsConfig="collections_to_import" + val sampleSize=100 + def parseInt(s : String) : Option[Int] = try { Some(s.toInt) } catch { 
case _ : java.lang.NumberFormatException => None } + // No @tailrec but we don't expect getting back from mongoDb a hierarchy big enough to blow the stack + def columnsOf(bObject: MongoDBObject): Seq[String]={ + bObject.flatMap(kv => kv._2 match { + case m:MongoDBObject => columnsOf(m).map("%s.%s".format(kv._1,_)) + case _ => Set(kv._1) + }).toSeq + } + + def sampleColumns(db: String, coll: String)(implicit mongoConn:MongoConnection)={ + val collection=mongoConn(db)(coll).find().take(sampleSize) + collection.flatMap(columnsOf(_)).toSet + } + + def configureCollections(connection: MongoConnection):Seq[DBObject]={ + implicit val c=connection + println("No configuration found in the mongo instance, creating a new one.") + val databases=selectSet("database",connection.databaseNames) + val dbColls=databases.map( db=>{ println("Database %s".format(db)); (db,selectSet("collection",connection(db).getCollectionNames().toSeq))}) + dbColls.flatMap(dbColl =>{ + val (db,colls) = dbColl + colls.map( coll =>{ + val fields=if (readLine("Sample and select columns of %s.%s? 
(y/N)".format(db,coll)).toLowerCase == "y"){ + Some(selectSet("column", sampleColumns(db,coll).toSeq )) + } else { + None + } + val dbObj =MongoDBObject("database"->db, "collection"->coll) + fields.map(flds=>dbObj ++ ("fields"->flds)).getOrElse(dbObj) + } + ) + } + ) + } + def main(args:Array[String]){ if (args.length != 5) { @@ -50,65 +98,76 @@ object ImportMongo { val precogHost=args(2) val basePath=args(3) val apiKey=args(4) + try { + implicit val mongoConn= MongoConnection(mongoHost,mongoPort) - implicit val mongoConn= MongoConnection(mongoHost,mongoPort) + val inputConfigColl=mongoConn(configDb)(collsConfig) - @tailrec - def readConfigLine(acc:List[String]):List[String]={ - val line=readLine() - if (line != null && line != ""){ - if (line.startsWith("#")) readConfigLine(acc) //skip lines starting with # - else readConfigLine(line::acc) - } else acc - } - println("# enter json import descriptors, EOF or empty line to continue") - println("""# format: { "database":"", "collection":"" } or { "database":"", "collection":"", "lastId":"" } """) - val jsonImputs=readConfigLine(Nil) - val fresults=jsonImputs.map(JParser.parseFromString(_).map(importCollection(precogHost,basePath,apiKey,_))).flatMap(_.toList) + //workaround + if (inputConfigColl.isEmpty) { + val configs=configureCollections(mongoConn) + configs.map(inputConfigColl.save(_)) + } + val jsonImputs= inputConfigColl.find().toList - val continueJson=Await.result(Future.sequence(fresults), Duration("24 hours")) - println("#################################################################") - println("# to continue ingestion from last point, use the following imput:") - println(continueJson.mkString("\n")) + val fimports=jsonImputs.flatMap(x=> MongoToJson(x).toList.map(importCollection(precogHost,basePath,apiKey,_))) - actorSystem.shutdown() + val fresults=Await.result(Future.sequence(fimports), Duration("24 hours")) + jsonImputs.zip(fresults).map( r =>{ + val (mDbObj,(result,lastId)) = r + 
println("%s".format(result)) + inputConfigColl.save(mDbObj++("lastId"->lastId)) //JsonToMongo(continueJson).map(inputConfigColl.save(_)) + } + ) + } finally { + println("Shutting down...") + actorSystem.shutdown() + } } - def importCollection(precogHost:String, basePath:String, apiKey:String, jparams: JValue) (implicit mongoConn: MongoConnection):Future[String]={ - def strValue(jv: JValue) = (jv --> classOf[JString]).value - val dbName = strValue(jparams \ "database") - val collName = strValue(jparams \ "collection") - val lastId = (jparams \? "lastId").map(strValue(_)) getOrElse ("000000000000000000000000") + def pair[T](getter: String=>T)(name:String ) = (name-> getter(name)) + def getString(jo: JObject)(field:String) = strValue(jo \ field) + def getArray(jo: JObject)(field:String) = arrOfStrValues(jo \ field) + + def strValue(jv: JValue) = (jv --> classOf[JString]).value + def arrOfStrValues(jv: JValue) = (jv -->? classOf[JArray]).map(_.elements.map(strValue(_))).getOrElse(Nil) + + def importCollection(precogHost:String, basePath:String, apiKey:String, jparams: JObject) (implicit mongoConn: MongoConnection):Future[(String,String)]={ + val dbName = getString(jparams)("database") + val collName = getString(jparams)("collection") + val fieldNames = getArray(jparams)("fields") + val lastId = (jparams \? 
"lastId").map(strValue(_)) getOrElse ("000000000000000000000000") val fdsid = Future { - readFromMongo(mongoConn, dbName, collName, lastId) + val rStrm=readFromMongo(mongoConn, dbName, collName, lastId, fieldNames) + val (oids,dataStrm)=rStrm.map(m=>(m.get("_id").asInstanceOf[ObjectId],m)).unzip + val maxOid= if (oids.isEmpty) lastId else oids.max.toStringMongod + (dataStrm,maxOid) } val (fds, fmaxId) = (fdsid map (_._1), fdsid map (_._2)) + val fjsons = fds.map(_.flatMap(MongoToJson(_).toStream)) - val fullPath = "%s/ingest/v1/sync/fs%s/%s/%s".format(precogHost, basePath, dbName, collName) - val data = StreamT.fromStream[Future, JValue](fjsons) - val fresult = M.lift2((a: HttpResponse[ByteChunk], b: ObjectId) => (a, b))(sendToPrecog(fullPath, apiKey, data), fmaxId) - - fresult.map(r => { - val (result, oid) = r - result match { - case HttpResponse(_, _, Some(Left(buffer)), _) => { - println("### result from precog: %s".format(new String(buffer.array(), "UTF-8"))) + val fullPath = "%s/ingest/v1/sync/fs%s/%sr/%s".format(precogHost, basePath, dbName, collName) + val data = StreamT.fromStream[Future, JObject](fjsons) + val fsend=data.isEmpty.flatMap( isEmpty => + if (isEmpty) Future("No new data found in %s.%s".format(dbName,collName)) + else sendToPrecog(fullPath, apiKey, data)map( _ match { + case HttpResponse(_, _, Some(Left(buffer)), _) => { + "Result from precog: %s".format(new String(buffer.array(), "UTF-8")) + } + case result => "Error: %s".format(result.toString()) } - case _ => println("### error: %s".format(result.toString())) - } - """{ "database":"%s", "collection":"%s" "lastId":"%s" }""".format(dbName, collName, oid) - }) + )) + M.lift2((a: String, b: String) => (a, b))(fsend, fmaxId) } - def readFromMongo(mongoConn: MongoConnection, dbName: String, colName: String, oid:String):(Stream[DBObject],ObjectId)={ + def readFromMongo(mongoConn: MongoConnection, dbName: String, colName: String, oid:String, fieldNames:Seq[String]):Stream[DBObject]={ val mongoDB = 
mongoConn(dbName) val mongoColl = mongoDB(colName) val q = "_id" $gt (new ObjectId(oid)) - val rStrm=mongoColl.find(q).toStream //.view ? - val (oids,dataStrm)=rStrm.map(m=>(m.get("_id").asInstanceOf[ObjectId],m)).unzip - (dataStrm,oids.max) + val fields = MongoDBObject(fieldNames.map(_->""):_*) + mongoColl.find(q,fields).toStream //.view ? } def sendToPrecog(fullPath:String, apiKey:String, dataStream:StreamT[Future,JValue]): Future[HttpResponse[ByteChunk]] = { @@ -117,7 +176,7 @@ object ImportMongo { val byteStream: StreamT[Future, ByteBuffer] = dataStream.map(jv => ByteBuffer.wrap({ val js = "%s\n".format(jv.renderCompact) - print("# %s".format(js)) + print("%s".format(js)) js }.getBytes("UTF-8"))) From 86516b94e09e047fa56675a45c346c9642365c56 Mon Sep 17 00:00:00 2001 From: gclaramunt Date: Fri, 1 Feb 2013 10:46:49 -0200 Subject: [PATCH 05/12] fix premature closing of connection add org to build.sbt --- tools/import/jdbc/build.sbt | 2 ++ .../tools/importers/jdbc/ImportJdbc.scala | 18 +++++++++++++----- .../importers/jdbc/ImportJdbcService.scala | 6 +----- 3 files changed, 16 insertions(+), 10 deletions(-) diff --git a/tools/import/jdbc/build.sbt b/tools/import/jdbc/build.sbt index 1efb4a3..fcff104 100644 --- a/tools/import/jdbc/build.sbt +++ b/tools/import/jdbc/build.sbt @@ -1,5 +1,7 @@ name := "import-jdbc" +organization := "org.precog" + version := "0.1" scalaVersion := "2.9.2" diff --git a/tools/import/jdbc/src/main/scala/com/precog/tools/importers/jdbc/ImportJdbc.scala b/tools/import/jdbc/src/main/scala/com/precog/tools/importers/jdbc/ImportJdbc.scala index 00c5048..7f61efa 100644 --- a/tools/import/jdbc/src/main/scala/com/precog/tools/importers/jdbc/ImportJdbc.scala +++ b/tools/import/jdbc/src/main/scala/com/precog/tools/importers/jdbc/ImportJdbc.scala @@ -134,20 +134,28 @@ object ImportJdbc { val dataStream:StreamT[Future,ByteBuffer] =buildBody(data, objName, tblDesc) .map(jv=>ByteBuffer.wrap({val js="%s\n".format(jv.renderCompact); print(js); 
js}.getBytes("UTF-8"))) + dataStream.isEmpty.flatMap( isEmpty => + if (isEmpty) Future(HttpResponse.empty) + else { + val body:ByteChunk= Right(dataStream) + val fullPath = "%s/ingest/v1/sync/fs%s/%s".format(host, ingestPath,objName) + //TODO add owner account id + println("sending to ingest: path=%s query=%s".format(fullPath,query)) + httpClient.parameters('apiKey -> apiKey).post(fullPath)(body) + } + ) - val body:ByteChunk= Right(dataStream) - val fullPath = "%s/ingest/v1/sync/fs%s/%s".format(host, ingestPath,objName) - //TODO add owner account id - println("sending to ingest: path=%s query=%s".format(fullPath,query)) - httpClient.parameters('apiKey -> apiKey).post(fullPath)(body) } + + //def unfoldM[Future[+_],A,StreamT[Future,Seq[String]](start: StreamT[Future,Seq[String])(f: B => Future[Option[(A,StreamT[Future,Seq[String])]])(implicit Future: Functor[Future]): StreamT[Future,A] def buildBody(data: StreamT[Id,Seq[String]], baseTable: String, i: IngestInfo)(implicit executor: ExecutionContext, m:FutureMonad): StreamT[Future,JValue] = StreamT.unfoldM[Future,JValue,StreamT[Id,Seq[String]]](data)(ds=> if (ds.isEmpty) Future(None) else Future(mkPartialJson(baseTable,i,ds))) + } diff --git a/tools/import/jdbc/src/main/scala/com/precog/tools/importers/jdbc/ImportJdbcService.scala b/tools/import/jdbc/src/main/scala/com/precog/tools/importers/jdbc/ImportJdbcService.scala index 6a5010b..a8ceb98 100644 --- a/tools/import/jdbc/src/main/scala/com/precog/tools/importers/jdbc/ImportJdbcService.scala +++ b/tools/import/jdbc/src/main/scala/com/precog/tools/importers/jdbc/ImportJdbcService.scala @@ -39,11 +39,7 @@ trait ImportJdbcService extends BlueEyesServiceBuilder { val user = r.parameters.get('user).getOrElse(null) val pwd = r.parameters.get('password).getOrElse(null) val c=getConnection(dbUrl, user, pwd,database) - try { - f(c,r) - } finally { - c.close() - } + f(c,r).flatMap(x=>Future({c.close();x})) } def handleRequestWithConnection[T](f: (Connection,HttpRequest[T])=> 
Future[HttpResponse[T]])= handleRequest( (r: HttpRequest[T]) => withConnectionFromRequest(r)(f)) From f46d93441af06f9c0fc90c404f5c639641af6816 Mon Sep 17 00:00:00 2001 From: gclaramunt Date: Thu, 21 Feb 2013 10:30:03 -0200 Subject: [PATCH 06/12] add assembly task and update build config --- tools/import/jdbc/build.sbt | 2 ++ tools/import/mongodb/build.sbt | 6 +++++- tools/import/mongodb/project/plugins.sbt | 2 +- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/tools/import/jdbc/build.sbt b/tools/import/jdbc/build.sbt index 1efb4a3..fcff104 100644 --- a/tools/import/jdbc/build.sbt +++ b/tools/import/jdbc/build.sbt @@ -1,5 +1,7 @@ name := "import-jdbc" +organization := "org.precog" + version := "0.1" scalaVersion := "2.9.2" diff --git a/tools/import/mongodb/build.sbt b/tools/import/mongodb/build.sbt index 1b96fe6..ab5b691 100644 --- a/tools/import/mongodb/build.sbt +++ b/tools/import/mongodb/build.sbt @@ -1,8 +1,10 @@ +import AssemblyKeys._ + name := "import-mongodb" organization := "org.precog" -version := "0.1" +version := "0.2" scalaVersion := "2.9.2" @@ -18,6 +20,8 @@ scalacOptions ++= Seq("-unchecked", "-deprecation") //seq(com.github.retronym.SbtOneJar.oneJarSettings: _*) +assemblySettings + libraryDependencies ++= Seq( "com.reportgrid" %% "blueeyes-core" % "latest.milestone", //"1.0.0-M6", "com.reportgrid" %% "blueeyes-json" % "latest.milestone", //"1.0.0-M6", diff --git a/tools/import/mongodb/project/plugins.sbt b/tools/import/mongodb/project/plugins.sbt index 50d00c8..00680b1 100644 --- a/tools/import/mongodb/project/plugins.sbt +++ b/tools/import/mongodb/project/plugins.sbt @@ -1,3 +1,3 @@ +addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.8.6") -addSbtPlugin("com.github.retronym" % "sbt-onejar" % "0.8") From 6f7383524403288491bf991353db23fd6877c376 Mon Sep 17 00:00:00 2001 From: gclaramunt Date: Thu, 21 Feb 2013 10:32:15 -0200 Subject: [PATCH 07/12] handle different "sorting" columns --- .../tools/importers/mongo/ImportMongo.scala | 169 
+++++++++++------- 1 file changed, 102 insertions(+), 67 deletions(-) diff --git a/tools/import/mongodb/src/main/scala/com/precog/tools/importers/mongo/ImportMongo.scala b/tools/import/mongodb/src/main/scala/com/precog/tools/importers/mongo/ImportMongo.scala index 08da77c..e9afe11 100644 --- a/tools/import/mongodb/src/main/scala/com/precog/tools/importers/mongo/ImportMongo.scala +++ b/tools/import/mongodb/src/main/scala/com/precog/tools/importers/mongo/ImportMongo.scala @@ -2,14 +2,12 @@ package com.precog.tools.importers.mongo import com.mongodb.casbah.Imports._ import blueeyes.persistence.mongo.json.BijectionsMongoJson._ -import blueeyes.persistence.mongo.json.BijectionsMongoJson.MongoToJson._ import com.mongodb.casbah.commons.TypeImports.ObjectId -import scalaz.{Monad, StreamT} +import scalaz._ import akka.dispatch.{Await, Future} import blueeyes.json._ import blueeyes.core.service.engines.HttpClientXLightWeb import blueeyes.bkka.AkkaDefaults._ -import blueeyes.core.data.DefaultBijections._ import blueeyes.bkka.AkkaDefaults.defaultFutureDispatch import blueeyes.bkka.FutureMonad import blueeyes.core.data.ByteChunk @@ -19,14 +17,17 @@ import blueeyes.core.http.HttpResponse import blueeyes.core.service._ import com.precog.tools.importers.common._ import ConsoleUtils._ +import com.mongodb +import mongodb.casbah.MongoURI +import mongodb.casbah.query.AsQueryParam +import java.util.Date +import java.util +import collection.JavaConversions._ + +//import mongodb.casbah.commons.Imports._ import scala.Left import scala.Some import scala.Right -import scala.Left -import scala.Some -import scala.Right -import com.mongodb -import collection.JavaConversions.SeqWrapper /** @@ -39,8 +40,7 @@ object ImportMongo { implicit val executionContext = defaultFutureDispatch implicit val M: Monad[Future] = new FutureMonad(executionContext) - val configDb="_precog_mongo_importer" - val collsConfig="collections_to_import" + val collsConfig="precog_import_config" val sampleSize=100 def 
parseInt(s : String) : Option[Int] = try { @@ -57,69 +57,85 @@ object ImportMongo { }).toSeq } - def sampleColumns(db: String, coll: String)(implicit mongoConn:MongoConnection)={ - val collection=mongoConn(db)(coll).find().take(sampleSize) + def sampleColumns(db: MongoDB, coll: String)(implicit mongoConn:MongoConnection)={ + val collection=db(coll).find().take(sampleSize) collection.flatMap(columnsOf(_)).toSet } - def configureCollections(connection: MongoConnection):Seq[DBObject]={ - implicit val c=connection + def configureCollections(db: MongoDB)(implicit mongoConn:MongoConnection):Seq[DBObject]={ println("No configuration found in the mongo instance, creating a new one.") - val databases=selectSet("database",connection.databaseNames) - val dbColls=databases.map( db=>{ println("Database %s".format(db)); (db,selectSet("collection",connection(db).getCollectionNames().toSeq))}) - dbColls.flatMap(dbColl =>{ - val (db,colls) = dbColl - colls.map( coll =>{ - val fields=if (readLine("Sample and select columns of %s.%s? (y/N)".format(db,coll)).toLowerCase == "y"){ - Some(selectSet("column", sampleColumns(db,coll).toSeq )) - } else { - None - } - val dbObj =MongoDBObject("database"->db, "collection"->coll) - fields.map(flds=>dbObj ++ ("fields"->flds)).getOrElse(dbObj) - } - ) + val databases=db.name + println("DATABASE %s \n".format(db)) + val colls=selectSet("collection",db.getCollectionNames().toSeq) + colls.map( coll =>{ + println("\n ---- Collection %s ----".format(coll)) + val columns=sampleColumns(db,coll).toSeq + val fields=selectSet("column", columns) + + //TODO ugly, maybe using a wrapper type? 
+ val sortColumns=db(coll).find().take(sampleSize).map(mobj => mobj.toMap).reduceLeft(_++_).filter( kv => kv._2 match { + case s:String => true + case d:java.lang.Long => true + case oid:ObjectId => true + case dt:Date => true + case _ => false + }) + + val sortColumn=selectOne("import control column", sortColumns.keys.toSeq) + MongoDBObject("collection"->coll, "fields"->fields, "sortColumn"->sortColumn) } ) } def main(args:Array[String]){ - if (args.length != 5) { + if (args.length != 4) { println("Wrong number of parameters.") - println("Usage: ImportMongo mongo_host mongo_port precog_host precog_ingest_path precog_apiKey") + println("Usage: ImportMongo mongo_uri precog_host precog_ingest_path precog_apiKey") actorSystem.shutdown() sys.exit(1) } - val mongoHost=args(0) - val mongoPort=parseInt(args(1)).get + val mongoUri=args(0) - val precogHost=args(2) - val basePath=args(3) - val apiKey=args(4) + val precogHost=args(1) + val basePath=args(2) + val apiKey=args(3) try { - implicit val mongoConn= MongoConnection(mongoHost,mongoPort) + val uri = MongoURI(mongoUri) + + implicit val mongoConn=MongoConnection(uri) + uri.database.map { database => + + //TODO: use uri.database.asList and if it's empty, load the full list of dbs + val db = mongoConn(database) + for { + user <- uri.username + password <- uri.password + } { + db.authenticate(user, password.mkString) + } - val inputConfigColl=mongoConn(configDb)(collsConfig) + val inputConfigColl=db(collsConfig) - //workaround - if (inputConfigColl.isEmpty) { - val configs=configureCollections(mongoConn) - configs.map(inputConfigColl.save(_)) - } - val jsonImputs= inputConfigColl.find().toList - val fimports=jsonImputs.flatMap(x=> MongoToJson(x).toList.map(importCollection(precogHost,basePath,apiKey,_))) + if (inputConfigColl.isEmpty) { + val configs=configureCollections(db) + configs.map(inputConfigColl.save(_)) + } + val jsonImputs= inputConfigColl.find().toList - val fresults=Await.result(Future.sequence(fimports), 
Duration("24 hours")) + val fimports=jsonImputs.map(config=> importCollection(precogHost,basePath,apiKey,db, config)) - jsonImputs.zip(fresults).map( r =>{ - val (mDbObj,(result,lastId)) = r - println("%s".format(result)) - inputConfigColl.save(mDbObj++("lastId"->lastId)) //JsonToMongo(continueJson).map(inputConfigColl.save(_)) - } - ) + val fresults=Await.result(Future.sequence(fimports.toList), Duration("24 hours")) + + jsonImputs.zip(fresults).map( r =>{ + val (mDbObj,(result,lastId)) = r + println("%s".format(result)) + inputConfigColl.save(mDbObj++("lastId"->lastId)) + } + ) + } } finally { println("Shutting down...") actorSystem.shutdown() @@ -134,24 +150,36 @@ object ImportMongo { def strValue(jv: JValue) = (jv --> classOf[JString]).value def arrOfStrValues(jv: JValue) = (jv -->? classOf[JArray]).map(_.elements.map(strValue(_))).getOrElse(Nil) - def importCollection(precogHost:String, basePath:String, apiKey:String, jparams: JObject) (implicit mongoConn: MongoConnection):Future[(String,String)]={ - val dbName = getString(jparams)("database") - val collName = getString(jparams)("collection") - val fieldNames = getArray(jparams)("fields") - val lastId = (jparams \? "lastId").map(strValue(_)) getOrElse ("000000000000000000000000") + + def importCollection(precogHost:String, basePath:String, apiKey:String, db:MongoDB, mdbobj: MongoDBObject) (implicit mongoConn: MongoConnection):Future[(String,AnyRef)]={ + //val jparams: JObject=MongoToJson(mdbobj) + //val dbName = mdbobj.getAs[String]("database").get//getString(jparams)("database") + val collName = mdbobj.getAs[String]("collection").get//getString(jparams)("collection") + val fieldNames = mdbobj.getAsOrElse[util.ArrayList[String]]("fields",new util.ArrayList())//getArray(jparams)("fields") MongoDB + val lastId = mdbobj.getAs[String]("lastId") //(jparams \? 
"lastId").map(strValue(_)) getOrElse ("000000000000000000000000") + val sortColumn=mdbobj.getAs[String]("sortColumn").get val fdsid = Future { - val rStrm=readFromMongo(mongoConn, dbName, collName, lastId, fieldNames) - val (oids,dataStrm)=rStrm.map(m=>(m.get("_id").asInstanceOf[ObjectId],m)).unzip - val maxOid= if (oids.isEmpty) lastId else oids.max.toStringMongod + val rStrm=readFromMongo(db, collName, sortColumn, lastId, fieldNames) + val (oids,dataStrm)=rStrm.map(m=>(m(sortColumn),m)).unzip + + //ugly but need the runtime type to go form AnyRef to Ordering[_] for max to work... sum types + def ordering for sum types? + val maxOid= if (oids.isEmpty) lastId else { + oids.head match { + case s:String => oids.map( {case ss:String => ss}).max + case d:java.lang.Long => oids.map( {case ds:java.lang.Long => ds}).max + case oid:ObjectId => oids.map( {case oids:ObjectId => oids}).max + case dt:Date => oids.map( {case ds:Date => ds}).max + } + } (dataStrm,maxOid) } val (fds, fmaxId) = (fdsid map (_._1), fdsid map (_._2)) val fjsons = fds.map(_.flatMap(MongoToJson(_).toStream)) - val fullPath = "%s/ingest/v1/sync/fs%s/%sr/%s".format(precogHost, basePath, dbName, collName) + val fullPath = "%s/ingest/v1/sync/fs%s/%s/%s".format(precogHost, basePath, db.name, collName) val data = StreamT.fromStream[Future, JObject](fjsons) val fsend=data.isEmpty.flatMap( isEmpty => - if (isEmpty) Future("No new data found in %s.%s".format(dbName,collName)) + if (isEmpty) Future("No new data found in %s.%s".format(db.name,collName)) else sendToPrecog(fullPath, apiKey, data)map( _ match { case HttpResponse(_, _, Some(Left(buffer)), _) => { "Result from precog: %s".format(new String(buffer.array(), "UTF-8")) @@ -159,13 +187,21 @@ object ImportMongo { case result => "Error: %s".format(result.toString()) } )) - M.lift2((a: String, b: String) => (a, b))(fsend, fmaxId) + M.lift2((a: String, b: AnyRef) => (a, b))(fsend, fmaxId) } - def readFromMongo(mongoConn: MongoConnection, dbName: String, 
colName: String, oid:String, fieldNames:Seq[String]):Stream[DBObject]={ - val mongoDB = mongoConn(dbName) - val mongoColl = mongoDB(colName) - val q = "_id" $gt (new ObjectId(oid)) + def readFromMongo(mongoDB: MongoDB, collName: String, idCol:String, oLastId:Option[AnyRef], fieldNames:Seq[String]):Stream[DBObject]={ + val mongoColl = mongoDB(collName) + + //ugly, maybe using a wrapper type? + val q = oLastId.map( + _ match { + case s:String => idCol $gt s + case d:java.lang.Long => idCol $gt d.longValue() + case oid:ObjectId => idCol $gt oid + case dt:Date => idCol $gt dt + } + ).getOrElse(MongoDBObject()) val fields = MongoDBObject(fieldNames.map(_->""):_*) mongoColl.find(q,fields).toStream //.view ? } @@ -180,7 +216,6 @@ object ImportMongo { js }.getBytes("UTF-8"))) - //get the last/biggest id val byteChunks: ByteChunk = Right(byteStream) httpClient.parameters('apiKey -> apiKey).post(fullPath)(byteChunks) } From 671bf26933d9aa6b252ce4cf58f4b10487402828 Mon Sep 17 00:00:00 2001 From: gclaramunt Date: Thu, 21 Feb 2013 10:43:26 -0200 Subject: [PATCH 08/12] add common package --- .../tools/importers/common/ConsoleUtils.scala | 61 +++++++++++++++++++ .../tools/importers/common/package.scala | 18 ++++++ 2 files changed, 79 insertions(+) create mode 100644 tools/import/mongodb/src/main/scala/com/precog/tools/importers/common/ConsoleUtils.scala create mode 100644 tools/import/mongodb/src/main/scala/com/precog/tools/importers/common/package.scala diff --git a/tools/import/mongodb/src/main/scala/com/precog/tools/importers/common/ConsoleUtils.scala b/tools/import/mongodb/src/main/scala/com/precog/tools/importers/common/ConsoleUtils.scala new file mode 100644 index 0000000..caba47b --- /dev/null +++ b/tools/import/mongodb/src/main/scala/com/precog/tools/importers/common/ConsoleUtils.scala @@ -0,0 +1,61 @@ +package com.precog.tools.importers.common + +import annotation.tailrec + +/** + * User: gabriel + * Date: 1/25/13 + */ +object ConsoleUtils { + + @tailrec + def 
selectSet[T](label:String, available: Seq[T], selected: Seq[T]=List())(implicit arg0: ClassManifest[T]): Seq[T] = + if (available.isEmpty) selected + else { + val availArray=available.toArray + + println("Available %ss:".format(label)) + println(present(availArray)) + + println("Selected %ss:".format(label)) + println(present(selected)) + + println("Select a number/enter the name, 0 to select all, or enter to continue: ") + + val selIdx = readLine() + selIdx match { + case "" => selected + case ParseInt(0) => available + case ParseInt(x) if (x<=available.size) => { + val elem:T = availArray(x - 1) + selectSet(label,available.filterNot(_==elem), selected:+elem) + } + case s:String if (available.exists(_.toString == s)) => { + val elem:T =availArray.find(_.toString == s).get + selectSet(label,available.filterNot(_==elem), selected:+elem) + } + case _ => selectSet(label,available, selected) + } + } + + def selectOne[T](label:String, available: Seq[T])(implicit arg0: ClassManifest[T]): T ={ + + val availArray=available.toArray + + println("Available %ss:".format(label)) + println(present(availArray)) + + println("Select a number/enter the name: ") + + val selIdx = readLine() + selIdx match { + case ParseInt(x) if (x<=available.size) => availArray(x - 1) + case s:String if (available.exists(_.toString == s)) => availArray.find(_.toString == s).get + case _ => selectOne(label,available) + } + } + + def present[T](arr:Seq[T])= (1 to arr.length).zip(arr).map(x=>x._1 +":"+ x._2).mkString(", ") + + +} diff --git a/tools/import/mongodb/src/main/scala/com/precog/tools/importers/common/package.scala b/tools/import/mongodb/src/main/scala/com/precog/tools/importers/common/package.scala new file mode 100644 index 0000000..e97bdc4 --- /dev/null +++ b/tools/import/mongodb/src/main/scala/com/precog/tools/importers/common/package.scala @@ -0,0 +1,18 @@ +package com.precog.tools.importers + +/** + * User: gabriel + * Date: 1/25/13 + */ +package object common { + + object ParseInt{ + 
def unapply(s : String) : Option[Int] = try { + Some(s.toInt) + } catch { + case _ : java.lang.NumberFormatException => None + } + } + + +} From 587c59bfa047bae8d4681ae7df1d68d7751ebefc Mon Sep 17 00:00:00 2001 From: gclaramunt Date: Wed, 20 Mar 2013 21:36:04 -0300 Subject: [PATCH 09/12] fixed sbt dependencies, update importers to new ingest api --- .../tools/importers/common/ConsoleUtils.scala | 17 ++++++ tools/import/jdbc/build.sbt | 10 +-- .../tools/importers/jdbc/DbAccess.scala | 2 +- .../tools/importers/jdbc/ImportJdbc.scala | 4 +- .../importers/jdbc/ImportJdbcConsole.scala | 8 --- .../importers/jdbc/ImportJdbcService.scala | 21 ++++--- .../jdbc/ImportJdbcServiceTest.scala | 7 ++- .../tools/importers/jdbc/ImportJdbcTest.scala | 4 +- .../precog/tools/importers/jdbc/package.scala | 6 +- tools/import/mongodb/build.sbt | 8 +-- tools/import/mongodb/project/plugins.sbt | 3 +- .../tools/importers/common/ConsoleUtils.scala | 61 ------------------- .../tools/importers/common/package.scala | 18 ------ .../tools/importers/mongo/ImportMongo.scala | 2 +- 14 files changed, 52 insertions(+), 119 deletions(-) delete mode 100644 tools/import/mongodb/src/main/scala/com/precog/tools/importers/common/ConsoleUtils.scala delete mode 100644 tools/import/mongodb/src/main/scala/com/precog/tools/importers/common/package.scala diff --git a/tools/import/common/src/main/scala/com/precog/tools/importers/common/ConsoleUtils.scala b/tools/import/common/src/main/scala/com/precog/tools/importers/common/ConsoleUtils.scala index b9c82ad..caba47b 100644 --- a/tools/import/common/src/main/scala/com/precog/tools/importers/common/ConsoleUtils.scala +++ b/tools/import/common/src/main/scala/com/precog/tools/importers/common/ConsoleUtils.scala @@ -38,6 +38,23 @@ object ConsoleUtils { } } + def selectOne[T](label:String, available: Seq[T])(implicit arg0: ClassManifest[T]): T ={ + + val availArray=available.toArray + + println("Available %ss:".format(label)) + println(present(availArray)) + + 
println("Select a number/enter the name: ") + + val selIdx = readLine() + selIdx match { + case ParseInt(x) if (x<=available.size) => availArray(x - 1) + case s:String if (available.exists(_.toString == s)) => availArray.find(_.toString == s).get + case _ => selectOne(label,available) + } + } + def present[T](arr:Seq[T])= (1 to arr.length).zip(arr).map(x=>x._1 +":"+ x._2).mkString(", ") diff --git a/tools/import/jdbc/build.sbt b/tools/import/jdbc/build.sbt index fcff104..88c400d 100644 --- a/tools/import/jdbc/build.sbt +++ b/tools/import/jdbc/build.sbt @@ -1,7 +1,5 @@ name := "import-jdbc" -organization := "org.precog" - version := "0.1" scalaVersion := "2.9.2" @@ -19,13 +17,11 @@ resolvers ++= Seq( scalacOptions ++= Seq("-unchecked", "-deprecation") -seq(com.github.retronym.SbtOneJar.oneJarSettings: _*) +assemblySettings libraryDependencies ++= Seq( - "com.reportgrid" % "blueeyes-core_2.9.2" % "1.0.0-M6", - "com.reportgrid" % "blueeyes-json_2.9.2" % "1.0.0-M6", - "org.scalaz" % "scalaz-core_2.9.2" % "7.0.0-M3" , + "com.reportgrid" % "blueeyes-core_2.9.2" % "1.0.0-M7.7", + "com.reportgrid" % "blueeyes-json_2.9.2" % "1.0.0-M7.7", "org.specs2" %% "specs2" % "1.12.2" , - //sbtVersion(v => "com.github.siasia" %% "xsbt-proguard-plugin" % (v+"-0.1.1")), "com.h2database" % "h2" % "1.2.134" % "test" ) diff --git a/tools/import/jdbc/src/main/scala/com/precog/tools/importers/jdbc/DbAccess.scala b/tools/import/jdbc/src/main/scala/com/precog/tools/importers/jdbc/DbAccess.scala index 618b00c..17bec48 100644 --- a/tools/import/jdbc/src/main/scala/com/precog/tools/importers/jdbc/DbAccess.scala +++ b/tools/import/jdbc/src/main/scala/com/precog/tools/importers/jdbc/DbAccess.scala @@ -27,7 +27,7 @@ object DbAccess { for ( i <- 1 to count) yield Column(tblMetaData.getColumnName(i),Table(tblMetaData.getTableName(i))) } - //don't use! 
+ //warning: this serves the purpose, but hasNext calls rs.next() and so advances the cursor — it doesn't behave like a standard side-effect-free Iterator private def rsIterator[T](rs:ResultSet)(f:ResultSet => T) = new Iterator[T] { def hasNext = rs.next() def next():T = f(rs) diff --git a/tools/import/jdbc/src/main/scala/com/precog/tools/importers/jdbc/ImportJdbc.scala b/tools/import/jdbc/src/main/scala/com/precog/tools/importers/jdbc/ImportJdbc.scala index 00c5048..ab0bd43 100644 --- a/tools/import/jdbc/src/main/scala/com/precog/tools/importers/jdbc/ImportJdbc.scala +++ b/tools/import/jdbc/src/main/scala/com/precog/tools/importers/jdbc/ImportJdbc.scala @@ -136,10 +136,10 @@ object ImportJdbc { .map(jv=>ByteBuffer.wrap({val js="%s\n".format(jv.renderCompact); print(js); js}.getBytes("UTF-8"))) val body:ByteChunk= Right(dataStream) - val fullPath = "%s/ingest/v1/sync/fs%s/%s".format(host, ingestPath,objName) + val fullPath = "%s/ingest/v1/fs%s/%s".format(host, ingestPath,objName) //TODO add owner account id println("sending to ingest: path=%s query=%s".format(fullPath,query)) - httpClient.parameters('apiKey -> apiKey).post(fullPath)(body) + httpClient.parameters('apiKey -> apiKey,'mode -> "streaming").header("Content-Type","application/json").post(fullPath)(body) } def buildBody(data: StreamT[Id,Seq[String]], baseTable: String, i: IngestInfo)(implicit executor: ExecutionContext, m:FutureMonad): StreamT[Future,JValue] = diff --git a/tools/import/jdbc/src/main/scala/com/precog/tools/importers/jdbc/ImportJdbcConsole.scala b/tools/import/jdbc/src/main/scala/com/precog/tools/importers/jdbc/ImportJdbcConsole.scala index f0e0245..9e4c502 100644 --- a/tools/import/jdbc/src/main/scala/com/precog/tools/importers/jdbc/ImportJdbcConsole.scala +++ b/tools/import/jdbc/src/main/scala/com/precog/tools/importers/jdbc/ImportJdbcConsole.scala @@ -36,14 +36,6 @@ object ImportJdbcConsole { lazy val apiKey=readLine("API KEY for ingestion") lazy val basePath=readLine("Base ingestion path ( /{userId}/....)") - - /*lazy val dbUrl="jdbc:mysql://localhost/" //readLine("Enter database URL:") 
lazy val user="root" //readLine("User:") - lazy val password = "root" //readLine("Password:") - // use api key and dispatch to call ingest - lazy val host="https://beta.precog.com" //readLine("Precog ingestion host") // https://beta.precog.com - lazy val apiKey="43AB865E-BB86-4F74-A57E-7E8BBD77F2B5" //readLine("API KEY for ingestion") - lazy val basePath="/0000000457/import" //readLine("Base ingestion path ( /{userId}/....)")*/ val fresult=importJdbc(dbUrl,user,password, host, apiKey, basePath) Await.result(Future.sequence(fresult),Duration("24 hours")).map( diff --git a/tools/import/jdbc/src/main/scala/com/precog/tools/importers/jdbc/ImportJdbcService.scala b/tools/import/jdbc/src/main/scala/com/precog/tools/importers/jdbc/ImportJdbcService.scala index 6a5010b..ff0a297 100644 --- a/tools/import/jdbc/src/main/scala/com/precog/tools/importers/jdbc/ImportJdbcService.scala +++ b/tools/import/jdbc/src/main/scala/com/precog/tools/importers/jdbc/ImportJdbcService.scala @@ -1,7 +1,7 @@ package com.precog.tools.importers.jdbc -import akka.dispatch.Future +import akka.dispatch.{ExecutionContext, Future} import blueeyes.BlueEyesServiceBuilder import blueeyes.core.http.{HttpRequest, HttpResponse, HttpStatus} import blueeyes.core.http.HttpStatusCodes._ @@ -14,6 +14,15 @@ import JsonImplicits._ import java.sql.{DatabaseMetaData, Connection} import blueeyes.json.{JValue, JString, JArray} import Datatypes._ +import blueeyes.bkka.AkkaDefaults._ +import scala.Left +import com.precog.tools.importers.jdbc.Datatypes.Join +import com.precog.tools.importers.jdbc.ImportJdbc.IngestInfo +import scala.Right +import scala.Some +import com.precog.tools.importers.jdbc.ImportJdbc.ImportTable +import com.precog.tools.importers.jdbc.Datatypes.Table +import scalaz.Monad /** @@ -22,6 +31,8 @@ import Datatypes._ */ trait ImportJdbcService extends BlueEyesServiceBuilder { + implicit def executionContext: ExecutionContext + implicit def M: Monad[Future] val host="https://beta.precog.com" // 
"https://devapi.precog.com" //TODO move to trait @@ -49,10 +60,6 @@ trait ImportJdbcService extends BlueEyesServiceBuilder { def handleRequestWithConnection[T](f: (Connection,HttpRequest[T])=> Future[HttpResponse[T]])= handleRequest( (r: HttpRequest[T]) => withConnectionFromRequest(r)(f)) def optionYes(ob:Option[String])=ob.map(_.toLowerCase == "y").getOrElse(false) - /*def response[T](f: HttpRequest[T] => HttpResponse[T] )(request: HttpRequest[T]):Future[HttpResponse[T]] = { - - - }*/ def getJoins(infer: Boolean, conn: Connection, metadata: DatabaseMetaData, cat: Option[String], table: Table, idPattern: String, sample: Boolean): Set[Join] = { val inferred = if (infer) getInferredRelationships(conn, metadata, cat, table, idPattern, sample) else Set() @@ -78,7 +85,7 @@ trait ImportJdbcService extends BlueEyesServiceBuilder { handleRequestWithConnection( (conn:Connection,request:HttpRequest[ByteChunk]) =>{ val tables=JArray(oneColumnRs(conn.getMetaData.getCatalogs).map(JString(_)).toList) Future { - HttpResponse[ByteChunk](content = Option(tables)) + HttpResponse[ByteChunk](content = Option(jvalueToChunk(tables))) } } ) @@ -92,7 +99,7 @@ trait ImportJdbcService extends BlueEyesServiceBuilder { val ts=findTables(conn.getMetaData,cat,None) val result = JArray(ts.map(t=>JString(t.name)).toList) Future { - HttpResponse[ByteChunk](content = Option(result)) + HttpResponse[ByteChunk](content = Option(jvalueToChunk(result))) } } ) diff --git a/tools/import/jdbc/src/test/scala/com/precog/tools/importers/jdbc/ImportJdbcServiceTest.scala b/tools/import/jdbc/src/test/scala/com/precog/tools/importers/jdbc/ImportJdbcServiceTest.scala index 372bbb1..c02d0aa 100644 --- a/tools/import/jdbc/src/test/scala/com/precog/tools/importers/jdbc/ImportJdbcServiceTest.scala +++ b/tools/import/jdbc/src/test/scala/com/precog/tools/importers/jdbc/ImportJdbcServiceTest.scala @@ -6,17 +6,20 @@ import blueeyes.core.http.test.HttpRequestMatchers import blueeyes.core.service._ import 
blueeyes.core.data.DefaultBijections._ import java.sql.DriverManager -import akka.dispatch.Await +import akka.dispatch.{Future, Await} import blueeyes.core.http.HttpResponse import JsonImplicits._ +import scalaz.Monad +import blueeyes.bkka.{AkkaDefaults, FutureMonad} /** * User: gabriel * Date: 12/4/12 */ -class ImportJdbcServiceTest extends BlueEyesServiceSpecification with ImportJdbcService with HttpRequestMatchers { +class ImportJdbcServiceTest extends BlueEyesServiceSpecification with ImportJdbcService with HttpRequestMatchers with AkkaDefaults { val executionContext = defaultFutureDispatch + implicit val M: Monad[Future] = new FutureMonad(executionContext) def dbUrl(db:String)="jdbc:h2:~/%s".format(db) diff --git a/tools/import/jdbc/src/test/scala/com/precog/tools/importers/jdbc/ImportJdbcTest.scala b/tools/import/jdbc/src/test/scala/com/precog/tools/importers/jdbc/ImportJdbcTest.scala index b0d05f8..c03c6e2 100644 --- a/tools/import/jdbc/src/test/scala/com/precog/tools/importers/jdbc/ImportJdbcTest.scala +++ b/tools/import/jdbc/src/test/scala/com/precog/tools/importers/jdbc/ImportJdbcTest.scala @@ -155,7 +155,7 @@ class ImportJdbcTest extends Specification with FutureMatchers with HttpRequestM dataA val r=ImportJdbc.ingest(conn,"a",ImportJdbc.buildQuery(tblADesc),Some(tblADesc),basePath,host,apiKey) Await.result(r,1 minute) must beLike { - case HttpResponse(_ ,_,Some(Left(buffer)),_) => { new String(buffer.array(), "UTF-8") must_== """{"failed":0,"skipped":0,"errors":[],"total":1,"ingested":1}"""} + case HttpResponse(_ ,_,Some(Left(buffer)),_) => { new String(buffer.array(), "UTF-8") must_== """{"ingested":1,"errors":[]}"""} } } @@ -166,7 +166,7 @@ class ImportJdbcTest extends Specification with FutureMatchers with HttpRequestM val r=ImportJdbc.ingest(conn,"a",ImportJdbc.buildQuery(tblABDesc),Some(tblABDesc),basePath,host,apiKey) Await.result(r,1 minute) must beLike { - case HttpResponse(_ ,_,Some(Left(buffer)),_) => { new String(buffer.array(), "UTF-8") 
must_== """{"failed":0,"skipped":0,"errors":[],"total":1,"ingested":1}"""} + case HttpResponse(_ ,_,Some(Left(buffer)),_) => { new String(buffer.array(), "UTF-8") must_== """{"ingested":1,"errors":[]}"""} } } } diff --git a/tools/import/jdbc/src/test/scala/com/precog/tools/importers/jdbc/package.scala b/tools/import/jdbc/src/test/scala/com/precog/tools/importers/jdbc/package.scala index f3f58d0..52d47be 100644 --- a/tools/import/jdbc/src/test/scala/com/precog/tools/importers/jdbc/package.scala +++ b/tools/import/jdbc/src/test/scala/com/precog/tools/importers/jdbc/package.scala @@ -18,9 +18,9 @@ package object jdbc { Class.forName("org.h2.Driver") // use api key and dispatch to call ingest - val host="https://beta.precog.com" - val apiKey="43AB865E-BB86-4F74-A57E-7E8BBD77F2B5" - val basePath="/0000000457/data" + val host="https://devapi.precog.com" + val apiKey="0A24F09F-19CB-45D0-8BFA-543C61BA5EE6" + val basePath="/0000000075/data" def tblA(implicit conn:Connection) = conn.createStatement().execute(" create table A( id int primary key, name varchar(10) ) ") def tblB(implicit conn:Connection) = conn.createStatement().execute(" create table B( id int primary key, a_id int, name varchar(10)) ") diff --git a/tools/import/mongodb/build.sbt b/tools/import/mongodb/build.sbt index ab5b691..91098f8 100644 --- a/tools/import/mongodb/build.sbt +++ b/tools/import/mongodb/build.sbt @@ -18,14 +18,12 @@ resolvers ++= Seq( scalacOptions ++= Seq("-unchecked", "-deprecation") -//seq(com.github.retronym.SbtOneJar.oneJarSettings: _*) - assemblySettings libraryDependencies ++= Seq( - "com.reportgrid" %% "blueeyes-core" % "latest.milestone", //"1.0.0-M6", - "com.reportgrid" %% "blueeyes-json" % "latest.milestone", //"1.0.0-M6", - "com.reportgrid" %% "blueeyes-mongo" % "latest.milestone", //"1.0.0-M6", + "com.reportgrid" %% "blueeyes-core" % "1.0.0-M7.7", + "com.reportgrid" %% "blueeyes-json" % "1.0.0-M7.7", + "com.reportgrid" %% "blueeyes-mongo" % "1.0.0-M7.7", "org.mongodb" %% "casbah" 
% "2.3.0", "org.scalaz" %% "scalaz-core" % "7.0.0-M3" , "org.specs2" %% "specs2" % "1.12.2" % "test" diff --git a/tools/import/mongodb/project/plugins.sbt b/tools/import/mongodb/project/plugins.sbt index 00680b1..d79423c 100644 --- a/tools/import/mongodb/project/plugins.sbt +++ b/tools/import/mongodb/project/plugins.sbt @@ -1,3 +1,2 @@ -addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.8.6") - +addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.8.7") diff --git a/tools/import/mongodb/src/main/scala/com/precog/tools/importers/common/ConsoleUtils.scala b/tools/import/mongodb/src/main/scala/com/precog/tools/importers/common/ConsoleUtils.scala deleted file mode 100644 index caba47b..0000000 --- a/tools/import/mongodb/src/main/scala/com/precog/tools/importers/common/ConsoleUtils.scala +++ /dev/null @@ -1,61 +0,0 @@ -package com.precog.tools.importers.common - -import annotation.tailrec - -/** - * User: gabriel - * Date: 1/25/13 - */ -object ConsoleUtils { - - @tailrec - def selectSet[T](label:String, available: Seq[T], selected: Seq[T]=List())(implicit arg0: ClassManifest[T]): Seq[T] = - if (available.isEmpty) selected - else { - val availArray=available.toArray - - println("Available %ss:".format(label)) - println(present(availArray)) - - println("Selected %ss:".format(label)) - println(present(selected)) - - println("Select a number/enter the name, 0 to select all, or enter to continue: ") - - val selIdx = readLine() - selIdx match { - case "" => selected - case ParseInt(0) => available - case ParseInt(x) if (x<=available.size) => { - val elem:T = availArray(x - 1) - selectSet(label,available.filterNot(_==elem), selected:+elem) - } - case s:String if (available.exists(_.toString == s)) => { - val elem:T =availArray.find(_.toString == s).get - selectSet(label,available.filterNot(_==elem), selected:+elem) - } - case _ => selectSet(label,available, selected) - } - } - - def selectOne[T](label:String, available: Seq[T])(implicit arg0: ClassManifest[T]): T ={ - - val 
availArray=available.toArray - - println("Available %ss:".format(label)) - println(present(availArray)) - - println("Select a number/enter the name: ") - - val selIdx = readLine() - selIdx match { - case ParseInt(x) if (x<=available.size) => availArray(x - 1) - case s:String if (available.exists(_.toString == s)) => availArray.find(_.toString == s).get - case _ => selectOne(label,available) - } - } - - def present[T](arr:Seq[T])= (1 to arr.length).zip(arr).map(x=>x._1 +":"+ x._2).mkString(", ") - - -} diff --git a/tools/import/mongodb/src/main/scala/com/precog/tools/importers/common/package.scala b/tools/import/mongodb/src/main/scala/com/precog/tools/importers/common/package.scala deleted file mode 100644 index e97bdc4..0000000 --- a/tools/import/mongodb/src/main/scala/com/precog/tools/importers/common/package.scala +++ /dev/null @@ -1,18 +0,0 @@ -package com.precog.tools.importers - -/** - * User: gabriel - * Date: 1/25/13 - */ -package object common { - - object ParseInt{ - def unapply(s : String) : Option[Int] = try { - Some(s.toInt) - } catch { - case _ : java.lang.NumberFormatException => None - } - } - - -} diff --git a/tools/import/mongodb/src/main/scala/com/precog/tools/importers/mongo/ImportMongo.scala b/tools/import/mongodb/src/main/scala/com/precog/tools/importers/mongo/ImportMongo.scala index e9afe11..f69e261 100644 --- a/tools/import/mongodb/src/main/scala/com/precog/tools/importers/mongo/ImportMongo.scala +++ b/tools/import/mongodb/src/main/scala/com/precog/tools/importers/mongo/ImportMongo.scala @@ -217,6 +217,6 @@ object ImportMongo { }.getBytes("UTF-8"))) val byteChunks: ByteChunk = Right(byteStream) - httpClient.parameters('apiKey -> apiKey).post(fullPath)(byteChunks) + httpClient.parameters('apiKey -> apiKey,'mode -> "streaming").header("Content-Type","text/csv").post(fullPath)(byteChunks) } } From 6867bf95241dc9ed3a2f6b7638f2d49f42270e2d Mon Sep 17 00:00:00 2001 From: gclaramunt Date: Wed, 20 Mar 2013 21:39:35 -0300 Subject: [PATCH 10/12] 
composite project files --- tools/import/build.sbt | 11 +++++++++++ tools/import/common/build.sbt | 9 +++++++++ tools/import/common/project/build.scala | 5 +++++ tools/import/common/project/plugins.sbt | 1 + tools/import/jdbc/project/build.scala | 9 +++++++++ tools/import/jdbc/project/plugins.sbt | 1 + tools/import/mongodb/project/build.scala | 9 +++++++++ tools/import/project/build.scala | 12 ++++++++++++ tools/import/project/plugins.sbt | 3 +++ 9 files changed, 60 insertions(+) create mode 100644 tools/import/build.sbt create mode 100644 tools/import/common/build.sbt create mode 100644 tools/import/common/project/build.scala create mode 100644 tools/import/common/project/plugins.sbt create mode 100644 tools/import/jdbc/project/build.scala create mode 100644 tools/import/jdbc/project/plugins.sbt create mode 100644 tools/import/mongodb/project/build.scala create mode 100644 tools/import/project/build.scala create mode 100644 tools/import/project/plugins.sbt diff --git a/tools/import/build.sbt b/tools/import/build.sbt new file mode 100644 index 0000000..b67db6f --- /dev/null +++ b/tools/import/build.sbt @@ -0,0 +1,11 @@ +name := "import-tools" + +organization := "org.precog" + +version := "0.1" + +scalaVersion := "2.9.2" + + +scalacOptions ++= Seq("-unchecked", "-deprecation") + diff --git a/tools/import/common/build.sbt b/tools/import/common/build.sbt new file mode 100644 index 0000000..3b2e670 --- /dev/null +++ b/tools/import/common/build.sbt @@ -0,0 +1,9 @@ +name := "import-common" + +organization := "org.precog" + +version := "0.1" + +scalaVersion := "2.9.2" + + diff --git a/tools/import/common/project/build.scala b/tools/import/common/project/build.scala new file mode 100644 index 0000000..4ef5d5c --- /dev/null +++ b/tools/import/common/project/build.scala @@ -0,0 +1,5 @@ +import sbt._ +object CommonProj extends Build +{ + lazy val root = Project("import-common", file(".")) +} diff --git a/tools/import/common/project/plugins.sbt 
b/tools/import/common/project/plugins.sbt new file mode 100644 index 0000000..3ccee0c --- /dev/null +++ b/tools/import/common/project/plugins.sbt @@ -0,0 +1 @@ +addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.8.7") \ No newline at end of file diff --git a/tools/import/jdbc/project/build.scala b/tools/import/jdbc/project/build.scala new file mode 100644 index 0000000..ed9f1b3 --- /dev/null +++ b/tools/import/jdbc/project/build.scala @@ -0,0 +1,9 @@ +import sbt._ +object JdbcImportProj extends Build +{ + lazy val root = + Project("import-jdbc", file(".")) dependsOn(common) + lazy val common = + ProjectRef(uri("../common/"), "import-common") +} + diff --git a/tools/import/jdbc/project/plugins.sbt b/tools/import/jdbc/project/plugins.sbt new file mode 100644 index 0000000..3ccee0c --- /dev/null +++ b/tools/import/jdbc/project/plugins.sbt @@ -0,0 +1 @@ +addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.8.7") \ No newline at end of file diff --git a/tools/import/mongodb/project/build.scala b/tools/import/mongodb/project/build.scala new file mode 100644 index 0000000..92ee8dd --- /dev/null +++ b/tools/import/mongodb/project/build.scala @@ -0,0 +1,9 @@ +import sbt._ +object MongoImportProj extends Build +{ + lazy val root = + Project("import-mongo", file(".")) dependsOn(common) + lazy val common = + ProjectRef(uri("../common/"), "import-common") +} + diff --git a/tools/import/project/build.scala b/tools/import/project/build.scala new file mode 100644 index 0000000..9fa3a6c --- /dev/null +++ b/tools/import/project/build.scala @@ -0,0 +1,12 @@ +import sbt._ + +object ImportToolsBuild extends Build { + + lazy val root = Project(id = "import-tools", base = file(".")) aggregate(common,jdbc, mongo) + + lazy val common = Project(id = "import-common", base = file("common")) + + lazy val mongo = Project(id = "import-mongodb", base = file("mongodb")) dependsOn("import-common") + + lazy val jdbc = Project(id = "import-jdbc", base = file("jdbc")) dependsOn("import-common") +} diff 
--git a/tools/import/project/plugins.sbt b/tools/import/project/plugins.sbt new file mode 100644 index 0000000..3ad699a --- /dev/null +++ b/tools/import/project/plugins.sbt @@ -0,0 +1,3 @@ +addSbtPlugin("com.github.mpeltonen" % "sbt-idea" % "1.2.0") + +addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.8.7") From b04f1b9da422125ddb666d8adadb1fe6c159b718 Mon Sep 17 00:00:00 2001 From: gclaramunt Date: Mon, 25 Mar 2013 12:01:08 -0300 Subject: [PATCH 11/12] Changes for code review comments --- .../tools/importers/common/ConsoleUtils.scala | 1 + tools/import/jdbc/build.sbt | 3 +- .../jdbc/src/main/resources/application.conf | 8 ++ .../tools/importers/jdbc/DbAccess.scala | 1 - .../tools/importers/jdbc/DbAnalysis.scala | 2 +- .../importers/jdbc/ImportJdbcConsole.scala | 81 +++---------------- tools/import/mongodb/build.sbt | 3 +- .../tools/importers/mongo/ImportMongo.scala | 29 ++++--- 8 files changed, 45 insertions(+), 83 deletions(-) diff --git a/tools/import/common/src/main/scala/com/precog/tools/importers/common/ConsoleUtils.scala b/tools/import/common/src/main/scala/com/precog/tools/importers/common/ConsoleUtils.scala index caba47b..e2b47e3 100644 --- a/tools/import/common/src/main/scala/com/precog/tools/importers/common/ConsoleUtils.scala +++ b/tools/import/common/src/main/scala/com/precog/tools/importers/common/ConsoleUtils.scala @@ -38,6 +38,7 @@ object ConsoleUtils { } } + @tailrec def selectOne[T](label:String, available: Seq[T])(implicit arg0: ClassManifest[T]): T ={ val availArray=available.toArray diff --git a/tools/import/jdbc/build.sbt b/tools/import/jdbc/build.sbt index 8b7a738..c4ed52d 100644 --- a/tools/import/jdbc/build.sbt +++ b/tools/import/jdbc/build.sbt @@ -25,5 +25,6 @@ libraryDependencies ++= Seq( "com.reportgrid" % "blueeyes-core_2.9.2" % "1.0.0-M8.1", "com.reportgrid" % "blueeyes-json_2.9.2" % "1.0.0-M8.1", "org.specs2" %% "specs2" % "1.12.2" , - "com.h2database" % "h2" % "1.2.134" % "test" + "com.h2database" % "h2" % "1.2.134" % "test", + 
"ch.qos.logback" % "logback-classic" % "1.0.0" ) diff --git a/tools/import/jdbc/src/main/resources/application.conf b/tools/import/jdbc/src/main/resources/application.conf index e69de29..85664e8 100644 --- a/tools/import/jdbc/src/main/resources/application.conf +++ b/tools/import/jdbc/src/main/resources/application.conf @@ -0,0 +1,8 @@ +blueeyes-async { + name = "DefaultActorPool" + keep-alive-time = 5s + core-pool-size-factor = 1.0 + core-pool-size-max = 8 + max-pool-size-factor = 1.0 + max-pool-size-max = 8 +} \ No newline at end of file diff --git a/tools/import/jdbc/src/main/scala/com/precog/tools/importers/jdbc/DbAccess.scala b/tools/import/jdbc/src/main/scala/com/precog/tools/importers/jdbc/DbAccess.scala index 845fcb4..1d44dd8 100644 --- a/tools/import/jdbc/src/main/scala/com/precog/tools/importers/jdbc/DbAccess.scala +++ b/tools/import/jdbc/src/main/scala/com/precog/tools/importers/jdbc/DbAccess.scala @@ -17,7 +17,6 @@ object DbAccess { def getColumns(conn:Connection, query:String):IndexedSeq[Column]={ //use a prepared statement to get the metadata - println(query) val stmt = conn.prepareStatement(query) getColumns(stmt) } diff --git a/tools/import/jdbc/src/main/scala/com/precog/tools/importers/jdbc/DbAnalysis.scala b/tools/import/jdbc/src/main/scala/com/precog/tools/importers/jdbc/DbAnalysis.scala index 9648668..cc023af 100644 --- a/tools/import/jdbc/src/main/scala/com/precog/tools/importers/jdbc/DbAnalysis.scala +++ b/tools/import/jdbc/src/main/scala/com/precog/tools/importers/jdbc/DbAnalysis.scala @@ -18,7 +18,7 @@ object DbAnalysis{ def findTables(metadata: DatabaseMetaData, oCat: Option[String], tableName: => Option[String]): Array[Table] = { - val cat= oCat.getOrElse(null)//toNullUppercase(oCat) + val cat= oCat.getOrElse(null) val tableNm= tableName.map(_.toUpperCase).getOrElse(null) tables(metadata.getTables(cat, null, tableNm, Array("TABLE"))).toArray } diff --git 
a/tools/import/jdbc/src/main/scala/com/precog/tools/importers/jdbc/ImportJdbcConsole.scala b/tools/import/jdbc/src/main/scala/com/precog/tools/importers/jdbc/ImportJdbcConsole.scala index 6d39a00..39777bf 100644 --- a/tools/import/jdbc/src/main/scala/com/precog/tools/importers/jdbc/ImportJdbcConsole.scala +++ b/tools/import/jdbc/src/main/scala/com/precog/tools/importers/jdbc/ImportJdbcConsole.scala @@ -1,6 +1,5 @@ package com.precog.tools.importers.jdbc -import annotation.tailrec import java.sql.{Connection, DatabaseMetaData} import DbAccess._ import DbAnalysis._ @@ -15,8 +14,10 @@ import scala.Some import scala.Right import com.precog.tools.importers.jdbc.ImportJdbc.ImportTable import com.precog.tools.importers.jdbc.Datatypes.Table +import com.precog.tools.importers.common.ConsoleUtils._ import akka.dispatch.{Future, Await} import akka.util.Duration +import org.slf4j.LoggerFactory /** * User: gabriel @@ -24,34 +25,28 @@ import akka.util.Duration */ object ImportJdbcConsole { + private lazy val logger = LoggerFactory.getLogger("com.precog.tools.importers.jdbc.ImportJdbc") + implicit val as=actorSystem Option(System.getProperty("jdbc.driver")).map(driver => Class.forName(driver)) def main(args:Array[String]){ println("Welcome to Precog JDBC import wizard") - /*lazy val dbUrl=readLine("Enter database URL:") + lazy val dbUrl=readLine("Enter database URL:") lazy val user=readLine("User:") lazy val password = readLine("Password:") // use api key and dispatch to call ingest lazy val host=readLine("Precog ingestion host") lazy val apiKey=readLine("API KEY for ingestion") - lazy val basePath=readLine("Base ingestion path ( /{userId}/....)")*/ - - lazy val dbUrl="jdbc:mysql://localhost/" //readLine("Enter database URL:") - lazy val user="root" //readLine("User:") - lazy val password = "root" //readLine("Password:") - // use api key and dispatch to call ingest - lazy val host="https://beta.precog.com" //readLine("Precog ingestion host") // https://beta.precog.com - lazy val 
apiKey="43AB865E-BB86-4F74-A57E-7E8BBD77F2B5" //readLine("API KEY for ingestion") - lazy val basePath="/0000000457/import" //readLine("Base ingestion path ( /{userId}/....)")*/ + lazy val basePath=readLine("Base ingestion path ( /{userId}/....)") val fresult=importJdbc(dbUrl,user,password, host, apiKey, basePath) Await.result(Future.sequence(fresult),Duration("24 hours")).map( result => result match { - case HttpResponse(_ ,_,Some(Left(buffer)),_) => { println(new String(buffer.array(), "UTF-8"))} - case _ => "error %s".format(result.toString()) + case HttpResponse(_ ,_,Some(Left(buffer)),_) => { logger.info(new String(buffer.array(), "UTF-8"))} + case _ => logger.error("error %s".format(result.toString())) } ) as.shutdown() @@ -69,25 +64,25 @@ object ImportJdbcConsole { tqs.map( tqs => { val (table,tDesc,q) = tqs val path= "%s/%s".format(basePath, table) - println("importing %s".format(table)) + logger.info("importing %s".format(table)) ingest(connDb,table, q, tDesc, path, host, apiKey).onComplete { case Right(result) => callSucceded(result) - case Left(failure) => println("Failed to import %s, error: %s".format(table,failure.getMessage)) + case Left(failure) => logger.error("Failed to import %s, error: %s".format(table,failure.getMessage)) } }) } def callSucceded(response:HttpResponse[ByteChunk]){ response match { - case HttpResponse(_ ,_,Some(Left(buffer)),_) => println("Result: %s".format(new String(buffer.array(), "UTF-8"))) - case _ => println("Unexpected stream in %s".format(response)) + case HttpResponse(_ ,_,Some(Left(buffer)),_) => logger.info("Result: %s".format(new String(buffer.array(), "UTF-8"))) + case _ => logger.error("Unexpected stream in %s".format(response)) } } def getCatalogs(metadata: DatabaseMetaData): String = { println("Catalogs:") val catalogs = oneColumnRs(metadata.getCatalogs) - selectOne("Catalog/Database",catalogs).getOrElse("") + selectOne("Catalog/Database",catalogs) } def selectColumns(connDb: Connection, table: Table): 
List[String] = { @@ -145,56 +140,6 @@ object ImportJdbcConsole { selectSet("table", tablesList) } - @tailrec - private def selectSet[T](label:String, available: Seq[T], selected: Seq[T]=List())(implicit arg0: ClassManifest[T]): Seq[T] = - if (available.isEmpty) selected - else { - val availArray=available.toArray - - println("Available %ss:".format(label)) - println(present(availArray)) - - println("Selected %ss:".format(label)) - println(present(selected)) - - println("Select a number/enter the name, 0 to select all, or enter to continue: ") - - val selIdx = readLine() - selIdx match { - case "" => selected - case ParseInt(0) => available - case ParseInt(x) if (x<=available.size) => { - val elem:T = availArray(x - 1) - selectSet(label,available.filterNot(_==elem), selected:+elem) - } - case s if (available.exists(_.toString == s)) => { - val elem:T =availArray.find(_.toString == s).get - selectSet(label,available.filterNot(_==elem), selected:+elem) - } - case _ => selectSet(label,available, selected) - } - } - - @tailrec - private def selectOne[T](label:String, available: Seq[T] )(implicit arg0: ClassManifest[T]): Option[T] = - if (available.isEmpty) None - else { - val availArray=available.toArray - - println("Select a %s:".format(label)) - println(present(availArray)) - - println("Select a number/enter the name: ") - - val selIdx = readLine() - selIdx match { - case ParseInt(x) if (x<=available.size) => Option(availArray(x - 1)) - case s if (available.exists(_.toString == s)) => availArray.find(_.toString == s) - case _ => selectOne(label,available) - } - } - - def present[T](arr:Seq[T])= (1 to arr.length).zip(arr).map(x=>x._1 +":"+ x._2).mkString(", ") def show(baseTable:Table,set: Set[Join])= set.map( r=> " %s with %s on %s=%s".format(baseTable.name, r.refKey.table, r.baseColName,r.refKey.columnName )).mkString(", ") diff --git a/tools/import/mongodb/build.sbt b/tools/import/mongodb/build.sbt index a6a3678..f0fe6cb 100644 --- a/tools/import/mongodb/build.sbt 
+++ b/tools/import/mongodb/build.sbt @@ -25,5 +25,6 @@ libraryDependencies ++= Seq( "com.reportgrid" %% "blueeyes-json" % "1.0.0-M8.1", "com.reportgrid" %% "blueeyes-mongo" % "1.0.0-M8.1", "org.mongodb" %% "casbah" % "2.3.0", - "org.specs2" %% "specs2" % "1.12.2" % "test" + "org.specs2" %% "specs2" % "1.12.2" % "test", + "ch.qos.logback" % "logback-classic" % "1.0.0" ) diff --git a/tools/import/mongodb/src/main/scala/com/precog/tools/importers/mongo/ImportMongo.scala b/tools/import/mongodb/src/main/scala/com/precog/tools/importers/mongo/ImportMongo.scala index f922763..69987e9 100644 --- a/tools/import/mongodb/src/main/scala/com/precog/tools/importers/mongo/ImportMongo.scala +++ b/tools/import/mongodb/src/main/scala/com/precog/tools/importers/mongo/ImportMongo.scala @@ -22,6 +22,7 @@ import collection.JavaConversions._ import scala.Left import scala.Some import com.precog.tools.importers.common.Ingest._ +import org.slf4j.LoggerFactory /** @@ -30,6 +31,8 @@ import com.precog.tools.importers.common.Ingest._ */ object ImportMongo { + private lazy val logger = LoggerFactory.getLogger("com.precog.tools.importers.mongo.ImportMongo") + implicit val as=actorSystem implicit val executionContext = defaultFutureDispatch implicit val M: Monad[Future] = new FutureMonad(executionContext) @@ -60,7 +63,8 @@ object ImportMongo { println("No configuration found in the mongo instance, creating a new one.") val databases=db.name println("DATABASE %s \n".format(db)) - val colls=selectSet("collection",db.getCollectionNames().toSeq) + val userCollections=db.getCollectionNames().filter(name=> !(name.startsWith("system.") || name.startsWith(collsConfig))) + val colls=selectSet("collection",userCollections.toSeq) colls.map( coll =>{ println("\n ---- Collection %s ----".format(coll)) val columns=sampleColumns(mongoConn)(db,coll).toSeq @@ -85,7 +89,7 @@ object ImportMongo { if (args.length != 4) { println("Wrong number of parameters.") - println("Usage: ImportMongo mongo_uri precog_host 
precog_ingest_path precog_apiKey") + println("Usage: ImportMongo mongo_uri precog_host_url precog_ingest_path precog_apiKey") actorSystem.shutdown() sys.exit(1) } @@ -118,20 +122,23 @@ object ImportMongo { } val jsonInputs= inputConfigColl.find().toList - //TODO: check result of ingest before updating the Id!!!!! val fimports=jsonInputs.map(config=> importCollection(precogHost,basePath,apiKey,db, config, mongoConn)) val fresults=Await.result(Future.sequence(fimports.toList), Duration("24 hours")) jsonInputs.zip(fresults).map( r =>{ val (mDbObj,(result,lastId)) = r - println("%s".format(result)) - inputConfigColl.save(mDbObj++("lastId"->lastId)) + result.left.map(s=> + logger.warn("%s".format(s)) + ).right.map({s=> + logger.info("%s".format(s)) + inputConfigColl.save(mDbObj++("lastId"->lastId)) + }) } ) } } finally { - println("Shutting down...") + logger.info("Shutting down...") actorSystem.shutdown() } } @@ -145,7 +152,7 @@ object ImportMongo { def arrOfStrValues(jv: JValue) = (jv -->? 
classOf[JArray]).map(_.elements.map(strValue(_))).getOrElse(Nil) - def importCollection(host:String, basePath:String, apiKey:String, db:MongoDB, mdbobj: MongoDBObject, mongoConn: MongoConnection):Future[(String,AnyRef)]={ + def importCollection(host:String, basePath:String, apiKey:String, db:MongoDB, mdbobj: MongoDBObject, mongoConn: MongoConnection):Future[(Either[String,String],AnyRef)]={ val collName = mdbobj.getAs[String]("collection").get val fieldNames = mdbobj.getAsOrElse[util.ArrayList[String]]("fields",new util.ArrayList()) @@ -172,15 +179,15 @@ object ImportMongo { val path = "%s/%s/%s".format(basePath, db.name, collName) val data = StreamT.fromStream[Future, JObject](fjsons) val fsend= data.isEmpty.flatMap( isEmpty => - if (isEmpty) Future("No new data found in %s.%s".format(db.name,collName)) + if (isEmpty) Future(Left("No new data found in %s.%s".format(db.name,collName))) else sendToPrecog(host,path,apiKey,toByteStream(data)) map( _ match { case HttpResponse(_, _, Some(Left(buffer)), _) => { - "Result from precog: %s".format(new String(buffer.array(), "UTF-8")) + Right("Result from precog: %s".format(new String(buffer.array(), "UTF-8"))) } - case result => "Error: %s".format(result.toString()) + case result => Left("Error: %s".format(result.toString())) } )) - M.lift2((a: String, b: AnyRef) => (a, b))(fsend, fmaxId) + M.lift2((a: Either[String,String], b: AnyRef) => (a, b))(fsend, fmaxId) } def readFromMongo(mongoDB: MongoDB, collName: String, idCol:String, oLastId:Option[AnyRef], fieldNames:Seq[String]):Stream[DBObject]={ From 15bc2d65f55d4780fd2afcbf5ac3ed276dbacbaf Mon Sep 17 00:00:00 2001 From: gclaramunt Date: Fri, 5 Apr 2013 20:54:41 -0300 Subject: [PATCH 12/12] lot of changes from code review comments --- tools/import/common/build.sbt | 9 +- .../project/{build.scala => Build.scala} | 0 .../tools/importers/common/ConsoleUtils.scala | 30 +-- .../tools/importers/common/Ingest.scala | 32 ++- .../jdbc/project/{build.scala => Build.scala} | 0 
.../tools/importers/jdbc/DbAccess.scala | 26 +- .../tools/importers/jdbc/ImportJdbc.scala | 91 ++++--- .../importers/jdbc/ImportJdbcConsole.scala | 29 +-- .../importers/jdbc/ImportJdbcService.scala | 2 +- ...nalysisTest.scala => DbAnalysisSpec.scala} | 2 +- ...Test.scala => ImportJdbcServiceSpec.scala} | 30 +-- ...ortJdbcTest.scala => ImportJdbcSpec.scala} | 94 ++++---- .../precog/tools/importers/jdbc/package.scala | 2 - .../project/{build.scala => Build.scala} | 0 .../tools/importers/mongo/ImportMongo.scala | 228 ++++++++++-------- .../importers/mongo/ImportMongoSpec.scala | 117 +++++++++ .../project/{build.scala => Build.scala} | 0 17 files changed, 421 insertions(+), 271 deletions(-) rename tools/import/common/project/{build.scala => Build.scala} (100%) rename tools/import/jdbc/project/{build.scala => Build.scala} (100%) rename tools/import/jdbc/src/test/scala/com/precog/tools/importers/jdbc/{DbAnalysisTest.scala => DbAnalysisSpec.scala} (98%) rename tools/import/jdbc/src/test/scala/com/precog/tools/importers/jdbc/{ImportJdbcServiceTest.scala => ImportJdbcServiceSpec.scala} (82%) rename tools/import/jdbc/src/test/scala/com/precog/tools/importers/jdbc/{ImportJdbcTest.scala => ImportJdbcSpec.scala} (62%) rename tools/import/mongodb/project/{build.scala => Build.scala} (100%) create mode 100644 tools/import/mongodb/src/test/scala/com/precog/tools/importers/mongo/ImportMongoSpec.scala rename tools/import/project/{build.scala => Build.scala} (100%) diff --git a/tools/import/common/build.sbt b/tools/import/common/build.sbt index cc72682..3e94c51 100644 --- a/tools/import/common/build.sbt +++ b/tools/import/common/build.sbt @@ -6,10 +6,13 @@ version := "0.1" scalaVersion := "2.9.2" +resolvers ++= Seq( + "ReportGrid (public)" at "http://nexus.reportgrid.com/content/repositories/public-releases" +) + libraryDependencies ++= Seq( - "com.reportgrid" %% "blueeyes-core" % "1.0.0-M7.7", - "com.reportgrid" %% "blueeyes-json" % "1.0.0-M7.7", - "org.scalaz" %% "scalaz-core" % 
"7.0.0-M3" + "com.reportgrid" %% "blueeyes-core" % "1.0.0-M8.1", + "com.reportgrid" %% "blueeyes-json" % "1.0.0-M8.1" ) diff --git a/tools/import/common/project/build.scala b/tools/import/common/project/Build.scala similarity index 100% rename from tools/import/common/project/build.scala rename to tools/import/common/project/Build.scala diff --git a/tools/import/common/src/main/scala/com/precog/tools/importers/common/ConsoleUtils.scala b/tools/import/common/src/main/scala/com/precog/tools/importers/common/ConsoleUtils.scala index e2b47e3..db2d1b0 100644 --- a/tools/import/common/src/main/scala/com/precog/tools/importers/common/ConsoleUtils.scala +++ b/tools/import/common/src/main/scala/com/precog/tools/importers/common/ConsoleUtils.scala @@ -9,29 +9,28 @@ import annotation.tailrec object ConsoleUtils { @tailrec - def selectSet[T](label:String, available: Seq[T], selected: Seq[T]=List())(implicit arg0: ClassManifest[T]): Seq[T] = + def selectSet[T](label:String, available: Seq[T], selected: Seq[T]=List()): Seq[T] = if (available.isEmpty) selected else { - val availArray=available.toArray println("Available %ss:".format(label)) - println(present(availArray)) + println(present(available)) println("Selected %ss:".format(label)) println(present(selected)) - println("Select a number/enter the name, 0 to select all, or enter to continue: ") + println("Select %ss by entering its number or name, 0 to select all, enter to continue: ".format(label)) val selIdx = readLine() selIdx match { case "" => selected case ParseInt(0) => available - case ParseInt(x) if (x<=available.size) => { - val elem:T = availArray(x - 1) + case ParseInt(x) if x<=available.size => { + val elem:T = available(x - 1) selectSet(label,available.filterNot(_==elem), selected:+elem) } case s:String if (available.exists(_.toString == s)) => { - val elem:T =availArray.find(_.toString == s).get + val elem:T =available.find(_.toString == s).get selectSet(label,available.filterNot(_==elem), selected:+elem) } 
case _ => selectSet(label,available, selected) @@ -39,24 +38,25 @@ object ConsoleUtils { } @tailrec - def selectOne[T](label:String, available: Seq[T])(implicit arg0: ClassManifest[T]): T ={ - - val availArray=available.toArray + def selectOne[T](label:String, available: Seq[T]): T ={ println("Available %ss:".format(label)) - println(present(availArray)) + println(present(available)) - println("Select a number/enter the name: ") + println("Select one %s by entering its number or name: ".format(label)) val selIdx = readLine() selIdx match { - case ParseInt(x) if (x<=available.size) => availArray(x - 1) - case s:String if (available.exists(_.toString == s)) => availArray.find(_.toString == s).get + case ParseInt(x) if x<=available.size => available(x - 1) + case s:String => available.find(_.toString == s) match { + case Some(t) => t + case None => selectOne(label,available) + } case _ => selectOne(label,available) } } - def present[T](arr:Seq[T])= (1 to arr.length).zip(arr).map(x=>x._1 +":"+ x._2).mkString(", ") + def present[T](arr:Seq[T])= arr.zipWithIndex.map({ case (a, b) => (b+1) + ":" + a }).mkString(", ") } diff --git a/tools/import/common/src/main/scala/com/precog/tools/importers/common/Ingest.scala b/tools/import/common/src/main/scala/com/precog/tools/importers/common/Ingest.scala index 9541eec..c95384a 100644 --- a/tools/import/common/src/main/scala/com/precog/tools/importers/common/Ingest.scala +++ b/tools/import/common/src/main/scala/com/precog/tools/importers/common/Ingest.scala @@ -7,7 +7,6 @@ import scalaz.Monad import blueeyes.json._ import blueeyes.core.data.DefaultBijections._ import blueeyes.core.service._ -import blueeyes.bkka.AkkaDefaults.defaultFutureDispatch import blueeyes.core.service.engines.HttpClientXLightWeb import blueeyes.bkka.FutureMonad import scalaz.StreamT @@ -15,6 +14,7 @@ import java.nio.ByteBuffer import blueeyes.core.http.HttpResponse import blueeyes.core.data.ByteChunk import scala.Right +import org.slf4j.LoggerFactory /** * 
User: gabriel @@ -22,25 +22,39 @@ import scala.Right */ object Ingest { - def sendToPrecog(host:String, path:String, apiKey:String, dataStream:StreamT[Future,ByteBuffer])(implicit ec:ExecutionContext): Future[HttpResponse[ByteChunk]] = { - implicit val M = new FutureMonad(ec) - val httpClient = new HttpClientXLightWeb()(defaultFutureDispatch) + private lazy val logger = LoggerFactory.getLogger("com.precog.tools.importers.jdbc.Ingest") + + def sendToPrecog(host:String, path:String, apiKey:String, dataStream:StreamT[Future,ByteBuffer], streaming:Boolean = true)(implicit executor:ExecutionContext): Future[HttpResponse[ByteChunk]] = { + implicit val M = new FutureMonad(executor) + val httpClient = new HttpClientXLightWeb()(executor) dataStream.isEmpty.flatMap( isEmpty => - if (isEmpty) Future(HttpResponse.empty) - else { + if (isEmpty) { + logger.info("No need to send empty data stream") + Future(HttpResponse.empty) + } else { val byteChunks: ByteChunk = Right(dataStream) - val fullPath = "%s/ingest/v1/fs%s".format(host, path) - httpClient.parameters('apiKey -> apiKey,'mode -> "streaming").header("Content-Type","application/json").post(fullPath)(byteChunks) + //val fullPath = "%s/ingest/v1/fs%s".format(host, path) + val fullPath = "%s/fs%s".format(host, path) //local test only + val ingestParams = ('apiKey -> apiKey)::( if (streaming) List('mode -> "streaming") else List('mode -> "batch", 'receipt -> "true")) + logger.info("Ingesting to %s".format(path)) + httpClient.parameters(ingestParams:_*).header("Content-Type","application/json").post(fullPath)(byteChunks) } ) } + def callSucceded(response:HttpResponse[ByteChunk]){ + response match { + case HttpResponse(_ ,_,Some(Left(buffer)),_) => logger.info("Result: %s".format(new String(buffer.array(), "UTF-8"))) + case _ => logger.error("Unexpected stream in %s".format(response)) + } + } + def toByteStream(dataStream: StreamT[Future, JValue])(implicit m:Monad[Future]): StreamT[Future, ByteBuffer] = { dataStream.map(jv => 
ByteBuffer.wrap({ val js = "%s\n".format(jv.renderCompact) - print("%s".format(js)) + logger.trace("to bytes = %s".format(js.replace('\n',' '))) js }.getBytes("UTF-8"))) } diff --git a/tools/import/jdbc/project/build.scala b/tools/import/jdbc/project/Build.scala similarity index 100% rename from tools/import/jdbc/project/build.scala rename to tools/import/jdbc/project/Build.scala diff --git a/tools/import/jdbc/src/main/scala/com/precog/tools/importers/jdbc/DbAccess.scala b/tools/import/jdbc/src/main/scala/com/precog/tools/importers/jdbc/DbAccess.scala index 1d44dd8..f8c785c 100644 --- a/tools/import/jdbc/src/main/scala/com/precog/tools/importers/jdbc/DbAccess.scala +++ b/tools/import/jdbc/src/main/scala/com/precog/tools/importers/jdbc/DbAccess.scala @@ -3,12 +3,18 @@ package com.precog.tools.importers.jdbc import java.sql._ import Datatypes._ import scalaz.StreamT +import annotation.tailrec +import scalaz.effect.IO +import org.slf4j.LoggerFactory /** * User: gabriel * Date: 11/30/12 */ object DbAccess { + + private lazy val logger = LoggerFactory.getLogger("com.precog.tools.importers.jdbc.DbAccess") + def columnCount(stmt:PreparedStatement)=stmt.getMetaData.getColumnCount def getColumns(conn:Connection, table:Table):IndexedSeq[Column]={ @@ -30,17 +36,19 @@ object DbAccess { def rsList[T](rs:ResultSet)(f:ResultSet => T)={ - //warning: this serves the purpose but it not a well behaved iterator. 
- //In particular, a call to hasNext, advances the resultSet - //it works in this context, because we just call it with "toList" - def rsIterator(rs:ResultSet)(f:ResultSet => T) = new Iterator[T] { - def hasNext = rs.next() - def next():T = f(rs) - } - rsIterator(rs)(f).toList + + @tailrec + def buildList(rs:ResultSet, acc:List[T]=Nil):List[T]= + if (rs.next()) buildList(rs, f(rs)::acc) + else acc + + buildList(rs).reverse } - def rsStreamT[T](rs:ResultSet)(f:ResultSet => T)=StreamT.unfold(rs)( (rs:ResultSet) => if (rs.next()) { Some(f(rs),rs)} else None ) + def rsStreamT[T](rs:ResultSet)(f:ResultSet => T)=StreamT.unfoldM(rs)( + (rs:ResultSet) => IO( { val d=if (rs.next()) { Some(f(rs),rs)} else None; logger.info("read stream = %s".format(d)); d })) + + def rsStream[T](rs:ResultSet)(f:ResultSet => T):Stream[T] = if (rs.next) f(rs) #:: rsStream(rs)(f) else Stream.empty def oneColumnRs(rs:ResultSet) =rsList(rs)(rs=> rs.getString(1)) def tables(rs:ResultSet) = rsList(rs)(rs=> Table(rs.getString("TABLE_NAME"))) diff --git a/tools/import/jdbc/src/main/scala/com/precog/tools/importers/jdbc/ImportJdbc.scala b/tools/import/jdbc/src/main/scala/com/precog/tools/importers/jdbc/ImportJdbc.scala index 5c702c0..da96741 100644 --- a/tools/import/jdbc/src/main/scala/com/precog/tools/importers/jdbc/ImportJdbc.scala +++ b/tools/import/jdbc/src/main/scala/com/precog/tools/importers/jdbc/ImportJdbc.scala @@ -8,16 +8,17 @@ import scala.Some import blueeyes.core.service.engines.HttpClientXLightWeb import Datatypes._ import blueeyes.bkka.FutureMonad -import scalaz.StreamT +import scalaz.{Monad, StreamT,Hoist, ~>} import akka.dispatch.ExecutionContext import java.nio.ByteBuffer -import scalaz.Id._ -import annotation.tailrec import blueeyes.core.http.HttpResponse import blueeyes.core.data.ByteChunk import akka.dispatch.Future import com.precog.tools.importers.common.Ingest._ import DbAccess._ +import scalaz.effect.IO +import org.slf4j.LoggerFactory + /** * User: gabriel * Date: 11/20/12 @@ 
-26,6 +27,8 @@ object ImportJdbc { val httpClient=new HttpClientXLightWeb()(defaultFutureDispatch) + private lazy val logger = LoggerFactory.getLogger("com.precog.tools.importers.jdbc.ImportJdbc") + case class ImportTable(name:String, columns:Seq[String], baseOrJoin:Either[Table,Join]){ val isCollection = baseOrJoin.right.toOption.map(_.exported).getOrElse(false) } case class IngestInfo(tables:Seq[ImportTable]) @@ -48,57 +51,57 @@ object ImportJdbc { case _ => Nil } - def asJObject(o:JValue):JObject= o match { - case j:JObject => j - case _ => sys.error("base value is not jobject!") - } - def buildField( nm: (String,String)) =Option(nm._2).map( s=>JField(nm._1,JString(s))) type StrJVMap= Map[String,JValue] - def buildJValues( map:StrJVMap, s:Seq[String], tblDesc: ImportTable ):(Option[(String,JValue)],Seq[String])={ + def objFields( map:StrJVMap, s:Seq[String], tblDesc: ImportTable ):(Option[(String,JValue)],Seq[String])={ val (tblColValues,rest)=s.splitAt(tblDesc.columns.length) - val objValues =(tblDesc.columns.zip(tblColValues)).flatMap(buildField(_) ).toList + val objValues =(tblDesc.columns.zip(tblColValues)).flatMap(buildField(_) ) val tblName = tblDesc.name.toUpperCase val keyValue= if (objValues.isEmpty) if (tblDesc.isCollection) Some(tblName->JArray.empty) else None else { - val data=JObject(objValues) + val data=JObject(objValues:_*) val obj= if (tblDesc.isCollection) JArray(getElements(map.get(tblName)):+data ) else data Some(tblName->obj) } (keyValue,rest) } - - @tailrec - def mkPartialJson(baseName:String, ingestInfo:IngestInfo, dataStream: StreamT[Id,Seq[String]], prevMap:StrJVMap=Map()):Option[(JValue,StreamT[Id,Seq[String]])] = - if (dataStream.isEmpty) None - else { - val s=dataStream.head - val tail=dataStream.tail - val jsonMap=buildJsonObjMap(ingestInfo, prevMap, s) - val baseNameUC=baseName.toUpperCase - //peek into the stream - val nextJsonMap:StrJVMap=if (tail.isEmpty) Map() else buildJsonObjMap(ingestInfo, Map(), tail.head) - if ( 
!nextJsonMap.isEmpty && (jsonMap.get(baseNameUC) == nextJsonMap.get(baseNameUC)) ) { - //if next row is the same object, keep building - mkPartialJson(baseNameUC,ingestInfo,tail,jsonMap) - } else { - val base= asJObject(jsonMap(baseNameUC)) - val values = (jsonMap-baseNameUC).map(nv => JField(nv._1, nv._2)).toList - Some(JObject(base.fields ++ values),tail) - } + def mkJson[M[+_]](baseName:String, ingestInfo:IngestInfo, row: Seq[String], outStream:StreamT[M,JValue], currentObj:StrJVMap=Map())(implicit M:Monad[M]):(StreamT[M,JValue],StrJVMap) ={ + val baseNameUC=baseName.toUpperCase + val singleObjMap=buildJsonObjMap(ingestInfo, Map(),row) + if (currentObj.isEmpty || singleObjMap.get(baseNameUC) == currentObj.get(baseNameUC)){ + val objM=buildJsonObjMap(ingestInfo, currentObj, row) + (outStream, objM) + } else { + val newObj= buildJObject(baseNameUC, currentObj) + (newObj::outStream,singleObjMap) } + } + private def buildJObject(baseNameUC: String, currentObj: StrJVMap): JObject = { + val base = (currentObj(baseNameUC)) --> classOf[JObject] + val values = (currentObj - baseNameUC) + val newObj = JObject(base.fields ++ values) + newObj + } + def buildJsonObjMap(ingestInfo: ImportJdbc.IngestInfo, prevMap: ImportJdbc.StrJVMap, s: Seq[String]): StrJVMap = { - ingestInfo.tables.foldLeft((prevMap, s))( - (ms, v) => { - val (m,seq)=ms - val (opt, r): (Option[(String, JValue)], Seq[String]) = buildJValues(m, seq, v) //build a json object from the seq values + ingestInfo.tables.foldLeft((prevMap, s))({ + case ((m,seq), v) => { + val (opt, r): (Option[(String, JValue)], Seq[String]) = objFields(m, seq, v) //build a json object from the seq values opt.map(kv => (m + kv, r)).getOrElse((m, r)) - })._1 + }})._1 + } + + def buildBody(data: StreamT[IO,Seq[String]], baseTable: String, i: IngestInfo)(implicit executor: ExecutionContext, m:FutureMonad, io:Monad[IO]): Future[StreamT[Future,JValue]] ={ + Future(data.foldLeft((StreamT.empty[Future,JValue], Map():StrJVMap))( + { case 
((os,currentMap),row)=>mkJson(baseTable,i,row,os,currentMap) } + ).map( { case (strm,obj)=> + buildJObject(baseTable.toUpperCase,obj)::strm + } ).unsafePerformIO()) } def names(cs:Seq[Column])=cs.map(_.name) @@ -111,7 +114,7 @@ object ImportJdbc { "select %s from %s order by %s".format(colSelect,join,sort) } - def executeQuery(connDb: Connection, query: String ): (StreamT[Id,IndexedSeq[String]],IndexedSeq[Column]) = { + def executeQuery(connDb: Connection, query: String ): (StreamT[IO,IndexedSeq[String]],IndexedSeq[Column]) = { val stmt = connDb.prepareStatement(query) val columns = getColumns(stmt) val rs = stmt.executeQuery() @@ -123,21 +126,15 @@ object ImportJdbc { DriverManager.getConnection(uri, user, password) } - def ingest(connDb: Connection, objName:String, query: String, oTblDesc:Option[IngestInfo], ingestPath: =>String, host: =>String, apiKey: =>String)(implicit executor: ExecutionContext):Future[HttpResponse[ByteChunk]] = { + def ingest(connDb: Connection, objName: String, query: String, oTblDesc:Option[IngestInfo], ingestPath: =>String, host: =>String, apiKey: =>String)(implicit executor: ExecutionContext):Future[HttpResponse[ByteChunk]] = { implicit val M = new FutureMonad(executor) val (data,columns) = executeQuery(connDb, query) val tblDesc= oTblDesc.getOrElse(IngestInfo(Seq(ImportTable(objName,names(columns),Left(Table("base")))))) - val path = "%s/%s".format(ingestPath,objName) - val dataStream:StreamT[Future,ByteBuffer] =toByteStream(buildBody(data, objName, tblDesc)) - sendToPrecog(host,path,apiKey,dataStream) - } - - - def buildBody(data: StreamT[Id,Seq[String]], baseTable: String, i: IngestInfo)(implicit executor: ExecutionContext, m:FutureMonad): StreamT[Future,JValue] = - StreamT.unfoldM[Future,JValue,StreamT[Id,Seq[String]]](data)(ds=> - if (ds.isEmpty) Future(None) - else Future(mkPartialJson(baseTable,i,ds))) + val path= "%s/%s".format(ingestPath,objName) + val dataStream:Future[StreamT[Future,ByteBuffer]]= buildBody(data, objName, 
tblDesc).map(toByteStream(_)) + dataStream.flatMap(sendToPrecog(host,path,apiKey,_)) + } } diff --git a/tools/import/jdbc/src/main/scala/com/precog/tools/importers/jdbc/ImportJdbcConsole.scala b/tools/import/jdbc/src/main/scala/com/precog/tools/importers/jdbc/ImportJdbcConsole.scala index 39777bf..2bbbca3 100644 --- a/tools/import/jdbc/src/main/scala/com/precog/tools/importers/jdbc/ImportJdbcConsole.scala +++ b/tools/import/jdbc/src/main/scala/com/precog/tools/importers/jdbc/ImportJdbcConsole.scala @@ -5,8 +5,6 @@ import DbAccess._ import DbAnalysis._ import ImportJdbc._ import blueeyes.bkka.AkkaDefaults._ -import blueeyes.core.http.HttpResponse -import blueeyes.core.data._ import scala.Left import com.precog.tools.importers.jdbc.Datatypes.Join import com.precog.tools.importers.jdbc.ImportJdbc.IngestInfo @@ -15,9 +13,8 @@ import scala.Right import com.precog.tools.importers.jdbc.ImportJdbc.ImportTable import com.precog.tools.importers.jdbc.Datatypes.Table import com.precog.tools.importers.common.ConsoleUtils._ -import akka.dispatch.{Future, Await} -import akka.util.Duration import org.slf4j.LoggerFactory +import com.precog.tools.importers.common.Ingest._ /** * User: gabriel @@ -25,7 +22,7 @@ import org.slf4j.LoggerFactory */ object ImportJdbcConsole { - private lazy val logger = LoggerFactory.getLogger("com.precog.tools.importers.jdbc.ImportJdbc") + private lazy val logger = LoggerFactory.getLogger("com.precog.tools.importers.jdbc.ImportJdbcConsole") implicit val as=actorSystem @@ -41,18 +38,11 @@ object ImportJdbcConsole { lazy val apiKey=readLine("API KEY for ingestion") lazy val basePath=readLine("Base ingestion path ( /{userId}/....)") - val fresult=importJdbc(dbUrl,user,password, host, apiKey, basePath) - - Await.result(Future.sequence(fresult),Duration("24 hours")).map( - result => result match { - case HttpResponse(_ ,_,Some(Left(buffer)),_) => { logger.info(new String(buffer.array(), "UTF-8"))} - case _ => logger.error("error %s".format(result.toString())) 
- } - ) + importJdbc(dbUrl,user,password, host, apiKey, basePath) as.shutdown() } - def importJdbc(dbUrl: =>String, user: =>String, password: =>String, host: =>String, apiKey: =>String, basePath: =>String)={ + def importJdbc(dbUrl: =>String, user: =>String, password: =>String, host: =>String, apiKey: =>String, basePath: =>String):Unit={ val catConn= getConnection(dbUrl, user, password,None) val cat= getCatalogs(catConn.getMetaData) @@ -72,13 +62,6 @@ object ImportJdbcConsole { }) } - def callSucceded(response:HttpResponse[ByteChunk]){ - response match { - case HttpResponse(_ ,_,Some(Left(buffer)),_) => logger.info("Result: %s".format(new String(buffer.array(), "UTF-8"))) - case _ => logger.error("Unexpected stream in %s".format(response)) - } - } - def getCatalogs(metadata: DatabaseMetaData): String = { println("Catalogs:") val catalogs = oneColumnRs(metadata.getCatalogs) @@ -107,7 +90,7 @@ object ImportJdbcConsole { selected.map( table =>{ val allRelationships = relationships( conn, metadata, None,table).toSeq - println(allRelationships) + present(allRelationships) val relations= selectSet("relation",allRelationships).toList val tblDesc=buildIngestInfo(table, conn, relations) @@ -140,7 +123,7 @@ object ImportJdbcConsole { selectSet("table", tablesList) } - def present[T](arr:Seq[T])= (1 to arr.length).zip(arr).map(x=>x._1 +":"+ x._2).mkString(", ") + def present[T](arr:Seq[T])= arr.zipWithIndex.map(x=>x._1 +":"+ x._2).mkString(", ") def show(baseTable:Table,set: Set[Join])= set.map( r=> " %s with %s on %s=%s".format(baseTable.name, r.refKey.table, r.baseColName,r.refKey.columnName )).mkString(", ") def readTableName()= { diff --git a/tools/import/jdbc/src/main/scala/com/precog/tools/importers/jdbc/ImportJdbcService.scala b/tools/import/jdbc/src/main/scala/com/precog/tools/importers/jdbc/ImportJdbcService.scala index c8241c7..446974b 100644 --- a/tools/import/jdbc/src/main/scala/com/precog/tools/importers/jdbc/ImportJdbcService.scala +++ 
b/tools/import/jdbc/src/main/scala/com/precog/tools/importers/jdbc/ImportJdbcService.scala @@ -32,7 +32,7 @@ trait ImportJdbcService extends BlueEyesServiceBuilder { implicit def executionContext: ExecutionContext implicit def M: Monad[Future] - val host=Option(System.getProperty("host")).getOrElse("https://beta.precog.com") + val host=System.getProperty("host") def handleRequest[T](f: HttpRequest[T]=> Future[HttpResponse[T]])= (request: HttpRequest[T]) => diff --git a/tools/import/jdbc/src/test/scala/com/precog/tools/importers/jdbc/DbAnalysisTest.scala b/tools/import/jdbc/src/test/scala/com/precog/tools/importers/jdbc/DbAnalysisSpec.scala similarity index 98% rename from tools/import/jdbc/src/test/scala/com/precog/tools/importers/jdbc/DbAnalysisTest.scala rename to tools/import/jdbc/src/test/scala/com/precog/tools/importers/jdbc/DbAnalysisSpec.scala index 02b1941..f5f0aea 100644 --- a/tools/import/jdbc/src/test/scala/com/precog/tools/importers/jdbc/DbAnalysisTest.scala +++ b/tools/import/jdbc/src/test/scala/com/precog/tools/importers/jdbc/DbAnalysisSpec.scala @@ -10,7 +10,7 @@ import com.precog.tools.importers.jdbc.Datatypes.Join * User: gabriel * Date: 12/4/12 */ -class DbAnalysisTest extends Specification { +class DbAnalysisSpec extends Specification { "find tables" should { diff --git a/tools/import/jdbc/src/test/scala/com/precog/tools/importers/jdbc/ImportJdbcServiceTest.scala b/tools/import/jdbc/src/test/scala/com/precog/tools/importers/jdbc/ImportJdbcServiceSpec.scala similarity index 82% rename from tools/import/jdbc/src/test/scala/com/precog/tools/importers/jdbc/ImportJdbcServiceTest.scala rename to tools/import/jdbc/src/test/scala/com/precog/tools/importers/jdbc/ImportJdbcServiceSpec.scala index d8a6926..3c9ae95 100644 --- a/tools/import/jdbc/src/test/scala/com/precog/tools/importers/jdbc/ImportJdbcServiceTest.scala +++ b/tools/import/jdbc/src/test/scala/com/precog/tools/importers/jdbc/ImportJdbcServiceSpec.scala @@ -16,11 +16,13 @@ import 
blueeyes.bkka.{AkkaDefaults, FutureMonad} * User: gabriel * Date: 12/4/12 */ -class ImportJdbcServiceTest extends BlueEyesServiceSpecification with ImportJdbcService with HttpRequestMatchers with AkkaDefaults { +class ImportJdbcServiceSpec extends BlueEyesServiceSpecification with ImportJdbcService with HttpRequestMatchers with AkkaDefaults { val executionContext = defaultFutureDispatch implicit val M: Monad[Future] = new FutureMonad(executionContext) + val servicePrefix="/JdbcImportService/v1" + override val host="https://devapi.precog.com" def dbUrl(db:String)="jdbc:h2:~/%s".format(db) @@ -29,7 +31,7 @@ class ImportJdbcServiceTest extends BlueEyesServiceSpecification with ImportJdbc "get database metadata" in new Conn{ val dbName ="TESTSVC" tblA - val r=client.parameters('dbUrl-> dbUrl(dbName)).get[ByteChunk]("/JdbcImportService/v1/metadata/databases") + val r=client.parameters('dbUrl-> dbUrl(dbName)).get[ByteChunk](servicePrefix+"/metadata/databases") Await.result(r,1 minute) must beLike { case HttpResponse(_ ,_,Some(Left(buffer)),_) => new String(buffer.array(), "UTF-8") must_== """["TESTSVC"]""" } @@ -39,7 +41,7 @@ class ImportJdbcServiceTest extends BlueEyesServiceSpecification with ImportJdbc "Table metadata" should { "get tables " in new Conn{ val dbName ="tmd" tblA; tblB - val r=client.parameters('dbUrl-> dbUrl(dbName)).get[ByteChunk]("/JdbcImportService/v1/metadata/databases/%s/tables".format(dbName)) + val r=client.parameters('dbUrl-> dbUrl(dbName)).get[ByteChunk](servicePrefix+"/metadata/databases/%s/tables".format(dbName)) Await.result(r,1 minute) must beLike { case HttpResponse(_ ,_,Some(Left(buffer)),_) => new String(buffer.array(), "UTF-8") must_== """["A","B"]""" } @@ -47,7 +49,7 @@ class ImportJdbcServiceTest extends BlueEyesServiceSpecification with ImportJdbc "get single table desc w/o relations" in new Conn{ val dbName ="t1wor" tblA;tblB - val r= client.parameters('dbUrl-> 
dbUrl(dbName)).get[ByteChunk]("/JdbcImportService/v1/metadata/databases/%s/tables/A".format(dbName)) + val r= client.parameters('dbUrl-> dbUrl(dbName)).get[ByteChunk](servicePrefix+"/metadata/databases/%s/tables/A".format(dbName)) Await.result(r,1 minute) must beLike { case HttpResponse(_ ,_,Some(Left(buffer)),_) => new String(buffer.array(), "UTF-8") must_== """[{"name":"A","columns":["ID","NAME"],"base":"A"}]""" } @@ -57,7 +59,7 @@ class ImportJdbcServiceTest extends BlueEyesServiceSpecification with ImportJdbc tblA;tblB cnstrBfkA - val r=client.parameters('dbUrl-> dbUrl(dbName)).get[ByteChunk]("/JdbcImportService/v1/metadata/databases/%s/tables/A".format(dbName)) + val r=client.parameters('dbUrl-> dbUrl(dbName)).get[ByteChunk](servicePrefix+"/metadata/databases/%s/tables/A".format(dbName)) Await.result(r,1 minute) must beLike { case HttpResponse(_ ,_,Some(Left(buffer)),_) => new String(buffer.array(), "UTF-8") must_== """[{"name":"A","columns":["ID","NAME"],"base":"A"},{"name":"B","columns":["ID","A_ID","NAME"],"join":{"baseColName":"ID","refKey":{"table":"B","columnName":"A_ID"},"exported":true}}]""" @@ -67,7 +69,7 @@ class ImportJdbcServiceTest extends BlueEyesServiceSpecification with ImportJdbc "get table desc with inferred relations w/o sampling" in new Conn{ val dbName ="t1wir" tblA;tblB - val r=client.parameters('dbUrl-> dbUrl(dbName),'infer->"y").get[ByteChunk]("/JdbcImportService/v1/metadata/databases/%s/tables/A".format(dbName)) + val r=client.parameters('dbUrl-> dbUrl(dbName),'infer->"y").get[ByteChunk](servicePrefix+"/metadata/databases/%s/tables/A".format(dbName)) Await.result(r,1 minute) must beLike { case HttpResponse(_ ,_,Some(Left(buffer)),_) => new String(buffer.array(), "UTF-8") must_== """[{"name":"A","columns":["ID","NAME"],"base":"A"},{"name":"B","columns":["ID","A_ID","NAME"],"join":{"baseColName":"ID","refKey":{"table":"B","columnName":"A_ID"},"exported":true}}]""" @@ -77,7 +79,7 @@ class ImportJdbcServiceTest extends 
BlueEyesServiceSpecification with ImportJdbc "get table desc with inferred relations with sampling - no data" in new Conn{ val dbName = "t1wirsnd" tblA;tblB - val r=client.parameters('dbUrl-> dbUrl(dbName),'infer->"y", 'sample->"y").get[ByteChunk]("/JdbcImportService/v1/metadata/databases/%s/tables/A".format(dbName)) + val r=client.parameters('dbUrl-> dbUrl(dbName),'infer->"y", 'sample->"y").get[ByteChunk](servicePrefix+"/metadata/databases/%s/tables/A".format(dbName)) Await.result(r,1 minute) must beLike { case HttpResponse(_ ,_,Some(Left(buffer)),_) => new String(buffer.array(), "UTF-8") must_== """[{"name":"A","columns":["ID","NAME"],"base":"A"}]""" @@ -87,7 +89,7 @@ class ImportJdbcServiceTest extends BlueEyesServiceSpecification with ImportJdbc "get table desc with inferred relations with sampling - with data" in new Conn{ val dbName ="t1wirsd" tblA;tblB; dataA; dataB - val r=client.parameters('dbUrl-> dbUrl(dbName),'infer->"y", 'sample->"y").get[ByteChunk]("/JdbcImportService/v1/metadata/databases/%s/tables/A".format(dbName)) + val r=client.parameters('dbUrl-> dbUrl(dbName),'infer->"y", 'sample->"y").get[ByteChunk](servicePrefix+"/metadata/databases/%s/tables/A".format(dbName)) Await.result(r,1 minute) must beLike { case HttpResponse(_ ,_,Some(Left(buffer)),_) => new String(buffer.array(), "UTF-8") must_== """[{"name":"A","columns":["ID","NAME"],"base":"A"},{"name":"B","columns":["ID","A_ID","NAME"],"join":{"baseColName":"ID","refKey":{"table":"B","columnName":"A_ID"},"exported":true}}]""" @@ -105,10 +107,10 @@ class ImportJdbcServiceTest extends BlueEyesServiceSpecification with ImportJdbc 'q->"select * from A,B where A.ID = B.A_ID", 'apiKey->apiKey, 'path -> basePath - ).post[ByteChunk]("/JdbcImportService/v1/ingest/%s/query".format(dbName))(Array.empty[Byte]) + ).post[ByteChunk](servicePrefix+"/ingest/%s/query".format(dbName))(Array.empty[Byte]) Await.result(r,1 minute) must beLike { case HttpResponse(_ ,_,Some(Left(buffer)),_) => new 
String(buffer.array(), "UTF-8") must_== - """{"ingested":1,"errors":[]}""" + """{"ingested":3,"errors":[]}""" } } @@ -119,10 +121,10 @@ class ImportJdbcServiceTest extends BlueEyesServiceSpecification with ImportJdbc 'denormalize->"y", 'apiKey->apiKey, 'path -> basePath - ).post[ByteChunk]("/JdbcImportService/v1/ingest/%s/table/%s/auto".format(dbName,"A"))(Array.empty[Byte]) + ).post[ByteChunk](servicePrefix+"/ingest/%s/table/%s/auto".format(dbName,"A"))(Array.empty[Byte]) Await.result(r,1 minute) must beLike { case HttpResponse(_ ,_,Some(Left(buffer)),_) => new String(buffer.array(), "UTF-8") must_== - """{"ingested":1,"errors":[]}""" + """{"ingested":3,"errors":[]}""" } } @@ -134,10 +136,10 @@ class ImportJdbcServiceTest extends BlueEyesServiceSpecification with ImportJdbc 'dbUrl-> dbUrl(dbName), 'apiKey->apiKey, 'path -> basePath - ).post[ByteChunk]("/JdbcImportService/v1/ingest/%s/table/%s/config".format(dbName,"A"))(ingestInfo2Json(tblABDesc)) + ).post[ByteChunk](servicePrefix+"/ingest/%s/table/%s/config".format(dbName,"A"))(ingestInfo2Json(tblABDesc)) Await.result(r,2 minute) must beLike { case HttpResponse(_ ,_,Some(Left(buffer)),_) => new String(buffer.array(), "UTF-8") must_== - """{"ingested":1,"errors":[]}""" + """{"ingested":3,"errors":[]}""" } } } diff --git a/tools/import/jdbc/src/test/scala/com/precog/tools/importers/jdbc/ImportJdbcTest.scala b/tools/import/jdbc/src/test/scala/com/precog/tools/importers/jdbc/ImportJdbcSpec.scala similarity index 62% rename from tools/import/jdbc/src/test/scala/com/precog/tools/importers/jdbc/ImportJdbcTest.scala rename to tools/import/jdbc/src/test/scala/com/precog/tools/importers/jdbc/ImportJdbcSpec.scala index c03c6e2..8c1f6ea 100644 --- a/tools/import/jdbc/src/test/scala/com/precog/tools/importers/jdbc/ImportJdbcTest.scala +++ b/tools/import/jdbc/src/test/scala/com/precog/tools/importers/jdbc/ImportJdbcSpec.scala @@ -3,29 +3,26 @@ package com.precog.tools.importers.jdbc import org.specs2.mutable.Specification 
import blueeyes.json._ import Datatypes._ -import ImportJdbc.{IngestInfo, ImportTable} -import akka.dispatch.Await -import akka.util.Duration -import blueeyes.core.data.DefaultBijections._ +import akka.dispatch.{Future, Await} import blueeyes.akka_testing.FutureMatchers import blueeyes.core.http.test.HttpRequestMatchers -import blueeyes.core.data._ import scala.Left import com.precog.tools.importers.jdbc.ImportJdbc.IngestInfo import scala.Right import scala.Some import com.precog.tools.importers.jdbc.ImportJdbc.ImportTable import blueeyes.bkka.AkkaDefaults._ -import blueeyes.core.http.{HttpStatus, HttpResponse} -import blueeyes.core.http.HttpStatusCodes.OK -import scalaz.StreamT +import blueeyes.core.http.HttpResponse +import scalaz.{Id,StreamT} +import scalaz.effect.IO import blueeyes.bkka.FutureMonad /** * User: gabriel * Date: 11/22/12 */ -class ImportJdbcTest extends Specification with FutureMatchers with HttpRequestMatchers{ +class ImportJdbcSpec extends Specification with FutureMatchers with HttpRequestMatchers{ + "build queries" should { "single table query" in { @@ -52,19 +49,30 @@ class ImportJdbcTest extends Specification with FutureMatchers with HttpRequestM } - implicit def toStreamElem[T](l:List[T])=l.toSeq::StreamT.empty + //implicit def toStreamElem[T](l:List[T])=l.toSeq::StreamT.empty "Json build from data" should { + //(baseName:String, ingestInfo:IngestInfo, row: Seq[String], outStream:StreamT[M,JValue], currentObj:StrJVMap=Map()) + val empty=StreamT.empty[Id.Id,JValue] "build a simple Json" in { - ImportJdbc.mkPartialJson("a",tblADesc,aData).get._1 must_== jA + ImportJdbc.mkJson("a",tblADesc,aData,empty) must_== (empty, Map("A" -> JObject(Map("ID" -> JString("1"), "name" -> JString("aaa"))))) } "build a composite Json" in { - ImportJdbc.mkPartialJson("a",tblABDesc,aData++bData).get._1 must_== jAB + ImportJdbc.mkJson("a",tblABDesc,aData++bData,empty) must_== (empty, + Map( + "A" -> JObject(Map("ID" -> JString("1"), "name" -> JString("aaa"))), + 
"B" -> JArray(List(JObject(Map("ID" -> JString("2"), "A_ID" -> JString("1"), "name" -> JString("bbb"))))) + )) } "build a relation Json" in { - ImportJdbc.mkPartialJson("c",tblCABDesc,cData++aData++bData).get._1 must_== jC + ImportJdbc.mkJson("c",tblCABDesc, cData++aData++bData, empty) must_== ( empty, + Map( + "A" -> JObject(Map("ID" -> JString("1"), "name" -> JString("aaa"))), + "B" -> JObject(Map("ID" -> JString("2"), "A_ID" -> JString("1"), "name" -> JString("bbb"))), + "C" -> JObject(Map("A_ID" -> JString("1"), "B_ID" -> JString("2"), "name" -> JString("ccc"))) + )) } val tblDesc = IngestInfo(List(ImportTable("parent",List("ID","name"), Left(Table("Parent"))),ImportTable("child",List("ID","name","P_ID"), Right(Join("id",Key(Table("child"),"parent_id"),ExportedKey))))) @@ -74,47 +82,29 @@ class ImportJdbcTest extends Specification with FutureMatchers with HttpRequestM val dataParent3 = List("3","parent3","2","child2","1") + "build Jobjects for multiple values" in { - val Some((emptyChildJson,_))=ImportJdbc.mkPartialJson("parent",tblDesc,dataNoChld) - emptyChildJson must_== - JObject(JField("ID",JString("1"))::JField("name",JString("parent"))::JField("CHILD",JArray(Nil))::Nil) + val(stream1,map1)=ImportJdbc.mkJson("parent",tblDesc,dataNoChld,StreamT.empty) + map1 must_== + Map("PARENT"->JObject(JField("ID",JString("1"))::JField("name",JString("parent"))::Nil),"CHILD"->JArray(Nil)) - val Some((partJson,_))=ImportJdbc.mkPartialJson("parent",tblDesc,dataChld2) - partJson must_== - JObject(JField("ID",JString("1"))::JField("name",JString("parent")):: - JField("CHILD",JArray( + val (stream2,map2)=ImportJdbc.mkJson("parent",tblDesc,dataChld2,stream1,map1) + map2 must_== + Map( + "PARENT"-> JObject(JField("ID",JString("1"))::JField("name",JString("parent"))::Nil), + "CHILD"-> JArray( JObject(JField("ID",JString("2"))::JField("name",JString("child2"))::JField("P_ID",JString("1"))::Nil)::Nil) - )::Nil) + ) - ImportJdbc.mkPartialJson("parent",tblDesc,dataParent3).get._1 
must_== JObject(JField("ID",JString("3"))::JField("name",JString("parent3")):: - JField("CHILD",JArray( + ImportJdbc.mkJson("parent",tblDesc,dataParent3,stream2,map2)._2 must_== + Map( + "PARENT"->JObject(JField("ID",JString("3"))::JField("name",JString("parent3"))::Nil), + "CHILD"->JArray( JObject(JField("ID",JString("2"))::JField("name",JString("child2"))::JField("P_ID",JString("1"))::Nil)::Nil) - )::Nil) - } - - "build a composite object" in { - val ds=dataChld1.toSeq::dataChld2.toSeq::StreamT.empty - val Some((d1Json,_))=ImportJdbc.mkPartialJson("parent",tblDesc,ds) - d1Json must_== - JObject(JField("ID",JString("1"))::JField("name",JString("parent")):: - JField("CHILD",JArray( - JObject(JField("ID",JString("1"))::JField("name",JString("child1"))::JField("P_ID",JString("1"))::Nil):: - JObject(JField("ID",JString("2"))::JField("name",JString("child2"))::JField("P_ID",JString("1"))::Nil)::Nil) - )::Nil) - } - - "objects must be uppercase" in { - val ds=dataChld1.toSeq::dataChld2.toSeq::StreamT.empty - val Some((d1Json,_))=ImportJdbc.mkPartialJson("parent",tblDesc,ds) - d1Json must_== - JObject(JField("ID",JString("1"))::JField("name",JString("parent")):: - JField("CHILD",JArray( - JObject(JField("ID",JString("1"))::JField("name",JString("child1"))::JField("P_ID",JString("1"))::Nil):: - JObject(JField("ID",JString("2"))::JField("name",JString("child2"))::JField("P_ID",JString("1"))::Nil)::Nil) - )::Nil) + ) } "buildBody for multiple values" in { @@ -124,17 +114,18 @@ class ImportJdbcTest extends Specification with FutureMatchers with HttpRequestM val dataNoChld = List("2","parent2",null,null,null) val dataParent3 = List("3","parent3","2","child2","1") - val allData= StreamT.fromIterable((dataChld1::dataChld2::dataNoChld::dataParent3::Nil).reverse.map( _.toIndexedSeq).toIterable) + //val allData= StreamT.fromIterable((dataChld1::dataChld2::dataNoChld::dataParent3::Nil).reverse.map( _.toIndexedSeq).toIterable) + val allData= 
dataChld1::dataChld2::dataNoChld::dataParent3::StreamT.empty[IO,Seq[String]] implicit val executionContext = defaultFutureDispatch implicit val futureMonad= new FutureMonad(executionContext) val r= ImportJdbc.buildBody(allData,"parent",tblDesc) - Await.result(r.toStream,1 minute).toList must_==( + Await.result(r.flatMap(_.toStream),1 minute) must_==( JObject(JField("ID",JString("1"))::JField("name",JString("parent1")):: JField("CHILD",JArray( - JObject(JField("ID",JString("2"))::JField("name",JString("child2"))::JField("P_ID",JString("1"))::Nil):: JObject(JField("ID",JString("1"))::JField("name",JString("child1"))::JField("P_ID",JString("1"))::Nil):: + JObject(JField("ID",JString("2"))::JField("name",JString("child2"))::JField("P_ID",JString("1"))::Nil):: Nil) )::Nil):: JObject(JField("ID",JString("2"))::JField("name",JString("parent2"))::JField("CHILD",JArray(Nil))::Nil):: @@ -145,6 +136,7 @@ class ImportJdbcTest extends Specification with FutureMatchers with HttpRequestM } } + "Ingest data" should { implicit val executionContext = defaultFutureDispatch @@ -155,7 +147,7 @@ class ImportJdbcTest extends Specification with FutureMatchers with HttpRequestM dataA val r=ImportJdbc.ingest(conn,"a",ImportJdbc.buildQuery(tblADesc),Some(tblADesc),basePath,host,apiKey) Await.result(r,1 minute) must beLike { - case HttpResponse(_ ,_,Some(Left(buffer)),_) => { new String(buffer.array(), "UTF-8") must_== """{"ingested":1,"errors":[]}"""} + case HttpResponse(_ ,_,Some(Left(buffer)),_) => { new String(buffer.array(), "UTF-8") must_== """{"ingested":3,"errors":[]}"""} } } @@ -166,7 +158,7 @@ class ImportJdbcTest extends Specification with FutureMatchers with HttpRequestM val r=ImportJdbc.ingest(conn,"a",ImportJdbc.buildQuery(tblABDesc),Some(tblABDesc),basePath,host,apiKey) Await.result(r,1 minute) must beLike { - case HttpResponse(_ ,_,Some(Left(buffer)),_) => { new String(buffer.array(), "UTF-8") must_== """{"ingested":1,"errors":[]}"""} + case HttpResponse(_ 
,_,Some(Left(buffer)),_) => { new String(buffer.array(), "UTF-8") must_== """{"ingested":3,"errors":[]}"""} } } } diff --git a/tools/import/jdbc/src/test/scala/com/precog/tools/importers/jdbc/package.scala b/tools/import/jdbc/src/test/scala/com/precog/tools/importers/jdbc/package.scala index 52d47be..e738af4 100644 --- a/tools/import/jdbc/src/test/scala/com/precog/tools/importers/jdbc/package.scala +++ b/tools/import/jdbc/src/test/scala/com/precog/tools/importers/jdbc/package.scala @@ -94,6 +94,4 @@ package object jdbc { } } - def manageConn(s:String)= new Conn{ val dbName=s } - } diff --git a/tools/import/mongodb/project/build.scala b/tools/import/mongodb/project/Build.scala similarity index 100% rename from tools/import/mongodb/project/build.scala rename to tools/import/mongodb/project/Build.scala diff --git a/tools/import/mongodb/src/main/scala/com/precog/tools/importers/mongo/ImportMongo.scala b/tools/import/mongodb/src/main/scala/com/precog/tools/importers/mongo/ImportMongo.scala index 69987e9..5ed50c2 100644 --- a/tools/import/mongodb/src/main/scala/com/precog/tools/importers/mongo/ImportMongo.scala +++ b/tools/import/mongodb/src/main/scala/com/precog/tools/importers/mongo/ImportMongo.scala @@ -4,12 +4,11 @@ import com.mongodb.casbah.Imports._ import blueeyes.persistence.mongo.json.BijectionsMongoJson._ import com.mongodb.casbah.commons.TypeImports.ObjectId import scalaz._ -import akka.dispatch.{Await, Future} +import akka.dispatch.Future import blueeyes.json._ import blueeyes.bkka.AkkaDefaults._ import blueeyes.bkka.AkkaDefaults.defaultFutureDispatch import blueeyes.bkka.FutureMonad -import akka.util.Duration import blueeyes.core.http.HttpResponse import com.precog.tools.importers.common._ import ConsoleUtils._ @@ -30,7 +29,17 @@ import org.slf4j.LoggerFactory * Date: 1/17/13 */ object ImportMongo { +/* +Next version: +every time it loads: +configuration exists? 
+if no, prompt to run config +if yes, check for new collections and prompt to config those +after run, update config with latest + +use configrity for configuration +*/ private lazy val logger = LoggerFactory.getLogger("com.precog.tools.importers.mongo.ImportMongo") implicit val as=actorSystem @@ -40,10 +49,18 @@ object ImportMongo { val collsConfig="precog_import_config" val sampleSize=100 - def parseInt(s : String) : Option[Int] = try { - Some(s.toInt) - } catch { - case _ : java.lang.NumberFormatException => None + + def matchValid[T](value:Any, fi: =>T, fl: =>T, ff: =>T, fd: =>T, fs: =>T,foid: =>T, fdt: =>T, fno: =>T= null)={ + value match { + case i: java.lang.Integer => fi + case l: java.lang.Long => fl + case f: java.lang.Float => ff + case d: java.lang.Double => fd + case l: java.lang.String => fs + case oid: ObjectId => foid + case dt: Date => fdt + case _ if fno != null => fno + } } // No @tailrec but we don't expect getting back from mongoDb a hierarchy big enough to blow the stack @@ -54,15 +71,14 @@ object ImportMongo { }).toSeq } - def sampleColumns(mongoConn:MongoConnection)(db: MongoDB, coll: String)={ + def sampleColumns(mongoConn:MongoConnection)(db: MongoDB, coll: String):Set[String]={ val collection=db(coll).find().take(sampleSize) - collection.flatMap(columnsOf(_)).toSet + collection.foldLeft(Set[String]())((s,o)=>s++(columnsOf(o))) } def configureCollections(mongoConn:MongoConnection)(db: MongoDB):Seq[DBObject]={ println("No configuration found in the mongo instance, creating a new one.") - val databases=db.name - println("DATABASE %s \n".format(db)) + println("DATABASE %s \n".format(db.name)) val userCollections=db.getCollectionNames().filter(name=> !(name.startsWith("system.") || name.startsWith(collsConfig))) val colls=selectSet("collection",userCollections.toSeq) colls.map( coll =>{ @@ -70,14 +86,7 @@ object ImportMongo { val columns=sampleColumns(mongoConn)(db,coll).toSeq val fields=selectSet("column", columns) - //TODO ugly, maybe using 
a wrapper type? - val sortColumns=db(coll).find().take(sampleSize).map(mobj => mobj.toMap).reduceLeft(_++_).filter( kv => kv._2 match { - case s:String => true - case d:java.lang.Long => true - case oid:ObjectId => true - case dt:Date => true - case _ => false - }) + val sortColumns=db(coll).find().take(sampleSize).map(mobj => mobj.toMap).reduceLeft(_++_).filter( kv => matchValid(kv._2, true, true, true, true, true, true, true, false )) val sortColumn=selectOne("import control column", sortColumns.keys.toSeq) MongoDBObject("collection"->coll, "fields"->fields, "sortColumn"->sortColumn) @@ -85,6 +94,75 @@ object ImportMongo { ) } + def pair[T](getter: String=>T)(name:String ) = (name-> getter(name)) + + def getString(jo: JObject)(field:String) = strValue(jo \ field) + def getArray(jo: JObject)(field:String) = arrOfStrValues(jo \ field) + + def strValue(jv: JValue) = (jv --> classOf[JString]).value + def arrOfStrValues(jv: JValue) = (jv -->? classOf[JArray]).map(_.elements.map(strValue(_))).getOrElse(Nil) + + + def importCollection(host:String, basePath:String, apiKey:String, db:MongoDB, mdbobj: MongoDBObject, mongoConn: MongoConnection):Future[(Either[String,String],AnyRef)]={ + + val collName = mdbobj.getAs[String]("collection").get + val fieldNames = mdbobj.getAsOrElse[util.ArrayList[String]]("fields",new util.ArrayList()) + val lastId = mdbobj.getAs[String]("lastId") + val sortColumn=mdbobj.getAs[String]("sortColumn").get + + logger.info("Ingesting %s since %s of %s".format(collName,lastId,sortColumn)) + + val fdsid = Future { dsZipMaxIds(db, collName, sortColumn, fieldNames, lastId) } + val (fds, fmaxId) = (fdsid map (_._1), fdsid map (_._2)) + + val fjsons = fds.map(_.flatMap(MongoToJson(_).toStream)) + val path = "%s/%s/%s".format(basePath, db.name, collName) + val data = StreamT.fromStream[Future, JObject](fjsons) + val fsend= data.isEmpty.flatMap( isEmpty => + if (isEmpty) Future(Left("No new data found in %s.%s".format(db.name,collName))) + else 
sendToPrecog(host,path,apiKey,toByteStream(data),streaming=false) flatMap( _ match { + case HttpResponse(status, _, Some(Left(buffer)), _) => { + Future(Right("Result from precog: %s (%s)".format(new String(buffer.array(), "UTF-8"), status))) + } + case HttpResponse(_, _, Some(Right(stream)), _) => { + stream.toStream.map( strmBuffer =>Right("Streaming result from precog: %s".format(strmBuffer.foldLeft("")( (str,b)=> str+new String(b.array(), "UTF-8"))))) + } + case result => Future(Left("Error: %s".format(result.toString()))) + } + )) + M.lift2((a: Either[String,String], b: AnyRef) => (a, b))(fsend, fmaxId) + } + + + def dsZipMaxIds(db: MongoDB, collName: String, sortColumn: String, fieldNames: util.ArrayList[String], lastId: Option[String]): (Stream[DBObject], AnyRef) = { + val rStrm = readFromMongo(db, collName, sortColumn, fieldNames, lastId) + val (oids, dataStrm) = rStrm.map(m => (m(sortColumn), m)).unzip + + + val maxOid = if (oids.isEmpty) lastId + else { + + //ugly but need the runtime type to go form AnyRef to Ordering[_] for max to work... sum types + def ordering for sum types? 
+ def refine[T<:Comparable[T]](f: AnyRef=>T)=oids.map(f).max + matchValid(oids.head, + refine({case ss:java.lang.Integer => ss}), + refine({case ss:java.lang.Long => ss}), + refine({case ss:java.lang.Float => ss}), + refine({case ss:java.lang.Double => ss}), + refine({case ss:String => ss}), + refine({case ss:ObjectId => ss}), + refine({case ss:Date => ss})) + } + (dataStrm, maxOid) + } + + def readFromMongo[A : AsQueryParam](mongoDB: MongoDB, collName: String, idCol:String, fieldNames:Seq[String], oLastId:Option[A]=None:Option[ObjectId]):Stream[DBObject]={ + val mongoColl = mongoDB(collName) + val q = oLastId.map( idCol $gt _ ).getOrElse(MongoDBObject()) + val fields = MongoDBObject(fieldNames.map(_->""):_*) + mongoColl.find(q,fields).toStream + } + def main(args:Array[String]){ if (args.length != 4) { @@ -105,7 +183,6 @@ object ImportMongo { val mongoConn=MongoConnection(uri) uri.database.map { database => - //TODO: use uri.database.asList and if it's empty, load the full list of dbs val db = mongoConn(database) for { user <- uri.username @@ -120,89 +197,48 @@ object ImportMongo { val configs=configureCollections(mongoConn)(db) configs.map(inputConfigColl.save(_)) } - val jsonInputs= inputConfigColl.find().toList - - val fimports=jsonInputs.map(config=> importCollection(precogHost,basePath,apiKey,db, config, mongoConn)) - val fresults=Await.result(Future.sequence(fimports.toList), Duration("24 hours")) + val jsonInputs= inputConfigColl.find().toList - jsonInputs.zip(fresults).map( r =>{ - val (mDbObj,(result,lastId)) = r - result.left.map(s=> - logger.warn("%s".format(s)) - ).right.map({s=> - logger.info("%s".format(s)) - inputConfigColl.save(mDbObj++("lastId"->lastId)) - }) + val fimports=jsonInputs.map(config=> { + + val collName = config.getAs[String]("collection").get + val lastId = config.getAs[String]("lastId") + val sortColumn=config.getAs[String]("sortColumn").get + println("Ingesting %s since %s of %s".format(collName,lastId,sortColumn)) + + 
importCollection(precogHost,basePath,apiKey,db, config, mongoConn) + }) + + Future.sequence(fimports).onComplete( x => {x match { + case Right(results) => { + jsonInputs.zip(results).map( {case (mDbObj,(result,lastId)) => + result.left.map(s=>{ + val result="%s".format(s) + logger.warn(result) + println(result) + } + ).right.map(s=>{ + inputConfigColl.save(mDbObj++("lastId"->lastId)) + val result="%s".format(s) + logger.info(result) + println(result) + } + ) + } + ) + } + case Left(e) => logger.error("Exception during import ",e) } + actorSystem.shutdown() + } ) } - } finally { - logger.info("Shutting down...") - actorSystem.shutdown() - } - } - - def pair[T](getter: String=>T)(name:String ) = (name-> getter(name)) - - def getString(jo: JObject)(field:String) = strValue(jo \ field) - def getArray(jo: JObject)(field:String) = arrOfStrValues(jo \ field) - - def strValue(jv: JValue) = (jv --> classOf[JString]).value - def arrOfStrValues(jv: JValue) = (jv -->? classOf[JArray]).map(_.elements.map(strValue(_))).getOrElse(Nil) - - - def importCollection(host:String, basePath:String, apiKey:String, db:MongoDB, mdbobj: MongoDBObject, mongoConn: MongoConnection):Future[(Either[String,String],AnyRef)]={ - - val collName = mdbobj.getAs[String]("collection").get - val fieldNames = mdbobj.getAsOrElse[util.ArrayList[String]]("fields",new util.ArrayList()) - val lastId = mdbobj.getAs[String]("lastId") - val sortColumn=mdbobj.getAs[String]("sortColumn").get - val fdsid = Future { - val rStrm=readFromMongo(db, collName, sortColumn, lastId, fieldNames) - val (oids,dataStrm)=rStrm.map(m=>(m(sortColumn),m)).unzip - - //ugly but need the runtime type to go form AnyRef to Ordering[_] for max to work... sum types + def ordering for sum types? 
- val maxOid= if (oids.isEmpty) lastId else { - oids.head match { - case s:String => oids.map( {case ss:String => ss}).max - case d:java.lang.Long => oids.map( {case ds:java.lang.Long => ds}).max - case oid:ObjectId => oids.map( {case oids:ObjectId => oids}).max - case dt:Date => oids.map( {case ds:Date => ds}).max - } + } catch { + case e:Throwable => { + logger.error("General exception during import",e) + actorSystem.shutdown() } - (dataStrm,maxOid) } - val (fds, fmaxId) = (fdsid map (_._1), fdsid map (_._2)) - - val fjsons = fds.map(_.flatMap(MongoToJson(_).toStream)) - val path = "%s/%s/%s".format(basePath, db.name, collName) - val data = StreamT.fromStream[Future, JObject](fjsons) - val fsend= data.isEmpty.flatMap( isEmpty => - if (isEmpty) Future(Left("No new data found in %s.%s".format(db.name,collName))) - else sendToPrecog(host,path,apiKey,toByteStream(data)) map( _ match { - case HttpResponse(_, _, Some(Left(buffer)), _) => { - Right("Result from precog: %s".format(new String(buffer.array(), "UTF-8"))) - } - case result => Left("Error: %s".format(result.toString())) - } - )) - M.lift2((a: Either[String,String], b: AnyRef) => (a, b))(fsend, fmaxId) - } - - def readFromMongo(mongoDB: MongoDB, collName: String, idCol:String, oLastId:Option[AnyRef], fieldNames:Seq[String]):Stream[DBObject]={ - val mongoColl = mongoDB(collName) - - //ugly, maybe using a wrapper type? - val q = oLastId.map( - _ match { - case s:String => idCol $gt s - case d:java.lang.Long => idCol $gt d.longValue() - case oid:ObjectId => idCol $gt oid - case dt:Date => idCol $gt dt - } - ).getOrElse(MongoDBObject()) - val fields = MongoDBObject(fieldNames.map(_->""):_*) - mongoColl.find(q,fields).toStream //.view ? 
} } diff --git a/tools/import/mongodb/src/test/scala/com/precog/tools/importers/mongo/ImportMongoSpec.scala b/tools/import/mongodb/src/test/scala/com/precog/tools/importers/mongo/ImportMongoSpec.scala new file mode 100644 index 0000000..aa0b82e --- /dev/null +++ b/tools/import/mongodb/src/test/scala/com/precog/tools/importers/mongo/ImportMongoSpec.scala @@ -0,0 +1,117 @@ +package com.precog.tools.importers.mongo + +import com.mongodb.casbah.Imports._ +import blueeyes.persistence.mongo.RealMongoSpecSupport +import com.mongodb.casbah.MongoDB +import org.specs2.mutable.After +import org.specs2.specification.Scope + + +/** + * User: gabriel + * Date: 3/29/13 + */ +class ImportMongoSpec extends RealMongoSpecSupport { + + trait Mongo extends After with Scope { + + def dbName:String + + implicit lazy val testDb= MongoDB(realMongo, dbName ) + + def after{ + testDb.dropDatabase() + } + } + + + //def readFromMongo(mongoDB: MongoDB, collName: String, idCol:String, oLastId:Option[AnyRef], fieldNames:Seq[String]):Stream[DBObject]={ + "read from mongo" should { + "return only selected columns" in new Mongo { + val dbName="t1" + val newObj = MongoDBObject("a" -> "1", "x" -> "y", "b" -> 3, "spam" -> "eggs") + testDb("test1").save(newObj) + val r=ImportMongo.readFromMongo(testDb,"test1","_id", Seq("a","b")) + r.head must_== MongoDBObject("_id"->newObj("_id"),"a" -> "1", "b" -> 3) + } + + "return the whole connection if no last id" in new Mongo { + val dbName="t2" + val data = List(MongoDBObject("a" -> 1),MongoDBObject("a" -> 2),MongoDBObject("a" -> 3),MongoDBObject("a" -> 4)) + data.foreach( testDb("test2").save(_) ) + val r=ImportMongo.readFromMongo(testDb,"test2","a",Seq("a")) + r must containTheSameElementsAs(data) + } + + "return only new rows" in new Mongo{ + val dbName="t3" + val (d1,d2,d3,d4)=(MongoDBObject("a" -> 1),MongoDBObject("a" -> 2),MongoDBObject("a" -> 3),MongoDBObject("a" -> 4)) + val data = List(d1,d2,d3,d4) + data.foreach( testDb("test3").save(_) ) + 
ImportMongo.readFromMongo(testDb,"test3","a", Seq("a")) must containTheSameElementsAs(data) + val r=ImportMongo.readFromMongo(testDb,"test3","a", Seq("a"),Some(2)) + r must containTheSameElementsAs(List(d3,d4)) + } + + "return empty if no new rows" in new Mongo{ + val dbName="t4" + val (d1,d2,d3,d4)=(MongoDBObject("a" -> 1),MongoDBObject("a" -> 2),MongoDBObject("a" -> 3),MongoDBObject("a" -> 4)) + val data = List(d1,d2,d3,d4) + data.foreach( testDb("test4").save(_) ) + val r=ImportMongo.readFromMongo(testDb,"test4","a", Seq("a"),Some(4)) + r must be empty + } + } + + "columns of" should { + "return no columns for the empty object" in { + ImportMongo.columnsOf(MongoDBObject()) must be empty + } + + "return the set of columns of an object" in { + ImportMongo.columnsOf(MongoDBObject("a"->1,"c"->3, "column"->"zzzz")) must containTheSameElementsAs(Seq("c","column","a")) + } + } + + + "sample columns" should { + "return no columns when the collection is empty" in new Mongo{ + val dbName="t5" + val data = List() + data.foreach( testDb("test5").save(_) ) + val conn=new MongoConnection(realMongo) + val cols=ImportMongo.sampleColumns(conn)(testDb,"test5") + cols must be empty + } + "identify all the columns with a collection smaller than the sample size" in new Mongo{ + val dbName="t6" + val data = List(MongoDBObject("a" -> 1,"b"->"a"),MongoDBObject("a" -> 2,"b"->"b"),MongoDBObject("a" -> 3,"b"->"c"),MongoDBObject("a" -> 4,"b"->"d")) + data.foreach( testDb("test6").save(_) ) + + val conn=new MongoConnection(realMongo) + val cols=ImportMongo.sampleColumns(conn)(testDb,"test6") + cols must_== Set("_id","a","b") + } + + "identify all the columns with a collection bigger than the sample size" in new Mongo{ + val dbName="t7" + (1 to 2*ImportMongo.sampleSize).foreach (i=>{testDb("test7").save(MongoDBObject("data" -> "asdb","idx"->i))}) + + + val conn=new MongoConnection(realMongo) + val cols=ImportMongo.sampleColumns(conn)(testDb,"test7") + cols must_== Set("_id","data","idx") + } 
+ + "identify all the columns for collections with different objects" in new Mongo{ + val dbName="t8" + val data = List(MongoDBObject("a" -> 1,"b"->"a"),MongoDBObject("c" -> 2,"d"->"b"),MongoDBObject("a" -> 3,"b"->"c","z"->123),MongoDBObject("a" -> 4,"b"->"d")) + data.foreach( testDb("test8").save(_) ) + + val conn=new MongoConnection(realMongo) + val cols=ImportMongo.sampleColumns(conn)(testDb,"test8") + cols must_== Set("_id","a","b","c","d","z") + } + } + +} diff --git a/tools/import/project/build.scala b/tools/import/project/Build.scala similarity index 100% rename from tools/import/project/build.scala rename to tools/import/project/Build.scala