From e1b524fd56f049f0d1fc43adfd69a81a2740753f Mon Sep 17 00:00:00 2001 From: Rasna Tomar Date: Tue, 1 Nov 2016 23:48:53 +0530 Subject: [PATCH 1/8] exclude field in query --- src/main/scala/Engine.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/scala/Engine.scala b/src/main/scala/Engine.scala index aee709b..2e41f23 100644 --- a/src/main/scala/Engine.scala +++ b/src/main/scala/Engine.scala @@ -43,7 +43,8 @@ case class Query( // to what is in the algorithm params or false num: Option[Int] = None, // default: whatever is in algorithm params, which itself has a default--probably 20 eventNames: Option[List[String]], // names used to ID all user actions - withRanks: Option[Boolean] = None) // Add to ItemScore rank fields values, default fasle + withRanks: Option[Boolean] = None, // Add to ItemScore rank fields values, default fasle + excludeFields: Option[List[excludeField]]) // blacklist fields specified in query extends Serializable /** Used to specify how Fields are represented in engine.json */ From c6173fd8cdd059561fdc83833df856b0a33b9376 Mon Sep 17 00:00:00 2001 From: Rasna Tomar Date: Tue, 1 Nov 2016 23:50:22 +0530 Subject: [PATCH 2/8] blacklist properties --- src/main/scala/Engine.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/scala/Engine.scala b/src/main/scala/Engine.scala index 2e41f23..469ce97 100644 --- a/src/main/scala/Engine.scala +++ b/src/main/scala/Engine.scala @@ -44,9 +44,10 @@ case class Query( num: Option[Int] = None, // default: whatever is in algorithm params, which itself has a default--probably 20 eventNames: Option[List[String]], // names used to ID all user actions withRanks: Option[Boolean] = None, // Add to ItemScore rank fields values, default fasle - excludeFields: Option[List[excludeField]]) // blacklist fields specified in query + excludeFields: Option[List[excludeField]]) // blacklist fields specified in query extends Serializable + /** Used to specify how Fields are represented in engine.json */ case class Field( // no optional values for fields, whne specified name: String, // name of metadata field From fefd8b649c44fa225db4e5a0f6a2b22f4a1e2c2a Mon Sep 17 00:00:00 2001 From: Rasna Tomar Date: Wed, 2 Nov 2016 00:06:25 +0530 Subject: [PATCH 3/8] exclude fields changes --- src/main/scala/URAlgorithm.scala | 30 +++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/src/main/scala/URAlgorithm.scala b/src/main/scala/URAlgorithm.scala index e6cb570..274ebeb 100644 --- a/src/main/scala/URAlgorithm.scala +++ b/src/main/scala/URAlgorithm.scala @@ -156,7 +156,8 @@ case class URAlgorithmParams( // used as the subject of a dateRange in queries, specifies the name of the item property dateName: Option[String] = None, indicators: Option[List[IndicatorParams]] = None, // control params per matrix pair - seed: Option[Long] = None) // seed is not used presently + seed: Option[Long] = None, // seed is not used presently + excludeFields: Option[List[excludeField]] = None) //exclude fields specified in engine.json extends Params //fixed default make it reproducible unless supplied /** Creates cooccurrence, cross-cooccurrence and eventually content correlators with @@ -178,6 +179,8 @@ class URAlgorithm(val ap: URAlgorithmParams) } case class FilterCorrelators(actionName: String, itemIDs: Seq[ItemID]) + case class blacklistFields(name: String, values: List[String]) + val appName: String = ap.appName val recsModel: String = ap.recsModel.getOrElse(defaultURAlgorithmParams.DefaultRecsModel) //val eventNames: Seq[String] = ap.eventNames @@ -209,6 +212,8 @@ class URAlgorithm(val ap: URAlgorithmParams) eventNames } + val excludeFields: List[excludeField] = ap.excludeFields.getOrElse(List.empty) + // Unique by 'type' ranking params, if collision get first. lazy val rankingsParams: Seq[RankingParams] = ap.rankings.getOrElse(Seq(RankingParams( name = Some(defaultURAlgorithmParams.DefaultBackfillFieldName), @@ -621,11 +626,30 @@ class URAlgorithm(val ap: URAlgorithmParams) } /** Build not must query part */ - def buildQueryMustNot(query: Query, events: Seq[Event]): JValue = { + def buildQueryMustNot(query: Query, events: Seq[Event]): List[JValue] = { + var excludeFieldsList = List[JValue]() + val paramsBlacklistField = excludeFields + val queryBlacklistField = query.excludeFields.getOrElse(List.empty) + + //de-duplicate common fields provided in engine.json and query + val deduplicateFields = (paramsBlacklistField ::: queryBlacklistField).map(field => blacklistFields(field.name, field.values)).distinct + + deduplicateFields.foreach { field => + val excludeFieldJValue: JValue = render("terms" -> (field.name -> field.values)) + excludeFieldsList ::= excludeFieldJValue + } + val mustNotFields: JValue = render("ids" -> ("values" -> getExcludedItems(events, query)) ~ ("boost" -> 0)) - mustNotFields + excludeFieldsList ::= mustNotFields + excludeFieldsList } + /** Build not must query part */ +// def buildQueryMustNot(query: Query, events: Seq[Event]): JValue = { +// val mustNotFields: JValue = render("ids" -> ("values" -> getExcludedItems(events, query)) ~ ("boost" -> 0)) +// mustNotFields +// } + /** Build sort query part */ def buildQuerySort(): Seq[JValue] = if (recsModel == RecsModel.All || recsModel == RecsModel.BF) { val sortByScore: Seq[JValue] = Seq(parse("""{"_score": {"order": "desc"}}""")) From a4d00ac6c9bd1837c252a1531e86ecf3eb5479ff Mon Sep 17 00:00:00 2001 From: Rasna Tomar Date: Wed, 2 Nov 2016 00:09:11 +0530 Subject: [PATCH 4/8] Update Engine.scala --- src/main/scala/Engine.scala | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/main/scala/Engine.scala b/src/main/scala/Engine.scala index 469ce97..4e1324c 100644 --- a/src/main/scala/Engine.scala +++ b/src/main/scala/Engine.scala @@ -56,6 +56,11 @@ case class Field( // no optional values for fields, whne specified bias: Float) // any positive value is a boost, negative is a filter extends Serializable +/** Used to specify how exclude fields are represented in engine.json */ +case class excludeField( + name: String, // name of metadata field + values: List[String]) // fields can have multiple values like tags of a single value as when using hierarchical + /** Used to specify the date range for a query */ case class DateRange( name: String, // name of item property for the date comparison From 757f2c482bb57d3f35cfeb898f00dd859ef4fe3a Mon Sep 17 00:00:00 2001 From: Rasna Tomar Date: Fri, 4 Nov 2016 09:40:16 +0530 Subject: [PATCH 5/8] numerical range filter --- src/main/scala/URAlgorithm.scala | 187 +++++++++++++++++++++++++++++++ 1 file changed, 187 insertions(+) diff --git a/src/main/scala/URAlgorithm.scala b/src/main/scala/URAlgorithm.scala index 274ebeb..b10aeb7 100644 --- a/src/main/scala/URAlgorithm.scala +++ b/src/main/scala/URAlgorithm.scala @@ -867,7 +867,194 @@ class URAlgorithm(val ap: URAlgorithmParams) } json } + + def getFilteringNumericRange(query: Query): Seq[JValue] = { + var json = Seq[JValue]() + if (query.numericRangeFilter.nonEmpty) { + val numericRanges = query.numericRangeFilter.getOrElse(List.empty) + numericRanges.foreach { numericRange => + val name = numericRange.name + if (numericRange.greaterThan.nonEmpty && numericRange.lessThan.nonEmpty) { + // val name = numericRange.name + val greaterThan = numericRange.greaterThan.get + val lessThan = numericRange.lessThan.get + val range = + s""" + |{ + | "constant_score": { + | "filter": { + | "range": { + | "$name": { + | "gt": $greaterThan, + | "lt": $lessThan + | } + | } + | }, + | "boost": 0 + | } + |} + """.stripMargin + json = json :+ parse(range) + } else if (numericRange.greaterThan.nonEmpty && numericRange.lessThanOrEqual.nonEmpty) { + //val name = numericRange.name + val greaterThan = numericRange.greaterThan.get + val lessThanOrEqual = numericRange.lessThanOrEqual.get + val range = + s""" + |{ + | "constant_score": { + | "filter": { + | "range": { + | "$name": { + | "gt": $greaterThan, + | "lte": $lessThanOrEqual + | } + | } + | }, + | "boost": 0 + | } + |} + """.stripMargin + json = json :+ parse(range) + } else if (numericRange.greaterThanOrEqual.nonEmpty && numericRange.lessThan.nonEmpty) { + //val name = numericRange.name + val greaterThanOrEqual = numericRange.greaterThanOrEqual.get + val lessThan = numericRange.lessThan.get + val range = + s""" + |{ + | "constant_score": { + | "filter": { + | "range": { + | "$name": { + | "gte": $greaterThanOrEqual, + | "lt": $lessThan + | } + | } + | }, + | "boost": 0 + | } + |} + """.stripMargin + json = json :+ parse(range) + } else if (numericRange.greaterThanOrEqual.nonEmpty && numericRange.lessThanOrEqual.nonEmpty) { + //val name = numericRange.name + val greaterThanOrEqual = numericRange.greaterThanOrEqual.get + val lessThanOrEqual = numericRange.lessThanOrEqual.get + val range = + s""" + |{ + | "constant_score": { + | "filter": { + | "range": { + | "$name": { + | "gte": $greaterThanOrEqual, + | "lte": $lessThanOrEqual + | } + | } + | }, + | "boost": 0 + | } + |} + """.stripMargin + + json = json :+ parse(range) + + } else if (numericRange.greaterThan.nonEmpty) { + //val name = numericRange.name + val greaterThan = numericRange.greaterThan.get + val range = + s""" + |{ + | "constant_score": { + | "filter": { + | "range": { + | "$name": { + | "gt": $greaterThan + | } + | } + | }, + | "boost": 0 + | } + |} + """.stripMargin + json = json :+ parse(range) + + } else if (numericRange.greaterThanOrEqual.nonEmpty) { + //val name = numericRange.name + val greaterThanOrEqual = numericRange.greaterThanOrEqual.get + val range = + s""" + |{ + | "constant_score": { + | "filter": { + | "range": { + | "$name": { + | "gte": $greaterThanOrEqual + | } + | } + | }, + | "boost": 0 + | } + |} + """.stripMargin + + json = json :+ parse(range) + + } else if (numericRange.lessThan.nonEmpty) { + //val name = numericRange.name + val lessThan = numericRange.lessThan.get + val range = + s""" + |{ + | "constant_score": { + | "filter": { + | "range": { + | "$name": { + | "lt": $lessThan + | } + | } + | }, + | "boost": 0 + | } + |} + """.stripMargin + + json = json :+ parse(range) + + } else if (numericRange.lessThanOrEqual.nonEmpty) { + //val name = numericRange.name + val lessThanOrEqual = numericRange.lessThanOrEqual.get + val range = + s""" + |{ + | "constant_score": { + | "filter": { + | "range": { + | "$name": { + | "lte": $lessThanOrEqual + | } + | } + | }, + | "boost": 0 + | } + |} + """.stripMargin + + json = json :+ parse(range) + + } else { + logger.info( + """ + |Misconfigured range information, your query's numeric Range is incorrect. + |Ingoring range information for this query.""".stripMargin) + Seq.empty + } + } + } + json + } def getRankingMapping: Map[String, String] = rankingFieldNames map { fieldName => fieldName -> "float" } toMap From 8d0a9e55c4c84d517902aba842aa1897688163e3 Mon Sep 17 00:00:00 2001 From: Rasna Tomar Date: Fri, 4 Nov 2016 09:42:01 +0530 Subject: [PATCH 6/8] filtering numeric range changes in buildQueryMust --- src/main/scala/URAlgorithm.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/scala/URAlgorithm.scala b/src/main/scala/URAlgorithm.scala index b10aeb7..420d3c4 100644 --- a/src/main/scala/URAlgorithm.scala +++ b/src/main/scala/URAlgorithm.scala @@ -616,13 +616,14 @@ class URAlgorithm(val ap: URAlgorithmParams) val filteringMetadata = getFilteringMetadata(query) val filteringDateRange = getFilteringDateRange(query) + val filteringNumericRange = getFilteringNumericRange(query) val allFilteringCorrelators = recentUserHistoryFilter ++ similarItemsFilter ++ filteringMetadata val mustFields: Seq[JValue] = allFilteringCorrelators.map { case FilterCorrelators(actionName, itemIDs) => render("terms" -> (actionName -> itemIDs) ~ ("boost" -> 0)) } - mustFields ++ filteringDateRange + mustFields ++ filteringDateRange ++ filteringNumericRange } /** Build not must query part */ From 744bd094a41ae365ac0a2f67ee46bd0f72137934 Mon Sep 17 00:00:00 2001 From: Rasna Tomar Date: Fri, 4 Nov 2016 09:45:13 +0530 Subject: [PATCH 7/8] commented code removed --- src/main/scala/URAlgorithm.scala | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/main/scala/URAlgorithm.scala b/src/main/scala/URAlgorithm.scala index 420d3c4..5ecfc1e 100644 --- a/src/main/scala/URAlgorithm.scala +++ b/src/main/scala/URAlgorithm.scala @@ -876,7 +876,6 @@ class URAlgorithm(val ap: URAlgorithmParams) numericRanges.foreach { numericRange => val name = numericRange.name if (numericRange.greaterThan.nonEmpty && numericRange.lessThan.nonEmpty) { - // val name = numericRange.name val greaterThan = numericRange.greaterThan.get val lessThan = numericRange.lessThan.get val range = @@ -897,7 +896,6 @@ class URAlgorithm(val ap: URAlgorithmParams) """.stripMargin json = json :+ parse(range) } else if (numericRange.greaterThan.nonEmpty && numericRange.lessThanOrEqual.nonEmpty) { - //val name = numericRange.name val greaterThan = numericRange.greaterThan.get val lessThanOrEqual = numericRange.lessThanOrEqual.get val range = @@ -918,7 +916,6 @@ class URAlgorithm(val ap: URAlgorithmParams) """.stripMargin json = json :+ parse(range) } else if (numericRange.greaterThanOrEqual.nonEmpty && numericRange.lessThan.nonEmpty) { - //val name = numericRange.name val greaterThanOrEqual = numericRange.greaterThanOrEqual.get val lessThan = numericRange.lessThan.get val range = @@ -939,7 +936,6 @@ class URAlgorithm(val ap: URAlgorithmParams) """.stripMargin json = json :+ parse(range) } else if (numericRange.greaterThanOrEqual.nonEmpty && numericRange.lessThanOrEqual.nonEmpty) { - //val name = numericRange.name val greaterThanOrEqual = numericRange.greaterThanOrEqual.get val lessThanOrEqual = numericRange.lessThanOrEqual.get val range = @@ -962,7 +958,6 @@ class URAlgorithm(val ap: URAlgorithmParams) json = json :+ parse(range) } else if (numericRange.greaterThan.nonEmpty) { - //val name = numericRange.name val greaterThan = numericRange.greaterThan.get val range = s""" @@ -983,7 +978,6 @@ class URAlgorithm(val ap: URAlgorithmParams) json = json :+ parse(range) } else if (numericRange.greaterThanOrEqual.nonEmpty) { - //val name = numericRange.name val greaterThanOrEqual = numericRange.greaterThanOrEqual.get val range = s""" @@ -1004,7 +998,6 @@ class URAlgorithm(val ap: URAlgorithmParams) json = json :+ parse(range) } else if (numericRange.lessThan.nonEmpty) { - //val name = numericRange.name val lessThan = numericRange.lessThan.get val range = s""" @@ -1025,7 +1018,6 @@ class URAlgorithm(val ap: URAlgorithmParams) json = json :+ parse(range) } else if (numericRange.lessThanOrEqual.nonEmpty) { - //val name = numericRange.name val lessThanOrEqual = numericRange.lessThanOrEqual.get val range = s""" From 72eead278c010e7a650fa8ca4c845b2a699d52f1 Mon Sep 17 00:00:00 2001 From: Rasna Tomar Date: Fri, 4 Nov 2016 09:46:55 +0530 Subject: [PATCH 8/8] Update Engine.scala --- src/main/scala/Engine.scala | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/main/scala/Engine.scala b/src/main/scala/Engine.scala index 4e1324c..bce04d8 100644 --- a/src/main/scala/Engine.scala +++ b/src/main/scala/Engine.scala @@ -44,7 +44,8 @@ case class Query( num: Option[Int] = None, // default: whatever is in algorithm params, which itself has a default--probably 20 eventNames: Option[List[String]], // names used to ID all user actions withRanks: Option[Boolean] = None, // Add to ItemScore rank fields values, default fasle - excludeFields: Option[List[excludeField]]) // blacklist fields specified in query + excludeFields: Option[List[excludeField]], // blacklist fields specified in query + numericRangeFilter: Option[List[RangeField]]) // numeric range filter extends Serializable @@ -61,6 +62,14 @@ case class excludeField( name: String, // name of metadata field values: List[String]) // fields can have multiple values like tags of a single value as when using hierarchical +/** Used to specify the numeric range for a query */ +case class RangeField( + name: String, + greaterThan: Option[Float], + lessThan: Option[Float], + greaterThanOrEqual: Option[Float], + lessThanOrEqual: Option[Float]) + /** Used to specify the date range for a query */ case class DateRange( name: String, // name of item property for the date comparison