JanusGraph · amcp · May 28, 2017 · Apr 18, 2017 · May 27, 2017 · sjudeng
diff --git a/docs/elasticsearch.txt b/docs/elasticsearch.txt
@@ -13,6 +13,7 @@ JanusGraph supports https://www.elastic.co/[Elasticsearch] as an index backend.
 * *TTL*: Supports automatically expiring indexed elements.
 * *Collections*: Supports indexing SET and LIST cardinality properties.
 * *Temporal*: Nanosecond granularity temporal indexing.
+* *Custom Analyzer*: Supports specifying a custom analyzer for indexed string properties.
 
 Please see <<version-compat>> for details on what versions of ES will work with JanusGraph.
 
@@ -189,4 +190,4 @@ For additional suggestions on how to increase write performance in Elasticsearch
 
 ==== Further Reading
 
-* Please refer to the https://www.elastic.co[Elasticsearch homepage] and available documentation for more information on Elasticsearch and how to setup an Elasticsearch cluster.
+* Please refer to the https://www.elastic.co[Elasticsearch homepage] and available documentation for more information on Elasticsearch and how to setup an Elasticsearch cluster.
diff --git a/docs/solr.txt b/docs/solr.txt
@@ -12,6 +12,7 @@ JanusGraph supports http://lucene.apache.org/solr/[Solr] as an index backend.  H
 * *Numeric Range*: Supports all numeric comparisons in `Compare`.
 * *TTL*: Supports automatically expiring indexed elements.
 * *Temporal*: Millisecond granularity temporal indexing.
+* *Custom Analyzer*: Supports specifying a custom analyzer for indexed string properties.
 
 Please see <<version-compat>> for details on what versions of Solr will work with JanusGraph.
 

diff --git a/docs/textsearch.txt b/docs/textsearch.txt
@@ -129,3 +129,38 @@ Instead of individually adjusting the field mapping for every key added to a mix
 However, this approach has two limitations: 1) The user has to ensure that the property key names are valid field names for the indexing backend and 2) renaming the property key will NOT rename the field name in the index which can lead to naming collisions that the user has to be aware of and avoid.
 
 Note, that individual field mappings as described above can be used to overwrite the default name for a particular key.
+
+
+==== Custom Analyzer
+
+By default, JanusGraph uses the indexing backend's default analyzer for properties with Mapping.TEXT, and no analyzer for properties with Mapping.STRING. To use a different analyzer, specify it explicitly through a parameter: ParameterType.TEXT_ANALYZER for Mapping.TEXT and ParameterType.STRING_ANALYZER for Mapping.STRING.
+
+===== For Elasticsearch
+
+The name of the analyzer must be set as the parameter value.
+
+[source, gremlin]
+mgmt = graph.openManagement()
+string = mgmt.makePropertyKey('string').dataType(String.class).make()
+text = mgmt.makePropertyKey('text').dataType(String.class).make()
+textString = mgmt.makePropertyKey('textString').dataType(String.class).make()
+mgmt.buildIndex('string', Vertex.class).addKey(string, Mapping.STRING.asParameter(), Parameter.of(ParameterType.STRING_ANALYZER.getName(), 'standard')).buildMixedIndex("search")
+mgmt.buildIndex('text', Vertex.class).addKey(text, Mapping.TEXT.asParameter(), Parameter.of(ParameterType.TEXT_ANALYZER.getName(), 'english')).buildMixedIndex("search")
+mgmt.buildIndex('textString', Vertex.class).addKey(textString, Mapping.TEXTSTRING.asParameter(), Parameter.of(ParameterType.STRING_ANALYZER.getName(), 'standard'), Parameter.of(ParameterType.TEXT_ANALYZER.getName(), 'english')).buildMixedIndex("search")
+mgmt.commit()
+
+With these settings, JanusGraph will use the 'standard' analyzer for property key 'string' and the 'english' analyzer for property key 'text'. For property key 'textString', the 'english' analyzer is used for the text mapping and the 'standard' analyzer for the string mapping.
+
+===== For Solr
+
+The fully qualified class name of the tokenizer must be set as the parameter value.
+
+[source, gremlin]
+mgmt = graph.openManagement()
+string = mgmt.makePropertyKey('string').dataType(String.class).make()
+text = mgmt.makePropertyKey('text').dataType(String.class).make()
+mgmt.buildIndex('string', Vertex.class).addKey(string, Mapping.STRING.asParameter(), Parameter.of(ParameterType.STRING_ANALYZER.getName(), 'org.apache.lucene.analysis.standard.StandardTokenizer')).buildMixedIndex("search")
+mgmt.buildIndex('text', Vertex.class).addKey(text, Mapping.TEXT.asParameter(), Parameter.of(ParameterType.TEXT_ANALYZER.getName(), 'org.apache.lucene.analysis.core.WhitespaceTokenizer')).buildMixedIndex("search")
+mgmt.commit()
+
+With these settings, JanusGraph will use the 'standard' tokenizer for property key 'string' and the 'whitespace' tokenizer for property key 'text'.
diff --git a/janusgraph-core/src/main/java/org/janusgraph/diskstorage/indexing/IndexFeatures.java b/janusgraph-core/src/main/java/org/janusgraph/diskstorage/indexing/IndexFeatures.java
@@ -35,11 +35,12 @@ public class IndexFeatures {
     private final ImmutableSet<Mapping> supportedStringMappings;
     private final String wildcardField;
     private final boolean supportsNanoseconds;
-    private ImmutableSet<Cardinality> supportedCardinaities;
+    private final boolean supportsCustomAnalyzer;
+    private ImmutableSet<Cardinality> supportedCardinalities;
 
     public IndexFeatures(boolean supportsDocumentTTL,
                          Mapping defaultMap,
-                         ImmutableSet<Mapping> supportedMap, String wildcardField, ImmutableSet<Cardinality> supportedCardinaities, boolean supportsNanoseconds) {
+                         ImmutableSet<Mapping> supportedMap, String wildcardField, ImmutableSet<Cardinality> supportedCardinaities, boolean supportsNanoseconds, boolean supportCustomAnalyzer) {
 
         Preconditions.checkArgument(defaultMap!=null || defaultMap!=Mapping.DEFAULT);
         Preconditions.checkArgument(supportedMap!=null && !supportedMap.isEmpty()
@@ -48,8 +49,9 @@ public IndexFeatures(boolean supportsDocumentTTL,
         this.defaultStringMapping = defaultMap;
         this.supportedStringMappings = supportedMap;
         this.wildcardField = wildcardField;
-        this.supportedCardinaities = supportedCardinaities;
+        this.supportedCardinalities = supportedCardinaities;
         this.supportsNanoseconds = supportsNanoseconds;
+        this.supportsCustomAnalyzer = supportCustomAnalyzer;
     }
 
     public boolean supportsDocumentTTL() {
@@ -69,12 +71,16 @@ public String getWildcardField() {
     }
 
     public boolean supportsCardinality(Cardinality cardinality) {
-        return supportedCardinaities.contains(cardinality);
+        return supportedCardinalities.contains(cardinality);
     }
 
     public boolean supportsNanoseconds() {
         return supportsNanoseconds;
     }
+
+    public boolean supportsCustomAnalyzer() {
+        return supportsCustomAnalyzer;
+    }
 
     public static class Builder {
 
@@ -84,6 +90,7 @@ public static class Builder {
         private Set<Cardinality> supportedCardinalities = Sets.newHashSet();
         private String wildcardField = "*";
         private boolean supportsNanoseconds;
+        private boolean supportsCustomAnalyzer;
 
         public Builder supportsDocumentTTL() {
             supportsDocumentTTL=true;
@@ -114,10 +121,15 @@ public Builder supportsNanoseconds() {
             supportsNanoseconds = true;
             return this;
         }
+
+        public Builder supportsCustomAnalyzer() {
+            supportsCustomAnalyzer = true;
+            return this;
+        }
 
         public IndexFeatures build() {
             return new IndexFeatures(supportsDocumentTTL, defaultStringMapping,
-                    ImmutableSet.copyOf(supportedMappings), wildcardField,  ImmutableSet.copyOf(supportedCardinalities), supportsNanoseconds);
+                    ImmutableSet.copyOf(supportedMappings), wildcardField,  ImmutableSet.copyOf(supportedCardinalities), supportsNanoseconds, supportsCustomAnalyzer);
         }
 
 

diff --git a/janusgraph-core/src/main/java/org/janusgraph/graphdb/types/ParameterType.java b/janusgraph-core/src/main/java/org/janusgraph/graphdb/types/ParameterType.java
@@ -35,8 +35,15 @@ public enum ParameterType {
     INDEX_GEO_MAX_LEVELS("index-geo-max-levels"),
 
     /** Distance error percent used to determine precision in spatial prefix tree where applicable. **/
-    INDEX_GEO_DIST_ERROR_PCT("index-geo-dist-error-pct");
-
+    INDEX_GEO_DIST_ERROR_PCT("index-geo-dist-error-pct"),
+
+    /** Analyzer to use for String properties indexed with mapping STRING. **/
+    STRING_ANALYZER("string-analyzer"),
+
+    /** Analyzer to use for String properties indexed with mapping TEXT. **/
+    TEXT_ANALYZER("text-analyzer"),
+    ;
+
     private final String name;
 
     private ParameterType(String name) {

diff --git a/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/ElasticSearchIndex.java b/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/ElasticSearchIndex.java
@@ -106,6 +106,12 @@ public class ElasticSearchIndex implements IndexProvider {
 
     private static final String STRING_MAPPING_SUFFIX = "__STRING";
 
+    private static final String NOT_ANALYZED = "not_analyzed";
+
+    private static final String ANALYZER = "analyzer";
+
+    private static final String INDEX = "index";
+
     public static final ConfigNamespace ELASTICSEARCH_NS =
             new ConfigNamespace(INDEX_NS, "elasticsearch", "Elasticsearch index configuration");
 
@@ -163,7 +169,7 @@ public class ElasticSearchIndex implements IndexProvider {
             new ConfigNamespace(ES_CREATE_NS, "ext", "Overrides for arbitrary settings applied at index creation", true);
 
     private static final IndexFeatures ES_FEATURES = new IndexFeatures.Builder()
-            .setDefaultStringMapping(Mapping.TEXT).supportedStringMappings(Mapping.TEXT, Mapping.TEXTSTRING, Mapping.STRING).setWildcardField("_all").supportsCardinality(Cardinality.SINGLE).supportsCardinality(Cardinality.LIST).supportsCardinality(Cardinality.SET).supportsNanoseconds().build();
+            .setDefaultStringMapping(Mapping.TEXT).supportedStringMappings(Mapping.TEXT, Mapping.TEXTSTRING, Mapping.STRING).setWildcardField("_all").supportsCardinality(Cardinality.SINGLE).supportsCardinality(Cardinality.LIST).supportsCardinality(Cardinality.SET).supportsNanoseconds().supportsCustomAnalyzer().build();
 
     public static final int HOST_PORT_DEFAULT = 9200;
 
@@ -228,7 +234,6 @@ private void checkForOrCreateIndex(Configuration config) throws IOException {
 
             ElasticSearchSetup.applySettingsFromJanusGraphConf(settings, config, ES_CREATE_EXTRAS_NS);
             settings.put("index.max_result_window", Integer.MAX_VALUE);
-
             client.createIndex(indexName, settings.build());
 
             try {
@@ -298,19 +303,34 @@ public void register(String store, String key, KeyInformation information, BaseT
                 if (map==Mapping.DEFAULT) map=Mapping.TEXT;
                 log.debug("Registering string type for {} with mapping {}", key, map);
                 mapping.field("type", "string");
+                String stringAnalyzer = (String) ParameterType.STRING_ANALYZER.findParameter(information.getParameters(), null);
+                String textAnalyzer = (String) ParameterType.TEXT_ANALYZER.findParameter(information.getParameters(), null);
                 switch (map) {
                     case STRING:
-                        mapping.field("index","not_analyzed");
+                        if (stringAnalyzer != null) {
+                            mapping.field(ANALYZER, stringAnalyzer);
+                        } else {
+                            mapping.field(INDEX, NOT_ANALYZED);
+                        }
                         break;
                     case TEXT:
-                        //default, do nothing
+                        if (textAnalyzer != null) {
+                            mapping.field(ANALYZER, textAnalyzer);
+                        }
                     	break;
                     case TEXTSTRING:
+                        if (textAnalyzer != null) {
+                            mapping.field(ANALYZER, textAnalyzer);
+                        }
                         mapping.endObject();
                         //add string mapping
                         mapping.startObject(getDualMappingName(key));
                         mapping.field("type", "string");
-                        mapping.field("index","not_analyzed");
+                        if (stringAnalyzer != null) {
+                            mapping.field(ANALYZER, stringAnalyzer);
+                        } else {
+                            mapping.field(INDEX, NOT_ANALYZED);
+                        }
                         break;
                     default: throw new AssertionError("Unexpected mapping: "+map);
                 }
@@ -359,7 +379,7 @@ public void register(String store, String key, KeyInformation information, BaseT
             } else if (dataType == UUID.class) {
                 log.debug("Registering uuid type for {}", key);
                 mapping.field("type", "string");
-                mapping.field("index","not_analyzed");
+                mapping.field(INDEX, NOT_ANALYZED);
             }
 
             mapping.endObject().endObject().endObject();
@@ -687,28 +707,20 @@ public QueryBuilder getFilter(Condition<?> condition, KeyInformation.StoreRetrie
                     throw new IllegalArgumentException("String mapped string values do not support CONTAINS queries: " + janusgraphPredicate);
                 if (map==Mapping.TEXTSTRING && !janusgraphPredicate.toString().startsWith("CONTAINS"))
                     fieldName = getDualMappingName(key);
-
-                if (janusgraphPredicate == Text.CONTAINS) {
-                    value = ((String) value).toLowerCase();
-                    BoolQueryBuilder b = QueryBuilders.boolQuery();
-                    for (String term : Text.tokenize((String)value)) {
-                        b.must(QueryBuilders.termQuery(fieldName, term));
-                    }
-                    return b;
+                if (janusgraphPredicate == Text.CONTAINS || janusgraphPredicate == Cmp.EQUAL) {
+                    return QueryBuilders.matchQuery(fieldName, value).operator(Operator.AND);
                 } else if (janusgraphPredicate == Text.CONTAINS_PREFIX) {
-                    value = ((String) value).toLowerCase();
+                    value = ParameterType.TEXT_ANALYZER.findParameter(informations.get(key).getParameters(), null)!=null?((String) value):((String) value).toLowerCase();
                     return QueryBuilders.prefixQuery(fieldName, (String) value);
                 } else if (janusgraphPredicate == Text.CONTAINS_REGEX) {
-                    value = ((String) value).toLowerCase();
+                    value = ParameterType.TEXT_ANALYZER.findParameter(informations.get(key).getParameters(), null)!=null?((String) value):((String) value).toLowerCase();
                     return QueryBuilders.regexpQuery(fieldName, (String) value);
                 } else if (janusgraphPredicate == Text.PREFIX) {
                     return QueryBuilders.prefixQuery(fieldName, (String) value);
                 } else if (janusgraphPredicate == Text.REGEX) {
                     return QueryBuilders.regexpQuery(fieldName, (String) value);
-                } else if (janusgraphPredicate == Cmp.EQUAL) {
-                    return QueryBuilders.termQuery(fieldName, (String) value);
                 } else if (janusgraphPredicate == Cmp.NOT_EQUAL) {
-                    return QueryBuilders.boolQuery().mustNot(QueryBuilders.termQuery(fieldName, (String) value));
+                    return QueryBuilders.boolQuery().mustNot(QueryBuilders.matchQuery(fieldName, value).operator(Operator.AND));
                 } else if (janusgraphPredicate == Text.FUZZY || janusgraphPredicate == Text.CONTAINS_FUZZY){
                     return QueryBuilders.matchQuery(fieldName, (String) value).fuzziness(Fuzziness.AUTO).operator(Operator.AND);
                 } else
@@ -835,9 +847,7 @@ public QueryBuilder getFilter(Condition<?> condition, KeyInformation.StoreRetrie
     @Override
     public List<String> query(IndexQuery query, KeyInformation.IndexRetriever informations, BaseTransaction tx) throws BackendException {
         ElasticSearchRequest sr = new ElasticSearchRequest();
-
-        sr.setQuery(QueryBuilders.matchAllQuery());
-        sr.setPostFilter(getFilter(query.getCondition(),informations.get(query.getStore())));
+        sr.setQuery(getFilter(query.getCondition(),informations.get(query.getStore())));
         if (!query.getOrder().isEmpty()) {
             List<IndexQuery.OrderEntry> orders = query.getOrder();
             for (int i = 0; i < orders.size(); i++) {

diff --git a/janusgraph-es/src/test/java/org/janusgraph/diskstorage/es/ElasticSearchConfigTest.java b/janusgraph-es/src/test/java/org/janusgraph/diskstorage/es/ElasticSearchConfigTest.java
@@ -132,7 +132,7 @@ private void simpleWriteAndQuery(IndexProvider idx) throws BackendException, Int
 
         final Duration maxWrite = Duration.ofMillis(2000L);
         final String storeName = "jvmlocal_test_store";
-        final KeyInformation.IndexRetriever indexRetriever = IndexProviderTest.getIndexRetriever(IndexProviderTest.getMapping(idx.getFeatures()));
+        final KeyInformation.IndexRetriever indexRetriever = IndexProviderTest.getIndexRetriever(IndexProviderTest.getMapping(idx.getFeatures(), "standard", "keyword"));
 
         BaseTransactionConfig txConfig = StandardBaseTransactionConfig.of(TimestampProviders.MILLI);
         IndexTransaction itx = new IndexTransaction(idx, indexRetriever, txConfig, maxWrite);

diff --git a/janusgraph-es/src/test/java/org/janusgraph/diskstorage/es/ElasticSearchIndexTest.java b/janusgraph-es/src/test/java/org/janusgraph/diskstorage/es/ElasticSearchIndexTest.java
@@ -17,6 +17,7 @@
 import com.google.common.base.Throwables;
 import com.google.common.collect.HashMultimap;
 import com.google.common.collect.Multimap;
+
 import org.apache.commons.lang.RandomStringUtils;
 import org.janusgraph.core.Cardinality;
 import org.janusgraph.core.JanusGraphException;
@@ -75,6 +76,16 @@ public boolean supportsLuceneStyleQueries() {
         return true;
     }
 
+    @Override
+    public String getEnglishAnalyzerName() {
+        return "english";
+    }
+
+    @Override
+    public String getKeywordAnalyzerName() {
+        return "keyword";
+    }
+
     public Configuration getESTestConfig() {
         final String index = "es";
         ModifiableConfiguration config = GraphDatabaseConfiguration.buildGraphConfiguration();
@@ -192,5 +203,4 @@ public void testUpdateAdditionWithLongString() throws Exception {
         assertEquals(0, tx.query(new IndexQuery("vertex", PredicateCondition.of(TEXT, Text.CONTAINS, "bob"))).size());
         assertEquals(1, tx.query(new IndexQuery("vertex", PredicateCondition.of(TEXT, Text.CONTAINS, "world"))).size());
     }
-
 }
diff --git a/janusgraph-lucene/src/test/java/org/janusgraph/diskstorage/lucene/LuceneIndexTest.java b/janusgraph-lucene/src/test/java/org/janusgraph/diskstorage/lucene/LuceneIndexTest.java
@@ -61,6 +61,16 @@ public boolean supportsLuceneStyleQueries() {
         return false;
     }
 
+    @Override
+    public String getEnglishAnalyzerName() {
+        return null;
+    }
+
+    @Override
+    public String getKeywordAnalyzerName() {
+        return null;
+    }
+
     public static final Configuration getLocalLuceneTestConfig() {
         final String index = "lucene";
         ModifiableConfiguration config = GraphDatabaseConfiguration.buildGraphConfiguration();