Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion docs/elasticsearch.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ JanusGraph supports https://www.elastic.co/[Elasticsearch] as an index backend.
* *TTL*: Supports automatically expiring indexed elements.
* *Collections*: Supports indexing SET and LIST cardinality properties.
* *Temporal*: Nanosecond granularity temporal indexing.
* *Custom Analyzer*: Supports choosing a custom analyzer.

Please see <<version-compat>> for details on what versions of ES will work with JanusGraph.

Expand Down Expand Up @@ -189,4 +190,4 @@ For additional suggestions on how to increase write performance in Elasticsearch

==== Further Reading

* Please refer to the https://www.elastic.co[Elasticsearch homepage] and available documentation for more information on Elasticsearch and how to setup an Elasticsearch cluster.
* Please refer to the https://www.elastic.co[Elasticsearch homepage] and available documentation for more information on Elasticsearch and how to set up an Elasticsearch cluster.
1 change: 1 addition & 0 deletions docs/solr.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ JanusGraph supports http://lucene.apache.org/solr/[Solr] as an index backend. H
* *Numeric Range*: Supports all numeric comparisons in `Compare`.
* *TTL*: Supports automatically expiring indexed elements.
* *Temporal*: Millisecond granularity temporal indexing.
* *Custom Analyzer*: Supports choosing a custom analyzer.

Please see <<version-compat>> for details on what versions of Solr will work with JanusGraph.

Expand Down
35 changes: 35 additions & 0 deletions docs/textsearch.txt
Original file line number Diff line number Diff line change
Expand Up @@ -129,3 +129,38 @@ Instead of individually adjusting the field mapping for every key added to a mix
However, this approach has two limitations: 1) The user has to ensure that the property key names are valid field names for the indexing backend and 2) renaming the property key will NOT rename the field name in the index which can lead to naming collisions that the user has to be aware of and avoid.

Note that individual field mappings as described above can be used to override the default name for a particular key.


==== Custom Analyzer

By default, JanusGraph will use the default analyzer from the indexing backend for properties with Mapping.TEXT, and no analyzer for properties with Mapping.STRING. To use a different analyzer, specify it explicitly through a parameter: ParameterType.TEXT_ANALYZER for Mapping.TEXT and ParameterType.STRING_ANALYZER for Mapping.STRING.

===== For Elasticsearch

The name of the analyzer must be set as the parameter value.

[source, gremlin]
mgmt = graph.openManagement()
string = mgmt.makePropertyKey('string').dataType(String.class).make()
text = mgmt.makePropertyKey('text').dataType(String.class).make()
textString = mgmt.makePropertyKey('textString').dataType(String.class).make()
mgmt.buildIndex('string', Vertex.class).addKey(string, Mapping.STRING.asParameter(), Parameter.of(ParameterType.STRING_ANALYZER.getName(), 'standard')).buildMixedIndex("search")
mgmt.buildIndex('text', Vertex.class).addKey(text, Mapping.TEXT.asParameter(), Parameter.of(ParameterType.TEXT_ANALYZER.getName(), 'english')).buildMixedIndex("search")
mgmt.buildIndex('textString', Vertex.class).addKey(textString, Mapping.TEXTSTRING.asParameter(), Parameter.of(ParameterType.STRING_ANALYZER.getName(), 'standard'), Parameter.of(ParameterType.TEXT_ANALYZER.getName(), 'english')).buildMixedIndex("search")
mgmt.commit()

With these settings, JanusGraph will use the 'standard' analyzer for property key 'string' and the 'english' analyzer for property key 'text'.

===== For Solr

The class of the tokenizer must be set as the parameter value.

[source, gremlin]
mgmt = graph.openManagement()
string = mgmt.makePropertyKey('string').dataType(String.class).make()
text = mgmt.makePropertyKey('text').dataType(String.class).make()
mgmt.buildIndex('string', Vertex.class).addKey(string, Mapping.STRING.asParameter(), Parameter.of(ParameterType.STRING_ANALYZER.getName(), 'org.apache.lucene.analysis.standard.StandardTokenizer')).buildMixedIndex("search")
mgmt.buildIndex('text', Vertex.class).addKey(text, Mapping.TEXT.asParameter(), Parameter.of(ParameterType.TEXT_ANALYZER.getName(), 'org.apache.lucene.analysis.core.WhitespaceTokenizer')).buildMixedIndex("search")
mgmt.commit()

With these settings, JanusGraph will use the 'standard' tokenizer for property key 'string' and the 'whitespace' tokenizer for property key 'text'.
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,12 @@ public class IndexFeatures {
private final ImmutableSet<Mapping> supportedStringMappings;
private final String wildcardField;
private final boolean supportsNanoseconds;
private ImmutableSet<Cardinality> supportedCardinaities;
private final boolean supportsCustomAnalyzer;
private ImmutableSet<Cardinality> supportedCardinalities;

public IndexFeatures(boolean supportsDocumentTTL,
Mapping defaultMap,
ImmutableSet<Mapping> supportedMap, String wildcardField, ImmutableSet<Cardinality> supportedCardinaities, boolean supportsNanoseconds) {
ImmutableSet<Mapping> supportedMap, String wildcardField, ImmutableSet<Cardinality> supportedCardinaities, boolean supportsNanoseconds, boolean supportCustomAnalyzer) {

Preconditions.checkArgument(defaultMap!=null || defaultMap!=Mapping.DEFAULT);
Preconditions.checkArgument(supportedMap!=null && !supportedMap.isEmpty()
Expand All @@ -48,8 +49,9 @@ public IndexFeatures(boolean supportsDocumentTTL,
this.defaultStringMapping = defaultMap;
this.supportedStringMappings = supportedMap;
this.wildcardField = wildcardField;
this.supportedCardinaities = supportedCardinaities;
this.supportedCardinalities = supportedCardinaities;
this.supportsNanoseconds = supportsNanoseconds;
this.supportsCustomAnalyzer = supportCustomAnalyzer;
}

public boolean supportsDocumentTTL() {
Expand All @@ -69,12 +71,16 @@ public String getWildcardField() {
}

public boolean supportsCardinality(Cardinality cardinality) {
return supportedCardinaities.contains(cardinality);
return supportedCardinalities.contains(cardinality);
}

/**
 * Reports the nanosecond-support flag of these index features.
 *
 * @return the {@code supportsNanoseconds} value this instance was constructed with
 */
public boolean supportsNanoseconds() {
return supportsNanoseconds;
}

/**
 * Reports the custom-analyzer-support flag of these index features.
 *
 * @return the {@code supportsCustomAnalyzer} value this instance was constructed with
 */
public boolean supportsCustomAnalyzer() {
return supportsCustomAnalyzer;
}

public static class Builder {

Expand All @@ -84,6 +90,7 @@ public static class Builder {
private Set<Cardinality> supportedCardinalities = Sets.newHashSet();
private String wildcardField = "*";
private boolean supportsNanoseconds;
private boolean supportsCustomAnalyzer;

public Builder supportsDocumentTTL() {
supportsDocumentTTL=true;
Expand Down Expand Up @@ -114,10 +121,15 @@ public Builder supportsNanoseconds() {
supportsNanoseconds = true;
return this;
}

/**
 * Marks the features being built as supporting custom analyzers.
 *
 * @return this builder, so calls can be chained
 */
public Builder supportsCustomAnalyzer() {
supportsCustomAnalyzer = true;
return this;
}

public IndexFeatures build() {
return new IndexFeatures(supportsDocumentTTL, defaultStringMapping,
ImmutableSet.copyOf(supportedMappings), wildcardField, ImmutableSet.copyOf(supportedCardinalities), supportsNanoseconds);
ImmutableSet.copyOf(supportedMappings), wildcardField, ImmutableSet.copyOf(supportedCardinalities), supportsNanoseconds, supportsCustomAnalyzer);
}


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,15 @@ public enum ParameterType {
INDEX_GEO_MAX_LEVELS("index-geo-max-levels"),

/** Distance error percent used to determine precision in spatial prefix tree where applicable. **/
INDEX_GEO_DIST_ERROR_PCT("index-geo-dist-error-pct");

INDEX_GEO_DIST_ERROR_PCT("index-geo-dist-error-pct"),

/** Analyzer for String Type with mapping STRING**/
STRING_ANALYZER("string-analyzer"),

/** Analyzer for String Type with mapping TEXT**/
TEXT_ANALYZER("text-analyzer"),
;

private final String name;

private ParameterType(String name) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,12 @@ public class ElasticSearchIndex implements IndexProvider {

private static final String STRING_MAPPING_SUFFIX = "__STRING";

private static final String NOT_ANALYZED = "not_analyzed";

private static final String ANALYZER = "analyzer";

private static final String INDEX = "index";

public static final ConfigNamespace ELASTICSEARCH_NS =
new ConfigNamespace(INDEX_NS, "elasticsearch", "Elasticsearch index configuration");

Expand Down Expand Up @@ -163,7 +169,7 @@ public class ElasticSearchIndex implements IndexProvider {
new ConfigNamespace(ES_CREATE_NS, "ext", "Overrides for arbitrary settings applied at index creation", true);

private static final IndexFeatures ES_FEATURES = new IndexFeatures.Builder()
.setDefaultStringMapping(Mapping.TEXT).supportedStringMappings(Mapping.TEXT, Mapping.TEXTSTRING, Mapping.STRING).setWildcardField("_all").supportsCardinality(Cardinality.SINGLE).supportsCardinality(Cardinality.LIST).supportsCardinality(Cardinality.SET).supportsNanoseconds().build();
.setDefaultStringMapping(Mapping.TEXT).supportedStringMappings(Mapping.TEXT, Mapping.TEXTSTRING, Mapping.STRING).setWildcardField("_all").supportsCardinality(Cardinality.SINGLE).supportsCardinality(Cardinality.LIST).supportsCardinality(Cardinality.SET).supportsNanoseconds().supportsCustomAnalyzer().build();

public static final int HOST_PORT_DEFAULT = 9200;

Expand Down Expand Up @@ -228,7 +234,6 @@ private void checkForOrCreateIndex(Configuration config) throws IOException {

ElasticSearchSetup.applySettingsFromJanusGraphConf(settings, config, ES_CREATE_EXTRAS_NS);
settings.put("index.max_result_window", Integer.MAX_VALUE);

client.createIndex(indexName, settings.build());

try {
Expand Down Expand Up @@ -298,19 +303,34 @@ public void register(String store, String key, KeyInformation information, BaseT
if (map==Mapping.DEFAULT) map=Mapping.TEXT;
log.debug("Registering string type for {} with mapping {}", key, map);
mapping.field("type", "string");
String stringAnalyzer = (String) ParameterType.STRING_ANALYZER.findParameter(information.getParameters(), null);
String textAnalyzer = (String) ParameterType.TEXT_ANALYZER.findParameter(information.getParameters(), null);
switch (map) {
case STRING:
mapping.field("index","not_analyzed");
if (stringAnalyzer != null) {
mapping.field(ANALYZER, stringAnalyzer);
} else {
mapping.field(INDEX, NOT_ANALYZED);
}
break;
case TEXT:
//default, do nothing
if (textAnalyzer != null) {
mapping.field(ANALYZER, textAnalyzer);
}
break;
case TEXTSTRING:
if (textAnalyzer != null) {
mapping.field(ANALYZER, textAnalyzer);
}
mapping.endObject();
//add string mapping
mapping.startObject(getDualMappingName(key));
mapping.field("type", "string");
mapping.field("index","not_analyzed");
if (stringAnalyzer != null) {
mapping.field(ANALYZER, stringAnalyzer);
} else {
mapping.field(INDEX, NOT_ANALYZED);
}
break;
default: throw new AssertionError("Unexpected mapping: "+map);
}
Expand Down Expand Up @@ -359,7 +379,7 @@ public void register(String store, String key, KeyInformation information, BaseT
} else if (dataType == UUID.class) {
log.debug("Registering uuid type for {}", key);
mapping.field("type", "string");
mapping.field("index","not_analyzed");
mapping.field(INDEX, NOT_ANALYZED);
}

mapping.endObject().endObject().endObject();
Expand Down Expand Up @@ -687,28 +707,20 @@ public QueryBuilder getFilter(Condition<?> condition, KeyInformation.StoreRetrie
throw new IllegalArgumentException("String mapped string values do not support CONTAINS queries: " + janusgraphPredicate);
if (map==Mapping.TEXTSTRING && !janusgraphPredicate.toString().startsWith("CONTAINS"))
fieldName = getDualMappingName(key);

if (janusgraphPredicate == Text.CONTAINS) {
value = ((String) value).toLowerCase();
BoolQueryBuilder b = QueryBuilders.boolQuery();
for (String term : Text.tokenize((String)value)) {
b.must(QueryBuilders.termQuery(fieldName, term));
}
return b;
if (janusgraphPredicate == Text.CONTAINS || janusgraphPredicate == Cmp.EQUAL) {
return QueryBuilders.matchQuery(fieldName, value).operator(Operator.AND);
} else if (janusgraphPredicate == Text.CONTAINS_PREFIX) {
value = ((String) value).toLowerCase();
value = ParameterType.TEXT_ANALYZER.findParameter(informations.get(key).getParameters(), null)!=null?((String) value):((String) value).toLowerCase();
return QueryBuilders.prefixQuery(fieldName, (String) value);
} else if (janusgraphPredicate == Text.CONTAINS_REGEX) {
value = ((String) value).toLowerCase();
value = ParameterType.TEXT_ANALYZER.findParameter(informations.get(key).getParameters(), null)!=null?((String) value):((String) value).toLowerCase();
return QueryBuilders.regexpQuery(fieldName, (String) value);
} else if (janusgraphPredicate == Text.PREFIX) {
return QueryBuilders.prefixQuery(fieldName, (String) value);
} else if (janusgraphPredicate == Text.REGEX) {
return QueryBuilders.regexpQuery(fieldName, (String) value);
} else if (janusgraphPredicate == Cmp.EQUAL) {
return QueryBuilders.termQuery(fieldName, (String) value);
} else if (janusgraphPredicate == Cmp.NOT_EQUAL) {
return QueryBuilders.boolQuery().mustNot(QueryBuilders.termQuery(fieldName, (String) value));
return QueryBuilders.boolQuery().mustNot(QueryBuilders.matchQuery(fieldName, value).operator(Operator.AND));
} else if (janusgraphPredicate == Text.FUZZY || janusgraphPredicate == Text.CONTAINS_FUZZY){
return QueryBuilders.matchQuery(fieldName, (String) value).fuzziness(Fuzziness.AUTO).operator(Operator.AND);
} else
Expand Down Expand Up @@ -835,9 +847,7 @@ public QueryBuilder getFilter(Condition<?> condition, KeyInformation.StoreRetrie
@Override
public List<String> query(IndexQuery query, KeyInformation.IndexRetriever informations, BaseTransaction tx) throws BackendException {
ElasticSearchRequest sr = new ElasticSearchRequest();

sr.setQuery(QueryBuilders.matchAllQuery());
sr.setPostFilter(getFilter(query.getCondition(),informations.get(query.getStore())));
sr.setQuery(getFilter(query.getCondition(),informations.get(query.getStore())));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good fix

if (!query.getOrder().isEmpty()) {
List<IndexQuery.OrderEntry> orders = query.getOrder();
for (int i = 0; i < orders.size(); i++) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ private void simpleWriteAndQuery(IndexProvider idx) throws BackendException, Int

final Duration maxWrite = Duration.ofMillis(2000L);
final String storeName = "jvmlocal_test_store";
final KeyInformation.IndexRetriever indexRetriever = IndexProviderTest.getIndexRetriever(IndexProviderTest.getMapping(idx.getFeatures()));
final KeyInformation.IndexRetriever indexRetriever = IndexProviderTest.getIndexRetriever(IndexProviderTest.getMapping(idx.getFeatures(), "standard", "keyword"));

BaseTransactionConfig txConfig = StandardBaseTransactionConfig.of(TimestampProviders.MILLI);
IndexTransaction itx = new IndexTransaction(idx, indexRetriever, txConfig, maxWrite);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import com.google.common.base.Throwables;
import com.google.common.collect.HashMultimap;
import com.google.common.collect.Multimap;

import org.apache.commons.lang.RandomStringUtils;
import org.janusgraph.core.Cardinality;
import org.janusgraph.core.JanusGraphException;
Expand Down Expand Up @@ -75,6 +76,16 @@ public boolean supportsLuceneStyleQueries() {
return true;
}

/**
 * Supplies the analyzer name this test class uses as its English analyzer.
 *
 * @return the literal name {@code "english"}
 */
@Override
public String getEnglishAnalyzerName() {
return "english";
}

/**
 * Supplies the analyzer name this test class uses as its keyword analyzer.
 *
 * @return the literal name {@code "keyword"}
 */
@Override
public String getKeywordAnalyzerName() {
return "keyword";
}

public Configuration getESTestConfig() {
final String index = "es";
ModifiableConfiguration config = GraphDatabaseConfiguration.buildGraphConfiguration();
Expand Down Expand Up @@ -192,5 +203,4 @@ public void testUpdateAdditionWithLongString() throws Exception {
assertEquals(0, tx.query(new IndexQuery("vertex", PredicateCondition.of(TEXT, Text.CONTAINS, "bob"))).size());
assertEquals(1, tx.query(new IndexQuery("vertex", PredicateCondition.of(TEXT, Text.CONTAINS, "world"))).size());
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,16 @@ public boolean supportsLuceneStyleQueries() {
return false;
}

/**
 * Supplies no English analyzer name for this test class.
 * NOTE(review): presumably null signals that this backend has no custom
 * analyzer support; confirm against the base test class.
 *
 * @return always {@code null}
 */
@Override
public String getEnglishAnalyzerName() {
return null;
}

/**
 * Supplies no keyword analyzer name for this test class.
 * NOTE(review): presumably null signals that this backend has no custom
 * analyzer support; confirm against the base test class.
 *
 * @return always {@code null}
 */
@Override
public String getKeywordAnalyzerName() {
return null;
}

public static final Configuration getLocalLuceneTestConfig() {
final String index = "lucene";
ModifiableConfiguration config = GraphDatabaseConfiguration.buildGraphConfiguration();
Expand Down
Loading