From 62e02b08e06793ddd9845d18cb7228aa9b51d8d8 Mon Sep 17 00:00:00 2001 From: sjudeng Date: Thu, 23 Feb 2017 19:13:04 -0600 Subject: [PATCH 1/5] Update to Elasticsearch 2.4.4 and Lucene 5.5.2 Signed-off-by: sjudeng --- docs/elasticsearch.txt | 24 +- .../src/assembly/static/bin/elasticsearch | 145 +++---- .../assembly/static/bin/elasticsearch.in.sh | 47 ++- .../src/assembly/static/bin/janusgraph.sh | 4 +- .../assembly/static/conf/es/elasticsearch.yml | 386 +++--------------- .../src/assembly/static/conf/es/logging.yml | 31 +- janusgraph-es/pom.xml | 45 +- .../org/elasticsearch/bootstrap/JarHell.java | 70 ++++ .../diskstorage/es/ElasticSearchIndex.java | 177 ++++---- .../diskstorage/es/ElasticSearchSetup.java | 68 +-- janusgraph-es/src/test/bin/elasticsearch | 127 ++---- .../src/test/bin/elasticsearch.in.sh | 34 +- .../src/test/config/elasticsearch.yml | 379 +++-------------- .../src/test/config/indexCreationOptions.yml | 378 ----------------- .../src/test/config/networkNodeUsingExt.yml | 378 ----------------- .../src/test/config/networkNodeUsingYaml.yml | 378 ----------------- .../src/test/config/transportClient.yml | 378 ----------------- .../es/ElasticSearchConfigTest.java | 55 +-- .../diskstorage/es/ElasticsearchRunner.java | 8 +- .../resources/es_cfg_bogus_nodeclient.yml | 1 - .../src/test/resources/es_cfg_nodeclient.yml | 1 - .../src/test/resources/es_jvmlocal.yml | 5 +- janusgraph-hadoop-parent/pom.xml | 14 + .../test/java/org/janusgraph/HBaseStatus.java | 2 +- .../diskstorage/lucene/LuceneIndex.java | 13 +- .../diskstorage/lucene/LuceneExample.java | 6 +- pom.xml | 19 +- 27 files changed, 605 insertions(+), 2568 deletions(-) create mode 100644 janusgraph-es/src/main/java/org/elasticsearch/bootstrap/JarHell.java delete mode 100644 janusgraph-es/src/test/config/indexCreationOptions.yml delete mode 100644 janusgraph-es/src/test/config/networkNodeUsingExt.yml delete mode 100644 janusgraph-es/src/test/config/networkNodeUsingYaml.yml delete mode 100644 
janusgraph-es/src/test/config/transportClient.yml diff --git a/docs/elasticsearch.txt b/docs/elasticsearch.txt index 82347950de..215786bd16 100644 --- a/docs/elasticsearch.txt +++ b/docs/elasticsearch.txt @@ -137,7 +137,7 @@ In addition to common options described in <>, the Node clie Unlike the Transport client, the Node client can be completely configured through `conf-file` or `ext`. If you provide a complete Node configuration via `conf-file` or `ext`, then none of the JanusGraph options listed above are required, and it's fine to leave them unset in JanusGraph's configuration. The JanusGraph options listed above are retained mainly for convenience and continuity with the legacy config track. -However, there is one unique aspect to `index.[X].directory`. When `index.[X].directory` is set for Elasticsearch, it is taken as the path to a directory which will contain the ES data, work, and logs directories. These directories are created if they don't already exist. Furthermore, when the `index.[X].directory` setting appears in a JanusGraph properties file on disk and its value is a relative path, it will be interpreted relative to the directory containing that JanusGraph properties file (similar to how relative `conf-file` paths are handled). That's the difference between setting JanusGraph's `index.[X].directory` versus setting Elasticsearch's `path.data`, `path.work`, and `path.logs` directories: relative paths for the former are based on the directory containing the JanusGraph properties file, whereas relative paths for the latter are based on the JVM's current working directory. +However, there is one unique aspect to `index.[X].directory`. When `index.[X].directory` is set for Elasticsearch, it is taken as the path to a directory which will contain the ES data, work, and logs directories. These directories are created if they don't already exist. 
Furthermore, when the `index.[X].directory` setting appears in a JanusGraph properties file on disk and its value is a relative path, it will be interpreted relative to the directory containing that JanusGraph properties file (similar to how relative `conf-file` paths are handled). That's the difference between setting JanusGraph's `index.[X].directory` versus setting Elasticsearch's `path.home` directory: relative paths for the former are based on the directory containing the JanusGraph properties file, whereas relative paths for the latter are based on the JVM's current working directory. Note that `index.[X].hostname` is not in the list above. The recommended way to set a list of hostnames with the Node client is to use Elasticsearch's own config keys via `ext` or `conf-file`. See the Elasticsearch documentation on the http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/modules-discovery-zen.html[`discovery` module] and the http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/modules-transport.html[`transport` module] for relevant ES config keys. Also see <> for an example configuration using the Elasticsearch Zen discovery module and unicast addressing. @@ -159,19 +159,17 @@ index.search.conf-file=es_jvmlocal.yml node.data: true node.client: false node.local: true -# These paths are interpreted relative to the JVM's current working directory -path.data: es/data -path.work: es/work -path.logs: es/logs +# This path is interpreted relative to the JVM's current working directory +path.home: es ---- -The following configuration is similar to the one above, except it uses `ext` and the `index.[X].directory` JanusGraph setting to locate the ES work, data, and log paths. When the `index.[X].directory` appears in a JanusGraph properties file and is set to a relative path, that path is interpreted relative to the directory containing the JanusGraph properties file. 
Compare this to setting `path.data`, `path.work`, and `path.logs` directly, which will be interpreted relative to the current working directory of the Java VM. +The following configuration is similar to the one above, except it uses `ext` and the `index.[X].directory` JanusGraph setting to locate the ES work, data, and log paths. When the `index.[X].directory` appears in a JanusGraph properties file and is set to a relative path, that path is interpreted relative to the directory containing the JanusGraph properties file. Compare this to setting `path.home` directly, which will be interpreted relative to the current working directory of the Java VM. [source, properties] ---- index.search.backend=elasticsearch index.search.elasticsearch.interface=NODE -# data, work, and logs subdirectories for ES will be created in +# data, logs, etc. subdirectories for ES will be created in # /../db/es index.search.directory=../db/es index.search.elasticsearch.ext.node.data=true @@ -229,9 +227,7 @@ index.search.conf-file=es_clustermember.yml node.data: true node.client: false node.local: false -path.data: es/data -path.work: es/work -path.logs: es/logs +path.home: es discovery.zen.ping.multicast.enabled: false discovery.zen.ping.unicast.hosts: [ "host1", "host2:customport" ] ---- @@ -245,10 +241,8 @@ index.search.elasticsearch.interface=NODE index.search.elasticsearch.ext.node.data=true index.search.elasticsearch.ext.node.client=false index.search.elasticsearch.ext.node.local=false -# The next three paths are interpreted relative to the JVM working directory -index.search.elasticsearch.ext.path.data=es/data -index.search.elasticsearch.ext.path.work=es/work -index.search.elasticsearch.ext.path.logs=es/logs +# The next path is interpreted relative to the JVM working directory +index.search.elasticsearch.ext.path.home=es index.search.elasticsearch.ext.discovery.zen.ping.multicast.enabled=false index.search.elasticsearch.ext.discovery.zen.ping.unicast.hosts=host1, host2:customport ---- 
@@ -379,7 +373,7 @@ Check that the Elasticsearch cluster nodes are reachable on the native "transpor When you see exception referring to lucene implementation details, make sure you don't have a conflicting version of Lucene on the classpath. Exception may look like this: [source, text] -java.lang.NoSuchFieldError: LUCENE_4_10_4 +java.lang.NoSuchFieldError: LUCENE_5_5_2 === Optimizing Elasticsearch diff --git a/janusgraph-dist/src/assembly/static/bin/elasticsearch b/janusgraph-dist/src/assembly/static/bin/elasticsearch index 7647c75995..1fdc1c165c 100755 --- a/janusgraph-dist/src/assembly/static/bin/elasticsearch +++ b/janusgraph-dist/src/assembly/static/bin/elasticsearch @@ -1,9 +1,5 @@ #!/bin/sh -# OPTIONS: -# -d: daemonize, start in the background -# -p : log the pid to a file (useful to kill it later) - # CONTROLLING STARTUP: # # This script relies on few environment variables to determine startup @@ -46,16 +42,16 @@ # Be aware that you will be entirely responsible for populating the needed # environment variables. - # Maven will replace the project.name with elasticsearch below. If that # hasn't been done, we assume that this is not a packaged version and the # user has forgotten to run Maven to create a package. -IS_PACKAGED_VERSION='elasticsearch' -if [ "$IS_PACKAGED_VERSION" != "elasticsearch" ]; then + +IS_PACKAGED_VERSION='distributions' +if [ "$IS_PACKAGED_VERSION" != "distributions" ]; then cat >&2 << EOF Error: You must build the project with Maven or download a pre-built package before you can run Elasticsearch. See 'Building from Source' in README.textile -or visit http://www.elasticsearch.org/download to get a pre-built package. +or visit https://www.elastic.co/download to get a pre-built package. EOF exit 1 fi @@ -81,8 +77,25 @@ ES_HOME=`dirname "$SCRIPT"`/.. # make ELASTICSEARCH_HOME absolute ES_HOME=`cd "$ES_HOME"; pwd` -export ES_INCLUDE="`dirname $0`/elasticsearch.in.sh" -. 
"$ES_INCLUDE" + +# If an include wasn't specified in the environment, then search for one... +if [ "x$ES_INCLUDE" = "x" ]; then + # Locations (in order) to use when searching for an include file. + for include in /usr/share/elasticsearch/elasticsearch.in.sh \ + /usr/local/share/elasticsearch/elasticsearch.in.sh \ + /opt/elasticsearch/elasticsearch.in.sh \ + ~/.elasticsearch.in.sh \ + "$ES_HOME/bin/elasticsearch.in.sh" \ + "`dirname "$0"`"/elasticsearch.in.sh; do + if [ -r "$include" ]; then + . "$include" + break + fi + done +# ...otherwise, source the specified include. +elif [ -r "$ES_INCLUDE" ]; then + . "$ES_INCLUDE" +fi if [ -x "$JAVA_HOME/bin/java" ]; then JAVA="$JAVA_HOME/bin/java" @@ -100,6 +113,14 @@ if [ -z "$ES_CLASSPATH" ]; then exit 1 fi +# don't let JAVA_TOOL_OPTIONS slip in (e.g. crazy agents in ubuntu) +# works around https://bugs.launchpad.net/ubuntu/+source/jayatana/+bug/1441487 +if [ "x$JAVA_TOOL_OPTIONS" != "x" ]; then + echo "Warning: Ignoring JAVA_TOOL_OPTIONS=$JAVA_TOOL_OPTIONS" + echo "Please pass JVM parameters via JAVA_OPTS instead" + unset JAVA_TOOL_OPTIONS +fi + # Special-case path variables. case `uname` in CYGWIN*) @@ -108,91 +129,29 @@ case `uname` in ;; esac -launch_service() -{ - pidpath=$1 - daemonized=$2 - props=$3 - es_parms="-Delasticsearch" +# full hostname passed through cut for portability on systems that do not support hostname -s +# export on separate line for shells that do not support combining definition and export +HOSTNAME=`hostname | cut -d. 
-f1` +export HOSTNAME - if [ "x$pidpath" != "x" ]; then - es_parms="$es_parms -Des.pidfile=$pidpath" +# manual parsing to find out, if process should be detached +daemonized=`echo $* | egrep -- '(^-d |-d$| -d |--daemonize$|--daemonize )'` +if [ -z "$daemonized" ] ; then + exec "$JAVA" $JAVA_OPTS $ES_JAVA_OPTS -Des.path.home="$ES_HOME" -cp "$ES_CLASSPATH" \ + org.elasticsearch.bootstrap.Elasticsearch start "$@" +else + exec "$JAVA" $JAVA_OPTS $ES_JAVA_OPTS -Des.path.home="$ES_HOME" -cp "$ES_CLASSPATH" \ + org.elasticsearch.bootstrap.Elasticsearch start "$@" <&- & + retval=$? + pid=$! + [ $retval -eq 0 ] || exit $retval + if [ ! -z "$ES_STARTUP_SLEEP_TIME" ]; then + sleep $ES_STARTUP_SLEEP_TIME fi - - # The es-foreground option will tell Elasticsearch not to close stdout/stderr, but it's up to us not to daemonize. - if [ "x$daemonized" = "x" ]; then - es_parms="$es_parms -Des.foreground=yes" - exec "$JAVA" $JAVA_OPTS $ES_JAVA_OPTS $es_parms -Des.path.conf="$ES_HOME/conf/es" -Des.path.home="$ES_HOME" -cp "$ES_CLASSPATH" $props \ - org.elasticsearch.bootstrap.Elasticsearch - # exec without running it in the background, makes it replace this shell, we'll never get here... - # no need to return something - else - # Startup Elasticsearch, background it, and write the pid. - exec "$JAVA" $JAVA_OPTS $ES_JAVA_OPTS $es_parms -Des.path.conf="$ES_HOME/conf/es" -Des.path.home="$ES_HOME" -cp "$ES_CLASSPATH" $props \ - org.elasticsearch.bootstrap.Elasticsearch <&- & - return $? + if ! ps -p $pid > /dev/null ; then + exit 1 fi -} - -# Parse any long getopt options and put them into properties before calling getopt below -# Be dash compatible to make sure running under ubuntu works -ARGV="" -while [ $# -gt 0 ] -do - case $1 in - --*=*) properties="$properties -Des.${1#--}" - shift 1 - ;; - --*) properties="$properties -Des.${1#--}=$2" - shift 2 - ;; - *) ARGV="$ARGV $1" ; shift - esac -done - -# Parse any command line options. 
-args=`getopt vdhp:D:X: $ARGV` -eval set -- "$args" - -while true; do - case $1 in - -v) - "$JAVA" $JAVA_OPTS $ES_JAVA_OPTS $es_parms -Des.path.conf="$ES_HOME/conf/es" -Des.path.home="$ES_HOME" -cp "$ES_CLASSPATH" $props \ - org.elasticsearch.Version - exit 0 - ;; - -p) - pidfile="$2" - shift 2 - ;; - -d) - daemonized="yes" - shift - ;; - -h) - echo "Usage: $0 [-d] [-h] [-p pidfile]" - exit 0 - ;; - -D) - properties="$properties -D$2" - shift 2 - ;; - -X) - properties="$properties -X$2" - shift 2 - ;; - --) - shift - break - ;; - *) - echo "Error parsing argument $1!" >&2 - exit 1 - ;; - esac -done - -# Start up the service -launch_service "$pidfile" "$daemonized" "$properties" + exit 0 +fi exit $? diff --git a/janusgraph-dist/src/assembly/static/bin/elasticsearch.in.sh b/janusgraph-dist/src/assembly/static/bin/elasticsearch.in.sh index 215521e4dc..5472cd20bd 100755 --- a/janusgraph-dist/src/assembly/static/bin/elasticsearch.in.sh +++ b/janusgraph-dist/src/assembly/static/bin/elasticsearch.in.sh @@ -1,6 +1,19 @@ #!/bin/sh -ES_CLASSPATH=$ES_CLASSPATH:$ES_HOME/lib/elasticsearch-1.1.1.jar:$ES_HOME/lib/*:$ES_HOME/lib/sigar/* +# check in case a user was using this mechanism +if [ "x$ES_CLASSPATH" != "x" ]; then + cat >&2 << EOF +Error: Don't modify the classpath with ES_CLASSPATH. Best is to add +additional elements via the plugin mechanism, or if code must really be +added to the main classpath, add jars to lib/ (unsupported). 
+EOF + exit 1 +fi + +#### Start JanusGraph-specific edit +# ensure janusgraph-es is first on classpath for JarHell override +ES_CLASSPATH="`ls $ES_HOME/lib/janusgraph-es*`:$ES_HOME/lib/*" +#### End JanusGraph-specific edit if [ "x$ES_MIN_MEM" = "x" ]; then ES_MIN_MEM=256m @@ -30,9 +43,6 @@ if [ "x$ES_DIRECT_SIZE" != "x" ]; then JAVA_OPTS="$JAVA_OPTS -XX:MaxDirectMemorySize=${ES_DIRECT_SIZE}" fi -# reduce the per-thread stack size -JAVA_OPTS="$JAVA_OPTS -Xss256k" - # set to headless, just in case JAVA_OPTS="$JAVA_OPTS -Djava.awt.headless=true" @@ -41,20 +51,28 @@ if [ "x$ES_USE_IPV4" != "x" ]; then JAVA_OPTS="$JAVA_OPTS -Djava.net.preferIPv4Stack=true" fi -JAVA_OPTS="$JAVA_OPTS -XX:+UseParNewGC" -JAVA_OPTS="$JAVA_OPTS -XX:+UseConcMarkSweepGC" +# Add gc options. ES_GC_OPTS is unsupported, for internal testing +if [ "x$ES_GC_OPTS" = "x" ]; then + ES_GC_OPTS="$ES_GC_OPTS -XX:+UseParNewGC" + ES_GC_OPTS="$ES_GC_OPTS -XX:+UseConcMarkSweepGC" + ES_GC_OPTS="$ES_GC_OPTS -XX:CMSInitiatingOccupancyFraction=75" + ES_GC_OPTS="$ES_GC_OPTS -XX:+UseCMSInitiatingOccupancyOnly" +fi -JAVA_OPTS="$JAVA_OPTS -XX:CMSInitiatingOccupancyFraction=75" -JAVA_OPTS="$JAVA_OPTS -XX:+UseCMSInitiatingOccupancyOnly" +JAVA_OPTS="$JAVA_OPTS $ES_GC_OPTS" # GC logging options -if [ "x$ES_USE_GC_LOGGING" != "x" ]; then +if [ -n "$ES_GC_LOG_FILE" ]; then JAVA_OPTS="$JAVA_OPTS -XX:+PrintGCDetails" JAVA_OPTS="$JAVA_OPTS -XX:+PrintGCTimeStamps" + JAVA_OPTS="$JAVA_OPTS -XX:+PrintGCDateStamps" JAVA_OPTS="$JAVA_OPTS -XX:+PrintClassHistogram" JAVA_OPTS="$JAVA_OPTS -XX:+PrintTenuringDistribution" JAVA_OPTS="$JAVA_OPTS -XX:+PrintGCApplicationStoppedTime" - JAVA_OPTS="$JAVA_OPTS -Xloggc:/var/log/elasticsearch/gc.log" + JAVA_OPTS="$JAVA_OPTS -Xloggc:$ES_GC_LOG_FILE" + + # Ensure that the directory for the log file exists: the JVM will not create it. + mkdir -p "`dirname \"$ES_GC_LOG_FILE\"`" fi # Causes the JVM to dump its heap on OutOfMemory. 
@@ -63,4 +81,11 @@ JAVA_OPTS="$JAVA_OPTS -XX:+HeapDumpOnOutOfMemoryError" # space for a full heap dump. #JAVA_OPTS="$JAVA_OPTS -XX:HeapDumpPath=$ES_HOME/logs/heapdump.hprof" -cd "`dirname $0`"/.. +# Disables explicit GC +JAVA_OPTS="$JAVA_OPTS -XX:+DisableExplicitGC" + +# Ensure UTF-8 encoding by default (e.g. filenames) +JAVA_OPTS="$JAVA_OPTS -Dfile.encoding=UTF-8" + +# Use our provided JNA always versus the system one +JAVA_OPTS="$JAVA_OPTS -Djna.nosys=true" diff --git a/janusgraph-dist/src/assembly/static/bin/janusgraph.sh b/janusgraph-dist/src/assembly/static/bin/janusgraph.sh index fe7662daf8..bd4ca2a093 100755 --- a/janusgraph-dist/src/assembly/static/bin/janusgraph.sh +++ b/janusgraph-dist/src/assembly/static/bin/janusgraph.sh @@ -134,9 +134,9 @@ start() { } echo "Forking Elasticsearch..." if [ -n "$VERBOSE" ]; then - "$BIN"/elasticsearch -d + "$BIN"/elasticsearch -d -Dpath.conf=conf/es else - "$BIN"/elasticsearch -d >/dev/null 2>&1 + "$BIN"/elasticsearch -d -Dpath.conf=conf/es >/dev/null 2>&1 fi wait_for_startup Elasticsearch $ELASTICSEARCH_IP $ELASTICSEARCH_PORT $ELASTICSEARCH_STARTUP_TIMEOUT_S || { echo "See $BIN/../log/elasticsearch.log for Elasticsearch log output." >&2 diff --git a/janusgraph-dist/src/assembly/static/conf/es/elasticsearch.yml b/janusgraph-dist/src/assembly/static/conf/es/elasticsearch.yml index 8c4c19b440..616503b2cb 100644 --- a/janusgraph-dist/src/assembly/static/conf/es/elasticsearch.yml +++ b/janusgraph-dist/src/assembly/static/conf/es/elasticsearch.yml @@ -1,386 +1,96 @@ -##################### Elasticsearch Configuration Example ##################### - -# This file contains an overview of various configuration settings, -# targeted at operations staff. Application developers should -# consult the guide at . +# ======================== Elasticsearch Configuration ========================= # -# The installation procedure is covered at -# . +# NOTE: Elasticsearch comes with reasonable defaults for most settings. 
+# Before you set out to tweak and tune the configuration, make sure you +# understand what are you trying to accomplish and the consequences. # -# Elasticsearch comes with reasonable defaults for most settings, -# so you can try it out without bothering with configuration. +# The primary way of configuring a node is via this file. This template lists +# the most important settings you may want to configure for a production cluster. # -# Most of the time, these defaults are just fine for running a production -# cluster. If you're fine-tuning your cluster, or wondering about the -# effect of certain configuration option, please _do ask_ on the -# mailing list or IRC channel [http://elasticsearch.org/community]. - -# Any element in the configuration can be replaced with environment variables -# by placing them in ${...} notation. For example: +# Please see the documentation for further information on configuration options: +# # -# node.rack: ${RACK_ENV_VAR} - -# For information on supported formats and syntax for the config file, see -# - - -################################### Cluster ################################### - -# Cluster name identifies your cluster for auto-discovery. If you're running -# multiple clusters on the same network, make sure you're using unique names. +# ---------------------------------- Cluster ----------------------------------- # -# cluster.name: elasticsearch - - -#################################### Node ##################################### - -# Node names are generated dynamically on startup, so you're relieved -# from configuring them manually. You can tie this node to a specific name: -# -# node.name: "Franz Kafka" - -# Every node can be configured to allow or deny being eligible as the master, -# and to allow or deny to store the data. 
+# Use a descriptive name for your cluster: # -# Allow this node to be eligible as a master node (enabled by default): +# cluster.name: my-application # -# node.master: true +# ------------------------------------ Node ------------------------------------ # -# Allow this node to store data (enabled by default): +# Use a descriptive name for the node: # -# node.data: true - -# You can exploit these settings to design advanced cluster topologies. +# node.name: node-1 # -# 1. You want this node to never become a master node, only to hold data. -# This will be the "workhorse" of your cluster. +# Add custom attributes to the node: # -# node.master: false -# node.data: true +# node.rack: r1 # -# 2. You want this node to only serve as a master: to not store any data and -# to have free resources. This will be the "coordinator" of your cluster. +# ----------------------------------- Paths ------------------------------------ # -# node.master: true -# node.data: false -# -# 3. You want this node to be neither master nor data node, but -# to act as a "search load balancer" (fetching data from nodes, -# aggregating results, etc.) -# -# node.master: false -# node.data: false - -# Use the Cluster Health API [http://localhost:9200/_cluster/health], the -# Node Info API [http://localhost:9200/_nodes] or GUI tools -# such as , -# , -# and -# to inspect the cluster state. - -# A node can have generic attributes associated with it, which can later be used -# for customized shard allocation filtering, or allocation awareness. 
An attribute -# is a simple key value pair, similar to node.key: value, here is an example: -# -# node.rack: rack314 - -# By default, multiple nodes are allowed to start from the same installation location -# to disable it, set the following: -# node.max_local_storage_nodes: 1 - - -#################################### Index #################################### - -# You can set a number of options (such as shard/replica options, mapping -# or analyzer definitions, translog settings, ...) for indices globally, -# in this file. -# -# Note, that it makes more sense to configure index settings specifically for -# a certain index, either when creating it or by using the index templates API. -# -# See and -# -# for more information. - -# Set the number of shards (splits) of an index (5 by default): -# -# index.number_of_shards: 5 - -# Set the number of replicas (additional copies) of an index (1 by default): -# -# index.number_of_replicas: 1 - -# Note, that for development on a local machine, with small indices, it usually -# makes sense to "disable" the distributed features: +# Path to directory where to store the data (separate multiple locations by comma): # -# index.number_of_shards: 1 -# index.number_of_replicas: 0 - -# These settings directly affect the performance of index and search operations -# in your cluster. Assuming you have enough machines to hold shards and -# replicas, the rule of thumb is: -# -# 1. Having more *shards* enhances the _indexing_ performance and allows to -# _distribute_ a big index across machines. -# 2. Having more *replicas* enhances the _search_ performance and improves the -# cluster _availability_. -# -# The "number_of_shards" is a one-time setting for an index. -# -# The "number_of_replicas" can be increased or decreased anytime, -# by using the Index Update Settings API. -# -# Elasticsearch takes care about load balancing, relocating, gathering the -# results from nodes, etc. 
Experiment with different settings to fine-tune -# your setup. - -# Use the Index Status API () to inspect -# the index status. - - -#################################### Paths #################################### - -# Path to directory containing configuration (this file and logging.yml): -# -# path.conf: /path/to/conf - -# Path to directory where to store index data allocated for this node. -# -# path.data: /path/to/data -# -# Can optionally include more than one location, causing data to be striped across -# the locations (a la RAID 0) on a file level, favouring locations with most free -# space on creation. For example: -# -# path.data: /path/to/data1,/path/to/data2 - path.data: db/es/data - -# Path to temporary files: # -# path.work: /path/to/work - -path.work: db/es/work - # Path to log files: # -# path.logs: /path/to/logs - path.logs: log -# Path to where plugins are installed: -# -# path.plugins: /path/to/plugins - path.plugins: bin/esplugins - - -#################################### Plugin ################################### - -# If a plugin listed here is not installed for current node, the node will not start. # -# plugin.mandatory: mapper-attachments,lang-groovy - - -################################### Memory #################################### - -# Elasticsearch performs poorly when JVM starts swapping: you should ensure that -# it _never_ swaps. +# ----------------------------------- Memory ----------------------------------- # -# Set this property to true to lock the memory: +# Lock the memory on startup: # -# bootstrap.mlockall: true - -# Make sure that the ES_MIN_MEM and ES_MAX_MEM environment variables are set -# to the same value, and that the machine has enough memory to allocate -# for Elasticsearch, leaving enough memory for the operating system itself. +# bootstrap.memory_lock: true # -# You should also make sure that the Elasticsearch process is allowed to lock -# the memory, eg. by using `ulimit -l unlimited`. 
- - -############################## Network And HTTP ############################### - -# Elasticsearch, by default, binds itself to the 0.0.0.0 address, and listens -# on port [9200-9300] for HTTP traffic and on port [9300-9400] for node-to-node -# communication. (the range means that if the port is busy, it will automatically -# try the next port). - -# Set the bind address specifically (IPv4 or IPv6): +# Make sure that the `ES_HEAP_SIZE` environment variable is set to about half the memory +# available on the system and that the owner of the process is allowed to use this limit. # -# network.bind_host: 192.168.0.1 - -# Set the address other nodes will use to communicate with this node. If not -# set, it is automatically derived. It must point to an actual IP address. +# Elasticsearch performs poorly when the system is swapping the memory. # -# network.publish_host: 192.168.0.1 - -# Set both 'bind_host' and 'publish_host': +# ---------------------------------- Network ----------------------------------- # -# network.host: 192.168.0.1 -network.host: 127.0.0.1 - -# Set a custom port for the node to node communication (9300 by default): +# Set the bind address to a specific IP (IPv4 or IPv6): # -# transport.tcp.port: 9300 - -# Enable compression for all communication between nodes (disabled by default): +# network.host: 192.168.0.1 # -# transport.tcp.compress: true - -# Set a custom port to listen for HTTP traffic: +# Set a custom port for HTTP: # # http.port: 9200 - -# Set a custom allowed content length: -# -# http.max_content_length: 100mb - -# Disable HTTP completely: -# -# http.enabled: false - - -################################### Gateway ################################### - -# The gateway allows for persisting the cluster state between full cluster -# restarts. Every change to the state (such as adding an index) will be stored -# in the gateway, and when the cluster starts up for the first time, -# it will read its state from the gateway. 
- -# There are several types of gateway implementations. For more information, see -# . - -# The default gateway type is the "local" gateway (recommended): -# -# gateway.type: local - -# Settings below control how and when to start the initial recovery process on -# a full cluster restart (to reuse as much local data as possible when using shared -# gateway). - -# Allow recovery process after N nodes in a cluster are up: -# -# gateway.recover_after_nodes: 1 - -# Set the timeout to initiate the recovery process, once the N nodes -# from previous setting are up (accepts time value): -# -# gateway.recover_after_time: 5m - -# Set how many nodes are expected in this cluster. Once these N nodes -# are up (and recover_after_nodes is met), begin recovery process immediately -# (without waiting for recover_after_time to expire): -# -# gateway.expected_nodes: 2 - - -############################# Recovery Throttling ############################# - -# These settings allow to control the process of shards allocation between -# nodes during initial recovery, replica allocation, rebalancing, -# or when adding and removing nodes. - -# Set the number of concurrent recoveries happening on a node: -# -# 1. During the initial recovery -# -# cluster.routing.allocation.node_initial_primaries_recoveries: 4 -# -# 2. During adding/removing nodes, rebalancing, etc # -# cluster.routing.allocation.node_concurrent_recoveries: 2 - -# Set to throttle throughput when recovering (eg. 
100mb, by default 20mb): +# For more information, see the documentation at: +# # -# indices.recovery.max_bytes_per_sec: 20mb - -# Set to limit the number of open concurrent streams when -# recovering a shard from a peer: +# --------------------------------- Discovery ---------------------------------- # -# indices.recovery.concurrent_streams: 5 - - -################################## Discovery ################################## - -# Discovery infrastructure ensures nodes can be found within a cluster -# and master node is elected. Multicast discovery is the default. - -# Set to ensure a node sees N other master eligible nodes to be considered -# operational within the cluster. Its recommended to set it to a higher value -# than 1 when running more than 2 nodes in the cluster. +# Pass an initial list of hosts to perform discovery when new node is started: +# The default list of hosts is ["127.0.0.1", "[::1]"] # -# discovery.zen.minimum_master_nodes: 1 - -# Set the time to wait for ping responses from other nodes when discovering. -# Set this option to a higher value on a slow or congested network -# to minimize discovery failures: +# discovery.zen.ping.unicast.hosts: ["host1", "host2"] # -# discovery.zen.ping.timeout: 3s - -# For more information, see -# - -# Unicast discovery allows to explicitly control which nodes will be used -# to discover the cluster. It can be used when multicast is not present, -# or to restrict the cluster communication-wise. +# Prevent the "split brain" by configuring the majority of nodes (total number of nodes / 2 + 1): # -# 1. Disable multicast discovery (enabled by default): +# discovery.zen.minimum_master_nodes: 3 # -# discovery.zen.ping.multicast.enabled: false +# For more information, see the documentation at: +# # -# 2. 
Configure an initial list of master nodes in the cluster -# to perform discovery when new nodes (master or data) are started: +# ---------------------------------- Gateway ----------------------------------- # -# discovery.zen.ping.unicast.hosts: ["host1", "host2:port"] - -# EC2 discovery allows to use AWS EC2 API in order to perform discovery. +# Block initial recovery after a full cluster restart until N nodes are started: # -# You have to install the cloud-aws plugin for enabling the EC2 discovery. +# gateway.recover_after_nodes: 3 # -# For more information, see -# +# For more information, see the documentation at: +# # -# See -# for a step-by-step tutorial. - -# GCE discovery allows to use Google Compute Engine API in order to perform discovery. +# ---------------------------------- Various ----------------------------------- # -# You have to install the cloud-gce plugin for enabling the GCE discovery. +# Disable starting multiple nodes on a single system: # -# For more information, see . - -# Azure discovery allows to use Azure API in order to perform discovery. +# node.max_local_storage_nodes: 1 # -# You have to install the cloud-azure plugin for enabling the Azure discovery. +# Require explicit names when deleting indices: # -# For more information, see . - -################################## Slow Log ################################## - -# Shard level query and fetch threshold logging. 
- -#index.search.slowlog.threshold.query.warn: 10s -#index.search.slowlog.threshold.query.info: 5s -#index.search.slowlog.threshold.query.debug: 2s -#index.search.slowlog.threshold.query.trace: 500ms - -#index.search.slowlog.threshold.fetch.warn: 1s -#index.search.slowlog.threshold.fetch.info: 800ms -#index.search.slowlog.threshold.fetch.debug: 500ms -#index.search.slowlog.threshold.fetch.trace: 200ms - -#index.indexing.slowlog.threshold.index.warn: 10s -#index.indexing.slowlog.threshold.index.info: 5s -#index.indexing.slowlog.threshold.index.debug: 2s -#index.indexing.slowlog.threshold.index.trace: 500ms - -################################## GC Logging ################################ - -#monitor.jvm.gc.young.warn: 1000ms -#monitor.jvm.gc.young.info: 700ms -#monitor.jvm.gc.young.debug: 400ms - -#monitor.jvm.gc.old.warn: 10s -#monitor.jvm.gc.old.info: 5s -#monitor.jvm.gc.old.debug: 2s +# action.destructive_requires_name: true diff --git a/janusgraph-dist/src/assembly/static/conf/es/logging.yml b/janusgraph-dist/src/assembly/static/conf/es/logging.yml index 9d0fb6b4ff..939aa1eed0 100644 --- a/janusgraph-dist/src/assembly/static/conf/es/logging.yml +++ b/janusgraph-dist/src/assembly/static/conf/es/logging.yml @@ -1,13 +1,20 @@ -# Elasticsearch logging configuration -# # you can override this using by setting a system property, for example -Des.logger.level=DEBUG es.logger.level: INFO rootLogger: ${es.logger.level}, console, file logger: # log action execution errors for easier debugging action: DEBUG + + # deprecation logging, turn to DEBUG to see them + deprecation: INFO, deprecation_log_file + # reduce the logging for aws, too much is logged under the default INFO com.amazonaws: WARN + # aws will try to do some sketchy JMX stuff, but its not needed. 
+ com.amazonaws.jmx.SdkMBeanRegistrySupport: ERROR + com.amazonaws.metrics.AwsSdkMetrics: ERROR + + org.apache.http: INFO # gateway #gateway: DEBUG @@ -25,6 +32,7 @@ logger: additivity: index.search.slowlog: false index.indexing.slowlog: false + deprecation: false appender: console: @@ -37,6 +45,25 @@ appender: type: dailyRollingFile file: ${path.logs}/${cluster.name}.log datePattern: "'.'yyyy-MM-dd" + layout: + type: pattern + conversionPattern: "[%d{ISO8601}][%-5p][%-25c] %.10000m%n" + + # Use the following log4j-extras RollingFileAppender to enable gzip compression of log files. + # For more information see https://logging.apache.org/log4j/extras/apidocs/org/apache/log4j/rolling/RollingFileAppender.html + #file: + #type: extrasRollingFile + #file: ${path.logs}/${cluster.name}.log + #rollingPolicy: timeBased + #rollingPolicy.FileNamePattern: ${path.logs}/${cluster.name}.log.%d{yyyy-MM-dd}.gz + #layout: + #type: pattern + #conversionPattern: "[%d{ISO8601}][%-5p][%-25c] %m%n" + + deprecation_log_file: + type: dailyRollingFile + file: ${path.logs}/${cluster.name}_deprecation.log + datePattern: "'.'yyyy-MM-dd" layout: type: pattern conversionPattern: "[%d{ISO8601}][%-5p][%-25c] %m%n" diff --git a/janusgraph-es/pom.xml b/janusgraph-es/pom.xml index b4a6626304..6c3b89c90f 100644 --- a/janusgraph-es/pom.xml +++ b/janusgraph-es/pom.xml @@ -54,10 +54,19 @@ org.elasticsearch elasticsearch ${elasticsearch.version} + + + + org.elasticsearch.module + lang-groovy + ${elasticsearch.version} + zip + test - org.antlr - antlr-runtime + org.codehaus.groovy + groovy @@ -105,6 +114,21 @@ test-jar + + + pre-test-jar + process-test-classes + + jar + + + es_jarhell + + **/JarHell.class + + + @@ -189,6 +213,23 @@ ${project.build.directory}/es_classpath.txt + + + unpack-plugins + + unpack-dependencies + + generate-test-resources + + ${project.build.directory}/db/es/plugins/lang-groovy + lang-groovy + org.elasticsearch.module + true + pom + groovy*.jarexcludes> + test + + diff --git 
a/janusgraph-es/src/main/java/org/elasticsearch/bootstrap/JarHell.java b/janusgraph-es/src/main/java/org/elasticsearch/bootstrap/JarHell.java new file mode 100644 index 0000000000..7060d85ffc --- /dev/null +++ b/janusgraph-es/src/main/java/org/elasticsearch/bootstrap/JarHell.java @@ -0,0 +1,70 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.bootstrap; + +import org.elasticsearch.common.io.PathUtils; + +import java.net.MalformedURLException; +import java.net.URL; + +/** + * This class masks the Elasticsearch class of the same name. + * Clients are responsible for ensuring their classpath is sane. 
+ */ +public class JarHell { + + public static void checkJarHell() { } + + public static URL[] parseClassPath() { + return parseClassPath(System.getProperty("java.class.path")); + } + + static URL[] parseClassPath(String classPath) { + String pathSeparator = System.getProperty("path.separator"); + String fileSeparator = System.getProperty("file.separator"); + String elements[] = classPath.split(pathSeparator); + URL urlElements[] = new URL[elements.length]; + for (int i = 0; i < elements.length; i++) { + String element = elements[i]; + if (element.isEmpty()) { + throw new IllegalStateException("Classpath should not contain empty elements! (outdated shell script from a previous version?) classpath='" + classPath + "'"); + } + if (element.startsWith("/") && "\\".equals(fileSeparator)) { + element = element.replace("/", "\\"); + if (element.length() >= 3 && element.charAt(2) == ':') { + element = element.substring(1); + } + } + try { + urlElements[i] = PathUtils.get(element).toUri().toURL(); + } catch (MalformedURLException e) { + throw new RuntimeException(e); + } + } + return urlElements; + } + + public static void checkJarHell(URL urls[]) { } + + public static void checkVersionFormat(String targetVersion) { } + + public static void checkJavaVersion(String resource, String targetVersion) { } + +} diff --git a/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/ElasticSearchIndex.java b/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/ElasticSearchIndex.java index fe9eb09989..fae58f91f1 100644 --- a/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/ElasticSearchIndex.java +++ b/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/ElasticSearchIndex.java @@ -56,16 +56,17 @@ import org.elasticsearch.action.update.UpdateRequestBuilder; import org.elasticsearch.client.Client; import org.elasticsearch.client.transport.TransportClient; -import org.elasticsearch.common.settings.ImmutableSettings; +import 
org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.transport.InetSocketTransportAddress; import org.elasticsearch.common.unit.DistanceUnit; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentFactory; +import org.elasticsearch.index.IndexNotFoundException; import org.elasticsearch.index.query.*; -import org.elasticsearch.indices.IndexMissingException; import org.elasticsearch.node.Node; import org.elasticsearch.node.NodeBuilder; import org.elasticsearch.rest.RestStatus; +import org.elasticsearch.script.Script; import org.elasticsearch.script.ScriptService; import org.elasticsearch.search.SearchHit; import org.elasticsearch.search.SearchHits; @@ -74,10 +75,13 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.io.ByteArrayInputStream; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; +import java.net.InetAddress; +import java.net.UnknownHostException; import java.time.Instant; import java.util.*; import java.util.concurrent.TimeUnit; @@ -95,8 +99,6 @@ public class ElasticSearchIndex implements IndexProvider { private static final String TTL_FIELD = "_ttl"; private static final String STRING_MAPPING_SUFFIX = "__STRING"; - public static final ImmutableList DATA_SUBDIRS = ImmutableList.of("data", "work", "logs"); - public static final ConfigNamespace ELASTICSEARCH_NS = new ConfigNamespace(INDEX_NS, "elasticsearch", "Elasticsearch index configuration"); @@ -193,7 +195,7 @@ public class ElasticSearchIndex implements IndexProvider { private final int maxResultsSize; private final boolean useDeprecatedIgnoreUnmapped; - public ElasticSearchIndex(Configuration config) { + public ElasticSearchIndex(Configuration config) throws BackendException { indexName = config.get(INDEX_NAME); useDeprecatedIgnoreUnmapped = config.get(USE_EDEPRECATED_IGNORE_UNMAPPED_OPTION); @@ -235,7 +237,7 @@ private void 
checkForOrCreateIndex(Configuration config) { IndicesExistsResponse response = client.admin().indices().exists(new IndicesExistsRequest(indexName)).actionGet(); if (!response.isExists()) { - ImmutableSettings.Builder settings = ImmutableSettings.settingsBuilder(); + Settings.Builder settings = Settings.settingsBuilder(); ElasticSearchSetup.applySettingsFromJanusGraphConf(settings, config, ES_CREATE_EXTRAS_NS); @@ -292,7 +294,7 @@ private ElasticSearchSetup.Connection interfaceConfiguration(Configuration confi * @param config a config passed to ElasticSearchIndex's constructor * @return a node and client object open and ready for use */ - private ElasticSearchSetup.Connection legacyConfiguration(Configuration config) { + private ElasticSearchSetup.Connection legacyConfiguration(Configuration config) throws BackendException { Node node; Client client; @@ -308,7 +310,7 @@ private ElasticSearchSetup.Connection legacyConfiguration(Configuration config) "Must either configure configuration file or base directory"); if (config.has(INDEX_CONF_FILE)) { String configFile = config.get(INDEX_CONF_FILE); - ImmutableSettings.Builder sb = ImmutableSettings.settingsBuilder(); + Settings.Builder sb = Settings.settingsBuilder(); log.debug("Configuring ES from YML file [{}]", configFile); FileInputStream fis = null; try { @@ -321,18 +323,8 @@ private ElasticSearchSetup.Connection legacyConfiguration(Configuration config) IOUtils.closeQuietly(fis); } } else { - String dataDirectory = config.get(INDEX_DIRECTORY); - log.debug("Configuring ES with data directory [{}]", dataDirectory); - File f = new File(dataDirectory); - if (!f.exists()) f.mkdirs(); - ImmutableSettings.Builder b = ImmutableSettings.settingsBuilder(); - for (String sub : DATA_SUBDIRS) { - String subdir = dataDirectory + File.separator + sub; - f = new File(subdir); - if (!f.exists()) f.mkdirs(); - b.put("path." 
+ sub, subdir); - } - b.put("script.disable_dynamic", false); + Settings.Builder b = Settings.settingsBuilder(); + b.put("script.inline", true); b.put("indices.ttl.interval", "5s"); builder.settings(b.build()); @@ -342,12 +334,22 @@ private ElasticSearchSetup.Connection legacyConfiguration(Configuration config) builder.clusterName(clustername); } + String dataDirectory = config.get(INDEX_DIRECTORY); + if (StringUtils.isNotBlank(dataDirectory)) { + log.debug("Configuring ES with home directory [{}]", dataDirectory); + File f = new File(dataDirectory); + if (!f.exists()) f.mkdirs(); + builder.settings().put("path.home", dataDirectory); + } + + builder.settings().put("index.max_result_window", Integer.MAX_VALUE); + node = builder.client(clientOnly).data(!clientOnly).local(local).node(); client = node.client(); } else { log.debug("Configuring ES for network transport"); - ImmutableSettings.Builder settings = ImmutableSettings.settingsBuilder(); + Settings.Builder settings = Settings.settingsBuilder(); if (config.has(CLUSTER_NAME)) { String clustername = config.get(CLUSTER_NAME); Preconditions.checkArgument(StringUtils.isNotBlank(clustername), "Invalid cluster name: %s", clustername); @@ -357,8 +359,9 @@ private ElasticSearchSetup.Connection legacyConfiguration(Configuration config) } log.debug("Transport sniffing enabled: {}", config.get(CLIENT_SNIFF)); settings.put("client.transport.sniff", config.get(CLIENT_SNIFF)); - settings.put("script.disable_dynamic", false); - TransportClient tc = new TransportClient(settings.build()); + settings.put("script.inline", true); + settings.put("index.max_result_window", Integer.MAX_VALUE); + TransportClient tc = TransportClient.builder().settings(settings.build()).build(); int defaultPort = config.has(INDEX_PORT)?config.get(INDEX_PORT):HOST_PORT_DEFAULT; for (String host : config.get(INDEX_HOSTS)) { String[] hostparts = host.split(":"); @@ -366,7 +369,11 @@ private ElasticSearchSetup.Connection legacyConfiguration(Configuration 
config) int hostport = defaultPort; if (hostparts.length == 2) hostport = Integer.parseInt(hostparts[1]); log.info("Configured remote host: {} : {}", hostname, hostport); - tc.addTransportAddress(new InetSocketTransportAddress(hostname, hostport)); + try { + tc.addTransportAddress(new InetSocketTransportAddress(InetAddress.getByName(hostname), hostport)); + } catch (UnknownHostException e) { + throw new TemporaryBackendException(e); + } } client = tc; node = null; @@ -468,8 +475,8 @@ public void register(String store, String key, KeyInformation information, BaseT } try { - PutMappingResponse response = client.admin().indices().preparePutMapping(indexName). - setIgnoreConflicts(false).setType(store).setSource(mapping).execute().actionGet(); + PutMappingResponse response = client.admin().indices().preparePutMapping(indexName) + .setType(store).setSource(mapping).execute().actionGet(); } catch (Exception e) { throw convert(e); } @@ -486,8 +493,7 @@ private static boolean hasDualStringMapping(KeyInformation information) { return AttributeUtil.isString(information.getDataType()) && getStringMapping(information)==Mapping.TEXTSTRING; } - public XContentBuilder getNewDocument(final List additions, KeyInformation.StoreRetriever informations, int ttl) throws BackendException { - Preconditions.checkArgument(ttl >= 0); + public XContentBuilder getNewDocument(final List additions, KeyInformation.StoreRetriever informations) throws BackendException { try { XContentBuilder builder = XContentFactory.jsonBuilder().startObject(); @@ -509,19 +515,26 @@ public XContentBuilder getNewDocument(final List additions, KeyInfor break; case SET: case LIST: - value = add.getValue().stream().map(v -> convertToEsType(v.value)).collect(Collectors.toList()).toArray(); + value = add.getValue().stream().map(v -> convertToEsType(v.value)) + .filter(v -> { + Preconditions.checkArgument(!(v instanceof byte[]), "Collections not supported for " + add.getKey()); + return true; + }) + 
.collect(Collectors.toList()).toArray(); break; } - - builder.field(add.getKey(), value); + if (value instanceof byte[]) { + builder.rawField(add.getKey(), new ByteArrayInputStream((byte[]) value)); + } else { + builder.field(add.getKey(), value); + } if (hasDualStringMapping(informations.get(add.getKey())) && keyInformation.getDataType() == String.class) { builder.field(getDualMappingName(add.getKey()), value); } } - if (ttl>0) builder.field(TTL_FIELD, TimeUnit.MILLISECONDS.convert(ttl,TimeUnit.SECONDS)); builder.endObject(); @@ -579,28 +592,34 @@ public void mutate(Map> mutations, KeyInforma brb.add(new DeleteRequest(indexName, storename, docid)); } else { String script = getDeletionScript(informations, storename, mutation); - brb.add(client.prepareUpdate(indexName, storename, docid).setScript(script, ScriptService.ScriptType.INLINE)); + brb.add(client.prepareUpdate(indexName, storename, docid).setScript(new Script(script, ScriptService.ScriptType.INLINE, null, null))); log.trace("Adding script {}", script); } bulkrequests++; } if (mutation.hasAdditions()) { - int ttl = mutation.determineTTL(); + long ttl = mutation.determineTTL() * 1000l; if (mutation.isNew()) { //Index log.trace("Adding entire document {}", docid); - brb.add(new IndexRequest(indexName, storename, docid) - .source(getNewDocument(mutation.getAdditions(), informations.get(storename), ttl))); + Preconditions.checkArgument(ttl >= 0); + IndexRequest request = new IndexRequest(indexName, storename, docid) + .source(getNewDocument(mutation.getAdditions(), informations.get(storename))); + if (ttl > 0) { + request.ttl(ttl); + } + brb.add(request); } else { Preconditions.checkArgument(ttl == 0, "Elasticsearch only supports TTL on new documents [%s]", docid); boolean needUpsert = !mutation.hasDeletions(); String script = getAdditionScript(informations, storename, mutation); - UpdateRequestBuilder update = client.prepareUpdate(indexName, storename, docid).setScript(script, ScriptService.ScriptType.INLINE); 
+ UpdateRequestBuilder update = client.prepareUpdate(indexName, storename, docid).setScript(new Script(script, ScriptService.ScriptType.INLINE, null, null)); if (needUpsert) { - XContentBuilder doc = getNewDocument(mutation.getAdditions(), informations.get(storename), ttl); + XContentBuilder doc = getNewDocument(mutation.getAdditions(), informations.get(storename)); + update.setUpsert(doc); } @@ -692,7 +711,12 @@ private static String convertToJsType(Object value) throws PermanentBackendExcep try { XContentBuilder builder = XContentFactory.jsonBuilder().startObject(); - builder.field("value", convertToEsType(value)); + Object esValue = convertToEsType(value); + if (esValue instanceof byte[]) { + builder.rawField("value", new ByteArrayInputStream((byte[]) esValue)); + } else { + builder.field("value", esValue); + } String s = builder.string(); int prefixLength = "{\"value\":".length(); @@ -730,7 +754,13 @@ public void restore(Map>> documents, KeyInfo // Add if (log.isTraceEnabled()) log.trace("Adding entire document {}", docID); - bulk.add(new IndexRequest(indexName, store, docID).source(getNewDocument(content, informations.get(store), IndexMutation.determineTTL(content)))); + long ttl = IndexMutation.determineTTL(content) * 1000l; + Preconditions.checkArgument(ttl >= 0); + IndexRequest request = new IndexRequest(indexName, store, docID).source(getNewDocument(content, informations.get(store))); + if (ttl > 0) { + request.ttl(ttl); + } + bulk.add(request); requests++; } } @@ -743,7 +773,7 @@ public void restore(Map>> documents, KeyInfo } } - public FilterBuilder getFilter(Condition condition, KeyInformation.StoreRetriever informations) { + public QueryBuilder getFilter(Condition condition, KeyInformation.StoreRetriever informations) { if (condition instanceof PredicateCondition) { PredicateCondition atom = (PredicateCondition) condition; Object value = atom.getValue(); @@ -756,17 +786,17 @@ public FilterBuilder getFilter(Condition condition, KeyInformation.StoreRetri 
switch (numRel) { case EQUAL: - return FilterBuilders.inFilter(key, value); + return QueryBuilders.termsQuery(key, value); case NOT_EQUAL: - return FilterBuilders.notFilter(FilterBuilders.inFilter(key, value)); + return QueryBuilders.boolQuery().mustNot(QueryBuilders.termsQuery(key, value)); case LESS_THAN: - return FilterBuilders.rangeFilter(key).lt(value); + return QueryBuilders.rangeQuery(key).lt(value); case LESS_THAN_EQUAL: - return FilterBuilders.rangeFilter(key).lte(value); + return QueryBuilders.rangeQuery(key).lte(value); case GREATER_THAN: - return FilterBuilders.rangeFilter(key).gt(value); + return QueryBuilders.rangeQuery(key).gt(value); case GREATER_THAN_EQUAL: - return FilterBuilders.rangeFilter(key).gte(value); + return QueryBuilders.rangeQuery(key).gte(value); default: throw new IllegalArgumentException("Unexpected relation: " + numRel); } @@ -782,25 +812,25 @@ public FilterBuilder getFilter(Condition condition, KeyInformation.StoreRetri if (janusgraphPredicate == Text.CONTAINS) { value = ((String) value).toLowerCase(); - AndFilterBuilder b = FilterBuilders.andFilter(); + BoolQueryBuilder b = QueryBuilders.boolQuery(); for (String term : Text.tokenize((String)value)) { - b.add(FilterBuilders.termFilter(fieldName, term)); + b.must(QueryBuilders.termQuery(fieldName, term)); } return b; } else if (janusgraphPredicate == Text.CONTAINS_PREFIX) { value = ((String) value).toLowerCase(); - return FilterBuilders.prefixFilter(fieldName, (String) value); + return QueryBuilders.prefixQuery(fieldName, (String) value); } else if (janusgraphPredicate == Text.CONTAINS_REGEX) { value = ((String) value).toLowerCase(); - return FilterBuilders.regexpFilter(fieldName, (String) value); + return QueryBuilders.regexpQuery(fieldName, (String) value); } else if (janusgraphPredicate == Text.PREFIX) { - return FilterBuilders.prefixFilter(fieldName, (String) value); + return QueryBuilders.prefixQuery(fieldName, (String) value); } else if (janusgraphPredicate == Text.REGEX) { - 
return FilterBuilders.regexpFilter(fieldName, (String) value); + return QueryBuilders.regexpQuery(fieldName, (String) value); } else if (janusgraphPredicate == Cmp.EQUAL) { - return FilterBuilders.termFilter(fieldName, (String) value); + return QueryBuilders.termQuery(fieldName, (String) value); } else if (janusgraphPredicate == Cmp.NOT_EQUAL) { - return FilterBuilders.notFilter(FilterBuilders.termFilter(fieldName, (String) value)); + return QueryBuilders.boolQuery().mustNot(QueryBuilders.termQuery(fieldName, (String) value)); } else throw new IllegalArgumentException("Predicate is not supported for string value: " + janusgraphPredicate); } else if (value instanceof Geoshape) { @@ -808,11 +838,11 @@ public FilterBuilder getFilter(Condition condition, KeyInformation.StoreRetri Geoshape shape = (Geoshape) value; if (shape.getType() == Geoshape.Type.CIRCLE) { Geoshape.Point center = shape.getPoint(); - return FilterBuilders.geoDistanceFilter(key).lat(center.getLatitude()).lon(center.getLongitude()).distance(shape.getRadius(), DistanceUnit.KILOMETERS); + return QueryBuilders.geoDistanceQuery(key).lat(center.getLatitude()).lon(center.getLongitude()).distance(shape.getRadius(), DistanceUnit.KILOMETERS); } else if (shape.getType() == Geoshape.Type.BOX) { Geoshape.Point southwest = shape.getPoint(0); Geoshape.Point northeast = shape.getPoint(1); - return FilterBuilders.geoBoundingBoxFilter(key).bottomRight(southwest.getLatitude(), northeast.getLongitude()).topLeft(northeast.getLatitude(), southwest.getLongitude()); + return QueryBuilders.geoBoundingBoxQuery(key).bottomRight(southwest.getLatitude(), northeast.getLongitude()).topLeft(northeast.getLatitude(), southwest.getLongitude()); } else throw new IllegalArgumentException("Unsupported or invalid search shape type: " + shape.getType()); } else if (value instanceof Date || value instanceof Instant) { @@ -821,17 +851,17 @@ public FilterBuilder getFilter(Condition condition, KeyInformation.StoreRetri switch (numRel) { case 
EQUAL: - return FilterBuilders.inFilter(key, value); + return QueryBuilders.termsQuery(key, value); case NOT_EQUAL: - return FilterBuilders.notFilter(FilterBuilders.inFilter(key, value)); + return QueryBuilders.boolQuery().mustNot(QueryBuilders.termsQuery(key, value)); case LESS_THAN: - return FilterBuilders.rangeFilter(key).lt(value); + return QueryBuilders.rangeQuery(key).lt(value); case LESS_THAN_EQUAL: - return FilterBuilders.rangeFilter(key).lte(value); + return QueryBuilders.rangeQuery(key).lte(value); case GREATER_THAN: - return FilterBuilders.rangeFilter(key).gt(value); + return QueryBuilders.rangeQuery(key).gt(value); case GREATER_THAN_EQUAL: - return FilterBuilders.rangeFilter(key).gte(value); + return QueryBuilders.rangeQuery(key).gte(value); default: throw new IllegalArgumentException("Unexpected relation: " + numRel); } @@ -839,34 +869,35 @@ public FilterBuilder getFilter(Condition condition, KeyInformation.StoreRetri Cmp numRel = (Cmp) janusgraphPredicate; switch (numRel) { case EQUAL: - return FilterBuilders.inFilter(key, value); + return QueryBuilders.termsQuery(key, value); case NOT_EQUAL: - return FilterBuilders.notFilter(FilterBuilders.inFilter(key, value)); + return QueryBuilders.boolQuery().mustNot(QueryBuilders.termsQuery(key, value)); default: throw new IllegalArgumentException("Boolean types only support EQUAL or NOT_EQUAL"); } } else if (value instanceof UUID) { if (janusgraphPredicate == Cmp.EQUAL) { - return FilterBuilders.termFilter(key, value); + return QueryBuilders.termQuery(key, value); } else if (janusgraphPredicate == Cmp.NOT_EQUAL) { - return FilterBuilders.notFilter(FilterBuilders.termFilter(key, value)); + return QueryBuilders.boolQuery().mustNot(QueryBuilders.termQuery(key, value)); } else { throw new IllegalArgumentException("Only equal or not equal is supported for UUIDs: " + janusgraphPredicate); } } else throw new IllegalArgumentException("Unsupported type: " + value); } else if (condition instanceof Not) { - return 
FilterBuilders.notFilter(getFilter(((Not) condition).getChild(),informations)); + return QueryBuilders.boolQuery().mustNot(getFilter(((Not) condition).getChild(),informations)); } else if (condition instanceof And) { - AndFilterBuilder b = FilterBuilders.andFilter(); + BoolQueryBuilder b = QueryBuilders.boolQuery(); for (Condition c : condition.getChildren()) { - b.add(getFilter(c,informations)); + b.must(getFilter(c,informations)); } return b; } else if (condition instanceof Or) { - OrFilterBuilder b = FilterBuilders.orFilter(); + BoolQueryBuilder b = QueryBuilders.boolQuery(); + b.minimumNumberShouldMatch(1); for (Condition c : condition.getChildren()) { - b.add(getFilter(c,informations)); + b.should(getFilter(c,informations)); } return b; } else throw new IllegalArgumentException("Invalid condition: " + condition); @@ -1045,7 +1076,7 @@ public void clearStorage() throws BackendException { .delete(new DeleteIndexRequest(indexName)).actionGet(); // We wait for one second to let ES delete the river Thread.sleep(1000); - } catch (IndexMissingException e) { + } catch (IndexNotFoundException e) { // Index does not exist... 
Fine } } catch (Exception e) { diff --git a/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/ElasticSearchSetup.java b/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/ElasticSearchSetup.java index 3fdfede8ab..9598f190ba 100644 --- a/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/ElasticSearchSetup.java +++ b/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/ElasticSearchSetup.java @@ -23,7 +23,7 @@ import org.apache.commons.lang.StringUtils; import org.elasticsearch.client.Client; import org.elasticsearch.client.transport.TransportClient; -import org.elasticsearch.common.settings.ImmutableSettings; +import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.transport.InetSocketTransportAddress; import org.elasticsearch.node.Node; import org.elasticsearch.node.NodeBuilder; @@ -32,6 +32,7 @@ import java.io.*; import java.lang.reflect.Array; +import java.net.InetAddress; import java.util.List; import java.util.Map; @@ -82,7 +83,7 @@ public enum ElasticSearchSetup { public Connection connect(Configuration config) throws IOException { log.debug("Configuring TransportClient"); - ImmutableSettings.Builder settingsBuilder = settingsBuilder(config); + Settings.Builder settingsBuilder = settingsBuilder(config); if (config.has(ElasticSearchIndex.CLIENT_SNIFF)) { String k = "client.transport.sniff"; @@ -90,7 +91,9 @@ public Connection connect(Configuration config) throws IOException { log.debug("Set {}: {}", k, config.get(ElasticSearchIndex.CLIENT_SNIFF)); } - TransportClient tc = new TransportClient(settingsBuilder.build()); + settingsBuilder.put("index.max_result_window", Integer.MAX_VALUE); + + TransportClient tc = TransportClient.builder().settings(settingsBuilder.build()).build(); int defaultPort = config.has(INDEX_PORT) ? 
config.get(INDEX_PORT) : ElasticSearchIndex.HOST_PORT_DEFAULT; for (String host : config.get(INDEX_HOSTS)) { String[] hostparts = host.split(":"); @@ -98,7 +101,7 @@ public Connection connect(Configuration config) throws IOException { int hostport = defaultPort; if (hostparts.length == 2) hostport = Integer.parseInt(hostparts[1]); log.info("Configured remote host: {} : {}", hostname, hostport); - tc.addTransportAddress(new InetSocketTransportAddress(hostname, hostport)); + tc.addTransportAddress(new InetSocketTransportAddress(InetAddress.getByName(hostname), hostport)); } return new Connection(null, tc); } @@ -113,7 +116,7 @@ public Connection connect(Configuration config) throws IOException { log.debug("Configuring Node Client"); - ImmutableSettings.Builder settingsBuilder = settingsBuilder(config); + Settings.Builder settingsBuilder = settingsBuilder(config); if (config.has(ElasticSearchIndex.TTL_INTERVAL)) { String k = "indices.ttl.interval"; @@ -123,7 +126,7 @@ public Connection connect(Configuration config) throws IOException { makeLocalDirsIfNecessary(settingsBuilder, config); - NodeBuilder nodeBuilder = NodeBuilder.nodeBuilder().settings(settingsBuilder.build()); + NodeBuilder nodeBuilder = NodeBuilder.nodeBuilder(); // Apply explicit JanusGraph properties file overrides (otherwise conf-file or ES defaults apply) if (config.has(ElasticSearchIndex.CLIENT_ONLY)) { @@ -134,9 +137,14 @@ public Connection connect(Configuration config) throws IOException { if (config.has(ElasticSearchIndex.LOCAL_MODE)) nodeBuilder.local(config.get(ElasticSearchIndex.LOCAL_MODE)); - if (config.has(ElasticSearchIndex.LOAD_DEFAULT_NODE_SETTINGS)) - nodeBuilder.loadConfigSettings(config.get(ElasticSearchIndex.LOAD_DEFAULT_NODE_SETTINGS)); + if (config.has(ElasticSearchIndex.LOAD_DEFAULT_NODE_SETTINGS)) { + // Elasticsearch >2.3 always loads default settings + String k = "config.ignore_system_properties"; + settingsBuilder.put(k, 
!config.get(ElasticSearchIndex.LOAD_DEFAULT_NODE_SETTINGS)); + } + settingsBuilder.put("index.max_result_window", Integer.MAX_VALUE); + nodeBuilder.settings(settingsBuilder.build()); Node node = nodeBuilder.node(); Client client = node.client(); return new Connection(node, client); @@ -159,7 +167,7 @@ public Connection connect(Configuration config) throws IOException { *
  • If ignore-cluster-name is set, copy that value to client.transport.ignore_cluster_name in the settings builder
  • *
  • If client-sniff is set, copy that value to client.transport.sniff in the settings builder
  • *
  • If ttl-interval is set, copy that volue to indices.ttl.interval in the settings builder
  • - *
  • Unconditionally set script.disable_dynamic to false (i.e. enable dynamic scripting)
  • + *
  • Unconditionally set script.inline to true (i.e. enable inline scripting)
  • * * * This method then returns the builder. @@ -168,13 +176,15 @@ public Connection connect(Configuration config) throws IOException { * @return ES settings builder configured according to the {@code config} parameter * @throws java.io.IOException if conf-file was set but could not be read */ - private static ImmutableSettings.Builder settingsBuilder(Configuration config) throws IOException { + private static Settings.Builder settingsBuilder(Configuration config) throws IOException { - ImmutableSettings.Builder settings = ImmutableSettings.settingsBuilder(); + Settings.Builder settings = Settings.settingsBuilder(); // Set JanusGraph defaults settings.put("client.transport.ignore_cluster_name", true); + settings.put("path.home", System.getProperty("java.io.tmpdir")); + // Apply overrides from ES conf file applySettingsFromFile(settings, config, INDEX_CONF_FILE); @@ -198,21 +208,21 @@ private static ImmutableSettings.Builder settingsBuilder(Configuration config) t log.debug("Set {}: {}", k, ignoreClusterName); } - // Force-enable dynamic scripting. This is probably only useful in Node mode. - String disableScriptsKey = "script.disable_dynamic"; - String disableScriptsVal = settings.get(disableScriptsKey); - if (null != disableScriptsVal && !"false".equals(disableScriptsVal)) { - log.warn("JanusGraph requires Elasticsearch dynamic scripting. Setting {} to false. " + - "Dynamic scripting must be allowed in the Elasticsearch cluster configuration.", - disableScriptsKey); + // Force-enable inline scripting. This is probably only useful in Node mode. + String inlineScriptsKey = "script.inline"; + String inlineScriptsVal = settings.get(inlineScriptsKey); + if (null != inlineScriptsVal && !"true".equals(inlineScriptsVal)) { + log.warn("JanusGraph requires Elasticsearch inline scripting. Setting {} to true. 
" + + "Inline scripting must be allowed in the Elasticsearch cluster configuration.", + inlineScriptsKey); } - settings.put(disableScriptsKey, false); - log.debug("Set {}: {}", disableScriptsKey, false); + settings.put(inlineScriptsKey, true); + log.debug("Set {}: {}", inlineScriptsKey, false); return settings; } - static void applySettingsFromFile(ImmutableSettings.Builder settings, + static void applySettingsFromFile(Settings.Builder settings, Configuration config, ConfigOption confFileOption) throws FileNotFoundException { if (config.has(confFileOption)) { @@ -230,7 +240,7 @@ static void applySettingsFromFile(ImmutableSettings.Builder settings, } } - static void applySettingsFromJanusGraphConf(ImmutableSettings.Builder settings, + static void applySettingsFromJanusGraphConf(Settings.Builder settings, Configuration config, ConfigNamespace rootNS) { int keysLoaded = 0; @@ -262,7 +272,7 @@ static void applySettingsFromJanusGraphConf(ImmutableSettings.Builder settings, } - private static void makeLocalDirsIfNecessary(ImmutableSettings.Builder settingsBuilder, Configuration config) { + private static void makeLocalDirsIfNecessary(Settings.Builder settingsBuilder, Configuration config) { if (config.has(INDEX_DIRECTORY)) { String dataDirectory = config.get(INDEX_DIRECTORY); File f = new File(dataDirectory); @@ -270,17 +280,9 @@ private static void makeLocalDirsIfNecessary(ImmutableSettings.Builder settingsB log.info("Creating ES directory prefix: {}", f); f.mkdirs(); } - for (String sub : ElasticSearchIndex.DATA_SUBDIRS) { - String subdir = dataDirectory + File.separator + sub; - f = new File(subdir); - if (!f.exists()) { - log.info("Creating ES {} directory: {}", sub, f); - f.mkdirs(); - } - settingsBuilder.put("path." 
+ sub, subdir); - log.debug("Set ES {} directory: {}", sub, f); - } + settingsBuilder.put("path.home", dataDirectory); } + } private static final Logger log = LoggerFactory.getLogger(ElasticSearchSetup.class); diff --git a/janusgraph-es/src/test/bin/elasticsearch b/janusgraph-es/src/test/bin/elasticsearch index a203d636f8..8461e00531 100755 --- a/janusgraph-es/src/test/bin/elasticsearch +++ b/janusgraph-es/src/test/bin/elasticsearch @@ -1,9 +1,5 @@ #!/bin/sh -# OPTIONS: -# -d: daemonize, start in the background -# -p : log the pid to a file (useful to kill it later) - # CONTROLLING STARTUP: # # This script relies on few environment variables to determine startup @@ -46,16 +42,16 @@ # Be aware that you will be entirely responsible for populating the needed # environment variables. - # Maven will replace the project.name with elasticsearch below. If that # hasn't been done, we assume that this is not a packaged version and the # user has forgotten to run Maven to create a package. -IS_PACKAGED_VERSION='elasticsearch' -if [ "$IS_PACKAGED_VERSION" != "elasticsearch" ]; then + +IS_PACKAGED_VERSION='distributions' +if [ "$IS_PACKAGED_VERSION" != "distributions" ]; then cat >&2 << EOF Error: You must build the project with Maven or download a pre-built package before you can run Elasticsearch. See 'Building from Source' in README.textile -or visit http://www.elasticsearch.org/download to get a pre-built package. +or visit https://www.elastic.co/download to get a pre-built package. EOF exit 1 fi @@ -83,7 +79,7 @@ ES_HOME=`cd "$ES_HOME"; pwd` #### Start JanusGraph-specific edit ES_INCLUDE="$ES_HOME/bin/elasticsearch.in.sh" -ES_CLASSPATH="`cat $ES_HOME/target/es_classpath.txt`" +ES_CLASSPATH="$ES_HOME/target/es_jarhell.jar:`cat $ES_HOME/target/es_classpath.txt`" #### End JanusGraph-specific edit # If an include wasn't specified in the environment, then search for one... 
@@ -93,6 +89,7 @@ if [ "x$ES_INCLUDE" = "x" ]; then /usr/local/share/elasticsearch/elasticsearch.in.sh \ /opt/elasticsearch/elasticsearch.in.sh \ ~/.elasticsearch.in.sh \ + "$ES_HOME/bin/elasticsearch.in.sh" \ "`dirname "$0"`"/elasticsearch.in.sh; do if [ -r "$include" ]; then . "$include" @@ -120,6 +117,14 @@ if [ -z "$ES_CLASSPATH" ]; then exit 1 fi +# don't let JAVA_TOOL_OPTIONS slip in (e.g. crazy agents in ubuntu) +# works around https://bugs.launchpad.net/ubuntu/+source/jayatana/+bug/1441487 +if [ "x$JAVA_TOOL_OPTIONS" != "x" ]; then + echo "Warning: Ignoring JAVA_TOOL_OPTIONS=$JAVA_TOOL_OPTIONS" + echo "Please pass JVM parameters via JAVA_OPTS instead" + unset JAVA_TOOL_OPTIONS +fi + # Special-case path variables. case `uname` in CYGWIN*) @@ -128,91 +133,29 @@ case `uname` in ;; esac -launch_service() -{ - pidpath=$1 - daemonized=$2 - props=$3 - es_parms="-Delasticsearch" +# full hostname passed through cut for portability on systems that do not support hostname -s +# export on separate line for shells that do not support combining definition and export +HOSTNAME=`hostname | cut -d. -f1` +export HOSTNAME - if [ "x$pidpath" != "x" ]; then - es_parms="$es_parms -Des.pidfile=$pidpath" +# manual parsing to find out, if process should be detached +daemonized=`echo $* | egrep -- '(^-d |-d$| -d |--daemonize$|--daemonize )'` +if [ -z "$daemonized" ] ; then + exec "$JAVA" $JAVA_OPTS $ES_JAVA_OPTS -Des.path.home="$ES_HOME" -cp "$ES_CLASSPATH" \ + org.elasticsearch.bootstrap.Elasticsearch start "$@" +else + exec "$JAVA" $JAVA_OPTS $ES_JAVA_OPTS -Des.path.home="$ES_HOME" -cp "$ES_CLASSPATH" \ + org.elasticsearch.bootstrap.Elasticsearch start "$@" <&- & + retval=$? + pid=$! + [ $retval -eq 0 ] || exit $retval + if [ ! -z "$ES_STARTUP_SLEEP_TIME" ]; then + sleep $ES_STARTUP_SLEEP_TIME fi - - # The es-foreground option will tell Elasticsearch not to close stdout/stderr, but it's up to us not to daemonize. 
- if [ "x$daemonized" = "x" ]; then - es_parms="$es_parms -Des.foreground=yes" - exec "$JAVA" $JAVA_OPTS $ES_JAVA_OPTS $es_parms -Des.path.home="$ES_HOME" -cp "$ES_CLASSPATH" $props \ - org.elasticsearch.bootstrap.Elasticsearch - # exec without running it in the background, makes it replace this shell, we'll never get here... - # no need to return something - else - # Startup Elasticsearch, background it, and write the pid. - exec "$JAVA" $JAVA_OPTS $ES_JAVA_OPTS $es_parms -Des.path.home="$ES_HOME" -cp "$ES_CLASSPATH" $props \ - org.elasticsearch.bootstrap.Elasticsearch <&- & - return $? + if ! ps -p $pid > /dev/null ; then + exit 1 fi -} - -# Parse any long getopt options and put them into properties before calling getopt below -# Be dash compatible to make sure running under ubuntu works -ARGV="" -while [ $# -gt 0 ] -do - case $1 in - --*=*) properties="$properties -Des.${1#--}" - shift 1 - ;; - --*) properties="$properties -Des.${1#--}=$2" - shift 2 - ;; - *) ARGV="$ARGV $1" ; shift - esac -done - -# Parse any command line options. -args=`getopt vdhp:D:X: $ARGV` -eval set -- "$args" - -while true; do - case $1 in - -v) - "$JAVA" $JAVA_OPTS $ES_JAVA_OPTS $es_parms -Des.path.home="$ES_HOME" -cp "$ES_CLASSPATH" $props \ - org.elasticsearch.Version - exit 0 - ;; - -p) - pidfile="$2" - shift 2 - ;; - -d) - daemonized="yes" - shift - ;; - -h) - echo "Usage: $0 [-d] [-h] [-p pidfile]" - exit 0 - ;; - -D) - properties="$properties -D$2" - shift 2 - ;; - -X) - properties="$properties -X$2" - shift 2 - ;; - --) - shift - break - ;; - *) - echo "Error parsing argument $1!" >&2 - exit 1 - ;; - esac -done - -# Start up the service -launch_service "$pidfile" "$daemonized" "$properties" + exit 0 +fi exit $? 
diff --git a/janusgraph-es/src/test/bin/elasticsearch.in.sh b/janusgraph-es/src/test/bin/elasticsearch.in.sh index 4263fdfc0a..376d811e7e 100755 --- a/janusgraph-es/src/test/bin/elasticsearch.in.sh +++ b/janusgraph-es/src/test/bin/elasticsearch.in.sh @@ -1,6 +1,6 @@ #!/bin/sh -ES_CLASSPATH=$ES_CLASSPATH:$ES_HOME/lib/elasticsearch-1.2.1.jar:$ES_HOME/lib/*:$ES_HOME/lib/sigar/* +# check in case a user was using this mechanism if [ "x$ES_MIN_MEM" = "x" ]; then ES_MIN_MEM=256m @@ -30,9 +30,6 @@ if [ "x$ES_DIRECT_SIZE" != "x" ]; then JAVA_OPTS="$JAVA_OPTS -XX:MaxDirectMemorySize=${ES_DIRECT_SIZE}" fi -# reduce the per-thread stack size -JAVA_OPTS="$JAVA_OPTS -Xss256k" - # set to headless, just in case JAVA_OPTS="$JAVA_OPTS -Djava.awt.headless=true" @@ -41,20 +38,28 @@ if [ "x$ES_USE_IPV4" != "x" ]; then JAVA_OPTS="$JAVA_OPTS -Djava.net.preferIPv4Stack=true" fi -JAVA_OPTS="$JAVA_OPTS -XX:+UseParNewGC" -JAVA_OPTS="$JAVA_OPTS -XX:+UseConcMarkSweepGC" +# Add gc options. ES_GC_OPTS is unsupported, for internal testing +if [ "x$ES_GC_OPTS" = "x" ]; then + ES_GC_OPTS="$ES_GC_OPTS -XX:+UseParNewGC" + ES_GC_OPTS="$ES_GC_OPTS -XX:+UseConcMarkSweepGC" + ES_GC_OPTS="$ES_GC_OPTS -XX:CMSInitiatingOccupancyFraction=75" + ES_GC_OPTS="$ES_GC_OPTS -XX:+UseCMSInitiatingOccupancyOnly" +fi -JAVA_OPTS="$JAVA_OPTS -XX:CMSInitiatingOccupancyFraction=75" -JAVA_OPTS="$JAVA_OPTS -XX:+UseCMSInitiatingOccupancyOnly" +JAVA_OPTS="$JAVA_OPTS $ES_GC_OPTS" # GC logging options -if [ "x$ES_USE_GC_LOGGING" != "x" ]; then +if [ -n "$ES_GC_LOG_FILE" ]; then JAVA_OPTS="$JAVA_OPTS -XX:+PrintGCDetails" JAVA_OPTS="$JAVA_OPTS -XX:+PrintGCTimeStamps" + JAVA_OPTS="$JAVA_OPTS -XX:+PrintGCDateStamps" JAVA_OPTS="$JAVA_OPTS -XX:+PrintClassHistogram" JAVA_OPTS="$JAVA_OPTS -XX:+PrintTenuringDistribution" JAVA_OPTS="$JAVA_OPTS -XX:+PrintGCApplicationStoppedTime" - JAVA_OPTS="$JAVA_OPTS -Xloggc:/var/log/elasticsearch/gc.log" + JAVA_OPTS="$JAVA_OPTS -Xloggc:$ES_GC_LOG_FILE" + + # Ensure that the directory for the log file 
exists: the JVM will not create it. + mkdir -p "`dirname \"$ES_GC_LOG_FILE\"`" fi # Causes the JVM to dump its heap on OutOfMemory. @@ -62,3 +67,12 @@ JAVA_OPTS="$JAVA_OPTS -XX:+HeapDumpOnOutOfMemoryError" # The path to the heap dump location, note directory must exists and have enough # space for a full heap dump. #JAVA_OPTS="$JAVA_OPTS -XX:HeapDumpPath=$ES_HOME/logs/heapdump.hprof" + +# Disables explicit GC +JAVA_OPTS="$JAVA_OPTS -XX:+DisableExplicitGC" + +# Ensure UTF-8 encoding by default (e.g. filenames) +JAVA_OPTS="$JAVA_OPTS -Dfile.encoding=UTF-8" + +# Use our provided JNA always versus the system one +JAVA_OPTS="$JAVA_OPTS -Djna.nosys=true" diff --git a/janusgraph-es/src/test/config/elasticsearch.yml b/janusgraph-es/src/test/config/elasticsearch.yml index 89b9ef291c..0cc9d632d8 100644 --- a/janusgraph-es/src/test/config/elasticsearch.yml +++ b/janusgraph-es/src/test/config/elasticsearch.yml @@ -1,378 +1,97 @@ -##################### Elasticsearch Configuration Example ##################### - -# This file contains an overview of various configuration settings, -# targeted at operations staff. Application developers should -# consult the guide at . -# -# The installation procedure is covered at -# . +# ======================== Elasticsearch Configuration ========================= # -# Elasticsearch comes with reasonable defaults for most settings, -# so you can try it out without bothering with configuration. -# -# Most of the time, these defaults are just fine for running a production -# cluster. If you're fine-tuning your cluster, or wondering about the -# effect of certain configuration option, please _do ask_ on the -# mailing list or IRC channel [http://elasticsearch.org/community]. - -# Any element in the configuration can be replaced with environment variables -# by placing them in ${...} notation. For example: +# NOTE: Elasticsearch comes with reasonable defaults for most settings. 
+# Before you set out to tweak and tune the configuration, make sure you +# understand what are you trying to accomplish and the consequences. # -#node.rack: ${RACK_ENV_VAR} - -# For information on supported formats and syntax for the config file, see -# - - -################################### Cluster ################################### - -# Cluster name identifies your cluster for auto-discovery. If you're running -# multiple clusters on the same network, make sure you're using unique names. +# The primary way of configuring a node is via this file. This template lists +# the most important settings you may want to configure for a production cluster. # -#cluster.name: elasticsearch - - -#################################### Node ##################################### - -# Node names are generated dynamically on startup, so you're relieved -# from configuring them manually. You can tie this node to a specific name: +# Please see the documentation for further information on configuration options: +# # -#node.name: "Franz Kafka" - -# Every node can be configured to allow or deny being eligible as the master, -# and to allow or deny to store the data. +# ---------------------------------- Cluster ----------------------------------- # -# Allow this node to be eligible as a master node (enabled by default): +# Use a descriptive name for your cluster: # -#node.master: true +# cluster.name: my-application # -# Allow this node to store data (enabled by default): +# ------------------------------------ Node ------------------------------------ # -#node.data: true - -# You can exploit these settings to design advanced cluster topologies. +# Use a descriptive name for the node: # -# 1. You want this node to never become a master node, only to hold data. -# This will be the "workhorse" of your cluster. +# node.name: node-1 # -#node.master: false -#node.data: true +# Add custom attributes to the node: # -# 2. 
You want this node to only serve as a master: to not store any data and -# to have free resources. This will be the "coordinator" of your cluster. +# node.rack: r1 # -#node.master: true -#node.data: false +# ----------------------------------- Paths ------------------------------------ # -# 3. You want this node to be neither master nor data node, but -# to act as a "search load balancer" (fetching data from nodes, -# aggregating results, etc.) -# -#node.master: false -#node.data: false - -# Use the Cluster Health API [http://localhost:9200/_cluster/health], the -# Node Info API [http://localhost:9200/_nodes] or GUI tools -# such as , -# , -# and -# to inspect the cluster state. - -# A node can have generic attributes associated with it, which can later be used -# for customized shard allocation filtering, or allocation awareness. An attribute -# is a simple key value pair, similar to node.key: value, here is an example: -# -#node.rack: rack314 - -# By default, multiple nodes are allowed to start from the same installation location -# to disable it, set the following: -#node.max_local_storage_nodes: 1 - - -#################################### Index #################################### - -# You can set a number of options (such as shard/replica options, mapping -# or analyzer definitions, translog settings, ...) for indices globally, -# in this file. -# -# Note, that it makes more sense to configure index settings specifically for -# a certain index, either when creating it or by using the index templates API. -# -# See and -# -# for more information. 
- -# Set the number of shards (splits) of an index (5 by default): -# -#index.number_of_shards: 5 - -# Set the number of replicas (additional copies) of an index (1 by default): -# -#index.number_of_replicas: 1 - -# Note, that for development on a local machine, with small indices, it usually -# makes sense to "disable" the distributed features: -# -#index.number_of_shards: 1 -#index.number_of_replicas: 0 - -# These settings directly affect the performance of index and search operations -# in your cluster. Assuming you have enough machines to hold shards and -# replicas, the rule of thumb is: -# -# 1. Having more *shards* enhances the _indexing_ performance and allows to -# _distribute_ a big index across machines. -# 2. Having more *replicas* enhances the _search_ performance and improves the -# cluster _availability_. -# -# The "number_of_shards" is a one-time setting for an index. -# -# The "number_of_replicas" can be increased or decreased anytime, -# by using the Index Update Settings API. -# -# Elasticsearch takes care about load balancing, relocating, gathering the -# results from nodes, etc. Experiment with different settings to fine-tune -# your setup. - -# Use the Index Status API () to inspect -# the index status. - - -#################################### Paths #################################### - -# Path to directory containing configuration (this file and logging.yml): -# -path.conf: $MAVEN{project.basedir}/config - -# Path to directory where to store index data allocated for this node. +# Path to directory where to store the data (separate multiple locations by comma): # path.data: $MAVEN{project.build.directory}/es-data - -# -# Can optionally include more than one location, causing data to be striped across -# the locations (a la RAID 0) on a file level, favouring locations with most free -# space on creation. 
For example: -# -#path.data: /path/to/data1,/path/to/data2 - -# Path to temporary files: # -path.work: $MAVEN{project.build.directory}/es-work - # Path to log files: # path.logs: $MAVEN{project.build.directory}/es-logs - -# Path to where plugins are installed: # -#path.plugins: /path/to/plugins - - -#################################### Plugin ################################### - -# If a plugin listed here is not installed for current node, the node will not start. +# ----------------------------------- Memory ----------------------------------- # -#plugin.mandatory: mapper-attachments,lang-groovy - - -################################### Memory #################################### - -# Elasticsearch performs poorly when JVM starts swapping: you should ensure that -# it _never_ swaps. +# Lock the memory on startup: # -# Set this property to true to lock the memory: +# bootstrap.mlockall: true # -#bootstrap.mlockall: true - -# Make sure that the ES_MIN_MEM and ES_MAX_MEM environment variables are set -# to the same value, and that the machine has enough memory to allocate -# for Elasticsearch, leaving enough memory for the operating system itself. +# Make sure that the `ES_HEAP_SIZE` environment variable is set to about half the memory +# available on the system and that the owner of the process is allowed to use this limit. # -# You should also make sure that the Elasticsearch process is allowed to lock -# the memory, eg. by using `ulimit -l unlimited`. - - -############################## Network And HTTP ############################### - -# Elasticsearch, by default, binds itself to the 0.0.0.0 address, and listens -# on port [9200-9300] for HTTP traffic and on port [9300-9400] for node-to-node -# communication. (the range means that if the port is busy, it will automatically -# try the next port). - -# Set the bind address specifically (IPv4 or IPv6): +# Elasticsearch performs poorly when the system is swapping the memory. 
# -network.bind_host: 127.0.0.1 - -# Set the address other nodes will use to communicate with this node. If not -# set, it is automatically derived. It must point to an actual IP address. +# ---------------------------------- Network ----------------------------------- # -network.publish_host: 127.0.0.1 - -# Set both 'bind_host' and 'publish_host': +# Set the bind address to a specific IP (IPv4 or IPv6): # network.host: 127.0.0.1 - -# Set a custom port for the node to node communication (9300 by default): -# -#transport.tcp.port: 9300 - -# Enable compression for all communication between nodes (disabled by default): -# -#transport.tcp.compress: true - -# Set a custom port to listen for HTTP traffic: -# -#http.port: 9200 - -# Set a custom allowed content length: -# -#http.max_content_length: 100mb - -# Disable HTTP completely: -# -#http.enabled: false - - -################################### Gateway ################################### - -# The gateway allows for persisting the cluster state between full cluster -# restarts. Every change to the state (such as adding an index) will be stored -# in the gateway, and when the cluster starts up for the first time, -# it will read its state from the gateway. - -# There are several types of gateway implementations. For more information, see -# . - -# The default gateway type is the "local" gateway (recommended): -# -#gateway.type: local - -# Settings below control how and when to start the initial recovery process on -# a full cluster restart (to reuse as much local data as possible when using shared -# gateway). - -# Allow recovery process after N nodes in a cluster are up: -# -#gateway.recover_after_nodes: 1 - -# Set the timeout to initiate the recovery process, once the N nodes -# from previous setting are up (accepts time value): # -#gateway.recover_after_time: 5m - -# Set how many nodes are expected in this cluster. 
Once these N nodes -# are up (and recover_after_nodes is met), begin recovery process immediately -# (without waiting for recover_after_time to expire): +# Set a custom port for HTTP: # -#gateway.expected_nodes: 2 - - -############################# Recovery Throttling ############################# - -# These settings allow to control the process of shards allocation between -# nodes during initial recovery, replica allocation, rebalancing, -# or when adding and removing nodes. - -# Set the number of concurrent recoveries happening on a node: -# -# 1. During the initial recovery +# http.port: 9200 # -#cluster.routing.allocation.node_initial_primaries_recoveries: 4 +# For more information, see the documentation at: +# # -# 2. During adding/removing nodes, rebalancing, etc +# --------------------------------- Discovery ---------------------------------- # -#cluster.routing.allocation.node_concurrent_recoveries: 2 - -# Set to throttle throughput when recovering (eg. 100mb, by default 20mb): -# -#indices.recovery.max_bytes_per_sec: 20mb - -# Set to limit the number of open concurrent streams when -# recovering a shard from a peer: -# -#indices.recovery.concurrent_streams: 5 - - -################################## Discovery ################################## - -# Discovery infrastructure ensures nodes can be found within a cluster -# and master node is elected. Multicast discovery is the default. - -# Set to ensure a node sees N other master eligible nodes to be considered -# operational within the cluster. Its recommended to set it to a higher value -# than 1 when running more than 2 nodes in the cluster. +# Pass an initial list of hosts to perform discovery when new node is started: +# The default list of hosts is ["127.0.0.1", "[::1]"] # -#discovery.zen.minimum_master_nodes: 1 - -# Set the time to wait for ping responses from other nodes when discovering. 
-# Set this option to a higher value on a slow or congested network -# to minimize discovery failures: +# discovery.zen.ping.unicast.hosts: ["host1", "host2"] # -#discovery.zen.ping.timeout: 3s - -# For more information, see -# - -# Unicast discovery allows to explicitly control which nodes will be used -# to discover the cluster. It can be used when multicast is not present, -# or to restrict the cluster communication-wise. +# Prevent the "split brain" by configuring the majority of nodes (total number of nodes / 2 + 1): # -# 1. Disable multicast discovery (enabled by default): +# discovery.zen.minimum_master_nodes: 3 # -#discovery.zen.ping.multicast.enabled: false +# For more information, see the documentation at: +# # -# 2. Configure an initial list of master nodes in the cluster -# to perform discovery when new nodes (master or data) are started: +# ---------------------------------- Gateway ----------------------------------- # -#discovery.zen.ping.unicast.hosts: ["host1", "host2:port"] - -# EC2 discovery allows to use AWS EC2 API in order to perform discovery. +# Block initial recovery after a full cluster restart until N nodes are started: # -# You have to install the cloud-aws plugin for enabling the EC2 discovery. +# gateway.recover_after_nodes: 3 # -# For more information, see -# +# For more information, see the documentation at: +# # -# See -# for a step-by-step tutorial. - -# GCE discovery allows to use Google Compute Engine API in order to perform discovery. +# ---------------------------------- Various ----------------------------------- # -# You have to install the cloud-gce plugin for enabling the GCE discovery. +# Disable starting multiple nodes on a single system: # -# For more information, see . - -# Azure discovery allows to use Azure API in order to perform discovery. +# node.max_local_storage_nodes: 1 # -# You have to install the cloud-azure plugin for enabling the Azure discovery. 
+# Require explicit names when deleting indices: # -# For more information, see . - -################################## Slow Log ################################## - -# Shard level query and fetch threshold logging. - -#index.search.slowlog.threshold.query.warn: 10s -#index.search.slowlog.threshold.query.info: 5s -#index.search.slowlog.threshold.query.debug: 2s -#index.search.slowlog.threshold.query.trace: 500ms - -#index.search.slowlog.threshold.fetch.warn: 1s -#index.search.slowlog.threshold.fetch.info: 800ms -#index.search.slowlog.threshold.fetch.debug: 500ms -#index.search.slowlog.threshold.fetch.trace: 200ms - -#index.indexing.slowlog.threshold.index.warn: 10s -#index.indexing.slowlog.threshold.index.info: 5s -#index.indexing.slowlog.threshold.index.debug: 2s -#index.indexing.slowlog.threshold.index.trace: 500ms - -################################## GC Logging ################################ +# action.destructive_requires_name: true -#monitor.jvm.gc.young.warn: 1000ms -#monitor.jvm.gc.young.info: 700ms -#monitor.jvm.gc.young.debug: 400ms +index.max_result_window: 10000000 -#monitor.jvm.gc.old.warn: 10s -#monitor.jvm.gc.old.info: 5s -#monitor.jvm.gc.old.debug: 2s diff --git a/janusgraph-es/src/test/config/indexCreationOptions.yml b/janusgraph-es/src/test/config/indexCreationOptions.yml deleted file mode 100644 index d91c8867a1..0000000000 --- a/janusgraph-es/src/test/config/indexCreationOptions.yml +++ /dev/null @@ -1,378 +0,0 @@ -##################### Elasticsearch Configuration Example ##################### - -# This file contains an overview of various configuration settings, -# targeted at operations staff. Application developers should -# consult the guide at . -# -# The installation procedure is covered at -# . -# -# Elasticsearch comes with reasonable defaults for most settings, -# so you can try it out without bothering with configuration. -# -# Most of the time, these defaults are just fine for running a production -# cluster. 
If you're fine-tuning your cluster, or wondering about the -# effect of certain configuration option, please _do ask_ on the -# mailing list or IRC channel [http://elasticsearch.org/community]. - -# Any element in the configuration can be replaced with environment variables -# by placing them in ${...} notation. For example: -# -#node.rack: ${RACK_ENV_VAR} - -# For information on supported formats and syntax for the config file, see -# - - -################################### Cluster ################################### - -# Cluster name identifies your cluster for auto-discovery. If you're running -# multiple clusters on the same network, make sure you're using unique names. -# -cluster.name: indexCreationOptions - - -#################################### Node ##################################### - -# Node names are generated dynamically on startup, so you're relieved -# from configuring them manually. You can tie this node to a specific name: -# -#node.name: "Franz Kafka" - -# Every node can be configured to allow or deny being eligible as the master, -# and to allow or deny to store the data. -# -# Allow this node to be eligible as a master node (enabled by default): -# -#node.master: true -# -# Allow this node to store data (enabled by default): -# -#node.data: true - -# You can exploit these settings to design advanced cluster topologies. -# -# 1. You want this node to never become a master node, only to hold data. -# This will be the "workhorse" of your cluster. -# -#node.master: false -#node.data: true -# -# 2. You want this node to only serve as a master: to not store any data and -# to have free resources. This will be the "coordinator" of your cluster. -# -#node.master: true -#node.data: false -# -# 3. You want this node to be neither master nor data node, but -# to act as a "search load balancer" (fetching data from nodes, -# aggregating results, etc.) 
-# -#node.master: false -#node.data: false - -# Use the Cluster Health API [http://localhost:9200/_cluster/health], the -# Node Info API [http://localhost:9200/_nodes] or GUI tools -# such as , -# , -# and -# to inspect the cluster state. - -# A node can have generic attributes associated with it, which can later be used -# for customized shard allocation filtering, or allocation awareness. An attribute -# is a simple key value pair, similar to node.key: value, here is an example: -# -#node.rack: rack314 - -# By default, multiple nodes are allowed to start from the same installation location -# to disable it, set the following: -#node.max_local_storage_nodes: 1 - - -#################################### Index #################################### - -# You can set a number of options (such as shard/replica options, mapping -# or analyzer definitions, translog settings, ...) for indices globally, -# in this file. -# -# Note, that it makes more sense to configure index settings specifically for -# a certain index, either when creating it or by using the index templates API. -# -# See and -# -# for more information. - -# Set the number of shards (splits) of an index (5 by default): -# -#index.number_of_shards: 5 - -# Set the number of replicas (additional copies) of an index (1 by default): -# -#index.number_of_replicas: 1 - -# Note, that for development on a local machine, with small indices, it usually -# makes sense to "disable" the distributed features: -# -#index.number_of_shards: 1 -#index.number_of_replicas: 0 - -# These settings directly affect the performance of index and search operations -# in your cluster. Assuming you have enough machines to hold shards and -# replicas, the rule of thumb is: -# -# 1. Having more *shards* enhances the _indexing_ performance and allows to -# _distribute_ a big index across machines. -# 2. Having more *replicas* enhances the _search_ performance and improves the -# cluster _availability_. 
-# -# The "number_of_shards" is a one-time setting for an index. -# -# The "number_of_replicas" can be increased or decreased anytime, -# by using the Index Update Settings API. -# -# Elasticsearch takes care about load balancing, relocating, gathering the -# results from nodes, etc. Experiment with different settings to fine-tune -# your setup. - -# Use the Index Status API () to inspect -# the index status. - - -#################################### Paths #################################### - -# Path to directory containing configuration (this file and logging.yml): -# -path.conf: $MAVEN{project.basedir}/config - -# Path to directory where to store index data allocated for this node. -# -path.data: $MAVEN{project.build.directory}/es-data - -# -# Can optionally include more than one location, causing data to be striped across -# the locations (a la RAID 0) on a file level, favouring locations with most free -# space on creation. For example: -# -#path.data: /path/to/data1,/path/to/data2 - -# Path to temporary files: -# -path.work: $MAVEN{project.build.directory}/es-work - -# Path to log files: -# -path.logs: $MAVEN{project.build.directory}/es-logs - -# Path to where plugins are installed: -# -#path.plugins: /path/to/plugins - - -#################################### Plugin ################################### - -# If a plugin listed here is not installed for current node, the node will not start. -# -#plugin.mandatory: mapper-attachments,lang-groovy - - -################################### Memory #################################### - -# Elasticsearch performs poorly when JVM starts swapping: you should ensure that -# it _never_ swaps. -# -# Set this property to true to lock the memory: -# -#bootstrap.mlockall: true - -# Make sure that the ES_MIN_MEM and ES_MAX_MEM environment variables are set -# to the same value, and that the machine has enough memory to allocate -# for Elasticsearch, leaving enough memory for the operating system itself. 
-# -# You should also make sure that the Elasticsearch process is allowed to lock -# the memory, eg. by using `ulimit -l unlimited`. - - -############################## Network And HTTP ############################### - -# Elasticsearch, by default, binds itself to the 0.0.0.0 address, and listens -# on port [9200-9300] for HTTP traffic and on port [9300-9400] for node-to-node -# communication. (the range means that if the port is busy, it will automatically -# try the next port). - -# Set the bind address specifically (IPv4 or IPv6): -# -network.bind_host: 127.0.0.1 - -# Set the address other nodes will use to communicate with this node. If not -# set, it is automatically derived. It must point to an actual IP address. -# -network.publish_host: 127.0.0.1 - -# Set both 'bind_host' and 'publish_host': -# -network.host: 127.0.0.1 - -# Set a custom port for the node to node communication (9300 by default): -# -#transport.tcp.port: 9300 - -# Enable compression for all communication between nodes (disabled by default): -# -#transport.tcp.compress: true - -# Set a custom port to listen for HTTP traffic: -# -#http.port: 9200 - -# Set a custom allowed content length: -# -#http.max_content_length: 100mb - -# Disable HTTP completely: -# -#http.enabled: false - - -################################### Gateway ################################### - -# The gateway allows for persisting the cluster state between full cluster -# restarts. Every change to the state (such as adding an index) will be stored -# in the gateway, and when the cluster starts up for the first time, -# it will read its state from the gateway. - -# There are several types of gateway implementations. For more information, see -# . - -# The default gateway type is the "local" gateway (recommended): -# -#gateway.type: local - -# Settings below control how and when to start the initial recovery process on -# a full cluster restart (to reuse as much local data as possible when using shared -# gateway). 
- -# Allow recovery process after N nodes in a cluster are up: -# -#gateway.recover_after_nodes: 1 - -# Set the timeout to initiate the recovery process, once the N nodes -# from previous setting are up (accepts time value): -# -#gateway.recover_after_time: 5m - -# Set how many nodes are expected in this cluster. Once these N nodes -# are up (and recover_after_nodes is met), begin recovery process immediately -# (without waiting for recover_after_time to expire): -# -#gateway.expected_nodes: 2 - - -############################# Recovery Throttling ############################# - -# These settings allow to control the process of shards allocation between -# nodes during initial recovery, replica allocation, rebalancing, -# or when adding and removing nodes. - -# Set the number of concurrent recoveries happening on a node: -# -# 1. During the initial recovery -# -#cluster.routing.allocation.node_initial_primaries_recoveries: 4 -# -# 2. During adding/removing nodes, rebalancing, etc -# -#cluster.routing.allocation.node_concurrent_recoveries: 2 - -# Set to throttle throughput when recovering (eg. 100mb, by default 20mb): -# -#indices.recovery.max_bytes_per_sec: 20mb - -# Set to limit the number of open concurrent streams when -# recovering a shard from a peer: -# -#indices.recovery.concurrent_streams: 5 - - -################################## Discovery ################################## - -# Discovery infrastructure ensures nodes can be found within a cluster -# and master node is elected. Multicast discovery is the default. - -# Set to ensure a node sees N other master eligible nodes to be considered -# operational within the cluster. Its recommended to set it to a higher value -# than 1 when running more than 2 nodes in the cluster. -# -#discovery.zen.minimum_master_nodes: 1 - -# Set the time to wait for ping responses from other nodes when discovering. 
-# Set this option to a higher value on a slow or congested network -# to minimize discovery failures: -# -#discovery.zen.ping.timeout: 3s - -# For more information, see -# - -# Unicast discovery allows to explicitly control which nodes will be used -# to discover the cluster. It can be used when multicast is not present, -# or to restrict the cluster communication-wise. -# -# 1. Disable multicast discovery (enabled by default): -# -#discovery.zen.ping.multicast.enabled: false -# -# 2. Configure an initial list of master nodes in the cluster -# to perform discovery when new nodes (master or data) are started: -# -#discovery.zen.ping.unicast.hosts: ["host1", "host2:port"] - -# EC2 discovery allows to use AWS EC2 API in order to perform discovery. -# -# You have to install the cloud-aws plugin for enabling the EC2 discovery. -# -# For more information, see -# -# -# See -# for a step-by-step tutorial. - -# GCE discovery allows to use Google Compute Engine API in order to perform discovery. -# -# You have to install the cloud-gce plugin for enabling the GCE discovery. -# -# For more information, see . - -# Azure discovery allows to use Azure API in order to perform discovery. -# -# You have to install the cloud-azure plugin for enabling the Azure discovery. -# -# For more information, see . - -################################## Slow Log ################################## - -# Shard level query and fetch threshold logging. 
- -#index.search.slowlog.threshold.query.warn: 10s -#index.search.slowlog.threshold.query.info: 5s -#index.search.slowlog.threshold.query.debug: 2s -#index.search.slowlog.threshold.query.trace: 500ms - -#index.search.slowlog.threshold.fetch.warn: 1s -#index.search.slowlog.threshold.fetch.info: 800ms -#index.search.slowlog.threshold.fetch.debug: 500ms -#index.search.slowlog.threshold.fetch.trace: 200ms - -#index.indexing.slowlog.threshold.index.warn: 10s -#index.indexing.slowlog.threshold.index.info: 5s -#index.indexing.slowlog.threshold.index.debug: 2s -#index.indexing.slowlog.threshold.index.trace: 500ms - -################################## GC Logging ################################ - -#monitor.jvm.gc.young.warn: 1000ms -#monitor.jvm.gc.young.info: 700ms -#monitor.jvm.gc.young.debug: 400ms - -#monitor.jvm.gc.old.warn: 10s -#monitor.jvm.gc.old.info: 5s -#monitor.jvm.gc.old.debug: 2s diff --git a/janusgraph-es/src/test/config/networkNodeUsingExt.yml b/janusgraph-es/src/test/config/networkNodeUsingExt.yml deleted file mode 100644 index 7d0e2ac206..0000000000 --- a/janusgraph-es/src/test/config/networkNodeUsingExt.yml +++ /dev/null @@ -1,378 +0,0 @@ -##################### Elasticsearch Configuration Example ##################### - -# This file contains an overview of various configuration settings, -# targeted at operations staff. Application developers should -# consult the guide at . -# -# The installation procedure is covered at -# . -# -# Elasticsearch comes with reasonable defaults for most settings, -# so you can try it out without bothering with configuration. -# -# Most of the time, these defaults are just fine for running a production -# cluster. If you're fine-tuning your cluster, or wondering about the -# effect of certain configuration option, please _do ask_ on the -# mailing list or IRC channel [http://elasticsearch.org/community]. - -# Any element in the configuration can be replaced with environment variables -# by placing them in ${...} notation. 
For example: -# -#node.rack: ${RACK_ENV_VAR} - -# For information on supported formats and syntax for the config file, see -# - - -################################### Cluster ################################### - -# Cluster name identifies your cluster for auto-discovery. If you're running -# multiple clusters on the same network, make sure you're using unique names. -# -cluster.name: networkNodeUsingExt - - -#################################### Node ##################################### - -# Node names are generated dynamically on startup, so you're relieved -# from configuring them manually. You can tie this node to a specific name: -# -#node.name: "Franz Kafka" - -# Every node can be configured to allow or deny being eligible as the master, -# and to allow or deny to store the data. -# -# Allow this node to be eligible as a master node (enabled by default): -# -#node.master: true -# -# Allow this node to store data (enabled by default): -# -#node.data: true - -# You can exploit these settings to design advanced cluster topologies. -# -# 1. You want this node to never become a master node, only to hold data. -# This will be the "workhorse" of your cluster. -# -#node.master: false -#node.data: true -# -# 2. You want this node to only serve as a master: to not store any data and -# to have free resources. This will be the "coordinator" of your cluster. -# -#node.master: true -#node.data: false -# -# 3. You want this node to be neither master nor data node, but -# to act as a "search load balancer" (fetching data from nodes, -# aggregating results, etc.) -# -#node.master: false -#node.data: false - -# Use the Cluster Health API [http://localhost:9200/_cluster/health], the -# Node Info API [http://localhost:9200/_nodes] or GUI tools -# such as , -# , -# and -# to inspect the cluster state. - -# A node can have generic attributes associated with it, which can later be used -# for customized shard allocation filtering, or allocation awareness. 
An attribute -# is a simple key value pair, similar to node.key: value, here is an example: -# -#node.rack: rack314 - -# By default, multiple nodes are allowed to start from the same installation location -# to disable it, set the following: -#node.max_local_storage_nodes: 1 - - -#################################### Index #################################### - -# You can set a number of options (such as shard/replica options, mapping -# or analyzer definitions, translog settings, ...) for indices globally, -# in this file. -# -# Note, that it makes more sense to configure index settings specifically for -# a certain index, either when creating it or by using the index templates API. -# -# See and -# -# for more information. - -# Set the number of shards (splits) of an index (5 by default): -# -#index.number_of_shards: 5 - -# Set the number of replicas (additional copies) of an index (1 by default): -# -#index.number_of_replicas: 1 - -# Note, that for development on a local machine, with small indices, it usually -# makes sense to "disable" the distributed features: -# -#index.number_of_shards: 1 -#index.number_of_replicas: 0 - -# These settings directly affect the performance of index and search operations -# in your cluster. Assuming you have enough machines to hold shards and -# replicas, the rule of thumb is: -# -# 1. Having more *shards* enhances the _indexing_ performance and allows to -# _distribute_ a big index across machines. -# 2. Having more *replicas* enhances the _search_ performance and improves the -# cluster _availability_. -# -# The "number_of_shards" is a one-time setting for an index. -# -# The "number_of_replicas" can be increased or decreased anytime, -# by using the Index Update Settings API. -# -# Elasticsearch takes care about load balancing, relocating, gathering the -# results from nodes, etc. Experiment with different settings to fine-tune -# your setup. - -# Use the Index Status API () to inspect -# the index status. 
- - -#################################### Paths #################################### - -# Path to directory containing configuration (this file and logging.yml): -# -path.conf: $MAVEN{project.basedir}/config - -# Path to directory where to store index data allocated for this node. -# -path.data: $MAVEN{project.build.directory}/es-data - -# -# Can optionally include more than one location, causing data to be striped across -# the locations (a la RAID 0) on a file level, favouring locations with most free -# space on creation. For example: -# -#path.data: /path/to/data1,/path/to/data2 - -# Path to temporary files: -# -path.work: $MAVEN{project.build.directory}/es-work - -# Path to log files: -# -path.logs: $MAVEN{project.build.directory}/es-logs - -# Path to where plugins are installed: -# -#path.plugins: /path/to/plugins - - -#################################### Plugin ################################### - -# If a plugin listed here is not installed for current node, the node will not start. -# -#plugin.mandatory: mapper-attachments,lang-groovy - - -################################### Memory #################################### - -# Elasticsearch performs poorly when JVM starts swapping: you should ensure that -# it _never_ swaps. -# -# Set this property to true to lock the memory: -# -#bootstrap.mlockall: true - -# Make sure that the ES_MIN_MEM and ES_MAX_MEM environment variables are set -# to the same value, and that the machine has enough memory to allocate -# for Elasticsearch, leaving enough memory for the operating system itself. -# -# You should also make sure that the Elasticsearch process is allowed to lock -# the memory, eg. by using `ulimit -l unlimited`. - - -############################## Network And HTTP ############################### - -# Elasticsearch, by default, binds itself to the 0.0.0.0 address, and listens -# on port [9200-9300] for HTTP traffic and on port [9300-9400] for node-to-node -# communication. 
(the range means that if the port is busy, it will automatically -# try the next port). - -# Set the bind address specifically (IPv4 or IPv6): -# -network.bind_host: 127.0.0.1 - -# Set the address other nodes will use to communicate with this node. If not -# set, it is automatically derived. It must point to an actual IP address. -# -network.publish_host: 127.0.0.1 - -# Set both 'bind_host' and 'publish_host': -# -network.host: 127.0.0.1 - -# Set a custom port for the node to node communication (9300 by default): -# -#transport.tcp.port: 9300 - -# Enable compression for all communication between nodes (disabled by default): -# -#transport.tcp.compress: true - -# Set a custom port to listen for HTTP traffic: -# -#http.port: 9200 - -# Set a custom allowed content length: -# -#http.max_content_length: 100mb - -# Disable HTTP completely: -# -#http.enabled: false - - -################################### Gateway ################################### - -# The gateway allows for persisting the cluster state between full cluster -# restarts. Every change to the state (such as adding an index) will be stored -# in the gateway, and when the cluster starts up for the first time, -# it will read its state from the gateway. - -# There are several types of gateway implementations. For more information, see -# . - -# The default gateway type is the "local" gateway (recommended): -# -#gateway.type: local - -# Settings below control how and when to start the initial recovery process on -# a full cluster restart (to reuse as much local data as possible when using shared -# gateway). - -# Allow recovery process after N nodes in a cluster are up: -# -#gateway.recover_after_nodes: 1 - -# Set the timeout to initiate the recovery process, once the N nodes -# from previous setting are up (accepts time value): -# -#gateway.recover_after_time: 5m - -# Set how many nodes are expected in this cluster. 
Once these N nodes -# are up (and recover_after_nodes is met), begin recovery process immediately -# (without waiting for recover_after_time to expire): -# -#gateway.expected_nodes: 2 - - -############################# Recovery Throttling ############################# - -# These settings allow to control the process of shards allocation between -# nodes during initial recovery, replica allocation, rebalancing, -# or when adding and removing nodes. - -# Set the number of concurrent recoveries happening on a node: -# -# 1. During the initial recovery -# -#cluster.routing.allocation.node_initial_primaries_recoveries: 4 -# -# 2. During adding/removing nodes, rebalancing, etc -# -#cluster.routing.allocation.node_concurrent_recoveries: 2 - -# Set to throttle throughput when recovering (eg. 100mb, by default 20mb): -# -#indices.recovery.max_bytes_per_sec: 20mb - -# Set to limit the number of open concurrent streams when -# recovering a shard from a peer: -# -#indices.recovery.concurrent_streams: 5 - - -################################## Discovery ################################## - -# Discovery infrastructure ensures nodes can be found within a cluster -# and master node is elected. Multicast discovery is the default. - -# Set to ensure a node sees N other master eligible nodes to be considered -# operational within the cluster. Its recommended to set it to a higher value -# than 1 when running more than 2 nodes in the cluster. -# -#discovery.zen.minimum_master_nodes: 1 - -# Set the time to wait for ping responses from other nodes when discovering. -# Set this option to a higher value on a slow or congested network -# to minimize discovery failures: -# -#discovery.zen.ping.timeout: 3s - -# For more information, see -# - -# Unicast discovery allows to explicitly control which nodes will be used -# to discover the cluster. It can be used when multicast is not present, -# or to restrict the cluster communication-wise. -# -# 1. 
Disable multicast discovery (enabled by default): -# -#discovery.zen.ping.multicast.enabled: false -# -# 2. Configure an initial list of master nodes in the cluster -# to perform discovery when new nodes (master or data) are started: -# -#discovery.zen.ping.unicast.hosts: ["host1", "host2:port"] - -# EC2 discovery allows to use AWS EC2 API in order to perform discovery. -# -# You have to install the cloud-aws plugin for enabling the EC2 discovery. -# -# For more information, see -# -# -# See -# for a step-by-step tutorial. - -# GCE discovery allows to use Google Compute Engine API in order to perform discovery. -# -# You have to install the cloud-gce plugin for enabling the GCE discovery. -# -# For more information, see . - -# Azure discovery allows to use Azure API in order to perform discovery. -# -# You have to install the cloud-azure plugin for enabling the Azure discovery. -# -# For more information, see . - -################################## Slow Log ################################## - -# Shard level query and fetch threshold logging. 
- -#index.search.slowlog.threshold.query.warn: 10s -#index.search.slowlog.threshold.query.info: 5s -#index.search.slowlog.threshold.query.debug: 2s -#index.search.slowlog.threshold.query.trace: 500ms - -#index.search.slowlog.threshold.fetch.warn: 1s -#index.search.slowlog.threshold.fetch.info: 800ms -#index.search.slowlog.threshold.fetch.debug: 500ms -#index.search.slowlog.threshold.fetch.trace: 200ms - -#index.indexing.slowlog.threshold.index.warn: 10s -#index.indexing.slowlog.threshold.index.info: 5s -#index.indexing.slowlog.threshold.index.debug: 2s -#index.indexing.slowlog.threshold.index.trace: 500ms - -################################## GC Logging ################################ - -#monitor.jvm.gc.young.warn: 1000ms -#monitor.jvm.gc.young.info: 700ms -#monitor.jvm.gc.young.debug: 400ms - -#monitor.jvm.gc.old.warn: 10s -#monitor.jvm.gc.old.info: 5s -#monitor.jvm.gc.old.debug: 2s diff --git a/janusgraph-es/src/test/config/networkNodeUsingYaml.yml b/janusgraph-es/src/test/config/networkNodeUsingYaml.yml deleted file mode 100644 index cdda1211cd..0000000000 --- a/janusgraph-es/src/test/config/networkNodeUsingYaml.yml +++ /dev/null @@ -1,378 +0,0 @@ -##################### Elasticsearch Configuration Example ##################### - -# This file contains an overview of various configuration settings, -# targeted at operations staff. Application developers should -# consult the guide at . -# -# The installation procedure is covered at -# . -# -# Elasticsearch comes with reasonable defaults for most settings, -# so you can try it out without bothering with configuration. -# -# Most of the time, these defaults are just fine for running a production -# cluster. If you're fine-tuning your cluster, or wondering about the -# effect of certain configuration option, please _do ask_ on the -# mailing list or IRC channel [http://elasticsearch.org/community]. - -# Any element in the configuration can be replaced with environment variables -# by placing them in ${...} notation. 
For example: -# -#node.rack: ${RACK_ENV_VAR} - -# For information on supported formats and syntax for the config file, see -# - - -################################### Cluster ################################### - -# Cluster name identifies your cluster for auto-discovery. If you're running -# multiple clusters on the same network, make sure you're using unique names. -# -cluster.name: networkNodeUsingYaml - - -#################################### Node ##################################### - -# Node names are generated dynamically on startup, so you're relieved -# from configuring them manually. You can tie this node to a specific name: -# -#node.name: "Franz Kafka" - -# Every node can be configured to allow or deny being eligible as the master, -# and to allow or deny to store the data. -# -# Allow this node to be eligible as a master node (enabled by default): -# -#node.master: true -# -# Allow this node to store data (enabled by default): -# -#node.data: true - -# You can exploit these settings to design advanced cluster topologies. -# -# 1. You want this node to never become a master node, only to hold data. -# This will be the "workhorse" of your cluster. -# -#node.master: false -#node.data: true -# -# 2. You want this node to only serve as a master: to not store any data and -# to have free resources. This will be the "coordinator" of your cluster. -# -#node.master: true -#node.data: false -# -# 3. You want this node to be neither master nor data node, but -# to act as a "search load balancer" (fetching data from nodes, -# aggregating results, etc.) -# -#node.master: false -#node.data: false - -# Use the Cluster Health API [http://localhost:9200/_cluster/health], the -# Node Info API [http://localhost:9200/_nodes] or GUI tools -# such as , -# , -# and -# to inspect the cluster state. - -# A node can have generic attributes associated with it, which can later be used -# for customized shard allocation filtering, or allocation awareness. 
An attribute -# is a simple key value pair, similar to node.key: value, here is an example: -# -#node.rack: rack314 - -# By default, multiple nodes are allowed to start from the same installation location -# to disable it, set the following: -#node.max_local_storage_nodes: 1 - - -#################################### Index #################################### - -# You can set a number of options (such as shard/replica options, mapping -# or analyzer definitions, translog settings, ...) for indices globally, -# in this file. -# -# Note, that it makes more sense to configure index settings specifically for -# a certain index, either when creating it or by using the index templates API. -# -# See and -# -# for more information. - -# Set the number of shards (splits) of an index (5 by default): -# -#index.number_of_shards: 5 - -# Set the number of replicas (additional copies) of an index (1 by default): -# -#index.number_of_replicas: 1 - -# Note, that for development on a local machine, with small indices, it usually -# makes sense to "disable" the distributed features: -# -#index.number_of_shards: 1 -#index.number_of_replicas: 0 - -# These settings directly affect the performance of index and search operations -# in your cluster. Assuming you have enough machines to hold shards and -# replicas, the rule of thumb is: -# -# 1. Having more *shards* enhances the _indexing_ performance and allows to -# _distribute_ a big index across machines. -# 2. Having more *replicas* enhances the _search_ performance and improves the -# cluster _availability_. -# -# The "number_of_shards" is a one-time setting for an index. -# -# The "number_of_replicas" can be increased or decreased anytime, -# by using the Index Update Settings API. -# -# Elasticsearch takes care about load balancing, relocating, gathering the -# results from nodes, etc. Experiment with different settings to fine-tune -# your setup. - -# Use the Index Status API () to inspect -# the index status. 
- - -#################################### Paths #################################### - -# Path to directory containing configuration (this file and logging.yml): -# -path.conf: $MAVEN{project.basedir}/config - -# Path to directory where to store index data allocated for this node. -# -path.data: $MAVEN{project.build.directory}/es-data - -# -# Can optionally include more than one location, causing data to be striped across -# the locations (a la RAID 0) on a file level, favouring locations with most free -# space on creation. For example: -# -#path.data: /path/to/data1,/path/to/data2 - -# Path to temporary files: -# -path.work: $MAVEN{project.build.directory}/es-work - -# Path to log files: -# -path.logs: $MAVEN{project.build.directory}/es-logs - -# Path to where plugins are installed: -# -#path.plugins: /path/to/plugins - - -#################################### Plugin ################################### - -# If a plugin listed here is not installed for current node, the node will not start. -# -#plugin.mandatory: mapper-attachments,lang-groovy - - -################################### Memory #################################### - -# Elasticsearch performs poorly when JVM starts swapping: you should ensure that -# it _never_ swaps. -# -# Set this property to true to lock the memory: -# -#bootstrap.mlockall: true - -# Make sure that the ES_MIN_MEM and ES_MAX_MEM environment variables are set -# to the same value, and that the machine has enough memory to allocate -# for Elasticsearch, leaving enough memory for the operating system itself. -# -# You should also make sure that the Elasticsearch process is allowed to lock -# the memory, eg. by using `ulimit -l unlimited`. - - -############################## Network And HTTP ############################### - -# Elasticsearch, by default, binds itself to the 0.0.0.0 address, and listens -# on port [9200-9300] for HTTP traffic and on port [9300-9400] for node-to-node -# communication. 
(the range means that if the port is busy, it will automatically -# try the next port). - -# Set the bind address specifically (IPv4 or IPv6): -# -network.bind_host: 127.0.0.1 - -# Set the address other nodes will use to communicate with this node. If not -# set, it is automatically derived. It must point to an actual IP address. -# -network.publish_host: 127.0.0.1 - -# Set both 'bind_host' and 'publish_host': -# -network.host: 127.0.0.1 - -# Set a custom port for the node to node communication (9300 by default): -# -#transport.tcp.port: 9300 - -# Enable compression for all communication between nodes (disabled by default): -# -#transport.tcp.compress: true - -# Set a custom port to listen for HTTP traffic: -# -#http.port: 9200 - -# Set a custom allowed content length: -# -#http.max_content_length: 100mb - -# Disable HTTP completely: -# -#http.enabled: false - - -################################### Gateway ################################### - -# The gateway allows for persisting the cluster state between full cluster -# restarts. Every change to the state (such as adding an index) will be stored -# in the gateway, and when the cluster starts up for the first time, -# it will read its state from the gateway. - -# There are several types of gateway implementations. For more information, see -# . - -# The default gateway type is the "local" gateway (recommended): -# -#gateway.type: local - -# Settings below control how and when to start the initial recovery process on -# a full cluster restart (to reuse as much local data as possible when using shared -# gateway). - -# Allow recovery process after N nodes in a cluster are up: -# -#gateway.recover_after_nodes: 1 - -# Set the timeout to initiate the recovery process, once the N nodes -# from previous setting are up (accepts time value): -# -#gateway.recover_after_time: 5m - -# Set how many nodes are expected in this cluster. 
Once these N nodes -# are up (and recover_after_nodes is met), begin recovery process immediately -# (without waiting for recover_after_time to expire): -# -#gateway.expected_nodes: 2 - - -############################# Recovery Throttling ############################# - -# These settings allow to control the process of shards allocation between -# nodes during initial recovery, replica allocation, rebalancing, -# or when adding and removing nodes. - -# Set the number of concurrent recoveries happening on a node: -# -# 1. During the initial recovery -# -#cluster.routing.allocation.node_initial_primaries_recoveries: 4 -# -# 2. During adding/removing nodes, rebalancing, etc -# -#cluster.routing.allocation.node_concurrent_recoveries: 2 - -# Set to throttle throughput when recovering (eg. 100mb, by default 20mb): -# -#indices.recovery.max_bytes_per_sec: 20mb - -# Set to limit the number of open concurrent streams when -# recovering a shard from a peer: -# -#indices.recovery.concurrent_streams: 5 - - -################################## Discovery ################################## - -# Discovery infrastructure ensures nodes can be found within a cluster -# and master node is elected. Multicast discovery is the default. - -# Set to ensure a node sees N other master eligible nodes to be considered -# operational within the cluster. Its recommended to set it to a higher value -# than 1 when running more than 2 nodes in the cluster. -# -#discovery.zen.minimum_master_nodes: 1 - -# Set the time to wait for ping responses from other nodes when discovering. -# Set this option to a higher value on a slow or congested network -# to minimize discovery failures: -# -#discovery.zen.ping.timeout: 3s - -# For more information, see -# - -# Unicast discovery allows to explicitly control which nodes will be used -# to discover the cluster. It can be used when multicast is not present, -# or to restrict the cluster communication-wise. -# -# 1. 
Disable multicast discovery (enabled by default): -# -#discovery.zen.ping.multicast.enabled: false -# -# 2. Configure an initial list of master nodes in the cluster -# to perform discovery when new nodes (master or data) are started: -# -#discovery.zen.ping.unicast.hosts: ["host1", "host2:port"] - -# EC2 discovery allows to use AWS EC2 API in order to perform discovery. -# -# You have to install the cloud-aws plugin for enabling the EC2 discovery. -# -# For more information, see -# -# -# See -# for a step-by-step tutorial. - -# GCE discovery allows to use Google Compute Engine API in order to perform discovery. -# -# You have to install the cloud-gce plugin for enabling the GCE discovery. -# -# For more information, see . - -# Azure discovery allows to use Azure API in order to perform discovery. -# -# You have to install the cloud-azure plugin for enabling the Azure discovery. -# -# For more information, see . - -################################## Slow Log ################################## - -# Shard level query and fetch threshold logging. 
- -#index.search.slowlog.threshold.query.warn: 10s -#index.search.slowlog.threshold.query.info: 5s -#index.search.slowlog.threshold.query.debug: 2s -#index.search.slowlog.threshold.query.trace: 500ms - -#index.search.slowlog.threshold.fetch.warn: 1s -#index.search.slowlog.threshold.fetch.info: 800ms -#index.search.slowlog.threshold.fetch.debug: 500ms -#index.search.slowlog.threshold.fetch.trace: 200ms - -#index.indexing.slowlog.threshold.index.warn: 10s -#index.indexing.slowlog.threshold.index.info: 5s -#index.indexing.slowlog.threshold.index.debug: 2s -#index.indexing.slowlog.threshold.index.trace: 500ms - -################################## GC Logging ################################ - -#monitor.jvm.gc.young.warn: 1000ms -#monitor.jvm.gc.young.info: 700ms -#monitor.jvm.gc.young.debug: 400ms - -#monitor.jvm.gc.old.warn: 10s -#monitor.jvm.gc.old.info: 5s -#monitor.jvm.gc.old.debug: 2s diff --git a/janusgraph-es/src/test/config/transportClient.yml b/janusgraph-es/src/test/config/transportClient.yml deleted file mode 100644 index 36cc947166..0000000000 --- a/janusgraph-es/src/test/config/transportClient.yml +++ /dev/null @@ -1,378 +0,0 @@ -##################### Elasticsearch Configuration Example ##################### - -# This file contains an overview of various configuration settings, -# targeted at operations staff. Application developers should -# consult the guide at . -# -# The installation procedure is covered at -# . -# -# Elasticsearch comes with reasonable defaults for most settings, -# so you can try it out without bothering with configuration. -# -# Most of the time, these defaults are just fine for running a production -# cluster. If you're fine-tuning your cluster, or wondering about the -# effect of certain configuration option, please _do ask_ on the -# mailing list or IRC channel [http://elasticsearch.org/community]. - -# Any element in the configuration can be replaced with environment variables -# by placing them in ${...} notation. 
For example: -# -#node.rack: ${RACK_ENV_VAR} - -# For information on supported formats and syntax for the config file, see -# - - -################################### Cluster ################################### - -# Cluster name identifies your cluster for auto-discovery. If you're running -# multiple clusters on the same network, make sure you're using unique names. -# -cluster.name: transportClient - - -#################################### Node ##################################### - -# Node names are generated dynamically on startup, so you're relieved -# from configuring them manually. You can tie this node to a specific name: -# -#node.name: "Franz Kafka" - -# Every node can be configured to allow or deny being eligible as the master, -# and to allow or deny to store the data. -# -# Allow this node to be eligible as a master node (enabled by default): -# -#node.master: true -# -# Allow this node to store data (enabled by default): -# -#node.data: true - -# You can exploit these settings to design advanced cluster topologies. -# -# 1. You want this node to never become a master node, only to hold data. -# This will be the "workhorse" of your cluster. -# -#node.master: false -#node.data: true -# -# 2. You want this node to only serve as a master: to not store any data and -# to have free resources. This will be the "coordinator" of your cluster. -# -#node.master: true -#node.data: false -# -# 3. You want this node to be neither master nor data node, but -# to act as a "search load balancer" (fetching data from nodes, -# aggregating results, etc.) -# -#node.master: false -#node.data: false - -# Use the Cluster Health API [http://localhost:9200/_cluster/health], the -# Node Info API [http://localhost:9200/_nodes] or GUI tools -# such as , -# , -# and -# to inspect the cluster state. - -# A node can have generic attributes associated with it, which can later be used -# for customized shard allocation filtering, or allocation awareness. 
An attribute -# is a simple key value pair, similar to node.key: value, here is an example: -# -#node.rack: rack314 - -# By default, multiple nodes are allowed to start from the same installation location -# to disable it, set the following: -#node.max_local_storage_nodes: 1 - - -#################################### Index #################################### - -# You can set a number of options (such as shard/replica options, mapping -# or analyzer definitions, translog settings, ...) for indices globally, -# in this file. -# -# Note, that it makes more sense to configure index settings specifically for -# a certain index, either when creating it or by using the index templates API. -# -# See and -# -# for more information. - -# Set the number of shards (splits) of an index (5 by default): -# -#index.number_of_shards: 5 - -# Set the number of replicas (additional copies) of an index (1 by default): -# -#index.number_of_replicas: 1 - -# Note, that for development on a local machine, with small indices, it usually -# makes sense to "disable" the distributed features: -# -#index.number_of_shards: 1 -#index.number_of_replicas: 0 - -# These settings directly affect the performance of index and search operations -# in your cluster. Assuming you have enough machines to hold shards and -# replicas, the rule of thumb is: -# -# 1. Having more *shards* enhances the _indexing_ performance and allows to -# _distribute_ a big index across machines. -# 2. Having more *replicas* enhances the _search_ performance and improves the -# cluster _availability_. -# -# The "number_of_shards" is a one-time setting for an index. -# -# The "number_of_replicas" can be increased or decreased anytime, -# by using the Index Update Settings API. -# -# Elasticsearch takes care about load balancing, relocating, gathering the -# results from nodes, etc. Experiment with different settings to fine-tune -# your setup. - -# Use the Index Status API () to inspect -# the index status. 
- - -#################################### Paths #################################### - -# Path to directory containing configuration (this file and logging.yml): -# -path.conf: $MAVEN{project.basedir}/config - -# Path to directory where to store index data allocated for this node. -# -path.data: $MAVEN{project.build.directory}/es-data - -# -# Can optionally include more than one location, causing data to be striped across -# the locations (a la RAID 0) on a file level, favouring locations with most free -# space on creation. For example: -# -#path.data: /path/to/data1,/path/to/data2 - -# Path to temporary files: -# -path.work: $MAVEN{project.build.directory}/es-work - -# Path to log files: -# -path.logs: $MAVEN{project.build.directory}/es-logs - -# Path to where plugins are installed: -# -#path.plugins: /path/to/plugins - - -#################################### Plugin ################################### - -# If a plugin listed here is not installed for current node, the node will not start. -# -#plugin.mandatory: mapper-attachments,lang-groovy - - -################################### Memory #################################### - -# Elasticsearch performs poorly when JVM starts swapping: you should ensure that -# it _never_ swaps. -# -# Set this property to true to lock the memory: -# -#bootstrap.mlockall: true - -# Make sure that the ES_MIN_MEM and ES_MAX_MEM environment variables are set -# to the same value, and that the machine has enough memory to allocate -# for Elasticsearch, leaving enough memory for the operating system itself. -# -# You should also make sure that the Elasticsearch process is allowed to lock -# the memory, eg. by using `ulimit -l unlimited`. - - -############################## Network And HTTP ############################### - -# Elasticsearch, by default, binds itself to the 0.0.0.0 address, and listens -# on port [9200-9300] for HTTP traffic and on port [9300-9400] for node-to-node -# communication. 
(the range means that if the port is busy, it will automatically -# try the next port). - -# Set the bind address specifically (IPv4 or IPv6): -# -network.bind_host: 127.0.0.1 - -# Set the address other nodes will use to communicate with this node. If not -# set, it is automatically derived. It must point to an actual IP address. -# -network.publish_host: 127.0.0.1 - -# Set both 'bind_host' and 'publish_host': -# -network.host: 127.0.0.1 - -# Set a custom port for the node to node communication (9300 by default): -# -#transport.tcp.port: 9300 - -# Enable compression for all communication between nodes (disabled by default): -# -#transport.tcp.compress: true - -# Set a custom port to listen for HTTP traffic: -# -#http.port: 9200 - -# Set a custom allowed content length: -# -#http.max_content_length: 100mb - -# Disable HTTP completely: -# -#http.enabled: false - - -################################### Gateway ################################### - -# The gateway allows for persisting the cluster state between full cluster -# restarts. Every change to the state (such as adding an index) will be stored -# in the gateway, and when the cluster starts up for the first time, -# it will read its state from the gateway. - -# There are several types of gateway implementations. For more information, see -# . - -# The default gateway type is the "local" gateway (recommended): -# -#gateway.type: local - -# Settings below control how and when to start the initial recovery process on -# a full cluster restart (to reuse as much local data as possible when using shared -# gateway). - -# Allow recovery process after N nodes in a cluster are up: -# -#gateway.recover_after_nodes: 1 - -# Set the timeout to initiate the recovery process, once the N nodes -# from previous setting are up (accepts time value): -# -#gateway.recover_after_time: 5m - -# Set how many nodes are expected in this cluster. 
Once these N nodes -# are up (and recover_after_nodes is met), begin recovery process immediately -# (without waiting for recover_after_time to expire): -# -#gateway.expected_nodes: 2 - - -############################# Recovery Throttling ############################# - -# These settings allow to control the process of shards allocation between -# nodes during initial recovery, replica allocation, rebalancing, -# or when adding and removing nodes. - -# Set the number of concurrent recoveries happening on a node: -# -# 1. During the initial recovery -# -#cluster.routing.allocation.node_initial_primaries_recoveries: 4 -# -# 2. During adding/removing nodes, rebalancing, etc -# -#cluster.routing.allocation.node_concurrent_recoveries: 2 - -# Set to throttle throughput when recovering (eg. 100mb, by default 20mb): -# -#indices.recovery.max_bytes_per_sec: 20mb - -# Set to limit the number of open concurrent streams when -# recovering a shard from a peer: -# -#indices.recovery.concurrent_streams: 5 - - -################################## Discovery ################################## - -# Discovery infrastructure ensures nodes can be found within a cluster -# and master node is elected. Multicast discovery is the default. - -# Set to ensure a node sees N other master eligible nodes to be considered -# operational within the cluster. Its recommended to set it to a higher value -# than 1 when running more than 2 nodes in the cluster. -# -#discovery.zen.minimum_master_nodes: 1 - -# Set the time to wait for ping responses from other nodes when discovering. -# Set this option to a higher value on a slow or congested network -# to minimize discovery failures: -# -#discovery.zen.ping.timeout: 3s - -# For more information, see -# - -# Unicast discovery allows to explicitly control which nodes will be used -# to discover the cluster. It can be used when multicast is not present, -# or to restrict the cluster communication-wise. -# -# 1. 
Disable multicast discovery (enabled by default): -# -#discovery.zen.ping.multicast.enabled: false -# -# 2. Configure an initial list of master nodes in the cluster -# to perform discovery when new nodes (master or data) are started: -# -#discovery.zen.ping.unicast.hosts: ["host1", "host2:port"] - -# EC2 discovery allows to use AWS EC2 API in order to perform discovery. -# -# You have to install the cloud-aws plugin for enabling the EC2 discovery. -# -# For more information, see -# -# -# See -# for a step-by-step tutorial. - -# GCE discovery allows to use Google Compute Engine API in order to perform discovery. -# -# You have to install the cloud-gce plugin for enabling the GCE discovery. -# -# For more information, see . - -# Azure discovery allows to use Azure API in order to perform discovery. -# -# You have to install the cloud-azure plugin for enabling the Azure discovery. -# -# For more information, see . - -################################## Slow Log ################################## - -# Shard level query and fetch threshold logging. 
- -#index.search.slowlog.threshold.query.warn: 10s -#index.search.slowlog.threshold.query.info: 5s -#index.search.slowlog.threshold.query.debug: 2s -#index.search.slowlog.threshold.query.trace: 500ms - -#index.search.slowlog.threshold.fetch.warn: 1s -#index.search.slowlog.threshold.fetch.info: 800ms -#index.search.slowlog.threshold.fetch.debug: 500ms -#index.search.slowlog.threshold.fetch.trace: 200ms - -#index.indexing.slowlog.threshold.index.warn: 10s -#index.indexing.slowlog.threshold.index.info: 5s -#index.indexing.slowlog.threshold.index.debug: 2s -#index.indexing.slowlog.threshold.index.trace: 500ms - -################################## GC Logging ################################ - -#monitor.jvm.gc.young.warn: 1000ms -#monitor.jvm.gc.young.info: 700ms -#monitor.jvm.gc.young.debug: 400ms - -#monitor.jvm.gc.old.warn: 10s -#monitor.jvm.gc.old.info: 5s -#monitor.jvm.gc.old.debug: 2s diff --git a/janusgraph-es/src/test/java/org/janusgraph/diskstorage/es/ElasticSearchConfigTest.java b/janusgraph-es/src/test/java/org/janusgraph/diskstorage/es/ElasticSearchConfigTest.java index be52f90683..bc73ef1bfd 100644 --- a/janusgraph-es/src/test/java/org/janusgraph/diskstorage/es/ElasticSearchConfigTest.java +++ b/janusgraph-es/src/test/java/org/janusgraph/diskstorage/es/ElasticSearchConfigTest.java @@ -33,16 +33,19 @@ import org.janusgraph.graphdb.query.condition.PredicateCondition; import org.janusgraph.util.system.IOUtils; import org.apache.commons.configuration.BaseConfiguration; +import org.apache.commons.io.FileUtils; import org.elasticsearch.action.admin.indices.settings.get.GetSettingsRequest; import org.elasticsearch.action.admin.indices.settings.get.GetSettingsResponse; -import org.elasticsearch.common.settings.ImmutableSettings; +import org.elasticsearch.common.settings.Settings; import org.elasticsearch.node.Node; import org.elasticsearch.node.NodeBuilder; import org.junit.Assert; +import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; import 
java.io.File; +import java.io.IOException; import java.time.Duration; import static org.janusgraph.diskstorage.es.ElasticSearchIndex.*; @@ -71,19 +74,23 @@ public static void killElasticsearch() { esr.stop(); } + @Before + public void setup() throws IOException { + String baseDir = Joiner.on(File.separator).join("target", "es"); + FileUtils.deleteDirectory(new File(baseDir + File.separator + "data")); + } + @Test public void testJanusGraphFactoryBuilder() { - String baseDir = Joiner.on(File.separator).join("target", "es", "janusgraphfactory_jvmlocal_ext"); + String baseDir = Joiner.on(File.separator).join("target", "es"); JanusGraphFactory.Builder builder = JanusGraphFactory.build(); builder.set("storage.backend", "inmemory"); builder.set("index." + INDEX_NAME + ".elasticsearch.interface", "NODE"); builder.set("index." + INDEX_NAME + ".elasticsearch.ext.node.data", "true"); builder.set("index." + INDEX_NAME + ".elasticsearch.ext.node.client", "false"); builder.set("index." + INDEX_NAME + ".elasticsearch.ext.node.local", "true"); - builder.set("index." + INDEX_NAME + ".elasticsearch.ext.path.data", baseDir + File.separator + "data"); - builder.set("index." + INDEX_NAME + ".elasticsearch.ext.path.work", baseDir + File.separator + "work"); - builder.set("index." + INDEX_NAME + ".elasticsearch.ext.path.logs", baseDir + File.separator + "logs"); + builder.set("index." 
+ INDEX_NAME + ".elasticsearch.ext.path.home", baseDir); JanusGraph graph = builder.open(); // Must not throw an exception assertTrue(graph.isOpen()); graph.close(); @@ -91,7 +98,7 @@ public void testJanusGraphFactoryBuilder() @Test public void testTransportClient() throws BackendException, InterruptedException { - ElasticsearchRunner esr = new ElasticsearchRunner(".", "transportClient.yml"); + ElasticsearchRunner esr = new ElasticsearchRunner("."); esr.start(); ModifiableConfiguration config = GraphDatabaseConfiguration.buildGraphConfiguration(); config.set(INTERFACE, ElasticSearchSetup.TRANSPORT_CLIENT.toString(), INDEX_NAME); @@ -120,17 +127,13 @@ public void testTransportClient() throws BackendException, InterruptedException @Test public void testLocalNodeUsingExt() throws BackendException, InterruptedException { - String baseDir = Joiner.on(File.separator).join("target", "es", "jvmlocal_ext"); - - assertFalse(new File(baseDir + File.separator + "data").exists()); + String baseDir = Joiner.on(File.separator).join("target", "es"); CommonsConfiguration cc = new CommonsConfiguration(new BaseConfiguration()); cc.set("index." + INDEX_NAME + ".elasticsearch.ext.node.data", "true"); cc.set("index." + INDEX_NAME + ".elasticsearch.ext.node.client", "false"); cc.set("index." + INDEX_NAME + ".elasticsearch.ext.node.local", "true"); - cc.set("index." + INDEX_NAME + ".elasticsearch.ext.path.data", baseDir + File.separator + "data"); - cc.set("index." + INDEX_NAME + ".elasticsearch.ext.path.work", baseDir + File.separator + "work"); - cc.set("index." + INDEX_NAME + ".elasticsearch.ext.path.logs", baseDir + File.separator + "logs"); + cc.set("index." 
+ INDEX_NAME + ".elasticsearch.ext.path.home", baseDir); ModifiableConfiguration config = new ModifiableConfiguration(GraphDatabaseConfiguration.ROOT_NS, cc, BasicConfiguration.Restriction.NONE); @@ -146,9 +149,7 @@ public void testLocalNodeUsingExt() throws BackendException, InterruptedExceptio @Test public void testLocalNodeUsingExtAndIndexDirectory() throws BackendException, InterruptedException { - String baseDir = Joiner.on(File.separator).join("target", "es", "jvmlocal_ext2"); - - assertFalse(new File(baseDir + File.separator + "data").exists()); + String baseDir = Joiner.on(File.separator).join("target", "es"); CommonsConfiguration cc = new CommonsConfiguration(new BaseConfiguration()); cc.set("index." + INDEX_NAME + ".elasticsearch.ext.node.data", "true"); @@ -170,9 +171,7 @@ public void testLocalNodeUsingExtAndIndexDirectory() throws BackendException, In @Test public void testLocalNodeUsingYaml() throws BackendException, InterruptedException { - String baseDir = Joiner.on(File.separator).join("target", "es", "jvmlocal_yml"); - - assertFalse(new File(baseDir + File.separator + "data").exists()); + String baseDir = Joiner.on(File.separator).join("target", "es"); ModifiableConfiguration config = GraphDatabaseConfiguration.buildGraphConfiguration(); config.set(INTERFACE, ElasticSearchSetup.NODE.toString(), INDEX_NAME); @@ -188,12 +187,11 @@ public void testLocalNodeUsingYaml() throws BackendException, InterruptedExcepti @Test public void testNetworkNodeUsingExt() throws BackendException, InterruptedException { - ElasticsearchRunner esr = new ElasticsearchRunner(".", "networkNodeUsingExt.yml"); + ElasticsearchRunner esr = new ElasticsearchRunner("."); esr.start(); CommonsConfiguration cc = new CommonsConfiguration(new BaseConfiguration()); cc.set("index." + INDEX_NAME + ".elasticsearch.ext.node.data", "false"); cc.set("index." + INDEX_NAME + ".elasticsearch.ext.node.client", "true"); - cc.set("index." 
+ INDEX_NAME + ".elasticsearch.ext.cluster.name", "networkNodeUsingExt"); cc.set("index." + INDEX_NAME + ".elasticsearch.ext.discovery.zen.ping.multicast.enabled", "false"); cc.set("index." + INDEX_NAME + ".elasticsearch.ext.discovery.zen.ping.unicast.hosts", "localhost,127.0.0.1:9300"); ModifiableConfiguration config = @@ -225,7 +223,7 @@ public void testNetworkNodeUsingExt() throws BackendException, InterruptedExcept @Test public void testNetworkNodeUsingYaml() throws BackendException, InterruptedException { - ElasticsearchRunner esr = new ElasticsearchRunner(".", "networkNodeUsingYaml.yml"); + ElasticsearchRunner esr = new ElasticsearchRunner("."); esr.start(); ModifiableConfiguration config = GraphDatabaseConfiguration.buildGraphConfiguration(); config.set(INTERFACE, ElasticSearchSetup.NODE.toString(), INDEX_NAME); @@ -256,27 +254,30 @@ public void testNetworkNodeUsingYaml() throws BackendException, InterruptedExcep @Test public void testIndexCreationOptions() throws InterruptedException, BackendException { - final int shards = 77; - ElasticsearchRunner esr = new ElasticsearchRunner(".", "indexCreationOptions.yml"); + String baseDir = Joiner.on(File.separator).join("target", "es"); + + final int shards = 7; + + ElasticsearchRunner esr = new ElasticsearchRunner("."); esr.start(); CommonsConfiguration cc = new CommonsConfiguration(new BaseConfiguration()); cc.set("index." + INDEX_NAME + ".elasticsearch.create.ext.number_of_shards", String.valueOf(shards)); - cc.set("index." 
+ INDEX_NAME + ".elasticsearch.ext.cluster.name", "indexCreationOptions"); ModifiableConfiguration config = new ModifiableConfiguration(GraphDatabaseConfiguration.ROOT_NS, cc, BasicConfiguration.Restriction.NONE); config.set(INTERFACE, ElasticSearchSetup.NODE.toString(), INDEX_NAME); + config.set(GraphDatabaseConfiguration.INDEX_DIRECTORY, baseDir, INDEX_NAME); Configuration indexConfig = config.restrictTo(INDEX_NAME); IndexProvider idx = new ElasticSearchIndex(indexConfig); simpleWriteAndQuery(idx); - ImmutableSettings.Builder settingsBuilder = ImmutableSettings.settingsBuilder(); + Settings.Builder settingsBuilder = Settings.settingsBuilder(); settingsBuilder.put("discovery.zen.ping.multicast.enabled", "false"); settingsBuilder.put("discovery.zen.ping.unicast.hosts", "localhost,127.0.0.1:9300"); - settingsBuilder.put("cluster.name", "indexCreationOptions"); + settingsBuilder.put("path.home", baseDir); NodeBuilder nodeBuilder = NodeBuilder.nodeBuilder().settings(settingsBuilder.build()); nodeBuilder.client(true).data(false).local(false); Node n = nodeBuilder.build().start(); @@ -285,7 +286,7 @@ public void testIndexCreationOptions() throws InterruptedException, BackendExcep assertEquals(String.valueOf(shards), response.getSetting("janusgraph", "index.number_of_shards")); idx.close(); - n.stop(); + n.close(); esr.stop(); } diff --git a/janusgraph-es/src/test/java/org/janusgraph/diskstorage/es/ElasticsearchRunner.java b/janusgraph-es/src/test/java/org/janusgraph/diskstorage/es/ElasticsearchRunner.java index a6ae1e1afb..a0a93e6575 100644 --- a/janusgraph-es/src/test/java/org/janusgraph/diskstorage/es/ElasticsearchRunner.java +++ b/janusgraph-es/src/test/java/org/janusgraph/diskstorage/es/ElasticsearchRunner.java @@ -34,7 +34,6 @@ public class ElasticsearchRunner extends DaemonRunner { LoggerFactory.getLogger(ElasticsearchRunner.class); public static final String ES_PID_FILE = "/tmp/janusgraph-test-es.pid"; - private String configFile = "elasticsearch.yml"; public 
ElasticsearchRunner() { this.homedir = "."; @@ -44,11 +43,6 @@ public ElasticsearchRunner(String esHome) { this.homedir = esHome; } - public ElasticsearchRunner(String esHome, String configFile) { - this(esHome); - this.configFile = configFile; - } - @Override protected String getDaemonShortName() { @@ -91,7 +85,7 @@ protected ElasticsearchStatus startImpl() throws IOException { FileUtils.deleteDirectory(logs); } - runCommand(homedir + File.separator + "bin/elasticsearch", "-d", "-p", ES_PID_FILE, "-Des.config=" + homedir + File.separator + "config" + File.separator + configFile); + runCommand(homedir + File.separator + "bin/elasticsearch", "-d", "-p", ES_PID_FILE); try { watchLog(" started", 60L, TimeUnit.SECONDS); } catch (InterruptedException e) { diff --git a/janusgraph-es/src/test/resources/es_cfg_bogus_nodeclient.yml b/janusgraph-es/src/test/resources/es_cfg_bogus_nodeclient.yml index be59d45860..29b13bca5e 100644 --- a/janusgraph-es/src/test/resources/es_cfg_bogus_nodeclient.yml +++ b/janusgraph-es/src/test/resources/es_cfg_bogus_nodeclient.yml @@ -2,4 +2,3 @@ node.data: false node.client: true discovery.zen.ping.multicast.enabled: false discovery.zen.ping.unicast.hosts: [ "10.11.12.13" ] -cluster.name: networkNodeUsingYaml \ No newline at end of file diff --git a/janusgraph-es/src/test/resources/es_cfg_nodeclient.yml b/janusgraph-es/src/test/resources/es_cfg_nodeclient.yml index e0f4f02475..22c6e28c20 100644 --- a/janusgraph-es/src/test/resources/es_cfg_nodeclient.yml +++ b/janusgraph-es/src/test/resources/es_cfg_nodeclient.yml @@ -2,4 +2,3 @@ node.data: false node.client: true discovery.zen.ping.multicast.enabled: false discovery.zen.ping.unicast.hosts: [ "localhost", "127.0.0.1:9300" ] -cluster.name: networkNodeUsingYaml \ No newline at end of file diff --git a/janusgraph-es/src/test/resources/es_jvmlocal.yml b/janusgraph-es/src/test/resources/es_jvmlocal.yml index 995e2aa592..ca4963473e 100644 --- a/janusgraph-es/src/test/resources/es_jvmlocal.yml +++ 
b/janusgraph-es/src/test/resources/es_jvmlocal.yml @@ -1,7 +1,4 @@ node.data: true node.client: false node.local: true -path.data: ${project.build.directory}/es/jvmlocal_yml/data -path.work: ${project.build.directory}/es/jvmlocal_yml/work -path.logs: ${project.build.directory}/es/jvmlocal_yml/logs -cluster.name: jvmlocal \ No newline at end of file +path.home: ${project.build.directory}/es \ No newline at end of file diff --git a/janusgraph-hadoop-parent/pom.xml b/janusgraph-hadoop-parent/pom.xml index c37b85c057..ab8bfacb26 100644 --- a/janusgraph-hadoop-parent/pom.xml +++ b/janusgraph-hadoop-parent/pom.xml @@ -76,6 +76,20 @@ net.jpountz.lz4 lz4 + + com.fasterxml.jackson.module + jackson-module-scala_2.10 + + + + + com.fasterxml.jackson.module + jackson-module-scala_2.10 + + + org.scala-lang + scala-library + diff --git a/janusgraph-hbase-parent/janusgraph-hbase-core/src/test/java/org/janusgraph/HBaseStatus.java b/janusgraph-hbase-parent/janusgraph-hbase-core/src/test/java/org/janusgraph/HBaseStatus.java index 23d98d0503..186f1bcbdb 100644 --- a/janusgraph-hbase-parent/janusgraph-hbase-core/src/test/java/org/janusgraph/HBaseStatus.java +++ b/janusgraph-hbase-parent/janusgraph-hbase-core/src/test/java/org/janusgraph/HBaseStatus.java @@ -20,7 +20,7 @@ import java.io.FileReader; import java.io.IOException; -import org.elasticsearch.common.Preconditions; +import com.google.common.base.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/janusgraph-lucene/src/main/java/org/janusgraph/diskstorage/lucene/LuceneIndex.java b/janusgraph-lucene/src/main/java/org/janusgraph/diskstorage/lucene/LuceneIndex.java index f66c525bcd..1e2b9d19a7 100644 --- a/janusgraph-lucene/src/main/java/org/janusgraph/diskstorage/lucene/LuceneIndex.java +++ b/janusgraph-lucene/src/main/java/org/janusgraph/diskstorage/lucene/LuceneIndex.java @@ -76,7 +76,8 @@ public class LuceneIndex implements IndexProvider { private static final String GEOID = "_____geo"; private 
static final int MAX_STRING_FIELD_LEN = 256; - private static final Version LUCENE_VERSION = Version.LUCENE_4_10_4; + private static final Version LUCENE_VERSION = Version.LUCENE_5_5_2; + private static final IndexFeatures LUCENE_FEATURES = new IndexFeatures.Builder().supportedStringMappings(Mapping.TEXT, Mapping.STRING).supportsCardinality(Cardinality.SINGLE).supportsNanoseconds().build(); private static final int GEO_MAX_LEVELS = 11; @@ -110,7 +111,7 @@ private Directory getStoreDirectory(String store) throws BackendException { if (!path.exists() || !path.isDirectory() || !path.canWrite()) throw new PermanentBackendException("Cannot access or write to directory: " + dir); log.debug("Opening store directory [{}]", path); - return FSDirectory.open(path); + return FSDirectory.open(path.toPath()); } catch (IOException e) { throw new PermanentBackendException("Could not open directory: " + dir, e); } @@ -120,7 +121,7 @@ private IndexWriter getWriter(String store) throws BackendException { Preconditions.checkArgument(writerLock.isHeldByCurrentThread()); IndexWriter writer = writers.get(store); if (writer == null) { - IndexWriterConfig iwc = new IndexWriterConfig(LUCENE_VERSION, analyzer); + IndexWriterConfig iwc = new IndexWriterConfig(analyzer); iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); try { writer = new IndexWriter(getStoreDirectory(store), iwc); @@ -307,12 +308,16 @@ private void addToDocument(String store, if (e.value instanceof Number) { Field field; + Field sortField; if (AttributeUtil.isWholeNumber((Number) e.value)) { field = new LongField(e.field, ((Number) e.value).longValue(), Field.Store.YES); + sortField = new NumericDocValuesField(e.field, ((Number) e.value).longValue()); } else { //double or float field = new DoubleField(e.field, ((Number) e.value).doubleValue(), Field.Store.YES); + sortField = new DoubleDocValuesField(e.field, ((Number) e.value).doubleValue()); } doc.add(field); + doc.add(sortField); } else if 
(AttributeUtil.isString(e.value)) { String str = (String) e.value; Mapping mapping = Mapping.getMapping(store, e.field, informations); @@ -504,7 +509,7 @@ private final SearchParams convertQuery(Condition condition, KeyInformation.S Preconditions.checkArgument(janusgraphPredicate == Geo.WITHIN, "Relation is not supported for geo value: " + janusgraphPredicate); Shape shape = ((Geoshape) value).convert2Spatial4j(); SpatialArgs args = new SpatialArgs(SpatialOperation.IsWithin, shape); - params.addFilter(getSpatialStrategy(key).makeFilter(args)); + params.addQuery(getSpatialStrategy(key).makeQuery(args)); } else if (value instanceof Date) { Preconditions.checkArgument(janusgraphPredicate instanceof Cmp, "Relation not supported on date types: " + janusgraphPredicate); params.addFilter(numericFilter(key, (Cmp) janusgraphPredicate, ((Date) value).getTime())); diff --git a/janusgraph-lucene/src/test/java/org/janusgraph/diskstorage/lucene/LuceneExample.java b/janusgraph-lucene/src/test/java/org/janusgraph/diskstorage/lucene/LuceneExample.java index 34384b41cd..bf68b07023 100644 --- a/janusgraph-lucene/src/test/java/org/janusgraph/diskstorage/lucene/LuceneExample.java +++ b/janusgraph-lucene/src/test/java/org/janusgraph/diskstorage/lucene/LuceneExample.java @@ -78,9 +78,9 @@ private SpatialStrategy getSpatialStrategy(String key) { @Test public void example1() throws Exception { - Directory dir = FSDirectory.open(path); + Directory dir = FSDirectory.open(path.toPath()); Analyzer analyzer = new StandardAnalyzer(); - IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_4_10_4, analyzer); + IndexWriterConfig iwc = new IndexWriterConfig(analyzer); iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); IndexWriter writer = new IndexWriter(dir, iwc); @@ -105,7 +105,7 @@ public void example1() throws Exception { writer.close(); //Search - IndexReader reader = DirectoryReader.open(FSDirectory.open(path)); + IndexReader reader = 
DirectoryReader.open(FSDirectory.open(path.toPath())); IndexSearcher searcher = new IndexSearcher(reader); analyzer = new StandardAnalyzer(); diff --git a/pom.xml b/pom.xml index 83c52580b5..b21cd45089 100644 --- a/pom.xml +++ b/pom.xml @@ -81,16 +81,16 @@ 1.2.4 ${hbase100.core.version} 1.9.2 - 2.4.4 + 2.6.6 - 4.10.4 - 1.5.1 + 5.5.2 + 2.4.4 1.7.0 3.2.2 - 1.6.2 + 2.8.2 1.3 2.7.7 3.2 @@ -504,6 +504,11 @@ jackson-annotations ${jackson2.version} + + com.fasterxml.jackson.module + jackson-module-scala_2.10 + ${jackson2.version} + joda-time joda-time @@ -853,7 +858,7 @@ commons-cli commons-cli - 1.2 + 1.3.1 org.jboss.netty @@ -863,14 +868,14 @@ io.netty netty - 3.6.6.Final + 3.10.5.Final com.spatial4j spatial4j - 0.4.1 + 0.5 From 513d1f6adad2c3c01050c3205c862a6aae537e6e Mon Sep 17 00:00:00 2001 From: sjudeng Date: Wed, 22 Feb 2017 06:33:11 -0600 Subject: [PATCH 2/5] Add support for indexing line and polygon geometries, querying by polygon geometries, and support for geoIntersect, geoContains and geoDisjoint predicates. Support for indexing multi-point/line/polygon properties is implemented but untested. 
Signed-off-by: sjudeng --- docs/elasticsearch.txt | 2 +- docs/hadoop.txt | 1 + docs/lucene.txt | 2 +- docs/searchpredicates.txt | 20 +- docs/solr.txt | 2 +- docs/textsearch.txt | 23 +- janusgraph-core/pom.xml | 11 + .../org/janusgraph/core/attribute/Geo.java | 33 +- .../janusgraph/core/attribute/Geoshape.java | 527 +++++++++--------- .../core/attribute/GeoshapeHelper.java | 142 +++++ .../core/attribute/JtsGeoshapeHelper.java | 137 +++++ .../org/janusgraph/core/schema/Mapping.java | 3 +- .../database/serialize/AttributeUtil.java | 6 + .../graphdb/types/ParameterType.java | 14 +- .../diskstorage/es/ElasticSearchIndex.java | 147 ++++- .../diskstorage/es/ElasticSearchSetup.java | 5 +- .../es/ElasticSearchIndexTest.java | 10 + .../src/test/resources/es_jvmlocal.yml | 3 +- .../serialize/JanusGraphKryoRegistrator.java | 70 +++ .../test/resources/cassandra-read.properties | 1 + .../src/test/resources/hbase-read.properties | 1 + .../diskstorage/lucene/LuceneIndex.java | 92 ++- .../diskstorage/lucene/LuceneExample.java | 4 +- .../diskstorage/lucene/LuceneIndexTest.java | 9 + janusgraph-solr/pom.xml | 1 - .../diskstorage/solr/SolrIndex.java | 71 +-- .../solr/transform/GeoToWktConverter.java | 12 +- .../diskstorage/solr/SolrIndexTest.java | 11 +- .../solr/transform/GeoToWktConverterTest.java | 86 ++- .../resources/solr/core-template/schema.xml | 3 +- janusgraph-test/pom.xml | 4 + .../indexing/IndexProviderTest.java | 89 ++- .../graphdb/JanusGraphIndexTest.java | 110 +++- .../janusgraph/graphdb/JanusGraphIoTest.java | 120 +++- .../janusgraph/graphdb/JanusGraphTest.java | 17 +- .../graphdb/attribute/GeoshapeTest.java | 144 ++++- .../graphdb/serializer/SerializerTest.java | 25 +- pom.xml | 7 + 38 files changed, 1532 insertions(+), 433 deletions(-) create mode 100644 janusgraph-core/src/main/java/org/janusgraph/core/attribute/GeoshapeHelper.java create mode 100644 janusgraph-core/src/main/java/org/janusgraph/core/attribute/JtsGeoshapeHelper.java create mode 100644 
janusgraph-hadoop-parent/janusgraph-hadoop-core/src/main/java/org/janusgraph/hadoop/serialize/JanusGraphKryoRegistrator.java diff --git a/docs/elasticsearch.txt b/docs/elasticsearch.txt index 215786bd16..d66a9d514d 100644 --- a/docs/elasticsearch.txt +++ b/docs/elasticsearch.txt @@ -7,7 +7,7 @@ Elasticsearch is a flexible and powerful open source, distributed, real-time sea JanusGraph supports http://elasticsearch.org[Elasticsearch] as an index backend. Here are some of the Elasticsearch features supported by JanusGraph: * *Full-Text*: Supports all `Text` predicates to search for text properties that matches a given word, prefix or regular expression. -* *Geo*: Supports the `Geo.WITHIN` condition to search for points that fall within a given circle. Only supports points for indexing and circles for querying. +* *Geo*: Supports all `Geo` predicates to search for geo properties that are intersecting, within, disjoint to or contained in a given query geometry. Supports points, lines and polygons for indexing. Supports circles, boxes and polygons for querying point properties and all shapes for querying non-point properties. Note that JTS is required when using line and polygon shapes (see <> for more information). * *Numeric Range*: Supports all numeric comparisons in `Compare`. * *Flexible Configuration*: Supports embedded or remote operation, custom transport and discovery, and open-ended settings customization. * *TTL*: Supports automatically expiring indexed elements. 
diff --git a/docs/hadoop.txt b/docs/hadoop.txt index 7244df871d..94fba079f4 100644 --- a/docs/hadoop.txt +++ b/docs/hadoop.txt @@ -83,6 +83,7 @@ giraph.maxMessagesInMemory=100000 spark.master=local[*] spark.executor.memory=1g spark.serializer=org.apache.spark.serializer.KryoSerializer +spark.kryo.registrator=org.janusgraph.hadoop.serialize.JanusGraphKryoRegistrator ---- [source, gremlin] diff --git a/docs/lucene.txt b/docs/lucene.txt index a1113afaee..452dbe5146 100644 --- a/docs/lucene.txt +++ b/docs/lucene.txt @@ -21,7 +21,7 @@ In the above configuration, the index backend is named `search`. Replace `search === Feature Support * *Full-Text*: Supports all `Text` predicates to search for text properties that matches a given word, prefix or regular expression. -* *Geo*: Supports the `Geo.WITHIN` condition to search for points that fall within a given geographic shape. Only supports points for indexing and circles and boxes for querying. +* *Geo*: Supports `Geo` predicates to search for geo properties that are intersecting, within, or contained in a given query geometry. Supports points, lines and polygons for indexing. Supports circles and boxes for querying point properties and all shapes for querying non-point properties. Note that JTS is required when using line and polygon shapes (see <> for more information). * *Numeric Range*: Supports all numeric comparisons in `Compare`. * *Temporal*: Nanosecond granularity temporal indexing. diff --git a/docs/searchpredicates.txt b/docs/searchpredicates.txt index 3826faa888..7ef11fc63c 100644 --- a/docs/searchpredicates.txt +++ b/docs/searchpredicates.txt @@ -30,8 +30,14 @@ See <> for more information about full-text and string search. === Geo Predicate -The `Geo` enum specifies the geo-location predicate `geoWithin` which holds true if one geometric object contains the other. +The `Geo` enum specifies geo-location predicates. 
+* `geoIntersect` which holds true if the two geometric objects have at least one point in common (opposite of `geoDisjoint`). +* `geoWithin` which holds true if one geometric object contains the other. +* `geoDisjoint` which holds true if the two geometric objects have no points in common (opposite of `geoIntersect`). +* `geoContains` which holds true if one geometric object is contained by the other. + +See <> for more information about geo search. === Query Examples @@ -81,9 +87,13 @@ While JanusGraph's composite indexes support any data type that can be stored in Additional data types will be supported in the future. === Geoshape Data Type -The Geoshape data type supports representing a point, circle or box. However all index backends currently only support indexing points. +The Geoshape data type supports representing a point, circle, box, line, polygon, multi-point, multi-line and multi-polygon. Index backends currently support indexing points, lines and polygons. Indexing multi-point, multi-line and multi-polygon properties has not been tested. Geospatial index lookups are only supported via mixed indexes. +.Note: +[NOTE] +The Java Topology Suite (JTS) is currently required to use line, polygon, multi-point, multi-line and multi-polygon geometries. JTS is not included in JanusGraph distributions by default due to its LGPL license. Users must download the JTS JAR file separately and include it in the classpath when full geometry support is required. 
+ To construct a Geoshape use the following methods: [source, java] @@ -93,8 +103,10 @@ Geoshape.point(37.97, 23.72) Geoshape.circle(37.97, 23.72, 50) //SW lat, SW lng, NE lat, NE lng Geoshape.box(37.97, 23.72, 38.97, 24.72) +//WKT (requires JTS for polygon, linestring and multi-point/line/polygon geometries) +Geoshape.fromWkt("POLYGON ((35.4 48.9, 35.6 48.9, 35.6 49.1, 35.4 49.1, 35.4 48.9))") -In addition when importing a graph via GraphSON Point may be represented by: +In addition, when importing a graph via GraphSON the geometry may be represented by GeoJSON: [source, java] //string "37.97, 23.72" @@ -117,7 +129,7 @@ In addition when importing a graph via GraphSON Point may be represented by: "coordinates": [125.6, 10.1] } -link:http://geojson.org/[GeoJSON] may be specified as Point, Circle or Polygon. However polygons must form a box. +link:http://geojson.org/[GeoJSON] may be specified as Point, Circle, LineString or Polygon. Polygons must be closed. Note that unlike the JanusGraph API GeoJSON specifies coordinates as lng lat. === Collections diff --git a/docs/solr.txt b/docs/solr.txt index 1ef0a33d9d..bad64cb6ea 100644 --- a/docs/solr.txt +++ b/docs/solr.txt @@ -8,7 +8,7 @@ Solr is the popular, blazing fast open source enterprise search platform from th JanusGraph supports http://lucene.apache.org/solr/[Solr] as an index backend. Here are some of the Solr features supported by JanusGraph: * *Full-Text*: Supports all `Text` predicates to search for text properties that matches a given word, prefix or regular expression. -* *Geo*: Supports the `Geo.WITHIN` condition to search for points that fall within a given circle. Only supports points for indexing and circles for querying. +* *Geo*: Supports all `Geo` predicates to search for geo properties that are intersecting, within, disjoint to or contained in a given query geometry. Supports points, lines and polygons for indexing. 
Supports circles, boxes and polygons for querying point properties and all shapes for querying non-point properties. Note that JTS is required when using line and polygon shapes (see <> for more information). * *Numeric Range*: Supports all numeric comparisons in `Compare`. * *TTL*: Supports automatically expiring indexed elements. * *Temporal*: Millisecond granularity temporal indexing. diff --git a/docs/textsearch.txt b/docs/textsearch.txt index 8c19bb7cb6..6c9c1aa201 100644 --- a/docs/textsearch.txt +++ b/docs/textsearch.txt @@ -81,7 +81,28 @@ mgmt.commit() Note that the data will be stored in the index twice, once for exact matching and once for fuzzy matching. -=== Field Mapping +[[geo-search]] +===Geo Mapping + +By default, JanusGraph supports indexing geo properties with point type and querying geo properties by circle or box. To index a non-point geo property with support for querying by any geoshape type, specify the mapping as `Mapping.PREFIX_TREE`: + +[source, gremlin] +mgmt = graph.openManagement() +name = mgmt.makePropertyKey('border').dataType(Geoshape.class).make() +mgmt.buildIndex('borderIndex', Vertex.class).addKey(name, Mapping.PREFIX_TREE.asParameter()).buildMixedIndex("search") +mgmt.commit() + +Additional parameters can be specified to tune the configuration of the underlying prefix tree mapping. These optional parameters include the number of levels used in the prefix tree as well as the associated precision. + +[source, gremlin] +mgmt = graph.openManagement() +name = mgmt.makePropertyKey('border').dataType(Geoshape.class).make() +mgmt.buildIndex('borderIndex', Vertex.class).addKey(name, Mapping.PREFIX_TREE.asParameter(), Parameter.of("index-geo-max-levels", 18), Parameter.of("index-geo-dist-error-pct", 0.0125)).buildMixedIndex("search") +mgmt.commit() + +Note that some indexing backends (e.g. Solr) may require additional external schema configuration to support and tune indexing non-point properties. 
+ +== Field Mapping [[index-local-field-mapping]] ==== Individual Field Mapping diff --git a/janusgraph-core/pom.xml b/janusgraph-core/pom.xml index 702ea059e6..ec641fad95 100644 --- a/janusgraph-core/pom.xml +++ b/janusgraph-core/pom.xml @@ -70,6 +70,11 @@ com.spatial4j spatial4j + + com.vividsolutions + jts + true + commons-collections commons-collections @@ -102,6 +107,12 @@ com.google.code.findbugs jsr305 + + + org.noggit + noggit + 0.6 + ${basedir}/target diff --git a/janusgraph-core/src/main/java/org/janusgraph/core/attribute/Geo.java b/janusgraph-core/src/main/java/org/janusgraph/core/attribute/Geo.java index 4b0e8add4c..b7439d8155 100644 --- a/janusgraph-core/src/main/java/org/janusgraph/core/attribute/Geo.java +++ b/janusgraph-core/src/main/java/org/janusgraph/core/attribute/Geo.java @@ -83,7 +83,7 @@ public JanusGraphPredicate negate() { }, /** - * Whether one geographic region is completely contains within another + * Whether one geographic region is completely within another */ WITHIN { @Override @@ -104,6 +104,34 @@ public boolean hasNegation() { return false; } + @Override + public JanusGraphPredicate negate() { + throw new UnsupportedOperationException(); + } + }, + + /** + * Whether one geographic region completely contains another + */ + CONTAINS { + @Override + public boolean test(Object value, Object condition) { + Preconditions.checkArgument(condition instanceof Geoshape); + if (value == null) return false; + Preconditions.checkArgument(value instanceof Geoshape); + return ((Geoshape) value).contains((Geoshape) condition); + } + + @Override + public String toString() { + return "contains"; + } + + @Override + public boolean hasNegation() { + return false; + } + @Override public JanusGraphPredicate negate() { throw new UnsupportedOperationException(); @@ -137,4 +165,7 @@ public static P geoDisjoint(final V value) { public static P geoWithin(final V value) { return new P(Geo.WITHIN, value); } + public static P geoContains(final V value) { + return 
new P(Geo.CONTAINS, value); + } } diff --git a/janusgraph-core/src/main/java/org/janusgraph/core/attribute/Geoshape.java b/janusgraph-core/src/main/java/org/janusgraph/core/attribute/Geoshape.java index 13360c44ff..66e7003875 100644 --- a/janusgraph-core/src/main/java/org/janusgraph/core/attribute/Geoshape.java +++ b/janusgraph-core/src/main/java/org/janusgraph/core/attribute/Geoshape.java @@ -18,12 +18,14 @@ import com.google.common.primitives.Doubles; import com.spatial4j.core.context.SpatialContext; import com.spatial4j.core.distance.DistanceUtils; +import com.spatial4j.core.shape.Circle; import com.spatial4j.core.shape.Shape; import com.spatial4j.core.shape.SpatialRelation; import org.janusgraph.diskstorage.ScanBuffer; import org.janusgraph.diskstorage.WriteBuffer; import org.janusgraph.graphdb.database.idhandling.VariableLong; -import org.apache.commons.lang.builder.HashCodeBuilder; + +import org.apache.commons.io.output.ByteArrayOutputStream; import org.apache.tinkerpop.gremlin.structure.io.graphson.GraphSONTokens; import org.apache.tinkerpop.gremlin.structure.io.graphson.GraphSONUtil; @@ -31,6 +33,7 @@ import org.apache.tinkerpop.shaded.jackson.core.JsonParser; import org.apache.tinkerpop.shaded.jackson.core.JsonProcessingException; import org.apache.tinkerpop.shaded.jackson.databind.DeserializationContext; +import org.apache.tinkerpop.shaded.jackson.databind.ObjectMapper; import org.apache.tinkerpop.shaded.jackson.databind.SerializerProvider; import org.apache.tinkerpop.shaded.jackson.databind.deser.std.StdDeserializer; import org.apache.tinkerpop.shaded.jackson.databind.jsontype.TypeSerializer; @@ -40,37 +43,55 @@ import org.apache.tinkerpop.shaded.kryo.io.Input; import org.apache.tinkerpop.shaded.kryo.io.Output; +import java.io.ByteArrayInputStream; +import java.io.DataInputStream; +import java.io.DataOutputStream; import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.io.StringReader; import 
java.lang.reflect.Array; +import java.text.ParseException; import java.util.*; import java.util.stream.Collectors; /** * A generic representation of a geographic shape, which can either be a single point, - * circle, box, or polygon. Use {@link #getType()} to determine the type of shape of a particular Geoshape object. + * circle, box, line or polygon. Use {@link #getType()} to determine the type of shape of a particular Geoshape object. * Use the static constructor methods to create the desired geoshape. * - * Note, polygons are not yet supported. - * * @author Matthias Broecheler (me@matthiasb.com) */ public class Geoshape { + private static String FIELD_LABEL = "geometry"; private static String FIELD_TYPE = "type"; private static String FIELD_COORDINATES = "coordinates"; private static String FIELD_RADIUS = "radius"; - private static final SpatialContext CTX = SpatialContext.GEO; + public static final GeoshapeHelper HELPER; + static { + boolean haveJts = false; + try { + haveJts = Class.forName("com.vividsolutions.jts.geom.Geometry") != null; + } catch (ClassNotFoundException e) { } + + HELPER = haveJts ? new JtsGeoshapeHelper() : new GeoshapeHelper(); + } /** - * The Type of a shape: a point, box, circle, or polygon. + * The Type of a shape: a point, box, circle, line or polygon. 
*/ public enum Type { POINT("Point"), BOX("Box"), CIRCLE("Circle"), - POLYGON("Polygon"); + LINE("Line"), + POLYGON("Polygon"), + MULTIPOINT("MultiPoint"), + MULTILINESTRING("MultiLineString"), + MULTIPOLYGON("MultiPolygon"); private final String gsonName; @@ -92,26 +113,16 @@ public String toString() { } } - //coordinates[0] = latitudes, coordinates[1] = longitudes - private final float[][] coordinates; + private final Shape shape; - private Geoshape() { - coordinates = null; - } - - private Geoshape(final float[][] coordinates) { - Preconditions.checkArgument(coordinates!=null && coordinates.length==2); - Preconditions.checkArgument(coordinates[0].length==coordinates[1].length && coordinates[0].length>0); - for (int i=0;i0); - else Preconditions.checkArgument(isValidCoordinate(coordinates[0][i],coordinates[1][i])); - } - this.coordinates=coordinates; + protected Geoshape(final Shape shape) { + Preconditions.checkNotNull(shape,"Invalid shape (null)"); + this.shape = shape; } @Override public int hashCode() { - return new HashCodeBuilder().append(coordinates[0]).append(coordinates[1]).toHashCode(); + return shape.hashCode(); } @Override @@ -120,38 +131,32 @@ public boolean equals(Object other) { else if (other==null) return false; else if (!getClass().isInstance(other)) return false; Geoshape oth = (Geoshape)other; - Preconditions.checkArgument(coordinates.length==2 && oth.coordinates.length==2); - for (int i=0;i0) s.append(","); - s.append(getPoint(i)); - } - s.append("]"); - } - return s.toString(); + return HELPER.getWktWriter().toString(shape); + } + + /** + * Returns the GeoJSON representation of the shape. + * @return + */ + public String toGeoJson() { + return GeoshapeGsonSerializer.toGeoJson(this); + } + + /** + * Returns the underlying {@link Shape}. 
+ * @return + */ + public Shape getShape() { + return shape; } /** @@ -160,28 +165,17 @@ public String toString() { * @return */ public Type getType() { - if (coordinates[0].length==1) return Type.POINT; - else if (coordinates[0].length>2) return Type.POLYGON; - else { //coordinates[0].length==2 - if (Float.isNaN(coordinates[0][1])) return Type.CIRCLE; - else return Type.BOX; - } + return HELPER.getType(shape); } /** * Returns the number of points comprising this geoshape. A point and circle have only one point (center of cricle), - * a box has two points (the south-west and north-east corners) and a polygon has a variable number of points (>=3). + * a box has two points (the south-west and north-east corners). Lines and polygons have a variable number of points. * * @return */ public int size() { - switch(getType()) { - case POINT: return 1; - case CIRCLE: return 1; - case BOX: return 2; - case POLYGON: return coordinates[0].length; - default: throw new IllegalStateException("Unrecognized type: " + getType()); - } + return HELPER.size(shape); } /** @@ -191,8 +185,7 @@ public int size() { * @return */ public Point getPoint(int position) { - if (position<0 || position>=size()) throw new ArrayIndexOutOfBoundsException("Invalid position: " + position); - return new Point(coordinates[0][position],coordinates[1][position]); + return HELPER.getPoint(this, position); } /** @@ -201,63 +194,63 @@ public Point getPoint(int position) { * @return */ public Point getPoint() { - Preconditions.checkArgument(size()==1,"Shape does not have a single point"); - return getPoint(0); + Preconditions.checkArgument(getType()==Type.POINT || getType()==Type.CIRCLE,"Shape does not have a single point"); + return new Point(shape.getCenter().getY(), shape.getCenter().getX()); } /** * Returns the radius in kilometers of this circle. Only applicable to circle shapes. 
* @return */ - public float getRadius() { + public double getRadius() { Preconditions.checkArgument(getType()==Type.CIRCLE,"This shape is not a circle"); - return coordinates[1][1]; + double radiusInDeg = ((Circle) shape).getRadius(); + return DistanceUtils.degrees2Dist(radiusInDeg, DistanceUtils.EARTH_MEAN_RADIUS_KM); } private SpatialRelation getSpatialRelation(Geoshape other) { Preconditions.checkNotNull(other); - return convert2Spatial4j().relate(other.convert2Spatial4j()); + return shape.relate(other.shape); } + /** + * Whether this geometry has any points in common with the given geometry. + * @param other + * @return + */ public boolean intersect(Geoshape other) { SpatialRelation r = getSpatialRelation(other); return r==SpatialRelation.INTERSECTS || r==SpatialRelation.CONTAINS || r==SpatialRelation.WITHIN; } + /** + * Whether this geometry is within the given geometry. + * @param outer + * @return + */ public boolean within(Geoshape outer) { return getSpatialRelation(outer)==SpatialRelation.WITHIN; } - public boolean disjoint(Geoshape other) { - return getSpatialRelation(other)==SpatialRelation.DISJOINT; - } - /** - * Converts this shape into its equivalent Spatial4j {@link Shape}. + * Whether this geometry contains the given geometry. 
+ * @param outer * @return */ - public Shape convert2Spatial4j() { - switch(getType()) { - case POINT: return getPoint().getSpatial4jPoint(); - case CIRCLE: return CTX.makeCircle(getPoint(0).getSpatial4jPoint(), DistanceUtils.dist2Degrees(getRadius(), DistanceUtils.EARTH_MEAN_RADIUS_KM)); - case BOX: return CTX.makeRectangle(getPoint(0).getSpatial4jPoint(),getPoint(1).getSpatial4jPoint()); - case POLYGON: throw new UnsupportedOperationException("Not yet supported"); - default: throw new IllegalStateException("Unrecognized type: " + getType()); - } + public boolean contains(Geoshape outer) { + return getSpatialRelation(outer)==SpatialRelation.CONTAINS; } - /** - * Constructs a point from its latitude and longitude information - * @param latitude - * @param longitude + * Whether this geometry has no points in common with the given geometry. + * @param other * @return */ - public static final Geoshape point(final float latitude, final float longitude) { - Preconditions.checkArgument(isValidCoordinate(latitude,longitude),"Invalid coordinate provided"); - return new Geoshape(new float[][]{ new float[]{latitude}, new float[]{longitude}}); + public boolean disjoint(Geoshape other) { + return getSpatialRelation(other)==SpatialRelation.DISJOINT; } + /** * Constructs a point from its latitude and longitude information * @param latitude @@ -265,20 +258,8 @@ public static final Geoshape point(final float latitude, final float longitude) * @return */ public static final Geoshape point(final double latitude, final double longitude) { - return point((float)latitude,(float)longitude); - } - - /** - * Constructs a circle from a given center point and a radius in kilometer - * @param latitude - * @param longitude - * @param radiusInKM - * @return - */ - public static final Geoshape circle(final float latitude, final float longitude, final float radiusInKM) { Preconditions.checkArgument(isValidCoordinate(latitude,longitude),"Invalid coordinate provided"); - 
Preconditions.checkArgument(radiusInKM>0,"Invalid radius provided [%s]",radiusInKM); - return new Geoshape(new float[][]{ new float[]{latitude, Float.NaN}, new float[]{longitude, radiusInKM}}); + return new Geoshape(HELPER.getContext().makePoint(longitude, latitude)); } /** @@ -289,7 +270,10 @@ public static final Geoshape circle(final float latitude, final float longitude, * @return */ public static final Geoshape circle(final double latitude, final double longitude, final double radiusInKM) { - return circle((float)latitude,(float)longitude,(float)radiusInKM); + Preconditions.checkArgument(isValidCoordinate(latitude,longitude),"Invalid coordinate provided"); + Preconditions.checkArgument(radiusInKM>0,"Invalid radius provided [%s]",radiusInKM); + double radius = DistanceUtils.dist2Degrees(radiusInKM, DistanceUtils.EARTH_MEAN_RADIUS_KM); + return new Geoshape(HELPER.getContext().makeCircle(longitude, latitude, radius)); } /** @@ -300,24 +284,54 @@ public static final Geoshape circle(final double latitude, final double longitud * @param northEastLongitude * @return */ - public static final Geoshape box(final float southWestLatitude, final float southWestLongitude, - final float northEastLatitude, final float northEastLongitude) { + public static final Geoshape box(final double southWestLatitude, final double southWestLongitude, + final double northEastLatitude, final double northEastLongitude) { Preconditions.checkArgument(isValidCoordinate(southWestLatitude,southWestLongitude),"Invalid south-west coordinate provided"); Preconditions.checkArgument(isValidCoordinate(northEastLatitude,northEastLongitude),"Invalid north-east coordinate provided"); - return new Geoshape(new float[][]{ new float[]{southWestLatitude, northEastLatitude}, new float[]{southWestLongitude, northEastLongitude}}); + return new Geoshape(HELPER.getContext().makeRectangle(southWestLongitude, northEastLongitude, southWestLatitude, northEastLatitude)); } /** - * Constructs a new box shape which is 
identified by its south-west and north-east corner points - * @param southWestLatitude - * @param southWestLongitude - * @param northEastLatitude - * @param northEastLongitude + * Constructs a line from list of coordinates + * @param coordinates Coordinate (lon,lat) pairs * @return */ - public static final Geoshape box(final double southWestLatitude, final double southWestLongitude, - final double northEastLatitude, final double northEastLongitude) { - return box((float)southWestLatitude,(float)southWestLongitude,(float)northEastLatitude,(float)northEastLongitude); + public static final Geoshape line(List coordinates) { + Preconditions.checkArgument(coordinates.size() >= 2, "Too few coordinate pairs provided"); + List points = new ArrayList<>(); + for (double[] coordinate : coordinates) { + Preconditions.checkArgument(isValidCoordinate(coordinate[1],coordinate[0]),"Invalid coordinate provided"); + points.add(HELPER.getContext().makePoint(coordinate[0], coordinate[1])); + } + return new Geoshape(HELPER.getContext().makeLineString(points)); + } + + /** + * Constructs a polygon from list of coordinates + * @param coordinates Coordinate (lon,lat) pairs + * @return + */ + public static final Geoshape polygon(List coordinates) { + return HELPER.polygon(coordinates); + } + + /** + * Constructs a Geoshape from a spatial4j {@link Shape}. + * @param shape + * @return + */ + public static final Geoshape geoshape(Shape shape) { + return new Geoshape(shape); + } + + /** + * Create Geoshape from WKT representation. 
+ * @param wkt + * @return + * @throws ParseException + */ + public static final Geoshape fromWkt(String wkt) throws ParseException { + return new Geoshape(HELPER.getWktReader().parse(wkt)); } /** @@ -326,25 +340,29 @@ public static final Geoshape box(final double southWestLatitude, final double so * @param longitude * @return */ - public static final boolean isValidCoordinate(final float latitude, final float longitude) { + public static final boolean isValidCoordinate(final double latitude, final double longitude) { return latitude>=-90.0 && latitude<=90.0 && longitude>=-180.0 && longitude<=180.0; } + public static final SpatialContext getSpatialContext() { + return HELPER.getContext(); + } + /** * A single point representation. A point is identified by its coordinate on the earth sphere using the spherical * system of latitudes and longitudes. */ public static final class Point { - private final float longitude; - private final float latitude; + private final double longitude; + private final double latitude; /** * Constructs a point with the given latitude and longitude * @param latitude Between -90 and 90 degrees * @param longitude Between -180 and 180 degrees */ - Point(float latitude, float longitude) { + Point(double latitude, double longitude) { this.longitude = longitude; this.latitude = latitude; } @@ -353,7 +371,7 @@ public static final class Point { * Longitude of this point * @return */ - public float getLongitude() { + public double getLongitude() { return longitude; } @@ -361,12 +379,12 @@ public float getLongitude() { * Latitude of this point * @return */ - public float getLatitude() { + public double getLatitude() { return latitude; } private com.spatial4j.core.shape.Point getSpatial4jPoint() { - return CTX.makePoint(longitude,latitude); + return HELPER.getContext().makePoint(longitude,latitude); } /** @@ -376,31 +394,13 @@ private com.spatial4j.core.shape.Point getSpatial4jPoint() { * @return */ public double distance(Point other) { - return 
DistanceUtils.degrees2Dist(CTX.getDistCalc().distance(getSpatial4jPoint(),other.getSpatial4jPoint()),DistanceUtils.EARTH_MEAN_RADIUS_KM); - } - - @Override - public String toString() { - return "["+latitude+","+longitude+"]"; - } - - @Override - public int hashCode() { - return new HashCodeBuilder().append(latitude).append(longitude).toHashCode(); - } - - @Override - public boolean equals(Object other) { - if (this==other) return true; - else if (other==null) return false; - else if (!getClass().isInstance(other)) return false; - Point oth = (Point)other; - return latitude==oth.latitude && longitude==oth.longitude; + return DistanceUtils.degrees2Dist(HELPER.getContext().getDistCalc().distance(getSpatial4jPoint(),other.getSpatial4jPoint()),DistanceUtils.EARTH_MEAN_RADIUS_KM); } } /** + * Geoshape attribute serializer for JanusGraph. * @author Matthias Broecheler (me@matthiasb.com) */ public static class GeoshapeSerializer implements AttributeSerializer { @@ -469,24 +469,20 @@ private Geoshape convertGeoJson(Object value) { try { Map map = (Map) value; String type = (String) map.get("type"); - if("Point".equals(type) || "Circle".equals(type) || "Polygon".equals(type)) { - return convertGeometry(map); - } - else if("Feature".equals(type)) { + if("Feature".equals(type)) { Map geometry = (Map) map.get("geometry"); return convertGeometry(geometry); + } else { + return convertGeometry(map); } - throw new IllegalArgumentException("Only Point, Circle, Polygon or feature types are supported"); - } catch (ClassCastException e) { + } catch (ClassCastException | IOException | ParseException e) { throw new IllegalArgumentException("GeoJSON was unparsable"); } - } - private Geoshape convertGeometry(Map geometry) { + private Geoshape convertGeometry(Map geometry) throws IOException, ParseException { String type = (String) geometry.get("type"); List coordinates = (List) geometry.get("coordinates"); - //Either this is a single point or a collection of points if 
("Point".equals(type)) { double[] parsedCoordinates = convertCollection(coordinates); @@ -499,26 +495,23 @@ private Geoshape convertGeometry(Map geometry) { double[] parsedCoordinates = convertCollection(coordinates); return circle(parsedCoordinates[1], parsedCoordinates[0], radius.doubleValue()); } else if ("Polygon".equals(type)) { - if (coordinates.size() != 4) { - throw new IllegalArgumentException("GeoJSON polygons are only supported if they form a box"); - } - List polygon = (List) coordinates.stream().map(o -> convertCollection((Collection) o)).collect(Collectors.toList()); - - double[] p0 = polygon.get(0); - double[] p1 = polygon.get(1); - double[] p2 = polygon.get(2); - double[] p3 = polygon.get(3); - - //This may be a clockwise or counterclockwise polygon, we have to verify that it is a box - if ((p0[0] == p1[0] && p1[1] == p2[1] && p2[0] == p3[0] && p3[1] == p0[1]) || - (p0[1] == p1[1] && p1[0] == p2[0] && p2[1] == p3[1] && p3[0] == p0[0])) { - return box(min(p0[1], p1[1], p2[1], p3[1]), min(p0[0], p1[0], p2[0], p3[0]), max(p0[1], p1[1], p2[1], p3[1]), max(p0[0], p1[0], p2[0], p3[0])); + // check whether this is a box + if (coordinates.size() == 4) { + double[] p0 = convertCollection((Collection) coordinates.get(0)); + double[] p1 = convertCollection((Collection) coordinates.get(1)); + double[] p2 = convertCollection((Collection) coordinates.get(2)); + double[] p3 = convertCollection((Collection) coordinates.get(3)); + + //This may be a clockwise or counterclockwise polygon, we have to verify that it is a box + if ((p0[0] == p1[0] && p1[1] == p2[1] && p2[0] == p3[0] && p3[1] == p0[1] && p3[0] != p0[0]) || + (p0[1] == p1[1] && p1[0] == p2[0] && p2[1] == p3[1] && p3[0] == p0[0] && p3[1] != p0[1])) { + return box(min(p0[1], p1[1], p2[1], p3[1]), min(p0[0], p1[0], p2[0], p3[0]), max(p0[1], p1[1], p2[1], p3[1]), max(p0[0], p1[0], p2[0], p3[0])); + } } - - throw new IllegalArgumentException("GeoJSON polygons are only supported if they form a box"); - } else { 
- throw new IllegalArgumentException("GeoJSON support is restricted to Point, Circle or Polygon."); } + + String json = new ObjectMapper().writeValueAsString(geometry); + return new Geoshape(HELPER.getGeojsonReader().read(new StringReader(json))); } private double min(double... numbers) { @@ -535,63 +528,60 @@ public Geoshape read(ScanBuffer buffer) { long l = VariableLong.readPositive(buffer); assert l>0 && l0); - int length = coordinates[0].length; - VariableLong.writePositive(buffer,length); - for (int i = 0; i < 2; i++) { - for (int j = 0; j < length; j++) { - buffer.putFloat(coordinates[i][j]); - } + try { + ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); + GeoshapeBinarySerializer.write(outputStream, attribute); + byte[] bytes = outputStream.toByteArray(); + VariableLong.writePositive(buffer,bytes.length); + buffer.putBytes(bytes); + } catch (IOException e) { + throw new RuntimeException("I/O exception writing geoshape"); } } } /** - * Serializer for TinkerPop's Gryo. + * Geoshape serializer for TinkerPop's Gryo. 
*/ public static class GeoShapeGryoSerializer extends Serializer { @Override public void write(Kryo kryo, Output output, Geoshape geoshape) { - float[][] coordinates = geoshape.coordinates; - assert (coordinates.length==2); - assert (coordinates[0].length==coordinates[1].length && coordinates[0].length>0); - int length = coordinates[0].length; - output.writeLong(length); - for (int i = 0; i < 2; i++) { - for (int j = 0; j < length; j++) { - output.writeFloat(coordinates[i][j]); - } + try { + ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); + GeoshapeBinarySerializer.write(outputStream, geoshape); + byte[] bytes = outputStream.toByteArray(); + output.write(bytes.length); + output.write(bytes); + } catch (IOException e) { + throw new RuntimeException("I/O exception writing geoshape"); } } @Override public Geoshape read(Kryo kryo, Input input, Class aClass) { - long l = input.readLong(); - assert l>0 && l0; + InputStream inputStream = new ByteArrayInputStream(input.readBytes(length)); + try { + return GeoshapeBinarySerializer.read(inputStream); + } catch (IOException e) { + throw new RuntimeException("I/O exception reding geoshape"); } - return new Geoshape(coordinates); } } /** - * Serialization of Geoshape for JSON purposes uses the standard GeoJSON(http://geojson.org/) format. - * - * @author Bryn Cooke + * Geoshape serializer supports writing GeoJSON (http://geojson.org/). 
*/ public static class GeoshapeGsonSerializer extends StdSerializer { @@ -601,84 +591,115 @@ public GeoshapeGsonSerializer() { @Override public void serialize(Geoshape value, JsonGenerator jgen, SerializerProvider provider) throws IOException, JsonProcessingException { - jgen.writeStartObject(); - jgen.writeFieldName(FIELD_TYPE); - switch(value.getType()) { - case POLYGON: - throw new UnsupportedOperationException("Polygons are not supported"); - case BOX: - jgen.writeString(Type.BOX.toString()); - jgen.writeFieldName(FIELD_COORDINATES); - jgen.writeStartArray(); - - jgen.writeStartArray(); - jgen.writeNumber(value.coordinates[1][0]); - jgen.writeNumber(value.coordinates[0][0]); - jgen.writeEndArray(); - - jgen.writeStartArray(); - jgen.writeNumber(value.coordinates[1][1]); - jgen.writeNumber(value.coordinates[0][0]); - jgen.writeEndArray(); - - jgen.writeStartArray(); - jgen.writeNumber(value.coordinates[1][1]); - jgen.writeNumber(value.coordinates[0][1]); - jgen.writeEndArray(); - - jgen.writeStartArray(); - jgen.writeNumber(value.coordinates[1][0]); - jgen.writeNumber(value.coordinates[0][1]); - jgen.writeEndArray(); - - jgen.writeEndArray(); - break; - case CIRCLE: - jgen.writeString(Type.CIRCLE.toString()); - jgen.writeFieldName(FIELD_RADIUS); - jgen.writeNumber(value.getRadius()); - jgen.writeFieldName(FIELD_COORDINATES); - jgen.writeStartArray(); - jgen.writeNumber(value.coordinates[1][0]); - jgen.writeNumber(value.coordinates[0][0]); - jgen.writeEndArray(); - break; case POINT: + jgen.writeStartObject(); + jgen.writeFieldName(FIELD_TYPE); jgen.writeString(Type.POINT.toString()); jgen.writeFieldName(FIELD_COORDINATES); jgen.writeStartArray(); - jgen.writeNumber(value.coordinates[1][0]); - jgen.writeNumber(value.coordinates[0][0]); + jgen.writeNumber(value.getPoint().getLongitude()); + jgen.writeNumber(value.getPoint().getLatitude()); jgen.writeEndArray(); + jgen.writeEndObject(); + break; + default: + jgen.writeRawValue(toGeoJson(value)); break; } - 
jgen.writeEndObject(); } @Override public void serializeWithType(Geoshape geoshape, JsonGenerator jgen, SerializerProvider serializerProvider, TypeSerializer typeSerializer) throws IOException, JsonProcessingException { + jgen.writeStartObject(); if (typeSerializer != null) jgen.writeStringField(GraphSONTokens.CLASS, Geoshape.class.getName()); - GraphSONUtil.writeWithType(FIELD_COORDINATES, geoshape.coordinates, jgen, serializerProvider, typeSerializer); + String geojson = toGeoJson(geoshape); + Map json = new ObjectMapper().readValue(geojson, LinkedHashMap.class); + if (geoshape.getType() == Type.POINT) { + double[] coords = ((List) json.get("coordinates")).stream().map(i -> i.doubleValue()).mapToDouble(i -> i).toArray(); + GraphSONUtil.writeWithType(FIELD_COORDINATES, coords, jgen, serializerProvider, typeSerializer); + } else { + GraphSONUtil.writeWithType(FIELD_LABEL, json, jgen, serializerProvider, typeSerializer); + } jgen.writeEndObject(); } + + public static String toGeoJson(Geoshape geoshape) { + return HELPER.getGeojsonWriter().toString(geoshape.shape); + } + } + /** + * Geoshape JSON deserializer supporting reading from GeoJSON (http://geojson.org/). 
+ */ public static class GeoshapeGsonDeserializer extends StdDeserializer { + public GeoshapeGsonDeserializer() { super(Geoshape.class); } @Override public Geoshape deserialize(JsonParser jsonParser, DeserializationContext deserializationContext) throws IOException, JsonProcessingException { - // move the parser forward jsonParser.nextToken(); + if (jsonParser.getCurrentName().equals("coordinates")) { + double[] f = jsonParser.readValueAs(double[].class); + jsonParser.nextToken(); + return Geoshape.point(f[1], f[0]); + } else { + try { + HashMap map = jsonParser.readValueAs(LinkedHashMap.class); + jsonParser.nextToken(); + String json = new ObjectMapper().writeValueAsString(map); + Geoshape shape = new Geoshape(HELPER.getGeojsonReader().read(new StringReader(json))); + return shape; + } catch (ParseException e) { + throw new IOException("Unable to read and parse geojson", e); + } + } + } + } - float[][] f = jsonParser.readValueAs(float[][].class); - jsonParser.nextToken(); - return new Geoshape(f); + /** + * Geoshape binary serializer using spatial4j's {@link com.spatial4j.core.io.BinaryCodec}. + * + */ + public static class GeoshapeBinarySerializer { + + /** + * Serialize a geoshape. + * @param outputStream + * @param attribute + * @return + * @throws IOException + */ + public static void write(OutputStream outputStream, Geoshape attribute) throws IOException { + outputStream.write(HELPER.isJts(attribute.shape) ? 0 : 1); + try (DataOutputStream dataOutput = new DataOutputStream(outputStream)) { + HELPER.write(dataOutput, attribute); + dataOutput.flush(); + } + outputStream.flush(); + } + + /** + * Deserialize a geoshape. 
+ * @param inputStream + * @return + * @throws IOException + */ + public static Geoshape read(InputStream inputStream) throws IOException { + boolean isJts = inputStream.read()==0; + try (DataInputStream dataInput = new DataInputStream(inputStream)) { + if (isJts) { + return new Geoshape(HELPER.readGeometry(dataInput)); + } else { + return new Geoshape(HELPER.getBinaryCodec().readShape(dataInput)); + } + } } } + } diff --git a/janusgraph-core/src/main/java/org/janusgraph/core/attribute/GeoshapeHelper.java b/janusgraph-core/src/main/java/org/janusgraph/core/attribute/GeoshapeHelper.java new file mode 100644 index 0000000000..44b80eaac0 --- /dev/null +++ b/janusgraph-core/src/main/java/org/janusgraph/core/attribute/GeoshapeHelper.java @@ -0,0 +1,142 @@ +// Copyright 2017 JanusGraph Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package org.janusgraph.core.attribute; + +import com.spatial4j.core.context.SpatialContext; +import com.spatial4j.core.context.SpatialContextFactory; +import com.spatial4j.core.io.BinaryCodec; +import com.spatial4j.core.io.GeoJSONReader; +import com.spatial4j.core.io.GeoJSONWriter; +import com.spatial4j.core.io.WKTReader; +import com.spatial4j.core.io.WKTWriter; +import com.spatial4j.core.shape.Circle; +import com.spatial4j.core.shape.Rectangle; +import com.spatial4j.core.shape.Shape; + +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.util.List; + +/** + * Base class for default implementation of spatial context and associated I/O operations supporting point, circle and + * rectangle shapes. + */ +public class GeoshapeHelper { + + protected SpatialContext context; + + protected WKTReader wktReader; + + protected WKTWriter wktWriter; + + protected GeoJSONReader geojsonReader; + + protected GeoJSONWriter geojsonWriter; + + protected BinaryCodec binaryCodec; + + public GeoshapeHelper() { + SpatialContextFactory factory = new SpatialContextFactory(); + factory.geo = true; + context = new SpatialContext(factory); + wktReader = new WKTReader(context, factory); + wktWriter = new WKTWriter(); + geojsonReader = new GeoJSONReader(context, factory); + geojsonWriter = new GeoJSONWriter(context, factory); + binaryCodec = new BinaryCodec(context, factory); + } + + public Shape readGeometry(DataInputStream dataInput) throws IOException { + throw new UnsupportedOperationException("JTS is required for this operation"); + } + + public void write(DataOutputStream dataOutput, Geoshape attribute) throws IOException { + binaryCodec.writeShape(dataOutput, attribute.getShape()); + } + + public Geoshape polygon(List coordinates) { + throw new UnsupportedOperationException("JTS is required for this operation"); + } + + public Geoshape.Type getType(Shape shape) { + final Geoshape.Type type; + if 
(com.spatial4j.core.shape.Point.class.isAssignableFrom(shape.getClass())) { + type = Geoshape.Type.POINT; + } else if (Circle.class.isAssignableFrom(shape.getClass())) { + type = Geoshape.Type.CIRCLE; + } else if (Rectangle.class.isAssignableFrom(shape.getClass())) { + type = Geoshape.Type.BOX; + } else { + throw new IllegalStateException("Unrecognized shape type"); + } + return type; + } + + public int size(Shape shape) { + switch(getType(shape)) { + case POINT: return 1; + case CIRCLE: return 1; + case BOX: return 2; + default: throw new IllegalStateException("size() not supported for type: " + getType(shape)); + } + } + + public Geoshape.Point getPoint(Geoshape geoshape, int position) { + Shape shape = geoshape.getShape(); + if (position<0 || position>=size(shape)) throw new ArrayIndexOutOfBoundsException("Invalid position: " + position); + switch(getType(shape)) { + case POINT: + case CIRCLE: + return geoshape.getPoint(); + case BOX: + if (position == 0) + return new Geoshape.Point(shape.getBoundingBox().getMinY(), shape.getBoundingBox().getMinX()); + else + return new Geoshape.Point(shape.getBoundingBox().getMaxY(), shape.getBoundingBox().getMaxX()); + default: + throw new IllegalStateException("getPoint(int) not supported for type: " + getType(shape)); + } + } + + public boolean isJts(Shape shape) { + return false; + } + + public SpatialContext getContext() { + return context; + } + + public WKTReader getWktReader() { + return wktReader; + } + + public WKTWriter getWktWriter() { + return wktWriter; + } + + public GeoJSONReader getGeojsonReader() { + return geojsonReader; + } + + public GeoJSONWriter getGeojsonWriter() { + return geojsonWriter; + } + + public BinaryCodec getBinaryCodec() { + return binaryCodec; + } + +} diff --git a/janusgraph-core/src/main/java/org/janusgraph/core/attribute/JtsGeoshapeHelper.java b/janusgraph-core/src/main/java/org/janusgraph/core/attribute/JtsGeoshapeHelper.java new file mode 100644 index 0000000000..2dd0b93aee --- /dev/null 
+++ b/janusgraph-core/src/main/java/org/janusgraph/core/attribute/JtsGeoshapeHelper.java @@ -0,0 +1,137 @@ +// Copyright 2017 JanusGraph Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.janusgraph.core.attribute; + +import com.google.common.base.Preconditions; +import com.spatial4j.core.context.jts.DatelineRule; +import com.spatial4j.core.context.jts.JtsSpatialContext; +import com.spatial4j.core.context.jts.JtsSpatialContextFactory; +import com.spatial4j.core.io.jts.JtsBinaryCodec; +import com.spatial4j.core.io.jts.JtsGeoJSONReader; +import com.spatial4j.core.io.jts.JtsGeoJSONWriter; +import com.spatial4j.core.io.jts.JtsWKTReader; +import com.spatial4j.core.io.jts.JtsWKTWriter; +import com.spatial4j.core.shape.Shape; +import com.spatial4j.core.shape.jts.JtsGeometry; +import com.vividsolutions.jts.geom.Coordinate; +import com.vividsolutions.jts.geom.GeometryFactory; + +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.util.List; + +/** + * Extension of default spatial context and associated I/O operations to use the Java Topology Suite (JTS), which adds + * support for line and polygon shapes. 
+ */ +public class JtsGeoshapeHelper extends GeoshapeHelper { + + public JtsGeoshapeHelper() { + JtsSpatialContextFactory factory = new JtsSpatialContextFactory(); + factory.geo = true; + factory.useJtsPoint = false; + factory.useJtsLineString = true; + // TODO: Use default dateline rule and update to support multiline/polygon to resolve wrapping issues + factory.datelineRule = DatelineRule.none; + JtsSpatialContext context = new JtsSpatialContext(factory); + + super.context = context; + wktReader = new JtsWKTReader(context, factory); + wktWriter = new JtsWKTWriter(context, factory); + geojsonReader = new JtsGeoJSONReader(context, factory); + geojsonWriter = new JtsGeoJSONWriter(context, factory); + binaryCodec = new JtsBinaryCodec(context, factory); + } + + public Geoshape geoshape(com.vividsolutions.jts.geom.Geometry geometry) { + return new Geoshape(((JtsSpatialContext) context).makeShape(geometry)); + } + + @Override + public Shape readGeometry(DataInputStream dataInput) throws IOException { + return ((JtsBinaryCodec) binaryCodec).readJtsGeom(dataInput); + } + + @Override + public void write(DataOutputStream dataOutput, Geoshape attribute) throws IOException { + if (attribute.getShape() instanceof JtsGeometry) { + ((JtsBinaryCodec) binaryCodec).writeJtsGeom(dataOutput, attribute.getShape()); + } else { + binaryCodec.writeShape(dataOutput, attribute.getShape()); + } + } + + @Override + public Geoshape polygon(List coordinates) { + Preconditions.checkArgument(coordinates.size() >= 4, "Too few coordinate pairs provided"); + Coordinate[] points = new Coordinate[coordinates.size()]; + for (int i=0; i=size(shape)) throw new ArrayIndexOutOfBoundsException("Invalid position: " + position); + switch(getType(shape)) { + case LINE: + case POLYGON: + Coordinate coordinate = ((JtsGeometry) shape).getGeom().getCoordinates()[position]; + return new Geoshape.Point(coordinate.y, coordinate.x); + default: + return super.getPoint(geoshape, position); + } + } + + @Override + 
public boolean isJts(Shape shape) { + return shape instanceof JtsGeometry; + } + +} diff --git a/janusgraph-core/src/main/java/org/janusgraph/core/schema/Mapping.java b/janusgraph-core/src/main/java/org/janusgraph/core/schema/Mapping.java index 62be1c029c..9c00001847 100644 --- a/janusgraph-core/src/main/java/org/janusgraph/core/schema/Mapping.java +++ b/janusgraph-core/src/main/java/org/janusgraph/core/schema/Mapping.java @@ -32,7 +32,8 @@ public enum Mapping { DEFAULT, TEXT, STRING, - TEXTSTRING; + TEXTSTRING, + PREFIX_TREE; /** * Returns the mapping as a parameter so that it can be passed to {@link JanusGraphManagement#addIndexKey(JanusGraphIndex, org.janusgraph.core.PropertyKey, Parameter[])} diff --git a/janusgraph-core/src/main/java/org/janusgraph/graphdb/database/serialize/AttributeUtil.java b/janusgraph-core/src/main/java/org/janusgraph/graphdb/database/serialize/AttributeUtil.java index eaf6d32526..4bfe01ca8e 100644 --- a/janusgraph-core/src/main/java/org/janusgraph/graphdb/database/serialize/AttributeUtil.java +++ b/janusgraph-core/src/main/java/org/janusgraph/graphdb/database/serialize/AttributeUtil.java @@ -15,6 +15,8 @@ package org.janusgraph.graphdb.database.serialize; import org.janusgraph.core.PropertyKey; + +import org.janusgraph.core.attribute.Geoshape; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -52,6 +54,10 @@ public static final boolean isString(Class clazz) { return clazz.equals(String.class); } + public static final boolean isGeo(Class clazz) { + return clazz.equals(Geoshape.class); + } + /** * Compares the two elements like {@link java.util.Comparator#compare(Object, Object)} but returns * null in case the two elements are not comparable. 
diff --git a/janusgraph-core/src/main/java/org/janusgraph/graphdb/types/ParameterType.java b/janusgraph-core/src/main/java/org/janusgraph/graphdb/types/ParameterType.java index 87c8afb547..667acc9fbe 100644 --- a/janusgraph-core/src/main/java/org/janusgraph/graphdb/types/ParameterType.java +++ b/janusgraph-core/src/main/java/org/janusgraph/graphdb/types/ParameterType.java @@ -23,7 +23,19 @@ */ public enum ParameterType { - MAPPING("mapping"), INDEX_POSITION("index-pos"), MAPPED_NAME("mapped-name"), STATUS("status"); + MAPPING("mapping"), + + INDEX_POSITION("index-pos"), + + MAPPED_NAME("mapped-name"), + + STATUS("status"), + + /** Maximum number of levels to be used in the spatial prefix tree where applicable. **/ + INDEX_GEO_MAX_LEVELS("index-geo-max-levels"), + + /** Distance error percent used to determine precision in spatial prefix tree where applicable. **/ + INDEX_GEO_DIST_ERROR_PCT("index-geo-dist-error-pct"); private final String name; diff --git a/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/ElasticSearchIndex.java b/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/ElasticSearchIndex.java index fae58f91f1..e6708e480c 100644 --- a/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/ElasticSearchIndex.java +++ b/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/ElasticSearchIndex.java @@ -16,9 +16,11 @@ import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; import com.google.common.collect.Iterators; import com.google.common.collect.LinkedListMultimap; import com.google.common.collect.Multimap; + import org.janusgraph.core.Cardinality; import org.janusgraph.core.JanusGraphException; import org.janusgraph.core.attribute.*; @@ -38,6 +40,7 @@ import org.janusgraph.graphdb.internal.Order; import org.janusgraph.graphdb.query.JanusGraphPredicate; import org.janusgraph.graphdb.query.condition.*; +import 
org.janusgraph.graphdb.types.ParameterType; import org.janusgraph.util.system.IOUtils; import org.apache.commons.lang.StringUtils; import org.elasticsearch.Version; @@ -56,6 +59,10 @@ import org.elasticsearch.action.update.UpdateRequestBuilder; import org.elasticsearch.client.Client; import org.elasticsearch.client.transport.TransportClient; +import org.elasticsearch.common.geo.ShapeRelation; +import org.elasticsearch.common.geo.builders.LineStringBuilder; +import org.elasticsearch.common.geo.builders.PolygonBuilder; +import org.elasticsearch.common.geo.builders.ShapeBuilder; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.transport.InetSocketTransportAddress; import org.elasticsearch.common.unit.DistanceUnit; @@ -84,8 +91,11 @@ import java.net.UnknownHostException; import java.time.Instant; import java.util.*; +import java.util.AbstractMap.SimpleEntry; import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; +import java.util.stream.IntStream; +import java.util.stream.Stream; /** * @author Matthias Broecheler (me@matthiasb.com) @@ -189,6 +199,18 @@ public class ElasticSearchIndex implements IndexProvider { public static final int HOST_PORT_DEFAULT = 9300; + /** + * Default tree_levels used when creating geo_shape mappings. + */ + public static final int DEFAULT_GEO_MAX_LEVELS = 20; + + /** + * Default distance_error_pct used when creating geo_shape mappings. 
+ */ + public static final double DEFAULT_GEO_DIST_ERROR_PCT = 0.025; + + private static final Map SPATIAL_PREDICATES = spatialPredicates(); + private final Node node; private final Client client; private final String indexName; @@ -394,12 +416,20 @@ private static String getDualMappingName(String key) { return key + STRING_MAPPING_SUFFIX; } + private static Map spatialPredicates() { + return ImmutableMap.of(Geo.WITHIN, ShapeRelation.WITHIN, + Geo.CONTAINS, ShapeRelation.CONTAINS, + Geo.INTERSECT, ShapeRelation.INTERSECTS, + Geo.DISJOINT, ShapeRelation.DISJOINT); + } + @Override public void register(String store, String key, KeyInformation information, BaseTransaction tx) throws BackendException { XContentBuilder mapping; Class dataType = information.getDataType(); Mapping map = Mapping.getMapping(information); - Preconditions.checkArgument(map==Mapping.DEFAULT || AttributeUtil.isString(dataType), + Preconditions.checkArgument(map==Mapping.DEFAULT || AttributeUtil.isString(dataType) || + (map==Mapping.PREFIX_TREE && AttributeUtil.isGeo(dataType)), "Specified illegal mapping [%s] for data type [%s]",map,dataType); try { @@ -454,8 +484,20 @@ public void register(String store, String key, KeyInformation information, BaseT log.debug("Registering boolean type for {}", key); mapping.field("type", "boolean"); } else if (dataType == Geoshape.class) { - log.debug("Registering geo_point type for {}", key); - mapping.field("type", "geo_point"); + switch (map) { + case PREFIX_TREE: + int maxLevels = (int) ParameterType.INDEX_GEO_MAX_LEVELS.findParameter(information.getParameters(), DEFAULT_GEO_MAX_LEVELS); + double distErrorPct = (double) ParameterType.INDEX_GEO_DIST_ERROR_PCT.findParameter(information.getParameters(), DEFAULT_GEO_DIST_ERROR_PCT); + log.debug("Registering geo_shape type for {} with tree_levels={} and distance_error_pct={}", key, maxLevels, distErrorPct); + mapping.field("type", "geo_shape"); + mapping.field("tree", "quadtree"); + mapping.field("tree_levels", 
maxLevels); + mapping.field("distance_error_pct", distErrorPct); + break; + default: + log.debug("Registering geo_point type for {}", key); + mapping.field("type", "geo_point"); + } } else if (dataType == Date.class || dataType == Instant.class) { log.debug("Registering date type for {}", key); mapping.field("type", "date"); @@ -554,12 +596,7 @@ private static Object convertToEsType(Object value) { } else if (AttributeUtil.isString(value)) { return value; } else if (value instanceof Geoshape) { - Geoshape shape = (Geoshape) value; - if (shape.getType() == Geoshape.Type.POINT) { - Geoshape.Point p = shape.getPoint(); - return new double[]{p.getLongitude(), p.getLatitude()}; - } else throw new UnsupportedOperationException("Geo type is not supported: " + shape.getType()); - + return convertgeo((Geoshape) value); } else if (value instanceof Date || value instanceof Instant) { return value; } else if (value instanceof Boolean) { @@ -569,6 +606,17 @@ private static Object convertToEsType(Object value) { } else throw new IllegalArgumentException("Unsupported type: " + value.getClass() + " (value: " + value + ")"); } + private static Object convertgeo(Geoshape geoshape) { + if (geoshape.getType() == Geoshape.Type.POINT) { + Geoshape.Point p = geoshape.getPoint(); + return new double[]{p.getLongitude(), p.getLatitude()}; + } else if (geoshape.getType() != Geoshape.Type.BOX && geoshape.getType() != Geoshape.Type.CIRCLE) { + return geoshape.toGeoJson().getBytes(); + } else { + throw new IllegalArgumentException("Unsupported or invalid shape type for indexing: " + geoshape.getType()); + } + } + @Override public void mutate(Map> mutations, KeyInformation.IndexRetriever informations, BaseTransaction tx) throws BackendException { BulkRequestBuilder brb = client.prepareBulk(); @@ -833,18 +881,67 @@ public QueryBuilder getFilter(Condition condition, KeyInformation.StoreRetrie return QueryBuilders.boolQuery().mustNot(QueryBuilders.termQuery(fieldName, (String) value)); } else throw 
new IllegalArgumentException("Predicate is not supported for string value: " + janusgraphPredicate); - } else if (value instanceof Geoshape) { - Preconditions.checkArgument(janusgraphPredicate == Geo.WITHIN, "Relation is not supported for geo value: " + janusgraphPredicate); + } else if (value instanceof Geoshape && Mapping.getMapping(informations.get(key)) == Mapping.DEFAULT) { + // geopoint Geoshape shape = (Geoshape) value; + Preconditions.checkArgument(janusgraphPredicate instanceof Geo && janusgraphPredicate != Geo.CONTAINS, "Relation not supported on geopoint types: " + janusgraphPredicate); + + final QueryBuilder queryBuilder; if (shape.getType() == Geoshape.Type.CIRCLE) { Geoshape.Point center = shape.getPoint(); - return QueryBuilders.geoDistanceQuery(key).lat(center.getLatitude()).lon(center.getLongitude()).distance(shape.getRadius(), DistanceUnit.KILOMETERS); + queryBuilder = QueryBuilders.geoDistanceQuery(key).lat(center.getLatitude()).lon(center.getLongitude()).distance(shape.getRadius(), DistanceUnit.KILOMETERS); } else if (shape.getType() == Geoshape.Type.BOX) { Geoshape.Point southwest = shape.getPoint(0); Geoshape.Point northeast = shape.getPoint(1); - return QueryBuilders.geoBoundingBoxQuery(key).bottomRight(southwest.getLatitude(), northeast.getLongitude()).topLeft(northeast.getLatitude(), southwest.getLongitude()); - } else + queryBuilder = QueryBuilders.geoBoundingBoxQuery(key).bottomRight(southwest.getLatitude(), northeast.getLongitude()).topLeft(northeast.getLatitude(), southwest.getLongitude()); + } else if (shape.getType() == Geoshape.Type.POLYGON) { + queryBuilder = QueryBuilders.geoPolygonQuery(key); + IntStream.range(0, shape.size()).forEach(i -> { + Geoshape.Point point = shape.getPoint(i); + ((GeoPolygonQueryBuilder) queryBuilder).addPoint(point.getLatitude(), point.getLongitude()); + }); + } else { + throw new IllegalArgumentException("Unsupported or invalid search shape type for geopoint: " + shape.getType()); + } + + return 
janusgraphPredicate == Geo.DISJOINT ? QueryBuilders.notQuery(queryBuilder) : queryBuilder; + } else if (value instanceof Geoshape) { + // geoshape + Preconditions.checkArgument(janusgraphPredicate instanceof Geo, "Relation not supported on geoshape types: " + janusgraphPredicate); + Geoshape shape = (Geoshape) value; + final ShapeBuilder sb; + switch (shape.getType()) { + case CIRCLE: + Geoshape.Point center = shape.getPoint(); + sb = ShapeBuilder.newCircleBuilder().center(center.getLongitude(), center.getLatitude()).radius(shape.getRadius(), DistanceUnit.KILOMETERS); + break; + case BOX: + Geoshape.Point southwest = shape.getPoint(0); + Geoshape.Point northeast = shape.getPoint(1); + sb = ShapeBuilder.newEnvelope().bottomRight(northeast.getLongitude(),southwest.getLatitude()).topLeft(southwest.getLongitude(),northeast.getLatitude()); + break; + case LINE: + sb = ShapeBuilder.newLineString(); + IntStream.range(0, shape.size()).forEach(i -> { + Geoshape.Point point = shape.getPoint(i); + ((LineStringBuilder) sb).point(point.getLongitude(), point.getLatitude()); + }); + break; + case POLYGON: + sb = ShapeBuilder.newPolygon(); + IntStream.range(0, shape.size()).forEach(i -> { + Geoshape.Point point = shape.getPoint(i); + ((PolygonBuilder) sb).point(point.getLongitude(), point.getLatitude()); + }); + break; + case POINT: + sb = ShapeBuilder.newPoint(shape.getPoint().getLongitude(),shape.getPoint().getLatitude()); + break; + default: throw new IllegalArgumentException("Unsupported or invalid search shape type: " + shape.getType()); + } + + return QueryBuilders.geoShapeQuery(key, sb, SPATIAL_PREDICATES.get((Geo) janusgraphPredicate)); } else if (value instanceof Date || value instanceof Instant) { Preconditions.checkArgument(janusgraphPredicate instanceof Cmp, "Relation not supported on date types: " + janusgraphPredicate); Cmp numRel = (Cmp) janusgraphPredicate; @@ -918,8 +1015,10 @@ public List query(IndexQuery query, KeyInformation.IndexRetriever inform if 
(useDeprecatedIgnoreUnmapped) { fsb.ignoreUnmapped(true); } else { + KeyInformation information = informations.get(query.getStore()).get(orders.get(i).getKey()); + Mapping mapping = Mapping.getMapping(information); Class datatype = orderEntry.getDatatype(); - fsb.unmappedType(convertToEsDataType(datatype)); + fsb.unmappedType(convertToEsDataType(datatype, mapping)); } srb.addSort(fsb); } @@ -942,7 +1041,7 @@ public List query(IndexQuery query, KeyInformation.IndexRetriever inform return result; } - private String convertToEsDataType(Class datatype) { + private String convertToEsDataType(Class datatype, Mapping mapping) { if(String.class.isAssignableFrom(datatype)) { return "string"; } @@ -968,7 +1067,7 @@ else if (Instant.class.isAssignableFrom(datatype)) { return "date"; } else if (Geoshape.class.isAssignableFrom(datatype)) { - return "geo_point"; + return mapping == Mapping.DEFAULT ? "geo_point" : "geo_shape"; } return null; @@ -1002,12 +1101,18 @@ public Iterable> query(RawQuery query, KeyInformation.In public boolean supports(KeyInformation information, JanusGraphPredicate janusgraphPredicate) { Class dataType = information.getDataType(); Mapping mapping = Mapping.getMapping(information); - if (mapping!=Mapping.DEFAULT && !AttributeUtil.isString(dataType)) return false; + if (mapping!=Mapping.DEFAULT && !AttributeUtil.isString(dataType) && + !(mapping==Mapping.PREFIX_TREE && AttributeUtil.isGeo(dataType))) return false; if (Number.class.isAssignableFrom(dataType)) { if (janusgraphPredicate instanceof Cmp) return true; } else if (dataType == Geoshape.class) { - return janusgraphPredicate == Geo.WITHIN; + switch(mapping) { + case DEFAULT: + return janusgraphPredicate instanceof Geo && janusgraphPredicate != Geo.CONTAINS; + case PREFIX_TREE: + return janusgraphPredicate instanceof Geo; + } } else if (AttributeUtil.isString(dataType)) { switch(mapping) { case DEFAULT: @@ -1033,11 +1138,13 @@ public boolean supports(KeyInformation information, JanusGraphPredicate 
janusgra public boolean supports(KeyInformation information) { Class dataType = information.getDataType(); Mapping mapping = Mapping.getMapping(information); - if (Number.class.isAssignableFrom(dataType) || dataType == Geoshape.class || dataType == Date.class || dataType== Instant.class || dataType == Boolean.class || dataType == UUID.class) { + if (Number.class.isAssignableFrom(dataType) || dataType == Date.class || dataType== Instant.class || dataType == Boolean.class || dataType == UUID.class) { if (mapping==Mapping.DEFAULT) return true; } else if (AttributeUtil.isString(dataType)) { if (mapping==Mapping.DEFAULT || mapping==Mapping.STRING || mapping==Mapping.TEXT || mapping==Mapping.TEXTSTRING) return true; + } else if (AttributeUtil.isGeo(dataType)) { + if (mapping==Mapping.DEFAULT || mapping==Mapping.PREFIX_TREE) return true; } return false; } diff --git a/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/ElasticSearchSetup.java b/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/ElasticSearchSetup.java index 9598f190ba..4e9ba326f3 100644 --- a/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/ElasticSearchSetup.java +++ b/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/ElasticSearchSetup.java @@ -212,9 +212,8 @@ private static Settings.Builder settingsBuilder(Configuration config) throws IOE String inlineScriptsKey = "script.inline"; String inlineScriptsVal = settings.get(inlineScriptsKey); if (null != inlineScriptsVal && !"true".equals(inlineScriptsVal)) { - log.warn("JanusGraph requires Elasticsearch inline scripting. Setting {} to true. 
" + - "Inline scripting must be allowed in the Elasticsearch cluster configuration.", - inlineScriptsKey); + log.error("JanusGraph requires Elasticsearch inline scripting but found {} set to false", inlineScriptsKey); + throw new IOException("JanusGraph requires Elasticsearch inline scripting"); } settings.put(inlineScriptsKey, true); log.debug("Set {}: {}", inlineScriptsKey, false); diff --git a/janusgraph-es/src/test/java/org/janusgraph/diskstorage/es/ElasticSearchIndexTest.java b/janusgraph-es/src/test/java/org/janusgraph/diskstorage/es/ElasticSearchIndexTest.java index acbb310c21..04b28c7bae 100644 --- a/janusgraph-es/src/test/java/org/janusgraph/diskstorage/es/ElasticSearchIndexTest.java +++ b/janusgraph-es/src/test/java/org/janusgraph/diskstorage/es/ElasticSearchIndexTest.java @@ -98,6 +98,16 @@ public void testSupport() { assertTrue(index.supports(of(UUID.class, Cardinality.SINGLE), Cmp.EQUAL)); assertTrue(index.supports(of(UUID.class, Cardinality.SINGLE), Cmp.NOT_EQUAL)); + + assertTrue(index.supports(of(Geoshape.class, Cardinality.SINGLE))); + assertTrue(index.supports(of(Geoshape.class, Cardinality.SINGLE), Geo.WITHIN)); + assertTrue(index.supports(of(Geoshape.class, Cardinality.SINGLE), Geo.INTERSECT)); + assertTrue(index.supports(of(Geoshape.class, Cardinality.SINGLE), Geo.DISJOINT)); + assertFalse(index.supports(of(Geoshape.class, Cardinality.SINGLE), Geo.CONTAINS)); + assertTrue(index.supports(of(Geoshape.class, Cardinality.SINGLE, new Parameter("mapping",Mapping.PREFIX_TREE)), Geo.WITHIN)); + assertTrue(index.supports(of(Geoshape.class, Cardinality.SINGLE, new Parameter("mapping",Mapping.PREFIX_TREE)), Geo.INTERSECT)); + assertTrue(index.supports(of(Geoshape.class, Cardinality.SINGLE, new Parameter("mapping",Mapping.PREFIX_TREE)), Geo.CONTAINS)); + assertTrue(index.supports(of(Geoshape.class, Cardinality.SINGLE, new Parameter("mapping",Mapping.PREFIX_TREE)), Geo.DISJOINT)); } @Test diff --git a/janusgraph-es/src/test/resources/es_jvmlocal.yml 
b/janusgraph-es/src/test/resources/es_jvmlocal.yml index ca4963473e..89908984a3 100644 --- a/janusgraph-es/src/test/resources/es_jvmlocal.yml +++ b/janusgraph-es/src/test/resources/es_jvmlocal.yml @@ -1,4 +1,5 @@ node.data: true node.client: false node.local: true -path.home: ${project.build.directory}/es \ No newline at end of file +path.home: ${project.build.directory}/es + diff --git a/janusgraph-hadoop-parent/janusgraph-hadoop-core/src/main/java/org/janusgraph/hadoop/serialize/JanusGraphKryoRegistrator.java b/janusgraph-hadoop-parent/janusgraph-hadoop-core/src/main/java/org/janusgraph/hadoop/serialize/JanusGraphKryoRegistrator.java new file mode 100644 index 0000000000..00f509c341 --- /dev/null +++ b/janusgraph-hadoop-parent/janusgraph-hadoop-core/src/main/java/org/janusgraph/hadoop/serialize/JanusGraphKryoRegistrator.java @@ -0,0 +1,70 @@ +// Copyright 2017 JanusGraph Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package org.janusgraph.hadoop.serialize; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; + +import com.google.common.base.Preconditions; +import org.apache.commons.io.output.ByteArrayOutputStream; +import org.apache.spark.serializer.KryoRegistrator; + +import com.esotericsoftware.kryo.Kryo; +import com.esotericsoftware.kryo.Serializer; +import com.esotericsoftware.kryo.io.Input; +import com.esotericsoftware.kryo.io.Output; +import org.janusgraph.core.attribute.Geoshape; + +/** + * Register JanusGraph classes requiring custom Kryo serialization for Spark. + * + */ +public class JanusGraphKryoRegistrator implements KryoRegistrator { + + @Override + public void registerClasses(Kryo kryo) { + kryo.register(Geoshape.class, new GeoShapeKryoSerializer()); + } + + /** + * Geoshape serializer for Kryo. + */ + public static class GeoShapeKryoSerializer extends Serializer { + @Override + public void write(Kryo kryo, Output output, Geoshape geoshape) { + try (ByteArrayOutputStream outputStream = new ByteArrayOutputStream()) { + Geoshape.GeoshapeBinarySerializer.write(outputStream, geoshape); + byte[] bytes = outputStream.toByteArray(); + output.writeInt(bytes.length); + output.write(bytes); + } catch (IOException e) { + throw new RuntimeException("I/O exception writing geoshape", e); + } + } + + @Override + public Geoshape read(Kryo kryo, Input input, Class aClass) { + int length = input.readInt(); + Preconditions.checkArgument(length>0); + try (InputStream inputStream = new ByteArrayInputStream(input.readBytes(length))) { + return Geoshape.GeoshapeBinarySerializer.read(inputStream); + } catch (IOException e) { + throw new RuntimeException("I/O exception reading geoshape", e); + } + } + } + +} diff --git a/janusgraph-hadoop-parent/janusgraph-hadoop-core/src/test/resources/cassandra-read.properties b/janusgraph-hadoop-parent/janusgraph-hadoop-core/src/test/resources/cassandra-read.properties index 507ca41342..51a072d9d2 100644 ---
a/janusgraph-hadoop-parent/janusgraph-hadoop-core/src/test/resources/cassandra-read.properties +++ b/janusgraph-hadoop-parent/janusgraph-hadoop-core/src/test/resources/cassandra-read.properties @@ -28,5 +28,6 @@ giraph.maxMessagesInMemory=100000 spark.master=local[4] spark.executor.memory=1g spark.serializer=org.apache.spark.serializer.KryoSerializer +spark.kryo.registrator=org.janusgraph.hadoop.serialize.JanusGraphKryoRegistrator cassandra.input.partitioner.class=org.apache.cassandra.dht.Murmur3Partitioner cassandra.input.widerows=true diff --git a/janusgraph-hadoop-parent/janusgraph-hadoop-core/src/test/resources/hbase-read.properties b/janusgraph-hadoop-parent/janusgraph-hadoop-core/src/test/resources/hbase-read.properties index eceecc54c5..fbc6cf00eb 100644 --- a/janusgraph-hadoop-parent/janusgraph-hadoop-core/src/test/resources/hbase-read.properties +++ b/janusgraph-hadoop-parent/janusgraph-hadoop-core/src/test/resources/hbase-read.properties @@ -26,3 +26,4 @@ giraph.maxMessagesInMemory=100000 spark.master=local[4] spark.executor.memory=1g spark.serializer=org.apache.spark.serializer.KryoSerializer +spark.kryo.registrator=org.janusgraph.hadoop.serialize.JanusGraphKryoRegistrator diff --git a/janusgraph-lucene/src/main/java/org/janusgraph/diskstorage/lucene/LuceneIndex.java b/janusgraph-lucene/src/main/java/org/janusgraph/diskstorage/lucene/LuceneIndex.java index 1e2b9d19a7..4c71254cda 100644 --- a/janusgraph-lucene/src/main/java/org/janusgraph/diskstorage/lucene/LuceneIndex.java +++ b/janusgraph-lucene/src/main/java/org/janusgraph/diskstorage/lucene/LuceneIndex.java @@ -32,6 +32,7 @@ import org.janusgraph.graphdb.database.serialize.AttributeUtil; import org.janusgraph.graphdb.query.JanusGraphPredicate; import org.janusgraph.graphdb.query.condition.*; +import org.janusgraph.graphdb.types.ParameterType; import org.janusgraph.util.system.IOUtils; import org.apache.commons.io.FileUtils; import org.apache.commons.lang.StringUtils; @@ -47,6 +48,10 @@ import 
org.apache.lucene.queryparser.classic.QueryParser; import org.apache.lucene.search.*; import org.apache.lucene.spatial.SpatialStrategy; +import org.apache.lucene.spatial.prefix.PrefixTreeStrategy; +import org.apache.lucene.spatial.prefix.RecursivePrefixTreeStrategy; +import org.apache.lucene.spatial.prefix.tree.QuadPrefixTree; +import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree; import org.apache.lucene.spatial.query.SpatialArgs; import org.apache.lucene.spatial.query.SpatialOperation; import org.apache.lucene.spatial.vector.PointVectorStrategy; @@ -61,8 +66,11 @@ import java.io.IOException; import java.time.Instant; import java.util.*; +import java.util.AbstractMap.SimpleEntry; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.locks.ReentrantLock; +import java.util.stream.Collectors; +import java.util.stream.Stream; /** * @author Matthias Broecheler (me@matthiasb.com) @@ -80,7 +88,17 @@ public class LuceneIndex implements IndexProvider { private static final IndexFeatures LUCENE_FEATURES = new IndexFeatures.Builder().supportedStringMappings(Mapping.TEXT, Mapping.STRING).supportsCardinality(Cardinality.SINGLE).supportsNanoseconds().build(); - private static final int GEO_MAX_LEVELS = 11; + /** + * Default tree levels used when creating the prefix tree. + */ + public static final int DEFAULT_GEO_MAX_LEVELS = 20; + + /** + * Default measure of shape precision used when creating the prefix tree. 
+ */ + public static final double DEFAULT_GEO_DIST_ERROR_PCT = 0.025; + + private static Map SPATIAL_PREDICATES = spatialPredicates(); private final Analyzer analyzer = new StandardAnalyzer(); @@ -88,7 +106,7 @@ public class LuceneIndex implements IndexProvider { private final ReentrantLock writerLock = new ReentrantLock(); private Map spatial = new ConcurrentHashMap(12); - private SpatialContext ctx = SpatialContext.GEO; + private SpatialContext ctx = Geoshape.getSpatialContext(); private final String basePath; @@ -133,14 +151,23 @@ private IndexWriter getWriter(String store) throws BackendException { return writer; } - private SpatialStrategy getSpatialStrategy(String key) { + private SpatialStrategy getSpatialStrategy(String key, KeyInformation ki) { SpatialStrategy strategy = spatial.get(key); + Mapping mapping = Mapping.getMapping(ki); + int maxLevels = (int) ParameterType.INDEX_GEO_MAX_LEVELS.findParameter(ki.getParameters(), DEFAULT_GEO_MAX_LEVELS); + double distErrorPct = (double) ParameterType.INDEX_GEO_DIST_ERROR_PCT.findParameter(ki.getParameters(), DEFAULT_GEO_DIST_ERROR_PCT); if (strategy == null) { synchronized (spatial) { if (!spatial.containsKey(key)) { // SpatialPrefixTree grid = new GeohashPrefixTree(ctx, GEO_MAX_LEVELS); // strategy = new RecursivePrefixTreeStrategy(grid, key); - strategy = new PointVectorStrategy(ctx, key); + if (mapping == Mapping.DEFAULT) { + strategy = new PointVectorStrategy(ctx, key); + } else { + SpatialPrefixTree grid = new QuadPrefixTree(ctx, maxLevels); + strategy = new RecursivePrefixTreeStrategy(grid, key); + ((PrefixTreeStrategy) strategy).setDistErrPct(distErrorPct); + } spatial.put(key, strategy); } else return spatial.get(key); } @@ -148,12 +175,23 @@ private SpatialStrategy getSpatialStrategy(String key) { return strategy; } + private static Map spatialPredicates() { + return Collections.unmodifiableMap(Stream.of( + new SimpleEntry<>(Geo.WITHIN, SpatialOperation.IsWithin), + new SimpleEntry<>(Geo.CONTAINS, 
SpatialOperation.Contains), + new SimpleEntry<>(Geo.INTERSECT, SpatialOperation.Intersects), + new SimpleEntry<>(Geo.DISJOINT, SpatialOperation.IsDisjointTo)) + .collect(Collectors.toMap((e) -> e.getKey(), (e) -> e.getValue()))); + } + @Override public void register(String store, String key, KeyInformation information, BaseTransaction tx) throws BackendException { Class dataType = information.getDataType(); Mapping map = Mapping.getMapping(information); - Preconditions.checkArgument(map == Mapping.DEFAULT || AttributeUtil.isString(dataType), - "Specified illegal mapping [%s] for data type [%s]", map, dataType); } + Preconditions.checkArgument(map == Mapping.DEFAULT || AttributeUtil.isString(dataType) || + (map == Mapping.PREFIX_TREE && AttributeUtil.isGeo(dataType)), + "Specified illegal mapping [%s] for data type [%s]", map, dataType); + } @Override public void mutate(Map> mutations, KeyInformation.IndexRetriever informations, BaseTransaction tx) throws BackendException { @@ -280,7 +318,7 @@ private Pair> retrieveOrCreate(String docID, IndexS for (IndexableField field : doc.getFields()) { if (field.stringValue().startsWith(GEOID)) { try { - geofields.put(field.name(), ctx.readShapeFromWkt(field.stringValue().substring(GEOID.length()))); + geofields.put(field.name(), Geoshape.fromWkt(field.stringValue().substring(GEOID.length())).getShape()); } catch (java.text.ParseException e) { throw new IllegalArgumentException("Geoshape was unparsable"); } @@ -334,10 +372,9 @@ private void addToDocument(String store, } doc.add(field); } else if (e.value instanceof Geoshape) { - Shape shape = ((Geoshape) e.value).convert2Spatial4j(); + Shape shape = ((Geoshape) e.value).getShape(); geofields.put(e.field, shape); - doc.add(new StoredField(e.field, GEOID + toWkt(shape))); - + doc.add(new StoredField(e.field, GEOID + e.value.toString())); } else if (e.value instanceof Date) { doc.add(new LongField(e.field, (((Date) e.value).getTime()), Field.Store.YES)); } else if (e.value 
instanceof Instant) { @@ -357,17 +394,14 @@ private void addToDocument(String store, if (log.isTraceEnabled()) log.trace("Updating geo-indexes for key {}", geo.getKey()); - for (IndexableField f : getSpatialStrategy(geo.getKey()).createIndexableFields(geo.getValue())) + KeyInformation ki = informations.get(store, geo.getKey()); + SpatialStrategy spatialStrategy = getSpatialStrategy(geo.getKey(), ki); + for (IndexableField f : spatialStrategy.createIndexableFields(geo.getValue())) { doc.add(f); - } - } - - private String toWkt(Shape shape) { - if(shape instanceof Point) { - return "POINT(" + ((Point) shape).getX() + " " + ((Point) shape).getY() + ")"; - } - else { - throw new IllegalArgumentException("Only points are supported"); + if (spatialStrategy instanceof PointVectorStrategy) { + doc.add(new DoubleDocValuesField(f.name(), f.numericValue().doubleValue())); + } + } } } @@ -506,10 +540,11 @@ private final SearchParams convertQuery(Condition condition, KeyInformation.S } else throw new IllegalArgumentException("Relation is not supported for string value: " + janusgraphPredicate); } else if (value instanceof Geoshape) { - Preconditions.checkArgument(janusgraphPredicate == Geo.WITHIN, "Relation is not supported for geo value: " + janusgraphPredicate); - Shape shape = ((Geoshape) value).convert2Spatial4j(); - SpatialArgs args = new SpatialArgs(SpatialOperation.IsWithin, shape); - params.addQuery(getSpatialStrategy(key).makeQuery(args)); + Preconditions.checkArgument(janusgraphPredicate instanceof Geo, "Relation not supported on geo types: " + janusgraphPredicate); + Shape shape = ((Geoshape) value).getShape(); + SpatialOperation spatialOp = SPATIAL_PREDICATES.get((Geo) janusgraphPredicate); + SpatialArgs args = new SpatialArgs(spatialOp, shape); + params.addQuery(getSpatialStrategy(key, informations.get(key)).makeQuery(args)); } else if (value instanceof Date) { Preconditions.checkArgument(janusgraphPredicate instanceof Cmp, "Relation not supported on date types: " 
+ janusgraphPredicate); params.addFilter(numericFilter(key, (Cmp) janusgraphPredicate, ((Date) value).getTime())); @@ -606,12 +641,13 @@ public boolean supports(KeyInformation information, JanusGraphPredicate janusgra if (information.getCardinality()!= Cardinality.SINGLE) return false; Class dataType = information.getDataType(); Mapping mapping = Mapping.getMapping(information); - if (mapping!=Mapping.DEFAULT && !AttributeUtil.isString(dataType)) return false; + if (mapping!=Mapping.DEFAULT && !AttributeUtil.isString(dataType) && + !(mapping==Mapping.PREFIX_TREE && AttributeUtil.isGeo(dataType))) return false; if (Number.class.isAssignableFrom(dataType)) { if (janusgraphPredicate instanceof Cmp) return true; } else if (dataType == Geoshape.class) { - return janusgraphPredicate == Geo.WITHIN; + return janusgraphPredicate == Geo.INTERSECT || janusgraphPredicate == Geo.WITHIN || janusgraphPredicate == Geo.CONTAINS; } else if (AttributeUtil.isString(dataType)) { switch(mapping) { case DEFAULT: @@ -635,10 +671,12 @@ public boolean supports(KeyInformation information) { if (information.getCardinality()!= Cardinality.SINGLE) return false; Class dataType = information.getDataType(); Mapping mapping = Mapping.getMapping(information); - if (Number.class.isAssignableFrom(dataType) || dataType == Geoshape.class || dataType == Date.class || dataType == Instant.class || dataType == Boolean.class || dataType == UUID.class) { + if (Number.class.isAssignableFrom(dataType) || dataType == Date.class || dataType == Instant.class || dataType == Boolean.class || dataType == UUID.class) { if (mapping==Mapping.DEFAULT) return true; } else if (AttributeUtil.isString(dataType)) { if (mapping==Mapping.DEFAULT || mapping==Mapping.STRING || mapping==Mapping.TEXT) return true; + } else if (AttributeUtil.isGeo(dataType)) { + if (mapping==Mapping.DEFAULT || mapping==Mapping.PREFIX_TREE) return true; } return false; } diff --git 
a/janusgraph-lucene/src/test/java/org/janusgraph/diskstorage/lucene/LuceneExample.java b/janusgraph-lucene/src/test/java/org/janusgraph/diskstorage/lucene/LuceneExample.java index bf68b07023..8993cf13a6 100644 --- a/janusgraph-lucene/src/test/java/org/janusgraph/diskstorage/lucene/LuceneExample.java +++ b/janusgraph-lucene/src/test/java/org/janusgraph/diskstorage/lucene/LuceneExample.java @@ -113,7 +113,7 @@ public void example1() throws Exception { BooleanFilter filter = new BooleanFilter(); //filter.add(new TermsFilter(new Term("name_txt","know")), BooleanClause.Occur.MUST); - SpatialArgs args = new SpatialArgs(SpatialOperation.Intersects,Geoshape.circle(51.666167,6.58905,450).convert2Spatial4j()); + SpatialArgs args = new SpatialArgs(SpatialOperation.Intersects,Geoshape.circle(51.666167,6.58905,450).getShape()); //filter.add(getSpatialStrategy("location").makeFilter(args), BooleanClause.Occur.MUST); filter.add(NumericRangeFilter.newLongRange("time",(long)1000342034,(long)1000342034,true,true), BooleanClause.Occur.MUST); @@ -166,7 +166,7 @@ void indexDocs(IndexWriter writer, String docid, Map docMap) thro field = new StringField(key+STR_SUFFIX, str, Field.Store.NO); doc.add(field); } else if (value instanceof Geoshape) { - Shape shape = ((Geoshape)value).convert2Spatial4j(); + Shape shape = ((Geoshape)value).getShape(); for (IndexableField f : getSpatialStrategy(key).createIndexableFields(shape)) { doc.add(f); } diff --git a/janusgraph-lucene/src/test/java/org/janusgraph/diskstorage/lucene/LuceneIndexTest.java b/janusgraph-lucene/src/test/java/org/janusgraph/diskstorage/lucene/LuceneIndexTest.java index 601e023ba5..4cc0852f13 100644 --- a/janusgraph-lucene/src/test/java/org/janusgraph/diskstorage/lucene/LuceneIndexTest.java +++ b/janusgraph-lucene/src/test/java/org/janusgraph/diskstorage/lucene/LuceneIndexTest.java @@ -108,6 +108,15 @@ public void testSupport() { assertTrue(index.supports(of(UUID.class, Cardinality.SINGLE), Cmp.EQUAL)); 
assertTrue(index.supports(of(UUID.class, Cardinality.SINGLE), Cmp.NOT_EQUAL)); + + assertTrue(index.supports(of(Geoshape.class, Cardinality.SINGLE))); + assertTrue(index.supports(of(Geoshape.class, Cardinality.SINGLE), Geo.WITHIN)); + assertTrue(index.supports(of(Geoshape.class, Cardinality.SINGLE), Geo.INTERSECT)); + assertFalse(index.supports(of(Geoshape.class, Cardinality.SINGLE), Geo.DISJOINT)); + assertTrue(index.supports(of(Geoshape.class, Cardinality.SINGLE, new Parameter("mapping",Mapping.PREFIX_TREE)), Geo.WITHIN)); + assertTrue(index.supports(of(Geoshape.class, Cardinality.SINGLE, new Parameter("mapping",Mapping.PREFIX_TREE)), Geo.CONTAINS)); + assertTrue(index.supports(of(Geoshape.class, Cardinality.SINGLE, new Parameter("mapping",Mapping.PREFIX_TREE)), Geo.INTERSECT)); + assertFalse(index.supports(of(Geoshape.class, Cardinality.SINGLE, new Parameter("mapping",Mapping.PREFIX_TREE)), Geo.DISJOINT)); } // @Override diff --git a/janusgraph-solr/pom.xml b/janusgraph-solr/pom.xml index e610cf8ee9..8e149cf151 100644 --- a/janusgraph-solr/pom.xml +++ b/janusgraph-solr/pom.xml @@ -90,7 +90,6 @@ com.vividsolutions jts - 1.13 com.carrotsearch diff --git a/janusgraph-test/src/main/java/org/janusgraph/diskstorage/indexing/IndexProviderTest.java b/janusgraph-test/src/main/java/org/janusgraph/diskstorage/indexing/IndexProviderTest.java index e6592d2eab..55f67de5a1 100644 --- a/janusgraph-test/src/main/java/org/janusgraph/diskstorage/indexing/IndexProviderTest.java +++ b/janusgraph-test/src/main/java/org/janusgraph/diskstorage/indexing/IndexProviderTest.java @@ -40,7 +40,6 @@ import java.util.*; import static org.junit.Assert.*; -import static org.junit.Assert.assertFalse; /** * @author Matthias Broecheler (me@matthiasb.com) @@ -58,7 +57,7 @@ public abstract class IndexProviderTest { protected Map allKeys; protected KeyInformation.IndexRetriever indexRetriever; - public static final String TEXT = "text", TIME = "time", WEIGHT = "weight", LOCATION = "location", NAME = 
"name", PHONE_LIST = "phone_list", PHONE_SET = "phone_set", DATE = "date"; + public static final String TEXT = "text", TIME = "time", WEIGHT = "weight", LOCATION = "location", BOUNDARY = "boundary", NAME = "name", PHONE_LIST = "phone_list", PHONE_SET = "phone_set", DATE = "date"; public static StandardKeyInformation of(Class clazz, Cardinality cardinality, Parameter... paras) { return new StandardKeyInformation(clazz, cardinality, paras); @@ -95,6 +94,7 @@ public static final Map getMapping(final IndexFeatures in put(TIME,new StandardKeyInformation(Long.class, Cardinality.SINGLE)); put(WEIGHT,new StandardKeyInformation(Double.class, Cardinality.SINGLE, new Parameter("mapping",Mapping.DEFAULT))); put(LOCATION,new StandardKeyInformation(Geoshape.class, Cardinality.SINGLE)); + put(BOUNDARY,new StandardKeyInformation(Geoshape.class, Cardinality.SINGLE, new Parameter("mapping",Mapping.PREFIX_TREE))); put(NAME,new StandardKeyInformation(String.class, Cardinality.SINGLE, new Parameter("mapping", indexFeatures.supportsStringMapping(Mapping.STRING)?Mapping.STRING:Mapping.TEXTSTRING))); if(indexFeatures.supportsCardinality(Cardinality.LIST)) { @@ -173,9 +173,9 @@ public void multipleStores() throws Exception { private void storeTest(String... 
stores) throws Exception { - Multimap doc1 = getDocument("Hello world", 1001, 5.2, Geoshape.point(48.0, 0.0), Arrays.asList("1", "2", "3"), Sets.newHashSet("1", "2"), Instant.ofEpochSecond(1)); - Multimap doc2 = getDocument("Tomorrow is the world", 1010, 8.5, Geoshape.point(49.0, 1.0), Arrays.asList("4", "5", "6"), Sets.newHashSet("4", "5"), Instant.ofEpochSecond(2)); - Multimap doc3 = getDocument("Hello Bob, are you there?", -500, 10.1, Geoshape.point(47.0, 10.0), Arrays.asList("7", "8", "9"), Sets.newHashSet("7", "8"), Instant.ofEpochSecond(3)); + Multimap doc1 = getDocument("Hello world", 1001, 5.2, Geoshape.point(48.0, 0.0), Geoshape.polygon(Arrays.asList(new double[][] {{-0.1,47.9},{0.1,47.9},{0.1,48.1},{-0.1,48.1},{-0.1,47.9}})),Arrays.asList("1", "2", "3"), Sets.newHashSet("1", "2"), Instant.ofEpochSecond(1)); + Multimap doc2 = getDocument("Tomorrow is the world", 1010, 8.5, Geoshape.point(49.0, 1.0), Geoshape.line(Arrays.asList(new double[][] {{0.9,48.9},{0.9,49.1},{1.1,49.1},{1.1,48.9}})), Arrays.asList("4", "5", "6"), Sets.newHashSet("4", "5"), Instant.ofEpochSecond(2)); + Multimap doc3 = getDocument("Hello Bob, are you there?", -500, 10.1, Geoshape.point(47.0, 10.0), Geoshape.polygon(Arrays.asList(new double[][] {{9.9,46.9},{10.1,46.9},{10.1,47.1},{9.9,47.1},{9.9,46.9}})), Arrays.asList("7", "8", "9"), Sets.newHashSet("7", "8"), Instant.ofEpochSecond(3)); for (String store : stores) { initialize(store); @@ -301,18 +301,61 @@ private void storeTest(String... 
stores) throws Exception { assertEquals(1, result.size()); assertEquals("doc2", result.get(0)); + result = tx.query(new IndexQuery(store, PredicateCondition.of(LOCATION, Geo.WITHIN, Geoshape.box(46.5, -0.5, 50.5, 10.5)))); + assertEquals(3,result.size()); + assertEquals(ImmutableSet.of("doc1", "doc2", "doc3"), ImmutableSet.copyOf(result)); + result = tx.query(new IndexQuery(store, PredicateCondition.of(LOCATION, Geo.WITHIN, Geoshape.circle(48.5, 0.5, 200.00)))); assertEquals(2, result.size()); assertEquals(ImmutableSet.of("doc1", "doc2"), ImmutableSet.copyOf(result)); - result = tx.query(new IndexQuery(store, And.of(PredicateCondition.of(TEXT, Text.CONTAINS, "tomorrow"), PredicateCondition.of(LOCATION, Geo.WITHIN, Geoshape.circle(48.5, 0.5, 200.00))))); - assertEquals(ImmutableSet.of("doc2"), ImmutableSet.copyOf(result)); - - result = tx.query(new IndexQuery(store, PredicateCondition.of("location", Geo.WITHIN, Geoshape.box(46.5, -0.5, 50.5, 10.5)))); + result = tx.query(new IndexQuery(store, PredicateCondition.of(BOUNDARY, Geo.WITHIN, Geoshape.box(46.5, -0.5, 50.5, 10.5)))); assertEquals(3,result.size()); assertEquals(ImmutableSet.of("doc1", "doc2", "doc3"), ImmutableSet.copyOf(result)); - result = tx.query(new IndexQuery(store, And.of(PredicateCondition.of(TIME, Cmp.GREATER_THAN_EQUAL, -1000), PredicateCondition.of(TIME, Cmp.LESS_THAN, 1010), PredicateCondition.of(LOCATION, Geo.WITHIN, Geoshape.circle(48.5, 0.5, 1000.00))))); + result = tx.query(new IndexQuery(store, PredicateCondition.of(BOUNDARY, Geo.WITHIN, Geoshape.circle(48.5, 0.5, 200.00)))); + assertEquals(2, result.size()); + assertEquals(ImmutableSet.of("doc1", "doc2"), ImmutableSet.copyOf(result)); + + result = tx.query(new IndexQuery(store, PredicateCondition.of(BOUNDARY, Geo.WITHIN, Geoshape.polygon(Arrays.asList(new double[][] + {{-5.0,47.0},{5.0,47.0},{5.0,50.0},{-5.0,50.0},{-5.0,47.0}}))))); + assertEquals(2, result.size()); + assertEquals(ImmutableSet.of("doc1","doc2"), 
ImmutableSet.copyOf(result)); + + if (index.supports(new StandardKeyInformation(Geoshape.class, Cardinality.SINGLE, new Parameter("mapping", Mapping.PREFIX_TREE)), Geo.DISJOINT)) { + result = tx.query(new IndexQuery(store, PredicateCondition.of(BOUNDARY, Geo.DISJOINT, Geoshape.box(46.5, -0.5, 50.5, 10.5)))); + assertEquals(0,result.size()); + + result = tx.query(new IndexQuery(store, PredicateCondition.of(BOUNDARY, Geo.DISJOINT, Geoshape.circle(48.5, 0.5, 200.00)))); + assertEquals(1, result.size()); + assertEquals(ImmutableSet.of("doc3"), ImmutableSet.copyOf(result)); + + result = tx.query(new IndexQuery(store, PredicateCondition.of(BOUNDARY, Geo.DISJOINT, Geoshape.polygon(Arrays.asList(new double[][] + {{-5.0,47.0},{5.0,47.0},{5.0,50.0},{-5.0,50.0},{-5.0,47.0}}))))); + assertEquals(1, result.size()); + assertEquals(ImmutableSet.of("doc3"), ImmutableSet.copyOf(result)); + } + + result = tx.query(new IndexQuery(store, PredicateCondition.of(BOUNDARY, Geo.CONTAINS, Geoshape.point(47,10)))); + assertEquals(1, result.size()); + assertEquals(ImmutableSet.of("doc3"), ImmutableSet.copyOf(result)); + + result = tx.query(new IndexQuery(store, PredicateCondition.of(BOUNDARY, Geo.INTERSECT, Geoshape.box(48,-1,49,2)))); + assertEquals(2,result.size()); + assertEquals(ImmutableSet.of("doc1","doc2"), ImmutableSet.copyOf(result)); + + result = tx.query(new IndexQuery(store, PredicateCondition.of(BOUNDARY, Geo.INTERSECT, Geoshape.circle(48.5, 0.5, 200.00)))); + assertEquals(2, result.size()); + assertEquals(ImmutableSet.of("doc1", "doc2"), ImmutableSet.copyOf(result)); + + result = tx.query(new IndexQuery(store, PredicateCondition.of(BOUNDARY, Geo.INTERSECT, Geoshape.polygon(Arrays.asList(new double[][] {{-1.0,48.0},{2.0,48.0},{2.0,49.0},{-1.0,49.0},{-1.0,48.0}}))))); + assertEquals(2, result.size()); + assertEquals(ImmutableSet.of("doc1","doc2"), ImmutableSet.copyOf(result)); + + result = tx.query(new IndexQuery(store, And.of(PredicateCondition.of("text", Text.CONTAINS, 
"tomorrow"), PredicateCondition.of(LOCATION, Geo.WITHIN, Geoshape.circle(48.5, 0.5, 200.00)), PredicateCondition.of(BOUNDARY, Geo.WITHIN, Geoshape.circle(48.5, 0.5, 200.00))))); + assertEquals(ImmutableSet.of("doc2"), ImmutableSet.copyOf(result)); + + result = tx.query(new IndexQuery(store, And.of(PredicateCondition.of(TIME, Cmp.GREATER_THAN_EQUAL, -1000), PredicateCondition.of(TIME, Cmp.LESS_THAN, 1010), PredicateCondition.of(LOCATION, Geo.WITHIN, Geoshape.circle(48.5, 0.5, 1000.00)), PredicateCondition.of(BOUNDARY, Geo.WITHIN, Geoshape.circle(48.5, 0.5, 1000.00))))); assertEquals(ImmutableSet.of("doc1", "doc3"), ImmutableSet.copyOf(result)); result = tx.query(new IndexQuery(store, And.of(PredicateCondition.of(WEIGHT, Cmp.GREATER_THAN, 10.0)))); @@ -363,7 +406,7 @@ private void storeTest(String... stores) throws Exception { //Update some data - add(store, "doc4", getDocument("I'ts all a big Bob", -100, 11.2, Geoshape.point(48.0, 8.0), Arrays.asList("10", "11", "12"), Sets.newHashSet("10", "11"), Instant.ofEpochSecond(4)), true); + add(store, "doc4", getDocument("I'ts all a big Bob", -100, 11.2, Geoshape.point(48.0, 8.0), Geoshape.line(Arrays.asList(new double[][] {{7.5, 47.5}, {8.5, 48.5}})), Arrays.asList("10", "11", "12"), Sets.newHashSet("10", "11"), Instant.ofEpochSecond(4)), true); remove(store, "doc2", doc2, true); remove(store, "doc3", ImmutableMultimap.of(WEIGHT, (Object) 10.1), false); add(store, "doc3", ImmutableMultimap.of(TIME, (Object) 2000, TEXT, "Bob owns the world"), false); @@ -387,12 +430,21 @@ private void storeTest(String... 
stores) throws Exception { result = tx.query(new IndexQuery(store, PredicateCondition.of(LOCATION, Geo.WITHIN, Geoshape.circle(48.5, 0.5, 200.00)))); assertEquals(ImmutableSet.of("doc1"), ImmutableSet.copyOf(result)); + result = tx.query(new IndexQuery(store, PredicateCondition.of(BOUNDARY, Geo.WITHIN, Geoshape.circle(48.5, 0.5, 200.00)))); + assertEquals(ImmutableSet.of("doc1"), ImmutableSet.copyOf(result)); + result = tx.query(new IndexQuery(store, And.of(PredicateCondition.of(TEXT, Text.CONTAINS, "tomorrow"), PredicateCondition.of(LOCATION, Geo.WITHIN, Geoshape.circle(48.5, 0.5, 200.00))))); assertEquals(ImmutableSet.of(), ImmutableSet.copyOf(result)); + result = tx.query(new IndexQuery(store, And.of(PredicateCondition.of(TEXT, Text.CONTAINS, "tomorrow"), PredicateCondition.of(BOUNDARY, Geo.WITHIN, Geoshape.circle(48.5, 0.5, 200.00))))); + assertEquals(ImmutableSet.of(), ImmutableSet.copyOf(result)); + result = tx.query(new IndexQuery(store, And.of(PredicateCondition.of(TIME, Cmp.GREATER_THAN_EQUAL, -1000), PredicateCondition.of(TIME, Cmp.LESS_THAN, 1010), PredicateCondition.of(LOCATION, Geo.WITHIN, Geoshape.circle(48.5, 0.5, 1000.00))))); assertEquals(ImmutableSet.of("doc1", "doc4"), ImmutableSet.copyOf(result)); + result = tx.query(new IndexQuery(store, And.of(PredicateCondition.of(TIME, Cmp.GREATER_THAN_EQUAL, -1000), PredicateCondition.of(TIME, Cmp.LESS_THAN, 1010), PredicateCondition.of(BOUNDARY, Geo.WITHIN, Geoshape.circle(48.5, 0.5, 1000.00))))); + assertEquals(ImmutableSet.of("doc1", "doc4"), ImmutableSet.copyOf(result)); + result = tx.query(new IndexQuery(store, And.of(PredicateCondition.of(WEIGHT, Cmp.GREATER_THAN, 10.0)))); assertEquals(ImmutableSet.of("doc1", "doc4"), ImmutableSet.copyOf(result)); @@ -431,7 +483,6 @@ public void testCommonSupport() { assertTrue(index.supports(of(Short.class, Cardinality.SINGLE))); assertTrue(index.supports(of(Byte.class, Cardinality.SINGLE))); assertTrue(index.supports(of(Float.class, Cardinality.SINGLE))); - 
assertTrue(index.supports(of(Geoshape.class, Cardinality.SINGLE))); assertFalse(index.supports(of(Object.class, Cardinality.SINGLE))); assertFalse(index.supports(of(Exception.class, Cardinality.SINGLE))); @@ -440,11 +491,16 @@ public void testCommonSupport() { assertTrue(index.supports(of(Double.class, Cardinality.SINGLE), Cmp.LESS_THAN)); assertTrue(index.supports(of(Double.class, Cardinality.SINGLE, new Parameter("mapping",Mapping.DEFAULT)), Cmp.LESS_THAN)); assertFalse(index.supports(of(Double.class, Cardinality.SINGLE, new Parameter("mapping",Mapping.TEXT)), Cmp.LESS_THAN)); - assertTrue(index.supports(of(Geoshape.class, Cardinality.SINGLE), Geo.WITHIN)); assertFalse(index.supports(of(Double.class, Cardinality.SINGLE), Geo.INTERSECT)); assertFalse(index.supports(of(Long.class, Cardinality.SINGLE), Text.CONTAINS)); - assertFalse(index.supports(of(Geoshape.class, Cardinality.SINGLE), Geo.DISJOINT)); + + assertTrue(index.supports(of(Geoshape.class, Cardinality.SINGLE))); + assertTrue(index.supports(of(Geoshape.class, Cardinality.SINGLE), Geo.WITHIN)); + assertTrue(index.supports(of(Geoshape.class, Cardinality.SINGLE), Geo.INTERSECT)); + assertTrue(index.supports(of(Geoshape.class, Cardinality.SINGLE, new Parameter("mapping",Mapping.PREFIX_TREE)), Geo.WITHIN)); + assertTrue(index.supports(of(Geoshape.class, Cardinality.SINGLE, new Parameter("mapping",Mapping.PREFIX_TREE)), Geo.CONTAINS)); + assertTrue(index.supports(of(Geoshape.class, Cardinality.SINGLE, new Parameter("mapping",Mapping.PREFIX_TREE)), Geo.INTERSECT)); } @Test @@ -852,13 +908,14 @@ private void remove(String store, String docid, Multimap doc, bo } - public Multimap getDocument(final String txt, final long time, final double weight, final Geoshape geo, List phoneList, Set phoneSet, Instant date) { + public Multimap getDocument(final String txt, final long time, final double weight, final Geoshape location, final Geoshape boundary, List phoneList, Set phoneSet, Instant date) { HashMultimap values = 
HashMultimap.create(); values.put(TEXT, txt); values.put(NAME, txt); values.put(TIME, time); values.put(WEIGHT, weight); - values.put(LOCATION, geo); + values.put(LOCATION, location); + values.put(BOUNDARY, boundary); values.put(DATE, date); if(indexFeatures.supportsCardinality(Cardinality.LIST)) { for (String phone : phoneList) { diff --git a/janusgraph-test/src/main/java/org/janusgraph/graphdb/JanusGraphIndexTest.java b/janusgraph-test/src/main/java/org/janusgraph/graphdb/JanusGraphIndexTest.java index 59ffc0176f..e813e6d63e 100644 --- a/janusgraph-test/src/main/java/org/janusgraph/graphdb/JanusGraphIndexTest.java +++ b/janusgraph-test/src/main/java/org/janusgraph/graphdb/JanusGraphIndexTest.java @@ -71,12 +71,14 @@ import java.time.Duration; import java.time.Instant; import java.time.temporal.ChronoUnit; +import java.util.Arrays; import java.util.Date; import java.util.HashSet; import java.util.Set; import java.util.UUID; import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; +import java.util.function.Function; import static org.janusgraph.graphdb.JanusGraphTest.evaluateQuery; import static org.janusgraph.graphdb.configuration.GraphDatabaseConfiguration.*; @@ -213,6 +215,10 @@ public void testIndexing() { createExternalVertexIndex(location, INDEX); createExternalEdgeIndex(location, INDEX); + PropertyKey boundary = makeKey("boundary", Geoshape.class); + mgmt.addIndexKey(getExternalIndex(Vertex.class,INDEX),boundary, Parameter.of("mapping", Mapping.PREFIX_TREE), Parameter.of("index-geo-dist-error-pct", 0.0025)); + mgmt.addIndexKey(getExternalIndex(Edge.class,INDEX),boundary, Parameter.of("mapping", Mapping.PREFIX_TREE), Parameter.of("index-geo-dist-error-pct", 0.0025)); + PropertyKey time = makeKey("time", Long.class); createExternalVertexIndex(time, INDEX); createExternalEdgeIndex(time, INDEX); @@ -226,7 +232,7 @@ public void testIndexing() { createExternalEdgeIndex(group, INDEX); makeVertexIndexedKey("uid", Integer.class); - 
((StandardEdgeLabelMaker) mgmt.makeEdgeLabel("knows")).sortKey(time).signature(location).make(); + ((StandardEdgeLabelMaker) mgmt.makeEdgeLabel("knows")).sortKey(time).signature(location,boundary).make(); finishSchema(); clopen(); @@ -245,13 +251,26 @@ public void testIndexing() { v.property(VertexProperty.Cardinality.single, "time", i); offset = (i % 2 == 0 ? 1 : -1) * (i * 50.0 / numV); v.property(VertexProperty.Cardinality.single, "location", Geoshape.point(0.0 + offset, 0.0 + offset)); - + if (i % 2 == 0) { + v.property(VertexProperty.Cardinality.single, "boundary", Geoshape.line(Arrays.asList(new double[][] { + {offset-0.1, offset-0.1}, {offset+0.1, offset-0.1}, {offset+0.1, offset+0.1}, {offset-0.1, offset+0.1}}))); + } else { + v.property(VertexProperty.Cardinality.single, "boundary", Geoshape.polygon(Arrays.asList(new double[][] + {{offset-0.1,offset-0.1},{offset+0.1,offset-0.1},{offset+0.1,offset+0.1},{offset-0.1,offset+0.1},{offset-0.1,offset-0.1}}))); + } Edge e = v.addEdge("knows", getVertex("uid", Math.max(0, i - 1))); e.property("text", "Vertex " + words[i % words.length]); e.property("time", i); e.property("category", i % numCategories); e.property("group", i % numGroups); e.property("location", Geoshape.point(0.0 + offset, 0.0 + offset)); + if (i % 2 == 0) { + e.property("boundary", Geoshape.line(Arrays.asList(new double[][] { + {offset-0.1, offset-0.1}, {offset+0.1, offset-0.1}, {offset+0.1, offset+0.1}, {offset-0.1, offset+0.1}}))); + } else { + e.property("boundary", Geoshape.polygon(Arrays.asList(new double[][] + {{offset-0.1,offset-0.1},{offset+0.1,offset-0.1},{offset+0.1,offset+0.1},{offset-0.1,offset+0.1},{offset-0.1,offset-0.1}}))); + } } for (int i = 0; i < words.length; i++) { @@ -280,11 +299,8 @@ public void testIndexing() { assertCount(i, tx.query().has("time", Cmp.GREATER_THAN_EQUAL, i).has("time", Cmp.LESS_THAN, i + i).edges()); } - for (int i = 0; i < numV; i += 10) { - offset = (i * 50.0 / originalNumV); - distance = 
Geoshape.point(0.0, 0.0).getPoint().distance(Geoshape.point(offset, offset).getPoint()) + 20; - assertCount(i + 1, tx.query().has("location", Geo.WITHIN, Geoshape.circle(0.0, 0.0, distance)).vertices()); - assertCount(i + 1, tx.query().has("location", Geo.WITHIN, Geoshape.circle(0.0, 0.0, distance)).edges()); + for (int i = 0; i < numV; i += 5) { + testGeo(i, originalNumV, numV, "location", "boundary"); } //Queries combining mixed and composite indexes @@ -295,6 +311,7 @@ public void testIndexing() { offset = (19 * 50.0 / originalNumV); distance = Geoshape.point(0.0, 0.0).getPoint().distance(Geoshape.point(offset, offset).getPoint()) + 20; assertCount(5, tx.query().has("location", Geo.INTERSECT, Geoshape.circle(0.0, 0.0, distance)).has("text", Text.CONTAINS, words[0]).vertices()); + assertCount(5, tx.query().has("boundary", Geo.INTERSECT, Geoshape.circle(0.0, 0.0, distance)).has("text", Text.CONTAINS, words[0]).vertices()); assertCount(numV, tx.query().vertices()); assertCount(numV, tx.query().edges()); @@ -333,11 +350,8 @@ public void testIndexing() { assertCount(i, tx.query().has("time", Cmp.GREATER_THAN_EQUAL, i).has("time", Cmp.LESS_THAN, i + i).edges()); } - for (int i = 0; i < numV; i += 10) { - offset = (i * 50.0 / originalNumV); - distance = Geoshape.point(0.0, 0.0).getPoint().distance(Geoshape.point(offset, offset).getPoint()) + 20; - assertCount(i + 1, tx.query().has("location", Geo.WITHIN, Geoshape.circle(0.0, 0.0, distance)).vertices()); - assertCount(i + 1, tx.query().has("location", Geo.WITHIN, Geoshape.circle(0.0, 0.0, distance)).edges()); + for (int i = 0; i < numV; i += 5) { + testGeo(i, originalNumV, numV, "location", "boundary"); } //Queries combining mixed and composite indexes @@ -348,6 +362,7 @@ public void testIndexing() { offset = (19 * 50.0 / originalNumV); distance = Geoshape.point(0.0, 0.0).getPoint().distance(Geoshape.point(offset, offset).getPoint()) + 20; assertCount(5, tx.query().has("location", Geo.INTERSECT, Geoshape.circle(0.0, 
0.0, distance)).has("text", Text.CONTAINS, words[0]).vertices()); + assertCount(5, tx.query().has("boundary", Geo.INTERSECT, Geoshape.circle(0.0, 0.0, distance)).has("text", Text.CONTAINS, words[0]).vertices()); assertCount(numV, tx.query().vertices()); assertCount(numV, tx.query().edges()); @@ -372,17 +387,15 @@ public void testIndexing() { assertCount(i, tx.query().has("time", Cmp.GREATER_THAN_EQUAL, i).has("time", Cmp.LESS_THAN, i + i).edges()); } - for (int i = 0; i < numV; i += 10) { - offset = (i * 50.0 / originalNumV); - distance = Geoshape.point(0.0, 0.0).getPoint().distance(Geoshape.point(offset, offset).getPoint()) + 20; - assertCount(i + 1, tx.query().has("location", Geo.WITHIN, Geoshape.circle(0.0, 0.0, distance)).vertices()); - assertCount(i + 1, tx.query().has("location", Geo.WITHIN, Geoshape.circle(0.0, 0.0, distance)).edges()); + for (int i = 0; i < numV; i += 5) { + testGeo(i, originalNumV, numV, "location", "boundary"); } assertCount(5, tx.query().has("time", Cmp.GREATER_THAN_EQUAL, 10).has("time", Cmp.LESS_THAN, 30).has("text", Text.CONTAINS, words[0]).vertices()); offset = (19 * 50.0 / originalNumV); distance = Geoshape.point(0.0, 0.0).getPoint().distance(Geoshape.point(offset, offset).getPoint()) + 20; assertCount(5, tx.query().has("location", Geo.INTERSECT, Geoshape.circle(0.0, 0.0, distance)).has("text", Text.CONTAINS, words[0]).vertices()); + assertCount(5, tx.query().has("boundary", Geo.INTERSECT, Geoshape.circle(0.0, 0.0, distance)).has("text", Text.CONTAINS, words[0]).vertices()); assertCount(numV, tx.query().vertices()); assertCount(numV, tx.query().edges()); @@ -1681,15 +1694,15 @@ private void testIndexing(Cardinality cardinality) { PropertyKey intProperty = mgmt.makePropertyKey("age").dataType(Integer.class).cardinality(cardinality).make(); PropertyKey longProperty = mgmt.makePropertyKey("long").dataType(Long.class).cardinality(cardinality).make(); PropertyKey uuidProperty = 
mgmt.makePropertyKey("uuid").dataType(UUID.class).cardinality(cardinality).make(); - PropertyKey geoProperty = mgmt.makePropertyKey("geo").dataType(Geoshape.class).cardinality(cardinality).make(); - mgmt.buildIndex("collectionIndex", Vertex.class).addKey(stringProperty, getStringMapping()).addKey(intProperty).addKey(longProperty).addKey(uuidProperty).addKey(geoProperty).buildMixedIndex(INDEX); + PropertyKey geopointProperty = mgmt.makePropertyKey("geopoint").dataType(Geoshape.class).cardinality(cardinality).make(); + mgmt.buildIndex("collectionIndex", Vertex.class).addKey(stringProperty, getStringMapping()).addKey(intProperty).addKey(longProperty).addKey(uuidProperty).addKey(geopointProperty).buildMixedIndex(INDEX); finishSchema(); testCollection(cardinality, "name", "Totoro", "Hiro"); testCollection(cardinality, "age", 1, 2); testCollection(cardinality, "long", 1L, 2L); testCollection(cardinality, "uuid", UUID.randomUUID(), UUID.randomUUID()); - testCollection(cardinality, "geo", Geoshape.point(1.0, 1.0), Geoshape.point(2.0, 2.0)); + testCollection(cardinality, "geopoint", Geoshape.point(1.0, 1.0), Geoshape.point(2.0, 2.0)); } else { try { PropertyKey stringProperty = mgmt.makePropertyKey("name").dataType(String.class).cardinality(cardinality).make(); @@ -1775,4 +1788,61 @@ private void testCollection(Cardinality cardinality, String property, Object val } + private void testGeo(int i, int origNumV, int numV, String geoPointProperty, String geoShapeProperty) { + double offset = (i * 50.0 / origNumV); + double bufferKm = 20; + double distance = Geoshape.point(0.0, 0.0).getPoint().distance(Geoshape.point(offset, offset).getPoint()) + bufferKm; + + assertCount(i + 1, tx.query().has(geoPointProperty, Geo.WITHIN, Geoshape.circle(0.0, 0.0, distance)).vertices()); + assertCount(i + 1, tx.query().has(geoPointProperty, Geo.WITHIN, Geoshape.circle(0.0, 0.0, distance)).edges()); + assertCount(i + 1, tx.query().has(geoPointProperty, Geo.INTERSECT, Geoshape.circle(0.0, 0.0, 
distance)).vertices()); + assertCount(i + 1, tx.query().has(geoPointProperty, Geo.INTERSECT, Geoshape.circle(0.0, 0.0, distance)).edges()); + assertCount(numV-(i + 1), tx.query().has(geoPointProperty, Geo.DISJOINT, Geoshape.circle(0.0, 0.0, distance)).vertices()); + assertCount(numV-(i + 1), tx.query().has(geoPointProperty, Geo.DISJOINT, Geoshape.circle(0.0, 0.0, distance)).edges()); + assertCount(i + 1, tx.query().has(geoShapeProperty, Geo.INTERSECT, Geoshape.circle(0.0, 0.0, distance)).vertices()); + assertCount(i + 1, tx.query().has(geoShapeProperty, Geo.INTERSECT, Geoshape.circle(0.0, 0.0, distance)).edges()); + if (i > 0) { + assertCount(i, tx.query().has(geoShapeProperty, Geo.WITHIN, Geoshape.circle(0.0, 0.0, distance-bufferKm)).vertices()); + assertCount(i, tx.query().has(geoShapeProperty, Geo.WITHIN, Geoshape.circle(0.0, 0.0, distance-bufferKm)).edges()); + } + assertCount(numV-(i + 1), tx.query().has(geoShapeProperty, Geo.DISJOINT, Geoshape.circle(0.0, 0.0, distance)).vertices()); + assertCount(numV-(i + 1), tx.query().has(geoShapeProperty, Geo.DISJOINT, Geoshape.circle(0.0, 0.0, distance)).edges()); + + assertCount(i % 2, tx.query().has(geoShapeProperty, Geo.CONTAINS, Geoshape.point(-offset,-offset)).vertices()); + assertCount(i % 2, tx.query().has(geoShapeProperty, Geo.CONTAINS, Geoshape.point(-offset,-offset)).edges()); + + double buffer = bufferKm/111.; + double min = -Math.abs(offset); + double max = Math.abs(offset); + Geoshape bufferedBox = Geoshape.box(min-buffer, min-buffer, max+buffer, max+buffer); + assertCount(i + 1, tx.query().has(geoPointProperty, Geo.WITHIN, bufferedBox).vertices()); + assertCount(i + 1, tx.query().has(geoPointProperty, Geo.WITHIN, bufferedBox).edges()); + assertCount(i + 1, tx.query().has(geoPointProperty, Geo.INTERSECT, bufferedBox).vertices()); + assertCount(i + 1, tx.query().has(geoPointProperty, Geo.INTERSECT, bufferedBox).edges()); + assertCount(numV-(i + 1), tx.query().has(geoPointProperty, Geo.DISJOINT, 
bufferedBox).vertices()); + assertCount(numV-(i + 1), tx.query().has(geoPointProperty, Geo.DISJOINT, bufferedBox).edges()); + if (i > 0) { + Geoshape exactBox = Geoshape.box(min, min, max, max); + assertCount(i, tx.query().has(geoShapeProperty, Geo.WITHIN, exactBox).vertices()); + assertCount(i, tx.query().has(geoShapeProperty, Geo.WITHIN, exactBox).edges()); + } + assertCount(i + 1, tx.query().has(geoShapeProperty, Geo.INTERSECT, bufferedBox).vertices()); + assertCount(i + 1, tx.query().has(geoShapeProperty, Geo.INTERSECT, bufferedBox).edges()); + assertCount(numV-(i + 1), tx.query().has(geoShapeProperty, Geo.DISJOINT, bufferedBox).vertices()); + assertCount(numV-(i + 1), tx.query().has(geoShapeProperty, Geo.DISJOINT, bufferedBox).edges()); + + Geoshape bufferedPoly = Geoshape.polygon(Arrays.asList(new double[][] + {{min-buffer,min-buffer},{max+buffer,min-buffer},{max+buffer,max+buffer},{min-buffer,max+buffer},{min-buffer,min-buffer}})); + if (i > 0) { + Geoshape exactPoly = Geoshape.polygon(Arrays.asList(new double[][] + {{min,min},{max,min},{max,max},{min,max},{min,min}})); + assertCount(i, tx.query().has(geoShapeProperty, Geo.WITHIN, exactPoly).vertices()); + assertCount(i, tx.query().has(geoShapeProperty, Geo.WITHIN, exactPoly).edges()); + } + assertCount(i + 1, tx.query().has(geoShapeProperty, Geo.INTERSECT, bufferedPoly).vertices()); + assertCount(i + 1, tx.query().has(geoShapeProperty, Geo.INTERSECT, bufferedPoly).edges()); + assertCount(numV-(i + 1), tx.query().has(geoShapeProperty, Geo.DISJOINT, bufferedPoly).vertices()); + assertCount(numV-(i + 1), tx.query().has(geoShapeProperty, Geo.DISJOINT, bufferedPoly).edges()); + } + } diff --git a/janusgraph-test/src/main/java/org/janusgraph/graphdb/JanusGraphIoTest.java b/janusgraph-test/src/main/java/org/janusgraph/graphdb/JanusGraphIoTest.java index 219ac3fc5b..969a129b8a 100644 --- a/janusgraph-test/src/main/java/org/janusgraph/graphdb/JanusGraphIoTest.java +++ 
b/janusgraph-test/src/main/java/org/janusgraph/graphdb/JanusGraphIoTest.java @@ -14,15 +14,31 @@ package org.janusgraph.graphdb; +import org.janusgraph.core.JanusGraphTransaction; +import org.janusgraph.core.attribute.Geoshape; +import org.janusgraph.core.attribute.JtsGeoshapeHelper; +import org.janusgraph.core.schema.JanusGraphManagement; import org.janusgraph.example.GraphOfTheGodsFactory; + +import com.vividsolutions.jts.geom.Coordinate; +import com.vividsolutions.jts.geom.GeometryFactory; +import com.vividsolutions.jts.geom.LineString; +import com.vividsolutions.jts.geom.Polygon; + import org.apache.tinkerpop.gremlin.structure.io.GraphReader; import org.apache.tinkerpop.gremlin.structure.io.GraphWriter; import org.apache.tinkerpop.gremlin.structure.io.IoCore; import org.apache.tinkerpop.gremlin.structure.io.graphson.GraphSONMapper; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import org.junit.Before; import org.junit.Test; import java.io.FileInputStream; import java.io.FileOutputStream; +import java.util.Arrays; +import java.util.function.Function; /** * Tests JanusGraph specific serialization classes not covered by the TinkerPop suite. 
@@ -31,9 +47,42 @@ */ public abstract class JanusGraphIoTest extends JanusGraphBaseTest { - @Test - public void testGeoShapeSerializationReadWriteAsGraphSONEmbedded() throws Exception { + private static final GeometryFactory GF = new GeometryFactory(); + + private static final JtsGeoshapeHelper HELPER = new JtsGeoshapeHelper(); + + @Before + public void setup() { GraphOfTheGodsFactory.loadWithoutMixedIndex(graph, true); + JanusGraphManagement mgmt = graph.openManagement(); + mgmt.makePropertyKey("shape").dataType(Geoshape.class).make(); + mgmt.commit(); + } + + @Test + public void testSerializationReadWriteAsGraphSONEmbedded() throws Exception { + testSerializationReadWriteAsGraphSONEmbedded(null); + testSerializationReadWriteAsGraphSONEmbedded(makeLine); + testSerializationReadWriteAsGraphSONEmbedded(makePoly); + testSerializationReadWriteAsGraphSONEmbedded(makeMultiPoint); + testSerializationReadWriteAsGraphSONEmbedded(makeMultiLine); + testSerializationReadWriteAsGraphSONEmbedded(makeMultiPolygon); + } + + @Test + public void testSerializationReadWriteAsGryo() throws Exception { + testSerializationReadWriteAsGryo(null); + testSerializationReadWriteAsGryo(makeLine); + testSerializationReadWriteAsGryo(makePoly); + testSerializationReadWriteAsGryo(makeMultiPoint); + testSerializationReadWriteAsGryo(makeMultiLine); + testSerializationReadWriteAsGryo(makeMultiPolygon); + } + + public void testSerializationReadWriteAsGraphSONEmbedded(Function makeGeoshape) throws Exception { + if (makeGeoshape != null) { + addGeoshape(makeGeoshape); + } GraphSONMapper m = graph.io(IoCore.graphson()).mapper().embedTypes(true).create(); GraphWriter writer = graph.io(IoCore.graphson()).writer().mapper(m).create(); FileOutputStream fos = new FileOutputStream("/tmp/test.json"); @@ -47,11 +96,15 @@ public void testGeoShapeSerializationReadWriteAsGraphSONEmbedded() throws Except reader.readGraph(fis, graph); JanusGraphIndexTest.assertGraphOfTheGods(graph); + if (makeGeoshape != null) { + 
assertGeoshape(makeGeoshape); + } } - @Test - public void testGeoShapeSerializationReadWriteAsGryo() throws Exception { - GraphOfTheGodsFactory.loadWithoutMixedIndex(graph, true); + private void testSerializationReadWriteAsGryo(Function makeGeoshape) throws Exception { + if (makeGeoshape != null) { + addGeoshape(makeGeoshape); + } graph.io(IoCore.gryo()).writeGraph("/tmp/test.kryo"); clearGraph(config); @@ -60,5 +113,62 @@ public void testGeoShapeSerializationReadWriteAsGryo() throws Exception { graph.io(IoCore.gryo()).readGraph("/tmp/test.kryo"); JanusGraphIndexTest.assertGraphOfTheGods(graph); + if (makeGeoshape != null) { + assertGeoshape(makeGeoshape); + } + } + + private void addGeoshape(Function makeGeoshape) { + JanusGraphTransaction tx = graph.newTransaction(); + graph.traversal().E().has("place").toList().stream().forEach(e-> { + Geoshape place = (Geoshape) e.property("place").value(); + e.property("shape", makeGeoshape.apply(place)); + }); + tx.commit(); + } + + private void assertGeoshape(Function makeGeoshape) { + graph.traversal().E().has("place").toList().stream().forEach(e-> { + assertTrue(e.property("shape").isPresent()); + Geoshape place = (Geoshape) e.property("place").value(); + Geoshape expected = makeGeoshape.apply(place); + Geoshape actual = (Geoshape) e.property("shape").value(); + assertEquals(expected, actual); + }); } + + private static final Function makePoly = place -> { + double x = Math.floor(place.getPoint().getLongitude()); + double y = Math.floor(place.getPoint().getLatitude()); + return Geoshape.polygon(Arrays.asList(new double[][] {{x,y},{x,y+1},{x+1,y+1},{x+1,y},{x,y},{x,y}})); + }; + + private static final Function makeLine = place -> { + double x = Math.floor(place.getPoint().getLongitude()); + double y = Math.floor(place.getPoint().getLatitude()); + return Geoshape.line(Arrays.asList(new double[][] {{x,y},{x,y+1},{x+1,y+1},{x+1,y}})); + }; + + private static final Function makeMultiPoint = place -> { + double x = 
Math.floor(place.getPoint().getLongitude()); + double y = Math.floor(place.getPoint().getLatitude()); + return HELPER.geoshape(GF.createMultiPoint(new Coordinate[] {new Coordinate(x,y), new Coordinate(x+1,y+1)})); + }; + + private static final Function makeMultiLine = place -> { + double x = Math.floor(place.getPoint().getLongitude()); + double y = Math.floor(place.getPoint().getLatitude()); + return HELPER.geoshape(GF.createMultiLineString(new LineString[] { + GF.createLineString(new Coordinate[] {new Coordinate(x,y), new Coordinate(x+1,y+1)}), + GF.createLineString(new Coordinate[] {new Coordinate(x-1,y-1), new Coordinate(x,y)})})); + }; + + private static final Function makeMultiPolygon = place -> { + double x = Math.floor(place.getPoint().getLongitude()); + double y = Math.floor(place.getPoint().getLatitude()); + return HELPER.geoshape(GF.createMultiPolygon(new Polygon[] { + GF.createPolygon(new Coordinate[] {new Coordinate(x,y), new Coordinate(x+1,y), new Coordinate(x+1,y+1), new Coordinate(x,y)}), + GF.createPolygon(new Coordinate[] {new Coordinate(x+2,y+2), new Coordinate(x+2,y+3), new Coordinate(x+3,y+3), new Coordinate(x+2,y+2)})})); + }; + } diff --git a/janusgraph-test/src/main/java/org/janusgraph/graphdb/JanusGraphTest.java b/janusgraph-test/src/main/java/org/janusgraph/graphdb/JanusGraphTest.java index fbdee95e20..141c5686d0 100644 --- a/janusgraph-test/src/main/java/org/janusgraph/graphdb/JanusGraphTest.java +++ b/janusgraph-test/src/main/java/org/janusgraph/graphdb/JanusGraphTest.java @@ -1034,7 +1034,9 @@ public void testDataTypes() throws Exception { PropertyKey birthday = makeKey("birthday", Instant.class); - PropertyKey geo = makeKey("geo", Geoshape.class); + PropertyKey location = makeKey("location", Geoshape.class); + + PropertyKey boundary = makeKey("boundary", Geoshape.class); PropertyKey precise = makeKey("precise", Double.class); @@ -1061,7 +1063,8 @@ public void testDataTypes() throws Exception { num = tx.getPropertyKey("num"); barr = 
tx.getPropertyKey("barr"); birthday = tx.getPropertyKey("birthday"); - geo = tx.getPropertyKey("geo"); + location = tx.getPropertyKey("location"); + boundary = tx.getPropertyKey("boundary"); precise = tx.getPropertyKey("precise"); any = tx.getPropertyKey("any"); @@ -1070,6 +1073,7 @@ public void testDataTypes() throws Exception { assertEquals(Object.class, any.dataType()); final Instant c = Instant.ofEpochSecond(1429225756); + final Geoshape point = Geoshape.point(10.0, 10.0); final Geoshape shape = Geoshape.box(10.0, 10.0, 20.0, 20.0); JanusGraphVertex v = tx.addVertex(); @@ -1077,7 +1081,8 @@ public void testDataTypes() throws Exception { v.property(VertexProperty.Cardinality.single, n(birthday), c); v.property(VertexProperty.Cardinality.single, n(num), new SpecialInt(10)); v.property(VertexProperty.Cardinality.single, n(barr), new byte[]{1, 2, 3, 4}); - v.property(VertexProperty.Cardinality.single, n(geo), shape); + v.property(VertexProperty.Cardinality.single, n(location), point); + v.property(VertexProperty.Cardinality.single, n(boundary), shape); v.property(VertexProperty.Cardinality.single, n(precise), 10.12345); v.property(n(any), "Hello"); v.property(n(any), 10l); @@ -1089,7 +1094,8 @@ public void testDataTypes() throws Exception { assertEquals(10, v.value("num").getValue()); assertEquals(c, v.value("birthday")); assertEquals(4, v.value("barr").length); - assertEquals(shape, v.value("geo")); + assertEquals(point, v.value("location")); + assertEquals(shape, v.value("boundary")); assertEquals(10.12345, v.value("precise").doubleValue(), 0.000001); assertCount(3, v.properties("any")); for (Object prop : v.query().labels("any").properties()) { @@ -1110,7 +1116,8 @@ else if (value.getClass().isArray()) { assertEquals(10, v.value("num").getValue()); assertEquals(c, v.value("birthday")); assertEquals(4, v.value("barr").length); - assertEquals(shape, v.value("geo")); + assertEquals(point, v.value("location")); + assertEquals(shape, v.value("boundary")); 
assertEquals(10.12345, v.value("precise").doubleValue(), 0.000001); assertCount(3, v.properties("any")); for (Object prop : v.query().labels("any").properties()) { diff --git a/janusgraph-test/src/test/java/org/janusgraph/graphdb/attribute/GeoshapeTest.java b/janusgraph-test/src/test/java/org/janusgraph/graphdb/attribute/GeoshapeTest.java index 1fbede74d6..4756afdcd5 100644 --- a/janusgraph-test/src/test/java/org/janusgraph/graphdb/attribute/GeoshapeTest.java +++ b/janusgraph-test/src/test/java/org/janusgraph/graphdb/attribute/GeoshapeTest.java @@ -14,9 +14,18 @@ package org.janusgraph.graphdb.attribute; +import com.spatial4j.core.context.jts.JtsSpatialContext; import org.janusgraph.core.attribute.Geoshape; + +import com.vividsolutions.jts.geom.Coordinate; +import com.vividsolutions.jts.geom.GeometryFactory; +import com.vividsolutions.jts.geom.LineString; +import com.vividsolutions.jts.geom.LinearRing; +import com.vividsolutions.jts.geom.Polygon; + import org.apache.tinkerpop.shaded.jackson.databind.ObjectMapper; import org.apache.tinkerpop.shaded.jackson.databind.module.SimpleModule; +import org.janusgraph.core.attribute.JtsGeoshapeHelper; import org.junit.Test; import java.util.Arrays; @@ -32,6 +41,10 @@ public class GeoshapeTest { + private static final JtsGeoshapeHelper HELPER = new JtsGeoshapeHelper(); + + private static final GeometryFactory GF = new GeometryFactory(); + @Test public void testDistance() { Geoshape p1 = Geoshape.point(37.759, -122.536); @@ -39,20 +52,22 @@ public void testDistance() { double distance = 1496; assertEquals(distance,p1.getPoint().distance(p2.getPoint()),5.0); - - p1 = Geoshape.point(0.0,0.0); - p2 = Geoshape.point(10.0,10.0); - //System.out.println(p1.getPoint().distance(p2.getPoint())); } @Test public void testIntersection() { for (int i=0;i<50;i++) { Geoshape point = Geoshape.point(i,i); + Geoshape line = Geoshape.line(Arrays.asList(new double[][] {{i-1,i-1},{i,i},{i+1,i+1}})); + Geoshape polygon = 
Geoshape.polygon(Arrays.asList(new double[][] {{i-1,i-1},{i,i-1},{i+1,i-i},{i+1,i+1},{i-1,i+1},{i-1,i-1}})); Geoshape circle = Geoshape.circle(0.0,0.0,point.getPoint().distance(Geoshape.point(0,0).getPoint())+10); assertTrue(circle.intersect(point)); assertTrue(point.intersect(circle)); assertTrue(circle.intersect(circle)); + assertTrue(polygon.intersect(circle)); + assertTrue(circle.intersect(polygon)); + assertTrue(line.intersect(circle)); + assertTrue(circle.intersect(line)); } } @@ -60,14 +75,28 @@ public void testIntersection() { public void testEquality() { Geoshape c = Geoshape.circle(10.0,12.5,100); Geoshape b = Geoshape.box(20.0, 22.5, 40.5, 60.5); + Geoshape l = Geoshape.line(Arrays.asList(new double[][] {{10.5,20.5},{10.5,22.5},{12.5,22.5}})); + Geoshape p = Geoshape.polygon(Arrays.asList(new double[][] {{10.5,20.5},{8.0,21.75},{10.5,22.5},{11.75,25.0},{12.5,22.5},{15.0,21.0},{12.5,20.5},{11.75,18.0},{10.5,20.5}})); assertEquals(Geoshape.circle(10.0,12.5,100),c); assertEquals(Geoshape.box(20.0,22.5,40.5,60.5),b); + assertEquals(Geoshape.line(Arrays.asList(new double[][] {{10.5,20.5},{10.5,22.5},{12.5,22.5}})),l); + assertEquals(Geoshape.polygon(Arrays.asList(new double[][] {{10.5,20.5},{8.0,21.75},{10.5,22.5},{11.75,25.0},{12.5,22.5},{15.0,21.0},{12.5,20.5},{11.75,18.0},{10.5,20.5}})),p); assertEquals(Geoshape.circle(10.0,12.5,100).hashCode(),c.hashCode()); assertEquals(Geoshape.box(20.0,22.5,40.5,60.5).hashCode(),b.hashCode()); + assertEquals(Geoshape.line(Arrays.asList(new double[][] {{10.5,20.5},{10.5,22.5},{12.5,22.5}})).hashCode(),l.hashCode()); + assertEquals(Geoshape.polygon(Arrays.asList(new double[][] {{10.5,20.5},{8.0,21.75},{10.5,22.5},{11.75,25.0},{12.5,22.5},{15.0,21.0},{12.5,20.5},{11.75,18.0},{10.5,20.5}})).hashCode(),p.hashCode()); assertNotSame(c.hashCode(),b.hashCode()); + assertNotSame(c.hashCode(),l.hashCode()); + assertNotSame(c.hashCode(),p.hashCode()); + assertNotSame(b.hashCode(),l.hashCode()); + 
assertNotSame(b.hashCode(),p.hashCode()); + assertNotSame(l.hashCode(),p.hashCode()); assertNotSame(c,b); - System.out.println(c); - System.out.println(b); + assertNotSame(c,l); + assertNotSame(c,p); + assertNotSame(b,l); + assertNotSame(b,p); + assertNotSame(l,p); } @@ -154,7 +183,7 @@ public void testGeoJsonCircleMissingRadius() throws IOException { } @Test - public void testGeoJsonPolygon() throws IOException { + public void testGeoJsonBox() throws IOException { Geoshape.GeoshapeSerializer s = new Geoshape.GeoshapeSerializer(); Map json = new ObjectMapper().readValue("{\n" + " \"type\": \"Feature\",\n" + @@ -183,7 +212,7 @@ public void testGeoJsonPolygon() throws IOException { } @Test(expected = IllegalArgumentException.class) - public void testGeoJsonPolygonNotBox1() throws IOException { + public void testGeoJsonInvalidBox1() throws IOException { Geoshape.GeoshapeSerializer s = new Geoshape.GeoshapeSerializer(); Map json = new ObjectMapper().readValue("{\n" + " \"type\": \"Feature\",\n" + @@ -200,7 +229,7 @@ public void testGeoJsonPolygonNotBox1() throws IOException { } @Test(expected = IllegalArgumentException.class) - public void testGeoJsonPolygonNotBox2() throws IOException { + public void testGeoJsonInvalidBox2() throws IOException { Geoshape.GeoshapeSerializer s = new Geoshape.GeoshapeSerializer(); Map json = new ObjectMapper().readValue("{\n" + " \"type\": \"Feature\",\n" + @@ -216,6 +245,81 @@ public void testGeoJsonPolygonNotBox2() throws IOException { } + @Test + public void testGeoJsonLine() throws IOException { + Geoshape.GeoshapeSerializer s = new Geoshape.GeoshapeSerializer(); + Map json = new ObjectMapper().readValue("{\n" + + " \"type\": \"Feature\",\n" + + " \"geometry\": {\n" + + " \"type\": \"LineString\",\n" + + " \"coordinates\": [[20.5, 10.5],[22.5, 10.5],[22.5, 12.5]]\n" + + " },\n" + + " \"properties\": {\n" + + " \"name\": \"Dinagat Islands\"\n" + + " }\n" + + "}", HashMap.class); + assertEquals(Geoshape.line(Arrays.asList(new 
double[][] {{20.5,10.5},{22.5,10.5},{22.5,12.5}})), s.convert(json)); + } + + @Test + public void testGeoJsonPolygon() throws IOException { + Geoshape.GeoshapeSerializer s = new Geoshape.GeoshapeSerializer(); + Map json = new ObjectMapper().readValue("{\n" + + " \"type\": \"Feature\",\n" + + " \"geometry\": {\n" + + " \"type\": \"Polygon\",\n" + + " \"coordinates\": [[[20.5,10.5],[21.75,8.0],[22.5,10.5],[25.0,11.75],[22.5,12.5],[21.0,15.0],[20.5,12.5],[18.0,11.75],[20.5,10.5]]]\n" + + " }" + + "}", HashMap.class); + assertEquals(Geoshape.polygon(Arrays.asList(new double[][] {{20.5,10.5},{21.75,8.0},{22.5,10.5},{25.0,11.75},{22.5,12.5},{21.0,15.0},{20.5,12.5},{18.0,11.75},{20.5,10.5}})), s.convert(json)); + } + + @Test + public void testGeoJsonMultiPoint() throws IOException { + Geoshape.GeoshapeSerializer s = new Geoshape.GeoshapeSerializer(); + Map json = new ObjectMapper().readValue("{\n" + + " \"type\": \"Feature\",\n" + + " \"geometry\": {\n" + + " \"type\": \"MultiPoint\",\n" + + " \"coordinates\": [[100.0, 0.0],[101.0, 1.0]]\n" + + " }" + + "}", HashMap.class); + assertEquals(HELPER.geoshape(GF.createMultiPoint(new Coordinate[] {new Coordinate(100,0), new Coordinate(101,1)})), s.convert(json)); + } + + @Test + public void testGeoJsonMultiLineString() throws IOException { + Geoshape.GeoshapeSerializer s = new Geoshape.GeoshapeSerializer(); + Map json = new ObjectMapper().readValue("{\n" + + " \"type\": \"Feature\",\n" + + " \"geometry\": {\n" + + " \"type\": \"MultiLineString\",\n" + + " \"coordinates\": [[[100.0,0.0],[101.0, 1.0]],[[102.0,2.0],[103.0,3.0]]]\n" + + " }" + + "}", HashMap.class); + assertEquals(HELPER.geoshape(GF.createMultiLineString(new LineString[] { + GF.createLineString(new Coordinate[] {new Coordinate(100,0), new Coordinate(101,1)}), + GF.createLineString(new Coordinate[] {new Coordinate(102,2), new Coordinate(103,3)})})), s.convert(json)); + } + + @Test + public void testGeoJsonMultiPolygon() throws IOException { + 
Geoshape.GeoshapeSerializer s = new Geoshape.GeoshapeSerializer(); + Map json = new ObjectMapper().readValue("{\n" + + " \"type\": \"Feature\",\n" + + " \"geometry\": {\n" + + " \"type\": \"MultiPolygon\",\n" + + " \"coordinates\": [[[[102.0,2.0],[103.0,2.0],[103.0,3.0],[102.0,3.0],[102.0,2.0]]]," + + "[[[100.0,0.0],[101.0,0.0],[101.0,1.0],[100.0,1.0],[100.0,0.0]],[[100.2,0.2],[100.8,0.2],[100.8,0.8],[100.2,0.8],[100.2,0.2]]]]\n" + + " }" + + "}", HashMap.class); + assertEquals(HELPER.geoshape(GF.createMultiPolygon(new Polygon[] { + GF.createPolygon(new Coordinate[] {new Coordinate(102,2), new Coordinate(103,2), new Coordinate(103,3), new Coordinate(102,3), new Coordinate(102,2)}), + GF.createPolygon(GF.createLinearRing(new Coordinate[] {new Coordinate(100,0), new Coordinate(101,0), new Coordinate(101,1), new Coordinate(100,1), new Coordinate(100,0)}), + new LinearRing[] { GF.createLinearRing(new Coordinate[] {new Coordinate(100.2,0.2), new Coordinate(100.8,0.2), new Coordinate(100.8,0.8), new Coordinate(100.2,0.8), new Coordinate(100.2,0.2) + })})})), s.convert(json)); + } + @Test public void testGeoJsonGeometry() throws IOException { Geoshape.GeoshapeSerializer s = new Geoshape.GeoshapeSerializer(); @@ -224,7 +328,6 @@ public void testGeoJsonGeometry() throws IOException { " \"coordinates\": [20.5, 10.5]\n" + "}", HashMap.class); assertEquals(Geoshape.point(10.5, 20.5), s.convert(json)); - } @@ -234,10 +337,25 @@ public void testGeoJsonSerialization() throws IOException { module.addSerializer(new Geoshape.GeoshapeGsonSerializer()); final ObjectMapper om = new ObjectMapper(); om.registerModule(module); + JtsSpatialContext context = (JtsSpatialContext) Geoshape.getSpatialContext(); assertEquals("{\"type\":\"Point\",\"coordinates\":[20.5,10.5]}", om.writeValueAsString(Geoshape.point(10.5, 20.5))); - assertEquals("{\"type\":\"Box\",\"coordinates\":[[20.5,10.5],[22.5,10.5],[22.5,12.5],[20.5,12.5]]}", om.writeValueAsString(Geoshape.box(10.5, 20.5, 12.5, 22.5))); - 
assertEquals("{\"type\":\"Circle\",\"radius\":30.5,\"coordinates\":[20.5,10.5]}", om.writeValueAsString(Geoshape.circle(10.5, 20.5, 30.5))); - + assertEquals("{\"type\":\"Polygon\",\"coordinates\": [[[20.5,10.5],[20.5,12.5],[22.5,12.5],[22.5,10.5],[20.5,10.5]]]}", om.writeValueAsString(Geoshape.box(10.5, 20.5, 12.5, 22.5))); + assertEquals("{\"type\":\"Circle\",\"coordinates\":[20.5,10.5],\"radius\":30.5,\"properties\":{\"radius_units\":\"km\"}}", om.writeValueAsString(Geoshape.circle(10.5, 20.5, 30.5))); + assertEquals("{\"type\":\"LineString\",\"coordinates\":[[20.5,10.5],[22.5,10.5],[22.5,12.5]]}", om.writeValueAsString(Geoshape.line(Arrays.asList(new double[][] {{20.5,10.5},{22.5,10.5},{22.5,12.5}})))); + assertEquals("{\"type\":\"Polygon\",\"coordinates\":[[[20.5,10.5],[21.75,8],[22.5,10.5],[25,11.75],[22.5,12.5],[21,15],[20.5,12.5],[18,11.75],[20.5,10.5]]]}", + om.writeValueAsString(Geoshape.polygon(Arrays.asList(new double[][] {{20.5,10.5},{21.75,8},{22.5,10.5},{25,11.75},{22.5,12.5},{21,15},{20.5,12.5},{18,11.75},{20.5,10.5}})))); + assertEquals("{\"type\":\"MultiPoint\",\"coordinates\":[[100,0],[101,1]]}", + om.writeValueAsString(Geoshape.geoshape(context.makeShape(GF.createMultiPoint(new Coordinate[] {new Coordinate(100,0), new Coordinate(101,1)}))))); + assertEquals("{\"type\":\"MultiLineString\",\"coordinates\":[[[100,0],[101,1]],[[102,2],[103,3]]]}", + om.writeValueAsString(Geoshape.geoshape(context.makeShape(GF.createMultiLineString(new LineString[] { + GF.createLineString(new Coordinate[] {new Coordinate(100,0), new Coordinate(101,1)}), + GF.createLineString(new Coordinate[] {new Coordinate(102,2), new Coordinate(103,3)})}))))); + assertEquals("{\"type\":\"MultiPolygon\",\"coordinates\":[[[[102,2],[103,2],[103,3],[102,3],[102,2]]],[[[100,0],[101,0],[101,1],[100,1],[100,0]],[[100.2,0.2],[100.8,0.2],[100.8,0.8],[100.2,0.8],[100.2,0.2]]]]}", + om.writeValueAsString(Geoshape.geoshape(context.makeShape(GF.createMultiPolygon(new Polygon[] { + 
GF.createPolygon(new Coordinate[] {new Coordinate(102,2), new Coordinate(103,2), new Coordinate(103,3), new Coordinate(102,3), new Coordinate(102,2)}), + GF.createPolygon(GF.createLinearRing(new Coordinate[] {new Coordinate(100,0), new Coordinate(101,0), new Coordinate(101,1), new Coordinate(100,1), new Coordinate(100,0)}), + new LinearRing[] { GF.createLinearRing(new Coordinate[] {new Coordinate(100.2,0.2), new Coordinate(100.8,0.2), + new Coordinate(100.8,0.8), new Coordinate(100.2,0.8), new Coordinate(100.2,0.2)})})}))))); } diff --git a/janusgraph-test/src/test/java/org/janusgraph/graphdb/serializer/SerializerTest.java b/janusgraph-test/src/test/java/org/janusgraph/graphdb/serializer/SerializerTest.java index a1583f3d93..ccf19320eb 100644 --- a/janusgraph-test/src/test/java/org/janusgraph/graphdb/serializer/SerializerTest.java +++ b/janusgraph-test/src/test/java/org/janusgraph/graphdb/serializer/SerializerTest.java @@ -409,6 +409,14 @@ public static final float randomGeoPoint() { return random.nextFloat()*180.0f-90.0f; } + public static final List randomGeoPoints(int n) { + List points = new ArrayList<>(); + for (int i=0; i TYPES = new HashMap() {{ put(Byte.class, new Factory() { @Override @@ -467,10 +475,19 @@ public Double newInstance() { put(Geoshape.class, new Factory() { @Override public Geoshape newInstance() { - if (random.nextDouble()>0.5) - return Geoshape.box(randomGeoPoint(),randomGeoPoint(),randomGeoPoint(),randomGeoPoint()); - else - return Geoshape.circle(randomGeoPoint(),randomGeoPoint(),random.nextInt(100)+1); + double alpha = random.nextDouble(); + double x0=randomGeoPoint(), y0=randomGeoPoint(), x1=randomGeoPoint(), y1=randomGeoPoint(); + if (alpha>0.75) { + double minx=Math.min(x0,x1), miny=Math.min(y0,y1); + double maxx=minx==x0? x1 : x0, maxy=miny==y0 ? 
y1 : y0; + return Geoshape.box(miny, minx, maxy, maxx); + } else if (alpha>0.5) { + return Geoshape.circle(y0,x0,random.nextInt(100)+1); + } else if (alpha>0.25) { + return Geoshape.line(Arrays.asList(new double[][] {{x0,y0},{x0,y1},{x1,y1},{x1,y0}})); + } else { + return Geoshape.polygon(Arrays.asList(new double[][] {{x0,y0},{x1,y0},{x1,y1},{x0,y1},{x0,y0}})); + } } }); put(String.class, STRING_FACTORY); diff --git a/pom.xml b/pom.xml index b21cd45089..34e9974fc4 100644 --- a/pom.xml +++ b/pom.xml @@ -878,6 +878,13 @@ 0.5 + + + com.vividsolutions + jts + 1.13 + + commons-httpclient From 33f7d8f2fe26e77e05102de979a8ff647cc198fa Mon Sep 17 00:00:00 2001 From: sjudeng Date: Fri, 10 Mar 2017 09:28:56 -0600 Subject: [PATCH 3/5] Remove Elasticsearch node client and add REST client. The REST client is compatible with both ES 2.x and 5.x servers because the ES 5.x REST API is backwards-compatible with 2.x. Split ES version properties out by search builder version (elasticsearch.version), REST client version (elasticsearch.rest.version), and the distribution version to use in testing and releases (elasticsearch.dist.version). Create Transport test variants for all test suites. Update Travis build matrix to include janusgraph-es test suite variants for ES 5.x and 2.x as well as REST and transport clients. 
Signed-off-by: sjudeng --- .gitignore | 1 + .travis.yml | 6 + docs/elasticsearch.txt | 301 ++------- docs/versions.txt | 5 +- .../janusgraph/core/attribute/Geoshape.java | 20 +- .../example/GraphOfTheGodsFactory.java | 3 - .../janusgraph-dist-hadoop-2/pom.xml | 4 +- janusgraph-dist/pom.xml | 44 ++ .../conf/janusgraph-berkeleyje-es.properties | 5 - ...anusgraph-cassandra-embedded-es.properties | 12 +- .../conf/janusgraph-cassandra-es.properties | 7 - .../conf/janusgraph-hbase-es.properties | 1 - .../assembly/descriptor/common.component.xml | 14 + .../src/assembly/static/bin/elasticsearch | 157 ----- .../assembly/static/bin/elasticsearch.in.sh | 91 --- .../src/assembly/static/bin/janusgraph.sh | 6 +- .../assembly/static/conf/es/elasticsearch.yml | 94 +-- .../src/assembly/static/conf/es/logging.yml | 85 --- janusgraph-es/pom.xml | 87 +-- .../org/elasticsearch/bootstrap/JarHell.java | 70 --- .../diskstorage/es/ElasticSearchClient.java | 43 ++ .../diskstorage/es/ElasticSearchIndex.java | 573 ++++++------------ .../diskstorage/es/ElasticSearchMutation.java | 87 +++ .../diskstorage/es/ElasticSearchRequest.java | 110 ++++ .../diskstorage/es/ElasticSearchResponse.java | 52 ++ .../diskstorage/es/ElasticSearchSetup.java | 132 ++-- .../es/TransportElasticSearchClient.java | 201 ++++++ .../diskstorage/es/rest/RestBulkResponse.java | 79 +++ .../es/rest/RestElasticSearchClient.java | 226 +++++++ .../es/rest/RestIndexSettings.java | 47 ++ .../diskstorage/es/rest/RestSearchHit.java | 91 +++ .../es/rest/RestSearchResponse.java | 70 +++ .../es/rest/RestSearchResults.java | 56 ++ .../main/resources/janusgraph-es.properties | 1 + janusgraph-es/src/test/bin/elasticsearch | 161 ----- .../src/test/bin/elasticsearch.in.sh | 78 --- janusgraph-es/src/test/binassembly.xml | 17 - .../src/test/config/elasticsearch.yml | 98 +-- janusgraph-es/src/test/config/logging.yml | 56 -- .../es/BerkeleyElasticsearchTest.java | 28 +- .../es/ElasticSearchConfigTest.java | 228 +------ 
.../es/ElasticSearchIndexTest.java | 99 +-- .../diskstorage/es/ElasticsearchRunner.java | 52 +- .../es/ThriftElasticsearchTest.java | 34 +- .../TransportBerkeleyElasticsearchTest.java | 38 ++ .../es/TransportElasticSearchConfigTest.java | 23 + .../es/TransportElasticSearchIndexTest.java | 37 ++ .../es/TransportThriftElasticsearchTest.java | 39 ++ .../resources/es_cfg_bogus_nodeclient.yml | 4 - .../src/test/resources/es_cfg_nodeclient.yml | 4 - .../src/test/resources/es_jvmlocal.yml | 5 - .../src/test/resources/es_nodename_bar.yml | 1 - .../src/test/resources/es_nodename_foo.yml | 1 - .../src/test/resources/log4j.properties | 2 +- pom.xml | 2 + 55 files changed, 1736 insertions(+), 2052 deletions(-) delete mode 100755 janusgraph-dist/src/assembly/static/bin/elasticsearch delete mode 100755 janusgraph-dist/src/assembly/static/bin/elasticsearch.in.sh delete mode 100644 janusgraph-dist/src/assembly/static/conf/es/logging.yml delete mode 100644 janusgraph-es/src/main/java/org/elasticsearch/bootstrap/JarHell.java create mode 100644 janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/ElasticSearchClient.java create mode 100644 janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/ElasticSearchMutation.java create mode 100644 janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/ElasticSearchRequest.java create mode 100644 janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/ElasticSearchResponse.java create mode 100644 janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/TransportElasticSearchClient.java create mode 100644 janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/rest/RestBulkResponse.java create mode 100644 janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/rest/RestElasticSearchClient.java create mode 100644 janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/rest/RestIndexSettings.java create mode 100644 janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/rest/RestSearchHit.java create mode 100644 
janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/rest/RestSearchResponse.java create mode 100644 janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/rest/RestSearchResults.java delete mode 100755 janusgraph-es/src/test/bin/elasticsearch delete mode 100755 janusgraph-es/src/test/bin/elasticsearch.in.sh delete mode 100644 janusgraph-es/src/test/binassembly.xml delete mode 100644 janusgraph-es/src/test/config/logging.yml create mode 100644 janusgraph-es/src/test/java/org/janusgraph/diskstorage/es/TransportBerkeleyElasticsearchTest.java create mode 100644 janusgraph-es/src/test/java/org/janusgraph/diskstorage/es/TransportElasticSearchConfigTest.java create mode 100644 janusgraph-es/src/test/java/org/janusgraph/diskstorage/es/TransportElasticSearchIndexTest.java create mode 100644 janusgraph-es/src/test/java/org/janusgraph/diskstorage/es/TransportThriftElasticsearchTest.java delete mode 100644 janusgraph-es/src/test/resources/es_cfg_bogus_nodeclient.yml delete mode 100644 janusgraph-es/src/test/resources/es_cfg_nodeclient.yml delete mode 100644 janusgraph-es/src/test/resources/es_jvmlocal.yml delete mode 100644 janusgraph-es/src/test/resources/es_nodename_bar.yml delete mode 100644 janusgraph-es/src/test/resources/es_nodename_foo.yml diff --git a/.gitignore b/.gitignore index 410c1d66b3..f637518c8b 100644 --- a/.gitignore +++ b/.gitignore @@ -13,6 +13,7 @@ target/ /log/ /output/ /scripts/ +/elasticsearch/ # When executing tests in alphabetical order, Maven generates temporary # files with names like this: # diff --git a/.travis.yml b/.travis.yml index 2a479e6737..2a60b56319 100644 --- a/.travis.yml +++ b/.travis.yml @@ -13,11 +13,15 @@ env: # This is the encrypted COVERITY_SCAN_TOKEN, created via the # `travis encrypt` command using the project repo's public key. 
- secure: "v5ixqTeb74y0vRuPcDbe3C28GDDYvqyEXA2dt+9UVU6GG7WpnmpkBf05gI1dIhp51lBhwx9WSlFBtzho+KdCBmNY/CzBRhVHe/lCQYK9Hb6uGPvuwBvC0WjJgJXsVrLFjppeRhcf+OAweVQ3uw2RPMDRvKIVMUcO1BTFjjJl6REJXNUdzGS57MtH2mmRyOEz250EwgqUELZvcOytG7fNrjMJKVK2nSsoxi0BqZIpItTWPWWeQ1wi1FplJ18A2qtD+MPfAGNSB+/a+r0Av+VCT2eGl06ZyZAzP3q/vG5IYjQ3AJsSPqcZUt4ms+2us1+kwuzXIILjzZmcfImu29+y/thndU5E5b2v+nZ4H69CUCc5OmKW2RwozLNmBIUhO0n+35va/J7FiPIqm3pwxCz5vWA3YTHDADxnIYe7+9uY/+dOK/AvP5fyu7u07vuF3liKNBdrX7ylP3kYc7FXGmYl8wCZv31iy1yTtndQ9qKef7bo8lM9Cdh39KyowrygH+Um7pr9gqf2S9jn99nQ3bib32fBWgBkLpJRwhZYHPUupZjZfgu/9woby0DuriuHZKMqZd7QUawYz6wXGlhzu78x5Tohlj1pGBwHYdcJ/Tm3PiEpyH4aYQLffkjGHJAcCW5tO8QbB0qrLYWC8xVMWuFz1TpSBRXOqVYdBfIa2UZDtOU=" + # Default Elasticsearch heap size can be too large for Travis + - ES_JAVA_OPTS="-Xms256m -Xmx512m" matrix: - MODULE='berkeleyje' - MODULE='cassandra' - MODULE='es' ARGS='-DthreadCount=1' + - MODULE='es' ARGS='-DthreadCount=1 -Delasticsearch.dist.version=2.4.4' + - MODULE='es' ARGS='-DthreadCount=1 -Delasticsearch.dist.version=2.4.4 -Dtest=**/Transport*' - MODULE='hadoop-parent/janusgraph-hadoop-2' - MODULE='hbase-parent/janusgraph-hbase-098' - MODULE='hbase-parent/janusgraph-hbase-10' @@ -39,6 +43,8 @@ matrix: # Currently broken due to too many log statements (exceeds 4MB) # https://travis-ci.org/JanusGraph/janusgraph/jobs/197472453 - env: MODULE='es' ARGS='-DthreadCount=1' + - env: MODULE='es' ARGS='-DthreadCount=1 -Delasticsearch.dist.version=2.4.4' + - env: MODULE='es' ARGS='-DthreadCount=1 -Delasticsearch.dist.version=2.4.4 -Dtest=**/Transport*' # Currently broken due to too many log statements (exceeds 4MB) # https://travis-ci.org/JanusGraph/janusgraph/jobs/197672947 diff --git a/docs/elasticsearch.txt b/docs/elasticsearch.txt index d66a9d514d..0c29489be1 100644 --- a/docs/elasticsearch.txt +++ b/docs/elasticsearch.txt @@ -1,10 +1,10 @@ [[elasticsearch]] == Elasticsearch -[quote, 'http://www.elasticsearch.org/overview/[Elasticsearch Overview]'] -Elasticsearch is a flexible 
and powerful open source, distributed, real-time search and analytics engine. Architected from the ground up for use in distributed environments where reliability and scalability are must haves, Elasticsearch gives you the ability to move easily beyond simple full-text search. +[quote, 'https://www.elastic.co/products/elasticsearch/[Elasticsearch Overview]'] +Elasticsearch is a distributed, RESTful search and analytics engine capable of solving a growing number of use cases. As the heart of the Elastic Stack, it centrally stores your data so you can discover the expected and uncover the unexpected. -JanusGraph supports http://elasticsearch.org[Elasticsearch] as an index backend. Here are some of the Elasticsearch features supported by JanusGraph: +JanusGraph supports https://www.elastic.co/[Elasticsearch] as an index backend. Here are some of the Elasticsearch features supported by JanusGraph: * *Full-Text*: Supports all `Text` predicates to search for text properties that matches a given word, prefix or regular expression. * *Geo*: Supports all `Geo` predicates to search for geo properties that are intersecting, within, disjoint to or contained in a given query geometry. Supports points, lines and polygons for indexing. Supports circles, boxes and polygons for querying point properties and all shapes for querying non-point properties. Note that JTS is required when using line and polygon shapes (see <> for more information). @@ -17,16 +17,20 @@ JanusGraph supports http://elasticsearch.org[Elasticsearch] as an index backend. Please see <> for details on what versions of ES will work with JanusGraph. [IMPORTANT] -JanusGraph currently requires http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/modules-scripting.html#_enabling_dynamic_scripting[Elasticsearch's dynamic scripting feature]. The `script.disable_dynamic` setting must be `false` or `sandbox` on the Elasticsearch cluster. 
This configuration requirement may be removed in future JanusGraph versions. +JanusGraph currently requires https://www.elastic.co/guide/en/elasticsearch/reference/current/modules-scripting-security.html#enable-dynamic-scripting[Elasticsearch's dynamic scripting feature]. The `script.engine.groovy.inline.update` setting must be set to `true` on the Elasticsearch cluster. This configuration requirement may be removed in future JanusGraph versions. -=== Elasticsearch Configuration Overview +=== Running Elasticsearch + +JanusGraph supports connections to a running Elasticsearch cluster. JanusGraph provides two options for running local Elasticsearch instances for getting started quickly. JanusGraph server (see <>) automatically starts a local Elasticsearch instance. Alternatively JanusGraph releases include a full Elasticsearch distribution to allow users to manually start a local Elasticsearch instance (see https://www.elastic.co/guide/en/elasticsearch/guide/current/running-elasticsearch.html[this page] for more information). -JanusGraph supports two distinct configuration tracks for Elasticsearch. "Track" in this chapter means a set of configuration options. +[source,bourne] +---- +$ elasticsearch/bin/elasticsearch +---- -. The new <> -. The <> +=== Elasticsearch Configuration Overview -These tracks are mutually exclusive. A configuration uses one track or the other, but not both simultaneously. The `interface` track is recommended over the legacy track. The `interface` track, introduced in 0.5.1, offers a superset of the legacy track's functionality. The legacy track will be maintained through at least the end of the 0.5.x patch series. +JanusGraph supports HTTP and Transport client connections to a running Elasticsearch cluster. Please see <> for details on what versions of ES will work with the different client types in JanusGraph. [NOTE] JanusGraph's index options start with the string "`index.[X].`" where "`[X]`" is a user-defined name for the backend. 
This user-defined name must be passed to JanusGraph's ManagementSystem interface when building a mixed index, as described in <>, so that JanusGraph knows which of potentially multiple configured index backends to use. Configuration snippets in this chapter use the name `search`, whereas prose discussion of options typically write `[X]` in the same position. The exact index name is not significant as long as it is used consistently in JanusGraph's configuration and when administering indices. @@ -34,282 +38,75 @@ JanusGraph's index options start with the string "`index.[X].`" where "`[X]`" is [TIP] It's recommended that index names contain only alphanumeric lowercase characters and hyphens, and that they start with a lowercase letter. -[[es-cfg-interface-track]] -=== The `interface` Configuration Track +==== Connecting to Elasticsearch -The `interface` track is activated by setting either one of the following: +The Elasticsearch client is specified as follows: [source, properties] ---- -# Activate the interface track with ES's Node client -index.search.elasticsearch.interface=NODE +# ES REST client +index.search.elasticsearch.interface=REST_CLIENT index.search.backend=elasticsearch ---- [source, properties] ---- -# Or activate the interface with ES's TransportClient +# ES TransportClient index.search.elasticsearch.interface=TRANSPORT_CLIENT index.search.backend=elasticsearch ---- -The `NODE` and `TRANSPORT_CLIENT` values tell JanusGraph to use either the Node or Transport client, respectively, and activates the `interface` configuration track. One or the other must be specified to use this track. Do not specify both in the same configuration. - - -[TIP] -This chapter assumes some familiarity with the difference between Elasticsearch's "Node client" and "Transport client". 
For background on these two Elasticsearch clients and their comparative tradeoffs, see http://www.elasticsearch.org/guide/en/elasticsearch/guide/current/_talking_to_elasticsearch.html[Talking to Elasticsearch] and http://www.elasticsearch.org/guide/en/elasticsearch/client/java-api/current/client.html[Java Clients] in the official Elasticsearch documentation. +The `REST_CLIENT` and `TRANSPORT_CLIENT` values tell JanusGraph to use either the REST or Transport client, respectively. One or the other must be specified. Do not specify both in the same configuration. -Configuration on the `interface` track proceeds through roughly the following steps: - -. If the JanusGraph config option `index.[X].conf-file` is set, it's interpreted as the name of an Elasticsearch config file and its contents are copied into the ES transport or node configuration -. Any JanusGraph config options starting with `index.[X].elasticsearch.ext.` are copied verbatim to the ES transport or node configuration -. Any other ES-related JanusGraph config options listed in JanusGraph's config file are copied into their respective ES transport or node configuration settings (<> lists these options) -. `script.disable_dynamic` is set to false - -[[es-cfg-common-opts]] -==== Common Options - -Arbitrary Elasticsearch settings can be specified through one or several of the following mechanisms. - -===== Specifying an external ES `conf-file` - -The `index.[X].conf-file` option is interpreted as a path to an Elasticsearch YAML/JSON/properties file. The file must exist. If the path is relative, and the path appears in a JanusGraph properties file on disk, then the path will be interpreted relative to the directory containing the JanusGraph properties file in which it appears. The file will be opened and loaded using Elasticsearch's `ImmutableSettings.Builder.loadFromStream` method. 
This method will attempt to guess the file content's syntax by the filename extension; for this reason, it's recommended that the filename end in either ".json", ".yml", ".yaml", or ".properties", as appropriate, so that ES uses the correct parser. Here's an example configuration fragment: +When connecting to Elasticsearch a single or list of hostnames for the Elasticsearch instances must be provided. These are supplied via JanusGraph's `index.[X].hostname` key. [source, properties] ---- index.search.backend=elasticsearch -index.search.elasticsearch.interface=NODE # or TRANSPORT_CLIENT -index.search.conf-file=/home/janusgraph/elasticsearch_client.yaml ----- - -[source, yaml] ----- -# /home/janusgraph/elasticsearch_client.yaml -node.name=alice +index.search.elasticsearch.interface=TRANSPORT_CLIENT +index.search.hostname=10.0.0.10:9300 ---- -[[es-cfg-ext]] -===== Embedding ES settings with `ext` - -JanusGraph iterates over all properties prefixed with `index.[X].elasticsearch.ext.`, where `[X]` is an index name such as `search`. It strips the prefix from each property key. The remainder of the stripped key will be interpreted as an Elasticsearch configuration key. The value associated with the key is not modified. The stripped key and unmodified value are passed into the Elasticsearch client configuration. This allows embedding arbitrary Elasticsearch settings in JanusGraph's properties. Here's an example configuration fragment showing how to specify the Elasticsearch `node.name` setting using the `ext` config mechanism: +Each host or host:port pair specified here will be added to the HTTP client's round-robin list of request targets. 
Here's a minimal configuration that will round-robin over 10.0.0.10 on the default Elasticsearch HTTP port (9200) and 10.0.0.20 on port 7777: [source, properties] ---- index.search.backend=elasticsearch -index.search.elasticsearch.interface=NODE # or TRANSPORT_CLIENT -index.search.elasticsearch.ext.node.name=bob ----- - -[TIP] -The `conf-file` and `ext` mechanisms can be used together. The `conf-file`, when present, is loaded first. Any settings under `ext` are then applied. Hence, if a key exists in both `ext` and `conf-file`, the value from `ext` will take precedence. - -===== JanusGraph `index.[X]` and `index.[X].elasticsearch` options - -After processing `conf-file` and `ext`, JanusGraph checks for the following common options. On the `interface` config track, JanusGraph's only uses default values for `index-name` and `health-request-timeout`. If `ignore-cluster-name` or `cluster-name` is unset in JanusGraph's configuration, then Elasticsearch's internal defaults, any setting from `conf-file`, and any setting from `ext` apply, in that order. See <> for descriptions of these options and their accepted values. - -* `index.[X].elasticsearch.index-name` -* `index.[X].elasticsearch.cluster-name` -* `index.[X].elasticsearch.ignore-cluster-name` -* `index.[X].elasticsearch.health-request-timeout` - -[[es-cfg-transport-opts]] -==== Transport Client Options - -In addition to common options described in <>, the Transport client requires one or more hosts to which to connect. These are supplied via JanusGraph's `index.[X].hostname` key. Each host or host:port pair specified here will be added to the Transport client's round-robin list of request targets. This setting has no analog in an Elasticsearch configuration file and must be set through JanusGraph's `index.[X].hostname` option. 
Here's a minimal Transport client configuration that will round-robin over 10.0.0.10 on the default Elasticsearch native protocol port (9300) and 10.0.0.20 on port 7777: - -[source, properties] -index.search.backend=elasticsearch -index.search.elasticsearch.interface=TRANSPORT_CLIENT # or NODE +index.search.elasticsearch.interface=REST_CLIENT index.search.hostname=10.0.0.10, 10.0.0.20:7777 - -Furthermore, the Transport client accepts the `index.[X].client-sniff` option. This can be set just as effectively through the `conf-file` or `ext` mechanisms. However, it can also be controlled through this JanusGraph config option. This option exists for continuity with the legacy config track. - -[[es-cfg-node-opts]] -==== Node Client Options - -In addition to common options described in <>, the Node client also respects the following JanusGraph config options. See <> for descriptions of these options and their accepted values. - -* `index.[X].directory` -* `index.[X].elasticsearch.ttl-interval` -* `index.[X].elasticsearch.client-only` -* `index.[X].elasticsearch.local-mode` -* `index.[X].elasticsearch.load-default-node-settings` - -Unlike the Transport client, the Node client can be completely configured through `conf-file` or `ext`. If you provide a complete Node configuration via `conf-file` or `ext`, then none of the JanusGraph options listed above are required, and it's fine to leave them unset in JanusGraph's configuration. The JanusGraph options listed above are retained mainly for convenience and continuity with the legacy config track. - -However, there is one unique aspect to `index.[X].directory`. When `index.[X].directory` is set for Elasticsearch, it is taken as the path to a directory which will contain the ES data, work, and logs directories. These directories are created if they don't already exist. 
Furthermore, when the `index.[X].directory` setting appears in a JanusGraph properties file on disk and its value is a relative path, it will be interpreted relative to the directory containing that JanusGraph properties file (similar to how relative `conf-file` paths are handled). That's the difference between setting JanusGraph's `index.[X].directory` versus setting Elasticsearch's `path.home` directory: relative paths for the former are based on the directory containing the JanusGraph properties file, whereas relative paths for the latter are based on the JVM's current working directory. - -Note that `index.[X].hostname` is not in the list above. The recommended way to set a list of hostnames with the Node client is to use Elasticsearch's own config keys via `ext` or `conf-file`. See the Elasticsearch documentation on the http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/modules-discovery-zen.html[`discovery` module] and the http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/modules-transport.html[`transport` module] for relevant ES config keys. Also see <> for an example configuration using the Elasticsearch Zen discovery module and unicast addressing. - -[[es-cfg-jvmlocal-iface]] -===== Node Example: JVM-local Discovery - -The following JanusGraph configuration and accompanying Elasticsearch config file create a Node which uses ES's JVM-local discovery. This means that the Node can only see other Nodes within the JVM. The Node does not listen for connections on network sockets or attempt to discover a cluster over the network. This is convenient when testing JanusGraph in a single-machine setup. 
- -[source, properties] ----- -index.search.backend=elasticsearch -index.search.elasticsearch.interface=NODE -index.search.conf-file=es_jvmlocal.yml ---- -[source, yaml] ----- -# es_jvmlocal.yml -node.data: true -node.client: false -node.local: true -# This path is interpreted relative to the JVM's current working directory -path.home: es ----- +Connection configuration options (see <>) are read in roughly the following steps: -The following configuration is similar to the one above, except it uses `ext` and the `index.[X].directory` JanusGraph setting to locate the ES work, data, and log paths. When the `index.[X].directory` appears in a JanusGraph properties file and is set to a relative path, that path is interpreted relative to the directory containing the JanusGraph properties file. Compare this to setting `path.home` directly, which will be interpreted relative to the current working directory of the Java VM. +. Any JanusGraph config options starting with `index.[X].elasticsearch.ext.` +. Any other ES-related JanusGraph config options listed in JanusGraph's config file -[source, properties] ----- -index.search.backend=elasticsearch -index.search.elasticsearch.interface=NODE -# data, logs, etc. subdirectories for ES will be created in -# /../db/es -index.search.directory=../db/es -index.search.elasticsearch.ext.node.data=true -index.search.elasticsearch.ext.node.client=false -index.search.elasticsearch.ext.node.local=true ----- -[[es-cfg-unicast-zen]] -===== Node Example: Connecting to a Remote Cluster +Arbitrary Elasticsearch settings can be specified through one or both of the following mechanisms. -The following JanusGraph configuration and accompanying Elasticsearch config file create a Node which discovers its cluster by sending unicast packets to `host1` on the default port and `host2` on `customport`. The Node client will attempt to learn all members of the cluster using `unicast.hosts` as the initial points of contact. 
Since the following config sets 'node.data=false' and 'node.client=true', the Node started by JanusGraph won't store any persistent index data or attempt to become a master node. It discovers the cluster and routes requests using that information, but it doesn't hold any important state, so it can be lost without affecting Elasticsearch's availability or durability. - -[source, properties] ----- -index.search.backend=elasticsearch -index.search.elasticsearch.interface=NODE -index.search.conf-file=es_netclient.yml ----- - -[source, yaml] ----- -# es_netclient.yml -node.data: false -node.client: true -discovery.zen.ping.multicast.enabled: false -discovery.zen.ping.unicast.hosts: [ "host1", "host2:customport" ] ----- - -This configuration has the same effect as the one listed above, except using `ext` instead of `conf-file`. - -[source, properties] ----- -index.search.backend=elasticsearch -index.search.elasticsearch.interface=NODE -index.search.elasticsearch.ext.node.data=false -index.search.elasticsearch.ext.node.client=true -index.search.elasticsearch.ext.discovery.zen.ping.multicast.enabled=false -index.search.elasticsearch.ext.discovery.zen.ping.unicast.hosts=host1, host2:customport ----- - -===== Node Example: Joining an ES Cluster as a Data Node - -This is similar to the example in the previous section, except the Node holds Elasticsearch data. This means JanusGraph's Elasticsearch instance will be a full-fledged member of the Elasticsearch cluster, and if the process containing JanusGraph and the ES Node dies, it could affect Elasticsearch's availability or durability. This is an uncommon configuration. 
- -[source, properties] ----- -index.search.backend=elasticsearch -index.search.elasticsearch.interface=NODE -index.search.conf-file=es_clustermember.yml ----- - -[source, yaml] ----- -# es_clustermember.yml -node.data: true -node.client: false -node.local: false -path.home: es -discovery.zen.ping.multicast.enabled: false -discovery.zen.ping.unicast.hosts: [ "host1", "host2:customport" ] ----- - -This configuration has the same effect as the one listed above, except using `ext` instead of `conf-file`. - -[source, properties] ----- -index.search.backend=elasticsearch -index.search.elasticsearch.interface=NODE -index.search.elasticsearch.ext.node.data=true -index.search.elasticsearch.ext.node.client=false -index.search.elasticsearch.ext.node.local=false -# The next path is interpreted relative to the JVM working directory -index.search.elasticsearch.ext.path.home=es -index.search.elasticsearch.ext.discovery.zen.ping.multicast.enabled=false -index.search.elasticsearch.ext.discovery.zen.ping.unicast.hosts=host1, host2:customport ----- - -[[es-cfg-legacy-track]] -=== The Legacy Configuration track - -The legacy configuration track allows running either a Transport client or a Node in JVM-local discovery mode. Running a Node that discovers the cluster over network sockets is not supported. - -This track is activated by omitting the `index.[X].elasticsearch.interface` option from JanusGraph's configuration file. - -[WARNING] -The legacy track is not recommended for new deployments. Consider using the newer <> instead. - -[[es-cfg-jvmlocal-legacy]] -==== Embedded JVM-local Node Configuration - -The legacy track supports starting an Elasticsearch Node with JVM-local transport. Network transport and discovery are not supported on the legacy track. Due to this limitation, it's only useful running a single-node embedded ES instance, such as in testing. 
- -Here's an example JanusGraph configuration that starts a JVM-local Node using the legacy config track: - -[source, properties] ----- -index.search.backend=elasticsearch -# This will create /tmp/searchindex/work, /tmp/searchindex/logs, and -# /tmp/searchindex/data -index.search.directory=/tmp/searchindex -index.search.elasticsearch.client-only=false -index.search.elasticsearch.local-mode=true ----- - -Elasticsearch will not be accessible from outside of this particular JanusGraph instance, i.e. remote connections will not be possible. - -In the above configuration, the index backend is named `search`. Replace `search` by a different name to change the name of the index. - -[[es-cfg-transportclient-legacy]] -==== Transport Client Configuration - -The legacy track supports the Transport client. This can connect to Elasticsearch nodes running on the same machine or a cluster of remote machines. - -To use the Transport client on the legacy track, add the following JanusGraph options to the graph configuration file, where `hostname` lists the IP addresses of the Elasticsearch cluster nodes: - -[source, properties] ----- -index.search.backend=elasticsearch -index.search.hostname=100.100.101.1, 100.100.101.2 -index.search.elasticsearch.client-only=true ----- - -Make sure that the Elasticsearch cluster is running prior to starting a JanusGraph instance attempting to connect to it. Also ensure that the machine running JanusGraph can connect to the Elasticsearch instances over the network if the machines are physically separated. This might require setting additional configuration options which are summarized below. +[[es-cfg-ext]] +===== Embedding ES settings with `ext` -In the above configuration, the index backend is named `search`. Replace `search` by a different name to change the name of the index. +JanusGraph iterates over all properties prefixed with `index.[X].elasticsearch.ext.`, where `[X]` is an index name such as `search`. 
It strips the prefix from each property key. The remainder of the stripped key will be interpreted as an Elasticsearch configuration key. The value associated with the key is not modified. The stripped key and unmodified value are passed into the Elasticsearch client configuration. This allows embedding arbitrary Elasticsearch settings in JanusGraph's properties. -==== Legacy Configuration Options +===== JanusGraph `index.[X]` and `index.[X].elasticsearch` options -This section lists the subset of ES options that are effective on the legacy configuration track. See <> for descriptions of these options and their accepted values. +After processing `ext`, JanusGraph checks for the following common options. JanusGraph only uses default values for `index-name` and `health-request-timeout`. If `ignore-cluster-name` or `cluster-name` is unset in JanusGraph's configuration, then Elasticsearch's internal defaults and any setting from `ext` apply, in that order. See <> for descriptions of these options and their accepted values. * `index.[X].elasticsearch.index-name` * `index.[X].elasticsearch.cluster-name` -* `index.[X].elasticsearch.local-mode` -* `index.[X].elasticsearch.client-only` +* `index.[X].elasticsearch.ignore-cluster-name` * `index.[X].elasticsearch.health-request-timeout` -* `index.[X].conf-file` -* `index.[X].directory` -* `index.[X].hostname` -On the legacy track, setting `cluster-name` automatically enables cluster name validation. Leaving `cluster-name` unset disables cluster name validation. +[[es-cfg-rest-opts]] +==== REST Client Options + +The REST client accepts the `index.[X].bulk-refresh` option. This option controls when changes are made visible to search. See https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-refresh.html[?refresh documentation] for more information. This can also be set through the `ext` mechanism. 
+ +[[es-cfg-transport-opts]] +==== Transport Client Options + +The Transport client accepts the `index.[X].client-sniff` option. This can be set just as effectively through the `ext` mechanism. However, it can also be controlled through this JanusGraph config option. This option exists for continuity with the legacy config. === Secure Elasticsearch @@ -318,7 +115,7 @@ Elasticsearch does not perform authentication or authorization. A client that c * The HTTP REST API, usually on port 9200 * The native "transport" protocol, usually on port 9300 -A client uses either one protocol/port or the other, but not both simultaneously. JanusGraph uses Elasticsearch's two official Java clients. Each of these uses only the native "transport" protocol typically listening on port 9300. Although both of Elasticsearch's ports should be secured when running ES on a public network, JanusGraph is only concerned with the latter port, so it's the focus of this section. There are a couple of ways to approach security on the native "transport" protocol port: +A client uses either one protocol/port or the other, but not both simultaneously. Securing the HTTP protocol port is generally done with a combination of firewalling and a reverse proxy with SSL encryption and HTTP authentication. There are a couple of ways to approach security on the native "transport" protocol port: Tunnel ES's native "transport" protocol:: This approach can be implemented with SSL/TLS tunneling (for instance via https://www.stunnel.org/index.html[stunnel]), a VPN, or SSH port forwarding. SSL/TLS tunnels require non-trivial setup and monitoring: one or both ends of the tunnel need a certificate, and the stunnel processes need to be configured and running continuously in order for JanusGraph and Elasticsearch to communicate. The setup for most secure VPNs is likewise non-trivial. 
Some Elasticsearch service providers handle server-side tunnel management and provide a custom Elasticsearch `transport.type` to simplify the client setup. JanusGraph is compatible with these custom transports. See <> for information on how to override the `transport.type` and provide arbitrary `transport.*` config keys to JanusGraph's ES client. Add a firewall rule that allows only trusted clients to connect on Elasticsearch's native protocol port:: This is typically done at the host firewall level. This doesn't require any configuration changes in JanusGraph or Elasticsearch, nor does it require helper processes like stunnel. Easy to configure, but very weak security by itself. @@ -326,7 +123,7 @@ Add a firewall rule that allows only trusted clients to connect on Elasticsearch [[es-cfg-index-create]] === Index Creation Options -Since 0.5.3, JanusGraph supports customization of the index settings it uses when creating its Elasticsearch index. The customization mechanism is based on but distinct from the `ext` config prefix described in <>. It allows setting arbitrary key-value pairs on the `settings` object in the http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/indices-create-index.html[Elasticsearch `create index` request] issued by JanusGraph. Here is a non-exhaustive sample of Elasticsearch index settings that can be customized using this mechanism: +JanusGraph supports customization of the index settings it uses when creating its Elasticsearch index. The customization mechanism is based on but distinct from the `ext` config prefix described in <>. It allows setting arbitrary key-value pairs on the `settings` object in the https://www.elastic.co/guide/en/elasticsearch/reference/current/indices-create-index.html[Elasticsearch `create index` request] issued by JanusGraph. 
Here is a non-exhaustive sample of Elasticsearch index settings that can be customized using this mechanism: * `index.number_of_replicas` * `index.number_of_shards` @@ -360,13 +157,13 @@ index.search.elasticsearch.create.ext.index.shard.check_on_startup=false The `create.ext` config prefix described in this section is similar but not identical to the `ext` config prefix described in <>. Whereas the `ext` prefix controls settings applied to the client connection, the `create.ext` prefix controls settings specific to index creation requests. [TIP] -The `create.ext` mechanism for specifying index creation settings is compatible with both of JanusGraph's Elasticsearch configuration tracks. +The `create.ext` mechanism for specifying index creation settings is compatible with JanusGraph's Elasticsearch configuration. === Troubleshooting ==== Connection Issues to remote Elasticsearch cluster -Check that the Elasticsearch cluster nodes are reachable on the native "transport" protocol port from the JanusGraph nodes. Check the node listen port by examining the Elasticsearch node configuration logs or using a general diagnostic utility like `netstat`. Check the JanusGraph configuration; try the Transport client while troubleshoot connectivity issues, since it's easier to control which ES hosts the Transport client will use. Disable sniffing to restrict the Transport client to just the configured host list. Check that the client and server have the same major version: 0.90.x and 1.x are not compatible. +Check that the Elasticsearch cluster nodes are reachable on the HTTP and native "transport" protocol ports from the JanusGraph nodes. Check the node listen port by examining the Elasticsearch node configuration logs or using a general diagnostic utility like `netstat`. Check the JanusGraph configuration. Disable sniffing to restrict the Transport client to just the configured host list. 
Check that the client and server have the same major version: 1.x and 2.x are not compatible. ==== Classpath or Field errors @@ -379,10 +176,10 @@ java.lang.NoSuchFieldError: LUCENE_5_5_2 ==== Write Optimization -For <> or other write-intense applications, consider increasing Elasticsearch's refresh interval. Refer to https://groups.google.com/d/topic/elasticsearch/yp6bTiP2JYE/discussion[this discussion] on how to increase the refresh interval and its impact on write performance. Note, that a higher refresh interval means that it takes a longer time for graph mutations to be available in the index. +For <> or other write-intense applications, consider increasing Elasticsearch's refresh interval. Refer to https://www.elastic.co/guide/en/elasticsearch/reference/current/tune-for-indexing-speed.html[this discussion] on how to increase the refresh interval and its impact on write performance. Note, that a higher refresh interval means that it takes a longer time for graph mutations to be available in the index. For additional suggestions on how to increase write performance in Elasticsearch with detailed instructions, please read http://blog.bugsense.com/post/35580279634/indexing-bigdata-with-elasticsearch[this blog post]. ==== Further Reading -* Please refer to the http://elasticsearch.org[Elasticsearch homepage] and available documentation for more information on Elasticsearch and how to setup an Elasticsearch cluster. +* Please refer to the https://www.elastic.co[Elasticsearch homepage] and available documentation for more information on Elasticsearch and how to setup an Elasticsearch cluster. diff --git a/docs/versions.txt b/docs/versions.txt index 9fec31a58d..0d071a9d7a 100644 --- a/docs/versions.txt +++ b/docs/versions.txt @@ -13,5 +13,6 @@ JanusGraph. 
[options="header"] |========================== | JanusGraph | Cassandra | HBase | Bigtable | Elasticsearch | Solr | TinkerPop -| 0.1.0 | 1.2.z, 2.0.z, 2.1.z | 0.98.z, 1.0.z, 1.1.z, 1.2.z | 0.9.z | 1.5.z | 5.2.z | 3.2.z -|========================== +| 0.1.0 | 1.2.z, 2.0.z, 2.1.z | 0.98.z, 1.0.z, 1.1.z, 1.2.z | 0.9.z | 2.z,5.z* | 5.2.z | 3.2.z + +*Only the REST client is compatible with both Elasticsearch 2.z and 5.z. The transport client is only compatible with Elasticsearch 2.z. diff --git a/janusgraph-core/src/main/java/org/janusgraph/core/attribute/Geoshape.java b/janusgraph-core/src/main/java/org/janusgraph/core/attribute/Geoshape.java index 66e7003875..f3e419aae5 100644 --- a/janusgraph-core/src/main/java/org/janusgraph/core/attribute/Geoshape.java +++ b/janusgraph-core/src/main/java/org/janusgraph/core/attribute/Geoshape.java @@ -21,6 +21,8 @@ import com.spatial4j.core.shape.Circle; import com.spatial4j.core.shape.Shape; import com.spatial4j.core.shape.SpatialRelation; +import org.apache.tinkerpop.shaded.jackson.databind.ObjectReader; +import org.apache.tinkerpop.shaded.jackson.databind.ObjectWriter; import org.janusgraph.diskstorage.ScanBuffer; import org.janusgraph.diskstorage.WriteBuffer; import org.janusgraph.graphdb.database.idhandling.VariableLong; @@ -80,6 +82,14 @@ public class Geoshape { HELPER = haveJts ? new JtsGeoshapeHelper() : new GeoshapeHelper(); } + private static final ObjectReader mapReader; + private static final ObjectWriter mapWriter; + static { + final ObjectMapper mapper = new ObjectMapper(); + mapReader = mapper.readerWithView(LinkedHashMap.class).forType(LinkedHashMap.class); + mapWriter = mapper.writerWithView(Map.class); + } + /** * The Type of a shape: a point, box, circle, line or polygon. */ @@ -151,6 +161,10 @@ public String toGeoJson() { return GeoshapeGsonSerializer.toGeoJson(this); } + public Map toMap() throws IOException { + return mapReader.readValue(toGeoJson()); + } + /** * Returns the underlying {@link Shape}. 
* @return @@ -510,7 +524,7 @@ private Geoshape convertGeometry(Map geometry) throws IOExceptio } } - String json = new ObjectMapper().writeValueAsString(geometry); + String json = mapWriter.writeValueAsString(geometry); return new Geoshape(HELPER.getGeojsonReader().read(new StringReader(json))); } @@ -616,7 +630,7 @@ public void serializeWithType(Geoshape geoshape, JsonGenerator jgen, SerializerP jgen.writeStartObject(); if (typeSerializer != null) jgen.writeStringField(GraphSONTokens.CLASS, Geoshape.class.getName()); String geojson = toGeoJson(geoshape); - Map json = new ObjectMapper().readValue(geojson, LinkedHashMap.class); + Map json = mapReader.readValue(geojson); if (geoshape.getType() == Type.POINT) { double[] coords = ((List) json.get("coordinates")).stream().map(i -> i.doubleValue()).mapToDouble(i -> i).toArray(); GraphSONUtil.writeWithType(FIELD_COORDINATES, coords, jgen, serializerProvider, typeSerializer); @@ -652,7 +666,7 @@ public Geoshape deserialize(JsonParser jsonParser, DeserializationContext deseri try { HashMap map = jsonParser.readValueAs(LinkedHashMap.class); jsonParser.nextToken(); - String json = new ObjectMapper().writeValueAsString(map); + String json = mapWriter.writeValueAsString(map); Geoshape shape = new Geoshape(HELPER.getGeojsonReader().read(new StringReader(json))); return shape; } catch (ParseException e) { diff --git a/janusgraph-core/src/main/java/org/janusgraph/example/GraphOfTheGodsFactory.java b/janusgraph-core/src/main/java/org/janusgraph/example/GraphOfTheGodsFactory.java index bb518d313a..8caf33c735 100644 --- a/janusgraph-core/src/main/java/org/janusgraph/example/GraphOfTheGodsFactory.java +++ b/janusgraph-core/src/main/java/org/janusgraph/example/GraphOfTheGodsFactory.java @@ -46,9 +46,6 @@ public static JanusGraph create(final String directory) { config.set("storage.backend", "berkeleyje"); config.set("storage.directory", directory); config.set("index." + INDEX_NAME + ".backend", "elasticsearch"); - config.set("index." 
+ INDEX_NAME + ".directory", directory + File.separator + "es"); - config.set("index." + INDEX_NAME + ".elasticsearch.local-mode", true); - config.set("index." + INDEX_NAME + ".elasticsearch.client-only", false); JanusGraph graph = config.open(); GraphOfTheGodsFactory.load(graph); diff --git a/janusgraph-dist/janusgraph-dist-hadoop-2/pom.xml b/janusgraph-dist/janusgraph-dist-hadoop-2/pom.xml index 6790309f0d..b9102674b7 100644 --- a/janusgraph-dist/janusgraph-dist-hadoop-2/pom.xml +++ b/janusgraph-dist/janusgraph-dist-hadoop-2/pom.xml @@ -136,8 +136,8 @@ java -classpath - - org.janusgraph.example.GraphOfTheGodsFactory + ${project.build.directory}/dependency/janusgraph-es-${project.version}-tests.jar:${project.build.directory}/dependency/* + org.janusgraph.diskstorage.es.ElasticsearchRunner ${project.build.directory}/conf/janusgraph-berkeleyje-es.properties diff --git a/janusgraph-dist/pom.xml b/janusgraph-dist/pom.xml index 8e9d6e9c55..16f77b8880 100644 --- a/janusgraph-dist/pom.xml +++ b/janusgraph-dist/pom.xml @@ -19,6 +19,7 @@ ${project.basedir}/src/assembly/cfilter ${project.build.directory}/cfilter ${project.basedir}/src/assembly/descriptor + ${project.build.directory}/elasticsearch-${elasticsearch.dist.version} ${project.build.directory}/test-classes @@ -120,6 +121,26 @@ tests test + + org.elasticsearch.distribution.zip + elasticsearch + ${elasticsearch.dist.version} + zip + + + * + * + + + org.elasticsearch + elasticsearch + + + net.java.dev.jna + jna + + + ${project.groupId} janusgraph-solr @@ -153,6 +174,29 @@ + + maven-dependency-plugin + + + unpack-plugins + + unpack-dependencies + + generate-test-resources + + ${project.build.directory} + org.elasticsearch.distribution.zip + + + + copy-dependencies + package + + copy-dependencies + + + + - org.elasticsearch.module - lang-groovy - ${elasticsearch.version} - zip + org.elasticsearch.client + rest + ${elasticsearch.rest.version} + + + org.elasticsearch.distribution.zip + elasticsearch + 
${elasticsearch.dist.version} test + zip - org.codehaus.groovy - groovy + org.elasticsearch + elasticsearch + + + net.java.dev.jna + jna @@ -114,21 +121,6 @@ test-jar - - - pre-test-jar - process-test-classes - - jar - - - es_jarhell - - **/JarHell.class - - - @@ -142,7 +134,7 @@ copy-resources - ${project.basedir}/config + ${project.build.directory}/elasticsearch-${elasticsearch.dist.version}/config $MAVEN{*} @@ -176,44 +168,10 @@ - - - maven-assembly-plugin - - - copy-es-bin - process-test-resources - - single - - - false - / - false - ${project.basedir}/bin - - ${project.basedir}/src/test/binassembly.xml - - - - - - maven-dependency-plugin - - build-es-classpath - process-test-resources - - build-classpath - - - ${project.build.directory}/es_classpath.txt - - - + unpack-plugins @@ -221,12 +179,8 @@ generate-test-resources - ${project.build.directory}/db/es/plugins/lang-groovy - lang-groovy - org.elasticsearch.module - true - pom - groovy*.jarexcludes> + ${project.build.directory} + org.elasticsearch.distribution.zip test @@ -265,6 +219,9 @@ default-test -Dtest.cassandra.confdir=${project.build.directory}/cassandra/conf/localhost-murmur -Dtest.cassandra.datadir=${project.build.directory}/cassandra/data/localhost-murmur + + **/Transport*.java + diff --git a/janusgraph-es/src/main/java/org/elasticsearch/bootstrap/JarHell.java b/janusgraph-es/src/main/java/org/elasticsearch/bootstrap/JarHell.java deleted file mode 100644 index 7060d85ffc..0000000000 --- a/janusgraph-es/src/main/java/org/elasticsearch/bootstrap/JarHell.java +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.elasticsearch.bootstrap; - -import org.elasticsearch.common.io.PathUtils; - -import java.net.MalformedURLException; -import java.net.URL; - -/** - * This class masks the Elasticsearch class of the same name. - * Clients are responsible for ensuring their classpath is sane. - */ -public class JarHell { - - public static void checkJarHell() { } - - public static URL[] parseClassPath() { - return parseClassPath(System.getProperty("java.class.path")); - } - - static URL[] parseClassPath(String classPath) { - String pathSeparator = System.getProperty("path.separator"); - String fileSeparator = System.getProperty("file.separator"); - String elements[] = classPath.split(pathSeparator); - URL urlElements[] = new URL[elements.length]; - for (int i = 0; i < elements.length; i++) { - String element = elements[i]; - if (element.isEmpty()) { - throw new IllegalStateException("Classpath should not contain empty elements! (outdated shell script from a previous version?) 
classpath='" + classPath + "'"); - } - if (element.startsWith("/") && "\\".equals(fileSeparator)) { - element = element.replace("/", "\\"); - if (element.length() >= 3 && element.charAt(2) == ':') { - element = element.substring(1); - } - } - try { - urlElements[i] = PathUtils.get(element).toUri().toURL(); - } catch (MalformedURLException e) { - throw new RuntimeException(e); - } - } - return urlElements; - } - - public static void checkJarHell(URL urls[]) { } - - public static void checkVersionFormat(String targetVersion) { } - - public static void checkJavaVersion(String resource, String targetVersion) { } - -} diff --git a/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/ElasticSearchClient.java b/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/ElasticSearchClient.java new file mode 100644 index 0000000000..52e51a0c06 --- /dev/null +++ b/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/ElasticSearchClient.java @@ -0,0 +1,43 @@ +// Copyright 2017 JanusGraph Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package org.janusgraph.diskstorage.es; + +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.xcontent.XContentBuilder; + +import java.io.Closeable; +import java.io.IOException; +import java.util.List; +import java.util.Map; + +public interface ElasticSearchClient extends Closeable { + + void clusterHealthRequest(String timeout) throws IOException; + + boolean indexExists(String indexName) throws IOException; + + void createIndex(String indexName, Settings settings) throws IOException; + + Map getIndexSettings(String indexName) throws IOException; + + void createMapping(String indexName, String typeName, XContentBuilder mapping) throws IOException; + + void deleteIndex(String indexName) throws IOException; + + void bulkRequest(List requests) throws IOException; + + ElasticSearchResponse search(String indexName, String type, ElasticSearchRequest request) throws IOException; + +} diff --git a/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/ElasticSearchIndex.java b/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/ElasticSearchIndex.java index e6708e480c..cfbb27f350 100644 --- a/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/ElasticSearchIndex.java +++ b/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/ElasticSearchIndex.java @@ -15,87 +15,81 @@ package org.janusgraph.diskstorage.es; import com.google.common.base.Preconditions; -import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Iterators; import com.google.common.collect.LinkedListMultimap; import com.google.common.collect.Multimap; - +import org.apache.commons.lang.StringUtils; +import org.apache.tinkerpop.shaded.jackson.core.type.TypeReference; +import org.apache.tinkerpop.shaded.jackson.databind.ObjectMapper; +import org.elasticsearch.Version; +import org.elasticsearch.common.geo.ShapeRelation; +import org.elasticsearch.common.geo.builders.LineStringBuilder; +import 
org.elasticsearch.common.geo.builders.PolygonBuilder; +import org.elasticsearch.common.geo.builders.ShapeBuilder; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.unit.DistanceUnit; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentFactory; +import org.elasticsearch.index.query.BoolQueryBuilder; +import org.elasticsearch.index.query.GeoPolygonQueryBuilder; +import org.elasticsearch.index.query.QueryBuilder; +import org.elasticsearch.index.query.QueryBuilders; +import org.elasticsearch.search.sort.SortOrder; import org.janusgraph.core.Cardinality; import org.janusgraph.core.JanusGraphException; -import org.janusgraph.core.attribute.*; +import org.janusgraph.core.attribute.Cmp; +import org.janusgraph.core.attribute.Geo; +import org.janusgraph.core.attribute.Geoshape; +import org.janusgraph.core.attribute.Text; import org.janusgraph.core.schema.Mapping; -import org.janusgraph.diskstorage.*; +import org.janusgraph.diskstorage.BackendException; +import org.janusgraph.diskstorage.BaseTransaction; +import org.janusgraph.diskstorage.BaseTransactionConfig; +import org.janusgraph.diskstorage.BaseTransactionConfigurable; +import org.janusgraph.diskstorage.PermanentBackendException; +import org.janusgraph.diskstorage.TemporaryBackendException; import org.janusgraph.diskstorage.configuration.ConfigNamespace; import org.janusgraph.diskstorage.configuration.ConfigOption; import org.janusgraph.diskstorage.configuration.Configuration; -import org.janusgraph.diskstorage.indexing.*; +import org.janusgraph.diskstorage.indexing.IndexEntry; +import org.janusgraph.diskstorage.indexing.IndexFeatures; +import org.janusgraph.diskstorage.indexing.IndexMutation; +import org.janusgraph.diskstorage.indexing.IndexProvider; +import org.janusgraph.diskstorage.indexing.IndexQuery; +import org.janusgraph.diskstorage.indexing.KeyInformation; +import org.janusgraph.diskstorage.indexing.RawQuery; import 
org.janusgraph.diskstorage.util.DefaultTransaction; import org.janusgraph.graphdb.configuration.PreInitializeConfigOptions; - import static org.janusgraph.diskstorage.configuration.ConfigOption.disallowEmpty; -import static org.janusgraph.graphdb.configuration.GraphDatabaseConfiguration.*; - import org.janusgraph.graphdb.database.serialize.AttributeUtil; import org.janusgraph.graphdb.internal.Order; import org.janusgraph.graphdb.query.JanusGraphPredicate; -import org.janusgraph.graphdb.query.condition.*; +import org.janusgraph.graphdb.query.condition.And; +import org.janusgraph.graphdb.query.condition.Condition; +import org.janusgraph.graphdb.query.condition.Not; +import org.janusgraph.graphdb.query.condition.Or; +import org.janusgraph.graphdb.query.condition.PredicateCondition; import org.janusgraph.graphdb.types.ParameterType; -import org.janusgraph.util.system.IOUtils; -import org.apache.commons.lang.StringUtils; -import org.elasticsearch.Version; -import org.elasticsearch.action.admin.indices.create.CreateIndexResponse; -import org.elasticsearch.action.admin.indices.delete.DeleteIndexRequest; -import org.elasticsearch.action.admin.indices.exists.indices.IndicesExistsRequest; -import org.elasticsearch.action.admin.indices.exists.indices.IndicesExistsResponse; -import org.elasticsearch.action.admin.indices.mapping.put.PutMappingResponse; -import org.elasticsearch.action.bulk.BulkItemResponse; -import org.elasticsearch.action.bulk.BulkRequestBuilder; -import org.elasticsearch.action.bulk.BulkResponse; -import org.elasticsearch.action.delete.DeleteRequest; -import org.elasticsearch.action.index.IndexRequest; -import org.elasticsearch.action.search.SearchRequestBuilder; -import org.elasticsearch.action.search.SearchResponse; -import org.elasticsearch.action.update.UpdateRequestBuilder; -import org.elasticsearch.client.Client; -import org.elasticsearch.client.transport.TransportClient; -import org.elasticsearch.common.geo.ShapeRelation; -import 
org.elasticsearch.common.geo.builders.LineStringBuilder; -import org.elasticsearch.common.geo.builders.PolygonBuilder; -import org.elasticsearch.common.geo.builders.ShapeBuilder; -import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.common.transport.InetSocketTransportAddress; -import org.elasticsearch.common.unit.DistanceUnit; -import org.elasticsearch.common.xcontent.XContentBuilder; -import org.elasticsearch.common.xcontent.XContentFactory; -import org.elasticsearch.index.IndexNotFoundException; -import org.elasticsearch.index.query.*; -import org.elasticsearch.node.Node; -import org.elasticsearch.node.NodeBuilder; -import org.elasticsearch.rest.RestStatus; -import org.elasticsearch.script.Script; -import org.elasticsearch.script.ScriptService; -import org.elasticsearch.search.SearchHit; -import org.elasticsearch.search.SearchHits; -import org.elasticsearch.search.sort.FieldSortBuilder; -import org.elasticsearch.search.sort.SortOrder; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.ByteArrayInputStream; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileNotFoundException; import java.io.IOException; -import java.net.InetAddress; -import java.net.UnknownHostException; import java.time.Instant; -import java.util.*; -import java.util.AbstractMap.SimpleEntry; -import java.util.concurrent.TimeUnit; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Date; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.UUID; import java.util.stream.Collectors; import java.util.stream.IntStream; -import java.util.stream.Stream; + +import static org.janusgraph.graphdb.configuration.GraphDatabaseConfiguration.INDEX_MAX_RESULT_SET_SIZE; +import static org.janusgraph.graphdb.configuration.GraphDatabaseConfiguration.INDEX_NAME; +import static org.janusgraph.graphdb.configuration.GraphDatabaseConfiguration.INDEX_NS; /** * @author Matthias Broecheler 
(me@matthiasb.com) @@ -106,32 +100,16 @@ public class ElasticSearchIndex implements IndexProvider { private static final Logger log = LoggerFactory.getLogger(ElasticSearchIndex.class); - private static final String TTL_FIELD = "_ttl"; private static final String STRING_MAPPING_SUFFIX = "__STRING"; public static final ConfigNamespace ELASTICSEARCH_NS = new ConfigNamespace(INDEX_NS, "elasticsearch", "Elasticsearch index configuration"); - public static final ConfigOption CLIENT_ONLY = - new ConfigOption(ELASTICSEARCH_NS, "client-only", - "The Elasticsearch node.client option is set to this boolean value, and the Elasticsearch node.data " + - "option is set to the negation of this value. True creates a thin client which holds no data. False " + - "creates a regular Elasticsearch cluster node that may store data.", - ConfigOption.Type.GLOBAL_OFFLINE, true); - public static final ConfigOption CLUSTER_NAME = new ConfigOption(ELASTICSEARCH_NS, "cluster-name", "The name of the Elasticsearch cluster. This should match the \"cluster.name\" setting " + "in the Elasticsearch nodes' configuration.", ConfigOption.Type.GLOBAL_OFFLINE, "elasticsearch"); - public static final ConfigOption LOCAL_MODE = - new ConfigOption(ELASTICSEARCH_NS, "local-mode", - "On the legacy config track, this option chooses between starting a TransportClient (false) or " + - "a Node with JVM-local transport and local data (true). On the interface config track, this option " + - "is considered by (but optional for) the Node client and ignored by the TransportClient. See the manual " + - "for more information about ES config tracks.", - ConfigOption.Type.GLOBAL_OFFLINE, false); - public static final ConfigOption CLIENT_SNIFF = new ConfigOption(ELASTICSEARCH_NS, "sniff", "Whether to enable cluster sniffing. This option only applies to the TransportClient. 
" + @@ -143,7 +121,7 @@ public class ElasticSearchIndex implements IndexProvider { "Whether to connect to ES using the Node or Transport client (see the \"Talking to Elasticsearch\" " + "section of the ES manual for discussion of the difference). Setting this option enables the " + "interface config track (see manual for more information about ES config tracks).", - ConfigOption.Type.MASKABLE, String.class, ElasticSearchSetup.TRANSPORT_CLIENT.toString(), + ConfigOption.Type.MASKABLE, String.class, ElasticSearchSetup.REST_CLIENT.toString(), disallowEmpty(String.class)); public static final ConfigOption IGNORE_CLUSTER_NAME = @@ -152,12 +130,6 @@ public class ElasticSearchIndex implements IndexProvider { "This option is only used on the interface configuration track (see manual for " + "information about ES config tracks).", ConfigOption.Type.MASKABLE, true); - public static final ConfigOption TTL_INTERVAL = - new ConfigOption(ELASTICSEARCH_NS, "ttl-interval", - "The period of time between runs of ES's bulit-in expired document deleter. " + - "This string will become the value of ES's indices.ttl.interval setting and should " + - "be formatted accordingly, e.g. 5s or 60s.", ConfigOption.Type.MASKABLE, "5s"); - public static final ConfigOption HEALTH_REQUEST_TIMEOUT = new ConfigOption(ELASTICSEARCH_NS, "health-request-timeout", "When JanusGraph initializes its ES backend, JanusGraph waits up to this duration for the " + @@ -165,18 +137,10 @@ public class ElasticSearchIndex implements IndexProvider { "This string should be formatted as a natural number followed by the lowercase letter " + "\"s\", e.g. 3s or 60s.", ConfigOption.Type.MASKABLE, "30s"); - public static final ConfigOption LOAD_DEFAULT_NODE_SETTINGS = - new ConfigOption(ELASTICSEARCH_NS, "load-default-node-settings", - "Whether ES's Node client will internally attempt to load default configuration settings " + - "from system properties/process environment variables. 
Only meaningful when using the Node " + - "client (has no effect with TransportClient).", ConfigOption.Type.MASKABLE, true); - - public static final ConfigOption USE_EDEPRECATED_IGNORE_UNMAPPED_OPTION = - new ConfigOption<>(ELASTICSEARCH_NS, "use-deprecated-ignore-unmapped-option", - "Elasticsearch versions before 1.4.0 supported the \"ignore_unmapped\" sort option. " + - "In 1.4.0, it was deprecated by the new \"unmapped_type\" sort option. This configuration" + - "setting controls which ES option JanusGraph uses: false for the newer \"unmapped_type\"," + - "true for the older \"ignore_unmapped\".", ConfigOption.Type.MASKABLE, false); + public static final ConfigOption BULK_REFRESH = + new ConfigOption(ELASTICSEARCH_NS, "bulk-refresh", + "Elasticsearch bulk API refresh setting used to control when changes made by this request are made " + + "visible to search", ConfigOption.Type.MASKABLE, "false"); public static final ConfigNamespace ES_EXTRAS_NS = new ConfigNamespace(ELASTICSEARCH_NS, "ext", "Overrides for arbitrary elasticsearch.yaml settings", true); @@ -194,10 +158,10 @@ public class ElasticSearchIndex implements IndexProvider { public static final ConfigNamespace ES_CREATE_EXTRAS_NS = new ConfigNamespace(ES_CREATE_NS, "ext", "Overrides for arbitrary settings applied at index creation", true); - private static final IndexFeatures ES_FEATURES = new IndexFeatures.Builder().supportsDocumentTTL() + private static final IndexFeatures ES_FEATURES = new IndexFeatures.Builder() .setDefaultStringMapping(Mapping.TEXT).supportedStringMappings(Mapping.TEXT, Mapping.TEXTSTRING, Mapping.STRING).setWildcardField("_all").supportsCardinality(Cardinality.SINGLE).supportsCardinality(Cardinality.LIST).supportsCardinality(Cardinality.SET).supportsNanoseconds().build(); - public static final int HOST_PORT_DEFAULT = 9300; + public static final int HOST_PORT_DEFAULT = 9200; /** * Default tree_levels used when creating geo_shape mappings. 
@@ -211,34 +175,28 @@ public class ElasticSearchIndex implements IndexProvider { private static final Map SPATIAL_PREDICATES = spatialPredicates(); - private final Node node; - private final Client client; + private final ElasticSearchClient client; private final String indexName; private final int maxResultsSize; - private final boolean useDeprecatedIgnoreUnmapped; public ElasticSearchIndex(Configuration config) throws BackendException { indexName = config.get(INDEX_NAME); - useDeprecatedIgnoreUnmapped = config.get(USE_EDEPRECATED_IGNORE_UNMAPPED_OPTION); checkExpectedClientVersion(); - final ElasticSearchSetup.Connection c; - if (!config.has(INTERFACE)) { - c = legacyConfiguration(config); - } else { - c = interfaceConfiguration(config); - } - node = c.getNode(); + final ElasticSearchSetup.Connection c = interfaceConfiguration(config); client = c.getClient(); maxResultsSize = config.get(INDEX_MAX_RESULT_SET_SIZE); log.debug("Configured ES query result set max size to {}", maxResultsSize); - client.admin().cluster().prepareHealth().setTimeout(config.get(HEALTH_REQUEST_TIMEOUT)) - .setWaitForYellowStatus().execute().actionGet(); + try { + client.clusterHealthRequest(config.get(HEALTH_REQUEST_TIMEOUT)); + checkForOrCreateIndex(config); + } catch (IOException e) { + throw new PermanentBackendException(e); + } - checkForOrCreateIndex(config); } /** @@ -250,21 +208,21 @@ public ElasticSearchIndex(Configuration config) throws BackendException { * of the index to check for existence or create. 
* * @param config the config for this ElasticSearchIndex - * @throws java.lang.IllegalArgumentException if the index could not be created + * @throws IOException if the index status could not be checked or index could not be created */ - private void checkForOrCreateIndex(Configuration config) { + private void checkForOrCreateIndex(Configuration config) throws IOException { Preconditions.checkState(null != client); //Create index if it does not already exist - IndicesExistsResponse response = client.admin().indices().exists(new IndicesExistsRequest(indexName)).actionGet(); - if (!response.isExists()) { + if (!client.indexExists(indexName)) { - Settings.Builder settings = Settings.settingsBuilder(); + Settings.Builder settings = Settings.builder(); ElasticSearchSetup.applySettingsFromJanusGraphConf(settings, config, ES_CREATE_EXTRAS_NS); + settings.put("index.max_result_window", Integer.MAX_VALUE); + + client.createIndex(indexName, settings.build()); - CreateIndexResponse create = client.admin().indices().prepareCreate(indexName) - .setSettings(settings.build()).execute().actionGet(); try { final long sleep = config.get(CREATE_SLEEP); log.debug("Sleeping {} ms after {} index creation returned from actionGet()", sleep, indexName); @@ -272,23 +230,17 @@ private void checkForOrCreateIndex(Configuration config) { } catch (InterruptedException e) { throw new JanusGraphException("Interrupted while waiting for index to settle in", e); } - if (!create.isAcknowledged()) throw new IllegalArgumentException("Could not create index: " + indexName); + if (!client.indexExists(indexName)) throw new IllegalArgumentException("Could not create index: " + indexName); } } /** - * Configure ElasticSearchIndex's ES client according to semantics introduced in - * 0.5.1. Allows greater flexibility than the previous config semantics. See - * {@link org.janusgraph.diskstorage.es.ElasticSearchSetup} for more + * Configure ElasticSearchIndex's ES client. 
See{@link org.janusgraph.diskstorage.es.ElasticSearchSetup} for more * information. - *

    - * This is activated by setting an explicit value for {@link #INTERFACE} in - * the JanusGraph configuration. * - * @see #legacyConfiguration(org.janusgraph.diskstorage.configuration.Configuration) * @param config a config passed to ElasticSearchIndex's constructor - * @return a node and client object open and ready for use + * @return a client object open and ready for use */ private ElasticSearchSetup.Connection interfaceConfiguration(Configuration config) { ElasticSearchSetup clientMode = ConfigOption.getEnumValue(config.get(INTERFACE), ElasticSearchSetup.class); @@ -300,110 +252,6 @@ private ElasticSearchSetup.Connection interfaceConfiguration(Configuration confi } } - /** - * Configure ElasticSearchIndex's ES client according to 0.4.x - 0.5.0 semantics. - * This checks local-mode first. If local-mode is true, then it creates a Node that - * uses JVM local transport and can't talk over the network. If local-mode is - * false, then it creates a TransportClient that can talk over the network and - * uses {@link org.janusgraph.graphdb.configuration.GraphDatabaseConfiguration#INDEX_HOSTS} - * as the server addresses. Note that this configuration method - * does not allow creating a Node that talks over the network. - *

    - * This is activated by not setting an explicit value for {@link #INTERFACE} in the - * JanusGraph configuration. - * - * @see #interfaceConfiguration(org.janusgraph.diskstorage.configuration.Configuration) - * @param config a config passed to ElasticSearchIndex's constructor - * @return a node and client object open and ready for use - */ - private ElasticSearchSetup.Connection legacyConfiguration(Configuration config) throws BackendException { - Node node; - Client client; - - if (config.get(LOCAL_MODE)) { - - log.debug("Configuring ES for JVM local transport"); - - boolean clientOnly = config.get(CLIENT_ONLY); - boolean local = config.get(LOCAL_MODE); - - NodeBuilder builder = NodeBuilder.nodeBuilder(); - Preconditions.checkArgument(config.has(INDEX_CONF_FILE) || config.has(INDEX_DIRECTORY), - "Must either configure configuration file or base directory"); - if (config.has(INDEX_CONF_FILE)) { - String configFile = config.get(INDEX_CONF_FILE); - Settings.Builder sb = Settings.settingsBuilder(); - log.debug("Configuring ES from YML file [{}]", configFile); - FileInputStream fis = null; - try { - fis = new FileInputStream(configFile); - sb.loadFromStream(configFile, fis); - builder.settings(sb.build()); - } catch (FileNotFoundException e) { - throw new JanusGraphException(e); - } finally { - IOUtils.closeQuietly(fis); - } - } else { - Settings.Builder b = Settings.settingsBuilder(); - b.put("script.inline", true); - b.put("indices.ttl.interval", "5s"); - - builder.settings(b.build()); - - String clustername = config.get(CLUSTER_NAME); - Preconditions.checkArgument(StringUtils.isNotBlank(clustername), "Invalid cluster name: %s", clustername); - builder.clusterName(clustername); - } - - String dataDirectory = config.get(INDEX_DIRECTORY); - if (StringUtils.isNotBlank(dataDirectory)) { - log.debug("Configuring ES with home directory [{}]", dataDirectory); - File f = new File(dataDirectory); - if (!f.exists()) f.mkdirs(); - builder.settings().put("path.home", 
dataDirectory); - } - - builder.settings().put("index.max_result_window", Integer.MAX_VALUE); - - node = builder.client(clientOnly).data(!clientOnly).local(local).node(); - client = node.client(); - - } else { - log.debug("Configuring ES for network transport"); - Settings.Builder settings = Settings.settingsBuilder(); - if (config.has(CLUSTER_NAME)) { - String clustername = config.get(CLUSTER_NAME); - Preconditions.checkArgument(StringUtils.isNotBlank(clustername), "Invalid cluster name: %s", clustername); - settings.put("cluster.name", clustername); - } else { - settings.put("client.transport.ignore_cluster_name", true); - } - log.debug("Transport sniffing enabled: {}", config.get(CLIENT_SNIFF)); - settings.put("client.transport.sniff", config.get(CLIENT_SNIFF)); - settings.put("script.inline", true); - settings.put("index.max_result_window", Integer.MAX_VALUE); - TransportClient tc = TransportClient.builder().settings(settings.build()).build(); - int defaultPort = config.has(INDEX_PORT)?config.get(INDEX_PORT):HOST_PORT_DEFAULT; - for (String host : config.get(INDEX_HOSTS)) { - String[] hostparts = host.split(":"); - String hostname = hostparts[0]; - int hostport = defaultPort; - if (hostparts.length == 2) hostport = Integer.parseInt(hostparts[1]); - log.info("Configured remote host: {} : {}", hostname, hostport); - try { - tc.addTransportAddress(new InetSocketTransportAddress(InetAddress.getByName(hostname), hostport)); - } catch (UnknownHostException e) { - throw new TemporaryBackendException(e); - } - } - client = tc; - node = null; - } - - return new ElasticSearchSetup.Connection(node, client); - } - private BackendException convert(Exception esException) { if (esException instanceof InterruptedException) { return new TemporaryBackendException("Interrupted while waiting for response", esException); @@ -435,10 +283,6 @@ public void register(String store, String key, KeyInformation information, BaseT try { mapping = XContentFactory.jsonBuilder(). startObject(). 
- startObject(store). - field(TTL_FIELD, new HashMap() {{ - put("enabled", true); - }}). startObject("properties"). startObject(key); @@ -510,15 +354,14 @@ public void register(String store, String key, KeyInformation information, BaseT mapping.field("index","not_analyzed"); } - mapping.endObject().endObject().endObject().endObject(); + mapping.endObject().endObject().endObject(); } catch (IOException e) { throw new PermanentBackendException("Could not render json for put mapping request", e); } try { - PutMappingResponse response = client.admin().indices().preparePutMapping(indexName) - .setType(store).setSource(mapping).execute().actionGet(); + client.createMapping(indexName, store, mapping); } catch (Exception e) { throw convert(e); } @@ -535,55 +378,44 @@ private static boolean hasDualStringMapping(KeyInformation information) { return AttributeUtil.isString(information.getDataType()) && getStringMapping(information)==Mapping.TEXTSTRING; } - public XContentBuilder getNewDocument(final List additions, KeyInformation.StoreRetriever informations) throws BackendException { - try { - XContentBuilder builder = XContentFactory.jsonBuilder().startObject(); - - // JSON writes duplicate fields one after another, which forces us - // at this stage to make de-duplication on the IndexEntry list. We don't want to pay the - // price map storage on the Mutation level because non of other backends need that. + public Map getNewDocument(final List additions, KeyInformation.StoreRetriever informations) throws BackendException { + // JSON writes duplicate fields one after another, which forces us + // at this stage to make de-duplication on the IndexEntry list. We don't want to pay the + // price map storage on the Mutation level because none of other backends need that. 
- Multimap uniq = LinkedListMultimap.create(); - for (IndexEntry e : additions) { - uniq.put(e.field, e); - } + Multimap uniq = LinkedListMultimap.create(); + for (IndexEntry e : additions) { + uniq.put(e.field, e); + } - for (Map.Entry> add : uniq.asMap().entrySet()) { - KeyInformation keyInformation = informations.get(add.getKey()); - Object value = null; - switch (keyInformation.getCardinality()) { - case SINGLE: - value = convertToEsType(Iterators.getLast(add.getValue().iterator()).value); - break; - case SET: - case LIST: - value = add.getValue().stream().map(v -> convertToEsType(v.value)) + final Map doc = new HashMap<>(); + for (Map.Entry> add : uniq.asMap().entrySet()) { + KeyInformation keyInformation = informations.get(add.getKey()); + Object value = null; + switch (keyInformation.getCardinality()) { + case SINGLE: + value = convertToEsType(Iterators.getLast(add.getValue().iterator()).value); + break; + case SET: + case LIST: + value = add.getValue().stream().map(v -> convertToEsType(v.value)) .filter(v -> { Preconditions.checkArgument(!(v instanceof byte[]), "Collections not supported for " + add.getKey()); return true; }) .collect(Collectors.toList()).toArray(); - break; - } - - if (value instanceof byte[]) { - builder.rawField(add.getKey(), new ByteArrayInputStream((byte[]) value)); - } else { - builder.field(add.getKey(), value); - } - if (hasDualStringMapping(informations.get(add.getKey())) && keyInformation.getDataType() == String.class) { - builder.field(getDualMappingName(add.getKey()), value); - } - + break; + } + doc.put(add.getKey(), value); + if (hasDualStringMapping(informations.get(add.getKey())) && keyInformation.getDataType() == String.class) { + doc.put(getDualMappingName(add.getKey()), value); } - builder.endObject(); - return builder; - } catch (IOException e) { - throw new PermanentBackendException("Could not write json"); } + + return doc; } private static Object convertToEsType(Object value) { @@ -597,8 +429,10 @@ private static 
Object convertToEsType(Object value) { return value; } else if (value instanceof Geoshape) { return convertgeo((Geoshape) value); - } else if (value instanceof Date || value instanceof Instant) { + } else if (value instanceof Date) { return value; + } else if (value instanceof Instant) { + return Date.from((Instant) value); } else if (value instanceof Boolean) { return value; } else if (value instanceof UUID) { @@ -611,7 +445,11 @@ private static Object convertgeo(Geoshape geoshape) { Geoshape.Point p = geoshape.getPoint(); return new double[]{p.getLongitude(), p.getLatitude()}; } else if (geoshape.getType() != Geoshape.Type.BOX && geoshape.getType() != Geoshape.Type.CIRCLE) { - return geoshape.toGeoJson().getBytes(); + try { + return geoshape.toMap(); + } catch (IOException e) { + throw new IllegalArgumentException("Invalid geoshape: " + geoshape, e); + } } else { throw new IllegalArgumentException("Unsupported or invalid shape type for indexing: " + geoshape.getType()); } @@ -619,9 +457,7 @@ private static Object convertgeo(Geoshape geoshape) { @Override public void mutate(Map> mutations, KeyInformation.IndexRetriever informations, BaseTransaction tx) throws BackendException { - BulkRequestBuilder brb = client.prepareBulk(); - - int bulkrequests = 0; + final List requests = new ArrayList<>(); try { for (Map.Entry> stores : mutations.entrySet()) { String storename = stores.getKey(); @@ -637,67 +473,39 @@ public void mutate(Map> mutations, KeyInforma if (mutation.hasDeletions()) { if (mutation.isDeleted()) { log.trace("Deleting entire document {}", docid); - brb.add(new DeleteRequest(indexName, storename, docid)); + requests.add(ElasticSearchMutation.createDeleteRequest(indexName, storename, docid)); } else { String script = getDeletionScript(informations, storename, mutation); - brb.add(client.prepareUpdate(indexName, storename, docid).setScript(new Script(script, ScriptService.ScriptType.INLINE, null, null))); + 
requests.add(ElasticSearchMutation.createUpdateRequest(indexName, storename, docid, script)); log.trace("Adding script {}", script); } - - bulkrequests++; } if (mutation.hasAdditions()) { - long ttl = mutation.determineTTL() * 1000l; - if (mutation.isNew()) { //Index log.trace("Adding entire document {}", docid); - Preconditions.checkArgument(ttl >= 0); - IndexRequest request = new IndexRequest(indexName, storename, docid) - .source(getNewDocument(mutation.getAdditions(), informations.get(storename))); - if (ttl > 0) { - request.ttl(ttl); - } - brb.add(request); - + Map source = getNewDocument(mutation.getAdditions(), informations.get(storename)); + requests.add(ElasticSearchMutation.createIndexRequest(indexName, storename, docid, source)); } else { - Preconditions.checkArgument(ttl == 0, "Elasticsearch only supports TTL on new documents [%s]", docid); - boolean needUpsert = !mutation.hasDeletions(); String script = getAdditionScript(informations, storename, mutation); - UpdateRequestBuilder update = client.prepareUpdate(indexName, storename, docid).setScript(new Script(script, ScriptService.ScriptType.INLINE, null, null)); if (needUpsert) { - XContentBuilder doc = getNewDocument(mutation.getAdditions(), informations.get(storename)); - - update.setUpsert(doc); + Map doc = getNewDocument(mutation.getAdditions(), informations.get(storename)); + requests.add(ElasticSearchMutation.createUpdateRequest(indexName, storename, docid, script, doc)); + } else { + requests.add(ElasticSearchMutation.createUpdateRequest(indexName, storename, docid, script)); } - brb.add(update); log.trace("Adding script {}", script); } - - bulkrequests++; } } } - if (bulkrequests > 0) { - BulkResponse bulkItemResponses = brb.execute().actionGet(); - if (bulkItemResponses.hasFailures()) { - boolean actualFailure = false; - for(BulkItemResponse response : bulkItemResponses.getItems()) { - //The document may have been deleted, which is OK - if(response.isFailed() && 
response.getFailure().getStatus() != RestStatus.NOT_FOUND) { - log.error("Failed to execute ES query {}", response.getFailureMessage()); - actualFailure = true; - } - } - if(actualFailure) { - throw new Exception(bulkItemResponses.buildFailureMessage()); - } - } + if (!requests.isEmpty()) { + client.bulkRequest(requests); } } catch (Exception e) { - log.error("Failed to execute ES query {}", brb.request().timeout(), e); + log.error("Failed to execute bulk Elasticsearch query", e); throw convert(e); } } @@ -766,6 +574,7 @@ private static String convertToJsType(Object value) throws PermanentBackendExcep builder.field("value", esValue); } + builder.endObject(); String s = builder.string(); int prefixLength = "{\"value\":".length(); int suffixLength = "}".length(); @@ -781,8 +590,7 @@ private static String convertToJsType(Object value) throws PermanentBackendExcep public void restore(Map>> documents, KeyInformation.IndexRetriever informations, BaseTransaction tx) throws BackendException { - BulkRequestBuilder bulk = client.prepareBulk(); - int requests = 0; + final List requests = new ArrayList<>(); try { for (Map.Entry>> stores : documents.entrySet()) { String store = stores.getKey(); @@ -796,26 +604,19 @@ public void restore(Map>> documents, KeyInfo if (log.isTraceEnabled()) log.trace("Deleting entire document {}", docID); - bulk.add(new DeleteRequest(indexName, store, docID)); - requests++; + requests.add(ElasticSearchMutation.createDeleteRequest(indexName, store, docID)); } else { // Add if (log.isTraceEnabled()) log.trace("Adding entire document {}", docID); - long ttl = IndexMutation.determineTTL(content) * 1000l; - Preconditions.checkArgument(ttl >= 0); - IndexRequest request = new IndexRequest(indexName, store, docID).source(getNewDocument(content, informations.get(store))); - if (ttl > 0) { - request.ttl(ttl); - } - bulk.add(request); - requests++; + Map source = getNewDocument(content, informations.get(store)); + 
requests.add(ElasticSearchMutation.createIndexRequest(indexName, store, docID, source)); } } } - if (requests > 0) - bulk.execute().actionGet(); + if (!requests.isEmpty()) + client.bulkRequest(requests); } catch (Exception e) { throw convert(e); } @@ -904,7 +705,7 @@ public QueryBuilder getFilter(Condition condition, KeyInformation.StoreRetrie throw new IllegalArgumentException("Unsupported or invalid search shape type for geopoint: " + shape.getType()); } - return janusgraphPredicate == Geo.DISJOINT ? QueryBuilders.notQuery(queryBuilder) : queryBuilder; + return janusgraphPredicate == Geo.DISJOINT ? QueryBuilders.boolQuery().mustNot(queryBuilder) : queryBuilder; } else if (value instanceof Geoshape) { // geoshape Preconditions.checkArgument(janusgraphPredicate instanceof Geo, "Relation not supported on geoshape types: " + janusgraphPredicate); @@ -1002,43 +803,36 @@ public QueryBuilder getFilter(Condition condition, KeyInformation.StoreRetrie @Override public List query(IndexQuery query, KeyInformation.IndexRetriever informations, BaseTransaction tx) throws BackendException { - SearchRequestBuilder srb = client.prepareSearch(indexName); - srb.setTypes(query.getStore()); - srb.setQuery(QueryBuilders.matchAllQuery()); - srb.setPostFilter(getFilter(query.getCondition(),informations.get(query.getStore()))); + ElasticSearchRequest sr = new ElasticSearchRequest(); + + sr.setQuery(QueryBuilders.matchAllQuery()); + sr.setPostFilter(getFilter(query.getCondition(),informations.get(query.getStore()))); if (!query.getOrder().isEmpty()) { List orders = query.getOrder(); for (int i = 0; i < orders.size(); i++) { IndexQuery.OrderEntry orderEntry = orders.get(i); - FieldSortBuilder fsb = new FieldSortBuilder(orders.get(i).getKey()) - .order(orderEntry.getOrder() == Order.ASC ? 
SortOrder.ASC : SortOrder.DESC); - if (useDeprecatedIgnoreUnmapped) { - fsb.ignoreUnmapped(true); - } else { - KeyInformation information = informations.get(query.getStore()).get(orders.get(i).getKey()); - Mapping mapping = Mapping.getMapping(information); - Class datatype = orderEntry.getDatatype(); - fsb.unmappedType(convertToEsDataType(datatype, mapping)); - } - srb.addSort(fsb); + String order = (orderEntry.getOrder() == Order.ASC ? SortOrder.ASC : SortOrder.DESC).toString(); + KeyInformation information = informations.get(query.getStore()).get(orders.get(i).getKey()); + Mapping mapping = Mapping.getMapping(information); + Class datatype = orderEntry.getDatatype(); + sr.addSort(orders.get(i).getKey(), order, convertToEsDataType(datatype, mapping)); } } - srb.setFrom(0); - if (query.hasLimit()) srb.setSize(query.getLimit()); - else srb.setSize(maxResultsSize); - srb.setNoFields(); - //srb.setExplain(true); - - SearchResponse response = srb.execute().actionGet(); - log.debug("Executed query [{}] in {} ms", query.getCondition(), response.getTookInMillis()); - SearchHits hits = response.getHits(); - if (!query.hasLimit() && hits.totalHits() >= maxResultsSize) - log.warn("Query result set truncated to first [{}] elements for query: {}", maxResultsSize, query); - List result = new ArrayList(hits.hits().length); - for (SearchHit hit : hits) { - result.add(hit.id()); + sr.setFrom(0); + if (query.hasLimit()) sr.setSize(query.getLimit()); + else sr.setSize(maxResultsSize); + + ElasticSearchResponse response; + try { + response = client.search(indexName, query.getStore(), sr); + } catch (IOException e) { + throw new PermanentBackendException(e); } - return result; + + log.debug("Executed query [{}] in {} ms", query.getCondition(), response.getTook()); + if (!query.hasLimit() && response.getTotal() >= maxResultsSize) + log.warn("Query result set truncated to first [{}] elements for query: {}", maxResultsSize, query); + return response.getResults().stream().map(result -> 
result.getResult()).collect(Collectors.toList()); } private String convertToEsDataType(Class datatype, Mapping mapping) { @@ -1075,26 +869,23 @@ else if (Geoshape.class.isAssignableFrom(datatype)) { @Override public Iterable> query(RawQuery query, KeyInformation.IndexRetriever informations, BaseTransaction tx) throws BackendException { - SearchRequestBuilder srb = client.prepareSearch(indexName); - srb.setTypes(query.getStore()); - srb.setQuery(QueryBuilders.queryStringQuery(query.getQuery())); - - srb.setFrom(query.getOffset()); - if (query.hasLimit()) srb.setSize(query.getLimit()); - else srb.setSize(maxResultsSize); - srb.setNoFields(); - //srb.setExplain(true); - - SearchResponse response = srb.execute().actionGet(); - log.debug("Executed query [{}] in {} ms", query.getQuery(), response.getTookInMillis()); - SearchHits hits = response.getHits(); - if (!query.hasLimit() && hits.totalHits() >= maxResultsSize) - log.warn("Query result set truncated to first [{}] elements for query: {}", maxResultsSize, query); - List> result = new ArrayList>(hits.hits().length); - for (SearchHit hit : hits) { - result.add(new RawQuery.Result(hit.id(),hit.getScore())); + ElasticSearchRequest sr = new ElasticSearchRequest(); + sr.setQuery(QueryBuilders.queryStringQuery(query.getQuery())); + + sr.setFrom(query.getOffset()); + if (query.hasLimit()) sr.setSize(query.getLimit()); + else sr.setSize(maxResultsSize); + + ElasticSearchResponse response; + try { + response = client.search(indexName, query.getStore(), sr); + } catch (IOException e) { + throw new PermanentBackendException(e); } - return result; + log.debug("Executed query [{}] in {} ms", query.getQuery(), response.getTook()); + if (!query.hasLimit() && response.getTotal() >= maxResultsSize) + log.warn("Query result set truncated to first [{}] elements for query: {}", maxResultsSize, query); + return response.getResults(); } @Override @@ -1167,25 +958,18 @@ public BaseTransactionConfigurable 
beginTransaction(BaseTransactionConfig config @Override public void close() throws BackendException { - - if (node != null && !node.isClosed()) { - node.close(); + try { + client.close(); + } catch (IOException e) { + throw new PermanentBackendException(e); } - client.close(); } @Override public void clearStorage() throws BackendException { try { - try { - client.admin().indices() - .delete(new DeleteIndexRequest(indexName)).actionGet(); - // We wait for one second to let ES delete the river - Thread.sleep(1000); - } catch (IndexNotFoundException e) { - // Index does not exist... Fine - } + client.deleteIndex(indexName); } catch (Exception e) { throw new PermanentBackendException("Could not delete index " + indexName, e); } finally { @@ -1193,13 +977,6 @@ public void clearStorage() throws BackendException { } } - /** - * Exposed for testing - */ - Node getNode() { - return node; - } - private void checkExpectedClientVersion() { /* * This is enclosed in a catch block to prevent an unchecked exception diff --git a/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/ElasticSearchMutation.java b/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/ElasticSearchMutation.java new file mode 100644 index 0000000000..d55bbde4d8 --- /dev/null +++ b/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/ElasticSearchMutation.java @@ -0,0 +1,87 @@ +// Copyright 2017 JanusGraph Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package org.janusgraph.diskstorage.es; + +import com.google.common.collect.ImmutableMap; + +import java.util.Map; + +public class ElasticSearchMutation { + + public enum RequestType { + + INDEX, + + UPDATE, + + DELETE; + + } + + private final RequestType requestType; + + private final String index; + + private final String type; + + private final String id; + + private final Map source; + + private ElasticSearchMutation(RequestType requestType, String index, String type, String id, Map source) { + this.requestType = requestType; + this.index = index; + this.type = type; + this.id = id; + this.source = source; + } + + public static ElasticSearchMutation createDeleteRequest(String index, String type, String id) { + return new ElasticSearchMutation(RequestType.DELETE, index, type, id, null); + } + + public static ElasticSearchMutation createIndexRequest(String index, String type, String id, Map source) { + return new ElasticSearchMutation(RequestType.INDEX, index, type, id, source); + } + + public static ElasticSearchMutation createUpdateRequest(String index, String type, String id, String script) { + return new ElasticSearchMutation(RequestType.UPDATE, index, type, id, ImmutableMap.of("script", ImmutableMap.of("inline", script, "lang", "groovy"))); + } + + public static ElasticSearchMutation createUpdateRequest(String index, String type, String id, String script, Map upsert) { + return new ElasticSearchMutation(RequestType.UPDATE, index, type, id, ImmutableMap.of("script", ImmutableMap.of("inline", script, "lang", "groovy"), "upsert", upsert)); + } + + public RequestType getRequestType() { + return requestType; + } + + public String getIndex() { + return index; + } + + public String getType() { + return type; + } + + public String getId() { + return id; + } + + public Map getSource() { + return source; + } + +} diff --git a/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/ElasticSearchRequest.java 
b/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/ElasticSearchRequest.java new file mode 100644 index 0000000000..3330733b90 --- /dev/null +++ b/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/ElasticSearchRequest.java @@ -0,0 +1,110 @@ +// Copyright 2017 JanusGraph Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.janusgraph.diskstorage.es; + +import com.google.common.collect.ImmutableMap; +import org.apache.tinkerpop.shaded.jackson.annotation.JsonProperty; +import org.elasticsearch.index.query.QueryBuilder; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +public class ElasticSearchRequest { + + private QueryBuilder query; + + private QueryBuilder postFilter; + + private Integer size; + + private Integer from; + + private List> sorts; + + public ElasticSearchRequest() { + this.sorts = new ArrayList<>(); + } + + public QueryBuilder getQuery() { + return query; + } + + public void setQuery(QueryBuilder query) { + this.query = query; + } + + public QueryBuilder getPostFilter() { + return postFilter; + } + + public void setPostFilter(QueryBuilder postFilter) { + this.postFilter = postFilter; + } + + public Integer getSize() { + return size; + } + + public void setSize(Integer size) { + this.size = size; + } + + public Integer getFrom() { + return from; + } + + public void setFrom(Integer from) { + this.from = from; + } + + public List> getSorts() { + return sorts; + } + + public void 
addSort(String key, String order, String unmappedType) { + this.sorts.add(ImmutableMap.of(key, new RestSortInfo(order, unmappedType))); + } + + public static class RestSortInfo { + + String order; + + @JsonProperty("unmapped_type") + String unmappedType; + + public RestSortInfo(String order, String unmappedType) { + this.order = order; + this.unmappedType = unmappedType; + } + + public String getOrder() { + return order; + } + + public void setOrder(String order) { + this.order = order; + } + + public String getUnmappedType() { + return unmappedType; + } + + public void setUnmappedType(String unmappedType) { + this.unmappedType = unmappedType; + } + + } +} diff --git a/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/ElasticSearchResponse.java b/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/ElasticSearchResponse.java new file mode 100644 index 0000000000..5c271209a0 --- /dev/null +++ b/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/ElasticSearchResponse.java @@ -0,0 +1,52 @@ +// Copyright 2017 JanusGraph Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package org.janusgraph.diskstorage.es; + +import org.janusgraph.diskstorage.indexing.RawQuery; + +import java.util.List; + +public class ElasticSearchResponse { + + private long took; + + private long total; + + private List> results; + + public long getTook() { + return took; + } + + public void setTook(long took) { + this.took = took; + } + + public long getTotal() { + return total; + } + + public void setTotal(long total) { + this.total = total; + } + + public List> getResults() { + return results; + } + + public void setResults(List> results) { + this.results = results; + } +} diff --git a/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/ElasticSearchSetup.java b/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/ElasticSearchSetup.java index 4e9ba326f3..b255d70091 100644 --- a/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/ElasticSearchSetup.java +++ b/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/ElasticSearchSetup.java @@ -16,50 +16,49 @@ import com.google.common.base.Joiner; import com.google.common.base.Preconditions; -import org.janusgraph.diskstorage.configuration.ConfigNamespace; -import org.janusgraph.diskstorage.configuration.ConfigOption; -import org.janusgraph.diskstorage.configuration.Configuration; -import org.janusgraph.util.system.IOUtils; import org.apache.commons.lang.StringUtils; -import org.elasticsearch.client.Client; +import org.apache.http.HttpHost; +import org.elasticsearch.client.RestClient; import org.elasticsearch.client.transport.TransportClient; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.transport.InetSocketTransportAddress; -import org.elasticsearch.node.Node; -import org.elasticsearch.node.NodeBuilder; +import org.janusgraph.diskstorage.configuration.ConfigNamespace; +import org.janusgraph.diskstorage.configuration.ConfigOption; +import org.janusgraph.diskstorage.configuration.Configuration; +import 
org.janusgraph.diskstorage.es.rest.RestElasticSearchClient; +import org.janusgraph.util.system.IOUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.*; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStream; import java.lang.reflect.Array; import java.net.InetAddress; +import java.util.ArrayList; import java.util.List; import java.util.Map; -import static org.janusgraph.graphdb.configuration.GraphDatabaseConfiguration.*; +import static org.janusgraph.graphdb.configuration.GraphDatabaseConfiguration.INDEX_CONF_FILE; +import static org.janusgraph.graphdb.configuration.GraphDatabaseConfiguration.INDEX_HOSTS; +import static org.janusgraph.graphdb.configuration.GraphDatabaseConfiguration.INDEX_PORT; /** * Create an ES {@link org.elasticsearch.client.transport.TransportClient} or - * {@link org.elasticsearch.node.Node} from a JanusGraph + * {@link org.elasticsearch.client.RestClient} from a JanusGraph * {@link org.janusgraph.diskstorage.configuration.Configuration}. *

    - * TransportClient assumes that an ES cluster is already running. It does not attempt - * to start an embedded ES instance. It just connects to whatever hosts are given in + * Assumes that an ES cluster is already running. It does not attempt to start an + * embedded ES instance. It just connects to whatever hosts are given in * {@link org.janusgraph.graphdb.configuration.GraphDatabaseConfiguration#INDEX_HOSTS}. *

    - * Node can be configured to either behave strictly as a client or as both a client - * and ES data node. The latter is essentially a fully-fledged ES cluster node embedded in JanusGraph. - * Node can also be configured to use either network or JVM local transport. - * In practice, JVM local transport is usually only useful for testing. Most deployments - * will use the network transport. - *

    - * Setting arbitrary ES options is supported with both TransportClient and Node + * Setting arbitrary ES options is supported with the TransportClient * via {@link org.janusgraph.graphdb.configuration.GraphDatabaseConfiguration#INDEX_CONF_FILE}. * When this is set, it will be opened as an ordinary file and the contents will be * parsed as Elasticsearch settings. These settings override JanusGraph's defaults but - options explicitly provided in JanusGraph's config file (e.g. setting an explicit value for - {@link org.janusgraph.diskstorage.es.ElasticSearchIndex#CLIENT_ONLY} in - JanusGraph's properties will override any value that might be in the ES settings file). + options explicitly provided in JanusGraph's properties file will + override any value that might be in the ES settings file. *

    * After loading the index conf file (when provided), any key-value pairs under the * {@link org.janusgraph.diskstorage.es.ElasticSearchIndex#ES_EXTRAS_NS} namespace @@ -103,51 +102,41 @@ public Connection connect(Configuration config) throws IOException { log.info("Configured remote host: {} : {}", hostname, hostport); tc.addTransportAddress(new InetSocketTransportAddress(InetAddress.getByName(hostname), hostport)); } - return new Connection(null, tc); + + TransportElasticSearchClient client = new TransportElasticSearchClient(tc); + if (config.has(ElasticSearchIndex.BULK_REFRESH)) { + client.setBulkRefresh(config.get(ElasticSearchIndex.BULK_REFRESH).equals("true")); + } + return new Connection(client); } }, /** - * Start an ES {@code Node} and use its attached {@code Client}. + * Create an ES RestClient connected to + * {@link org.janusgraph.graphdb.configuration.GraphDatabaseConfiguration#INDEX_HOSTS}. */ - NODE { + REST_CLIENT { @Override public Connection connect(Configuration config) throws IOException { + log.debug("Configuring RestClient"); - log.debug("Configuring Node Client"); - - Settings.Builder settingsBuilder = settingsBuilder(config); - - if (config.has(ElasticSearchIndex.TTL_INTERVAL)) { - String k = "indices.ttl.interval"; - settingsBuilder.put(k, config.get(ElasticSearchIndex.TTL_INTERVAL)); - log.debug("Set {}: {}", k, config.get(ElasticSearchIndex.TTL_INTERVAL)); - } - - makeLocalDirsIfNecessary(settingsBuilder, config); - - NodeBuilder nodeBuilder = NodeBuilder.nodeBuilder(); - - // Apply explicit JanusGraph properties file overrides (otherwise conf-file or ES defaults apply) - if (config.has(ElasticSearchIndex.CLIENT_ONLY)) { - boolean clientOnly = config.get(ElasticSearchIndex.CLIENT_ONLY); - nodeBuilder.client(clientOnly).data(!clientOnly); + final List hosts = new ArrayList<>(); + int defaultPort = config.has(INDEX_PORT) ? 
config.get(INDEX_PORT) : ElasticSearchIndex.HOST_PORT_DEFAULT; + for (String host : config.get(INDEX_HOSTS)) { + String[] hostparts = host.split(":"); + String hostname = hostparts[0]; + int hostport = defaultPort; + if (hostparts.length == 2) hostport = Integer.parseInt(hostparts[1]); + log.info("Configured remote host: {} : {}", hostname, hostport); + hosts.add(new HttpHost(hostname, hostport, "http")); } + RestClient rc = RestClient.builder(hosts.toArray(new HttpHost[hosts.size()])).build(); - if (config.has(ElasticSearchIndex.LOCAL_MODE)) - nodeBuilder.local(config.get(ElasticSearchIndex.LOCAL_MODE)); - - if (config.has(ElasticSearchIndex.LOAD_DEFAULT_NODE_SETTINGS)) { - // Elasticsearch >2.3 always loads default settings - String k = "config.ignore_system_properties"; - settingsBuilder.put(k, !config.get(ElasticSearchIndex.LOAD_DEFAULT_NODE_SETTINGS)); + RestElasticSearchClient client = new RestElasticSearchClient(rc); + if (config.has(ElasticSearchIndex.BULK_REFRESH)) { + client.setBulkRefresh(config.get(ElasticSearchIndex.BULK_REFRESH)); } - - settingsBuilder.put("index.max_result_window", Integer.MAX_VALUE); - nodeBuilder.settings(settingsBuilder.build()); - Node node = nodeBuilder.node(); - Client client = node.client(); - return new Connection(node, client); + return new Connection(client); } }; @@ -240,8 +229,8 @@ static void applySettingsFromFile(Settings.Builder settings, } static void applySettingsFromJanusGraphConf(Settings.Builder settings, - Configuration config, - ConfigNamespace rootNS) { + Configuration config, + ConfigNamespace rootNS) { int keysLoaded = 0; Map configSub = config.getSubset(rootNS); for (Map.Entry entry : configSub.entrySet()) { @@ -270,41 +259,20 @@ static void applySettingsFromJanusGraphConf(Settings.Builder settings, log.debug("Loaded {} settings from the {} JanusGraph config namespace", keysLoaded, rootNS); } - - private static void makeLocalDirsIfNecessary(Settings.Builder settingsBuilder, Configuration config) { - if 
(config.has(INDEX_DIRECTORY)) { - String dataDirectory = config.get(INDEX_DIRECTORY); - File f = new File(dataDirectory); - if (!f.exists()) { - log.info("Creating ES directory prefix: {}", f); - f.mkdirs(); - } - settingsBuilder.put("path.home", dataDirectory); - } - - } - private static final Logger log = LoggerFactory.getLogger(ElasticSearchSetup.class); public abstract Connection connect(Configuration config) throws IOException; public static class Connection { - private final Node node; - private final Client client; + private final ElasticSearchClient client; - public Connection(Node node, Client client) { - this.node = node; + public Connection(ElasticSearchClient client) { this.client = client; Preconditions.checkNotNull(this.client, "Unable to instantiate Elasticsearch Client object"); - // node may be null - } - - public Node getNode() { - return node; } - public Client getClient() { + public ElasticSearchClient getClient() { return client; } } diff --git a/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/TransportElasticSearchClient.java b/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/TransportElasticSearchClient.java new file mode 100644 index 0000000000..99bcb0a0c7 --- /dev/null +++ b/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/TransportElasticSearchClient.java @@ -0,0 +1,201 @@ +// Copyright 2017 JanusGraph Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package org.janusgraph.diskstorage.es; + +import org.elasticsearch.action.admin.indices.create.CreateIndexResponse; +import org.elasticsearch.action.admin.indices.delete.DeleteIndexRequest; +import org.elasticsearch.action.admin.indices.exists.indices.IndicesExistsRequest; +import org.elasticsearch.action.admin.indices.exists.indices.IndicesExistsResponse; +import org.elasticsearch.action.admin.indices.settings.get.GetSettingsRequest; +import org.elasticsearch.action.admin.indices.settings.get.GetSettingsResponse; +import org.elasticsearch.action.bulk.BulkItemResponse; +import org.elasticsearch.action.bulk.BulkRequestBuilder; +import org.elasticsearch.action.bulk.BulkResponse; +import org.elasticsearch.action.delete.DeleteRequest; +import org.elasticsearch.action.index.IndexRequest; +import org.elasticsearch.action.search.SearchRequestBuilder; +import org.elasticsearch.action.search.SearchResponse; +import org.elasticsearch.action.update.UpdateRequestBuilder; +import org.elasticsearch.client.Client; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.index.IndexNotFoundException; +import org.elasticsearch.node.Node; +import org.elasticsearch.rest.RestStatus; +import org.elasticsearch.script.Script; +import org.elasticsearch.script.ScriptService; +import org.elasticsearch.search.SearchHit; +import org.elasticsearch.search.SearchHits; +import org.elasticsearch.search.sort.FieldSortBuilder; +import org.elasticsearch.search.sort.SortOrder; +import org.janusgraph.diskstorage.indexing.RawQuery; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +public class TransportElasticSearchClient implements ElasticSearchClient { + + private static final Logger log = LoggerFactory.getLogger(TransportElasticSearchClient.class); + + private 
Client client; + + private boolean bulkRefresh; + + public TransportElasticSearchClient(Client client) { + this.client = client; + } + + @Override + public void clusterHealthRequest(String timeout) throws IOException { + client.admin().cluster().prepareHealth().setTimeout(timeout).setWaitForYellowStatus().execute().actionGet(); + } + + @Override + public boolean indexExists(String indexName) throws IOException { + IndicesExistsResponse response = client.admin().indices().exists(new IndicesExistsRequest(indexName)).actionGet(); + return response.isExists(); + } + + @Override + public void createIndex(String indexName, Settings settings) throws IOException { + CreateIndexResponse create = client.admin().indices().prepareCreate(indexName) + .setSettings(settings).execute().actionGet(); + } + + @Override + public Map getIndexSettings(String indexName) throws IOException { + GetSettingsResponse response = client.admin().indices().getSettings(new GetSettingsRequest().indices(indexName)).actionGet(); + return response.getIndexToSettings().get(indexName).getAsMap().entrySet().stream() + .collect(Collectors.toMap(e->e.getKey().replace("index.",""), Map.Entry::getValue)); + } + + @Override + public void createMapping(String indexName, String typeName, XContentBuilder mapping) throws IOException { + client.admin().indices().preparePutMapping(indexName).setType(typeName).setSource(mapping).execute().actionGet(); + } + + @Override + public void deleteIndex(String indexName) throws IOException { + try { + client.admin().indices().delete(new DeleteIndexRequest(indexName)).actionGet(); + // We wait for one second to let ES delete the river + Thread.sleep(1000); + } catch (IndexNotFoundException e) { + // Index does not exist... 
Fine + } catch (InterruptedException e) { + throw new IOException(e); + } + } + + @Override + public void bulkRequest(List requests) throws IOException { + BulkRequestBuilder brb = client.prepareBulk(); + requests.stream().forEach(request -> { + String indexName = request.getIndex(); + String type = request.getType(); + String id = request.getId(); + switch (request.getRequestType()) { + case DELETE: { + brb.add(new DeleteRequest(indexName, type, id)); + break; + } case INDEX: { + brb.add(new IndexRequest(indexName, type, id).source(request.getSource())); + break; + } case UPDATE: { + String inline = ((Map) request.getSource().get("script")).get("inline"); + Script script = new Script(inline, ScriptService.ScriptType.INLINE, null, null); + UpdateRequestBuilder update = client.prepareUpdate(indexName, type, id).setScript(script); + if (request.getSource().containsKey("upsert")) { + update.setUpsert((Map) request.getSource().get("upsert")); + } + brb.add(update); + break; + } default: + throw new IllegalArgumentException("Unsupported request type: " + request.getRequestType()); + } + }); + + if (!requests.isEmpty()) { + if (bulkRefresh) { + brb.setRefresh(true); + } + BulkResponse bulkItemResponses = brb.execute().actionGet(); + if (bulkItemResponses.hasFailures()) { + boolean actualFailure = false; + for(BulkItemResponse response : bulkItemResponses.getItems()) { + //The document may have been deleted, which is OK + if(response.isFailed() && response.getFailure().getStatus() != RestStatus.NOT_FOUND) { + log.error("Failed to execute ES query {}", response.getFailureMessage()); + actualFailure = true; + } + } + if(actualFailure) { + throw new IOException("Failure(s) in Elasicsearch bulk request: " + bulkItemResponses.buildFailureMessage()); + } + } + } + } + + @Override + public ElasticSearchResponse search(String indexName, String type, ElasticSearchRequest request) throws IOException { + SearchRequestBuilder srb = client.prepareSearch(indexName); + 
srb.setTypes(type); + srb.setQuery(request.getQuery()); + srb.setPostFilter(request.getPostFilter()); + if (request.getFrom() != null) { + srb.setFrom(request.getFrom()); + } + if (request.getSize() != null) { + srb.setSize(request.getSize()); + } + request.getSorts().stream().flatMap(item -> item.entrySet().stream()).forEach(item -> { + String key = item.getKey(); + ElasticSearchRequest.RestSortInfo sortInfo = item.getValue(); + FieldSortBuilder fsb = new FieldSortBuilder(key) + .order(SortOrder.valueOf(sortInfo.getOrder().toUpperCase())) + .unmappedType(sortInfo.getUnmappedType()); + srb.addSort(fsb); + }); + + SearchResponse response = srb.execute().actionGet(); + SearchHits hits = response.getHits(); + + List> results = new ArrayList<>(hits.hits().length); + for (SearchHit hit : hits) { + results.add(new RawQuery.Result(hit.id(),hit.getScore())); + } + + ElasticSearchResponse result = new ElasticSearchResponse(); + result.setTook(response.getTookInMillis()); + result.setTotal(hits.getTotalHits()); + result.setResults(results); + return result; + } + + @Override + public void close() throws IOException { + client.close(); + } + + public void setBulkRefresh(boolean bulkRefresh) { + this.bulkRefresh = bulkRefresh; + } + +} diff --git a/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/rest/RestBulkResponse.java b/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/rest/RestBulkResponse.java new file mode 100644 index 0000000000..efc317de35 --- /dev/null +++ b/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/rest/RestBulkResponse.java @@ -0,0 +1,79 @@ +// Copyright 2017 JanusGraph Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.janusgraph.diskstorage.es.rest; + +import org.apache.tinkerpop.shaded.jackson.annotation.JsonIgnoreProperties; + +import java.util.List; +import java.util.Map; + +@JsonIgnoreProperties(ignoreUnknown=true) +public class RestBulkResponse { + + private boolean errors; + + private List> items; + + public boolean isErrors() { + return errors; + } + + public void setErrors(boolean errors) { + this.errors = errors; + } + + public List> getItems() { + return items; + } + + public void setItems(List> items) { + this.items = items; + } + + @JsonIgnoreProperties(ignoreUnknown=true) + public static class RestBulkItemResponse { + + private String result; + + private int status; + + private Map error; + + public String getResult() { + return result; + } + + public void setResult(String result) { + this.result = result; + } + + public int getStatus() { + return status; + } + + public void setStatus(int status) { + this.status = status; + } + + public Map getError() { + return error; + } + + public void setError(Map error) { + this.error = error; + } + } + +} diff --git a/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/rest/RestElasticSearchClient.java b/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/rest/RestElasticSearchClient.java new file mode 100644 index 0000000000..4854b1eaa7 --- /dev/null +++ b/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/rest/RestElasticSearchClient.java @@ -0,0 +1,226 @@ +// Copyright 2017 JanusGraph Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you 
may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.janusgraph.diskstorage.es.rest; + +import com.google.common.collect.ImmutableMap; +import org.apache.http.HttpEntity; +import org.apache.http.entity.ByteArrayEntity; +import org.apache.tinkerpop.shaded.jackson.core.type.TypeReference; +import org.apache.tinkerpop.shaded.jackson.databind.ObjectMapper; +import org.apache.tinkerpop.shaded.jackson.databind.ObjectReader; +import org.apache.tinkerpop.shaded.jackson.databind.ObjectWriter; +import org.apache.tinkerpop.shaded.jackson.databind.SerializationFeature; +import org.apache.tinkerpop.shaded.jackson.databind.module.SimpleModule; +import org.elasticsearch.client.Response; +import org.elasticsearch.client.RestClient; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.rest.RestStatus; +import org.janusgraph.core.attribute.Geoshape; +import org.janusgraph.diskstorage.es.ElasticSearchClient; +import org.janusgraph.diskstorage.es.ElasticSearchMutation; +import org.janusgraph.diskstorage.es.ElasticSearchRequest; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.stream.Collectors; + +public class RestElasticSearchClient implements ElasticSearchClient { + + private static final Logger log = 
LoggerFactory.getLogger(RestElasticSearchClient.class); + + private static final ObjectMapper mapper; + private static final ObjectReader mapReader; + private static final ObjectWriter mapWriter; + static { + final SimpleModule module = new SimpleModule(); + module.addSerializer(new Geoshape.GeoshapeGsonSerializer()); + mapper = new ObjectMapper(); + mapper.registerModule(module); + mapper.disable(SerializationFeature.WRITE_DATES_AS_TIMESTAMPS); + mapReader = mapper.readerWithView(Map.class).forType(HashMap.class); + mapWriter = mapper.writerWithView(Map.class); + } + + private RestClient delegate; + + private String bulkRefresh; + + public RestElasticSearchClient(RestClient delegate) { + this.delegate = delegate; + } + + @Override + public void close() throws IOException { + delegate.close(); + } + + @Override + public void clusterHealthRequest(String timeout) throws IOException { + Map params = ImmutableMap.of("wait_for_status","yellow","timeout",timeout); + final Response response = delegate.performRequest("GET", "/_cluster/health", params); + try (final InputStream inputStream = response.getEntity().getContent()) { + final Map values = mapReader.readValue(inputStream); + if (!values.containsKey("timed_out")) { + throw new IOException("Unexpected response for Elasticsearch cluster health request"); + } else if (!Objects.equals(values.get("timed_out"), false)) { + throw new IOException("Elasticsearch timeout waiting for yellow status"); + } + } + } + + @Override + public boolean indexExists(String indexName) throws IOException { + boolean exists = false; + try { + delegate.performRequest("GET", "/" + indexName); + exists = true; + } catch (IOException e) { + if (!e.getMessage().contains("404 Not Found")) { + throw e; + } + } + return exists; + } + + @Override + public void createIndex(String indexName, Settings settings) throws IOException { + performRequest("PUT", "/" + indexName, mapWriter.writeValueAsBytes(settings.getAsMap())); + } + + @Override + public Map 
getIndexSettings(String indexName) throws IOException { + Response response = performRequest("GET", "/" + indexName + "/_settings", null); + try (final InputStream inputStream = response.getEntity().getContent()) { + Map settings = mapper.readValue(inputStream, new TypeReference>() {}); + return settings.get(indexName).getSettings().getMap(); + } + } + + @Override + public void createMapping(String indexName, String typeName, XContentBuilder mapping) throws IOException { + byte[] bytes = mapping.bytes().toBytes(); + performRequest("PUT", "/" + indexName + "/_mapping/" + typeName, bytes); + } + + @Override + public void deleteIndex(String indexName) throws IOException { + try { + performRequest("DELETE", "/" + indexName, null); + } catch (IOException e) { + if (!e.getMessage().contains("no such index")) { + throw e; + } + } + } + + @Override + public void bulkRequest(List requests) throws IOException { + final ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); + for (final ElasticSearchMutation request : requests) { + Map actionData = ImmutableMap.of(request.getRequestType().name().toLowerCase(), + ImmutableMap.of("_index", request.getIndex(), "_type", request.getType(), "_id", request.getId())); + outputStream.write(mapWriter.writeValueAsBytes(actionData)); + outputStream.write("\n".getBytes()); + if (request.getSource() != null) { + outputStream.write(mapWriter.writeValueAsBytes(request.getSource())); + outputStream.write("\n".getBytes()); + } + } + + final StringBuilder builder = new StringBuilder("/_bulk"); + if (bulkRefresh != null && !bulkRefresh.toLowerCase().equals("false")) { + builder.append("?refresh=" + bulkRefresh); + } + + final Response response = performRequest("POST", builder.toString(), outputStream.toByteArray()); + try (final InputStream inputStream = response.getEntity().getContent()) { + final RestBulkResponse bulkResponse = mapper.readValue(inputStream, RestBulkResponse.class); + List> errors = bulkResponse.getItems().stream() + 
.flatMap(item -> item.values().stream()) + .filter(item -> item.getError() != null && item.getStatus() != RestStatus.NOT_FOUND.getStatus()) + .map(item -> item.getError()).collect(Collectors.toList()); + if (!errors.isEmpty()) { + errors.forEach(error -> log.error("Failed to execute ES query {}", error.get("reason"))); + throw new IOException("Failure(s) in Elasicsearch bulk request: " + mapper.writeValueAsString(errors)); + } + } + } + + @Override + public RestSearchResponse search(String indexName, String type, ElasticSearchRequest request) throws IOException { + final String path = "/" + indexName + "/" + type + "/_search"; + + final Map requestBody = new HashMap<>(); + + if (request.getSize() != null) { + requestBody.put("size", request.getSize()); + } + + if (request.getFrom() != null) { + requestBody.put("from", request.getFrom()); + } + + if (!request.getSorts().isEmpty()) { + requestBody.put("sort", request.getSorts()); + } + + if (request.getQuery() != null) { + final Map query = mapReader.readValue(request.getQuery().buildAsBytes().array()); + requestBody.put("query", query); + } + + if (request.getPostFilter() != null) { + final Map query = mapReader.readValue(request.getPostFilter().buildAsBytes().array()); + requestBody.put("post_filter", query); + } + + final byte[] requestData = mapper.writeValueAsBytes(requestBody); + if (log.isDebugEnabled()) { + log.debug("Elasticsearch request: " + mapper.writerWithDefaultPrettyPrinter().writeValueAsString(requestBody)); + } + Response response = performRequest("POST", path, requestData); + try (final InputStream inputStream = response.getEntity().getContent()) { + return mapper.readValue(inputStream, RestSearchResponse.class); + } + } + + public void setBulkRefresh(String bulkRefresh) { + this.bulkRefresh = bulkRefresh; + } + + private Response performRequest(String method, String path, byte[] requestData) throws IOException { + final HttpEntity entity = requestData != null ? 
new ByteArrayEntity(requestData) : null; + final Response response = delegate.performRequest( + method, + path, + Collections.emptyMap(), + entity); + + if (response.getStatusLine().getStatusCode() >= 400) { + throw new IOException("Error executing request: " + response.getStatusLine().getReasonPhrase()); + } + return response; + } + +} diff --git a/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/rest/RestIndexSettings.java b/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/rest/RestIndexSettings.java new file mode 100644 index 0000000000..4e794427ef --- /dev/null +++ b/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/rest/RestIndexSettings.java @@ -0,0 +1,47 @@ +// Copyright 2017 JanusGraph Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package org.janusgraph.diskstorage.es.rest; + +import org.apache.tinkerpop.shaded.jackson.annotation.JsonProperty; + +import java.util.Map; + +public class RestIndexSettings { + + private Settings settings; + + public Settings getSettings() { + return settings; + } + + public void setSettings(Settings settings) { + this.settings = settings; + } + + public static class Settings { + + @JsonProperty("index") + private Map map; + + public Map getMap() { + return map; + } + + public void setMap(Map map) { + this.map = map; + } + } + +} diff --git a/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/rest/RestSearchHit.java b/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/rest/RestSearchHit.java new file mode 100644 index 0000000000..9f0a303cef --- /dev/null +++ b/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/rest/RestSearchHit.java @@ -0,0 +1,91 @@ +// Copyright 2017 JanusGraph Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package org.janusgraph.diskstorage.es.rest; + +import org.apache.tinkerpop.shaded.jackson.annotation.JsonIgnoreProperties; +import org.apache.tinkerpop.shaded.jackson.annotation.JsonProperty; + +import java.util.List; +import java.util.Map; + +@JsonIgnoreProperties(ignoreUnknown=true) +public class RestSearchHit { + + @JsonProperty("_index") + private String index; + + @JsonProperty("_type") + private String type; + + @JsonProperty("_id") + private String id; + + @JsonProperty("_score") + private Float score; + + @JsonProperty("_source") + private Map source; + + private Map> fields; + + public String getIndex() { + return index; + } + + public void setIndex(String index) { + this.index = index; + } + + public String getType() { + return type; + } + + public void setType(String type) { + this.type = type; + } + + public String getId() { + return id; + } + + public void setId(String id) { + this.id = id; + } + + public Float getScore() { + return score; + } + + public void setScore(Float score) { + this.score = score; + } + + public Map getSource() { + return source; + } + + public void setSource(Map source) { + this.source = source; + } + + public void setFields(Map> fields) { + this.fields = fields; + } + + public List field(String name) { + return this.fields != null ? this.fields.get(name) : null; + } + +} diff --git a/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/rest/RestSearchResponse.java b/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/rest/RestSearchResponse.java new file mode 100644 index 0000000000..2dd606da96 --- /dev/null +++ b/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/rest/RestSearchResponse.java @@ -0,0 +1,70 @@ +// Copyright 2017 JanusGraph Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.janusgraph.diskstorage.es.rest; + +import org.apache.tinkerpop.shaded.jackson.annotation.JsonIgnoreProperties; +import org.apache.tinkerpop.shaded.jackson.annotation.JsonProperty; +import org.janusgraph.diskstorage.es.ElasticSearchResponse; +import org.janusgraph.diskstorage.indexing.RawQuery; + +import java.util.List; +import java.util.stream.Collectors; + +@JsonIgnoreProperties(ignoreUnknown=true) +public class RestSearchResponse extends ElasticSearchResponse { + + private long took; + + @JsonProperty("hits") + private RestSearchResults hits; + + @Override + public long getTook() { + return took; + } + + public void setTook(long took) { + this.took = took; + } + + public RestSearchResults getHits() { + return hits; + } + + public void setHits(RestSearchResults hits) { + this.hits = hits; + } + + public int getNumHits() { + return hits.getHits().size(); + } + + @Override + public long getTotal() { + return hits.getTotal(); + } + + public Float getMaxScore() { + return hits.getMaxScore(); + } + + @Override + public List<RawQuery.Result<String>> getResults() { + return hits.getHits().stream() + .map(hit -> new RawQuery.Result<String>(hit.getId(),hit.getScore() != null ? 
hit.getScore() : 0f)) + .collect(Collectors.toList()); + } + +} diff --git a/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/rest/RestSearchResults.java b/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/rest/RestSearchResults.java new file mode 100644 index 0000000000..092191dff6 --- /dev/null +++ b/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/rest/RestSearchResults.java @@ -0,0 +1,56 @@ +// Copyright 2017 JanusGraph Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
+ +package org.janusgraph.diskstorage.es.rest; + +import org.apache.tinkerpop.shaded.jackson.annotation.JsonIgnoreProperties; +import org.apache.tinkerpop.shaded.jackson.annotation.JsonProperty; + +import java.util.List; + +@JsonIgnoreProperties(ignoreUnknown=true) +public class RestSearchResults { + + private Long total; + + @JsonProperty("max_score") + private Float maxScore; + + private List<RestSearchHit> hits; + + public Long getTotal() { + return total; + } + + public void setTotal(Long total) { + this.total = total; + } + + public Float getMaxScore() { + return maxScore; + } + + public void setMaxScore(Float maxScore) { + this.maxScore = maxScore; + } + + public List<RestSearchHit> getHits() { + return hits; + } + + public void setHits(List<RestSearchHit> hits) { + this.hits = hits; + } + +} diff --git a/janusgraph-es/src/main/resources/janusgraph-es.properties b/janusgraph-es/src/main/resources/janusgraph-es.properties index d1b33183cb..ba7739e9cb 100644 --- a/janusgraph-es/src/main/resources/janusgraph-es.properties +++ b/janusgraph-es/src/main/resources/janusgraph-es.properties @@ -1 +1,2 @@ es.version=${elasticsearch.version} +es.dist.version=${elasticsearch.dist.version} diff --git a/janusgraph-es/src/test/bin/elasticsearch b/janusgraph-es/src/test/bin/elasticsearch deleted file mode 100755 index 8461e00531..0000000000 --- a/janusgraph-es/src/test/bin/elasticsearch +++ /dev/null @@ -1,161 +0,0 @@ -#!/bin/sh - -# CONTROLLING STARTUP: - -# This script relies on few environment variables to determine startup -# behavior, those variables are: -# -# ES_CLASSPATH -- A Java classpath containing everything necessary to run. -# JAVA_OPTS -- Additional arguments to the JVM for heap size, etc -# ES_JAVA_OPTS -- External Java Opts on top of the defaults set -# -# -# Optionally, exact memory values can be set using the following values, note, -# they can still be set using the `ES_JAVA_OPTS`. Sample format include "512m", and "10g".
-# -# ES_HEAP_SIZE -- Sets both the minimum and maximum memory to allocate (recommended) -# -# As a convenience, a fragment of shell is sourced in order to set one or -# more of these variables. This so-called `include' can be placed in a -# number of locations and will be searched for in order. The lowest -# priority search path is the same directory as the startup script, and -# since this is the location of the sample in the project tree, it should -# almost work Out Of The Box. -# -# Any serious use-case though will likely require customization of the -# include. For production installations, it is recommended that you copy -# the sample to one of /usr/share/elasticsearch/elasticsearch.in.sh, -# /usr/local/share/elasticsearch/elasticsearch.in.sh, or -# /opt/elasticsearch/elasticsearch.in.sh and make your modifications there. -# -# Another option is to specify the full path to the include file in the -# environment. For example: -# -# $ ES_INCLUDE=/path/to/in.sh elasticsearch -p /var/run/es.pid -# -# Note: This is particularly handy for running multiple instances on a -# single installation, or for quick tests. -# -# If you would rather configure startup entirely from the environment, you -# can disable the include by exporting an empty ES_INCLUDE, or by -# ensuring that no include files exist in the aforementioned search list. -# Be aware that you will be entirely responsible for populating the needed -# environment variables. - -# Maven will replace the project.name with elasticsearch below. If that -# hasn't been done, we assume that this is not a packaged version and the -# user has forgotten to run Maven to create a package. - -IS_PACKAGED_VERSION='distributions' -if [ "$IS_PACKAGED_VERSION" != "distributions" ]; then - cat >&2 << EOF -Error: You must build the project with Maven or download a pre-built package -before you can run Elasticsearch. 
See 'Building from Source' in README.textile -or visit https://www.elastic.co/download to get a pre-built package. -EOF - exit 1 -fi - -CDPATH="" -SCRIPT="$0" - -# SCRIPT may be an arbitrarily deep series of symlinks. Loop until we have the concrete path. -while [ -h "$SCRIPT" ] ; do - ls=`ls -ld "$SCRIPT"` - # Drop everything prior to -> - link=`expr "$ls" : '.*-> \(.*\)$'` - if expr "$link" : '/.*' > /dev/null; then - SCRIPT="$link" - else - SCRIPT=`dirname "$SCRIPT"`/"$link" - fi -done - -# determine elasticsearch home -ES_HOME=`dirname "$SCRIPT"`/.. - -# make ELASTICSEARCH_HOME absolute -ES_HOME=`cd "$ES_HOME"; pwd` - -#### Start JanusGraph-specific edit -ES_INCLUDE="$ES_HOME/bin/elasticsearch.in.sh" -ES_CLASSPATH="$ES_HOME/target/es_jarhell.jar:`cat $ES_HOME/target/es_classpath.txt`" -#### End JanusGraph-specific edit - -# If an include wasn't specified in the environment, then search for one... -if [ "x$ES_INCLUDE" = "x" ]; then - # Locations (in order) to use when searching for an include file. - for include in /usr/share/elasticsearch/elasticsearch.in.sh \ - /usr/local/share/elasticsearch/elasticsearch.in.sh \ - /opt/elasticsearch/elasticsearch.in.sh \ - ~/.elasticsearch.in.sh \ - "$ES_HOME/bin/elasticsearch.in.sh" \ - "`dirname "$0"`"/elasticsearch.in.sh; do - if [ -r "$include" ]; then - . "$include" - break - fi - done -# ...otherwise, source the specified include. -elif [ -r "$ES_INCLUDE" ]; then - . "$ES_INCLUDE" -fi - -if [ -x "$JAVA_HOME/bin/java" ]; then - JAVA="$JAVA_HOME/bin/java" -else - JAVA=`which java` -fi - -if [ ! -x "$JAVA" ]; then - echo "Could not find any executable java binary. Please install java in your PATH or set JAVA_HOME" - exit 1 -fi - -if [ -z "$ES_CLASSPATH" ]; then - echo "You must set the ES_CLASSPATH var" >&2 - exit 1 -fi - -# don't let JAVA_TOOL_OPTIONS slip in (e.g. 
crazy agents in ubuntu) -# works around https://bugs.launchpad.net/ubuntu/+source/jayatana/+bug/1441487 -if [ "x$JAVA_TOOL_OPTIONS" != "x" ]; then - echo "Warning: Ignoring JAVA_TOOL_OPTIONS=$JAVA_TOOL_OPTIONS" - echo "Please pass JVM parameters via JAVA_OPTS instead" - unset JAVA_TOOL_OPTIONS -fi - -# Special-case path variables. -case `uname` in - CYGWIN*) - ES_CLASSPATH=`cygpath -p -w "$ES_CLASSPATH"` - ES_HOME=`cygpath -p -w "$ES_HOME"` - ;; -esac - -# full hostname passed through cut for portability on systems that do not support hostname -s -# export on separate line for shells that do not support combining definition and export -HOSTNAME=`hostname | cut -d. -f1` -export HOSTNAME - -# manual parsing to find out, if process should be detached -daemonized=`echo $* | egrep -- '(^-d |-d$| -d |--daemonize$|--daemonize )'` -if [ -z "$daemonized" ] ; then - exec "$JAVA" $JAVA_OPTS $ES_JAVA_OPTS -Des.path.home="$ES_HOME" -cp "$ES_CLASSPATH" \ - org.elasticsearch.bootstrap.Elasticsearch start "$@" -else - exec "$JAVA" $JAVA_OPTS $ES_JAVA_OPTS -Des.path.home="$ES_HOME" -cp "$ES_CLASSPATH" \ - org.elasticsearch.bootstrap.Elasticsearch start "$@" <&- & - retval=$? - pid=$! - [ $retval -eq 0 ] || exit $retval - if [ ! -z "$ES_STARTUP_SLEEP_TIME" ]; then - sleep $ES_STARTUP_SLEEP_TIME - fi - if ! ps -p $pid > /dev/null ; then - exit 1 - fi - exit 0 -fi - -exit $? 
diff --git a/janusgraph-es/src/test/bin/elasticsearch.in.sh b/janusgraph-es/src/test/bin/elasticsearch.in.sh deleted file mode 100755 index 376d811e7e..0000000000 --- a/janusgraph-es/src/test/bin/elasticsearch.in.sh +++ /dev/null @@ -1,78 +0,0 @@ -#!/bin/sh - -# check in case a user was using this mechanism - -if [ "x$ES_MIN_MEM" = "x" ]; then - ES_MIN_MEM=256m -fi -if [ "x$ES_MAX_MEM" = "x" ]; then - ES_MAX_MEM=1g -fi -if [ "x$ES_HEAP_SIZE" != "x" ]; then - ES_MIN_MEM=$ES_HEAP_SIZE - ES_MAX_MEM=$ES_HEAP_SIZE -fi - -# min and max heap sizes should be set to the same value to avoid -# stop-the-world GC pauses during resize, and so that we can lock the -# heap in memory on startup to prevent any of it from being swapped -# out. -JAVA_OPTS="$JAVA_OPTS -Xms${ES_MIN_MEM}" -JAVA_OPTS="$JAVA_OPTS -Xmx${ES_MAX_MEM}" - -# new generation -if [ "x$ES_HEAP_NEWSIZE" != "x" ]; then - JAVA_OPTS="$JAVA_OPTS -Xmn${ES_HEAP_NEWSIZE}" -fi - -# max direct memory -if [ "x$ES_DIRECT_SIZE" != "x" ]; then - JAVA_OPTS="$JAVA_OPTS -XX:MaxDirectMemorySize=${ES_DIRECT_SIZE}" -fi - -# set to headless, just in case -JAVA_OPTS="$JAVA_OPTS -Djava.awt.headless=true" - -# Force the JVM to use IPv4 stack -if [ "x$ES_USE_IPV4" != "x" ]; then - JAVA_OPTS="$JAVA_OPTS -Djava.net.preferIPv4Stack=true" -fi - -# Add gc options. 
ES_GC_OPTS is unsupported, for internal testing -if [ "x$ES_GC_OPTS" = "x" ]; then - ES_GC_OPTS="$ES_GC_OPTS -XX:+UseParNewGC" - ES_GC_OPTS="$ES_GC_OPTS -XX:+UseConcMarkSweepGC" - ES_GC_OPTS="$ES_GC_OPTS -XX:CMSInitiatingOccupancyFraction=75" - ES_GC_OPTS="$ES_GC_OPTS -XX:+UseCMSInitiatingOccupancyOnly" -fi - -JAVA_OPTS="$JAVA_OPTS $ES_GC_OPTS" - -# GC logging options -if [ -n "$ES_GC_LOG_FILE" ]; then - JAVA_OPTS="$JAVA_OPTS -XX:+PrintGCDetails" - JAVA_OPTS="$JAVA_OPTS -XX:+PrintGCTimeStamps" - JAVA_OPTS="$JAVA_OPTS -XX:+PrintGCDateStamps" - JAVA_OPTS="$JAVA_OPTS -XX:+PrintClassHistogram" - JAVA_OPTS="$JAVA_OPTS -XX:+PrintTenuringDistribution" - JAVA_OPTS="$JAVA_OPTS -XX:+PrintGCApplicationStoppedTime" - JAVA_OPTS="$JAVA_OPTS -Xloggc:$ES_GC_LOG_FILE" - - # Ensure that the directory for the log file exists: the JVM will not create it. - mkdir -p "`dirname \"$ES_GC_LOG_FILE\"`" -fi - -# Causes the JVM to dump its heap on OutOfMemory. -JAVA_OPTS="$JAVA_OPTS -XX:+HeapDumpOnOutOfMemoryError" -# The path to the heap dump location, note directory must exists and have enough -# space for a full heap dump. -#JAVA_OPTS="$JAVA_OPTS -XX:HeapDumpPath=$ES_HOME/logs/heapdump.hprof" - -# Disables explicit GC -JAVA_OPTS="$JAVA_OPTS -XX:+DisableExplicitGC" - -# Ensure UTF-8 encoding by default (e.g. 
filenames) -JAVA_OPTS="$JAVA_OPTS -Dfile.encoding=UTF-8" - -# Use our provided JNA always versus the system one -JAVA_OPTS="$JAVA_OPTS -Djna.nosys=true" diff --git a/janusgraph-es/src/test/binassembly.xml b/janusgraph-es/src/test/binassembly.xml deleted file mode 100644 index a5001f51a5..0000000000 --- a/janusgraph-es/src/test/binassembly.xml +++ /dev/null @@ -1,17 +0,0 @@ - - es-bin - / - - - dir - - - - - src/test/bin - / - false - - - diff --git a/janusgraph-es/src/test/config/elasticsearch.yml b/janusgraph-es/src/test/config/elasticsearch.yml index 0cc9d632d8..7059e5b9d9 100644 --- a/janusgraph-es/src/test/config/elasticsearch.yml +++ b/janusgraph-es/src/test/config/elasticsearch.yml @@ -1,97 +1 @@ -# ======================== Elasticsearch Configuration ========================= -# -# NOTE: Elasticsearch comes with reasonable defaults for most settings. -# Before you set out to tweak and tune the configuration, make sure you -# understand what are you trying to accomplish and the consequences. -# -# The primary way of configuring a node is via this file. This template lists -# the most important settings you may want to configure for a production cluster. 
-# -# Please see the documentation for further information on configuration options: -# -# -# ---------------------------------- Cluster ----------------------------------- -# -# Use a descriptive name for your cluster: -# -# cluster.name: my-application -# -# ------------------------------------ Node ------------------------------------ -# -# Use a descriptive name for the node: -# -# node.name: node-1 -# -# Add custom attributes to the node: -# -# node.rack: r1 -# -# ----------------------------------- Paths ------------------------------------ -# -# Path to directory where to store the data (separate multiple locations by comma): -# -path.data: $MAVEN{project.build.directory}/es-data -# -# Path to log files: -# -path.logs: $MAVEN{project.build.directory}/es-logs -# -# ----------------------------------- Memory ----------------------------------- -# -# Lock the memory on startup: -# -# bootstrap.mlockall: true -# -# Make sure that the `ES_HEAP_SIZE` environment variable is set to about half the memory -# available on the system and that the owner of the process is allowed to use this limit. -# -# Elasticsearch performs poorly when the system is swapping the memory. 
-# -# ---------------------------------- Network ----------------------------------- -# -# Set the bind address to a specific IP (IPv4 or IPv6): -# -network.host: 127.0.0.1 -# -# Set a custom port for HTTP: -# -# http.port: 9200 -# -# For more information, see the documentation at: -# -# -# --------------------------------- Discovery ---------------------------------- -# -# Pass an initial list of hosts to perform discovery when new node is started: -# The default list of hosts is ["127.0.0.1", "[::1]"] -# -# discovery.zen.ping.unicast.hosts: ["host1", "host2"] -# -# Prevent the "split brain" by configuring the majority of nodes (total number of nodes / 2 + 1): -# -# discovery.zen.minimum_master_nodes: 3 -# -# For more information, see the documentation at: -# -# -# ---------------------------------- Gateway ----------------------------------- -# -# Block initial recovery after a full cluster restart until N nodes are started: -# -# gateway.recover_after_nodes: 3 -# -# For more information, see the documentation at: -# -# -# ---------------------------------- Various ----------------------------------- -# -# Disable starting multiple nodes on a single system: -# -# node.max_local_storage_nodes: 1 -# -# Require explicit names when deleting indices: -# -# action.destructive_requires_name: true - -index.max_result_window: 10000000 - +script.engine.groovy.inline.update: true diff --git a/janusgraph-es/src/test/config/logging.yml b/janusgraph-es/src/test/config/logging.yml deleted file mode 100644 index b5504e64b3..0000000000 --- a/janusgraph-es/src/test/config/logging.yml +++ /dev/null @@ -1,56 +0,0 @@ -# you can override this using by setting a system property, for example -Des.logger.level=DEBUG -es.logger.level: INFO -rootLogger: ${es.logger.level}, console, file -logger: - # log action execution errors for easier debugging - action: DEBUG - # reduce the logging for aws, too much is logged under the default INFO - com.amazonaws: WARN - - # gateway - #gateway: DEBUG 
- #index.gateway: DEBUG - - # peer shard recovery - #indices.recovery: DEBUG - - # discovery - #discovery: TRACE - - index.search.slowlog: TRACE, index_search_slow_log_file - index.indexing.slowlog: TRACE, index_indexing_slow_log_file - -additivity: - index.search.slowlog: false - index.indexing.slowlog: false - -appender: - console: - type: console - layout: - type: consolePattern - conversionPattern: "[%d{ISO8601}][%-5p][%-25c] %m%n" - - file: - type: dailyRollingFile - file: ${path.logs}/elasticsearch.log - datePattern: "'.'yyyy-MM-dd" - layout: - type: pattern - conversionPattern: "[%d{ISO8601}][%-5p][%-25c] %m%n" - - index_search_slow_log_file: - type: dailyRollingFile - file: ${path.logs}/${cluster.name}_index_search_slowlog.log - datePattern: "'.'yyyy-MM-dd" - layout: - type: pattern - conversionPattern: "[%d{ISO8601}][%-5p][%-25c] %m%n" - - index_indexing_slow_log_file: - type: dailyRollingFile - file: ${path.logs}/${cluster.name}_index_indexing_slowlog.log - datePattern: "'.'yyyy-MM-dd" - layout: - type: pattern - conversionPattern: "[%d{ISO8601}][%-5p][%-25c] %m%n" diff --git a/janusgraph-es/src/test/java/org/janusgraph/diskstorage/es/BerkeleyElasticsearchTest.java b/janusgraph-es/src/test/java/org/janusgraph/diskstorage/es/BerkeleyElasticsearchTest.java index 4a876c8278..b11fb9e0d0 100644 --- a/janusgraph-es/src/test/java/org/janusgraph/diskstorage/es/BerkeleyElasticsearchTest.java +++ b/janusgraph-es/src/test/java/org/janusgraph/diskstorage/es/BerkeleyElasticsearchTest.java @@ -14,20 +14,22 @@ package org.janusgraph.diskstorage.es; -import org.janusgraph.StorageSetup; import org.janusgraph.core.JanusGraph; import org.janusgraph.diskstorage.configuration.ModifiableConfiguration; import org.janusgraph.diskstorage.configuration.WriteConfiguration; import org.janusgraph.example.GraphOfTheGodsFactory; import org.janusgraph.graphdb.JanusGraphIndexTest; import org.janusgraph.util.system.IOUtils; +import org.junit.AfterClass; +import org.junit.BeforeClass; 
import org.junit.Test; import java.io.File; -import static org.janusgraph.diskstorage.es.ElasticSearchIndex.*; -import static org.janusgraph.graphdb.configuration.GraphDatabaseConfiguration.*; import static org.janusgraph.BerkeleyStorageSetup.getBerkeleyJEConfiguration; +import static org.janusgraph.diskstorage.es.ElasticSearchIndex.BULK_REFRESH; +import static org.janusgraph.diskstorage.es.ElasticSearchIndex.INTERFACE; +import static org.janusgraph.graphdb.configuration.GraphDatabaseConfiguration.INDEX_HOSTS; /** * @author Matthias Broecheler (me@matthiasb.com) @@ -35,6 +37,19 @@ public class BerkeleyElasticsearchTest extends JanusGraphIndexTest { + private static ElasticsearchRunner esr; + + @BeforeClass + public static void startElasticsearch() { + esr = new ElasticsearchRunner(); + esr.start(); + } + + @AfterClass + public static void stopElasticsearch() { + esr.stop(); + } + public BerkeleyElasticsearchTest() { super(true, true, true); } @@ -43,10 +58,9 @@ public BerkeleyElasticsearchTest() { public WriteConfiguration getConfiguration() { ModifiableConfiguration config = getBerkeleyJEConfiguration(); //Add index - config.set(INDEX_BACKEND,"elasticsearch",INDEX); - config.set(LOCAL_MODE,true,INDEX); - config.set(CLIENT_ONLY,false,INDEX); - config.set(INDEX_DIRECTORY, StorageSetup.getHomeDir("es"), INDEX); + config.set(INTERFACE, ElasticSearchSetup.REST_CLIENT.toString(), INDEX); + config.set(INDEX_HOSTS, new String[]{ "127.0.0.1" }, INDEX); + config.set(BULK_REFRESH, "wait_for", INDEX); return config.getConfiguration(); } diff --git a/janusgraph-es/src/test/java/org/janusgraph/diskstorage/es/ElasticSearchConfigTest.java b/janusgraph-es/src/test/java/org/janusgraph/diskstorage/es/ElasticSearchConfigTest.java index bc73ef1bfd..6a457417e9 100644 --- a/janusgraph-es/src/test/java/org/janusgraph/diskstorage/es/ElasticSearchConfigTest.java +++ b/janusgraph-es/src/test/java/org/janusgraph/diskstorage/es/ElasticSearchConfigTest.java @@ -14,8 +14,6 @@ package 
org.janusgraph.diskstorage.es; -import com.google.common.base.Joiner; - import org.janusgraph.core.JanusGraphFactory; import org.janusgraph.core.JanusGraph; import org.janusgraph.core.attribute.Text; @@ -26,91 +24,75 @@ import org.janusgraph.diskstorage.configuration.ModifiableConfiguration; import org.janusgraph.diskstorage.configuration.backend.CommonsConfiguration; import org.janusgraph.diskstorage.indexing.*; +import static org.janusgraph.diskstorage.es.ElasticSearchIndex.*; import org.janusgraph.diskstorage.util.StandardBaseTransactionConfig; import org.janusgraph.diskstorage.util.time.TimestampProviders; import org.janusgraph.graphdb.configuration.GraphDatabaseConfiguration; import org.janusgraph.graphdb.query.condition.PredicateCondition; -import org.janusgraph.util.system.IOUtils; import org.apache.commons.configuration.BaseConfiguration; -import org.apache.commons.io.FileUtils; -import org.elasticsearch.action.admin.indices.settings.get.GetSettingsRequest; -import org.elasticsearch.action.admin.indices.settings.get.GetSettingsResponse; -import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.node.Node; -import org.elasticsearch.node.NodeBuilder; +import org.junit.After; import org.junit.Assert; import org.junit.Before; -import org.junit.BeforeClass; import org.junit.Test; -import java.io.File; import java.io.IOException; import java.time.Duration; -import static org.janusgraph.diskstorage.es.ElasticSearchIndex.*; -import static org.janusgraph.graphdb.configuration.GraphDatabaseConfiguration.INDEX_CONF_FILE; -import static org.janusgraph.graphdb.configuration.GraphDatabaseConfiguration.INDEX_DIRECTORY; import static org.janusgraph.graphdb.configuration.GraphDatabaseConfiguration.INDEX_HOSTS; import static org.junit.Assert.*; /** * Test behavior JanusGraph ConfigOptions governing ES client setup. 
- * - * {@link ElasticSearchIndexTest#testConfiguration()} exercises legacy - * config options using an embedded JVM-local-transport ES instance. By contrast, - * this class exercises the new {@link ElasticSearchIndex#INTERFACE} configuration - * mechanism and uses a network-capable embedded ES instance. */ public class ElasticSearchConfigTest { private static final String INDEX_NAME = "escfg"; - @BeforeClass - public static void killElasticsearch() { - IOUtils.deleteDirectory(new File("es"), true); - ElasticsearchRunner esr = new ElasticsearchRunner(); + private ElasticsearchRunner esr; + + private int port; + + @Before + public void setup() throws Exception { + esr = new ElasticsearchRunner(); + esr.start(); + port = getInterface() == ElasticSearchSetup.REST_CLIENT ? 9200 : 9300; + } + + @After + public void teardown() throws Exception { esr.stop(); } - @Before - public void setup() throws IOException { - String baseDir = Joiner.on(File.separator).join("target", "es"); - FileUtils.deleteDirectory(new File(baseDir + File.separator + "data")); + public ElasticSearchSetup getInterface() { + return ElasticSearchSetup.REST_CLIENT; } @Test - public void testJanusGraphFactoryBuilder() - { - String baseDir = Joiner.on(File.separator).join("target", "es"); + public void testJanusGraphFactoryBuilder() { JanusGraphFactory.Builder builder = JanusGraphFactory.build(); builder.set("storage.backend", "inmemory"); - builder.set("index." + INDEX_NAME + ".elasticsearch.interface", "NODE"); - builder.set("index." + INDEX_NAME + ".elasticsearch.ext.node.data", "true"); - builder.set("index." + INDEX_NAME + ".elasticsearch.ext.node.client", "false"); - builder.set("index." + INDEX_NAME + ".elasticsearch.ext.node.local", "true"); - builder.set("index." + INDEX_NAME + ".elasticsearch.ext.path.home", baseDir); + builder.set("index." 
+ INDEX_NAME + ".elasticsearch.hostname", "127.0.0.1:" + port); JanusGraph graph = builder.open(); // Must not throw an exception assertTrue(graph.isOpen()); graph.close(); } @Test - public void testTransportClient() throws BackendException, InterruptedException { - ElasticsearchRunner esr = new ElasticsearchRunner("."); - esr.start(); + public void testClient() throws BackendException, InterruptedException { ModifiableConfiguration config = GraphDatabaseConfiguration.buildGraphConfiguration(); - config.set(INTERFACE, ElasticSearchSetup.TRANSPORT_CLIENT.toString(), INDEX_NAME); - config.set(INDEX_HOSTS, new String[]{ "127.0.0.1" }, INDEX_NAME); + config.set(INTERFACE, getInterface().toString(), INDEX_NAME); + config.set(INDEX_HOSTS, new String[]{ "127.0.0.1:" + port }, INDEX_NAME); Configuration indexConfig = config.restrictTo(INDEX_NAME); IndexProvider idx = new ElasticSearchIndex(indexConfig); simpleWriteAndQuery(idx); idx.close(); config = GraphDatabaseConfiguration.buildGraphConfiguration(); - config.set(INTERFACE, ElasticSearchSetup.TRANSPORT_CLIENT.toString(), INDEX_NAME); - config.set(INDEX_HOSTS, new String[]{ "10.11.12.13" }, INDEX_NAME); + config.set(INTERFACE, getInterface().toString(), INDEX_NAME); + config.set(INDEX_HOSTS, new String[]{ "10.11.12.13:" + port }, INDEX_NAME); indexConfig = config.restrictTo(INDEX_NAME); Throwable failure = null; try { @@ -120,174 +102,30 @@ public void testTransportClient() throws BackendException, InterruptedException } // idx.close(); Assert.assertNotNull("ES client failed to throw exception on connection failure", failure); - - esr.stop(); - } - - @Test - public void testLocalNodeUsingExt() throws BackendException, InterruptedException { - - String baseDir = Joiner.on(File.separator).join("target", "es"); - - CommonsConfiguration cc = new CommonsConfiguration(new BaseConfiguration()); - cc.set("index." + INDEX_NAME + ".elasticsearch.ext.node.data", "true"); - cc.set("index." 
+ INDEX_NAME + ".elasticsearch.ext.node.client", "false"); - cc.set("index." + INDEX_NAME + ".elasticsearch.ext.node.local", "true"); - cc.set("index." + INDEX_NAME + ".elasticsearch.ext.path.home", baseDir); - ModifiableConfiguration config = - new ModifiableConfiguration(GraphDatabaseConfiguration.ROOT_NS, - cc, BasicConfiguration.Restriction.NONE); - config.set(INTERFACE, ElasticSearchSetup.NODE.toString(), INDEX_NAME); - Configuration indexConfig = config.restrictTo(INDEX_NAME); - IndexProvider idx = new ElasticSearchIndex(indexConfig); - simpleWriteAndQuery(idx); - idx.close(); - - assertTrue(new File(baseDir + File.separator + "data").exists()); } @Test - public void testLocalNodeUsingExtAndIndexDirectory() throws BackendException, InterruptedException { - - String baseDir = Joiner.on(File.separator).join("target", "es"); - - CommonsConfiguration cc = new CommonsConfiguration(new BaseConfiguration()); - cc.set("index." + INDEX_NAME + ".elasticsearch.ext.node.data", "true"); - cc.set("index." + INDEX_NAME + ".elasticsearch.ext.node.client", "false"); - cc.set("index." 
+ INDEX_NAME + ".elasticsearch.ext.node.local", "true"); - ModifiableConfiguration config = - new ModifiableConfiguration(GraphDatabaseConfiguration.ROOT_NS, - cc, BasicConfiguration.Restriction.NONE); - config.set(INTERFACE, ElasticSearchSetup.NODE.toString(), INDEX_NAME); - config.set(INDEX_DIRECTORY, baseDir, INDEX_NAME); - Configuration indexConfig = config.restrictTo(INDEX_NAME); - IndexProvider idx = new ElasticSearchIndex(indexConfig); - simpleWriteAndQuery(idx); - idx.close(); - - assertTrue(new File(baseDir + File.separator + "data").exists()); - } - - @Test - public void testLocalNodeUsingYaml() throws BackendException, InterruptedException { - - String baseDir = Joiner.on(File.separator).join("target", "es"); - - ModifiableConfiguration config = GraphDatabaseConfiguration.buildGraphConfiguration(); - config.set(INTERFACE, ElasticSearchSetup.NODE.toString(), INDEX_NAME); - config.set(INDEX_CONF_FILE, - Joiner.on(File.separator).join("target", "test-classes", "es_jvmlocal.yml"), INDEX_NAME); - Configuration indexConfig = config.restrictTo(INDEX_NAME); - IndexProvider idx = new ElasticSearchIndex(indexConfig); - simpleWriteAndQuery(idx); - idx.close(); - - assertTrue(new File(baseDir + File.separator + "data").exists()); - } - - @Test - public void testNetworkNodeUsingExt() throws BackendException, InterruptedException { - ElasticsearchRunner esr = new ElasticsearchRunner("."); - esr.start(); - CommonsConfiguration cc = new CommonsConfiguration(new BaseConfiguration()); - cc.set("index." + INDEX_NAME + ".elasticsearch.ext.node.data", "false"); - cc.set("index." + INDEX_NAME + ".elasticsearch.ext.node.client", "true"); - cc.set("index." + INDEX_NAME + ".elasticsearch.ext.discovery.zen.ping.multicast.enabled", "false"); - cc.set("index." 
+ INDEX_NAME + ".elasticsearch.ext.discovery.zen.ping.unicast.hosts", "localhost,127.0.0.1:9300"); - ModifiableConfiguration config = - new ModifiableConfiguration(GraphDatabaseConfiguration.ROOT_NS, - cc, BasicConfiguration.Restriction.NONE); - config.set(INTERFACE, ElasticSearchSetup.NODE.toString(), INDEX_NAME); - Configuration indexConfig = config.restrictTo(INDEX_NAME); - IndexProvider idx = new ElasticSearchIndex(indexConfig); - simpleWriteAndQuery(idx); - idx.close(); - - cc.set("index." + INDEX_NAME + ".elasticsearch.ext.discovery.zen.ping.unicast.hosts", "10.11.12.13"); - config = new ModifiableConfiguration(GraphDatabaseConfiguration.ROOT_NS, - cc, BasicConfiguration.Restriction.NONE); - config.set(INTERFACE, ElasticSearchSetup.NODE.toString(), INDEX_NAME); - config.set(HEALTH_REQUEST_TIMEOUT, "5s", INDEX_NAME); - indexConfig = config.restrictTo(INDEX_NAME); - - Throwable failure = null; - try { - idx = new ElasticSearchIndex(indexConfig); - } catch (Throwable t) { - failure = t; - } - // idx.close(); - Assert.assertNotNull("ES client failed to throw exception on connection failure", failure); - esr.stop(); - } - - @Test - public void testNetworkNodeUsingYaml() throws BackendException, InterruptedException { - ElasticsearchRunner esr = new ElasticsearchRunner("."); - esr.start(); - ModifiableConfiguration config = GraphDatabaseConfiguration.buildGraphConfiguration(); - config.set(INTERFACE, ElasticSearchSetup.NODE.toString(), INDEX_NAME); - config.set(INDEX_CONF_FILE, - Joiner.on(File.separator).join("target", "test-classes", "es_cfg_nodeclient.yml"), INDEX_NAME); - Configuration indexConfig = config.restrictTo(INDEX_NAME); - IndexProvider idx = new ElasticSearchIndex(indexConfig); - simpleWriteAndQuery(idx); - idx.close(); - - config = GraphDatabaseConfiguration.buildGraphConfiguration(); - config.set(INTERFACE, ElasticSearchSetup.NODE.toString(), INDEX_NAME); - config.set(HEALTH_REQUEST_TIMEOUT, "5s", INDEX_NAME); - config.set(INDEX_CONF_FILE, - 
Joiner.on(File.separator).join("target", "test-classes", "es_cfg_bogus_nodeclient.yml"), INDEX_NAME); - indexConfig = config.restrictTo(INDEX_NAME); - - Throwable failure = null; - try { - idx = new ElasticSearchIndex(indexConfig); - } catch (Throwable t) { - failure = t; - } - //idx.close(); - Assert.assertNotNull("ES client failed to throw exception on connection failure", failure); - esr.stop(); - } - - @Test - public void testIndexCreationOptions() throws InterruptedException, BackendException { - - String baseDir = Joiner.on(File.separator).join("target", "es"); - + public void testIndexCreationOptions() throws InterruptedException, BackendException, IOException { final int shards = 7; - ElasticsearchRunner esr = new ElasticsearchRunner("."); - esr.start(); CommonsConfiguration cc = new CommonsConfiguration(new BaseConfiguration()); cc.set("index." + INDEX_NAME + ".elasticsearch.create.ext.number_of_shards", String.valueOf(shards)); ModifiableConfiguration config = - new ModifiableConfiguration(GraphDatabaseConfiguration.ROOT_NS, - cc, BasicConfiguration.Restriction.NONE); - config.set(INTERFACE, ElasticSearchSetup.NODE.toString(), INDEX_NAME); - config.set(GraphDatabaseConfiguration.INDEX_DIRECTORY, baseDir, INDEX_NAME); + new ModifiableConfiguration(GraphDatabaseConfiguration.ROOT_NS, + cc, BasicConfiguration.Restriction.NONE); + config.set(INTERFACE, getInterface().toString(), INDEX_NAME); + config.set(INDEX_HOSTS, new String[]{ "127.0.0.1:" + port }, INDEX_NAME); + config.set(GraphDatabaseConfiguration.INDEX_NAME, "janusgraph_creation_opts", INDEX_NAME); Configuration indexConfig = config.restrictTo(INDEX_NAME); IndexProvider idx = new ElasticSearchIndex(indexConfig); simpleWriteAndQuery(idx); + ElasticSearchClient client = getInterface().connect(indexConfig).getClient(); - - Settings.Builder settingsBuilder = Settings.settingsBuilder(); - settingsBuilder.put("discovery.zen.ping.multicast.enabled", "false"); - 
settingsBuilder.put("discovery.zen.ping.unicast.hosts", "localhost,127.0.0.1:9300"); - settingsBuilder.put("path.home", baseDir); - NodeBuilder nodeBuilder = NodeBuilder.nodeBuilder().settings(settingsBuilder.build()); - nodeBuilder.client(true).data(false).local(false); - Node n = nodeBuilder.build().start(); - - GetSettingsResponse response = n.client().admin().indices().getSettings(new GetSettingsRequest().indices("janusgraph")).actionGet(); - assertEquals(String.valueOf(shards), response.getSetting("janusgraph", "index.number_of_shards")); + assertEquals(String.valueOf(shards), client.getIndexSettings("janusgraph_creation_opts").get("number_of_shards")); idx.close(); - n.close(); - esr.stop(); + client.close(); } private void simpleWriteAndQuery(IndexProvider idx) throws BackendException, InterruptedException { diff --git a/janusgraph-es/src/test/java/org/janusgraph/diskstorage/es/ElasticSearchIndexTest.java b/janusgraph-es/src/test/java/org/janusgraph/diskstorage/es/ElasticSearchIndexTest.java index 04b28c7bae..a051dc147a 100644 --- a/janusgraph-es/src/test/java/org/janusgraph/diskstorage/es/ElasticSearchIndexTest.java +++ b/janusgraph-es/src/test/java/org/janusgraph/diskstorage/es/ElasticSearchIndexTest.java @@ -14,11 +14,9 @@ package org.janusgraph.diskstorage.es; -import com.google.common.base.Joiner; import com.google.common.base.Throwables; import com.google.common.collect.HashMultimap; import com.google.common.collect.Multimap; -import org.janusgraph.StorageSetup; import org.janusgraph.core.Cardinality; import org.janusgraph.core.JanusGraphException; import org.janusgraph.core.schema.Parameter; @@ -32,20 +30,20 @@ import org.janusgraph.diskstorage.indexing.IndexQuery; import org.janusgraph.graphdb.configuration.GraphDatabaseConfiguration; import org.janusgraph.graphdb.query.condition.PredicateCondition; +import org.junit.AfterClass; +import org.junit.BeforeClass; import org.junit.Test; -import java.io.File; import java.util.Date; import java.util.UUID; 
-import static org.janusgraph.diskstorage.es.ElasticSearchIndex.*; -import static org.janusgraph.graphdb.configuration.GraphDatabaseConfiguration.INDEX_CONF_FILE; +import static org.janusgraph.diskstorage.es.ElasticSearchIndex.BULK_REFRESH; +import static org.janusgraph.diskstorage.es.ElasticSearchIndex.INTERFACE; +import static org.janusgraph.graphdb.configuration.GraphDatabaseConfiguration.INDEX_HOSTS; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; -import static org.janusgraph.graphdb.configuration.GraphDatabaseConfiguration.INDEX_HOSTS; /** * @author Matthias Broecheler (me@matthiasb.com) @@ -53,9 +51,22 @@ public class ElasticSearchIndexTest extends IndexProviderTest { + private static ElasticsearchRunner esr; + + @BeforeClass + public static void startElasticsearch() { + esr = new ElasticsearchRunner(); + esr.start(); + } + + @AfterClass + public static void stopElasticsearch() { + esr.stop(); + } + @Override public IndexProvider openIndex() throws BackendException { - return new ElasticSearchIndex(getLocalESTestConfig()); + return new ElasticSearchIndex(getESTestConfig()); } @Override @@ -63,17 +74,15 @@ public boolean supportsLuceneStyleQueries() { return true; } - public static final Configuration getLocalESTestConfig() { + public Configuration getESTestConfig() { final String index = "es"; ModifiableConfiguration config = GraphDatabaseConfiguration.buildGraphConfiguration(); - config.set(LOCAL_MODE, true, index); - config.set(CLIENT_ONLY, false, index); - config.set(TTL_INTERVAL, "5s", index); - config.set(GraphDatabaseConfiguration.INDEX_DIRECTORY, StorageSetup.getHomeDir("es"), index); + config.set(INTERFACE, ElasticSearchSetup.REST_CLIENT.toString(), index); + config.set(INDEX_HOSTS, new String[]{ "127.0.0.1" }, index); + config.set(BULK_REFRESH, "wait_for", index); return 
config.restrictTo(index); } - @Test public void testSupport() { assertTrue(index.supports(of(String.class, Cardinality.SINGLE), Text.CONTAINS)); @@ -110,65 +119,6 @@ public void testSupport() { assertTrue(index.supports(of(Geoshape.class, Cardinality.SINGLE, new Parameter("mapping",Mapping.PREFIX_TREE)), Geo.DISJOINT)); } - @Test - public void testConfiguration() throws BackendException { - // Test that local-mode has precedence over hostname - final String index = "es"; - ModifiableConfiguration config = GraphDatabaseConfiguration.buildGraphConfiguration(); - config.set(LOCAL_MODE, true, index); - config.set(CLIENT_ONLY, true, index); - config.set(INDEX_HOSTS, new String[] { "10.0.0.1" }, index); - config.set(GraphDatabaseConfiguration.INDEX_DIRECTORY, StorageSetup.getHomeDir("es"), index); - Configuration indexConfig = config.restrictTo(index); - - IndexProvider idx = new ElasticSearchIndex(indexConfig); // Shouldn't throw exception - idx.close(); - - config = GraphDatabaseConfiguration.buildGraphConfiguration(); - config.set(LOCAL_MODE, false, index); - config.set(CLIENT_ONLY, true, index); - config.set(INDEX_HOSTS, new String[] { "10.0.0.1" }, index); - config.set(GraphDatabaseConfiguration.INDEX_DIRECTORY, StorageSetup.getHomeDir("es"), index); - indexConfig = config.restrictTo(index); - - RuntimeException expectedException = null; - try { - idx = new ElasticSearchIndex(indexConfig); // Should try 10.0.0.1 and throw exception - idx.close(); - } catch (RuntimeException re) { - expectedException = re; - } - assertNotNull(expectedException); - } - - @Test - public void testConfigurationFile() throws BackendException { - final String index = "es"; - ModifiableConfiguration config = GraphDatabaseConfiguration.buildGraphConfiguration(); - config.set(LOCAL_MODE, true, index); - config.set(CLIENT_ONLY, true, index); - config.set(INDEX_CONF_FILE, Joiner.on(File.separator).join("target", "test-classes", "es_nodename_foo.yml"), index); - 
config.set(GraphDatabaseConfiguration.INDEX_DIRECTORY, StorageSetup.getHomeDir("es"), index); - Configuration indexConfig = config.restrictTo(index); - - ElasticSearchIndex idx = new ElasticSearchIndex(indexConfig); // Shouldn't throw exception - idx.close(); - - assertEquals("foo", idx.getNode().settings().get("node.name")); - - config = GraphDatabaseConfiguration.buildGraphConfiguration(); - config.set(LOCAL_MODE, true, index); - config.set(CLIENT_ONLY, true, index); - config.set(INDEX_CONF_FILE, Joiner.on(File.separator).join("target", "test-classes", "es_nodename_bar.yml"), index); - config.set(GraphDatabaseConfiguration.INDEX_DIRECTORY, StorageSetup.getHomeDir("es"), index); - indexConfig = config.restrictTo(index); - - idx = new ElasticSearchIndex(indexConfig); // Shouldn't throw exception - idx.close(); - - assertEquals("bar", idx.getNode().settings().get("node.name")); - } - @Test public void testErrorInBatch() throws Exception { initialize("vertex"); @@ -183,7 +133,8 @@ public void testErrorInBatch() throws Exception { fail("Commit should not have succeeded."); } catch (JanusGraphException e) { // Looking for a NumberFormatException since we tried to stick a string of text into a time field. 
- if (!Throwables.getRootCause(e).getMessage().contains("NumberFormatException")) { + if (!Throwables.getRootCause(e).getMessage().contains("number_format_exception") + && !Throwables.getRootCause(e).getMessage().contains("NumberFormatException")) { throw e; } } finally { diff --git a/janusgraph-es/src/test/java/org/janusgraph/diskstorage/es/ElasticsearchRunner.java b/janusgraph-es/src/test/java/org/janusgraph/diskstorage/es/ElasticsearchRunner.java index a0a93e6575..8100349f2e 100644 --- a/janusgraph-es/src/test/java/org/janusgraph/diskstorage/es/ElasticsearchRunner.java +++ b/janusgraph-es/src/test/java/org/janusgraph/diskstorage/es/ElasticsearchRunner.java @@ -15,19 +15,25 @@ package org.janusgraph.diskstorage.es; import org.janusgraph.DaemonRunner; +import org.janusgraph.example.GraphOfTheGodsFactory; import org.janusgraph.util.system.IOUtils; import org.apache.commons.io.FileUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.*; +import java.util.Scanner; import java.util.concurrent.TimeUnit; +import java.util.regex.Matcher; +import java.util.regex.Pattern; /** * Start and stop a separate Elasticsearch server process. 
*/ public class ElasticsearchRunner extends DaemonRunner { + private static final String DEFAULT_HOME_DIR = "."; + private final String homedir; private static final Logger log = @@ -35,14 +41,30 @@ public class ElasticsearchRunner extends DaemonRunner { public static final String ES_PID_FILE = "/tmp/janusgraph-test-es.pid"; - public ElasticsearchRunner() { - this.homedir = "."; - } - public ElasticsearchRunner(String esHome) { - this.homedir = esHome; + final Pattern VERSION_PATTERN = Pattern.compile("es.dist.version=(.*)"); + String version = null; + try (InputStream in = ElasticsearchRunner.class.getClassLoader().getResourceAsStream("janusgraph-es.properties")) { + if (in != null) { + try (Scanner s = new Scanner(in)) { + s.useDelimiter("\\A"); + final Matcher m = VERSION_PATTERN.matcher(s.next()); + if (m.find()) { + version = m.group(1); + } + } + } + } catch (IOException e) { } + if (version == null) { + throw new RuntimeException("Unable to find Elasticsearch version"); + } + + this.homedir = esHome + File.separator + "target" + File.separator + "elasticsearch-" + version; } + public ElasticsearchRunner() { + this(DEFAULT_HOME_DIR); + } @Override protected String getDaemonShortName() { @@ -72,8 +94,8 @@ protected void killImpl(ElasticsearchStatus stat) throws IOException { @Override protected ElasticsearchStatus startImpl() throws IOException { - File data = new File(homedir + File.separator + "target" + File.separator + "es-data"); - File logs = new File(homedir + File.separator + "target" + File.separator + "es-logs"); + File data = new File(homedir + File.separator + "data"); + File logs = new File(homedir + File.separator + "logs"); if (data.exists() && data.isDirectory()) { log.info("Deleting {}", data); @@ -85,7 +107,7 @@ protected ElasticsearchStatus startImpl() throws IOException { FileUtils.deleteDirectory(logs); } - runCommand(homedir + File.separator + "bin/elasticsearch", "-d", "-p", ES_PID_FILE); + runCommand(homedir + File.separator + "bin" + 
File.separator + "elasticsearch", "-d", "-p", ES_PID_FILE); try { watchLog(" started", 60L, TimeUnit.SECONDS); } catch (InterruptedException e) { @@ -105,8 +127,7 @@ private void watchLog(String suffix, long duration, TimeUnit unit) throws Interr long durationMS = TimeUnit.MILLISECONDS.convert(duration, unit); long elapsedMS; - File logFile = new File(homedir + File.separator + "target" + File.separator - + "es-logs" + File.separator + "elasticsearch.log"); + File logFile = new File(homedir + File.separator + "logs" + File.separator + "elasticsearch.log"); log.info("Watching ES logfile {} for {} token", logFile, suffix); @@ -137,4 +158,15 @@ private void watchLog(String suffix, long duration, TimeUnit unit) throws Interr log.info("Elasticsearch logfile timeout ({} {})", elapsedMS, TimeUnit.MILLISECONDS); } + /** + * Start Elasticsearch process, load GraphOfTheGods, and stop process. Used for integration testing. + * @param args a singleton array containing a path to a JanusGraph config properties file + */ + public static void main(String[] args) { + final ElasticsearchRunner runner = new ElasticsearchRunner(); + runner.start(); + GraphOfTheGodsFactory.main(args); + runner.stop(); + } + } diff --git a/janusgraph-es/src/test/java/org/janusgraph/diskstorage/es/ThriftElasticsearchTest.java b/janusgraph-es/src/test/java/org/janusgraph/diskstorage/es/ThriftElasticsearchTest.java index e55dfb5895..8a3d7248f2 100644 --- a/janusgraph-es/src/test/java/org/janusgraph/diskstorage/es/ThriftElasticsearchTest.java +++ b/janusgraph-es/src/test/java/org/janusgraph/diskstorage/es/ThriftElasticsearchTest.java @@ -16,20 +16,33 @@ import org.janusgraph.CassandraStorageSetup; -import org.janusgraph.StorageSetup; import org.janusgraph.diskstorage.configuration.ModifiableConfiguration; import org.janusgraph.diskstorage.configuration.WriteConfiguration; import org.janusgraph.graphdb.JanusGraphIndexTest; +import org.junit.AfterClass; import org.junit.BeforeClass; import static 
org.janusgraph.CassandraStorageSetup.*; -import static org.janusgraph.diskstorage.es.ElasticSearchIndex.CLIENT_ONLY; -import static org.janusgraph.diskstorage.es.ElasticSearchIndex.LOCAL_MODE; -import static org.janusgraph.graphdb.configuration.GraphDatabaseConfiguration.INDEX_BACKEND; -import static org.janusgraph.graphdb.configuration.GraphDatabaseConfiguration.INDEX_DIRECTORY; +import static org.janusgraph.diskstorage.es.ElasticSearchIndex.BULK_REFRESH; +import static org.janusgraph.diskstorage.es.ElasticSearchIndex.INTERFACE; +import static org.janusgraph.graphdb.configuration.GraphDatabaseConfiguration.INDEX_HOSTS; public class ThriftElasticsearchTest extends JanusGraphIndexTest { + private static ElasticsearchRunner esr; + + @BeforeClass + public static void startElasticsearch() { + CassandraStorageSetup.startCleanEmbedded(); + esr = new ElasticsearchRunner(); + esr.start(); + } + + @AfterClass + public static void stopElasticsearch() { + esr.stop(); + } + public ThriftElasticsearchTest() { super(true, true, true); } @@ -39,10 +52,9 @@ public WriteConfiguration getConfiguration() { ModifiableConfiguration config = getCassandraThriftConfiguration(ThriftElasticsearchTest.class.getName()); //Add index - config.set(INDEX_BACKEND,"elasticsearch",INDEX); - config.set(LOCAL_MODE,true,INDEX); - config.set(CLIENT_ONLY,false,INDEX); - config.set(INDEX_DIRECTORY, StorageSetup.getHomeDir("es"),INDEX); + config.set(INTERFACE, ElasticSearchSetup.REST_CLIENT.toString(), INDEX); + config.set(INDEX_HOSTS, new String[]{ "127.0.0.1" }, INDEX); + config.set(BULK_REFRESH, "wait_for", INDEX); return config.getConfiguration(); } @@ -60,8 +72,4 @@ protected boolean supportsCollections() { return true; } - @BeforeClass - public static void beforeClass() { - CassandraStorageSetup.startCleanEmbedded(); - } } diff --git a/janusgraph-es/src/test/java/org/janusgraph/diskstorage/es/TransportBerkeleyElasticsearchTest.java 
b/janusgraph-es/src/test/java/org/janusgraph/diskstorage/es/TransportBerkeleyElasticsearchTest.java new file mode 100644 index 0000000000..3f86a49e6f --- /dev/null +++ b/janusgraph-es/src/test/java/org/janusgraph/diskstorage/es/TransportBerkeleyElasticsearchTest.java @@ -0,0 +1,38 @@ +// Copyright 2017 JanusGraph Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.janusgraph.diskstorage.es; + +import org.janusgraph.diskstorage.configuration.ModifiableConfiguration; +import org.janusgraph.diskstorage.configuration.WriteConfiguration; + +import static org.janusgraph.BerkeleyStorageSetup.getBerkeleyJEConfiguration; +import static org.janusgraph.diskstorage.es.ElasticSearchIndex.BULK_REFRESH; +import static org.janusgraph.diskstorage.es.ElasticSearchIndex.INTERFACE; +import static org.janusgraph.graphdb.configuration.GraphDatabaseConfiguration.INDEX_HOSTS; + +public class TransportBerkeleyElasticsearchTest extends BerkeleyElasticsearchTest { + + @Override + public WriteConfiguration getConfiguration() { + ModifiableConfiguration config = getBerkeleyJEConfiguration(); + //Add index + config.set(INTERFACE, ElasticSearchSetup.TRANSPORT_CLIENT.toString(), INDEX); + config.set(INDEX_HOSTS, new String[]{ "127.0.0.1:9300" }, INDEX); + config.set(BULK_REFRESH, "true", INDEX); + return config.getConfiguration(); + + } + +} diff --git a/janusgraph-es/src/test/java/org/janusgraph/diskstorage/es/TransportElasticSearchConfigTest.java 
b/janusgraph-es/src/test/java/org/janusgraph/diskstorage/es/TransportElasticSearchConfigTest.java new file mode 100644 index 0000000000..19f968f94c --- /dev/null +++ b/janusgraph-es/src/test/java/org/janusgraph/diskstorage/es/TransportElasticSearchConfigTest.java @@ -0,0 +1,23 @@ +// Copyright 2017 JanusGraph Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.janusgraph.diskstorage.es; + +public class TransportElasticSearchConfigTest extends ElasticSearchConfigTest { + + public ElasticSearchSetup getInterface() { + return ElasticSearchSetup.TRANSPORT_CLIENT; + } + +} diff --git a/janusgraph-es/src/test/java/org/janusgraph/diskstorage/es/TransportElasticSearchIndexTest.java b/janusgraph-es/src/test/java/org/janusgraph/diskstorage/es/TransportElasticSearchIndexTest.java new file mode 100644 index 0000000000..f3318f63a5 --- /dev/null +++ b/janusgraph-es/src/test/java/org/janusgraph/diskstorage/es/TransportElasticSearchIndexTest.java @@ -0,0 +1,37 @@ +// Copyright 2017 JanusGraph Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package org.janusgraph.diskstorage.es; + +import org.janusgraph.diskstorage.configuration.Configuration; +import org.janusgraph.diskstorage.configuration.ModifiableConfiguration; +import org.janusgraph.graphdb.configuration.GraphDatabaseConfiguration; + +import static org.janusgraph.diskstorage.es.ElasticSearchIndex.BULK_REFRESH; +import static org.janusgraph.diskstorage.es.ElasticSearchIndex.INTERFACE; +import static org.janusgraph.graphdb.configuration.GraphDatabaseConfiguration.INDEX_HOSTS; + +public class TransportElasticSearchIndexTest extends ElasticSearchIndexTest { + + @Override + public Configuration getESTestConfig() { + final String index = "es"; + ModifiableConfiguration config = GraphDatabaseConfiguration.buildGraphConfiguration(); + config.set(INTERFACE, ElasticSearchSetup.TRANSPORT_CLIENT.toString(), index); + config.set(INDEX_HOSTS, new String[]{ "127.0.0.1:9300" }, index); + config.set(BULK_REFRESH, "true", index); + return config.restrictTo(index); + } + +} diff --git a/janusgraph-es/src/test/java/org/janusgraph/diskstorage/es/TransportThriftElasticsearchTest.java b/janusgraph-es/src/test/java/org/janusgraph/diskstorage/es/TransportThriftElasticsearchTest.java new file mode 100644 index 0000000000..8dc8fc62c2 --- /dev/null +++ b/janusgraph-es/src/test/java/org/janusgraph/diskstorage/es/TransportThriftElasticsearchTest.java @@ -0,0 +1,39 @@ +// Copyright 2017 JanusGraph Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package org.janusgraph.diskstorage.es; + + +import org.janusgraph.diskstorage.configuration.ModifiableConfiguration; +import org.janusgraph.diskstorage.configuration.WriteConfiguration; + +import static org.janusgraph.CassandraStorageSetup.getCassandraThriftConfiguration; +import static org.janusgraph.diskstorage.es.ElasticSearchIndex.BULK_REFRESH; +import static org.janusgraph.diskstorage.es.ElasticSearchIndex.INTERFACE; +import static org.janusgraph.graphdb.configuration.GraphDatabaseConfiguration.INDEX_HOSTS; + +public class TransportThriftElasticsearchTest extends ThriftElasticsearchTest { + + @Override + public WriteConfiguration getConfiguration() { + ModifiableConfiguration config = + getCassandraThriftConfiguration(TransportThriftElasticsearchTest.class.getName()); + //Add index + config.set(INTERFACE, ElasticSearchSetup.TRANSPORT_CLIENT.toString(), INDEX); + config.set(INDEX_HOSTS, new String[]{ "127.0.0.1:9300" }, INDEX); + config.set(BULK_REFRESH, "true", INDEX); + return config.getConfiguration(); + } + +} diff --git a/janusgraph-es/src/test/resources/es_cfg_bogus_nodeclient.yml b/janusgraph-es/src/test/resources/es_cfg_bogus_nodeclient.yml deleted file mode 100644 index 29b13bca5e..0000000000 --- a/janusgraph-es/src/test/resources/es_cfg_bogus_nodeclient.yml +++ /dev/null @@ -1,4 +0,0 @@ -node.data: false -node.client: true -discovery.zen.ping.multicast.enabled: false -discovery.zen.ping.unicast.hosts: [ "10.11.12.13" ] diff --git a/janusgraph-es/src/test/resources/es_cfg_nodeclient.yml b/janusgraph-es/src/test/resources/es_cfg_nodeclient.yml deleted file mode 100644 index 22c6e28c20..0000000000 --- a/janusgraph-es/src/test/resources/es_cfg_nodeclient.yml +++ /dev/null @@ -1,4 +0,0 @@ -node.data: false -node.client: true -discovery.zen.ping.multicast.enabled: false -discovery.zen.ping.unicast.hosts: [ "localhost", "127.0.0.1:9300" ] diff --git 
a/janusgraph-es/src/test/resources/es_jvmlocal.yml b/janusgraph-es/src/test/resources/es_jvmlocal.yml deleted file mode 100644 index 89908984a3..0000000000 --- a/janusgraph-es/src/test/resources/es_jvmlocal.yml +++ /dev/null @@ -1,5 +0,0 @@ -node.data: true -node.client: false -node.local: true -path.home: ${project.build.directory}/es - diff --git a/janusgraph-es/src/test/resources/es_nodename_bar.yml b/janusgraph-es/src/test/resources/es_nodename_bar.yml deleted file mode 100644 index f7e83e11a4..0000000000 --- a/janusgraph-es/src/test/resources/es_nodename_bar.yml +++ /dev/null @@ -1 +0,0 @@ -node.name: bar diff --git a/janusgraph-es/src/test/resources/es_nodename_foo.yml b/janusgraph-es/src/test/resources/es_nodename_foo.yml deleted file mode 100644 index d74fbe25d5..0000000000 --- a/janusgraph-es/src/test/resources/es_nodename_foo.yml +++ /dev/null @@ -1 +0,0 @@ -node.name: foo diff --git a/janusgraph-es/src/test/resources/log4j.properties b/janusgraph-es/src/test/resources/log4j.properties index 8190f5f70e..4a6ccf7180 100644 --- a/janusgraph-es/src/test/resources/log4j.properties +++ b/janusgraph-es/src/test/resources/log4j.properties @@ -8,7 +8,7 @@ log4j.appender.A1.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss.SSS} [%t] %-5p # A2 is a ConsoleAppender. log4j.appender.A2=org.apache.log4j.ConsoleAppender -log4j.appender.A2.Threshold=WARN +log4j.appender.A2.Threshold=ERROR # A2 uses PatternLayout. log4j.appender.A2.layout=org.apache.log4j.PatternLayout log4j.appender.A2.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss.SSS} [%t] %-5p %c{2}: %m%n diff --git a/pom.xml b/pom.xml index 34e9974fc4..0ed3f87faa 100644 --- a/pom.xml +++ b/pom.xml @@ -88,6 +88,8 @@ vice-versa. --> 5.5.2 2.4.4 + 5.2.2 + 5.2.2 1.7.0 3.2.2 2.8.2 From 12dcf2126ac78b21006cf0f241e299b85aa8ef7d Mon Sep 17 00:00:00 2001 From: sjudeng Date: Tue, 14 Mar 2017 14:36:27 -0500 Subject: [PATCH 4/5] Support using sandboxed Painless scripts instead of Groovy with ES 5.x. 
Painless scripts are enabled by default and do not require custom dynamic scripting configuration to use. Added property, elasticsearch.groovy.inline, to control whether Groovy dynamic scripting is enabled on embedded ES instances. Signed-off-by: sjudeng --- .travis.yml | 6 +-- docs/elasticsearch.txt | 6 ++- docs/searchpredicates.txt | 1 + .../assembly/descriptor/common.component.xml | 2 +- .../assembly/static/conf/es/elasticsearch.yml | 3 +- janusgraph-es/pom.xml | 28 ++++++++++- .../diskstorage/es/ElasticMajorVersion.java | 23 +++++++++ .../diskstorage/es/ElasticSearchClient.java | 2 + .../diskstorage/es/ElasticSearchIndex.java | 30 ++++++------ .../diskstorage/es/ElasticSearchMutation.java | 8 ++-- .../es/TransportElasticSearchClient.java | 5 ++ .../es/rest/RestElasticSearchClient.java | 48 +++++++++++++++++++ .../src/test/config/elasticsearch.yml | 2 +- pom.xml | 3 ++ 14 files changed, 139 insertions(+), 28 deletions(-) create mode 100644 janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/ElasticMajorVersion.java diff --git a/.travis.yml b/.travis.yml index 2a60b56319..a11f332b5b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -20,8 +20,7 @@ env: - MODULE='berkeleyje' - MODULE='cassandra' - MODULE='es' ARGS='-DthreadCount=1' - - MODULE='es' ARGS='-DthreadCount=1 -Delasticsearch.dist.version=2.4.4' - - MODULE='es' ARGS='-DthreadCount=1 -Delasticsearch.dist.version=2.4.4 -Dtest=**/Transport*' + - MODULE='es' ARGS='-DthreadCount=1 -Pelasticsearch2' - MODULE='hadoop-parent/janusgraph-hadoop-2' - MODULE='hbase-parent/janusgraph-hbase-098' - MODULE='hbase-parent/janusgraph-hbase-10' @@ -43,8 +42,7 @@ matrix: # Currently broken due to too many log statements (exceeds 4MB) # https://travis-ci.org/JanusGraph/janusgraph/jobs/197472453 - env: MODULE='es' ARGS='-DthreadCount=1' - - env: MODULE='es' ARGS='-DthreadCount=1 -Delasticsearch.dist.version=2.4.4' - - env: MODULE='es' ARGS='-DthreadCount=1 -Delasticsearch.dist.version=2.4.4 -Dtest=**/Transport*' + - env: 
MODULE='es' ARGS='-DthreadCount=1 -Pelasticsearch2' # Currently broken due to too many log statements (exceeds 4MB) # https://travis-ci.org/JanusGraph/janusgraph/jobs/197672947 diff --git a/docs/elasticsearch.txt b/docs/elasticsearch.txt index 0c29489be1..a4e5e6f125 100644 --- a/docs/elasticsearch.txt +++ b/docs/elasticsearch.txt @@ -17,7 +17,11 @@ JanusGraph supports https://www.elastic.co/[Elasticsearch] as an index backend. Please see <> for details on what versions of ES will work with JanusGraph. [IMPORTANT] -JanusGraph currently requires https://www.elastic.co/guide/en/elasticsearch/reference/current/modules-scripting-security.html#enable-dynamic-scripting[Elasticsearch's dynamic scripting feature]. The `script.engine.groovy.inline.update` setting must be set to `true` on the Elasticsearch cluster. This configuration requirement may be removed in future JanusGraph versions. +=============================== +Beginning with Elasticsearch 5.0 JanusGraph uses sandboxed https://www.elastic.co/guide/en/elasticsearch/reference/master/modules-scripting-painless.html[Painless scripts] for inline updates, which are enabled by default in Elasticsearch 5.x. + +Using JanusGraph with Elasticsearch 2.x requires enabling Groovy inline scripting by setting `script.engine.groovy.inline.update` to `true` on the Elasticsearch cluster (see https://www.elastic.co/guide/en/elasticsearch/reference/current/modules-scripting-security.html#enable-dynamic-scripting[dynamic scripting documentation] for more information). +=============================== === Running Elasticsearch diff --git a/docs/searchpredicates.txt b/docs/searchpredicates.txt index 7ef11fc63c..40bd3dca7a 100644 --- a/docs/searchpredicates.txt +++ b/docs/searchpredicates.txt @@ -86,6 +86,7 @@ While JanusGraph's composite indexes support any data type that can be stored in Additional data types will be supported in the future. 
+[[geoshape]] === Geoshape Data Type The Geoshape data type supports representing a point, circle, box, line, polygon, multi-point, multi-line and multi-polygon. Index backends currently support indexing points, lines and polygons. Indexing multi-point, multi-line and multi-polygon properties has not been tested. Geospatial index lookups are only supported via mixed indexes. diff --git a/janusgraph-dist/src/assembly/descriptor/common.component.xml b/janusgraph-dist/src/assembly/descriptor/common.component.xml index bb7b81e1f4..916f5a188c 100644 --- a/janusgraph-dist/src/assembly/descriptor/common.component.xml +++ b/janusgraph-dist/src/assembly/descriptor/common.component.xml @@ -32,7 +32,7 @@ ${assembly.static.dir}/conf/es /elasticsearch/config - false + true diff --git a/janusgraph-dist/src/assembly/static/conf/es/elasticsearch.yml b/janusgraph-dist/src/assembly/static/conf/es/elasticsearch.yml index 13105a4d85..02ac9ab21d 100644 --- a/janusgraph-dist/src/assembly/static/conf/es/elasticsearch.yml +++ b/janusgraph-dist/src/assembly/static/conf/es/elasticsearch.yml @@ -1,4 +1,3 @@ path.data: db/es/data path.logs: log -script.engine.groovy.inline.update: true - +script.engine.groovy.inline.update: ${elasticsearch.groovy.inline} diff --git a/janusgraph-es/pom.xml b/janusgraph-es/pom.xml index 7eeef173db..68375b007f 100644 --- a/janusgraph-es/pom.xml +++ b/janusgraph-es/pom.xml @@ -257,8 +257,34 @@ - + + elasticsearch2 + + ${elasticsearch.version} + true + + + + + maven-surefire-plugin + + + default-test + + -Dtest.cassandra.confdir=${project.build.directory}/cassandra/conf/localhost-murmur -Dtest.cassandra.datadir=${project.build.directory}/cassandra/data/localhost-murmur + + false + + + + + + + + + janusgraph-release diff --git a/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/ElasticMajorVersion.java b/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/ElasticMajorVersion.java new file mode 100644 index 0000000000..4731320ce1 --- /dev/null +++ 
b/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/ElasticMajorVersion.java @@ -0,0 +1,23 @@ +// Copyright 2017 JanusGraph Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.janusgraph.diskstorage.es; + +public enum ElasticMajorVersion { + + TWO, + + FIVE + +} diff --git a/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/ElasticSearchClient.java b/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/ElasticSearchClient.java index 52e51a0c06..a2443de461 100644 --- a/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/ElasticSearchClient.java +++ b/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/ElasticSearchClient.java @@ -24,6 +24,8 @@ public interface ElasticSearchClient extends Closeable { + ElasticMajorVersion getMajorVersion(); + void clusterHealthRequest(String timeout) throws IOException; boolean indexExists(String indexName) throws IOException; diff --git a/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/ElasticSearchIndex.java b/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/ElasticSearchIndex.java index cfbb27f350..2edba2ffa6 100644 --- a/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/ElasticSearchIndex.java +++ b/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/ElasticSearchIndex.java @@ -20,8 +20,6 @@ import com.google.common.collect.LinkedListMultimap; import com.google.common.collect.Multimap; import org.apache.commons.lang.StringUtils; -import 
org.apache.tinkerpop.shaded.jackson.core.type.TypeReference; -import org.apache.tinkerpop.shaded.jackson.databind.ObjectMapper; import org.elasticsearch.Version; import org.elasticsearch.common.geo.ShapeRelation; import org.elasticsearch.common.geo.builders.LineStringBuilder; @@ -178,6 +176,7 @@ public class ElasticSearchIndex implements IndexProvider { private final ElasticSearchClient client; private final String indexName; private final int maxResultsSize; + private final String scriptLang; public ElasticSearchIndex(Configuration config) throws BackendException { indexName = config.get(INDEX_NAME); @@ -190,6 +189,9 @@ public ElasticSearchIndex(Configuration config) throws BackendException { maxResultsSize = config.get(INDEX_MAX_RESULT_SET_SIZE); log.debug("Configured ES query result set max size to {}", maxResultsSize); + scriptLang = client.getMajorVersion() == ElasticMajorVersion.TWO ? "groovy" : "painless"; + log.debug("Using {} script language", scriptLang); + try { client.clusterHealthRequest(config.get(HEALTH_REQUEST_TIMEOUT)); checkForOrCreateIndex(config); @@ -476,7 +478,7 @@ public void mutate(Map> mutations, KeyInforma requests.add(ElasticSearchMutation.createDeleteRequest(indexName, storename, docid)); } else { String script = getDeletionScript(informations, storename, mutation); - requests.add(ElasticSearchMutation.createUpdateRequest(indexName, storename, docid, script)); + requests.add(ElasticSearchMutation.createUpdateRequest(indexName, storename, docid, script, scriptLang)); log.trace("Adding script {}", script); } } @@ -490,9 +492,9 @@ public void mutate(Map> mutations, KeyInforma String script = getAdditionScript(informations, storename, mutation); if (needUpsert) { Map doc = getNewDocument(mutation.getAdditions(), informations.get(storename)); - requests.add(ElasticSearchMutation.createUpdateRequest(indexName, storename, docid, script, doc)); + requests.add(ElasticSearchMutation.createUpdateRequest(indexName, storename, docid, script, 
scriptLang, doc)); } else { - requests.add(ElasticSearchMutation.createUpdateRequest(indexName, storename, docid, script)); + requests.add(ElasticSearchMutation.createUpdateRequest(indexName, storename, docid, script, scriptLang)); } log.trace("Adding script {}", script); @@ -524,7 +526,7 @@ private String getDeletionScript(KeyInformation.IndexRetriever informations, Str break; case SET: case LIST: - String jsValue = convertToJsType(deletion.value); + String jsValue = convertToJsType(deletion.value, scriptLang); script.append("def index = ctx._source[\"" + deletion.field + "\"].indexOf(" + jsValue + "); ctx._source[\"" + deletion.field + "\"].remove(index);"); if (hasDualStringMapping(informations.get(storename, deletion.field))) { script.append("def index = ctx._source[\"" + getDualMappingName(deletion.field) + "\"].indexOf(" + jsValue + "); ctx._source[\"" + getDualMappingName(deletion.field) + "\"].remove(index);"); @@ -542,18 +544,16 @@ private String getAdditionScript(KeyInformation.IndexRetriever informations, Str KeyInformation keyInformation = informations.get(storename).get(e.field); switch (keyInformation.getCardinality()) { case SINGLE: - script.append("ctx._source[\"" + e.field + "\"] = " + convertToJsType(e.value) + ";"); + script.append("ctx._source[\"" + e.field + "\"] = " + convertToJsType(e.value, scriptLang) + ";"); if (hasDualStringMapping(keyInformation)) { - script.append("ctx._source[\"" + getDualMappingName(e.field) + "\"] = " + convertToJsType(e.value) + ";"); + script.append("ctx._source[\"" + getDualMappingName(e.field) + "\"] = " + convertToJsType(e.value, scriptLang) + ";"); } break; case SET: case LIST: - script.append("if(ctx._source[\"" + e.field + "\"] == null) {ctx._source[\"" + e.field + "\"] = []};"); - script.append("ctx._source[\"" + e.field + "\"].add(" + convertToJsType(e.value) + ");"); + script.append("ctx._source[\"" + e.field + "\"].add(" + convertToJsType(e.value, scriptLang) + ");"); if 
(hasDualStringMapping(keyInformation)) { - script.append("if(ctx._source[\"" + getDualMappingName(e.field) + "\"] == null) {ctx._source[\"" + e.field + "\"] = []};"); - script.append("ctx._source[\"" + getDualMappingName(e.field) + "\"].add(" + convertToJsType(e.value) + ");"); + script.append("ctx._source[\"" + getDualMappingName(e.field) + "\"].add(" + convertToJsType(e.value, scriptLang) + ");"); } break; @@ -563,7 +563,7 @@ private String getAdditionScript(KeyInformation.IndexRetriever informations, Str return script.toString(); } - private static String convertToJsType(Object value) throws PermanentBackendException { + private static String convertToJsType(Object value, String scriptLang) throws PermanentBackendException { try { XContentBuilder builder = XContentFactory.jsonBuilder().startObject(); @@ -579,7 +579,9 @@ private static String convertToJsType(Object value) throws PermanentBackendExcep int prefixLength = "{\"value\":".length(); int suffixLength = "}".length(); String result = s.substring(prefixLength, s.length() - suffixLength); - result = result.replace("$", "\\$"); + if (scriptLang.equals("groovy")) { + result = result.replace("$", "\\$"); + } return result; } catch (IOException e) { throw new PermanentBackendException("Could not write json"); diff --git a/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/ElasticSearchMutation.java b/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/ElasticSearchMutation.java index d55bbde4d8..4409387b2f 100644 --- a/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/ElasticSearchMutation.java +++ b/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/ElasticSearchMutation.java @@ -56,12 +56,12 @@ public static ElasticSearchMutation createIndexRequest(String index, String type return new ElasticSearchMutation(RequestType.INDEX, index, type, id, source); } - public static ElasticSearchMutation createUpdateRequest(String index, String type, String id, String script) { - return new 
ElasticSearchMutation(RequestType.UPDATE, index, type, id, ImmutableMap.of("script", ImmutableMap.of("inline", script, "lang", "groovy"))); + public static ElasticSearchMutation createUpdateRequest(String index, String type, String id, String script, String lang) { + return new ElasticSearchMutation(RequestType.UPDATE, index, type, id, ImmutableMap.of("script", ImmutableMap.of("inline", script, "lang", lang))); } - public static ElasticSearchMutation createUpdateRequest(String index, String type, String id, String script, Map upsert) { - return new ElasticSearchMutation(RequestType.UPDATE, index, type, id, ImmutableMap.of("script", ImmutableMap.of("inline", script, "lang", "groovy"), "upsert", upsert)); + public static ElasticSearchMutation createUpdateRequest(String index, String type, String id, String script, String lang, Map upsert) { + return new ElasticSearchMutation(RequestType.UPDATE, index, type, id, ImmutableMap.of("script", ImmutableMap.of("inline", script, "lang", lang), "upsert", upsert)); } public RequestType getRequestType() { diff --git a/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/TransportElasticSearchClient.java b/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/TransportElasticSearchClient.java index 99bcb0a0c7..9c9b1f9401 100644 --- a/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/TransportElasticSearchClient.java +++ b/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/TransportElasticSearchClient.java @@ -194,6 +194,11 @@ public void close() throws IOException { client.close(); } + @Override + public ElasticMajorVersion getMajorVersion() { + return ElasticMajorVersion.TWO; + } + public void setBulkRefresh(boolean bulkRefresh) { this.bulkRefresh = bulkRefresh; } diff --git a/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/rest/RestElasticSearchClient.java b/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/rest/RestElasticSearchClient.java index 4854b1eaa7..9b3a3aa9ca 100644 --- 
a/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/rest/RestElasticSearchClient.java +++ b/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/rest/RestElasticSearchClient.java @@ -17,6 +17,7 @@ import com.google.common.collect.ImmutableMap; import org.apache.http.HttpEntity; import org.apache.http.entity.ByteArrayEntity; +import org.apache.tinkerpop.shaded.jackson.annotation.JsonIgnoreProperties; import org.apache.tinkerpop.shaded.jackson.core.type.TypeReference; import org.apache.tinkerpop.shaded.jackson.databind.ObjectMapper; import org.apache.tinkerpop.shaded.jackson.databind.ObjectReader; @@ -29,6 +30,7 @@ import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.rest.RestStatus; import org.janusgraph.core.attribute.Geoshape; +import org.janusgraph.diskstorage.es.ElasticMajorVersion; import org.janusgraph.diskstorage.es.ElasticSearchClient; import org.janusgraph.diskstorage.es.ElasticSearchMutation; import org.janusgraph.diskstorage.es.ElasticSearchRequest; @@ -43,6 +45,8 @@ import java.util.List; import java.util.Map; import java.util.Objects; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import java.util.stream.Collectors; public class RestElasticSearchClient implements ElasticSearchClient { @@ -64,10 +68,13 @@ public class RestElasticSearchClient implements ElasticSearchClient { private RestClient delegate; + private ElasticMajorVersion majorVersion; + private String bulkRefresh; public RestElasticSearchClient(RestClient delegate) { this.delegate = delegate; + majorVersion = getMajorVersion(); } @Override @@ -75,6 +82,32 @@ public void close() throws IOException { delegate.close(); } + @Override + public ElasticMajorVersion getMajorVersion() { + if (majorVersion != null) { + return majorVersion; + } + + final Pattern pattern = Pattern.compile("(\\d+)\\.\\d+\\.\\d+"); + majorVersion = ElasticMajorVersion.TWO; + try { + final Response response = delegate.performRequest("GET", "/"); + try (final 
InputStream inputStream = response.getEntity().getContent()) { + final ClusterInfo info = mapper.readValue(inputStream, ClusterInfo.class); + final Matcher m = info.getVersion() != null ? pattern.matcher((String) info.getVersion().get("number")) : null; + if (m == null || !m.find() || Integer.valueOf(m.group(1)) < 5) { + majorVersion = ElasticMajorVersion.TWO; + } else { + majorVersion = ElasticMajorVersion.FIVE; + } + } + } catch (Exception e) { + log.warn("Unable to determine Elasticsearch server version. Assuming 2.x.", e); + } + + return majorVersion; + } + @Override public void clusterHealthRequest(String timeout) throws IOException { Map params = ImmutableMap.of("wait_for_status","yellow","timeout",timeout); @@ -223,4 +256,19 @@ private Response performRequest(String method, String path, byte[] requestData) return response; } + @JsonIgnoreProperties(ignoreUnknown=true) + private static final class ClusterInfo { + + private Map version; + + public Map getVersion() { + return version; + } + + public void setVersion(Map version) { + this.version = version; + } + + } + } diff --git a/janusgraph-es/src/test/config/elasticsearch.yml b/janusgraph-es/src/test/config/elasticsearch.yml index 7059e5b9d9..9a6bc3cc3c 100644 --- a/janusgraph-es/src/test/config/elasticsearch.yml +++ b/janusgraph-es/src/test/config/elasticsearch.yml @@ -1 +1 @@ -script.engine.groovy.inline.update: true +script.engine.groovy.inline.update: ${elasticsearch.groovy.inline} diff --git a/pom.xml b/pom.xml index 0ed3f87faa..9e002d12bf 100644 --- a/pom.xml +++ b/pom.xml @@ -90,6 +90,9 @@ 2.4.4 5.2.2 5.2.2 + + false 1.7.0 3.2.2 2.8.2 From 230382c959add916adf7f7b662bed13f008d00ee Mon Sep 17 00:00:00 2001 From: sjudeng Date: Sun, 19 Mar 2017 09:29:26 -0500 Subject: [PATCH 5/5] Support non-scripting field updates. Add tests for updating large string content. 
Signed-off-by: sjudeng --- .../es/ElasticSearchConstants.java | 11 ++-- .../diskstorage/es/ElasticSearchIndex.java | 53 ++++++++++++++----- .../diskstorage/es/ElasticSearchMutation.java | 9 ++-- .../es/TransportElasticSearchClient.java | 23 ++++++-- .../es/ElasticSearchIndexTest.java | 27 ++++++++++ 5 files changed, 97 insertions(+), 26 deletions(-) diff --git a/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/ElasticSearchConstants.java b/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/ElasticSearchConstants.java index 31e34817fe..cfb18e8ed5 100644 --- a/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/ElasticSearchConstants.java +++ b/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/ElasticSearchConstants.java @@ -24,11 +24,16 @@ public class ElasticSearchConstants { + private static final Logger log = LoggerFactory.getLogger(ElasticSearchConstants.class); + public static final String ES_PROPERTIES_FILE = "janusgraph-es.properties"; + public static final String ES_DOC_KEY = "doc"; + public static final String ES_UPSERT_KEY = "upsert"; + public static final String ES_SCRIPT_KEY = "script"; + public static final String ES_INLINE_KEY = "inline"; + public static final String ES_LANG_KEY = "lang"; public static final String ES_VERSION_EXPECTED; - - private static final Logger log = LoggerFactory.getLogger(ElasticSearchConstants.class); - + static { Properties props; diff --git a/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/ElasticSearchIndex.java b/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/ElasticSearchIndex.java index 2edba2ffa6..d95bb0a0c2 100644 --- a/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/ElasticSearchIndex.java +++ b/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/ElasticSearchIndex.java @@ -50,6 +50,10 @@ import org.janusgraph.diskstorage.configuration.ConfigNamespace; import org.janusgraph.diskstorage.configuration.ConfigOption; import 
org.janusgraph.diskstorage.configuration.Configuration; +import static org.janusgraph.diskstorage.es.ElasticSearchConstants.ES_DOC_KEY; +import static org.janusgraph.diskstorage.es.ElasticSearchConstants.ES_INLINE_KEY; +import static org.janusgraph.diskstorage.es.ElasticSearchConstants.ES_LANG_KEY; +import static org.janusgraph.diskstorage.es.ElasticSearchConstants.ES_SCRIPT_KEY; import org.janusgraph.diskstorage.indexing.IndexEntry; import org.janusgraph.diskstorage.indexing.IndexFeatures; import org.janusgraph.diskstorage.indexing.IndexMutation; @@ -478,7 +482,8 @@ public void mutate(Map> mutations, KeyInforma requests.add(ElasticSearchMutation.createDeleteRequest(indexName, storename, docid)); } else { String script = getDeletionScript(informations, storename, mutation); - requests.add(ElasticSearchMutation.createUpdateRequest(indexName, storename, docid, script, scriptLang)); + Map doc = ImmutableMap.of(ES_SCRIPT_KEY, ImmutableMap.of(ES_INLINE_KEY, script, ES_LANG_KEY, scriptLang)); + requests.add(ElasticSearchMutation.createUpdateRequest(indexName, storename, docid, doc)); log.trace("Adding script {}", script); } } @@ -488,16 +493,27 @@ public void mutate(Map> mutations, KeyInforma Map source = getNewDocument(mutation.getAdditions(), informations.get(storename)); requests.add(ElasticSearchMutation.createIndexRequest(indexName, storename, docid, source)); } else { - boolean needUpsert = !mutation.hasDeletions(); - String script = getAdditionScript(informations, storename, mutation); - if (needUpsert) { - Map doc = getNewDocument(mutation.getAdditions(), informations.get(storename)); - requests.add(ElasticSearchMutation.createUpdateRequest(indexName, storename, docid, script, scriptLang, doc)); + final Map upsert; + if (!mutation.hasDeletions()) { + upsert = getNewDocument(mutation.getAdditions(), informations.get(storename)); } else { - requests.add(ElasticSearchMutation.createUpdateRequest(indexName, storename, docid, script, scriptLang)); + upsert = null; } - 
log.trace("Adding script {}", script); + String inline = getAdditionScript(informations, storename, mutation); + if (!inline.isEmpty()) { + Map script = ImmutableMap.of(ES_INLINE_KEY, inline, ES_LANG_KEY, scriptLang); + final ImmutableMap.Builder builder = ImmutableMap.builder().put(ES_SCRIPT_KEY, script); + requests.add(ElasticSearchMutation.createUpdateRequest(indexName, storename, docid, builder, upsert)); + log.trace("Adding script {}", inline); + } + + Map doc = getAdditionDoc(informations, storename, mutation); + if (!doc.isEmpty()) { + final ImmutableMap.Builder builder = ImmutableMap.builder().put(ES_DOC_KEY, doc); + requests.add(ElasticSearchMutation.createUpdateRequest(indexName, storename, docid, builder, upsert)); + log.trace("Adding update {}", doc); + } } } @@ -543,12 +559,6 @@ private String getAdditionScript(KeyInformation.IndexRetriever informations, Str for (IndexEntry e : mutation.getAdditions()) { KeyInformation keyInformation = informations.get(storename).get(e.field); switch (keyInformation.getCardinality()) { - case SINGLE: - script.append("ctx._source[\"" + e.field + "\"] = " + convertToJsType(e.value, scriptLang) + ";"); - if (hasDualStringMapping(keyInformation)) { - script.append("ctx._source[\"" + getDualMappingName(e.field) + "\"] = " + convertToJsType(e.value, scriptLang) + ";"); - } - break; case SET: case LIST: script.append("ctx._source[\"" + e.field + "\"].add(" + convertToJsType(e.value, scriptLang) + ");"); @@ -563,6 +573,21 @@ private String getAdditionScript(KeyInformation.IndexRetriever informations, Str return script.toString(); } + private Map getAdditionDoc(KeyInformation.IndexRetriever informations, String storename, IndexMutation mutation) throws PermanentBackendException { + Map doc = new HashMap<>(); + for (IndexEntry e : mutation.getAdditions()) { + KeyInformation keyInformation = informations.get(storename).get(e.field); + if (keyInformation.getCardinality() == Cardinality.SINGLE) { + doc.put(e.field, 
convertToEsType(e.value)); + if (hasDualStringMapping(keyInformation)) { + doc.put(getDualMappingName(e.field), convertToEsType(e.value)); + } + } + } + + return doc; + } + private static String convertToJsType(Object value, String scriptLang) throws PermanentBackendException { try { XContentBuilder builder = XContentFactory.jsonBuilder().startObject(); diff --git a/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/ElasticSearchMutation.java b/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/ElasticSearchMutation.java index 4409387b2f..4b5abe0a68 100644 --- a/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/ElasticSearchMutation.java +++ b/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/ElasticSearchMutation.java @@ -56,12 +56,13 @@ public static ElasticSearchMutation createIndexRequest(String index, String type return new ElasticSearchMutation(RequestType.INDEX, index, type, id, source); } - public static ElasticSearchMutation createUpdateRequest(String index, String type, String id, String script, String lang) { - return new ElasticSearchMutation(RequestType.UPDATE, index, type, id, ImmutableMap.of("script", ImmutableMap.of("inline", script, "lang", lang))); + public static ElasticSearchMutation createUpdateRequest(String index, String type, String id, Map source) { + return new ElasticSearchMutation(RequestType.UPDATE, index, type, id, source); } - public static ElasticSearchMutation createUpdateRequest(String index, String type, String id, String script, String lang, Map upsert) { - return new ElasticSearchMutation(RequestType.UPDATE, index, type, id, ImmutableMap.of("script", ImmutableMap.of("inline", script, "lang", lang), "upsert", upsert)); + public static ElasticSearchMutation createUpdateRequest(String index, String type, String id, ImmutableMap.Builder builder, Map upsert) { + final Map source = upsert == null ? 
builder.build() : builder.put("upsert", upsert).build(); + return new ElasticSearchMutation(RequestType.UPDATE, index, type, id, source); } public RequestType getRequestType() { diff --git a/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/TransportElasticSearchClient.java b/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/TransportElasticSearchClient.java index 9c9b1f9401..ade2fa213d 100644 --- a/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/TransportElasticSearchClient.java +++ b/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/TransportElasticSearchClient.java @@ -40,6 +40,12 @@ import org.elasticsearch.search.SearchHits; import org.elasticsearch.search.sort.FieldSortBuilder; import org.elasticsearch.search.sort.SortOrder; +import static org.janusgraph.diskstorage.es.ElasticSearchConstants.ES_DOC_KEY; +import static org.janusgraph.diskstorage.es.ElasticSearchConstants.ES_INLINE_KEY; +import static org.janusgraph.diskstorage.es.ElasticSearchConstants.ES_LANG_KEY; +import static org.janusgraph.diskstorage.es.ElasticSearchConstants.ES_SCRIPT_KEY; +import static org.janusgraph.diskstorage.es.ElasticSearchConstants.ES_UPSERT_KEY; + import org.janusgraph.diskstorage.indexing.RawQuery; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -119,11 +125,18 @@ public void bulkRequest(List requests) throws IOException brb.add(new IndexRequest(indexName, type, id).source(request.getSource())); break; } case UPDATE: { - String inline = ((Map) request.getSource().get("script")).get("inline"); - Script script = new Script(inline, ScriptService.ScriptType.INLINE, null, null); - UpdateRequestBuilder update = client.prepareUpdate(indexName, type, id).setScript(script); - if (request.getSource().containsKey("upsert")) { - update.setUpsert((Map) request.getSource().get("upsert")); + UpdateRequestBuilder update = client.prepareUpdate(indexName, type, id); + if (request.getSource().containsKey(ES_SCRIPT_KEY)) { + Map script = ((Map) 
request.getSource().get(ES_SCRIPT_KEY)); + String inline = script.get(ES_INLINE_KEY); + String lang = script.get(ES_LANG_KEY); + update.setScript(new Script(inline, ScriptService.ScriptType.INLINE, lang, null)); + } + if (request.getSource().containsKey(ES_DOC_KEY)) { + update.setDoc((Map) request.getSource().get(ES_DOC_KEY)); + } + if (request.getSource().containsKey(ES_UPSERT_KEY)) { + update.setUpsert((Map) request.getSource().get(ES_UPSERT_KEY)); } brb.add(update); break; diff --git a/janusgraph-es/src/test/java/org/janusgraph/diskstorage/es/ElasticSearchIndexTest.java b/janusgraph-es/src/test/java/org/janusgraph/diskstorage/es/ElasticSearchIndexTest.java index a051dc147a..632c401730 100644 --- a/janusgraph-es/src/test/java/org/janusgraph/diskstorage/es/ElasticSearchIndexTest.java +++ b/janusgraph-es/src/test/java/org/janusgraph/diskstorage/es/ElasticSearchIndexTest.java @@ -17,6 +17,7 @@ import com.google.common.base.Throwables; import com.google.common.collect.HashMultimap; import com.google.common.collect.Multimap; +import org.apache.commons.lang.RandomStringUtils; import org.janusgraph.core.Cardinality; import org.janusgraph.core.JanusGraphException; import org.janusgraph.core.schema.Parameter; @@ -164,4 +165,30 @@ public void testUnescapedDollarInSet() throws Exception { assertEquals("unescaped", tx.query(new IndexQuery("vertex", PredicateCondition.of(PHONE_SET, Cmp.EQUAL, "$123"))).get(0)); assertEquals("unescaped", tx.query(new IndexQuery("vertex", PredicateCondition.of(PHONE_SET, Cmp.EQUAL, "12345"))).get(0)); } + + /** + * Test adding and overwriting with long string content. 
+ * + */ + @Test + public void testUpdateAdditionWithLongString() throws Exception { + initialize("vertex"); + Multimap initialDoc = HashMultimap.create(); + initialDoc.put(TEXT, RandomStringUtils.randomAlphanumeric(500000) + " bob " + RandomStringUtils.randomAlphanumeric(500000)); + + add("vertex", "long", initialDoc, true); + + clopen(); + + assertEquals(1, tx.query(new IndexQuery("vertex", PredicateCondition.of(TEXT, Text.CONTAINS, "bob"))).size()); + assertEquals(0, tx.query(new IndexQuery("vertex", PredicateCondition.of(TEXT, Text.CONTAINS, "world"))).size()); + + tx.add("vertex", "long", TEXT, RandomStringUtils.randomAlphanumeric(500000) + " world " + RandomStringUtils.randomAlphanumeric(500000), false); + + clopen(); + + assertEquals(0, tx.query(new IndexQuery("vertex", PredicateCondition.of(TEXT, Text.CONTAINS, "bob"))).size()); + assertEquals(1, tx.query(new IndexQuery("vertex", PredicateCondition.of(TEXT, Text.CONTAINS, "world"))).size()); + } + }