diff --git a/modules/ROOT/examples/Columnar.py b/modules/ROOT/examples/Columnar.py
index 396822e..3c6e0e1 100644
--- a/modules/ROOT/examples/Columnar.py
+++ b/modules/ROOT/examples/Columnar.py
@@ -2,14 +2,14 @@
 # tag::init[]
 spark = (SparkSession.builder
-         .appName("Couchbase Spark Connector Columnar Example")
+         .appName("Couchbase Spark Connector Capella Analytics Example")
          # Note whether you need the .master(...) and .config("spark.jars"...) lines depends on how you are using Spark.
          # See our PySpark documentation for more details.
         .master("local[*]")
         .config("spark.jars", "/path/to/spark-connector-assembly-.jar")
-        .config("spark.couchbase.connectionString", "couchbases://cb.your.columnar.connection.string.com")
-        .config("spark.couchbase.username", "YourColumnarUsername")
-        .config("spark.couchbase.password", "YourColumnarPassword")
+        .config("spark.couchbase.connectionString", "couchbases://cb.your.capella.analytics.connection.string.com")
+        .config("spark.couchbase.username", "YourCapellaUsername")
+        .config("spark.couchbase.password", "YourCapellaPassword")
         .getOrCreate())
 # end::init[]
diff --git a/modules/ROOT/examples/Columnar.scala b/modules/ROOT/examples/Columnar.scala
index 3f4ad31..1c92861 100644
--- a/modules/ROOT/examples/Columnar.scala
+++ b/modules/ROOT/examples/Columnar.scala
@@ -4,9 +4,9 @@ object Columnar {
   // tag::init[]
   val spark = SparkSession
     .builder()
-    .appName("Couchbase Spark Connector Columnar Example") // your app name
+    .appName("Couchbase Spark Connector Capella Analytics Example") // your app name
     .master("local[*]") // your local or remote Spark master node
-    .config("spark.couchbase.connectionString", "couchbases://your-columnar-endpoint.cloud.couchbase.com")
+    .config("spark.couchbase.connectionString", "couchbases://your-capella-analytics-endpoint.cloud.couchbase.com")
     .config("spark.couchbase.username", "username")
     .config("spark.couchbase.password", "password")
     .getOrCreate()
diff --git a/modules/ROOT/nav.adoc b/modules/ROOT/nav.adoc
index dd57b0f..4cb73e6 100644
--- a/modules/ROOT/nav.adoc
+++ b/modules/ROOT/nav.adoc
@@ -8,7 +8,7 @@
 * xref:java-api.adoc[Java API]
 * xref:streaming.adoc[Structured Streaming Support]
 * xref:spark-shell.adoc[Using the Spark Shell]
-* xref:columnar.adoc[Columnar Support]
+* xref:enterprise-analytics.adoc[Analytics Support]
 * xref:databricks.adoc[Databricks Support]
 * xref:configuration.adoc[Configuration]
 * xref:release-notes.adoc[Release Notes]
diff --git a/modules/ROOT/pages/columnar.adoc b/modules/ROOT/pages/enterprise-analytics.adoc
similarity index 85%
rename from modules/ROOT/pages/columnar.adoc
rename to modules/ROOT/pages/enterprise-analytics.adoc
index e713695..8e624b9 100644
--- a/modules/ROOT/pages/columnar.adoc
+++ b/modules/ROOT/pages/enterprise-analytics.adoc
@@ -1,17 +1,20 @@
-= Capella Columnar Support
-:nav-title: Columnar Support
+= Enterprise Analytics Support
+:nav-title: Analytics Support
+:page-aliases: columnar.adoc
+:description: Connecting to a Capella Analytics or Enterprise Analytics cluster is very similar to connecting to any Couchbase cluster.
 
 [abstract]
-Connecting to Capella Columnar is very similar to connecting to any Couchbase cluster. This section explains how.
+{description}
+This section explains how.
 
-Capella Columnar clusters can be connected to using either Scala or PySpark.
+You can connect to Capella Analytics and Enterprise Analytics clusters using either Scala or PySpark.
 To get bootstrapped, users should view the xref:getting-started.adoc[Scala getting started] or xref:pyspark.adoc[PySpark getting started] guides.
 
 == Spark Configuration
 
-The first step as usual is to create a `SparkSession`, here connecting to your Capella Columnar cluster.
+The first step, as usual, is to create a `SparkSession`, here connecting to your Capella Analytics or Enterprise Analytics cluster.
 This works just like connecting to any other type of Couchbase cluster.
 
 [{tabs}]
 ====
@@ -136,5 +139,5 @@
 include::example$Columnar.py[tag=sql,indent=0]
 --
 ====
 
-Note this SQL is executed purely within Spark, and is not sent to the Capella Columnar cluster.
+Note this SQL is executed purely within Spark, and is not sent to the Analytics cluster.
diff --git a/modules/ROOT/pages/index.adoc b/modules/ROOT/pages/index.adoc
index d3b2e67..6d19784 100644
--- a/modules/ROOT/pages/index.adoc
+++ b/modules/ROOT/pages/index.adoc
@@ -11,7 +11,7 @@ Please see one of the following getting started guide:
 * xref:pyspark.adoc[PySpark getting started].
 * xref:java-api.adoc[Java getting started].
 
-The Couchbase Spark Connector supports any type of Couchbase cluster. Users of Capella Columnar should see the xref:columnar.adoc[Columnar Support] page.
+The Couchbase Spark Connector supports any type of Couchbase cluster. Users of Capella Analytics or Enterprise Analytics should see the xref:enterprise-analytics.adoc[Analytics Support] page.
 
 == Compatibility
diff --git a/modules/ROOT/pages/spark-sql.adoc b/modules/ROOT/pages/spark-sql.adoc
index 899b16b..7b8fa81 100644
--- a/modules/ROOT/pages/spark-sql.adoc
+++ b/modules/ROOT/pages/spark-sql.adoc
@@ -1,6 +1,6 @@
 = DataFrames, Datasets, and SQL
 :page-topic-type: concept
-:description: Spark allows accessing query, analytics, Enterprise Analytics, and Capella Columnar in powerful and convenient ways.
+:description: Spark allows accessing query, analytics, Enterprise Analytics, and Capella Analytics in powerful and convenient ways.
 
 [abstract]
@@ -12,7 +12,9 @@
 Let's see how to use those with Couchbase.
 Before you can create DataFrames or Datasets with Couchbase, you need to create a `SparkSession`.
-See xref:getting-started.adoc[Getting Started (Scala)] or xref:pyspark.adoc[Getting Start(PySpark)]
+See xref:getting-started.adoc[Getting Started (Scala)] or xref:pyspark.adoc[Getting Started (PySpark)].
+
 == Running examples
-All query examples presented on this page at least require the `travel-sample` dataset to be installed on your Couchbase cluster, with a primary index.
+
+All query examples presented on this page require at least the `travel-sample` dataset to be installed on your Couchbase cluster, with a primary index.
 If you haven't done so already, you can create a primary index by executing this {sqlpp} statement: `pass:c[CREATE PRIMARY INDEX ON `travel-sample`]`.
@@ -23,7 +25,22 @@ Enterprise Analytics is also supported, and provides enhanced analytics capabili
 == DataFrames
 
 A read `DataFrame` can be created through `spark.read.format(...)`, and which format to choose depends on the type of service you want to use.
-For `spark.read`, `couchbase.query`, `couchbase.analytics`, `couchbase.enterprise-analytics`, and `couchbase.columnar` are available.
+For `spark.read`, the available formats are `couchbase.query`, `couchbase.analytics`, `couchbase.enterprise-analytics`, and `couchbase.columnar`, as summarized in the following table.
+
+
+.Spark DataFrame Creation by Analytics Service
+|===
+| Analytics solution | Spark method
+
+a| xref:enterprise-analytics:intro:intro.adoc[Enterprise Analytics]
+| `couchbase.enterprise-analytics`
+
+a| xref:analytics:intro:intro.adoc[Capella Analytics] -- _formerly known as Columnar_
+| `couchbase.columnar`
+
+a| xref:server:learn:services-and-indexes/services/analytics-service.adoc[CBAS (Couchbase Analytics Service)] -- row-based analytics in operational clusters (self-managed or Capella Operational)
+| `couchbase.analytics`
+|===
 
 (For reading from the KV service - see xref:working-with-rdds.adoc[RDDs])
@@ -60,7 +77,7 @@ PySpark::
 +
 --
 [source,python]
 ----
 include::example$DataFrames.py[tag=query-collection,indent=0]
 ----
-Scala (Columnar)::
+Scala (Capella Analytics, _a.k.a._ Columnar)::
 +
 --
 [source,scala]
 ----
@@ -86,7 +103,9 @@
 include::example$DataFrames.py[tag=enterprise-analytics-collection,indent=0]
 --
 ====
 
-Note all query options are documented here xref:_query_options[Query Options], all Capella Columnar options here xref:_columnar_options[Capella Columnar options], and all Enterprise Analytics options here xref:_enterprise_analytics_options[Enterprise Analytics options].
+Note that all query options are documented in xref:_query_options[Query Options],
+all Capella Analytics options in xref:_columnar_options[Capella Analytics options],
+and all Enterprise Analytics options in xref:_enterprise_analytics_options[Enterprise Analytics options].
 
 If you will usually be using the same collection, it can be more convenient to provide it in the SparkSession config instead:
@@ -111,13 +130,13 @@ We can limit both the inference and the {sqlpp} executed from the DataFrame to o
 [{tabs}]
 ====
-Scala (Columnar)::
+Scala (Capella Analytics, _a.k.a._ Columnar)::
 +
 [source,scala]
 ----
 include::example$SparkSQL.scala[tag=queryfilter,indent=0]
 ----
-PySpark (Columnar)::
+PySpark (Capella Analytics, _a.k.a._ Columnar)::
 +
 [source,python]
 ----
@@ -595,6 +614,7 @@ The available options for Enterprise Analytics DataFrame and Dataset operations:
 
 [#_kv_options]
 === KeyValue options
-The available options for query DataFrame and Dataset operations:
+
+The available options for KeyValue DataFrame and Dataset operations:
 
 .KeyValue Options
@@ -670,7 +690,8 @@ The available options for query DataFrame and Dataset operations:
 
 [#_columnar_options]
-=== Capella Columnar options
-The available options for Capella Columnar DataFrame and Dataset operations:
+=== Capella Analytics options
+
+The available options for Capella Analytics DataFrame and Dataset operations:
 
-.Capella Columnar Options
+.Capella Analytics Options
@@ -680,17 +701,17 @@ The available options for Capella Columnar DataFrame and Dataset operations:
 | "database"
 | ColumnarOptions.Database
-| Name of a Couchbase Columnar database
+| Name of a Couchbase Analytics database
 | Mandatory
 
 | "scope"
 | ColumnarOptions.Scope
-| Name of a Couchbase Columnar scope
+| Name of a Couchbase Analytics scope
 | Mandatory
 
 | "collection"
 | ColumnarOptions.Collection
-| Name of a Couchbase Columnar collection
+| Name of a Couchbase Analytics collection
 | Mandatory
 
 | "timeout"
@@ -705,7 +726,7 @@ The available options for Capella Columnar DataFrame and Dataset operations:
 
 | "scanConsistency"
 | ColumnarOptions.ScanConsistency
-| Valid options are "requestPlus" / ColumnarOptions.RequestPlusScanConsistency and "notBounded" / ColumnarOptions.NotBoundedScanConsistency.
+| Valid options are `requestPlus` / `ColumnarOptions.RequestPlusScanConsistency` and `notBounded` / `ColumnarOptions.NotBoundedScanConsistency`.
 | "notBounded"
 
 | "inferLimit"
@@ -722,7 +743,7 @@ The available options for Capella Columnar DataFrame and Dataset operations:
 
 == Aggregate Push Down
 
-The following predicates are pushed down to the query, operational analytics, Enterprise Analytics, and Capella Columnar engines if possible:
+The following predicates are pushed down to the query, operational analytics, Enterprise Analytics, and Capella Analytics engines if possible:
 
 - `MIN(field)`
 - `MAX(field)`
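
Putting the renamed pieces together, below is a minimal, self-contained Scala sketch of reading a Capella Analytics (formerly Columnar) collection, using the `couchbase.columnar` format and the mandatory `database`, `scope`, and `collection` options from the tables above; `scanConsistency` is optional and defaults to `notBounded`. The endpoint, credentials, and the `travel-sample`/`inventory`/`airline` names are placeholders, not values taken from this change, and the connector assembly is assumed to be on the classpath as in the configuration examples above.

[source,scala]
----
import org.apache.spark.sql.SparkSession

object CapellaAnalyticsReadSketch {
  def main(args: Array[String]): Unit = {
    // Placeholder connection details; substitute your own cluster's values.
    val spark = SparkSession
      .builder()
      .appName("Couchbase Spark Connector Capella Analytics Example")
      .master("local[*]")
      .config("spark.couchbase.connectionString", "couchbases://your-capella-analytics-endpoint.cloud.couchbase.com")
      .config("spark.couchbase.username", "username")
      .config("spark.couchbase.password", "password")
      .getOrCreate()

    // "database", "scope", and "collection" are the mandatory ColumnarOptions;
    // "scanConsistency" accepts "requestPlus" or "notBounded" (the default).
    val airlines = spark.read
      .format("couchbase.columnar")
      .option("database", "travel-sample") // placeholder database name
      .option("scope", "inventory")        // placeholder scope name
      .option("collection", "airline")     // placeholder collection name
      .option("scanConsistency", "requestPlus")
      .load()

    airlines.show(10)
    spark.stop()
  }
}
----

The same read pattern applies to the `couchbase.enterprise-analytics` and `couchbase.analytics` formats, substituting the options documented in their respective sections.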