From c00fe0453fab8d6441c2ef04a5aa862da44a53ab Mon Sep 17 00:00:00 2001
From: anabranch <wac.chambers@gmail.com>
Date: Sun, 8 Jan 2017 08:54:46 -0800
Subject: [PATCH 1/5] updated to include all joins, make them consistent

---
 R/pkg/R/DataFrame.R                                 |  5 +++--
 python/pyspark/sql/dataframe.py                     |  5 +++--
 .../main/scala/org/apache/spark/sql/Dataset.scala   | 13 +++++++++----
 3 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index 7737ffe4ed43b..34a8eaa21a41d 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -2314,8 +2314,9 @@ setMethod("dropDuplicates",
 #' Column expression. If joinExpr is omitted, the default, inner join is attempted and an error is
 #' thrown if it would be a Cartesian Product. For Cartesian join, use crossJoin instead.
 #' @param joinType The type of join to perform. The following join types are available:
-#' 'inner', 'outer', 'full', 'fullouter', leftouter', 'left_outer', 'left',
-#' 'right_outer', 'rightouter', 'right', and 'leftsemi'. The default joinType is "inner".
+#' 'inner', 'cross', 'outer', 'full', 'full_outer', 'left', 'left_outer',
+#' 'right', 'right_outer', 'left_semi', 'left_anti', 'cross'.
+#' and 'cross'. The default joinType is "inner".
 #' @return A SparkDataFrame containing the result of the join operation.
 #' @family SparkDataFrame functions
 #' @aliases join,SparkDataFrame,SparkDataFrame-method
diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index b9d90384e3e2c..36a07cca49c99 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -730,8 +730,9 @@ def join(self, other, on=None, how=None):
             a join expression (Column), or a list of Columns.
             If `on` is a string or a list of strings indicating the name of the join column(s),
             the column(s) must exist on both sides, and this performs an equi-join.
-        :param how: str, default 'inner'.
-            One of `inner`, `outer`, `left_outer`, `right_outer`, `leftsemi`.
+        :param how: str, default 'inner'. Can be any of: `inner`, `cross`, `outer`,
+            `full`, `full_outer`, `left`, `left_outer`, `right`, `right_outer`,
+            `left_semi`, `left_anti`, `cross`.
 
         The following performs a full outer join between ``df1`` and ``df2``.
 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
index fd75d51538029..8153994b4b69a 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
@@ -750,14 +750,17 @@ class Dataset[T] private[sql](
   }
 
   /**
-   * Equi-join with another `DataFrame` using the given columns.
+   * Equi-join with another `DataFrame` using the given columns. A cross join with a predicate
+   * is specified as an inner join. To explicitly perform a cross join, use the
+   * `crossJoin` method.
    *
    * Different from other join functions, the join columns will only appear once in the output,
    * i.e. similar to SQL's `JOIN USING` syntax.
    *
    * @param right Right side of the join operation.
    * @param usingColumns Names of the columns to join on. This columns must exist on both sides.
-   * @param joinType One of: `inner`, `outer`, `left_outer`, `right_outer`, `leftsemi`.
+   * @param joinType One of: `inner`, `cross`, `outer`, `full`, `full_outer`,
+   *                 `left`, `left_outer`, `right`, `right_outer`, `left_semi`, `left_anti`.
    *
    * @note If you perform a self-join using this function without aliasing the input
    * `DataFrame`s, you will NOT be able to reference any columns after the join, since
@@ -812,7 +815,8 @@ class Dataset[T] private[sql](
    *
    * @param right Right side of the join.
    * @param joinExprs Join expression.
-   * @param joinType One of: `inner`, `outer`, `left_outer`, `right_outer`, `leftsemi`.
+   * @param joinType One of: `inner`, `cross`, `outer`, `full`, `full_outer`,
+   *                 `left`, `left_outer`, `right`, `right_outer`, `left_semi`, `left_anti`.
    *
    * @group untypedrel
    * @since 2.0.0
@@ -889,7 +893,8 @@ class Dataset[T] private[sql](
    *
    * @param other Right side of the join.
    * @param condition Join expression.
-   * @param joinType One of: `inner`, `outer`, `left_outer`, `right_outer`, `leftsemi`.
+   * @param joinType One of: `inner`, `cross`, `outer`, `full`, `full_outer`,
+   *                 `left`, `left_outer`, `right`, `right_outer`, `left_semi`, `left_anti`.
    *
    * @group typedrel
    * @since 1.6.0

From f1526fc298f18e09729f411c759fffd69233f0e3 Mon Sep 17 00:00:00 2001
From: anabranch <wac.chambers@gmail.com>
Date: Sun, 8 Jan 2017 09:03:21 -0800
Subject: [PATCH 2/5] clean up labelling

---
 R/pkg/R/DataFrame.R                               |  7 +++----
 python/pyspark/sql/dataframe.py                   |  6 +++---
 .../main/scala/org/apache/spark/sql/Dataset.scala | 15 +++++++++------
 3 files changed, 15 insertions(+), 13 deletions(-)

diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index 34a8eaa21a41d..f447e7650a8ee 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -2313,10 +2313,9 @@ setMethod("dropDuplicates",
 #' @param joinExpr (Optional) The expression used to perform the join. joinExpr must be a
 #' Column expression. If joinExpr is omitted, the default, inner join is attempted and an error is
 #' thrown if it would be a Cartesian Product. For Cartesian join, use crossJoin instead.
-#' @param joinType The type of join to perform. The following join types are available:
-#' 'inner', 'cross', 'outer', 'full', 'full_outer', 'left', 'left_outer',
-#' 'right', 'right_outer', 'left_semi', 'left_anti', 'cross'.
-#' and 'cross'. The default joinType is "inner".
+#' @param joinType The type of join to perform, default 'inner'.
+#' Must be one of: 'inner', 'cross', 'outer', 'full', 'full_outer',
+#' 'left', 'left_outer', 'right', 'right_outer', 'left_semi', and 'left_anti'.
 #' @return A SparkDataFrame containing the result of the join operation.
 #' @family SparkDataFrame functions
 #' @aliases join,SparkDataFrame,SparkDataFrame-method
diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index 36a07cca49c99..6e304c3221971 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -730,9 +730,9 @@ def join(self, other, on=None, how=None):
             a join expression (Column), or a list of Columns.
             If `on` is a string or a list of strings indicating the name of the join column(s),
             the column(s) must exist on both sides, and this performs an equi-join.
-        :param how: str, default 'inner'. Can be any of: `inner`, `cross`, `outer`,
-            `full`, `full_outer`, `left`, `left_outer`, `right`, `right_outer`,
-            `left_semi`, `left_anti`, `cross`.
+        :param how: str, default ``inner``. Must be any of: ``inner``, ``cross``, ``outer``,
+            ``full``, ``full_outer``, ``left``, ``left_outer``, ``right``, ``right_outer``,
+            ``left_semi``, and ``left_anti``.
 
         The following performs a full outer join between ``df1`` and ``df2``.
 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
index 8153994b4b69a..054c0e38593c0 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
@@ -759,8 +759,9 @@ class Dataset[T] private[sql](
    *
    * @param right Right side of the join operation.
    * @param usingColumns Names of the columns to join on. This columns must exist on both sides.
-   * @param joinType One of: `inner`, `cross`, `outer`, `full`, `full_outer`,
-   *                 `left`, `left_outer`, `right`, `right_outer`, `left_semi`, `left_anti`.
+   * @param joinType Type of join to perform. Default `inner`. Must be any of:
+   *                 `inner`, `cross`, `outer`, `full`, `full_outer`, `left`, `left_outer`,
+   *                 `right`, `right_outer`, `left_semi`, `left_anti`.
    *
    * @note If you perform a self-join using this function without aliasing the input
    * `DataFrame`s, you will NOT be able to reference any columns after the join, since
@@ -815,8 +816,9 @@ class Dataset[T] private[sql](
    *
    * @param right Right side of the join.
    * @param joinExprs Join expression.
-   * @param joinType One of: `inner`, `cross`, `outer`, `full`, `full_outer`,
-   *                 `left`, `left_outer`, `right`, `right_outer`, `left_semi`, `left_anti`.
+   * @param joinType Type of join to perform. Default `inner`. Must be any of:
+   *                 `inner`, `cross`, `outer`, `full`, `full_outer`, `left`, `left_outer`,
+   *                 `right`, `right_outer`, `left_semi`, `left_anti`.
    *
    * @group untypedrel
    * @since 2.0.0
@@ -893,8 +895,9 @@ class Dataset[T] private[sql](
    *
    * @param other Right side of the join.
    * @param condition Join expression.
-   * @param joinType One of: `inner`, `cross`, `outer`, `full`, `full_outer`,
-   *                 `left`, `left_outer`, `right`, `right_outer`, `left_semi`, `left_anti`.
+   * @param joinType Type of join to perform. Default `inner`. Must be any of:
+   *                 `inner`, `cross`, `outer`, `full`, `full_outer`, `left`, `left_outer`,
+   *                 `right`, `right_outer`, `left_semi`, `left_anti`.
    *
    * @group typedrel
    * @since 1.6.0

From fe27b0c144224556331d44c777ec9a8c1a87a43e Mon Sep 17 00:00:00 2001
From: anabranch <wac.chambers@gmail.com>
Date: Sun, 8 Jan 2017 09:04:24 -0800
Subject: [PATCH 3/5] wording

---
 R/pkg/R/DataFrame.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index f447e7650a8ee..4f643bfedeb5f 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -2314,7 +2314,7 @@ setMethod("dropDuplicates",
 #' Column expression. If joinExpr is omitted, the default, inner join is attempted and an error is
 #' thrown if it would be a Cartesian Product. For Cartesian join, use crossJoin instead.
 #' @param joinType The type of join to perform, default 'inner'.
-#' Must be one of: 'inner', 'cross', 'outer', 'full', 'full_outer',
+#' Must be any of: 'inner', 'cross', 'outer', 'full', 'full_outer',
 #' 'left', 'left_outer', 'right', 'right_outer', 'left_semi', and 'left_anti'.
 #' @return A SparkDataFrame containing the result of the join operation.
 #' @family SparkDataFrame functions

From 61b6c0720c6d13a7e5f960be4cfd5b92106ac690 Mon Sep 17 00:00:00 2001
From: anabranch <wac.chambers@gmail.com>
Date: Sun, 8 Jan 2017 15:39:34 -0800
Subject: [PATCH 4/5] address comments

---
 R/pkg/R/DataFrame.R | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index 4f643bfedeb5f..5c6c779804acd 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -2315,7 +2315,7 @@ setMethod("dropDuplicates",
 #' thrown if it would be a Cartesian Product. For Cartesian join, use crossJoin instead.
 #' @param joinType The type of join to perform, default 'inner'.
 #' Must be any of: 'inner', 'cross', 'outer', 'full', 'full_outer',
-#' 'left', 'left_outer', 'right', 'right_outer', 'left_semi', and 'left_anti'.
+#' 'left', 'left_outer', 'right', 'right_outer', 'left_semi', or 'left_anti'.
 #' @return A SparkDataFrame containing the result of the join operation.
 #' @family SparkDataFrame functions
 #' @aliases join,SparkDataFrame,SparkDataFrame-method
@@ -2344,15 +2344,18 @@ setMethod("join",
               if (is.null(joinType)) {
                 sdf <- callJMethod(x@sdf, "join", y@sdf, joinExpr@jc)
               } else {
-                if (joinType %in% c("inner", "outer", "full", "fullouter",
-                    "leftouter", "left_outer", "left",
-                    "rightouter", "right_outer", "right", "leftsemi")) {
+                if (joinType %in% c("inner", "cross",
+                    "outer", "full", "fullouter", "full_outer",
+                    "left", "leftouter", "left_outer",
+                    "right", "rightouter", "right_outer",
+                    "left_semi", "leftsemi", "left_anti", "leftanti")) {
                   joinType <- gsub("_", "", joinType)
                   sdf <- callJMethod(x@sdf, "join", y@sdf, joinExpr@jc, joinType)
                 } else {
                   stop("joinType must be one of the following types: ",
-                      "'inner', 'outer', 'full', 'fullouter', 'leftouter', 'left_outer', 'left',
-                      'rightouter', 'right_outer', 'right', 'leftsemi'")
+                       "'inner', 'cross', 'outer', 'full', 'full_outer', ",
+                       "'left', 'left_outer', 'right', 'right_outer', ",
+                       "'left_semi', or 'left_anti'.")
                 }
               }
             }

From 66b71808609e8386be56f517889a427c53d88c0e Mon Sep 17 00:00:00 2001
From: anabranch <wac.chambers@gmail.com>
Date: Sun, 8 Jan 2017 16:10:57 -0800
Subject: [PATCH 5/5] grammar

---
 R/pkg/R/DataFrame.R                                        | 2 +-
 python/pyspark/sql/dataframe.py                            | 2 +-
 sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala | 6 +++---
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index 5c6c779804acd..c56648a8c4fba 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -2314,7 +2314,7 @@ setMethod("dropDuplicates",
 #' Column expression. If joinExpr is omitted, the default, inner join is attempted and an error is
 #' thrown if it would be a Cartesian Product. For Cartesian join, use crossJoin instead.
 #' @param joinType The type of join to perform, default 'inner'.
-#' Must be any of: 'inner', 'cross', 'outer', 'full', 'full_outer',
+#' Must be one of: 'inner', 'cross', 'outer', 'full', 'full_outer',
 #' 'left', 'left_outer', 'right', 'right_outer', 'left_semi', or 'left_anti'.
 #' @return A SparkDataFrame containing the result of the join operation.
 #' @family SparkDataFrame functions
diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index 6e304c3221971..10e42d0f9d322 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -730,7 +730,7 @@ def join(self, other, on=None, how=None):
             a join expression (Column), or a list of Columns.
             If `on` is a string or a list of strings indicating the name of the join column(s),
             the column(s) must exist on both sides, and this performs an equi-join.
-        :param how: str, default ``inner``. Must be any of: ``inner``, ``cross``, ``outer``,
+        :param how: str, default ``inner``. Must be one of: ``inner``, ``cross``, ``outer``,
             ``full``, ``full_outer``, ``left``, ``left_outer``, ``right``, ``right_outer``,
             ``left_semi``, and ``left_anti``.
 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
index 054c0e38593c0..1a7a5ba798077 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
@@ -759,7 +759,7 @@ class Dataset[T] private[sql](
    *
    * @param right Right side of the join operation.
    * @param usingColumns Names of the columns to join on. This columns must exist on both sides.
-   * @param joinType Type of join to perform. Default `inner`. Must be any of:
+   * @param joinType Type of join to perform. Default `inner`. Must be one of:
    *                 `inner`, `cross`, `outer`, `full`, `full_outer`, `left`, `left_outer`,
    *                 `right`, `right_outer`, `left_semi`, `left_anti`.
    *
@@ -816,7 +816,7 @@ class Dataset[T] private[sql](
    *
    * @param right Right side of the join.
    * @param joinExprs Join expression.
-   * @param joinType Type of join to perform. Default `inner`. Must be any of:
+   * @param joinType Type of join to perform. Default `inner`. Must be one of:
    *                 `inner`, `cross`, `outer`, `full`, `full_outer`, `left`, `left_outer`,
    *                 `right`, `right_outer`, `left_semi`, `left_anti`.
    *
@@ -895,7 +895,7 @@ class Dataset[T] private[sql](
    *
    * @param other Right side of the join.
    * @param condition Join expression.
-   * @param joinType Type of join to perform. Default `inner`. Must be any of:
+   * @param joinType Type of join to perform. Default `inner`. Must be one of:
    *                 `inner`, `cross`, `outer`, `full`, `full_outer`, `left`, `left_outer`,
    *                 `right`, `right_outer`, `left_semi`, `left_anti`.
    *