From 7ea7dec0a3c5f6694a6a7ef51409a9277aeb733f Mon Sep 17 00:00:00 2001
From: Maxim Gekk <maxim.gekk@databricks.com>
Date: Sat, 5 May 2018 18:56:46 +0200
Subject: [PATCH 01/10] Separate JSONOptions in read

---
 .../spark/sql/catalyst/json/JSONOptions.scala | 61 +++++++++++++------
 .../datasources/json/JsonFileFormat.scala     |  9 +--
 .../datasources/json/JsonSuite.scala          | 19 ++++--
 3 files changed, 61 insertions(+), 28 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala
index 5f130af606e19..c419a9d7fac5f 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala
@@ -94,31 +94,15 @@ private[sql] class JSONOptions(
     sep
   }
 
+  protected def checkedEncoding(enc: String): String = enc
+
   /**
    * Standard encoding (charset) name. For example UTF-8, UTF-16LE and UTF-32BE.
-   * If the encoding is not specified (None), it will be detected automatically
+   * If the encoding is not specified (None) in read, it will be detected automatically
    * when the multiLine option is set to `true`.
    */
   val encoding: Option[String] = parameters.get("encoding")
-    .orElse(parameters.get("charset")).map { enc =>
-      // The following encodings are not supported in per-line mode (multiline is false)
-      // because they cause some problems in reading files with BOM which is supposed to
-      // present in the files with such encodings. After splitting input files by lines,
-      // only the first lines will have the BOM which leads to impossibility for reading
-      // the rest lines. Besides of that, the lineSep option must have the BOM in such
-      // encodings which can never present between lines.
-      val blacklist = Seq(Charset.forName("UTF-16"), Charset.forName("UTF-32"))
-      val isBlacklisted = blacklist.contains(Charset.forName(enc))
-      require(multiLine || !isBlacklisted,
-        s"""The ${enc} encoding must not be included in the blacklist when multiLine is disabled:
-           | ${blacklist.mkString(", ")}""".stripMargin)
-
-      val isLineSepRequired = !(multiLine == false &&
-        Charset.forName(enc) != StandardCharsets.UTF_8 && lineSeparator.isEmpty)
-      require(isLineSepRequired, s"The lineSep option must be specified for the $enc encoding")
-
-      enc
-  }
+    .orElse(parameters.get("charset")).map(checkedEncoding)
 
   val lineSeparatorInRead: Option[Array[Byte]] = lineSeparator.map { lineSep =>
     lineSep.getBytes(encoding.getOrElse("UTF-8"))
@@ -137,3 +121,40 @@ private[sql] class JSONOptions(
     factory.configure(JsonParser.Feature.ALLOW_UNQUOTED_CONTROL_CHARS, allowUnquotedControlChars)
   }
 }
+
+private[sql] class JSONOptionsInRead(
+    @transient private val parameters: CaseInsensitiveMap[String],
+    defaultTimeZoneId: String,
+    defaultColumnNameOfCorruptRecord: String)
+  extends JSONOptions(parameters, defaultTimeZoneId, defaultColumnNameOfCorruptRecord) {
+
+  def this(
+    parameters: Map[String, String],
+    defaultTimeZoneId: String,
+    defaultColumnNameOfCorruptRecord: String = "") = {
+    this(
+      CaseInsensitiveMap(parameters),
+      defaultTimeZoneId,
+      defaultColumnNameOfCorruptRecord)
+  }
+
+  protected override def checkedEncoding(enc: String): String = {
+    // The following encodings are not supported in per-line mode (multiline is false)
+    // because they cause some problems in reading files with BOM which is supposed to
+    // present in the files with such encodings. After splitting input files by lines,
+    // only the first lines will have the BOM which leads to impossibility for reading
+    // the rest lines. Besides of that, the lineSep option must have the BOM in such
+    // encodings which can never present between lines.
+    val blacklist = Seq(Charset.forName("UTF-16"), Charset.forName("UTF-32"))
+    val isBlacklisted = blacklist.contains(Charset.forName(enc))
+    require(multiLine || !isBlacklisted,
+      s"""The ${enc} encoding must not be included in the blacklist when multiLine is disabled:
+         | ${blacklist.mkString(", ")}""".stripMargin)
+
+    val isLineSepRequired = !(multiLine == false &&
+      Charset.forName(enc) != StandardCharsets.UTF_8 && lineSeparator.isEmpty)
+    require(isLineSepRequired, s"The lineSep option must be specified for the $enc encoding")
+
+    enc
+  }
+}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JsonFileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JsonFileFormat.scala
index 3b04510d29695..a81d31cbf7441 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JsonFileFormat.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JsonFileFormat.scala
@@ -26,7 +26,8 @@ import org.apache.hadoop.mapreduce.{Job, TaskAttemptContext}
 import org.apache.spark.internal.Logging
 import org.apache.spark.sql.{AnalysisException, SparkSession}
 import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.catalyst.json.{JacksonGenerator, JacksonParser, JSONOptions}
+import org.apache.spark.sql.catalyst.json.{JacksonGenerator, JacksonParser}
+import org.apache.spark.sql.catalyst.json.{JSONOptions, JSONOptionsInRead}
 import org.apache.spark.sql.catalyst.util.CompressionCodecs
 import org.apache.spark.sql.execution.datasources._
 import org.apache.spark.sql.sources._
@@ -40,7 +41,7 @@ class JsonFileFormat extends TextBasedFileFormat with DataSourceRegister {
       sparkSession: SparkSession,
       options: Map[String, String],
       path: Path): Boolean = {
-    val parsedOptions = new JSONOptions(
+    val parsedOptions = new JSONOptionsInRead(
       options,
       sparkSession.sessionState.conf.sessionLocalTimeZone,
       sparkSession.sessionState.conf.columnNameOfCorruptRecord)
@@ -52,7 +53,7 @@ class JsonFileFormat extends TextBasedFileFormat with DataSourceRegister {
       sparkSession: SparkSession,
       options: Map[String, String],
       files: Seq[FileStatus]): Option[StructType] = {
-    val parsedOptions = new JSONOptions(
+    val parsedOptions = new JSONOptionsInRead(
       options,
       sparkSession.sessionState.conf.sessionLocalTimeZone,
       sparkSession.sessionState.conf.columnNameOfCorruptRecord)
@@ -99,7 +100,7 @@ class JsonFileFormat extends TextBasedFileFormat with DataSourceRegister {
     val broadcastedHadoopConf =
       sparkSession.sparkContext.broadcast(new SerializableConfiguration(hadoopConf))
 
-    val parsedOptions = new JSONOptions(
+    val parsedOptions = new JSONOptionsInRead(
       options,
       sparkSession.sessionState.conf.sessionLocalTimeZone,
       sparkSession.sessionState.conf.columnNameOfCorruptRecord)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala
index 0db688fec9a67..ede7cb310af65 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala
@@ -2262,7 +2262,7 @@ class JsonSuite extends QueryTest with SharedSQLContext with TestJsonData {
     withTempPath { path =>
       val df = spark.createDataset(Seq(("Dog", 42)))
       df.write
-        .options(Map("encoding" -> encoding, "lineSep" -> "\n"))
+        .options(Map("encoding" -> encoding))
         .json(path.getCanonicalPath)
 
       checkEncoding(
@@ -2286,15 +2286,16 @@ class JsonSuite extends QueryTest with SharedSQLContext with TestJsonData {
 
   test("SPARK-23723: wrong output encoding") {
     val encoding = "UTF-128"
-    val exception = intercept[UnsupportedCharsetException] {
+    val exception = intercept[SparkException] {
       withTempPath { path =>
         val df = spark.createDataset(Seq((0)))
         df.write
-          .options(Map("encoding" -> encoding, "lineSep" -> "\n"))
+          .options(Map("encoding" -> encoding))
           .json(path.getCanonicalPath)
       }
-    }
+    }.getCause.getCause.getCause
 
+    assert(exception.isInstanceOf[java.nio.charset.UnsupportedCharsetException])
     assert(exception.getMessage == encoding)
   }
 
@@ -2408,4 +2409,14 @@ class JsonSuite extends QueryTest with SharedSQLContext with TestJsonData {
       spark.read.option("mode", "PERMISSIVE").option("encoding", "UTF-8").json(Seq(badJson).toDS()),
       Row(badJson))
   }
+
+  test("SPARK-24190: restrictions for JSONOptions in read") {
+    val exception = intercept[IllegalArgumentException] {
+      spark.read
+        .option("encoding", "UTF-16")
+        .json(testFile("test-data/utf16LE.json"))
+        .count()
+    }
+    assert(exception.getMessage.contains("encoding must not be included in the blacklist"))
+  }
 }

From 9c366a02a6872adff0e36ce6e22358b40e3004a6 Mon Sep 17 00:00:00 2001
From: Maxim Gekk <maxim.gekk@databricks.com>
Date: Thu, 10 May 2018 10:26:30 +0200
Subject: [PATCH 02/10] Overriding parameters

---
 .../scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala
index 7608075a9b6cb..7e83b517ea586 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala
@@ -123,7 +123,7 @@ private[sql] class JSONOptions(
 }
 
 private[sql] class JSONOptionsInRead(
-    @transient private val parameters: CaseInsensitiveMap[String],
+    @transient override val parameters: CaseInsensitiveMap[String],
     defaultTimeZoneId: String,
     defaultColumnNameOfCorruptRecord: String)
   extends JSONOptions(parameters, defaultTimeZoneId, defaultColumnNameOfCorruptRecord) {

From 97c4af76addc78a85ceb503a5db16f3285f18a5f Mon Sep 17 00:00:00 2001
From: Maxim Gekk <max.gekk@gmail.com>
Date: Thu, 10 May 2018 14:02:18 +0200
Subject: [PATCH 03/10] Removing unnecessary tests

---
 .../datasources/json/JsonSuite.scala          | 19 -------------------
 1 file changed, 19 deletions(-)

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala
index 4ce42c84ac54d..ede7cb310af65 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala
@@ -2314,25 +2314,6 @@ class JsonSuite extends QueryTest with SharedSQLContext with TestJsonData {
     }
   }
 
-  test("SPARK-23723: write json in UTF-16/32 with multiline off") {
-    Seq("UTF-16", "UTF-32").foreach { encoding =>
-      withTempPath { path =>
-        val ds = spark.createDataset(Seq(
-          ("a", 1), ("b", 2), ("c", 3))
-        ).repartition(2)
-        val e = intercept[IllegalArgumentException] {
-          ds.write
-            .option("encoding", encoding)
-            .option("multiline", "false")
-            .format("json").mode("overwrite")
-            .save(path.getCanonicalPath)
-        }.getMessage
-        assert(e.contains(
-          s"$encoding encoding in the blacklist is not allowed when multiLine is disabled"))
-      }
-    }
-  }
-
   def checkReadJson(lineSep: String, encoding: String, inferSchema: Boolean, id: Int): Unit = {
     test(s"SPARK-23724: checks reading json in ${encoding} #${id}") {
       val schema = new StructType().add("f1", StringType).add("f2", IntegerType)

From d2cd96486b7de4e7963793eb277b06def27f430c Mon Sep 17 00:00:00 2001
From: Maxim Gekk <maxim.gekk@databricks.com>
Date: Thu, 31 May 2018 15:09:26 +0200
Subject: [PATCH 04/10] Revert "Removing unnecessary tests"

This reverts commit 97c4af7
---
 .../datasources/json/JsonSuite.scala          | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala
index ede7cb310af65..4ce42c84ac54d 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala
@@ -2314,6 +2314,25 @@ class JsonSuite extends QueryTest with SharedSQLContext with TestJsonData {
     }
   }
 
+  test("SPARK-23723: write json in UTF-16/32 with multiline off") {
+    Seq("UTF-16", "UTF-32").foreach { encoding =>
+      withTempPath { path =>
+        val ds = spark.createDataset(Seq(
+          ("a", 1), ("b", 2), ("c", 3))
+        ).repartition(2)
+        val e = intercept[IllegalArgumentException] {
+          ds.write
+            .option("encoding", encoding)
+            .option("multiline", "false")
+            .format("json").mode("overwrite")
+            .save(path.getCanonicalPath)
+        }.getMessage
+        assert(e.contains(
+          s"$encoding encoding in the blacklist is not allowed when multiLine is disabled"))
+      }
+    }
+  }
+
   def checkReadJson(lineSep: String, encoding: String, inferSchema: Boolean, id: Int): Unit = {
     test(s"SPARK-23724: checks reading json in ${encoding} #${id}") {
       val schema = new StructType().add("f1", StringType).add("f2", IntegerType)

From 2285652f9df893cf124e98c191ef71cd308e1159 Mon Sep 17 00:00:00 2001
From: Maxim Gekk <maxim.gekk@databricks.com>
Date: Thu, 31 May 2018 15:52:10 +0200
Subject: [PATCH 05/10] Checking of written json in UTF-16 and UTF-32

---
 .../datasources/json/JsonSuite.scala          | 27 +++++++++----------
 1 file changed, 13 insertions(+), 14 deletions(-)

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala
index 4ce42c84ac54d..ba0ef79f45bff 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala
@@ -18,8 +18,8 @@
 package org.apache.spark.sql.execution.datasources.json
 
 import java.io.{File, FileOutputStream, StringWriter}
-import java.nio.charset.{StandardCharsets, UnsupportedCharsetException}
-import java.nio.file.{Files, Paths, StandardOpenOption}
+import java.nio.charset.{Charset, StandardCharsets, UnsupportedCharsetException}
+import java.nio.file.Files
 import java.sql.{Date, Timestamp}
 import java.util.Locale
 
@@ -2317,18 +2317,17 @@ class JsonSuite extends QueryTest with SharedSQLContext with TestJsonData {
   test("SPARK-23723: write json in UTF-16/32 with multiline off") {
     Seq("UTF-16", "UTF-32").foreach { encoding =>
       withTempPath { path =>
-        val ds = spark.createDataset(Seq(
-          ("a", 1), ("b", 2), ("c", 3))
-        ).repartition(2)
-        val e = intercept[IllegalArgumentException] {
-          ds.write
-            .option("encoding", encoding)
-            .option("multiline", "false")
-            .format("json").mode("overwrite")
-            .save(path.getCanonicalPath)
-        }.getMessage
-        assert(e.contains(
-          s"$encoding encoding in the blacklist is not allowed when multiLine is disabled"))
+        val ds = spark.createDataset(Seq(("a", 1))).repartition(1)
+        ds.write
+          .option("encoding", encoding)
+          .option("multiline", false)
+          .json(path.getCanonicalPath)
+        val jsonFiles = path.listFiles().filter(_.getName.endsWith("json"))
+        jsonFiles.foreach { jsonFile =>
+          val readback = Files.readAllBytes(jsonFile.toPath)
+          val expected = ("""{"_1":"a","_2":1}""" + "\n").getBytes(Charset.forName(encoding))
+          assert(readback === expected)
+        }
       }
     }
   }

From 21f609c4aae77c645e713b1cabcd1349983fa57a Mon Sep 17 00:00:00 2001
From: Maxim Gekk <maxim.gekk@databricks.com>
Date: Fri, 15 Jun 2018 13:33:44 +0200
Subject: [PATCH 06/10] Fix test of incorrect encoding

---
 .../execution/datasources/json/JsonSuite.scala    | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala
index ba0ef79f45bff..06d6248daa834 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala
@@ -17,7 +17,7 @@
 
 package org.apache.spark.sql.execution.datasources.json
 
-import java.io.{File, FileOutputStream, StringWriter}
+import java.io._
 import java.nio.charset.{Charset, StandardCharsets, UnsupportedCharsetException}
 import java.nio.file.Files
 import java.sql.{Date, Timestamp}
@@ -27,8 +27,8 @@ import com.fasterxml.jackson.core.JsonFactory
 import org.apache.hadoop.fs.{Path, PathFilter}
 import org.apache.hadoop.io.SequenceFile.CompressionType
 import org.apache.hadoop.io.compress.GzipCodec
-
 import org.apache.spark.{SparkException, TestUtils}
+
 import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.{functions => F, _}
 import org.apache.spark.sql.catalyst.json.{CreateJacksonParser, JacksonParser, JSONOptions}
@@ -2293,10 +2293,15 @@ class JsonSuite extends QueryTest with SharedSQLContext with TestJsonData {
           .options(Map("encoding" -> encoding))
           .json(path.getCanonicalPath)
       }
-    }.getCause.getCause.getCause
+    }
+
+    val baos = new ByteArrayOutputStream()
+    val ps = new PrintStream(baos, true, "UTF-8")
+    exception.printStackTrace(ps)
+    ps.flush()
 
-    assert(exception.isInstanceOf[java.nio.charset.UnsupportedCharsetException])
-    assert(exception.getMessage == encoding)
+    assert(baos.toString.contains(
+      "java.nio.charset.UnsupportedCharsetException: UTF-128"))
   }
 
   test("SPARK-23723: read back json in UTF-16LE") {

From 4a95588a9bf94d0d0c5699b3cf47469f38c0a626 Mon Sep 17 00:00:00 2001
From: Maxim Gekk <maxim.gekk@databricks.com>
Date: Fri, 15 Jun 2018 13:44:48 +0200
Subject: [PATCH 07/10] Check that both encodings are blacklisted in read

---
 .../execution/datasources/json/JsonSuite.scala    | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala
index 06d6248daa834..00808297c7bf3 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala
@@ -2434,12 +2434,15 @@ class JsonSuite extends QueryTest with SharedSQLContext with TestJsonData {
   }
 
   test("SPARK-24190: restrictions for JSONOptions in read") {
-    val exception = intercept[IllegalArgumentException] {
-      spark.read
-        .option("encoding", "UTF-16")
-        .json(testFile("test-data/utf16LE.json"))
-        .count()
+    for (encoding <- Set("UTF-16", "UTF-32")) {
+      val exception = intercept[IllegalArgumentException] {
+        spark.read
+          .option("encoding", encoding)
+          .option("multiLine", false)
+          .json(testFile("test-data/utf16LE.json"))
+          .count()
+      }
+      assert(exception.getMessage.contains("encoding must not be included in the blacklist"))
     }
-    assert(exception.getMessage.contains("encoding must not be included in the blacklist"))
   }
 }

From 6ddf50305e74d2dfe309a916895ec6fc12ba3b78 Mon Sep 17 00:00:00 2001
From: Maxim Gekk <maxim.gekk@databricks.com>
Date: Fri, 15 Jun 2018 14:10:38 +0200
Subject: [PATCH 08/10] Added comment about default encoding in write

---
 .../scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala
index 7e83b517ea586..d13296d650d97 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala
@@ -99,7 +99,8 @@ private[sql] class JSONOptions(
   /**
    * Standard encoding (charset) name. For example UTF-8, UTF-16LE and UTF-32BE.
    * If the encoding is not specified (None) in read, it will be detected automatically
-   * when the multiLine option is set to `true`.
+   * when the multiLine option is set to `true`. If encoding is not specified in write,
+   * UTF-8 is used by default.
    */
   val encoding: Option[String] = parameters.get("encoding")
     .orElse(parameters.get("charset")).map(checkedEncoding)

From c1971a5214d38c634e93b53e63ecbefeac8aba6e Mon Sep 17 00:00:00 2001
From: Maxim Gekk <maxim.gekk@databricks.com>
Date: Fri, 15 Jun 2018 14:41:59 +0200
Subject: [PATCH 09/10] Output warning for blacklisted encodings in write

---
 .../spark/sql/catalyst/json/JSONOptions.scala | 24 ++++++++++++-------
 .../datasources/json/JsonFileFormat.scala     |  5 ++++
 .../datasources/json/JsonSuite.scala          |  2 +-
 3 files changed, 21 insertions(+), 10 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala
index d13296d650d97..b4417d065c52e 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala
@@ -140,17 +140,10 @@ private[sql] class JSONOptionsInRead(
   }
 
   protected override def checkedEncoding(enc: String): String = {
-    // The following encodings are not supported in per-line mode (multiline is false)
-    // because they cause some problems in reading files with BOM which is supposed to
-    // present in the files with such encodings. After splitting input files by lines,
-    // only the first lines will have the BOM which leads to impossibility for reading
-    // the rest lines. Besides of that, the lineSep option must have the BOM in such
-    // encodings which can never present between lines.
-    val blacklist = Seq(Charset.forName("UTF-16"), Charset.forName("UTF-32"))
-    val isBlacklisted = blacklist.contains(Charset.forName(enc))
+    val isBlacklisted = JSONOptionsInRead.blacklist.contains(Charset.forName(enc))
     require(multiLine || !isBlacklisted,
       s"""The ${enc} encoding must not be included in the blacklist when multiLine is disabled:
-         |Blacklist: ${blacklist.mkString(", ")}""".stripMargin)
+         |Blacklist: ${JSONOptionsInRead.blacklist.mkString(", ")}""".stripMargin)
 
     val isLineSepRequired =
         multiLine || Charset.forName(enc) == StandardCharsets.UTF_8 || lineSeparator.nonEmpty
@@ -159,3 +152,16 @@ private[sql] class JSONOptionsInRead(
     enc
   }
 }
+
+private[sql] object JSONOptionsInRead {
+  // The following encodings are not supported in per-line mode (multiline is false)
+  // because they cause some problems in reading files with BOM which is supposed to
+  // present in the files with such encodings. After splitting input files by lines,
+  // only the first lines will have the BOM which leads to impossibility for reading
+  // the rest lines. Besides of that, the lineSep option must have the BOM in such
+  // encodings which can never present between lines.
+  val blacklist = Seq(
+    Charset.forName("UTF-16"),
+    Charset.forName("UTF-32")
+  )
+}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JsonFileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JsonFileFormat.scala
index a81d31cbf7441..2bdfdf31cfb93 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JsonFileFormat.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JsonFileFormat.scala
@@ -159,6 +159,11 @@ private[json] class JsonOutputWriter(
     case None => StandardCharsets.UTF_8
   }
 
+  if (JSONOptionsInRead.blacklist.contains(encoding)) {
+    logWarning(s"The JSON file ($path) was written in the encoding ${encoding.displayName()}" +
+         " which can be read back by Spark only if multiLine is enabled.")
+  }
+
   private val writer = CodecStreams.createOutputStreamWriter(
     context, new Path(path), encoding)
 
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala
index 00808297c7bf3..8481f0212d319 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala
@@ -27,8 +27,8 @@ import com.fasterxml.jackson.core.JsonFactory
 import org.apache.hadoop.fs.{Path, PathFilter}
 import org.apache.hadoop.io.SequenceFile.CompressionType
 import org.apache.hadoop.io.compress.GzipCodec
-import org.apache.spark.{SparkException, TestUtils}
 
+import org.apache.spark.{SparkException, TestUtils}
 import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.{functions => F, _}
 import org.apache.spark.sql.catalyst.json.{CreateJacksonParser, JacksonParser, JSONOptions}

From 2f7e7db8eb4e2078b350da30ec6909a4970d9f5a Mon Sep 17 00:00:00 2001
From: Maxim Gekk <max.gekk@gmail.com>
Date: Fri, 22 Jun 2018 18:46:52 +0200
Subject: [PATCH 10/10] Fold imports

---
 .../spark/sql/execution/datasources/json/JsonFileFormat.scala  | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JsonFileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JsonFileFormat.scala
index a81d31cbf7441..3278a480fb5dd 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JsonFileFormat.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JsonFileFormat.scala
@@ -26,8 +26,7 @@ import org.apache.hadoop.mapreduce.{Job, TaskAttemptContext}
 import org.apache.spark.internal.Logging
 import org.apache.spark.sql.{AnalysisException, SparkSession}
 import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.catalyst.json.{JacksonGenerator, JacksonParser}
-import org.apache.spark.sql.catalyst.json.{JSONOptions, JSONOptionsInRead}
+import org.apache.spark.sql.catalyst.json.{JacksonGenerator, JacksonParser, JSONOptions, JSONOptionsInRead}
 import org.apache.spark.sql.catalyst.util.CompressionCodecs
 import org.apache.spark.sql.execution.datasources._
 import org.apache.spark.sql.sources._