From 5f3348296aee6b1e891345e1037036984209789a Mon Sep 17 00:00:00 2001
From: Stephen Boesch <javadba@gmail.com>
Date: Wed, 16 Jul 2014 06:24:32 -0700
Subject: [PATCH 01/22] update pom.xml for hadoop-2.3-cdh5.0.0 and hbase
 0.96.1.1

---
 pom.xml | 10 ++++++++++
 1 file changed, 10 insertions(+)
diff --git a/pom.xml b/pom.xml
index ae97bf03c53a2..23d724dad7722 100644
--- a/pom.xml
+++ b/pom.xml
@@ -1069,6 +1069,16 @@
       </properties>
     </profile>
 
+    <profile>
+      <id>hadoop-2.3-cdh5.0.0</id>
+      <properties>
+        <hadoop.version>2.3.0-cdh5.0.0</hadoop.version>
+        <protobuf.version>2.5.0</protobuf.version>
+        <jets3t.version>0.9.0</jets3t.version>
+        <hbase.version>0.96.1.1-cdh5.0.0</hbase.version>
+      </properties>
+    </profile>
+
     <profile>
       <id>hadoop-2.4</id>
       <properties>

From 8ddbcce084df8f3310ec4cbac4c16a3bc864d3b4 Mon Sep 17 00:00:00 2001
From: Stephen Boesch <javadba@gmail.com>
Date: Wed, 23 Jul 2014 09:09:26 -0700
Subject: [PATCH 02/22] Mesos workaround

---
 .../org/apache/spark/executor/MesosExecutorBackend.scala      | 2 +-
 .../spark/scheduler/cluster/mesos/MesosSchedulerBackend.scala | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/executor/MesosExecutorBackend.scala b/core/src/main/scala/org/apache/spark/executor/MesosExecutorBackend.scala
index a42c8b43bbf7f..8c44c1696c833 100644
--- a/core/src/main/scala/org/apache/spark/executor/MesosExecutorBackend.scala
+++ b/core/src/main/scala/org/apache/spark/executor/MesosExecutorBackend.scala
@@ -41,7 +41,7 @@ private[spark] class MesosExecutorBackend
     driver.sendStatusUpdate(MesosTaskStatus.newBuilder()
       .setTaskId(mesosTaskId)
       .setState(TaskState.toMesos(state))
-      .setData(ByteString.copyFrom(data))
+//      .setData(ByteString.copyFrom(data))
       .build())
   }
 
diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackend.scala
index c717e7c621a8f..8afe2ecfec40a 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackend.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackend.scala
@@ -116,7 +116,7 @@ private[spark] class MesosSchedulerBackend(
     ExecutorInfo.newBuilder()
       .setExecutorId(ExecutorID.newBuilder().setValue(execId).build())
       .setCommand(command)
-      .setData(ByteString.copyFrom(createExecArg()))
+//      .setData(ByteString.copyFrom(createExecArg()))
       .addResources(memory)
       .build()
   }
@@ -253,7 +253,7 @@ private[spark] class MesosSchedulerBackend(
       .setExecutor(createExecutorInfo(slaveId))
       .setName(task.name)
       .addResources(cpuResource)
-      .setData(ByteString.copyFrom(task.serializedTask))
+//      .setData(ByteString.copyFrom(task.serializedTask))
       .build()
   }
 

From 7ea3391911a472f08121b95a0e19508c43dd3638 Mon Sep 17 00:00:00 2001
From: Stephen Boesch <javadba@gmail.com>
Date: Tue, 22 Jul 2014 21:33:25 -0700
Subject: [PATCH 03/22] SPARK-2638 MapOutputTracker concurrency improvement

Rolled back files not intended for checkin
---
 .../org/apache/spark/MapOutputTracker.scala   |  2 +-
 .../spark/executor/MesosExecutorBackend.scala |  2 +-
 .../cluster/mesos/MesosSchedulerBackend.scala |  4 +-
 .../apache/spark/MapOutputTrackerSuite.scala  | 67 +++++++++++++++++--
 pom.xml                                       | 10 ---
 5 files changed, 64 insertions(+), 21 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/MapOutputTracker.scala b/core/src/main/scala/org/apache/spark/MapOutputTracker.scala
index 894091761485d..560782079ff31 100644
--- a/core/src/main/scala/org/apache/spark/MapOutputTracker.scala
+++ b/core/src/main/scala/org/apache/spark/MapOutputTracker.scala
@@ -130,7 +130,7 @@ private[spark] abstract class MapOutputTracker(conf: SparkConf) extends Logging
     if (statuses == null) {
       logInfo("Don't have map outputs for shuffle " + shuffleId + ", fetching them")
       var fetchedStatuses: Array[MapStatus] = null
-      fetching.synchronized {
+      shuffleId.toString.intern.synchronized {
         if (fetching.contains(shuffleId)) {
           // Someone else is fetching it; wait for them to be done
           while (fetching.contains(shuffleId)) {
diff --git a/core/src/main/scala/org/apache/spark/executor/MesosExecutorBackend.scala b/core/src/main/scala/org/apache/spark/executor/MesosExecutorBackend.scala
index 8c44c1696c833..a42c8b43bbf7f 100644
--- a/core/src/main/scala/org/apache/spark/executor/MesosExecutorBackend.scala
+++ b/core/src/main/scala/org/apache/spark/executor/MesosExecutorBackend.scala
@@ -41,7 +41,7 @@ private[spark] class MesosExecutorBackend
     driver.sendStatusUpdate(MesosTaskStatus.newBuilder()
       .setTaskId(mesosTaskId)
       .setState(TaskState.toMesos(state))
-//      .setData(ByteString.copyFrom(data))
+      .setData(ByteString.copyFrom(data))
       .build())
   }
 
diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackend.scala
index 8afe2ecfec40a..c717e7c621a8f 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackend.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackend.scala
@@ -116,7 +116,7 @@ private[spark] class MesosSchedulerBackend(
     ExecutorInfo.newBuilder()
       .setExecutorId(ExecutorID.newBuilder().setValue(execId).build())
       .setCommand(command)
-//      .setData(ByteString.copyFrom(createExecArg()))
+      .setData(ByteString.copyFrom(createExecArg()))
       .addResources(memory)
       .build()
   }
@@ -253,7 +253,7 @@ private[spark] class MesosSchedulerBackend(
       .setExecutor(createExecutorInfo(slaveId))
       .setName(task.name)
       .addResources(cpuResource)
-//      .setData(ByteString.copyFrom(task.serializedTask))
+      .setData(ByteString.copyFrom(task.serializedTask))
       .build()
   }
 
diff --git a/core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala b/core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala
index 9702838085627..af3e2ecb80589 100644
--- a/core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala
+++ b/core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala
@@ -17,6 +17,11 @@
 
 package org.apache.spark
 
+import java.util.Date
+import java.util.concurrent._
+
+import org.apache.log4j.Logger
+
 import scala.concurrent.Await
 
 import akka.actor._
@@ -29,6 +34,7 @@ import org.apache.spark.storage.BlockManagerId
 import org.apache.spark.util.AkkaUtils
 
 class MapOutputTrackerSuite extends FunSuite with LocalSparkContext {
+  val logger = Logger.getLogger(getClass.getName)
   private val conf = new SparkConf
   test("compressSize") {
     assert(MapOutputTracker.compressSize(0L) === 0)
@@ -137,29 +143,76 @@ class MapOutputTrackerSuite extends FunSuite with LocalSparkContext {
     val timeout = AkkaUtils.lookupTimeout(conf)
     slaveTracker.trackerActor = Await.result(selection.resolveOne(timeout), timeout)
 
-    masterTracker.registerShuffle(10, 1)
+    // Test single shuffle execution
+    val shuffleId = 10
+    val start = new Date().getTime
+    invokeRemoteFetch(masterTracker, slaveTracker, shuffleId)
+    val singleFetchDuration = new Date().getTime - start
+
+    // Test Parallel execution of shuffles
+    val NShuffles = 20
+    import collection.mutable.ArrayBuffer
+    val threads = new ArrayBuffer[Thread](NShuffles)
+    val barrier = new CyclicBarrier(NShuffles)
+    val latch = new CountDownLatch(NShuffles)
+
+    class ShuffleTesterThread(threadNum: Int, barrier: CyclicBarrier, latch : CountDownLatch)
+      extends Thread(s"ShuffleTester$threadNum") {
+      override def run() = {
+        val shuffleId = 20+threadNum
+        barrier.await
+
+        invokeRemoteFetch(masterTracker, slaveTracker, shuffleId)
+        latch.countDown()
+      }
+    }
+
+    for (shuffle <- 1 to NShuffles) {
+      val t = new ShuffleTesterThread(shuffle, barrier, latch)
+      threads += t
+      t.start()
+    }
+    val pstart = new Date().getTime
+    latch.await(10, TimeUnit.SECONDS)
+    assert(latch.getCount == 0, "Not all shuffles completed within allowed time period")
+    val parallelFetchDuration = new Date().getTime - pstart
+    log(s"All $NShuffles fetches Completed: duration=$parallelFetchDuration "
+      + s"(vs single fetch=$singleFetchDuration)")
+        assert (parallelFetchDuration > singleFetchDuration
+          && parallelFetchDuration <= (NShuffles * 0.4) * singleFetchDuration,
+        "Parallel remote fetch should show strong sub-linear execution time increase "
+          + s" vs number of remote Fetches. "
+          + s"Actual: single fetch=$singleFetchDuration $NShuffles-threads=$parallelFetchDuration")
+
+  }
+
+  def invokeRemoteFetch(masterTracker: MapOutputTrackerMaster,
+                        slaveTracker: MapOutputTrackerWorker, shuffleId: Int): Unit = {
+    masterTracker.registerShuffle(shuffleId, 1)
     masterTracker.incrementEpoch()
     slaveTracker.updateEpoch(masterTracker.getEpoch)
-    intercept[FetchFailedException] { slaveTracker.getServerStatuses(10, 0) }
+    intercept[FetchFailedException] { slaveTracker.getServerStatuses(shuffleId, 0) }
 
     val compressedSize1000 = MapOutputTracker.compressSize(1000L)
     val size1000 = MapOutputTracker.decompressSize(compressedSize1000)
-    masterTracker.registerMapOutput(10, 0, new MapStatus(
+    masterTracker.registerMapOutput(shuffleId, 0, new MapStatus(
       BlockManagerId("a", "hostA", 1000, 0), Array(compressedSize1000)))
     masterTracker.incrementEpoch()
     slaveTracker.updateEpoch(masterTracker.getEpoch)
-    assert(slaveTracker.getServerStatuses(10, 0).toSeq ===
+    assert(slaveTracker.getServerStatuses(shuffleId, 0).toSeq ===
       Seq((BlockManagerId("a", "hostA", 1000, 0), size1000)))
 
-    masterTracker.unregisterMapOutput(10, 0, BlockManagerId("a", "hostA", 1000, 0))
+    masterTracker.unregisterMapOutput(shuffleId, 0, BlockManagerId("a", "hostA", 1000, 0))
     masterTracker.incrementEpoch()
     slaveTracker.updateEpoch(masterTracker.getEpoch)
-    intercept[FetchFailedException] { slaveTracker.getServerStatuses(10, 0) }
+    intercept[FetchFailedException] { slaveTracker.getServerStatuses(shuffleId, 0) }
 
     // failure should be cached
-    intercept[FetchFailedException] { slaveTracker.getServerStatuses(10, 0) }
+    intercept[FetchFailedException] { slaveTracker.getServerStatuses(shuffleId, 0) }
   }
 
+  def log(msg: String) = logger.info(msg)
+
   test("remote fetch below akka frame size") {
     val newConf = new SparkConf
     newConf.set("spark.akka.frameSize", "1")
diff --git a/pom.xml b/pom.xml
index 23d724dad7722..ae97bf03c53a2 100644
--- a/pom.xml
+++ b/pom.xml
@@ -1069,16 +1069,6 @@
       </properties>
     </profile>
 
-    <profile>
-      <id>hadoop-2.3-cdh5.0.0</id>
-      <properties>
-        <hadoop.version>2.3.0-cdh5.0.0</hadoop.version>
-        <protobuf.version>2.5.0</protobuf.version>
-        <jets3t.version>0.9.0</jets3t.version>
-        <hbase.version>0.96.1.1-cdh5.0.0</hbase.version>
-      </properties>
-    </profile>
-
     <profile>
       <id>hadoop-2.4</id>
       <properties>

From f780ad12db493634a8a14aa71de93d082db01a49 Mon Sep 17 00:00:00 2001
From: Stephen Boesch <javadba@gmail.com>
Date: Fri, 25 Jul 2014 02:15:39 -0700
Subject: [PATCH 04/22] Updated concurrency fix for using same monitor on the
 synchronized and wait logic

---
 .../org/apache/spark/MapOutputTracker.scala      | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/MapOutputTracker.scala b/core/src/main/scala/org/apache/spark/MapOutputTracker.scala
index 560782079ff31..681d75d48e5ba 100644
--- a/core/src/main/scala/org/apache/spark/MapOutputTracker.scala
+++ b/core/src/main/scala/org/apache/spark/MapOutputTracker.scala
@@ -18,6 +18,7 @@
 package org.apache.spark
 
 import java.io._
+import java.util.concurrent.ConcurrentSkipListSet
 import java.util.zip.{GZIPInputStream, GZIPOutputStream}
 
 import scala.collection.mutable.{HashSet, HashMap, Map}
@@ -95,7 +96,7 @@ private[spark] abstract class MapOutputTracker(conf: SparkConf) extends Logging
   protected val epochLock = new AnyRef
 
   /** Remembers which map output locations are currently being fetched on a worker. */
-  private val fetching = new HashSet[Int]
+  private val fetching = new ConcurrentSkipListSet[Int]
 
   /**
    * Send a message to the trackerActor and get its result within a default timeout, or
@@ -130,12 +131,13 @@ private[spark] abstract class MapOutputTracker(conf: SparkConf) extends Logging
     if (statuses == null) {
       logInfo("Don't have map outputs for shuffle " + shuffleId + ", fetching them")
       var fetchedStatuses: Array[MapStatus] = null
-      shuffleId.toString.intern.synchronized {
+      val monitor = shuffleId.toString.intern
+      monitor.synchronized {
         if (fetching.contains(shuffleId)) {
           // Someone else is fetching it; wait for them to be done
           while (fetching.contains(shuffleId)) {
             try {
-              fetching.wait()
+              monitor.wait()
             } catch {
               case e: InterruptedException =>
             }
@@ -147,7 +149,7 @@ private[spark] abstract class MapOutputTracker(conf: SparkConf) extends Logging
         fetchedStatuses = mapStatuses.get(shuffleId).orNull
         if (fetchedStatuses == null) {
           // We have to do the fetch, get others to wait for us.
-          fetching += shuffleId
+          fetching.add(shuffleId)
         }
       }
 
@@ -162,9 +164,9 @@ private[spark] abstract class MapOutputTracker(conf: SparkConf) extends Logging
           logInfo("Got the output locations")
           mapStatuses.put(shuffleId, fetchedStatuses)
         } finally {
-          fetching.synchronized {
-            fetching -= shuffleId
-            fetching.notifyAll()
+          monitor.synchronized {
+            fetching.remove(shuffleId)
+            monitor.notifyAll()
           }
         }
       }

From 46bccf551f8b4bf01cdbb18fc153808e5419477b Mon Sep 17 00:00:00 2001
From: Stephen Boesch <javadba@gmail.com>
Date: Fri, 1 Aug 2014 14:56:05 -0700
Subject: [PATCH 05/22] Manually revert custom changes to master

---
 .../org/apache/spark/MapOutputTracker.scala   | 16 ++---
 .../spark/executor/MesosExecutorBackend.scala |  2 +-
 .../cluster/mesos/MesosSchedulerBackend.scala |  4 +-
 .../apache/spark/MapOutputTrackerSuite.scala  | 67 ++-----------------
 pom.xml                                       | 10 +++
 5 files changed, 27 insertions(+), 72 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/MapOutputTracker.scala b/core/src/main/scala/org/apache/spark/MapOutputTracker.scala
index 681d75d48e5ba..894091761485d 100644
--- a/core/src/main/scala/org/apache/spark/MapOutputTracker.scala
+++ b/core/src/main/scala/org/apache/spark/MapOutputTracker.scala
@@ -18,7 +18,6 @@
 package org.apache.spark
 
 import java.io._
-import java.util.concurrent.ConcurrentSkipListSet
 import java.util.zip.{GZIPInputStream, GZIPOutputStream}
 
 import scala.collection.mutable.{HashSet, HashMap, Map}
@@ -96,7 +95,7 @@ private[spark] abstract class MapOutputTracker(conf: SparkConf) extends Logging
   protected val epochLock = new AnyRef
 
   /** Remembers which map output locations are currently being fetched on a worker. */
-  private val fetching = new ConcurrentSkipListSet[Int]
+  private val fetching = new HashSet[Int]
 
   /**
    * Send a message to the trackerActor and get its result within a default timeout, or
@@ -131,13 +130,12 @@ private[spark] abstract class MapOutputTracker(conf: SparkConf) extends Logging
     if (statuses == null) {
       logInfo("Don't have map outputs for shuffle " + shuffleId + ", fetching them")
       var fetchedStatuses: Array[MapStatus] = null
-      val monitor = shuffleId.toString.intern
-      monitor.synchronized {
+      fetching.synchronized {
         if (fetching.contains(shuffleId)) {
           // Someone else is fetching it; wait for them to be done
           while (fetching.contains(shuffleId)) {
             try {
-              monitor.wait()
+              fetching.wait()
             } catch {
               case e: InterruptedException =>
             }
@@ -149,7 +147,7 @@ private[spark] abstract class MapOutputTracker(conf: SparkConf) extends Logging
         fetchedStatuses = mapStatuses.get(shuffleId).orNull
         if (fetchedStatuses == null) {
           // We have to do the fetch, get others to wait for us.
-          fetching.add(shuffleId)
+          fetching += shuffleId
         }
       }
 
@@ -164,9 +162,9 @@ private[spark] abstract class MapOutputTracker(conf: SparkConf) extends Logging
           logInfo("Got the output locations")
           mapStatuses.put(shuffleId, fetchedStatuses)
         } finally {
-          monitor.synchronized {
-            fetching.remove(shuffleId)
-            monitor.notifyAll()
+          fetching.synchronized {
+            fetching -= shuffleId
+            fetching.notifyAll()
           }
         }
       }
diff --git a/core/src/main/scala/org/apache/spark/executor/MesosExecutorBackend.scala b/core/src/main/scala/org/apache/spark/executor/MesosExecutorBackend.scala
index a42c8b43bbf7f..8c44c1696c833 100644
--- a/core/src/main/scala/org/apache/spark/executor/MesosExecutorBackend.scala
+++ b/core/src/main/scala/org/apache/spark/executor/MesosExecutorBackend.scala
@@ -41,7 +41,7 @@ private[spark] class MesosExecutorBackend
     driver.sendStatusUpdate(MesosTaskStatus.newBuilder()
       .setTaskId(mesosTaskId)
       .setState(TaskState.toMesos(state))
-      .setData(ByteString.copyFrom(data))
+//      .setData(ByteString.copyFrom(data))
       .build())
   }
 
diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackend.scala
index c717e7c621a8f..8afe2ecfec40a 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackend.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackend.scala
@@ -116,7 +116,7 @@ private[spark] class MesosSchedulerBackend(
     ExecutorInfo.newBuilder()
       .setExecutorId(ExecutorID.newBuilder().setValue(execId).build())
       .setCommand(command)
-      .setData(ByteString.copyFrom(createExecArg()))
+//      .setData(ByteString.copyFrom(createExecArg()))
       .addResources(memory)
       .build()
   }
@@ -253,7 +253,7 @@ private[spark] class MesosSchedulerBackend(
       .setExecutor(createExecutorInfo(slaveId))
       .setName(task.name)
       .addResources(cpuResource)
-      .setData(ByteString.copyFrom(task.serializedTask))
+//      .setData(ByteString.copyFrom(task.serializedTask))
       .build()
   }
 
diff --git a/core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala b/core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala
index af3e2ecb80589..9702838085627 100644
--- a/core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala
+++ b/core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala
@@ -17,11 +17,6 @@
 
 package org.apache.spark
 
-import java.util.Date
-import java.util.concurrent._
-
-import org.apache.log4j.Logger
-
 import scala.concurrent.Await
 
 import akka.actor._
@@ -34,7 +29,6 @@ import org.apache.spark.storage.BlockManagerId
 import org.apache.spark.util.AkkaUtils
 
 class MapOutputTrackerSuite extends FunSuite with LocalSparkContext {
-  val logger = Logger.getLogger(getClass.getName)
   private val conf = new SparkConf
   test("compressSize") {
     assert(MapOutputTracker.compressSize(0L) === 0)
@@ -143,76 +137,29 @@ class MapOutputTrackerSuite extends FunSuite with LocalSparkContext {
     val timeout = AkkaUtils.lookupTimeout(conf)
     slaveTracker.trackerActor = Await.result(selection.resolveOne(timeout), timeout)
 
-    // Test single shuffle execution
-    val shuffleId = 10
-    val start = new Date().getTime
-    invokeRemoteFetch(masterTracker, slaveTracker, shuffleId)
-    val singleFetchDuration = new Date().getTime - start
-
-    // Test Parallel execution of shuffles
-    val NShuffles = 20
-    import collection.mutable.ArrayBuffer
-    val threads = new ArrayBuffer[Thread](NShuffles)
-    val barrier = new CyclicBarrier(NShuffles)
-    val latch = new CountDownLatch(NShuffles)
-
-    class ShuffleTesterThread(threadNum: Int, barrier: CyclicBarrier, latch : CountDownLatch)
-      extends Thread(s"ShuffleTester$threadNum") {
-      override def run() = {
-        val shuffleId = 20+threadNum
-        barrier.await
-
-        invokeRemoteFetch(masterTracker, slaveTracker, shuffleId)
-        latch.countDown()
-      }
-    }
-
-    for (shuffle <- 1 to NShuffles) {
-      val t = new ShuffleTesterThread(shuffle, barrier, latch)
-      threads += t
-      t.start()
-    }
-    val pstart = new Date().getTime
-    latch.await(10, TimeUnit.SECONDS)
-    assert(latch.getCount == 0, "Not all shuffles completed within allowed time period")
-    val parallelFetchDuration = new Date().getTime - pstart
-    log(s"All $NShuffles fetches Completed: duration=$parallelFetchDuration "
-      + s"(vs single fetch=$singleFetchDuration)")
-        assert (parallelFetchDuration > singleFetchDuration
-          && parallelFetchDuration <= (NShuffles * 0.4) * singleFetchDuration,
-        "Parallel remote fetch should show strong sub-linear execution time increase "
-          + s" vs number of remote Fetches. "
-          + s"Actual: single fetch=$singleFetchDuration $NShuffles-threads=$parallelFetchDuration")
-
-  }
-
-  def invokeRemoteFetch(masterTracker: MapOutputTrackerMaster,
-                        slaveTracker: MapOutputTrackerWorker, shuffleId: Int): Unit = {
-    masterTracker.registerShuffle(shuffleId, 1)
+    masterTracker.registerShuffle(10, 1)
     masterTracker.incrementEpoch()
     slaveTracker.updateEpoch(masterTracker.getEpoch)
-    intercept[FetchFailedException] { slaveTracker.getServerStatuses(shuffleId, 0) }
+    intercept[FetchFailedException] { slaveTracker.getServerStatuses(10, 0) }
 
     val compressedSize1000 = MapOutputTracker.compressSize(1000L)
     val size1000 = MapOutputTracker.decompressSize(compressedSize1000)
-    masterTracker.registerMapOutput(shuffleId, 0, new MapStatus(
+    masterTracker.registerMapOutput(10, 0, new MapStatus(
       BlockManagerId("a", "hostA", 1000, 0), Array(compressedSize1000)))
     masterTracker.incrementEpoch()
     slaveTracker.updateEpoch(masterTracker.getEpoch)
-    assert(slaveTracker.getServerStatuses(shuffleId, 0).toSeq ===
+    assert(slaveTracker.getServerStatuses(10, 0).toSeq ===
       Seq((BlockManagerId("a", "hostA", 1000, 0), size1000)))
 
-    masterTracker.unregisterMapOutput(shuffleId, 0, BlockManagerId("a", "hostA", 1000, 0))
+    masterTracker.unregisterMapOutput(10, 0, BlockManagerId("a", "hostA", 1000, 0))
     masterTracker.incrementEpoch()
     slaveTracker.updateEpoch(masterTracker.getEpoch)
-    intercept[FetchFailedException] { slaveTracker.getServerStatuses(shuffleId, 0) }
+    intercept[FetchFailedException] { slaveTracker.getServerStatuses(10, 0) }
 
     // failure should be cached
-    intercept[FetchFailedException] { slaveTracker.getServerStatuses(shuffleId, 0) }
+    intercept[FetchFailedException] { slaveTracker.getServerStatuses(10, 0) }
   }
 
-  def log(msg: String) = logger.info(msg)
-
   test("remote fetch below akka frame size") {
     val newConf = new SparkConf
     newConf.set("spark.akka.frameSize", "1")
diff --git a/pom.xml b/pom.xml
index ae97bf03c53a2..23d724dad7722 100644
--- a/pom.xml
+++ b/pom.xml
@@ -1069,6 +1069,16 @@
       </properties>
     </profile>
 
+    <profile>
+      <id>hadoop-2.3-cdh5.0.0</id>
+      <properties>
+        <hadoop.version>2.3.0-cdh5.0.0</hadoop.version>
+        <protobuf.version>2.5.0</protobuf.version>
+        <jets3t.version>0.9.0</jets3t.version>
+        <hbase.version>0.96.1.1-cdh5.0.0</hbase.version>
+      </properties>
+    </profile>
+
     <profile>
       <id>hadoop-2.4</id>
       <properties>

From a91f6a39af4e799617fb6f8cb490aaedfbb2af4e Mon Sep 17 00:00:00 2001
From: Stephen Boesch <javadba@gmail.com>
Date: Wed, 16 Jul 2014 06:24:32 -0700
Subject: [PATCH 06/22] update pom.xml for hadoop-2.3-cdh5.0.0 and hbase
 0.96.1.1

---
 pom.xml | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/pom.xml b/pom.xml
index ae97bf03c53a2..23d724dad7722 100644
--- a/pom.xml
+++ b/pom.xml
@@ -1069,6 +1069,16 @@
       </properties>
     </profile>
 
+    <profile>
+      <id>hadoop-2.3-cdh5.0.0</id>
+      <properties>
+        <hadoop.version>2.3.0-cdh5.0.0</hadoop.version>
+        <protobuf.version>2.5.0</protobuf.version>
+        <jets3t.version>0.9.0</jets3t.version>
+        <hbase.version>0.96.1.1-cdh5.0.0</hbase.version>
+      </properties>
+    </profile>
+
     <profile>
       <id>hadoop-2.4</id>
       <properties>

From c638587cf929cf89846a3761b6de1ec084ac160a Mon Sep 17 00:00:00 2001
From: Stephen Boesch <javadba@gmail.com>
Date: Wed, 23 Jul 2014 09:09:26 -0700
Subject: [PATCH 07/22] Mesos workaround

---
 .../org/apache/spark/executor/MesosExecutorBackend.scala      | 2 +-
 .../spark/scheduler/cluster/mesos/MesosSchedulerBackend.scala | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/executor/MesosExecutorBackend.scala b/core/src/main/scala/org/apache/spark/executor/MesosExecutorBackend.scala
index a42c8b43bbf7f..8c44c1696c833 100644
--- a/core/src/main/scala/org/apache/spark/executor/MesosExecutorBackend.scala
+++ b/core/src/main/scala/org/apache/spark/executor/MesosExecutorBackend.scala
@@ -41,7 +41,7 @@ private[spark] class MesosExecutorBackend
     driver.sendStatusUpdate(MesosTaskStatus.newBuilder()
       .setTaskId(mesosTaskId)
       .setState(TaskState.toMesos(state))
-      .setData(ByteString.copyFrom(data))
+//      .setData(ByteString.copyFrom(data))
       .build())
   }
 
diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackend.scala
index c717e7c621a8f..8afe2ecfec40a 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackend.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackend.scala
@@ -116,7 +116,7 @@ private[spark] class MesosSchedulerBackend(
     ExecutorInfo.newBuilder()
       .setExecutorId(ExecutorID.newBuilder().setValue(execId).build())
       .setCommand(command)
-      .setData(ByteString.copyFrom(createExecArg()))
+//      .setData(ByteString.copyFrom(createExecArg()))
       .addResources(memory)
       .build()
   }
@@ -253,7 +253,7 @@ private[spark] class MesosSchedulerBackend(
       .setExecutor(createExecutorInfo(slaveId))
       .setName(task.name)
       .addResources(cpuResource)
-      .setData(ByteString.copyFrom(task.serializedTask))
+//      .setData(ByteString.copyFrom(task.serializedTask))
       .build()
   }
 

From 31dcd4fbad565e9dca06307a516a682d86dcfc09 Mon Sep 17 00:00:00 2001
From: Stephen Boesch <javadba@gmail.com>
Date: Tue, 22 Jul 2014 21:33:25 -0700
Subject: [PATCH 08/22] SPARK-2638 MapOutputTracker concurrency improvement

Rolled back files not intended for checkin
---
 .../org/apache/spark/MapOutputTracker.scala   |  2 +-
 .../spark/executor/MesosExecutorBackend.scala |  2 +-
 .../cluster/mesos/MesosSchedulerBackend.scala |  4 +-
 .../apache/spark/MapOutputTrackerSuite.scala  | 67 +++++++++++++++++--
 pom.xml                                       | 10 ---
 5 files changed, 64 insertions(+), 21 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/MapOutputTracker.scala b/core/src/main/scala/org/apache/spark/MapOutputTracker.scala
index 894091761485d..560782079ff31 100644
--- a/core/src/main/scala/org/apache/spark/MapOutputTracker.scala
+++ b/core/src/main/scala/org/apache/spark/MapOutputTracker.scala
@@ -130,7 +130,7 @@ private[spark] abstract class MapOutputTracker(conf: SparkConf) extends Logging
     if (statuses == null) {
       logInfo("Don't have map outputs for shuffle " + shuffleId + ", fetching them")
       var fetchedStatuses: Array[MapStatus] = null
-      fetching.synchronized {
+      shuffleId.toString.intern.synchronized {
         if (fetching.contains(shuffleId)) {
           // Someone else is fetching it; wait for them to be done
           while (fetching.contains(shuffleId)) {
diff --git a/core/src/main/scala/org/apache/spark/executor/MesosExecutorBackend.scala b/core/src/main/scala/org/apache/spark/executor/MesosExecutorBackend.scala
index 8c44c1696c833..a42c8b43bbf7f 100644
--- a/core/src/main/scala/org/apache/spark/executor/MesosExecutorBackend.scala
+++ b/core/src/main/scala/org/apache/spark/executor/MesosExecutorBackend.scala
@@ -41,7 +41,7 @@ private[spark] class MesosExecutorBackend
     driver.sendStatusUpdate(MesosTaskStatus.newBuilder()
       .setTaskId(mesosTaskId)
       .setState(TaskState.toMesos(state))
-//      .setData(ByteString.copyFrom(data))
+      .setData(ByteString.copyFrom(data))
       .build())
   }
 
diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackend.scala
index 8afe2ecfec40a..c717e7c621a8f 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackend.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackend.scala
@@ -116,7 +116,7 @@ private[spark] class MesosSchedulerBackend(
     ExecutorInfo.newBuilder()
       .setExecutorId(ExecutorID.newBuilder().setValue(execId).build())
       .setCommand(command)
-//      .setData(ByteString.copyFrom(createExecArg()))
+      .setData(ByteString.copyFrom(createExecArg()))
       .addResources(memory)
       .build()
   }
@@ -253,7 +253,7 @@ private[spark] class MesosSchedulerBackend(
       .setExecutor(createExecutorInfo(slaveId))
       .setName(task.name)
       .addResources(cpuResource)
-//      .setData(ByteString.copyFrom(task.serializedTask))
+      .setData(ByteString.copyFrom(task.serializedTask))
       .build()
   }
 
diff --git a/core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala b/core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala
index 9702838085627..af3e2ecb80589 100644
--- a/core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala
+++ b/core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala
@@ -17,6 +17,11 @@
 
 package org.apache.spark
 
+import java.util.Date
+import java.util.concurrent._
+
+import org.apache.log4j.Logger
+
 import scala.concurrent.Await
 
 import akka.actor._
@@ -29,6 +34,7 @@ import org.apache.spark.storage.BlockManagerId
 import org.apache.spark.util.AkkaUtils
 
 class MapOutputTrackerSuite extends FunSuite with LocalSparkContext {
+  val logger = Logger.getLogger(getClass.getName)
   private val conf = new SparkConf
   test("compressSize") {
     assert(MapOutputTracker.compressSize(0L) === 0)
@@ -137,29 +143,76 @@ class MapOutputTrackerSuite extends FunSuite with LocalSparkContext {
     val timeout = AkkaUtils.lookupTimeout(conf)
     slaveTracker.trackerActor = Await.result(selection.resolveOne(timeout), timeout)
 
-    masterTracker.registerShuffle(10, 1)
+    // Test single shuffle execution
+    val shuffleId = 10
+    val start = new Date().getTime
+    invokeRemoteFetch(masterTracker, slaveTracker, shuffleId)
+    val singleFetchDuration = new Date().getTime - start
+
+    // Test Parallel execution of shuffles
+    val NShuffles = 20
+    import collection.mutable.ArrayBuffer
+    val threads = new ArrayBuffer[Thread](NShuffles)
+    val barrier = new CyclicBarrier(NShuffles)
+    val latch = new CountDownLatch(NShuffles)
+
+    class ShuffleTesterThread(threadNum: Int, barrier: CyclicBarrier, latch : CountDownLatch)
+      extends Thread(s"ShuffleTester$threadNum") {
+      override def run() = {
+        val shuffleId = 20+threadNum
+        barrier.await
+
+        invokeRemoteFetch(masterTracker, slaveTracker, shuffleId)
+        latch.countDown()
+      }
+    }
+
+    for (shuffle <- 1 to NShuffles) {
+      val t = new ShuffleTesterThread(shuffle, barrier, latch)
+      threads += t
+      t.start()
+    }
+    val pstart = new Date().getTime
+    latch.await(10, TimeUnit.SECONDS)
+    assert(latch.getCount == 0, "Not all shuffles completed within allowed time period")
+    val parallelFetchDuration = new Date().getTime - pstart
+    log(s"All $NShuffles fetches Completed: duration=$parallelFetchDuration "
+      + s"(vs single fetch=$singleFetchDuration)")
+        assert (parallelFetchDuration > singleFetchDuration
+          && parallelFetchDuration <= (NShuffles * 0.4) * singleFetchDuration,
+        "Parallel remote fetch should show strong sub-linear execution time increase "
+          + s" vs number of remote Fetches. "
+          + s"Actual: single fetch=$singleFetchDuration $NShuffles-threads=$parallelFetchDuration")
+
+  }
+
+  def invokeRemoteFetch(masterTracker: MapOutputTrackerMaster,
+                        slaveTracker: MapOutputTrackerWorker, shuffleId: Int): Unit = {
+    masterTracker.registerShuffle(shuffleId, 1)
     masterTracker.incrementEpoch()
     slaveTracker.updateEpoch(masterTracker.getEpoch)
-    intercept[FetchFailedException] { slaveTracker.getServerStatuses(10, 0) }
+    intercept[FetchFailedException] { slaveTracker.getServerStatuses(shuffleId, 0) }
 
     val compressedSize1000 = MapOutputTracker.compressSize(1000L)
     val size1000 = MapOutputTracker.decompressSize(compressedSize1000)
-    masterTracker.registerMapOutput(10, 0, new MapStatus(
+    masterTracker.registerMapOutput(shuffleId, 0, new MapStatus(
       BlockManagerId("a", "hostA", 1000, 0), Array(compressedSize1000)))
     masterTracker.incrementEpoch()
     slaveTracker.updateEpoch(masterTracker.getEpoch)
-    assert(slaveTracker.getServerStatuses(10, 0).toSeq ===
+    assert(slaveTracker.getServerStatuses(shuffleId, 0).toSeq ===
       Seq((BlockManagerId("a", "hostA", 1000, 0), size1000)))
 
-    masterTracker.unregisterMapOutput(10, 0, BlockManagerId("a", "hostA", 1000, 0))
+    masterTracker.unregisterMapOutput(shuffleId, 0, BlockManagerId("a", "hostA", 1000, 0))
     masterTracker.incrementEpoch()
     slaveTracker.updateEpoch(masterTracker.getEpoch)
-    intercept[FetchFailedException] { slaveTracker.getServerStatuses(10, 0) }
+    intercept[FetchFailedException] { slaveTracker.getServerStatuses(shuffleId, 0) }
 
     // failure should be cached
-    intercept[FetchFailedException] { slaveTracker.getServerStatuses(10, 0) }
+    intercept[FetchFailedException] { slaveTracker.getServerStatuses(shuffleId, 0) }
   }
 
+  def log(msg: String) = logger.info(msg)
+
   test("remote fetch below akka frame size") {
     val newConf = new SparkConf
     newConf.set("spark.akka.frameSize", "1")
diff --git a/pom.xml b/pom.xml
index 23d724dad7722..ae97bf03c53a2 100644
--- a/pom.xml
+++ b/pom.xml
@@ -1069,16 +1069,6 @@
       </properties>
     </profile>
 
-    <profile>
-      <id>hadoop-2.3-cdh5.0.0</id>
-      <properties>
-        <hadoop.version>2.3.0-cdh5.0.0</hadoop.version>
-        <protobuf.version>2.5.0</protobuf.version>
-        <jets3t.version>0.9.0</jets3t.version>
-        <hbase.version>0.96.1.1-cdh5.0.0</hbase.version>
-      </properties>
-    </profile>
-
     <profile>
       <id>hadoop-2.4</id>
       <properties>

From afe17e27bb0a9314b565dec96d17f9e3224d33bb Mon Sep 17 00:00:00 2001
From: Stephen Boesch <javadba@gmail.com>
Date: Fri, 25 Jul 2014 02:15:39 -0700
Subject: [PATCH 09/22] Updated concurrency fix for using same monitor on the
 synchronized and wait logic

---
 .../org/apache/spark/MapOutputTracker.scala      | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/MapOutputTracker.scala b/core/src/main/scala/org/apache/spark/MapOutputTracker.scala
index 560782079ff31..681d75d48e5ba 100644
--- a/core/src/main/scala/org/apache/spark/MapOutputTracker.scala
+++ b/core/src/main/scala/org/apache/spark/MapOutputTracker.scala
@@ -18,6 +18,7 @@
 package org.apache.spark
 
 import java.io._
+import java.util.concurrent.ConcurrentSkipListSet
 import java.util.zip.{GZIPInputStream, GZIPOutputStream}
 
 import scala.collection.mutable.{HashSet, HashMap, Map}
@@ -95,7 +96,7 @@ private[spark] abstract class MapOutputTracker(conf: SparkConf) extends Logging
   protected val epochLock = new AnyRef
 
   /** Remembers which map output locations are currently being fetched on a worker. */
-  private val fetching = new HashSet[Int]
+  private val fetching = new ConcurrentSkipListSet[Int]
 
   /**
    * Send a message to the trackerActor and get its result within a default timeout, or
@@ -130,12 +131,13 @@ private[spark] abstract class MapOutputTracker(conf: SparkConf) extends Logging
     if (statuses == null) {
       logInfo("Don't have map outputs for shuffle " + shuffleId + ", fetching them")
       var fetchedStatuses: Array[MapStatus] = null
-      shuffleId.toString.intern.synchronized {
+      val monitor = shuffleId.toString.intern
+      monitor.synchronized {
         if (fetching.contains(shuffleId)) {
           // Someone else is fetching it; wait for them to be done
           while (fetching.contains(shuffleId)) {
             try {
-              fetching.wait()
+              monitor.wait()
             } catch {
               case e: InterruptedException =>
             }
@@ -147,7 +149,7 @@ private[spark] abstract class MapOutputTracker(conf: SparkConf) extends Logging
         fetchedStatuses = mapStatuses.get(shuffleId).orNull
         if (fetchedStatuses == null) {
           // We have to do the fetch, get others to wait for us.
-          fetching += shuffleId
+          fetching.add(shuffleId)
         }
       }
 
@@ -162,9 +164,9 @@ private[spark] abstract class MapOutputTracker(conf: SparkConf) extends Logging
           logInfo("Got the output locations")
           mapStatuses.put(shuffleId, fetchedStatuses)
         } finally {
-          fetching.synchronized {
-            fetching -= shuffleId
-            fetching.notifyAll()
+          monitor.synchronized {
+            fetching.remove(shuffleId)
+            monitor.notifyAll()
           }
         }
       }

From 0d9db983b2adc5fd8223181a1dcc1a643b856e40 Mon Sep 17 00:00:00 2001
From: Stephen Boesch <javadba@gmail.com>
Date: Fri, 1 Aug 2014 14:56:05 -0700
Subject: [PATCH 10/22] Manually revert custom changes to master

---
 .../org/apache/spark/MapOutputTracker.scala   | 16 ++---
 .../spark/executor/MesosExecutorBackend.scala |  2 +-
 .../cluster/mesos/MesosSchedulerBackend.scala |  4 +-
 .../apache/spark/MapOutputTrackerSuite.scala  | 67 ++-----------------
 pom.xml                                       | 10 +++
 5 files changed, 27 insertions(+), 72 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/MapOutputTracker.scala b/core/src/main/scala/org/apache/spark/MapOutputTracker.scala
index 681d75d48e5ba..894091761485d 100644
--- a/core/src/main/scala/org/apache/spark/MapOutputTracker.scala
+++ b/core/src/main/scala/org/apache/spark/MapOutputTracker.scala
@@ -18,7 +18,6 @@
 package org.apache.spark
 
 import java.io._
-import java.util.concurrent.ConcurrentSkipListSet
 import java.util.zip.{GZIPInputStream, GZIPOutputStream}
 
 import scala.collection.mutable.{HashSet, HashMap, Map}
@@ -96,7 +95,7 @@ private[spark] abstract class MapOutputTracker(conf: SparkConf) extends Logging
   protected val epochLock = new AnyRef
 
   /** Remembers which map output locations are currently being fetched on a worker. */
-  private val fetching = new ConcurrentSkipListSet[Int]
+  private val fetching = new HashSet[Int]
 
   /**
    * Send a message to the trackerActor and get its result within a default timeout, or
@@ -131,13 +130,12 @@ private[spark] abstract class MapOutputTracker(conf: SparkConf) extends Logging
     if (statuses == null) {
       logInfo("Don't have map outputs for shuffle " + shuffleId + ", fetching them")
       var fetchedStatuses: Array[MapStatus] = null
-      val monitor = shuffleId.toString.intern
-      monitor.synchronized {
+      fetching.synchronized {
         if (fetching.contains(shuffleId)) {
           // Someone else is fetching it; wait for them to be done
           while (fetching.contains(shuffleId)) {
             try {
-              monitor.wait()
+              fetching.wait()
             } catch {
               case e: InterruptedException =>
             }
@@ -149,7 +147,7 @@ private[spark] abstract class MapOutputTracker(conf: SparkConf) extends Logging
         fetchedStatuses = mapStatuses.get(shuffleId).orNull
         if (fetchedStatuses == null) {
           // We have to do the fetch, get others to wait for us.
-          fetching.add(shuffleId)
+          fetching += shuffleId
         }
       }
 
@@ -164,9 +162,9 @@ private[spark] abstract class MapOutputTracker(conf: SparkConf) extends Logging
           logInfo("Got the output locations")
           mapStatuses.put(shuffleId, fetchedStatuses)
         } finally {
-          monitor.synchronized {
-            fetching.remove(shuffleId)
-            monitor.notifyAll()
+          fetching.synchronized {
+            fetching -= shuffleId
+            fetching.notifyAll()
           }
         }
       }
diff --git a/core/src/main/scala/org/apache/spark/executor/MesosExecutorBackend.scala b/core/src/main/scala/org/apache/spark/executor/MesosExecutorBackend.scala
index a42c8b43bbf7f..8c44c1696c833 100644
--- a/core/src/main/scala/org/apache/spark/executor/MesosExecutorBackend.scala
+++ b/core/src/main/scala/org/apache/spark/executor/MesosExecutorBackend.scala
@@ -41,7 +41,7 @@ private[spark] class MesosExecutorBackend
     driver.sendStatusUpdate(MesosTaskStatus.newBuilder()
       .setTaskId(mesosTaskId)
       .setState(TaskState.toMesos(state))
-      .setData(ByteString.copyFrom(data))
+//      .setData(ByteString.copyFrom(data))
       .build())
   }
 
diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackend.scala
index c717e7c621a8f..8afe2ecfec40a 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackend.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackend.scala
@@ -116,7 +116,7 @@ private[spark] class MesosSchedulerBackend(
     ExecutorInfo.newBuilder()
       .setExecutorId(ExecutorID.newBuilder().setValue(execId).build())
       .setCommand(command)
-      .setData(ByteString.copyFrom(createExecArg()))
+//      .setData(ByteString.copyFrom(createExecArg()))
       .addResources(memory)
       .build()
   }
@@ -253,7 +253,7 @@ private[spark] class MesosSchedulerBackend(
       .setExecutor(createExecutorInfo(slaveId))
       .setName(task.name)
       .addResources(cpuResource)
-      .setData(ByteString.copyFrom(task.serializedTask))
+//      .setData(ByteString.copyFrom(task.serializedTask))
       .build()
   }
 
diff --git a/core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala b/core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala
index af3e2ecb80589..9702838085627 100644
--- a/core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala
+++ b/core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala
@@ -17,11 +17,6 @@
 
 package org.apache.spark
 
-import java.util.Date
-import java.util.concurrent._
-
-import org.apache.log4j.Logger
-
 import scala.concurrent.Await
 
 import akka.actor._
@@ -34,7 +29,6 @@ import org.apache.spark.storage.BlockManagerId
 import org.apache.spark.util.AkkaUtils
 
 class MapOutputTrackerSuite extends FunSuite with LocalSparkContext {
-  val logger = Logger.getLogger(getClass.getName)
   private val conf = new SparkConf
   test("compressSize") {
     assert(MapOutputTracker.compressSize(0L) === 0)
@@ -143,76 +137,29 @@ class MapOutputTrackerSuite extends FunSuite with LocalSparkContext {
     val timeout = AkkaUtils.lookupTimeout(conf)
     slaveTracker.trackerActor = Await.result(selection.resolveOne(timeout), timeout)
 
-    // Test single shuffle execution
-    val shuffleId = 10
-    val start = new Date().getTime
-    invokeRemoteFetch(masterTracker, slaveTracker, shuffleId)
-    val singleFetchDuration = new Date().getTime - start
-
-    // Test Parallel execution of shuffles
-    val NShuffles = 20
-    import collection.mutable.ArrayBuffer
-    val threads = new ArrayBuffer[Thread](NShuffles)
-    val barrier = new CyclicBarrier(NShuffles)
-    val latch = new CountDownLatch(NShuffles)
-
-    class ShuffleTesterThread(threadNum: Int, barrier: CyclicBarrier, latch : CountDownLatch)
-      extends Thread(s"ShuffleTester$threadNum") {
-      override def run() = {
-        val shuffleId = 20+threadNum
-        barrier.await
-
-        invokeRemoteFetch(masterTracker, slaveTracker, shuffleId)
-        latch.countDown()
-      }
-    }
-
-    for (shuffle <- 1 to NShuffles) {
-      val t = new ShuffleTesterThread(shuffle, barrier, latch)
-      threads += t
-      t.start()
-    }
-    val pstart = new Date().getTime
-    latch.await(10, TimeUnit.SECONDS)
-    assert(latch.getCount == 0, "Not all shuffles completed within allowed time period")
-    val parallelFetchDuration = new Date().getTime - pstart
-    log(s"All $NShuffles fetches Completed: duration=$parallelFetchDuration "
-      + s"(vs single fetch=$singleFetchDuration)")
-        assert (parallelFetchDuration > singleFetchDuration
-          && parallelFetchDuration <= (NShuffles * 0.4) * singleFetchDuration,
-        "Parallel remote fetch should show strong sub-linear execution time increase "
-          + s" vs number of remote Fetches. "
-          + s"Actual: single fetch=$singleFetchDuration $NShuffles-threads=$parallelFetchDuration")
-
-  }
-
-  def invokeRemoteFetch(masterTracker: MapOutputTrackerMaster,
-                        slaveTracker: MapOutputTrackerWorker, shuffleId: Int): Unit = {
-    masterTracker.registerShuffle(shuffleId, 1)
+    masterTracker.registerShuffle(10, 1)
     masterTracker.incrementEpoch()
     slaveTracker.updateEpoch(masterTracker.getEpoch)
-    intercept[FetchFailedException] { slaveTracker.getServerStatuses(shuffleId, 0) }
+    intercept[FetchFailedException] { slaveTracker.getServerStatuses(10, 0) }
 
     val compressedSize1000 = MapOutputTracker.compressSize(1000L)
     val size1000 = MapOutputTracker.decompressSize(compressedSize1000)
-    masterTracker.registerMapOutput(shuffleId, 0, new MapStatus(
+    masterTracker.registerMapOutput(10, 0, new MapStatus(
       BlockManagerId("a", "hostA", 1000, 0), Array(compressedSize1000)))
     masterTracker.incrementEpoch()
     slaveTracker.updateEpoch(masterTracker.getEpoch)
-    assert(slaveTracker.getServerStatuses(shuffleId, 0).toSeq ===
+    assert(slaveTracker.getServerStatuses(10, 0).toSeq ===
       Seq((BlockManagerId("a", "hostA", 1000, 0), size1000)))
 
-    masterTracker.unregisterMapOutput(shuffleId, 0, BlockManagerId("a", "hostA", 1000, 0))
+    masterTracker.unregisterMapOutput(10, 0, BlockManagerId("a", "hostA", 1000, 0))
     masterTracker.incrementEpoch()
     slaveTracker.updateEpoch(masterTracker.getEpoch)
-    intercept[FetchFailedException] { slaveTracker.getServerStatuses(shuffleId, 0) }
+    intercept[FetchFailedException] { slaveTracker.getServerStatuses(10, 0) }
 
     // failure should be cached
-    intercept[FetchFailedException] { slaveTracker.getServerStatuses(shuffleId, 0) }
+    intercept[FetchFailedException] { slaveTracker.getServerStatuses(10, 0) }
   }
 
-  def log(msg: String) = logger.info(msg)
-
   test("remote fetch below akka frame size") {
     val newConf = new SparkConf
     newConf.set("spark.akka.frameSize", "1")
diff --git a/pom.xml b/pom.xml
index ae97bf03c53a2..23d724dad7722 100644
--- a/pom.xml
+++ b/pom.xml
@@ -1069,6 +1069,16 @@
       </properties>
     </profile>
 
+    <profile>
+      <id>hadoop-2.3-cdh5.0.0</id>
+      <properties>
+        <hadoop.version>2.3.0-cdh5.0.0</hadoop.version>
+        <protobuf.version>2.5.0</protobuf.version>
+        <jets3t.version>0.9.0</jets3t.version>
+        <hbase.version>0.96.1.1-cdh5.0.0</hbase.version>
+      </properties>
+    </profile>
+
     <profile>
       <id>hadoop-2.4</id>
       <properties>

From b08c87f2f620c225b5cd7c420f42ca65676f4ebc Mon Sep 17 00:00:00 2001
From: Stephen Boesch <javadba@gmail.com>
Date: Fri, 1 Aug 2014 15:36:52 -0700
Subject: [PATCH 11/22] Revert "update pom.xml for hadoop-2.3-cdh50.0 and hbase
 0.96.1.1"

This reverts commit a91f6a39af4e799617fb6f8cb490aaedfbb2af4e.
---
 pom.xml | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/pom.xml b/pom.xml
index 23d724dad7722..ae97bf03c53a2 100644
--- a/pom.xml
+++ b/pom.xml
@@ -1069,16 +1069,6 @@
       </properties>
     </profile>
 
-    <profile>
-      <id>hadoop-2.3-cdh5.0.0</id>
-      <properties>
-        <hadoop.version>2.3.0-cdh5.0.0</hadoop.version>
-        <protobuf.version>2.5.0</protobuf.version>
-        <jets3t.version>0.9.0</jets3t.version>
-        <hbase.version>0.96.1.1-cdh5.0.0</hbase.version>
-      </properties>
-    </profile>
-
     <profile>
       <id>hadoop-2.4</id>
       <properties>

From 1e10e0024cb6d2f354f9aea72fdff6f6b9ecab6c Mon Sep 17 00:00:00 2001
From: Stephen Boesch <javadba@gmail.com>
Date: Fri, 1 Aug 2014 15:37:13 -0700
Subject: [PATCH 12/22] Revert "SPARK-2638 MapOutputTracker concurrency
 improvement"

This reverts commit 31dcd4fbad565e9dca06307a516a682d86dcfc09.
---
 pom.xml | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/pom.xml b/pom.xml
index ae97bf03c53a2..23d724dad7722 100644
--- a/pom.xml
+++ b/pom.xml
@@ -1069,6 +1069,16 @@
       </properties>
     </profile>
 
+    <profile>
+      <id>hadoop-2.3-cdh5.0.0</id>
+      <properties>
+        <hadoop.version>2.3.0-cdh5.0.0</hadoop.version>
+        <protobuf.version>2.5.0</protobuf.version>
+        <jets3t.version>0.9.0</jets3t.version>
+        <hbase.version>0.96.1.1-cdh5.0.0</hbase.version>
+      </properties>
+    </profile>
+
     <profile>
       <id>hadoop-2.4</id>
       <properties>

From dea01f506837cf0e14e117bc7783461909f44041 Mon Sep 17 00:00:00 2001
From: Stephen Boesch <javadba@gmail.com>
Date: Fri, 1 Aug 2014 15:52:33 -0700
Subject: [PATCH 13/22] Manually revert custom changes to master

---
 .../apache/spark/executor/MesosExecutorBackend.scala   |  2 +-
 .../cluster/mesos/MesosSchedulerBackend.scala          |  4 ++--
 pom.xml                                                | 10 ----------
 3 files changed, 3 insertions(+), 13 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/executor/MesosExecutorBackend.scala b/core/src/main/scala/org/apache/spark/executor/MesosExecutorBackend.scala
index 8c44c1696c833..a42c8b43bbf7f 100644
--- a/core/src/main/scala/org/apache/spark/executor/MesosExecutorBackend.scala
+++ b/core/src/main/scala/org/apache/spark/executor/MesosExecutorBackend.scala
@@ -41,7 +41,7 @@ private[spark] class MesosExecutorBackend
     driver.sendStatusUpdate(MesosTaskStatus.newBuilder()
       .setTaskId(mesosTaskId)
       .setState(TaskState.toMesos(state))
-//      .setData(ByteString.copyFrom(data))
+      .setData(ByteString.copyFrom(data))
       .build())
   }
 
diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackend.scala
index 8afe2ecfec40a..c717e7c621a8f 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackend.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackend.scala
@@ -116,7 +116,7 @@ private[spark] class MesosSchedulerBackend(
     ExecutorInfo.newBuilder()
       .setExecutorId(ExecutorID.newBuilder().setValue(execId).build())
       .setCommand(command)
-//      .setData(ByteString.copyFrom(createExecArg()))
+      .setData(ByteString.copyFrom(createExecArg()))
       .addResources(memory)
       .build()
   }
@@ -253,7 +253,7 @@ private[spark] class MesosSchedulerBackend(
       .setExecutor(createExecutorInfo(slaveId))
       .setName(task.name)
       .addResources(cpuResource)
-//      .setData(ByteString.copyFrom(task.serializedTask))
+      .setData(ByteString.copyFrom(task.serializedTask))
       .build()
   }
 
diff --git a/pom.xml b/pom.xml
index 23d724dad7722..ae97bf03c53a2 100644
--- a/pom.xml
+++ b/pom.xml
@@ -1069,16 +1069,6 @@
       </properties>
     </profile>
 
-    <profile>
-      <id>hadoop-2.3-cdh5.0.0</id>
-      <properties>
-        <hadoop.version>2.3.0-cdh5.0.0</hadoop.version>
-        <protobuf.version>2.5.0</protobuf.version>
-        <jets3t.version>0.9.0</jets3t.version>
-        <hbase.version>0.96.1.1-cdh5.0.0</hbase.version>
-      </properties>
-    </profile>
-
     <profile>
       <id>hadoop-2.4</id>
       <properties>

From 2fc131ec57d8c298c0e3759ef177f62010ba9a09 Mon Sep 17 00:00:00 2001
From: Stephen Boesch <javadba@gmail.com>
Date: Fri, 1 Aug 2014 16:04:51 -0700
Subject: [PATCH 14/22] Do this again: Manually revert custom changes to master

---
 .../apache/spark/executor/MesosExecutorBackend.scala   |  2 +-
 .../cluster/mesos/MesosSchedulerBackend.scala          |  4 ++--
 pom.xml                                                | 10 ----------
 3 files changed, 3 insertions(+), 13 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/executor/MesosExecutorBackend.scala b/core/src/main/scala/org/apache/spark/executor/MesosExecutorBackend.scala
index 8c44c1696c833..a42c8b43bbf7f 100644
--- a/core/src/main/scala/org/apache/spark/executor/MesosExecutorBackend.scala
+++ b/core/src/main/scala/org/apache/spark/executor/MesosExecutorBackend.scala
@@ -41,7 +41,7 @@ private[spark] class MesosExecutorBackend
     driver.sendStatusUpdate(MesosTaskStatus.newBuilder()
       .setTaskId(mesosTaskId)
       .setState(TaskState.toMesos(state))
-//      .setData(ByteString.copyFrom(data))
+      .setData(ByteString.copyFrom(data))
       .build())
   }
 
diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackend.scala
index 8afe2ecfec40a..c717e7c621a8f 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackend.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackend.scala
@@ -116,7 +116,7 @@ private[spark] class MesosSchedulerBackend(
     ExecutorInfo.newBuilder()
       .setExecutorId(ExecutorID.newBuilder().setValue(execId).build())
       .setCommand(command)
-//      .setData(ByteString.copyFrom(createExecArg()))
+      .setData(ByteString.copyFrom(createExecArg()))
       .addResources(memory)
       .build()
   }
@@ -253,7 +253,7 @@ private[spark] class MesosSchedulerBackend(
       .setExecutor(createExecutorInfo(slaveId))
       .setName(task.name)
       .addResources(cpuResource)
-//      .setData(ByteString.copyFrom(task.serializedTask))
+      .setData(ByteString.copyFrom(task.serializedTask))
       .build()
   }
 
diff --git a/pom.xml b/pom.xml
index 23d724dad7722..ae97bf03c53a2 100644
--- a/pom.xml
+++ b/pom.xml
@@ -1069,16 +1069,6 @@
       </properties>
     </profile>
 
-    <profile>
-      <id>hadoop-2.3-cdh5.0.0</id>
-      <properties>
-        <hadoop.version>2.3.0-cdh5.0.0</hadoop.version>
-        <protobuf.version>2.5.0</protobuf.version>
-        <jets3t.version>0.9.0</jets3t.version>
-        <hbase.version>0.96.1.1-cdh5.0.0</hbase.version>
-      </properties>
-    </profile>
-
     <profile>
       <id>hadoop-2.4</id>
       <properties>

From 42f5016d0fb000c80bbd9eb4833511b9429776b0 Mon Sep 17 00:00:00 2001
From: Stephen Boesch <javadba@gmail.com>
Date: Thu, 24 Jul 2014 23:26:11 -0700
Subject: [PATCH 15/22] SPARK-2686 Add Length support to Spark SQL and HQL and
 Strlen support to SQL

---
 .../apache/spark/sql/catalyst/SqlParser.scala | 10 +++
 .../sql/catalyst/expressions/Expression.scala |  2 +-
 .../expressions/stringOperations.scala        | 81 ++++++++++++++++++-
 .../sql/catalyst/optimizer/Optimizer.scala    |  3 +
 .../ExpressionEvaluationSuite.scala           | 29 +++++++
 .../optimizer/ConstantFoldingSuite.scala      | 12 ++-
 .../org/apache/spark/sql/QueryTest.scala      |  2 +
 .../org/apache/spark/sql/SQLQuerySuite.scala  | 28 +++++++
 .../org/apache/spark/sql/hive/HiveQl.scala    |  8 +-
 9 files changed, 170 insertions(+), 5 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala
index 2c73a80f64ebf..a73514f4fc197 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala
@@ -122,6 +122,9 @@ class SqlParser extends StandardTokenParsers with PackratParsers {
   protected val EXCEPT = Keyword("EXCEPT")
   protected val SUBSTR = Keyword("SUBSTR")
   protected val SUBSTRING = Keyword("SUBSTRING")
+  protected val LEN = Keyword("LEN")
+  protected val LENGTH = Keyword("LENGTH")
+  protected val STRLEN = Keyword("STRLEN")
 
   // Use reflection to find the reserved words defined in this class.
   protected val reservedWords =
@@ -323,6 +326,13 @@ class SqlParser extends StandardTokenParsers with PackratParsers {
     (SUBSTR | SUBSTRING) ~> "(" ~> expression ~ "," ~ expression ~ "," ~ expression <~ ")" ^^ {
       case s ~ "," ~ p ~ "," ~ l => Substring(s,p,l)
     } |
+    (LEN | LENGTH) ~> "(" ~> expression <~ ")" ^^ { case s => Length(s) } |
+    STRLEN ~> "(" ~> expression ~ "," ~  expression <~ ")" ^^ {
+      case s ~ "," ~  e => Strlen(s, e)
+    } |
+    STRLEN ~> "(" ~> expression  <~ ")" ^^ {
+      case s  => Strlen(s, Literal(StrlenConstants.DefaultEncoding))
+    } |
     ident ~ "(" ~ repsep(expression, ",") <~ ")" ^^ {
       case udfName ~ _ ~ exprs => UnresolvedFunction(udfName, exprs)
     }
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala
index ba62dabe3dd6a..5829afec7d928 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala
@@ -37,7 +37,7 @@ abstract class Expression extends TreeNode[Expression] {
    *  - A [[BinaryExpression]] is foldable if its both left and right child are foldable
    *  - A [[Not]], [[IsNull]], or [[IsNotNull]] is foldable if its child is foldable
    *  - A [[Literal]] is foldable
-   *  - A [[Cast]] or [[UnaryMinus]] is foldable if its child is foldable
+   *  - A [[Cast]], [[UnaryMinus]], [[Length]], or [[Strlen]] is foldable if its child is foldable
    */
   def foldable: Boolean = false
   def nullable: Boolean
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala
index 97fc3a3b14b88..a61435e7250f3 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala
@@ -17,13 +17,16 @@
 
 package org.apache.spark.sql.catalyst.expressions
 
+import java.io.UnsupportedEncodingException
 import java.util.regex.Pattern
 
+import org.apache.spark.Logging
+
 import scala.collection.IndexedSeqOptimized
 
 
 import org.apache.spark.sql.catalyst.analysis.UnresolvedException
-import org.apache.spark.sql.catalyst.types.{BinaryType, BooleanType, DataType, StringType}
+import org.apache.spark.sql.catalyst.types.{BinaryType, BooleanType, DataType, StringType, IntegerType}
 
 trait StringRegexExpression {
   self: BinaryExpression =>
@@ -208,6 +211,82 @@ case class EndsWith(left: Expression, right: Expression)
   def compare(l: String, r: String) = l.endsWith(r)
 }
 
+/**
+ * Returns a string's byte count (platform default charset), or a non-string's toString length
+ */
+case class Length(child: Expression) extends UnaryExpression {
+
+  type EvaluatedType = Any
+
+  override def dataType = IntegerType
+
+  override def foldable = child.foldable
+
+  override def nullable = child.nullable
+
+  override def toString = s"Length($child)"
+
+  override def eval(input: Row): EvaluatedType = {
+    val string = child.eval(input)
+    if (string == null) {
+      null
+    } else if (!string.isInstanceOf[String]) {
+      string.toString.length
+    } else {
+      new String(string.toString.getBytes, StrlenConstants.DefaultEncoding).length
+    }
+  }
+
+}
+
+object StrlenConstants {
+  val DefaultEncoding = "ISO-8859-1"
+}
+
+/**
+ * A function that returns the number of characters in a string expression
+ */
+case class Strlen(child: Expression, encoding : Expression) extends UnaryExpression
+  with Logging {
+
+  type EvaluatedType = Any
+
+  override def dataType = IntegerType
+
+  override def foldable = child.foldable
+
+  override def nullable = true
+
+  override def toString = s"Strlen($child, $encoding)"
+
+  override def eval(input: Row): EvaluatedType = {
+    val string = child.eval(input)
+    if (string == null) {
+      null
+    } else if (!string.isInstanceOf[String]) {
+      log.debug(s"Non-string value [$string] provided to strlen")
+      null
+    } else {
+      var evalEncoding = encoding.eval(input)
+      val strEncoding =
+        if (evalEncoding != null) {
+          evalEncoding.toString
+        } else {
+          StrlenConstants.DefaultEncoding
+        }
+      val s: String = ""
+      try {
+        new String(string.asInstanceOf[String].getBytes, strEncoding).length
+      } catch {
+        case ue : UnsupportedEncodingException => {
+          log.debug(s"strlen: Caught UnsupportedEncodingException for encoding=[$strEncoding]")
+          null
+        }
+      }
+    }
+  }
+}
+
 /**
  * A function that takes a substring of its first argument starting at a given position.
  * Defined for String and Binary types.
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
index 5f86d6047cb9c..cd54808d3dd39 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
@@ -174,6 +174,9 @@ object NullPropagation extends Rule[LogicalPlan] {
       case e @ Substring(_, Literal(null, _), _) => Literal(null, e.dataType)
       case e @ Substring(_, _, Literal(null, _)) => Literal(null, e.dataType)
 
+      case e @ Length(Literal(null, _)) => Literal(null, e.dataType)
+      case e @ Strlen(Literal(null, _),_) => Literal(null, e.dataType)
+
       // Put exceptional cases above if any
       case e: BinaryArithmetic => e.children match {
         case Literal(null, _) :: right :: Nil => Literal(null, e.dataType)
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala
index 999c9fff38d60..c5016e6eeb5e2 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala
@@ -18,11 +18,14 @@
 package org.apache.spark.sql.catalyst.expressions
 
 import java.sql.Timestamp
+import java.util.concurrent.atomic.AtomicInteger
 
 import org.scalatest.FunSuite
 
 import org.apache.spark.sql.catalyst.types._
 
+import scala.collection.mutable.ArrayBuffer
+
 /* Implicit conversions */
 import org.apache.spark.sql.catalyst.dsl.expressions._
 
@@ -567,4 +570,30 @@ class ExpressionEvaluationSuite extends FunSuite {
     checkEvaluation(s.substring(0, 2), "ex", row)
     checkEvaluation(s.substring(0), "example", row)
   }
+
+  test("Length") {
+    checkEvaluation(Length(Literal(null, IntegerType)), null)
+    checkEvaluation(Length(Literal(0,IntegerType)), 1)
+    checkEvaluation(Length(Literal(12,IntegerType)), 2)
+    checkEvaluation(Length(Literal(123,IntegerType)), 3)
+    checkEvaluation(Length(Literal(12.4F, FloatType)), 4)
+    checkEvaluation(Length(Literal(12345678901L,LongType)), 11)
+    checkEvaluation(Length(Literal(1234567890.2D, DoubleType)), 14)
+    checkEvaluation(Length(Literal("1234567890ABC",StringType)), 13)
+    checkEvaluation(Length(Literal("\uF93D\uF936\uF949\uF942",StringType)), 4)
+  }
+
+  test("Strlen") {
+    checkEvaluation(Strlen(Literal(null, StringType), "ISO-8859-1"), null)
+    checkEvaluation(Strlen(Literal(null, StringType), "UTF-8"), null)
+    checkEvaluation(Strlen(Literal(null, StringType), "UTF-16"), null)
+    checkEvaluation(Strlen(Literal("1234567890ABC", StringType), "ISO-8859-1"), 13)
+    checkEvaluation(Strlen(Literal("1234567890ABC", StringType), "UTF-8"), 13)
+    checkEvaluation(Strlen(Literal("1234567890ABC", StringType), "UTF-16"), 7)
+    checkEvaluation(Strlen(Literal("\uF93D\uF936\uF949\uF942", StringType), "ISO-8859-1"), 4)
+    checkEvaluation(Strlen(Literal("\uF93D\uF936\uF949\uF942", StringType), "UTF-8"), 4)
+    checkEvaluation(Strlen(Literal("\uF93D\uF936\uF949\uF942", StringType), "UTF-16"), 2)
+    checkEvaluation(Strlen(Literal("\uF93D\uF936\uF949\uF942", StringType), "UTF-32"), 1)
+  }
+
 }
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala
index 0a27cce337482..08a8868f4ef68 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala
@@ -208,7 +208,11 @@ class ConstantFoldingSuite extends PlanTest {
           Substring("abc", 0, Literal(null, IntegerType)) as 'c18,
 
           Contains(Literal(null, StringType), "abc") as 'c19,
-          Contains("abc", Literal(null, StringType)) as 'c20
+          Contains("abc", Literal(null, StringType)) as 'c20,
+
+          Length(Literal(null, IntegerType)) as 'c21,
+          Strlen(Literal(null, StringType), "ISO-8859-1") as 'c22
+
         )
 
     val optimized = Optimize(originalQuery.analyze)
@@ -243,7 +247,11 @@ class ConstantFoldingSuite extends PlanTest {
           Literal(null, StringType) as 'c18,
 
           Literal(null, BooleanType) as 'c19,
-          Literal(null, BooleanType) as 'c20
+          Literal(null, BooleanType) as 'c20,
+
+          Literal(null, IntegerType) as 'c21,
+          Literal(null, IntegerType) as 'c22
+
         ).analyze
 
     comparePlans(optimized, correctAnswer)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/QueryTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/QueryTest.scala
index 1fd8d27b34c59..e7aa3265d7b60 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/QueryTest.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/QueryTest.scala
@@ -17,6 +17,8 @@
 
 package org.apache.spark.sql
 
+import java.io.{PrintWriter, StringWriter}
+
 import org.apache.spark.sql.catalyst.plans._
 import org.apache.spark.sql.catalyst.util._
 
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index 5c571d35d1bb9..ce21aa49fbd70 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -47,6 +47,34 @@ class SQLQuerySuite extends QueryTest {
     checkAnswer(
       sql("SELECT substring(tableName, 3) FROM tableName"),
       "st")
+    checkAnswer(
+      sql("SELECT substring(tableName, 2) FROM tableName group by substring(tableName, 2)"),
+      "est")
+  }
+
+  test("SPARK-TBD Added Parser of SQL LENGTH()") {
+    checkAnswer(
+       sql("SELECT length(key) as keylen from testData where key = 100"), 3)
+    checkAnswer(
+      sql("SELECT len(key), count(*) as cnt from testData where key <= 100 group by len(key)"),
+      Seq(Seq(1,9),Seq(2,90), Seq(3,1)))
+    checkAnswer(
+      sql("SELECT max(length(key * key) - len(key)) from testData where key <= 100"), 2)
+    checkAnswer(
+      sql("SELECT min(Length(s)) FROM nullableRepeatedData where s is not null"), 4)
+    checkAnswer(
+      sql("SELECT max(LENGTH(s)) FROM nullableRepeatedData"), 4)
+  }
+
+  test("SPARK-TBD Added Parser of SQL STRLEN()") {
+    checkAnswer(
+      sql("SELECT StrLen(s) from repeatedData"), Seq(Seq(4),Seq(4)))
+    checkAnswer(
+      sql("SELECT StrLen(s,'UTF-8') from repeatedData"), Seq(Seq(4),Seq(4)))
+    checkAnswer(
+      sql("SELECT max(StrLen(s,'UTF-8')) from nullStrings"), 3)
+    checkAnswer(
+      sql("SELECT strlen('a','ISO-8859-1') + strlen('abcde','ISO-8859-1') FROM testData limit 1"), 6)
   }
 
   test("index into array") {
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
index 3d2eb1eefaeda..29f6c532627fd 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
@@ -866,6 +866,7 @@ private[hive] object HiveQl {
   val WHEN = "(?i)WHEN".r
   val CASE = "(?i)CASE".r
   val SUBSTR = "(?i)SUBSTR(?:ING)?".r
+  val STRLEN = "(?i)STRLEN".r
 
   protected def nodeToExpr(node: Node): Expression = node match {
     /* Attribute References */
@@ -995,8 +996,13 @@ private[hive] object HiveQl {
     case Token("TOK_FUNCTION", Token(RAND(), Nil) :: Nil) => Rand
     case Token("TOK_FUNCTION", Token(SUBSTR(), Nil) :: string :: pos :: Nil) => 
       Substring(nodeToExpr(string), nodeToExpr(pos), Literal(Integer.MAX_VALUE, IntegerType))
-    case Token("TOK_FUNCTION", Token(SUBSTR(), Nil) :: string :: pos :: length :: Nil) => 
+    case Token("TOK_FUNCTION", Token(SUBSTR(), Nil) :: string :: pos :: length :: Nil) =>
       Substring(nodeToExpr(string), nodeToExpr(pos), nodeToExpr(length))
+    case Token("TOK_FUNCTION", Token(STRLEN(), Nil) :: string :: Nil) =>
+      Strlen(nodeToExpr(string), Literal(StrlenConstants.DefaultEncoding))
+    case Token("TOK_FUNCTION", Token(STRLEN(), Nil) :: string :: encoding :: Nil) =>
+      Strlen(nodeToExpr(string), nodeToExpr(encoding))
+
 
     /* UDFs - Must be last otherwise will preempt built in functions */
     case Token("TOK_FUNCTION", Token(name, Nil) :: args) =>

From ad3859eb547669125ad77e64c645001c1dbe4dc3 Mon Sep 17 00:00:00 2001
From: Stephen Boesch <javadba@gmail.com>
Date: Fri, 1 Aug 2014 12:28:23 -0700
Subject: [PATCH 16/22] Ongoing work with Ueshin and Marmbrus

---
 .../apache/spark/sql/catalyst/SqlParser.scala | 13 +++---
 .../sql/catalyst/expressions/Expression.scala |  2 +-
 .../expressions/stringOperations.scala        | 42 ++++++++++++-------
 .../sql/catalyst/optimizer/Optimizer.scala    |  2 +-
 .../ExpressionEvaluationSuite.scala           | 22 +++++-----
 .../optimizer/ConstantFoldingSuite.scala      |  2 +-
 .../org/apache/spark/sql/SQLQuerySuite.scala  | 14 +++----
 .../org/apache/spark/sql/hive/HiveQl.scala    | 11 ++---
 8 files changed, 62 insertions(+), 46 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala
index a73514f4fc197..77a3dc2729a48 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala
@@ -124,7 +124,8 @@ class SqlParser extends StandardTokenParsers with PackratParsers {
   protected val SUBSTRING = Keyword("SUBSTRING")
   protected val LEN = Keyword("LEN")
   protected val LENGTH = Keyword("LENGTH")
-  protected val STRLEN = Keyword("STRLEN")
+  protected val CHAR_LEN = Keyword("CHAR_LEN")
+  protected val OCTET_LEN = Keyword("OCTET_LEN")
 
   // Use reflection to find the reserved words defined in this class.
   protected val reservedWords =
@@ -326,12 +327,12 @@ class SqlParser extends StandardTokenParsers with PackratParsers {
     (SUBSTR | SUBSTRING) ~> "(" ~> expression ~ "," ~ expression ~ "," ~ expression <~ ")" ^^ {
       case s ~ "," ~ p ~ "," ~ l => Substring(s,p,l)
     } |
-    (LEN | LENGTH) ~> "(" ~> expression <~ ")" ^^ { case s => Length(s) } |
-    STRLEN ~> "(" ~> expression ~ "," ~  expression <~ ")" ^^ {
-      case s ~ "," ~  e => Strlen(s, e)
+    (LEN | LENGTH | CHAR_LEN) ~> "(" ~> expression <~ ")" ^^ { case s => Length(s) } |
+      OCTET_LEN ~> "(" ~> expression ~ "," ~  expression <~ ")" ^^ {
+      case s ~ "," ~  e => OctetLen(s, e)
     } |
-    STRLEN ~> "(" ~> expression  <~ ")" ^^ {
-      case s  => Strlen(s, Literal(StrlenConstants.DefaultEncoding))
+    OCTET_LEN ~> "(" ~> expression  <~ ")" ^^ {
+      case s  => OctetLen(s, Literal(OctetLenConstants.DefaultEncoding))
     } |
     ident ~ "(" ~ repsep(expression, ",") <~ ")" ^^ {
       case udfName ~ _ ~ exprs => UnresolvedFunction(udfName, exprs)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala
index 5829afec7d928..f3a7233b0b46d 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala
@@ -37,7 +37,7 @@ abstract class Expression extends TreeNode[Expression] {
    *  - A [[BinaryExpression]] is foldable if its both left and right child are foldable
    *  - A [[Not]], [[IsNull]], or [[IsNotNull]] is foldable if its child is foldable
    *  - A [[Literal]] is foldable
-   *  - A [[Cast]] or [[UnaryMinus]] or [[Length/Strlen]] is foldable if its child is foldable
+   *  - A [[Cast]], [[UnaryMinus]], [[Length]] or [[OctetLen]] is foldable if its child is foldable
    */
   def foldable: Boolean = false
   def nullable: Boolean
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala
index a61435e7250f3..99073283b761a 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala
@@ -211,6 +211,7 @@ case class EndsWith(left: Expression, right: Expression)
   def compare(l: String, r: String) = l.endsWith(r)
 }
 
+
 /**
  * A function that returns the number of bytes in an expression
  */
@@ -227,26 +228,40 @@ case class Length(child: Expression) extends UnaryExpression {
   override def toString = s"Length($child)"
 
   override def eval(input: Row): EvaluatedType = {
-    val string = child.eval(input)
-    if (string == null) {
+    val inputVal = child.eval(input)
+    if (inputVal == null) {
       null
-    } else if (!string.isInstanceOf[String]) {
-      string.toString.length
+    } else if (!inputVal.isInstanceOf[String]) {
+      inputVal.toString.length
     } else {
-      new String(string.toString.getBytes, StrlenConstants.DefaultEncoding).length
+      OctetLenUtils.len(inputVal.asInstanceOf[String])
     }
   }
 
 }
 
-object StrlenConstants {
+object OctetLenConstants {
   val DefaultEncoding = "ISO-8859-1"
 }
 
+object OctetLenUtils {
+  /**
+   * Returns the number of Unicode code points in `s`, or null when `s` is null.
+   *
+   * The bytes are requested explicitly as UTF-8: the start-byte test below is only
+   * meaningful for that encoding, so the JVM default charset must not be used.
+   */
+  def len(s : String): Any = {
+    // A UTF-8 continuation byte looks like 10xxxxxx; any other byte begins a code point.
+    @inline def isUtfStartByte(b : Byte) = (b & 0xC0) != 0x80
+    if (s == null) null else s.getBytes("UTF-8").count(isUtfStartByte)
+  }
+}
+
 /**
  * A function that returns the number of characters in a string expression
  */
-case class Strlen(child: Expression, encoding : Expression) extends UnaryExpression
+case class OctetLen(child: Expression, encoding : Expression) extends UnaryExpression
   with Logging {
 
   type EvaluatedType = Any
@@ -257,14 +272,14 @@ case class Strlen(child: Expression, encoding : Expression) extends UnaryExpress
 
   override def nullable = true
 
-  override def toString = s"Strlen($child, $encoding)"
+  override def toString = s"OctetLen($child, $encoding)"
 
   override def eval(input: Row): EvaluatedType = {
     val string = child.eval(input)
     if (string == null) {
       null
     } else if (!string.isInstanceOf[String]) {
-      log.debug(s"Non-string value [$string] provided to strlen")
+      log.debug(s"Non-string value [$string] provided to OctetLen")
       null
     } else {
       var evalEncoding = encoding.eval(input)
@@ -272,18 +287,17 @@ case class Strlen(child: Expression, encoding : Expression) extends UnaryExpress
         if (evalEncoding != null) {
           evalEncoding.toString
         } else {
-          StrlenConstants.DefaultEncoding
+          OctetLenConstants.DefaultEncoding
         }
       val s: String = ""
       try {
-        new String(string.asInstanceOf[String].getBytes, strEncoding).length
+        string.asInstanceOf[String].getBytes(strEncoding).length
       } catch {
         case ue : UnsupportedEncodingException => {
-          log.debug(s"strlen: Caught UnsupportedEncodingException for encoding=[$strEncoding]")
-          null
+          throw new UnsupportedEncodingException(s"OctetLen: Caught UnsupportedEncodingException for encoding=[$strEncoding]")
         }
       }
-    }
+      }
   }
 }
 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
index cd54808d3dd39..d140da368039f 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
@@ -175,7 +175,7 @@ object NullPropagation extends Rule[LogicalPlan] {
       case e @ Substring(_, _, Literal(null, _)) => Literal(null, e.dataType)
 
       case e @ Length(Literal(null, _)) => Literal(null, e.dataType)
-      case e @ Strlen(Literal(null, _),_) => Literal(null, e.dataType)
+      case e @ OctetLen(Literal(null, _),_) => Literal(null, e.dataType)
 
       // Put exceptional cases above if any
       case e: BinaryArithmetic => e.children match {
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala
index c5016e6eeb5e2..df816f411ee6c 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala
@@ -583,17 +583,17 @@ class ExpressionEvaluationSuite extends FunSuite {
     checkEvaluation(Length(Literal("\uF93D\uF936\uF949\uF942",StringType)), 4)
   }
 
-  test("Strlen") {
-    checkEvaluation(Strlen(Literal(null, StringType), "ISO-8859-1"), null)
-    checkEvaluation(Strlen(Literal(null, StringType), "UTF-8"), null)
-    checkEvaluation(Strlen(Literal(null, StringType), "UTF-16"), null)
-    checkEvaluation(Strlen(Literal("1234567890ABC", StringType), "ISO-8859-1"), 13)
-    checkEvaluation(Strlen(Literal("1234567890ABC", StringType), "UTF-8"), 13)
-    checkEvaluation(Strlen(Literal("1234567890ABC", StringType), "UTF-16"), 7)
-    checkEvaluation(Strlen(Literal("\uF93D\uF936\uF949\uF942", StringType), "ISO-8859-1"), 4)
-    checkEvaluation(Strlen(Literal("\uF93D\uF936\uF949\uF942", StringType), "UTF-8"), 4)
-    checkEvaluation(Strlen(Literal("\uF93D\uF936\uF949\uF942", StringType), "UTF-16"), 2)
-    checkEvaluation(Strlen(Literal("\uF93D\uF936\uF949\uF942", StringType), "UTF-32"), 1)
+  test("OctetLen") {
+    checkEvaluation(OctetLen(Literal(null, StringType), "ISO-8859-1"), null)
+    checkEvaluation(OctetLen(Literal(null, StringType), "UTF-8"), null)
+    checkEvaluation(OctetLen(Literal(null, StringType), "UTF-16"), null)
+    checkEvaluation(OctetLen(Literal("1234567890ABC", StringType), "ISO-8859-1"), 13)
+    checkEvaluation(OctetLen(Literal("1234567890ABC", StringType), "UTF-8"), 13)
+    checkEvaluation(OctetLen(Literal("1234567890ABC", StringType), "UTF-16"), 7)
+    checkEvaluation(OctetLen(Literal("\uF93D\uF936\uF949\uF942", StringType), "ISO-8859-1"), 4)
+    checkEvaluation(OctetLen(Literal("\uF93D\uF936\uF949\uF942", StringType), "UTF-8"), 4)
+    checkEvaluation(OctetLen(Literal("\uF93D\uF936\uF949\uF942", StringType), "UTF-16"), 2)
+    checkEvaluation(OctetLen(Literal("\uF93D\uF936\uF949\uF942", StringType), "UTF-32"), 1)
   }
 
 }
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala
index 08a8868f4ef68..4997770d13c5a 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala
@@ -211,7 +211,7 @@ class ConstantFoldingSuite extends PlanTest {
           Contains("abc", Literal(null, StringType)) as 'c20,
 
           Length(Literal(null, IntegerType)) as 'c21,
-          Strlen(Literal(null, StringType), "ISO-8859-1") as 'c22
+          OctetLen(Literal(null, StringType), "ISO-8859-1") as 'c22
 
         )
 
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index ce21aa49fbd70..5a8c3219f0cd3 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -52,9 +52,9 @@ class SQLQuerySuite extends QueryTest {
       "est")
   }
 
-  test("SPARK-TBD Added Parser of SQL LENGTH()") {
+  test("SPARK-2686 Added Parser of SQL LENGTH()") {
     checkAnswer(
-       sql("SELECT length(key) as keylen from testData where key = 100"), 3)
+       sql("SELECT char_length(key) as keylen from testData where key = 100"), 3)
     checkAnswer(
       sql("SELECT len(key), count(*) as cnt from testData where key <= 100 group by len(key)"),
       Seq(Seq(1,9),Seq(2,90), Seq(3,1)))
@@ -66,15 +66,15 @@ class SQLQuerySuite extends QueryTest {
       sql("SELECT max(LENGTH(s)) FROM nullableRepeatedData"), 4)
   }
 
-  test("SPARK-TBD Added Parser of SQL STRLEN()") {
+  test("SPARK-2686 Added Parser of SQL STRLEN()") {
     checkAnswer(
-      sql("SELECT StrLen(s) from repeatedData"), Seq(Seq(4),Seq(4)))
+      sql("SELECT octet_len(s) from repeatedData"), Seq(Seq(4),Seq(4)))
     checkAnswer(
-      sql("SELECT StrLen(s,'UTF-8') from repeatedData"), Seq(Seq(4),Seq(4)))
+      sql("SELECT octet_len(s,'UTF-8') from repeatedData"), Seq(Seq(4),Seq(4)))
     checkAnswer(
-      sql("SELECT max(StrLen(s,'UTF-8')) from nullStrings"), 3)
+      sql("SELECT max(octet_len(s,'UTF-8')) from nullStrings"), 3)
     checkAnswer(
-      sql("SELECT strlen('a','ISO-8859-1') + strlen('abcde','ISO-8859-1') FROM testData limit 1"), 6)
+      sql("SELECT octet_len('a','ISO-8859-1') + strlen('abcde','ISO-8859-1') FROM testData limit 1"), 6)
   }
 
   test("index into array") {
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
index 29f6c532627fd..c7ea1cc59a487 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
@@ -866,7 +866,8 @@ private[hive] object HiveQl {
   val WHEN = "(?i)WHEN".r
   val CASE = "(?i)CASE".r
   val SUBSTR = "(?i)SUBSTR(?:ING)?".r
-  val STRLEN = "(?i)STRLEN".r
+  val CHAR_LEN = "(?i)CHAR_LEN".r
+  val OCTET_LEN = "(?i)OCTET_LEN".r
 
   protected def nodeToExpr(node: Node): Expression = node match {
     /* Attribute References */
@@ -998,10 +999,10 @@ private[hive] object HiveQl {
       Substring(nodeToExpr(string), nodeToExpr(pos), Literal(Integer.MAX_VALUE, IntegerType))
     case Token("TOK_FUNCTION", Token(SUBSTR(), Nil) :: string :: pos :: length :: Nil) =>
       Substring(nodeToExpr(string), nodeToExpr(pos), nodeToExpr(length))
-    case Token("TOK_FUNCTION", Token(STRLEN(), Nil) :: string :: Nil) =>
-      Strlen(nodeToExpr(string), Literal(StrlenConstants.DefaultEncoding))
-    case Token("TOK_FUNCTION", Token(STRLEN(), Nil) :: string :: encoding :: Nil) =>
-      Strlen(nodeToExpr(string), nodeToExpr(encoding))
+    case Token("TOK_FUNCTION", Token(OCTET_LEN(), Nil) :: string :: Nil) =>
+      OctetLen(nodeToExpr(string), Literal(StrlenConstants.DefaultEncoding))
+    case Token("TOK_FUNCTION", Token(OCTET_LEN(), Nil) :: string :: encoding :: Nil) =>
+      OctetLen(nodeToExpr(string), nodeToExpr(encoding))
 
 
     /* UDFs - Must be last otherwise will preempt built in functions */

From 6a6222ade546ab624925c1977ec67f19700d53e1 Mon Sep 17 00:00:00 2001
From: Stephen Boesch <javadba@gmail.com>
Date: Fri, 1 Aug 2014 14:27:35 -0700
Subject: [PATCH 17/22] Ongoing work with Takuya and Michael A

---
 .../apache/spark/sql/catalyst/SqlParser.scala |  14 +-
 .../expressions/stringOperations.scala        |  16 +-
 .../sql/catalyst/optimizer/Optimizer.scala    |   2 +-
 .../ExpressionEvaluationSuite.scala           | 149 ++++++++++--------
 .../optimizer/ConstantFoldingSuite.scala      |   2 +-
 .../org/apache/spark/sql/SQLQuerySuite.scala  |  10 +-
 .../org/apache/spark/sql/hive/HiveQl.scala    |  12 +-
 7 files changed, 110 insertions(+), 95 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala
index 77a3dc2729a48..188953b272dac 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala
@@ -124,8 +124,8 @@ class SqlParser extends StandardTokenParsers with PackratParsers {
   protected val SUBSTRING = Keyword("SUBSTRING")
   protected val LEN = Keyword("LEN")
   protected val LENGTH = Keyword("LENGTH")
-  protected val CHAR_LEN = Keyword("CHAR_LEN")
-  protected val OCTET_LEN = Keyword("OCTET_LEN")
+  protected val CHAR_LENGTH = Keyword("CHAR_LENGTH")
+  protected val OCTET_LENGTH = Keyword("OCTET_LENGTH")
 
   // Use reflection to find the reserved words defined in this class.
   protected val reservedWords =
@@ -327,12 +327,12 @@ class SqlParser extends StandardTokenParsers with PackratParsers {
     (SUBSTR | SUBSTRING) ~> "(" ~> expression ~ "," ~ expression ~ "," ~ expression <~ ")" ^^ {
       case s ~ "," ~ p ~ "," ~ l => Substring(s,p,l)
     } |
-    (LEN | LENGTH | CHAR_LEN) ~> "(" ~> expression <~ ")" ^^ { case s => Length(s) } |
-      OCTET_LEN ~> "(" ~> expression ~ "," ~  expression <~ ")" ^^ {
-      case s ~ "," ~  e => OctetLen(s, e)
+    (LEN | LENGTH | CHAR_LENGTH) ~> "(" ~> expression <~ ")" ^^ { case s => Length(s) } |
+      OCTET_LENGTH ~> "(" ~> expression ~ "," ~  expression <~ ")" ^^ {
+      case s ~ "," ~  e => OctetLength(s, e)
     } |
-    OCTET_LEN ~> "(" ~> expression  <~ ")" ^^ {
-      case s  => OctetLen(s, Literal(OctetLenConstants.DefaultEncoding))
+      OCTET_LENGTH ~> "(" ~> expression  <~ ")" ^^ {
+      case s  => OctetLength(s, Literal(OctetLengthConstants.DefaultEncoding))
     } |
     ident ~ "(" ~ repsep(expression, ",") <~ ")" ^^ {
       case udfName ~ _ ~ exprs => UnresolvedFunction(udfName, exprs)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala
index 99073283b761a..505642394e552 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala
@@ -240,7 +240,7 @@ case class Length(child: Expression) extends UnaryExpression {
 
 }
 
-object OctetLenConstants {
+object OctetLengthConstants {
   val DefaultEncoding = "ISO-8859-1"
 }
 
@@ -261,7 +261,7 @@ object OctetLenUtils {
 /**
  * A function that returns the number of characters in a string expression
  */
-case class OctetLen(child: Expression, encoding : Expression) extends UnaryExpression
+case class OctetLength(child: Expression, encoding : Expression) extends UnaryExpression
   with Logging {
 
   type EvaluatedType = Any
@@ -275,11 +275,11 @@ case class OctetLen(child: Expression, encoding : Expression) extends UnaryExpre
   override def toString = s"OctetLen($child, $encoding)"
 
   override def eval(input: Row): EvaluatedType = {
-    val string = child.eval(input)
-    if (string == null) {
+    val evalInput = child.eval(input)
+    if (evalInput == null) {
       null
-    } else if (!string.isInstanceOf[String]) {
-      log.debug(s"Non-string value [$string] provided to OctetLen")
+    } else if (!evalInput.isInstanceOf[String]) {
+      log.debug(s"Non-string value [$evalInput] provided to OctetLen")
       null
     } else {
       var evalEncoding = encoding.eval(input)
@@ -287,11 +287,11 @@ case class OctetLen(child: Expression, encoding : Expression) extends UnaryExpre
         if (evalEncoding != null) {
           evalEncoding.toString
         } else {
-          OctetLenConstants.DefaultEncoding
+          OctetLengthConstants.DefaultEncoding
         }
       val s: String = ""
       try {
-        string.asInstanceOf[String].getBytes(strEncoding).length
+        evalInput.asInstanceOf[String].getBytes(strEncoding).length
       } catch {
         case ue : UnsupportedEncodingException => {
           throw new UnsupportedEncodingException(s"OctetLen: Caught UnsupportedEncodingException for encoding=[$strEncoding]")
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
index d140da368039f..02b81b799e4c7 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
@@ -175,7 +175,7 @@ object NullPropagation extends Rule[LogicalPlan] {
       case e @ Substring(_, _, Literal(null, _)) => Literal(null, e.dataType)
 
       case e @ Length(Literal(null, _)) => Literal(null, e.dataType)
-      case e @ OctetLen(Literal(null, _),_) => Literal(null, e.dataType)
+      case e @ OctetLength(Literal(null, _),_) => Literal(null, e.dataType)
 
       // Put exceptional cases above if any
       case e: BinaryArithmetic => e.children match {
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala
index df816f411ee6c..d3cab802dbf78 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala
@@ -27,6 +27,7 @@ import org.apache.spark.sql.catalyst.types._
 import scala.collection.mutable.ArrayBuffer
 
 /* Implicit conversions */
+
 import org.apache.spark.sql.catalyst.dsl.expressions._
 
 class ExpressionEvaluationSuite extends FunSuite {
@@ -62,8 +63,8 @@ class ExpressionEvaluationSuite extends FunSuite {
 
   val notTrueTable =
     (true, false) ::
-    (false, true) ::
-    (null, null) :: Nil
+      (false, true) ::
+      (null, null) :: Nil
 
   test("3VL Not") {
     notTrueTable.foreach {
@@ -73,45 +74,45 @@ class ExpressionEvaluationSuite extends FunSuite {
   }
 
   booleanLogicTest("AND", _ && _,
-    (true,  true,  true) ::
-    (true,  false, false) ::
-    (true,  null,  null) ::
-    (false, true,  false) ::
-    (false, false, false) ::
-    (false, null,  false) ::
-    (null,  true,  null) ::
-    (null,  false, false) ::
-    (null,  null,  null) :: Nil)
+    (true, true, true) ::
+      (true, false, false) ::
+      (true, null, null) ::
+      (false, true, false) ::
+      (false, false, false) ::
+      (false, null, false) ::
+      (null, true, null) ::
+      (null, false, false) ::
+      (null, null, null) :: Nil)
 
   booleanLogicTest("OR", _ || _,
-    (true,  true,  true) ::
-    (true,  false, true) ::
-    (true,  null,  true) ::
-    (false, true,  true) ::
-    (false, false, false) ::
-    (false, null,  null) ::
-    (null,  true,  true) ::
-    (null,  false, null) ::
-    (null,  null,  null) :: Nil)
+    (true, true, true) ::
+      (true, false, true) ::
+      (true, null, true) ::
+      (false, true, true) ::
+      (false, false, false) ::
+      (false, null, null) ::
+      (null, true, true) ::
+      (null, false, null) ::
+      (null, null, null) :: Nil)
 
   booleanLogicTest("=", _ === _,
-    (true,  true,  true) ::
-    (true,  false, false) ::
-    (true,  null,  null) ::
-    (false, true,  false) ::
-    (false, false, true) ::
-    (false, null,  null) ::
-    (null,  true,  null) ::
-    (null,  false, null) ::
-    (null,  null,  null) :: Nil)
+    (true, true, true) ::
+      (true, false, false) ::
+      (true, null, null) ::
+      (false, true, false) ::
+      (false, false, true) ::
+      (false, null, null) ::
+      (null, true, null) ::
+      (null, false, null) ::
+      (null, null, null) :: Nil)
 
   def booleanLogicTest(
-      name: String,
-      op: (Expression, Expression) => Expression,
-      truthTable: Seq[(Any, Any, Any)]) {
+                        name: String,
+                        op: (Expression, Expression) => Expression,
+                        truthTable: Seq[(Any, Any, Any)]) {
     test(s"3VL $name") {
       truthTable.foreach {
-        case (l,r,answer) =>
+        case (l, r, answer) =>
           val expr = op(Literal(l, BooleanType), Literal(r, BooleanType))
           checkEvaluation(expr, answer)
       }
@@ -126,8 +127,8 @@ class ExpressionEvaluationSuite extends FunSuite {
     val actual = try evaluate(expression, inputRow) catch {
       case e: Exception => fail(s"Exception evaluating $expression", e)
     }
-    if(actual != expected) {
-      val input = if(inputRow == EmptyRow) "" else s", input: $inputRow"
+    if (actual != expected) {
+      val input = if (inputRow == EmptyRow) "" else s", input: $inputRow"
       fail(s"Incorrect Evaluation: $expression, actual: $actual, expected: $expected$input")
     }
   }
@@ -152,8 +153,8 @@ class ExpressionEvaluationSuite extends FunSuite {
     checkEvaluation("addb" like "a%", true)
     checkEvaluation("addb" like "**", false)
     checkEvaluation("abc" like "a%", true)
-    checkEvaluation("abc"  like "b%", false)
-    checkEvaluation("abc"  like "bc%", false)
+    checkEvaluation("abc" like "b%", false)
+    checkEvaluation("abc" like "bc%", false)
   }
 
   test("LIKE Non-literal Regular Expression") {
@@ -190,10 +191,10 @@ class ExpressionEvaluationSuite extends FunSuite {
     checkEvaluation("axe" rlike "pi|apa", false)
     checkEvaluation("pip" rlike "^(pi)*$", false)
 
-    checkEvaluation("abc"  rlike "^ab", true)
-    checkEvaluation("abc"  rlike "^bc", false)
-    checkEvaluation("abc"  rlike "^ab", true)
-    checkEvaluation("abc"  rlike "^bc", false)
+    checkEvaluation("abc" rlike "^ab", true)
+    checkEvaluation("abc" rlike "^bc", false)
+    checkEvaluation("abc" rlike "^ab", true)
+    checkEvaluation("abc" rlike "^bc", false)
 
     intercept[java.util.regex.PatternSyntaxException] {
       evaluate("abbbbc" rlike "**")
@@ -260,7 +261,9 @@ class ExpressionEvaluationSuite extends FunSuite {
     checkEvaluation(Literal(23.toByte) + Cast(true, ByteType), 24.toByte)
     checkEvaluation(Literal(23.toShort) + Cast(true, ShortType), 24.toShort)
 
-    intercept[Exception] {evaluate(Literal(1) cast BinaryType, null)}
+    intercept[Exception] {
+      evaluate(Literal(1) cast BinaryType, null)
+    }
 
     assert(("abcdef" cast StringType).nullable === false)
     assert(("abcdef" cast BinaryType).nullable === false)
@@ -287,7 +290,7 @@ class ExpressionEvaluationSuite extends FunSuite {
   test("timestamp casting") {
     val millis = 15 * 1000 + 2
     val ts = new Timestamp(millis)
-    val ts1 = new Timestamp(15 * 1000)  // a timestamp without the milliseconds part
+    val ts1 = new Timestamp(15 * 1000) // a timestamp without the milliseconds part
     checkEvaluation(Cast(ts, ShortType), 15)
     checkEvaluation(Cast(ts, IntegerType), 15)
     checkEvaluation(Cast(ts, LongType), 15)
@@ -338,11 +341,11 @@ class ExpressionEvaluationSuite extends FunSuite {
     checkEvaluation(If(Literal(false, BooleanType),
       Literal("a", StringType), Literal("b", StringType)), "b", row)
 
-    checkEvaluation(c1 in (c1, c2), true, row)
+    checkEvaluation(c1 in(c1, c2), true, row)
     checkEvaluation(
       Literal("^Ba*n", StringType) in (Literal("^Ba*n", StringType)), true, row)
     checkEvaluation(
-      Literal("^Ba*n", StringType) in (Literal("^Ba*n", StringType), c2), true, row)
+      Literal("^Ba*n", StringType) in(Literal("^Ba*n", StringType), c2), true, row)
   }
 
   test("case when") {
@@ -390,11 +393,11 @@ class ExpressionEvaluationSuite extends FunSuite {
 
   test("complex type") {
     val row = new GenericRow(Array[Any](
-      "^Ba*n",                                  // 0
-      null.asInstanceOf[String],                // 1
-      new GenericRow(Array[Any]("aa", "bb")),   // 2
-      Map("aa"->"bb"),                          // 3
-      Seq("aa", "bb")                           // 4
+      "^Ba*n", // 0
+      null.asInstanceOf[String], // 1
+      new GenericRow(Array[Any]("aa", "bb")), // 2
+      Map("aa" -> "bb"), // 3
+      Seq("aa", "bb") // 4
     ))
 
     val typeS = StructType(
@@ -425,7 +428,7 @@ class ExpressionEvaluationSuite extends FunSuite {
         :: StructField("b", StringType, nullable = false) :: Nil
     )
 
-    assert(GetField(BoundReference(2,typeS, nullable = true), "a").nullable === true)
+    assert(GetField(BoundReference(2, typeS, nullable = true), "a").nullable === true)
     assert(GetField(BoundReference(2, typeS_notNullable, nullable = false), "a").nullable === false)
 
     assert(GetField(Literal(null, typeS), "a").nullable === true)
@@ -573,27 +576,39 @@ class ExpressionEvaluationSuite extends FunSuite {
 
   test("Length") {
     checkEvaluation(Length(Literal(null, IntegerType)), null)
-    checkEvaluation(Length(Literal(0,IntegerType)), 1)
-    checkEvaluation(Length(Literal(12,IntegerType)), 2)
-    checkEvaluation(Length(Literal(123,IntegerType)), 3)
+    checkEvaluation(Length(Literal(0, IntegerType)), 1)
+    checkEvaluation(Length(Literal(12, IntegerType)), 2)
+    checkEvaluation(Length(Literal(123, IntegerType)), 3)
     checkEvaluation(Length(Literal(12.4F, FloatType)), 4)
-    checkEvaluation(Length(Literal(12345678901L,LongType)), 11)
+    checkEvaluation(Length(Literal(12345678901L, LongType)), 11)
     checkEvaluation(Length(Literal(1234567890.2D, DoubleType)), 14)
-    checkEvaluation(Length(Literal("1234567890ABC",StringType)), 13)
-    checkEvaluation(Length(Literal("\uF93D\uF936\uF949\uF942",StringType)), 4)
+    checkEvaluation(Length(Literal("1234567890ABC", StringType)), 13)
+    checkEvaluation(Length(Literal("\uF93D\uF936\uF949\uF942", StringType)), 4)
   }
 
   test("OctetLen") {
-    checkEvaluation(OctetLen(Literal(null, StringType), "ISO-8859-1"), null)
-    checkEvaluation(OctetLen(Literal(null, StringType), "UTF-8"), null)
-    checkEvaluation(OctetLen(Literal(null, StringType), "UTF-16"), null)
-    checkEvaluation(OctetLen(Literal("1234567890ABC", StringType), "ISO-8859-1"), 13)
-    checkEvaluation(OctetLen(Literal("1234567890ABC", StringType), "UTF-8"), 13)
-    checkEvaluation(OctetLen(Literal("1234567890ABC", StringType), "UTF-16"), 7)
-    checkEvaluation(OctetLen(Literal("\uF93D\uF936\uF949\uF942", StringType), "ISO-8859-1"), 4)
-    checkEvaluation(OctetLen(Literal("\uF93D\uF936\uF949\uF942", StringType), "UTF-8"), 4)
-    checkEvaluation(OctetLen(Literal("\uF93D\uF936\uF949\uF942", StringType), "UTF-16"), 2)
-    checkEvaluation(OctetLen(Literal("\uF93D\uF936\uF949\uF942", StringType), "UTF-32"), 1)
+    checkEvaluation(OctetLength(Literal(null, StringType), "ISO-8859-1"), null)
+    checkEvaluation(OctetLength(Literal(null, StringType), "UTF-8"), null)
+    checkEvaluation(OctetLength(Literal(null, StringType), "UTF-16"), null)
+    checkEvaluation(OctetLength(Literal("1234567890ABC", StringType), "ISO-8859-1"), 13)
+    checkEvaluation(OctetLength(Literal("1234567890ABC", StringType), "UTF-8"), 13)
+    checkEvaluation(OctetLength(Literal("1234567890ABC", StringType), "UTF-16"), 28)
+    checkEvaluation(OctetLength(Literal("1234567890ABC", StringType), "UTF-32"), 52)
+    checkEvaluation(OctetLength(
+      Literal("\uF93D\uF936\uF949\uF942", StringType), "ISO-8859-1"), 4)
+                  // Chinese characters get truncated by ISO-8859-1 encoding
+    checkEvaluation(OctetLength(
+      Literal("\uF93D\uF936\uF949\uF942", StringType), "UTF-8"), 12) // chinese characters
+    checkEvaluation(OctetLength(
+      Literal("\uD840\uDC0B\uD842\uDFB7", StringType), "UTF-8"), 8) // 2 surrogate pairs
+    checkEvaluation(OctetLength(
+      Literal("\uF93D\uF936\uF949\uF942", StringType), "UTF-16"), 10) // chinese characters
+    checkEvaluation(OctetLength(
+      Literal("\uD840\uDC0B\uD842\uDFB7", StringType), "UTF-16"), 10) // 2 surrogate pairs
+    checkEvaluation(OctetLength(
+      Literal("\uF93D\uF936\uF949\uF942", StringType), "UTF-32"), 16) // chinese characters
+    checkEvaluation(OctetLength(
+      Literal("\uD840\uDC0B\uD842\uDFB7", StringType), "UTF-32"), 8) // 2 surrogate pairs
   }
 
 }
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala
index 4997770d13c5a..d6549e9de1d1b 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala
@@ -211,7 +211,7 @@ class ConstantFoldingSuite extends PlanTest {
           Contains("abc", Literal(null, StringType)) as 'c20,
 
           Length(Literal(null, IntegerType)) as 'c21,
-          OctetLen(Literal(null, StringType), "ISO-8859-1") as 'c22
+          OctetLength(Literal(null, StringType), "ISO-8859-1") as 'c22
 
         )
 
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index 5a8c3219f0cd3..8e834e6e3ce1e 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -66,15 +66,15 @@ class SQLQuerySuite extends QueryTest {
       sql("SELECT max(LENGTH(s)) FROM nullableRepeatedData"), 4)
   }
 
-  test("SPARK-2686 Added Parser of SQL STRLEN()") {
+  test("SPARK-2686 Added Parser of SQL OCTET_LENGTH()") {
     checkAnswer(
-      sql("SELECT octet_len(s) from repeatedData"), Seq(Seq(4),Seq(4)))
+      sql("SELECT octet_length(s) from repeatedData"), Seq(Seq(4),Seq(4)))
     checkAnswer(
-      sql("SELECT octet_len(s,'UTF-8') from repeatedData"), Seq(Seq(4),Seq(4)))
+      sql("SELECT octet_length(s,'UTF-8') from repeatedData"), Seq(Seq(4),Seq(4)))
     checkAnswer(
-      sql("SELECT max(octet_len(s,'UTF-8')) from nullStrings"), 3)
+      sql("SELECT max(octet_length(s,'UTF-8')) from nullStrings"), 3)
     checkAnswer(
-      sql("SELECT octet_len('a','ISO-8859-1') + strlen('abcde','ISO-8859-1') FROM testData limit 1"), 6)
+      sql("SELECT octet_length('a','ISO-8859-1') + octet_length('abcde','ISO-8859-1') FROM testData limit 1"), 6)
   }
 
   test("index into array") {
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
index c7ea1cc59a487..21a1c9252d526 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
@@ -866,8 +866,8 @@ private[hive] object HiveQl {
   val WHEN = "(?i)WHEN".r
   val CASE = "(?i)CASE".r
   val SUBSTR = "(?i)SUBSTR(?:ING)?".r
-  val CHAR_LEN = "(?i)CHAR_LEN".r
-  val OCTET_LEN = "(?i)OCTET_LEN".r
+  val CHAR_LENGTH = "(?i)CHAR_LENGTH".r
+  val OCTET_LENGTH = "(?i)OCTET_LENGTH".r
 
   protected def nodeToExpr(node: Node): Expression = node match {
     /* Attribute References */
@@ -999,10 +999,10 @@ private[hive] object HiveQl {
       Substring(nodeToExpr(string), nodeToExpr(pos), Literal(Integer.MAX_VALUE, IntegerType))
     case Token("TOK_FUNCTION", Token(SUBSTR(), Nil) :: string :: pos :: length :: Nil) =>
       Substring(nodeToExpr(string), nodeToExpr(pos), nodeToExpr(length))
-    case Token("TOK_FUNCTION", Token(OCTET_LEN(), Nil) :: string :: Nil) =>
-      OctetLen(nodeToExpr(string), Literal(StrlenConstants.DefaultEncoding))
-    case Token("TOK_FUNCTION", Token(OCTET_LEN(), Nil) :: string :: encoding :: Nil) =>
-      OctetLen(nodeToExpr(string), nodeToExpr(encoding))
+    case Token("TOK_FUNCTION", Token(OCTET_LENGTH(), Nil) :: string :: Nil) =>
+      OctetLength(nodeToExpr(string), Literal(StrlenConstants.DefaultEncoding))
+    case Token("TOK_FUNCTION", Token(OCTET_LENGTH(), Nil) :: string :: encoding :: Nil) =>
+      OctetLength(nodeToExpr(string), nodeToExpr(encoding))
 
 
     /* UDFs - Must be last otherwise will preempt built in functions */

From 81c64c331a70f16d1dd031b635027d352945ff6b Mon Sep 17 00:00:00 2001
From: Stephen Boesch <javadba@gmail.com>
Date: Fri, 1 Aug 2014 16:50:55 -0700
Subject: [PATCH 18/22] Revert whitespace/formatting changes on other sections
 of ExpressionEvaluationSuite

---
 .../ExpressionEvaluationSuite.scala           | 108 +++++++++---------
 1 file changed, 51 insertions(+), 57 deletions(-)

diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala
index d3cab802dbf78..bc8f1cdfe8fc7 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala
@@ -18,16 +18,12 @@
 package org.apache.spark.sql.catalyst.expressions
 
 import java.sql.Timestamp
-import java.util.concurrent.atomic.AtomicInteger
 
 import org.scalatest.FunSuite
 
 import org.apache.spark.sql.catalyst.types._
 
-import scala.collection.mutable.ArrayBuffer
-
 /* Implicit conversions */
-
 import org.apache.spark.sql.catalyst.dsl.expressions._
 
 class ExpressionEvaluationSuite extends FunSuite {
@@ -63,8 +59,8 @@ class ExpressionEvaluationSuite extends FunSuite {
 
   val notTrueTable =
     (true, false) ::
-      (false, true) ::
-      (null, null) :: Nil
+    (false, true) ::
+    (null, null) :: Nil
 
   test("3VL Not") {
     notTrueTable.foreach {
@@ -74,45 +70,45 @@ class ExpressionEvaluationSuite extends FunSuite {
   }
 
   booleanLogicTest("AND", _ && _,
-    (true, true, true) ::
-      (true, false, false) ::
-      (true, null, null) ::
-      (false, true, false) ::
-      (false, false, false) ::
-      (false, null, false) ::
-      (null, true, null) ::
-      (null, false, false) ::
-      (null, null, null) :: Nil)
+    (true,  true,  true) ::
+    (true,  false, false) ::
+    (true,  null,  null) ::
+    (false, true,  false) ::
+    (false, false, false) ::
+    (false, null,  false) ::
+    (null,  true,  null) ::
+    (null,  false, false) ::
+    (null,  null,  null) :: Nil)
 
   booleanLogicTest("OR", _ || _,
-    (true, true, true) ::
-      (true, false, true) ::
-      (true, null, true) ::
-      (false, true, true) ::
-      (false, false, false) ::
-      (false, null, null) ::
-      (null, true, true) ::
-      (null, false, null) ::
-      (null, null, null) :: Nil)
+    (true,  true,  true) ::
+    (true,  false, true) ::
+    (true,  null,  true) ::
+    (false, true,  true) ::
+    (false, false, false) ::
+    (false, null,  null) ::
+    (null,  true,  true) ::
+    (null,  false, null) ::
+    (null,  null,  null) :: Nil)
 
   booleanLogicTest("=", _ === _,
-    (true, true, true) ::
-      (true, false, false) ::
-      (true, null, null) ::
-      (false, true, false) ::
-      (false, false, true) ::
-      (false, null, null) ::
-      (null, true, null) ::
-      (null, false, null) ::
-      (null, null, null) :: Nil)
+    (true,  true,  true) ::
+    (true,  false, false) ::
+    (true,  null,  null) ::
+    (false, true,  false) ::
+    (false, false, true) ::
+    (false, null,  null) ::
+    (null,  true,  null) ::
+    (null,  false, null) ::
+    (null,  null,  null) :: Nil)
 
   def booleanLogicTest(
-                        name: String,
-                        op: (Expression, Expression) => Expression,
-                        truthTable: Seq[(Any, Any, Any)]) {
+      name: String,
+      op: (Expression, Expression) => Expression,
+      truthTable: Seq[(Any, Any, Any)]) {
     test(s"3VL $name") {
       truthTable.foreach {
-        case (l, r, answer) =>
+        case (l,r,answer) =>
           val expr = op(Literal(l, BooleanType), Literal(r, BooleanType))
           checkEvaluation(expr, answer)
       }
@@ -127,8 +123,8 @@ class ExpressionEvaluationSuite extends FunSuite {
     val actual = try evaluate(expression, inputRow) catch {
       case e: Exception => fail(s"Exception evaluating $expression", e)
     }
-    if (actual != expected) {
-      val input = if (inputRow == EmptyRow) "" else s", input: $inputRow"
+    if(actual != expected) {
+      val input = if(inputRow == EmptyRow) "" else s", input: $inputRow"
       fail(s"Incorrect Evaluation: $expression, actual: $actual, expected: $expected$input")
     }
   }
@@ -153,8 +149,8 @@ class ExpressionEvaluationSuite extends FunSuite {
     checkEvaluation("addb" like "a%", true)
     checkEvaluation("addb" like "**", false)
     checkEvaluation("abc" like "a%", true)
-    checkEvaluation("abc" like "b%", false)
-    checkEvaluation("abc" like "bc%", false)
+    checkEvaluation("abc"  like "b%", false)
+    checkEvaluation("abc"  like "bc%", false)
   }
 
   test("LIKE Non-literal Regular Expression") {
@@ -191,10 +187,10 @@ class ExpressionEvaluationSuite extends FunSuite {
     checkEvaluation("axe" rlike "pi|apa", false)
     checkEvaluation("pip" rlike "^(pi)*$", false)
 
-    checkEvaluation("abc" rlike "^ab", true)
-    checkEvaluation("abc" rlike "^bc", false)
-    checkEvaluation("abc" rlike "^ab", true)
-    checkEvaluation("abc" rlike "^bc", false)
+    checkEvaluation("abc"  rlike "^ab", true)
+    checkEvaluation("abc"  rlike "^bc", false)
+    checkEvaluation("abc"  rlike "^ab", true)
+    checkEvaluation("abc"  rlike "^bc", false)
 
     intercept[java.util.regex.PatternSyntaxException] {
       evaluate("abbbbc" rlike "**")
@@ -261,9 +257,7 @@ class ExpressionEvaluationSuite extends FunSuite {
     checkEvaluation(Literal(23.toByte) + Cast(true, ByteType), 24.toByte)
     checkEvaluation(Literal(23.toShort) + Cast(true, ShortType), 24.toShort)
 
-    intercept[Exception] {
-      evaluate(Literal(1) cast BinaryType, null)
-    }
+    intercept[Exception] {evaluate(Literal(1) cast BinaryType, null)}
 
     assert(("abcdef" cast StringType).nullable === false)
     assert(("abcdef" cast BinaryType).nullable === false)
@@ -290,7 +284,7 @@ class ExpressionEvaluationSuite extends FunSuite {
   test("timestamp casting") {
     val millis = 15 * 1000 + 2
     val ts = new Timestamp(millis)
-    val ts1 = new Timestamp(15 * 1000) // a timestamp without the milliseconds part
+    val ts1 = new Timestamp(15 * 1000)  // a timestamp without the milliseconds part
     checkEvaluation(Cast(ts, ShortType), 15)
     checkEvaluation(Cast(ts, IntegerType), 15)
     checkEvaluation(Cast(ts, LongType), 15)
@@ -341,11 +335,11 @@ class ExpressionEvaluationSuite extends FunSuite {
     checkEvaluation(If(Literal(false, BooleanType),
       Literal("a", StringType), Literal("b", StringType)), "b", row)
 
-    checkEvaluation(c1 in(c1, c2), true, row)
+    checkEvaluation(c1 in (c1, c2), true, row)
     checkEvaluation(
       Literal("^Ba*n", StringType) in (Literal("^Ba*n", StringType)), true, row)
     checkEvaluation(
-      Literal("^Ba*n", StringType) in(Literal("^Ba*n", StringType), c2), true, row)
+      Literal("^Ba*n", StringType) in (Literal("^Ba*n", StringType), c2), true, row)
   }
 
   test("case when") {
@@ -393,11 +387,11 @@ class ExpressionEvaluationSuite extends FunSuite {
 
   test("complex type") {
     val row = new GenericRow(Array[Any](
-      "^Ba*n", // 0
-      null.asInstanceOf[String], // 1
-      new GenericRow(Array[Any]("aa", "bb")), // 2
-      Map("aa" -> "bb"), // 3
-      Seq("aa", "bb") // 4
+      "^Ba*n",                                  // 0
+      null.asInstanceOf[String],                // 1
+      new GenericRow(Array[Any]("aa", "bb")),   // 2
+      Map("aa"->"bb"),                          // 3
+      Seq("aa", "bb")                           // 4
     ))
 
     val typeS = StructType(
@@ -428,7 +422,7 @@ class ExpressionEvaluationSuite extends FunSuite {
         :: StructField("b", StringType, nullable = false) :: Nil
     )
 
-    assert(GetField(BoundReference(2, typeS, nullable = true), "a").nullable === true)
+    assert(GetField(BoundReference(2,typeS, nullable = true), "a").nullable === true)
     assert(GetField(BoundReference(2, typeS_notNullable, nullable = false), "a").nullable === false)
 
     assert(GetField(Literal(null, typeS), "a").nullable === true)

From 94fcbd35bc31fc93244fddfdc16329778fc55013 Mon Sep 17 00:00:00 2001
From: Stephen Boesch <javadba@gmail.com>
Date: Fri, 1 Aug 2014 19:08:16 -0700
Subject: [PATCH 19/22] Change default encoding to UTF-8

---
 .../spark/sql/catalyst/expressions/stringOperations.scala    | 5 +++--
 .../src/main/scala/org/apache/spark/sql/hive/HiveQl.scala    | 2 +-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala
index 505642394e552..5a96c9c722411 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala
@@ -241,7 +241,7 @@ case class Length(child: Expression) extends UnaryExpression {
 }
 
 object OctetLengthConstants {
-  val DefaultEncoding = "ISO-8859-1"
+  val DefaultEncoding = "UTF-8"
 }
 
 object OctetLenUtils {
@@ -294,7 +294,8 @@ case class OctetLength(child: Expression, encoding : Expression) extends UnaryEx
         evalInput.asInstanceOf[String].getBytes(strEncoding).length
       } catch {
         case ue : UnsupportedEncodingException => {
-          throw new UnsupportedEncodingException(s"OctetLen: Caught UnsupportedEncodingException for encoding=[$strEncoding]")
+          throw new UnsupportedEncodingException(
+            s"OctetLen: Caught UnsupportedEncodingException for encoding=[$strEncoding]")
         }
       }
       }
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
index 21a1c9252d526..a60d3dad8a176 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
@@ -1000,7 +1000,7 @@ private[hive] object HiveQl {
     case Token("TOK_FUNCTION", Token(SUBSTR(), Nil) :: string :: pos :: length :: Nil) =>
       Substring(nodeToExpr(string), nodeToExpr(pos), nodeToExpr(length))
     case Token("TOK_FUNCTION", Token(OCTET_LENGTH(), Nil) :: string :: Nil) =>
-      OctetLength(nodeToExpr(string), Literal(StrlenConstants.DefaultEncoding))
+      OctetLength(nodeToExpr(string), Literal(OctetLengthConstants.DefaultEncoding))
     case Token("TOK_FUNCTION", Token(OCTET_LENGTH(), Nil) :: string :: encoding :: Nil) =>
       OctetLength(nodeToExpr(string), nodeToExpr(encoding))
 

From a0a03d70203c2ff94ed74f9daeb28461f839831f Mon Sep 17 00:00:00 2001
From: Stephen Boesch <javadba@gmail.com>
Date: Fri, 1 Aug 2014 20:15:07 -0700
Subject: [PATCH 20/22] Replace len() method with simpler call to
 codePointCount

---
 out                                           | 672 ++++++++++++++++++
 .../expressions/stringOperations.scala        |  18 +-
 2 files changed, 674 insertions(+), 16 deletions(-)
 create mode 100644 out

diff --git a/out b/out
new file mode 100644
index 0000000000000..d24695e6107d9
--- /dev/null
+++ b/out
@@ -0,0 +1,672 @@
+[INFO] Scanning for projects...
+[INFO] ------------------------------------------------------------------------
+[INFO] Reactor Build Order:
+[INFO] 
+[INFO] Spark Project Parent POM
+[INFO] Spark Project Core
+[INFO] Spark Project Bagel
+[INFO] Spark Project GraphX
+[INFO] Spark Project ML Library
+[INFO] Spark Project Streaming
+[INFO] Spark Project Tools
+[INFO] Spark Project Catalyst
+[INFO] Spark Project SQL
+[INFO] Spark Project Hive
+[INFO] Spark Project REPL
+[INFO] Spark Project YARN Parent POM
+[INFO] Spark Project YARN Stable API
+[INFO] Spark Project Assembly
+[INFO] Spark Project External Twitter
+[INFO] Spark Project External Kafka
+[INFO] Spark Project External Flume Sink
+[INFO] Spark Project External Flume
+[INFO] Spark Project External ZeroMQ
+[INFO] Spark Project External MQTT
+[INFO] Spark Project Examples
+[INFO]                                                                         
+[INFO] ------------------------------------------------------------------------
+[INFO] Building Spark Project Parent POM 1.1.0-SNAPSHOT
+[INFO] ------------------------------------------------------------------------
+[INFO] 
+[INFO] --- maven-enforcer-plugin:1.3.1:enforce (enforce-versions) @ spark-parent ---
+[INFO] 
+[INFO] --- build-helper-maven-plugin:1.8:add-source (add-scala-sources) @ spark-parent ---
+[INFO] Source directory: /shared/strlen/src/main/scala added.
+[INFO] 
+[INFO] --- maven-remote-resources-plugin:1.5:process (default) @ spark-parent ---
+[INFO] 
+[INFO] --- scala-maven-plugin:3.1.6:compile (scala-compile-first) @ spark-parent ---
+[INFO] No sources to compile
+[INFO] 
+[INFO] --- build-helper-maven-plugin:1.8:add-test-source (add-scala-test-sources) @ spark-parent ---
+[INFO] Test Source directory: /shared/strlen/src/test/scala added.
+[INFO] 
+[INFO] --- scala-maven-plugin:3.1.6:testCompile (scala-test-compile-first) @ spark-parent ---
+[INFO] No sources to compile
+[INFO]                                                                         
+[INFO] ------------------------------------------------------------------------
+[INFO] Building Spark Project Core 1.1.0-SNAPSHOT
+[INFO] ------------------------------------------------------------------------
+[INFO] 
+[INFO] --- maven-enforcer-plugin:1.3.1:enforce (enforce-versions) @ spark-core_2.10 ---
+[INFO] 
+[INFO] --- build-helper-maven-plugin:1.8:add-source (add-scala-sources) @ spark-core_2.10 ---
+[INFO] Source directory: /shared/strlen/core/src/main/scala added.
+[INFO] 
+[INFO] --- maven-remote-resources-plugin:1.5:process (default) @ spark-core_2.10 ---
+[INFO] 
+[INFO] --- exec-maven-plugin:1.2.1:exec (default) @ spark-core_2.10 ---
+Archive:  lib/py4j-0.8.2.1-src.zip
+ extracting: build/py4j/__init__.py  
+  inflating: build/py4j/compat.py    
+  inflating: build/py4j/finalizer.py  
+  inflating: build/py4j/java_collections.py  
+  inflating: build/py4j/java_gateway.py  
+  inflating: build/py4j/protocol.py  
+ extracting: build/py4j/tests/__init__.py  
+  inflating: build/py4j/tests/byte_string_test.py  
+  inflating: build/py4j/tests/finalizer_test.py  
+  inflating: build/py4j/tests/java_array_test.py  
+  inflating: build/py4j/tests/java_callback_test.py  
+  inflating: build/py4j/tests/java_gateway_test.py  
+  inflating: build/py4j/tests/java_list_test.py  
+  inflating: build/py4j/tests/java_map_test.py  
+  inflating: build/py4j/tests/java_set_test.py  
+  inflating: build/py4j/tests/multithreadtest.py  
+  inflating: build/py4j/tests/py4j_callback_example.py  
+  inflating: build/py4j/tests/py4j_callback_example2.py  
+  inflating: build/py4j/tests/py4j_example.py  
+  inflating: build/py4j/version.py   
+[INFO] 
+[INFO] --- maven-resources-plugin:2.6:resources (default-resources) @ spark-core_2.10 ---
+[INFO] Using 'UTF-8' encoding to copy filtered resources.
+[INFO] Copying 9 resources
+[INFO] Copying 21 resources
+[INFO] Copying 7 resources
+[INFO] Copying 3 resources
+[INFO] 
+[INFO] --- scala-maven-plugin:3.1.6:compile (scala-compile-first) @ spark-core_2.10 ---
+[INFO] Using zinc server for incremental compilation
+[INFO] compiler plugin: BasicArtifact(org.scalamacros,paradise_2.10.4,2.0.1,null)
+[0m[[0minfo[0m] [0mCompile success at Aug 1, 2014 7:55:25 PM [0.258s][0m
+[INFO] 
+[INFO] --- maven-compiler-plugin:3.1:compile (default-compile) @ spark-core_2.10 ---
+[INFO] Changes detected - recompiling the module!
+[INFO] Compiling 35 source files to /shared/strlen/core/target/scala-2.10/classes
+[INFO] 
+[INFO] --- build-helper-maven-plugin:1.8:add-test-source (add-scala-test-sources) @ spark-core_2.10 ---
+[INFO] Test Source directory: /shared/strlen/core/src/test/scala added.
+[INFO] 
+[INFO] --- maven-resources-plugin:2.6:testResources (default-testResources) @ spark-core_2.10 ---
+[INFO] Using 'UTF-8' encoding to copy filtered resources.
+[INFO] Copying 4 resources
+[INFO] Copying 3 resources
+[INFO] 
+[INFO] --- scala-maven-plugin:3.1.6:testCompile (scala-test-compile-first) @ spark-core_2.10 ---
+[INFO] Using zinc server for incremental compilation
+[INFO] compiler plugin: BasicArtifact(org.scalamacros,paradise_2.10.4,2.0.1,null)
+[0m[[0minfo[0m] [0mCompiling 1 Scala source and 1 Java source to /shared/strlen/core/target/scala-2.10/test-classes...[0m
+[0m[[33mwarn[0m] [0mNote: /shared/strlen/core/src/test/java/org/apache/spark/JavaAPISuite.java uses or overrides a deprecated API.[0m
+[0m[[33mwarn[0m] [0mNote: Recompile with -Xlint:deprecation for details.[0m
+[0m[[33mwarn[0m] [0mNote: /shared/strlen/core/src/test/java/org/apache/spark/JavaAPISuite.java uses unchecked or unsafe operations.[0m
+[0m[[33mwarn[0m] [0mNote: Recompile with -Xlint:unchecked for details.[0m
+[0m[[0minfo[0m] [0mCompile success at Aug 1, 2014 7:55:32 PM [4.592s][0m
+[INFO] 
+[INFO] --- maven-compiler-plugin:3.1:testCompile (default-testCompile) @ spark-core_2.10 ---
+[INFO] Nothing to compile - all classes are up to date
+[INFO] 
+[INFO] --- maven-surefire-plugin:2.17:test (default-test) @ spark-core_2.10 ---
+[INFO] Tests are skipped.
+[INFO] 
+[INFO] --- scalatest-maven-plugin:1.0-RC2:test (test) @ spark-core_2.10 ---
+[36mDiscovery starting.[0m
+[36mDiscovery completed in 6 seconds, 462 milliseconds.[0m
+[36mRun starting. Expected test count is: 724[0m
+[32mExternalSorterSuite:[0m
+[32m- empty data stream[0m
+[32m- few elements per partition[0m
+[32m- empty partitions with spilling[0m
+Spark assembly has been built with Hive, including Datanucleus jars on classpath
+[32m- spilling in local cluster[0m
+Spark assembly has been built with Hive, including Datanucleus jars on classpath
+Spark assembly has been built with Hive, including Datanucleus jars on classpath
+[32m- spilling in local cluster with many reduce tasks[0m
+[32m- cleanup of intermediate files in sorter[0m
+[32m- cleanup of intermediate files in sorter if there are errors[0m
+[32m- cleanup of intermediate files in shuffle[0m
+[32m- cleanup of intermediate files in shuffle with errors[0m
+[32m- no partial aggregation or sorting[0m
+[32m- partial aggregation without spill[0m
+[32m- partial aggregation with spill, no ordering[0m
+[32m- partial aggregation with spill, with ordering[0m
+[32m- sorting without aggregation, no spill[0m
+[32m- sorting without aggregation, with spill[0m
+Spark assembly has been built with Hive, including Datanucleus jars on classpath
+[32m- spilling with hash collisions[0m
+Spark assembly has been built with Hive, including Datanucleus jars on classpath
+[32m- spilling with many hash collisions[0m
+Spark assembly has been built with Hive, including Datanucleus jars on classpath
+[32m- spilling with hash collisions using the Int.MaxValue key[0m
+Spark assembly has been built with Hive, including Datanucleus jars on classpath
+[32m- spilling with null keys and values[0m
+[32mDAGSchedulerSuite:[0m
+[32m- zero split job[0m
+[32m- run trivial job[0m
+[32m- local job[0m
+[32m- local job oom[0m
+[32m- run trivial job w/ dependency[0m
+[32m- cache location preferences w/ dependency[0m
+[32m- avoid exponential blowup when getting preferred locs list[0m
+[32m- unserializable task[0m
+[32m- trivial job failure[0m
+[32m- trivial job cancellation[0m
+[32m- job cancellation no-kill backend[0m
+[32m- run trivial shuffle[0m
+[32m- run trivial shuffle with fetch failure[0m
+[32m- ignore late map task completions[0m
+[32m- run shuffle with map stage failure[0m
+[32m- failure of stage used by two jobs[0m
+[32m- run trivial shuffle with out-of-band failure and retry[0m
+[32m- recursive shuffle failures[0m
+[32m- cached post-shuffle[0m
+[33m- misbehaved accumulator should not crash DAGScheduler and SparkContext !!! IGNORED !!![0m
+[32m- misbehaved resultHandler should not crash DAGScheduler and SparkContext[0m
+[ERROR] [08/01/2014 19:57:21.310] [test-akka.actor.default-dispatcher-3] [akka://test/user/dagSupervisor/$a] error
+org.apache.spark.SparkException: error
+	at org.apache.spark.scheduler.BuggyDAGEventProcessActor$$anonfun$receive$1.applyOrElse(DAGSchedulerSuite.scala:39)
+	at akka.actor.ActorCell.receiveMessage(ActorCell.scala:498)
+	at akka.actor.ActorCell.invoke(ActorCell.scala:456)
+	at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:237)
+	at akka.dispatch.Mailbox.run(Mailbox.scala:219)
+	at akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:386)
+	at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
+	at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
+	at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
+	at scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
+
+[32m- DAGSchedulerActorSupervisor closes the SparkContext when EventProcessActor crashes[0m
+[ERROR] [08/01/2014 19:57:21.321] [DAGSchedulerSuite-akka.actor.default-dispatcher-3] [akka://DAGSchedulerSuite/user/$$a] Job cancelled because SparkContext was shut down
+org.apache.spark.SparkException: Job cancelled because SparkContext was shut down
+	at org.apache.spark.scheduler.DAGScheduler$$anonfun$cleanUpAfterSchedulerStop$1.apply(DAGScheduler.scala:688)
+	at org.apache.spark.scheduler.DAGScheduler$$anonfun$cleanUpAfterSchedulerStop$1.apply(DAGScheduler.scala:687)
+	at scala.collection.mutable.HashSet.foreach(HashSet.scala:79)
+	at org.apache.spark.scheduler.DAGScheduler.cleanUpAfterSchedulerStop(DAGScheduler.scala:687)
+	at org.apache.spark.scheduler.DAGSchedulerEventProcessActor.postStop(DAGScheduler.scala:1342)
+	at akka.actor.dungeon.FaultHandling$class.akka$actor$dungeon$FaultHandling$$finishTerminate(FaultHandling.scala:201)
+	at akka.actor.dungeon.FaultHandling$class.terminate(FaultHandling.scala:163)
+	at akka.actor.ActorCell.terminate(ActorCell.scala:338)
+	at akka.actor.ActorCell.invokeAll$1(ActorCell.scala:431)
+	at akka.actor.ActorCell.systemInvoke(ActorCell.scala:447)
+	at akka.dispatch.Mailbox.processAllSystemMessages(Mailbox.scala:262)
+	at akka.testkit.CallingThreadDispatcher.process$1(CallingThreadDispatcher.scala:244)
+	at akka.testkit.CallingThreadDispatcher.runQueue(CallingThreadDispatcher.scala:284)
+	at akka.testkit.CallingThreadDispatcher.systemDispatch(CallingThreadDispatcher.scala:192)
+	at akka.actor.dungeon.Dispatch$class.stop(Dispatch.scala:106)
+	at akka.actor.ActorCell.stop(ActorCell.scala:338)
+	at akka.actor.LocalActorRef.stop(ActorRef.scala:340)
+	at akka.actor.dungeon.Children$class.stop(Children.scala:66)
+	at akka.actor.ActorCell.stop(ActorCell.scala:338)
+	at akka.actor.dungeon.FaultHandling$$anonfun$terminate$1.apply(FaultHandling.scala:149)
+	at akka.actor.dungeon.FaultHandling$$anonfun$terminate$1.apply(FaultHandling.scala:149)
+	at scala.collection.Iterator$class.foreach(Iterator.scala:727)
+	at akka.util.Collections$PartialImmutableValuesIterable$$anon$1.foreach(Collections.scala:27)
+	at akka.util.Collections$PartialImmutableValuesIterable.foreach(Collections.scala:52)
+	at akka.actor.dungeon.FaultHandling$class.terminate(FaultHandling.scala:149)
+	at akka.actor.ActorCell.terminate(ActorCell.scala:338)
+	at akka.actor.ActorCell.invokeAll$1(ActorCell.scala:431)
+	at akka.actor.ActorCell.systemInvoke(ActorCell.scala:447)
+	at akka.dispatch.Mailbox.processAllSystemMessages(Mailbox.scala:262)
+	at akka.dispatch.Mailbox.run(Mailbox.scala:218)
+	at akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:386)
+	at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
+	at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
+	at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
+	at scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
+
+[32mRDDSuite:[0m
+[32m- basic operations[0m
+[32m- serialization[0m
+[32m- countApproxDistinct[0m
+[32m- SparkContext.union[0m
+[32m- partitioner aware union[0m
+[32m- UnionRDD partition serialized size should be small[0m
+[32m- aggregate[0m
+[32m- basic caching[0m
+[32m- caching with failures[0m
+[32m- empty RDD[0m
+[32m- repartitioned RDDs[0m
+[32m- repartitioned RDDs perform load balancing[0m
+[32m- coalesced RDDs[0m
+[32m- coalesced RDDs with locality[0m
+[32m- coalesced RDDs with locality, large scale (10K partitions)[0m
+[32m- coalesced RDDs with locality, fail first pass[0m
+[32m- zipped RDDs[0m
+[32m- partition pruning[0m
+[32m- mapWith[0m
+[32m- flatMapWith[0m
+[32m- filterWith[0m
+[32m- take[0m
+[32m- top with predefined ordering[0m
+[32m- top with custom ordering[0m
+[32m- takeOrdered with predefined ordering[0m
+[32m- takeOrdered with custom ordering[0m
+[32m- sample preserves partitioner[0m
+[32m- takeSample[0m
+[32m- takeSample from an empty rdd[0m
+[32m- randomSplit[0m
+[32m- runJob on an invalid partition[0m
+[32m- sort an empty RDD[0m
+[32m- sortByKey[0m
+[32m- sortByKey ascending parameter[0m
+[32m- sortByKey with explicit ordering[0m
+[32m- intersection[0m
+[32m- intersection strips duplicates in an input[0m
+[32m- zipWithIndex[0m
+[32m- zipWithIndex with a single partition[0m
+[32m- zipWithUniqueId[0m
+[32m- retag with implicit ClassTag[0m
+[32m- getNarrowAncestors[0m
+[32m- getNarrowAncestors with multiple parents[0m
+[32m- getNarrowAncestors with cycles[0m
+[32mUtilsSuite:[0m
+[32m- bytesToString[0m
+[32m- copyStream[0m
+[32m- memoryStringToMb[0m
+[32m- splitCommandString[0m
+[32m- string formatting of time durations[0m
+[32m- reading offset bytes of a file[0m
+[32m- reading offset bytes across multiple files[0m
+[32m- deserialize long value[0m
+[32m- get iterator size[0m
+[32m- findOldFiles[0m
+[32m- resolveURI[0m
+[32m- nonLocalPaths[0m
+[32mSortingSuite:[0m
+[32m- sortByKey[0m
+[32m- large array[0m
+[32m- large array with one split[0m
+[32m- large array with many partitions[0m
+[32m- sort descending[0m
+[32m- sort descending with one split[0m
+[32m- sort descending with many partitions[0m
+[32m- more partitions than elements[0m
+[32m- empty RDD[0m
+[32m- partition balancing[0m
+[32m- partition balancing for descending sort[0m
+[32mTaskContextSuite:[0m
+[32m- Calls executeOnCompleteCallbacks after failure[0m
+[32mNextIteratorSuite:[0m
+[32m- one iteration[0m
+[32m- two iterations[0m
+[32m- empty iteration[0m
+[32m- close is called once for empty iterations[0m
+[32m- close is called once for non-empty iterations[0m
+[32mParallelCollectionSplitSuite:[0m
+[32m- one element per slice[0m
+[32m- one slice[0m
+[32m- equal slices[0m
+[32m- non-equal slices[0m
+[32m- splitting exclusive range[0m
+[32m- splitting inclusive range[0m
+[32m- empty data[0m
+[32m- zero slices[0m
+[32m- negative number of slices[0m
+[32m- exclusive ranges sliced into ranges[0m
+[32m- inclusive ranges sliced into ranges[0m
+[32m- identical slice sizes between Range and NumericRange[0m
+[32m- identical slice sizes between List and NumericRange[0m
+[32m- large ranges don't overflow[0m
+[32m- random array tests[0m
+[32m- random exclusive range tests[0m
+[32m- random inclusive range tests[0m
+[32m- exclusive ranges of longs[0m
+[32m- inclusive ranges of longs[0m
+[32m- exclusive ranges of doubles[0m
+[32m- inclusive ranges of doubles[0m
+[32mExecutorURLClassLoaderSuite:[0m
+[32m- child first[0m
+[32m- parent first[0m
+[32m- child first can fall back[0m
+[32m- child first can fail[0m
+[32m- driver sets context class loader in local mode[0m
+[32mExecutorRunnerTest:[0m
+Spark assembly has been built with Hive, including Datanucleus jars on classpath
+[32m- command includes appId[0m
+[32mEventLoggingListenerSuite:[0m
+[32m- Parse names of special files[0m
+[32m- Verify special files exist[0m
+[32m- Verify special files exist with compression[0m
+[32m- Parse event logging info[0m
+[32m- Parse event logging info with compression[0m
+[32m- Basic event logging[0m
+[32m- Basic event logging with compression[0m
+[32m- End-to-end event logging[0m
+[32m- End-to-end event logging with compression[0m
+[32mDriverRunnerTest:[0m
+[32m- Process succeeds instantly[0m
+[32m- Process failing several times and then succeeding[0m
+[32m- Process doesn't restart if not supervised[0m
+[32m- Process doesn't restart if killed[0m
+[32m- Reset of backoff counter[0m
+[32mPairRDDFunctionsSuite:[0m
+[32m- aggregateByKey[0m
+[32m- groupByKey[0m
+[32m- groupByKey with duplicates[0m
+[32m- groupByKey with negative key hash codes[0m
+[32m- groupByKey with many output partitions[0m
+[32m- sampleByKey[0m
+[32m- reduceByKey[0m
+[32m- reduceByKey with collectAsMap[0m
+[32m- reduceByKey with many output partitons[0m
+[32m- reduceByKey with partitioner[0m
+[32m- countApproxDistinctByKey[0m
+[32m- join[0m
+[32m- join all-to-all[0m
+[32m- leftOuterJoin[0m
+[32m- rightOuterJoin[0m
+[32m- join with no matches[0m
+[32m- join with many output partitions[0m
+[32m- groupWith[0m
+[32m- groupWith3[0m
+[32m- groupWith4[0m
+[32m- zero-partition RDD[0m
+[32m- keys and values[0m
+[32m- default partitioner uses partition size[0m
+[32m- default partitioner uses largest partitioner[0m
+[32m- subtract[0m
+[32m- subtract with narrow dependency[0m
+[32m- subtractByKey[0m
+[32m- subtractByKey with narrow dependency[0m
+[32m- foldByKey[0m
+[32m- foldByKey with mutable result type[0m
+[32m- saveNewAPIHadoopFile should call setConf if format is configurable[0m
+[32m- lookup[0m
+[32m- lookup with partitioner[0m
+[32m- lookup with bad partitioner[0m
+[32mPrimitiveVectorSuite:[0m
+[32m- primitive value[0m
+[32m- non-primitive value[0m
+[32m- ideal growth[0m
+[32m- ideal size[0m
+[32m- resizing[0m
+[32mMetricsConfigSuite:[0m
+[32m- MetricsConfig with default properties[0m
+[32m- MetricsConfig with properties set[0m
+[32m- MetricsConfig with subProperties[0m
+[32mSparkContextSchedulerCreationSuite:[0m
+[32m- bad-master[0m
+[32m- local[0m
+[32m- local-*[0m
+[32m- local-n[0m
+[32m- local-*-n-failures[0m
+[32m- local-n-failures[0m
+[32m- bad-local-n[0m
+[32m- bad-local-n-failures[0m
+[32m- local-default-parallelism[0m
+[32m- simr[0m
+[32m- local-cluster[0m
+[32m- yarn-cluster[0m
+[32m- yarn-standalone[0m
+[32m- yarn-client[0m
+Failed to load native Mesos library from /usr/lib/jvm/java-1.6.0-openjdk-1.6.0.0.x86_64/jre/lib/amd64/server:/usr/lib/jvm/java-1.6.0-openjdk-1.6.0.0.x86_64/jre/lib/amd64:/usr/lib/jvm/java-1.6.0-openjdk-1.6.0.0.x86_64/jre/../lib/amd64:/usr/java/packages/lib/amd64:/usr/lib64:/lib64:/lib:/usr/lib
+[32m- mesos fine-grained[0m
+Failed to load native Mesos library from /usr/lib/jvm/java-1.6.0-openjdk-1.6.0.0.x86_64/jre/lib/amd64/server:/usr/lib/jvm/java-1.6.0-openjdk-1.6.0.0.x86_64/jre/lib/amd64:/usr/lib/jvm/java-1.6.0-openjdk-1.6.0.0.x86_64/jre/../lib/amd64:/usr/java/packages/lib/amd64:/usr/lib64:/lib64:/lib:/usr/lib
+[32m- mesos coarse-grained[0m
+Failed to load native Mesos library from /usr/lib/jvm/java-1.6.0-openjdk-1.6.0.0.x86_64/jre/lib/amd64/server:/usr/lib/jvm/java-1.6.0-openjdk-1.6.0.0.x86_64/jre/lib/amd64:/usr/lib/jvm/java-1.6.0-openjdk-1.6.0.0.x86_64/jre/../lib/amd64:/usr/java/packages/lib/amd64:/usr/lib64:/lib64:/lib:/usr/lib
+[32m- mesos with zookeeper[0m
+[32mSamplingUtilsSuite:[0m
+[32m- reservoirSampleAndCount[0m
+[32m- computeFraction[0m
+[32mTimeStampedHashMapSuite:[0m
+[32m- HashMap - basic test[0m
+[32m- TimeStampedHashMap - basic test[0m
+[32m- TimeStampedHashMap - threading safety test[0m
+[32m- TimeStampedWeakValueHashMap - basic test[0m
+[32m- TimeStampedWeakValueHashMap - threading safety test[0m
+[32m- TimeStampedHashMap - clearing by timestamp[0m
+[32m- TimeStampedWeakValueHashMap - clearing by timestamp[0m
+[32m- TimeStampedWeakValueHashMap - clearing weak references[0m
+[32mRandomSamplerSuite:[0m
+[32m- BernoulliSamplerWithRange[0m
+[32m- BernoulliSamplerWithRangeInverse[0m
+[32m- BernoulliSamplerWithRatio[0m
+[32m- BernoulliSamplerWithComplement[0m
+[32m- BernoulliSamplerSetSeed[0m
+[32m- PoissonSampler[0m
+[32mImplicitOrderingSuite:[0m
+[32m- basic inference of Orderings[0m
+[32mClosureCleanerSuite:[0m
+[32m- closures inside an object[0m
+[32m- closures inside a class[0m
+[32m- closures inside a class with no default constructor[0m
+[32m- closures that don't use fields of the outer class[0m
+[32m- nested closures inside an object[0m
+[32m- nested closures inside a class[0m
+[32m- toplevel return statements in closures are identified at cleaning time[0m
+[32m- return statements from named functions nested in closures don't raise exceptions[0m
+[32mUnpersistSuite:[0m
+[32m- unpersist RDD[0m
+[32mTaskSetManagerSuite:[0m
+[32m- TaskSet with no preferences[0m
+[32m- multiple offers with no preferences[0m
+[32m- skip unsatisfiable locality levels[0m
+[32m- basic delay scheduling[0m
+[32m- delay scheduling with fallback[0m
+[32m- delay scheduling with failed hosts[0m
+[32m- task result lost[0m
+[32m- repeated failures lead to task set abortion[0m
+[32m- executors should be blacklisted after task failure, in spite of locality preferences[0m
+[32m- new executors get added[0m
+[32m- test RACK_LOCAL tasks[0m
+[32m- do not emit warning when serialized task is small[0m
+[32m- emit warning when serialized task is large[0m
+[32mDriverSuite:[0m
+Spark assembly has been built with Hive, including Datanucleus jars on classpath
+Spark assembly has been built with Hive, including Datanucleus jars on classpath
+Spark assembly has been built with Hive, including Datanucleus jars on classpath
+Spark assembly has been built with Hive, including Datanucleus jars on classpath
+[32m- driver should exit after finishing[0m
+[32mCompactBufferSuite:[0m
+[32m- empty buffer[0m
+[32m- basic inserts[0m
+[32m- adding sequences[0m
+[32m- adding the same buffer to itself[0m
+[32mCacheManagerSuite:[0m
+[32m- get uncached rdd[0m
+[32m- get cached rdd[0m
+[32m- get uncached local rdd[0m
+[32mTaskSchedulerImplSuite:[0m
+[32m- FIFO Scheduler Test[0m
+[32m- Fair Scheduler Test[0m
+[32m- Nested Pool Test[0m
+[32m- Scheduler does not always schedule tasks on the same workers[0m
+[32m- Scheduler correctly accounts for multiple CPUs per task[0m
+[32mSparkConfSuite:[0m
+[32m- loading from system properties[0m
+[32m- initializing without loading defaults[0m
+[32m- named set methods[0m
+[32m- basic get and set[0m
+[32m- creating SparkContext without master and app name[0m
+[32m- creating SparkContext without master[0m
+[32m- creating SparkContext without app name[0m
+[32m- creating SparkContext with both master and app name[0m
+[32m- SparkContext property overriding[0m
+[32m- nested property names[0m
+[32mBlockManagerSuite:[0m
+[32m- StorageLevel object caching[0m
+[32m- BlockManagerId object caching[0m
+[32m- master + 1 manager interaction[0m
+[32m- master + 2 managers interaction[0m
+[32m- removing block[0m
+[32m- removing rdd[0m
+[32m- removing broadcast[0m
+[32m- reregistration on heart beat[0m
+[32m- reregistration on block update[0m
+[32m- reregistration doesn't dead lock[0m
+Some(org.apache.spark.storage.BlockResult@12a52bbe)
+[32m- correct BlockResult returned from get() calls[0m
+[32m- in-memory LRU storage[0m
+[32m- in-memory LRU storage with serialization[0m
+[32m- in-memory LRU for partitions of same RDD[0m
+[32m- in-memory LRU for partitions of multiple RDDs[0m
+[32m- tachyon storage[0m
+[32m  + tachyon storage test disabled. [0m
+[32m- on-disk storage[0m
+[32m- disk and memory storage[0m
+[32m- disk and memory storage with getLocalBytes[0m
+[32m- disk and memory storage with serialization[0m
+[32m- disk and memory storage with serialization and getLocalBytes[0m
+[32m- LRU with mixed storage levels[0m
+[32m- in-memory LRU with streams[0m
+[32m- LRU with mixed storage levels and streams[0m
+[32m- negative byte values in ByteBufferInputStream[0m
+[32m- overly large block[0m
+[32m- block compression[0m
+[32m- block store put failure[0m
+[32m- reads of memory-mapped and non memory-mapped files are equivalent[0m
+[32m- updated block statuses[0m
+[32m- query block statuses[0m
+[32m- get matching blocks[0m
+[32m- SPARK-1194 regression: fix the same-RDD rule for cache replacement[0m
+[32m- reserve/release unroll memory[0m
+[32m- safely unroll blocks[0m
+[32m- safely unroll blocks through putIterator[0m
+[32m- safely unroll blocks through putIterator (disk)[0m
+[32m- multiple unrolls by the same thread[0m
+[32mPythonRunnerSuite:[0m
+[32m- format path[0m
+[32m- format paths[0m
+[32mBitSetSuite:[0m
+[32m- basic set and get[0m
+[32m- 100% full bit set[0m
+[32m- nextSetBit[0m
+[32m- xor len(bitsetX) < len(bitsetY)[0m
+[32m- xor len(bitsetX) > len(bitsetY)[0m
+[32m- andNot len(bitsetX) < len(bitsetY)[0m
+[32m- andNot len(bitsetX) > len(bitsetY)[0m
+[32mAsyncRDDActionsSuite:[0m
+[32m- countAsync[0m
+[32m- collectAsync[0m
+[32m- foreachAsync[0m
+[32m- foreachPartitionAsync[0m
+[32m- takeAsync[0m
+[32m- async success handling[0m
+[32m- async failure handling[0m
+[32m- FutureAction result, infinite wait[0m
+[32m- FutureAction result, finite wait[0m
+[32m- FutureAction result, timeout[0m
+[32mMetricsSystemSuite:[0m
+[32m- MetricsSystem with default config[0m
+[32m- MetricsSystem with sources add[0m
+[32mJobCancellationSuite:[0m
+[32m- local mode, FIFO scheduler[0m
+[32m- local mode, fair scheduler[0m
+Spark assembly has been built with Hive, including Datanucleus jars on classpath
+Spark assembly has been built with Hive, including Datanucleus jars on classpath
+[32m- cluster mode, FIFO scheduler[0m
+Spark assembly has been built with Hive, including Datanucleus jars on classpath
+Spark assembly has been built with Hive, including Datanucleus jars on classpath
+[32m- cluster mode, fair scheduler[0m
+[32m- do not put partially executed partitions into cache[0m
+[32m- job group[0m
+[32m- job group with interruption[0m
+[33m- two jobs sharing the same stage !!! IGNORED !!![0m
+[32mPartitioningSuite:[0m
+[32m- HashPartitioner equality[0m
+[32m- RangePartitioner equality[0m
+[32m- RangePartitioner getPartition[0m
+[32m- RangePartitioner for keys that are not Comparable (but with Ordering)[0m
+[32m- RangPartitioner.sketch[0m
+[32m- RangePartitioner.determineBounds[0m
+[32m- RangePartitioner should run only one job if data is roughly balanced[0m
+[32m- RangePartitioner should work well on unbalanced data[0m
+[32m- RangePartitioner should return a single partition for empty RDDs[0m
+[32m- HashPartitioner not equal to RangePartitioner[0m
+[32m- partitioner preservation[0m
+[32m- partitioning Java arrays should fail[0m
+[32m- zero-length partitions should be correctly handled[0m
+[32mSecurityManagerSuite:[0m
+[32m- set security with conf[0m
+[32m- set security with api[0m
+[32mUISuite:[0m
+[33m- basic ui visibility !!! IGNORED !!![0m
+[33m- visibility at localhost:4040 !!! IGNORED !!![0m
+[33m- attaching a new tab !!! IGNORED !!![0m
+[32m- jetty selects different port under contention[0m
+[32m- jetty binds to port 0 correctly[0m
+[32m- verify appUIAddress contains the scheme[0m
+[32m- verify appUIAddress contains the port[0m
+[32mSortShuffleSuite:[0m
+[32m- groupByKey without compression[0m
+Spark assembly has been built with Hive, including Datanucleus jars on classpath
+Spark assembly has been built with Hive, including Datanucleus jars on classpath
+[32m- shuffle non-zero block size[0m
+Spark assembly has been built with Hive, including Datanucleus jars on classpath
+Spark assembly has been built with Hive, including Datanucleus jars on classpath
+[32m- shuffle serializer[0m
+Spark assembly has been built with Hive, including Datanucleus jars on classpath
+Spark assembly has been built with Hive, including Datanucleus jars on classpath
+[32m- zero sized blocks[0m
+Spark assembly has been built with Hive, including Datanucleus jars on classpath
+Spark assembly has been built with Hive, including Datanucleus jars on classpath
+[32m- zero sized blocks without kryo[0m
+Spark assembly has been built with Hive, including Datanucleus jars on classpath
+Spark assembly has been built with Hive, including Datanucleus jars on classpath
+[32m- shuffle on mutable pairs[0m
+Spark assembly has been built with Hive, including Datanucleus jars on classpath
+Spark assembly has been built with Hive, including Datanucleus jars on classpath
+[32m- sorting on mutable pairs[0m
+Spark assembly has been built with Hive, including Datanucleus jars on classpath
+Spark assembly has been built with Hive, including Datanucleus jars on classpath
+[32m- cogroup using mutable pairs[0m
+Spark assembly has been built with Hive, including Datanucleus jars on classpath
+Spark assembly has been built with Hive, including Datanucleus jars on classpath
+[32m- subtract mutable pairs[0m
+Spark assembly has been built with Hive, including Datanucleus jars on classpath
+Spark assembly has been built with Hive, including Datanucleus jars on classpath
+[32m- sort with Java non serializable class - Kryo[0m
+Spark assembly has been built with Hive, including Datanucleus jars on classpath
+Spark assembly has been built with Hive, including Datanucleus jars on classpath
+[32m- sort with Java non serializable class - Java[0m
+/bin/sh: line 1:  1895 Killed                  java -Dbasedir=/shared/strlen/core -Xmx3g -XX:MaxPermSize=512m -XX:ReservedCodeCacheSize=512m org.scalatest.tools.Runner -R '/shared/strlen/core/target/scala-2.10/classes /shared/strlen/core/target/scala-2.10/test-classes' -o -f /shared/strlen/core/target/surefire-reports/shared/strlen/core/target/SparkTestSuite.txt -u /shared/strlen/core/target/surefire-reports/.
+[INFO] ------------------------------------------------------------------------
+[INFO] Reactor Summary:
+[INFO] 
+[INFO] Spark Project Parent POM .......................... SUCCESS [2.021s]
+[INFO] Spark Project Core ................................ FAILURE [8:09.635s]
+[INFO] Spark Project Bagel ............................... SKIPPED
+[INFO] Spark Project GraphX .............................. SKIPPED
+[INFO] Spark Project ML Library .......................... SKIPPED
+[INFO] Spark Project Streaming ........................... SKIPPED
+[INFO] Spark Project Tools ............................... SKIPPED
+[INFO] Spark Project Catalyst ............................ SKIPPED
+[INFO] Spark Project SQL ................................. SKIPPED
+[INFO] Spark Project Hive ................................ SKIPPED
+[INFO] Spark Project REPL ................................ SKIPPED
+[INFO] Spark Project YARN Parent POM ..................... SKIPPED
+[INFO] Spark Project YARN Stable API ..................... SKIPPED
+[INFO] Spark Project Assembly ............................ SKIPPED
+[INFO] Spark Project External Twitter .................... SKIPPED
+[INFO] Spark Project External Kafka ...................... SKIPPED
+[INFO] Spark Project External Flume Sink ................. SKIPPED
+[INFO] Spark Project External Flume ...................... SKIPPED
+[INFO] Spark Project External ZeroMQ ..................... SKIPPED
+[INFO] Spark Project External MQTT ....................... SKIPPED
+[INFO] Spark Project Examples ............................ SKIPPED
+[INFO] ------------------------------------------------------------------------
+[INFO] BUILD FAILURE
+[INFO] ------------------------------------------------------------------------
+[INFO] Total time: 8:12.473s
+[INFO] Finished at: Fri Aug 01 20:03:30 PDT 2014
+[INFO] Final Memory: 30M/697M
+[INFO] ------------------------------------------------------------------------
+[ERROR] Failed to execute goal org.scalatest:scalatest-maven-plugin:1.0-RC2:test (test) on project spark-core_2.10: There are test failures -> [Help 1]
+[ERROR] 
+[ERROR] To see the full stack trace of the errors, re-run Maven with the -e switch.
+[ERROR] Re-run Maven using the -X switch to enable full debug logging.
+[ERROR] 
+[ERROR] For more information about the errors and possible solutions, please read the following articles:
+[ERROR] [Help 1] http://cwiki.apache.org/confluence/display/MAVEN/MojoFailureException
+[ERROR] 
+[ERROR] After correcting the problems, you can resume the build with the command
+[ERROR]   mvn <goals> -rf :spark-core_2.10
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala
index 5a96c9c722411..65be1a76fa35d 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala
@@ -234,30 +234,16 @@ case class Length(child: Expression) extends UnaryExpression {
     } else if (!inputVal.isInstanceOf[String]) {
       inputVal.toString.length
     } else {
-      OctetLenUtils.len(inputVal.asInstanceOf[String])
+      val str = inputVal.asInstanceOf[String]
+      str.codePointCount(0, str.length)
     }
   }
-
 }
 
 object OctetLengthConstants {
   val DefaultEncoding = "UTF-8"
 }
 
-object OctetLenUtils {
-  def len(s : String)  = {
-    if (s == null) {
-      null
-    } else {
-      @inline def isUtfStartByte(b : Byte) = (b & 0xC0) != 0x80
-      s.getBytes.foldLeft(0) { case (cnt, b) => {
-        cnt + (if (isUtfStartByte(b)) 1 else 0)
-      }
-      }
-    }
-  }
-}
-
 /**
  * A function that returns the number of characters in a string expression
  */

From 91761be0b0433a81d5b01c13aeac718decae8df3 Mon Sep 17 00:00:00 2001
From: Stephen Boesch <javadba@gmail.com>
Date: Fri, 1 Aug 2014 20:17:18 -0700
Subject: [PATCH 21/22] Remove spurious output log file

---
 out | 672 ------------------------------------------------------------
 1 file changed, 672 deletions(-)
 delete mode 100644 out

diff --git a/out b/out
deleted file mode 100644
index d24695e6107d9..0000000000000
--- a/out
+++ /dev/null
@@ -1,672 +0,0 @@
-[INFO] Scanning for projects...
-[INFO] ------------------------------------------------------------------------
-[INFO] Reactor Build Order:
-[INFO] 
-[INFO] Spark Project Parent POM
-[INFO] Spark Project Core
-[INFO] Spark Project Bagel
-[INFO] Spark Project GraphX
-[INFO] Spark Project ML Library
-[INFO] Spark Project Streaming
-[INFO] Spark Project Tools
-[INFO] Spark Project Catalyst
-[INFO] Spark Project SQL
-[INFO] Spark Project Hive
-[INFO] Spark Project REPL
-[INFO] Spark Project YARN Parent POM
-[INFO] Spark Project YARN Stable API
-[INFO] Spark Project Assembly
-[INFO] Spark Project External Twitter
-[INFO] Spark Project External Kafka
-[INFO] Spark Project External Flume Sink
-[INFO] Spark Project External Flume
-[INFO] Spark Project External ZeroMQ
-[INFO] Spark Project External MQTT
-[INFO] Spark Project Examples
-[INFO]                                                                         
-[INFO] ------------------------------------------------------------------------
-[INFO] Building Spark Project Parent POM 1.1.0-SNAPSHOT
-[INFO] ------------------------------------------------------------------------
-[INFO] 
-[INFO] --- maven-enforcer-plugin:1.3.1:enforce (enforce-versions) @ spark-parent ---
-[INFO] 
-[INFO] --- build-helper-maven-plugin:1.8:add-source (add-scala-sources) @ spark-parent ---
-[INFO] Source directory: /shared/strlen/src/main/scala added.
-[INFO] 
-[INFO] --- maven-remote-resources-plugin:1.5:process (default) @ spark-parent ---
-[INFO] 
-[INFO] --- scala-maven-plugin:3.1.6:compile (scala-compile-first) @ spark-parent ---
-[INFO] No sources to compile
-[INFO] 
-[INFO] --- build-helper-maven-plugin:1.8:add-test-source (add-scala-test-sources) @ spark-parent ---
-[INFO] Test Source directory: /shared/strlen/src/test/scala added.
-[INFO] 
-[INFO] --- scala-maven-plugin:3.1.6:testCompile (scala-test-compile-first) @ spark-parent ---
-[INFO] No sources to compile
-[INFO]                                                                         
-[INFO] ------------------------------------------------------------------------
-[INFO] Building Spark Project Core 1.1.0-SNAPSHOT
-[INFO] ------------------------------------------------------------------------
-[INFO] 
-[INFO] --- maven-enforcer-plugin:1.3.1:enforce (enforce-versions) @ spark-core_2.10 ---
-[INFO] 
-[INFO] --- build-helper-maven-plugin:1.8:add-source (add-scala-sources) @ spark-core_2.10 ---
-[INFO] Source directory: /shared/strlen/core/src/main/scala added.
-[INFO] 
-[INFO] --- maven-remote-resources-plugin:1.5:process (default) @ spark-core_2.10 ---
-[INFO] 
-[INFO] --- exec-maven-plugin:1.2.1:exec (default) @ spark-core_2.10 ---
-Archive:  lib/py4j-0.8.2.1-src.zip
- extracting: build/py4j/__init__.py  
-  inflating: build/py4j/compat.py    
-  inflating: build/py4j/finalizer.py  
-  inflating: build/py4j/java_collections.py  
-  inflating: build/py4j/java_gateway.py  
-  inflating: build/py4j/protocol.py  
- extracting: build/py4j/tests/__init__.py  
-  inflating: build/py4j/tests/byte_string_test.py  
-  inflating: build/py4j/tests/finalizer_test.py  
-  inflating: build/py4j/tests/java_array_test.py  
-  inflating: build/py4j/tests/java_callback_test.py  
-  inflating: build/py4j/tests/java_gateway_test.py  
-  inflating: build/py4j/tests/java_list_test.py  
-  inflating: build/py4j/tests/java_map_test.py  
-  inflating: build/py4j/tests/java_set_test.py  
-  inflating: build/py4j/tests/multithreadtest.py  
-  inflating: build/py4j/tests/py4j_callback_example.py  
-  inflating: build/py4j/tests/py4j_callback_example2.py  
-  inflating: build/py4j/tests/py4j_example.py  
-  inflating: build/py4j/version.py   
-[INFO] 
-[INFO] --- maven-resources-plugin:2.6:resources (default-resources) @ spark-core_2.10 ---
-[INFO] Using 'UTF-8' encoding to copy filtered resources.
-[INFO] Copying 9 resources
-[INFO] Copying 21 resources
-[INFO] Copying 7 resources
-[INFO] Copying 3 resources
-[INFO] 
-[INFO] --- scala-maven-plugin:3.1.6:compile (scala-compile-first) @ spark-core_2.10 ---
-[INFO] Using zinc server for incremental compilation
-[INFO] compiler plugin: BasicArtifact(org.scalamacros,paradise_2.10.4,2.0.1,null)
-[0m[[0minfo[0m] [0mCompile success at Aug 1, 2014 7:55:25 PM [0.258s][0m
-[INFO] 
-[INFO] --- maven-compiler-plugin:3.1:compile (default-compile) @ spark-core_2.10 ---
-[INFO] Changes detected - recompiling the module!
-[INFO] Compiling 35 source files to /shared/strlen/core/target/scala-2.10/classes
-[INFO] 
-[INFO] --- build-helper-maven-plugin:1.8:add-test-source (add-scala-test-sources) @ spark-core_2.10 ---
-[INFO] Test Source directory: /shared/strlen/core/src/test/scala added.
-[INFO] 
-[INFO] --- maven-resources-plugin:2.6:testResources (default-testResources) @ spark-core_2.10 ---
-[INFO] Using 'UTF-8' encoding to copy filtered resources.
-[INFO] Copying 4 resources
-[INFO] Copying 3 resources
-[INFO] 
-[INFO] --- scala-maven-plugin:3.1.6:testCompile (scala-test-compile-first) @ spark-core_2.10 ---
-[INFO] Using zinc server for incremental compilation
-[INFO] compiler plugin: BasicArtifact(org.scalamacros,paradise_2.10.4,2.0.1,null)
-[0m[[0minfo[0m] [0mCompiling 1 Scala source and 1 Java source to /shared/strlen/core/target/scala-2.10/test-classes...[0m
-[0m[[33mwarn[0m] [0mNote: /shared/strlen/core/src/test/java/org/apache/spark/JavaAPISuite.java uses or overrides a deprecated API.[0m
-[0m[[33mwarn[0m] [0mNote: Recompile with -Xlint:deprecation for details.[0m
-[0m[[33mwarn[0m] [0mNote: /shared/strlen/core/src/test/java/org/apache/spark/JavaAPISuite.java uses unchecked or unsafe operations.[0m
-[0m[[33mwarn[0m] [0mNote: Recompile with -Xlint:unchecked for details.[0m
-[0m[[0minfo[0m] [0mCompile success at Aug 1, 2014 7:55:32 PM [4.592s][0m
-[INFO] 
-[INFO] --- maven-compiler-plugin:3.1:testCompile (default-testCompile) @ spark-core_2.10 ---
-[INFO] Nothing to compile - all classes are up to date
-[INFO] 
-[INFO] --- maven-surefire-plugin:2.17:test (default-test) @ spark-core_2.10 ---
-[INFO] Tests are skipped.
-[INFO] 
-[INFO] --- scalatest-maven-plugin:1.0-RC2:test (test) @ spark-core_2.10 ---
-[36mDiscovery starting.[0m
-[36mDiscovery completed in 6 seconds, 462 milliseconds.[0m
-[36mRun starting. Expected test count is: 724[0m
-[32mExternalSorterSuite:[0m
-[32m- empty data stream[0m
-[32m- few elements per partition[0m
-[32m- empty partitions with spilling[0m
-Spark assembly has been built with Hive, including Datanucleus jars on classpath
-[32m- spilling in local cluster[0m
-Spark assembly has been built with Hive, including Datanucleus jars on classpath
-Spark assembly has been built with Hive, including Datanucleus jars on classpath
-[32m- spilling in local cluster with many reduce tasks[0m
-[32m- cleanup of intermediate files in sorter[0m
-[32m- cleanup of intermediate files in sorter if there are errors[0m
-[32m- cleanup of intermediate files in shuffle[0m
-[32m- cleanup of intermediate files in shuffle with errors[0m
-[32m- no partial aggregation or sorting[0m
-[32m- partial aggregation without spill[0m
-[32m- partial aggregation with spill, no ordering[0m
-[32m- partial aggregation with spill, with ordering[0m
-[32m- sorting without aggregation, no spill[0m
-[32m- sorting without aggregation, with spill[0m
-Spark assembly has been built with Hive, including Datanucleus jars on classpath
-[32m- spilling with hash collisions[0m
-Spark assembly has been built with Hive, including Datanucleus jars on classpath
-[32m- spilling with many hash collisions[0m
-Spark assembly has been built with Hive, including Datanucleus jars on classpath
-[32m- spilling with hash collisions using the Int.MaxValue key[0m
-Spark assembly has been built with Hive, including Datanucleus jars on classpath
-[32m- spilling with null keys and values[0m
-[32mDAGSchedulerSuite:[0m
-[32m- zero split job[0m
-[32m- run trivial job[0m
-[32m- local job[0m
-[32m- local job oom[0m
-[32m- run trivial job w/ dependency[0m
-[32m- cache location preferences w/ dependency[0m
-[32m- avoid exponential blowup when getting preferred locs list[0m
-[32m- unserializable task[0m
-[32m- trivial job failure[0m
-[32m- trivial job cancellation[0m
-[32m- job cancellation no-kill backend[0m
-[32m- run trivial shuffle[0m
-[32m- run trivial shuffle with fetch failure[0m
-[32m- ignore late map task completions[0m
-[32m- run shuffle with map stage failure[0m
-[32m- failure of stage used by two jobs[0m
-[32m- run trivial shuffle with out-of-band failure and retry[0m
-[32m- recursive shuffle failures[0m
-[32m- cached post-shuffle[0m
-[33m- misbehaved accumulator should not crash DAGScheduler and SparkContext !!! IGNORED !!![0m
-[32m- misbehaved resultHandler should not crash DAGScheduler and SparkContext[0m
-[ERROR] [08/01/2014 19:57:21.310] [test-akka.actor.default-dispatcher-3] [akka://test/user/dagSupervisor/$a] error
-org.apache.spark.SparkException: error
-	at org.apache.spark.scheduler.BuggyDAGEventProcessActor$$anonfun$receive$1.applyOrElse(DAGSchedulerSuite.scala:39)
-	at akka.actor.ActorCell.receiveMessage(ActorCell.scala:498)
-	at akka.actor.ActorCell.invoke(ActorCell.scala:456)
-	at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:237)
-	at akka.dispatch.Mailbox.run(Mailbox.scala:219)
-	at akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:386)
-	at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
-	at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
-	at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
-	at scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
-
-[32m- DAGSchedulerActorSupervisor closes the SparkContext when EventProcessActor crashes[0m
-[ERROR] [08/01/2014 19:57:21.321] [DAGSchedulerSuite-akka.actor.default-dispatcher-3] [akka://DAGSchedulerSuite/user/$$a] Job cancelled because SparkContext was shut down
-org.apache.spark.SparkException: Job cancelled because SparkContext was shut down
-	at org.apache.spark.scheduler.DAGScheduler$$anonfun$cleanUpAfterSchedulerStop$1.apply(DAGScheduler.scala:688)
-	at org.apache.spark.scheduler.DAGScheduler$$anonfun$cleanUpAfterSchedulerStop$1.apply(DAGScheduler.scala:687)
-	at scala.collection.mutable.HashSet.foreach(HashSet.scala:79)
-	at org.apache.spark.scheduler.DAGScheduler.cleanUpAfterSchedulerStop(DAGScheduler.scala:687)
-	at org.apache.spark.scheduler.DAGSchedulerEventProcessActor.postStop(DAGScheduler.scala:1342)
-	at akka.actor.dungeon.FaultHandling$class.akka$actor$dungeon$FaultHandling$$finishTerminate(FaultHandling.scala:201)
-	at akka.actor.dungeon.FaultHandling$class.terminate(FaultHandling.scala:163)
-	at akka.actor.ActorCell.terminate(ActorCell.scala:338)
-	at akka.actor.ActorCell.invokeAll$1(ActorCell.scala:431)
-	at akka.actor.ActorCell.systemInvoke(ActorCell.scala:447)
-	at akka.dispatch.Mailbox.processAllSystemMessages(Mailbox.scala:262)
-	at akka.testkit.CallingThreadDispatcher.process$1(CallingThreadDispatcher.scala:244)
-	at akka.testkit.CallingThreadDispatcher.runQueue(CallingThreadDispatcher.scala:284)
-	at akka.testkit.CallingThreadDispatcher.systemDispatch(CallingThreadDispatcher.scala:192)
-	at akka.actor.dungeon.Dispatch$class.stop(Dispatch.scala:106)
-	at akka.actor.ActorCell.stop(ActorCell.scala:338)
-	at akka.actor.LocalActorRef.stop(ActorRef.scala:340)
-	at akka.actor.dungeon.Children$class.stop(Children.scala:66)
-	at akka.actor.ActorCell.stop(ActorCell.scala:338)
-	at akka.actor.dungeon.FaultHandling$$anonfun$terminate$1.apply(FaultHandling.scala:149)
-	at akka.actor.dungeon.FaultHandling$$anonfun$terminate$1.apply(FaultHandling.scala:149)
-	at scala.collection.Iterator$class.foreach(Iterator.scala:727)
-	at akka.util.Collections$PartialImmutableValuesIterable$$anon$1.foreach(Collections.scala:27)
-	at akka.util.Collections$PartialImmutableValuesIterable.foreach(Collections.scala:52)
-	at akka.actor.dungeon.FaultHandling$class.terminate(FaultHandling.scala:149)
-	at akka.actor.ActorCell.terminate(ActorCell.scala:338)
-	at akka.actor.ActorCell.invokeAll$1(ActorCell.scala:431)
-	at akka.actor.ActorCell.systemInvoke(ActorCell.scala:447)
-	at akka.dispatch.Mailbox.processAllSystemMessages(Mailbox.scala:262)
-	at akka.dispatch.Mailbox.run(Mailbox.scala:218)
-	at akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:386)
-	at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
-	at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
-	at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
-	at scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
-
-[32mRDDSuite:[0m
-[32m- basic operations[0m
-[32m- serialization[0m
-[32m- countApproxDistinct[0m
-[32m- SparkContext.union[0m
-[32m- partitioner aware union[0m
-[32m- UnionRDD partition serialized size should be small[0m
-[32m- aggregate[0m
-[32m- basic caching[0m
-[32m- caching with failures[0m
-[32m- empty RDD[0m
-[32m- repartitioned RDDs[0m
-[32m- repartitioned RDDs perform load balancing[0m
-[32m- coalesced RDDs[0m
-[32m- coalesced RDDs with locality[0m
-[32m- coalesced RDDs with locality, large scale (10K partitions)[0m
-[32m- coalesced RDDs with locality, fail first pass[0m
-[32m- zipped RDDs[0m
-[32m- partition pruning[0m
-[32m- mapWith[0m
-[32m- flatMapWith[0m
-[32m- filterWith[0m
-[32m- take[0m
-[32m- top with predefined ordering[0m
-[32m- top with custom ordering[0m
-[32m- takeOrdered with predefined ordering[0m
-[32m- takeOrdered with custom ordering[0m
-[32m- sample preserves partitioner[0m
-[32m- takeSample[0m
-[32m- takeSample from an empty rdd[0m
-[32m- randomSplit[0m
-[32m- runJob on an invalid partition[0m
-[32m- sort an empty RDD[0m
-[32m- sortByKey[0m
-[32m- sortByKey ascending parameter[0m
-[32m- sortByKey with explicit ordering[0m
-[32m- intersection[0m
-[32m- intersection strips duplicates in an input[0m
-[32m- zipWithIndex[0m
-[32m- zipWithIndex with a single partition[0m
-[32m- zipWithUniqueId[0m
-[32m- retag with implicit ClassTag[0m
-[32m- getNarrowAncestors[0m
-[32m- getNarrowAncestors with multiple parents[0m
-[32m- getNarrowAncestors with cycles[0m
-[32mUtilsSuite:[0m
-[32m- bytesToString[0m
-[32m- copyStream[0m
-[32m- memoryStringToMb[0m
-[32m- splitCommandString[0m
-[32m- string formatting of time durations[0m
-[32m- reading offset bytes of a file[0m
-[32m- reading offset bytes across multiple files[0m
-[32m- deserialize long value[0m
-[32m- get iterator size[0m
-[32m- findOldFiles[0m
-[32m- resolveURI[0m
-[32m- nonLocalPaths[0m
-[32mSortingSuite:[0m
-[32m- sortByKey[0m
-[32m- large array[0m
-[32m- large array with one split[0m
-[32m- large array with many partitions[0m
-[32m- sort descending[0m
-[32m- sort descending with one split[0m
-[32m- sort descending with many partitions[0m
-[32m- more partitions than elements[0m
-[32m- empty RDD[0m
-[32m- partition balancing[0m
-[32m- partition balancing for descending sort[0m
-[32mTaskContextSuite:[0m
-[32m- Calls executeOnCompleteCallbacks after failure[0m
-[32mNextIteratorSuite:[0m
-[32m- one iteration[0m
-[32m- two iterations[0m
-[32m- empty iteration[0m
-[32m- close is called once for empty iterations[0m
-[32m- close is called once for non-empty iterations[0m
-[32mParallelCollectionSplitSuite:[0m
-[32m- one element per slice[0m
-[32m- one slice[0m
-[32m- equal slices[0m
-[32m- non-equal slices[0m
-[32m- splitting exclusive range[0m
-[32m- splitting inclusive range[0m
-[32m- empty data[0m
-[32m- zero slices[0m
-[32m- negative number of slices[0m
-[32m- exclusive ranges sliced into ranges[0m
-[32m- inclusive ranges sliced into ranges[0m
-[32m- identical slice sizes between Range and NumericRange[0m
-[32m- identical slice sizes between List and NumericRange[0m
-[32m- large ranges don't overflow[0m
-[32m- random array tests[0m
-[32m- random exclusive range tests[0m
-[32m- random inclusive range tests[0m
-[32m- exclusive ranges of longs[0m
-[32m- inclusive ranges of longs[0m
-[32m- exclusive ranges of doubles[0m
-[32m- inclusive ranges of doubles[0m
-[32mExecutorURLClassLoaderSuite:[0m
-[32m- child first[0m
-[32m- parent first[0m
-[32m- child first can fall back[0m
-[32m- child first can fail[0m
-[32m- driver sets context class loader in local mode[0m
-[32mExecutorRunnerTest:[0m
-Spark assembly has been built with Hive, including Datanucleus jars on classpath
-[32m- command includes appId[0m
-[32mEventLoggingListenerSuite:[0m
-[32m- Parse names of special files[0m
-[32m- Verify special files exist[0m
-[32m- Verify special files exist with compression[0m
-[32m- Parse event logging info[0m
-[32m- Parse event logging info with compression[0m
-[32m- Basic event logging[0m
-[32m- Basic event logging with compression[0m
-[32m- End-to-end event logging[0m
-[32m- End-to-end event logging with compression[0m
-[32mDriverRunnerTest:[0m
-[32m- Process succeeds instantly[0m
-[32m- Process failing several times and then succeeding[0m
-[32m- Process doesn't restart if not supervised[0m
-[32m- Process doesn't restart if killed[0m
-[32m- Reset of backoff counter[0m
-[32mPairRDDFunctionsSuite:[0m
-[32m- aggregateByKey[0m
-[32m- groupByKey[0m
-[32m- groupByKey with duplicates[0m
-[32m- groupByKey with negative key hash codes[0m
-[32m- groupByKey with many output partitions[0m
-[32m- sampleByKey[0m
-[32m- reduceByKey[0m
-[32m- reduceByKey with collectAsMap[0m
-[32m- reduceByKey with many output partitons[0m
-[32m- reduceByKey with partitioner[0m
-[32m- countApproxDistinctByKey[0m
-[32m- join[0m
-[32m- join all-to-all[0m
-[32m- leftOuterJoin[0m
-[32m- rightOuterJoin[0m
-[32m- join with no matches[0m
-[32m- join with many output partitions[0m
-[32m- groupWith[0m
-[32m- groupWith3[0m
-[32m- groupWith4[0m
-[32m- zero-partition RDD[0m
-[32m- keys and values[0m
-[32m- default partitioner uses partition size[0m
-[32m- default partitioner uses largest partitioner[0m
-[32m- subtract[0m
-[32m- subtract with narrow dependency[0m
-[32m- subtractByKey[0m
-[32m- subtractByKey with narrow dependency[0m
-[32m- foldByKey[0m
-[32m- foldByKey with mutable result type[0m
-[32m- saveNewAPIHadoopFile should call setConf if format is configurable[0m
-[32m- lookup[0m
-[32m- lookup with partitioner[0m
-[32m- lookup with bad partitioner[0m
-[32mPrimitiveVectorSuite:[0m
-[32m- primitive value[0m
-[32m- non-primitive value[0m
-[32m- ideal growth[0m
-[32m- ideal size[0m
-[32m- resizing[0m
-[32mMetricsConfigSuite:[0m
-[32m- MetricsConfig with default properties[0m
-[32m- MetricsConfig with properties set[0m
-[32m- MetricsConfig with subProperties[0m
-[32mSparkContextSchedulerCreationSuite:[0m
-[32m- bad-master[0m
-[32m- local[0m
-[32m- local-*[0m
-[32m- local-n[0m
-[32m- local-*-n-failures[0m
-[32m- local-n-failures[0m
-[32m- bad-local-n[0m
-[32m- bad-local-n-failures[0m
-[32m- local-default-parallelism[0m
-[32m- simr[0m
-[32m- local-cluster[0m
-[32m- yarn-cluster[0m
-[32m- yarn-standalone[0m
-[32m- yarn-client[0m
-Failed to load native Mesos library from /usr/lib/jvm/java-1.6.0-openjdk-1.6.0.0.x86_64/jre/lib/amd64/server:/usr/lib/jvm/java-1.6.0-openjdk-1.6.0.0.x86_64/jre/lib/amd64:/usr/lib/jvm/java-1.6.0-openjdk-1.6.0.0.x86_64/jre/../lib/amd64:/usr/java/packages/lib/amd64:/usr/lib64:/lib64:/lib:/usr/lib
-[32m- mesos fine-grained[0m
-Failed to load native Mesos library from /usr/lib/jvm/java-1.6.0-openjdk-1.6.0.0.x86_64/jre/lib/amd64/server:/usr/lib/jvm/java-1.6.0-openjdk-1.6.0.0.x86_64/jre/lib/amd64:/usr/lib/jvm/java-1.6.0-openjdk-1.6.0.0.x86_64/jre/../lib/amd64:/usr/java/packages/lib/amd64:/usr/lib64:/lib64:/lib:/usr/lib
-[32m- mesos coarse-grained[0m
-Failed to load native Mesos library from /usr/lib/jvm/java-1.6.0-openjdk-1.6.0.0.x86_64/jre/lib/amd64/server:/usr/lib/jvm/java-1.6.0-openjdk-1.6.0.0.x86_64/jre/lib/amd64:/usr/lib/jvm/java-1.6.0-openjdk-1.6.0.0.x86_64/jre/../lib/amd64:/usr/java/packages/lib/amd64:/usr/lib64:/lib64:/lib:/usr/lib
-[32m- mesos with zookeeper[0m
-[32mSamplingUtilsSuite:[0m
-[32m- reservoirSampleAndCount[0m
-[32m- computeFraction[0m
-[32mTimeStampedHashMapSuite:[0m
-[32m- HashMap - basic test[0m
-[32m- TimeStampedHashMap - basic test[0m
-[32m- TimeStampedHashMap - threading safety test[0m
-[32m- TimeStampedWeakValueHashMap - basic test[0m
-[32m- TimeStampedWeakValueHashMap - threading safety test[0m
-[32m- TimeStampedHashMap - clearing by timestamp[0m
-[32m- TimeStampedWeakValueHashMap - clearing by timestamp[0m
-[32m- TimeStampedWeakValueHashMap - clearing weak references[0m
-[32mRandomSamplerSuite:[0m
-[32m- BernoulliSamplerWithRange[0m
-[32m- BernoulliSamplerWithRangeInverse[0m
-[32m- BernoulliSamplerWithRatio[0m
-[32m- BernoulliSamplerWithComplement[0m
-[32m- BernoulliSamplerSetSeed[0m
-[32m- PoissonSampler[0m
-[32mImplicitOrderingSuite:[0m
-[32m- basic inference of Orderings[0m
-[32mClosureCleanerSuite:[0m
-[32m- closures inside an object[0m
-[32m- closures inside a class[0m
-[32m- closures inside a class with no default constructor[0m
-[32m- closures that don't use fields of the outer class[0m
-[32m- nested closures inside an object[0m
-[32m- nested closures inside a class[0m
-[32m- toplevel return statements in closures are identified at cleaning time[0m
-[32m- return statements from named functions nested in closures don't raise exceptions[0m
-[32mUnpersistSuite:[0m
-[32m- unpersist RDD[0m
-[32mTaskSetManagerSuite:[0m
-[32m- TaskSet with no preferences[0m
-[32m- multiple offers with no preferences[0m
-[32m- skip unsatisfiable locality levels[0m
-[32m- basic delay scheduling[0m
-[32m- delay scheduling with fallback[0m
-[32m- delay scheduling with failed hosts[0m
-[32m- task result lost[0m
-[32m- repeated failures lead to task set abortion[0m
-[32m- executors should be blacklisted after task failure, in spite of locality preferences[0m
-[32m- new executors get added[0m
-[32m- test RACK_LOCAL tasks[0m
-[32m- do not emit warning when serialized task is small[0m
-[32m- emit warning when serialized task is large[0m
-[32mDriverSuite:[0m
-Spark assembly has been built with Hive, including Datanucleus jars on classpath
-Spark assembly has been built with Hive, including Datanucleus jars on classpath
-Spark assembly has been built with Hive, including Datanucleus jars on classpath
-Spark assembly has been built with Hive, including Datanucleus jars on classpath
-[32m- driver should exit after finishing[0m
-[32mCompactBufferSuite:[0m
-[32m- empty buffer[0m
-[32m- basic inserts[0m
-[32m- adding sequences[0m
-[32m- adding the same buffer to itself[0m
-[32mCacheManagerSuite:[0m
-[32m- get uncached rdd[0m
-[32m- get cached rdd[0m
-[32m- get uncached local rdd[0m
-[32mTaskSchedulerImplSuite:[0m
-[32m- FIFO Scheduler Test[0m
-[32m- Fair Scheduler Test[0m
-[32m- Nested Pool Test[0m
-[32m- Scheduler does not always schedule tasks on the same workers[0m
-[32m- Scheduler correctly accounts for multiple CPUs per task[0m
-[32mSparkConfSuite:[0m
-[32m- loading from system properties[0m
-[32m- initializing without loading defaults[0m
-[32m- named set methods[0m
-[32m- basic get and set[0m
-[32m- creating SparkContext without master and app name[0m
-[32m- creating SparkContext without master[0m
-[32m- creating SparkContext without app name[0m
-[32m- creating SparkContext with both master and app name[0m
-[32m- SparkContext property overriding[0m
-[32m- nested property names[0m
-[32mBlockManagerSuite:[0m
-[32m- StorageLevel object caching[0m
-[32m- BlockManagerId object caching[0m
-[32m- master + 1 manager interaction[0m
-[32m- master + 2 managers interaction[0m
-[32m- removing block[0m
-[32m- removing rdd[0m
-[32m- removing broadcast[0m
-[32m- reregistration on heart beat[0m
-[32m- reregistration on block update[0m
-[32m- reregistration doesn't dead lock[0m
-Some(org.apache.spark.storage.BlockResult@12a52bbe)
-[32m- correct BlockResult returned from get() calls[0m
-[32m- in-memory LRU storage[0m
-[32m- in-memory LRU storage with serialization[0m
-[32m- in-memory LRU for partitions of same RDD[0m
-[32m- in-memory LRU for partitions of multiple RDDs[0m
-[32m- tachyon storage[0m
-[32m  + tachyon storage test disabled. [0m
-[32m- on-disk storage[0m
-[32m- disk and memory storage[0m
-[32m- disk and memory storage with getLocalBytes[0m
-[32m- disk and memory storage with serialization[0m
-[32m- disk and memory storage with serialization and getLocalBytes[0m
-[32m- LRU with mixed storage levels[0m
-[32m- in-memory LRU with streams[0m
-[32m- LRU with mixed storage levels and streams[0m
-[32m- negative byte values in ByteBufferInputStream[0m
-[32m- overly large block[0m
-[32m- block compression[0m
-[32m- block store put failure[0m
-[32m- reads of memory-mapped and non memory-mapped files are equivalent[0m
-[32m- updated block statuses[0m
-[32m- query block statuses[0m
-[32m- get matching blocks[0m
-[32m- SPARK-1194 regression: fix the same-RDD rule for cache replacement[0m
-[32m- reserve/release unroll memory[0m
-[32m- safely unroll blocks[0m
-[32m- safely unroll blocks through putIterator[0m
-[32m- safely unroll blocks through putIterator (disk)[0m
-[32m- multiple unrolls by the same thread[0m
-[32mPythonRunnerSuite:[0m
-[32m- format path[0m
-[32m- format paths[0m
-[32mBitSetSuite:[0m
-[32m- basic set and get[0m
-[32m- 100% full bit set[0m
-[32m- nextSetBit[0m
-[32m- xor len(bitsetX) < len(bitsetY)[0m
-[32m- xor len(bitsetX) > len(bitsetY)[0m
-[32m- andNot len(bitsetX) < len(bitsetY)[0m
-[32m- andNot len(bitsetX) > len(bitsetY)[0m
-[32mAsyncRDDActionsSuite:[0m
-[32m- countAsync[0m
-[32m- collectAsync[0m
-[32m- foreachAsync[0m
-[32m- foreachPartitionAsync[0m
-[32m- takeAsync[0m
-[32m- async success handling[0m
-[32m- async failure handling[0m
-[32m- FutureAction result, infinite wait[0m
-[32m- FutureAction result, finite wait[0m
-[32m- FutureAction result, timeout[0m
-[32mMetricsSystemSuite:[0m
-[32m- MetricsSystem with default config[0m
-[32m- MetricsSystem with sources add[0m
-[32mJobCancellationSuite:[0m
-[32m- local mode, FIFO scheduler[0m
-[32m- local mode, fair scheduler[0m
-Spark assembly has been built with Hive, including Datanucleus jars on classpath
-Spark assembly has been built with Hive, including Datanucleus jars on classpath
-[32m- cluster mode, FIFO scheduler[0m
-Spark assembly has been built with Hive, including Datanucleus jars on classpath
-Spark assembly has been built with Hive, including Datanucleus jars on classpath
-[32m- cluster mode, fair scheduler[0m
-[32m- do not put partially executed partitions into cache[0m
-[32m- job group[0m
-[32m- job group with interruption[0m
-[33m- two jobs sharing the same stage !!! IGNORED !!![0m
-[32mPartitioningSuite:[0m
-[32m- HashPartitioner equality[0m
-[32m- RangePartitioner equality[0m
-[32m- RangePartitioner getPartition[0m
-[32m- RangePartitioner for keys that are not Comparable (but with Ordering)[0m
-[32m- RangPartitioner.sketch[0m
-[32m- RangePartitioner.determineBounds[0m
-[32m- RangePartitioner should run only one job if data is roughly balanced[0m
-[32m- RangePartitioner should work well on unbalanced data[0m
-[32m- RangePartitioner should return a single partition for empty RDDs[0m
-[32m- HashPartitioner not equal to RangePartitioner[0m
-[32m- partitioner preservation[0m
-[32m- partitioning Java arrays should fail[0m
-[32m- zero-length partitions should be correctly handled[0m
-[32mSecurityManagerSuite:[0m
-[32m- set security with conf[0m
-[32m- set security with api[0m
-[32mUISuite:[0m
-[33m- basic ui visibility !!! IGNORED !!![0m
-[33m- visibility at localhost:4040 !!! IGNORED !!![0m
-[33m- attaching a new tab !!! IGNORED !!![0m
-[32m- jetty selects different port under contention[0m
-[32m- jetty binds to port 0 correctly[0m
-[32m- verify appUIAddress contains the scheme[0m
-[32m- verify appUIAddress contains the port[0m
-[32mSortShuffleSuite:[0m
-[32m- groupByKey without compression[0m
-Spark assembly has been built with Hive, including Datanucleus jars on classpath
-Spark assembly has been built with Hive, including Datanucleus jars on classpath
-[32m- shuffle non-zero block size[0m
-Spark assembly has been built with Hive, including Datanucleus jars on classpath
-Spark assembly has been built with Hive, including Datanucleus jars on classpath
-[32m- shuffle serializer[0m
-Spark assembly has been built with Hive, including Datanucleus jars on classpath
-Spark assembly has been built with Hive, including Datanucleus jars on classpath
-[32m- zero sized blocks[0m
-Spark assembly has been built with Hive, including Datanucleus jars on classpath
-Spark assembly has been built with Hive, including Datanucleus jars on classpath
-[32m- zero sized blocks without kryo[0m
-Spark assembly has been built with Hive, including Datanucleus jars on classpath
-Spark assembly has been built with Hive, including Datanucleus jars on classpath
-[32m- shuffle on mutable pairs[0m
-Spark assembly has been built with Hive, including Datanucleus jars on classpath
-Spark assembly has been built with Hive, including Datanucleus jars on classpath
-[32m- sorting on mutable pairs[0m
-Spark assembly has been built with Hive, including Datanucleus jars on classpath
-Spark assembly has been built with Hive, including Datanucleus jars on classpath
-[32m- cogroup using mutable pairs[0m
-Spark assembly has been built with Hive, including Datanucleus jars on classpath
-Spark assembly has been built with Hive, including Datanucleus jars on classpath
-[32m- subtract mutable pairs[0m
-Spark assembly has been built with Hive, including Datanucleus jars on classpath
-Spark assembly has been built with Hive, including Datanucleus jars on classpath
-[32m- sort with Java non serializable class - Kryo[0m
-Spark assembly has been built with Hive, including Datanucleus jars on classpath
-Spark assembly has been built with Hive, including Datanucleus jars on classpath
-[32m- sort with Java non serializable class - Java[0m
-/bin/sh: line 1:  1895 Killed                  java -Dbasedir=/shared/strlen/core -Xmx3g -XX:MaxPermSize=512m -XX:ReservedCodeCacheSize=512m org.scalatest.tools.Runner -R '/shared/strlen/core/target/scala-2.10/classes /shared/strlen/core/target/scala-2.10/test-classes' -o -f /shared/strlen/core/target/surefire-reports/shared/strlen/core/target/SparkTestSuite.txt -u /shared/strlen/core/target/surefire-reports/.
-[INFO] ------------------------------------------------------------------------
-[INFO] Reactor Summary:
-[INFO] 
-[INFO] Spark Project Parent POM .......................... SUCCESS [2.021s]
-[INFO] Spark Project Core ................................ FAILURE [8:09.635s]
-[INFO] Spark Project Bagel ............................... SKIPPED
-[INFO] Spark Project GraphX .............................. SKIPPED
-[INFO] Spark Project ML Library .......................... SKIPPED
-[INFO] Spark Project Streaming ........................... SKIPPED
-[INFO] Spark Project Tools ............................... SKIPPED
-[INFO] Spark Project Catalyst ............................ SKIPPED
-[INFO] Spark Project SQL ................................. SKIPPED
-[INFO] Spark Project Hive ................................ SKIPPED
-[INFO] Spark Project REPL ................................ SKIPPED
-[INFO] Spark Project YARN Parent POM ..................... SKIPPED
-[INFO] Spark Project YARN Stable API ..................... SKIPPED
-[INFO] Spark Project Assembly ............................ SKIPPED
-[INFO] Spark Project External Twitter .................... SKIPPED
-[INFO] Spark Project External Kafka ...................... SKIPPED
-[INFO] Spark Project External Flume Sink ................. SKIPPED
-[INFO] Spark Project External Flume ...................... SKIPPED
-[INFO] Spark Project External ZeroMQ ..................... SKIPPED
-[INFO] Spark Project External MQTT ....................... SKIPPED
-[INFO] Spark Project Examples ............................ SKIPPED
-[INFO] ------------------------------------------------------------------------
-[INFO] BUILD FAILURE
-[INFO] ------------------------------------------------------------------------
-[INFO] Total time: 8:12.473s
-[INFO] Finished at: Fri Aug 01 20:03:30 PDT 2014
-[INFO] Final Memory: 30M/697M
-[INFO] ------------------------------------------------------------------------
-[ERROR] Failed to execute goal org.scalatest:scalatest-maven-plugin:1.0-RC2:test (test) on project spark-core_2.10: There are test failures -> [Help 1]
-[ERROR] 
-[ERROR] To see the full stack trace of the errors, re-run Maven with the -e switch.
-[ERROR] Re-run Maven using the -X switch to enable full debug logging.
-[ERROR] 
-[ERROR] For more information about the errors and possible solutions, please read the following articles:
-[ERROR] [Help 1] http://cwiki.apache.org/confluence/display/MAVEN/MojoFailureException
-[ERROR] 
-[ERROR] After correcting the problems, you can resume the build with the command
-[ERROR]   mvn <goals> -rf :spark-core_2.10

From 22eddbce6a201c8f5b5c31859ceb972e60657377 Mon Sep 17 00:00:00 2001
From: Stephen Boesch <javadba>
Date: Sun, 3 Aug 2014 22:48:54 -0700
Subject: [PATCH 22/22] Use Octet/Char_Len instead of Octet/Char_length due to
 apparent preexisting spark ParserCombinator bug.

---
 .../org/apache/spark/sql/catalyst/SqlParser.scala    | 10 +++++-----
 .../scala/org/apache/spark/sql/SQLQuerySuite.scala   | 12 ++++++------
 .../scala/org/apache/spark/sql/hive/HiveQl.scala     | 10 +++++-----
 3 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala
index 188953b272dac..826e3b76f9bd8 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala
@@ -124,8 +124,8 @@ class SqlParser extends StandardTokenParsers with PackratParsers {
   protected val SUBSTRING = Keyword("SUBSTRING")
   protected val LEN = Keyword("LEN")
   protected val LENGTH = Keyword("LENGTH")
-  protected val CHAR_LENGTH = Keyword("CHAR_LENGTH")
-  protected val OCTET_LENGTH = Keyword("OCTET_LENGTH")
+  protected val CHAR_LEN = Keyword("CHAR_LEN")
+  protected val OCTET_LEN = Keyword("OCTET_LEN")
 
   // Use reflection to find the reserved words defined in this class.
   protected val reservedWords =
@@ -327,11 +327,11 @@ class SqlParser extends StandardTokenParsers with PackratParsers {
     (SUBSTR | SUBSTRING) ~> "(" ~> expression ~ "," ~ expression ~ "," ~ expression <~ ")" ^^ {
       case s ~ "," ~ p ~ "," ~ l => Substring(s,p,l)
     } |
-    (LEN | LENGTH | CHAR_LENGTH) ~> "(" ~> expression <~ ")" ^^ { case s => Length(s) } |
-      OCTET_LENGTH ~> "(" ~> expression ~ "," ~  expression <~ ")" ^^ {
+    (LEN | LENGTH | CHAR_LEN) ~> "(" ~> expression <~ ")" ^^ { case s => Length(s) } |
+      OCTET_LEN ~> "(" ~> expression ~ "," ~  expression <~ ")" ^^ {
       case s ~ "," ~  e => OctetLength(s, e)
     } |
-      OCTET_LENGTH ~> "(" ~> expression  <~ ")" ^^ {
+    OCTET_LEN ~> "(" ~> expression  <~ ")" ^^ {
       case s  => OctetLength(s, Literal(OctetLengthConstants.DefaultEncoding))
     } |
     ident ~ "(" ~ repsep(expression, ",") <~ ")" ^^ {
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index 8e834e6e3ce1e..595b11c2a305d 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -54,7 +54,7 @@ class SQLQuerySuite extends QueryTest {
 
   test("SPARK-2686 Added Parser of SQL LENGTH()") {
     checkAnswer(
-       sql("SELECT char_length(key) as keylen from testData where key = 100"), 3)
+       sql("SELECT char_len(key) as keylen from testData where key = 100"), 3)
     checkAnswer(
       sql("SELECT len(key), count(*) as cnt from testData where key <= 100 group by len(key)"),
       Seq(Seq(1,9),Seq(2,90), Seq(3,1)))
@@ -66,15 +66,15 @@ class SQLQuerySuite extends QueryTest {
       sql("SELECT max(LENGTH(s)) FROM nullableRepeatedData"), 4)
   }
 
-  test("SPARK-2686 Added Parser of SQL OCTET_LENGTH()") {
+  test("SPARK-2686 Added Parser of SQL OCTET_LEN()") {
     checkAnswer(
-      sql("SELECT octet_length(s) from repeatedData"), Seq(Seq(4),Seq(4)))
+      sql("SELECT octet_len(s) from repeatedData"), Seq(Seq(4),Seq(4)))
     checkAnswer(
-      sql("SELECT octet_length(s,'UTF-8') from repeatedData"), Seq(Seq(4),Seq(4)))
+      sql("SELECT octet_len(s,'UTF-8') from repeatedData"), Seq(Seq(4),Seq(4)))
     checkAnswer(
-      sql("SELECT max(octet_length(s,'UTF-8')) from nullStrings"), 3)
+      sql("SELECT max(octet_len(s,'UTF-8')) from nullStrings"), 3)
     checkAnswer(
-      sql("SELECT octet_length('a','ISO-8859-1') + octet_length('abcde','ISO-8859-1') FROM testData limit 1"), 6)
+      sql("SELECT octet_len('a','ISO-8859-1') + octet_len('abcde','ISO-8859-1') FROM testData limit 1"), 6)
   }
 
   test("index into array") {
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
index a60d3dad8a176..e6769646e4af2 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
@@ -866,8 +866,8 @@ private[hive] object HiveQl {
   val WHEN = "(?i)WHEN".r
   val CASE = "(?i)CASE".r
   val SUBSTR = "(?i)SUBSTR(?:ING)?".r
-  val CHAR_LENGTH = "(?i)CHAR_LENGTH".r
-  val OCTET_LENGTH = "(?i)OCTET_LENGTH".r
+  val CHAR_LEN = "(?i)CHAR_LEN".r
+  val OCTET_LEN = "(?i)OCTET_LEN".r
 
   protected def nodeToExpr(node: Node): Expression = node match {
     /* Attribute References */
@@ -997,11 +997,11 @@ private[hive] object HiveQl {
     case Token("TOK_FUNCTION", Token(RAND(), Nil) :: Nil) => Rand
     case Token("TOK_FUNCTION", Token(SUBSTR(), Nil) :: string :: pos :: Nil) => 
       Substring(nodeToExpr(string), nodeToExpr(pos), Literal(Integer.MAX_VALUE, IntegerType))
-    case Token("TOK_FUNCTION", Token(SUBSTR(), Nil) :: string :: pos :: length :: Nil) =>
+    case Token("TOK_FUNCTION", Token(SUBSTR(), Nil) :: string :: pos :: length :: Nil) => 
       Substring(nodeToExpr(string), nodeToExpr(pos), nodeToExpr(length))
-    case Token("TOK_FUNCTION", Token(OCTET_LENGTH(), Nil) :: string :: Nil) =>
+    case Token("TOK_FUNCTION", Token(OCTET_LEN(), Nil) :: string :: Nil) =>
       OctetLength(nodeToExpr(string), Literal(OctetLengthConstants.DefaultEncoding))
-    case Token("TOK_FUNCTION", Token(OCTET_LENGTH(), Nil) :: string :: encoding :: Nil) =>
+    case Token("TOK_FUNCTION", Token(OCTET_LEN(), Nil) :: string :: encoding :: Nil) =>
       OctetLength(nodeToExpr(string), nodeToExpr(encoding))