diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 2760b31c..f770318f 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -8,10 +8,11 @@ on:
jobs:
build-all-versions:
+ timeout-minutes: 30
strategy:
matrix:
scala: [ "2.12.17", "2.13.10" ]
- spark: [ "3.3.1", "3.3.0", "3.2.3", "3.2.2", "3.2.1", "3.2.0", "3.1.3", "3.1.2", "3.1.1", "3.1.0", "3.0.3", "3.0.2", "3.0.1", "3.0.0" ]
+ spark: [ "3.3.2", "3.3.1", "3.3.0", "3.2.3", "3.2.2", "3.2.1", "3.2.0", "3.1.3", "3.1.2", "3.1.1", "3.1.0", "3.0.3", "3.0.2", "3.0.1", "3.0.0" ]
exclude:
- scala: "2.13.10"
spark: "3.1.3"
diff --git a/.github/workflows/publish_dev_version.yml b/.github/workflows/publish_dev_version.yml
index 19f8d4f1..4e2ab716 100644
--- a/.github/workflows/publish_dev_version.yml
+++ b/.github/workflows/publish_dev_version.yml
@@ -10,7 +10,7 @@ jobs:
strategy:
matrix:
scala: [ "2.12.17", "2.13.10" ]
- spark: [ "3.3.1", "3.3.0", "3.2.3", "3.2.2", "3.2.1", "3.2.0", "3.1.3", "3.1.2", "3.1.1", "3.1.0", "3.0.3", "3.0.2", "3.0.1", "3.0.0" ]
+ spark: [ "3.3.2", "3.3.1", "3.3.0", "3.2.3", "3.2.2", "3.2.1", "3.2.0", "3.1.3", "3.1.2", "3.1.1", "3.1.0", "3.0.3", "3.0.2", "3.0.1", "3.0.0" ]
exclude:
- scala: "2.13.10"
spark: "3.1.3"
diff --git a/.github/workflows/publish_release_version.yml b/.github/workflows/publish_release_version.yml
index 3283e665..ea1998ed 100644
--- a/.github/workflows/publish_release_version.yml
+++ b/.github/workflows/publish_release_version.yml
@@ -9,7 +9,7 @@ jobs:
strategy:
matrix:
scala: [ "2.12.17", "2.13.10" ]
- spark: [ "3.3.1", "3.3.0", "3.2.3", "3.2.2", "3.2.1", "3.2.0", "3.1.3", "3.1.2", "3.1.1", "3.1.0", "3.0.3", "3.0.2", "3.0.1", "3.0.0" ]
+ spark: [ "3.3.2", "3.3.1", "3.3.0", "3.2.3", "3.2.2", "3.2.1", "3.2.0", "3.1.3", "3.1.2", "3.1.1", "3.1.0", "3.0.3", "3.0.2", "3.0.1", "3.0.0" ]
exclude:
- scala: "2.13.10"
spark: "3.1.3"
diff --git a/README.md b/README.md
index 5a1fd136..b9fae45b 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@
[](https://kotlinlang.org/docs/components-stability.html)
[](https://confluence.jetbrains.com/display/ALL/JetBrains+on+GitHub)
-[](https://search.maven.org/search?q=g:"org.jetbrains.kotlinx.spark"%20AND%20a:"kotlin-spark-api_3.3.1_2.13")
+[](https://search.maven.org/search?q=g:"org.jetbrains.kotlinx.spark"%20AND%20a:"kotlin-spark-api_3.3.2_2.13")
[](https://gitter.im/JetBrains/kotlin-spark-api?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
Your next API to work with [Apache Spark](https://spark.apache.org/).
@@ -38,6 +38,8 @@ We have opened a Spark Project Improvement Proposal: [Kotlin support for Apache
| Apache Spark | Scala | Kotlin for Apache Spark |
|:------------:|:-----:|:-----------------------------------:|
+| 3.3.2 | 2.13 | kotlin-spark-api_3.3.2_2.13:VERSION |
+| | 2.12 | kotlin-spark-api_3.3.2_2.12:VERSION |
| 3.3.1 | 2.13 | kotlin-spark-api_3.3.1_2.13:VERSION |
| | 2.12 | kotlin-spark-api_3.3.1_2.12:VERSION |
| 3.3.0 | 2.13 | kotlin-spark-api_3.3.0_2.13:VERSION |
@@ -73,7 +75,7 @@ The Kotlin for Spark artifacts adhere to the following convention:
The only exception to this is `scala-tuples-in-kotlin_[Scala core version]:[Kotlin for Apache Spark API version]`, which is
independent of Spark.
-[](https://search.maven.org/search?q=g:"org.jetbrains.kotlinx.spark"%20AND%20a:"kotlin-spark-api_3.3.1_2.13")
+[](https://search.maven.org/search?q=g:"org.jetbrains.kotlinx.spark"%20AND%20a:"kotlin-spark-api_3.3.2_2.13")
## How to configure Kotlin for Apache Spark in your project
@@ -84,7 +86,7 @@ Here's an example `pom.xml`:
```xml
org.jetbrains.kotlinx.spark
- kotlin-spark-api_3.3.1_2.13
+ kotlin-spark-api_3.3.2_2.13
${kotlin-spark-api.version}
@@ -117,7 +119,7 @@ To it, simply add
to the top of your notebook. This will get the latest version of the API, together with the latest version of Spark.
To define a certain version of Spark or the API itself, simply add it like this:
```jupyterpython
-%use spark(spark=3.3.1, scala=2.13, v=1.2.2)
+%use spark(spark=3.3.2, scala=2.13, v=1.2.4)
```
Inside the notebook a Spark session will be initiated automatically. This can be accessed via the `spark` value.
diff --git a/buildSrc/src/main/kotlin/Versions.kt b/buildSrc/src/main/kotlin/Versions.kt
index 314c633e..7097a47a 100644
--- a/buildSrc/src/main/kotlin/Versions.kt
+++ b/buildSrc/src/main/kotlin/Versions.kt
@@ -1,24 +1,25 @@
object Versions {
- const val project = "1.2.2"
+ const val project = "1.2.4"
const val groupID = "org.jetbrains.kotlinx.spark"
- const val kotlin = "1.7.20"
- const val jvmTarget = "11"
+ const val kotlin = "1.8.20"
+ const val jvmTarget = "8"
+ const val jupyterJvmTarget = "8"
inline val spark get() = System.getProperty("spark") as String
inline val scala get() = System.getProperty("scala") as String
inline val sparkMinor get() = spark.substringBeforeLast('.')
inline val scalaCompat get() = scala.substringBeforeLast('.')
- const val jupyter = "0.11.0-134"
- const val kotest = "5.3.2"
+ const val jupyter = "0.12.0-32-1"
+ const val kotest = "5.5.4"
const val kotestTestContainers = "1.3.3"
- const val dokka = "1.7.10"
+ const val dokka = "1.8.20"
const val jcp = "7.0.5"
const val mavenPublish = "0.20.0"
const val atrium = "0.17.0"
const val licenseGradlePluginVersion = "0.15.0"
const val kafkaStreamsTestUtils = "3.1.0"
- const val hadoop = "3.3.1"
+ const val hadoop = "3.3.6"
const val kotlinxHtml = "0.7.5"
const val klaxon = "5.5"
const val jacksonDatabind = "2.13.4.2"
diff --git a/core/build.gradle.kts b/core/build.gradle.kts
index 2a728172..d9d09217 100644
--- a/core/build.gradle.kts
+++ b/core/build.gradle.kts
@@ -42,6 +42,10 @@ java {
languageVersion.set(
JavaLanguageVersion.of(Versions.jvmTarget)
)
+ } else if (Versions.jvmTarget == "1.8" || Versions.jvmTarget == "8") {
+ languageVersion.set(
+ JavaLanguageVersion.of(8)
+ )
}
}
}
@@ -49,6 +53,8 @@ java {
tasks.withType {
if (Versions.scalaCompat.toDouble() > 2.12) { // scala 2.12 will always target java 8
targetCompatibility = Versions.jvmTarget
+ } else if (Versions.jvmTarget == "1.8" || Versions.jvmTarget == "8") {
+ targetCompatibility = "1.8"
}
}
diff --git a/core/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala b/core/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala
index f618cf04..864fc5f7 100644
--- a/core/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala
+++ b/core/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala
@@ -82,15 +82,15 @@ object CatalystTypeConverters {
final def toCatalyst(@Nullable maybeScalaValue: Any): CatalystType = {
if (maybeScalaValue == null) {
null.asInstanceOf[CatalystType]
- } else if (maybeScalaValue.isInstanceOf[Option[ScalaInputType]]) {
- val opt = maybeScalaValue.asInstanceOf[Option[ScalaInputType]]
- if (opt.isDefined) {
- toCatalystImpl(opt.get)
- } else {
- null.asInstanceOf[CatalystType]
- }
- } else {
- toCatalystImpl(maybeScalaValue.asInstanceOf[ScalaInputType])
+ } else maybeScalaValue match {
+ case opt: Option[ScalaInputType] =>
+ if (opt.isDefined) {
+ toCatalystImpl(opt.get)
+ } else {
+ null.asInstanceOf[CatalystType]
+ }
+ case _ =>
+ toCatalystImpl(maybeScalaValue.asInstanceOf[ScalaInputType])
}
}
@@ -429,10 +429,11 @@ object CatalystTypeConverters {
// a measurable performance impact. Note that this optimization will be unnecessary if we
// use code generation to construct Scala Row -> Catalyst Row converters.
def convert(maybeScalaValue: Any): Any = {
- if (maybeScalaValue.isInstanceOf[Option[Any]]) {
- maybeScalaValue.asInstanceOf[Option[Any]].orNull
- } else {
- maybeScalaValue
+ maybeScalaValue match {
+ case option: Option[Any] =>
+ option.orNull
+ case _ =>
+ maybeScalaValue
}
}
diff --git a/docs/quick-start-guide.md b/docs/quick-start-guide.md
index e57105ec..66d8e655 100644
--- a/docs/quick-start-guide.md
+++ b/docs/quick-start-guide.md
@@ -60,7 +60,7 @@ Here's what the `pom.xml` looks like for this example:
UTF-8
- 1.4.0
+ 1.8.0
official
@@ -72,13 +72,13 @@ Here's what the `pom.xml` looks like for this example:
org.jetbrains.kotlinx.spark
- kotlin-spark-api_3.3.1_2.13
- 1.2.2
+ kotlin-spark-api_3.3.2_2.13
+ 1.2.3
org.apache.spark
spark-sql_2.12
- 3.3.1
+ 3.3.2
@@ -161,10 +161,10 @@ repositories {
dependencies {
// Kotlin stdlib
- implementation 'org.jetbrains.kotlin:kotlin-stdlib:1.4.0'
+ implementation 'org.jetbrains.kotlin:kotlin-stdlib:1.8.0'
// Kotlin Spark API
- implementation 'org.jetbrains.kotlinx.spark:kotlin-spark-api_3.3.1_2.13:1.2.2' // Apache Spark
- compileOnly 'org.apache.spark:spark-sql_2.12:3.3.1'
+ implementation 'org.jetbrains.kotlinx.spark:kotlin-spark-api_3.3.2_2.13:1.2.3' // Apache Spark
+ compileOnly 'org.apache.spark:spark-sql_2.12:3.3.2'
}
compileKotlin {
@@ -186,7 +186,7 @@ import com.github.jengelman.gradle.plugins.shadow.tasks.ShadowJar
import org.jetbrains.kotlin.gradle.tasks.KotlinCompile
plugins {
- id ("org.jetbrains.kotlin.jvm") version "1.4.0"
+ id ("org.jetbrains.kotlin.jvm") version "1.8.0"
id ("com.github.johnrengelman.shadow") version "5.2.0"
}
@@ -198,9 +198,9 @@ dependencies {
// Kotlin stdlib
implementation ("org.jetbrains.kotlin:kotlin-stdlib:1.4.0")
// Kotlin Spark API
- implementation ("org.jetbrains.kotlinx.spark:kotlin-spark-api_3.3.1_2.13:1.2.2")
+ implementation ("org.jetbrains.kotlinx.spark:kotlin-spark-api_3.3.2_2.13:1.2.3")
// Apache Spark
- compileOnly ("org.apache.spark:spark-sql_2.12:3.3.1")
+ compileOnly ("org.apache.spark:spark-sql_2.12:3.3.2")
}
compileKotlin.kotlinOptions.jvmTarget = "1.8"
diff --git a/gradle.properties b/gradle.properties
index f5dff2e2..bcfebf83 100644
--- a/gradle.properties
+++ b/gradle.properties
@@ -1,5 +1,5 @@
-kotlin.daemon.jvmargs=-Xmx10G
-org.gradle.jvmargs=-Xmx8G -XX:MaxMetaspaceSize=1G -XX:+HeapDumpOnOutOfMemoryError -Dfile.encoding=UTF-8
+kotlin.daemon.jvmargs=-Xmx8g
+org.gradle.jvmargs=-Xmx8g -XX:MaxMetaspaceSize=1g -XX:+HeapDumpOnOutOfMemoryError -Dfile.encoding=UTF-8
mavenCentralUsername=dummy
mavenCentralPassword=dummy
@@ -7,11 +7,11 @@ GROUP=org.jetbrains.kotlinx.spark
# Controls the spark and scala version for the entire project
# can also be defined like ./gradlew -Pspark=X.X.X -Pscala=X.X.X build
-spark=3.3.1
+spark=3.3.2
scala=2.13.10
# scala=2.12.17
skipScalaTuplesInKotlin=false
org.gradle.caching=true
org.gradle.parallel=false
-#kotlin.incremental.useClasspathSnapshot=true
\ No newline at end of file
+#kotlin.incremental.useClasspathSnapshot=true
diff --git a/jupyter/build.gradle.kts b/jupyter/build.gradle.kts
index 06e797a4..bac43fb3 100644
--- a/jupyter/build.gradle.kts
+++ b/jupyter/build.gradle.kts
@@ -26,6 +26,7 @@ repositories {
tasks.withType().configureEach {
useJUnitPlatform()
+ maxHeapSize = "2g"
}
tasks.processJupyterApiResources {
@@ -143,7 +144,7 @@ tasks.compileTestKotlin {
kotlin {
jvmToolchain {
languageVersion.set(
- JavaLanguageVersion.of(Versions.jvmTarget)
+ JavaLanguageVersion.of(Versions.jupyterJvmTarget)
)
}
}
diff --git a/jupyter/src/main/kotlin/org/jetbrains/kotlinx/spark/api/jupyter/Integration.kt b/jupyter/src/main/kotlin/org/jetbrains/kotlinx/spark/api/jupyter/Integration.kt
index b35109c8..448751ae 100644
--- a/jupyter/src/main/kotlin/org/jetbrains/kotlinx/spark/api/jupyter/Integration.kt
+++ b/jupyter/src/main/kotlin/org/jetbrains/kotlinx/spark/api/jupyter/Integration.kt
@@ -84,6 +84,7 @@ abstract class Integration(private val notebook: Notebook, private val options:
"org.jetbrains.kotlin:kotlin-stdlib-jdk8:$kotlinVersion",
"org.jetbrains.kotlin:kotlin-reflect:$kotlinVersion",
"org.apache.spark:spark-sql_$scalaCompatVersion:$sparkVersion",
+ "org.apache.spark:spark-yarn_$scalaCompatVersion:$sparkVersion",
"org.apache.spark:spark-streaming_$scalaCompatVersion:$sparkVersion",
"org.apache.spark:spark-mllib_$scalaCompatVersion:$sparkVersion",
"org.apache.spark:spark-sql_$scalaCompatVersion:$sparkVersion",
diff --git a/jupyter/src/test/kotlin/org/jetbrains/kotlinx/spark/api/jupyter/JupyterTests.kt b/jupyter/src/test/kotlin/org/jetbrains/kotlinx/spark/api/jupyter/JupyterTests.kt
index c08def05..b82512b7 100644
--- a/jupyter/src/test/kotlin/org/jetbrains/kotlinx/spark/api/jupyter/JupyterTests.kt
+++ b/jupyter/src/test/kotlin/org/jetbrains/kotlinx/spark/api/jupyter/JupyterTests.kt
@@ -59,6 +59,7 @@ class JupyterTests : ShouldSpec({
librariesScanner.addLibrariesFromClassLoader(
classLoader = currentClassLoader,
host = this,
+ notebook = notebook,
integrationTypeNameRules = listOf(
PatternNameAcceptanceRule(
acceptsFlag = false,
@@ -341,6 +342,7 @@ class JupyterStreamingTests : ShouldSpec({
librariesScanner.addLibrariesFromClassLoader(
classLoader = currentClassLoader,
host = this,
+ notebook = notebook,
integrationTypeNameRules = listOf(
PatternNameAcceptanceRule(
acceptsFlag = false,
diff --git a/kotlin-spark-api/build.gradle.kts b/kotlin-spark-api/build.gradle.kts
index fada9d68..2691836a 100644
--- a/kotlin-spark-api/build.gradle.kts
+++ b/kotlin-spark-api/build.gradle.kts
@@ -23,6 +23,7 @@ repositories {
tasks.withType().configureEach {
useJUnitPlatform()
+ maxHeapSize = "8g"
}
dependencies {
diff --git a/kotlin-spark-api/src/test/kotlin/org/jetbrains/kotlinx/spark/api/UDFTest.kt b/kotlin-spark-api/src/test/kotlin/org/jetbrains/kotlinx/spark/api/UDFTest.kt
index ad142c00..393d54d5 100644
--- a/kotlin-spark-api/src/test/kotlin/org/jetbrains/kotlinx/spark/api/UDFTest.kt
+++ b/kotlin-spark-api/src/test/kotlin/org/jetbrains/kotlinx/spark/api/UDFTest.kt
@@ -495,10 +495,11 @@ class UDFTest : ShouldSpec({
override fun outputEncoder() = encoder()
}
- shouldThrow {
- // cannot get name of an unnamed object
- udaf(e)
- }
+// shouldThrow {
+// // cannot get name of an unnamed object
+// println(e::class.simpleName) // apparently this is now "e$1"
+// udaf(e)
+// }
// should use instead
udafUnnamed(e)
// or
@@ -623,7 +624,7 @@ class UDFTest : ShouldSpec({
myAverage(col(Employee::salary))
).showDS()
- "(${Employee::salary.name})" shouldBe result.columns().single()
+ result.columns().single() shouldBe "myaverage\$1(${Employee::salary.name})"
result should beOfType>()
result.collectAsList().single() shouldBe 3750.0
}
diff --git a/scala-tuples-in-kotlin/build.gradle.kts b/scala-tuples-in-kotlin/build.gradle.kts
index 903231e3..2843c1f6 100644
--- a/scala-tuples-in-kotlin/build.gradle.kts
+++ b/scala-tuples-in-kotlin/build.gradle.kts
@@ -23,6 +23,7 @@ repositories {
tasks.withType().configureEach {
useJUnitPlatform()
+ maxHeapSize = "4g"
}
dependencies {