diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 8dc6303a81239..dd736a7191598 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -337,7 +337,6 @@ jobs: HIVE_PROFILE: ${{ matrix.hive }} GITHUB_PREV_SHA: ${{ github.event.before }} SPARK_LOCAL_IP: localhost - NOLINT_ON_COMPILE: true SKIP_UNIDOC: true SKIP_MIMA: true SKIP_PACKAGING: true @@ -599,7 +598,6 @@ jobs: HIVE_PROFILE: hive2.3 GITHUB_PREV_SHA: ${{ github.event.before }} SPARK_LOCAL_IP: localhost - NOLINT_ON_COMPILE: true SKIP_UNIDOC: true SKIP_MIMA: true SKIP_PACKAGING: true @@ -868,7 +866,6 @@ jobs: env: LC_ALL: C.UTF-8 LANG: C.UTF-8 - NOLINT_ON_COMPILE: false GITHUB_PREV_SHA: ${{ github.event.before }} BRANCH: ${{ inputs.branch }} container: @@ -1060,7 +1057,6 @@ jobs: env: LC_ALL: C.UTF-8 LANG: C.UTF-8 - NOLINT_ON_COMPILE: false PYSPARK_DRIVER_PYTHON: python3.9 PYSPARK_PYTHON: python3.9 GITHUB_PREV_SHA: ${{ github.event.before }} diff --git a/dev/make-distribution.sh b/dev/make-distribution.sh index 16598bda87339..428a3ed3f1120 100755 --- a/dev/make-distribution.sh +++ b/dev/make-distribution.sh @@ -169,7 +169,6 @@ fi cd "$SPARK_HOME" if [ "$SBT_ENABLED" == "true" ] ; then - export NOLINT_ON_COMPILE=1 # Store the command as an array because $SBT variable might have spaces in it. # Normal quoting tricks don't work. # See: http://mywiki.wooledge.org/BashFAQ/050 diff --git a/dev/scalastyle b/dev/scalastyle index 0428453b62c81..09e6c2372614d 100755 --- a/dev/scalastyle +++ b/dev/scalastyle @@ -30,6 +30,62 @@ ERRORS=$(echo -e "q\n" \ if test ! -z "$ERRORS"; then echo -e "Scalastyle checks failed at following occurrences:\n$ERRORS" + # When running under GitHub Actions, also emit each scalastyle violation as + # a workflow `::error` annotation so it appears inline on the PR's "Files + # changed" tab. Without this, a violation cascades into ~7 red CI checks + # (Linters, Java 17/25 Maven build, Documentation generation, sparkr, + # Docker integration, TPC-DS) -- all needing catalyst to compile -- and + # each only surfaces a generic "exit code 1" with no file/line, forcing + # the user to download a full job log to find the actual violation. + if [[ "${GITHUB_ACTIONS:-}" == "true" ]]; then + # Strip ANSI color codes from the captured output before regex + # matching. Today sbt under awk's pipe is not a TTY and skips color, + # so the input is already plain. But if sbt color is ever forced + # (`-Dsbt.color=always`, custom CI shell), `\e[31m` would silently + # break every regex below. Cheap to harden. + ERRORS_PLAIN=$(printf '%s' "$ERRORS" | sed -E $'s/\x1b\\[[0-9;]*[A-Za-z]//g') + # Helper: emit one `::error` annotation. Centralised so the two regex + # branches below stay short. + emit_annotation() { + local file="$1" lineno="$2" msg="$3" + # Strip the GitHub Actions workspace prefix so the annotation + # references the path as it appears in the repo. + local file_rel="${file#${GITHUB_WORKSPACE:-}/}" + # Escape the few characters GitHub reserves in annotation values: + # %, \r, \n. (`,` and `:` need not be escaped in the message body, + # only inside parameter values, which we don't use.) + local msg_escaped="${msg//%/%25}" + msg_escaped="${msg_escaped//$'\r'/%0D}" + msg_escaped="${msg_escaped//$'\n'/%0A}" + printf '::error file=%s,line=%s,title=Scalastyle::%s\n' \ + "$file_rel" "$lineno" "$msg_escaped" + } + printf '%s\n' "$ERRORS_PLAIN" | while IFS= read -r raw; do + # Two scalastyle output formats reach us: + # + # (a) scalastyle's native console writer (`Tasks.doScalastyle` when + # invoked by the explicit `scalastyle` / `test:scalastyle` + # tasks): + # error file= message= line= [column=] + # The path has no spaces, the message can; `column=` is + # appended for checkers that report a column (e.g. + # `WhitespaceEndOfLineChecker`) and absent otherwise. + # + # (b) sbt's logger format, used when `Tasks.doScalastyle` writes + # through `streams.value.log.error(...)` -- which is what the + # explicit `scalastyle` / `test:scalastyle` tasks invoked by + # this script do, and so this is the format we see in CI: + # [error] :: + # The leading `[error] ` plus a single `::` (with no + # `::` follow-up) is what tells it apart from a regular + # Scala compile error of shape `[error] ::: `. + if [[ "$raw" =~ ^error[[:space:]]+file=([^[:space:]]+)[[:space:]]+message=(.*)[[:space:]]+line=([0-9]+)([[:space:]]+column=[0-9]+)?$ ]]; then + emit_annotation "${BASH_REMATCH[1]}" "${BASH_REMATCH[3]}" "${BASH_REMATCH[2]}" + elif [[ "$raw" =~ ^\[error\][[:space:]]+(/[^:[:space:]]+):([0-9]+):[[:space:]]+(.+)$ ]]; then + emit_annotation "${BASH_REMATCH[1]}" "${BASH_REMATCH[2]}" "${BASH_REMATCH[3]}" + fi + done + fi exit 1 else echo -e "Scalastyle checks passed." diff --git a/docs/_plugins/build_api_docs.rb b/docs/_plugins/build_api_docs.rb index e6719c4bed7e3..a75727d8b65db 100644 --- a/docs/_plugins/build_api_docs.rb +++ b/docs/_plugins/build_api_docs.rb @@ -133,101 +133,48 @@ def build_spark_scala_and_java_docs_if_necessary command = "build/sbt -Pkinesis-asl unidoc" puts "Running '#{command}'..." - # Tee sbt output to a log file so we can diagnose failures. The most common - # unidoc failure is a javadoc crash mid-stream while generating HTML for a - # specific class, buried under ~100 benign errors on genjavadoc-generated - # Java stubs (e.g. target/java/org/apache/spark/ErrorInfo.java). Without the - # diagnostic below, the real culprit -- the source whose doc tripped javadoc - # -- is effectively invisible in the CI log. - log_file = File.join(SPARK_PROJECT_ROOT, "target", "unidoc-build.log") - mkdir_p(File.dirname(log_file)) - success = stream_and_capture(command, log_file) - unless success - diagnose_unidoc_failure(log_file) - raise("Unidoc generation failed") - end -end -# Runs `command`, streaming every line to both stdout and `log_file`. Returns -# true iff the command exited 0. Ruby-only; no shell pipefail reliance. -def stream_and_capture(command, log_file) - File.open(log_file, 'w') do |f| - IO.popen("#{command} 2>&1", 'r') do |pipe| - pipe.each_line do |line| + # Suppress genjavadoc-stub diagnostic blocks from the visible log. javadoc + # emits ~3500 `[error]` lines per unidoc run on stubs under `target/java/` + # -- all benign because `--ignore-source-errors` is set, but they bury + # everything else. Each diagnostic is a header line followed by 3-5 + # `[error|warn]`-prefixed continuation lines (snippet, caret, + # symbol/location); the state machine drops both. + # + # Match by *message text*, not just by `target/java/` path. Otherwise + # legitimate doclint diagnostics on stub paths would be hidden too -- + # those messages are real signal. The patterns below are the known-benign + # genjavadoc structural errors; anything else on a `target/java/` path is + # echoed. Diagnostic mirror lines from `SparkUnidocDoclet` use the + # `[unidoc-doclet]` prefix and don't match either regex, so they always + # pass through. + ansi = /\e\[[0-9;]*[A-Za-z]/ + stub_header = %r{ + \[(?:error|warn)\]\s+ + \S*?/target/java/\S+\.java:\d+(?::\d+)?:\s+ + error:\s+ + (?:cannot\s+find\s+symbol + |illegal\s+combination\s+of\s+modifiers + |non-static\s+type\s+variable\b + |.*?\s+is\s+not\s+public\s+in\s+\S+;\s+cannot\s+be\s+accessed\s+from\s+outside\s+package) + }x + stub_cont = %r{\A\s*\[(?:error|warn)\]\s+(?!/\S+\.java:\d+(?::\d+)?:\s)} + in_stub = false + IO.popen("#{command} 2>&1", 'r') do |pipe| + pipe.each_line do |line| + plain = line.gsub(ansi, '') + if plain =~ stub_header + in_stub = true + elsif in_stub && plain =~ stub_cont + # continuation of a stub block; suppress + else + in_stub = false $stdout.write(line) $stdout.flush - f.write(line) end end end - $?.success? -end - -# Scans the captured unidoc log and prints a pointer to the most likely -# culprit source file. The heuristic: when javadoc dies mid-HTML-generation, -# the last "Generating .../X.html" line before "javadoc exited with exit code" -# names the class that tripped it. Prints nothing actionable if the failure -# mode doesn't match (e.g. a scaladoc error), in which case the full log above -# already shows what's wrong. -def diagnose_unidoc_failure(log_file) - return unless File.exist?(log_file) - begin - lines = File.readlines(log_file) - - javadoc_exit_idx = lines.rindex { |l| l.include?("javadoc exited with exit code") } - last_generating = nil - if javadoc_exit_idx - # Strip ANSI color codes so the regex matches sbt-coloured output too. - ansi = /\e\[[0-9;]*[A-Za-z]/ - lines[0...javadoc_exit_idx].reverse_each do |line| - if line.gsub(ansi, '') =~ %r{Generating .+/javaunidoc/(\S+?\.html)\.\.\.} - last_generating = $1 - break - end - end - end - - banner = "=" * 78 - $stderr.puts "" - $stderr.puts banner - $stderr.puts "Unidoc failed -- diagnostic summary" - $stderr.puts banner - if last_generating - class_path = last_generating.sub(/\.html$/, '') - class_name = class_path.tr('/', '.') - $stderr.puts "" - $stderr.puts " Javadoc crashed while generating: #{last_generating}" - $stderr.puts " Likely culprit: doc comment in #{class_name}" - $stderr.puts "" - $stderr.puts " Javadoc can hard-exit (not just warn) on specific scaladoc" - $stderr.puts " patterns once they have been passed through genjavadoc --" - $stderr.puts " wiki-style `[[Class]]` / `[[method]]` links or inline-backticked" - $stderr.puts " code refs in the Scala source for the class above are common" - $stderr.puts " triggers. Start by auditing any recent doc-string changes in" - $stderr.puts " that source file." - $stderr.puts "" - $stderr.puts " NOTE: the '[error]' lines above on files under" - $stderr.puts " target/java/... are benign genjavadoc stubs -- every PR" - $stderr.puts " emits them and they do not cause the exit. Ignore them." - elsif javadoc_exit_idx - $stderr.puts "" - $stderr.puts " Javadoc exited but no class HTML generation was in progress;" - $stderr.puts " the crash predates HTML output -- likely a CLI / classpath /" - $stderr.puts " setup issue. See the full sbt output above." - else - $stderr.puts "" - $stderr.puts " Could not locate a 'javadoc exited with exit code' marker in" - $stderr.puts " the log; the failure is likely outside the javaunidoc step" - $stderr.puts " (scaladoc / sbt / build env). See the full sbt output above." - end - $stderr.puts banner - $stderr.puts "" - rescue => e - # Never let the diagnostic helper itself obscure the underlying unidoc - # failure: if anything here goes wrong (e.g. encoding error reading the - # log), report it briefly and let the caller raise the real error. - $stderr.puts "(diagnostic helper failed: #{e.class}: #{e.message})" - end + raise("Unidoc generation failed") unless $?.success? end def build_scala_and_java_docs diff --git a/pom.xml b/pom.xml index 4c61850c268fe..a6eed0d3a71d5 100644 --- a/pom.xml +++ b/pom.xml @@ -3235,6 +3235,11 @@ org.apache.maven.plugins maven-source-plugin + org.scalastyle scalastyle-maven-plugin @@ -3251,13 +3256,6 @@ ${project.build.sourceEncoding} ${project.reporting.outputEncoding} - - - - check - - - org.apache.maven.plugins @@ -3395,6 +3393,32 @@ + + + scalastyle + + + + + org.scalastyle + scalastyle-maven-plugin + + + + check + + + + + + + + +