-
Notifications
You must be signed in to change notification settings - Fork 337
Implement SCA Reachability runtime detection: report vulnerable classes and callsites via telemetry #11352
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Implement SCA Reachability runtime detection: report vulnerable classes and callsites via telemetry #11352
Changes from all commits
0a5c473
056e10a
d4f8583
467af34
28a82e4
314b73c
c296e3f
eb84339
611db60
a757731
ee2b7fb
67ab3ac
8492fd0
737d134
71d479f
f4bd262
e062d2d
89ded4c
74c7431
7daadaf
03d0cd6
9b1a5de
aa91be1
b2b16ac
85ec05a
79c452d
3f26179
92571c7
32e34d2
8989380
eaef6c2
cedab00
96ccb10
c036032
f49edc8
4a2db95
e52d031
7a57f08
30e5121
9563f1a
4ab3a03
70bbd6e
9dbbbc3
ecd5310
5824236
ad8df24
2331eea
54b5afc
e55f602
31ca279
b99745e
ad77e9e
15f088f
1c39656
a0c8af1
472dd13
6fe3340
9a1f078
fbaec49
bf9df22
43c0213
2c57819
65e2c9d
9eba5b2
51b5c25
a1759e1
a33437e
a875064
196a71d
83191ea
51492dc
18d9540
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
| @@ -0,0 +1,120 @@ | ||||||
| package datadog.gradle.plugin.sca | ||||||
|
|
||||||
| import datadog.gradle.sca.GhsaEnrichmentParser | ||||||
| import groovy.json.JsonOutput | ||||||
| import groovy.json.JsonSlurper | ||||||
| import java.net.HttpURLConnection | ||||||
| import java.net.URL | ||||||
| import org.gradle.api.GradleException | ||||||
| import org.gradle.api.Plugin | ||||||
| import org.gradle.api.Project | ||||||
|
|
||||||
| /** | ||||||
| * Registers the [generateScaCvesJson] task that downloads GHSA enrichments from | ||||||
| * `sca-reachability-database` and generates `sca_cves.json` bundled in the appsec JAR. | ||||||
| * | ||||||
| * This is a **temporary** build-time approach. The symbol database will be delivered | ||||||
| * via Remote Config in a future iteration, at which point this plugin and the committed | ||||||
| * `sca_cves.json` file will be removed. | ||||||
| * | ||||||
| * Usage: `apply plugin: 'dd-trace-java.sca-enrichments'`. The task runs only when | ||||||
| * `-PrefreshSca` is passed or the output file is absent; CI uses the committed copy. | ||||||
| */ | ||||||
| @Suppress("unused") | ||||||
| class ScaEnrichmentsPlugin : Plugin<Project> { | ||||||
|
|
||||||
| companion object { | ||||||
| private const val SCA_ENRICHMENTS_API = | ||||||
| "https://api.github.com/repos/DataDog/sca-reachability-database/contents/enrichments" | ||||||
| } | ||||||
|
|
||||||
| override fun apply(project: Project) { | ||||||
| val outputFile = project.file("src/main/resources/sca_cves.json") | ||||||
|
|
||||||
| val generateTask = | ||||||
| project.tasks.register("generateScaCvesJson") { | ||||||
| description = | ||||||
| "Downloads GHSA enrichments from sca-reachability-database and updates " + | ||||||
| "src/main/resources/sca_cves.json. Run with -PrefreshSca to force a refresh. " + | ||||||
| "sca_cves.json is committed to the repo so CI does not need network access." | ||||||
| group = "build" | ||||||
| outputs.file(outputFile) | ||||||
| onlyIf { project.hasProperty("refreshSca") || !outputFile.exists() } | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. issue: I don't believe that works: the task's up-to-date check can still prevent the file from being forcibly refreshed, since it is based on the task's inputs. Instead, if you need to force a refresh depending only on the property, you can use:
Suggested change
|
||||||
|
|
||||||
| doLast { | ||||||
| val token = System.getenv("GITHUB_TOKEN") | ||||||
|
|
||||||
| logger.lifecycle("Fetching GHSA enrichment index from GitHub...") | ||||||
| @Suppress("UNCHECKED_CAST") | ||||||
| val fileList = githubFetch(SCA_ENRICHMENTS_API, token) as List<Map<String, Any>> | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. suggestion: I'd suggest adding the ability to pass a custom URL, e.g. via a Gradle property; that could be handy in CI. Also, what happens if, for some reason, the content cannot be fetched (404, network error)? |
||||||
| val ghsaFiles = | ||||||
| fileList.filter { | ||||||
| it["name"]?.toString()?.endsWith(".json") == true && it["type"] == "file" | ||||||
| } | ||||||
| logger.lifecycle("Found ${ghsaFiles.size} enrichment files") | ||||||
|
|
||||||
| val entries = mutableListOf<Any>() | ||||||
| ghsaFiles.forEach { fileInfo -> | ||||||
| val ghsaId = fileInfo["name"]!!.toString().removeSuffix(".json") | ||||||
| val rawContent = githubFetchRaw(fileInfo["download_url"]!!.toString(), token) | ||||||
| entries.addAll(GhsaEnrichmentParser.parse(ghsaId, rawContent)) | ||||||
| } | ||||||
|
|
||||||
| outputFile.writeText(JsonOutput.toJson(mapOf("version" to 1, "entries" to entries))) | ||||||
| logger.lifecycle( | ||||||
| "sca_cves.json: ${entries.size} entries from ${ghsaFiles.size} GHSA files") | ||||||
| logger.lifecycle( | ||||||
| "Remember to commit src/main/resources/sca_cves.json after updating the database.") | ||||||
| } | ||||||
| } | ||||||
|
|
||||||
| // Defer wiring until after the java plugin adds processResources. | ||||||
| project.pluginManager.withPlugin("java") { | ||||||
| project.tasks.named("processResources") { | ||||||
| dependsOn(generateTask) | ||||||
| doLast { | ||||||
| // Minify only sca_cves.json — not all JSON files in the module output. | ||||||
| project | ||||||
| .fileTree(mapOf("dir" to outputs.files.asPath, "includes" to listOf("**/sca_cves.json"))) | ||||||
| .forEach { f -> f.writeText(JsonOutput.toJson(JsonSlurper().parse(f))) } | ||||||
| } | ||||||
| } | ||||||
| } | ||||||
| } | ||||||
|
|
||||||
| /** | ||||||
| * Opens a GET connection to [url] with the GitHub API headers set and checks that the | ||||||
| * response status is 200. | ||||||
| * | ||||||
| * The connection is released before any exception is thrown, so a failed request does not | ||||||
| * leave it open. On success the caller owns the returned connection and must disconnect it. | ||||||
| * | ||||||
| * @param url the URL to request | ||||||
| * @param token optional bearer token; when null or empty the request is sent unauthenticated | ||||||
| * @return the connection, after its response code has been read and found to be 200 | ||||||
| * @throws GradleException if the request fails with an I/O error or returns a non-200 status | ||||||
| */ | ||||||
| private fun githubConnect(url: String, token: String?): HttpURLConnection { | ||||||
| val connection = URL(url).openConnection() as HttpURLConnection | ||||||
| connection.setRequestProperty("Accept", "application/vnd.github+json") | ||||||
| connection.setRequestProperty("X-GitHub-Api-Version", "2022-11-28") | ||||||
| if (!token.isNullOrEmpty()) { | ||||||
| connection.setRequestProperty("Authorization", "Bearer $token") | ||||||
| } | ||||||
| connection.connectTimeout = 10_000 | ||||||
| connection.readTimeout = 30_000 | ||||||
| // Reading the response code is what actually performs the request, so network errors surface here. | ||||||
| val code = | ||||||
| try { | ||||||
| connection.responseCode | ||||||
| } catch (e: java.io.IOException) { | ||||||
| connection.disconnect() | ||||||
| throw GradleException("Failed to reach $url: ${e.message}", e) | ||||||
| } | ||||||
| if (code != 200) { | ||||||
| // Release the connection before failing; no caller gets a handle to clean it up. | ||||||
| connection.disconnect() | ||||||
| throw GradleException( | ||||||
| "GitHub API returned HTTP $code for $url.\n" + | ||||||
| "Unauthenticated rate limit is 60 req/hr. Set GITHUB_TOKEN to raise it.") | ||||||
| } | ||||||
| return connection | ||||||
| } | ||||||
|
|
||||||
| /** | ||||||
| * Requests [url] through [githubConnect] and returns the response body parsed as JSON. | ||||||
| * The connection is disconnected whether or not parsing succeeds. | ||||||
| */ | ||||||
| private fun githubFetch(url: String, token: String?): Any { | ||||||
| val connection = githubConnect(url, token) | ||||||
| try { | ||||||
| val body = connection.inputStream | ||||||
| return JsonSlurper().parse(body) | ||||||
| } finally { | ||||||
| connection.disconnect() | ||||||
| } | ||||||
| } | ||||||
|
|
||||||
| /** | ||||||
| * Requests [url] through [githubConnect] and returns the response body as text. | ||||||
| * The connection is disconnected whether or not reading succeeds. | ||||||
| */ | ||||||
| private fun githubFetchRaw(url: String, token: String?): String { | ||||||
| val connection = githubConnect(url, token) | ||||||
| try { | ||||||
| val reader = connection.inputStream.bufferedReader() | ||||||
| return reader.readText() | ||||||
| } finally { | ||||||
| connection.disconnect() | ||||||
| } | ||||||
| } | ||||||
| } | ||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,83 @@ | ||
| package datadog.gradle.sca | ||
|
|
||
| import com.fasterxml.jackson.databind.JsonNode | ||
| import com.fasterxml.jackson.databind.ObjectMapper | ||
|
|
||
| /** | ||
| * Parses GHSA enrichment JSON files from the sca-reachability-database into the internal | ||
| * sca_cves.json format consumed by SCA Reachability at runtime. | ||
| * | ||
| * Key transformations: | ||
| * - Filters entries to JVM language only | ||
| * - Expands multi-package GHSA entries into N records (one per Maven artifact), because | ||
| * each artifact may have different version ranges for the same set of class symbols | ||
| * - Converts class FQNs to JVM internal format (slashes) so the ClassFileTransformer | ||
| * can do O(1) map lookups without per-class string conversion | ||
| * - Sets method=null for all symbols — field exists for forward compatibility when the | ||
| * database adds method-level symbols in the future (see APPSEC-62260) | ||
| */ | ||
| object GhsaEnrichmentParser { | ||
|
|
||
| private val mapper = ObjectMapper() | ||
|
|
||
| /** | ||
| * Converts one GHSA enrichment file into `sca_cves.json` entries. | ||
| * | ||
| * Only array elements whose `language` is `jvm` and that yield at least one class symbol are | ||
| * considered. Each of their `package` items with the `maven` ecosystem and a non-empty `name` | ||
| * produces one entry, all sharing that element's symbol list. | ||
| * | ||
| * @param ghsaId the GHSA identifier (e.g. "GHSA-645p-88qh-w398"), emitted as `vuln_id` | ||
| * @param jsonContent the raw JSON text of the enrichment file; its root must be an array | ||
| * @return one map per affected Maven artifact, with the keys `vuln_id`, `artifact`, | ||
| * `version_ranges` and `symbols` | ||
| * @throws IllegalArgumentException if the root of [jsonContent] is not a JSON array | ||
| */ | ||
| fun parse(ghsaId: String, jsonContent: String): List<Map<String, Any?>> { | ||
| val root = mapper.readTree(jsonContent) | ||
| require(root.isArray) { "GHSA enrichment file $ghsaId must be a JSON array, got ${root.nodeType}" } | ||
|
|
||
| return root | ||
| .filter { advisory -> advisory.path("language").asText() == "jvm" } | ||
| .flatMap { advisory -> | ||
| val symbols = extractSymbols(advisory) | ||
| if (symbols.isEmpty()) { | ||
| // Nothing to match at runtime, so no artifact of this advisory is emitted. | ||
| emptyList<Map<String, Any?>>() | ||
| } else { | ||
| advisory.path("package") | ||
| .filter { pkg -> pkg.path("ecosystem").asText() == "maven" } | ||
| .mapNotNull { pkg -> | ||
| pkg.path("name").asText().takeIf { it.isNotEmpty() }?.let { artifact -> | ||
| mapOf<String, Any?>( | ||
| "vuln_id" to ghsaId, | ||
| "artifact" to artifact, | ||
| "version_ranges" to pkg.path("version_range").map { it.asText() }, | ||
| "symbols" to symbols, | ||
| ) | ||
| } | ||
| } | ||
| } | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Collects the class symbols listed under `ecosystem_specific.imports` of [entry]. | ||
| * | ||
| * Symbols whose `type` is not `class`, or whose `value` or `name` is empty, are skipped. | ||
| * Returns an empty list when `imports` is absent or not an array. | ||
| */ | ||
| private fun extractSymbols(entry: JsonNode): List<Map<String, Any?>> { | ||
| val imports = entry.path("ecosystem_specific").path("imports") | ||
| // A missing node is never an array, so this single check also covers an absent `imports`. | ||
| if (!imports.isArray) return emptyList() | ||
|
|
||
| return imports.flatMap { importGroup -> | ||
| importGroup.path("symbols") | ||
| .filter { symbol -> symbol.path("type").asText() == "class" } | ||
| .mapNotNull { symbol -> | ||
| val pkg = symbol.path("value").asText() | ||
| val name = symbol.path("name").asText() | ||
| if (pkg.isEmpty() || name.isEmpty()) { | ||
| null | ||
| } else { | ||
| // Stored in JVM internal format (slashes) so the ClassFileTransformer hot path | ||
| // does not have to convert each class name at runtime. | ||
| // TODO(APPSEC-62260): verify inner-class format when database adds method-level symbols. | ||
| // With dot notation for inner classes (e.g. name="Outer.Inner") this replace yields | ||
| // com/example/Outer/Inner rather than the correct com/example/Outer$Inner; handle the | ||
| // $ separator here once the database team defines the format. | ||
| mapOf<String, Any?>("class" to "$pkg.$name".replace('.', '/'), "method" to null) | ||
|
jandro996 marked this conversation as resolved.
|
||
| } | ||
| } | ||
| } | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,75 @@ | ||
| package datadog.gradle.plugin.sca | ||
|
|
||
| import datadog.gradle.plugin.GradleFixture | ||
| import org.assertj.core.api.Assertions.assertThat | ||
| import org.gradle.testkit.runner.TaskOutcome | ||
| import org.junit.jupiter.api.BeforeEach | ||
| import org.junit.jupiter.api.Test | ||
|
|
||
| /** | ||
| * TestKit checks for the `dd-trace-java.sca-enrichments` plugin: when `generateScaCvesJson` | ||
| * is skipped, when it is attempted, and that `processResources` depends on it. | ||
| */ | ||
| class ScaEnrichmentsPluginTest : GradleFixture() { | ||
|
|
||
| @BeforeEach | ||
| fun setup() { | ||
| writeSettings("""rootProject.name = "test-appsec"""") | ||
| writeRootProject( | ||
| """ | ||
| plugins { | ||
| java | ||
| id("dd-trace-java.sca-enrichments") | ||
| } | ||
| """ | ||
| ) | ||
| } | ||
|
|
||
| @Test | ||
| fun `generateScaCvesJson is SKIPPED when file exists and refreshSca is not set`() { | ||
| writeScaCvesJson("{\"version\":1,\"entries\":[]}") | ||
|
|
||
| val result = run("generateScaCvesJson") | ||
|
|
||
| val outcome = result.task(":generateScaCvesJson")?.outcome | ||
| assertThat(outcome).isEqualTo(TaskOutcome.SKIPPED) | ||
| } | ||
|
|
||
| @Test | ||
| fun `generateScaCvesJson attempts to run when refreshSca is set even if file exists`() { | ||
| writeScaCvesJson("{}") | ||
|
|
||
| // -PrefreshSca makes the onlyIf condition true. The build is expected to fail at the | ||
| // GitHub fetch (no network in tests); what matters is that the task was not SKIPPED. | ||
| val result = run("generateScaCvesJson", "-PrefreshSca", expectFailure = true) | ||
|
|
||
| val outcome = result.task(":generateScaCvesJson")?.outcome | ||
| assertThat(outcome) | ||
| .isNotNull | ||
| .isNotEqualTo(TaskOutcome.SKIPPED) | ||
| } | ||
|
|
||
| @Test | ||
| fun `generateScaCvesJson attempts to run when output file does not exist`() { | ||
| // No output file, so onlyIf is true; the build fails at the GitHub fetch but the task | ||
| // must not be reported as SKIPPED. | ||
| val result = run("generateScaCvesJson", expectFailure = true) | ||
|
|
||
| val outcome = result.task(":generateScaCvesJson")?.outcome | ||
| assertThat(outcome) | ||
| .isNotNull | ||
| .isNotEqualTo(TaskOutcome.SKIPPED) | ||
| } | ||
|
|
||
| @Test | ||
| fun `processResources depends on generateScaCvesJson`() { | ||
| writeScaCvesJson("{\"version\":1,\"entries\":[]}") | ||
|
|
||
| val result = run("processResources") | ||
|
|
||
| // The file exists and -PrefreshSca is absent, so the generator shows up as SKIPPED. | ||
| val generateOutcome = result.task(":generateScaCvesJson")?.outcome | ||
| val processOutcome = result.task(":processResources")?.outcome | ||
| assertThat(generateOutcome).isEqualTo(TaskOutcome.SKIPPED) | ||
| assertThat(processOutcome).isEqualTo(TaskOutcome.SUCCESS) | ||
| } | ||
|
|
||
| /** Creates `src/main/resources/sca_cves.json` in the test project with the given [content]. */ | ||
| private fun writeScaCvesJson(content: String) { | ||
| val target = file("src/main/resources/sca_cves.json") | ||
| target.parentFile.mkdirs() | ||
| target.writeText(content) | ||
| } | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
note: This is fine for a temporary plugin, but if it becomes a longer-term approach, I suggest making it a concrete task type.