Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
536dbeb
add decisionLogging field to OpaCluster CRD
xeniape Apr 19, 2024
72ee9fe
typo fix
xeniape Apr 19, 2024
abb3f06
add decision logging test to logging integration test
xeniape Apr 22, 2024
d50398f
revert python version change for local testing
xeniape Apr 22, 2024
e7e95ed
Allow to enable decision logs only for console or file
sbernauer Apr 22, 2024
f52ef64
decision logging set through logging configuration, build opa cluster…
xeniape May 2, 2024
82ca86f
revert crd changes
xeniape May 2, 2024
5987521
cargo-fmt formatting
xeniape May 2, 2024
9551693
remove some comments
xeniape May 3, 2024
fa190ad
Merge branch 'main' into decision-logging-in-crd
xeniape May 10, 2024
8d776dd
code improvements and tests
xeniape May 24, 2024
480e905
add integration test for decision logs
xeniape May 24, 2024
2bda1b0
yamllint fixes
xeniape May 24, 2024
9b7977b
yamllint fixes
xeniape May 24, 2024
5a27884
add documentation
xeniape May 27, 2024
8dab63e
some documentation changes
xeniape May 27, 2024
95de302
renaming and replacing ordering function
xeniape May 27, 2024
f0a1a9f
clippy change
xeniape May 27, 2024
a63a550
cargofmt
xeniape May 27, 2024
6c4e2d2
replace log level conversion
xeniape May 27, 2024
0e3e0a5
temporary change opa version for integration tes
xeniape May 29, 2024
5b5773e
correct indentation
xeniape May 29, 2024
199f497
pass envvars directly
xeniape May 29, 2024
53f1029
Revert "temporary change opa version for integration tes"
xeniape Jun 3, 2024
3ee7174
bump operator-rs version
xeniape Jun 3, 2024
f25330f
update cargo.nix and crate-hashes.json
xeniape Jun 3, 2024
e37d4e1
Merge branch 'main' into decision-logging-in-crd
xeniape Jun 3, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions deploy/helm/opa-operator/crds/crds.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -716,6 +716,21 @@ spec:
type: array
type: object
type: object
decisionLogging:
default:
file: null
console: null
description: Decision Logging configuration.
properties:
console:
description: Whether or not to print decision logging to the console.
nullable: true
type: boolean
file:
description: Whether or not to print decision logging to files collected by Vector agent.
nullable: true
type: boolean
type: object
gracefulShutdownTimeout:
description: Time period Pods have to gracefully shut down, e.g. `30m`, `1h` or `2d`. Consult the operator documentation for details.
nullable: true
Expand Down Expand Up @@ -4142,6 +4157,21 @@ spec:
type: array
type: object
type: object
decisionLogging:
default:
file: null
console: null
description: Decision Logging configuration.
properties:
console:
description: Whether or not to print decision logging to the console.
nullable: true
type: boolean
file:
description: Whether or not to print decision logging to files collected by Vector agent.
nullable: true
type: boolean
type: object
gracefulShutdownTimeout:
description: Time period Pods have to gracefully shut down, e.g. `30m`, `1h` or `2d`. Consult the operator documentation for details.
nullable: true
Expand Down
32 changes: 31 additions & 1 deletion rust/crd/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ pub mod user_info_fetcher;
pub const APP_NAME: &str = "opa";
pub const OPERATOR_NAME: &str = "opa.stackable.tech";

pub const CONFIG_FILE: &str = "config.yaml";
pub const CONFIG_FILE: &str = "config.json";

pub const DEFAULT_SERVER_GRACEFUL_SHUTDOWN_TIMEOUT: Duration = Duration::from_minutes_unchecked(2);
/// Safety puffer to guarantee the graceful shutdown works every time.
Expand Down Expand Up @@ -171,6 +171,29 @@ pub enum Container {
Opa,
}

#[derive(Clone, Debug, Default, Fragment, JsonSchema, PartialEq)]
#[fragment_attrs(
derive(
Clone,
Debug,
Default,
Deserialize,
Merge,
JsonSchema,
PartialEq,
Serialize
),
serde(rename_all = "camelCase"),
schemars(description = "Decision Logging configuration.")
)]
pub struct DecisionLogging {
/// Whether or not to print decision logging to files collected by Vector agent.
pub file: bool,

/// Whether or not to print decision logging to the console.
pub console: bool,
}

#[derive(Clone, Debug, Default, Fragment, JsonSchema, PartialEq)]
#[fragment_attrs(
derive(
Expand All @@ -192,6 +215,9 @@ pub struct OpaConfig {
#[fragment_attrs(serde(default))]
pub logging: Logging<Container>,

#[fragment_attrs(serde(default))]
pub decision_logging: DecisionLogging,

#[fragment_attrs(serde(default))]
pub affinity: StackableAffinity,

Expand All @@ -204,6 +230,10 @@ impl OpaConfig {
fn default_config() -> OpaConfigFragment {
OpaConfigFragment {
logging: product_logging::spec::default_logging(),
decision_logging: DecisionLoggingFragment {
file: Some(true),
console: Some(false),
},
resources: ResourcesFragment {
cpu: CpuLimitsFragment {
min: Some(Quantity("250m".to_owned())),
Expand Down
91 changes: 55 additions & 36 deletions rust/operator-binary/src/controller.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ use crate::{

pub const OPA_CONTROLLER_NAME: &str = "opacluster";

pub const CONFIG_FILE: &str = "config.yaml";
pub const CONFIG_FILE: &str = "config.json";
pub const APP_PORT: u16 = 8081;
pub const APP_PORT_NAME: &str = "http";
pub const METRICS_PORT_NAME: &str = "metrics";
Expand Down Expand Up @@ -572,7 +572,7 @@ fn build_server_rolegroup_config_map(

cm_builder
.metadata(metadata)
.add_data(CONFIG_FILE, build_config_file());
.add_data(CONFIG_FILE, build_config_file(merged_config));

if let Some(user_info) = &opa.spec.cluster_config.user_info {
cm_builder.add_data(
Expand Down Expand Up @@ -910,33 +910,36 @@ pub fn error_policy(_obj: Arc<OpaCluster>, _error: &Error, _ctx: Arc<Ctx>) -> Ac
Action::requeue(*Duration::from_secs(5))
}

fn build_config_file() -> &'static str {
// We currently do not activate decision logging like
// decision_logs:
// console: true
// This will log decisions to the console, but also sends an extra `decision_id` field in the
// API JSON response. This currently leads to our Java authorizers (Druid, Trino) failing to
// deserialize the JSON object since they only expect to have a `result` field returned.
// see https://github.com/stackabletech/opa-operator/issues/422
"
services:
- name: stackable
url: http://localhost:3030/opa/v1

bundles:
stackable:
service: stackable
resource: opa/bundle.tar.gz
persist: true
polling:
min_delay_seconds: 10
max_delay_seconds: 20
"
fn build_config_file(config: &OpaConfig) -> String {
serde_json::to_string_pretty(&json!({
"services": [{
"name": "stackable",
"url": "http://localhost:3030/opa/v1",
},],
"bundles": {
"stackable": {
"service": "stackable",
"resource": "opa/bundle.tar.gz",
"persist": true,
"polling": {
"min_delay_seconds": 10,
"max_delay_seconds": 20,
},
}
},
// FIXME: We should only write this whole struct when we want decision logs
"decision_logs": {
// Either one can enable the decision logs, the entrypoint takes care of filtering which target get decision
// logs
"console": config.decision_logging.console || config.decision_logging.file,
}
}))
.unwrap()
}

fn build_opa_start_command(merged_config: &OpaConfig, container_name: &str) -> String {
let mut opa_log_level = OpaLogLevel::Info;
let mut console_logging_off = false;
let mut console_logging_enabled = true;

if let Some(ContainerLogConfig {
choice: Some(ContainerLogConfigChoice::Automatic(log_config)),
Expand All @@ -956,16 +959,41 @@ fn build_opa_start_command(merged_config: &OpaConfig, container_name: &str) -> S
level: Some(log_level),
}) = log_config.console
{
console_logging_off = log_level == LogLevel::NONE
console_logging_enabled = log_level != LogLevel::NONE
}
}

let decision_logs_to_stdout = merged_config.decision_logging.console;
let decision_logs_to_file = merged_config.decision_logging.file;

// Redirects matter!
// We need to watch out, that the following "$!" call returns the PID of the main (opa-bundle-builder) process,
// and not some utility (e.g. multilog or tee) process.
// See https://stackoverflow.com/a/8048493

// *Technically*, we would also need to calculate `file_logging_enabled` and take that into account. However, this
// is a very nice edge-case and would make things more complicated and error-prone.
let logging_redirects = if console_logging_enabled {
match (decision_logs_to_file, decision_logs_to_stdout) {
(true, true) => format!(" &> >(tee >(/stackable/multilog s{OPA_ROLLING_LOG_FILE_SIZE_BYTES} n{OPA_ROLLING_LOG_FILES} {STACKABLE_LOG_DIR}/{container_name}))"),
(true, false) => format!(" &> >(tee >(/stackable/multilog s{OPA_ROLLING_LOG_FILE_SIZE_BYTES} n{OPA_ROLLING_LOG_FILES} {STACKABLE_LOG_DIR}/{container_name}) | grep -v '\"decision_id\":\"')"),
(false, true) => format!(" &> >(tee >(grep -v '\"decision_id\":\"' &> >(/stackable/multilog s{OPA_ROLLING_LOG_FILE_SIZE_BYTES} n{OPA_ROLLING_LOG_FILES} {STACKABLE_LOG_DIR}/{container_name})))"),
// Normally in this case OPA should *not* be instructed to even write decision logs, so we *should* be able
// to just forward them. However, we still filter them just to be safe.
(false, false) => format!(" &> >(grep -v '\"decision_id\":\"' | tee >(/stackable/multilog s{OPA_ROLLING_LOG_FILE_SIZE_BYTES} n{OPA_ROLLING_LOG_FILES} {STACKABLE_LOG_DIR}/{container_name}))"),
}
} else if decision_logs_to_file {
format!(" &> >(/stackable/multilog s{OPA_ROLLING_LOG_FILE_SIZE_BYTES} n{OPA_ROLLING_LOG_FILES} {STACKABLE_LOG_DIR}/{container_name})")
} else {
format!(" &> >(grep -v '\"decision_id\":\"' &> >(/stackable/multilog s{OPA_ROLLING_LOG_FILE_SIZE_BYTES} n{OPA_ROLLING_LOG_FILES} {STACKABLE_LOG_DIR}/{container_name}))")
};

// TODO: Think about adding --shutdown-wait-period, as suggested by https://github.com/open-policy-agent/opa/issues/2764
formatdoc! {"
{COMMON_BASH_TRAP_FUNCTIONS}
{remove_vector_shutdown_file_command}
prepare_signal_handlers
/stackable/opa/opa run -s -a 0.0.0.0:{APP_PORT} -c {CONFIG_DIR}/config.yaml -l {opa_log_level} --shutdown-grace-period {shutdown_grace_period_s} --disable-telemetry{logging_redirects} &
/stackable/opa/opa run -s -a 0.0.0.0:{APP_PORT} -c {CONFIG_DIR}/{CONFIG_FILE} -l {opa_log_level} --shutdown-grace-period {shutdown_grace_period_s} --disable-telemetry{logging_redirects} &
wait_for_termination $!
{create_vector_shutdown_file_command}
",
Expand All @@ -974,15 +1002,6 @@ fn build_opa_start_command(merged_config: &OpaConfig, container_name: &str) -> S
create_vector_shutdown_file_command =
create_vector_shutdown_file_command(STACKABLE_LOG_DIR),
shutdown_grace_period_s = merged_config.graceful_shutdown_timeout.unwrap_or(DEFAULT_SERVER_GRACEFUL_SHUTDOWN_TIMEOUT).as_secs(),
// Redirects matter!
// We need to watch out, that the following "$!" call returns the PID of the main (opa-bundle-builder) process,
// and not some utility (e.g. multilog or tee) process.
// See https://stackoverflow.com/a/8048493
logging_redirects = if console_logging_off {
format!(" &> >(/stackable/multilog s{OPA_ROLLING_LOG_FILE_SIZE_BYTES} n{OPA_ROLLING_LOG_FILES} {STACKABLE_LOG_DIR}/{container_name})")
} else {
format!(" &> >(tee >(/stackable/multilog s{OPA_ROLLING_LOG_FILE_SIZE_BYTES} n{OPA_ROLLING_LOG_FILES} {STACKABLE_LOG_DIR}/{container_name}))")
},
}
}

Expand Down
4 changes: 4 additions & 0 deletions tests/templates/kuttl/logging/02-install-opa.yaml.j2
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,10 @@ spec:
clusterConfig:
vectorAggregatorConfigMapName: opa-vector-aggregator-discovery
servers:
config:
decisionLogging:
console: false
file: true
roleGroups:
automatic-log-config:
config:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,20 @@ customConfig:
type: vector
version: "2"
transforms:
filteredAutomaticLogConfigServerOpaDecision:
type: filter
inputs: [vector]
condition: >-
starts_with(string!(.pod), "test-opa-server-automatic-log-config") &&
.container == "opa" &&
.logger == "decision"
filteredAutomaticLogConfigServerOpa:
type: filter
inputs: [vector]
condition: >-
starts_with(string!(.pod), "test-opa-server-automatic-log-config") &&
.container == "opa"
.container == "opa" &&
.logger != "decision"
filteredAutomaticLogConfigServerBundleBuilder:
type: filter
inputs: [vector]
Expand Down
42 changes: 42 additions & 0 deletions tests/templates/kuttl/logging/test_log_aggregation.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,46 @@
#!/usr/bin/env python3
import requests

def send_opa_decision_request():
response = requests.post(
'http://test-opa:8081/v1/data/test/world'
)

assert response.status_code == 200, \
'Cannot access the API of the opa cluster.'

def check_decision_events():
response = requests.post(
'http://opa-vector-aggregator:8686/graphql',
json={
'query': """
{
transforms(filter: {componentId: {equals: \"filteredAutomaticLogConfigServerOpaDecision\"}}) {
nodes {
componentId
metrics {
sentEventsTotal {
sentEventsTotal
}
}
}
}
}
"""
}
)

assert response.status_code == 200, \
'Cannot access the API of the vector aggregator.'

result = response.json()

sentEvents = result['data']['transforms']['nodes'][0]['metrics']['sentEventsTotal']
componentId = result['data']['transforms']['nodes'][0]['componentId']

assert sentEvents is not None and \
sentEvents['sentEventsTotal'] > 0, \
f'No events were sent in "{componentId}".'

def check_sent_events():
response = requests.post(
Expand Down Expand Up @@ -44,5 +84,7 @@ def check_sent_events():


if __name__ == '__main__':
send_opa_decision_request()
check_decision_events()
check_sent_events()
print('Test successful!')