diff --git a/CHANGELOG.md b/CHANGELOG.md index d2cd3148..5d473cea 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,10 +16,12 @@ All notable changes to this project will be documented in this file. `security.properties`). Previously, arbitrary file names were silently accepted and ignored ([#777]). - Bump `stackable-operator` to 0.111.1 ([#777], [#778]). +- Internal operator refactoring: introduce dereference() and validate() steps in the reconciler ([#783]). [#770]: https://github.com/stackabletech/hdfs-operator/pull/770 [#777]: https://github.com/stackabletech/hdfs-operator/pull/777 [#778]: https://github.com/stackabletech/hdfs-operator/pull/778 +[#783]: https://github.com/stackabletech/hdfs-operator/pull/783 ## [26.3.0] - 2026-03-16 diff --git a/Cargo.nix b/Cargo.nix index 60f9df78..8f272641 100644 --- a/Cargo.nix +++ b/Cargo.nix @@ -4821,7 +4821,7 @@ rec { src = pkgs.fetchgit { url = "https://github.com/stackabletech/operator-rs.git"; rev = "7a5f0c3fbcd091340214a23f0607fcd4b4fcc152"; - sha256 = "0d58yvxvy8hbai12bjhcyvh4zw182j5dsfyqja4k2xc1vzjy29by"; + sha256 = "0lj969rjbxairjglrnaq0xhabvdrq5nd6wl1i0y9pr50nhh7zvgk"; }; libName = "k8s_version"; authors = [ @@ -9469,7 +9469,7 @@ rec { src = pkgs.fetchgit { url = "https://github.com/stackabletech/operator-rs.git"; rev = "7a5f0c3fbcd091340214a23f0607fcd4b4fcc152"; - sha256 = "0d58yvxvy8hbai12bjhcyvh4zw182j5dsfyqja4k2xc1vzjy29by"; + sha256 = "0lj969rjbxairjglrnaq0xhabvdrq5nd6wl1i0y9pr50nhh7zvgk"; }; libName = "stackable_certs"; authors = [ @@ -9670,7 +9670,7 @@ rec { src = pkgs.fetchgit { url = "https://github.com/stackabletech/operator-rs.git"; rev = "7a5f0c3fbcd091340214a23f0607fcd4b4fcc152"; - sha256 = "0d58yvxvy8hbai12bjhcyvh4zw182j5dsfyqja4k2xc1vzjy29by"; + sha256 = "0lj969rjbxairjglrnaq0xhabvdrq5nd6wl1i0y9pr50nhh7zvgk"; }; libName = "stackable_operator"; authors = [ @@ -9850,7 +9850,7 @@ rec { src = pkgs.fetchgit { url = "https://github.com/stackabletech/operator-rs.git"; rev = 
"7a5f0c3fbcd091340214a23f0607fcd4b4fcc152"; - sha256 = "0d58yvxvy8hbai12bjhcyvh4zw182j5dsfyqja4k2xc1vzjy29by"; + sha256 = "0lj969rjbxairjglrnaq0xhabvdrq5nd6wl1i0y9pr50nhh7zvgk"; }; procMacro = true; libName = "stackable_operator_derive"; @@ -9885,7 +9885,7 @@ rec { src = pkgs.fetchgit { url = "https://github.com/stackabletech/operator-rs.git"; rev = "7a5f0c3fbcd091340214a23f0607fcd4b4fcc152"; - sha256 = "0d58yvxvy8hbai12bjhcyvh4zw182j5dsfyqja4k2xc1vzjy29by"; + sha256 = "0lj969rjbxairjglrnaq0xhabvdrq5nd6wl1i0y9pr50nhh7zvgk"; }; libName = "stackable_shared"; authors = [ @@ -9966,7 +9966,7 @@ rec { src = pkgs.fetchgit { url = "https://github.com/stackabletech/operator-rs.git"; rev = "7a5f0c3fbcd091340214a23f0607fcd4b4fcc152"; - sha256 = "0d58yvxvy8hbai12bjhcyvh4zw182j5dsfyqja4k2xc1vzjy29by"; + sha256 = "0lj969rjbxairjglrnaq0xhabvdrq5nd6wl1i0y9pr50nhh7zvgk"; }; libName = "stackable_telemetry"; authors = [ @@ -10076,7 +10076,7 @@ rec { src = pkgs.fetchgit { url = "https://github.com/stackabletech/operator-rs.git"; rev = "7a5f0c3fbcd091340214a23f0607fcd4b4fcc152"; - sha256 = "0d58yvxvy8hbai12bjhcyvh4zw182j5dsfyqja4k2xc1vzjy29by"; + sha256 = "0lj969rjbxairjglrnaq0xhabvdrq5nd6wl1i0y9pr50nhh7zvgk"; }; libName = "stackable_versioned"; authors = [ @@ -10126,7 +10126,7 @@ rec { src = pkgs.fetchgit { url = "https://github.com/stackabletech/operator-rs.git"; rev = "7a5f0c3fbcd091340214a23f0607fcd4b4fcc152"; - sha256 = "0d58yvxvy8hbai12bjhcyvh4zw182j5dsfyqja4k2xc1vzjy29by"; + sha256 = "0lj969rjbxairjglrnaq0xhabvdrq5nd6wl1i0y9pr50nhh7zvgk"; }; procMacro = true; libName = "stackable_versioned_macros"; @@ -10194,7 +10194,7 @@ rec { src = pkgs.fetchgit { url = "https://github.com/stackabletech/operator-rs.git"; rev = "7a5f0c3fbcd091340214a23f0607fcd4b4fcc152"; - sha256 = "0d58yvxvy8hbai12bjhcyvh4zw182j5dsfyqja4k2xc1vzjy29by"; + sha256 = "0lj969rjbxairjglrnaq0xhabvdrq5nd6wl1i0y9pr50nhh7zvgk"; }; libName = "stackable_webhook"; authors = [ diff --git a/crate-hashes.json 
b/crate-hashes.json index a6396ca0..86f2b840 100644 --- a/crate-hashes.json +++ b/crate-hashes.json @@ -1,12 +1,12 @@ { - "git+https://github.com/stackabletech/operator-rs.git?tag=stackable-operator-0.111.1#k8s-version@0.1.3": "0d58yvxvy8hbai12bjhcyvh4zw182j5dsfyqja4k2xc1vzjy29by", - "git+https://github.com/stackabletech/operator-rs.git?tag=stackable-operator-0.111.1#stackable-certs@0.4.0": "0d58yvxvy8hbai12bjhcyvh4zw182j5dsfyqja4k2xc1vzjy29by", - "git+https://github.com/stackabletech/operator-rs.git?tag=stackable-operator-0.111.1#stackable-operator-derive@0.3.1": "0d58yvxvy8hbai12bjhcyvh4zw182j5dsfyqja4k2xc1vzjy29by", - "git+https://github.com/stackabletech/operator-rs.git?tag=stackable-operator-0.111.1#stackable-operator@0.111.1": "0d58yvxvy8hbai12bjhcyvh4zw182j5dsfyqja4k2xc1vzjy29by", - "git+https://github.com/stackabletech/operator-rs.git?tag=stackable-operator-0.111.1#stackable-shared@0.1.0": "0d58yvxvy8hbai12bjhcyvh4zw182j5dsfyqja4k2xc1vzjy29by", - "git+https://github.com/stackabletech/operator-rs.git?tag=stackable-operator-0.111.1#stackable-telemetry@0.6.3": "0d58yvxvy8hbai12bjhcyvh4zw182j5dsfyqja4k2xc1vzjy29by", - "git+https://github.com/stackabletech/operator-rs.git?tag=stackable-operator-0.111.1#stackable-versioned-macros@0.10.0": "0d58yvxvy8hbai12bjhcyvh4zw182j5dsfyqja4k2xc1vzjy29by", - "git+https://github.com/stackabletech/operator-rs.git?tag=stackable-operator-0.111.1#stackable-versioned@0.10.0": "0d58yvxvy8hbai12bjhcyvh4zw182j5dsfyqja4k2xc1vzjy29by", - "git+https://github.com/stackabletech/operator-rs.git?tag=stackable-operator-0.111.1#stackable-webhook@0.9.1": "0d58yvxvy8hbai12bjhcyvh4zw182j5dsfyqja4k2xc1vzjy29by", + "git+https://github.com/stackabletech/operator-rs.git?tag=stackable-operator-0.111.1#k8s-version@0.1.3": "0lj969rjbxairjglrnaq0xhabvdrq5nd6wl1i0y9pr50nhh7zvgk", + "git+https://github.com/stackabletech/operator-rs.git?tag=stackable-operator-0.111.1#stackable-certs@0.4.0": "0lj969rjbxairjglrnaq0xhabvdrq5nd6wl1i0y9pr50nhh7zvgk", + 
"git+https://github.com/stackabletech/operator-rs.git?tag=stackable-operator-0.111.1#stackable-operator-derive@0.3.1": "0lj969rjbxairjglrnaq0xhabvdrq5nd6wl1i0y9pr50nhh7zvgk", + "git+https://github.com/stackabletech/operator-rs.git?tag=stackable-operator-0.111.1#stackable-operator@0.111.1": "0lj969rjbxairjglrnaq0xhabvdrq5nd6wl1i0y9pr50nhh7zvgk", + "git+https://github.com/stackabletech/operator-rs.git?tag=stackable-operator-0.111.1#stackable-shared@0.1.0": "0lj969rjbxairjglrnaq0xhabvdrq5nd6wl1i0y9pr50nhh7zvgk", + "git+https://github.com/stackabletech/operator-rs.git?tag=stackable-operator-0.111.1#stackable-telemetry@0.6.3": "0lj969rjbxairjglrnaq0xhabvdrq5nd6wl1i0y9pr50nhh7zvgk", + "git+https://github.com/stackabletech/operator-rs.git?tag=stackable-operator-0.111.1#stackable-versioned-macros@0.10.0": "0lj969rjbxairjglrnaq0xhabvdrq5nd6wl1i0y9pr50nhh7zvgk", + "git+https://github.com/stackabletech/operator-rs.git?tag=stackable-operator-0.111.1#stackable-versioned@0.10.0": "0lj969rjbxairjglrnaq0xhabvdrq5nd6wl1i0y9pr50nhh7zvgk", + "git+https://github.com/stackabletech/operator-rs.git?tag=stackable-operator-0.111.1#stackable-webhook@0.9.1": "0lj969rjbxairjglrnaq0xhabvdrq5nd6wl1i0y9pr50nhh7zvgk", "git+https://github.com/stackabletech/product-config.git?tag=0.8.0#product-config@0.8.0": "1dz70kapm2wdqcr7ndyjji0lhsl98bsq95gnb2lw487wf6yr7987" } \ No newline at end of file diff --git a/rust/operator-binary/src/controller/dereference.rs b/rust/operator-binary/src/controller/dereference.rs new file mode 100644 index 00000000..dc06ed18 --- /dev/null +++ b/rust/operator-binary/src/controller/dereference.rs @@ -0,0 +1,30 @@ +use snafu::{ResultExt, Snafu}; + +use crate::{crd::v1alpha1, security::opa::HdfsOpaConfig}; + +#[derive(Snafu, Debug)] +pub enum Error { + #[snafu(display("invalid OPA configuration"))] + InvalidOpaConfig { source: crate::security::opa::Error }, +} + +/// External references resolved during the dereference step. 
+pub struct DereferencedObjects { + pub hdfs_opa_config: Option<HdfsOpaConfig>, +} + +pub async fn dereference( + client: &stackable_operator::client::Client, + hdfs: &v1alpha1::HdfsCluster, +) -> Result<DereferencedObjects, Error> { + let hdfs_opa_config = match &hdfs.spec.cluster_config.authorization { + Some(opa_config) => Some( + HdfsOpaConfig::from_opa_config(client, hdfs, opa_config) + .await + .context(InvalidOpaConfigSnafu)?, + ), + None => None, + }; + + Ok(DereferencedObjects { hdfs_opa_config }) +} diff --git a/rust/operator-binary/src/controller/mod.rs b/rust/operator-binary/src/controller/mod.rs new file mode 100644 index 00000000..1b261dfe --- /dev/null +++ b/rust/operator-binary/src/controller/mod.rs @@ -0,0 +1,2 @@ +pub mod dereference; +pub mod validate; diff --git a/rust/operator-binary/src/controller/validate.rs b/rust/operator-binary/src/controller/validate.rs new file mode 100644 index 00000000..7f253c3c --- /dev/null +++ b/rust/operator-binary/src/controller/validate.rs @@ -0,0 +1,115 @@ +use std::{collections::BTreeMap, str::FromStr}; + +use product_config::ProductConfigManager; +use snafu::{ResultExt, Snafu}; +use stackable_operator::{ + commons::product_image_selection, + product_config_utils::{transform_all_roles_to_config, validate_all_roles_and_groups_config}, + role_utils::GenericRoleConfig, +}; + +use crate::{ + crd::{HdfsNodeRole, v1alpha1}, + hdfs_controller::{ + CONTAINER_IMAGE_BASE_NAME, ValidatedCluster, ValidatedRoleConfig, ValidatedRoleGroupConfig, + }, + security::opa::HdfsOpaConfig, +}; + +#[derive(Snafu, Debug)] +pub enum Error { + #[snafu(display("failed to resolve product image"))] + ResolveProductImage { + source: product_image_selection::Error, + }, + + #[snafu(display("invalid role properties"))] + RoleProperties { source: crate::crd::Error }, + + #[snafu(display("failed to generate product config"))] + GenerateProductConfig { + source: stackable_operator::product_config_utils::Error, + }, + + #[snafu(display("invalid product configuration"))] + 
InvalidProductConfig { + source: stackable_operator::product_config_utils::Error, + }, + + #[snafu(display("could not parse HDFS role [{role}]"))] + UnidentifiedHdfsRole { + source: strum::ParseError, + role: String, + }, + + #[snafu(display("failed to resolve and merge config for role and role group"))] + FailedToResolveConfig { source: crate::crd::Error }, +} + +pub fn validate_cluster( + hdfs: &v1alpha1::HdfsCluster, + image_repository: &str, + product_config_manager: &ProductConfigManager, + hdfs_opa_config: Option<HdfsOpaConfig>, +) -> Result<ValidatedCluster, Error> { + let resolved_product_image = hdfs + .spec + .image + .resolve( + CONTAINER_IMAGE_BASE_NAME, + image_repository, + crate::built_info::PKG_VERSION, + ) + .context(ResolveProductImageSnafu)?; + + let roles = hdfs.build_role_properties().context(RolePropertiesSnafu)?; + + let validated_config = validate_all_roles_and_groups_config( + &resolved_product_image.product_version, + &transform_all_roles_to_config(hdfs, &roles).context(GenerateProductConfigSnafu)?, + product_config_manager, + false, + false, + ) + .context(InvalidProductConfigSnafu)?; + + let mut role_groups = BTreeMap::new(); + let mut role_configs = BTreeMap::new(); + + for (role_name, group_config) in validated_config.iter() { + let hdfs_role = HdfsNodeRole::from_str(role_name).context(UnidentifiedHdfsRoleSnafu { + role: role_name.to_string(), + })?; + + if let Some(GenericRoleConfig { + pod_disruption_budget: pdb, + }) = hdfs.role_config(&hdfs_role) + { + role_configs.insert(hdfs_role, ValidatedRoleConfig { pdb: pdb.clone() }); + } + + let mut group_configs = BTreeMap::new(); + for (rolegroup_name, rolegroup_config) in group_config.iter() { + let merged_config = hdfs_role + .merged_config(hdfs, rolegroup_name) + .context(FailedToResolveConfigSnafu)?; + + group_configs.insert( + rolegroup_name.clone(), + ValidatedRoleGroupConfig { + merged_config, + product_config_properties: rolegroup_config.clone(), + }, + ); + } + + role_groups.insert(hdfs_role, group_configs); + } + + 
Ok(ValidatedCluster { + image: resolved_product_image, + role_groups, + role_configs, + hdfs_opa_config, + }) +} diff --git a/rust/operator-binary/src/crd/mod.rs b/rust/operator-binary/src/crd/mod.rs index f6830464..ecd78b86 100644 --- a/rust/operator-binary/src/crd/mod.rs +++ b/rust/operator-binary/src/crd/mod.rs @@ -993,7 +993,7 @@ pub struct CommonNodeConfig { } /// Configuration for a rolegroup of an unknown type. -#[derive(Debug)] +#[derive(Clone, Debug)] pub enum AnyNodeConfig { Name(NameNodeConfig), Data(DataNodeConfig), @@ -1087,7 +1087,9 @@ impl AnyNodeConfig { Eq, Hash, JsonSchema, + Ord, PartialEq, + PartialOrd, Serialize, )] pub enum HdfsNodeRole { diff --git a/rust/operator-binary/src/hdfs_controller.rs b/rust/operator-binary/src/hdfs_controller.rs index 9e92159d..76090bee 100644 --- a/rust/operator-binary/src/hdfs_controller.rs +++ b/rust/operator-binary/src/hdfs_controller.rs @@ -19,10 +19,7 @@ use stackable_operator::{ cli::OperatorEnvironmentOptions, client::Client, cluster_resources::{ClusterResourceApplyStrategy, ClusterResources}, - commons::{ - product_image_selection::{self, ResolvedProductImage}, - rbac::build_rbac_resources, - }, + commons::{product_image_selection::ResolvedProductImage, rbac::build_rbac_resources}, iter::reverse_if, k8s_openapi::{ DeepMerge, @@ -40,8 +37,7 @@ use stackable_operator::{ }, kvp::{LabelError, Labels}, logging::controller::ReconcilerError, - product_config_utils::{transform_all_roles_to_config, validate_all_roles_and_groups_config}, - role_utils::{GenericRoleConfig, RoleGroupRef}, + role_utils::RoleGroupRef, shared::time::Duration, status::{ condition::{ @@ -69,7 +65,7 @@ use crate::{ pdb::add_pdbs, }, product_logging::extend_role_group_config_map, - security::{self, kerberos, opa::HdfsOpaConfig}, + security::{kerberos, opa::HdfsOpaConfig}, service::{self, rolegroup_headless_service, rolegroup_metrics_service}, }; @@ -77,19 +73,44 @@ pub const RESOURCE_MANAGER_HDFS_CONTROLLER: &str = 
"hdfs-operator-hdfs-controlle const HDFS_CONTROLLER_NAME: &str = "hdfs-controller"; pub const HDFS_FULL_CONTROLLER_NAME: &str = concatcp!(HDFS_CONTROLLER_NAME, '.', OPERATOR_NAME); -const CONTAINER_IMAGE_BASE_NAME: &str = "hadoop"; +pub const CONTAINER_IMAGE_BASE_NAME: &str = "hadoop"; + +/// The validated cluster: proves that product-config validation and config merging +/// succeeded for every role and role group before any resources are created. +/// Placed in the controller so that subsequent steps that reference this struct +/// only depend on the controller. +#[derive(Clone, Debug)] +pub struct ValidatedCluster { + pub image: ResolvedProductImage, + pub role_groups: BTreeMap<HdfsNodeRole, BTreeMap<String, ValidatedRoleGroupConfig>>, + pub role_configs: BTreeMap<HdfsNodeRole, ValidatedRoleConfig>, + pub hdfs_opa_config: Option<HdfsOpaConfig>, +} + +/// Per-role configuration extracted during validation. +#[derive(Clone, Debug)] +pub struct ValidatedRoleConfig { + pub pdb: stackable_operator::commons::pdb::PdbConfig, +} + +/// Per-rolegroup configuration: the merged CRD config plus the product-config properties. 
+#[derive(Clone, Debug)] +pub struct ValidatedRoleGroupConfig { + pub merged_config: AnyNodeConfig, + pub product_config_properties: HashMap<PropertyNameKind, BTreeMap<String, String>>, +} #[derive(Snafu, Debug, EnumDiscriminants)] #[strum_discriminants(derive(IntoStaticStr))] pub enum Error { - #[snafu(display("invalid role configuration"))] - InvalidRoleConfig { - source: stackable_operator::product_config_utils::Error, + #[snafu(display("failed to dereference cluster resources"))] + Dereference { + source: crate::controller::dereference::Error, }, - #[snafu(display("invalid product configuration"))] - InvalidProductConfig { - source: stackable_operator::product_config_utils::Error, + #[snafu(display("failed to validate cluster configuration"))] + Validate { + source: crate::controller::validate::Error, }, #[snafu(display("invalid upgrade state"))] @@ -125,12 +146,6 @@ pub enum Error { obj_ref: ObjectRef, }, - #[snafu(display("invalid role {role:?}"))] - InvalidRole { - source: strum::ParseError, - role: String, - }, - #[snafu(display("object has no name"))] ObjectHasNoName { obj_ref: ObjectRef, @@ -172,18 +187,12 @@ pub enum Error { #[snafu(display("failed to create pod references"))] CreatePodReferences { source: crate::crd::Error }, - #[snafu(display("failed to build role properties"))] - BuildRoleProperties { source: crate::crd::Error }, - #[snafu(display("failed to add the logging configuration to the ConfigMap {cm_name:?}"))] InvalidLoggingConfig { source: crate::product_logging::Error, cm_name: String, }, - #[snafu(display("failed to merge config"))] - ConfigMerge { source: crate::crd::Error }, - #[snafu(display("failed to create cluster event"))] FailedToCreateClusterEvent { source: crate::event::Error }, @@ -234,19 +243,11 @@ pub enum Error { #[snafu(display("failed to build security config"))] BuildSecurityConfig { source: kerberos::Error }, - #[snafu(display("invalid OPA configuration"))] - InvalidOpaConfig { source: security::opa::Error }, - #[snafu(display("HdfsCluster object is invalid"))] 
InvalidHdfsCluster { source: error_boundary::InvalidObject, }, - #[snafu(display("failed to resolve product image"))] - ResolveProductImage { - source: product_image_selection::Error, - }, - #[snafu(display("failed to builds service"))] BuildService { source: service::Error }, } @@ -279,29 +280,19 @@ pub async fn reconcile_hdfs( .context(InvalidHdfsClusterSnafu)?; let client = &ctx.client; - let resolved_product_image = hdfs - .spec - .image - .resolve( - CONTAINER_IMAGE_BASE_NAME, - &ctx.operator_environment.image_repository, - crate::built_info::PKG_VERSION, - ) - .context(ResolveProductImageSnafu)?; - - let validated_config = { - let roles = hdfs - .build_role_properties() - .context(BuildRolePropertiesSnafu)?; - validate_all_roles_and_groups_config( - &resolved_product_image.product_version, - &transform_all_roles_to_config(hdfs, &roles).context(InvalidRoleConfigSnafu)?, - &ctx.product_config, - false, - false, - ) - .context(InvalidProductConfigSnafu)? - }; + let dereferenced = crate::controller::dereference::dereference(client, hdfs) + .await + .context(DereferenceSnafu)?; + + let validated = crate::controller::validate::validate_cluster( + hdfs, + &ctx.operator_environment.image_repository, + &ctx.product_config, + dereferenced.hdfs_opa_config, + ) + .context(ValidateSnafu)?; + + let resolved_product_image = &validated.image; let hdfs_obj_ref = hdfs.object_ref(&()); // A list of all name and journal nodes across all role groups is needed for all ConfigMaps and initialization checks. 
@@ -341,15 +332,6 @@ pub async fn reconcile_hdfs( .await .context(ApplyRoleBindingSnafu)?; - let hdfs_opa_config = match &hdfs.spec.cluster_config.authorization { - Some(opa_config) => Some( - HdfsOpaConfig::from_opa_config(client, hdfs, opa_config) - .await - .context(InvalidOpaConfigSnafu)?, - ), - None => None, - }; - let dfs_replication = hdfs.spec.cluster_config.dfs_replication; let mut ss_cond_builder = StatefulSetConditionBuilder::default(); @@ -371,7 +353,7 @@ pub async fn reconcile_hdfs( ); 'roles: for role in roles { let role_name: &str = role.into(); - let Some(group_config) = validated_config.get(role_name) else { + let Some(group_config) = validated.role_groups.get(&role) else { tracing::debug!(?role, "role has no configuration, skipping"); continue; }; @@ -388,20 +370,20 @@ pub async fn reconcile_hdfs( .context(FailedToCreateClusterEventSnafu)?; } - for (rolegroup_name, rolegroup_config) in group_config.iter() { - let merged_config = role - .merged_config(hdfs, rolegroup_name) - .context(ConfigMergeSnafu)?; + for (rolegroup_name, validated_rg_config) in group_config.iter() { + let merged_config = &validated_rg_config.merged_config; - let env_overrides = rolegroup_config.get(&PropertyNameKind::Env); + let env_overrides = validated_rg_config + .product_config_properties + .get(&PropertyNameKind::Env); let rolegroup_ref = hdfs.rolegroup_ref(role_name, rolegroup_name); let rg_service = - rolegroup_headless_service(hdfs, &role, &rolegroup_ref, &resolved_product_image) + rolegroup_headless_service(hdfs, &role, &rolegroup_ref, resolved_product_image) .context(BuildServiceSnafu)?; let rg_metrics_service = - rolegroup_metrics_service(hdfs, &role, &rolegroup_ref, &resolved_product_image) + rolegroup_metrics_service(hdfs, &role, &rolegroup_ref, resolved_product_image) .context(BuildServiceSnafu)?; // We need to split the creation and the usage of the "metadata" variable in two statements. 
@@ -428,11 +410,11 @@ pub async fn reconcile_hdfs( &client.kubernetes_cluster_info, metadata, &rolegroup_ref, - rolegroup_config, + &validated_rg_config.product_config_properties, &namenode_podrefs, &journalnode_podrefs, - &merged_config, - &hdfs_opa_config, + merged_config, + &validated.hdfs_opa_config, )?; let rg_statefulset = rolegroup_statefulset( @@ -441,9 +423,9 @@ pub async fn reconcile_hdfs( metadata, &role, &rolegroup_ref, - &resolved_product_image, + resolved_product_image, env_overrides, - &merged_config, + merged_config, &namenode_podrefs, &rbac_sa, )?; @@ -498,14 +480,16 @@ pub async fn reconcile_hdfs( } } - let role_config = hdfs.role_config(&role); - if let Some(GenericRoleConfig { - pod_disruption_budget: pdb, - }) = role_config - { - add_pdbs(pdb, hdfs, &role, client, &mut cluster_resources) - .await - .context(FailedToCreatePdbSnafu)?; + if let Some(validated_role_config) = validated.role_configs.get(&role) { + add_pdbs( + &validated_role_config.pdb, + hdfs, + &role, + client, + &mut cluster_resources, + ) + .await + .context(FailedToCreatePdbSnafu)?; } } @@ -519,7 +503,7 @@ pub async fn reconcile_hdfs( .namenode_listener_refs(client) .await .context(CollectDiscoveryConfigSnafu)?, - &resolved_product_image, + resolved_product_image, ) .context(BuildDiscoveryConfigMapSnafu)?; @@ -930,7 +914,12 @@ pub fn error_policy( #[cfg(test)] mod test { - use stackable_operator::commons::networking::DomainName; + use stackable_operator::{ + commons::networking::DomainName, + product_config_utils::{ + transform_all_roles_to_config, validate_all_roles_and_groups_config, + }, + }; use super::*; diff --git a/rust/operator-binary/src/main.rs b/rust/operator-binary/src/main.rs index 5dd87a2a..074059ee 100644 --- a/rust/operator-binary/src/main.rs +++ b/rust/operator-binary/src/main.rs @@ -43,6 +43,7 @@ use crate::{ mod config; mod container; +mod controller; mod crd; mod discovery; mod event; diff --git a/rust/operator-binary/src/security/opa.rs 
b/rust/operator-binary/src/security/opa.rs index 013edcdb..16a6b06f 100644 --- a/rust/operator-binary/src/security/opa.rs +++ b/rust/operator-binary/src/security/opa.rs @@ -16,6 +16,7 @@ pub enum Error { type Result = std::result::Result; +#[derive(Clone, Debug)] pub struct HdfsOpaConfig { authorization_connection_string: String, } diff --git a/tests/templates/kuttl/smoke/30-assert.yaml.j2 b/tests/templates/kuttl/smoke/30-assert.yaml.j2 index 2f17591a..debb4cb1 100644 --- a/tests/templates/kuttl/smoke/30-assert.yaml.j2 +++ b/tests/templates/kuttl/smoke/30-assert.yaml.j2 @@ -3,173 +3,489 @@ apiVersion: kuttl.dev/v1beta1 kind: TestAssert timeout: 600 --- -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: hdfs-namenode-default -spec: - template: - spec: - containers: - - name: namenode - resources: - requests: - cpu: 250m # From defaults - memory: 1Gi - limits: - cpu: "1" # From defaults - memory: 1Gi -{% if lookup('env', 'VECTOR_AGGREGATOR') %} - - name: vector -{% endif %} - - name: zkfc - terminationGracePeriodSeconds: 900 -status: - readyReplicas: 2 - replicas: 2 ---- -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: hdfs-journalnode-default -spec: - template: - spec: - containers: - - name: journalnode - resources: - requests: - cpu: 110m # From podOverrides - memory: 512Mi - limits: - cpu: 410m # From podOverrides - memory: 512Mi -{% if lookup('env', 'VECTOR_AGGREGATOR') %} - - name: vector -{% endif %} - terminationGracePeriodSeconds: 900 -status: - readyReplicas: 1 - replicas: 1 ---- -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: hdfs-datanode-default -spec: - template: - spec: - containers: - - name: datanode - resources: - requests: - cpu: 100m # From defaults - memory: 512Mi - limits: - cpu: 400m # From defaults - memory: 512Mi -{% if lookup('env', 'VECTOR_AGGREGATOR') %} - - name: vector -{% endif %} - terminationGracePeriodSeconds: 1800 -status: - readyReplicas: {{ test_scenario['values']['number-of-datanodes'] }} - replicas: 
{{ test_scenario['values']['number-of-datanodes'] }} -{% if test_scenario['values']['datanode-pvcs'] == '2hdd-1ssd' %} ---- apiVersion: v1 kind: Service metadata: + labels: + app.kubernetes.io/component: namenode + app.kubernetes.io/instance: hdfs + app.kubernetes.io/managed-by: hdfs.stackable.tech_hdfs-operator-hdfs-controller + app.kubernetes.io/name: hdfs + app.kubernetes.io/role-group: default + stackable.tech/vendor: Stackable name: hdfs-namenode-default + ownerReferences: + - apiVersion: hdfs.stackable.tech/v1alpha1 + controller: true + kind: HdfsCluster + name: hdfs spec: + clusterIP: None ports: - name: rpc port: 8020 protocol: TCP - targetPort: 8020 - name: http port: 9870 protocol: TCP - targetPort: 9870 + publishNotReadyAddresses: true + selector: + app.kubernetes.io/component: namenode + app.kubernetes.io/instance: hdfs + app.kubernetes.io/name: hdfs + app.kubernetes.io/role-group: default + type: ClusterIP --- apiVersion: v1 kind: Service metadata: + annotations: + prometheus.io/path: /prom + prometheus.io/port: "9870" + prometheus.io/scheme: http + prometheus.io/scrape: "true" + labels: + app.kubernetes.io/component: namenode + app.kubernetes.io/instance: hdfs + app.kubernetes.io/managed-by: hdfs.stackable.tech_hdfs-operator-hdfs-controller + app.kubernetes.io/name: hdfs + app.kubernetes.io/role-group: default + prometheus.io/scrape: "true" + stackable.tech/vendor: Stackable name: hdfs-namenode-default-metrics + ownerReferences: + - apiVersion: hdfs.stackable.tech/v1alpha1 + controller: true + kind: HdfsCluster + name: hdfs spec: + clusterIP: None ports: - name: metrics port: 9870 protocol: TCP - targetPort: 9870 - name: jmx-metrics port: 8183 protocol: TCP - targetPort: 8183 + publishNotReadyAddresses: true + selector: + app.kubernetes.io/component: namenode + app.kubernetes.io/instance: hdfs + app.kubernetes.io/name: hdfs + app.kubernetes.io/role-group: default + type: ClusterIP --- apiVersion: v1 kind: Service metadata: + labels: + 
app.kubernetes.io/component: datanode + app.kubernetes.io/instance: hdfs + app.kubernetes.io/managed-by: hdfs.stackable.tech_hdfs-operator-hdfs-controller + app.kubernetes.io/name: hdfs + app.kubernetes.io/role-group: default + stackable.tech/vendor: Stackable name: hdfs-datanode-default + ownerReferences: + - apiVersion: hdfs.stackable.tech/v1alpha1 + controller: true + kind: HdfsCluster + name: hdfs spec: + clusterIP: None ports: - name: data port: 9866 protocol: TCP - targetPort: 9866 - name: ipc port: 9867 protocol: TCP - targetPort: 9867 - name: http port: 9864 protocol: TCP - targetPort: 9864 + publishNotReadyAddresses: true + selector: + app.kubernetes.io/component: datanode + app.kubernetes.io/instance: hdfs + app.kubernetes.io/name: hdfs + app.kubernetes.io/role-group: default + type: ClusterIP --- apiVersion: v1 kind: Service metadata: + annotations: + prometheus.io/path: /prom + prometheus.io/port: "9864" + prometheus.io/scheme: http + prometheus.io/scrape: "true" + labels: + app.kubernetes.io/component: datanode + app.kubernetes.io/instance: hdfs + app.kubernetes.io/managed-by: hdfs.stackable.tech_hdfs-operator-hdfs-controller + app.kubernetes.io/name: hdfs + app.kubernetes.io/role-group: default + prometheus.io/scrape: "true" + stackable.tech/vendor: Stackable name: hdfs-datanode-default-metrics + ownerReferences: + - apiVersion: hdfs.stackable.tech/v1alpha1 + controller: true + kind: HdfsCluster + name: hdfs spec: + clusterIP: None ports: - name: metrics port: 9864 protocol: TCP - targetPort: 9864 - name: jmx-metrics port: 8082 protocol: TCP - targetPort: 8082 + publishNotReadyAddresses: true + selector: + app.kubernetes.io/component: datanode + app.kubernetes.io/instance: hdfs + app.kubernetes.io/name: hdfs + app.kubernetes.io/role-group: default + type: ClusterIP --- apiVersion: v1 kind: Service metadata: + labels: + app.kubernetes.io/component: journalnode + app.kubernetes.io/instance: hdfs + app.kubernetes.io/managed-by: 
hdfs.stackable.tech_hdfs-operator-hdfs-controller + app.kubernetes.io/name: hdfs + app.kubernetes.io/role-group: default + stackable.tech/vendor: Stackable name: hdfs-journalnode-default + ownerReferences: + - apiVersion: hdfs.stackable.tech/v1alpha1 + controller: true + kind: HdfsCluster + name: hdfs spec: + clusterIP: None ports: - name: rpc port: 8485 protocol: TCP - targetPort: 8485 - name: http port: 8480 protocol: TCP - targetPort: 8480 + publishNotReadyAddresses: true + selector: + app.kubernetes.io/component: journalnode + app.kubernetes.io/instance: hdfs + app.kubernetes.io/name: hdfs + app.kubernetes.io/role-group: default + type: ClusterIP --- apiVersion: v1 kind: Service metadata: + annotations: + prometheus.io/path: /prom + prometheus.io/port: "8480" + prometheus.io/scheme: http + prometheus.io/scrape: "true" + labels: + app.kubernetes.io/component: journalnode + app.kubernetes.io/instance: hdfs + app.kubernetes.io/managed-by: hdfs.stackable.tech_hdfs-operator-hdfs-controller + app.kubernetes.io/name: hdfs + app.kubernetes.io/role-group: default + prometheus.io/scrape: "true" + stackable.tech/vendor: Stackable name: hdfs-journalnode-default-metrics + ownerReferences: + - apiVersion: hdfs.stackable.tech/v1alpha1 + controller: true + kind: HdfsCluster + name: hdfs spec: + clusterIP: None ports: - name: metrics port: 8480 protocol: TCP - targetPort: 8480 - name: jmx-metrics port: 8081 protocol: TCP - targetPort: 8081 + publishNotReadyAddresses: true + selector: + app.kubernetes.io/component: journalnode + app.kubernetes.io/instance: hdfs + app.kubernetes.io/name: hdfs + app.kubernetes.io/role-group: default + type: ClusterIP +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + labels: + app.kubernetes.io/component: namenode + app.kubernetes.io/instance: hdfs + app.kubernetes.io/managed-by: hdfs.stackable.tech_hdfs-operator-hdfs-controller + app.kubernetes.io/name: hdfs + app.kubernetes.io/role-group: default + stackable.tech/vendor: Stackable + name: 
hdfs-namenode-default + ownerReferences: + - apiVersion: hdfs.stackable.tech/v1alpha1 + controller: true + kind: HdfsCluster + name: hdfs +spec: + podManagementPolicy: OrderedReady + replicas: 2 + selector: + matchLabels: + app.kubernetes.io/component: namenode + app.kubernetes.io/instance: hdfs + app.kubernetes.io/name: hdfs + app.kubernetes.io/role-group: default + serviceName: hdfs-namenode-default + template: + spec: + containers: + - name: namenode + resources: + requests: + cpu: 250m + memory: 1Gi + limits: + cpu: "1" + memory: 1Gi +{% if lookup('env', 'VECTOR_AGGREGATOR') %} + - name: vector +{% endif %} + - name: zkfc + terminationGracePeriodSeconds: 900 +status: + readyReplicas: 2 + replicas: 2 +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + labels: + app.kubernetes.io/component: datanode + app.kubernetes.io/instance: hdfs + app.kubernetes.io/managed-by: hdfs.stackable.tech_hdfs-operator-hdfs-controller + app.kubernetes.io/name: hdfs + app.kubernetes.io/role-group: default + stackable.tech/vendor: Stackable + name: hdfs-datanode-default + ownerReferences: + - apiVersion: hdfs.stackable.tech/v1alpha1 + controller: true + kind: HdfsCluster + name: hdfs +spec: + podManagementPolicy: OrderedReady + replicas: {{ test_scenario['values']['number-of-datanodes'] }} + selector: + matchLabels: + app.kubernetes.io/component: datanode + app.kubernetes.io/instance: hdfs + app.kubernetes.io/name: hdfs + app.kubernetes.io/role-group: default + serviceName: hdfs-datanode-default + template: + spec: + containers: + - name: datanode + resources: + requests: + cpu: 100m + memory: 512Mi + limits: + cpu: 400m + memory: 512Mi +{% if lookup('env', 'VECTOR_AGGREGATOR') %} + - name: vector +{% endif %} + terminationGracePeriodSeconds: 1800 +status: + readyReplicas: {{ test_scenario['values']['number-of-datanodes'] }} + replicas: {{ test_scenario['values']['number-of-datanodes'] }} +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + labels: + 
app.kubernetes.io/component: journalnode + app.kubernetes.io/instance: hdfs + app.kubernetes.io/managed-by: hdfs.stackable.tech_hdfs-operator-hdfs-controller + app.kubernetes.io/name: hdfs + app.kubernetes.io/role-group: default + stackable.tech/vendor: Stackable + name: hdfs-journalnode-default + ownerReferences: + - apiVersion: hdfs.stackable.tech/v1alpha1 + controller: true + kind: HdfsCluster + name: hdfs +spec: + podManagementPolicy: OrderedReady + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/component: journalnode + app.kubernetes.io/instance: hdfs + app.kubernetes.io/name: hdfs + app.kubernetes.io/role-group: default + serviceName: hdfs-journalnode-default + template: + spec: + containers: + - name: journalnode + resources: + requests: + cpu: 110m + memory: 512Mi + limits: + cpu: 410m + memory: 512Mi +{% if lookup('env', 'VECTOR_AGGREGATOR') %} + - name: vector +{% endif %} + terminationGracePeriodSeconds: 900 +status: + readyReplicas: 1 + replicas: 1 +--- +apiVersion: v1 +kind: ConfigMap +metadata: + labels: + app.kubernetes.io/component: namenode + app.kubernetes.io/instance: hdfs + app.kubernetes.io/managed-by: hdfs.stackable.tech_hdfs-operator-hdfs-controller + app.kubernetes.io/name: hdfs + app.kubernetes.io/role-group: default + stackable.tech/vendor: Stackable + name: hdfs-namenode-default + ownerReferences: + - apiVersion: hdfs.stackable.tech/v1alpha1 + controller: true + kind: HdfsCluster + name: hdfs +--- +apiVersion: v1 +kind: ConfigMap +metadata: + labels: + app.kubernetes.io/component: datanode + app.kubernetes.io/instance: hdfs + app.kubernetes.io/managed-by: hdfs.stackable.tech_hdfs-operator-hdfs-controller + app.kubernetes.io/name: hdfs + app.kubernetes.io/role-group: default + stackable.tech/vendor: Stackable + name: hdfs-datanode-default + ownerReferences: + - apiVersion: hdfs.stackable.tech/v1alpha1 + controller: true + kind: HdfsCluster + name: hdfs +--- +apiVersion: v1 +kind: ConfigMap +metadata: + labels: + 
app.kubernetes.io/component: journalnode + app.kubernetes.io/instance: hdfs + app.kubernetes.io/managed-by: hdfs.stackable.tech_hdfs-operator-hdfs-controller + app.kubernetes.io/name: hdfs + app.kubernetes.io/role-group: default + stackable.tech/vendor: Stackable + name: hdfs-journalnode-default + ownerReferences: + - apiVersion: hdfs.stackable.tech/v1alpha1 + controller: true + kind: HdfsCluster + name: hdfs +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + labels: + app.kubernetes.io/instance: hdfs + app.kubernetes.io/managed-by: hdfs.stackable.tech_hdfs-operator-hdfs-controller + app.kubernetes.io/name: hdfs + name: hdfs-serviceaccount + ownerReferences: + - apiVersion: hdfs.stackable.tech/v1alpha1 + controller: true + kind: HdfsCluster + name: hdfs +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + labels: + app.kubernetes.io/instance: hdfs + app.kubernetes.io/managed-by: hdfs.stackable.tech_hdfs-operator-hdfs-controller + app.kubernetes.io/name: hdfs + name: hdfs-rolebinding + ownerReferences: + - apiVersion: hdfs.stackable.tech/v1alpha1 + controller: true + kind: HdfsCluster + name: hdfs +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: hdfs-clusterrole +subjects: +- kind: ServiceAccount + name: hdfs-serviceaccount +--- +apiVersion: policy/v1 +kind: PodDisruptionBudget +metadata: + name: hdfs-namenode + ownerReferences: + - apiVersion: hdfs.stackable.tech/v1alpha1 + controller: true + kind: HdfsCluster + name: hdfs +status: + expectedPods: 2 + currentHealthy: 2 + disruptionsAllowed: 1 +--- +apiVersion: policy/v1 +kind: PodDisruptionBudget +metadata: + name: hdfs-datanode + ownerReferences: + - apiVersion: hdfs.stackable.tech/v1alpha1 + controller: true + kind: HdfsCluster + name: hdfs +status: + expectedPods: {{ test_scenario['values']['number-of-datanodes'] }} + currentHealthy: {{ test_scenario['values']['number-of-datanodes'] }} + disruptionsAllowed: 1 +--- +apiVersion: policy/v1 +kind: 
PodDisruptionBudget +metadata: + name: hdfs-journalnode + ownerReferences: + - apiVersion: hdfs.stackable.tech/v1alpha1 + controller: true + kind: HdfsCluster + name: hdfs +status: + expectedPods: 1 + currentHealthy: 1 + disruptionsAllowed: 1 +{% if test_scenario['values']['datanode-pvcs'] == '2hdd-1ssd' %} --- apiVersion: v1 kind: PersistentVolumeClaim @@ -210,30 +526,3 @@ status: - ReadWriteOnce phase: Bound {% endif %} ---- -apiVersion: policy/v1 -kind: PodDisruptionBudget -metadata: - name: hdfs-journalnode -status: - expectedPods: 1 - currentHealthy: 1 - disruptionsAllowed: 1 ---- -apiVersion: policy/v1 -kind: PodDisruptionBudget -metadata: - name: hdfs-namenode -status: - expectedPods: 2 - currentHealthy: 2 - disruptionsAllowed: 1 ---- -apiVersion: policy/v1 -kind: PodDisruptionBudget -metadata: - name: hdfs-datanode -status: - expectedPods: {{ test_scenario['values']['number-of-datanodes'] }} - currentHealthy: {{ test_scenario['values']['number-of-datanodes'] }} - disruptionsAllowed: 1 diff --git a/tests/templates/kuttl/smoke/31-assert.yaml b/tests/templates/kuttl/smoke/31-assert.yaml deleted file mode 100644 index e07c0f5e..00000000 --- a/tests/templates/kuttl/smoke/31-assert.yaml +++ /dev/null @@ -1,20 +0,0 @@ ---- -apiVersion: kuttl.dev/v1beta1 -kind: TestAssert -timeout: 600 -commands: - # - # Test envOverrides for all roles - # - - script: | - kubectl -n $NAMESPACE get sts hdfs-datanode-default -o yaml | yq -e '.spec.template.spec.containers[] | select (.name == "datanode") | .env[] | select (.name == "COMMON_VAR" and .value == "group-value")' - kubectl -n $NAMESPACE get sts hdfs-datanode-default -o yaml | yq -e '.spec.template.spec.containers[] | select (.name == "datanode") | .env[] | select (.name == "GROUP_VAR" and .value == "group-value")' - kubectl -n $NAMESPACE get sts hdfs-datanode-default -o yaml | yq -e '.spec.template.spec.containers[] | select (.name == "datanode") | .env[] | select (.name == "ROLE_VAR" and .value == "role-value")' - - 
script: | - kubectl -n $NAMESPACE get sts hdfs-journalnode-default -o yaml | yq -e '.spec.template.spec.containers[] | select (.name == "journalnode") | .env[] | select (.name == "COMMON_VAR" and .value == "group-value")' - kubectl -n $NAMESPACE get sts hdfs-journalnode-default -o yaml | yq -e '.spec.template.spec.containers[] | select (.name == "journalnode") | .env[] | select (.name == "GROUP_VAR" and .value == "group-value")' - kubectl -n $NAMESPACE get sts hdfs-journalnode-default -o yaml | yq -e '.spec.template.spec.containers[] | select (.name == "journalnode") | .env[] | select (.name == "ROLE_VAR" and .value == "role-value")' - - script: | - kubectl -n $NAMESPACE get sts hdfs-namenode-default -o yaml | yq -e '.spec.template.spec.containers[] | select (.name == "namenode") | .env[] | select (.name == "COMMON_VAR" and .value == "group-value")' - kubectl -n $NAMESPACE get sts hdfs-namenode-default -o yaml | yq -e '.spec.template.spec.containers[] | select (.name == "namenode") | .env[] | select (.name == "GROUP_VAR" and .value == "group-value")' - kubectl -n $NAMESPACE get sts hdfs-namenode-default -o yaml | yq -e '.spec.template.spec.containers[] | select (.name == "namenode") | .env[] | select (.name == "ROLE_VAR" and .value == "role-value")' diff --git a/tests/templates/kuttl/smoke/31-assert.yaml.j2 b/tests/templates/kuttl/smoke/31-assert.yaml.j2 new file mode 100644 index 00000000..a65e3732 --- /dev/null +++ b/tests/templates/kuttl/smoke/31-assert.yaml.j2 @@ -0,0 +1,643 @@ +--- +# Snapshot the full .data of each operator-managed ConfigMap and verify envOverrides. +# Any code change that alters rendered config values will fail these diffs. +# +# The heredoc is quoted (<<'YAMLEOF') so shell substitution is disabled and +# Hadoop ${env.FOO} escapes survive verbatim. Only __NAMESPACE__ is substituted +# afterwards via sed, because kuttl tests run in a randomised namespace per +# invocation. 
Both sides are normalised to canonical JSON via yq -o=json; keys +# are already alphabetical on both sides (operator stores BTreeMap; kubectl +# serialises maps sorted; the heredoc is hand-sorted). +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 600 +commands: + # + # Test envOverrides for all roles + # + - script: | + kubectl -n $NAMESPACE get sts hdfs-datanode-default -o yaml | yq -e '.spec.template.spec.containers[] | select (.name == "datanode") | .env[] | select (.name == "COMMON_VAR" and .value == "group-value")' + kubectl -n $NAMESPACE get sts hdfs-datanode-default -o yaml | yq -e '.spec.template.spec.containers[] | select (.name == "datanode") | .env[] | select (.name == "GROUP_VAR" and .value == "group-value")' + kubectl -n $NAMESPACE get sts hdfs-datanode-default -o yaml | yq -e '.spec.template.spec.containers[] | select (.name == "datanode") | .env[] | select (.name == "ROLE_VAR" and .value == "role-value")' + - script: | + kubectl -n $NAMESPACE get sts hdfs-journalnode-default -o yaml | yq -e '.spec.template.spec.containers[] | select (.name == "journalnode") | .env[] | select (.name == "COMMON_VAR" and .value == "group-value")' + kubectl -n $NAMESPACE get sts hdfs-journalnode-default -o yaml | yq -e '.spec.template.spec.containers[] | select (.name == "journalnode") | .env[] | select (.name == "GROUP_VAR" and .value == "group-value")' + kubectl -n $NAMESPACE get sts hdfs-journalnode-default -o yaml | yq -e '.spec.template.spec.containers[] | select (.name == "journalnode") | .env[] | select (.name == "ROLE_VAR" and .value == "role-value")' + - script: | + kubectl -n $NAMESPACE get sts hdfs-namenode-default -o yaml | yq -e '.spec.template.spec.containers[] | select (.name == "namenode") | .env[] | select (.name == "COMMON_VAR" and .value == "group-value")' + kubectl -n $NAMESPACE get sts hdfs-namenode-default -o yaml | yq -e '.spec.template.spec.containers[] | select (.name == "namenode") | .env[] | select (.name == "GROUP_VAR" and .value == 
"group-value")' + kubectl -n $NAMESPACE get sts hdfs-namenode-default -o yaml | yq -e '.spec.template.spec.containers[] | select (.name == "namenode") | .env[] | select (.name == "ROLE_VAR" and .value == "role-value")' + # + # ConfigMap data snapshot: hdfs-namenode-default + # + - script: | + expected=$(cat <<'YAMLEOF' | sed "s|__NAMESPACE__|$NAMESPACE|g" | yq -o=json + core-site.xml: |- + + + + fs.defaultFS + hdfs://hdfs/ + + + ha.zookeeper.quorum + ${env.ZOOKEEPER} + + + hadoop.prometheus.endpoint.enabled + true + + + io.file.buffer.size + 131072 + + + format-namenodes.log4j.properties: |+ + log4j.rootLogger=INFO, CONSOLE, FILE + + log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender + log4j.appender.CONSOLE.Threshold=INFO + log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout + log4j.appender.CONSOLE.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n + + log4j.appender.FILE=org.apache.log4j.RollingFileAppender + log4j.appender.FILE.Threshold=INFO + log4j.appender.FILE.File=/stackable/log/format-namenodes/format-namenodes.log4j.xml + log4j.appender.FILE.MaxFileSize=5MB + log4j.appender.FILE.MaxBackupIndex=1 + log4j.appender.FILE.layout=org.apache.log4j.xml.XMLLayout + + format-zookeeper.log4j.properties: |+ + log4j.rootLogger=INFO, CONSOLE, FILE + + log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender + log4j.appender.CONSOLE.Threshold=INFO + log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout + log4j.appender.CONSOLE.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n + + log4j.appender.FILE=org.apache.log4j.RollingFileAppender + log4j.appender.FILE.Threshold=INFO + log4j.appender.FILE.File=/stackable/log/format-zookeeper/format-zookeeper.log4j.xml + log4j.appender.FILE.MaxFileSize=5MB + log4j.appender.FILE.MaxBackupIndex=1 + log4j.appender.FILE.layout=org.apache.log4j.xml.XMLLayout + + hadoop-policy.xml: |- + + + + hdfs-site.xml: |- + + + + dfs.client.failover.proxy.provider.hdfs + 
org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider + + + dfs.datanode.handler.count + 50 + + + dfs.datanode.max.transfer.threads + 8192 + + + dfs.datanode.registered.hostname + ${env.POD_ADDRESS} + + + dfs.datanode.registered.http.port + ${env.HTTP_PORT} + + + dfs.datanode.registered.ipc.port + ${env.IPC_PORT} + + + dfs.datanode.registered.port + ${env.DATA_PORT} + + + dfs.datanode.sync.behind.writes + true + + + dfs.datanode.synconclose + true + + + dfs.ha.automatic-failover.enabled + true + + + dfs.ha.fencing.methods + shell(/bin/true) + + + dfs.ha.namenode.id + ${env.POD_NAME} + + + dfs.ha.namenodes.hdfs + hdfs-namenode-default-0,hdfs-namenode-default-1 + + + dfs.journalnode.edits.dir + /stackable/data/journalnode + + + dfs.namenode.datanode.registration.unsafe.allow-address-override + true + + + dfs.namenode.handler.count + 50 + + + dfs.namenode.http-address.hdfs.hdfs-namenode-default-0 + hdfs-namenode-default-0.hdfs-namenode-default.__NAMESPACE__.svc.cluster.local:9870 + + + dfs.namenode.http-address.hdfs.hdfs-namenode-default-1 + hdfs-namenode-default-1.hdfs-namenode-default.__NAMESPACE__.svc.cluster.local:9870 + + + dfs.namenode.name.dir + /stackable/data/namenode + + + dfs.namenode.name.dir.hdfs.hdfs-namenode-default-0 + /stackable/data/namenode + + + dfs.namenode.name.dir.hdfs.hdfs-namenode-default-1 + /stackable/data/namenode + + + dfs.namenode.replication.max-streams + 4 + + + dfs.namenode.replication.max-streams-hard-limit + 8 + + + dfs.namenode.rpc-address.hdfs.hdfs-namenode-default-0 + hdfs-namenode-default-0.hdfs-namenode-default.__NAMESPACE__.svc.cluster.local:8020 + + + dfs.namenode.rpc-address.hdfs.hdfs-namenode-default-1 + hdfs-namenode-default-1.hdfs-namenode-default.__NAMESPACE__.svc.cluster.local:8020 + + + dfs.namenode.shared.edits.dir + qjournal://hdfs-journalnode-default-0.hdfs-journalnode-default.__NAMESPACE__.svc.cluster.local:8485/hdfs + + + dfs.nameservices + hdfs + + + dfs.replication + 1 + + + 
hdfs.log4j.properties: |+ + log4j.rootLogger=INFO, CONSOLE, FILE + + log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender + log4j.appender.CONSOLE.Threshold=INFO + log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout + log4j.appender.CONSOLE.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n + + log4j.appender.FILE=org.apache.log4j.RollingFileAppender + log4j.appender.FILE.Threshold=INFO + log4j.appender.FILE.File=/stackable/log/hdfs/hdfs.log4j.xml + log4j.appender.FILE.MaxFileSize=5MB + log4j.appender.FILE.MaxBackupIndex=1 + log4j.appender.FILE.layout=org.apache.log4j.xml.XMLLayout + + security.properties: | + networkaddress.cache.negative.ttl=0 + networkaddress.cache.ttl=30 + ssl-client.xml: |- + + + + ssl-server.xml: |- + + + + zkfc.log4j.properties: |+ + log4j.rootLogger=INFO, CONSOLE, FILE + + log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender + log4j.appender.CONSOLE.Threshold=INFO + log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout + log4j.appender.CONSOLE.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n + + log4j.appender.FILE=org.apache.log4j.RollingFileAppender + log4j.appender.FILE.Threshold=INFO + log4j.appender.FILE.File=/stackable/log/zkfc/zkfc.log4j.xml + log4j.appender.FILE.MaxFileSize=5MB + log4j.appender.FILE.MaxBackupIndex=1 + log4j.appender.FILE.layout=org.apache.log4j.xml.XMLLayout + + YAMLEOF + ) + actual=$(kubectl -n $NAMESPACE get cm hdfs-namenode-default -o yaml | yq -o=json '.data | del(.["vector.toml"])') + if [ "$expected" != "$actual" ]; then + echo "ERROR: ConfigMap hdfs-namenode-default data drifted from snapshot." 
+ echo "=== expected ===" + printf '%s\n' "$expected" + echo "=== actual ===" + printf '%s\n' "$actual" + exit 1 + fi + # + # ConfigMap data snapshot: hdfs-datanode-default + # + - script: | + expected=$(cat <<'YAMLEOF' | sed "s|__NAMESPACE__|$NAMESPACE|g" | yq -o=json + core-site.xml: |- + + + + fs.defaultFS + hdfs://hdfs/ + + + ha.zookeeper.quorum + ${env.ZOOKEEPER} + + + hadoop.prometheus.endpoint.enabled + true + + + io.file.buffer.size + 131072 + + + hadoop-policy.xml: |- + + + + hdfs-site.xml: |- + + + + dfs.client.failover.proxy.provider.hdfs + org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider + + + dfs.datanode.data.dir +{% if test_scenario['values']['datanode-pvcs'] == '2hdd-1ssd' %} + [DISK]/stackable/data/hdd/datanode,[DISK]/stackable/data/hdd-1/datanode,[SSD]/stackable/data/ssd/datanode +{% else %} + [DISK]/stackable/data/data/datanode +{% endif %} + + + dfs.datanode.handler.count + 50 + + + dfs.datanode.max.transfer.threads + 8192 + + + dfs.datanode.registered.hostname + ${env.POD_ADDRESS} + + + dfs.datanode.registered.http.port + ${env.HTTP_PORT} + + + dfs.datanode.registered.ipc.port + ${env.IPC_PORT} + + + dfs.datanode.registered.port + ${env.DATA_PORT} + + + dfs.datanode.sync.behind.writes + true + + + dfs.datanode.synconclose + true + + + dfs.ha.automatic-failover.enabled + true + + + dfs.ha.fencing.methods + shell(/bin/true) + + + dfs.ha.namenode.id + ${env.POD_NAME} + + + dfs.ha.namenodes.hdfs + hdfs-namenode-default-0,hdfs-namenode-default-1 + + + dfs.journalnode.edits.dir + /stackable/data/journalnode + + + dfs.namenode.datanode.registration.unsafe.allow-address-override + true + + + dfs.namenode.handler.count + 50 + + + dfs.namenode.http-address.hdfs.hdfs-namenode-default-0 + hdfs-namenode-default-0.hdfs-namenode-default.__NAMESPACE__.svc.cluster.local:9870 + + + dfs.namenode.http-address.hdfs.hdfs-namenode-default-1 + hdfs-namenode-default-1.hdfs-namenode-default.__NAMESPACE__.svc.cluster.local:9870 + + + 
dfs.namenode.name.dir + /stackable/data/namenode + + + dfs.namenode.name.dir.hdfs.hdfs-namenode-default-0 + /stackable/data/namenode + + + dfs.namenode.name.dir.hdfs.hdfs-namenode-default-1 + /stackable/data/namenode + + + dfs.namenode.replication.max-streams + 4 + + + dfs.namenode.replication.max-streams-hard-limit + 8 + + + dfs.namenode.rpc-address.hdfs.hdfs-namenode-default-0 + hdfs-namenode-default-0.hdfs-namenode-default.__NAMESPACE__.svc.cluster.local:8020 + + + dfs.namenode.rpc-address.hdfs.hdfs-namenode-default-1 + hdfs-namenode-default-1.hdfs-namenode-default.__NAMESPACE__.svc.cluster.local:8020 + + + dfs.namenode.shared.edits.dir + qjournal://hdfs-journalnode-default-0.hdfs-journalnode-default.__NAMESPACE__.svc.cluster.local:8485/hdfs + + + dfs.nameservices + hdfs + + + dfs.replication + 1 + + + hdfs.log4j.properties: |+ + log4j.rootLogger=INFO, CONSOLE, FILE + + log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender + log4j.appender.CONSOLE.Threshold=INFO + log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout + log4j.appender.CONSOLE.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n + + log4j.appender.FILE=org.apache.log4j.RollingFileAppender + log4j.appender.FILE.Threshold=INFO + log4j.appender.FILE.File=/stackable/log/hdfs/hdfs.log4j.xml + log4j.appender.FILE.MaxFileSize=5MB + log4j.appender.FILE.MaxBackupIndex=1 + log4j.appender.FILE.layout=org.apache.log4j.xml.XMLLayout + + security.properties: | + networkaddress.cache.negative.ttl=0 + networkaddress.cache.ttl=30 + ssl-client.xml: |- + + + + ssl-server.xml: |- + + + + wait-for-namenodes.log4j.properties: |+ + log4j.rootLogger=INFO, CONSOLE, FILE + + log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender + log4j.appender.CONSOLE.Threshold=INFO + log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout + log4j.appender.CONSOLE.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n + + log4j.appender.FILE=org.apache.log4j.RollingFileAppender + 
log4j.appender.FILE.Threshold=INFO + log4j.appender.FILE.File=/stackable/log/wait-for-namenodes/wait-for-namenodes.log4j.xml + log4j.appender.FILE.MaxFileSize=5MB + log4j.appender.FILE.MaxBackupIndex=1 + log4j.appender.FILE.layout=org.apache.log4j.xml.XMLLayout + + YAMLEOF + ) + actual=$(kubectl -n $NAMESPACE get cm hdfs-datanode-default -o yaml | yq -o=json '.data | del(.["vector.toml"])') + if [ "$expected" != "$actual" ]; then + echo "ERROR: ConfigMap hdfs-datanode-default data drifted from snapshot." + echo "=== expected ===" + printf '%s\n' "$expected" + echo "=== actual ===" + printf '%s\n' "$actual" + exit 1 + fi + # + # ConfigMap data snapshot: hdfs-journalnode-default + # + - script: | + expected=$(cat <<'YAMLEOF' | sed "s|__NAMESPACE__|$NAMESPACE|g" | yq -o=json + core-site.xml: |- + + + + fs.defaultFS + hdfs://hdfs/ + + + ha.zookeeper.quorum + ${env.ZOOKEEPER} + + + hadoop.prometheus.endpoint.enabled + true + + + io.file.buffer.size + 131072 + + + hadoop-policy.xml: |- + + + + hdfs-site.xml: |- + + + + dfs.client.failover.proxy.provider.hdfs + org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider + + + dfs.datanode.handler.count + 50 + + + dfs.datanode.max.transfer.threads + 8192 + + + dfs.datanode.registered.hostname + ${env.POD_ADDRESS} + + + dfs.datanode.registered.http.port + ${env.HTTP_PORT} + + + dfs.datanode.registered.ipc.port + ${env.IPC_PORT} + + + dfs.datanode.registered.port + ${env.DATA_PORT} + + + dfs.datanode.sync.behind.writes + true + + + dfs.datanode.synconclose + true + + + dfs.ha.automatic-failover.enabled + true + + + dfs.ha.fencing.methods + shell(/bin/true) + + + dfs.ha.namenode.id + ${env.POD_NAME} + + + dfs.ha.namenodes.hdfs + hdfs-namenode-default-0,hdfs-namenode-default-1 + + + dfs.journalnode.edits.dir + /stackable/data/journalnode + + + dfs.namenode.datanode.registration.unsafe.allow-address-override + true + + + dfs.namenode.handler.count + 50 + + + dfs.namenode.http-address.hdfs.hdfs-namenode-default-0 + 
hdfs-namenode-default-0.hdfs-namenode-default.__NAMESPACE__.svc.cluster.local:9870 + + + dfs.namenode.http-address.hdfs.hdfs-namenode-default-1 + hdfs-namenode-default-1.hdfs-namenode-default.__NAMESPACE__.svc.cluster.local:9870 + + + dfs.namenode.name.dir + /stackable/data/namenode + + + dfs.namenode.name.dir.hdfs.hdfs-namenode-default-0 + /stackable/data/namenode + + + dfs.namenode.name.dir.hdfs.hdfs-namenode-default-1 + /stackable/data/namenode + + + dfs.namenode.replication.max-streams + 4 + + + dfs.namenode.replication.max-streams-hard-limit + 8 + + + dfs.namenode.rpc-address.hdfs.hdfs-namenode-default-0 + hdfs-namenode-default-0.hdfs-namenode-default.__NAMESPACE__.svc.cluster.local:8020 + + + dfs.namenode.rpc-address.hdfs.hdfs-namenode-default-1 + hdfs-namenode-default-1.hdfs-namenode-default.__NAMESPACE__.svc.cluster.local:8020 + + + dfs.namenode.shared.edits.dir + qjournal://hdfs-journalnode-default-0.hdfs-journalnode-default.__NAMESPACE__.svc.cluster.local:8485/hdfs + + + dfs.nameservices + hdfs + + + dfs.replication + 1 + + + hdfs.log4j.properties: |+ + log4j.rootLogger=INFO, CONSOLE, FILE + + log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender + log4j.appender.CONSOLE.Threshold=INFO + log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout + log4j.appender.CONSOLE.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n + + log4j.appender.FILE=org.apache.log4j.RollingFileAppender + log4j.appender.FILE.Threshold=INFO + log4j.appender.FILE.File=/stackable/log/hdfs/hdfs.log4j.xml + log4j.appender.FILE.MaxFileSize=5MB + log4j.appender.FILE.MaxBackupIndex=1 + log4j.appender.FILE.layout=org.apache.log4j.xml.XMLLayout + + security.properties: | + networkaddress.cache.negative.ttl=0 + networkaddress.cache.ttl=30 + ssl-client.xml: |- + + + + ssl-server.xml: |- + + + + YAMLEOF + ) + actual=$(kubectl -n $NAMESPACE get cm hdfs-journalnode-default -o yaml | yq -o=json '.data | del(.["vector.toml"])') + if [ "$expected" != "$actual" ]; then + echo 
"ERROR: ConfigMap hdfs-journalnode-default data drifted from snapshot." + echo "=== expected ===" + printf '%s\n' "$expected" + echo "=== actual ===" + printf '%s\n' "$actual" + exit 1 + fi