From 2cda39cf06358c7d24ebda5da0d8ec92588763d2 Mon Sep 17 00:00:00 2001 From: Arnau Verdaguer Date: Thu, 20 Feb 2025 17:09:15 +0100 Subject: [PATCH] [ovn-controller] Change startup mechanism of ovs pods This commit aims to modify the starting scripts of the ovn-controller-ovs daemonset. This is done to allow modifying the RollingUpdate Strategy to not allow any Unavailable pod during update; what this will cause is that during an update of the pod, instead of first deleting the old pod and only then creating the new one, it will create the new pod while the old one is still running. Due to how the startup scripts work currently this is not allowed. The reason behind this is to try to lower the downtime observed if the environment is using centralized floating ip during an update. With this commit the ovn-controller-ovs will share the PID namespace with the host, in order to allow signaling between old/new pod. Another change is adding a STATE that the containers (ovsdb-server, ovs-vswitchd and ovsdb-server-init) will handle internally. The different states are: - NULL (No file): will happen the first time ds is created on the oc worker. - INIT: First time init-ovsdb-server is executed on the oc worker. - OVSDB_SERVER: once ovsdb-server pod has run the startup script - RUNNING: Once ovsdb-server is up and ovs-vswitchd has run the startup script. - UPDATE: Once a new pod is created and ovsdb-server-init has run. - RESTART_VSWITCHD: After ovsdb-server-init has finished, new ovsdb-server pod has stopped the old ovs-vswitchd process. - RESTART_DBSERVER: After old ovs-vswitchd has been restarted the old ovsdb-server is also stopped. 
The normal flow of states is the following: NULL -> INIT -> OVSDB_SERVER -> RUNNING Scale down: If the oc worker is deleted the DS and all the pods and mount points will be deleted, in case of node being up again it should start from NULL Update: RUNNING -> (Change on CR) -> UPDATE -> RESTART_VSWITCHD -> RESTART_DBSERVER -> OVSDB_SERVER -> RUNNING Jira: OSPRH-11636 Related: OSPRH-10821 --- controllers/ovncontroller_controller.go | 6 +- pkg/ovncontroller/daemonset.go | 41 +++++++++-- .../ovncontroller/bin/init-ovsdb-server.sh | 68 +++++++++++++++++-- .../ovncontroller/bin/start-ovsdb-server.sh | 41 ++++++++++- templates/ovncontroller/bin/start-vswitchd.sh | 53 ++++++++++++++- .../ovncontroller/bin/stop-ovsdb-server.sh | 14 ++++ templates/ovncontroller/bin/stop-vswitchd.sh | 16 +++++ 7 files changed, 226 insertions(+), 13 deletions(-) diff --git a/controllers/ovncontroller_controller.go b/controllers/ovncontroller_controller.go index f2421fc4..a5d2713e 100644 --- a/controllers/ovncontroller_controller.go +++ b/controllers/ovncontroller_controller.go @@ -612,7 +612,6 @@ func (r *OVNControllerReconciler) reconcileNormal(ctx context.Context, instance instance.Status.DesiredNumberScheduled = dset.GetDaemonSet().Status.DesiredNumberScheduled instance.Status.NumberReady = dset.GetDaemonSet().Status.NumberReady - // Define a new DaemonSet object for OVS (ovsdb-server + ovs-vswitchd) ovsdset := daemonset.NewDaemonSet( ovncontroller.CreateOVSDaemonSet(instance, inputHash, ovsServiceLabels, serviceAnnotations, topology), time.Duration(5)*time.Second, @@ -744,6 +743,11 @@ func (r *OVNControllerReconciler) generateServiceConfigMaps( } else { templateParameters["OVNEncapNIC"] = "eth0" } + if instance.Spec.TLS.Enabled() { + templateParameters["TLS"] = "Enabled" + } else { + templateParameters["TLS"] = "Disabled" + } cms := []util.Template{ // ScriptsConfigMap { diff --git a/pkg/ovncontroller/daemonset.go b/pkg/ovncontroller/daemonset.go index aab8d67e..f14ab43d 100644 --- 
a/pkg/ovncontroller/daemonset.go +++ b/pkg/ovncontroller/daemonset.go @@ -24,6 +24,7 @@ import ( appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/intstr" "k8s.io/utils/ptr" ) @@ -236,6 +237,27 @@ func CreateOVSDaemonSet( envVars := map[string]env.Setter{} envVars["CONFIG_HASH"] = env.SetValue(configHash) + volumes := []corev1.Volume{} + mounts := []corev1.VolumeMount{} + + // add OVN dbs cert and CA + if instance.Spec.TLS.Enabled() { + svc := tls.Service{ + SecretName: *instance.Spec.TLS.GenericService.SecretName, + CertMount: ptr.To(ovn_common.OVNDbCertPath), + KeyMount: ptr.To(ovn_common.OVNDbKeyPath), + CaMount: ptr.To(ovn_common.OVNDbCaCertPath), + } + volumes = append(volumes, svc.CreateVolume(ovnv1.ServiceNameOVS)) + mounts = append(mounts, svc.CreateVolumeMounts(ovnv1.ServiceNameOVS)...) + + // add CA bundle if defined + if instance.Spec.TLS.CaBundleSecretName != "" { + volumes = append(volumes, instance.Spec.TLS.CreateVolume()) + mounts = append(mounts, instance.Spec.TLS.CreateVolumeMounts(nil)...) 
+ } + } + initContainers := []corev1.Container{ { Name: "ovsdb-server-init", @@ -250,7 +272,7 @@ func CreateOVSDaemonSet( Privileged: &privileged, }, Env: env.MergeEnvs([]corev1.EnvVar{}, envVars), - VolumeMounts: GetOVSDbVolumeMounts(), + VolumeMounts: append(GetOVSDbVolumeMounts(), mounts...), }, } @@ -276,7 +298,7 @@ func CreateOVSDaemonSet( Privileged: &privileged, }, Env: env.MergeEnvs([]corev1.EnvVar{}, envVars), - VolumeMounts: GetOVSDbVolumeMounts(), + VolumeMounts: append(GetOVSDbVolumeMounts(), mounts...), // TODO: consider the fact that resources are now double booked Resources: instance.Spec.Resources, LivenessProbe: ovsDbLivenessProbe, @@ -303,7 +325,7 @@ func CreateOVSDaemonSet( Privileged: &privileged, }, Env: env.MergeEnvs([]corev1.EnvVar{}, envVars), - VolumeMounts: GetVswitchdVolumeMounts(), + VolumeMounts: append(GetVswitchdVolumeMounts(), mounts...), // TODO: consider the fact that resources are now double booked Resources: instance.Spec.Resources, LivenessProbe: ovsVswitchdLivenessProbe, @@ -312,6 +334,9 @@ func CreateOVSDaemonSet( }, } + maxUnavailable := intstr.FromInt32(0) + maxSurge := intstr.FromInt32(1) + daemonset := &appsv1.DaemonSet{ ObjectMeta: metav1.ObjectMeta{ Name: ovnv1.ServiceNameOVS, @@ -327,9 +352,17 @@ func CreateOVSDaemonSet( }, Spec: corev1.PodSpec{ ServiceAccountName: instance.RbacResourceName(), + HostPID: true, InitContainers: initContainers, Containers: containers, - Volumes: GetOVSVolumes(instance.Name, instance.Namespace), + Volumes: append(GetOVSVolumes(instance.Name, instance.Namespace), volumes...), + }, + }, + UpdateStrategy: appsv1.DaemonSetUpdateStrategy{ + Type: appsv1.RollingUpdateDaemonSetStrategyType, + RollingUpdate: &appsv1.RollingUpdateDaemonSet{ + MaxUnavailable: &maxUnavailable, + MaxSurge: &maxSurge, }, }, }, diff --git a/templates/ovncontroller/bin/init-ovsdb-server.sh b/templates/ovncontroller/bin/init-ovsdb-server.sh index ef17e3c3..10750eff 100755 --- 
a/templates/ovncontroller/bin/init-ovsdb-server.sh +++ b/templates/ovncontroller/bin/init-ovsdb-server.sh @@ -18,15 +18,71 @@ set -ex source $(dirname $0)/functions trap wait_for_db_creation EXIT +function init_ovsdb_server { + # Initialize or upgrade database if needed + CTL_ARGS="--system-id=random --no-ovs-vswitchd" + /usr/share/openvswitch/scripts/ovs-ctl start $CTL_ARGS + /usr/share/openvswitch/scripts/ovs-ctl stop $CTL_ARGS + + if [ ! -f /var/lib/openvswitch/already_executed ]; then + # If file was not present, set status INIT + echo "INIT" > /var/lib/openvswitch/already_executed + fi + + wait_for_db_creation + trap - EXIT +} + # If db file is empty, remove it; otherwise service won't start. # See https://issues.redhat.com/browse/FDP-689 for more details. if ! [ -s ${DB_FILE} ]; then rm -f ${DB_FILE} fi -# Initialize or upgrade database if needed -CTL_ARGS="--system-id=random --no-ovs-vswitchd" -/usr/share/openvswitch/scripts/ovs-ctl start $CTL_ARGS -/usr/share/openvswitch/scripts/ovs-ctl stop $CTL_ARGS -wait_for_db_creation -trap - EXIT +# Check if the state file exists; if not, this is the first execution +if [ -f /var/lib/openvswitch/already_executed ]; then + if [ {{ .TLS }} == "Enabled" ]; then + # TLS is used + TLSOptions="--certificate=/etc/pki/tls/certs/ovndb.crt --private-key=/etc/pki/tls/private/ovndb.key --ca-cert=/etc/pki/tls/certs/ovndbca.crt" + DBOptions="--db ssl:ovsdbserver-nb.openstack.svc.cluster.local:6641" + else + # Normal TCP is used + TLSOptions="" + DBOptions="--db tcp:ovsdbserver-nb.openstack.svc.cluster.local:6641" + fi + + # Need to double check that ovsdb-server and vswitchd are actually running + # (That pod was not unhealthy and it got destroyed) + # In the following steps we need ovsdb-server to be running, check pid file + if [ !
-f /run/openvswitch/ovsdb-server.pid ]; then + # No PID file, start as normal + echo "No PID file found, init ovsdb_server as it's the only pod" + init_ovsdb_server + exit 0 + fi + # File is created, no need to run ovs-ctl + # Change state to "UPDATE" + echo "UPDATE" > /var/lib/openvswitch/already_executed + # Clear possible leftovers of past executions + ## Need to lower chassis priority + # First get the system-id + chassis_id=$(ovs-vsctl get Open_Vswitch . external_ids:system-id) + nb_output=$(ovn-nbctl --no-leader-only $DBOptions $TLSOptions --columns=_uuid,priority find Gateway_Chassis chassis_name=$chassis_id) + # Check that nbctl was executed correctly + if [ $? -ne 0 ]; then + echo "ERROR: ovn-nbctl find command failed" + exit 1 + fi + row_uuid=$(echo "$nb_output" | grep "_uuid" | cut -d':' -f2 | xargs) + priority=$(echo "$nb_output" | grep "priority" | cut -d':' -f2 | xargs) + # Save priority to be able to restore it later (It's overwriting, not appending, hence no check) + echo $priority > /var/lib/openvswitch/old_priority + # Set lower priority (lowest value possible 0) + ovn-nbctl --no-leader-only $DBOptions $TLSOptions set Gateway_Chassis $row_uuid priority=0 + # Check that nbctl was executed correctly + if [ $?
-ne 0 ]; then + echo "ERROR: ovn-nbctl set command failed" + exit 1 + fi + exit 0 +fi diff --git a/templates/ovncontroller/bin/start-ovsdb-server.sh b/templates/ovncontroller/bin/start-ovsdb-server.sh index 6c87f0a4..bf702950 100755 --- a/templates/ovncontroller/bin/start-ovsdb-server.sh +++ b/templates/ovncontroller/bin/start-ovsdb-server.sh @@ -17,12 +17,51 @@ set -ex source $(dirname $0)/functions +echo "start ovsdb-server" + +# Check state +if [ -f /var/lib/openvswitch/already_executed ]; then + if [ $(cat /var/lib/openvswitch/already_executed) == "UPDATE" ]; then + echo "In a middle of an upgrade" + # Need to stop vswitch and dbserver + # First stop vswitchd + vswitchd_pid=$(cat /run/openvswitch/ovs-vswitchd.pid) + # Stop vswitch + echo "stopping vswitchd" + bash /usr/local/bin/container-scripts/stop-vswitchd.sh + echo "Done, stopped vswitchd" + # Wait for vswitchd to end checking status + while true; do + if [ $(cat /var/lib/openvswitch/already_executed) == "RESTART_VSWITCHD" ]; then + break + fi + sleep 0.1 + done + echo "Status is already RESTART_VSWITCHD" + bash /usr/local/bin/container-scripts/stop-ovsdb-server.sh + echo "Done, stopped ovsdb-server" + # vswitchd stopped + # We still need to run the ovsdb-server in this new container, this can be done + # with the flag --overwrite-pidfile but we need to ignore the next SIGTERM that + # openshift will send, creating file to noop the stop-ovsdb-server.sh + echo "setting flag to skip ovsdb-server stop" + touch /var/lib/openvswitch/skip_stop_ovsdbserver + else + # It could happen that ovsdb-server or ovs-vswitchd pod can't start correctly or can't get to running state + # this would cause this script to be run with already_executed with a state different than "UPDATE" + : + fi +fi + # Remove the obsolete semaphore file in case it still exists. 
cleanup_ovsdb_server_semaphore +# Set state to "OVSDB_SERVER" +echo "OVSDB_SERVER" > /var/lib/openvswitch/already_executed + # Start the service ovsdb-server ${DB_FILE} \ - --pidfile \ + --pidfile --overwrite-pidfile \ --remote=punix:/var/run/openvswitch/db.sock \ --private-key=db:Open_vSwitch,SSL,private_key \ --certificate=db:Open_vSwitch,SSL,certificate \ diff --git a/templates/ovncontroller/bin/start-vswitchd.sh b/templates/ovncontroller/bin/start-vswitchd.sh index c9ee0a24..108609c7 100755 --- a/templates/ovncontroller/bin/start-vswitchd.sh +++ b/templates/ovncontroller/bin/start-vswitchd.sh @@ -14,6 +14,33 @@ # License for the specific language governing permissions and limitations # under the License. +# Check which connection is used +if [ {{ .TLS }} == "Enabled" ]; then + # TLS is used + TLSOptions="--certificate=/etc/pki/tls/certs/ovndb.crt --private-key=/etc/pki/tls/private/ovndb.key --ca-cert=/etc/pki/tls/certs/ovndbca.crt" + DBOptions="--db ssl:ovsdbserver-nb.openstack.svc.cluster.local:6641" +else + # Normal TCP is used + TLSOptions="" + DBOptions="--db tcp:ovsdbserver-nb.openstack.svc.cluster.local:6641" +fi + + +# Check if we're doing an update +echo "Check if we're doing an update" +if [ -f /var/lib/openvswitch/already_executed ]; then + while true; do + if [ $(cat /var/lib/openvswitch/already_executed) == "OVSDB_SERVER" ]; then + break + fi + if [ $(cat /var/lib/openvswitch/already_executed) == "INIT" ]; then + break + fi + sleep 0.1 + done + echo "OVSDBSERVER Already up, start script" +fi + source $(dirname $0)/functions wait_for_ovsdb_server @@ -30,7 +57,7 @@ ovs-vsctl --no-wait set open_vswitch . other_config:flow-restore-wait=true # It's safe to start vswitchd now. Do it. # --detach to allow the execution to continue to restoring the flows. -/usr/sbin/ovs-vswitchd --pidfile --mlockall --detach +/usr/sbin/ovs-vswitchd --pidfile --overwrite-pidfile --mlockall --detach # Restore saved flows. 
if [ -f $FLOWS_RESTORE_SCRIPT ]; then @@ -49,6 +76,30 @@ cleanup_flows_backup # Now, inform vswitchd that we are done. ovs-vsctl remove open_vswitch . other_config flow-restore-wait +# Restore the priority if this was changed during the update +if [ -f /var/lib/openvswitch/old_priority ]; then + echo "Using DBOptions: $DBOptions" + echo "Using TLSOptions: $TLSOptions" + priority=$(cat /var/lib/openvswitch/old_priority) + echo "Restoring old priority, which was: $priority" + chassis_id=$(ovs-vsctl get Open_Vswitch . external_ids:system-id) + nb_output=$(ovn-nbctl --no-leader-only $DBOptions $TLSOptions --columns=_uuid,priority find Gateway_Chassis chassis_name=$chassis_id) + err=$? + if [ $err -ne 0 ]; then + echo "Error while getting gateway chassis uuid $err" + fi + row_uuid=$(echo "$nb_output" | grep "_uuid" | cut -d':' -f2 | xargs) + rm /var/lib/openvswitch/old_priority + ovn-nbctl --no-leader-only $DBOptions $TLSOptions set Gateway_Chassis $row_uuid priority=$priority + err=$? + if [ $err -ne 0 ]; then + echo "Error while setting gateway chassis priority ($priority), error: $err" + fi +fi + +# Set state to "RUNNING" +echo "RUNNING" > /var/lib/openvswitch/already_executed + # This is container command script. Block it from exiting, otherwise k8s will # restart the container again. sleep infinity diff --git a/templates/ovncontroller/bin/stop-ovsdb-server.sh b/templates/ovncontroller/bin/stop-ovsdb-server.sh index 2d15c507..0e1923d7 100755 --- a/templates/ovncontroller/bin/stop-ovsdb-server.sh +++ b/templates/ovncontroller/bin/stop-ovsdb-server.sh @@ -14,6 +14,12 @@ # License for the specific language governing permissions and limitations # under the License. +if [ -f /var/lib/openvswitch/skip_stop_ovsdbserver ]; then + echo "Skipping stop script" + rm /var/lib/openvswitch/skip_stop_ovsdbserver + exit 0 +fi + set -ex source $(dirname $0)/functions @@ -26,5 +32,13 @@ while [ ! 
-f $SAFE_TO_STOP_OVSDB_SERVER_SEMAPHORE ]; do done cleanup_ovsdb_server_semaphore +while [ $(cat /var/lib/openvswitch/already_executed) != "RESTART_VSWITCHD" ]; do + # Wait for the vswitchd container to finish its stop process + sleep 0.1 +done + # Now it's safe to stop db server. Do it. /usr/share/openvswitch/scripts/ovs-ctl stop --no-ovs-vswitchd + +# Update state to "RESTART_DBSERVER" +echo "RESTART_DBSERVER" > /var/lib/openvswitch/already_executed diff --git a/templates/ovncontroller/bin/stop-vswitchd.sh b/templates/ovncontroller/bin/stop-vswitchd.sh index d12f74c9..d1a6a74e 100755 --- a/templates/ovncontroller/bin/stop-vswitchd.sh +++ b/templates/ovncontroller/bin/stop-vswitchd.sh @@ -14,6 +14,11 @@ # License for the specific language governing permissions and limitations # under the License. +if [ -f /var/lib/openvswitch/skip_stop_vswitchd ]; then + echo "Skipping stop script" + rm /var/lib/openvswitch/skip_stop_vswitchd + exit 0 +fi set -ex source $(dirname $0)/functions @@ -32,5 +37,16 @@ TMPDIR=$FLOWS_RESTORE_DIR /usr/share/openvswitch/scripts/ovs-save save-flows $br # unlocks the db preStop script, working as a semaphore touch $SAFE_TO_STOP_OVSDB_SERVER_SEMAPHORE +# If it's coming from an update, this stop was triggered by the +# new pod, so the SIGTERM from openshift is still to come. To avoid +# running this script twice, create file to skip the following SIGTERM signal +if [ -f /var/lib/openvswitch/already_executed ]; then + if [ $(cat /var/lib/openvswitch/already_executed) == "UPDATE" ]; then + touch /var/lib/openvswitch/skip_stop_vswitchd + fi +fi +# Update state to "RESTART_VSWITCHD" +echo "RESTART_VSWITCHD" > /var/lib/openvswitch/already_executed + # Finally, stop vswitchd. /usr/share/openvswitch/scripts/ovs-ctl stop --no-ovsdb-server