[PATCH] Re: Restarting DHCP safely whilst avoiding partner-down state

Terry Burton tez at terryburton.co.uk
Mon May 16 13:11:33 UTC 2016


On 13 May 2016 at 20:30, Terry Burton <tez at terryburton.co.uk> wrote:
>
> So currently there are no trap handlers for SIGTERM or SIGINT and
> therefore no cleanup whatsoever at exit.
>
> There is a compiled-out option ENABLE_GENTLE_SHUTDOWN which installs
> handlers for these signals but when this was activated it implemented
> the harmful semantics of putting the server through a
> recovery+partner-down transition which isn't useful for a quick
> configuration reload:
>
> /* Enable the gentle shutdown signal handling.  Currently this
>    means that on SIGINT or SIGTERM a client will release its
>    address and a server in a failover pair will go through
>    partner down.  Both of which can be undesireable in some
>    situations.  We plan to revisit this feature and may
>    make non-backwards compatible changes including the
>    removal of this define.  Use at your own risk.  */
> /* #define ENABLE_GENTLE_SHUTDOWN */
>
> #if defined(ENABLE_GENTLE_SHUTDOWN)
>         /* no signal handlers until we deal with the side effects */
>         /* install signal handlers */
>         signal(SIGINT, dhcp_signal_handler);   /* control-c */
>         signal(SIGTERM, dhcp_signal_handler);  /* kill */
> #endif
>
> Having a more basic signal handler that defers the exit in order to
> continue to write out an outstanding lease seems better. Perhaps once
> could even differentiate these exit semantics based on SIGINT vs
> SIGTERM.
>
> If someone who can speak for ISC is able to indicate whether this
> would be a sensible approach then I am happy to work up a patch.

Dear ISC DHCP devs,

Please could you review and if suitable pull the following:

https://github.com/terryburton/isc-dhcp/commit/90f6e8617f13b7bc9575d275bd37b7a418b6139d

As a patch (also attached):

https://github.com/terryburton/isc-dhcp/commit/90f6e8617f13b7bc9575d275bd37b7a418b6139d.diff

Summary below...


Many thanks,

Terry

----

Safely shutdown dhcpd when signalled.

This patch reintroduces signal handlers to ensure that dhcpd safely exits when
signalled mitigating the existing risk that the leases file is truncated at
shutdown.

It provides the expected behaviour that SIGTERM and SIGINT will cause a safe
shutdown that does not place a failover pair into recovery on restart, so they
remain suitable for a basic configuration reload - only safer.

Using OMAPI to set the state of a control object to shutdown (2) retains the
existing behaviour of placing the peer into partner-down and performing a
recovery on restart. Equivalently, SIGUSR2 will now shutdown dhcpd and perform
a recovery on restart.

SIGTERM, SIGINT - Clean shutdown, suitable for a configuration reload.
SIGUSR2, OMAPI  - Put failover peer into partner-down state and exit performing
                  a recovery on startup, suitable for an extended outage.

Since this is intended to be a sane set of defaults the GENTLE_SHUTDOWN define
no longer applies to dhcpd, i.e. safe handling of signals for selectable
operational semantics is provided by default.
-------------- next part --------------
diff --git a/server/dhcpd.c b/server/dhcpd.c
index a42d759..31d6acc 100644
--- a/server/dhcpd.c
+++ b/server/dhcpd.c
@@ -921,12 +921,9 @@ main(int argc, char **argv) {
 	omapi_set_int_value ((omapi_object_t *)dhcp_control_object,
 			     (omapi_object_t *)0, "state", server_running);
 
-#if defined(ENABLE_GENTLE_SHUTDOWN)
-	/* no signal handlers until we deal with the side effects */
-        /* install signal handlers */
 	signal(SIGINT, dhcp_signal_handler);   /* control-c */
-	signal(SIGTERM, dhcp_signal_handler);  /* kill */
-#endif
+	signal(SIGTERM, dhcp_signal_handler);  /* term, fast shutdown */
+	signal(SIGUSR2, dhcp_signal_handler);  /* perform recovery */
 
 	/* Log that we are about to start working */
 	log_info("Server starting service.");
@@ -1402,6 +1399,9 @@ int dhcpd_interface_setup_hook (struct interface_info *ip, struct iaddr *ia)
 static TIME shutdown_time;
 static int omapi_connection_count;
 enum dhcp_shutdown_state shutdown_state;
+#if defined (FAILOVER_PROTOCOL)
+static isc_boolean_t shutdown_do_recovery_on_startup;
+#endif
 
 isc_result_t dhcp_io_shutdown (omapi_object_t *obj, void *foo)
 {
@@ -1481,38 +1481,31 @@ static isc_result_t dhcp_io_shutdown_countdown (void *vlp)
 	}
 
 #if defined (FAILOVER_PROTOCOL)
-	/* Set all failover peers into the shutdown state. */
-	if (shutdown_state == shutdown_dhcp) {
-	    for (state = failover_states; state; state = state -> next) {
-		if (state -> me.state == normal) {
-		    dhcp_failover_set_state (state, shut_down);
-		    failover_connection_count++;
-		}
-		if (state -> me.state == shut_down &&
-		    state -> partner.state != partner_down)
-			failover_connection_count++;
+	if (shutdown_do_recovery_on_startup == ISC_TRUE) {
+	    /* Set all failover peers into the shutdown state. */
+	    if (shutdown_state == shutdown_dhcp) {
+	        for (state = failover_states; state; state = state -> next) {
+		    if (state -> me.state == normal) {
+		       dhcp_failover_set_state (state, shut_down);
+		       failover_connection_count++;
+		    }
+		    if (state -> me.state == shut_down &&
+		        state -> partner.state != partner_down)
+			    failover_connection_count++;
+	        }
 	    }
-	}
-
-	if (shutdown_state == shutdown_done) {
-	    for (state = failover_states; state; state = state -> next) {
-		if (state -> me.state == shut_down) {
-		    if (state -> link_to_peer)
-			dhcp_failover_link_dereference (&state -> link_to_peer,
-							MDL);
-		    dhcp_failover_set_state (state, recover);
-		}
+	    if (shutdown_state == shutdown_done) {
+	        for (state = failover_states; state; state = state -> next) {
+		    if (state -> me.state == shut_down) {
+		        if (state -> link_to_peer)
+			    dhcp_failover_link_dereference (&state -> link_to_peer,
+			    				    MDL);
+		        dhcp_failover_set_state (state, recover);
+		    }
+	        }
 	    }
-#if defined (DEBUG_MEMORY_LEAKAGE) && \
-		defined (DEBUG_MEMORY_LEAKAGE_ON_EXIT)
-	    free_everything ();
-	    omapi_print_dmalloc_usage_by_caller ();
+	}
 #endif
-	    if (no_pid_file == ISC_FALSE)
-		    (void) unlink(path_dhcpd_pid);
-	    exit (0);
-	}		
-#else
 	if (shutdown_state == shutdown_done) {
 #if defined (DEBUG_MEMORY_LEAKAGE) && \
 		defined (DEBUG_MEMORY_LEAKAGE_ON_EXIT)
@@ -1523,7 +1516,6 @@ static isc_result_t dhcp_io_shutdown_countdown (void *vlp)
 			(void) unlink(path_dhcpd_pid);
 		exit (0);
 	}
-#endif
 	if (shutdown_state == shutdown_dhcp &&
 #if defined(FAILOVER_PROTOCOL)
 	    !failover_connection_count &&
@@ -1552,14 +1544,25 @@ isc_result_t dhcp_set_control_state (control_object_state_t oldstate,
 		return ISC_R_SUCCESS;
 	shutdown_time = cur_time;
 	shutdown_state = shutdown_listeners;
+#if defined (FAILOVER_PROTOCOL)
+        shutdown_do_recovery_on_startup = ISC_FALSE;
+#endif
 	/* Called by user. */
 	if (shutdown_signal == 0) {
 		shutdown_signal = SIGUSR1;
+#if defined (FAILOVER_PROTOCOL)
+		shutdown_do_recovery_on_startup = ISC_TRUE;
+#endif
 		dhcp_io_shutdown_countdown (0);
 		return ISC_R_SUCCESS;
 	}
 	/* Called on signal. */
 	log_info("Received signal %d, initiating shutdown.", shutdown_signal);
+#if defined (FAILOVER_PROTOCOL)
+        if (shutdown_signal == SIGUSR2) {
+            shutdown_do_recovery_on_startup = ISC_TRUE;
+        }
+#endif
 	shutdown_signal = SIGUSR1;
 	
 	/*


More information about the dhcp-users mailing list