Secondary server in failover fails to come out of recover state
Oscar Ricardo Silva
osilva at scuff.cc.utexas.edu
Fri May 10 21:00:49 UTC 2013
I have changed the split value to 128 and raised the MCLT to 300. After
the change, both servers were reloaded and came up normally. Twenty
minutes later, someone on staff made a change and the primary returned
to a normal state but then the secondary stayed in recover mode as we've
seen before.
Here are the logs and the configuration files (including some of the
pool statements). The primary is taken down at 15:09:13 and returns to
normal at 15:14:13. The secondary is then taken down at 15:14:44, but
the last message was received at 15:15:47 (the logs were copied at
15:33:00).
Logs from primary:
15:09:13 primary-dhcp dhcpd: failover peer dhcp: I move from shutdown to
recover
15:10:13 primary-dhcp dhcpd: failover peer dhcp: I move from recover to
startup
15:10:13 primary-dhcp dhcpd: failover peer dhcp: I move from startup to
recover
15:13:31 primary-dhcp dhcpd: failover peer dhcp: peer update completed.
15:13:31 primary-dhcp dhcpd: failover peer dhcp: I move from recover to
recover-wait
15:14:13 primary-dhcp dhcpd: failover peer dhcp: I move from
recover-wait to recover-done
15:14:13 primary-dhcp dhcpd: failover peer dhcp: peer moves from
partner-down to normal
15:14:13 primary-dhcp dhcpd: failover peer dhcp: I move from
recover-done to normal
15:14:44 primary-dhcp dhcpd: failover peer dhcp: peer moves from normal
to shutdown
15:14:44 primary-dhcp dhcpd: failover peer dhcp: I move from normal to
partner-down
15:14:45 primary-dhcp dhcpd: peer dhcp: disconnected
15:15:47 primary-dhcp dhcpd: failover peer dhcp: peer moves from
shutdown to recover
15:15:47 primary-dhcp dhcpd: failover peer dhcp: peer moves from recover
to recover
Logs from secondary:
15:09:12 secondary-dhcp dhcpd: failover peer dhcp: peer moves from
normal to shutdown
15:09:12 secondary-dhcp dhcpd: failover peer dhcp: I move from normal to
partner-down
15:09:13 secondary-dhcp dhcpd: peer dhcp: disconnected
15:10:13 secondary-dhcp dhcpd: failover peer dhcp: peer moves from
shutdown to recover
15:10:13 secondary-dhcp dhcpd: failover peer dhcp: peer moves from
recover to recover
15:13:31 secondary-dhcp dhcpd: failover peer dhcp: peer moves from
recover to recover-wait
15:14:13 secondary-dhcp dhcpd: failover peer dhcp: peer moves from
recover-wait to recover-done
15:14:13 secondary-dhcp dhcpd: failover peer dhcp: I move from
partner-down to normal
15:14:13 secondary-dhcp dhcpd: failover peer dhcp: peer moves from
recover-done to normal
15:14:44 secondary-dhcp dhcpd: failover peer dhcp: I move from normal to
shutdown
15:14:44 secondary-dhcp dhcpd: failover peer dhcp: peer moves from
normal to partner-down
15:14:45 secondary-dhcp dhcpd: failover peer dhcp: I move from shutdown
to recover
15:15:47 secondary-dhcp dhcpd: failover peer dhcp: I move from recover
to startup
15:15:47 secondary-dhcp dhcpd: failover peer dhcp: I move from startup
to recover
Primary:
option domain-name-servers 192.168.50.41, 192.168.50.40 ;
option ntp-servers 192.168.50.40, 192.168.50.41;
default-lease-time 172800;
max-lease-time 172800;
one-lease-per-client true;
ddns-update-style ad-hoc;
ddns-updates off;
authoritative;
key-off-mac-address true;
if substring (option dhcp-client-identifier, 0, 5) = 01:52:41:53:20 {
deny booting;
}
option voip-tftp-server-address code 150 = array of ip-address ;
set vendor-string = option vendor-class-identifier;
failover peer "dhcp" {
primary;
address 192.168.100.2;
port 520;
peer address 192.168.101.2;
peer port 520;
max-response-delay 60;
max-unacked-updates 10;
mclt 300;
split 128;
load balance max seconds 5;
}
subnet 192.168.100.0 netmask 255.255.255.224 {
}
subnet 192.168.75.128 netmask 255.255.255.128 {
pool {
range 192.168.75.130 192.168.75.254;
deny dynamic bootp clients ;
failover peer "dhcp" ;
}
option subnet-mask 255.255.255.128;
option broadcast-address 255.255.255.255;
option routers 192.168.75.129;
}
subnet 192.168.235.0 netmask 255.255.255.128 {
pool {
range 192.168.235.13 192.168.235.126;
deny dynamic bootp clients ;
failover peer "dhcp" ;
}
option subnet-mask 255.255.255.128;
option broadcast-address 255.255.255.255;
option routers 192.168.235.1;
}
Secondary:
option domain-name-servers 192.168.50.40, 192.168.50.41 ;
option ntp-servers 192.168.50.40, 192.168.50.41;
default-lease-time 172800;
max-lease-time 172800;
one-lease-per-client true;
ddns-update-style ad-hoc;
ddns-updates off;
authoritative;
key-off-mac-address true;
if substring (option dhcp-client-identifier, 0, 5) = 01:52:41:53:20 {
deny booting;
}
option voip-tftp-server-address code 150 = array of ip-address ;
set vendor-string = option vendor-class-identifier;
failover peer "dhcp" {
secondary;
address 192.168.101.2;
port 520;
peer address 192.168.100.2;
peer port 520;
max-response-delay 60;
max-unacked-updates 10;
load balance max seconds 5;
}
subnet 192.168.101.0 netmask 255.255.255.224 {
}
subnet 192.168.75.128 netmask 255.255.255.128 {
pool {
range 192.168.75.130 192.168.75.254;
deny dynamic bootp clients ;
failover peer "dhcp" ;
}
option subnet-mask 255.255.255.128;
option broadcast-address 255.255.255.255;
option routers 192.168.75.129;
}
subnet 192.168.235.0 netmask 255.255.255.128 {
pool {
range 192.168.235.13 192.168.235.126;
deny dynamic bootp clients ;
failover peer "dhcp" ;
}
option subnet-mask 255.255.255.128;
option broadcast-address 255.255.255.255;
option routers 192.168.235.1;
}
On 04/30/2013 03:37 PM, Steven Carr wrote:
> Can't see anything in the config that is suspect to be honest.
>
> I assume you have a 'failover peer "dhcp";' statement inside each pool
> statement? (that's why I asked for full config)
>
> Personally I would change mclt to 3600 and split to 128 (there are
> only a handful of situations where I would see split set to 0 or 255,
> the main one being when you have branch networks with a local DHCP
> server and need a centralised "backup" DHCP in case the branch fails).
>
> You could also try changing the port and peer port numbers (maybe
> something >1024?) just on the off chance that it is being
> blocked/terminated by something else, and it would be worth getting
> packet captures going on each system to see exactly what comms are
> happening between the two during the startup.
>
> The only other thought I have is that it could be something to do with
> the patch you have written. I'm not sure what impact this has had on the
> data being written out to the leases file or being synchronised (you
> might see this in a packet capture) but it could be choking on
> something in that data that wasn't originally meant to be in there.
>
> If you do change the split value then I would also flip the order of
> domain-name-servers on the secondary server to load balance across the
> two DNS servers, rather than dumping all queries on the first DNS
> server.
>
> Steve
> _______________________________________________
> dhcp-users mailing list
> dhcp-users at lists.isc.org
> https://lists.isc.org/mailman/listinfo/dhcp-users
>
More information about the dhcp-users
mailing list