bind 9.4.3b2: too many open files

JINMEI Tatuya / 神明達哉 Jinmei_Tatuya at isc.org
Tue Jul 15 02:14:00 UTC 2008


At Mon, 14 Jul 2008 14:32:13 +0200,
Tom Mueller <t.mueller at strato-rz.de> wrote:

> > This indicates that it's a pretty busy server, so it's possible that
> > all of the available 1024 ports are consumed.  Please increase the max
> > open files to 4096 and retry.
> >   
> Yes, that fixed 'too many files'.
> I quite often get these messages:
> 
> 14-Jul-2008 14:02:35.959 general: socket.c:523: unexpected error:
> 14-Jul-2008 14:02:35.959 general: socket.c:523: unexpected error:
> 14-Jul-2008 14:02:35.959 general: epoll_ctl(DEL), 398: Bad file descriptor
> 14-Jul-2008 14:02:35.959 general: epoll_ctl(DEL), 398: Bad file descriptor
> 
> Nothing like this happens on another machine under lower query load.
> 
> Why does this happen, and how serious is the resulting impact?

I have a possible theory in mind, but I'd like to be sure about
how this actually happened.  Could you apply the patch copied below
and run it again?  (It adds a bit more detailed debug information to
the above log message, and should otherwise be harmless.)

Thanks,

---
JINMEI, Tatuya
Internet Systems Consortium, Inc.

Index: socket.c
===================================================================
RCS file: /proj/cvs/prod/bind9/lib/isc/unix/socket.c,v
retrieving revision 1.237.18.38
diff -u -r1.237.18.38 socket.c
--- socket.c	3 Jul 2008 00:14:40 -0000	1.237.18.38
+++ socket.c	15 Jul 2008 02:08:54 -0000
@@ -491,7 +491,7 @@
 }
 
 static inline isc_result_t
-unwatch_fd(isc_socketmgr_t *manager, int fd, int msg) {
+unwatch_fd(isc_socketmgr_t *manager, int fd, int msg, int tag) {
 	isc_result_t result = ISC_R_SUCCESS;
 
 #ifdef USE_KQUEUE
@@ -521,7 +521,7 @@
 		char strbuf[ISC_STRERRORSIZE];
 		isc__strerror(errno, strbuf, sizeof(strbuf));
 		UNEXPECTED_ERROR(__FILE__, __LINE__,
-				 "epoll_ctl(DEL), %d: %s", fd, strbuf);
+				 "epoll_ctl(DEL,%d), %d: %s", fd, tag, strbuf);
 		result = ISC_R_UNEXPECTED;
 	}
 	return (result);
@@ -602,8 +602,8 @@
 		 * fdlock; otherwise it could cause deadlock due to a lock order
 		 * reversal.
 		 */
-		(void)unwatch_fd(manager, fd, SELECT_POKE_READ);
-		(void)unwatch_fd(manager, fd, SELECT_POKE_WRITE);
+		(void)unwatch_fd(manager, fd, SELECT_POKE_READ, 0);
+		(void)unwatch_fd(manager, fd, SELECT_POKE_WRITE, 1);
 		(void)close(fd);
 		return;
 	}
@@ -2693,8 +2693,8 @@
 		manager->fdstate[fd] = CLOSED;
 		UNLOCK(&manager->fdlock[lockid]);
 
-		(void)unwatch_fd(manager, fd, SELECT_POKE_READ);
-		(void)unwatch_fd(manager, fd, SELECT_POKE_WRITE);
+		(void)unwatch_fd(manager, fd, SELECT_POKE_READ, 2);
+		(void)unwatch_fd(manager, fd, SELECT_POKE_WRITE, 3);
 		(void)close(fd);
 		return;
 	}
@@ -2704,7 +2704,7 @@
 	unlock_sock = ISC_FALSE;
 	if (readable) {
 		if (sock == NULL) {
-			(void)unwatch_fd(manager, fd, SELECT_POKE_READ);
+			(void)unwatch_fd(manager, fd, SELECT_POKE_READ, 4);
 			goto check_write;
 		}
 		unlock_sock = ISC_TRUE;
@@ -2715,12 +2715,12 @@
 			else
 				dispatch_recv(sock);
 		}
-		(void)unwatch_fd(manager, fd, SELECT_POKE_READ);
+		(void)unwatch_fd(manager, fd, SELECT_POKE_READ, 5);
 	}
 check_write:
 	if (writeable) {
 		if (sock == NULL) {
-			(void)unwatch_fd(manager, fd, SELECT_POKE_WRITE);
+			(void)unwatch_fd(manager, fd, SELECT_POKE_WRITE, 6);
 			return;
 		}
 		if (!unlock_sock) {
@@ -2733,7 +2733,7 @@
 			else
 				dispatch_send(sock);
 		}
-		(void)unwatch_fd(manager, fd, SELECT_POKE_WRITE);
+		(void)unwatch_fd(manager, fd, SELECT_POKE_WRITE, 7);
 	}
 	if (unlock_sock)
 		UNLOCK(&sock->lock);
@@ -3106,7 +3106,7 @@
 #ifdef ISC_PLATFORM_USETHREADS
 	isc_result_t result;
 
-	result = unwatch_fd(manager, manager->pipe_fds[0], SELECT_POKE_READ);
+	result = unwatch_fd(manager, manager->pipe_fds[0], SELECT_POKE_READ, 8);
 	if (result != ISC_R_SUCCESS) {
 		UNEXPECTED_ERROR(__FILE__, __LINE__,
 				 "epoll_ctl(DEL) %s",


More information about the bind-users mailing list