/* * Copyright (C) Igor Sysoev * Copyright (C) Nginx, Inc. */ #include #include #include #if (NGX_TEST_BUILD_EPOLL) /* epoll declarations */ #define EPOLLIN 0x001 #define EPOLLPRI 0x002 #define EPOLLOUT 0x004 #define EPOLLRDNORM 0x040 #define EPOLLRDBAND 0x080 #define EPOLLWRNORM 0x100 #define EPOLLWRBAND 0x200 #define EPOLLMSG 0x400 #define EPOLLERR 0x008 #define EPOLLHUP 0x010 #define EPOLLET 0x80000000 #define EPOLLONESHOT 0x40000000 #define EPOLL_CTL_ADD 1 #define EPOLL_CTL_DEL 2 #define EPOLL_CTL_MOD 3 typedef union epoll_data { void *ptr; int fd; uint32_t u32; uint64_t u64; } epoll_data_t; struct epoll_event { uint32_t events; epoll_data_t data; }; int epoll_create(int size); int epoll_create(int size) { return -1; } int epoll_ctl(int epfd, int op, int fd, struct epoll_event *event); int epoll_ctl(int epfd, int op, int fd, struct epoll_event *event) { return -1; } int epoll_wait(int epfd, struct epoll_event *events, int nevents, int timeout); int epoll_wait(int epfd, struct epoll_event *events, int nevents, int timeout) { return -1; } #if (NGX_HAVE_FILE_AIO) #define SYS_io_setup 245 #define SYS_io_destroy 246 #define SYS_io_getevents 247 #define SYS_eventfd 323 typedef u_int aio_context_t; struct io_event { uint64_t data; /* the data field from the iocb */ uint64_t obj; /* what iocb this event came from */ int64_t res; /* result code for this event */ int64_t res2; /* secondary result */ }; #endif #endif typedef struct { ngx_uint_t events; ngx_uint_t aio_requests; } ngx_epoll_conf_t; static ngx_int_t ngx_epoll_init(ngx_cycle_t *cycle, ngx_msec_t timer); static void ngx_epoll_done(ngx_cycle_t *cycle); static ngx_int_t ngx_epoll_add_event(ngx_event_t *ev, ngx_int_t event, ngx_uint_t flags); static ngx_int_t ngx_epoll_del_event(ngx_event_t *ev, ngx_int_t event, ngx_uint_t flags); static ngx_int_t ngx_epoll_add_connection(ngx_connection_t *c); static ngx_int_t ngx_epoll_del_connection(ngx_connection_t *c, ngx_uint_t flags); static ngx_int_t ngx_epoll_process_events(ngx_cycle_t *cycle, ngx_msec_t timer, ngx_uint_t flags); #if (NGX_HAVE_FILE_AIO) static void ngx_epoll_eventfd_handler(ngx_event_t *ev); #endif static void *ngx_epoll_create_conf(ngx_cycle_t *cycle); static char *ngx_epoll_init_conf(ngx_cycle_t *cycle, void *conf); static int ep = -1; static struct epoll_event *event_list; static ngx_uint_t nevents; #if (NGX_HAVE_FILE_AIO) int ngx_eventfd = -1; aio_context_t ngx_aio_ctx = 0; static ngx_event_t ngx_eventfd_event; static ngx_connection_t ngx_eventfd_conn; #endif static ngx_str_t epoll_name = ngx_string("epoll"); static ngx_command_t ngx_epoll_commands[] = { { ngx_string("epoll_events"), NGX_EVENT_CONF|NGX_CONF_TAKE1, ngx_conf_set_num_slot, 0, offsetof(ngx_epoll_conf_t, events), NULL }, { ngx_string("worker_aio_requests"), NGX_EVENT_CONF|NGX_CONF_TAKE1, ngx_conf_set_num_slot, 0, offsetof(ngx_epoll_conf_t, aio_requests), NULL }, ngx_null_command }; ngx_event_module_t ngx_epoll_module_ctx = { &epoll_name, ngx_epoll_create_conf, /* create configuration */ ngx_epoll_init_conf, /* init configuration */ { ngx_epoll_add_event, /* add an event */ ngx_epoll_del_event, /* delete an event */ ngx_epoll_add_event, /* enable an event */ ngx_epoll_del_event, /* disable an event */ ngx_epoll_add_connection, /* add an connection */ ngx_epoll_del_connection, /* delete an connection */ NULL, /* process the changes */ ngx_epoll_process_events, /* process the events */ ngx_epoll_init, /* init the events */ ngx_epoll_done, /* done the events */ } }; ngx_module_t ngx_epoll_module = { NGX_MODULE_V1, &ngx_epoll_module_ctx, /* module context */ ngx_epoll_commands, /* module directives */ NGX_EVENT_MODULE, /* module type */ NULL, /* init master */ NULL, /* init module */ NULL, /* init process */ NULL, /* init thread */ NULL, /* exit thread */ NULL, /* exit process */ NULL, /* exit master */ NGX_MODULE_V1_PADDING }; #if (NGX_HAVE_FILE_AIO) /* * We call io_setup(), io_destroy() io_submit(), and io_getevents() directly * as syscalls instead of libaio usage, because the library header file * supports eventfd() since 0.3.107 version only. * * Also we do not use eventfd() in glibc, because glibc supports it * since 2.8 version and glibc maps two syscalls eventfd() and eventfd2() * into single eventfd() function with different number of parameters. */ static int io_setup(u_int nr_reqs, aio_context_t *ctx) { return syscall(SYS_io_setup, nr_reqs, ctx); } static int io_destroy(aio_context_t ctx) { return syscall(SYS_io_destroy, ctx); } static int io_getevents(aio_context_t ctx, long min_nr, long nr, struct io_event *events, struct timespec *tmo) { return syscall(SYS_io_getevents, ctx, min_nr, nr, events, tmo); } static void ngx_epoll_aio_init(ngx_cycle_t *cycle, ngx_epoll_conf_t *epcf) { int n; struct epoll_event ee; ngx_eventfd = syscall(SYS_eventfd, 0); if (ngx_eventfd == -1) { ngx_log_error(NGX_LOG_EMERG, cycle->log, ngx_errno, "eventfd() failed"); ngx_file_aio = 0; return; } ngx_log_debug1(NGX_LOG_DEBUG_EVENT, cycle->log, 0, "eventfd: %d", ngx_eventfd); n = 1; if (ioctl(ngx_eventfd, FIONBIO, &n) == -1) { ngx_log_error(NGX_LOG_EMERG, cycle->log, ngx_errno, "ioctl(eventfd, FIONBIO) failed"); goto failed; } if (io_setup(epcf->aio_requests, &ngx_aio_ctx) == -1) { ngx_log_error(NGX_LOG_EMERG, cycle->log, ngx_errno, "io_setup() failed"); goto failed; } ngx_eventfd_event.data = &ngx_eventfd_conn; ngx_eventfd_event.handler = ngx_epoll_eventfd_handler; ngx_eventfd_event.log = cycle->log; ngx_eventfd_event.active = 1; ngx_eventfd_conn.fd = ngx_eventfd; ngx_eventfd_conn.read = &ngx_eventfd_event; ngx_eventfd_conn.log = cycle->log; ee.events = EPOLLIN|EPOLLET; ee.data.ptr = &ngx_eventfd_conn; if (epoll_ctl(ep, EPOLL_CTL_ADD, ngx_eventfd, &ee) != -1) { return; } ngx_log_error(NGX_LOG_EMERG, cycle->log, ngx_errno, "epoll_ctl(EPOLL_CTL_ADD, eventfd) failed"); if (io_destroy(ngx_aio_ctx) == -1) { ngx_log_error(NGX_LOG_ALERT, cycle->log, ngx_errno, "io_destroy() failed"); } failed: if (close(ngx_eventfd) == -1) { ngx_log_error(NGX_LOG_ALERT, cycle->log, ngx_errno, "eventfd close() failed"); } ngx_eventfd = -1; ngx_aio_ctx = 0; ngx_file_aio = 0; } #endif static ngx_int_t ngx_epoll_init(ngx_cycle_t *cycle, ngx_msec_t timer) { ngx_epoll_conf_t *epcf; epcf = ngx_event_get_conf(cycle->conf_ctx, ngx_epoll_module); if (ep == -1) { ep = epoll_create(cycle->connection_n / 2); if (ep == -1) { ngx_log_error(NGX_LOG_EMERG, cycle->log, ngx_errno, "epoll_create() failed"); return NGX_ERROR; } #if (NGX_HAVE_FILE_AIO) ngx_epoll_aio_init(cycle, epcf); #endif } if (nevents < epcf->events) { if (event_list) { ngx_free(event_list); } event_list = ngx_alloc(sizeof(struct epoll_event) * epcf->events, cycle->log); if (event_list == NULL) { return NGX_ERROR; } } nevents = epcf->events; ngx_io = ngx_os_io; ngx_event_actions = ngx_epoll_module_ctx.actions; #if (NGX_HAVE_CLEAR_EVENT) ngx_event_flags = NGX_USE_CLEAR_EVENT #else ngx_event_flags = NGX_USE_LEVEL_EVENT #endif |NGX_USE_GREEDY_EVENT |NGX_USE_EPOLL_EVENT; return NGX_OK; } static void ngx_epoll_done(ngx_cycle_t *cycle) { if (close(ep) == -1) { ngx_log_error(NGX_LOG_ALERT, cycle->log, ngx_errno, "epoll close() failed"); } ep = -1; #if (NGX_HAVE_FILE_AIO) if (ngx_eventfd != -1) { if (io_destroy(ngx_aio_ctx) == -1) { ngx_log_error(NGX_LOG_ALERT, cycle->log, ngx_errno, "io_destroy() failed"); } if (close(ngx_eventfd) == -1) { ngx_log_error(NGX_LOG_ALERT, cycle->log, ngx_errno, "eventfd close() failed"); } ngx_eventfd = -1; } ngx_aio_ctx = 0; #endif ngx_free(event_list); event_list = NULL; nevents = 0; } static ngx_int_t ngx_epoll_add_event(ngx_event_t *ev, ngx_int_t event, ngx_uint_t flags) { int op; uint32_t events, prev; ngx_event_t *e; ngx_connection_t *c; struct epoll_event ee; c = ev->data; events = (uint32_t) event; if (event == NGX_READ_EVENT) { e = c->write; prev = EPOLLOUT; #if (NGX_READ_EVENT != EPOLLIN) events = EPOLLIN; #endif } else { e = c->read; prev = EPOLLIN; #if (NGX_WRITE_EVENT != EPOLLOUT) events = EPOLLOUT; #endif } if (e->active) { op = EPOLL_CTL_MOD; events |= prev; } else { op = EPOLL_CTL_ADD; } ee.events = events | (uint32_t) flags; ee.data.ptr = (void *) ((uintptr_t) c | ev->instance); ngx_log_debug3(NGX_LOG_DEBUG_EVENT, ev->log, 0, "epoll add event: fd:%d op:%d ev:%08XD", c->fd, op, ee.events); if (epoll_ctl(ep, op, c->fd, &ee) == -1) { ngx_log_error(NGX_LOG_ALERT, ev->log, ngx_errno, "epoll_ctl(%d, %d) failed", op, c->fd); return NGX_ERROR; } ev->active = 1; #if 0 ev->oneshot = (flags & NGX_ONESHOT_EVENT) ? 1 : 0; #endif return NGX_OK; } static ngx_int_t ngx_epoll_del_event(ngx_event_t *ev, ngx_int_t event, ngx_uint_t flags) { int op; uint32_t prev; ngx_event_t *e; ngx_connection_t *c; struct epoll_event ee; /* * when the file descriptor is closed, the epoll automatically deletes * it from its queue, so we do not need to delete explicitly the event * before the closing the file descriptor */ if (flags & NGX_CLOSE_EVENT) { ev->active = 0; return NGX_OK; } c = ev->data; if (event == NGX_READ_EVENT) { e = c->write; prev = EPOLLOUT; } else { e = c->read; prev = EPOLLIN; } if (e->active) { op = EPOLL_CTL_MOD; ee.events = prev | (uint32_t) flags; ee.data.ptr = (void *) ((uintptr_t) c | ev->instance); } else { op = EPOLL_CTL_DEL; ee.events = 0; ee.data.ptr = NULL; } ngx_log_debug3(NGX_LOG_DEBUG_EVENT, ev->log, 0, "epoll del event: fd:%d op:%d ev:%08XD", c->fd, op, ee.events); if (epoll_ctl(ep, op, c->fd, &ee) == -1) { ngx_log_error(NGX_LOG_ALERT, ev->log, ngx_errno, "epoll_ctl(%d, %d) failed", op, c->fd); return NGX_ERROR; } ev->active = 0; return NGX_OK; } static ngx_int_t ngx_epoll_add_connection(ngx_connection_t *c) { struct epoll_event ee; ee.events = EPOLLIN|EPOLLOUT|EPOLLET; ee.data.ptr = (void *) ((uintptr_t) c | c->read->instance); ngx_log_debug2(NGX_LOG_DEBUG_EVENT, c->log, 0, "epoll add connection: fd:%d ev:%08XD", c->fd, ee.events); if (epoll_ctl(ep, EPOLL_CTL_ADD, c->fd, &ee) == -1) { ngx_log_error(NGX_LOG_ALERT, c->log, ngx_errno, "epoll_ctl(EPOLL_CTL_ADD, %d) failed", c->fd); return NGX_ERROR; } c->read->active = 1; c->write->active = 1; return NGX_OK; } static ngx_int_t ngx_epoll_del_connection(ngx_connection_t *c, ngx_uint_t flags) { int op; struct epoll_event ee; /* * when the file descriptor is closed the epoll automatically deletes * it from its queue so we do not need to delete explicitly the event * before the closing the file descriptor */ if (flags & NGX_CLOSE_EVENT) { c->read->active = 0; c->write->active = 0; return NGX_OK; } ngx_log_debug1(NGX_LOG_DEBUG_EVENT, c->log, 0, "epoll del connection: fd:%d", c->fd); op = EPOLL_CTL_DEL; ee.events = 0; ee.data.ptr = NULL; if (epoll_ctl(ep, op, c->fd, &ee) == -1) { ngx_log_error(NGX_LOG_ALERT, c->log, ngx_errno, "epoll_ctl(%d, %d) failed", op, c->fd); return NGX_ERROR; } c->read->active = 0; c->write->active = 0; return NGX_OK; } static ngx_int_t ngx_epoll_process_events(ngx_cycle_t *cycle, ngx_msec_t timer, ngx_uint_t flags) { int events; uint32_t revents; ngx_int_t instance, i; ngx_uint_t level; ngx_err_t err; ngx_event_t *rev, *wev, **queue; ngx_connection_t *c; /* NGX_TIMER_INFINITE == INFTIM */ ngx_log_debug1(NGX_LOG_DEBUG_EVENT, cycle->log, 0, "epoll timer: %M", timer); events = epoll_wait(ep, event_list, (int) nevents, timer); err = (events == -1) ? ngx_errno : 0; if (flags & NGX_UPDATE_TIME || ngx_event_timer_alarm) { ngx_time_update(); } if (err) { if (err == NGX_EINTR) { if (ngx_event_timer_alarm) { ngx_event_timer_alarm = 0; return NGX_OK; } level = NGX_LOG_INFO; } else { level = NGX_LOG_ALERT; } ngx_log_error(level, cycle->log, err, "epoll_wait() failed"); return NGX_ERROR; } if (events == 0) { if (timer != NGX_TIMER_INFINITE) { return NGX_OK; } ngx_log_error(NGX_LOG_ALERT, cycle->log, 0, "epoll_wait() returned no events without timeout"); return NGX_ERROR; } ngx_mutex_lock(ngx_posted_events_mutex); for (i = 0; i < events; i++) { c = event_list[i].data.ptr; instance = (uintptr_t) c & 1; c = (ngx_connection_t *) ((uintptr_t) c & (uintptr_t) ~1); rev = c->read; if (c->fd == -1 || rev->instance != instance) { /* * the stale event from a file descriptor * that was just closed in this iteration */ ngx_log_debug1(NGX_LOG_DEBUG_EVENT, cycle->log, 0, "epoll: stale event %p", c); continue; } revents = event_list[i].events; ngx_log_debug3(NGX_LOG_DEBUG_EVENT, cycle->log, 0, "epoll: fd:%d ev:%04XD d:%p", c->fd, revents, event_list[i].data.ptr); if (revents & (EPOLLERR|EPOLLHUP)) { ngx_log_debug2(NGX_LOG_DEBUG_EVENT, cycle->log, 0, "epoll_wait() error on fd:%d ev:%04XD", c->fd, revents); } #if 0 if (revents & ~(EPOLLIN|EPOLLOUT|EPOLLERR|EPOLLHUP)) { ngx_log_error(NGX_LOG_ALERT, cycle->log, 0, "strange epoll_wait() events fd:%d ev:%04XD", c->fd, revents); } #endif if ((revents & (EPOLLERR|EPOLLHUP)) && (revents & (EPOLLIN|EPOLLOUT)) == 0) { /* * if the error events were returned without EPOLLIN or EPOLLOUT, * then add these flags to handle the events at least in one * active handler */ revents |= EPOLLIN|EPOLLOUT; } if ((revents & EPOLLIN) && rev->active) { if ((flags & NGX_POST_THREAD_EVENTS) && !rev->accept) { rev->posted_ready = 1; } else { rev->ready = 1; } if (flags & NGX_POST_EVENTS) { queue = (ngx_event_t **) (rev->accept ? &ngx_posted_accept_events : &ngx_posted_events); ngx_locked_post_event(rev, queue); } else { rev->handler(rev); } } wev = c->write; if ((revents & EPOLLOUT) && wev->active) { if (c->fd == -1 || wev->instance != instance) { /* * the stale event from a file descriptor * that was just closed in this iteration */ ngx_log_debug1(NGX_LOG_DEBUG_EVENT, cycle->log, 0, "epoll: stale event %p", c); continue; } if (flags & NGX_POST_THREAD_EVENTS) { wev->posted_ready = 1; } else { wev->ready = 1; } if (flags & NGX_POST_EVENTS) { ngx_locked_post_event(wev, &ngx_posted_events); } else { wev->handler(wev); } } } ngx_mutex_unlock(ngx_posted_events_mutex); return NGX_OK; } #if (NGX_HAVE_FILE_AIO) static void ngx_epoll_eventfd_handler(ngx_event_t *ev) { int n, events; long i; uint64_t ready; ngx_err_t err; ngx_event_t *e; ngx_event_aio_t *aio; struct io_event event[64]; struct timespec ts; ngx_log_debug0(NGX_LOG_DEBUG_EVENT, ev->log, 0, "eventfd handler"); n = read(ngx_eventfd, &ready, 8); err = ngx_errno; ngx_log_debug1(NGX_LOG_DEBUG_EVENT, ev->log, 0, "eventfd: %d", n); if (n != 8) { if (n == -1) { if (err == NGX_EAGAIN) { return; } ngx_log_error(NGX_LOG_ALERT, ev->log, err, "read(eventfd) failed"); return; } ngx_log_error(NGX_LOG_ALERT, ev->log, 0, "read(eventfd) returned only %d bytes", n); return; } ts.tv_sec = 0; ts.tv_nsec = 0; while (ready) { events = io_getevents(ngx_aio_ctx, 1, 64, event, &ts); ngx_log_debug1(NGX_LOG_DEBUG_EVENT, ev->log, 0, "io_getevents: %l", events); if (events > 0) { ready -= events; for (i = 0; i < events; i++) { ngx_log_debug4(NGX_LOG_DEBUG_EVENT, ev->log, 0, "io_event: %uXL %uXL %L %L", event[i].data, event[i].obj, event[i].res, event[i].res2); e = (ngx_event_t *) (uintptr_t) event[i].data; e->complete = 1; e->active = 0; e->ready = 1; aio = e->data; aio->res = event[i].res; ngx_post_event(e, &ngx_posted_events); } continue; } if (events == 0) { return; } /* events == -1 */ ngx_log_error(NGX_LOG_ALERT, ev->log, ngx_errno, "io_getevents() failed"); return; } } #endif static void * ngx_epoll_create_conf(ngx_cycle_t *cycle) { ngx_epoll_conf_t *epcf; epcf = ngx_palloc(cycle->pool, sizeof(ngx_epoll_conf_t)); if (epcf == NULL) { return NULL; } epcf->events = NGX_CONF_UNSET; epcf->aio_requests = NGX_CONF_UNSET; return epcf; } static char * ngx_epoll_init_conf(ngx_cycle_t *cycle, void *conf) { ngx_epoll_conf_t *epcf = conf; ngx_conf_init_uint_value(epcf->events, 512); ngx_conf_init_uint_value(epcf->aio_requests, 32); return NGX_CONF_OK; }