Merge branch 'master' of ozlabs.org:ccan
[ccan] / ccan / io / poll.c
1 /* Licensed under LGPLv2.1+ - see LICENSE file for details */
2 #include "io.h"
3 #include "backend.h"
4 #include <assert.h>
5 #include <poll.h>
6 #include <stdlib.h>
7 #include <sys/types.h>
8 #include <sys/socket.h>
9 #include <limits.h>
10 #include <errno.h>
11
12 static size_t num_fds = 0, max_fds = 0, num_closing = 0, num_waiting = 0;
13 static struct pollfd *pollfds = NULL;
14 static struct fd **fds = NULL;
15 static struct timers timeouts;
16 #ifdef DEBUG
/* How many do_io_loop() invocations are currently nested. */
static unsigned int io_loop_level = 0;
/* Connections awaiting a delayed free, chained through finish_arg. */
static struct io_conn *free_later = NULL;
19 static void io_loop_enter(void)
20 {
21         io_loop_level++;
22 }
23 static void io_loop_exit(void)
24 {
25         io_loop_level--;
26         if (io_loop_level == 0) {
27                 /* Delayed free. */
28                 while (free_later) {
29                         struct io_conn *c = free_later;
30                         free_later = c->finish_arg;
31                         io_alloc.free(c);
32                 }
33         }
34 }
35 static void free_conn(struct io_conn *conn)
36 {
37         /* Only free on final exit: chain via finish. */
38         if (io_loop_level > 1) {
39                 struct io_conn *c;
40                 for (c = free_later; c; c = c->finish_arg)
41                         assert(c != conn);
42                 conn->finish_arg = free_later;
43                 free_later = conn;
44         } else
45                 io_alloc.free(conn);
46 }
47 #else
/* Non-DEBUG build: loop nesting is not tracked, so entering is a no-op. */
static void io_loop_enter(void)
{
	/* Nothing to do. */
}
/* Non-DEBUG build: loop nesting is not tracked, so leaving is a no-op. */
static void io_loop_exit(void)
{
	/* Nothing to do. */
}
54 static void free_conn(struct io_conn *conn)
55 {
56         io_alloc.free(conn);
57 }
58 #endif
59
60 static bool add_fd(struct fd *fd, short events)
61 {
62         if (num_fds + 1 > max_fds) {
63                 struct pollfd *newpollfds;
64                 struct fd **newfds;
65                 size_t num = max_fds ? max_fds * 2 : 8;
66
67                 newpollfds = io_alloc.realloc(pollfds, sizeof(*newpollfds)*num);
68                 if (!newpollfds)
69                         return false;
70                 pollfds = newpollfds;
71                 newfds = io_alloc.realloc(fds, sizeof(*newfds) * num);
72                 if (!newfds)
73                         return false;
74                 fds = newfds;
75                 max_fds = num;
76         }
77
78         pollfds[num_fds].events = events;
79         /* In case it's idle. */
80         if (!events)
81                 pollfds[num_fds].fd = -fd->fd;
82         else
83                 pollfds[num_fds].fd = fd->fd;
84         pollfds[num_fds].revents = 0; /* In case we're iterating now */
85         fds[num_fds] = fd;
86         fd->backend_info = num_fds;
87         num_fds++;
88         if (events)
89                 num_waiting++;
90
91         return true;
92 }
93
94 static void del_fd(struct fd *fd)
95 {
96         size_t n = fd->backend_info;
97
98         assert(n != -1);
99         assert(n < num_fds);
100         if (pollfds[n].events)
101                 num_waiting--;
102         if (n != num_fds - 1) {
103                 /* Move last one over us. */
104                 pollfds[n] = pollfds[num_fds-1];
105                 fds[n] = fds[num_fds-1];
106                 assert(fds[n]->backend_info == num_fds-1);
107                 fds[n]->backend_info = n;
108         } else if (num_fds == 1) {
109                 /* Free everything when no more fds. */
110                 io_alloc.free(pollfds);
111                 io_alloc.free(fds);
112                 pollfds = NULL;
113                 fds = NULL;
114                 max_fds = 0;
115         }
116         num_fds--;
117         fd->backend_info = -1;
118         close(fd->fd);
119 }
120
121 bool add_listener(struct io_listener *l)
122 {
123         if (!add_fd(&l->fd, POLLIN))
124                 return false;
125         return true;
126 }
127
128 void backend_plan_changed(struct io_conn *conn)
129 {
130         struct pollfd *pfd;
131
132         /* This can happen with debugging and delayed free... */
133         if (conn->fd.backend_info == -1)
134                 return;
135
136         pfd = &pollfds[conn->fd.backend_info];
137
138         if (pfd->events)
139                 num_waiting--;
140
141         pfd->events = conn->plan.pollflag;
142         if (conn->duplex) {
143                 int mask = conn->duplex->plan.pollflag;
144                 /* You can't *both* read/write. */
145                 assert(!mask || pfd->events != mask);
146                 pfd->events |= mask;
147         }
148         if (pfd->events) {
149                 num_waiting++;
150                 pfd->fd = conn->fd.fd;
151         } else
152                 pfd->fd = -conn->fd.fd;
153
154         if (!conn->plan.next)
155                 num_closing++;
156 }
157
158 bool add_conn(struct io_conn *c)
159 {
160         if (!add_fd(&c->fd, c->plan.pollflag))
161                 return false;
162         /* Immediate close is allowed. */
163         if (!c->plan.next)
164                 num_closing++;
165         return true;
166 }
167
168 bool add_duplex(struct io_conn *c)
169 {
170         c->fd.backend_info = c->duplex->fd.backend_info;
171         backend_plan_changed(c);
172         return true;
173 }
174
175 void backend_del_conn(struct io_conn *conn)
176 {
177         if (conn->finish) {
178                 /* Saved by io_close */
179                 errno = conn->plan.u1.s;
180                 conn->finish(conn, conn->finish_arg);
181         }
182         if (timeout_active(conn))
183                 backend_del_timeout(conn);
184         io_alloc.free(conn->timeout);
185         if (conn->duplex) {
186                 /* In case fds[] pointed to the other one. */
187                 fds[conn->fd.backend_info] = &conn->duplex->fd;
188                 conn->duplex->duplex = NULL;
189                 conn->fd.backend_info = -1;
190         } else
191                 del_fd(&conn->fd);
192         num_closing--;
193         free_conn(conn);
194 }
195
196 void del_listener(struct io_listener *l)
197 {
198         del_fd(&l->fd);
199 }
200
201 static void set_plan(struct io_conn *conn, struct io_plan plan)
202 {
203         conn->plan = plan;
204         backend_plan_changed(conn);
205 }
206
207 static void accept_conn(struct io_listener *l)
208 {
209         int fd = accept(l->fd.fd, NULL, NULL);
210
211         /* FIXME: What to do here? */
212         if (fd < 0)
213                 return;
214         l->init(fd, l->arg);
215 }
216
217 /* It's OK to miss some, as long as we make progress. */
218 static bool finish_conns(struct io_conn **ready)
219 {
220         unsigned int i;
221
222         for (i = 0; !io_loop_return && i < num_fds; i++) {
223                 struct io_conn *c, *duplex;
224
225                 if (!num_closing)
226                         break;
227
228                 if (fds[i]->listener)
229                         continue;
230                 c = (void *)fds[i];
231                 for (duplex = c->duplex; c; c = duplex, duplex = NULL) {
232                         if (!c->plan.next) {
233                                 if (doing_debug_on(c) && ready) {
234                                         *ready = c;
235                                         return true;
236                                 }
237                                 backend_del_conn(c);
238                                 i--;
239                         }
240                 }
241         }
242         return false;
243 }
244
245 void backend_add_timeout(struct io_conn *conn, struct timespec duration)
246 {
247         if (!timeouts.base)
248                 timers_init(&timeouts, time_now());
249         timer_add(&timeouts, &conn->timeout->timer,
250                   time_add(time_now(), duration));
251         conn->timeout->conn = conn;
252 }
253
254 void backend_del_timeout(struct io_conn *conn)
255 {
256         assert(conn->timeout->conn == conn);
257         timer_del(&timeouts, &conn->timeout->timer);
258         conn->timeout->conn = NULL;
259 }
260
261 /* This is the main loop. */
262 void *do_io_loop(struct io_conn **ready)
263 {
264         void *ret;
265
266         io_loop_enter();
267
268         while (!io_loop_return) {
269                 int i, r, timeout = INT_MAX;
270                 struct timespec now;
271                 bool some_timeouts = false;
272
273                 if (timeouts.base) {
274                         struct timespec first;
275                         struct list_head expired;
276                         struct io_timeout *t;
277
278                         now = time_now();
279
280                         /* Call functions for expired timers. */
281                         timers_expire(&timeouts, now, &expired);
282                         while ((t = list_pop(&expired, struct io_timeout, timer.list))) {
283                                 struct io_conn *conn = t->conn;
284                                 /* Clear, in case timer re-adds */
285                                 t->conn = NULL;
286                                 set_current(conn);
287                                 set_plan(conn, t->next(conn, t->next_arg));
288                                 some_timeouts = true;
289                         }
290
291                         /* Now figure out how long to wait for the next one. */
292                         if (timer_earliest(&timeouts, &first)) {
293                                 uint64_t f = time_to_msec(time_sub(first, now));
294                                 if (f < INT_MAX)
295                                         timeout = f;
296                         }
297                 }
298
299                 if (num_closing) {
300                         /* If this finishes a debugging con, return now. */
301                         if (finish_conns(ready))
302                                 return NULL;
303                         /* Could have started/finished more. */
304                         continue;
305                 }
306
307                 /* debug can recurse on io_loop; anything can change. */
308                 if (doing_debug() && some_timeouts)
309                         continue;
310
311                 if (num_fds == 0)
312                         break;
313
314                 /* You can't tell them all to go to sleep! */
315                 assert(num_waiting);
316
317                 r = poll(pollfds, num_fds, timeout);
318                 if (r < 0)
319                         break;
320
321                 for (i = 0; i < num_fds && !io_loop_return; i++) {
322                         struct io_conn *c = (void *)fds[i];
323                         int events = pollfds[i].revents;
324
325                         if (r == 0)
326                                 break;
327
328                         if (fds[i]->listener) {
329                                 if (events & POLLIN) {
330                                         accept_conn((void *)c);
331                                         r--;
332                                 }
333                         } else if (events & (POLLIN|POLLOUT)) {
334                                 r--;
335                                 if (c->duplex) {
336                                         int mask = c->duplex->plan.pollflag;
337                                         if (events & mask) {
338                                                 if (doing_debug_on(c->duplex)
339                                                         && ready) {
340                                                         *ready = c->duplex;
341                                                         return NULL;
342                                                 }
343                                                 io_ready(c->duplex);
344                                                 events &= ~mask;
345                                                 /* debug can recurse;
346                                                  * anything can change. */
347                                                 if (doing_debug())
348                                                         break;
349                                                 if (!(events&(POLLIN|POLLOUT)))
350                                                         continue;
351                                         }
352                                 }
353                                 if (doing_debug_on(c) && ready) {
354                                         *ready = c;
355                                         return NULL;
356                                 }
357                                 io_ready(c);
358                                 /* debug can recurse; anything can change. */
359                                 if (doing_debug())
360                                         break;
361                         } else if (events & (POLLHUP|POLLNVAL|POLLERR)) {
362                                 r--;
363                                 set_current(c);
364                                 errno = EBADF;
365                                 set_plan(c, io_close());
366                                 if (c->duplex) {
367                                         set_current(c->duplex);
368                                         set_plan(c->duplex, io_close());
369                                 }
370                         }
371                 }
372         }
373
374         while (num_closing && !io_loop_return) {
375                 if (finish_conns(ready))
376                         return NULL;
377         }
378
379         ret = io_loop_return;
380         io_loop_return = NULL;
381
382         io_loop_exit();
383         return ret;
384 }
385
/*
 * Run the I/O loop with no debug hand-off: calls do_io_loop() with a NULL
 * @ready and returns its result.
 */
void *io_loop(void)
{
	void *result = do_io_loop(NULL);

	return result;
}