Lines Matching +full:poll +full:- +full:only
1 // SPDX-License-Identifier: GPL-2.0-or-later
18 #include <linux/poll.h>
50 * 2) ep->mtx (mutex)
51 * 3) ep->lock (rwlock)
54 * We need a rwlock (ep->lock) because we manipulate objects
55 * from inside the poll callback, which might be triggered from
57 * So we can't sleep inside the poll callback and hence we need
61 * mutex (ep->mtx). It is acquired during the event transfer loop,
76 * It is necessary to acquire multiple "ep->mtx"es at once in the
79 * epoll_ctl(e1, EPOLL_CTL_ADD, e2), e1->mtx will always be acquired
80 * before e2->mtx). Since we disallow cycles of epoll file
81 * descriptors, this ensures that the mutexes are well-ordered. In
85 * It is possible to drop the "ep->mtx" and to use the global
86 * mutex "epmutex" (together with "ep->lock") to have it working,
87 * but having "ep->mtx" will make the interface more scalable.
89 * normal operations the epoll private "ep->mtx" will guarantee
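
The nested-mutex ordering described above only matters because one epoll file descriptor can itself be registered in another. A minimal userspace sketch of that scenario (illustrative only, not taken from this file; error handling trimmed), also showing the cycle check that keeps the ordering well-defined:

#include <stdio.h>
#include <sys/epoll.h>
#include <unistd.h>

int main(void)
{
        int e1 = epoll_create1(0);
        int e2 = epoll_create1(0);
        if (e1 < 0 || e2 < 0) {
                perror("epoll_create1");
                return 1;
        }

        /* Register e2 inside e1: the epoll_ctl(e1, EPOLL_CTL_ADD, e2) case
         * discussed above, where e1->mtx is taken before e2->mtx. */
        struct epoll_event ev = { .events = EPOLLIN, .data.fd = e2 };
        if (epoll_ctl(e1, EPOLL_CTL_ADD, e2, &ev) < 0)
                perror("epoll_ctl ADD e2 into e1");

        /* Closing the loop (adding e1 into e2) is rejected with ELOOP,
         * which is what the cycle check enforces. */
        ev.data.fd = e1;
        if (epoll_ctl(e2, EPOLL_CTL_ADD, e1, &ev) < 0)
                perror("epoll_ctl ADD e1 into e2 (expected ELOOP)");

        close(e1);
        close(e2);
        return 0;
}
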
106 #define EP_UNACTIVE_PTR ((void *) -1L)
152 * Works together with "struct eventpoll"->ovflist in keeping the
160 /* Number of active wait queues attached to poll operations */
163 /* List containing poll wait queues */
196 /* Wait queue used by file->poll() */
211 * holding ->lock.
227 /* used to track busy poll napi_id */
237 /* Wait structure used by the poll hooks */
255 /* Wrapper struct used by poll queueing */
321 return f->f_op == &eventpoll_fops; in is_file_epoll()
328 ffd->file = file; in ep_set_ffd()
329 ffd->fd = fd; in ep_set_ffd()
336 return (p1->file > p2->file ? +1: in ep_cmp_ffd()
337 (p1->file < p2->file ? -1 : p1->fd - p2->fd)); in ep_cmp_ffd()
343 return !list_empty(&epi->rdllink); in ep_is_linked()
354 return container_of(p, struct eppoll_entry, wait)->base; in ep_item_from_wait()
360 return container_of(p, struct ep_pqueue, pt)->epi; in ep_item_from_epqueue()
363 /* Initialize the poll safe wake up structure */
366 INIT_LIST_HEAD(&ncalls->tasks_call_list); in ep_nested_calls_init()
367 spin_lock_init(&ncalls->lock); in ep_nested_calls_init()
371 * ep_events_available - Checks if ready events might be available.
380 return !list_empty_careful(&ep->rdllist) || in ep_events_available()
381 READ_ONCE(ep->ovflist) != EP_UNACTIVE_PTR; in ep_events_available()
393 * Busy poll if globally enabled, a supporting socket was found, and no events are pending,
400 unsigned int napi_id = READ_ONCE(ep->napi_id); in ep_busy_loop()
408 if (ep->napi_id) in ep_reset_busy_poll_napi_id()
409 ep->napi_id = 0; in ep_reset_busy_poll_napi_id()
413 * Set epoll busy poll NAPI ID from sk.
426 sock = sock_from_file(epi->ffd.file, &err); in ep_set_busy_poll_napi_id()
430 sk = sock->sk; in ep_set_busy_poll_napi_id()
434 napi_id = READ_ONCE(sk->sk_napi_id); in ep_set_busy_poll_napi_id()
435 ep = epi->ep; in ep_set_busy_poll_napi_id()
437 /* Non-NAPI IDs can be rejected in ep_set_busy_poll_napi_id()
441 if (napi_id < MIN_NAPI_ID || napi_id == ep->napi_id) in ep_set_busy_poll_napi_id()
444 /* record NAPI ID for use in next busy poll */ in ep_set_busy_poll_napi_id()
445 ep->napi_id = napi_id; in ep_set_busy_poll_napi_id()
465 * ep_call_nested - Perform a bounded (possibly nested) call, by checking
468 * not re-entered.
476 * Returns: The code returned by the @nproc callback, or -1 if
485 struct list_head *lsthead = &ncalls->tasks_call_list; in ep_call_nested()
489 spin_lock_irqsave(&ncalls->lock, flags); in ep_call_nested()
497 if (tncur->ctx == ctx && in ep_call_nested()
498 (tncur->cookie == cookie || ++call_nests > EP_MAX_NESTS)) { in ep_call_nested()
503 error = -1; in ep_call_nested()
513 spin_unlock_irqrestore(&ncalls->lock, flags); in ep_call_nested()
519 spin_lock_irqsave(&ncalls->lock, flags); in ep_call_nested()
522 spin_unlock_irqrestore(&ncalls->lock, flags); in ep_call_nested()
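
For illustration, here is a stripped-down, single-threaded userspace analogue of the bounded-nesting idea behind ep_call_nested(): refuse to recurse deeper than a fixed limit, and refuse to re-enter on the same cookie. The names and the limit are invented for the sketch; the kernel version additionally keys each tracked entry on the calling context so unrelated callers do not count against each other.

#include <stdio.h>

#define MAX_NESTS 4

/* Single-threaded demo state; the kernel keeps a per-object list instead. */
static const void *nest_cookies[MAX_NESTS];
static int nest_depth;

static int call_nested(int (*proc)(const void *), const void *cookie)
{
        int i, ret;

        if (nest_depth >= MAX_NESTS)
                return -1;                      /* recursion limit exceeded */
        for (i = 0; i < nest_depth; i++)
                if (nest_cookies[i] == cookie)
                        return -1;              /* same cookie re-entered */

        nest_cookies[nest_depth++] = cookie;
        ret = proc(cookie);                     /* may call call_nested() again */
        nest_depth--;
        return ret;
}

static int visit(const void *cookie)
{
        printf("visiting %p at depth %d\n", (void *)cookie, nest_depth);
        return 0;
}

int main(void)
{
        int obj;

        return call_nested(visit, &obj) < 0;
}
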
540 * issue a wake_up() on its poll wake list. Epoll (efd1) has installed a
544 * the waiters on its poll wait list (efd2). So it calls ep_poll_safewake()
563 * it might be natural to create a per-cpu nest count. However, since in ep_poll_safewake()
564 * we can recurse on ep->poll_wait.lock, and a non-raw spinlock can in ep_poll_safewake()
565 * schedule() in the -rt kernel, the per-cpu variables are no longer in ep_poll_safewake()
577 if ((is_file_epoll(epi->ffd.file))) { in ep_poll_safewake()
578 ep_src = epi->ffd.file->private_data; in ep_poll_safewake()
579 nests = ep_src->nests; in ep_poll_safewake()
584 spin_lock_irqsave_nested(&ep->poll_wait.lock, flags, nests); in ep_poll_safewake()
585 ep->nests = nests + 1; in ep_poll_safewake()
586 wake_up_locked_poll(&ep->poll_wait, EPOLLIN | pollflags); in ep_poll_safewake()
587 ep->nests = 0; in ep_poll_safewake()
588 spin_unlock_irqrestore(&ep->poll_wait.lock, flags); in ep_poll_safewake()
596 wake_up_poll(&ep->poll_wait, EPOLLIN | pollflags); in ep_poll_safewake()
607 * If it is cleared by POLLFREE, it should be rcu-safe. in ep_remove_wait_queue()
610 * we rely on whead->lock. in ep_remove_wait_queue()
612 whead = smp_load_acquire(&pwq->whead); in ep_remove_wait_queue()
614 remove_wait_queue(whead, &pwq->wait); in ep_remove_wait_queue()
619 * This function unregisters poll callbacks from the associated file
625 struct list_head *lsthead = &epi->pwqlist; in ep_unregister_pollwait()
631 list_del(&pwq->llink); in ep_unregister_pollwait()
637 /* call only when ep->mtx is held */
640 return rcu_dereference_check(epi->ws, lockdep_is_held(&epi->ep->mtx)); in ep_wakeup_source()
643 /* call only when ep->mtx is held */
654 return rcu_access_pointer(epi->ws) ? true : false; in ep_has_wakeup_source()
657 /* call when ep->mtx cannot be held (ep_poll_callback) */
663 ws = rcu_dereference(epi->ws); in ep_pm_stay_awake_rcu()
670 * ep_scan_ready_list - Scans the ready list in a way that makes it possible for
671 * the scan code to call f_op->poll(). Also allows for
677 * @depth: The current depth of recursive f_op->poll calls.
678 * @ep_locked: caller already holds ep->mtx
699 mutex_lock_nested(&ep->mtx, depth); in ep_scan_ready_list()
702 * Steal the ready list, and re-init the original one to the in ep_scan_ready_list()
703 * empty list. Also, set ep->ovflist to NULL so that events in ep_scan_ready_list()
705 * have the poll callback to queue directly on ep->rdllist, in ep_scan_ready_list()
709 write_lock_irq(&ep->lock); in ep_scan_ready_list()
710 list_splice_init(&ep->rdllist, &txlist); in ep_scan_ready_list()
711 WRITE_ONCE(ep->ovflist, NULL); in ep_scan_ready_list()
712 write_unlock_irq(&ep->lock); in ep_scan_ready_list()
719 write_lock_irq(&ep->lock); in ep_scan_ready_list()
722 * other events might have been queued by the poll callback. in ep_scan_ready_list()
723 * We re-insert them inside the main ready-list here. in ep_scan_ready_list()
725 for (nepi = READ_ONCE(ep->ovflist); (epi = nepi) != NULL; in ep_scan_ready_list()
726 nepi = epi->next, epi->next = EP_UNACTIVE_PTR) { in ep_scan_ready_list()
730 * queued into ->ovflist but the "txlist" might already in ep_scan_ready_list()
735 * ->ovflist is LIFO, so we have to reverse it in order in ep_scan_ready_list()
738 list_add(&epi->rdllink, &ep->rdllist); in ep_scan_ready_list()
743 * We need to set back ep->ovflist to EP_UNACTIVE_PTR, so that after in ep_scan_ready_list()
745 * ep->rdllist. in ep_scan_ready_list()
747 WRITE_ONCE(ep->ovflist, EP_UNACTIVE_PTR); in ep_scan_ready_list()
750 * Quickly re-inject items left on "txlist". in ep_scan_ready_list()
752 list_splice(&txlist, &ep->rdllist); in ep_scan_ready_list()
753 __pm_relax(ep->ws); in ep_scan_ready_list()
755 if (!list_empty(&ep->rdllist)) { in ep_scan_ready_list()
756 if (waitqueue_active(&ep->wq)) in ep_scan_ready_list()
757 wake_up(&ep->wq); in ep_scan_ready_list()
760 write_unlock_irq(&ep->lock); in ep_scan_ready_list()
763 mutex_unlock(&ep->mtx); in ep_scan_ready_list()
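
A hedged, single-threaded sketch of the pattern the excerpt above follows: steal the ready list under the lock, process it unlocked while diverting new arrivals to an overflow list, then re-inject everything. Every name here is invented for the sketch, and it assumes the processing step consumes every stolen item (the kernel also splices leftovers back). Compile with -pthread.

#include <pthread.h>
#include <stddef.h>
#include <stdio.h>

struct item { struct item *next; int val; };

static struct item ovf_inactive;                 /* sentinel: diversion off */
static pthread_mutex_t lk = PTHREAD_MUTEX_INITIALIZER;
static struct item *rdllist;                     /* ready items */
static struct item *ovflist = &ovf_inactive;     /* diverted arrivals */

/* Producer side (the poll callback in the kernel): queue to the ready list,
 * or to the overflow list while a scan has the ready list "stolen". */
static void mark_ready(struct item *it)
{
        pthread_mutex_lock(&lk);
        if (ovflist != &ovf_inactive) {
                it->next = ovflist;
                ovflist = it;
        } else {
                it->next = rdllist;
                rdllist = it;
        }
        pthread_mutex_unlock(&lk);
}

/* Consumer side: steal the list, process unlocked, re-inject diverted items. */
static void scan(void (*process)(struct item *))
{
        struct item *txlist, *it;

        pthread_mutex_lock(&lk);
        txlist = rdllist;
        rdllist = NULL;
        ovflist = NULL;                          /* start diverting arrivals */
        pthread_mutex_unlock(&lk);

        for (it = txlist; it; it = it->next)
                process(it);                     /* may block, copy out, ... */

        pthread_mutex_lock(&lk);
        while (ovflist) {                        /* re-inject diverted items */
                it = ovflist;
                ovflist = it->next;
                it->next = rdllist;
                rdllist = it;
        }
        ovflist = &ovf_inactive;                 /* stop diverting */
        pthread_mutex_unlock(&lk);
}

static void show(struct item *it)
{
        printf("processing item %d\n", it->val);
}

int main(void)
{
        struct item a = { .val = 1 }, b = { .val = 2 };

        mark_ready(&a);
        mark_ready(&b);
        scan(show);
        return 0;
}
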
780 struct file *file = epi->ffd.file; in ep_remove()
785 * Removes poll wait queue hooks. in ep_remove()
790 spin_lock(&file->f_lock); in ep_remove()
791 list_del_rcu(&epi->fllink); in ep_remove()
792 spin_unlock(&file->f_lock); in ep_remove()
794 rb_erase_cached(&epi->rbn, &ep->rbr); in ep_remove()
796 write_lock_irq(&ep->lock); in ep_remove()
798 list_del_init(&epi->rdllink); in ep_remove()
799 write_unlock_irq(&ep->lock); in ep_remove()
804 * field epi->rcu, since we are trying to minimize the size of in ep_remove()
806 * ep->mtx. The rcu read side, reverse_path_check_proc(), does not make in ep_remove()
809 call_rcu(&epi->rcu, epi_rcu_free); in ep_remove()
811 atomic_long_dec(&ep->user->epoll_watches); in ep_remove()
822 if (waitqueue_active(&ep->poll_wait)) in ep_free()
828 * We do not need to hold "ep->mtx" here because the epoll file in ep_free()
830 * anymore. The only hit might come from eventpoll_release_file() but in ep_free()
836 * Walks through the whole tree, unregistering poll callbacks. in ep_free()
838 for (rbp = rb_first_cached(&ep->rbr); rbp; rbp = rb_next(rbp)) { in ep_free()
847 * point we are sure no poll callbacks will be lingering around, and also by in ep_free()
849 * us during this operation. So we can avoid the lock on "ep->lock". in ep_free()
850 * We do not need to lock ep->mtx either; we only do it to prevent in ep_free()
853 mutex_lock(&ep->mtx); in ep_free()
854 while ((rbp = rb_first_cached(&ep->rbr)) != NULL) { in ep_free()
859 mutex_unlock(&ep->mtx); in ep_free()
862 mutex_destroy(&ep->mtx); in ep_free()
863 free_uid(ep->user); in ep_free()
864 wakeup_source_unregister(ep->ws); in ep_free()
870 struct eventpoll *ep = file->private_data; in ep_eventpoll_release()
885 * the ep->mtx so we need to start from depth=1, such that mutex_lock_nested()
894 pt->_key = epi->event.events; in ep_item_poll()
895 if (!is_file_epoll(epi->ffd.file)) in ep_item_poll()
896 return vfs_poll(epi->ffd.file, pt) & epi->event.events; in ep_item_poll()
898 ep = epi->ffd.file->private_data; in ep_item_poll()
899 poll_wait(epi->ffd.file, &ep->poll_wait, pt); in ep_item_poll()
900 locked = pt && (pt->_qproc == ep_ptable_queue_proc); in ep_item_poll()
902 return ep_scan_ready_list(epi->ffd.file->private_data, in ep_item_poll()
904 locked) & epi->event.events; in ep_item_poll()
922 * Item has been dropped into the ready list by the poll in ep_read_events_proc()
927 list_del_init(&epi->rdllink); in ep_read_events_proc()
936 struct eventpoll *ep = file->private_data; in ep_eventpoll_poll()
939 /* Insert inside our poll wait queue */ in ep_eventpoll_poll()
940 poll_wait(file, &ep->poll_wait, wait); in ep_eventpoll_poll()
953 struct eventpoll *ep = f->private_data; in ep_show_fdinfo()
956 mutex_lock(&ep->mtx); in ep_show_fdinfo()
957 for (rbp = rb_first_cached(&ep->rbr); rbp; rbp = rb_next(rbp)) { in ep_show_fdinfo()
959 struct inode *inode = file_inode(epi->ffd.file); in ep_show_fdinfo()
963 epi->ffd.fd, epi->event.events, in ep_show_fdinfo()
964 (long long)epi->event.data, in ep_show_fdinfo()
965 (long long)epi->ffd.file->f_pos, in ep_show_fdinfo()
966 inode->i_ino, inode->i_sb->s_dev); in ep_show_fdinfo()
970 mutex_unlock(&ep->mtx); in ep_show_fdinfo()
980 .poll = ep_eventpoll_poll,
995 * We don't want to get "file->f_lock" because it is not in eventpoll_release_file()
1000 * The only hit might come from ep_free() but by holding the mutex in eventpoll_release_file()
1002 * "ep->mtx" after "epmutex" because ep_remove() requires it when called in eventpoll_release_file()
1008 list_for_each_entry_safe(epi, next, &file->f_ep_links, fllink) { in eventpoll_release_file()
1009 ep = epi->ep; in eventpoll_release_file()
1010 mutex_lock_nested(&ep->mtx, 0); in eventpoll_release_file()
1012 mutex_unlock(&ep->mtx); in eventpoll_release_file()
1024 error = -ENOMEM; in ep_alloc()
1029 mutex_init(&ep->mtx); in ep_alloc()
1030 rwlock_init(&ep->lock); in ep_alloc()
1031 init_waitqueue_head(&ep->wq); in ep_alloc()
1032 init_waitqueue_head(&ep->poll_wait); in ep_alloc()
1033 INIT_LIST_HEAD(&ep->rdllist); in ep_alloc()
1034 ep->rbr = RB_ROOT_CACHED; in ep_alloc()
1035 ep->ovflist = EP_UNACTIVE_PTR; in ep_alloc()
1036 ep->user = user; in ep_alloc()
1060 for (rbp = ep->rbr.rb_root.rb_node; rbp; ) { in ep_find()
1062 kcmp = ep_cmp_ffd(&ffd, &epi->ffd); in ep_find()
1064 rbp = rbp->rb_right; in ep_find()
1066 rbp = rbp->rb_left; in ep_find()
1082 for (rbp = rb_first_cached(&ep->rbr); rbp; rbp = rb_next(rbp)) { in ep_find_tfd()
1084 if (epi->ffd.fd == tfd) { in ep_find_tfd()
1088 toff--; in ep_find_tfd()
1104 return ERR_PTR(-EINVAL); in get_epoll_tfile_raw_ptr()
1106 ep = file->private_data; in get_epoll_tfile_raw_ptr()
1108 mutex_lock(&ep->mtx); in get_epoll_tfile_raw_ptr()
1111 file_raw = epi->ffd.file; in get_epoll_tfile_raw_ptr()
1113 file_raw = ERR_PTR(-ENOENT); in get_epoll_tfile_raw_ptr()
1114 mutex_unlock(&ep->mtx); in get_epoll_tfile_raw_ptr()
1131 * Also an element can be locklessly added to the list only in one
1144 * This is a simple 'new->next = head' operation, but cmpxchg() in list_add_tail_lockless()
1147 * new->next == new. in list_add_tail_lockless()
1149 if (cmpxchg(&new->next, new, head) != new) in list_add_tail_lockless()
1153 * Initially ->next of a new element must be updated with the head in list_add_tail_lockless()
1154 * (we are inserting to the tail) and only then pointers are atomically in list_add_tail_lockless()
1155 * exchanged. XCHG guarantees memory ordering, thus ->next should be in list_add_tail_lockless()
1157 * swapped before prev->next is updated. in list_add_tail_lockless()
1160 prev = xchg(&head->prev, new); in list_add_tail_lockless()
1163 * It is safe to modify prev->next and new->prev, because a new element in list_add_tail_lockless()
1164 * is added only to the tail and new->next is updated before XCHG. in list_add_tail_lockless()
1167 prev->next = new; in list_add_tail_lockless()
1168 new->prev = prev; in list_add_tail_lockless()
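
The same claim-then-swap protocol can be sketched in userspace with C11 atomics. This is only an illustration of the ordering described above (an off-list element points to itself, cmpxchg on ->next elects a single inserter, xchg on the tail pointer publishes it); the names are invented and the sketch is exercised single-threaded:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct node {
        _Atomic(struct node *) next;
        _Atomic(struct node *) prev;
};

/* Tail insertion in the spirit of list_add_tail_lockless(): claim the element
 * by cmpxchg'ing its self-pointing ->next to the list head, then atomically
 * swap the head's ->prev (the tail pointer) and stitch the remaining links. */
static bool lockless_add_tail(struct node *new, struct node *head)
{
        struct node *expected = new;

        /* Only one concurrent caller wins the right to insert this element. */
        if (!atomic_compare_exchange_strong(&new->next, &expected, head))
                return false;

        struct node *prev = atomic_exchange(&head->prev, new);

        /* Safe: the element is published at the tail only, and its ->next
         * was already set before the exchange above. */
        atomic_store(&prev->next, new);
        atomic_store(&new->prev, prev);
        return true;
}

int main(void)
{
        struct node head, a;

        /* An empty list points to itself; an off-list element "owns itself"
         * (next == self), mirroring the convention used above. */
        atomic_init(&head.next, &head);
        atomic_init(&head.prev, &head);
        atomic_init(&a.next, &a);
        atomic_init(&a.prev, &a);

        printf("first add: %d\n", lockless_add_tail(&a, &head));
        printf("second add of same node (rejected): %d\n",
               lockless_add_tail(&a, &head));
        return 0;
}
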
1174 * Chains a new epi entry to the tail of the ep->ovflist in a lockless way,
1181 struct eventpoll *ep = epi->ep; in chain_epi_lockless()
1184 if (epi->next != EP_UNACTIVE_PTR) in chain_epi_lockless()
1188 if (cmpxchg(&epi->next, EP_UNACTIVE_PTR, NULL) != EP_UNACTIVE_PTR) in chain_epi_lockless()
1192 epi->next = xchg(&ep->ovflist, epi); in chain_epi_lockless()
1203 * events from other file descriptors, thus all modifications to ->rdllist
1204 * or ->ovflist are lockless. Read lock is paired with the write lock from
1209 * concurrently for the same @epi from different CPUs if the poll table was initialized
1219 struct eventpoll *ep = epi->ep; in ep_poll_callback()
1224 read_lock_irqsave(&ep->lock, flags); in ep_poll_callback()
1229 * If the event mask does not contain any poll(2) event, we consider the in ep_poll_callback()
1234 if (!(epi->event.events & ~EP_PRIVATE_BITS)) in ep_poll_callback()
1243 if (pollflags && !(pollflags & epi->event.events)) in ep_poll_callback()
1248 * (because we're accessing user memory, and because of linux f_op->poll() in ep_poll_callback()
1250 * chained in ep->ovflist and requeued later on. in ep_poll_callback()
1252 if (READ_ONCE(ep->ovflist) != EP_UNACTIVE_PTR) { in ep_poll_callback()
1257 if (list_add_tail_lockless(&epi->rdllink, &ep->rdllist)) in ep_poll_callback()
1262 * Wake up (if active) both the eventpoll wait list and the ->poll() in ep_poll_callback()
1265 if (waitqueue_active(&ep->wq)) { in ep_poll_callback()
1266 if ((epi->event.events & EPOLLEXCLUSIVE) && in ep_poll_callback()
1270 if (epi->event.events & EPOLLIN) in ep_poll_callback()
1274 if (epi->event.events & EPOLLOUT) in ep_poll_callback()
1282 wake_up(&ep->wq); in ep_poll_callback()
1284 if (waitqueue_active(&ep->poll_wait)) in ep_poll_callback()
1288 read_unlock_irqrestore(&ep->lock, flags); in ep_poll_callback()
1294 if (!(epi->event.events & EPOLLEXCLUSIVE)) in ep_poll_callback()
1300 * ->whead = NULL and do another remove_wait_queue() after in ep_poll_callback()
1303 list_del_init(&wait->entry); in ep_poll_callback()
1305 * ->whead != NULL protects us from the race with ep_free() in ep_poll_callback()
1306 * or ep_remove(), ep_remove_wait_queue() takes whead->lock in ep_poll_callback()
1310 smp_store_release(&ep_pwq_from_wait(wait)->whead, NULL); in ep_poll_callback()
1326 if (epi->nwait >= 0 && (pwq = kmem_cache_alloc(pwq_cache, GFP_KERNEL))) { in ep_ptable_queue_proc()
1327 init_waitqueue_func_entry(&pwq->wait, ep_poll_callback); in ep_ptable_queue_proc()
1328 pwq->whead = whead; in ep_ptable_queue_proc()
1329 pwq->base = epi; in ep_ptable_queue_proc()
1330 if (epi->event.events & EPOLLEXCLUSIVE) in ep_ptable_queue_proc()
1331 add_wait_queue_exclusive(whead, &pwq->wait); in ep_ptable_queue_proc()
1333 add_wait_queue(whead, &pwq->wait); in ep_ptable_queue_proc()
1334 list_add_tail(&pwq->llink, &epi->pwqlist); in ep_ptable_queue_proc()
1335 epi->nwait++; in ep_ptable_queue_proc()
1338 epi->nwait = -1; in ep_ptable_queue_proc()
1345 struct rb_node **p = &ep->rbr.rb_root.rb_node, *parent = NULL; in ep_rbtree_insert()
1352 kcmp = ep_cmp_ffd(&epi->ffd, &epic->ffd); in ep_rbtree_insert()
1354 p = &parent->rb_right; in ep_rbtree_insert()
1357 p = &parent->rb_left; in ep_rbtree_insert()
1359 rb_link_node(&epi->rbn, parent, p); in ep_rbtree_insert()
1360 rb_insert_color_cached(&epi->rbn, &ep->rbr, leftmost); in ep_rbtree_insert()
1387 return -1; in path_count_inc()
1408 list_for_each_entry_rcu(epi, &file->f_ep_links, fllink) { in reverse_path_check_proc()
1409 child_file = epi->ep->file; in reverse_path_check_proc()
1411 if (list_empty(&child_file->f_ep_links)) { in reverse_path_check_proc()
1413 error = -1; in reverse_path_check_proc()
1434 * reverse_path_check - The tfile_check_list is a list of file *, which have
1441 * -1 otherwise.
1468 if (!epi->ep->ws) { in ep_create_wakeup_source()
1469 epi->ep->ws = wakeup_source_register(NULL, ws_name); in ep_create_wakeup_source()
1470 if (!epi->ep->ws) in ep_create_wakeup_source()
1471 return -ENOMEM; in ep_create_wakeup_source()
1474 take_dentry_name_snapshot(&n, epi->ffd.file->f_path.dentry); in ep_create_wakeup_source()
1481 return -ENOMEM; in ep_create_wakeup_source()
1482 rcu_assign_pointer(epi->ws, ws); in ep_create_wakeup_source()
1487 /* rare code path, only used when EPOLL_CTL_MOD removes a wakeup source */
1492 RCU_INIT_POINTER(epi->ws, NULL); in ep_destroy_wakeup_source()
1517 user_watches = atomic_long_read(&ep->user->epoll_watches); in ep_insert()
1519 return -ENOSPC; in ep_insert()
1521 return -ENOMEM; in ep_insert()
1524 INIT_LIST_HEAD(&epi->rdllink); in ep_insert()
1525 INIT_LIST_HEAD(&epi->fllink); in ep_insert()
1526 INIT_LIST_HEAD(&epi->pwqlist); in ep_insert()
1527 epi->ep = ep; in ep_insert()
1528 ep_set_ffd(&epi->ffd, tfile, fd); in ep_insert()
1529 epi->event = *event; in ep_insert()
1530 epi->nwait = 0; in ep_insert()
1531 epi->next = EP_UNACTIVE_PTR; in ep_insert()
1532 if (epi->event.events & EPOLLWAKEUP) { in ep_insert()
1537 RCU_INIT_POINTER(epi->ws, NULL); in ep_insert()
1541 spin_lock(&tfile->f_lock); in ep_insert()
1542 list_add_tail_rcu(&epi->fllink, &tfile->f_ep_links); in ep_insert()
1543 spin_unlock(&tfile->f_lock); in ep_insert()
1552 error = -EINVAL; in ep_insert()
1556 /* Initialize the poll table using the queue callback */ in ep_insert()
1561 * Attach the item to the poll hooks and get current event bits. in ep_insert()
1564 * this operation completes, the poll callback can start hitting in ep_insert()
1570 * We have to check if something went wrong during the poll wait queue in ep_insert()
1574 error = -ENOMEM; in ep_insert()
1575 if (epi->nwait < 0) in ep_insert()
1579 write_lock_irq(&ep->lock); in ep_insert()
1586 list_add_tail(&epi->rdllink, &ep->rdllist); in ep_insert()
1590 if (waitqueue_active(&ep->wq)) in ep_insert()
1591 wake_up(&ep->wq); in ep_insert()
1592 if (waitqueue_active(&ep->poll_wait)) in ep_insert()
1596 write_unlock_irq(&ep->lock); in ep_insert()
1598 atomic_long_inc(&ep->user->epoll_watches); in ep_insert()
1609 spin_lock(&tfile->f_lock); in ep_insert()
1610 list_del_rcu(&epi->fllink); in ep_insert()
1611 spin_unlock(&tfile->f_lock); in ep_insert()
1613 rb_erase_cached(&epi->rbn, &ep->rbr); in ep_insert()
1617 * allocated wait queue. Note that we don't care about the ep->ovflist in ep_insert()
1618 * list, since that is used/cleaned only inside a section bound by "mtx". in ep_insert()
1621 write_lock_irq(&ep->lock); in ep_insert()
1623 list_del_init(&epi->rdllink); in ep_insert()
1624 write_unlock_irq(&ep->lock); in ep_insert()
1649 * Set the new event interest mask before calling f_op->poll(); in ep_modify()
1651 * f_op->poll() call and the new event set registering. in ep_modify()
1653 epi->event.events = event->events; /* need barrier below */ in ep_modify()
1654 epi->event.data = event->data; /* protected by mtx */ in ep_modify()
1655 if (epi->event.events & EPOLLWAKEUP) { in ep_modify()
1667 * event occurs immediately after we call f_op->poll(). in ep_modify()
1668 * We need this because we did not take ep->lock while in ep_modify()
1670 * ep->lock). in ep_modify()
1673 * when calling f_op->poll(). This barrier also in ep_modify()
1677 * This barrier will now guarantee ep_poll_callback or f_op->poll in ep_modify()
1689 write_lock_irq(&ep->lock); in ep_modify()
1691 list_add_tail(&epi->rdllink, &ep->rdllist); in ep_modify()
1695 if (waitqueue_active(&ep->wq)) in ep_modify()
1696 wake_up(&ep->wq); in ep_modify()
1697 if (waitqueue_active(&ep->poll_wait)) in ep_modify()
1700 write_unlock_irq(&ep->lock); in ep_modify()
1716 struct epoll_event __user *uevent = esed->events; in ep_send_events_proc()
1721 esed->res = 0; in ep_send_events_proc()
1728 lockdep_assert_held(&ep->mtx); in ep_send_events_proc()
1731 if (esed->res >= esed->maxevents) in ep_send_events_proc()
1735 * Activate ep->ws before deactivating epi->ws to prevent in ep_send_events_proc()
1736 * triggering auto-suspend here (in case we reactivate epi->ws in ep_send_events_proc()
1739 * This could be rearranged to delay the deactivation of epi->ws in ep_send_events_proc()
1740 * instead, but then epi->ws would temporarily be out of sync in ep_send_events_proc()
1745 if (ws->active) in ep_send_events_proc()
1746 __pm_stay_awake(ep->ws); in ep_send_events_proc()
1750 list_del_init(&epi->rdllink); in ep_send_events_proc()
1753 * If the event mask intersects the caller-requested one, in ep_send_events_proc()
1755 * is holding ep->mtx, so no operations coming from userspace in ep_send_events_proc()
1762 if (__put_user(revents, &uevent->events) || in ep_send_events_proc()
1763 __put_user(epi->event.data, &uevent->data)) { in ep_send_events_proc()
1764 list_add(&epi->rdllink, head); in ep_send_events_proc()
1766 if (!esed->res) in ep_send_events_proc()
1767 esed->res = -EFAULT; in ep_send_events_proc()
1770 esed->res++; in ep_send_events_proc()
1772 if (epi->event.events & EPOLLONESHOT) in ep_send_events_proc()
1773 epi->event.events &= EP_PRIVATE_BITS; in ep_send_events_proc()
1774 else if (!(epi->event.events & EPOLLET)) { in ep_send_events_proc()
1781 * into ep->rdllist besides us. The epoll_ctl() in ep_send_events_proc()
1784 * poll callback will queue them in ep->ovflist. in ep_send_events_proc()
1786 list_add_tail(&epi->rdllink, &ep->rdllist); in ep_send_events_proc()
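
From userspace, the branch above is what makes a level-triggered fd show up again on the next epoll_wait() while an EPOLLET one does not. A small illustrative program (not from this file; error handling trimmed):

#include <stdio.h>
#include <sys/epoll.h>
#include <unistd.h>

int main(void)
{
        int pfd[2], r1, r2;
        int ep = epoll_create1(0);
        struct epoll_event ev = { .events = EPOLLIN }, out;

        if (pipe(pfd) < 0 || write(pfd[1], "x", 1) != 1)
                return 1;

        /* Level-triggered: the unread byte keeps the fd on the ready list. */
        epoll_ctl(ep, EPOLL_CTL_ADD, pfd[0], &ev);
        r1 = epoll_wait(ep, &out, 1, 0);
        r2 = epoll_wait(ep, &out, 1, 0);
        printf("level-triggered: %d then %d fd(s) ready\n", r1, r2);

        /* Edge-triggered: reported once for the (re-)arm, then not again
         * until a new readiness edge, even though the byte is still unread. */
        ev.events = EPOLLIN | EPOLLET;
        epoll_ctl(ep, EPOLL_CTL_MOD, pfd[0], &ev);
        r1 = epoll_wait(ep, &out, 1, 0);
        r2 = epoll_wait(ep, &out, 1, 0);
        printf("edge-triggered: %d then %d fd(s) ready\n", r1, r2);

        return 0;
}
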
1819 * know that default_wake_function/ttwu will only fail if the thread is already
1828 list_del_init(&wq_entry->entry); in ep_autoremove_wake_function()
1833 * ep_poll - Retrieves ready events and delivers them to the caller-supplied
1875 write_lock_irq(&ep->lock); in ep_poll()
1877 write_unlock_irq(&ep->lock); in ep_poll()
1892 * Busy poll timed out. Drop NAPI ID for now, we can add in ep_poll()
1905 * lost. This is also good performance-wise, because on in ep_poll()
1907 * explicitly, thus ep->lock is not taken, which halts the in ep_poll()
1919 write_lock_irq(&ep->lock); in ep_poll()
1929 * plays with two lists (->rdllist and ->ovflist) and there in ep_poll()
1937 res = -EINTR; in ep_poll()
1939 __add_wait_queue_exclusive(&ep->wq, &wait); in ep_poll()
1941 write_unlock_irq(&ep->lock); in ep_poll()
1958 write_lock_irq(&ep->lock); in ep_poll()
1967 __remove_wait_queue(&ep->wq, &wait); in ep_poll()
1968 write_unlock_irq(&ep->lock); in ep_poll()
1974 * Always short-circuit for fatal signals to allow in ep_poll()
1979 res = -EINTR; in ep_poll()
1994 * ep_loop_check_proc - Callback function to be passed to the @ep_call_nested()
2001 * @cookie: Original cookie for this call. This is the top-of-the-chain epoll
2006 * structure @ep does not violate the constraints, or -1 otherwise.
2012 struct eventpoll *ep = file->private_data; in ep_loop_check_proc()
2017 mutex_lock_nested(&ep->mtx, call_nests + 1); in ep_loop_check_proc()
2018 ep->gen = loop_check_gen; in ep_loop_check_proc()
2019 for (rbp = rb_first_cached(&ep->rbr); rbp; rbp = rb_next(rbp)) { in ep_loop_check_proc()
2021 if (unlikely(is_file_epoll(epi->ffd.file))) { in ep_loop_check_proc()
2022 ep_tovisit = epi->ffd.file->private_data; in ep_loop_check_proc()
2023 if (ep_tovisit->gen == loop_check_gen) in ep_loop_check_proc()
2026 ep_loop_check_proc, epi->ffd.file, in ep_loop_check_proc()
2039 if (list_empty(&epi->ffd.file->f_tfile_llink)) { in ep_loop_check_proc()
2040 if (get_file_rcu(epi->ffd.file)) in ep_loop_check_proc()
2041 list_add(&epi->ffd.file->f_tfile_llink, in ep_loop_check_proc()
2046 mutex_unlock(&ep->mtx); in ep_loop_check_proc()
2052 * ep_loop_check - Performs a check to verify that adding an epoll file (@file)
2060 * structure @ep does not violate the constraints, or -1 otherwise.
2076 list_del_init(&file->f_tfile_llink); in clear_tfile_check_list()
2095 return -EINVAL; in do_epoll_create()
2117 ep->file = file; in do_epoll_create()
2136 return -EINVAL; in SYSCALL_DEFINE1()
2150 return -EAGAIN; in epoll_mutex_lock()
2163 error = -EBADF; in do_epoll_ctl()
2173 /* The target file descriptor must support poll */ in do_epoll_ctl()
2174 error = -EPERM; in do_epoll_ctl()
2187 error = -EINVAL; in do_epoll_ctl()
2192 * epoll adds to the wakeup queue at EPOLL_CTL_ADD time only, in do_epoll_ctl()
2196 if (ep_op_has_event(op) && (epds->events & EPOLLEXCLUSIVE)) { in do_epoll_ctl()
2200 (epds->events & ~EPOLLEXCLUSIVE_OK_BITS))) in do_epoll_ctl()
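
In userspace terms, the checks above mean EPOLLEXCLUSIVE is accepted only when the fd is first added, typically so several epoll instances can share a listening socket without thundering-herd wakeups. A hedged usage sketch (illustrative only):

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/epoll.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
        int ep = epoll_create1(0);
        int sk = socket(AF_INET, SOCK_STREAM, 0);
        struct epoll_event ev = { .events = EPOLLIN | EPOLLEXCLUSIVE };

        /* Allowed: EPOLLEXCLUSIVE at EPOLL_CTL_ADD time. */
        if (epoll_ctl(ep, EPOLL_CTL_ADD, sk, &ev) < 0)
                printf("ADD failed: %s\n", strerror(errno));

        /* Rejected with EINVAL: EPOLLEXCLUSIVE cannot be set by EPOLL_CTL_MOD. */
        if (epoll_ctl(ep, EPOLL_CTL_MOD, sk, &ev) < 0)
                printf("MOD with EPOLLEXCLUSIVE failed as expected: %s\n",
                       strerror(errno));

        close(sk);
        close(ep);
        return 0;
}
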
2208 ep = f.file->private_data; in do_epoll_ctl()
2225 error = epoll_mutex_lock(&ep->mtx, 0, nonblock); in do_epoll_ctl()
2229 if (!list_empty(&f.file->f_ep_links) || in do_epoll_ctl()
2230 ep->gen == loop_check_gen || in do_epoll_ctl()
2232 mutex_unlock(&ep->mtx); in do_epoll_ctl()
2239 error = -ELOOP; in do_epoll_ctl()
2244 list_add(&tf.file->f_tfile_llink, in do_epoll_ctl()
2247 error = epoll_mutex_lock(&ep->mtx, 0, nonblock); in do_epoll_ctl()
2251 tep = tf.file->private_data; in do_epoll_ctl()
2252 error = epoll_mutex_lock(&tep->mtx, 1, nonblock); in do_epoll_ctl()
2254 mutex_unlock(&ep->mtx); in do_epoll_ctl()
2268 error = -EINVAL; in do_epoll_ctl()
2272 epds->events |= EPOLLERR | EPOLLHUP; in do_epoll_ctl()
2275 error = -EEXIST; in do_epoll_ctl()
2281 error = -ENOENT; in do_epoll_ctl()
2285 if (!(epi->event.events & EPOLLEXCLUSIVE)) { in do_epoll_ctl()
2286 epds->events |= EPOLLERR | EPOLLHUP; in do_epoll_ctl()
2290 error = -ENOENT; in do_epoll_ctl()
2294 mutex_unlock(&tep->mtx); in do_epoll_ctl()
2295 mutex_unlock(&ep->mtx); in do_epoll_ctl()
2324 return -EFAULT; in SYSCALL_DEFINE4()
2342 return -EINVAL; in do_epoll_wait()
2346 return -EFAULT; in do_epoll_wait()
2351 return -EBADF; in do_epoll_wait()
2357 error = -EINVAL; in do_epoll_wait()
2365 ep = f.file->private_data; in do_epoll_wait()
2400 restore_saved_sigmask_unless(error == -EINTR); in SYSCALL_DEFINE6()
2423 restore_saved_sigmask_unless(err == -EINTR); in COMPAT_SYSCALL_DEFINE6()
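
The sigmask handling in the two entry points above corresponds to the epoll_pwait() userspace contract: the process signal mask is replaced only for the duration of the wait and restored afterwards, except that a pending caught signal is delivered first when the call returns EINTR. A minimal usage sketch (not from this file):

#define _GNU_SOURCE
#include <signal.h>
#include <stdio.h>
#include <sys/epoll.h>

static void on_sigint(int sig) { (void)sig; }

int main(void)
{
        int ep = epoll_create1(0);
        struct epoll_event ev;
        sigset_t blocked, waitmask;

        signal(SIGINT, on_sigint);

        /* Keep SIGINT blocked during normal execution... */
        sigemptyset(&blocked);
        sigaddset(&blocked, SIGINT);
        sigprocmask(SIG_BLOCK, &blocked, NULL);

        /* ...but let it interrupt the wait: epoll_pwait() installs this mask
         * atomically while sleeping and restores the old one on return. */
        sigemptyset(&waitmask);
        int n = epoll_pwait(ep, &ev, 1, 2000, &waitmask);
        if (n < 0)
                perror("epoll_pwait");          /* EINTR if SIGINT arrived */
        else
                printf("%d fd(s) ready\n", n);
        return 0;
}
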
2437 max_user_watches = (((si.totalram - si.totalhigh) / 25) << PAGE_SHIFT) / in eventpoll_init()
2449 * using an extra cache line on 64-bit (and smaller) CPUs in eventpoll_init()