Lines Matching +full:lock +full:- +full:offset
1 // SPDX-License-Identifier: GPL-2.0-only
30 #include <linux/backing-dev.h>
62 /* protected with swap_lock. reading in vm_swap_full() doesn't need lock */
64 static int least_priority = -1;
68 static const char Bad_offset[] = "Bad swap offset entry ";
69 static const char Unused_offset[] = "Unused swap offset entry ";
83 * This uses its own lock instead of swap_lock because when a
84 * swap_info_struct changes between not-full/full, it needs to
85 * add/remove itself to/from this list, but the swap_info_struct->lock
87 * before any swap_info_struct->lock.
129 unsigned long offset, unsigned long flags) in __try_to_reclaim_swap() argument
131 swp_entry_t entry = swp_entry(si->type, offset); in __try_to_reclaim_swap()
135 page = find_get_page(swap_address_space(entry), offset); in __try_to_reclaim_swap()
140 * called by vmscan.c at reclaiming pages. So, we hold a lock on a page, in __try_to_reclaim_swap()
158 struct rb_node *rb = rb_first(&sis->swap_extent_root); in first_se()
164 struct rb_node *rb = rb_next(&se->rb_node); in next_se()
170 * to allow the swap device to optimize its wear-levelling.
181 start_block = (se->start_block + 1) << (PAGE_SHIFT - 9); in discard_swap()
182 nr_blocks = ((sector_t)se->nr_pages - 1) << (PAGE_SHIFT - 9); in discard_swap()
184 err = blkdev_issue_discard(si->bdev, start_block, in discard_swap()
192 start_block = se->start_block << (PAGE_SHIFT - 9); in discard_swap()
193 nr_blocks = (sector_t)se->nr_pages << (PAGE_SHIFT - 9); in discard_swap()
195 err = blkdev_issue_discard(si->bdev, start_block, in discard_swap()
202 return err; /* That will often be -EOPNOTSUPP */ in discard_swap()
206 offset_to_swap_extent(struct swap_info_struct *sis, unsigned long offset) in offset_to_swap_extent() argument
211 rb = sis->swap_extent_root.rb_node; in offset_to_swap_extent()
214 if (offset < se->start_page) in offset_to_swap_extent()
215 rb = rb->rb_left; in offset_to_swap_extent()
216 else if (offset >= se->start_page + se->nr_pages) in offset_to_swap_extent()
217 rb = rb->rb_right; in offset_to_swap_extent()
230 pgoff_t offset; in swap_page_sector() local
232 offset = __page_file_index(page); in swap_page_sector()
233 se = offset_to_swap_extent(sis, offset); in swap_page_sector()
234 sector = se->start_block + (offset - se->start_page); in swap_page_sector()
235 return sector << (PAGE_SHIFT - 9); in swap_page_sector()
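(A worked example of the arithmetic above, using hypothetical numbers rather than values from this listing: assuming PAGE_SHIFT == 12 and an extent with start_page = 100, nr_pages = 200, start_block = 5000, a page offset of 105 maps to block 5000 + (105 - 100) = 5005, and the final shift by PAGE_SHIFT - 9 = 3 returns 512-byte sector 5005 << 3 = 40040.)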
240 * to allow the swap device to optimize its wear-levelling.
248 pgoff_t offset = start_page - se->start_page; in discard_swap_cluster() local
249 sector_t start_block = se->start_block + offset; in discard_swap_cluster()
250 sector_t nr_blocks = se->nr_pages - offset; in discard_swap_cluster()
255 nr_pages -= nr_blocks; in discard_swap_cluster()
257 start_block <<= PAGE_SHIFT - 9; in discard_swap_cluster()
258 nr_blocks <<= PAGE_SHIFT - 9; in discard_swap_cluster()
259 if (blkdev_issue_discard(si->bdev, start_block, in discard_swap_cluster()
285 info->flags = flag; in cluster_set_flag()
290 return info->data; in cluster_count()
296 info->data = c; in cluster_set_count()
302 info->flags = f; in cluster_set_count_flag()
303 info->data = c; in cluster_set_count_flag()
308 return info->data; in cluster_next()
314 info->data = n; in cluster_set_next()
320 info->flags = f; in cluster_set_next_flag()
321 info->data = n; in cluster_set_next_flag()
326 return info->flags & CLUSTER_FLAG_FREE; in cluster_is_free()
331 return info->flags & CLUSTER_FLAG_NEXT_NULL; in cluster_is_null()
336 info->flags = CLUSTER_FLAG_NEXT_NULL; in cluster_set_null()
337 info->data = 0; in cluster_set_null()
343 return info->flags & CLUSTER_FLAG_HUGE; in cluster_is_huge()
349 info->flags &= ~CLUSTER_FLAG_HUGE; in cluster_clear_huge()
353 unsigned long offset) in lock_cluster() argument
357 ci = si->cluster_info; in lock_cluster()
359 ci += offset / SWAPFILE_CLUSTER; in lock_cluster()
360 spin_lock(&ci->lock); in lock_cluster()
368 spin_unlock(&ci->lock); in unlock_cluster()
373 * swap_cluster_info if SSD-style cluster-based locking is in place.
376 struct swap_info_struct *si, unsigned long offset) in lock_cluster_or_swap_info() argument
380 /* Try to use fine-grained SSD-style locking if available: */ in lock_cluster_or_swap_info()
381 ci = lock_cluster(si, offset); in lock_cluster_or_swap_info()
384 spin_lock(&si->lock); in lock_cluster_or_swap_info()
395 spin_unlock(&si->lock); in unlock_cluster_or_swap_info()
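The two helpers above are used as a matched pair around swap_map accesses. A minimal caller sketch (it mirrors the page_swapcount() lines matched further down; it is not an additional matched line from the file):

	ci = lock_cluster_or_swap_info(si, offset);
	count = swap_count(si->swap_map[offset]);
	unlock_cluster_or_swap_info(si, ci);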
400 return cluster_is_null(&list->head); in cluster_list_empty()
405 return cluster_next(&list->head); in cluster_list_first()
410 cluster_set_null(&list->head); in cluster_list_init()
411 cluster_set_null(&list->tail); in cluster_list_init()
419 cluster_set_next_flag(&list->head, idx, 0); in cluster_list_add_tail()
420 cluster_set_next_flag(&list->tail, idx, 0); in cluster_list_add_tail()
423 unsigned int tail = cluster_next(&list->tail); in cluster_list_add_tail()
426 * Nested cluster lock, but both cluster locks are in cluster_list_add_tail()
427 * only acquired when we held swap_info_struct->lock in cluster_list_add_tail()
430 spin_lock_nested(&ci_tail->lock, SINGLE_DEPTH_NESTING); in cluster_list_add_tail()
432 spin_unlock(&ci_tail->lock); in cluster_list_add_tail()
433 cluster_set_next_flag(&list->tail, idx, 0); in cluster_list_add_tail()
442 idx = cluster_next(&list->head); in cluster_list_del_first()
443 if (cluster_next(&list->tail) == idx) { in cluster_list_del_first()
444 cluster_set_null(&list->head); in cluster_list_del_first()
445 cluster_set_null(&list->tail); in cluster_list_del_first()
447 cluster_set_next_flag(&list->head, in cluster_list_del_first()
459 * si->swap_map directly. To make sure the discarding cluster isn't in swap_cluster_schedule_discard()
463 memset(si->swap_map + idx * SWAPFILE_CLUSTER, in swap_cluster_schedule_discard()
466 cluster_list_add_tail(&si->discard_clusters, si->cluster_info, idx); in swap_cluster_schedule_discard()
468 schedule_work(&si->discard_work); in swap_cluster_schedule_discard()
473 struct swap_cluster_info *ci = si->cluster_info; in __free_cluster()
476 cluster_list_add_tail(&si->free_clusters, ci, idx); in __free_cluster()
481 * will be added to free cluster list. caller should hold si->lock.
488 info = si->cluster_info; in swap_do_scheduled_discard()
490 while (!cluster_list_empty(&si->discard_clusters)) { in swap_do_scheduled_discard()
491 idx = cluster_list_del_first(&si->discard_clusters, info); in swap_do_scheduled_discard()
492 spin_unlock(&si->lock); in swap_do_scheduled_discard()
497 spin_lock(&si->lock); in swap_do_scheduled_discard()
500 memset(si->swap_map + idx * SWAPFILE_CLUSTER, in swap_do_scheduled_discard()
512 spin_lock(&si->lock); in swap_discard_work()
514 spin_unlock(&si->lock); in swap_discard_work()
519 struct swap_cluster_info *ci = si->cluster_info; in alloc_cluster()
521 VM_BUG_ON(cluster_list_first(&si->free_clusters) != idx); in alloc_cluster()
522 cluster_list_del_first(&si->free_clusters, ci); in alloc_cluster()
528 struct swap_cluster_info *ci = si->cluster_info + idx; in free_cluster()
536 if ((si->flags & (SWP_WRITEOK | SWP_PAGE_DISCARD)) == in free_cluster()
579 cluster_count(&cluster_info[idx]) - 1); in dec_cluster_info_page()
591 unsigned long offset) in scan_swap_map_ssd_cluster_conflict() argument
596 offset /= SWAPFILE_CLUSTER; in scan_swap_map_ssd_cluster_conflict()
597 conflict = !cluster_list_empty(&si->free_clusters) && in scan_swap_map_ssd_cluster_conflict()
598 offset != cluster_list_first(&si->free_clusters) && in scan_swap_map_ssd_cluster_conflict()
599 cluster_is_free(&si->cluster_info[offset]); in scan_swap_map_ssd_cluster_conflict()
604 percpu_cluster = this_cpu_ptr(si->percpu_cluster); in scan_swap_map_ssd_cluster_conflict()
605 cluster_set_null(&percpu_cluster->index); in scan_swap_map_ssd_cluster_conflict()
614 unsigned long *offset, unsigned long *scan_base) in scan_swap_map_try_ssd_cluster() argument
621 cluster = this_cpu_ptr(si->percpu_cluster); in scan_swap_map_try_ssd_cluster()
622 if (cluster_is_null(&cluster->index)) { in scan_swap_map_try_ssd_cluster()
623 if (!cluster_list_empty(&si->free_clusters)) { in scan_swap_map_try_ssd_cluster()
624 cluster->index = si->free_clusters.head; in scan_swap_map_try_ssd_cluster()
625 cluster->next = cluster_next(&cluster->index) * in scan_swap_map_try_ssd_cluster()
627 } else if (!cluster_list_empty(&si->discard_clusters)) { in scan_swap_map_try_ssd_cluster()
631 * reread cluster_next_cpu since we dropped si->lock in scan_swap_map_try_ssd_cluster()
634 *scan_base = this_cpu_read(*si->cluster_next_cpu); in scan_swap_map_try_ssd_cluster()
635 *offset = *scan_base; in scan_swap_map_try_ssd_cluster()
645 tmp = cluster->next; in scan_swap_map_try_ssd_cluster()
646 max = min_t(unsigned long, si->max, in scan_swap_map_try_ssd_cluster()
647 (cluster_next(&cluster->index) + 1) * SWAPFILE_CLUSTER); in scan_swap_map_try_ssd_cluster()
651 if (!si->swap_map[tmp]) in scan_swap_map_try_ssd_cluster()
658 cluster_set_null(&cluster->index); in scan_swap_map_try_ssd_cluster()
661 cluster->next = tmp + 1; in scan_swap_map_try_ssd_cluster()
662 *offset = tmp; in scan_swap_map_try_ssd_cluster()
672 plist_del(&p->avail_lists[nid], &swap_avail_heads[nid]); in __del_from_avail_list()
682 static void swap_range_alloc(struct swap_info_struct *si, unsigned long offset, in swap_range_alloc() argument
685 unsigned int end = offset + nr_entries - 1; in swap_range_alloc()
687 if (offset == si->lowest_bit) in swap_range_alloc()
688 si->lowest_bit += nr_entries; in swap_range_alloc()
689 if (end == si->highest_bit) in swap_range_alloc()
690 WRITE_ONCE(si->highest_bit, si->highest_bit - nr_entries); in swap_range_alloc()
691 si->inuse_pages += nr_entries; in swap_range_alloc()
692 if (si->inuse_pages == si->pages) { in swap_range_alloc()
693 si->lowest_bit = si->max; in swap_range_alloc()
694 si->highest_bit = 0; in swap_range_alloc()
705 WARN_ON(!plist_node_empty(&p->avail_lists[nid])); in add_to_avail_list()
706 plist_add(&p->avail_lists[nid], &swap_avail_heads[nid]); in add_to_avail_list()
711 static void swap_range_free(struct swap_info_struct *si, unsigned long offset, in swap_range_free() argument
714 unsigned long begin = offset; in swap_range_free()
715 unsigned long end = offset + nr_entries - 1; in swap_range_free()
719 if (offset < si->lowest_bit) in swap_range_free()
720 si->lowest_bit = offset; in swap_range_free()
721 if (end > si->highest_bit) { in swap_range_free()
722 bool was_full = !si->highest_bit; in swap_range_free()
724 WRITE_ONCE(si->highest_bit, end); in swap_range_free()
725 if (was_full && (si->flags & SWP_WRITEOK)) in swap_range_free()
731 si->inuse_pages -= nr_entries; in swap_range_free()
732 if (si->flags & SWP_BLKDEV) in swap_range_free()
734 si->bdev->bd_disk->fops->swap_slot_free_notify; in swap_range_free()
737 while (offset <= end) { in swap_range_free()
738 arch_swap_invalidate_page(si->type, offset); in swap_range_free()
739 frontswap_invalidate_page(si->type, offset); in swap_range_free()
741 swap_slot_free_notify(si->bdev, offset); in swap_range_free()
742 offset++; in swap_range_free()
744 clear_shadow_from_swap_cache(si->type, begin, end); in swap_range_free()
751 if (!(si->flags & SWP_SOLIDSTATE)) { in set_cluster_next()
752 si->cluster_next = next; in set_cluster_next()
756 prev = this_cpu_read(*si->cluster_next_cpu); in set_cluster_next()
759 * another trunk randomly to avoid lock contention on swap in set_cluster_next()
765 if (si->highest_bit <= si->lowest_bit) in set_cluster_next()
767 next = si->lowest_bit + in set_cluster_next()
768 prandom_u32_max(si->highest_bit - si->lowest_bit + 1); in set_cluster_next()
770 next = max_t(unsigned int, next, si->lowest_bit); in set_cluster_next()
772 this_cpu_write(*si->cluster_next_cpu, next); in set_cluster_next()
780 unsigned long offset; in scan_swap_map_slots() local
790 * way, however, we resort to first-free allocation, starting in scan_swap_map_slots()
793 * overall disk seek times between swap pages. -- sct in scan_swap_map_slots()
794 * But we do now try to find an empty cluster. -Andrea in scan_swap_map_slots()
798 si->flags += SWP_SCANNING; in scan_swap_map_slots()
800 * Use percpu scan base for SSD to reduce lock contention on in scan_swap_map_slots()
804 if (si->flags & SWP_SOLIDSTATE) in scan_swap_map_slots()
805 scan_base = this_cpu_read(*si->cluster_next_cpu); in scan_swap_map_slots()
807 scan_base = si->cluster_next; in scan_swap_map_slots()
808 offset = scan_base; in scan_swap_map_slots()
811 if (si->cluster_info) { in scan_swap_map_slots()
812 if (!scan_swap_map_try_ssd_cluster(si, &offset, &scan_base)) in scan_swap_map_slots()
814 } else if (unlikely(!si->cluster_nr--)) { in scan_swap_map_slots()
815 if (si->pages - si->inuse_pages < SWAPFILE_CLUSTER) { in scan_swap_map_slots()
816 si->cluster_nr = SWAPFILE_CLUSTER - 1; in scan_swap_map_slots()
820 spin_unlock(&si->lock); in scan_swap_map_slots()
825 * If seek is cheap, that is the SWP_SOLIDSTATE si->cluster_info in scan_swap_map_slots()
828 scan_base = offset = si->lowest_bit; in scan_swap_map_slots()
829 last_in_cluster = offset + SWAPFILE_CLUSTER - 1; in scan_swap_map_slots()
832 for (; last_in_cluster <= si->highest_bit; offset++) { in scan_swap_map_slots()
833 if (si->swap_map[offset]) in scan_swap_map_slots()
834 last_in_cluster = offset + SWAPFILE_CLUSTER; in scan_swap_map_slots()
835 else if (offset == last_in_cluster) { in scan_swap_map_slots()
836 spin_lock(&si->lock); in scan_swap_map_slots()
837 offset -= SWAPFILE_CLUSTER - 1; in scan_swap_map_slots()
838 si->cluster_next = offset; in scan_swap_map_slots()
839 si->cluster_nr = SWAPFILE_CLUSTER - 1; in scan_swap_map_slots()
842 if (unlikely(--latency_ration < 0)) { in scan_swap_map_slots()
848 offset = scan_base; in scan_swap_map_slots()
849 spin_lock(&si->lock); in scan_swap_map_slots()
850 si->cluster_nr = SWAPFILE_CLUSTER - 1; in scan_swap_map_slots()
854 if (si->cluster_info) { in scan_swap_map_slots()
855 while (scan_swap_map_ssd_cluster_conflict(si, offset)) { in scan_swap_map_slots()
859 if (!scan_swap_map_try_ssd_cluster(si, &offset, in scan_swap_map_slots()
864 if (!(si->flags & SWP_WRITEOK)) in scan_swap_map_slots()
866 if (!si->highest_bit) in scan_swap_map_slots()
868 if (offset > si->highest_bit) in scan_swap_map_slots()
869 scan_base = offset = si->lowest_bit; in scan_swap_map_slots()
871 ci = lock_cluster(si, offset); in scan_swap_map_slots()
872 /* reuse swap entry of cache-only swap if not busy. */ in scan_swap_map_slots()
873 if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) { in scan_swap_map_slots()
876 spin_unlock(&si->lock); in scan_swap_map_slots()
877 swap_was_freed = __try_to_reclaim_swap(si, offset, TTRS_ANYWAY); in scan_swap_map_slots()
878 spin_lock(&si->lock); in scan_swap_map_slots()
885 if (si->swap_map[offset]) { in scan_swap_map_slots()
892 WRITE_ONCE(si->swap_map[offset], usage); in scan_swap_map_slots()
893 inc_cluster_info_page(si, si->cluster_info, offset); in scan_swap_map_slots()
896 swap_range_alloc(si, offset, 1); in scan_swap_map_slots()
897 slots[n_ret++] = swp_entry(si->type, offset); in scan_swap_map_slots()
900 if ((n_ret == nr) || (offset >= si->highest_bit)) in scan_swap_map_slots()
906 if (unlikely(--latency_ration < 0)) { in scan_swap_map_slots()
909 spin_unlock(&si->lock); in scan_swap_map_slots()
911 spin_lock(&si->lock); in scan_swap_map_slots()
916 if (si->cluster_info) { in scan_swap_map_slots()
917 if (scan_swap_map_try_ssd_cluster(si, &offset, &scan_base)) in scan_swap_map_slots()
919 } else if (si->cluster_nr && !si->swap_map[++offset]) { in scan_swap_map_slots()
920 /* non-ssd case, still more slots in cluster? */ in scan_swap_map_slots()
921 --si->cluster_nr; in scan_swap_map_slots()
927 * try to scan a little more quickly with lock held unless we in scan_swap_map_slots()
933 if (offset < scan_base) in scan_swap_map_slots()
936 scan_limit = si->highest_bit; in scan_swap_map_slots()
937 for (; offset <= scan_limit && --latency_ration > 0; in scan_swap_map_slots()
938 offset++) { in scan_swap_map_slots()
939 if (!si->swap_map[offset]) in scan_swap_map_slots()
945 set_cluster_next(si, offset + 1); in scan_swap_map_slots()
946 si->flags -= SWP_SCANNING; in scan_swap_map_slots()
950 spin_unlock(&si->lock); in scan_swap_map_slots()
951 while (++offset <= READ_ONCE(si->highest_bit)) { in scan_swap_map_slots()
952 if (data_race(!si->swap_map[offset])) { in scan_swap_map_slots()
953 spin_lock(&si->lock); in scan_swap_map_slots()
957 READ_ONCE(si->swap_map[offset]) == SWAP_HAS_CACHE) { in scan_swap_map_slots()
958 spin_lock(&si->lock); in scan_swap_map_slots()
961 if (unlikely(--latency_ration < 0)) { in scan_swap_map_slots()
967 offset = si->lowest_bit; in scan_swap_map_slots()
968 while (offset < scan_base) { in scan_swap_map_slots()
969 if (data_race(!si->swap_map[offset])) { in scan_swap_map_slots()
970 spin_lock(&si->lock); in scan_swap_map_slots()
974 READ_ONCE(si->swap_map[offset]) == SWAP_HAS_CACHE) { in scan_swap_map_slots()
975 spin_lock(&si->lock); in scan_swap_map_slots()
978 if (unlikely(--latency_ration < 0)) { in scan_swap_map_slots()
983 offset++; in scan_swap_map_slots()
985 spin_lock(&si->lock); in scan_swap_map_slots()
988 si->flags -= SWP_SCANNING; in scan_swap_map_slots()
997 unsigned long offset, i; in swap_alloc_cluster() local
1009 if (cluster_list_empty(&si->free_clusters)) in swap_alloc_cluster()
1012 idx = cluster_list_first(&si->free_clusters); in swap_alloc_cluster()
1013 offset = idx * SWAPFILE_CLUSTER; in swap_alloc_cluster()
1014 ci = lock_cluster(si, offset); in swap_alloc_cluster()
1018 map = si->swap_map + offset; in swap_alloc_cluster()
1022 swap_range_alloc(si, offset, SWAPFILE_CLUSTER); in swap_alloc_cluster()
1023 *slot = swp_entry(si->type, offset); in swap_alloc_cluster()
1031 unsigned long offset = idx * SWAPFILE_CLUSTER; in swap_free_cluster() local
1034 ci = lock_cluster(si, offset); in swap_free_cluster()
1035 memset(si->swap_map + offset, 0, SWAPFILE_CLUSTER); in swap_free_cluster()
1039 swap_range_free(si, offset, SWAPFILE_CLUSTER); in swap_free_cluster()
1083 /* requeue si to after same-priority siblings */ in get_swap_pages()
1084 plist_requeue(&si->avail_lists[node], &swap_avail_heads[node]); in get_swap_pages()
1086 spin_lock(&si->lock); in get_swap_pages()
1087 if (!si->highest_bit || !(si->flags & SWP_WRITEOK)) { in get_swap_pages()
1089 if (plist_node_empty(&si->avail_lists[node])) { in get_swap_pages()
1090 spin_unlock(&si->lock); in get_swap_pages()
1093 WARN(!si->highest_bit, in get_swap_pages()
1095 si->type); in get_swap_pages()
1096 WARN(!(si->flags & SWP_WRITEOK), in get_swap_pages()
1098 si->type); in get_swap_pages()
1100 spin_unlock(&si->lock); in get_swap_pages()
1104 if (si->flags & SWP_BLKDEV) in get_swap_pages()
1109 spin_unlock(&si->lock); in get_swap_pages()
1112 pr_debug("scan_swap_map of si %d failed to find offset\n", in get_swap_pages()
1113 si->type); in get_swap_pages()
1119 * and since scan_swap_map() can drop the si->lock, multiple in get_swap_pages()
1122 * up between us dropping swap_avail_lock and taking si->lock. in get_swap_pages()
1128 if (plist_node_empty(&next->avail_lists[node])) in get_swap_pages()
1136 atomic_long_add((long)(n_goal - n_ret) * size, in get_swap_pages()
1146 pgoff_t offset; in get_swap_page_of_type() local
1152 spin_lock(&si->lock); in get_swap_page_of_type()
1153 if (si->flags & SWP_WRITEOK) { in get_swap_page_of_type()
1155 offset = scan_swap_map(si, 1); in get_swap_page_of_type()
1156 if (offset) { in get_swap_page_of_type()
1160 spin_unlock(&si->lock); in get_swap_page_of_type()
1161 return swp_entry(type, offset); in get_swap_page_of_type()
1164 spin_unlock(&si->lock); in get_swap_page_of_type()
1172 unsigned long offset; in __swap_info_get() local
1179 if (data_race(!(p->flags & SWP_USED))) in __swap_info_get()
1181 offset = swp_offset(entry); in __swap_info_get()
1182 if (offset >= p->max) in __swap_info_get()
1205 if (data_race(!p->swap_map[swp_offset(entry)])) in _swap_info_get()
1221 spin_lock(&p->lock); in swap_info_get()
1234 spin_unlock(&q->lock); in swap_info_get_cont()
1236 spin_lock(&p->lock); in swap_info_get_cont()
1242 unsigned long offset, in __swap_entry_free_locked() argument
1248 count = p->swap_map[offset]; in __swap_entry_free_locked()
1264 if (swap_count_continued(p, offset, count)) in __swap_entry_free_locked()
1269 count--; in __swap_entry_free_locked()
1274 WRITE_ONCE(p->swap_map[offset], usage); in __swap_entry_free_locked()
1276 WRITE_ONCE(p->swap_map[offset], SWAP_HAS_CACHE); in __swap_entry_free_locked()
1289 * enable_swap_info() or swapoff(). So if "si->flags & SWP_VALID" is
1290 * true, the si->map, si->cluster_info, etc. must be valid in the
1296 * swapoff, such as page lock, page table lock, etc. The caller must
1319 unsigned long offset; in get_swap_device() local
1328 if (data_race(!(si->flags & SWP_VALID))) in get_swap_device()
1330 offset = swp_offset(entry); in get_swap_device()
1331 if (offset >= si->max) in get_swap_device()
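The fragments above describe the stability guarantee get_swap_device() provides. A hedged sketch of the intended caller pattern (put_swap_device() is assumed from the corresponding header and is not among the matched lines):

	si = get_swap_device(entry);
	if (si) {
		/* si->swap_map, si->cluster_info etc. are stable here */
		count = READ_ONCE(si->swap_map[swp_offset(entry)]);
		put_swap_device(si);
	}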
1348 unsigned long offset = swp_offset(entry); in __swap_entry_free() local
1351 ci = lock_cluster_or_swap_info(p, offset); in __swap_entry_free()
1352 usage = __swap_entry_free_locked(p, offset, 1); in __swap_entry_free()
1363 unsigned long offset = swp_offset(entry); in swap_entry_free() local
1366 ci = lock_cluster(p, offset); in swap_entry_free()
1367 count = p->swap_map[offset]; in swap_entry_free()
1369 p->swap_map[offset] = 0; in swap_entry_free()
1370 dec_cluster_info_page(p, p->cluster_info, offset); in swap_entry_free()
1374 swap_range_free(p, offset, 1); in swap_entry_free()
1395 unsigned long offset = swp_offset(entry); in put_swap_page() local
1396 unsigned long idx = offset / SWAPFILE_CLUSTER; in put_swap_page()
1408 ci = lock_cluster_or_swap_info(si, offset); in put_swap_page()
1411 map = si->swap_map + offset; in put_swap_page()
1421 spin_lock(&si->lock); in put_swap_page()
1424 spin_unlock(&si->lock); in put_swap_page()
1429 if (!__swap_entry_free_locked(si, offset + i, SWAP_HAS_CACHE)) { in put_swap_page()
1432 if (i == size - 1) in put_swap_page()
1434 lock_cluster_or_swap_info(si, offset); in put_swap_page()
1445 unsigned long offset = swp_offset(entry); in split_swap_cluster() local
1449 return -EBUSY; in split_swap_cluster()
1450 ci = lock_cluster(si, offset); in split_swap_cluster()
1461 return (int)swp_type(*e1) - (int)swp_type(*e2); in swp_entry_cmp()
1476 * Sort swap entries by swap device, so each lock is only taken once. in swapcache_free_entries()
1489 spin_unlock(&p->lock); in swapcache_free_entries()
1504 unsigned long offset; in page_swapcount() local
1509 offset = swp_offset(entry); in page_swapcount()
1510 ci = lock_cluster_or_swap_info(p, offset); in page_swapcount()
1511 count = swap_count(p->swap_map[offset]); in page_swapcount()
1520 pgoff_t offset = swp_offset(entry); in __swap_count() local
1525 count = swap_count(si->swap_map[offset]); in __swap_count()
1534 pgoff_t offset = swp_offset(entry); in swap_swapcount() local
1537 ci = lock_cluster_or_swap_info(si, offset); in swap_swapcount()
1538 count = swap_count(si->swap_map[offset]); in swap_swapcount()
1571 pgoff_t offset; in swp_swapcount() local
1578 offset = swp_offset(entry); in swp_swapcount()
1580 ci = lock_cluster_or_swap_info(p, offset); in swp_swapcount()
1582 count = swap_count(p->swap_map[offset]); in swp_swapcount()
1589 page = vmalloc_to_page(p->swap_map + offset); in swp_swapcount()
1590 offset &= ~PAGE_MASK; in swp_swapcount()
1596 tmp_count = map[offset]; in swp_swapcount()
1611 unsigned char *map = si->swap_map; in swap_page_trans_huge_swapped()
1613 unsigned long offset = round_down(roffset, SWAPFILE_CLUSTER); in swap_page_trans_huge_swapped() local
1617 ci = lock_cluster_or_swap_info(si, offset); in swap_page_trans_huge_swapped()
1624 if (swap_count(map[offset + i])) { in swap_page_trans_huge_swapped()
1654 unsigned long offset = 0; in page_trans_huge_map_swapcount() local
1681 map = si->swap_map; in page_trans_huge_map_swapcount()
1682 offset = swp_offset(entry); in page_trans_huge_map_swapcount()
1686 ci = lock_cluster(si, offset); in page_trans_huge_map_swapcount()
1691 swapcount = swap_count(map[offset + i]); in page_trans_huge_map_swapcount()
1698 map_swapcount -= 1; in page_trans_huge_map_swapcount()
1699 _total_mapcount -= HPAGE_PMD_NR; in page_trans_huge_map_swapcount()
1714 * to it. And as a side-effect, free up its swap: because the old content
1747 if (p->flags & SWP_STABLE_WRITES) { in reuse_swap_page()
1748 spin_unlock(&p->lock); in reuse_swap_page()
1751 spin_unlock(&p->lock); in reuse_swap_page()
1776 * - most probably a call from __try_to_reclaim_swap() while in try_to_free_swap()
1778 * but conceivably even a call from memory reclaim - will free in try_to_free_swap()
1824 * @offset - number of the PAGE_SIZE-sized block of the device, starting
1829 int swap_type_of(dev_t device, sector_t offset) in swap_type_of() argument
1834 return -1; in swap_type_of()
1840 if (!(sis->flags & SWP_WRITEOK)) in swap_type_of()
1843 if (device == sis->bdev->bd_dev) { in swap_type_of()
1846 if (se->start_block == offset) { in swap_type_of()
1853 return -ENODEV; in swap_type_of()
1864 if (!(sis->flags & SWP_WRITEOK)) in find_first_swap()
1866 *device = sis->bdev->bd_dev; in find_first_swap()
1871 return -ENODEV; in find_first_swap()
1875 * Get the (PAGE_SIZE) block corresponding to given offset on the swapdev
1878 sector_t swapdev_block(int type, pgoff_t offset) in swapdev_block() argument
1883 if (!si || !(si->flags & SWP_WRITEOK)) in swapdev_block()
1885 return map_swap_entry(swp_entry(type, offset), &bdev); in swapdev_block()
1902 spin_lock(&sis->lock); in count_swap_pages()
1903 if (sis->flags & SWP_WRITEOK) { in count_swap_pages()
1904 n = sis->pages; in count_swap_pages()
1906 n -= sis->inuse_pages; in count_swap_pages()
1908 spin_unlock(&sis->lock); in count_swap_pages()
1922 * just let do_wp_page work it out if a write is requested later - to
1936 return -ENOMEM; in unuse_pte()
1938 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); in unuse_pte()
1944 dec_mm_counter(vma->vm_mm, MM_SWAPENTS); in unuse_pte()
1945 inc_mm_counter(vma->vm_mm, MM_ANONPAGES); in unuse_pte()
1947 set_pte_at(vma->vm_mm, addr, pte, in unuse_pte()
1948 pte_mkold(mk_pte(page, vma->vm_page_prot))); in unuse_pte()
1974 unsigned long offset; in unuse_pte_range() local
1988 offset = swp_offset(entry); in unuse_pte_range()
1989 if (frontswap && !frontswap_test(si, offset)) in unuse_pte_range()
1993 swap_map = &si->swap_map[offset]; in unuse_pte_range()
2008 return -ENOMEM; in unuse_pte_range()
2025 if (*fs_pages_to_unuse && !--(*fs_pages_to_unuse)) { in unuse_pte_range()
2032 pte_unmap(pte - 1); in unuse_pte_range()
2113 addr = vma->vm_start; in unuse_vma()
2114 end = vma->vm_end; in unuse_vma()
2116 pgd = pgd_offset(vma->vm_mm, addr); in unuse_vma()
2136 for (vma = mm->mmap; vma; vma = vma->vm_next) { in unuse_mm()
2137 if (vma->anon_vma) { in unuse_mm()
2166 for (i = prev + 1; i < si->max; i++) { in find_next_to_unuse()
2167 count = READ_ONCE(si->swap_map[i]); in find_next_to_unuse()
2175 if (i == si->max) in find_next_to_unuse()
2197 if (!READ_ONCE(si->inuse_pages)) in try_to_unuse()
2213 while (READ_ONCE(si->inuse_pages) && in try_to_unuse()
2215 (p = p->next) != &init_mm.mmlist) { in try_to_unuse()
2242 while (READ_ONCE(si->inuse_pages) && in try_to_unuse()
2253 * swap cache just before we acquired the page lock. The page in try_to_unuse()
2269 if (pages_to_unuse && --pages_to_unuse == 0) in try_to_unuse()
2283 * It's easy and robust (though cpu-intensive) just to keep retrying. in try_to_unuse()
2285 if (READ_ONCE(si->inuse_pages)) { in try_to_unuse()
2288 retval = -EINTR; in try_to_unuse()
2298 * added to the mmlist just after page_duplicate - before would be racy.
2306 if (swap_info[type]->inuse_pages) in drain_mmlist()
2316 * corresponds to page offset for the specified swap entry.
2317 * Note that the type of this function is sector_t, but it returns page offset
2318 * into the bdev, not sector offset.
2324 pgoff_t offset; in map_swap_entry() local
2327 *bdev = sis->bdev; in map_swap_entry()
2329 offset = swp_offset(entry); in map_swap_entry()
2330 se = offset_to_swap_extent(sis, offset); in map_swap_entry()
2331 return se->start_block + (offset - se->start_page); in map_swap_entry()
2335 * Returns the page offset into bdev for the specified page's swap entry.
2349 while (!RB_EMPTY_ROOT(&sis->swap_extent_root)) { in destroy_swap_extents()
2350 struct rb_node *rb = sis->swap_extent_root.rb_node; in destroy_swap_extents()
2353 rb_erase(rb, &sis->swap_extent_root); in destroy_swap_extents()
2357 if (sis->flags & SWP_ACTIVATED) { in destroy_swap_extents()
2358 struct file *swap_file = sis->swap_file; in destroy_swap_extents()
2359 struct address_space *mapping = swap_file->f_mapping; in destroy_swap_extents()
2361 sis->flags &= ~SWP_ACTIVATED; in destroy_swap_extents()
2362 if (mapping->a_ops->swap_deactivate) in destroy_swap_extents()
2363 mapping->a_ops->swap_deactivate(swap_file); in destroy_swap_extents()
2377 struct rb_node **link = &sis->swap_extent_root.rb_node, *parent = NULL; in add_swap_extent()
2387 link = &parent->rb_right; in add_swap_extent()
2392 BUG_ON(se->start_page + se->nr_pages != start_page); in add_swap_extent()
2393 if (se->start_block + se->nr_pages == start_block) { in add_swap_extent()
2395 se->nr_pages += nr_pages; in add_swap_extent()
2403 return -ENOMEM; in add_swap_extent()
2404 new_se->start_page = start_page; in add_swap_extent()
2405 new_se->nr_pages = nr_pages; in add_swap_extent()
2406 new_se->start_block = start_block; in add_swap_extent()
2408 rb_link_node(&new_se->rb_node, parent, link); in add_swap_extent()
2409 rb_insert_color(&new_se->rb_node, &sis->swap_extent_root); in add_swap_extent()
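To illustrate the merge test in add_swap_extent() above (hypothetical numbers, not taken from a real swap area): if the highest extent is { start_page = 0, nr_pages = 100, start_block = 5000 }, then add_swap_extent(sis, 100, 50, 5100) only bumps nr_pages to 150 because 5000 + 100 == 5100, whereas add_swap_extent(sis, 100, 50, 6000) allocates and links a new rb-tree node.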
2431 * requirements, they are simply tossed out - we will never use those blocks
2438 * Typically it is in the 1-4 megabyte range. So we can have hundreds of
2442 * map_swap_page() has been measured at about 0.3 per page. - akpm.
2446 struct file *swap_file = sis->swap_file; in setup_swap_extents()
2447 struct address_space *mapping = swap_file->f_mapping; in setup_swap_extents()
2448 struct inode *inode = mapping->host; in setup_swap_extents()
2451 if (S_ISBLK(inode->i_mode)) { in setup_swap_extents()
2452 ret = add_swap_extent(sis, 0, sis->max, 0); in setup_swap_extents()
2453 *span = sis->pages; in setup_swap_extents()
2457 if (mapping->a_ops->swap_activate) { in setup_swap_extents()
2458 ret = mapping->a_ops->swap_activate(sis, swap_file, span); in setup_swap_extents()
2460 sis->flags |= SWP_ACTIVATED; in setup_swap_extents()
2462 sis->flags |= SWP_FS_OPS; in setup_swap_extents()
2463 ret = add_swap_extent(sis, 0, sis->max, 0); in setup_swap_extents()
2464 *span = sis->pages; in setup_swap_extents()
2476 if (p->bdev) in swap_node()
2477 bdev = p->bdev; in swap_node()
2479 bdev = p->swap_file->f_inode->i_sb->s_bdev; in swap_node()
2481 return bdev ? bdev->bd_disk->node_id : NUMA_NO_NODE; in swap_node()
2491 p->prio = prio; in setup_swap_info()
2493 p->prio = --least_priority; in setup_swap_info()
2496 * low-to-high, while swap ordering is high-to-low in setup_swap_info()
2498 p->list.prio = -p->prio; in setup_swap_info()
2500 if (p->prio >= 0) in setup_swap_info()
2501 p->avail_lists[i].prio = -p->prio; in setup_swap_info()
2504 p->avail_lists[i].prio = 1; in setup_swap_info()
2506 p->avail_lists[i].prio = -p->prio; in setup_swap_info()
2509 p->swap_map = swap_map; in setup_swap_info()
2510 p->cluster_info = cluster_info; in setup_swap_info()
2517 p->flags |= SWP_WRITEOK | SWP_VALID; in _enable_swap_info()
2520 atomic_long_add(p->pages, &nr_swap_pages); in _enable_swap_info()
2521 total_swap_pages += p->pages; in _enable_swap_info()
2527 * which on removal of any swap_info_struct with an auto-assigned in _enable_swap_info()
2528 * (i.e. negative) priority increments the auto-assigned priority in _enable_swap_info()
2529 * of any lower-priority swap_info_structs. in _enable_swap_info()
2534 plist_add(&p->list, &swap_active_head); in _enable_swap_info()
2543 frontswap_init(p->type, frontswap_map); in enable_swap_info()
2545 spin_lock(&p->lock); in enable_swap_info()
2547 spin_unlock(&p->lock); in enable_swap_info()
2555 spin_lock(&p->lock); in enable_swap_info()
2557 spin_unlock(&p->lock); in enable_swap_info()
2564 spin_lock(&p->lock); in reinsert_swap_info()
2565 setup_swap_info(p, p->prio, p->swap_map, p->cluster_info); in reinsert_swap_info()
2567 spin_unlock(&p->lock); in reinsert_swap_info()
2597 return -EPERM; in SYSCALL_DEFINE1()
2599 BUG_ON(!current->mm); in SYSCALL_DEFINE1()
2610 mapping = victim->f_mapping; in SYSCALL_DEFINE1()
2613 if (p->flags & SWP_WRITEOK) { in SYSCALL_DEFINE1()
2614 if (p->swap_file->f_mapping == mapping) { in SYSCALL_DEFINE1()
2621 err = -EINVAL; in SYSCALL_DEFINE1()
2625 if (!security_vm_enough_memory_mm(current->mm, p->pages)) in SYSCALL_DEFINE1()
2626 vm_unacct_memory(p->pages); in SYSCALL_DEFINE1()
2628 err = -ENOMEM; in SYSCALL_DEFINE1()
2633 spin_lock(&p->lock); in SYSCALL_DEFINE1()
2634 if (p->prio < 0) { in SYSCALL_DEFINE1()
2639 si->prio++; in SYSCALL_DEFINE1()
2640 si->list.prio--; in SYSCALL_DEFINE1()
2642 if (si->avail_lists[nid].prio != 1) in SYSCALL_DEFINE1()
2643 si->avail_lists[nid].prio--; in SYSCALL_DEFINE1()
2648 plist_del(&p->list, &swap_active_head); in SYSCALL_DEFINE1()
2651 atomic_long_sub(p->pages, &nr_swap_pages); in SYSCALL_DEFINE1()
2652 total_swap_pages -= p->pages; in SYSCALL_DEFINE1()
2654 p->flags &= ~SWP_WRITEOK; in SYSCALL_DEFINE1()
2655 spin_unlock(&p->lock); in SYSCALL_DEFINE1()
2661 err = try_to_unuse(p->type, false, 0); /* force unuse all pages */ in SYSCALL_DEFINE1()
2665 /* re-insert swap space back into swap_list */ in SYSCALL_DEFINE1()
2674 spin_lock(&p->lock); in SYSCALL_DEFINE1()
2675 p->flags &= ~SWP_VALID; /* mark swap device as invalid */ in SYSCALL_DEFINE1()
2676 spin_unlock(&p->lock); in SYSCALL_DEFINE1()
2684 flush_work(&p->discard_work); in SYSCALL_DEFINE1()
2687 if (p->flags & SWP_CONTINUED) in SYSCALL_DEFINE1()
2690 if (!p->bdev || !blk_queue_nonrot(bdev_get_queue(p->bdev))) in SYSCALL_DEFINE1()
2695 spin_lock(&p->lock); in SYSCALL_DEFINE1()
2699 p->highest_bit = 0; /* cuts scans short */ in SYSCALL_DEFINE1()
2700 while (p->flags >= SWP_SCANNING) { in SYSCALL_DEFINE1()
2701 spin_unlock(&p->lock); in SYSCALL_DEFINE1()
2705 spin_lock(&p->lock); in SYSCALL_DEFINE1()
2708 swap_file = p->swap_file; in SYSCALL_DEFINE1()
2709 old_block_size = p->old_block_size; in SYSCALL_DEFINE1()
2710 p->swap_file = NULL; in SYSCALL_DEFINE1()
2711 p->max = 0; in SYSCALL_DEFINE1()
2712 swap_map = p->swap_map; in SYSCALL_DEFINE1()
2713 p->swap_map = NULL; in SYSCALL_DEFINE1()
2714 cluster_info = p->cluster_info; in SYSCALL_DEFINE1()
2715 p->cluster_info = NULL; in SYSCALL_DEFINE1()
2717 spin_unlock(&p->lock); in SYSCALL_DEFINE1()
2719 arch_swap_invalidate_area(p->type); in SYSCALL_DEFINE1()
2720 frontswap_invalidate_area(p->type); in SYSCALL_DEFINE1()
2723 free_percpu(p->percpu_cluster); in SYSCALL_DEFINE1()
2724 p->percpu_cluster = NULL; in SYSCALL_DEFINE1()
2725 free_percpu(p->cluster_next_cpu); in SYSCALL_DEFINE1()
2726 p->cluster_next_cpu = NULL; in SYSCALL_DEFINE1()
2731 swap_cgroup_swapoff(p->type); in SYSCALL_DEFINE1()
2732 exit_swap_address_space(p->type); in SYSCALL_DEFINE1()
2734 inode = mapping->host; in SYSCALL_DEFINE1()
2735 if (S_ISBLK(inode->i_mode)) { in SYSCALL_DEFINE1()
2743 inode->i_flags &= ~S_SWAPFILE; in SYSCALL_DEFINE1()
2750 * not hold p->lock after we cleared its SWP_WRITEOK. in SYSCALL_DEFINE1()
2753 p->flags = 0; in SYSCALL_DEFINE1()
2770 struct seq_file *seq = file->private_data; in swaps_poll()
2774 if (seq->poll_event != atomic_read(&proc_poll_event)) { in swaps_poll()
2775 seq->poll_event = atomic_read(&proc_poll_event); in swaps_poll()
2795 if (!(si->flags & SWP_USED) || !si->swap_map) in swap_start()
2797 if (!--l) in swap_start()
2812 type = si->type + 1; in swap_next()
2816 if (!(si->flags & SWP_USED) || !si->swap_map) in swap_next()
2841 bytes = si->pages << (PAGE_SHIFT - 10); in swap_show()
2842 inuse = si->inuse_pages << (PAGE_SHIFT - 10); in swap_show()
2844 file = si->swap_file; in swap_show()
2847 len < 40 ? 40 - len : 1, " ", in swap_show()
2848 S_ISBLK(file_inode(file)->i_mode) ? in swap_show()
2852 si->prio); in swap_show()
2872 seq = file->private_data; in swaps_open()
2873 seq->poll_event = atomic_read(&proc_poll_event); in swaps_open()
2916 return ERR_PTR(-ENOMEM); in alloc_swap_info()
2920 if (!(swap_info[type]->flags & SWP_USED)) in alloc_swap_info()
2926 return ERR_PTR(-EPERM); in alloc_swap_info()
2929 p->type = type; in alloc_swap_info()
2943 * would be relying on p->type to remain valid. in alloc_swap_info()
2946 p->swap_extent_root = RB_ROOT; in alloc_swap_info()
2947 plist_node_init(&p->list, 0); in alloc_swap_info()
2949 plist_node_init(&p->avail_lists[i], 0); in alloc_swap_info()
2950 p->flags = SWP_USED; in alloc_swap_info()
2953 spin_lock_init(&p->lock); in alloc_swap_info()
2954 spin_lock_init(&p->cont_lock); in alloc_swap_info()
2963 if (S_ISBLK(inode->i_mode)) { in claim_swapfile()
2964 p->bdev = blkdev_get_by_dev(inode->i_rdev, in claim_swapfile()
2966 if (IS_ERR(p->bdev)) { in claim_swapfile()
2967 error = PTR_ERR(p->bdev); in claim_swapfile()
2968 p->bdev = NULL; in claim_swapfile()
2971 p->old_block_size = block_size(p->bdev); in claim_swapfile()
2972 error = set_blocksize(p->bdev, PAGE_SIZE); in claim_swapfile()
2980 if (blk_queue_is_zoned(p->bdev->bd_disk->queue)) in claim_swapfile()
2981 return -EINVAL; in claim_swapfile()
2982 p->flags |= SWP_BLKDEV; in claim_swapfile()
2983 } else if (S_ISREG(inode->i_mode)) { in claim_swapfile()
2984 p->bdev = inode->i_sb->s_bdev; in claim_swapfile()
2994 * 1) the number of bits for the swap offset in the swp_entry_t type, and
2999 * swap type 0 and swap offset ~0UL is created, encoded to a swap pte,
3000 * decoded to a swp_entry_t again, and finally the swap offset is
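The round trip described above is what the generic limit helper in this file computes; roughly, as a sketch reconstructed from that description rather than copied from the matched lines:

unsigned long generic_max_swapfile_size(void)
{
	return swp_offset(pte_to_swp_entry(
			swp_entry_to_pte(swp_entry(0, ~0UL)))) + 1;
}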
3028 if (memcmp("SWAPSPACE2", swap_header->magic.magic, 10)) { in read_swap_header()
3029 pr_err("Unable to find swap-space signature\n"); in read_swap_header()
3034 if (swab32(swap_header->info.version) == 1) { in read_swap_header()
3035 swab32s(&swap_header->info.version); in read_swap_header()
3036 swab32s(&swap_header->info.last_page); in read_swap_header()
3037 swab32s(&swap_header->info.nr_badpages); in read_swap_header()
3038 if (swap_header->info.nr_badpages > MAX_SWAP_BADPAGES) in read_swap_header()
3040 for (i = 0; i < swap_header->info.nr_badpages; i++) in read_swap_header()
3041 swab32s(&swap_header->info.badpages[i]); in read_swap_header()
3043 /* Check the swap header's sub-version */ in read_swap_header()
3044 if (swap_header->info.version != 1) { in read_swap_header()
3046 swap_header->info.version); in read_swap_header()
3050 p->lowest_bit = 1; in read_swap_header()
3051 p->cluster_next = 1; in read_swap_header()
3052 p->cluster_nr = 0; in read_swap_header()
3055 last_page = swap_header->info.last_page; in read_swap_header()
3057 pr_warn("Empty swap-file\n"); in read_swap_header()
3062 maxpages << (PAGE_SHIFT - 10), in read_swap_header()
3063 last_page << (PAGE_SHIFT - 10)); in read_swap_header()
3067 /* p->max is an unsigned int: don't overflow it */ in read_swap_header()
3071 p->highest_bit = maxpages - 1; in read_swap_header()
3080 if (swap_header->info.nr_badpages && S_ISREG(inode->i_mode)) in read_swap_header()
3082 if (swap_header->info.nr_badpages > MAX_SWAP_BADPAGES) in read_swap_header()
3106 unsigned long col = p->cluster_next / SWAPFILE_CLUSTER % SWAP_CLUSTER_COLS; in setup_swap_map_and_extents()
3109 nr_good_pages = maxpages - 1; /* omit header page */ in setup_swap_map_and_extents()
3111 cluster_list_init(&p->free_clusters); in setup_swap_map_and_extents()
3112 cluster_list_init(&p->discard_clusters); in setup_swap_map_and_extents()
3114 for (i = 0; i < swap_header->info.nr_badpages; i++) { in setup_swap_map_and_extents()
3115 unsigned int page_nr = swap_header->info.badpages[i]; in setup_swap_map_and_extents()
3116 if (page_nr == 0 || page_nr > swap_header->info.last_page) in setup_swap_map_and_extents()
3117 return -EINVAL; in setup_swap_map_and_extents()
3120 nr_good_pages--; in setup_swap_map_and_extents()
3140 p->max = maxpages; in setup_swap_map_and_extents()
3141 p->pages = nr_good_pages; in setup_swap_map_and_extents()
3145 nr_good_pages = p->pages; in setup_swap_map_and_extents()
3148 pr_warn("Empty swap-file\n"); in setup_swap_map_and_extents()
3149 return -EINVAL; in setup_swap_map_and_extents()
3169 cluster_list_add_tail(&p->free_clusters, cluster_info, in setup_swap_map_and_extents()
3182 struct request_queue *q = bdev_get_queue(si->bdev); in swap_discardable()
3210 return -EINVAL; in SYSCALL_DEFINE2()
3213 return -EPERM; in SYSCALL_DEFINE2()
3216 return -ENOMEM; in SYSCALL_DEFINE2()
3222 INIT_WORK(&p->discard_work, swap_discard_work); in SYSCALL_DEFINE2()
3237 p->swap_file = swap_file; in SYSCALL_DEFINE2()
3238 mapping = swap_file->f_mapping; in SYSCALL_DEFINE2()
3239 inode = mapping->host; in SYSCALL_DEFINE2()
3247 error = -EBUSY; in SYSCALL_DEFINE2()
3254 if (!mapping->a_ops->readpage) { in SYSCALL_DEFINE2()
3255 error = -EINVAL; in SYSCALL_DEFINE2()
3267 error = -EINVAL; in SYSCALL_DEFINE2()
3274 error = -ENOMEM; in SYSCALL_DEFINE2()
3278 if (p->bdev && blk_queue_stable_writes(p->bdev->bd_disk->queue)) in SYSCALL_DEFINE2()
3279 p->flags |= SWP_STABLE_WRITES; in SYSCALL_DEFINE2()
3281 if (p->bdev && p->bdev->bd_disk->fops->rw_page) in SYSCALL_DEFINE2()
3282 p->flags |= SWP_SYNCHRONOUS_IO; in SYSCALL_DEFINE2()
3284 if (p->bdev && blk_queue_nonrot(bdev_get_queue(p->bdev))) { in SYSCALL_DEFINE2()
3288 p->flags |= SWP_SOLIDSTATE; in SYSCALL_DEFINE2()
3289 p->cluster_next_cpu = alloc_percpu(unsigned int); in SYSCALL_DEFINE2()
3290 if (!p->cluster_next_cpu) { in SYSCALL_DEFINE2()
3291 error = -ENOMEM; in SYSCALL_DEFINE2()
3299 per_cpu(*p->cluster_next_cpu, cpu) = in SYSCALL_DEFINE2()
3300 1 + prandom_u32_max(p->highest_bit); in SYSCALL_DEFINE2()
3307 error = -ENOMEM; in SYSCALL_DEFINE2()
3312 spin_lock_init(&((cluster_info + ci)->lock)); in SYSCALL_DEFINE2()
3314 p->percpu_cluster = alloc_percpu(struct percpu_cluster); in SYSCALL_DEFINE2()
3315 if (!p->percpu_cluster) { in SYSCALL_DEFINE2()
3316 error = -ENOMEM; in SYSCALL_DEFINE2()
3321 cluster = per_cpu_ptr(p->percpu_cluster, cpu); in SYSCALL_DEFINE2()
3322 cluster_set_null(&cluster->index); in SYSCALL_DEFINE2()
3329 error = swap_cgroup_swapon(p->type, maxpages); in SYSCALL_DEFINE2()
3339 /* frontswap enabled? set up bit-per-page map for frontswap */ in SYSCALL_DEFINE2()
3345 if (p->bdev &&(swap_flags & SWAP_FLAG_DISCARD) && swap_discardable(p)) { in SYSCALL_DEFINE2()
3352 p->flags |= (SWP_DISCARDABLE | SWP_AREA_DISCARD | in SYSCALL_DEFINE2()
3357 * either do single-time area discards only, or to just in SYSCALL_DEFINE2()
3358 * perform discards for released swap page-clusters. in SYSCALL_DEFINE2()
3359 * Now it's time to adjust the p->flags accordingly. in SYSCALL_DEFINE2()
3362 p->flags &= ~SWP_PAGE_DISCARD; in SYSCALL_DEFINE2()
3364 p->flags &= ~SWP_AREA_DISCARD; in SYSCALL_DEFINE2()
3366 /* issue a swapon-time discard if it's still required */ in SYSCALL_DEFINE2()
3367 if (p->flags & SWP_AREA_DISCARD) { in SYSCALL_DEFINE2()
3375 error = init_swap_address_space(p->type, maxpages); in SYSCALL_DEFINE2()
3383 inode->i_flags |= S_SWAPFILE; in SYSCALL_DEFINE2()
3386 inode->i_flags &= ~S_SWAPFILE; in SYSCALL_DEFINE2()
3391 prio = -1; in SYSCALL_DEFINE2()
3399 p->pages<<(PAGE_SHIFT-10), name->name, p->prio, in SYSCALL_DEFINE2()
3400 nr_extents, (unsigned long long)span<<(PAGE_SHIFT-10), in SYSCALL_DEFINE2()
3401 (p->flags & SWP_SOLIDSTATE) ? "SS" : "", in SYSCALL_DEFINE2()
3402 (p->flags & SWP_DISCARDABLE) ? "D" : "", in SYSCALL_DEFINE2()
3403 (p->flags & SWP_AREA_DISCARD) ? "s" : "", in SYSCALL_DEFINE2()
3404 (p->flags & SWP_PAGE_DISCARD) ? "c" : "", in SYSCALL_DEFINE2()
3414 exit_swap_address_space(p->type); in SYSCALL_DEFINE2()
3418 free_percpu(p->percpu_cluster); in SYSCALL_DEFINE2()
3419 p->percpu_cluster = NULL; in SYSCALL_DEFINE2()
3420 free_percpu(p->cluster_next_cpu); in SYSCALL_DEFINE2()
3421 p->cluster_next_cpu = NULL; in SYSCALL_DEFINE2()
3422 if (inode && S_ISBLK(inode->i_mode) && p->bdev) { in SYSCALL_DEFINE2()
3423 set_blocksize(p->bdev, p->old_block_size); in SYSCALL_DEFINE2()
3424 blkdev_put(p->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); in SYSCALL_DEFINE2()
3428 swap_cgroup_swapoff(p->type); in SYSCALL_DEFINE2()
3430 p->swap_file = NULL; in SYSCALL_DEFINE2()
3431 p->flags = 0; in SYSCALL_DEFINE2()
3465 if (!skip && (si->flags & SWP_USED) && !(si->flags & SWP_WRITEOK)) in si_swapinfo()
3466 nr_to_be_unused += si->inuse_pages; in si_swapinfo()
3468 val->freeswap = atomic_long_read(&nr_swap_pages) + nr_to_be_unused; in si_swapinfo()
3469 val->totalswap = total_swap_pages + nr_to_be_unused; in si_swapinfo()
3478 * - success -> 0
3479 * - swp_entry is invalid -> EINVAL
3480 * - swp_entry is migration entry -> EINVAL
3481 * - swap-cache reference is requested but there is already one. -> EEXIST
3482 * - swap-cache reference is requested but the entry is not used. -> ENOENT
3483 * - swap-mapped reference requested but needs continued swap count. -> ENOMEM
3489 unsigned long offset; in __swap_duplicate() local
3492 int err = -EINVAL; in __swap_duplicate()
3498 offset = swp_offset(entry); in __swap_duplicate()
3499 ci = lock_cluster_or_swap_info(p, offset); in __swap_duplicate()
3501 count = p->swap_map[offset]; in __swap_duplicate()
3505 * swap entry could be SWAP_MAP_BAD. Check here with lock held. in __swap_duplicate()
3508 err = -ENOENT; in __swap_duplicate()
3522 err = -EEXIST; in __swap_duplicate()
3524 err = -ENOENT; in __swap_duplicate()
3531 err = -EINVAL; in __swap_duplicate()
3532 else if (swap_count_continued(p, offset, count)) in __swap_duplicate()
3535 err = -ENOMEM; in __swap_duplicate()
3537 err = -ENOENT; /* unused swap entry */ in __swap_duplicate()
3539 WRITE_ONCE(p->swap_map[offset], count | has_cache); in __swap_duplicate()
3560 * Returns 0 for success, or -ENOMEM if a swap_count_continuation is required
3562 * if __swap_duplicate() fails for another reason (-EINVAL or -ENOENT), which
3569 while (!err && __swap_duplicate(entry, 1) == -ENOMEM) in swap_duplicate()
3579 * -EEXIST means there is a swap cache.
3600 * out-of-line __page_file_ methods to avoid include hell.
3604 return page_swap_info(page)->swap_file->f_mapping; in __page_file_mapping()
3616 * add_swap_count_continuation - called when a swap count is duplicated
3637 pgoff_t offset; in add_swap_count_continuation() local
3655 spin_lock(&si->lock); in add_swap_count_continuation()
3657 offset = swp_offset(entry); in add_swap_count_continuation()
3659 ci = lock_cluster(si, offset); in add_swap_count_continuation()
3661 count = si->swap_map[offset] & ~SWAP_HAS_CACHE; in add_swap_count_continuation()
3667 * over-provisioning. in add_swap_count_continuation()
3673 ret = -ENOMEM; in add_swap_count_continuation()
3682 head = vmalloc_to_page(si->swap_map + offset); in add_swap_count_continuation()
3683 offset &= ~PAGE_MASK; in add_swap_count_continuation()
3685 spin_lock(&si->cont_lock); in add_swap_count_continuation()
3692 INIT_LIST_HEAD(&head->lru); in add_swap_count_continuation()
3694 si->flags |= SWP_CONTINUED; in add_swap_count_continuation()
3697 list_for_each_entry(list_page, &head->lru, lru) { in add_swap_count_continuation()
3707 map = kmap_atomic(list_page) + offset; in add_swap_count_continuation()
3719 list_add_tail(&page->lru, &head->lru); in add_swap_count_continuation()
3722 spin_unlock(&si->cont_lock); in add_swap_count_continuation()
3725 spin_unlock(&si->lock); in add_swap_count_continuation()
3734 * swap_count_continued - when the original swap_map count is incremented
3740 * lock.
3743 pgoff_t offset, unsigned char count) in swap_count_continued() argument
3750 head = vmalloc_to_page(si->swap_map + offset); in swap_count_continued()
3756 spin_lock(&si->cont_lock); in swap_count_continued()
3757 offset &= ~PAGE_MASK; in swap_count_continued()
3759 map = kmap_atomic(page) + offset; in swap_count_continued()
3772 map = kmap_atomic(page) + offset; in swap_count_continued()
3781 map = kmap_atomic(page) + offset; in swap_count_continued()
3787 map = kmap_atomic(page) + offset; in swap_count_continued()
3802 map = kmap_atomic(page) + offset; in swap_count_continued()
3805 *map -= 1; in swap_count_continued()
3810 map = kmap_atomic(page) + offset; in swap_count_continued()
3818 spin_unlock(&si->cont_lock); in swap_count_continued()
3823 * free_swap_count_continuations - swapoff free all the continuation pages
3828 pgoff_t offset; in free_swap_count_continuations() local
3830 for (offset = 0; offset < si->max; offset += PAGE_SIZE) { in free_swap_count_continuations()
3832 head = vmalloc_to_page(si->swap_map + offset); in free_swap_count_continuations()
3836 list_for_each_entry_safe(page, next, &head->lru, lru) { in free_swap_count_continuations()
3837 list_del(&page->lru); in free_swap_count_continuations()
3858 * lock. in __cgroup_throttle_swaprate()
3860 if (current->throttle_queue) in __cgroup_throttle_swaprate()
3866 if (si->bdev) { in __cgroup_throttle_swaprate()
3867 blkcg_schedule_throttle(bdev_get_queue(si->bdev), true); in __cgroup_throttle_swaprate()
3883 return -ENOMEM; in swapfile_init()