diff --git a/include/linux/swap.h b/include/linux/swap.h
index ba5b139bc486..624773cb61b5 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -416,9 +416,12 @@ extern unsigned long total_swapcache_pages(void);
 extern void show_swap_cache_info(void);
 extern int add_to_swap(struct page *page);
 extern int add_to_swap_cache(struct page *, swp_entry_t, gfp_t);
-extern int __add_to_swap_cache(struct page *page, swp_entry_t entry);
-extern void __delete_from_swap_cache(struct page *);
+extern int __add_to_swap_cache(struct page *page, swp_entry_t entry,
+			       void **shadowp);
+extern void __delete_from_swap_cache(struct page *page, void *shadow);
 extern void delete_from_swap_cache(struct page *);
+extern void clear_shadow_from_swap_cache(int type, unsigned long begin,
+					 unsigned long end);
 extern void free_page_and_swap_cache(struct page *);
 extern void free_pages_and_swap_cache(struct page **, int);
 extern struct page *lookup_swap_cache(swp_entry_t entry,
@@ -571,7 +574,7 @@ static inline int add_to_swap_cache(struct page *page, swp_entry_t entry,
 	return -1;
 }
 
-static inline void __delete_from_swap_cache(struct page *page)
+static inline void __delete_from_swap_cache(struct page *page, void *shadow)
 {
 }
 
@@ -579,6 +582,11 @@ static inline void delete_from_swap_cache(struct page *page)
 {
 }
 
+static inline void clear_shadow_from_swap_cache(int type, unsigned long begin,
+						unsigned long end)
+{
+}
+
 static inline int page_swapcount(struct page *page)
 {
 	return 0;
diff --git a/mm/swap_state.c b/mm/swap_state.c
index b99e8fd93cd3..0ecf892eb994 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -111,7 +111,7 @@ void show_swap_cache_info(void)
  * __add_to_swap_cache resembles add_to_page_cache_locked on swapper_space,
  * but sets SwapCache flag and private instead of mapping and index.
  */
-int __add_to_swap_cache(struct page *page, swp_entry_t entry)
+int __add_to_swap_cache(struct page *page, swp_entry_t entry, void **shadowp)
 {
 	int error, i, nr = hpage_nr_pages(page);
 	struct address_space *address_space;
@@ -127,11 +127,30 @@ int __add_to_swap_cache(struct page *page, swp_entry_t entry)
 	address_space = swap_address_space(entry);
 	xa_lock_irq(&address_space->i_pages);
 	for (i = 0; i < nr; i++) {
+		void *item;
+		void __rcu **slot;
+		struct radix_tree_node *node;
+
 		set_page_private(page + i, entry.val + i);
-		error = radix_tree_insert(&address_space->i_pages,
-					  idx + i, page + i);
+		error = __radix_tree_create(&address_space->i_pages,
+					    idx + i, 0, &node, &slot);
 		if (unlikely(error))
 			break;
+
+		item = radix_tree_deref_slot_protected(slot,
+				&address_space->i_pages.xa_lock);
+		if (WARN_ON_ONCE(item && !radix_tree_exceptional_entry(item))) {
+			error = -EEXIST;
+			break;
+		}
+
+		__radix_tree_replace(&address_space->i_pages, node, slot,
+				     page + i, NULL);
+
+		if (shadowp) {
+			VM_BUG_ON(i);
+			*shadowp = item;
+		}
 	}
 	if (likely(!error)) {
 		address_space->nrpages += nr;
@@ -164,7 +183,7 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask)
 	error = radix_tree_maybe_preload_order(gfp_mask, compound_order(page));
 	if (!error) {
-		error = __add_to_swap_cache(page, entry);
+		error = __add_to_swap_cache(page, entry, NULL);
 		radix_tree_preload_end();
 	}
 	return error;
 }
 
@@ -174,7 +193,7 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask)
  * This must be called only on pages that have
  * been verified to be in the swap cache.
  */
-void __delete_from_swap_cache(struct page *page)
+void __delete_from_swap_cache(struct page *page, void *shadow)
 {
 	struct address_space *address_space;
 	int i, nr = hpage_nr_pages(page);
@@ -184,12 +203,23 @@ void __delete_from_swap_cache(struct page *page)
 	VM_BUG_ON_PAGE(!PageLocked(page), page);
 	VM_BUG_ON_PAGE(!PageSwapCache(page), page);
 	VM_BUG_ON_PAGE(PageWriteback(page), page);
+	VM_BUG_ON(shadow && !radix_tree_exceptional_entry(shadow));
 
 	entry.val = page_private(page);
 	address_space = swap_address_space(entry);
 	idx = swp_offset(entry);
 	for (i = 0; i < nr; i++) {
-		radix_tree_delete(&address_space->i_pages, idx + i);
+		void *item;
+		void __rcu **slot;
+		struct radix_tree_node *node;
+
+		item = __radix_tree_lookup(&address_space->i_pages,
+					   idx + i, &node, &slot);
+		if (WARN_ON_ONCE(item != page + i))
+			continue;
+
+		__radix_tree_replace(&address_space->i_pages,
+				     node, slot, shadow, NULL);
 		set_page_private(page + i, 0);
 	}
 	ClearPageSwapCache(page);
@@ -271,13 +301,47 @@ void delete_from_swap_cache(struct page *page)
 
 	address_space = swap_address_space(entry);
 	xa_lock_irq(&address_space->i_pages);
-	__delete_from_swap_cache(page);
+	__delete_from_swap_cache(page, NULL);
 	xa_unlock_irq(&address_space->i_pages);
 
 	put_swap_page(page, entry);
 	page_ref_sub(page, hpage_nr_pages(page));
 }
 
+void clear_shadow_from_swap_cache(int type, unsigned long begin,
+				  unsigned long end)
+{
+	unsigned long curr = begin;
+
+	for (;;) {
+		void *item;
+		void __rcu **slot;
+		struct radix_tree_iter iter;
+		swp_entry_t entry = swp_entry(type, curr);
+		struct address_space *address_space = swap_address_space(entry);
+
+		xa_lock_irq(&address_space->i_pages);
+		radix_tree_for_each_slot(slot, &address_space->i_pages,
+					 &iter, curr) {
+			if (iter.index > end)
+				break;
+			item = radix_tree_deref_slot_protected(slot,
+					&address_space->i_pages.xa_lock);
+			if (radix_tree_exceptional_entry(item))
+				radix_tree_iter_delete(&address_space->i_pages,
+						       &iter, slot);
+		}
+		xa_unlock_irq(&address_space->i_pages);
+
+		/* search the next swapcache until we meet end */
+		curr >>= SWAP_ADDRESS_SPACE_SHIFT;
+		curr++;
+		curr <<= SWAP_ADDRESS_SPACE_SHIFT;
+		if (curr > end)
+			break;
+	}
+}
+
 /*
  * If we are the only user, then try to free up the swap cache.
  *
@@ -443,7 +507,7 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
 		/* May fail (-ENOMEM) if radix-tree node allocation failed. */
 		__SetPageLocked(new_page);
 		__SetPageSwapBacked(new_page);
-		err = __add_to_swap_cache(new_page, entry);
+		err = __add_to_swap_cache(new_page, entry, NULL);
 		if (likely(!err)) {
 			radix_tree_preload_end();
 			/*
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 813c846ad6ee..ed9032a237de 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -663,6 +663,7 @@ static void add_to_avail_list(struct swap_info_struct *p)
 static void swap_range_free(struct swap_info_struct *si, unsigned long offset,
 			    unsigned int nr_entries)
 {
+	unsigned long begin = offset;
 	unsigned long end = offset + nr_entries - 1;
 	void (*swap_slot_free_notify)(struct block_device *, unsigned long);
 
@@ -688,6 +689,7 @@ static void swap_range_free(struct swap_info_struct *si, unsigned long offset,
 			swap_slot_free_notify(si->bdev, offset);
 		offset++;
 	}
+	clear_shadow_from_swap_cache(si->type, begin, end);
 }
 
 static int scan_swap_map_slots(struct swap_info_struct *si,
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 4239b8b106d9..3c8e34f7557f 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -926,7 +926,7 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
 	if (PageSwapCache(page)) {
 		swp_entry_t swap = { .val = page_private(page) };
 		mem_cgroup_swapout(page, swap);
-		__delete_from_swap_cache(page);
+		__delete_from_swap_cache(page, NULL);
 		xa_unlock_irqrestore(&mapping->i_pages, flags);
 		put_swap_page(page, swap);
 	} else {
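
Note on the shadow mechanism (commentary, not part of the patch): a shadow is stored as a radix-tree "exceptional entry", a tagged non-pointer value that occupies the slot where a struct page pointer would otherwise live. That is why __add_to_swap_cache() accepts an exceptional item in an occupied slot as a legal collision (the shadow left behind by __delete_from_swap_cache()) but warns on anything else, and why clear_shadow_from_swap_cache() can identify shadows with radix_tree_exceptional_entry(). Below is a minimal userspace sketch of that tagging scheme; pack_shadow(), is_exceptional_entry() and unpack_shadow() are illustrative stand-ins, not the kernel's actual helpers.

/* Sketch: telling a packed shadow value apart from a page pointer. */
#include <assert.h>
#include <stdio.h>

#define EXCEPTIONAL_ENTRY	2UL	/* as RADIX_TREE_EXCEPTIONAL_ENTRY */
#define EXCEPTIONAL_SHIFT	2	/* as RADIX_TREE_EXCEPTIONAL_SHIFT */

/* Pack eviction information into a tagged, non-pointer slot value. */
static void *pack_shadow(unsigned long eviction)
{
	return (void *)((eviction << EXCEPTIONAL_SHIFT) | EXCEPTIONAL_ENTRY);
}

/* Mirrors radix_tree_exceptional_entry(): test the tag bit. */
static int is_exceptional_entry(const void *item)
{
	return ((unsigned long)item & EXCEPTIONAL_ENTRY) != 0;
}

/* Recover the packed eviction information from a shadow value. */
static unsigned long unpack_shadow(const void *item)
{
	return (unsigned long)item >> EXCEPTIONAL_SHIFT;
}

int main(void)
{
	int page;			/* stand-in for a struct page */
	void *shadow = pack_shadow(0x1234);

	assert(is_exceptional_entry(shadow));
	assert(!is_exceptional_entry(&page));	/* pointers are aligned */
	printf("eviction info: %#lx\n", unpack_shadow(shadow));
	return 0;
}

The scheme works because a struct page pointer is at least 4-byte aligned, so its low two bits are always clear; setting bit 1 therefore unambiguously marks a slot as carrying packed eviction data rather than a pointer.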