Revert "mm: provide speculative fault infrastructure"

This reverts commit 396b980864.

Signed-off-by: UtsavBalar1231 <utsavbalar1231@gmail.com>
Change-Id: I90e27e3e598f41efc131f106611fc20e33b709d8
Authored by UtsavBalar1231 on 2022-04-28 19:13:00 +05:30, committed by spakkkk
parent 0c2e785466
commit a56e9d538b
5 changed files with 7 additions and 388 deletions

include/linux/hugetlb_inline.h

@@ -8,7 +8,7 @@
static inline bool is_vm_hugetlb_page(struct vm_area_struct *vma)
{
return !!(READ_ONCE(vma->vm_flags) & VM_HUGETLB);
return !!(vma->vm_flags & VM_HUGETLB);
}
#else

include/linux/mm.h

@@ -331,8 +331,6 @@ extern pgprot_t protection_map[16];
#define FAULT_FLAG_REMOTE 0x80 /* faulting for non current tsk/mm */
#define FAULT_FLAG_INSTRUCTION 0x100 /* The fault was during an instruction fetch */
#define FAULT_FLAG_PREFAULT_OLD 0x400 /* Make faultaround ptes old */
/* Speculative fault, not holding mmap_sem */
#define FAULT_FLAG_SPECULATIVE 0x200
#define FAULT_FLAG_TRACE \
{ FAULT_FLAG_WRITE, "WRITE" }, \
@@ -361,10 +359,6 @@ struct vm_fault {
gfp_t gfp_mask; /* gfp mask to be used for allocations */
pgoff_t pgoff; /* Logical page offset based on vma */
unsigned long address; /* Faulting virtual address */
#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
unsigned int sequence;
pmd_t orig_pmd; /* value of PMD at the time of fault */
#endif
pmd_t *pmd; /* Pointer to pmd entry matching
* the 'address' */
pud_t *pud; /* Pointer to pud entry matching
@@ -1529,31 +1523,6 @@ int invalidate_inode_page(struct page *page);
#ifdef CONFIG_MMU
extern vm_fault_t handle_mm_fault(struct vm_area_struct *vma,
unsigned long address, unsigned int flags);
#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
extern int __handle_speculative_fault(struct mm_struct *mm,
unsigned long address,
unsigned int flags);
static inline int handle_speculative_fault(struct mm_struct *mm,
unsigned long address,
unsigned int flags)
{
/*
* Try the speculative page fault only for multithreaded user space tasks.
*/
if (!(flags & FAULT_FLAG_USER) || atomic_read(&mm->mm_users) == 1)
return VM_FAULT_RETRY;
return __handle_speculative_fault(mm, address, flags);
}
#else
static inline int handle_speculative_fault(struct mm_struct *mm,
unsigned long address,
unsigned int flags)
{
return VM_FAULT_RETRY;
}
#endif /* CONFIG_SPECULATIVE_PAGE_FAULT */
extern int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm,
unsigned long address, unsigned int fault_flags,
bool *unlocked);
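
For reference, the removed handle_speculative_fault() was meant to be tried by an architecture fault handler before taking mmap_sem, with VM_FAULT_RETRY meaning "fall back to the classic path". A minimal sketch of such a caller, under assumed names (do_page_fault_sketch is illustrative and not code from this tree):

/* Illustrative caller only, not part of this commit. */
static vm_fault_t do_page_fault_sketch(struct mm_struct *mm,
				       unsigned long addr, unsigned int flags)
{
	struct vm_area_struct *vma;
	vm_fault_t fault;

	/* Lockless attempt first: no mmap_sem is held here. */
	fault = handle_speculative_fault(mm, addr, flags);
	if (fault != VM_FAULT_RETRY)
		return fault;

	/* Fall back to the regular path under mmap_sem. */
	down_read(&mm->mmap_sem);
	vma = find_vma(mm, addr);
	if (!vma || vma->vm_start > addr) {
		/* Real handlers would also try expand_stack() here. */
		up_read(&mm->mmap_sem);
		return VM_FAULT_SIGSEGV;
	}
	fault = handle_mm_fault(vma, addr, flags);
	up_read(&mm->mmap_sem);
	return fault;
}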

include/linux/pagemap.h

@@ -460,8 +460,8 @@ static inline pgoff_t linear_page_index(struct vm_area_struct *vma,
pgoff_t pgoff;
if (unlikely(is_vm_hugetlb_page(vma)))
return linear_hugepage_index(vma, address);
pgoff = (address - READ_ONCE(vma->vm_start)) >> PAGE_SHIFT;
pgoff += READ_ONCE(vma->vm_pgoff);
pgoff = (address - vma->vm_start) >> PAGE_SHIFT;
pgoff += vma->vm_pgoff;
return pgoff;
}
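
As a quick worked example of the pgoff arithmetic above: with 4 KiB pages (PAGE_SHIFT = 12), vm_start = 0x7f0000000000, vm_pgoff = 0x10 and a fault at address = vm_start + 3 * 4096, the result is (0x3000 >> 12) + 0x10 = 0x13, i.e. page index 19 within the backing file (all values chosen purely for illustration).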

mm/internal.h

@@ -44,21 +44,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf);
extern struct vm_area_struct *get_vma(struct mm_struct *mm,
unsigned long addr);
extern void put_vma(struct vm_area_struct *vma);
static inline bool vma_has_changed(struct vm_fault *vmf)
{
int ret = RB_EMPTY_NODE(&vmf->vma->vm_rb);
unsigned int seq = READ_ONCE(vmf->vma->vm_sequence.sequence);
/*
* Matches both the wmb in write_seqlock_{begin,end}() and
* the wmb in vma_rb_erase().
*/
smp_rmb();
return ret || seq != vmf->sequence;
}
#endif /* CONFIG_SPECULATIVE_PAGE_FAULT */
#endif
void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
unsigned long floor, unsigned long ceiling);
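
The removed vma_has_changed() is an instance of the generic seqcount validate-after-read pattern; the speculative fault path simply aborts (and falls back to the classic path) instead of looping when the sequence has moved. A self-contained sketch of the pattern, using made-up names (example_seq, example_value) purely for illustration:

#include <linux/seqlock.h>

static seqcount_t example_seq = SEQCNT_ZERO(example_seq);
static unsigned long example_value;	/* updated by writers under their own lock */

static unsigned long read_value_speculatively(void)
{
	unsigned int seq;
	unsigned long val;

	do {
		seq = read_seqcount_begin(&example_seq);	/* waits out an active writer */
		val = READ_ONCE(example_value);			/* speculative, lockless read */
	} while (read_seqcount_retry(&example_seq, seq));	/* a writer ran: re-read */

	return val;
}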

mm/memory.c

@@ -809,8 +809,7 @@ static void print_bad_pte(struct vm_area_struct *vma, unsigned long addr,
if (page)
dump_page(page, "bad pte");
pr_alert("addr:%p vm_flags:%08lx anon_vma:%p mapping:%p index:%lx\n",
(void *)addr, READ_ONCE(vma->vm_flags), vma->anon_vma,
mapping, index);
(void *)addr, vma->vm_flags, vma->anon_vma, mapping, index);
pr_alert("file:%pD fault:%pf mmap:%pf readpage:%pf\n",
vma->vm_file,
vma->vm_ops ? vma->vm_ops->fault : NULL,
@@ -2395,113 +2394,6 @@ int apply_to_page_range(struct mm_struct *mm, unsigned long addr,
}
EXPORT_SYMBOL_GPL(apply_to_page_range);
#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
static bool pte_spinlock(struct vm_fault *vmf)
{
bool ret = false;
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
pmd_t pmdval;
#endif
/* Check if vma is still valid */
if (!(vmf->flags & FAULT_FLAG_SPECULATIVE)) {
vmf->ptl = pte_lockptr(vmf->vma->vm_mm, vmf->pmd);
spin_lock(vmf->ptl);
return true;
}
local_irq_disable();
if (vma_has_changed(vmf))
goto out;
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/*
* We check if the pmd value is still the same to ensure that there
* is no huge page collapse operation in progress behind our back.
*/
pmdval = READ_ONCE(*vmf->pmd);
if (!pmd_same(pmdval, vmf->orig_pmd))
goto out;
#endif
vmf->ptl = pte_lockptr(vmf->vma->vm_mm, vmf->pmd);
if (unlikely(!spin_trylock(vmf->ptl)))
goto out;
if (vma_has_changed(vmf)) {
spin_unlock(vmf->ptl);
goto out;
}
ret = true;
out:
local_irq_enable();
return ret;
}
static bool pte_map_lock(struct vm_fault *vmf)
{
bool ret = false;
pte_t *pte;
spinlock_t *ptl;
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
pmd_t pmdval;
#endif
if (!(vmf->flags & FAULT_FLAG_SPECULATIVE)) {
vmf->pte = pte_offset_map_lock(vmf->vma->vm_mm, vmf->pmd,
vmf->address, &vmf->ptl);
return true;
}
/*
* The first vma_has_changed() guarantees the page-tables are still
* valid, having IRQs disabled ensures they stay around, hence the
* second vma_has_changed() to make sure they are still valid once
* we've got the lock. After that a concurrent zap_pte_range() will
* block on the PTL and thus we're safe.
*/
local_irq_disable();
if (vma_has_changed(vmf))
goto out;
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/*
* We check if the pmd value is still the same to ensure that there
* is no huge page collapse operation in progress behind our back.
*/
pmdval = READ_ONCE(*vmf->pmd);
if (!pmd_same(pmdval, vmf->orig_pmd))
goto out;
#endif
/*
* Same as pte_offset_map_lock() except that we call
* spin_trylock() in place of spin_lock() to avoid a race with the
* unmap path, which may hold the lock and wait for this CPU to
* invalidate its TLB while this CPU has IRQs disabled.
* Since we are in the speculative path, accept that it could fail.
*/
ptl = pte_lockptr(vmf->vma->vm_mm, vmf->pmd);
pte = pte_offset_map(vmf->pmd, vmf->address);
if (unlikely(!spin_trylock(ptl))) {
pte_unmap(pte);
goto out;
}
if (vma_has_changed(vmf)) {
pte_unmap_unlock(pte, ptl);
goto out;
}
vmf->pte = pte;
vmf->ptl = ptl;
ret = true;
out:
local_irq_enable();
return ret;
}
#else
static inline bool pte_spinlock(struct vm_fault *vmf)
{
vmf->ptl = pte_lockptr(vmf->vma->vm_mm, vmf->pmd);
@@ -2515,7 +2407,6 @@ static inline bool pte_map_lock(struct vm_fault *vmf)
vmf->address, &vmf->ptl);
return true;
}
#endif /* CONFIG_SPECULATIVE_PAGE_FAULT */
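/*
 * Editorial sketch, not part of this commit: why the removed speculative
 * helpers above use spin_trylock() with IRQs disabled instead of
 * spin_lock(). Taking the PTL unconditionally could deadlock against a
 * TLB flush issued by the unmap path, roughly:
 *
 *   CPU A (zap_pte_range)                 CPU B (speculative fault)
 *   spin_lock(ptl)                        local_irq_disable()
 *   flush TLBs -> sends IPI to CPU B      spin_lock(ptl)  <- would spin forever
 *   waits for CPU B to ack the IPI        never services the IPI
 *
 * spin_trylock() lets CPU B give up, re-enable IRQs and retry the fault
 * through the non-speculative path instead.
 */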
/*
* handle_pte_fault chooses page fault handler according to an entry which was
@@ -3506,14 +3397,6 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
ret = check_stable_address_space(vma->vm_mm);
if (ret)
goto unlock;
/*
* Don't call the userfaultfd during the speculative path.
* We already checked for the VMA to not be managed through
* userfaultfd, but it may be set in our back once we have lock
* the pte. In such a case we can ignore it this time.
*/
if (vmf->flags & FAULT_FLAG_SPECULATIVE)
goto setpte;
/* Deliver the page fault to userland, check inside PT lock */
if (userfaultfd_missing(vma)) {
pte_unmap_unlock(vmf->pte, vmf->ptl);
@@ -3556,8 +3439,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
goto unlock_and_release;
/* Deliver the page fault to userland, check inside PT lock */
if (!(vmf->flags & FAULT_FLAG_SPECULATIVE) &&
userfaultfd_missing(vma)) {
if (userfaultfd_missing(vma)) {
pte_unmap_unlock(vmf->pte, vmf->ptl);
mem_cgroup_cancel_charge(page, memcg, false);
put_page(page);
@@ -4376,15 +4258,6 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf)
pte_t entry;
if (unlikely(pmd_none(*vmf->pmd))) {
/*
* In the case of the speculative page fault handler we abort
* the speculative path immediately as the pmd is probably
* in the way to be converted in a huge one. We will try
* again holding the mmap_sem (which implies that the collapse
* operation is done).
*/
if (vmf->flags & FAULT_FLAG_SPECULATIVE)
return VM_FAULT_RETRY;
/*
* Leave __pte_alloc() until later: because vm_ops->fault may
* want to allocate huge page, and if we expose page table
@@ -4392,7 +4265,7 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf)
* concurrent faults and from rmap lookups.
*/
vmf->pte = NULL;
} else if (!(vmf->flags & FAULT_FLAG_SPECULATIVE)) {
} else {
/* See comment in pte_alloc_one_map() */
if (pmd_devmap_trans_unstable(vmf->pmd))
return 0;
@@ -4401,9 +4274,6 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf)
* pmd from under us anymore at this point because we hold the
* mmap_sem read mode and khugepaged takes it in write mode.
* So now it's safe to run pte_offset_map().
* This is not applicable to the speculative page fault handler
* but in that case, the pte is fetched earlier in
* handle_speculative_fault().
*/
vmf->pte = pte_offset_map(vmf->pmd, vmf->address);
vmf->orig_pte = *vmf->pte;
@@ -4426,8 +4296,6 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf)
if (!vmf->pte) {
if (vma_is_anonymous(vmf->vma))
return do_anonymous_page(vmf);
else if (vmf->flags & FAULT_FLAG_SPECULATIVE)
return VM_FAULT_RETRY;
else
return do_fault(vmf);
}
@@ -4525,9 +4393,6 @@ static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma,
vmf.pmd = pmd_alloc(mm, vmf.pud, address);
if (!vmf.pmd)
return VM_FAULT_OOM;
#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
vmf.sequence = raw_read_seqcount(&vma->vm_sequence);
#endif
if (pmd_none(*vmf.pmd) && __transparent_hugepage_enabled(vma)) {
ret = create_huge_pmd(&vmf);
if (!(ret & VM_FAULT_FALLBACK))
@@ -4561,207 +4426,6 @@ static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma,
return handle_pte_fault(&vmf);
}
#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
#ifndef CONFIG_ARCH_HAS_PTE_SPECIAL
/* This is required by vm_normal_page() */
#error "Speculative page fault handler requires CONFIG_ARCH_HAS_PTE_SPECIAL"
#endif
/*
* vm_normal_page() adds some processing which should be done while
* holding the mmap_sem.
*/
int __handle_speculative_fault(struct mm_struct *mm, unsigned long address,
unsigned int flags)
{
struct vm_fault vmf = {
.address = address,
};
pgd_t *pgd, pgdval;
p4d_t *p4d, p4dval;
pud_t pudval;
int seq, ret = VM_FAULT_RETRY;
struct vm_area_struct *vma;
/* Clear flags that may lead to releasing the mmap_sem in order to retry */
flags &= ~(FAULT_FLAG_ALLOW_RETRY|FAULT_FLAG_KILLABLE);
flags |= FAULT_FLAG_SPECULATIVE;
vma = get_vma(mm, address);
if (!vma)
return ret;
/* rmb <-> seqlock, vma_rb_erase() */
seq = raw_read_seqcount(&vma->vm_sequence);
if (seq & 1)
goto out_put;
/*
* Can't call vm_ops services as we don't know what they would do
* with the VMA.
* This includes huge pages from hugetlbfs.
*/
if (vma->vm_ops)
goto out_put;
/*
* __anon_vma_prepare() requires the mmap_sem to be held
* because vm_next and vm_prev must be safe. This can't be guaranteed
* in the speculative path.
*/
if (unlikely(!vma->anon_vma))
goto out_put;
vmf.vma_flags = READ_ONCE(vma->vm_flags);
vmf.vma_page_prot = READ_ONCE(vma->vm_page_prot);
/* Can't call userland page fault handler in the speculative path */
if (unlikely(vmf.vma_flags & VM_UFFD_MISSING))
goto out_put;
if (vmf.vma_flags & VM_GROWSDOWN || vmf.vma_flags & VM_GROWSUP)
/*
* This could be detected by checking the address against the VMA's
* boundaries, but we want to trace it as not supported instead of
* changed.
*/
goto out_put;
if (address < READ_ONCE(vma->vm_start)
|| READ_ONCE(vma->vm_end) <= address)
goto out_put;
if (!arch_vma_access_permitted(vma, flags & FAULT_FLAG_WRITE,
flags & FAULT_FLAG_INSTRUCTION,
flags & FAULT_FLAG_REMOTE)) {
ret = VM_FAULT_SIGSEGV;
goto out_put;
}
/* This one is required to check that the VMA has write access set */
if (flags & FAULT_FLAG_WRITE) {
if (unlikely(!(vmf.vma_flags & VM_WRITE))) {
ret = VM_FAULT_SIGSEGV;
goto out_put;
}
} else if (unlikely(!(vmf.vma_flags & (VM_READ|VM_EXEC|VM_WRITE)))) {
ret = VM_FAULT_SIGSEGV;
goto out_put;
}
#ifdef CONFIG_NUMA
struct mempolicy *pol;
/*
* MPOL_INTERLEAVE implies additional checks in
* mpol_misplaced() which are not compatible with the
* speculative page fault processing.
*/
pol = __get_vma_policy(vma, address);
if (!pol)
pol = get_task_policy(current);
if (pol && pol->mode == MPOL_INTERLEAVE)
goto out_put;
#endif
/*
* Do a speculative lookup of the PTE entry.
*/
local_irq_disable();
pgd = pgd_offset(mm, address);
pgdval = READ_ONCE(*pgd);
if (pgd_none(pgdval) || unlikely(pgd_bad(pgdval)))
goto out_walk;
p4d = p4d_offset(pgd, address);
p4dval = READ_ONCE(*p4d);
if (p4d_none(p4dval) || unlikely(p4d_bad(p4dval)))
goto out_walk;
vmf.pud = pud_offset(p4d, address);
pudval = READ_ONCE(*vmf.pud);
if (pud_none(pudval) || unlikely(pud_bad(pudval)))
goto out_walk;
/* Huge pages at PUD level are not supported. */
if (unlikely(pud_trans_huge(pudval)))
goto out_walk;
vmf.pmd = pmd_offset(vmf.pud, address);
vmf.orig_pmd = READ_ONCE(*vmf.pmd);
/*
* pmd_none could mean that a hugepage collapse is in progress
* behind our back, as collapse_huge_page() marks it before
* invalidating the pte (which is done once the IPI is caught
* by all CPUs and we have interrupts disabled).
* For this reason we cannot handle THP in a speculative way since we
* can't safely identify an in-progress collapse operation done behind
* our back on that PMD.
* Regarding the order of the following checks, see comment in
* pmd_devmap_trans_unstable()
*/
if (unlikely(pmd_devmap(vmf.orig_pmd) ||
pmd_none(vmf.orig_pmd) || pmd_trans_huge(vmf.orig_pmd) ||
is_swap_pmd(vmf.orig_pmd)))
goto out_walk;
/*
* The above does not allocate/instantiate page-tables because doing so
* would lead to the possibility of instantiating page-tables after
* free_pgtables() -- and consequently leaking them.
*
* The result is that we take at least one !speculative fault per PMD
* in order to instantiate it.
*/
vmf.pte = pte_offset_map(vmf.pmd, address);
vmf.orig_pte = READ_ONCE(*vmf.pte);
barrier(); /* See comment in handle_pte_fault() */
if (pte_none(vmf.orig_pte)) {
pte_unmap(vmf.pte);
vmf.pte = NULL;
}
vmf.vma = vma;
vmf.pgoff = linear_page_index(vma, address);
vmf.gfp_mask = __get_fault_gfp_mask(vma);
vmf.sequence = seq;
vmf.flags = flags;
local_irq_enable();
/*
* We need to re-validate the VMA after checking the bounds, otherwise
* we might have a false positive on the bounds.
*/
if (read_seqcount_retry(&vma->vm_sequence, seq))
goto out_put;
mem_cgroup_enter_user_fault();
ret = handle_pte_fault(&vmf);
mem_cgroup_exit_user_fault();
put_vma(vma);
/*
* The task may have entered a memcg OOM situation but
* if the allocation error was handled gracefully (no
* VM_FAULT_OOM), there is no need to kill anything.
* Just clean up the OOM state peacefully.
*/
if (task_in_memcg_oom(current) && !(ret & VM_FAULT_OOM))
mem_cgroup_oom_synchronize(false);
return ret;
out_walk:
local_irq_enable();
out_put:
put_vma(vma);
return ret;
}
#endif /* CONFIG_SPECULATIVE_PAGE_FAULT */
/*
* By the time we get here, we already hold the mm semaphore
*