Commit 26178ec1 authored by Linus Torvalds

x86: mm: consolidate VM_FAULT_RETRY handling

The VM_FAULT_RETRY handling was confusing and incorrect for the case of
returning to kernel mode.  We need to handle the exception table fixup
if we return to kernel mode due to a fatal signal - it will basically
look to the kernel user mode access like the access failed due to the VM
going away from under it.  Which is correct - the process is dying - and
avoids the whole "repeat endless kernel page faults" case.

Handling the VM_FAULT_RETRY early and in just one place also simplifies
the mmap_sem handling, since once we've taken care of VM_FAULT_RETRY we
know that we can just drop the lock.  The remaining accounting and
possible error handling is thread-local and does not need the mmap_sem.
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 7fb08eca
...@@ -1055,7 +1055,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code, ...@@ -1055,7 +1055,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
struct vm_area_struct *vma; struct vm_area_struct *vma;
struct task_struct *tsk; struct task_struct *tsk;
struct mm_struct *mm; struct mm_struct *mm;
int fault; int fault, major = 0;
unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
tsk = current; tsk = current;
...@@ -1230,48 +1230,50 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code, ...@@ -1230,48 +1230,50 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
* we get VM_FAULT_RETRY back, the mmap_sem has been unlocked. * we get VM_FAULT_RETRY back, the mmap_sem has been unlocked.
*/ */
fault = handle_mm_fault(mm, vma, address, flags); fault = handle_mm_fault(mm, vma, address, flags);
major |= fault & VM_FAULT_MAJOR;
/* /*
* If we need to retry but a fatal signal is pending, handle the * If we need to retry the mmap_sem has already been released,
* signal first. We do not need to release the mmap_sem because it * and if there is a fatal signal pending there is no guarantee
* would already be released in __lock_page_or_retry in mm/filemap.c. * that we made any progress. Handle this case first.
*/ */
if (unlikely((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))) if (unlikely(fault & VM_FAULT_RETRY)) {
/* Retry at most once */
if (flags & FAULT_FLAG_ALLOW_RETRY) {
flags &= ~FAULT_FLAG_ALLOW_RETRY;
flags |= FAULT_FLAG_TRIED;
if (!fatal_signal_pending(tsk))
goto retry;
}
/* User mode? Just return to handle the fatal exception */
if (fault & FAULT_FLAG_USER)
return;
/* Not returning to user mode? Handle exceptions or die: */
no_context(regs, error_code, address, SIGBUS, BUS_ADRERR);
return; return;
}
up_read(&mm->mmap_sem);
if (unlikely(fault & VM_FAULT_ERROR)) { if (unlikely(fault & VM_FAULT_ERROR)) {
up_read(&mm->mmap_sem);
mm_fault_error(regs, error_code, address, fault); mm_fault_error(regs, error_code, address, fault);
return; return;
} }
/* /*
* Major/minor page fault accounting is only done on the * Major/minor page fault accounting. If any of the events
* initial attempt. If we go through a retry, it is extremely * returned VM_FAULT_MAJOR, we account it as a major fault.
* likely that the page will be found in page cache at that point.
*/ */
if (flags & FAULT_FLAG_ALLOW_RETRY) { if (major) {
if (fault & VM_FAULT_MAJOR) { tsk->maj_flt++;
tsk->maj_flt++; perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address);
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, } else {
regs, address); tsk->min_flt++;
} else { perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address);
tsk->min_flt++;
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
regs, address);
}
if (fault & VM_FAULT_RETRY) {
/* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
* of starvation. */
flags &= ~FAULT_FLAG_ALLOW_RETRY;
flags |= FAULT_FLAG_TRIED;
goto retry;
}
} }
check_v8086_mode(regs, address, tsk); check_v8086_mode(regs, address, tsk);
up_read(&mm->mmap_sem);
} }
NOKPROBE_SYMBOL(__do_page_fault); NOKPROBE_SYMBOL(__do_page_fault);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment