Commit a440e4d7 authored by Linus Torvalds

Merge tag 'x86_urgent_for_v5.11_rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 fixes from Borislav Petkov:
 "As expected, fixes started trickling in after the holidays so here is
  the accumulated pile of x86 fixes for 5.11:

   - A fix for fanotify_mark() being missed in the conversion of x86_32
     native syscalls which take 64-bit arguments to the compat handlers,
     due to it having had a generic compat handler. (Brian Gerst)

   - Add a forgotten pmd page destructor call to pud_free_pmd_page()
     where a pmd page is freed. (Dan Williams)

   - Make the handling of IN/OUT insns with a u8 immediate port operand
     more precise for SEV-ES guests by using only the single port byte
     and not the whole s32 value of the insn decoder. (Peter Gonda)

   - Correct a straddling end-of-range check before returning the proper
     MTRR type, when the end address is the same as the top of memory.
     (Ying-Tsun Huang)

   - Change PQR_ASSOC MSR update scheme when moving a task to a resctrl
     resource group to avoid significant performance overhead with some
     resctrl workloads. (Fenghua Yu)

   - Avoid the actual task move overhead when the task is already in the
     resource group. (Fenghua Yu)"

* tag 'x86_urgent_for_v5.11_rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/resctrl: Don't move a task to the same resource group
  x86/resctrl: Use an IPI instead of task_work_add() to update PQR_ASSOC MSR
  x86/mtrr: Correct the range check before performing MTRR type lookups
  x86/sev-es: Fix SEV-ES OUT/IN immediate opcode vc handling
  x86/mm: Fix leak of pmd ptlock
  fanotify: Fix sys_fanotify_mark() on native x86-32
parents 2ff90100 a0195f31
@@ -1105,6 +1105,12 @@ config HAVE_ARCH_PFN_VALID
 config ARCH_SUPPORTS_DEBUG_PAGEALLOC
 	bool
 
+config ARCH_SPLIT_ARG64
+	bool
+	help
+	  If a 32-bit architecture requires 64-bit arguments to be split into
+	  pairs of 32-bit arguments, select this option.
+
 source "kernel/gcov/Kconfig"
 
 source "scripts/gcc-plugins/Kconfig"
@@ -19,6 +19,7 @@ config X86_32
 	select KMAP_LOCAL
 	select MODULES_USE_ELF_REL
 	select OLD_SIGACTION
+	select ARCH_SPLIT_ARG64
 
 config X86_64
 	def_bool y
@@ -167,9 +167,6 @@ static u8 mtrr_type_lookup_variable(u64 start, u64 end, u64 *partial_end,
 	*repeat = 0;
 	*uniform = 1;
 
-	/* Make end inclusive instead of exclusive */
-	end--;
-
 	prev_match = MTRR_TYPE_INVALID;
 	for (i = 0; i < num_var_ranges; ++i) {
 		unsigned short start_state, end_state, inclusive;
@@ -261,6 +258,9 @@ u8 mtrr_type_lookup(u64 start, u64 end, u8 *uniform)
 	int repeat;
 	u64 partial_end;
 
+	/* Make end inclusive instead of exclusive */
+	end--;
+
 	if (!mtrr_state_set)
 		return MTRR_TYPE_INVALID;
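The practical effect of moving the end-- adjustment: the range check in
mtrr_type_lookup() compares the region's end against the top of memory,
and with an exclusive end a region stopping exactly at the top of memory
failed that comparison. A minimal standalone sketch of the comparison
(the helper name and top-of-memory value below are invented for
illustration, not kernel code):

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* Hypothetical stand-in for the WB-below-top-of-memory check. */
static bool wb_below_tom2(uint64_t start, uint64_t end, uint64_t tom2)
{
        end--;  /* make end inclusive first, as the fix does */
        return start >= (1ULL << 32) && end < tom2;
}

int main(void)
{
        uint64_t tom2 = 0x280000000ULL; /* assumed 10 GiB top of memory */

        /* A region ending exactly at the top of memory now passes; with
         * an exclusive end, "end < tom2" was false and the proper
         * write-back type was lost. */
        assert(wb_below_tom2(0x270000000ULL, tom2, tom2));
        return 0;
}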
@@ -525,89 +525,70 @@ static void rdtgroup_remove(struct rdtgroup *rdtgrp)
 	kfree(rdtgrp);
 }
 
-struct task_move_callback {
-	struct callback_head	work;
-	struct rdtgroup		*rdtgrp;
-};
-
-static void move_myself(struct callback_head *head)
+static void _update_task_closid_rmid(void *task)
 {
-	struct task_move_callback *callback;
-	struct rdtgroup *rdtgrp;
-
-	callback = container_of(head, struct task_move_callback, work);
-	rdtgrp = callback->rdtgrp;
-
 	/*
-	 * If resource group was deleted before this task work callback
-	 * was invoked, then assign the task to root group and free the
-	 * resource group.
+	 * If the task is still current on this CPU, update PQR_ASSOC MSR.
+	 * Otherwise, the MSR is updated when the task is scheduled in.
 	 */
-	if (atomic_dec_and_test(&rdtgrp->waitcount) &&
-	    (rdtgrp->flags & RDT_DELETED)) {
-		current->closid = 0;
-		current->rmid = 0;
-		rdtgroup_remove(rdtgrp);
-	}
-
-	if (unlikely(current->flags & PF_EXITING))
-		goto out;
-
-	preempt_disable();
-	/* update PQR_ASSOC MSR to make resource group go into effect */
-	resctrl_sched_in();
-	preempt_enable();
+	if (task == current)
+		resctrl_sched_in();
+}
 
-out:
-	kfree(callback);
+static void update_task_closid_rmid(struct task_struct *t)
+{
+	if (IS_ENABLED(CONFIG_SMP) && task_curr(t))
+		smp_call_function_single(task_cpu(t), _update_task_closid_rmid, t, 1);
+	else
+		_update_task_closid_rmid(t);
 }
 
 static int __rdtgroup_move_task(struct task_struct *tsk,
 				struct rdtgroup *rdtgrp)
 {
-	struct task_move_callback *callback;
-	int ret;
-
-	callback = kzalloc(sizeof(*callback), GFP_KERNEL);
-	if (!callback)
-		return -ENOMEM;
-	callback->work.func = move_myself;
-	callback->rdtgrp = rdtgrp;
+	/* If the task is already in rdtgrp, no need to move the task. */
+	if ((rdtgrp->type == RDTCTRL_GROUP && tsk->closid == rdtgrp->closid &&
+	     tsk->rmid == rdtgrp->mon.rmid) ||
+	    (rdtgrp->type == RDTMON_GROUP && tsk->rmid == rdtgrp->mon.rmid &&
+	     tsk->closid == rdtgrp->mon.parent->closid))
+		return 0;
 
 	/*
-	 * Take a refcount, so rdtgrp cannot be freed before the
-	 * callback has been invoked.
+	 * Set the task's closid/rmid before the PQR_ASSOC MSR can be
+	 * updated by them.
+	 *
+	 * For ctrl_mon groups, move both closid and rmid.
+	 * For monitor groups, can move the tasks only from
+	 * their parent CTRL group.
 	 */
-	atomic_inc(&rdtgrp->waitcount);
-	ret = task_work_add(tsk, &callback->work, TWA_RESUME);
-	if (ret) {
-		/*
-		 * Task is exiting. Drop the refcount and free the callback.
-		 * No need to check the refcount as the group cannot be
-		 * deleted before the write function unlocks rdtgroup_mutex.
-		 */
-		atomic_dec(&rdtgrp->waitcount);
-		kfree(callback);
-		rdt_last_cmd_puts("Task exited\n");
-	} else {
-		/*
-		 * For ctrl_mon groups move both closid and rmid.
-		 * For monitor groups, can move the tasks only from
-		 * their parent CTRL group.
-		 */
-		if (rdtgrp->type == RDTCTRL_GROUP) {
-			tsk->closid = rdtgrp->closid;
-			tsk->rmid = rdtgrp->mon.rmid;
-		} else if (rdtgrp->type == RDTMON_GROUP) {
-			if (rdtgrp->mon.parent->closid == tsk->closid) {
-				tsk->rmid = rdtgrp->mon.rmid;
-			} else {
-				rdt_last_cmd_puts("Can't move task to different control group\n");
-				ret = -EINVAL;
-			}
+	if (rdtgrp->type == RDTCTRL_GROUP) {
+		tsk->closid = rdtgrp->closid;
+		tsk->rmid = rdtgrp->mon.rmid;
+	} else if (rdtgrp->type == RDTMON_GROUP) {
+		if (rdtgrp->mon.parent->closid == tsk->closid) {
+			tsk->rmid = rdtgrp->mon.rmid;
+		} else {
+			rdt_last_cmd_puts("Can't move task to different control group\n");
+			return -EINVAL;
 		}
 	}
-	return ret;
+
+	/*
+	 * Ensure the task's closid and rmid are written before determining if
+	 * the task is current that will decide if it will be interrupted.
+	 */
+	barrier();
+
+	/*
+	 * By now, the task's closid and rmid are set. If the task is current
+	 * on a CPU, the PQR_ASSOC MSR needs to be updated to make the resource
+	 * group go into effect. If the task is not current, the MSR will be
+	 * updated when the task is scheduled in.
+	 */
+	update_task_closid_rmid(tsk);
+
+	return 0;
 }
 
 static bool is_closid_match(struct task_struct *t, struct rdtgroup *r)
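The new same-group fast path is what avoids the PQR_ASSOC update (and,
on SMP, the IPI) for redundant writes to the tasks file. It can be
modeled in plain C; the structures below are simplified stand-ins for
the kernel's task_struct and rdtgroup, not the real layouts:

#include <assert.h>

enum rdt_group_type { RDTCTRL_GROUP, RDTMON_GROUP };

struct task { int closid, rmid; };
struct group {
        enum rdt_group_type type;
        int closid, rmid;
        int parent_closid;      /* stands in for rdtgrp->mon.parent->closid */
};

static int already_in_group(const struct task *t, const struct group *g)
{
        return (g->type == RDTCTRL_GROUP &&
                t->closid == g->closid && t->rmid == g->rmid) ||
               (g->type == RDTMON_GROUP &&
                t->rmid == g->rmid && t->closid == g->parent_closid);
}

int main(void)
{
        struct group ctrl = { .type = RDTCTRL_GROUP, .closid = 2, .rmid = 5 };
        struct task t = { .closid = 2, .rmid = 5 };

        assert(already_in_group(&t, &ctrl));    /* move skipped: no IPI */
        t.rmid = 7;
        assert(!already_in_group(&t, &ctrl));   /* real move proceeds */
        return 0;
}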
@@ -305,14 +305,14 @@ static enum es_result vc_ioio_exitinfo(struct es_em_ctxt *ctxt, u64 *exitinfo)
 	case 0xe4:
 	case 0xe5:
 		*exitinfo |= IOIO_TYPE_IN;
-		*exitinfo |= (u64)insn->immediate.value << 16;
+		*exitinfo |= (u8)insn->immediate.value << 16;
 		break;
 
 	/* OUT immediate opcodes */
 	case 0xe6:
 	case 0xe7:
 		*exitinfo |= IOIO_TYPE_OUT;
-		*exitinfo |= (u64)insn->immediate.value << 16;
+		*exitinfo |= (u8)insn->immediate.value << 16;
 		break;
 
 	/* IN register opcodes */
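Why the cast width matters: the instruction decoder stores the imm8 port
in a signed 32-bit field, so a port number with the top bit set
(0x80-0xff) arrives sign-extended. A standalone demonstration, assuming
that sign-extension behavior:

#include <assert.h>
#include <stdint.h>

int main(void)
{
        /* e.g. "in %al, $0xed": the decoder sign-extends the imm8 */
        int32_t imm = (int8_t)0xed;             /* 0xffffffed */

        uint64_t bad  = (uint64_t)imm << 16;    /* smears 1s above the port field */
        uint64_t good = (uint8_t)imm << 16;     /* just the single port byte */

        assert(good == 0xed0000);
        assert(bad != good);
        return 0;
}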
@@ -829,6 +829,8 @@ int pud_free_pmd_page(pud_t *pud, unsigned long addr)
 	}
 
 	free_page((unsigned long)pmd_sv);
+
+	pgtable_pmd_page_dtor(virt_to_page(pmd));
 	free_page((unsigned long)pmd);
 
 	return 1;
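The one-line fix restores a ctor/dtor pairing: a pmd page initialized
with pgtable_pmd_page_ctor() must see pgtable_pmd_page_dtor() before the
backing page is freed, or its split ptlock leaks. A toy userspace model
of that pairing (the struct and helpers are invented for illustration):

#include <assert.h>
#include <stdlib.h>

/* Invented model: the "page" carries an optional lock, like a pmd
 * page's split ptlock. */
struct page { void *ptl; };

static int page_ctor(struct page *p)
{
        p->ptl = malloc(64);    /* stands in for ptlock allocation */
        return p->ptl != NULL;
}

static void page_dtor(struct page *p)
{
        free(p->ptl);           /* must happen before the page is freed */
        p->ptl = NULL;
}

int main(void)
{
        struct page pmd_page = { 0 };

        assert(page_ctor(&pmd_page));
        /* ... page used as a pmd ... */
        page_dtor(&pmd_page);   /* the call pud_free_pmd_page() was missing */
        return 0;
}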
@@ -1285,26 +1285,23 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
 	return ret;
 }
 
+#ifndef CONFIG_ARCH_SPLIT_ARG64
 SYSCALL_DEFINE5(fanotify_mark, int, fanotify_fd, unsigned int, flags,
 			      __u64, mask, int, dfd,
 			      const char  __user *, pathname)
 {
 	return do_fanotify_mark(fanotify_fd, flags, mask, dfd, pathname);
 }
+#endif
 
-#ifdef CONFIG_COMPAT
-COMPAT_SYSCALL_DEFINE6(fanotify_mark,
+#if defined(CONFIG_ARCH_SPLIT_ARG64) || defined(CONFIG_COMPAT)
+SYSCALL32_DEFINE6(fanotify_mark,
 				int, fanotify_fd, unsigned int, flags,
-				__u32, mask0, __u32, mask1, int, dfd,
+				SC_ARG64(mask), int, dfd,
 				const char  __user *, pathname)
 {
-	return do_fanotify_mark(fanotify_fd, flags,
-#ifdef __BIG_ENDIAN
-				((__u64)mask0 << 32) | mask1,
-#else
-				((__u64)mask1 << 32) | mask0,
-#endif
-				dfd, pathname);
+	return do_fanotify_mark(fanotify_fd, flags, SC_VAL64(__u64, mask),
+				dfd, pathname);
 }
 #endif
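On native i386 the six-argument form is now the real syscall, so a
32-bit caller (normally the libc wrapper) passes the 64-bit mask as two
32-bit halves, low word first on little-endian. A hypothetical
raw-syscall sketch of such a caller, assuming an i386 build
(fanotify_mark_i386() is an invented name):

#include <stdint.h>
#include <sys/syscall.h>
#include <unistd.h>

static long fanotify_mark_i386(int fd, unsigned int flags, uint64_t mask,
                               int dfd, const char *pathname)
{
        return syscall(SYS_fanotify_mark, fd, flags,
                       (uint32_t)mask,          /* mask_lo */
                       (uint32_t)(mask >> 32),  /* mask_hi */
                       dfd, pathname);
}

int main(void)
{
        /* demo only: with no fanotify fd this fails with EBADF, but it
         * exercises the split-argument marshalling */
        fanotify_mark_i386(-1, 0, 0x8877665544332211ULL, -1, "/");
        return 0;
}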
@@ -251,6 +251,30 @@ static inline int is_syscall_trace_event(struct trace_event_call *tp_event)
 	static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))
 #endif /* __SYSCALL_DEFINEx */
 
+/* For split 64-bit arguments on 32-bit architectures. */
+#ifdef __LITTLE_ENDIAN
+#define SC_ARG64(name) u32, name##_lo, u32, name##_hi
+#else
+#define SC_ARG64(name) u32, name##_hi, u32, name##_lo
+#endif
+#define SC_VAL64(type, name) ((type) name##_hi << 32 | name##_lo)
+
+#ifdef CONFIG_COMPAT
+#define SYSCALL32_DEFINE1 COMPAT_SYSCALL_DEFINE1
+#define SYSCALL32_DEFINE2 COMPAT_SYSCALL_DEFINE2
+#define SYSCALL32_DEFINE3 COMPAT_SYSCALL_DEFINE3
+#define SYSCALL32_DEFINE4 COMPAT_SYSCALL_DEFINE4
+#define SYSCALL32_DEFINE5 COMPAT_SYSCALL_DEFINE5
+#define SYSCALL32_DEFINE6 COMPAT_SYSCALL_DEFINE6
+#else
+#define SYSCALL32_DEFINE1 SYSCALL_DEFINE1
+#define SYSCALL32_DEFINE2 SYSCALL_DEFINE2
+#define SYSCALL32_DEFINE3 SYSCALL_DEFINE3
+#define SYSCALL32_DEFINE4 SYSCALL_DEFINE4
+#define SYSCALL32_DEFINE5 SYSCALL_DEFINE5
+#define SYSCALL32_DEFINE6 SYSCALL_DEFINE6
+#endif
+
 /*
  * Called before coming back to user-mode. Returning to user-mode with an
  * address limit different than USER_DS can allow to overwrite kernel memory.
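To see what the handler actually receives, here is a small standalone
check of the reassembly on a little-endian arch. SC_VAL64 is copied from
the hunk above; SC_ARG64 itself only expands meaningfully inside the
SYSCALL_DEFINEx "type, name" pair machinery, so the two halves are
spelled out by hand:

#include <assert.h>
#include <stdint.h>

typedef uint32_t u32;

#define SC_VAL64(type, name) ((type) name##_hi << 32 | name##_lo)

/* what SYSCALL32_DEFINE6(fanotify_mark, ..., SC_ARG64(mask), ...) gives
 * the handler on a little-endian arch: two u32 halves, low one first */
static uint64_t reassemble_mask(u32 mask_lo, u32 mask_hi)
{
        return SC_VAL64(uint64_t, mask);
}

int main(void)
{
        assert(reassemble_mask(0x44332211u, 0x88776655u) ==
               0x8877665544332211ULL);
        return 0;
}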