Commit a440e4d7 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'x86_urgent_for_v5.11_rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 fixes from Borislav Petkov:
 "As expected, fixes started trickling in after the holidays so here is
  the accumulated pile of x86 fixes for 5.11:

   - A fix for fanotify_mark() missing the conversion of x86_32 native
     syscalls which take 64-bit arguments to the compat handlers due to
     former having a general compat handler. (Brian Gerst)

   - Add a forgotten pmd page destructor call to pud_free_pmd_page()
     where a pmd page is freed. (Dan Williams)

   - Make IN/OUT insns with an u8 immediate port operand handling for
     SEV-ES guests more precise by using only the single port byte and
     not the whole s32 value of the insn decoder. (Peter Gonda)

   - Correct a straddling end range check before returning the proper
     MTRR type, when the end address is the same as top of memory.
     (Ying-Tsun Huang)

   - Change PQR_ASSOC MSR update scheme when moving a task to a resctrl
     resource group to avoid significant performance overhead with some
     resctrl workloads. (Fenghua Yu)

   - Avoid the actual task move overhead when the task is already in the
     resource group. (Fenghua Yu)"

* tag 'x86_urgent_for_v5.11_rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/resctrl: Don't move a task to the same resource group
  x86/resctrl: Use an IPI instead of task_work_add() to update PQR_ASSOC MSR
  x86/mtrr: Correct the range check before performing MTRR type lookups
  x86/sev-es: Fix SEV-ES OUT/IN immediate opcode vc handling
  x86/mm: Fix leak of pmd ptlock
  fanotify: Fix sys_fanotify_mark() on native x86-32
parents 2ff90100 a0195f31
......@@ -1105,6 +1105,12 @@ config HAVE_ARCH_PFN_VALID
config ARCH_SUPPORTS_DEBUG_PAGEALLOC
bool
config ARCH_SPLIT_ARG64
bool
help
If a 32-bit architecture requires 64-bit arguments to be split into
pairs of 32-bit arguments, select this option.
source "kernel/gcov/Kconfig"
source "scripts/gcc-plugins/Kconfig"
......
......@@ -19,6 +19,7 @@ config X86_32
select KMAP_LOCAL
select MODULES_USE_ELF_REL
select OLD_SIGACTION
select ARCH_SPLIT_ARG64
config X86_64
def_bool y
......
......@@ -167,9 +167,6 @@ static u8 mtrr_type_lookup_variable(u64 start, u64 end, u64 *partial_end,
*repeat = 0;
*uniform = 1;
/* Make end inclusive instead of exclusive */
end--;
prev_match = MTRR_TYPE_INVALID;
for (i = 0; i < num_var_ranges; ++i) {
unsigned short start_state, end_state, inclusive;
......@@ -261,6 +258,9 @@ u8 mtrr_type_lookup(u64 start, u64 end, u8 *uniform)
int repeat;
u64 partial_end;
/* Make end inclusive instead of exclusive */
end--;
if (!mtrr_state_set)
return MTRR_TYPE_INVALID;
......
......@@ -525,89 +525,70 @@ static void rdtgroup_remove(struct rdtgroup *rdtgrp)
kfree(rdtgrp);
}
struct task_move_callback {
struct callback_head work;
struct rdtgroup *rdtgrp;
};
static void move_myself(struct callback_head *head)
static void _update_task_closid_rmid(void *task)
{
struct task_move_callback *callback;
struct rdtgroup *rdtgrp;
callback = container_of(head, struct task_move_callback, work);
rdtgrp = callback->rdtgrp;
/*
* If resource group was deleted before this task work callback
* was invoked, then assign the task to root group and free the
* resource group.
* If the task is still current on this CPU, update PQR_ASSOC MSR.
* Otherwise, the MSR is updated when the task is scheduled in.
*/
if (atomic_dec_and_test(&rdtgrp->waitcount) &&
(rdtgrp->flags & RDT_DELETED)) {
current->closid = 0;
current->rmid = 0;
rdtgroup_remove(rdtgrp);
}
if (unlikely(current->flags & PF_EXITING))
goto out;
preempt_disable();
/* update PQR_ASSOC MSR to make resource group go into effect */
resctrl_sched_in();
preempt_enable();
if (task == current)
resctrl_sched_in();
}
out:
kfree(callback);
static void update_task_closid_rmid(struct task_struct *t)
{
if (IS_ENABLED(CONFIG_SMP) && task_curr(t))
smp_call_function_single(task_cpu(t), _update_task_closid_rmid, t, 1);
else
_update_task_closid_rmid(t);
}
static int __rdtgroup_move_task(struct task_struct *tsk,
struct rdtgroup *rdtgrp)
{
struct task_move_callback *callback;
int ret;
callback = kzalloc(sizeof(*callback), GFP_KERNEL);
if (!callback)
return -ENOMEM;
callback->work.func = move_myself;
callback->rdtgrp = rdtgrp;
/* If the task is already in rdtgrp, no need to move the task. */
if ((rdtgrp->type == RDTCTRL_GROUP && tsk->closid == rdtgrp->closid &&
tsk->rmid == rdtgrp->mon.rmid) ||
(rdtgrp->type == RDTMON_GROUP && tsk->rmid == rdtgrp->mon.rmid &&
tsk->closid == rdtgrp->mon.parent->closid))
return 0;
/*
* Take a refcount, so rdtgrp cannot be freed before the
* callback has been invoked.
* Set the task's closid/rmid before the PQR_ASSOC MSR can be
* updated by them.
*
* For ctrl_mon groups, move both closid and rmid.
* For monitor groups, can move the tasks only from
* their parent CTRL group.
*/
atomic_inc(&rdtgrp->waitcount);
ret = task_work_add(tsk, &callback->work, TWA_RESUME);
if (ret) {
/*
* Task is exiting. Drop the refcount and free the callback.
* No need to check the refcount as the group cannot be
* deleted before the write function unlocks rdtgroup_mutex.
*/
atomic_dec(&rdtgrp->waitcount);
kfree(callback);
rdt_last_cmd_puts("Task exited\n");
} else {
/*
* For ctrl_mon groups move both closid and rmid.
* For monitor groups, can move the tasks only from
* their parent CTRL group.
*/
if (rdtgrp->type == RDTCTRL_GROUP) {
tsk->closid = rdtgrp->closid;
if (rdtgrp->type == RDTCTRL_GROUP) {
tsk->closid = rdtgrp->closid;
tsk->rmid = rdtgrp->mon.rmid;
} else if (rdtgrp->type == RDTMON_GROUP) {
if (rdtgrp->mon.parent->closid == tsk->closid) {
tsk->rmid = rdtgrp->mon.rmid;
} else if (rdtgrp->type == RDTMON_GROUP) {
if (rdtgrp->mon.parent->closid == tsk->closid) {
tsk->rmid = rdtgrp->mon.rmid;
} else {
rdt_last_cmd_puts("Can't move task to different control group\n");
ret = -EINVAL;
}
} else {
rdt_last_cmd_puts("Can't move task to different control group\n");
return -EINVAL;
}
}
return ret;
/*
* Ensure the task's closid and rmid are written before determining if
* the task is current that will decide if it will be interrupted.
*/
barrier();
/*
* By now, the task's closid and rmid are set. If the task is current
* on a CPU, the PQR_ASSOC MSR needs to be updated to make the resource
* group go into effect. If the task is not current, the MSR will be
* updated when the task is scheduled in.
*/
update_task_closid_rmid(tsk);
return 0;
}
static bool is_closid_match(struct task_struct *t, struct rdtgroup *r)
......
......@@ -305,14 +305,14 @@ static enum es_result vc_ioio_exitinfo(struct es_em_ctxt *ctxt, u64 *exitinfo)
case 0xe4:
case 0xe5:
*exitinfo |= IOIO_TYPE_IN;
*exitinfo |= (u64)insn->immediate.value << 16;
*exitinfo |= (u8)insn->immediate.value << 16;
break;
/* OUT immediate opcodes */
case 0xe6:
case 0xe7:
*exitinfo |= IOIO_TYPE_OUT;
*exitinfo |= (u64)insn->immediate.value << 16;
*exitinfo |= (u8)insn->immediate.value << 16;
break;
/* IN register opcodes */
......
......@@ -829,6 +829,8 @@ int pud_free_pmd_page(pud_t *pud, unsigned long addr)
}
free_page((unsigned long)pmd_sv);
pgtable_pmd_page_dtor(virt_to_page(pmd));
free_page((unsigned long)pmd);
return 1;
......
......@@ -1285,26 +1285,23 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
return ret;
}
#ifndef CONFIG_ARCH_SPLIT_ARG64
SYSCALL_DEFINE5(fanotify_mark, int, fanotify_fd, unsigned int, flags,
__u64, mask, int, dfd,
const char __user *, pathname)
{
return do_fanotify_mark(fanotify_fd, flags, mask, dfd, pathname);
}
#endif
#ifdef CONFIG_COMPAT
COMPAT_SYSCALL_DEFINE6(fanotify_mark,
#if defined(CONFIG_ARCH_SPLIT_ARG64) || defined(CONFIG_COMPAT)
SYSCALL32_DEFINE6(fanotify_mark,
int, fanotify_fd, unsigned int, flags,
__u32, mask0, __u32, mask1, int, dfd,
SC_ARG64(mask), int, dfd,
const char __user *, pathname)
{
return do_fanotify_mark(fanotify_fd, flags,
#ifdef __BIG_ENDIAN
((__u64)mask0 << 32) | mask1,
#else
((__u64)mask1 << 32) | mask0,
#endif
dfd, pathname);
return do_fanotify_mark(fanotify_fd, flags, SC_VAL64(__u64, mask),
dfd, pathname);
}
#endif
......
......@@ -251,6 +251,30 @@ static inline int is_syscall_trace_event(struct trace_event_call *tp_event)
static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))
#endif /* __SYSCALL_DEFINEx */
/* For split 64-bit arguments on 32-bit architectures */
#ifdef __LITTLE_ENDIAN
#define SC_ARG64(name) u32, name##_lo, u32, name##_hi
#else
#define SC_ARG64(name) u32, name##_hi, u32, name##_lo
#endif
#define SC_VAL64(type, name) ((type) name##_hi << 32 | name##_lo)
#ifdef CONFIG_COMPAT
#define SYSCALL32_DEFINE1 COMPAT_SYSCALL_DEFINE1
#define SYSCALL32_DEFINE2 COMPAT_SYSCALL_DEFINE2
#define SYSCALL32_DEFINE3 COMPAT_SYSCALL_DEFINE3
#define SYSCALL32_DEFINE4 COMPAT_SYSCALL_DEFINE4
#define SYSCALL32_DEFINE5 COMPAT_SYSCALL_DEFINE5
#define SYSCALL32_DEFINE6 COMPAT_SYSCALL_DEFINE6
#else
#define SYSCALL32_DEFINE1 SYSCALL_DEFINE1
#define SYSCALL32_DEFINE2 SYSCALL_DEFINE2
#define SYSCALL32_DEFINE3 SYSCALL_DEFINE3
#define SYSCALL32_DEFINE4 SYSCALL_DEFINE4
#define SYSCALL32_DEFINE5 SYSCALL_DEFINE5
#define SYSCALL32_DEFINE6 SYSCALL_DEFINE6
#endif
/*
* Called before coming back to user-mode. Returning to user-mode with an
* address limit different than USER_DS can allow to overwrite kernel memory.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment