Commit 27afc5db authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux

Pull s390 updates from Martin Schwidefsky:
 "The most notable change for this pull request is the ftrace rework
  from Heiko.  It brings a small performance improvement and the ground
  work to support a new gcc option to replace the mcount blocks with a
  single nop.

  Two new s390 specific system calls are added to emulate user space
  mmio for PCI, an artifact of the how PCI memory is accessed.

  Two patches for the memory management with changes to common code.
  For KVM mm_forbids_zeropage is added which disables the empty zero
  page for an mm that is used by a KVM process.  And an optimization,
  pmdp_get_and_clear_full is added analog to ptep_get_and_clear_full.

  Some micro optimization for the cmpxchg and the spinlock code.

  And as usual bug fixes and cleanups"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux: (46 commits)
  s390/cputime: fix 31-bit compile
  s390/scm_block: make the number of reqs per HW req configurable
  s390/scm_block: handle multiple requests in one HW request
  s390/scm_block: allocate aidaw pages only when necessary
  s390/scm_block: use mempool to manage aidaw requests
  s390/eadm: change timeout value
  s390/mm: fix memory leak of ptlock in pmd_free_tlb
  s390: use local symbol names in entry[64].S
  s390/ptrace: always include vector registers in core files
  s390/simd: clear vector register pointer on fork/clone
  s390: translate cputime magic constants to macros
  s390/idle: convert open coded idle time seqcount
  s390/idle: add missing irq off lockdep annotation
  s390/debug: avoid function call for debug_sprintf_*
  s390/kprobes: fix instruction copy for out of line execution
  s390: remove diag 44 calls from cpu_relax()
  s390/dasd: retry partition detection
  s390/dasd: fix list corruption for sleep_on requests
  s390/dasd: fix infinite term I/O loop
  s390/dasd: remove unused code
  ...
parents 70e71ca0 35199781
This diff is collapsed.
......@@ -11,200 +11,28 @@
#include <linux/types.h>
#include <linux/bug.h>
extern void __xchg_called_with_bad_pointer(void);
static inline unsigned long __xchg(unsigned long x, void *ptr, int size)
{
unsigned long addr, old;
int shift;
switch (size) {
case 1:
addr = (unsigned long) ptr;
shift = (3 ^ (addr & 3)) << 3;
addr ^= addr & 3;
asm volatile(
" l %0,%4\n"
"0: lr 0,%0\n"
" nr 0,%3\n"
" or 0,%2\n"
" cs %0,0,%4\n"
" jl 0b\n"
: "=&d" (old), "=Q" (*(int *) addr)
: "d" ((x & 0xff) << shift), "d" (~(0xff << shift)),
"Q" (*(int *) addr) : "memory", "cc", "0");
return old >> shift;
case 2:
addr = (unsigned long) ptr;
shift = (2 ^ (addr & 2)) << 3;
addr ^= addr & 2;
asm volatile(
" l %0,%4\n"
"0: lr 0,%0\n"
" nr 0,%3\n"
" or 0,%2\n"
" cs %0,0,%4\n"
" jl 0b\n"
: "=&d" (old), "=Q" (*(int *) addr)
: "d" ((x & 0xffff) << shift), "d" (~(0xffff << shift)),
"Q" (*(int *) addr) : "memory", "cc", "0");
return old >> shift;
case 4:
asm volatile(
" l %0,%3\n"
"0: cs %0,%2,%3\n"
" jl 0b\n"
: "=&d" (old), "=Q" (*(int *) ptr)
: "d" (x), "Q" (*(int *) ptr)
: "memory", "cc");
return old;
#ifdef CONFIG_64BIT
case 8:
asm volatile(
" lg %0,%3\n"
"0: csg %0,%2,%3\n"
" jl 0b\n"
: "=&d" (old), "=m" (*(long *) ptr)
: "d" (x), "Q" (*(long *) ptr)
: "memory", "cc");
return old;
#endif /* CONFIG_64BIT */
}
__xchg_called_with_bad_pointer();
return x;
}
#define xchg(ptr, x) \
({ \
__typeof__(*(ptr)) __ret; \
__ret = (__typeof__(*(ptr))) \
__xchg((unsigned long)(x), (void *)(ptr), sizeof(*(ptr)));\
__ret; \
#define cmpxchg(ptr, o, n) \
({ \
__typeof__(*(ptr)) __o = (o); \
__typeof__(*(ptr)) __n = (n); \
(__typeof__(*(ptr))) __sync_val_compare_and_swap((ptr),__o,__n);\
})
/*
* Atomic compare and exchange. Compare OLD with MEM, if identical,
* store NEW in MEM. Return the initial value in MEM. Success is
* indicated by comparing RETURN with OLD.
*/
#define __HAVE_ARCH_CMPXCHG
extern void __cmpxchg_called_with_bad_pointer(void);
static inline unsigned long __cmpxchg(void *ptr, unsigned long old,
unsigned long new, int size)
{
unsigned long addr, prev, tmp;
int shift;
switch (size) {
case 1:
addr = (unsigned long) ptr;
shift = (3 ^ (addr & 3)) << 3;
addr ^= addr & 3;
asm volatile(
" l %0,%2\n"
"0: nr %0,%5\n"
" lr %1,%0\n"
" or %0,%3\n"
" or %1,%4\n"
" cs %0,%1,%2\n"
" jnl 1f\n"
" xr %1,%0\n"
" nr %1,%5\n"
" jnz 0b\n"
"1:"
: "=&d" (prev), "=&d" (tmp), "+Q" (*(int *) addr)
: "d" ((old & 0xff) << shift),
"d" ((new & 0xff) << shift),
"d" (~(0xff << shift))
: "memory", "cc");
return prev >> shift;
case 2:
addr = (unsigned long) ptr;
shift = (2 ^ (addr & 2)) << 3;
addr ^= addr & 2;
asm volatile(
" l %0,%2\n"
"0: nr %0,%5\n"
" lr %1,%0\n"
" or %0,%3\n"
" or %1,%4\n"
" cs %0,%1,%2\n"
" jnl 1f\n"
" xr %1,%0\n"
" nr %1,%5\n"
" jnz 0b\n"
"1:"
: "=&d" (prev), "=&d" (tmp), "+Q" (*(int *) addr)
: "d" ((old & 0xffff) << shift),
"d" ((new & 0xffff) << shift),
"d" (~(0xffff << shift))
: "memory", "cc");
return prev >> shift;
case 4:
asm volatile(
" cs %0,%3,%1\n"
: "=&d" (prev), "=Q" (*(int *) ptr)
: "0" (old), "d" (new), "Q" (*(int *) ptr)
: "memory", "cc");
return prev;
#ifdef CONFIG_64BIT
case 8:
asm volatile(
" csg %0,%3,%1\n"
: "=&d" (prev), "=Q" (*(long *) ptr)
: "0" (old), "d" (new), "Q" (*(long *) ptr)
: "memory", "cc");
return prev;
#endif /* CONFIG_64BIT */
}
__cmpxchg_called_with_bad_pointer();
return old;
}
#define cmpxchg(ptr, o, n) \
({ \
__typeof__(*(ptr)) __ret; \
__ret = (__typeof__(*(ptr))) \
__cmpxchg((ptr), (unsigned long)(o), (unsigned long)(n), \
sizeof(*(ptr))); \
__ret; \
})
#define cmpxchg64 cmpxchg
#define cmpxchg_local cmpxchg
#define cmpxchg64_local cmpxchg
#ifdef CONFIG_64BIT
#define cmpxchg64(ptr, o, n) \
#define xchg(ptr, x) \
({ \
cmpxchg((ptr), (o), (n)); \
__typeof__(ptr) __ptr = (ptr); \
__typeof__(*(ptr)) __old; \
do { \
__old = *__ptr; \
} while (!__sync_bool_compare_and_swap(__ptr, __old, x)); \
__old; \
})
#else /* CONFIG_64BIT */
static inline unsigned long long __cmpxchg64(void *ptr,
unsigned long long old,
unsigned long long new)
{
register_pair rp_old = {.pair = old};
register_pair rp_new = {.pair = new};
unsigned long long *ullptr = ptr;
asm volatile(
" cds %0,%2,%1"
: "+d" (rp_old), "+Q" (*ullptr)
: "d" (rp_new)
: "memory", "cc");
return rp_old.pair;
}
#define cmpxchg64(ptr, o, n) \
({ \
__typeof__(*(ptr)) __ret; \
__ret = (__typeof__(*(ptr))) \
__cmpxchg64((ptr), \
(unsigned long long)(o), \
(unsigned long long)(n)); \
__ret; \
})
#endif /* CONFIG_64BIT */
#define __HAVE_ARCH_CMPXCHG
#define __cmpxchg_double_op(p1, p2, o1, o2, n1, n2, insn) \
({ \
......@@ -265,40 +93,4 @@ extern void __cmpxchg_double_called_with_bad_pointer(void);
#define system_has_cmpxchg_double() 1
#include <asm-generic/cmpxchg-local.h>
static inline unsigned long __cmpxchg_local(void *ptr,
unsigned long old,
unsigned long new, int size)
{
switch (size) {
case 1:
case 2:
case 4:
#ifdef CONFIG_64BIT
case 8:
#endif
return __cmpxchg(ptr, old, new, size);
default:
return __cmpxchg_local_generic(ptr, old, new, size);
}
return old;
}
/*
* cmpxchg_local and cmpxchg64_local are atomic wrt current CPU. Always make
* them available.
*/
#define cmpxchg_local(ptr, o, n) \
({ \
__typeof__(*(ptr)) __ret; \
__ret = (__typeof__(*(ptr))) \
__cmpxchg_local((ptr), (unsigned long)(o), \
(unsigned long)(n), sizeof(*(ptr))); \
__ret; \
})
#define cmpxchg64_local(ptr, o, n) cmpxchg64((ptr), (o), (n))
#endif /* __ASM_CMPXCHG_H */
......@@ -10,6 +10,8 @@
#include <linux/types.h>
#include <asm/div64.h>
#define CPUTIME_PER_USEC 4096ULL
#define CPUTIME_PER_SEC (CPUTIME_PER_USEC * USEC_PER_SEC)
/* We want to use full resolution of the CPU timer: 2**-12 micro-seconds. */
......@@ -38,24 +40,24 @@ static inline unsigned long __div(unsigned long long n, unsigned long base)
*/
static inline unsigned long cputime_to_jiffies(const cputime_t cputime)
{
return __div((__force unsigned long long) cputime, 4096000000ULL / HZ);
return __div((__force unsigned long long) cputime, CPUTIME_PER_SEC / HZ);
}
static inline cputime_t jiffies_to_cputime(const unsigned int jif)
{
return (__force cputime_t)(jif * (4096000000ULL / HZ));
return (__force cputime_t)(jif * (CPUTIME_PER_SEC / HZ));
}
static inline u64 cputime64_to_jiffies64(cputime64_t cputime)
{
unsigned long long jif = (__force unsigned long long) cputime;
do_div(jif, 4096000000ULL / HZ);
do_div(jif, CPUTIME_PER_SEC / HZ);
return jif;
}
static inline cputime64_t jiffies64_to_cputime64(const u64 jif)
{
return (__force cputime64_t)(jif * (4096000000ULL / HZ));
return (__force cputime64_t)(jif * (CPUTIME_PER_SEC / HZ));
}
/*
......@@ -68,7 +70,7 @@ static inline unsigned int cputime_to_usecs(const cputime_t cputime)
static inline cputime_t usecs_to_cputime(const unsigned int m)
{
return (__force cputime_t)(m * 4096ULL);
return (__force cputime_t)(m * CPUTIME_PER_USEC);
}
#define usecs_to_cputime64(m) usecs_to_cputime(m)
......@@ -78,12 +80,12 @@ static inline cputime_t usecs_to_cputime(const unsigned int m)
*/
static inline unsigned int cputime_to_secs(const cputime_t cputime)
{
return __div((__force unsigned long long) cputime, 2048000000) >> 1;
return __div((__force unsigned long long) cputime, CPUTIME_PER_SEC / 2) >> 1;
}
static inline cputime_t secs_to_cputime(const unsigned int s)
{
return (__force cputime_t)(s * 4096000000ULL);
return (__force cputime_t)(s * CPUTIME_PER_SEC);
}
/*
......@@ -91,8 +93,8 @@ static inline cputime_t secs_to_cputime(const unsigned int s)
*/
static inline cputime_t timespec_to_cputime(const struct timespec *value)
{
unsigned long long ret = value->tv_sec * 4096000000ULL;
return (__force cputime_t)(ret + value->tv_nsec * 4096 / 1000);
unsigned long long ret = value->tv_sec * CPUTIME_PER_SEC;
return (__force cputime_t)(ret + __div(value->tv_nsec * CPUTIME_PER_USEC, NSEC_PER_USEC));
}
static inline void cputime_to_timespec(const cputime_t cputime,
......@@ -103,12 +105,12 @@ static inline void cputime_to_timespec(const cputime_t cputime,
register_pair rp;
rp.pair = __cputime >> 1;
asm ("dr %0,%1" : "+d" (rp) : "d" (2048000000UL));
value->tv_nsec = rp.subreg.even * 1000 / 4096;
asm ("dr %0,%1" : "+d" (rp) : "d" (CPUTIME_PER_SEC / 2));
value->tv_nsec = rp.subreg.even * NSEC_PER_USEC / CPUTIME_PER_USEC;
value->tv_sec = rp.subreg.odd;
#else
value->tv_nsec = (__cputime % 4096000000ULL) * 1000 / 4096;
value->tv_sec = __cputime / 4096000000ULL;
value->tv_nsec = (__cputime % CPUTIME_PER_SEC) * NSEC_PER_USEC / CPUTIME_PER_USEC;
value->tv_sec = __cputime / CPUTIME_PER_SEC;
#endif
}
......@@ -119,8 +121,8 @@ static inline void cputime_to_timespec(const cputime_t cputime,
*/
static inline cputime_t timeval_to_cputime(const struct timeval *value)
{
unsigned long long ret = value->tv_sec * 4096000000ULL;
return (__force cputime_t)(ret + value->tv_usec * 4096ULL);
unsigned long long ret = value->tv_sec * CPUTIME_PER_SEC;
return (__force cputime_t)(ret + value->tv_usec * CPUTIME_PER_USEC);
}
static inline void cputime_to_timeval(const cputime_t cputime,
......@@ -131,12 +133,12 @@ static inline void cputime_to_timeval(const cputime_t cputime,
register_pair rp;
rp.pair = __cputime >> 1;
asm ("dr %0,%1" : "+d" (rp) : "d" (2048000000UL));
value->tv_usec = rp.subreg.even / 4096;
asm ("dr %0,%1" : "+d" (rp) : "d" (CPUTIME_PER_USEC / 2));
value->tv_usec = rp.subreg.even / CPUTIME_PER_USEC;
value->tv_sec = rp.subreg.odd;
#else
value->tv_usec = (__cputime % 4096000000ULL) / 4096;
value->tv_sec = __cputime / 4096000000ULL;
value->tv_usec = (__cputime % CPUTIME_PER_SEC) / CPUTIME_PER_USEC;
value->tv_sec = __cputime / CPUTIME_PER_SEC;
#endif
}
......@@ -146,13 +148,13 @@ static inline void cputime_to_timeval(const cputime_t cputime,
static inline clock_t cputime_to_clock_t(cputime_t cputime)
{
unsigned long long clock = (__force unsigned long long) cputime;
do_div(clock, 4096000000ULL / USER_HZ);
do_div(clock, CPUTIME_PER_SEC / USER_HZ);
return clock;
}
static inline cputime_t clock_t_to_cputime(unsigned long x)
{
return (__force cputime_t)(x * (4096000000ULL / USER_HZ));
return (__force cputime_t)(x * (CPUTIME_PER_SEC / USER_HZ));
}
/*
......@@ -161,7 +163,7 @@ static inline cputime_t clock_t_to_cputime(unsigned long x)
static inline clock_t cputime64_to_clock_t(cputime64_t cputime)
{
unsigned long long clock = (__force unsigned long long) cputime;
do_div(clock, 4096000000ULL / USER_HZ);
do_div(clock, CPUTIME_PER_SEC / USER_HZ);
return clock;
}
......
......@@ -151,9 +151,21 @@ debug_text_event(debug_info_t* id, int level, const char* txt)
* stored in the s390dbf. See Documentation/s390/s390dbf.txt for more details!
*/
extern debug_entry_t *
debug_sprintf_event(debug_info_t* id,int level,char *string,...)
__debug_sprintf_event(debug_info_t *id, int level, char *string, ...)
__attribute__ ((format(printf, 3, 4)));
#define debug_sprintf_event(_id, _level, _fmt, ...) \
({ \
debug_entry_t *__ret; \
debug_info_t *__id = _id; \
int __level = _level; \
if ((!__id) || (__level > __id->level)) \
__ret = NULL; \
else \
__ret = __debug_sprintf_event(__id, __level, \
_fmt, ## __VA_ARGS__); \
__ret; \
})
static inline debug_entry_t*
debug_exception(debug_info_t* id, int level, void* data, int length)
......@@ -194,9 +206,22 @@ debug_text_exception(debug_info_t* id, int level, const char* txt)
* stored in the s390dbf. See Documentation/s390/s390dbf.txt for more details!
*/
extern debug_entry_t *
debug_sprintf_exception(debug_info_t* id,int level,char *string,...)
__debug_sprintf_exception(debug_info_t *id, int level, char *string, ...)
__attribute__ ((format(printf, 3, 4)));
#define debug_sprintf_exception(_id, _level, _fmt, ...) \
({ \
debug_entry_t *__ret; \
debug_info_t *__id = _id; \
int __level = _level; \
if ((!__id) || (__level > __id->level)) \
__ret = NULL; \
else \
__ret = __debug_sprintf_exception(__id, __level, \
_fmt, ## __VA_ARGS__);\
__ret; \
})
int debug_register_view(debug_info_t* id, struct debug_view* view);
int debug_unregister_view(debug_info_t* id, struct debug_view* view);
......
#ifndef _ASM_S390_FTRACE_H
#define _ASM_S390_FTRACE_H
#define ARCH_SUPPORTS_FTRACE_OPS 1
#define MCOUNT_INSN_SIZE 24
#define MCOUNT_RETURN_FIXUP 18
#ifndef __ASSEMBLY__
extern void _mcount(void);
#define ftrace_return_address(n) __builtin_return_address(n)
void _mcount(void);
void ftrace_caller(void);
extern char ftrace_graph_caller_end;
extern unsigned long ftrace_plt;
struct dyn_arch_ftrace { };
#define MCOUNT_ADDR ((long)_mcount)
#define MCOUNT_ADDR ((unsigned long)_mcount)
#define FTRACE_ADDR ((unsigned long)ftrace_caller)
#define KPROBE_ON_FTRACE_NOP 0
#define KPROBE_ON_FTRACE_CALL 1
static inline unsigned long ftrace_call_adjust(unsigned long addr)
{
return addr;
}
#endif /* __ASSEMBLY__ */
struct ftrace_insn {
u16 opc;
s32 disp;
} __packed;
static inline void ftrace_generate_nop_insn(struct ftrace_insn *insn)
{
#ifdef CONFIG_FUNCTION_TRACER
/* jg .+24 */
insn->opc = 0xc0f4;
insn->disp = MCOUNT_INSN_SIZE / 2;
#endif
}
#define MCOUNT_INSN_SIZE 18
static inline int is_ftrace_nop(struct ftrace_insn *insn)
{
#ifdef CONFIG_FUNCTION_TRACER
if (insn->disp == MCOUNT_INSN_SIZE / 2)
return 1;
#endif
return 0;
}
#define ARCH_SUPPORTS_FTRACE_OPS 1
static inline void ftrace_generate_call_insn(struct ftrace_insn *insn,
unsigned long ip)
{
#ifdef CONFIG_FUNCTION_TRACER
unsigned long target;
/* brasl r0,ftrace_caller */
target = is_module_addr((void *) ip) ? ftrace_plt : FTRACE_ADDR;
insn->opc = 0xc005;
insn->disp = (target - ip) / 2;
#endif
}
#endif /* __ASSEMBLY__ */
#endif /* _ASM_S390_FTRACE_H */
......@@ -9,9 +9,10 @@
#include <linux/types.h>
#include <linux/device.h>
#include <linux/seqlock.h>
struct s390_idle_data {
unsigned int sequence;
seqcount_t seqcount;
unsigned long long idle_count;
unsigned long long idle_time;
unsigned long long clock_idle_enter;
......
......@@ -39,6 +39,15 @@ static inline void iounmap(volatile void __iomem *addr)
{
}
static inline void __iomem *ioport_map(unsigned long port, unsigned int nr)
{
return NULL;
}
static inline void ioport_unmap(void __iomem *p)
{
}
/*
* s390 needs a private implementation of pci_iomap since ioremap with its
* offset parameter isn't sufficient. That's because BAR spaces are not
......
#ifndef _ASM_IRQ_H
#define _ASM_IRQ_H
#define EXT_INTERRUPT 1
#define IO_INTERRUPT 2
#define THIN_INTERRUPT 3
#define EXT_INTERRUPT 0
#define IO_INTERRUPT 1
#define THIN_INTERRUPT 2
#define NR_IRQS_BASE 4
#define NR_IRQS_BASE 3
#ifdef CONFIG_PCI_NR_MSI
# define NR_IRQS (NR_IRQS_BASE + CONFIG_PCI_NR_MSI)
......@@ -13,9 +13,6 @@
# define NR_IRQS NR_IRQS_BASE
#endif
/* This number is used when no interrupt has been assigned */
#define NO_IRQ 0
/* External interruption codes */
#define EXT_IRQ_INTERRUPT_KEY 0x0040
#define EXT_IRQ_CLK_COMP 0x1004
......
......@@ -60,6 +60,7 @@ typedef u16 kprobe_opcode_t;
struct arch_specific_insn {
/* copy of original instruction */
kprobe_opcode_t *insn;
unsigned int is_ftrace_insn : 1;
};
struct prev_kprobe {
......
......@@ -147,7 +147,7 @@ struct _lowcore {
__u32 softirq_pending; /* 0x02ec */
__u32 percpu_offset; /* 0x02f0 */
__u32 machine_flags; /* 0x02f4 */
__u32 ftrace_func; /* 0x02f8 */
__u8 pad_0x02f8[0x02fc-0x02f8]; /* 0x02f8 */
__u32 spinlock_lockval; /* 0x02fc */
__u8 pad_0x0300[0x0e00-0x0300]; /* 0x0300 */
......@@ -297,7 +297,7 @@ struct _lowcore {
__u64 percpu_offset; /* 0x0378 */
__u64 vdso_per_cpu_data; /* 0x0380 */
__u64 machine_flags; /* 0x0388 */
__u64 ftrace_func; /* 0x0390 */
__u8 pad_0x0390[0x0398-0x0390]; /* 0x0390 */
__u64 gmap; /* 0x0398 */
__u32 spinlock_lockval; /* 0x03a0 */
__u8 pad_0x03a0[0x0400-0x03a4]; /* 0x03a4 */
......
......@@ -50,10 +50,6 @@ struct zpci_fmb {
atomic64_t unmapped_pages;
} __packed __aligned(16);
#define ZPCI_MSI_VEC_BITS 11
#define ZPCI_MSI_VEC_MAX (1 << ZPCI_MSI_VEC_BITS)
#define ZPCI_MSI_VEC_MASK (ZPCI_MSI_VEC_MAX - 1)
enum zpci_state {
ZPCI_FN_STATE_RESERVED,
ZPCI_FN_STATE_STANDBY,
......@@ -90,6 +86,7 @@ struct zpci_dev {
/* IRQ stuff */
u64 msi_addr; /* MSI address */
unsigned int max_msi; /* maximum number of MSI's */
struct airq_iv *aibv; /* adapter interrupt bit vector */
unsigned int aisb; /* number of the summary bit */
......
......@@ -139,7 +139,8 @@ static inline int zpci_memcpy_fromio(void *dst,
int size, rc = 0;
while (n > 0) {
size = zpci_get_max_write_size((u64) src, (u64) dst, n, 8);
size = zpci_get_max_write_size((u64 __force) src,
(u64) dst, n, 8);
req = ZPCI_CREATE_REQ(entry->fh, entry->bar, size);
rc = zpci_read_single(req, dst, offset, size);
if (rc)
......@@ -162,7 +163,8 @@ static inline int zpci_memcpy_toio(volatile void __iomem *dst,
return -EINVAL;
while (n > 0) {
size = zpci_get_max_write_size((u64) dst, (u64) src, n, 128);
size = zpci_get_max_write_size((u64 __force) dst,
(u64) src, n, 128);
req = ZPCI_CREATE_REQ(entry->fh, entry->bar, size);
if (size > 8) /* main path */
......
......@@ -22,8 +22,6 @@ unsigned long *page_table_alloc(struct mm_struct *);
void page_table_free(struct mm_struct *, unsigned long *);
void page_table_free_rcu(struct mmu_gather *, unsigned long *, unsigned long);
void page_table_reset_pgste(struct mm_struct *, unsigned long, unsigned long,
bool init_skey);
int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
unsigned long key, bool nq);
......
......@@ -133,6 +133,18 @@ extern unsigned long MODULES_END;
#define MODULES_LEN (1UL << 31)
#endif
static inline int is_module_addr(void *addr)
{
#ifdef CONFIG_64BIT
BUILD_BUG_ON(MODULES_LEN > (1UL << 31));
if (addr < (void *)MODULES_VADDR)
return 0;
if (addr > (void *)MODULES_END)
return 0;
#endif
return 1;
}
/*
* A 31 bit pagetable entry of S390 has following format:
* | PFRA | | OS |
......@@ -479,6 +491,11 @@ static inline int mm_has_pgste(struct mm_struct *mm)
return 0;
}
/*
* In the case that a guest uses storage keys
* faults should no longer be backed by zero pages
*/
#define mm_forbids_zeropage mm_use_skey
static inline int mm_use_skey(struct mm_struct *mm)
{
#ifdef CONFIG_PGSTE
......@@ -1634,6 +1651,19 @@ static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm,
return pmd;
}
#define __HAVE_ARCH_PMDP_GET_AND_CLEAR_FULL
static inline pmd_t pmdp_get_and_clear_full(struct mm_struct *mm,
unsigned long address,
pmd_t *pmdp, int full)
{
pmd_t pmd = *pmdp;
if (!full)
pmdp_flush_lazy(mm, address, pmdp);
pmd_clear(pmdp);
return pmd;
}
#define __HAVE_ARCH_PMDP_CLEAR_FLUSH
static inline pmd_t pmdp_clear_flush(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp)
......@@ -1746,7 +1776,8 @@ static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset)
extern int vmem_add_mapping(unsigned long start, unsigned long size);
extern int vmem_remove_mapping(unsigned long start, unsigned long size);
extern int s390_enable_sie(void);
extern void s390_enable_skey(void);
extern int s390_enable_skey(void);
extern void s390_reset_cmma(struct mm_struct *mm);
/*
* No page table caches to initialise
......
......@@ -217,8 +217,6 @@ static inline unsigned short stap(void)
*/
static inline void cpu_relax(void)
{
if (MACHINE_HAS_DIAG44)
asm volatile("diag 0,0,68");
barrier();
}
......
......@@ -18,14 +18,7 @@ extern int spin_retry;
static inline int
_raw_compare_and_swap(unsigned int *lock, unsigned int old, unsigned int new)
{
unsigned int old_expected = old;
asm volatile(
" cs %0,%3,%1"
: "=d" (old), "=Q" (*lock)
: "0" (old), "d" (new), "Q" (*lock)
: "cc", "memory" );
return old == old_expected;
return __sync_bool_compare_and_swap(lock, old, new);
}
/*
......
......@@ -121,6 +121,7 @@ static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd,
#ifdef CONFIG_64BIT
if (tlb->mm->context.asce_limit <= (1UL << 31))
return;
pgtable_pmd_page_dtor(virt_to_page(pmd));
tlb_remove_table(tlb, pmd);
#endif
}
......
......@@ -287,7 +287,9 @@
#define __NR_getrandom 349
#define __NR_memfd_create 350
#define __NR_bpf 351
#define NR_syscalls 352
#define __NR_s390_pci_mmio_write 352
#define __NR_s390_pci_mmio_read 353
#define NR_syscalls 354
/*
* There are some system calls that are not present on 64 bit, some
......
......@@ -17,8 +17,8 @@
* Make sure that the compiler is new enough. We want a compiler that
* is known to work with the "Q" assembler constraint.
*/
#if __GNUC__ < 3 || (__GNUC__ == 3 && __GNUC_MINOR__ < 3)
#error Your compiler is too old; please use version 3.3.3 or newer
#if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 3)
#error Your compiler is too old; please use version 4.3 or newer
#endif
int main(void)
......@@ -156,7 +156,6 @@ int main(void)
DEFINE(__LC_INT_CLOCK, offsetof(struct _lowcore, int_clock));
DEFINE(__LC_MCCK_CLOCK, offsetof(struct _lowcore, mcck_clock));
DEFINE(__LC_MACHINE_FLAGS, offsetof(struct _lowcore, machine_flags));
DEFINE(__LC_FTRACE_FUNC, offsetof(struct _lowcore, ftrace_func));
DEFINE(__LC_DUMP_REIPL, offsetof(struct _lowcore, ipib));
BLANK();
DEFINE(__LC_CPU_TIMER_SAVE_AREA, offsetof(struct _lowcore, cpu_timer_save_area));
......
......@@ -434,7 +434,7 @@ static int setup_frame32(struct ksignal *ksig, sigset_t *set,
ksig->ka.sa.sa_restorer | PSW32_ADDR_AMODE;
} else {
/* Signal frames without vectors registers are short ! */
__u16 __user *svc = (void *) frame + frame_size - 2;
__u16 __user *svc = (void __user *) frame + frame_size - 2;
if (__put_user(S390_SYSCALL_OPCODE | __NR_sigreturn, svc))
return -EFAULT;
restorer = (unsigned long __force) svc | PSW32_ADDR_AMODE;
......
......@@ -218,3 +218,5 @@ COMPAT_SYSCALL_WRAP3(seccomp, unsigned int, op, unsigned int, flags, const char
COMPAT_SYSCALL_WRAP3(getrandom, char __user *, buf, size_t, count, unsigned int, flags)
COMPAT_SYSCALL_WRAP2(memfd_create, const char __user *, uname, unsigned int, flags)
COMPAT_SYSCALL_WRAP3(bpf, int, cmd, union bpf_attr *, attr, unsigned int, size);
COMPAT_SYSCALL_WRAP3(s390_pci_mmio_write, const unsigned long, mmio_addr, const void __user *, user_buffer, const size_t, length);
COMPAT_SYSCALL_WRAP3(s390_pci_mmio_read, const unsigned long, mmio_addr, void __user *, user_buffer, const size_t, length);
......@@ -1019,7 +1019,7 @@ debug_count_numargs(char *string)
*/
debug_entry_t*
debug_sprintf_event(debug_info_t* id, int level,char *string,...)
__debug_sprintf_event(debug_info_t *id, int level, char *string, ...)
{
va_list ap;
int numargs,idx;
......@@ -1027,8 +1027,6 @@ debug_sprintf_event(debug_info_t* id, int level,char *string,...)
debug_sprintf_entry_t *curr_event;
debug_entry_t *active;
if((!id) || (level > id->level))
return NULL;
if (!debug_active || !id->areas)
return NULL;
numargs=debug_count_numargs(string);
......@@ -1050,14 +1048,14 @@ debug_sprintf_event(debug_info_t* id, int level,char *string,...)
return active;
}
EXPORT_SYMBOL(debug_sprintf_event);
EXPORT_SYMBOL(__debug_sprintf_event);
/*
* debug_sprintf_exception:
*/
debug_entry_t*
debug_sprintf_exception(debug_info_t* id, int level,char *string,...)
__debug_sprintf_exception(debug_info_t *id, int level, char *string, ...)
{
va_list ap;
int numargs,idx;
......@@ -1065,8 +1063,6 @@ debug_sprintf_exception(debug_info_t* id, int level,char *string,...)
debug_sprintf_entry_t *curr_event;
debug_entry_t *active;
if((!id) || (level > id->level))
return NULL;
if (!debug_active || !id->areas)
return NULL;
......@@ -1089,7 +1085,7 @@ debug_sprintf_exception(debug_info_t* id, int level,char *string,...)
return active;
}
EXPORT_SYMBOL(debug_sprintf_exception);
EXPORT_SYMBOL(__debug_sprintf_exception);
/*
* debug_register_view:
......
......@@ -191,7 +191,8 @@ void die(struct pt_regs *regs, const char *str)
console_verbose();
spin_lock_irq(&die_lock);
bust_spinlocks(1);
printk("%s: %04x [#%d] ", str, regs->int_code & 0xffff, ++die_counter);
printk("%s: %04x ilc:%d [#%d] ", str, regs->int_code & 0xffff,
regs->int_code >> 17, ++die_counter);
#ifdef CONFIG_PREEMPT
printk("PREEMPT ");
#endif
......
......@@ -12,7 +12,6 @@
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/ctype.h>
#include <linux/ftrace.h>
#include <linux/lockdep.h>
#include <linux/module.h>
#include <linux/pfn.h>
......@@ -490,8 +489,5 @@ void __init startup_init(void)
detect_machine_facilities();
setup_topology();
sclp_early_detect();
#ifdef CONFIG_DYNAMIC_FTRACE
S390_lowcore.ftrace_func = (unsigned long)ftrace_caller;
#endif
lockdep_on();
}
This diff is collapsed.
......@@ -74,4 +74,6 @@ struct old_sigaction;
long sys_s390_personality(unsigned int personality);
long sys_s390_runtime_instr(int command, int signum);
long sys_s390_pci_mmio_write(unsigned long, const void __user *, size_t);
long sys_s390_pci_mmio_read(unsigned long, void __user *, size_t);
#endif /* _ENTRY_H */
This diff is collapsed.
......@@ -7,6 +7,7 @@
* Martin Schwidefsky <schwidefsky@de.ibm.com>
*/
#include <linux/moduleloader.h>
#include <linux/hardirq.h>
#include <linux/uaccess.h>
#include <linux/ftrace.h>
......@@ -15,60 +16,39 @@
#include <linux/kprobes.h>
#include <trace/syscall.h>
#include <asm/asm-offsets.h>
#include <asm/cacheflush.h>
#include "entry.h"
void mcount_replace_code(void);
void ftrace_disable_code(void);
void ftrace_enable_insn(void);
/*
* The mcount code looks like this:
* stg %r14,8(%r15) # offset 0
* larl %r1,<&counter> # offset 6
* brasl %r14,_mcount # offset 12
* lg %r14,8(%r15) # offset 18
* Total length is 24 bytes. The complete mcount block initially gets replaced
* by ftrace_make_nop. Subsequent calls to ftrace_make_call / ftrace_make_nop
* only patch the jg/lg instruction within the block.
* Note: we do not patch the first instruction to an unconditional branch,
* since that would break kprobes/jprobes. It is easier to leave the larl
* instruction in and only modify the second instruction.
* Total length is 24 bytes. Only the first instruction will be patched
* by ftrace_make_call / ftrace_make_nop.
* The enabled ftrace code block looks like this:
* larl %r0,.+24 # offset 0
* > lg %r1,__LC_FTRACE_FUNC # offset 6
* br %r1 # offset 12
* brcl 0,0 # offset 14
* brc 0,0 # offset 20
* > brasl %r0,ftrace_caller # offset 0
* larl %r1,<&counter> # offset 6
* brasl %r14,_mcount # offset 12
* lg %r14,8(%r15) # offset 18
* The ftrace function gets called with a non-standard C function call ABI
* where r0 contains the return address. It is also expected that the called
* function only clobbers r0 and r1, but restores r2-r15.
* For module code we can't directly jump to ftrace caller, but need a
* trampoline (ftrace_plt), which clobbers also r1.
* The return point of the ftrace function has offset 24, so execution
* continues behind the mcount block.
* larl %r0,.+24 # offset 0
* > jg .+18 # offset 6
* br %r1 # offset 12
* brcl 0,0 # offset 14
* brc 0,0 # offset 20
* The disabled ftrace code block looks like this:
* > jg .+24 # offset 0
* larl %r1,<&counter> # offset 6
* brasl %r14,_mcount # offset 12
* lg %r14,8(%r15) # offset 18
* The jg instruction branches to offset 24 to skip as many instructions
* as possible.
*/
asm(
" .align 4\n"
"mcount_replace_code:\n"
" larl %r0,0f\n"
"ftrace_disable_code:\n"
" jg 0f\n"
" br %r1\n"
" brcl 0,0\n"
" brc 0,0\n"
"0:\n"
" .align 4\n"
"ftrace_enable_insn:\n"
" lg %r1,"__stringify(__LC_FTRACE_FUNC)"\n");
#define MCOUNT_BLOCK_SIZE 24
#define MCOUNT_INSN_OFFSET 6
#define FTRACE_INSN_SIZE 6
unsigned long ftrace_plt;
int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
unsigned long addr)
......@@ -79,24 +59,62 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
unsigned long addr)
{
/* Initial replacement of the whole mcount block */
if (addr == MCOUNT_ADDR) {
if (probe_kernel_write((void *) rec->ip - MCOUNT_INSN_OFFSET,
mcount_replace_code,
MCOUNT_BLOCK_SIZE))
return -EPERM;
return 0;
struct ftrace_insn insn;
unsigned short op;
void *from, *to;
size_t size;
ftrace_generate_nop_insn(&insn);
size = sizeof(insn);
from = &insn;
to = (void *) rec->ip;
if (probe_kernel_read(&op, (void *) rec->ip, sizeof(op)))
return -EFAULT;
/*
* If we find a breakpoint instruction, a kprobe has been placed
* at the beginning of the function. We write the constant
* KPROBE_ON_FTRACE_NOP into the remaining four bytes of the original
* instruction so that the kprobes handler can execute a nop, if it
* reaches this breakpoint.
*/
if (op == BREAKPOINT_INSTRUCTION) {
size -= 2;
from += 2;
to += 2;
insn.disp = KPROBE_ON_FTRACE_NOP;
}
if (probe_kernel_write((void *) rec->ip, ftrace_disable_code,
MCOUNT_INSN_SIZE))
if (probe_kernel_write(to, from, size))
return -EPERM;
return 0;
}
int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
{
if (probe_kernel_write((void *) rec->ip, ftrace_enable_insn,
FTRACE_INSN_SIZE))
struct ftrace_insn insn;
unsigned short op;
void *from, *to;
size_t size;
ftrace_generate_call_insn(&insn, rec->ip);
size = sizeof(insn);
from = &insn;
to = (void *) rec->ip;
if (probe_kernel_read(&op, (void *) rec->ip, sizeof(op)))
return -EFAULT;
/*
* If we find a breakpoint instruction, a kprobe has been placed
* at the beginning of the function. We write the constant
* KPROBE_ON_FTRACE_CALL into the remaining four bytes of the original
* instruction so that the kprobes handler can execute a brasl if it
* reaches this breakpoint.
*/
if (op == BREAKPOINT_INSTRUCTION) {
size -= 2;
from += 2;
to += 2;
insn.disp = KPROBE_ON_FTRACE_CALL;
}
if (probe_kernel_write(to, from, size))
return -EPERM;
return 0;
}
......@@ -111,13 +129,30 @@ int __init ftrace_dyn_arch_init(void)
return 0;
}
static int __init ftrace_plt_init(void)
{
unsigned int *ip;
ftrace_plt = (unsigned long) module_alloc(PAGE_SIZE);
if (!ftrace_plt)
panic("cannot allocate ftrace plt\n");
ip = (unsigned int *) ftrace_plt;
ip[0] = 0x0d10e310; /* basr 1,0; lg 1,10(1); br 1 */
ip[1] = 0x100a0004;
ip[2] = 0x07f10000;
ip[3] = FTRACE_ADDR >> 32;
ip[4] = FTRACE_ADDR & 0xffffffff;
set_memory_ro(ftrace_plt, 1);
return 0;
}
device_initcall(ftrace_plt_init);
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
/*
* Hook the return address and push it in the stack of return addresses
* in current thread info.
*/
unsigned long __kprobes prepare_ftrace_return(unsigned long parent,
unsigned long ip)
unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip)
{
struct ftrace_graph_ent trace;
......@@ -137,6 +172,7 @@ unsigned long __kprobes prepare_ftrace_return(unsigned long parent,
out:
return parent;
}
NOKPROBE_SYMBOL(prepare_ftrace_return);
/*
* Patch the kernel code at ftrace_graph_caller location. The instruction
......
......@@ -19,7 +19,7 @@
static DEFINE_PER_CPU(struct s390_idle_data, s390_idle);
void __kprobes enabled_wait(void)
void enabled_wait(void)
{
struct s390_idle_data *idle = this_cpu_ptr(&s390_idle);
unsigned long long idle_time;
......@@ -35,31 +35,32 @@ void __kprobes enabled_wait(void)
/* Call the assembler magic in entry.S */
psw_idle(idle, psw_mask);
trace_hardirqs_off();
/* Account time spent with enabled wait psw loaded as idle time. */
idle->sequence++;
smp_wmb();
write_seqcount_begin(&idle->seqcount);
idle_time = idle->clock_idle_exit - idle->clock_idle_enter;
idle->clock_idle_enter = idle->clock_idle_exit = 0ULL;
idle->idle_time += idle_time;
idle->idle_count++;
account_idle_time(idle_time);
smp_wmb();
idle->sequence++;
write_seqcount_end(&idle->seqcount);
}
NOKPROBE_SYMBOL(enabled_wait);
static ssize_t show_idle_count(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id);
unsigned long long idle_count;
unsigned int sequence;
unsigned int seq;
do {
sequence = ACCESS_ONCE(idle->sequence);
seq = read_seqcount_begin(&idle->seqcount);
idle_count = ACCESS_ONCE(idle->idle_count);
if (ACCESS_ONCE(idle->clock_idle_enter))
idle_count++;
} while ((sequence & 1) || (ACCESS_ONCE(idle->sequence) != sequence));
} while (read_seqcount_retry(&idle->seqcount, seq));
return sprintf(buf, "%llu\n", idle_count);
}
DEVICE_ATTR(idle_count, 0444, show_idle_count, NULL);
......@@ -69,15 +70,15 @@ static ssize_t show_idle_time(struct device *dev,
{
struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id);
unsigned long long now, idle_time, idle_enter, idle_exit;
unsigned int sequence;
unsigned int seq;
do {
now = get_tod_clock();
sequence = ACCESS_ONCE(idle->sequence);
seq = read_seqcount_begin(&idle->seqcount);
idle_time = ACCESS_ONCE(idle->idle_time);
idle_enter = ACCESS_ONCE(idle->clock_idle_enter);
idle_exit = ACCESS_ONCE(idle->clock_idle_exit);
} while ((sequence & 1) || (ACCESS_ONCE(idle->sequence) != sequence));
} while (read_seqcount_retry(&idle->seqcount, seq));
idle_time += idle_enter ? ((idle_exit ? : now) - idle_enter) : 0;
return sprintf(buf, "%llu\n", idle_time >> 12);
}
......@@ -87,14 +88,14 @@ cputime64_t arch_cpu_idle_time(int cpu)
{
struct s390_idle_data *idle = &per_cpu(s390_idle, cpu);
unsigned long long now, idle_enter, idle_exit;
unsigned int sequence;
unsigned int seq;
do {
now = get_tod_clock();
sequence = ACCESS_ONCE(idle->sequence);
seq = read_seqcount_begin(&idle->seqcount);
idle_enter = ACCESS_ONCE(idle->clock_idle_enter);
idle_exit = ACCESS_ONCE(idle->clock_idle_exit);
} while ((sequence & 1) || (ACCESS_ONCE(idle->sequence) != sequence));
} while (read_seqcount_retry(&idle->seqcount, seq));
return idle_enter ? ((idle_exit ?: now) - idle_enter) : 0;
}
......
......@@ -127,13 +127,10 @@ int show_interrupts(struct seq_file *p, void *v)
for_each_online_cpu(cpu)
seq_printf(p, "CPU%d ", cpu);
seq_putc(p, '\n');
goto out;
}
if (index < NR_IRQS) {
if (index >= NR_IRQS_BASE)
goto out;
/* Adjust index to process irqclass_main_desc array entries */
index--;
seq_printf(p, "%s: ", irqclass_main_desc[index].name);
irq = irqclass_main_desc[index].irq;
for_each_online_cpu(cpu)
......@@ -158,7 +155,7 @@ int show_interrupts(struct seq_file *p, void *v)
unsigned int arch_dynirq_lower_bound(unsigned int from)
{
return from < THIN_INTERRUPT ? THIN_INTERRUPT : from;
return from < NR_IRQS_BASE ? NR_IRQS_BASE : from;
}
/*
......
This diff is collapsed.
......@@ -27,6 +27,7 @@ ENTRY(ftrace_caller)
.globl ftrace_regs_caller
.set ftrace_regs_caller,ftrace_caller
lgr %r1,%r15
aghi %r0,MCOUNT_RETURN_FIXUP
aghi %r15,-STACK_FRAME_SIZE
stg %r1,__SF_BACKCHAIN(%r15)
stg %r1,(STACK_PTREGS_GPRS+15*8)(%r15)
......
......@@ -1383,7 +1383,6 @@ static int cpumsf_pmu_add(struct perf_event *event, int flags)
cpuhw->lsctl.ed = 1;
/* Set in_use flag and store event */
event->hw.idx = 0; /* only one sampling event per CPU supported */
cpuhw->event = event;
cpuhw->flags |= PMU_F_IN_USE;
......
......@@ -61,7 +61,7 @@ unsigned long thread_saved_pc(struct task_struct *tsk)
return sf->gprs[8];
}
extern void __kprobes kernel_thread_starter(void);
extern void kernel_thread_starter(void);
/*
* Free current thread data structures etc..
......@@ -153,6 +153,7 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp,
save_fp_ctl(&p->thread.fp_regs.fpc);
save_fp_regs(p->thread.fp_regs.fprs);
p->thread.fp_regs.pad = 0;
p->thread.vxrs = NULL;
/* Set a new TLS ? */
if (clone_flags & CLONE_SETTLS) {
unsigned long tls = frame->childregs.gprs[6];
......
......@@ -248,14 +248,27 @@ static unsigned long __peek_user(struct task_struct *child, addr_t addr)
*/
tmp = 0;
} else if (addr == (addr_t) &dummy->regs.fp_regs.fpc) {
/*
* floating point control reg. is in the thread structure
*/
tmp = child->thread.fp_regs.fpc;
tmp <<= BITS_PER_LONG - 32;
} else if (addr < (addr_t) (&dummy->regs.fp_regs + 1)) {
/*
* floating point regs. are stored in the thread structure
/*
* floating point regs. are either in child->thread.fp_regs
* or the child->thread.vxrs array
*/
offset = addr - (addr_t) &dummy->regs.fp_regs;
tmp = *(addr_t *)((addr_t) &child->thread.fp_regs + offset);
if (addr == (addr_t) &dummy->regs.fp_regs.fpc)
tmp <<= BITS_PER_LONG - 32;
offset = addr - (addr_t) &dummy->regs.fp_regs.fprs;
#ifdef CONFIG_64BIT
if (child->thread.vxrs)
tmp = *(addr_t *)
((addr_t) child->thread.vxrs + 2*offset);
else
#endif
tmp = *(addr_t *)
((addr_t) &child->thread.fp_regs.fprs + offset);
} else if (addr < (addr_t) (&dummy->regs.per_info + 1)) {
/*
......@@ -383,16 +396,29 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data)
*/
return 0;
} else if (addr == (addr_t) &dummy->regs.fp_regs.fpc) {
/*
* floating point control reg. is in the thread structure
*/
if ((unsigned int) data != 0 ||
test_fp_ctl(data >> (BITS_PER_LONG - 32)))
return -EINVAL;
child->thread.fp_regs.fpc = data >> (BITS_PER_LONG - 32);
} else if (addr < (addr_t) (&dummy->regs.fp_regs + 1)) {
/*
* floating point regs. are stored in the thread structure
* floating point regs. are either in child->thread.fp_regs
* or the child->thread.vxrs array
*/
if (addr == (addr_t) &dummy->regs.fp_regs.fpc)
if ((unsigned int) data != 0 ||
test_fp_ctl(data >> (BITS_PER_LONG - 32)))
return -EINVAL;
offset = addr - (addr_t) &dummy->regs.fp_regs;
*(addr_t *)((addr_t) &child->thread.fp_regs + offset) = data;
offset = addr - (addr_t) &dummy->regs.fp_regs.fprs;
#ifdef CONFIG_64BIT
if (child->thread.vxrs)
*(addr_t *)((addr_t)
child->thread.vxrs + 2*offset) = data;
else
#endif
*(addr_t *)((addr_t)
&child->thread.fp_regs.fprs + offset) = data;
} else if (addr < (addr_t) (&dummy->regs.per_info + 1)) {
/*
......@@ -611,12 +637,26 @@ static u32 __peek_user_compat(struct task_struct *child, addr_t addr)
*/
tmp = 0;
} else if (addr == (addr_t) &dummy32->regs.fp_regs.fpc) {
/*
* floating point control reg. is in the thread structure
*/
tmp = child->thread.fp_regs.fpc;
} else if (addr < (addr_t) (&dummy32->regs.fp_regs + 1)) {
/*
* floating point regs. are stored in the thread structure
* floating point regs. are either in child->thread.fp_regs
* or the child->thread.vxrs array
*/
offset = addr - (addr_t) &dummy32->regs.fp_regs;
tmp = *(__u32 *)((addr_t) &child->thread.fp_regs + offset);
offset = addr - (addr_t) &dummy32->regs.fp_regs.fprs;
#ifdef CONFIG_64BIT
if (child->thread.vxrs)
tmp = *(__u32 *)
((addr_t) child->thread.vxrs + 2*offset);
else
#endif
tmp = *(__u32 *)
((addr_t) &child->thread.fp_regs.fprs + offset);
} else if (addr < (addr_t) (&dummy32->regs.per_info + 1)) {
/*
......@@ -722,15 +762,28 @@ static int __poke_user_compat(struct task_struct *child,
*/
return 0;
} else if (addr < (addr_t) (&dummy32->regs.fp_regs + 1)) {
} else if (addr == (addr_t) &dummy32->regs.fp_regs.fpc) {
/*
* floating point regs. are stored in the thread structure
* floating point control reg. is in the thread structure
*/
if (addr == (addr_t) &dummy32->regs.fp_regs.fpc &&
test_fp_ctl(tmp))
if (test_fp_ctl(tmp))
return -EINVAL;
offset = addr - (addr_t) &dummy32->regs.fp_regs;
*(__u32 *)((addr_t) &child->thread.fp_regs + offset) = tmp;
child->thread.fp_regs.fpc = data;
} else if (addr < (addr_t) (&dummy32->regs.fp_regs + 1)) {
/*
* floating point regs. are either in child->thread.fp_regs
* or the child->thread.vxrs array
*/
offset = addr - (addr_t) &dummy32->regs.fp_regs.fprs;
#ifdef CONFIG_64BIT
if (child->thread.vxrs)
*(__u32 *)((addr_t)
child->thread.vxrs + 2*offset) = tmp;
else
#endif
*(__u32 *)((addr_t)
&child->thread.fp_regs.fprs + offset) = tmp;
} else if (addr < (addr_t) (&dummy32->regs.per_info + 1)) {
/*
......@@ -1038,12 +1091,6 @@ static int s390_tdb_set(struct task_struct *target,
return 0;
}
static int s390_vxrs_active(struct task_struct *target,
const struct user_regset *regset)
{
return !!target->thread.vxrs;
}
static int s390_vxrs_low_get(struct task_struct *target,
const struct user_regset *regset,
unsigned int pos, unsigned int count,
......@@ -1052,6 +1099,8 @@ static int s390_vxrs_low_get(struct task_struct *target,
__u64 vxrs[__NUM_VXRS_LOW];
int i;
if (!MACHINE_HAS_VX)
return -ENODEV;
if (target->thread.vxrs) {
if (target == current)
save_vx_regs(target->thread.vxrs);
......@@ -1070,6 +1119,8 @@ static int s390_vxrs_low_set(struct task_struct *target,
__u64 vxrs[__NUM_VXRS_LOW];
int i, rc;
if (!MACHINE_HAS_VX)
return -ENODEV;
if (!target->thread.vxrs) {
rc = alloc_vector_registers(target);
if (rc)
......@@ -1095,6 +1146,8 @@ static int s390_vxrs_high_get(struct task_struct *target,
{
__vector128 vxrs[__NUM_VXRS_HIGH];
if (!MACHINE_HAS_VX)
return -ENODEV;
if (target->thread.vxrs) {
if (target == current)
save_vx_regs(target->thread.vxrs);
......@@ -1112,6 +1165,8 @@ static int s390_vxrs_high_set(struct task_struct *target,
{
int rc;
if (!MACHINE_HAS_VX)
return -ENODEV;
if (!target->thread.vxrs) {
rc = alloc_vector_registers(target);
if (rc)
......@@ -1196,7 +1251,6 @@ static const struct user_regset s390_regsets[] = {
.n = __NUM_VXRS_LOW,
.size = sizeof(__u64),
.align = sizeof(__u64),
.active = s390_vxrs_active,
.get = s390_vxrs_low_get,
.set = s390_vxrs_low_set,
},
......@@ -1205,7 +1259,6 @@ static const struct user_regset s390_regsets[] = {
.n = __NUM_VXRS_HIGH,
.size = sizeof(__vector128),
.align = sizeof(__vector128),
.active = s390_vxrs_active,
.get = s390_vxrs_high_get,
.set = s390_vxrs_high_set,
},
......@@ -1419,7 +1472,6 @@ static const struct user_regset s390_compat_regsets[] = {
.n = __NUM_VXRS_LOW,
.size = sizeof(__u64),
.align = sizeof(__u64),
.active = s390_vxrs_active,
.get = s390_vxrs_low_get,
.set = s390_vxrs_low_set,
},
......@@ -1428,7 +1480,6 @@ static const struct user_regset s390_compat_regsets[] = {
.n = __NUM_VXRS_HIGH,
.size = sizeof(__vector128),
.align = sizeof(__vector128),
.active = s390_vxrs_active,
.get = s390_vxrs_high_get,
.set = s390_vxrs_high_set,
},
......
......@@ -41,7 +41,6 @@
#include <linux/ctype.h>
#include <linux/reboot.h>
#include <linux/topology.h>
#include <linux/ftrace.h>
#include <linux/kexec.h>
#include <linux/crash_dump.h>
#include <linux/memory.h>
......@@ -356,7 +355,6 @@ static void __init setup_lowcore(void)
lc->steal_timer = S390_lowcore.steal_timer;
lc->last_update_timer = S390_lowcore.last_update_timer;
lc->last_update_clock = S390_lowcore.last_update_clock;
lc->ftrace_func = S390_lowcore.ftrace_func;
restart_stack = __alloc_bootmem(ASYNC_SIZE, ASYNC_SIZE, 0);
restart_stack += ASYNC_SIZE;
......
......@@ -371,7 +371,7 @@ static int setup_frame(int sig, struct k_sigaction *ka,
restorer = (unsigned long) ka->sa.sa_restorer | PSW_ADDR_AMODE;
} else {
/* Signal frame without vector registers are short ! */
__u16 __user *svc = (void *) frame + frame_size - 2;
__u16 __user *svc = (void __user *) frame + frame_size - 2;
if (__put_user(S390_SYSCALL_OPCODE | __NR_sigreturn, svc))
return -EFAULT;
restorer = (unsigned long) svc | PSW_ADDR_AMODE;
......
......@@ -236,7 +236,6 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu)
lc->percpu_offset = __per_cpu_offset[cpu];
lc->kernel_asce = S390_lowcore.kernel_asce;
lc->machine_flags = S390_lowcore.machine_flags;
lc->ftrace_func = S390_lowcore.ftrace_func;
lc->user_timer = lc->system_timer = lc->steal_timer = 0;
__ctl_store(lc->cregs_save_area, 0, 15);
save_access_regs((unsigned int *) lc->access_regs_save_area);
......
......@@ -360,3 +360,5 @@ SYSCALL(sys_seccomp,sys_seccomp,compat_sys_seccomp)
SYSCALL(sys_getrandom,sys_getrandom,compat_sys_getrandom)
SYSCALL(sys_memfd_create,sys_memfd_create,compat_sys_memfd_create) /* 350 */
SYSCALL(sys_bpf,sys_bpf,compat_sys_bpf)
SYSCALL(sys_ni_syscall,sys_s390_pci_mmio_write,compat_sys_s390_pci_mmio_write)
SYSCALL(sys_ni_syscall,sys_s390_pci_mmio_read,compat_sys_s390_pci_mmio_read)
......@@ -61,10 +61,11 @@ static DEFINE_PER_CPU(struct clock_event_device, comparators);
/*
* Scheduler clock - returns current time in nanosec units.
*/
unsigned long long notrace __kprobes sched_clock(void)
unsigned long long notrace sched_clock(void)
{
return tod_to_ns(get_tod_clock_monotonic());
}
NOKPROBE_SYMBOL(sched_clock);
/*
* Monotonic_clock - returns # of nanoseconds passed since time_init()
......
......@@ -49,7 +49,8 @@ static inline void report_user_fault(struct pt_regs *regs, int signr)
return;
if (!printk_ratelimit())
return;
printk("User process fault: interruption code 0x%X ", regs->int_code);
printk("User process fault: interruption code %04x ilc:%d ",
regs->int_code & 0xffff, regs->int_code >> 17);
print_vma_addr("in ", regs->psw.addr & PSW_ADDR_INSN);
printk("\n");
show_regs(regs);
......@@ -87,16 +88,16 @@ void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str)
}
}
static void __kprobes do_trap(struct pt_regs *regs, int si_signo, int si_code,
char *str)
static void do_trap(struct pt_regs *regs, int si_signo, int si_code, char *str)
{
if (notify_die(DIE_TRAP, str, regs, 0,
regs->int_code, si_signo) == NOTIFY_STOP)
return;
do_report_trap(regs, si_signo, si_code, str);
}
NOKPROBE_SYMBOL(do_trap);
void __kprobes do_per_trap(struct pt_regs *regs)
void do_per_trap(struct pt_regs *regs)
{
siginfo_t info;
......@@ -111,6 +112,7 @@ void __kprobes do_per_trap(struct pt_regs *regs)
(void __force __user *) current->thread.per_event.address;
force_sig_info(SIGTRAP, &info, current);
}
NOKPROBE_SYMBOL(do_per_trap);
void default_trap_handler(struct pt_regs *regs)
{
......@@ -151,8 +153,6 @@ DO_ERROR_INFO(privileged_op, SIGILL, ILL_PRVOPC,
"privileged operation")
DO_ERROR_INFO(special_op_exception, SIGILL, ILL_ILLOPN,
"special operation exception")
DO_ERROR_INFO(translation_exception, SIGILL, ILL_ILLOPN,
"translation exception")
#ifdef CONFIG_64BIT
DO_ERROR_INFO(transaction_exception, SIGILL, ILL_ILLOPN,
......@@ -179,7 +179,13 @@ static inline void do_fp_trap(struct pt_regs *regs, int fpc)
do_trap(regs, SIGFPE, si_code, "floating point exception");
}
void __kprobes illegal_op(struct pt_regs *regs)
void translation_exception(struct pt_regs *regs)
{
/* May never happen. */
die(regs, "Translation exception");
}
void illegal_op(struct pt_regs *regs)
{
siginfo_t info;
__u8 opcode[6];
......@@ -252,7 +258,7 @@ void __kprobes illegal_op(struct pt_regs *regs)
if (signal)
do_trap(regs, signal, ILL_ILLOPC, "illegal operation");
}
NOKPROBE_SYMBOL(illegal_op);
#ifdef CONFIG_MATHEMU
void specification_exception(struct pt_regs *regs)
......@@ -469,7 +475,7 @@ void space_switch_exception(struct pt_regs *regs)
do_trap(regs, SIGILL, ILL_PRVOPC, "space switch event");
}
void __kprobes kernel_stack_overflow(struct pt_regs * regs)
void kernel_stack_overflow(struct pt_regs *regs)
{
bust_spinlocks(1);
printk("Kernel stack overflow.\n");
......@@ -477,6 +483,7 @@ void __kprobes kernel_stack_overflow(struct pt_regs * regs)
bust_spinlocks(0);
panic("Corrupt kernel stack, can't continue.");
}
NOKPROBE_SYMBOL(kernel_stack_overflow);
void __init trap_init(void)
{
......
......@@ -271,7 +271,7 @@ static int kvm_s390_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
case KVM_S390_VM_MEM_CLR_CMMA:
mutex_lock(&kvm->lock);
idx = srcu_read_lock(&kvm->srcu);
page_table_reset_pgste(kvm->arch.gmap->mm, 0, TASK_SIZE, false);
s390_reset_cmma(kvm->arch.gmap->mm);
srcu_read_unlock(&kvm->srcu, idx);
mutex_unlock(&kvm->lock);
ret = 0;
......
......@@ -156,21 +156,25 @@ static int handle_store_cpu_address(struct kvm_vcpu *vcpu)
return 0;
}
static void __skey_check_enable(struct kvm_vcpu *vcpu)
static int __skey_check_enable(struct kvm_vcpu *vcpu)
{
int rc = 0;
if (!(vcpu->arch.sie_block->ictl & (ICTL_ISKE | ICTL_SSKE | ICTL_RRBE)))
return;
return rc;
s390_enable_skey();
rc = s390_enable_skey();
trace_kvm_s390_skey_related_inst(vcpu);
vcpu->arch.sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE | ICTL_RRBE);
return rc;
}
static int handle_skey(struct kvm_vcpu *vcpu)
{
__skey_check_enable(vcpu);
int rc = __skey_check_enable(vcpu);
if (rc)
return rc;
vcpu->stat.instruction_storage_key++;
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
......@@ -683,7 +687,10 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
}
if (vcpu->run->s.regs.gprs[reg1] & PFMF_SK) {
__skey_check_enable(vcpu);
int rc = __skey_check_enable(vcpu);
if (rc)
return rc;
if (set_guest_storage_key(current->mm, useraddr,
vcpu->run->s.regs.gprs[reg1] & PFMF_KEY,
vcpu->run->s.regs.gprs[reg1] & PFMF_NQ))
......
......@@ -261,8 +261,8 @@ static inline void report_user_fault(struct pt_regs *regs, long signr)
return;
if (!printk_ratelimit())
return;
printk(KERN_ALERT "User process fault: interruption code 0x%X ",
regs->int_code);
printk(KERN_ALERT "User process fault: interruption code %04x ilc:%d",
regs->int_code & 0xffff, regs->int_code >> 17);
print_vma_addr(KERN_CONT "in ", regs->psw.addr & PSW_ADDR_INSN);
printk(KERN_CONT "\n");
printk(KERN_ALERT "failing address: %016lx TEID: %016lx\n",
......@@ -548,7 +548,7 @@ static inline int do_exception(struct pt_regs *regs, int access)
return fault;
}
void __kprobes do_protection_exception(struct pt_regs *regs)
void do_protection_exception(struct pt_regs *regs)
{
unsigned long trans_exc_code;
int fault;
......@@ -574,8 +574,9 @@ void __kprobes do_protection_exception(struct pt_regs *regs)
if (unlikely(fault))
do_fault_error(regs, fault);
}
NOKPROBE_SYMBOL(do_protection_exception);
void __kprobes do_dat_exception(struct pt_regs *regs)
void do_dat_exception(struct pt_regs *regs)
{
int access, fault;
......@@ -584,6 +585,7 @@ void __kprobes do_dat_exception(struct pt_regs *regs)
if (unlikely(fault))
do_fault_error(regs, fault);
}
NOKPROBE_SYMBOL(do_dat_exception);
#ifdef CONFIG_PFAULT
/*
......
......@@ -18,6 +18,8 @@
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/swapops.h>
#include <linux/ksm.h>
#include <linux/mman.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
......@@ -750,8 +752,7 @@ int gmap_ipte_notify(struct gmap *gmap, unsigned long gaddr, unsigned long len)
break;
/* Walk the process page table, lock and get pte pointer */
ptep = get_locked_pte(gmap->mm, addr, &ptl);
if (unlikely(!ptep))
continue;
VM_BUG_ON(!ptep);
/* Set notification bit in the pgste of the pte */
entry = *ptep;
if ((pte_val(entry) & (_PAGE_INVALID | _PAGE_PROTECT)) == 0) {
......@@ -761,7 +762,7 @@ int gmap_ipte_notify(struct gmap *gmap, unsigned long gaddr, unsigned long len)
gaddr += PAGE_SIZE;
len -= PAGE_SIZE;
}
spin_unlock(ptl);
pte_unmap_unlock(ptep, ptl);
}
up_read(&gmap->mm->mmap_sem);
return rc;
......@@ -834,99 +835,6 @@ static inline void page_table_free_pgste(unsigned long *table)
__free_page(page);
}
static inline unsigned long page_table_reset_pte(struct mm_struct *mm, pmd_t *pmd,
unsigned long addr, unsigned long end, bool init_skey)
{
pte_t *start_pte, *pte;
spinlock_t *ptl;
pgste_t pgste;
start_pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
pte = start_pte;
do {
pgste = pgste_get_lock(pte);
pgste_val(pgste) &= ~_PGSTE_GPS_USAGE_MASK;
if (init_skey) {
unsigned long address;
pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT |
PGSTE_GR_BIT | PGSTE_GC_BIT);
/* skip invalid and not writable pages */
if (pte_val(*pte) & _PAGE_INVALID ||
!(pte_val(*pte) & _PAGE_WRITE)) {
pgste_set_unlock(pte, pgste);
continue;
}
address = pte_val(*pte) & PAGE_MASK;
page_set_storage_key(address, PAGE_DEFAULT_KEY, 1);
}
pgste_set_unlock(pte, pgste);
} while (pte++, addr += PAGE_SIZE, addr != end);
pte_unmap_unlock(start_pte, ptl);
return addr;
}
static inline unsigned long page_table_reset_pmd(struct mm_struct *mm, pud_t *pud,
unsigned long addr, unsigned long end, bool init_skey)
{
unsigned long next;
pmd_t *pmd;
pmd = pmd_offset(pud, addr);
do {
next = pmd_addr_end(addr, end);
if (pmd_none_or_clear_bad(pmd))
continue;
next = page_table_reset_pte(mm, pmd, addr, next, init_skey);
} while (pmd++, addr = next, addr != end);
return addr;
}
static inline unsigned long page_table_reset_pud(struct mm_struct *mm, pgd_t *pgd,
unsigned long addr, unsigned long end, bool init_skey)
{
unsigned long next;
pud_t *pud;
pud = pud_offset(pgd, addr);
do {
next = pud_addr_end(addr, end);
if (pud_none_or_clear_bad(pud))
continue;
next = page_table_reset_pmd(mm, pud, addr, next, init_skey);
} while (pud++, addr = next, addr != end);
return addr;
}
void page_table_reset_pgste(struct mm_struct *mm, unsigned long start,
unsigned long end, bool init_skey)
{
unsigned long addr, next;
pgd_t *pgd;
down_write(&mm->mmap_sem);
if (init_skey && mm_use_skey(mm))
goto out_up;
addr = start;
pgd = pgd_offset(mm, addr);
do {
next = pgd_addr_end(addr, end);
if (pgd_none_or_clear_bad(pgd))
continue;
next = page_table_reset_pud(mm, pgd, addr, next, init_skey);
} while (pgd++, addr = next, addr != end);
if (init_skey)
current->mm->context.use_skey = 1;
out_up:
up_write(&mm->mmap_sem);
}
EXPORT_SYMBOL(page_table_reset_pgste);
int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
unsigned long key, bool nq)
{
......@@ -992,11 +900,6 @@ static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm)
return NULL;
}
void page_table_reset_pgste(struct mm_struct *mm, unsigned long start,
unsigned long end, bool init_skey)
{
}
static inline void page_table_free_pgste(unsigned long *table)
{
}
......@@ -1347,12 +1250,88 @@ EXPORT_SYMBOL_GPL(s390_enable_sie);
* Enable storage key handling from now on and initialize the storage
* keys with the default key.
*/
void s390_enable_skey(void)
static int __s390_enable_skey(pte_t *pte, unsigned long addr,
unsigned long next, struct mm_walk *walk)
{
page_table_reset_pgste(current->mm, 0, TASK_SIZE, true);
unsigned long ptev;
pgste_t pgste;
pgste = pgste_get_lock(pte);
/*
* Remove all zero page mappings,
* after establishing a policy to forbid zero page mappings
* following faults for that page will get fresh anonymous pages
*/
if (is_zero_pfn(pte_pfn(*pte))) {
ptep_flush_direct(walk->mm, addr, pte);
pte_val(*pte) = _PAGE_INVALID;
}
/* Clear storage key */
pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT |
PGSTE_GR_BIT | PGSTE_GC_BIT);
ptev = pte_val(*pte);
if (!(ptev & _PAGE_INVALID) && (ptev & _PAGE_WRITE))
page_set_storage_key(ptev & PAGE_MASK, PAGE_DEFAULT_KEY, 1);
pgste_set_unlock(pte, pgste);
return 0;
}
int s390_enable_skey(void)
{
struct mm_walk walk = { .pte_entry = __s390_enable_skey };
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
int rc = 0;
down_write(&mm->mmap_sem);
if (mm_use_skey(mm))
goto out_up;
mm->context.use_skey = 1;
for (vma = mm->mmap; vma; vma = vma->vm_next) {
if (ksm_madvise(vma, vma->vm_start, vma->vm_end,
MADV_UNMERGEABLE, &vma->vm_flags)) {
mm->context.use_skey = 0;
rc = -ENOMEM;
goto out_up;
}
}
mm->def_flags &= ~VM_MERGEABLE;
walk.mm = mm;
walk_page_range(0, TASK_SIZE, &walk);
out_up:
up_write(&mm->mmap_sem);
return rc;
}
EXPORT_SYMBOL_GPL(s390_enable_skey);
/*
* Reset CMMA state, make all pages stable again.
*/
static int __s390_reset_cmma(pte_t *pte, unsigned long addr,
unsigned long next, struct mm_walk *walk)
{
pgste_t pgste;
pgste = pgste_get_lock(pte);
pgste_val(pgste) &= ~_PGSTE_GPS_USAGE_MASK;
pgste_set_unlock(pte, pgste);
return 0;
}
void s390_reset_cmma(struct mm_struct *mm)
{
struct mm_walk walk = { .pte_entry = __s390_reset_cmma };
down_write(&mm->mmap_sem);
walk.mm = mm;
walk_page_range(0, TASK_SIZE, &walk);
up_write(&mm->mmap_sem);
}
EXPORT_SYMBOL_GPL(s390_reset_cmma);
/*
* Test and reset if a guest page is dirty
*/
......
......@@ -3,4 +3,4 @@
#
obj-$(CONFIG_PCI) += pci.o pci_dma.o pci_clp.o pci_sysfs.o \
pci_event.o pci_debug.o pci_insn.o
pci_event.o pci_debug.o pci_insn.o pci_mmio.o
......@@ -369,8 +369,7 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
if (type == PCI_CAP_ID_MSI && nvec > 1)
return 1;
msi_vecs = min(nvec, ZPCI_MSI_VEC_MAX);
msi_vecs = min_t(unsigned int, msi_vecs, CONFIG_PCI_NR_MSI);
msi_vecs = min_t(unsigned int, nvec, zdev->max_msi);
/* Allocate adapter summary indicator bit */
rc = -EIO;
......@@ -474,7 +473,8 @@ static void zpci_map_resources(struct zpci_dev *zdev)
len = pci_resource_len(pdev, i);
if (!len)
continue;
pdev->resource[i].start = (resource_size_t) pci_iomap(pdev, i, 0);
pdev->resource[i].start =
(resource_size_t __force) pci_iomap(pdev, i, 0);
pdev->resource[i].end = pdev->resource[i].start + len - 1;
}
}
......@@ -489,7 +489,8 @@ static void zpci_unmap_resources(struct zpci_dev *zdev)
len = pci_resource_len(pdev, i);
if (!len)
continue;
pci_iounmap(pdev, (void *) pdev->resource[i].start);
pci_iounmap(pdev, (void __iomem __force *)
pdev->resource[i].start);
}
}
......
......@@ -62,6 +62,7 @@ static void clp_store_query_pci_fngrp(struct zpci_dev *zdev,
zdev->tlb_refresh = response->refresh;
zdev->dma_mask = response->dasm;
zdev->msi_addr = response->msia;
zdev->max_msi = response->noi;
zdev->fmb_update = response->mui;
switch (response->version) {
......
......@@ -158,10 +158,7 @@ int __init zpci_debug_init(void)
void zpci_debug_exit(void)
{
if (pci_debug_msg_id)
debug_unregister(pci_debug_msg_id);
if (pci_debug_err_id)
debug_unregister(pci_debug_err_id);
debug_unregister(pci_debug_msg_id);
debug_unregister(pci_debug_err_id);
debugfs_remove(debugfs_root);
}
/*
* Access to PCI I/O memory from user space programs.
*
* Copyright IBM Corp. 2014
* Author(s): Alexey Ishchuk <aishchuk@linux.vnet.ibm.com>
*/
#include <linux/kernel.h>
#include <linux/syscalls.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/errno.h>
#include <linux/pci.h>
static long get_pfn(unsigned long user_addr, unsigned long access,
unsigned long *pfn)
{
struct vm_area_struct *vma;
long ret;
down_read(&current->mm->mmap_sem);
ret = -EINVAL;
vma = find_vma(current->mm, user_addr);
if (!vma)
goto out;
ret = -EACCES;
if (!(vma->vm_flags & access))
goto out;
ret = follow_pfn(vma, user_addr, pfn);
out:
up_read(&current->mm->mmap_sem);
return ret;
}
SYSCALL_DEFINE3(s390_pci_mmio_write, unsigned long, mmio_addr,
const void __user *, user_buffer, size_t, length)
{
u8 local_buf[64];
void __iomem *io_addr;
void *buf;
unsigned long pfn;
long ret;
if (!zpci_is_enabled())
return -ENODEV;
if (length <= 0 || PAGE_SIZE - (mmio_addr & ~PAGE_MASK) < length)
return -EINVAL;
if (length > 64) {
buf = kmalloc(length, GFP_KERNEL);
if (!buf)
return -ENOMEM;
} else
buf = local_buf;
ret = get_pfn(mmio_addr, VM_WRITE, &pfn);
if (ret)
goto out;
io_addr = (void *)((pfn << PAGE_SHIFT) | (mmio_addr & ~PAGE_MASK));
ret = -EFAULT;
if ((unsigned long) io_addr < ZPCI_IOMAP_ADDR_BASE)
goto out;
if (copy_from_user(buf, user_buffer, length))
goto out;
memcpy_toio(io_addr, buf, length);
ret = 0;
out:
if (buf != local_buf)
kfree(buf);
return ret;
}
SYSCALL_DEFINE3(s390_pci_mmio_read, unsigned long, mmio_addr,
void __user *, user_buffer, size_t, length)
{
u8 local_buf[64];
void __iomem *io_addr;
void *buf;
unsigned long pfn;
long ret;
if (!zpci_is_enabled())
return -ENODEV;
if (length <= 0 || PAGE_SIZE - (mmio_addr & ~PAGE_MASK) < length)
return -EINVAL;
if (length > 64) {
buf = kmalloc(length, GFP_KERNEL);
if (!buf)
return -ENOMEM;
} else
buf = local_buf;
ret = get_pfn(mmio_addr, VM_READ, &pfn);
if (ret)
goto out;
io_addr = (void *)((pfn << PAGE_SHIFT) | (mmio_addr & ~PAGE_MASK));
ret = -EFAULT;
if ((unsigned long) io_addr < ZPCI_IOMAP_ADDR_BASE)
goto out;
memcpy_fromio(buf, io_addr, length);
if (copy_to_user(user_buffer, buf, length))
goto out;
ret = 0;
out:
if (buf != local_buf)
kfree(buf);
return ret;
}
......@@ -1377,6 +1377,20 @@ int dasd_term_IO(struct dasd_ccw_req *cqr)
"I/O error, retry");
break;
case -EINVAL:
/*
* device not valid so no I/O could be running
* handle CQR as termination successful
*/
cqr->status = DASD_CQR_CLEARED;
cqr->stopclk = get_tod_clock();
cqr->starttime = 0;
/* no retries for invalid devices */
cqr->retries = -1;
DBF_DEV_EVENT(DBF_ERR, device, "%s",
"EINVAL, handle as terminated");
/* fake rc to success */
rc = 0;
break;
case -EBUSY:
DBF_DEV_EVENT(DBF_ERR, device, "%s",
"device busy, retry later");
......@@ -1683,11 +1697,8 @@ void dasd_int_handler(struct ccw_device *cdev, unsigned long intparm,
if (cqr->status == DASD_CQR_CLEAR_PENDING &&
scsw_fctl(&irb->scsw) & SCSW_FCTL_CLEAR_FUNC) {
cqr->status = DASD_CQR_CLEARED;
if (cqr->callback_data == DASD_SLEEPON_START_TAG)
cqr->callback_data = DASD_SLEEPON_END_TAG;
dasd_device_clear_timer(device);
wake_up(&dasd_flush_wq);
wake_up(&generic_waitq);
dasd_schedule_device_bh(device);
return;
}
......@@ -2326,21 +2337,11 @@ static int _dasd_sleep_on_queue(struct list_head *ccw_queue, int interruptible)
return -EAGAIN;
/* normal recovery for basedev IO */
if (__dasd_sleep_on_erp(cqr)) {
if (__dasd_sleep_on_erp(cqr))
/* handle erp first */
goto retry;
/* remember that ERP was needed */
rc = 1;
/* skip processing for active cqr */
if (cqr->status != DASD_CQR_TERMINATED &&
cqr->status != DASD_CQR_NEED_ERP)
break;
}
}
/* start ERP requests in upper loop */
if (rc)
goto retry;
return 0;
}
......
......@@ -99,15 +99,37 @@ void dasd_gendisk_free(struct dasd_block *block)
int dasd_scan_partitions(struct dasd_block *block)
{
struct block_device *bdev;
int retry, rc;
retry = 5;
bdev = bdget_disk(block->gdp, 0);
if (!bdev || blkdev_get(bdev, FMODE_READ, NULL) < 0)
if (!bdev) {
DBF_DEV_EVENT(DBF_ERR, block->base, "%s",
"scan partitions error, bdget returned NULL");
return -ENODEV;
}
rc = blkdev_get(bdev, FMODE_READ, NULL);
if (rc < 0) {
DBF_DEV_EVENT(DBF_ERR, block->base,
"scan partitions error, blkdev_get returned %d",
rc);
return -ENODEV;
}
/*
* See fs/partition/check.c:register_disk,rescan_partitions
* Can't call rescan_partitions directly. Use ioctl.
*/
ioctl_by_bdev(bdev, BLKRRPART, 0);
rc = ioctl_by_bdev(bdev, BLKRRPART, 0);
while (rc == -EBUSY && retry > 0) {
schedule();
rc = ioctl_by_bdev(bdev, BLKRRPART, 0);
retry--;
DBF_DEV_EVENT(DBF_ERR, block->base,
"scan partitions error, retry %d rc %d",
retry, rc);
}
/*
* Since the matching blkdev_put call to the blkdev_get in
* this function is not called before dasd_destroy_partitions
......
This diff is collapsed.
......@@ -30,8 +30,8 @@ struct scm_blk_dev {
struct scm_request {
struct scm_blk_dev *bdev;
struct request *request;
struct aidaw *aidaw;
struct aidaw *next_aidaw;
struct request **request;
struct aob *aob;
struct list_head list;
u8 retries;
......@@ -55,6 +55,8 @@ void scm_blk_irq(struct scm_device *, void *, int);
void scm_request_finish(struct scm_request *);
void scm_request_requeue(struct scm_request *);
struct aidaw *scm_aidaw_fetch(struct scm_request *scmrq, unsigned int bytes);
int scm_drv_init(void);
void scm_drv_cleanup(void);
......
......@@ -57,39 +57,52 @@ void scm_request_cluster_init(struct scm_request *scmrq)
scmrq->cluster.state = CLUSTER_NONE;
}
static bool clusters_intersect(struct scm_request *A, struct scm_request *B)
static bool clusters_intersect(struct request *A, struct request *B)
{
unsigned long firstA, lastA, firstB, lastB;
firstA = ((u64) blk_rq_pos(A->request) << 9) / CLUSTER_SIZE;
lastA = (((u64) blk_rq_pos(A->request) << 9) +
blk_rq_bytes(A->request) - 1) / CLUSTER_SIZE;
firstA = ((u64) blk_rq_pos(A) << 9) / CLUSTER_SIZE;
lastA = (((u64) blk_rq_pos(A) << 9) +
blk_rq_bytes(A) - 1) / CLUSTER_SIZE;
firstB = ((u64) blk_rq_pos(B->request) << 9) / CLUSTER_SIZE;
lastB = (((u64) blk_rq_pos(B->request) << 9) +
blk_rq_bytes(B->request) - 1) / CLUSTER_SIZE;
firstB = ((u64) blk_rq_pos(B) << 9) / CLUSTER_SIZE;
lastB = (((u64) blk_rq_pos(B) << 9) +
blk_rq_bytes(B) - 1) / CLUSTER_SIZE;
return (firstB <= lastA && firstA <= lastB);
}
bool scm_reserve_cluster(struct scm_request *scmrq)
{
struct request *req = scmrq->request[scmrq->aob->request.msb_count];
struct scm_blk_dev *bdev = scmrq->bdev;
struct scm_request *iter;
int pos, add = 1;
if (write_cluster_size == 0)
return true;
spin_lock(&bdev->lock);
list_for_each_entry(iter, &bdev->cluster_list, cluster.list) {
if (clusters_intersect(scmrq, iter) &&
(rq_data_dir(scmrq->request) == WRITE ||
rq_data_dir(iter->request) == WRITE)) {
spin_unlock(&bdev->lock);
return false;
if (iter == scmrq) {
/*
* We don't have to use clusters_intersect here, since
* cluster requests are always started separately.
*/
add = 0;
continue;
}
for (pos = 0; pos <= iter->aob->request.msb_count; pos++) {
if (clusters_intersect(req, iter->request[pos]) &&
(rq_data_dir(req) == WRITE ||
rq_data_dir(iter->request[pos]) == WRITE)) {
spin_unlock(&bdev->lock);
return false;
}
}
}
list_add(&scmrq->cluster.list, &bdev->cluster_list);
if (add)
list_add(&scmrq->cluster.list, &bdev->cluster_list);
spin_unlock(&bdev->lock);
return true;
......@@ -114,14 +127,14 @@ void scm_blk_dev_cluster_setup(struct scm_blk_dev *bdev)
blk_queue_io_opt(bdev->rq, CLUSTER_SIZE);
}
static void scm_prepare_cluster_request(struct scm_request *scmrq)
static int scm_prepare_cluster_request(struct scm_request *scmrq)
{
struct scm_blk_dev *bdev = scmrq->bdev;
struct scm_device *scmdev = bdev->gendisk->private_data;
struct request *req = scmrq->request;
struct aidaw *aidaw = scmrq->aidaw;
struct request *req = scmrq->request[0];
struct msb *msb = &scmrq->aob->msb[0];
struct req_iterator iter;
struct aidaw *aidaw;
struct bio_vec bv;
int i = 0;
u64 addr;
......@@ -131,11 +144,9 @@ static void scm_prepare_cluster_request(struct scm_request *scmrq)
scmrq->cluster.state = CLUSTER_READ;
/* fall through */
case CLUSTER_READ:
scmrq->aob->request.msb_count = 1;
msb->bs = MSB_BS_4K;
msb->oc = MSB_OC_READ;
msb->flags = MSB_FLAG_IDA;
msb->data_addr = (u64) aidaw;
msb->blk_count = write_cluster_size;
addr = scmdev->address + ((u64) blk_rq_pos(req) << 9);
......@@ -146,6 +157,12 @@ static void scm_prepare_cluster_request(struct scm_request *scmrq)
CLUSTER_SIZE))
msb->blk_count = 2 * write_cluster_size;
aidaw = scm_aidaw_fetch(scmrq, msb->blk_count * PAGE_SIZE);
if (!aidaw)
return -ENOMEM;
scmrq->aob->request.msb_count = 1;
msb->data_addr = (u64) aidaw;
for (i = 0; i < msb->blk_count; i++) {
aidaw->data_addr = (u64) scmrq->cluster.buf[i];
aidaw++;
......@@ -153,6 +170,7 @@ static void scm_prepare_cluster_request(struct scm_request *scmrq)
break;
case CLUSTER_WRITE:
aidaw = (void *) msb->data_addr;
msb->oc = MSB_OC_WRITE;
for (addr = msb->scm_addr;
......@@ -173,22 +191,29 @@ static void scm_prepare_cluster_request(struct scm_request *scmrq)
}
break;
}
return 0;
}
bool scm_need_cluster_request(struct scm_request *scmrq)
{
if (rq_data_dir(scmrq->request) == READ)
int pos = scmrq->aob->request.msb_count;
if (rq_data_dir(scmrq->request[pos]) == READ)
return false;
return blk_rq_bytes(scmrq->request) < CLUSTER_SIZE;
return blk_rq_bytes(scmrq->request[pos]) < CLUSTER_SIZE;
}
/* Called with queue lock held. */
void scm_initiate_cluster_request(struct scm_request *scmrq)
{
scm_prepare_cluster_request(scmrq);
if (scm_prepare_cluster_request(scmrq))
goto requeue;
if (eadm_start_aob(scmrq->aob))
scm_request_requeue(scmrq);
goto requeue;
return;
requeue:
scm_request_requeue(scmrq);
}
bool scm_test_cluster_request(struct scm_request *scmrq)
......
......@@ -102,6 +102,16 @@ config SCLP_ASYNC
want for inform other people about your kernel panics,
need this feature and intend to run your kernel in LPAR.
config SCLP_ASYNC_ID
string "Component ID for Call Home"
depends on SCLP_ASYNC
default "000000000"
help
The Component ID for Call Home is used to identify the correct
problem reporting queue the call home records should be sent to.
If your are unsure, please use the default value "000000000".
config HMC_DRV
def_tristate m
prompt "Support for file transfers from HMC drive CD/DVD-ROM"
......
......@@ -137,7 +137,8 @@ static int sclp_async_send_wait(char *message)
* Retain Queue
* e.g. 5639CC140 500 Red Hat RHEL5 Linux for zSeries (RHEL AS)
*/
strncpy(sccb->evbuf.comp_id, "000000000", sizeof(sccb->evbuf.comp_id));
strncpy(sccb->evbuf.comp_id, CONFIG_SCLP_ASYNC_ID,
sizeof(sccb->evbuf.comp_id));
sccb->evbuf.header.length = sizeof(sccb->evbuf);
sccb->header.length = sizeof(sccb->evbuf) + sizeof(sccb->header);
sccb->header.function_code = SCLP_NORMAL_WRITE;
......
......@@ -31,7 +31,7 @@
MODULE_DESCRIPTION("driver for s390 eadm subchannels");
MODULE_LICENSE("GPL");
#define EADM_TIMEOUT (5 * HZ)
#define EADM_TIMEOUT (7 * HZ)
static DEFINE_SPINLOCK(list_lock);
static LIST_HEAD(eadm_list);
......
......@@ -103,6 +103,17 @@ static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm,
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif
#ifndef __HAVE_ARCH_PMDP_GET_AND_CLEAR_FULL
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static inline pmd_t pmdp_get_and_clear_full(struct mm_struct *mm,
unsigned long address, pmd_t *pmdp,
int full)
{
return pmdp_get_and_clear(mm, address, pmdp);
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif
#ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
unsigned long address, pte_t *ptep,
......
......@@ -335,6 +335,7 @@ extern void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
extern int arch_prepare_kprobe_ftrace(struct kprobe *p);
#endif
int arch_check_ftrace_location(struct kprobe *p);
/* Get the kprobe at this addr (if any) - called with preemption disabled */
struct kprobe *get_kprobe(void *addr);
......
......@@ -56,6 +56,17 @@ extern int sysctl_legacy_va_layout;
#define __pa_symbol(x) __pa(RELOC_HIDE((unsigned long)(x), 0))
#endif
/*
* To prevent common memory management code establishing
* a zero page mapping on a read fault.
* This macro should be defined within <asm/pgtable.h>.
* s390 does this to prevent multiplexing of hardware bits
* related to the physical page in case of virtualization.
*/
#ifndef mm_forbids_zeropage
#define mm_forbids_zeropage(X) (0)
#endif
extern unsigned long sysctl_user_reserve_kbytes;
extern unsigned long sysctl_admin_reserve_kbytes;
......
......@@ -1410,16 +1410,10 @@ static inline int check_kprobe_rereg(struct kprobe *p)
return ret;
}
static int check_kprobe_address_safe(struct kprobe *p,
struct module **probed_mod)
int __weak arch_check_ftrace_location(struct kprobe *p)
{
int ret = 0;
unsigned long ftrace_addr;
/*
* If the address is located on a ftrace nop, set the
* breakpoint to the following instruction.
*/
ftrace_addr = ftrace_location((unsigned long)p->addr);
if (ftrace_addr) {
#ifdef CONFIG_KPROBES_ON_FTRACE
......@@ -1431,7 +1425,17 @@ static int check_kprobe_address_safe(struct kprobe *p,
return -EINVAL;
#endif
}
return 0;
}
static int check_kprobe_address_safe(struct kprobe *p,
struct module **probed_mod)
{
int ret;
ret = arch_check_ftrace_location(p);
if (ret)
return ret;
jump_label_lock();
preempt_disable();
......
......@@ -169,6 +169,8 @@ cond_syscall(ppc_rtas);
cond_syscall(sys_spu_run);
cond_syscall(sys_spu_create);
cond_syscall(sys_subpage_prot);
cond_syscall(sys_s390_pci_mmio_read);
cond_syscall(sys_s390_pci_mmio_write);
/* mmu depending weak syscall entries */
cond_syscall(sys_mprotect);
......
......@@ -804,7 +804,7 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
return VM_FAULT_OOM;
if (unlikely(khugepaged_enter(vma, vma->vm_flags)))
return VM_FAULT_OOM;
if (!(flags & FAULT_FLAG_WRITE) &&
if (!(flags & FAULT_FLAG_WRITE) && !mm_forbids_zeropage(mm) &&
transparent_hugepage_use_zero_page()) {
spinlock_t *ptl;
pgtable_t pgtable;
......@@ -1399,7 +1399,8 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
* pgtable_trans_huge_withdraw after finishing pmdp related
* operations.
*/
orig_pmd = pmdp_get_and_clear(tlb->mm, addr, pmd);
orig_pmd = pmdp_get_and_clear_full(tlb->mm, addr, pmd,
tlb->fullmm);
tlb_remove_pmd_tlb_entry(tlb, pmd, addr);
pgtable = pgtable_trans_huge_withdraw(tlb->mm, pmd);
if (is_huge_zero_pmd(orig_pmd)) {
......
......@@ -2627,7 +2627,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
return VM_FAULT_SIGBUS;
/* Use the zero-page for reads */
if (!(flags & FAULT_FLAG_WRITE)) {
if (!(flags & FAULT_FLAG_WRITE) && !mm_forbids_zeropage(mm)) {
entry = pte_mkspecial(pfn_pte(my_zero_pfn(address),
vma->vm_page_prot));
page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
......
......@@ -404,7 +404,7 @@ do_file(char const *const fname)
}
if (w2(ghdr->e_machine) == EM_S390) {
reltype = R_390_64;
mcount_adjust_64 = -8;
mcount_adjust_64 = -14;
}
if (w2(ghdr->e_machine) == EM_MIPS) {
reltype = R_MIPS_64;
......
......@@ -243,7 +243,7 @@ if ($arch eq "x86_64") {
} elsif ($arch eq "s390" && $bits == 64) {
$mcount_regex = "^\\s*([0-9a-fA-F]+):\\s*R_390_(PC|PLT)32DBL\\s+_mcount\\+0x2\$";
$mcount_adjust = -8;
$mcount_adjust = -14;
$alignment = 8;
$type = ".quad";
$ld .= " -m elf64_s390";
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment