Commit e71d5126 authored by Linus Torvalds

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux

Pull second round of s390 updates from Martin Schwidefsky:

 - rework of the vdso code to avoid the use of the access register mode

 - use perf AUX buffers for the transport of diagnostic sample data

 - add perf_regs and user stack dump support

 - enable perf call graphs for user space programs

 - add perf register support for floating-point registers

 - all remaining s390 related timer_setup conversions

 - bug fixes and cleanups

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux: (30 commits)
  s390: remove unused parameter from Makefile
  zfcp: purely mechanical update using timer API, plus blank lines
  s390/scsi: Convert timers to use timer_setup()
  s390/cpum_sf: correctly set the PID and TID in perf samples
  s390/cpum_sf: load program parameter at sampler enablement
  s390/perf: add perf register support for floating-point registers
  s390/perf: extend perf_regs support to include floating-point registers
  s390/perf: define common DWARF register string table
  s390/perf: add support for perf_regs and libdw
  s390/perf: add perf_regs support and user stack dump
  s390/cpum_sf: do not register PMU if no sampling mode is authorized
  s390/cpumf: remove raw event support in basic-only sampling mode
  s390/perf: add callback to perf to enable using AUX buffer
  s390/cpumf: enable using AUX buffer
  s390/cpumf: introduce AUX buffer for dump diagnostic sample data
  s390/disassembler: increase show_code buffer size
  s390: Remove CONFIG_HARDENED_USERCOPY
  s390: enable CPU alternatives unconditionally
  s390/nmi: remove unused code
  s390/mm: remove unused code
  ...
parents c3e9c04b ab35727e
...@@ -148,6 +148,7 @@ config S390 ...@@ -148,6 +148,7 @@ config S390
select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_FUNCTION_TRACER select HAVE_FUNCTION_TRACER
select HAVE_FUTEX_CMPXCHG if FUTEX select HAVE_FUTEX_CMPXCHG if FUTEX
select HAVE_GCC_PLUGINS
select HAVE_KERNEL_BZIP2 select HAVE_KERNEL_BZIP2
select HAVE_KERNEL_GZIP select HAVE_KERNEL_GZIP
select HAVE_KERNEL_LZ4 select HAVE_KERNEL_LZ4
...@@ -158,6 +159,8 @@ config S390 ...@@ -158,6 +159,8 @@ config S390
select HAVE_KRETPROBES select HAVE_KRETPROBES
select HAVE_KVM select HAVE_KVM
select HAVE_LIVEPATCH select HAVE_LIVEPATCH
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
select HAVE_MEMBLOCK select HAVE_MEMBLOCK
select HAVE_MEMBLOCK_NODE_MAP select HAVE_MEMBLOCK_NODE_MAP
select HAVE_MEMBLOCK_PHYS_MAP select HAVE_MEMBLOCK_PHYS_MAP
...@@ -538,22 +541,6 @@ config ARCH_RANDOM ...@@ -538,22 +541,6 @@ config ARCH_RANDOM
If unsure, say Y. If unsure, say Y.
config ALTERNATIVES
def_bool y
prompt "Patch optimized instructions for running CPU type"
help
When enabled the kernel code is compiled with additional
alternative instructions blocks optimized for newer CPU types.
These alternative instructions blocks are patched at kernel boot
time when running CPU supports them. This mechanism is used to
optimize some critical code paths (i.e. spinlocks) for newer CPUs
even if kernel is build to support older machine generations.
This mechanism could be disabled by appending "noaltinstr"
option to the kernel command line.
If unsure, say Y.
endmenu endmenu
menu "Memory setup" menu "Memory setup"
......
...@@ -629,6 +629,7 @@ CONFIG_STACK_TRACER=y ...@@ -629,6 +629,7 @@ CONFIG_STACK_TRACER=y
CONFIG_BLK_DEV_IO_TRACE=y CONFIG_BLK_DEV_IO_TRACE=y
CONFIG_FUNCTION_PROFILER=y CONFIG_FUNCTION_PROFILER=y
CONFIG_HIST_TRIGGERS=y CONFIG_HIST_TRIGGERS=y
CONFIG_DMA_API_DEBUG=y
CONFIG_LKDTM=m CONFIG_LKDTM=m
CONFIG_TEST_LIST_SORT=y CONFIG_TEST_LIST_SORT=y
CONFIG_TEST_SORT=y CONFIG_TEST_SORT=y
...@@ -637,14 +638,12 @@ CONFIG_RBTREE_TEST=y ...@@ -637,14 +638,12 @@ CONFIG_RBTREE_TEST=y
CONFIG_INTERVAL_TREE_TEST=m CONFIG_INTERVAL_TREE_TEST=m
CONFIG_PERCPU_TEST=m CONFIG_PERCPU_TEST=m
CONFIG_ATOMIC64_SELFTEST=y CONFIG_ATOMIC64_SELFTEST=y
CONFIG_DMA_API_DEBUG=y
CONFIG_TEST_BPF=m CONFIG_TEST_BPF=m
CONFIG_BUG_ON_DATA_CORRUPTION=y CONFIG_BUG_ON_DATA_CORRUPTION=y
CONFIG_S390_PTDUMP=y CONFIG_S390_PTDUMP=y
CONFIG_ENCRYPTED_KEYS=m CONFIG_ENCRYPTED_KEYS=m
CONFIG_SECURITY=y CONFIG_SECURITY=y
CONFIG_SECURITY_NETWORK=y CONFIG_SECURITY_NETWORK=y
CONFIG_HARDENED_USERCOPY=y
CONFIG_FORTIFY_SOURCE=y CONFIG_FORTIFY_SOURCE=y
CONFIG_SECURITY_SELINUX=y CONFIG_SECURITY_SELINUX=y
CONFIG_SECURITY_SELINUX_BOOTPARAM=y CONFIG_SECURITY_SELINUX_BOOTPARAM=y
...@@ -660,13 +659,11 @@ CONFIG_CRYPTO_PCRYPT=m ...@@ -660,13 +659,11 @@ CONFIG_CRYPTO_PCRYPT=m
CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_CRYPTD=m
CONFIG_CRYPTO_MCRYPTD=m CONFIG_CRYPTO_MCRYPTD=m
CONFIG_CRYPTO_TEST=m CONFIG_CRYPTO_TEST=m
CONFIG_CRYPTO_CCM=m
CONFIG_CRYPTO_GCM=m CONFIG_CRYPTO_GCM=m
CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_LRW=m
CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_KEYWRAP=m
CONFIG_CRYPTO_CMAC=m
CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_XCBC=m
CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_VMAC=m
CONFIG_CRYPTO_CRC32=m CONFIG_CRYPTO_CRC32=m
......
...@@ -587,7 +587,6 @@ CONFIG_BIG_KEYS=y ...@@ -587,7 +587,6 @@ CONFIG_BIG_KEYS=y
CONFIG_ENCRYPTED_KEYS=m CONFIG_ENCRYPTED_KEYS=m
CONFIG_SECURITY=y CONFIG_SECURITY=y
CONFIG_SECURITY_NETWORK=y CONFIG_SECURITY_NETWORK=y
CONFIG_HARDENED_USERCOPY=y
CONFIG_SECURITY_SELINUX=y CONFIG_SECURITY_SELINUX=y
CONFIG_SECURITY_SELINUX_BOOTPARAM=y CONFIG_SECURITY_SELINUX_BOOTPARAM=y
CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=0 CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=0
...@@ -605,13 +604,10 @@ CONFIG_CRYPTO_PCRYPT=m ...@@ -605,13 +604,10 @@ CONFIG_CRYPTO_PCRYPT=m
CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_CRYPTD=m
CONFIG_CRYPTO_MCRYPTD=m CONFIG_CRYPTO_MCRYPTD=m
CONFIG_CRYPTO_TEST=m CONFIG_CRYPTO_TEST=m
CONFIG_CRYPTO_CCM=m
CONFIG_CRYPTO_GCM=m
CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_LRW=m
CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_KEYWRAP=m
CONFIG_CRYPTO_CMAC=m
CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_XCBC=m
CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_VMAC=m
CONFIG_CRYPTO_CRC32=m CONFIG_CRYPTO_CRC32=m
......
...@@ -585,7 +585,6 @@ CONFIG_BIG_KEYS=y ...@@ -585,7 +585,6 @@ CONFIG_BIG_KEYS=y
CONFIG_ENCRYPTED_KEYS=m CONFIG_ENCRYPTED_KEYS=m
CONFIG_SECURITY=y CONFIG_SECURITY=y
CONFIG_SECURITY_NETWORK=y CONFIG_SECURITY_NETWORK=y
CONFIG_HARDENED_USERCOPY=y
CONFIG_SECURITY_SELINUX=y CONFIG_SECURITY_SELINUX=y
CONFIG_SECURITY_SELINUX_BOOTPARAM=y CONFIG_SECURITY_SELINUX_BOOTPARAM=y
CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=0 CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=0
...@@ -603,13 +602,10 @@ CONFIG_CRYPTO_PCRYPT=m ...@@ -603,13 +602,10 @@ CONFIG_CRYPTO_PCRYPT=m
CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_CRYPTD=m
CONFIG_CRYPTO_MCRYPTD=m CONFIG_CRYPTO_MCRYPTD=m
CONFIG_CRYPTO_TEST=m CONFIG_CRYPTO_TEST=m
CONFIG_CRYPTO_CCM=m
CONFIG_CRYPTO_GCM=m
CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_LRW=m
CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_KEYWRAP=m
CONFIG_CRYPTO_CMAC=m
CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_XCBC=m
CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_VMAC=m
CONFIG_CRYPTO_CRC32=m CONFIG_CRYPTO_CRC32=m
......
...@@ -15,14 +15,9 @@ struct alt_instr { ...@@ -15,14 +15,9 @@ struct alt_instr {
u8 replacementlen; /* length of new instruction */ u8 replacementlen; /* length of new instruction */
} __packed; } __packed;
#ifdef CONFIG_ALTERNATIVES void apply_alternative_instructions(void);
extern void apply_alternative_instructions(void); void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
#else
static inline void apply_alternative_instructions(void) {};
static inline void apply_alternatives(struct alt_instr *start,
struct alt_instr *end) {};
#endif
/* /*
* |661: |662: |6620 |663: * |661: |662: |6620 |663:
* +-----------+---------------------+ * +-----------+---------------------+
...@@ -109,7 +104,6 @@ static inline void apply_alternatives(struct alt_instr *start, ...@@ -109,7 +104,6 @@ static inline void apply_alternatives(struct alt_instr *start,
b_altinstr(num)":\n\t" altinstr "\n" e_altinstr(num) ":\n" \ b_altinstr(num)":\n\t" altinstr "\n" e_altinstr(num) ":\n" \
INSTR_LEN_SANITY_CHECK(altinstr_len(num)) INSTR_LEN_SANITY_CHECK(altinstr_len(num))
#ifdef CONFIG_ALTERNATIVES
/* alternative assembly primitive: */ /* alternative assembly primitive: */
#define ALTERNATIVE(oldinstr, altinstr, facility) \ #define ALTERNATIVE(oldinstr, altinstr, facility) \
".pushsection .altinstr_replacement, \"ax\"\n" \ ".pushsection .altinstr_replacement, \"ax\"\n" \
...@@ -130,14 +124,6 @@ static inline void apply_alternatives(struct alt_instr *start, ...@@ -130,14 +124,6 @@ static inline void apply_alternatives(struct alt_instr *start,
ALTINSTR_ENTRY(facility1, 1) \ ALTINSTR_ENTRY(facility1, 1) \
ALTINSTR_ENTRY(facility2, 2) \ ALTINSTR_ENTRY(facility2, 2) \
".popsection\n" ".popsection\n"
#else
/* Alternative instructions are disabled, let's put just oldinstr in */
#define ALTERNATIVE(oldinstr, altinstr, facility) \
oldinstr "\n"
#define ALTERNATIVE_2(oldinstr, altinstr1, facility1, altinstr2, facility2) \
oldinstr "\n"
#endif
/* /*
* Alternative instructions for different CPU types or capabilities. * Alternative instructions for different CPU types or capabilities.
......
...@@ -144,6 +144,12 @@ struct hws_trailer_entry { ...@@ -144,6 +144,12 @@ struct hws_trailer_entry {
unsigned long long progusage2; /* */ unsigned long long progusage2; /* */
} __packed; } __packed;
/*
 * Load program parameter.
 *
 * Issues the instruction emitted by ".insn s,0xb2800000" with a storage
 * operand at displacement 0 from the address held in @pp.
 *
 * @pp: address of the operand; passed in an address register ("a"
 *      constraint) and used as the base of the storage operand.
 *      NOTE(review): presumably this points at the 64-bit program
 *      parameter value (the lowcore lpp/current_pid pair) - confirm
 *      against the callers.
 *
 * The "memory" clobber tells the compiler not to cache or reorder memory
 * accesses across the instruction.
 */
static inline void lpp(void *pp)
{
asm volatile(".insn s,0xb2800000,0(%0)\n":: "a" (pp) : "memory");
}
/* Query counter information */ /* Query counter information */
static inline int qctri(struct cpumf_ctr_info *info) static inline int qctri(struct cpumf_ctr_info *info)
{ {
...@@ -167,7 +173,7 @@ static inline int lcctl(u64 ctl) ...@@ -167,7 +173,7 @@ static inline int lcctl(u64 ctl)
" .insn s,0xb2840000,%1\n" " .insn s,0xb2840000,%1\n"
" ipm %0\n" " ipm %0\n"
" srl %0,28\n" " srl %0,28\n"
: "=d" (cc) : "m" (ctl) : "cc"); : "=d" (cc) : "Q" (ctl) : "cc");
return cc; return cc;
} }
......
...@@ -26,9 +26,9 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, ...@@ -26,9 +26,9 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
u32 __user *uaddr) u32 __user *uaddr)
{ {
int oldval = 0, newval, ret; int oldval = 0, newval, ret;
mm_segment_t old_fs;
load_kernel_asce(); old_fs = enable_sacf_uaccess();
pagefault_disable(); pagefault_disable();
switch (op) { switch (op) {
case FUTEX_OP_SET: case FUTEX_OP_SET:
...@@ -55,6 +55,7 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, ...@@ -55,6 +55,7 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
ret = -ENOSYS; ret = -ENOSYS;
} }
pagefault_enable(); pagefault_enable();
disable_sacf_uaccess(old_fs);
if (!ret) if (!ret)
*oval = oldval; *oval = oldval;
...@@ -65,9 +66,10 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, ...@@ -65,9 +66,10 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
u32 oldval, u32 newval) u32 oldval, u32 newval)
{ {
mm_segment_t old_fs;
int ret; int ret;
load_kernel_asce(); old_fs = enable_sacf_uaccess();
asm volatile( asm volatile(
" sacf 256\n" " sacf 256\n"
"0: cs %1,%4,0(%5)\n" "0: cs %1,%4,0(%5)\n"
...@@ -77,6 +79,7 @@ static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, ...@@ -77,6 +79,7 @@ static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
: "=d" (ret), "+d" (oldval), "=m" (*uaddr) : "=d" (ret), "+d" (oldval), "=m" (*uaddr)
: "0" (-EFAULT), "d" (newval), "a" (uaddr), "m" (*uaddr) : "0" (-EFAULT), "d" (newval), "a" (uaddr), "m" (*uaddr)
: "cc", "memory"); : "cc", "memory");
disable_sacf_uaccess(old_fs);
*uval = oldval; *uval = oldval;
return ret; return ret;
} }
......
...@@ -115,33 +115,28 @@ struct lowcore { ...@@ -115,33 +115,28 @@ struct lowcore {
/* Address space pointer. */ /* Address space pointer. */
__u64 kernel_asce; /* 0x0378 */ __u64 kernel_asce; /* 0x0378 */
__u64 user_asce; /* 0x0380 */ __u64 user_asce; /* 0x0380 */
__u64 vdso_asce; /* 0x0388 */
/* /*
* The lpp and current_pid fields form a * The lpp and current_pid fields form a
* 64-bit value that is set as program * 64-bit value that is set as program
* parameter with the LPP instruction. * parameter with the LPP instruction.
*/ */
__u32 lpp; /* 0x0388 */ __u32 lpp; /* 0x0390 */
__u32 current_pid; /* 0x038c */ __u32 current_pid; /* 0x0394 */
/* SMP info area */ /* SMP info area */
__u32 cpu_nr; /* 0x0390 */ __u32 cpu_nr; /* 0x0398 */
__u32 softirq_pending; /* 0x0394 */ __u32 softirq_pending; /* 0x039c */
__u64 percpu_offset; /* 0x0398 */ __u32 preempt_count; /* 0x03a0 */
__u64 vdso_per_cpu_data; /* 0x03a0 */ __u32 spinlock_lockval; /* 0x03a4 */
__u64 machine_flags; /* 0x03a8 */ __u32 spinlock_index; /* 0x03a8 */
__u32 preempt_count; /* 0x03b0 */ __u32 fpu_flags; /* 0x03ac */
__u8 pad_0x03b4[0x03b8-0x03b4]; /* 0x03b4 */ __u64 percpu_offset; /* 0x03b0 */
__u64 gmap; /* 0x03b8 */ __u64 vdso_per_cpu_data; /* 0x03b8 */
__u32 spinlock_lockval; /* 0x03c0 */ __u64 machine_flags; /* 0x03c0 */
__u32 spinlock_index; /* 0x03c4 */ __u64 gmap; /* 0x03c8 */
__u32 fpu_flags; /* 0x03c8 */ __u8 pad_0x03d0[0x0e00-0x03d0]; /* 0x03d0 */
__u8 pad_0x03cc[0x0400-0x03cc]; /* 0x03cc */
/* Per cpu primary space access list */
__u32 paste[16]; /* 0x0400 */
__u8 pad_0x04c0[0x0e00-0x0440]; /* 0x0440 */
/* /*
* 0xe00 contains the address of the IPL Parameter Information * 0xe00 contains the address of the IPL Parameter Information
...@@ -193,14 +188,14 @@ extern struct lowcore *lowcore_ptr[]; ...@@ -193,14 +188,14 @@ extern struct lowcore *lowcore_ptr[];
static inline void set_prefix(__u32 address) static inline void set_prefix(__u32 address)
{ {
asm volatile("spx %0" : : "m" (address) : "memory"); asm volatile("spx %0" : : "Q" (address) : "memory");
} }
static inline __u32 store_prefix(void) static inline __u32 store_prefix(void)
{ {
__u32 address; __u32 address;
asm volatile("stpx %0" : "=m" (address)); asm volatile("stpx %0" : "=Q" (address));
return address; return address;
} }
......
...@@ -73,41 +73,38 @@ static inline int init_new_context(struct task_struct *tsk, ...@@ -73,41 +73,38 @@ static inline int init_new_context(struct task_struct *tsk,
static inline void set_user_asce(struct mm_struct *mm) static inline void set_user_asce(struct mm_struct *mm)
{ {
S390_lowcore.user_asce = mm->context.asce; S390_lowcore.user_asce = mm->context.asce;
if (current->thread.mm_segment.ar4) __ctl_load(S390_lowcore.user_asce, 1, 1);
__ctl_load(S390_lowcore.user_asce, 7, 7); clear_cpu_flag(CIF_ASCE_PRIMARY);
set_cpu_flag(CIF_ASCE_PRIMARY);
} }
static inline void clear_user_asce(void) static inline void clear_user_asce(void)
{ {
S390_lowcore.user_asce = S390_lowcore.kernel_asce; S390_lowcore.user_asce = S390_lowcore.kernel_asce;
__ctl_load(S390_lowcore.kernel_asce, 1, 1);
__ctl_load(S390_lowcore.user_asce, 1, 1);
__ctl_load(S390_lowcore.user_asce, 7, 7);
}
static inline void load_kernel_asce(void)
{
unsigned long asce;
__ctl_store(asce, 1, 1);
if (asce != S390_lowcore.kernel_asce)
__ctl_load(S390_lowcore.kernel_asce, 1, 1);
set_cpu_flag(CIF_ASCE_PRIMARY); set_cpu_flag(CIF_ASCE_PRIMARY);
} }
mm_segment_t enable_sacf_uaccess(void);
void disable_sacf_uaccess(mm_segment_t old_fs);
static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
struct task_struct *tsk) struct task_struct *tsk)
{ {
int cpu = smp_processor_id(); int cpu = smp_processor_id();
S390_lowcore.user_asce = next->context.asce;
if (prev == next) if (prev == next)
return; return;
S390_lowcore.user_asce = next->context.asce;
cpumask_set_cpu(cpu, &next->context.cpu_attach_mask); cpumask_set_cpu(cpu, &next->context.cpu_attach_mask);
/* Clear old ASCE by loading the kernel ASCE. */ /* Clear previous user-ASCE from CR1 and CR7 */
__ctl_load(S390_lowcore.kernel_asce, 1, 1); if (!test_cpu_flag(CIF_ASCE_PRIMARY)) {
__ctl_load(S390_lowcore.kernel_asce, 7, 7); __ctl_load(S390_lowcore.kernel_asce, 1, 1);
set_cpu_flag(CIF_ASCE_PRIMARY);
}
if (test_cpu_flag(CIF_ASCE_SECONDARY)) {
__ctl_load(S390_lowcore.vdso_asce, 7, 7);
clear_cpu_flag(CIF_ASCE_SECONDARY);
}
cpumask_clear_cpu(cpu, &prev->context.cpu_attach_mask); cpumask_clear_cpu(cpu, &prev->context.cpu_attach_mask);
} }
...@@ -117,7 +114,6 @@ static inline void finish_arch_post_lock_switch(void) ...@@ -117,7 +114,6 @@ static inline void finish_arch_post_lock_switch(void)
struct task_struct *tsk = current; struct task_struct *tsk = current;
struct mm_struct *mm = tsk->mm; struct mm_struct *mm = tsk->mm;
load_kernel_asce();
if (mm) { if (mm) {
preempt_disable(); preempt_disable();
while (atomic_read(&mm->context.flush_count)) while (atomic_read(&mm->context.flush_count))
......
...@@ -64,27 +64,10 @@ struct perf_sf_sde_regs { ...@@ -64,27 +64,10 @@ struct perf_sf_sde_regs {
#define REG_OVERFLOW 1 #define REG_OVERFLOW 1
#define OVERFLOW_REG(hwc) ((hwc)->extra_reg.config) #define OVERFLOW_REG(hwc) ((hwc)->extra_reg.config)
#define SFB_ALLOC_REG(hwc) ((hwc)->extra_reg.alloc) #define SFB_ALLOC_REG(hwc) ((hwc)->extra_reg.alloc)
#define RAWSAMPLE_REG(hwc) ((hwc)->config)
#define TEAR_REG(hwc) ((hwc)->last_tag) #define TEAR_REG(hwc) ((hwc)->last_tag)
#define SAMPL_RATE(hwc) ((hwc)->event_base) #define SAMPL_RATE(hwc) ((hwc)->event_base)
#define SAMPL_FLAGS(hwc) ((hwc)->config_base) #define SAMPL_FLAGS(hwc) ((hwc)->config_base)
#define SAMPL_DIAG_MODE(hwc) (SAMPL_FLAGS(hwc) & PERF_CPUM_SF_DIAG_MODE) #define SAMPL_DIAG_MODE(hwc) (SAMPL_FLAGS(hwc) & PERF_CPUM_SF_DIAG_MODE)
#define SDB_FULL_BLOCKS(hwc) (SAMPL_FLAGS(hwc) & PERF_CPUM_SF_FULL_BLOCKS) #define SDB_FULL_BLOCKS(hwc) (SAMPL_FLAGS(hwc) & PERF_CPUM_SF_FULL_BLOCKS)
/* Structure for sampling data entries to be passed as perf raw sample data
* to user space. Note that raw sample data must be aligned and, thus, might
* be padded with zeros.
*/
struct sf_raw_sample {
#define SF_RAW_SAMPLE_BASIC PERF_CPUM_SF_BASIC_MODE
#define SF_RAW_SAMPLE_DIAG PERF_CPUM_SF_DIAG_MODE
u64 format;
u32 size; /* Size of sf_raw_sample */
u16 bsdes; /* Basic-sampling data entry size */
u16 dsdes; /* Diagnostic-sampling data entry size */
struct hws_basic_entry basic; /* Basic-sampling data entry */
struct hws_diag_entry diag; /* Diagnostic-sampling data entry */
u8 padding[]; /* Padding to next multiple of 8 */
} __packed;
#endif /* _ASM_S390_PERF_EVENT_H */ #endif /* _ASM_S390_PERF_EVENT_H */
...@@ -109,9 +109,7 @@ extern void execve_tail(void); ...@@ -109,9 +109,7 @@ extern void execve_tail(void);
#define HAVE_ARCH_PICK_MMAP_LAYOUT #define HAVE_ARCH_PICK_MMAP_LAYOUT
typedef struct { typedef unsigned int mm_segment_t;
__u32 ar4;
} mm_segment_t;
/* /*
* Thread structure * Thread structure
...@@ -247,7 +245,7 @@ static inline unsigned short stap(void) ...@@ -247,7 +245,7 @@ static inline unsigned short stap(void)
{ {
unsigned short cpu_address; unsigned short cpu_address;
asm volatile("stap %0" : "=m" (cpu_address)); asm volatile("stap %0" : "=Q" (cpu_address));
return cpu_address; return cpu_address;
} }
......
...@@ -13,10 +13,12 @@ ...@@ -13,10 +13,12 @@
#define PIF_SYSCALL 0 /* inside a system call */ #define PIF_SYSCALL 0 /* inside a system call */
#define PIF_PER_TRAP 1 /* deliver sigtrap on return to user */ #define PIF_PER_TRAP 1 /* deliver sigtrap on return to user */
#define PIF_SYSCALL_RESTART 2 /* restart the current system call */ #define PIF_SYSCALL_RESTART 2 /* restart the current system call */
#define PIF_GUEST_FAULT 3 /* indicates program check in sie64a */
#define _PIF_SYSCALL _BITUL(PIF_SYSCALL) #define _PIF_SYSCALL _BITUL(PIF_SYSCALL)
#define _PIF_PER_TRAP _BITUL(PIF_PER_TRAP) #define _PIF_PER_TRAP _BITUL(PIF_PER_TRAP)
#define _PIF_SYSCALL_RESTART _BITUL(PIF_SYSCALL_RESTART) #define _PIF_SYSCALL_RESTART _BITUL(PIF_SYSCALL_RESTART)
#define _PIF_GUEST_FAULT _BITUL(PIF_GUEST_FAULT)
#ifndef __ASSEMBLY__ #ifndef __ASSEMBLY__
......
...@@ -36,7 +36,7 @@ ...@@ -36,7 +36,7 @@
#define MACHINE_FLAG_SCC _BITUL(17) #define MACHINE_FLAG_SCC _BITUL(17)
#define LPP_MAGIC _BITUL(31) #define LPP_MAGIC _BITUL(31)
#define LPP_PFAULT_PID_MASK _AC(0xffffffff, UL) #define LPP_PID_MASK _AC(0xffffffff, UL)
#ifndef __ASSEMBLY__ #ifndef __ASSEMBLY__
......
...@@ -16,7 +16,7 @@ ...@@ -16,7 +16,7 @@
#include <asm/processor.h> #include <asm/processor.h>
#include <asm/ctl_reg.h> #include <asm/ctl_reg.h>
#include <asm/extable.h> #include <asm/extable.h>
#include <asm/facility.h>
/* /*
* The fs value determines whether argument validity checking should be * The fs value determines whether argument validity checking should be
...@@ -26,27 +26,16 @@ ...@@ -26,27 +26,16 @@
* For historical reasons, these macros are grossly misnamed. * For historical reasons, these macros are grossly misnamed.
*/ */
#define MAKE_MM_SEG(a) ((mm_segment_t) { (a) }) #define KERNEL_DS (0)
#define KERNEL_DS_SACF (1)
#define USER_DS (2)
#define KERNEL_DS MAKE_MM_SEG(0) #define USER_DS_SACF (3)
#define USER_DS MAKE_MM_SEG(1)
#define get_ds() (KERNEL_DS) #define get_ds() (KERNEL_DS)
#define get_fs() (current->thread.mm_segment) #define get_fs() (current->thread.mm_segment)
#define segment_eq(a,b) ((a).ar4 == (b).ar4) #define segment_eq(a,b) (((a) & 2) == ((b) & 2))
static inline void set_fs(mm_segment_t fs) void set_fs(mm_segment_t fs);
{
current->thread.mm_segment = fs;
if (uaccess_kernel()) {
set_cpu_flag(CIF_ASCE_SECONDARY);
__ctl_load(S390_lowcore.kernel_asce, 7, 7);
} else {
clear_cpu_flag(CIF_ASCE_SECONDARY);
__ctl_load(S390_lowcore.user_asce, 7, 7);
}
}
static inline int __range_ok(unsigned long addr, unsigned long size) static inline int __range_ok(unsigned long addr, unsigned long size)
{ {
...@@ -95,7 +84,7 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n); ...@@ -95,7 +84,7 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n);
static inline int __put_user_fn(void *x, void __user *ptr, unsigned long size) static inline int __put_user_fn(void *x, void __user *ptr, unsigned long size)
{ {
unsigned long spec = 0x810000UL; unsigned long spec = 0x010000UL;
int rc; int rc;
switch (size) { switch (size) {
...@@ -125,7 +114,7 @@ static inline int __put_user_fn(void *x, void __user *ptr, unsigned long size) ...@@ -125,7 +114,7 @@ static inline int __put_user_fn(void *x, void __user *ptr, unsigned long size)
static inline int __get_user_fn(void *x, const void __user *ptr, unsigned long size) static inline int __get_user_fn(void *x, const void __user *ptr, unsigned long size)
{ {
unsigned long spec = 0x81UL; unsigned long spec = 0x01UL;
int rc; int rc;
switch (size) { switch (size) {
......
...@@ -46,6 +46,7 @@ struct vdso_per_cpu_data { ...@@ -46,6 +46,7 @@ struct vdso_per_cpu_data {
}; };
extern struct vdso_data *vdso_data; extern struct vdso_data *vdso_data;
extern struct vdso_data boot_vdso_data;
void vdso_alloc_boot_cpu(struct lowcore *lowcore); void vdso_alloc_boot_cpu(struct lowcore *lowcore);
int vdso_alloc_per_cpu(struct lowcore *lowcore); int vdso_alloc_per_cpu(struct lowcore *lowcore);
......
#ifndef _ASM_S390_PERF_REGS_H
#define _ASM_S390_PERF_REGS_H

/*
 * Register indices used for s390 perf register sampling.
 *
 * The numbering is positional: the R block comes first, the FP
 * (floating-point) block follows it, and the two trailing entries come
 * last.  PERF_REG_S390_MAX is one past the last valid index and so equals
 * the number of entries.
 */
enum perf_event_s390_regs {
	/* R0 - R15: indices 0 - 15 */
	PERF_REG_S390_R0 = 0,
	PERF_REG_S390_R1,
	PERF_REG_S390_R2,
	PERF_REG_S390_R3,
	PERF_REG_S390_R4,
	PERF_REG_S390_R5,
	PERF_REG_S390_R6,
	PERF_REG_S390_R7,
	PERF_REG_S390_R8,
	PERF_REG_S390_R9,
	PERF_REG_S390_R10,
	PERF_REG_S390_R11,
	PERF_REG_S390_R12,
	PERF_REG_S390_R13,
	PERF_REG_S390_R14,
	PERF_REG_S390_R15,

	/* Floating-point registers FP0 - FP15: indices 16 - 31 */
	PERF_REG_S390_FP0 = 16,
	PERF_REG_S390_FP1,
	PERF_REG_S390_FP2,
	PERF_REG_S390_FP3,
	PERF_REG_S390_FP4,
	PERF_REG_S390_FP5,
	PERF_REG_S390_FP6,
	PERF_REG_S390_FP7,
	PERF_REG_S390_FP8,
	PERF_REG_S390_FP9,
	PERF_REG_S390_FP10,
	PERF_REG_S390_FP11,
	PERF_REG_S390_FP12,
	PERF_REG_S390_FP13,
	PERF_REG_S390_FP14,
	PERF_REG_S390_FP15,

	/*
	 * NOTE(review): presumably the PSW mask and the PSW address
	 * (program counter) - confirm against the perf_regs implementation.
	 */
	PERF_REG_S390_MASK = 32,
	PERF_REG_S390_PC,

	/* Number of entries above; not a register itself */
	PERF_REG_S390_MAX
};

#endif /* _ASM_S390_PERF_REGS_H */
...@@ -59,7 +59,7 @@ obj-y += processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o ...@@ -59,7 +59,7 @@ obj-y += processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o
obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o als.o obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o als.o
obj-y += sysinfo.o jump_label.o lgr.o os_info.o machine_kexec.o pgm_check.o obj-y += sysinfo.o jump_label.o lgr.o os_info.o machine_kexec.o pgm_check.o
obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o
obj-y += entry.o reipl.o relocate_kernel.o kdebugfs.o obj-y += entry.o reipl.o relocate_kernel.o kdebugfs.o alternative.o
extra-y += head.o head64.o vmlinux.lds extra-y += head.o head64.o vmlinux.lds
...@@ -77,10 +77,9 @@ obj-$(CONFIG_KPROBES) += kprobes.o ...@@ -77,10 +77,9 @@ obj-$(CONFIG_KPROBES) += kprobes.o
obj-$(CONFIG_FUNCTION_TRACER) += mcount.o ftrace.o obj-$(CONFIG_FUNCTION_TRACER) += mcount.o ftrace.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
obj-$(CONFIG_UPROBES) += uprobes.o obj-$(CONFIG_UPROBES) += uprobes.o
obj-$(CONFIG_ALTERNATIVES) += alternative.o
obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_cpum_cf.o perf_cpum_sf.o obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_cpum_cf.o perf_cpum_sf.o
obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf_events.o obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf_events.o perf_regs.o
obj-$(CONFIG_TRACEPOINTS) += trace.o obj-$(CONFIG_TRACEPOINTS) += trace.o
......
...@@ -171,6 +171,7 @@ int main(void) ...@@ -171,6 +171,7 @@ int main(void)
OFFSET(__LC_RESTART_DATA, lowcore, restart_data); OFFSET(__LC_RESTART_DATA, lowcore, restart_data);
OFFSET(__LC_RESTART_SOURCE, lowcore, restart_source); OFFSET(__LC_RESTART_SOURCE, lowcore, restart_source);
OFFSET(__LC_USER_ASCE, lowcore, user_asce); OFFSET(__LC_USER_ASCE, lowcore, user_asce);
OFFSET(__LC_VDSO_ASCE, lowcore, vdso_asce);
OFFSET(__LC_LPP, lowcore, lpp); OFFSET(__LC_LPP, lowcore, lpp);
OFFSET(__LC_CURRENT_PID, lowcore, current_pid); OFFSET(__LC_CURRENT_PID, lowcore, current_pid);
OFFSET(__LC_PERCPU_OFFSET, lowcore, percpu_offset); OFFSET(__LC_PERCPU_OFFSET, lowcore, percpu_offset);
...@@ -178,7 +179,6 @@ int main(void) ...@@ -178,7 +179,6 @@ int main(void)
OFFSET(__LC_MACHINE_FLAGS, lowcore, machine_flags); OFFSET(__LC_MACHINE_FLAGS, lowcore, machine_flags);
OFFSET(__LC_PREEMPT_COUNT, lowcore, preempt_count); OFFSET(__LC_PREEMPT_COUNT, lowcore, preempt_count);
OFFSET(__LC_GMAP, lowcore, gmap); OFFSET(__LC_GMAP, lowcore, gmap);
OFFSET(__LC_PASTE, lowcore, paste);
/* software defined ABI-relevant lowcore locations 0xe00 - 0xe20 */ /* software defined ABI-relevant lowcore locations 0xe00 - 0xe20 */
OFFSET(__LC_DUMP_REIPL, lowcore, ipib); OFFSET(__LC_DUMP_REIPL, lowcore, ipib);
/* hardware defined lowcore locations 0x1000 - 0x18ff */ /* hardware defined lowcore locations 0x1000 - 0x18ff */
......
...@@ -480,7 +480,7 @@ void show_code(struct pt_regs *regs) ...@@ -480,7 +480,7 @@ void show_code(struct pt_regs *regs)
{ {
char *mode = user_mode(regs) ? "User" : "Krnl"; char *mode = user_mode(regs) ? "User" : "Krnl";
unsigned char code[64]; unsigned char code[64];
char buffer[64], *ptr; char buffer[128], *ptr;
mm_segment_t old_fs; mm_segment_t old_fs;
unsigned long addr; unsigned long addr;
int start, end, opsize, hops, i; int start, end, opsize, hops, i;
...@@ -543,7 +543,7 @@ void show_code(struct pt_regs *regs) ...@@ -543,7 +543,7 @@ void show_code(struct pt_regs *regs)
start += opsize; start += opsize;
pr_cont("%s", buffer); pr_cont("%s", buffer);
ptr = buffer; ptr = buffer;
ptr += sprintf(ptr, "\n "); ptr += sprintf(ptr, "\n\t ");
hops++; hops++;
} }
pr_cont("\n"); pr_cont("\n");
......
...@@ -379,13 +379,21 @@ ENTRY(system_call) ...@@ -379,13 +379,21 @@ ENTRY(system_call)
jg s390_handle_mcck # TIF bit will be cleared by handler jg s390_handle_mcck # TIF bit will be cleared by handler
# #
# _CIF_ASCE_PRIMARY and/or CIF_ASCE_SECONDARY set, load user space asce # _CIF_ASCE_PRIMARY and/or _CIF_ASCE_SECONDARY set, load user space asce
# #
.Lsysc_asce: .Lsysc_asce:
ni __LC_CPU_FLAGS+7,255-_CIF_ASCE_SECONDARY
lctlg %c7,%c7,__LC_VDSO_ASCE # load secondary asce
TSTMSK __LC_CPU_FLAGS,_CIF_ASCE_PRIMARY
jz .Lsysc_return
#ifndef CONFIG_HAVE_MARCH_Z10_FEATURES
tm __LC_STFLE_FAC_LIST+3,0x10 # has MVCOS ?
jnz .Lsysc_set_fs_fixup
ni __LC_CPU_FLAGS+7,255-_CIF_ASCE_PRIMARY ni __LC_CPU_FLAGS+7,255-_CIF_ASCE_PRIMARY
lctlg %c1,%c1,__LC_USER_ASCE # load primary asce lctlg %c1,%c1,__LC_USER_ASCE # load primary asce
TSTMSK __LC_CPU_FLAGS,_CIF_ASCE_SECONDARY j .Lsysc_return
jz .Lsysc_return .Lsysc_set_fs_fixup:
#endif
larl %r14,.Lsysc_return larl %r14,.Lsysc_return
jg set_fs_fixup jg set_fs_fixup
...@@ -518,6 +526,7 @@ ENTRY(pgm_check_handler) ...@@ -518,6 +526,7 @@ ENTRY(pgm_check_handler)
stmg %r8,%r15,__LC_SAVE_AREA_SYNC stmg %r8,%r15,__LC_SAVE_AREA_SYNC
lg %r10,__LC_LAST_BREAK lg %r10,__LC_LAST_BREAK
lg %r12,__LC_CURRENT lg %r12,__LC_CURRENT
lghi %r11,0
larl %r13,cleanup_critical larl %r13,cleanup_critical
lmg %r8,%r9,__LC_PGM_OLD_PSW lmg %r8,%r9,__LC_PGM_OLD_PSW
tmhh %r8,0x0001 # test problem state bit tmhh %r8,0x0001 # test problem state bit
...@@ -532,6 +541,7 @@ ENTRY(pgm_check_handler) ...@@ -532,6 +541,7 @@ ENTRY(pgm_check_handler)
ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE
lctlg %c1,%c1,__LC_USER_ASCE # load primary asce lctlg %c1,%c1,__LC_USER_ASCE # load primary asce
larl %r9,sie_exit # skip forward to sie_exit larl %r9,sie_exit # skip forward to sie_exit
lghi %r11,_PIF_GUEST_FAULT
#endif #endif
0: tmhh %r8,0x4000 # PER bit set in old PSW ? 0: tmhh %r8,0x4000 # PER bit set in old PSW ?
jnz 1f # -> enabled, can't be a double fault jnz 1f # -> enabled, can't be a double fault
...@@ -549,13 +559,14 @@ ENTRY(pgm_check_handler) ...@@ -549,13 +559,14 @@ ENTRY(pgm_check_handler)
jz 3f jz 3f
mvc __THREAD_trap_tdb(256,%r14),0(%r13) mvc __THREAD_trap_tdb(256,%r14),0(%r13)
3: stg %r10,__THREAD_last_break(%r14) 3: stg %r10,__THREAD_last_break(%r14)
4: la %r11,STACK_FRAME_OVERHEAD(%r15) 4: lgr %r13,%r11
la %r11,STACK_FRAME_OVERHEAD(%r15)
stmg %r0,%r7,__PT_R0(%r11) stmg %r0,%r7,__PT_R0(%r11)
mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC
stmg %r8,%r9,__PT_PSW(%r11) stmg %r8,%r9,__PT_PSW(%r11)
mvc __PT_INT_CODE(4,%r11),__LC_PGM_ILC mvc __PT_INT_CODE(4,%r11),__LC_PGM_ILC
mvc __PT_INT_PARM_LONG(8,%r11),__LC_TRANS_EXC_CODE mvc __PT_INT_PARM_LONG(8,%r11),__LC_TRANS_EXC_CODE
xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) stg %r13,__PT_FLAGS(%r11)
stg %r10,__PT_ARGS(%r11) stg %r10,__PT_ARGS(%r11)
tm __LC_PGM_ILC+3,0x80 # check for per exception tm __LC_PGM_ILC+3,0x80 # check for per exception
jz 5f jz 5f
...@@ -738,10 +749,18 @@ ENTRY(io_int_handler) ...@@ -738,10 +749,18 @@ ENTRY(io_int_handler)
# _CIF_ASCE_PRIMARY and/or _CIF_ASCE_SECONDARY set, load user space asce # _CIF_ASCE_PRIMARY and/or _CIF_ASCE_SECONDARY set, load user space asce
# #
.Lio_asce: .Lio_asce:
ni __LC_CPU_FLAGS+7,255-_CIF_ASCE_SECONDARY
lctlg %c7,%c7,__LC_VDSO_ASCE # load secondary asce
TSTMSK __LC_CPU_FLAGS,_CIF_ASCE_PRIMARY
jz .Lio_return
#ifndef CONFIG_HAVE_MARCH_Z10_FEATURES
tm __LC_STFLE_FAC_LIST+3,0x10 # has MVCOS ?
jnz .Lio_set_fs_fixup
ni __LC_CPU_FLAGS+7,255-_CIF_ASCE_PRIMARY ni __LC_CPU_FLAGS+7,255-_CIF_ASCE_PRIMARY
lctlg %c1,%c1,__LC_USER_ASCE # load primary asce lctlg %c1,%c1,__LC_USER_ASCE # load primary asce
TSTMSK __LC_CPU_FLAGS,_CIF_ASCE_SECONDARY j .Lio_return
jz .Lio_return .Lio_set_fs_fixup:
#endif
larl %r14,.Lio_return larl %r14,.Lio_return
jg set_fs_fixup jg set_fs_fixup
......
...@@ -28,7 +28,7 @@ ENTRY(startup_continue) ...@@ -28,7 +28,7 @@ ENTRY(startup_continue)
lctlg %c0,%c15,.Lctl-.LPG1(%r13) # load control registers lctlg %c0,%c15,.Lctl-.LPG1(%r13) # load control registers
lg %r12,.Lparmaddr-.LPG1(%r13) # pointer to parameter area lg %r12,.Lparmaddr-.LPG1(%r13) # pointer to parameter area
# move IPL device to lowcore # move IPL device to lowcore
lghi %r0,__LC_PASTE larl %r0,boot_vdso_data
stg %r0,__LC_VDSO_PER_CPU stg %r0,__LC_VDSO_PER_CPU
# #
# Setup stack # Setup stack
......
...@@ -433,16 +433,13 @@ int module_finalize(const Elf_Ehdr *hdr, ...@@ -433,16 +433,13 @@ int module_finalize(const Elf_Ehdr *hdr,
const Elf_Shdr *s; const Elf_Shdr *s;
char *secstrings; char *secstrings;
if (IS_ENABLED(CONFIG_ALTERNATIVES)) { secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { if (!strcmp(".altinstructions", secstrings + s->sh_name)) {
if (!strcmp(".altinstructions", /* patch .altinstructions */
secstrings + s->sh_name)) { void *aseg = (void *)s->sh_addr;
/* patch .altinstructions */
void *aseg = (void *)s->sh_addr;
apply_alternatives(aseg, aseg + s->sh_size); apply_alternatives(aseg, aseg + s->sh_size);
}
} }
} }
......
...@@ -191,7 +191,6 @@ static int notrace s390_check_registers(union mci mci, int umode) ...@@ -191,7 +191,6 @@ static int notrace s390_check_registers(union mci mci, int umode)
{ {
union ctlreg2 cr2; union ctlreg2 cr2;
int kill_task; int kill_task;
void *fpt_save_area;
kill_task = 0; kill_task = 0;
...@@ -224,7 +223,6 @@ static int notrace s390_check_registers(union mci mci, int umode) ...@@ -224,7 +223,6 @@ static int notrace s390_check_registers(union mci mci, int umode)
if (!test_cpu_flag(CIF_FPU)) if (!test_cpu_flag(CIF_FPU))
kill_task = 1; kill_task = 1;
} }
fpt_save_area = &S390_lowcore.floating_pt_save_area;
if (!mci.fc) { if (!mci.fc) {
/* /*
* Floating point control register can't be restored. * Floating point control register can't be restored.
......
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
#include <linux/kernel_stat.h> #include <linux/kernel_stat.h>
#include <linux/perf_event.h> #include <linux/perf_event.h>
#include <linux/percpu.h> #include <linux/percpu.h>
#include <linux/pid.h>
#include <linux/notifier.h> #include <linux/notifier.h>
#include <linux/export.h> #include <linux/export.h>
#include <linux/slab.h> #include <linux/slab.h>
...@@ -77,6 +78,15 @@ struct sf_buffer { ...@@ -77,6 +78,15 @@ struct sf_buffer {
unsigned long *tail; /* last sample-data-block-table */ unsigned long *tail; /* last sample-data-block-table */
}; };
/*
 * Bookkeeping for an AUX buffer used in diagnostic-sampling mode.
 *
 * head, alert_mark and empty_mark are SDB indices. They are reduced
 * modulo sfb.num_sdb (AUX_SDB_INDEX()) whenever they are used to look up
 * an entry in sdb_index.
 */
struct aux_buffer {
	struct sf_buffer sfb;		/* SDBT chain plus SDB/SDBT counters */
	unsigned long head;		/* index of SDB of buffer head */
	unsigned long alert_mark;	/* index of SDB of alert request position */
	unsigned long empty_mark;	/* mark of SDB not marked full */
	unsigned long *sdb_index;	/* SDB address for fast lookup */
	unsigned long *sdbt_index;	/* SDBT address for fast lookup */
};
struct cpu_hw_sf { struct cpu_hw_sf {
/* CPU-measurement sampling information block */ /* CPU-measurement sampling information block */
struct hws_qsi_info_block qsi; struct hws_qsi_info_block qsi;
...@@ -85,6 +95,7 @@ struct cpu_hw_sf { ...@@ -85,6 +95,7 @@ struct cpu_hw_sf {
struct sf_buffer sfb; /* Sampling buffer */ struct sf_buffer sfb; /* Sampling buffer */
unsigned int flags; /* Status flags */ unsigned int flags; /* Status flags */
struct perf_event *event; /* Scheduled perf event */ struct perf_event *event; /* Scheduled perf event */
struct perf_output_handle handle; /* AUX buffer output handle */
}; };
static DEFINE_PER_CPU(struct cpu_hw_sf, cpu_hw_sf); static DEFINE_PER_CPU(struct cpu_hw_sf, cpu_hw_sf);
...@@ -341,22 +352,6 @@ static void sfb_init_allocs(unsigned long num, struct hw_perf_event *hwc) ...@@ -341,22 +352,6 @@ static void sfb_init_allocs(unsigned long num, struct hw_perf_event *hwc)
sfb_account_allocs(num, hwc); sfb_account_allocs(num, hwc);
} }
/*
 * Size of one sample in the sampling buffer for this event.
 *
 * Basic-sampling data is always part of a sample; the diagnostic-sampling
 * data entry size is added only when the event runs in diagnostic mode.
 */
static size_t event_sample_size(struct hw_perf_event *hwc)
{
	struct sf_raw_sample *raw = (struct sf_raw_sample *) RAWSAMPLE_REG(hwc);
	size_t total = raw->bsdes;

	if (SAMPL_DIAG_MODE(hwc))
		total += raw->dsdes;

	return total;
}
static void deallocate_buffers(struct cpu_hw_sf *cpuhw) static void deallocate_buffers(struct cpu_hw_sf *cpuhw)
{ {
if (cpuhw->sfb.sdbt) if (cpuhw->sfb.sdbt)
...@@ -366,35 +361,7 @@ static void deallocate_buffers(struct cpu_hw_sf *cpuhw) ...@@ -366,35 +361,7 @@ static void deallocate_buffers(struct cpu_hw_sf *cpuhw)
static int allocate_buffers(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc) static int allocate_buffers(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc)
{ {
unsigned long n_sdb, freq, factor; unsigned long n_sdb, freq, factor;
size_t sfr_size, sample_size; size_t sample_size;
struct sf_raw_sample *sfr;
/* Allocate raw sample buffer
*
* The raw sample buffer is used to temporarily store sampling data
* entries for perf raw sample processing. The buffer size mainly
* depends on the size of diagnostic-sampling data entries which is
* machine-specific. The exact size calculation includes:
* 1. The first 4 bytes of diagnostic-sampling data entries are
* already reflected in the sf_raw_sample structure. Subtract
* these bytes.
* 2. The perf raw sample data must be 8-byte aligned (u64) and
* perf's internal data size must be considered too. So add
* an additional u32 for correct alignment and subtract before
* allocating the buffer.
* 3. Store the raw sample buffer pointer in the perf event
* hardware structure.
*/
sfr_size = ALIGN((sizeof(*sfr) - sizeof(sfr->diag) + cpuhw->qsi.dsdes) +
sizeof(u32), sizeof(u64));
sfr_size -= sizeof(u32);
sfr = kzalloc(sfr_size, GFP_KERNEL);
if (!sfr)
return -ENOMEM;
sfr->size = sfr_size;
sfr->bsdes = cpuhw->qsi.bsdes;
sfr->dsdes = cpuhw->qsi.dsdes;
RAWSAMPLE_REG(hwc) = (unsigned long) sfr;
/* Calculate sampling buffers using 4K pages /* Calculate sampling buffers using 4K pages
* *
...@@ -420,7 +387,7 @@ static int allocate_buffers(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc) ...@@ -420,7 +387,7 @@ static int allocate_buffers(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc)
* ensure a minimum of CPUM_SF_MIN_SDBT (one table can manage up * ensure a minimum of CPUM_SF_MIN_SDBT (one table can manage up
* to 511 SDBs). * to 511 SDBs).
*/ */
sample_size = event_sample_size(hwc); sample_size = sizeof(struct hws_basic_entry);
freq = sample_rate_to_freq(&cpuhw->qsi, SAMPL_RATE(hwc)); freq = sample_rate_to_freq(&cpuhw->qsi, SAMPL_RATE(hwc));
factor = 1; factor = 1;
n_sdb = DIV_ROUND_UP(freq, factor * ((PAGE_SIZE-64) / sample_size)); n_sdb = DIV_ROUND_UP(freq, factor * ((PAGE_SIZE-64) / sample_size));
...@@ -619,10 +586,6 @@ static int reserve_pmc_hardware(void) ...@@ -619,10 +586,6 @@ static int reserve_pmc_hardware(void)
static void hw_perf_event_destroy(struct perf_event *event) static void hw_perf_event_destroy(struct perf_event *event)
{ {
/* Free raw sample buffer */
if (RAWSAMPLE_REG(&event->hw))
kfree((void *) RAWSAMPLE_REG(&event->hw));
/* Release PMC if this is the last perf event */ /* Release PMC if this is the last perf event */
if (!atomic_add_unless(&num_events, -1, 1)) { if (!atomic_add_unless(&num_events, -1, 1)) {
mutex_lock(&pmc_reserve_mutex); mutex_lock(&pmc_reserve_mutex);
...@@ -642,15 +605,8 @@ static void hw_init_period(struct hw_perf_event *hwc, u64 period) ...@@ -642,15 +605,8 @@ static void hw_init_period(struct hw_perf_event *hwc, u64 period)
static void hw_reset_registers(struct hw_perf_event *hwc, static void hw_reset_registers(struct hw_perf_event *hwc,
unsigned long *sdbt_origin) unsigned long *sdbt_origin)
{ {
struct sf_raw_sample *sfr;
/* (Re)set to first sample-data-block-table */ /* (Re)set to first sample-data-block-table */
TEAR_REG(hwc) = (unsigned long) sdbt_origin; TEAR_REG(hwc) = (unsigned long) sdbt_origin;
/* (Re)set raw sampling buffer register */
sfr = (struct sf_raw_sample *) RAWSAMPLE_REG(hwc);
memset(&sfr->basic, 0, sizeof(sfr->basic));
memset(&sfr->diag, 0, sfr->dsdes);
} }
static unsigned long hw_limit_rate(const struct hws_qsi_info_block *si, static unsigned long hw_limit_rate(const struct hws_qsi_info_block *si,
...@@ -660,6 +616,67 @@ static unsigned long hw_limit_rate(const struct hws_qsi_info_block *si, ...@@ -660,6 +616,67 @@ static unsigned long hw_limit_rate(const struct hws_qsi_info_block *si,
si->min_sampl_rate, si->max_sampl_rate); si->min_sampl_rate, si->max_sampl_rate);
} }
/*
 * Translate @pid (looked up in the initial pid namespace) into the id of
 * kind @type as seen from the namespace of @event.
 *
 * Returns 0 for pid 0 (idle process), (u32)-1 if no task is found or the
 * task resolved to 0 while no longer alive, and the translated id
 * otherwise.
 */
static u32 cpumsf_pid_type(struct perf_event *event,
			   u32 pid, enum pid_type type)
{
	struct perf_event *top_event;
	struct task_struct *task;
	u32 resolved;

	/* Idle process: nothing to translate */
	if (!pid)
		return pid;

	task = find_task_by_pid_ns(pid, &init_pid_ns);
	if (!task)
		return -1;

	/*
	 * Only top level events contain the pid namespace in which
	 * they are created, so step up to the parent if there is one.
	 */
	top_event = event->parent ? event->parent : event;
	resolved = __task_pid_nr_ns(task, type, top_event->ns);

	/*
	 * See also 1d953111b648
	 * "perf/core: Don't report zero PIDs for exiting tasks".
	 */
	if (!resolved && !pid_alive(task))
		return -1;

	return resolved;
}
static void cpumsf_output_event_pid(struct perf_event *event,
struct perf_sample_data *data,
struct pt_regs *regs)
{
u32 pid;
struct perf_event_header header;
struct perf_output_handle handle;
/*
* Obtain the PID from the basic-sampling data entry and
* correct the data->tid_entry.pid value.
*/
pid = data->tid_entry.pid;
/* Protect callchain buffers, tasks */
rcu_read_lock();
perf_prepare_sample(&header, data, event, regs);
if (perf_output_begin(&handle, event, header.size))
goto out;
/* Update the process ID (see also kernel/events/core.c) */
data->tid_entry.pid = cpumsf_pid_type(event, pid, __PIDTYPE_TGID);
data->tid_entry.tid = cpumsf_pid_type(event, pid, PIDTYPE_PID);
perf_output_sample(&handle, &header, data, event);
perf_output_end(&handle);
out:
rcu_read_unlock();
}
static int __hw_perf_event_init(struct perf_event *event) static int __hw_perf_event_init(struct perf_event *event)
{ {
struct cpu_hw_sf *cpuhw; struct cpu_hw_sf *cpuhw;
...@@ -770,6 +787,10 @@ static int __hw_perf_event_init(struct perf_event *event) ...@@ -770,6 +787,10 @@ static int __hw_perf_event_init(struct perf_event *event)
hwc->extra_reg.reg = REG_OVERFLOW; hwc->extra_reg.reg = REG_OVERFLOW;
OVERFLOW_REG(hwc) = 0; OVERFLOW_REG(hwc) = 0;
/* Use AUX buffer. No need to allocate it by ourself */
if (attr->config == PERF_EVENT_CPUM_SF_DIAG)
return 0;
/* Allocate the per-CPU sampling buffer using the CPU information /* Allocate the per-CPU sampling buffer using the CPU information
* from the event. If the event is not pinned to a particular * from the event. If the event is not pinned to a particular
* CPU (event->cpu == -1; or cpuhw == NULL), allocate sampling * CPU (event->cpu == -1; or cpuhw == NULL), allocate sampling
...@@ -789,6 +810,14 @@ static int __hw_perf_event_init(struct perf_event *event) ...@@ -789,6 +810,14 @@ static int __hw_perf_event_init(struct perf_event *event)
break; break;
} }
} }
/* If PID/TID sampling is active, replace the default overflow
* handler to extract and resolve the PIDs from the basic-sampling
* data entries.
*/
if (event->attr.sample_type & PERF_SAMPLE_TID)
if (is_default_overflow_handler(event))
event->overflow_handler = cpumsf_output_event_pid;
out: out:
return err; return err;
} }
...@@ -866,10 +895,15 @@ static void cpumsf_pmu_enable(struct pmu *pmu) ...@@ -866,10 +895,15 @@ static void cpumsf_pmu_enable(struct pmu *pmu)
*/ */
if (cpuhw->event) { if (cpuhw->event) {
hwc = &cpuhw->event->hw; hwc = &cpuhw->event->hw;
/* Account number of overflow-designated buffer extents */ if (!(SAMPL_DIAG_MODE(hwc))) {
sfb_account_overflows(cpuhw, hwc); /*
if (sfb_has_pending_allocs(&cpuhw->sfb, hwc)) * Account number of overflow-designated
extend_sampling_buffer(&cpuhw->sfb, hwc); * buffer extents
*/
sfb_account_overflows(cpuhw, hwc);
if (sfb_has_pending_allocs(&cpuhw->sfb, hwc))
extend_sampling_buffer(&cpuhw->sfb, hwc);
}
} }
/* (Re)enable the PMU and sampling facility */ /* (Re)enable the PMU and sampling facility */
...@@ -884,6 +918,9 @@ static void cpumsf_pmu_enable(struct pmu *pmu) ...@@ -884,6 +918,9 @@ static void cpumsf_pmu_enable(struct pmu *pmu)
return; return;
} }
/* Load current program parameter */
lpp(&S390_lowcore.lpp);
debug_sprintf_event(sfdbg, 6, "pmu_enable: es=%i cs=%i ed=%i cd=%i " debug_sprintf_event(sfdbg, 6, "pmu_enable: es=%i cs=%i ed=%i cd=%i "
"tear=%p dear=%p\n", cpuhw->lsctl.es, cpuhw->lsctl.cs, "tear=%p dear=%p\n", cpuhw->lsctl.es, cpuhw->lsctl.cs,
cpuhw->lsctl.ed, cpuhw->lsctl.cd, cpuhw->lsctl.ed, cpuhw->lsctl.cd,
...@@ -967,22 +1004,16 @@ static int perf_exclude_event(struct perf_event *event, struct pt_regs *regs, ...@@ -967,22 +1004,16 @@ static int perf_exclude_event(struct perf_event *event, struct pt_regs *regs,
* *
* Return non-zero if an event overflow occurred. * Return non-zero if an event overflow occurred.
*/ */
static int perf_push_sample(struct perf_event *event, struct sf_raw_sample *sfr) static int perf_push_sample(struct perf_event *event,
struct hws_basic_entry *basic)
{ {
int overflow; int overflow;
struct pt_regs regs; struct pt_regs regs;
struct perf_sf_sde_regs *sde_regs; struct perf_sf_sde_regs *sde_regs;
struct perf_sample_data data; struct perf_sample_data data;
struct perf_raw_record raw = {
.frag = {
.size = sfr->size,
.data = sfr,
},
};
/* Setup perf sample */ /* Setup perf sample */
perf_sample_data_init(&data, 0, event->hw.last_period); perf_sample_data_init(&data, 0, event->hw.last_period);
data.raw = &raw;
/* Setup pt_regs to look like an CPU-measurement external interrupt /* Setup pt_regs to look like an CPU-measurement external interrupt
* using the Program Request Alert code. The regs.int_parm_long * using the Program Request Alert code. The regs.int_parm_long
...@@ -994,11 +1025,11 @@ static int perf_push_sample(struct perf_event *event, struct sf_raw_sample *sfr) ...@@ -994,11 +1025,11 @@ static int perf_push_sample(struct perf_event *event, struct sf_raw_sample *sfr)
regs.int_parm = CPU_MF_INT_SF_PRA; regs.int_parm = CPU_MF_INT_SF_PRA;
sde_regs = (struct perf_sf_sde_regs *) &regs.int_parm_long; sde_regs = (struct perf_sf_sde_regs *) &regs.int_parm_long;
psw_bits(regs.psw).ia = sfr->basic.ia; psw_bits(regs.psw).ia = basic->ia;
psw_bits(regs.psw).dat = sfr->basic.T; psw_bits(regs.psw).dat = basic->T;
psw_bits(regs.psw).wait = sfr->basic.W; psw_bits(regs.psw).wait = basic->W;
psw_bits(regs.psw).pstate = sfr->basic.P; psw_bits(regs.psw).pstate = basic->P;
psw_bits(regs.psw).as = sfr->basic.AS; psw_bits(regs.psw).as = basic->AS;
/* /*
* Use the hardware provided configuration level to decide if the * Use the hardware provided configuration level to decide if the
...@@ -1011,7 +1042,7 @@ static int perf_push_sample(struct perf_event *event, struct sf_raw_sample *sfr) ...@@ -1011,7 +1042,7 @@ static int perf_push_sample(struct perf_event *event, struct sf_raw_sample *sfr)
* If the value differs from 0xffff (the host value), we assume to * If the value differs from 0xffff (the host value), we assume to
* be a KVM guest. * be a KVM guest.
*/ */
switch (sfr->basic.CL) { switch (basic->CL) {
case 1: /* logical partition */ case 1: /* logical partition */
sde_regs->in_guest = 0; sde_regs->in_guest = 0;
break; break;
...@@ -1019,11 +1050,17 @@ static int perf_push_sample(struct perf_event *event, struct sf_raw_sample *sfr) ...@@ -1019,11 +1050,17 @@ static int perf_push_sample(struct perf_event *event, struct sf_raw_sample *sfr)
sde_regs->in_guest = 1; sde_regs->in_guest = 1;
break; break;
default: /* old machine, use heuristics */ default: /* old machine, use heuristics */
if (sfr->basic.gpp || sfr->basic.prim_asn != 0xffff) if (basic->gpp || basic->prim_asn != 0xffff)
sde_regs->in_guest = 1; sde_regs->in_guest = 1;
break; break;
} }
/*
* Store the PID value from the sample-data-entry to be
* processed and resolved by cpumsf_output_event_pid().
*/
data.tid_entry.pid = basic->hpp & LPP_PID_MASK;
overflow = 0; overflow = 0;
if (perf_exclude_event(event, &regs, sde_regs)) if (perf_exclude_event(event, &regs, sde_regs))
goto out; goto out;
...@@ -1041,75 +1078,12 @@ static void perf_event_count_update(struct perf_event *event, u64 count) ...@@ -1041,75 +1078,12 @@ static void perf_event_count_update(struct perf_event *event, u64 count)
local64_add(count, &event->count); local64_add(count, &event->count);
} }
static int sample_format_is_valid(struct hws_combined_entry *sample, static void debug_sample_entry(struct hws_basic_entry *sample,
unsigned int flags) struct hws_trailer_entry *te)
{
if (likely(flags & PERF_CPUM_SF_BASIC_MODE))
/* Only basic-sampling data entries with data-entry-format
* version of 0x0001 can be processed.
*/
if (sample->basic.def != 0x0001)
return 0;
if (flags & PERF_CPUM_SF_DIAG_MODE)
/* The data-entry-format number of diagnostic-sampling data
* entries can vary. Because diagnostic data is just passed
* through, do only a sanity check on the DEF.
*/
if (sample->diag.def < 0x8001)
return 0;
return 1;
}
/*
 * Decide whether the basic-sampling part of @sample can be delivered.
 *
 * Returns 1 only if basic-sampling mode is set in @flags and the entry is
 * neither marked invalid (I) nor taken in wait state (W); 0 otherwise.
 */
static int sample_is_consistent(struct hws_combined_entry *sample,
				unsigned long flags)
{
	/*
	 * Without basic-sampling mode there is no basic entry to check, and
	 * an invalid basic entry has no associated diagnostic entry either.
	 * Entries with the wait-state bit set are skipped because their
	 * instruction address is not predictable.
	 */
	return likely(flags & PERF_CPUM_SF_BASIC_MODE) &&
	       !sample->basic.I && !sample->basic.W;
}
/*
 * Mark a sample slot as empty by zeroing the data-entry-format field of
 * each entry type that is enabled in @flags.
 */
static void reset_sample_slot(struct hws_combined_entry *sample,
			      unsigned long flags)
{
	if (likely(flags & PERF_CPUM_SF_BASIC_MODE)) {
		/* basic-sampling entry */
		sample->basic.def = 0;
	}
	if (flags & PERF_CPUM_SF_DIAG_MODE) {
		/* diagnostic-sampling entry */
		sample->diag.def = 0;
	}
}
/*
 * Copy the parts of @sample selected by sfr->format into the raw sample
 * buffer @sfr: the basic entry by struct assignment, the diagnostic entry
 * as sfr->dsdes raw bytes.
 */
static void sfr_store_sample(struct sf_raw_sample *sfr,
			     struct hws_combined_entry *sample)
{
	if (likely(sfr->format & PERF_CPUM_SF_BASIC_MODE)) {
		sfr->basic = sample->basic;
	}
	if (sfr->format & PERF_CPUM_SF_DIAG_MODE) {
		memcpy(&sfr->diag, &sample->diag, sfr->dsdes);
	}
}
static void debug_sample_entry(struct hws_combined_entry *sample,
struct hws_trailer_entry *te,
unsigned long flags)
{ {
debug_sprintf_event(sfdbg, 4, "hw_collect_samples: Found unknown " debug_sprintf_event(sfdbg, 4, "hw_collect_samples: Found unknown "
"sampling data entry: te->f=%i basic.def=%04x (%p)" "sampling data entry: te->f=%i basic.def=%04x (%p)\n",
" diag.def=%04x (%p)\n", te->f, te->f, sample->def, sample);
sample->basic.def, &sample->basic,
(flags & PERF_CPUM_SF_DIAG_MODE)
? sample->diag.def : 0xFFFF,
(flags & PERF_CPUM_SF_DIAG_MODE)
? &sample->diag : NULL);
} }
/* hw_collect_samples() - Walk through a sample-data-block and collect samples /* hw_collect_samples() - Walk through a sample-data-block and collect samples
...@@ -1135,44 +1109,37 @@ static void debug_sample_entry(struct hws_combined_entry *sample, ...@@ -1135,44 +1109,37 @@ static void debug_sample_entry(struct hws_combined_entry *sample,
static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt, static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt,
unsigned long long *overflow) unsigned long long *overflow)
{ {
unsigned long flags = SAMPL_FLAGS(&event->hw);
struct hws_combined_entry *sample;
struct hws_trailer_entry *te; struct hws_trailer_entry *te;
struct sf_raw_sample *sfr; struct hws_basic_entry *sample;
size_t sample_size;
/* Prepare and initialize raw sample data */
sfr = (struct sf_raw_sample *) RAWSAMPLE_REG(&event->hw);
sfr->format = flags & PERF_CPUM_SF_MODE_MASK;
sample_size = event_sample_size(&event->hw);
te = (struct hws_trailer_entry *) trailer_entry_ptr(*sdbt); te = (struct hws_trailer_entry *) trailer_entry_ptr(*sdbt);
sample = (struct hws_combined_entry *) *sdbt; sample = (struct hws_basic_entry *) *sdbt;
while ((unsigned long *) sample < (unsigned long *) te) { while ((unsigned long *) sample < (unsigned long *) te) {
/* Check for an empty sample */ /* Check for an empty sample */
if (!sample->basic.def) if (!sample->def)
break; break;
/* Update perf event period */ /* Update perf event period */
perf_event_count_update(event, SAMPL_RATE(&event->hw)); perf_event_count_update(event, SAMPL_RATE(&event->hw));
/* Check sampling data entry */ /* Check whether sample is valid */
if (sample_format_is_valid(sample, flags)) { if (sample->def == 0x0001) {
/* If an event overflow occurred, the PMU is stopped to /* If an event overflow occurred, the PMU is stopped to
* throttle event delivery. Remaining sample data is * throttle event delivery. Remaining sample data is
* discarded. * discarded.
*/ */
if (!*overflow) { if (!*overflow) {
if (sample_is_consistent(sample, flags)) { /* Check whether sample is consistent */
if (sample->I == 0 && sample->W == 0) {
/* Deliver sample data to perf */ /* Deliver sample data to perf */
sfr_store_sample(sfr, sample); *overflow = perf_push_sample(event,
*overflow = perf_push_sample(event, sfr); sample);
} }
} else } else
/* Count discarded samples */ /* Count discarded samples */
*overflow += 1; *overflow += 1;
} else { } else {
debug_sample_entry(sample, te, flags); debug_sample_entry(sample, te);
/* Sample slot is not yet written or other record. /* Sample slot is not yet written or other record.
* *
* This condition can occur if the buffer was reused * This condition can occur if the buffer was reused
...@@ -1188,8 +1155,8 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt, ...@@ -1188,8 +1155,8 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt,
} }
/* Reset sample slot and advance to next sample */ /* Reset sample slot and advance to next sample */
reset_sample_slot(sample, flags); sample->def = 0;
sample += sample_size; sample++;
} }
} }
...@@ -1215,6 +1182,13 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all) ...@@ -1215,6 +1182,13 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all)
unsigned long long event_overflow, sampl_overflow, num_sdb, te_flags; unsigned long long event_overflow, sampl_overflow, num_sdb, te_flags;
int done; int done;
/*
* AUX buffer is used when in diagnostic sampling mode.
* No perf events/samples are created.
*/
if (SAMPL_DIAG_MODE(&event->hw))
return;
if (flush_all && SDB_FULL_BLOCKS(hwc)) if (flush_all && SDB_FULL_BLOCKS(hwc))
flush_all = 0; flush_all = 0;
...@@ -1291,6 +1265,439 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all) ...@@ -1291,6 +1265,439 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all)
sampl_overflow, event_overflow); sampl_overflow, event_overflow);
} }
/*
 * SDB index helpers for the AUX buffer.
 *
 * AUX_SDB_INDEX()     - reduce index @i modulo the number of SDBs
 * AUX_SDB_NUM()       - number of SDBs in the inclusive range
 *                       [@start, @end], or 0 if @end lies before @start
 * AUX_SDB_NUM_ALERT() - number of SDBs from head up to alert_mark
 * AUX_SDB_NUM_EMPTY() - number of SDBs from head up to empty_mark
 *
 * Every macro argument is parenthesized so that expressions such as
 * "mark + 1" or "&buf" expand with the intended precedence.
 */
#define AUX_SDB_INDEX(aux, i) ((i) % (aux)->sfb.num_sdb)
#define AUX_SDB_NUM(aux, start, end) \
	((end) >= (start) ? (end) - (start) + 1 : 0)
#define AUX_SDB_NUM_ALERT(aux) AUX_SDB_NUM(aux, (aux)->head, (aux)->alert_mark)
#define AUX_SDB_NUM_EMPTY(aux) AUX_SDB_NUM(aux, (aux)->head, (aux)->empty_mark)
/*
 * Return the trailer entry of the SDB at position @index. The index is
 * reduced modulo the number of SDBs before the lookup.
 */
static struct hws_trailer_entry *aux_sdb_trailer(struct aux_buffer *aux,
						 unsigned long index)
{
	unsigned long slot = AUX_SDB_INDEX(aux, index);

	return (struct hws_trailer_entry *)trailer_entry_ptr(aux->sdb_index[slot]);
}
/*
 * Finish sampling on the cpu. Called by cpumsf_pmu_del() with pmu
 * disabled.
 *
 * Walk the SDBs from aux->head up to the alert position and count the
 * leading ones whose trailer has the buffer-full flag set; the walk stops
 * at the first SDB that is not full. The counted SDBs are handed to perf
 * as consumed AUX data, then the alert request on the alert_mark SDB is
 * cleared.
 */
static void aux_output_end(struct perf_output_handle *handle)
{
	struct aux_buffer *aux = perf_get_aux(handle);
	struct hws_trailer_entry *trailer;
	unsigned long limit, pos, n_full;

	if (!aux)
		return;

	limit = AUX_SDB_NUM_ALERT(aux);
	pos = aux->head;
	n_full = 0;
	while (n_full < limit) {
		trailer = aux_sdb_trailer(aux, pos);
		if (!(trailer->flags & SDB_TE_BUFFER_FULL_MASK))
			break;
		n_full++;
		pos++;
	}

	/* n_full is the number of full SDBs, one page each */
	perf_aux_output_end(handle, n_full << PAGE_SHIFT);

	/* Remove alert indicators in the buffer */
	trailer = aux_sdb_trailer(aux, aux->alert_mark);
	trailer->flags &= ~SDB_TE_ALERT_REQ_MASK;

	debug_sprintf_event(sfdbg, 6, "aux_output_end: collect %lx SDBs\n", n_full);
}
/*
 * Start sampling on the CPU. Called by cpumsf_pmu_add() when an event
 * is first added to the CPU or rescheduled again to the CPU. It is called
 * with pmu disabled.
 *
 * 1. Reset the trailers of SDBs that are not yet known to be empty, so
 *    they are ready for new data.
 * 2. Place the alert request in the middle of the usable range.
 * 3. Tell the hardware where to put the data by resetting the SDB buffer
 *    head (tear/dear) in cpuhw->lsctl.
 *
 * Returns 0 on success, -EINVAL if handle->head is not page aligned and
 * -ENOMEM if the handle offers no more than one page.
 */
static int aux_output_begin(struct perf_output_handle *handle,
			    struct aux_buffer *aux,
			    struct cpu_hw_sf *cpuhw)
{
	unsigned long range;
	unsigned long i, range_scan, idx;
	unsigned long head, base, offset;
	struct hws_trailer_entry *te;

	/* The AUX head must sit on an SDB (page) boundary */
	if (WARN_ON_ONCE(handle->head & ~PAGE_MASK))
		return -EINVAL;

	/* Convert byte offset and byte size of the handle into SDB units */
	aux->head = handle->head >> PAGE_SHIFT;
	range = (handle->size + 1) >> PAGE_SHIFT;
	if (range <= 1)
		return -ENOMEM;

	/*
	 * SDBs between aux->head and aux->empty_mark are already ready
	 * for new data. range_scan is num of SDBs not within them.
	 */
	if (range > AUX_SDB_NUM_EMPTY(aux)) {
		range_scan = range - AUX_SDB_NUM_EMPTY(aux);
		idx = aux->empty_mark + 1;
		for (i = 0; i < range_scan; i++, idx++) {
			te = aux_sdb_trailer(aux, idx);
			/* Clear full and alert flags and the overflow count */
			te->flags = te->flags & ~SDB_TE_BUFFER_FULL_MASK;
			te->flags = te->flags & ~SDB_TE_ALERT_REQ_MASK;
			te->overflow = 0;
		}
		/* Save the position of empty SDBs */
		aux->empty_mark = aux->head + range - 1;
	}

	/* Set alert indicator on the last SDB of the first half of the range */
	aux->alert_mark = aux->head + range/2 - 1;
	te = aux_sdb_trailer(aux, aux->alert_mark);
	te->flags = te->flags | SDB_TE_ALERT_REQ_MASK;

	/*
	 * Reset hardware buffer head: tear is the address of the SDBT entry
	 * for the head SDB (table base plus entry offset), dear is the
	 * address of the head SDB itself.
	 */
	head = AUX_SDB_INDEX(aux, aux->head);
	base = aux->sdbt_index[head / CPUM_SF_SDB_PER_TABLE];
	offset = head % CPUM_SF_SDB_PER_TABLE;
	cpuhw->lsctl.tear = base + offset * sizeof(unsigned long);
	cpuhw->lsctl.dear = aux->sdb_index[head];

	debug_sprintf_event(sfdbg, 6, "aux_output_begin: "
			    "head->alert_mark->empty_mark (num_alert, range)"
			    "[%lx -> %lx -> %lx] (%lx, %lx) "
			    "tear index %lx, tear %lx dear %lx\n",
			    aux->head, aux->alert_mark, aux->empty_mark,
			    AUX_SDB_NUM_ALERT(aux), range,
			    head / CPUM_SF_SDB_PER_TABLE,
			    cpuhw->lsctl.tear,
			    cpuhw->lsctl.dear);

	return 0;
}
/*
 * Set alert indicator on SDB at index @alert_index while sampler is running.
 *
 * @aux:         AUX buffer that owns the SDB
 * @alert_index: SDB index (wrapped by aux_sdb_trailer())
 * @overflow:    receives the overflow count read from the trailer; it is
 *               assigned (not accumulated) on every attempt
 *
 * On success the trailer's overflow count is reset to 0 in the same
 * atomic update that sets the alert request flag.
 *
 * Return true if the alert indicator was set.
 * Return false if the full indicator is already set by the hardware sampler.
 */
static bool aux_set_alert(struct aux_buffer *aux, unsigned long alert_index,
			  unsigned long long *overflow)
{
	unsigned long long orig_overflow, orig_flags, new_flags;
	struct hws_trailer_entry *te;

	te = aux_sdb_trailer(aux, alert_index);
	do {
		/* Snapshot flags and overflow; both are compared on update */
		orig_flags = te->flags;
		orig_overflow = te->overflow;
		*overflow = orig_overflow;
		if (orig_flags & SDB_TE_BUFFER_FULL_MASK) {
			/*
			 * SDB is already set by hardware.
			 * Abort and try to set somewhere
			 * behind.
			 */
			return false;
		}
		new_flags = orig_flags | SDB_TE_ALERT_REQ_MASK;
		/* Retry if the trailer changed since the snapshot was taken */
	} while (!cmpxchg_double(&te->flags, &te->overflow,
				 orig_flags, orig_overflow,
				 new_flags, 0ULL));
	return true;
}
/*
 * aux_reset_buffer() - Scan and setup SDBs for new samples
 * @aux:	The AUX buffer to set
 * @range:	The range of SDBs to scan started from aux->head
 * @overflow:	Set to overflow count
 *
 * Set alert indicator on the SDB at index of aux->alert_mark. If this SDB is
 * marked as empty, check if it is already set full by the hardware sampler.
 * If yes, that means new data is already there before we can set an alert
 * indicator. Caller should try to set alert indicator to some position behind.
 *
 * Scan the SDBs in AUX buffer from behind aux->empty_mark. They are used
 * previously and have already been consumed by user space. Reset these SDBs
 * (clear full indicator and alert indicator) for new data.
 * If aux->alert_mark falls in this area, just set it. Overflow count is
 * recorded while scanning.
 *
 * SDBs between aux->head and aux->empty_mark were already reset last time
 * and are ready for new samples. So scanning this area can be skipped.
 *
 * Note that aux_set_alert() assigns *overflow while the scan loop adds
 * to it.
 *
 * Return true if alert indicator is set successfully and false if not.
 */
static bool aux_reset_buffer(struct aux_buffer *aux, unsigned long range,
			     unsigned long long *overflow)
{
	unsigned long long orig_overflow, orig_flags, new_flags;
	unsigned long i, range_scan, idx;
	struct hws_trailer_entry *te;

	if (range <= AUX_SDB_NUM_EMPTY(aux))
		/*
		 * No need to scan. All SDBs in range are marked as empty.
		 * Just set alert indicator. Should check race with hardware
		 * sampler.
		 */
		return aux_set_alert(aux, aux->alert_mark, overflow);

	if (aux->alert_mark <= aux->empty_mark)
		/*
		 * Set alert indicator on empty SDB. Should check race
		 * with hardware sampler.
		 */
		if (!aux_set_alert(aux, aux->alert_mark, overflow))
			return false;

	/*
	 * Scan the SDBs to clear full and alert indicator used previously.
	 * Start scanning from one SDB behind empty_mark. If the new alert
	 * indicator fall into this range, set it.
	 */
	range_scan = range - AUX_SDB_NUM_EMPTY(aux);
	idx = aux->empty_mark + 1;
	for (i = 0; i < range_scan; i++, idx++) {
		te = aux_sdb_trailer(aux, idx);
		do {
			/* Snapshot the trailer, then build the new flags */
			orig_flags = te->flags;
			orig_overflow = te->overflow;
			new_flags = orig_flags & ~SDB_TE_BUFFER_FULL_MASK;
			if (idx == aux->alert_mark)
				new_flags |= SDB_TE_ALERT_REQ_MASK;
			else
				new_flags &= ~SDB_TE_ALERT_REQ_MASK;
			/* Swap in new flags and zero overflow atomically */
		} while (!cmpxchg_double(&te->flags, &te->overflow,
					 orig_flags, orig_overflow,
					 new_flags, 0ULL));
		/* Accumulate the overflow count that was just cleared */
		*overflow += orig_overflow;
	}

	/* Update empty_mark to new position */
	aux->empty_mark = aux->head + range - 1;

	return true;
}
/*
 * Measurement alert handler for diagnostic mode sampling.
 *
 * Closes the current AUX output handle, reporting the SDBs from head up
 * to the alert position as new data, then repeatedly opens a new handle
 * and re-arms the buffer via aux_reset_buffer() until an alert position
 * could be set without any recorded overflow, or the AUX buffer has no
 * space left.
 */
static void hw_collect_aux(struct cpu_hw_sf *cpuhw)
{
	struct aux_buffer *aux;
	int done = 0;
	unsigned long range = 0, size;
	/* NOTE(review): overflow is not cleared between loop iterations */
	unsigned long long overflow = 0;
	struct perf_output_handle *handle = &cpuhw->handle;
	unsigned long num_sdb;

	aux = perf_get_aux(handle);
	if (WARN_ON_ONCE(!aux))
		return;

	/* Inform user space new data arrived */
	size = AUX_SDB_NUM_ALERT(aux) << PAGE_SHIFT;
	perf_aux_output_end(handle, size);
	/* Remember the SDB count for the error messages below */
	num_sdb = aux->sfb.num_sdb;

	while (!done) {
		/* Get an output handle */
		aux = perf_aux_output_begin(handle, cpuhw->event);
		if (handle->size == 0) {
			pr_err("The AUX buffer with %lu pages for the "
			       "diagnostic-sampling mode is full\n",
			       num_sdb);
			debug_sprintf_event(sfdbg, 1, "AUX buffer used up\n");
			break;
		}
		if (WARN_ON_ONCE(!aux))
			return;

		/* Update head and alert_mark to new position */
		aux->head = handle->head >> PAGE_SHIFT;
		range = (handle->size + 1) >> PAGE_SHIFT;
		if (range == 1)
			aux->alert_mark = aux->head;
		else
			aux->alert_mark = aux->head + range/2 - 1;

		if (aux_reset_buffer(aux, range, &overflow)) {
			/* Alert is armed; finished unless data was lost */
			if (!overflow) {
				done = 1;
				break;
			}
			/* Overflow recorded: report the whole range and retry */
			size = range << PAGE_SHIFT;
			perf_aux_output_end(&cpuhw->handle, size);
			pr_err("Sample data caused the AUX buffer with %lu "
			       "pages to overflow\n", num_sdb);
			debug_sprintf_event(sfdbg, 1, "head %lx range %lx "
					    "overflow %llx\n",
					    aux->head, range, overflow);
		} else {
			/*
			 * The alert SDB was already full: report the SDBs up
			 * to it and retry with a new handle.
			 */
			size = AUX_SDB_NUM_ALERT(aux) << PAGE_SHIFT;
			perf_aux_output_end(&cpuhw->handle, size);
			debug_sprintf_event(sfdbg, 6, "head %lx alert %lx "
					    "already full, try another\n",
					    aux->head, aux->alert_mark);
		}
	}

	if (done)
		debug_sprintf_event(sfdbg, 6, "aux_reset_buffer: "
				    "[%lx -> %lx -> %lx] (%lx, %lx)\n",
				    aux->head, aux->alert_mark, aux->empty_mark,
				    AUX_SDB_NUM_ALERT(aux), range);
}
/*
 * Callback when freeing AUX buffers.
 *
 * Releases every SDBT page recorded in sdbt_index, the two lookup arrays
 * and the aux_buffer structure itself. The SDB pages are freed by the
 * caller. A NULL @data is ignored.
 */
static void aux_buffer_free(void *data)
{
	struct aux_buffer *aux = data;
	unsigned long *table, *table_end;
	unsigned long num_sdbt;

	if (!aux)
		return;

	/* Free SDBT. SDB is freed by the caller */
	num_sdbt = aux->sfb.num_sdbt;
	table_end = aux->sdbt_index + num_sdbt;
	for (table = aux->sdbt_index; table < table_end; table++)
		free_page(*table);

	kfree(aux->sdbt_index);
	kfree(aux->sdb_index);
	kfree(aux);

	debug_sprintf_event(sfdbg, 4, "aux_buffer_free: free "
			    "%lu SDBTs\n", num_sdbt);
}
/*
 * aux_buffer_setup() - Setup AUX buffer for diagnostic mode sampling
 * @cpu:	On which to allocate, -1 means current (not used in this body)
 * @pages:	Array of pointers to buffer pages passed from perf core
 * @nr_pages:	Total pages
 * @snapshot:	Flag for snapshot mode (not used in this body)
 *
 * This is the callback invoked when an event using an AUX buffer is set up.
 * The perf tool can trigger it with an additional mmap() call on the event.
 * Unlike the buffer for basic samples, the AUX buffer belongs to the event;
 * for a per-thread event it is scheduled with the task among online cpus.
 *
 * The pages handed in become the SDBs. This function only allocates the
 * bookkeeping around them: the aux_buffer, the sdbt_index/sdb_index lookup
 * arrays and as many SDBT pages as are needed to chain all SDBs into a ring.
 *
 * Return the private AUX buffer structure on success or NULL on failure.
 */
static void *aux_buffer_setup(int cpu, void **pages, int nr_pages,
			      bool snapshot)
{
	struct aux_buffer *aux;
	struct sf_buffer *sfb;
	unsigned long *entry, *table;
	unsigned long sdb;
	int idx, nr_tables;

	if (!pages || !nr_pages)
		return NULL;

	if (nr_pages > CPUM_SF_MAX_SDB * CPUM_SF_SDB_DIAG_FACTOR) {
		pr_err("AUX buffer size (%i pages) is larger than the "
		       "maximum sampling buffer limit\n",
		       nr_pages);
		return NULL;
	}
	if (nr_pages < CPUM_SF_MIN_SDB * CPUM_SF_SDB_DIAG_FACTOR) {
		pr_err("AUX buffer size (%i pages) is less than the "
		       "minimum sampling buffer limit\n",
		       nr_pages);
		return NULL;
	}

	/* The aux_buffer struct is owned by the event */
	aux = kmalloc(sizeof(*aux), GFP_KERNEL);
	if (!aux)
		return NULL;
	sfb = &aux->sfb;

	/* Lookup array with one slot per SDBT, for fast reference */
	nr_tables = (nr_pages + CPUM_SF_SDB_PER_TABLE - 1) / CPUM_SF_SDB_PER_TABLE;
	aux->sdbt_index = kmalloc_array(nr_tables, sizeof(void *), GFP_KERNEL);
	if (!aux->sdbt_index)
		goto free_aux;

	/* Lookup array with one slot per SDB, for fast reference */
	aux->sdb_index = kmalloc_array(nr_pages, sizeof(void *), GFP_KERNEL);
	if (!aux->sdb_index)
		goto free_sdbt_index;

	/* The first SDBT is the anchor of the chain */
	sfb->num_sdbt = 0;
	sfb->sdbt = (unsigned long *) get_zeroed_page(GFP_KERNEL);
	if (!sfb->sdbt)
		goto free_tables;
	aux->sdbt_index[sfb->num_sdbt++] = (unsigned long)sfb->sdbt;
	sfb->tail = sfb->sdbt;
	entry = sfb->sdbt;

	/*
	 * Enter every provided AUX buffer page into the SDBT chain,
	 * growing the chain by another SDBT whenever the current slot
	 * has to hold a table link instead of an SDB.
	 */
	idx = 0;
	while (idx < nr_pages) {
		if (require_table_link(entry)) {
			table = (unsigned long *) get_zeroed_page(GFP_KERNEL);
			if (!table)
				goto free_tables;
			aux->sdbt_index[sfb->num_sdbt++] = (unsigned long)table;
			/* The current slot becomes the link to the new SDBT */
			*entry = (unsigned long) table + 1;
			entry = table;
		}
		/* entry now designates an SDB slot inside an SDBT */
		sdb = (unsigned long)pages[idx];
		*entry = sdb;
		aux->sdb_index[idx] = sdb;
		idx++;
		entry++;
	}
	sfb->num_sdb = nr_pages;

	/* Close the ring: the slot after the last SDB links to the first SDBT */
	*entry = (unsigned long) sfb->sdbt + 1;
	sfb->tail = entry;

	/*
	 * Initially all SDBs are zeroed, so mark the buffer as empty.
	 * That way the full indicators need not be cleared when this
	 * event is first added.
	 */
	aux->empty_mark = sfb->num_sdb - 1;

	debug_sprintf_event(sfdbg, 4, "aux_buffer_setup: setup %lu SDBTs"
			    " and %lu SDBs\n",
			    sfb->num_sdbt, sfb->num_sdb);

	return aux;

free_tables:
	/* Only the SDBTs are ours; SDBs (AUX buffer pages) are freed by caller */
	for (idx = 0; idx < sfb->num_sdbt; idx++)
		free_page(aux->sdbt_index[idx]);
	kfree(aux->sdb_index);
free_sdbt_index:
	kfree(aux->sdbt_index);
free_aux:
	kfree(aux);
	return NULL;
}
static void cpumsf_pmu_read(struct perf_event *event) static void cpumsf_pmu_read(struct perf_event *event)
{ {
/* Nothing to do ... updates are interrupt-driven */ /* Nothing to do ... updates are interrupt-driven */
...@@ -1342,12 +1749,13 @@ static void cpumsf_pmu_stop(struct perf_event *event, int flags) ...@@ -1342,12 +1749,13 @@ static void cpumsf_pmu_stop(struct perf_event *event, int flags)
static int cpumsf_pmu_add(struct perf_event *event, int flags) static int cpumsf_pmu_add(struct perf_event *event, int flags)
{ {
struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf); struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf);
struct aux_buffer *aux;
int err; int err;
if (cpuhw->flags & PMU_F_IN_USE) if (cpuhw->flags & PMU_F_IN_USE)
return -EAGAIN; return -EAGAIN;
if (!cpuhw->sfb.sdbt) if (!SAMPL_DIAG_MODE(&event->hw) && !cpuhw->sfb.sdbt)
return -EINVAL; return -EINVAL;
err = 0; err = 0;
...@@ -1362,10 +1770,12 @@ static int cpumsf_pmu_add(struct perf_event *event, int flags) ...@@ -1362,10 +1770,12 @@ static int cpumsf_pmu_add(struct perf_event *event, int flags)
*/ */
cpuhw->lsctl.s = 0; cpuhw->lsctl.s = 0;
cpuhw->lsctl.h = 1; cpuhw->lsctl.h = 1;
cpuhw->lsctl.tear = (unsigned long) cpuhw->sfb.sdbt;
cpuhw->lsctl.dear = *(unsigned long *) cpuhw->sfb.sdbt;
cpuhw->lsctl.interval = SAMPL_RATE(&event->hw); cpuhw->lsctl.interval = SAMPL_RATE(&event->hw);
hw_reset_registers(&event->hw, cpuhw->sfb.sdbt); if (!SAMPL_DIAG_MODE(&event->hw)) {
cpuhw->lsctl.tear = (unsigned long) cpuhw->sfb.sdbt;
cpuhw->lsctl.dear = *(unsigned long *) cpuhw->sfb.sdbt;
hw_reset_registers(&event->hw, cpuhw->sfb.sdbt);
}
/* Ensure sampling functions are in the disabled state. If disabled, /* Ensure sampling functions are in the disabled state. If disabled,
* switch on sampling enable control. */ * switch on sampling enable control. */
...@@ -1373,9 +1783,18 @@ static int cpumsf_pmu_add(struct perf_event *event, int flags) ...@@ -1373,9 +1783,18 @@ static int cpumsf_pmu_add(struct perf_event *event, int flags)
err = -EAGAIN; err = -EAGAIN;
goto out; goto out;
} }
cpuhw->lsctl.es = 1; if (SAMPL_DIAG_MODE(&event->hw)) {
if (SAMPL_DIAG_MODE(&event->hw)) aux = perf_aux_output_begin(&cpuhw->handle, event);
if (!aux) {
err = -EINVAL;
goto out;
}
err = aux_output_begin(&cpuhw->handle, aux, cpuhw);
if (err)
goto out;
cpuhw->lsctl.ed = 1; cpuhw->lsctl.ed = 1;
}
cpuhw->lsctl.es = 1;
/* Set in_use flag and store event */ /* Set in_use flag and store event */
cpuhw->event = event; cpuhw->event = event;
...@@ -1401,6 +1820,8 @@ static void cpumsf_pmu_del(struct perf_event *event, int flags) ...@@ -1401,6 +1820,8 @@ static void cpumsf_pmu_del(struct perf_event *event, int flags)
cpuhw->flags &= ~PMU_F_IN_USE; cpuhw->flags &= ~PMU_F_IN_USE;
cpuhw->event = NULL; cpuhw->event = NULL;
if (SAMPL_DIAG_MODE(&event->hw))
aux_output_end(&cpuhw->handle);
perf_event_update_userpage(event); perf_event_update_userpage(event);
perf_pmu_enable(event->pmu); perf_pmu_enable(event->pmu);
} }
...@@ -1448,6 +1869,9 @@ static struct pmu cpumf_sampling = { ...@@ -1448,6 +1869,9 @@ static struct pmu cpumf_sampling = {
.read = cpumsf_pmu_read, .read = cpumsf_pmu_read,
.attr_groups = cpumsf_pmu_attr_groups, .attr_groups = cpumsf_pmu_attr_groups,
.setup_aux = aux_buffer_setup,
.free_aux = aux_buffer_free,
}; };
static void cpumf_measurement_alert(struct ext_code ext_code, static void cpumf_measurement_alert(struct ext_code ext_code,
...@@ -1471,7 +1895,10 @@ static void cpumf_measurement_alert(struct ext_code ext_code, ...@@ -1471,7 +1895,10 @@ static void cpumf_measurement_alert(struct ext_code ext_code,
/* Program alert request */ /* Program alert request */
if (alert & CPU_MF_INT_SF_PRA) { if (alert & CPU_MF_INT_SF_PRA) {
if (cpuhw->flags & PMU_F_IN_USE) if (cpuhw->flags & PMU_F_IN_USE)
hw_perf_event_update(cpuhw->event, 0); if (SAMPL_DIAG_MODE(&cpuhw->event->hw))
hw_collect_aux(cpuhw);
else
hw_perf_event_update(cpuhw->event, 0);
else else
WARN_ON_ONCE(!(cpuhw->flags & PMU_F_IN_USE)); WARN_ON_ONCE(!(cpuhw->flags & PMU_F_IN_USE));
} }
...@@ -1590,6 +2017,9 @@ static int __init init_cpum_sampling_pmu(void) ...@@ -1590,6 +2017,9 @@ static int __init init_cpum_sampling_pmu(void)
return -ENODEV; return -ENODEV;
} }
if (!si.as && !si.ad)
return -ENODEV;
if (si.bsdes != sizeof(struct hws_basic_entry)) { if (si.bsdes != sizeof(struct hws_basic_entry)) {
pr_cpumsf_err(RS_INIT_FAILURE_BSDES); pr_cpumsf_err(RS_INIT_FAILURE_BSDES);
return -EINVAL; return -EINVAL;
......
#include <linux/perf_event.h>
#include <linux/perf_regs.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/bug.h>
#include <asm/ptrace.h>
#include <asm/fpu/api.h>
#include <asm/fpu/types.h>
/*
 * perf_reg_value() - Return the value of one register for a perf sample
 * @regs: Register set the sample was taken from
 * @idx:  PERF_REG_S390_* index of the requested register
 *
 * General-purpose registers, the PSW mask and the PSW address are read
 * from @regs. Floating-point registers are read from the FPU state saved
 * in current->thread.fpu, and only when @regs describes user mode;
 * otherwise 0 is reported for them.
 *
 * Return: the register value, or 0 for an index that does not name a
 * register handled here (an index at or above PERF_REG_S390_MAX also
 * triggers a one-time warning).
 */
u64 perf_reg_value(struct pt_regs *regs, int idx)
{
	freg_t fp;

	/* The cast also rejects negative indices */
	if (WARN_ON_ONCE((u32)idx >= PERF_REG_S390_MAX))
		return 0;

	if (idx >= PERF_REG_S390_R0 && idx <= PERF_REG_S390_R15)
		return regs->gprs[idx];

	if (idx >= PERF_REG_S390_FP0 && idx <= PERF_REG_S390_FP15) {
		if (!user_mode(regs))
			return 0;

		idx -= PERF_REG_S390_FP0;
		/* With the vector facility the FP value is taken from vxrs */
		fp = MACHINE_HAS_VX ? *(freg_t *)(current->thread.fpu.vxrs + idx)
				    : current->thread.fpu.fprs[idx];
		return fp.ui;
	}

	if (idx == PERF_REG_S390_MASK)
		return regs->psw.mask;
	if (idx == PERF_REG_S390_PC)
		return regs->psw.addr;

	/*
	 * Anything reaching this point is outside R0..R15, so it must not
	 * be used as a subscript into gprs[]. Report 0 instead of reading
	 * past the general-purpose registers.
	 */
	return 0;
}
/* Every mask bit at or above PERF_REG_S390_MAX is reserved */
#define REG_RESERVED (~((1UL << PERF_REG_S390_MAX) - 1))

/*
 * perf_reg_validate() - Check a user supplied register mask
 * @mask: Bit mask of requested PERF_REG_S390_* registers
 *
 * Return: 0 if @mask selects at least one register and no reserved bit,
 * -EINVAL otherwise.
 */
int perf_reg_validate(u64 mask)
{
	if (mask == 0)
		return -EINVAL;

	if ((mask & REG_RESERVED) != 0)
		return -EINVAL;

	return 0;
}
/*
 * perf_reg_abi() - Report the register ABI of a task
 * @task: Task to inspect
 *
 * Return: PERF_SAMPLE_REGS_ABI_32 if @task has TIF_31BIT set,
 * PERF_SAMPLE_REGS_ABI_64 otherwise.
 */
u64 perf_reg_abi(struct task_struct *task)
{
	return test_tsk_thread_flag(task, TIF_31BIT) ? PERF_SAMPLE_REGS_ABI_32
						     : PERF_SAMPLE_REGS_ABI_64;
}
/*
 * perf_get_regs_user() - Provide the user register set for a sample
 * @regs_user:      Filled in with the register pointer and the ABI
 * @regs:           Not used by this implementation
 * @regs_user_copy: Not used by this implementation
 */
void perf_get_regs_user(struct perf_regs *regs_user,
			struct pt_regs *regs,
			struct pt_regs *regs_user_copy)
{
	/*
	 * Take the regs from the first interruption; interrupts
	 * (regs == get_irq_regs()) are left to perf_sample_regs_intr().
	 */
	struct pt_regs *task_regs = task_pt_regs(current);

	regs_user->regs = task_regs;

	/* FPU registers are saved as well, but for user-space tasks only */
	if (user_mode(task_regs))
		save_fpu_regs();

	regs_user->abi = perf_reg_abi(current);
}
...@@ -158,16 +158,9 @@ int vdso_alloc_per_cpu(struct lowcore *lowcore) ...@@ -158,16 +158,9 @@ int vdso_alloc_per_cpu(struct lowcore *lowcore)
{ {
unsigned long segment_table, page_table, page_frame; unsigned long segment_table, page_table, page_frame;
struct vdso_per_cpu_data *vd; struct vdso_per_cpu_data *vd;
u32 *psal, *aste;
int i;
lowcore->vdso_per_cpu_data = __LC_PASTE;
if (!vdso_enabled)
return 0;
segment_table = __get_free_pages(GFP_KERNEL, SEGMENT_ORDER); segment_table = __get_free_pages(GFP_KERNEL, SEGMENT_ORDER);
page_table = get_zeroed_page(GFP_KERNEL | GFP_DMA); page_table = get_zeroed_page(GFP_KERNEL);
page_frame = get_zeroed_page(GFP_KERNEL); page_frame = get_zeroed_page(GFP_KERNEL);
if (!segment_table || !page_table || !page_frame) if (!segment_table || !page_table || !page_frame)
goto out; goto out;
...@@ -179,25 +172,15 @@ int vdso_alloc_per_cpu(struct lowcore *lowcore) ...@@ -179,25 +172,15 @@ int vdso_alloc_per_cpu(struct lowcore *lowcore)
vd->cpu_nr = lowcore->cpu_nr; vd->cpu_nr = lowcore->cpu_nr;
vd->node_id = cpu_to_node(vd->cpu_nr); vd->node_id = cpu_to_node(vd->cpu_nr);
/* Set up access register mode page table */ /* Set up page table for the vdso address space */
memset64((u64 *)segment_table, _SEGMENT_ENTRY_EMPTY, _CRST_ENTRIES); memset64((u64 *)segment_table, _SEGMENT_ENTRY_EMPTY, _CRST_ENTRIES);
memset64((u64 *)page_table, _PAGE_INVALID, PTRS_PER_PTE); memset64((u64 *)page_table, _PAGE_INVALID, PTRS_PER_PTE);
*(unsigned long *) segment_table = _SEGMENT_ENTRY + page_table; *(unsigned long *) segment_table = _SEGMENT_ENTRY + page_table;
*(unsigned long *) page_table = _PAGE_PROTECT + page_frame; *(unsigned long *) page_table = _PAGE_PROTECT + page_frame;
psal = (u32 *) (page_table + 256*sizeof(unsigned long)); lowcore->vdso_asce = segment_table +
aste = psal + 32;
for (i = 4; i < 32; i += 4)
psal[i] = 0x80000000;
lowcore->paste[4] = (u32)(addr_t) psal;
psal[0] = 0x02000000;
psal[2] = (u32)(addr_t) aste;
*(unsigned long *) (aste + 2) = segment_table +
_ASCE_TABLE_LENGTH + _ASCE_USER_BITS + _ASCE_TYPE_SEGMENT; _ASCE_TABLE_LENGTH + _ASCE_USER_BITS + _ASCE_TYPE_SEGMENT;
aste[4] = (u32)(addr_t) psal;
lowcore->vdso_per_cpu_data = page_frame; lowcore->vdso_per_cpu_data = page_frame;
return 0; return 0;
...@@ -212,14 +195,8 @@ int vdso_alloc_per_cpu(struct lowcore *lowcore) ...@@ -212,14 +195,8 @@ int vdso_alloc_per_cpu(struct lowcore *lowcore)
void vdso_free_per_cpu(struct lowcore *lowcore) void vdso_free_per_cpu(struct lowcore *lowcore)
{ {
unsigned long segment_table, page_table, page_frame; unsigned long segment_table, page_table, page_frame;
u32 *psal, *aste;
if (!vdso_enabled)
return;
psal = (u32 *)(addr_t) lowcore->paste[4]; segment_table = lowcore->vdso_asce & PAGE_MASK;
aste = (u32 *)(addr_t) psal[2];
segment_table = *(unsigned long *)(aste + 2) & PAGE_MASK;
page_table = *(unsigned long *) segment_table; page_table = *(unsigned long *) segment_table;
page_frame = *(unsigned long *) page_table; page_frame = *(unsigned long *) page_table;
...@@ -228,16 +205,6 @@ void vdso_free_per_cpu(struct lowcore *lowcore) ...@@ -228,16 +205,6 @@ void vdso_free_per_cpu(struct lowcore *lowcore)
free_pages(segment_table, SEGMENT_ORDER); free_pages(segment_table, SEGMENT_ORDER);
} }
static void vdso_init_cr5(void)
{
unsigned long cr5;
if (!vdso_enabled)
return;
cr5 = offsetof(struct lowcore, paste);
__ctl_load(cr5, 5, 5);
}
/* /*
* This is called from binfmt_elf, we create the special vma for the * This is called from binfmt_elf, we create the special vma for the
* vDSO and insert it into the mm struct tree * vDSO and insert it into the mm struct tree
...@@ -314,8 +281,6 @@ static int __init vdso_init(void) ...@@ -314,8 +281,6 @@ static int __init vdso_init(void)
{ {
int i; int i;
if (!vdso_enabled)
return 0;
vdso_init_data(vdso_data); vdso_init_data(vdso_data);
#ifdef CONFIG_COMPAT #ifdef CONFIG_COMPAT
/* Calculate the size of the 32 bit vDSO */ /* Calculate the size of the 32 bit vDSO */
...@@ -354,7 +319,6 @@ static int __init vdso_init(void) ...@@ -354,7 +319,6 @@ static int __init vdso_init(void)
vdso64_pagelist[vdso64_pages] = NULL; vdso64_pagelist[vdso64_pages] = NULL;
if (vdso_alloc_per_cpu(&S390_lowcore)) if (vdso_alloc_per_cpu(&S390_lowcore))
BUG(); BUG();
vdso_init_cr5();
get_page(virt_to_page(vdso_data)); get_page(virt_to_page(vdso_data));
......
...@@ -15,23 +15,11 @@ ...@@ -15,23 +15,11 @@
.type __kernel_getcpu,@function .type __kernel_getcpu,@function
__kernel_getcpu: __kernel_getcpu:
.cfi_startproc .cfi_startproc
ear %r1,%a4
lhi %r4,1
sll %r4,24
sar %a4,%r4
la %r4,0 la %r4,0
epsw %r0,0 sacf 256
sacf 512
l %r5,__VDSO_CPU_NR(%r4) l %r5,__VDSO_CPU_NR(%r4)
l %r4,__VDSO_NODE_ID(%r4) l %r4,__VDSO_NODE_ID(%r4)
tml %r0,0x4000 sacf 0
jo 1f
tml %r0,0x8000
jno 0f
sacf 256
j 1f
0: sacf 0
1: sar %a4,%r1
ltr %r2,%r2 ltr %r2,%r2
jz 2f jz 2f
st %r5,0(%r2) st %r5,0(%r2)
......
...@@ -114,23 +114,12 @@ __kernel_clock_gettime: ...@@ -114,23 +114,12 @@ __kernel_clock_gettime:
br %r14 br %r14
/* CPUCLOCK_VIRT for this thread */ /* CPUCLOCK_VIRT for this thread */
9: icm %r0,15,__VDSO_ECTG_OK(%r5) 9: lghi %r4,0
icm %r0,15,__VDSO_ECTG_OK(%r5)
jz 12f jz 12f
ear %r2,%a4 sacf 256 /* Magic ectg instruction */
llilh %r4,0x0100
sar %a4,%r4
lghi %r4,0
epsw %r5,0
sacf 512 /* Magic ectg instruction */
.insn ssf,0xc80100000000,__VDSO_ECTG_BASE(4),__VDSO_ECTG_USER(4),4 .insn ssf,0xc80100000000,__VDSO_ECTG_BASE(4),__VDSO_ECTG_USER(4),4
tml %r5,0x4000 sacf 0
jo 11f
tml %r5,0x8000
jno 10f
sacf 256
j 11f
10: sacf 0
11: sar %a4,%r2
algr %r1,%r0 /* r1 = cputime as TOD value */ algr %r1,%r0 /* r1 = cputime as TOD value */
mghi %r1,1000 /* convert to nanoseconds */ mghi %r1,1000 /* convert to nanoseconds */
srlg %r1,%r1,12 /* r1 = cputime in nanosec */ srlg %r1,%r1,12 /* r1 = cputime in nanosec */
......
...@@ -15,22 +15,11 @@ ...@@ -15,22 +15,11 @@
.type __kernel_getcpu,@function .type __kernel_getcpu,@function
__kernel_getcpu: __kernel_getcpu:
.cfi_startproc .cfi_startproc
ear %r1,%a4
llilh %r4,0x0100
sar %a4,%r4
la %r4,0 la %r4,0
epsw %r0,0 sacf 256
sacf 512
l %r5,__VDSO_CPU_NR(%r4) l %r5,__VDSO_CPU_NR(%r4)
l %r4,__VDSO_NODE_ID(%r4) l %r4,__VDSO_NODE_ID(%r4)
tml %r0,0x4000 sacf 0
jo 1f
tml %r0,0x8000
jno 0f
sacf 256
j 1f
0: sacf 0
1: sar %a4,%r1
ltgr %r2,%r2 ltgr %r2,%r2
jz 2f jz 2f
st %r5,0(%r2) st %r5,0(%r2)
......
...@@ -78,7 +78,7 @@ static inline int arch_load_niai4(int *lock) ...@@ -78,7 +78,7 @@ static inline int arch_load_niai4(int *lock)
ALTERNATIVE("", ".long 0xb2fa0040", 49) /* NIAI 4 */ ALTERNATIVE("", ".long 0xb2fa0040", 49) /* NIAI 4 */
" l %0,%1\n" " l %0,%1\n"
: "=d" (owner) : "Q" (*lock) : "memory"); : "=d" (owner) : "Q" (*lock) : "memory");
return owner; return owner;
} }
static inline int arch_cmpxchg_niai8(int *lock, int old, int new) static inline int arch_cmpxchg_niai8(int *lock, int old, int new)
...@@ -226,9 +226,10 @@ static inline void arch_spin_lock_classic(arch_spinlock_t *lp) ...@@ -226,9 +226,10 @@ static inline void arch_spin_lock_classic(arch_spinlock_t *lp)
/* Try to get the lock if it is free. */ /* Try to get the lock if it is free. */
if (!owner) { if (!owner) {
new = (old & _Q_TAIL_MASK) | lockval; new = (old & _Q_TAIL_MASK) | lockval;
if (arch_cmpxchg_niai8(&lp->lock, old, new)) if (arch_cmpxchg_niai8(&lp->lock, old, new)) {
/* Got the lock */ /* Got the lock */
return; return;
}
continue; continue;
} }
if (count-- >= 0) if (count-- >= 0)
......
...@@ -40,10 +40,67 @@ static inline int copy_with_mvcos(void) ...@@ -40,10 +40,67 @@ static inline int copy_with_mvcos(void)
} }
#endif #endif
/*
 * set_fs() - Switch the address-space selection of the current thread
 * @fs: New segment value
 *
 * Stores @fs in current->thread.mm_segment and reloads the ASCEs to match.
 * The ASCE loaded with __ctl_load(..., 1, 1) is tracked by CIF_ASCE_PRIMARY
 * and the one loaded with __ctl_load(..., 7, 7) by CIF_ASCE_SECONDARY.
 * NOTE(review): presumably these are control registers 1 and 7 - confirm
 * against the __ctl_load() definition.
 */
void set_fs(mm_segment_t fs)
{
current->thread.mm_segment = fs;
/* Primary side: user ASCE only for plain USER_DS, kernel ASCE otherwise */
if (fs == USER_DS) {
__ctl_load(S390_lowcore.user_asce, 1, 1);
clear_cpu_flag(CIF_ASCE_PRIMARY);
} else {
__ctl_load(S390_lowcore.kernel_asce, 1, 1);
set_cpu_flag(CIF_ASCE_PRIMARY);
}
/* Bit 0 of fs set: the secondary side has to be set up as well */
if (fs & 1) {
if (fs == USER_DS_SACF)
__ctl_load(S390_lowcore.user_asce, 7, 7);
else
__ctl_load(S390_lowcore.kernel_asce, 7, 7);
set_cpu_flag(CIF_ASCE_SECONDARY);
}
}
EXPORT_SYMBOL(set_fs);
/*
 * enable_sacf_uaccess() - Prepare the ASCEs for a sacf based user access
 *
 * Sets bit 0 in current->thread.mm_segment and makes sure the ASCE slots
 * 1 and 7 hold the values needed for the access. The slots are only
 * reloaded (and the matching CIF_ASCE_* flag only set) when the value
 * read back with __ctl_store() differs from the wanted one.
 *
 * Return: the previous mm_segment value. The callers in this file hand it
 * back to disable_sacf_uaccess() once the access is done.
 */
mm_segment_t enable_sacf_uaccess(void)
{
mm_segment_t old_fs;
unsigned long asce, cr;
old_fs = current->thread.mm_segment;
/* Bit 0 already set: nothing to change, report the value as it is */
if (old_fs & 1)
return old_fs;
current->thread.mm_segment |= 1;
/* Default for slot 7 is the kernel ASCE ... */
asce = S390_lowcore.kernel_asce;
if (likely(old_fs == USER_DS)) {
/* Coming from USER_DS: slot 1 has to hold the kernel ASCE */
__ctl_store(cr, 1, 1);
if (cr != S390_lowcore.kernel_asce) {
__ctl_load(S390_lowcore.kernel_asce, 1, 1);
set_cpu_flag(CIF_ASCE_PRIMARY);
}
/* ... but coming from USER_DS slot 7 gets the user ASCE */
asce = S390_lowcore.user_asce;
}
__ctl_store(cr, 7, 7);
if (cr != asce) {
__ctl_load(asce, 7, 7);
set_cpu_flag(CIF_ASCE_SECONDARY);
}
return old_fs;
}
EXPORT_SYMBOL(enable_sacf_uaccess);
/*
 * disable_sacf_uaccess() - Undo enable_sacf_uaccess()
 * @old_fs: Value returned by the matching enable_sacf_uaccess() call
 *
 * Writes @old_fs back to current->thread.mm_segment. Slot 7 and
 * CIF_ASCE_SECONDARY are not touched here.
 */
void disable_sacf_uaccess(mm_segment_t old_fs)
{
/*
 * Slot 1 is switched back to the user ASCE right away only if
 * facility 27 is installed.
 * NOTE(review): facility 27 is presumably MVCOS - confirm.
 */
if (old_fs == USER_DS && test_facility(27)) {
__ctl_load(S390_lowcore.user_asce, 1, 1);
clear_cpu_flag(CIF_ASCE_PRIMARY);
}
current->thread.mm_segment = old_fs;
}
EXPORT_SYMBOL(disable_sacf_uaccess);
static inline unsigned long copy_from_user_mvcos(void *x, const void __user *ptr, static inline unsigned long copy_from_user_mvcos(void *x, const void __user *ptr,
unsigned long size) unsigned long size)
{ {
register unsigned long reg0 asm("0") = 0x81UL; register unsigned long reg0 asm("0") = 0x01UL;
unsigned long tmp1, tmp2; unsigned long tmp1, tmp2;
tmp1 = -4096UL; tmp1 = -4096UL;
...@@ -74,8 +131,9 @@ static inline unsigned long copy_from_user_mvcp(void *x, const void __user *ptr, ...@@ -74,8 +131,9 @@ static inline unsigned long copy_from_user_mvcp(void *x, const void __user *ptr,
unsigned long size) unsigned long size)
{ {
unsigned long tmp1, tmp2; unsigned long tmp1, tmp2;
mm_segment_t old_fs;
load_kernel_asce(); old_fs = enable_sacf_uaccess();
tmp1 = -256UL; tmp1 = -256UL;
asm volatile( asm volatile(
" sacf 0\n" " sacf 0\n"
...@@ -102,6 +160,7 @@ static inline unsigned long copy_from_user_mvcp(void *x, const void __user *ptr, ...@@ -102,6 +160,7 @@ static inline unsigned long copy_from_user_mvcp(void *x, const void __user *ptr,
EX_TABLE(7b,3b) EX_TABLE(8b,3b) EX_TABLE(9b,6b) EX_TABLE(7b,3b) EX_TABLE(8b,3b) EX_TABLE(9b,6b)
: "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2) : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2)
: : "cc", "memory"); : : "cc", "memory");
disable_sacf_uaccess(old_fs);
return size; return size;
} }
...@@ -116,7 +175,7 @@ EXPORT_SYMBOL(raw_copy_from_user); ...@@ -116,7 +175,7 @@ EXPORT_SYMBOL(raw_copy_from_user);
static inline unsigned long copy_to_user_mvcos(void __user *ptr, const void *x, static inline unsigned long copy_to_user_mvcos(void __user *ptr, const void *x,
unsigned long size) unsigned long size)
{ {
register unsigned long reg0 asm("0") = 0x810000UL; register unsigned long reg0 asm("0") = 0x010000UL;
unsigned long tmp1, tmp2; unsigned long tmp1, tmp2;
tmp1 = -4096UL; tmp1 = -4096UL;
...@@ -147,8 +206,9 @@ static inline unsigned long copy_to_user_mvcs(void __user *ptr, const void *x, ...@@ -147,8 +206,9 @@ static inline unsigned long copy_to_user_mvcs(void __user *ptr, const void *x,
unsigned long size) unsigned long size)
{ {
unsigned long tmp1, tmp2; unsigned long tmp1, tmp2;
mm_segment_t old_fs;
load_kernel_asce(); old_fs = enable_sacf_uaccess();
tmp1 = -256UL; tmp1 = -256UL;
asm volatile( asm volatile(
" sacf 0\n" " sacf 0\n"
...@@ -175,6 +235,7 @@ static inline unsigned long copy_to_user_mvcs(void __user *ptr, const void *x, ...@@ -175,6 +235,7 @@ static inline unsigned long copy_to_user_mvcs(void __user *ptr, const void *x,
EX_TABLE(7b,3b) EX_TABLE(8b,3b) EX_TABLE(9b,6b) EX_TABLE(7b,3b) EX_TABLE(8b,3b) EX_TABLE(9b,6b)
: "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2) : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2)
: : "cc", "memory"); : : "cc", "memory");
disable_sacf_uaccess(old_fs);
return size; return size;
} }
...@@ -189,7 +250,7 @@ EXPORT_SYMBOL(raw_copy_to_user); ...@@ -189,7 +250,7 @@ EXPORT_SYMBOL(raw_copy_to_user);
static inline unsigned long copy_in_user_mvcos(void __user *to, const void __user *from, static inline unsigned long copy_in_user_mvcos(void __user *to, const void __user *from,
unsigned long size) unsigned long size)
{ {
register unsigned long reg0 asm("0") = 0x810081UL; register unsigned long reg0 asm("0") = 0x010001UL;
unsigned long tmp1, tmp2; unsigned long tmp1, tmp2;
tmp1 = -4096UL; tmp1 = -4096UL;
...@@ -212,9 +273,10 @@ static inline unsigned long copy_in_user_mvcos(void __user *to, const void __use ...@@ -212,9 +273,10 @@ static inline unsigned long copy_in_user_mvcos(void __user *to, const void __use
static inline unsigned long copy_in_user_mvc(void __user *to, const void __user *from, static inline unsigned long copy_in_user_mvc(void __user *to, const void __user *from,
unsigned long size) unsigned long size)
{ {
mm_segment_t old_fs;
unsigned long tmp1; unsigned long tmp1;
load_kernel_asce(); old_fs = enable_sacf_uaccess();
asm volatile( asm volatile(
" sacf 256\n" " sacf 256\n"
" aghi %0,-1\n" " aghi %0,-1\n"
...@@ -238,6 +300,7 @@ static inline unsigned long copy_in_user_mvc(void __user *to, const void __user ...@@ -238,6 +300,7 @@ static inline unsigned long copy_in_user_mvc(void __user *to, const void __user
EX_TABLE(1b,6b) EX_TABLE(2b,0b) EX_TABLE(4b,0b) EX_TABLE(1b,6b) EX_TABLE(2b,0b) EX_TABLE(4b,0b)
: "+a" (size), "+a" (to), "+a" (from), "=a" (tmp1) : "+a" (size), "+a" (to), "+a" (from), "=a" (tmp1)
: : "cc", "memory"); : : "cc", "memory");
disable_sacf_uaccess(old_fs);
return size; return size;
} }
...@@ -251,7 +314,7 @@ EXPORT_SYMBOL(raw_copy_in_user); ...@@ -251,7 +314,7 @@ EXPORT_SYMBOL(raw_copy_in_user);
static inline unsigned long clear_user_mvcos(void __user *to, unsigned long size) static inline unsigned long clear_user_mvcos(void __user *to, unsigned long size)
{ {
register unsigned long reg0 asm("0") = 0x810000UL; register unsigned long reg0 asm("0") = 0x010000UL;
unsigned long tmp1, tmp2; unsigned long tmp1, tmp2;
tmp1 = -4096UL; tmp1 = -4096UL;
...@@ -279,9 +342,10 @@ static inline unsigned long clear_user_mvcos(void __user *to, unsigned long size ...@@ -279,9 +342,10 @@ static inline unsigned long clear_user_mvcos(void __user *to, unsigned long size
static inline unsigned long clear_user_xc(void __user *to, unsigned long size) static inline unsigned long clear_user_xc(void __user *to, unsigned long size)
{ {
mm_segment_t old_fs;
unsigned long tmp1, tmp2; unsigned long tmp1, tmp2;
load_kernel_asce(); old_fs = enable_sacf_uaccess();
asm volatile( asm volatile(
" sacf 256\n" " sacf 256\n"
" aghi %0,-1\n" " aghi %0,-1\n"
...@@ -310,6 +374,7 @@ static inline unsigned long clear_user_xc(void __user *to, unsigned long size) ...@@ -310,6 +374,7 @@ static inline unsigned long clear_user_xc(void __user *to, unsigned long size)
EX_TABLE(1b,6b) EX_TABLE(2b,0b) EX_TABLE(4b,0b) EX_TABLE(1b,6b) EX_TABLE(2b,0b) EX_TABLE(4b,0b)
: "+a" (size), "+a" (to), "=a" (tmp1), "=a" (tmp2) : "+a" (size), "+a" (to), "=a" (tmp1), "=a" (tmp2)
: : "cc", "memory"); : : "cc", "memory");
disable_sacf_uaccess(old_fs);
return size; return size;
} }
...@@ -345,10 +410,15 @@ static inline unsigned long strnlen_user_srst(const char __user *src, ...@@ -345,10 +410,15 @@ static inline unsigned long strnlen_user_srst(const char __user *src,
unsigned long __strnlen_user(const char __user *src, unsigned long size) unsigned long __strnlen_user(const char __user *src, unsigned long size)
{ {
mm_segment_t old_fs;
unsigned long len;
if (unlikely(!size)) if (unlikely(!size))
return 0; return 0;
load_kernel_asce(); old_fs = enable_sacf_uaccess();
return strnlen_user_srst(src, size); len = strnlen_user_srst(src, size);
disable_sacf_uaccess(old_fs);
return len;
} }
EXPORT_SYMBOL(__strnlen_user); EXPORT_SYMBOL(__strnlen_user);
......
...@@ -50,6 +50,13 @@ ...@@ -50,6 +50,13 @@
#define VM_FAULT_SIGNAL 0x080000 #define VM_FAULT_SIGNAL 0x080000
#define VM_FAULT_PFAULT 0x100000 #define VM_FAULT_PFAULT 0x100000
/*
 * Address space a translation exception is attributed to, as determined
 * by get_fault_type().
 */
enum fault_type {
	KERNEL_FAULT = 0,	/* access went through the kernel ASCE */
	USER_FAULT = 1,		/* access went through the user ASCE */
	VDSO_FAULT = 2,		/* secondary space fault outside of sacf uaccess */
	GMAP_FAULT = 3,		/* primary space fault flagged PIF_GUEST_FAULT */
};
static unsigned long store_indication __read_mostly; static unsigned long store_indication __read_mostly;
static int __init fault_init(void) static int __init fault_init(void)
...@@ -99,27 +106,34 @@ void bust_spinlocks(int yes) ...@@ -99,27 +106,34 @@ void bust_spinlocks(int yes)
} }
/* /*
* Returns the address space associated with the fault. * Find out which address space caused the exception.
* Returns 0 for kernel space and 1 for user space. * Access register mode is impossible, ignore space == 3.
*/ */
static inline int user_space_fault(struct pt_regs *regs) static inline enum fault_type get_fault_type(struct pt_regs *regs)
{ {
unsigned long trans_exc_code; unsigned long trans_exc_code;
/*
* The lowest two bits of the translation exception
* identification indicate which paging table was used.
*/
trans_exc_code = regs->int_parm_long & 3; trans_exc_code = regs->int_parm_long & 3;
if (trans_exc_code == 3) /* home space -> kernel */ if (likely(trans_exc_code == 0)) {
return 0; /* primary space exception */
if (user_mode(regs)) if (IS_ENABLED(CONFIG_PGSTE) &&
return 1; test_pt_regs_flag(regs, PIF_GUEST_FAULT))
if (trans_exc_code == 2) /* secondary space -> set_fs */ return GMAP_FAULT;
return current->thread.mm_segment.ar4; if (current->thread.mm_segment == USER_DS)
if (current->flags & PF_VCPU) return USER_FAULT;
return 1; return KERNEL_FAULT;
return 0; }
if (trans_exc_code == 2) {
/* secondary space exception */
if (current->thread.mm_segment & 1) {
if (current->thread.mm_segment == USER_DS_SACF)
return USER_FAULT;
return KERNEL_FAULT;
}
return VDSO_FAULT;
}
/* home space exception -> access via kernel ASCE */
return KERNEL_FAULT;
} }
static int bad_address(void *p) static int bad_address(void *p)
...@@ -204,20 +218,23 @@ static void dump_fault_info(struct pt_regs *regs) ...@@ -204,20 +218,23 @@ static void dump_fault_info(struct pt_regs *regs)
break; break;
} }
pr_cont("mode while using "); pr_cont("mode while using ");
if (!user_space_fault(regs)) { switch (get_fault_type(regs)) {
asce = S390_lowcore.kernel_asce; case USER_FAULT:
pr_cont("kernel ");
}
#ifdef CONFIG_PGSTE
else if ((current->flags & PF_VCPU) && S390_lowcore.gmap) {
struct gmap *gmap = (struct gmap *)S390_lowcore.gmap;
asce = gmap->asce;
pr_cont("gmap ");
}
#endif
else {
asce = S390_lowcore.user_asce; asce = S390_lowcore.user_asce;
pr_cont("user "); pr_cont("user ");
break;
case VDSO_FAULT:
asce = S390_lowcore.vdso_asce;
pr_cont("vdso ");
break;
case GMAP_FAULT:
asce = ((struct gmap *) S390_lowcore.gmap)->asce;
pr_cont("gmap ");
break;
case KERNEL_FAULT:
asce = S390_lowcore.kernel_asce;
pr_cont("kernel ");
break;
} }
pr_cont("ASCE.\n"); pr_cont("ASCE.\n");
dump_pagetable(asce, regs->int_parm_long & __FAIL_ADDR_MASK); dump_pagetable(asce, regs->int_parm_long & __FAIL_ADDR_MASK);
...@@ -273,7 +290,7 @@ static noinline void do_no_context(struct pt_regs *regs) ...@@ -273,7 +290,7 @@ static noinline void do_no_context(struct pt_regs *regs)
* Oops. The kernel tried to access some bad page. We'll have to * Oops. The kernel tried to access some bad page. We'll have to
* terminate things with extreme prejudice. * terminate things with extreme prejudice.
*/ */
if (!user_space_fault(regs)) if (get_fault_type(regs) == KERNEL_FAULT)
printk(KERN_ALERT "Unable to handle kernel pointer dereference" printk(KERN_ALERT "Unable to handle kernel pointer dereference"
" in virtual kernel address space\n"); " in virtual kernel address space\n");
else else
...@@ -395,12 +412,11 @@ static noinline void do_fault_error(struct pt_regs *regs, int access, int fault) ...@@ -395,12 +412,11 @@ static noinline void do_fault_error(struct pt_regs *regs, int access, int fault)
*/ */
static inline int do_exception(struct pt_regs *regs, int access) static inline int do_exception(struct pt_regs *regs, int access)
{ {
#ifdef CONFIG_PGSTE
struct gmap *gmap; struct gmap *gmap;
#endif
struct task_struct *tsk; struct task_struct *tsk;
struct mm_struct *mm; struct mm_struct *mm;
struct vm_area_struct *vma; struct vm_area_struct *vma;
enum fault_type type;
unsigned long trans_exc_code; unsigned long trans_exc_code;
unsigned long address; unsigned long address;
unsigned int flags; unsigned int flags;
...@@ -425,8 +441,19 @@ static inline int do_exception(struct pt_regs *regs, int access) ...@@ -425,8 +441,19 @@ static inline int do_exception(struct pt_regs *regs, int access)
* user context. * user context.
*/ */
fault = VM_FAULT_BADCONTEXT; fault = VM_FAULT_BADCONTEXT;
if (unlikely(!user_space_fault(regs) || faulthandler_disabled() || !mm)) type = get_fault_type(regs);
switch (type) {
case KERNEL_FAULT:
goto out;
case VDSO_FAULT:
fault = VM_FAULT_BADMAP;
goto out; goto out;
case USER_FAULT:
case GMAP_FAULT:
if (faulthandler_disabled() || !mm)
goto out;
break;
}
address = trans_exc_code & __FAIL_ADDR_MASK; address = trans_exc_code & __FAIL_ADDR_MASK;
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
...@@ -437,10 +464,9 @@ static inline int do_exception(struct pt_regs *regs, int access) ...@@ -437,10 +464,9 @@ static inline int do_exception(struct pt_regs *regs, int access)
flags |= FAULT_FLAG_WRITE; flags |= FAULT_FLAG_WRITE;
down_read(&mm->mmap_sem); down_read(&mm->mmap_sem);
#ifdef CONFIG_PGSTE gmap = NULL;
gmap = (current->flags & PF_VCPU) ? if (IS_ENABLED(CONFIG_PGSTE) && type == GMAP_FAULT) {
(struct gmap *) S390_lowcore.gmap : NULL; gmap = (struct gmap *) S390_lowcore.gmap;
if (gmap) {
current->thread.gmap_addr = address; current->thread.gmap_addr = address;
current->thread.gmap_write_flag = !!(flags & FAULT_FLAG_WRITE); current->thread.gmap_write_flag = !!(flags & FAULT_FLAG_WRITE);
current->thread.gmap_int_code = regs->int_code & 0xffff; current->thread.gmap_int_code = regs->int_code & 0xffff;
...@@ -452,7 +478,6 @@ static inline int do_exception(struct pt_regs *regs, int access) ...@@ -452,7 +478,6 @@ static inline int do_exception(struct pt_regs *regs, int access)
if (gmap->pfault_enabled) if (gmap->pfault_enabled)
flags |= FAULT_FLAG_RETRY_NOWAIT; flags |= FAULT_FLAG_RETRY_NOWAIT;
} }
#endif
retry: retry:
fault = VM_FAULT_BADMAP; fault = VM_FAULT_BADMAP;
...@@ -507,15 +532,14 @@ static inline int do_exception(struct pt_regs *regs, int access) ...@@ -507,15 +532,14 @@ static inline int do_exception(struct pt_regs *regs, int access)
regs, address); regs, address);
} }
if (fault & VM_FAULT_RETRY) { if (fault & VM_FAULT_RETRY) {
#ifdef CONFIG_PGSTE if (IS_ENABLED(CONFIG_PGSTE) && gmap &&
if (gmap && (flags & FAULT_FLAG_RETRY_NOWAIT)) { (flags & FAULT_FLAG_RETRY_NOWAIT)) {
/* FAULT_FLAG_RETRY_NOWAIT has been set, /* FAULT_FLAG_RETRY_NOWAIT has been set,
* mmap_sem has not been released */ * mmap_sem has not been released */
current->thread.gmap_pfault = 1; current->thread.gmap_pfault = 1;
fault = VM_FAULT_PFAULT; fault = VM_FAULT_PFAULT;
goto out_up; goto out_up;
} }
#endif
/* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk /* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
* of starvation. */ * of starvation. */
flags &= ~(FAULT_FLAG_ALLOW_RETRY | flags &= ~(FAULT_FLAG_ALLOW_RETRY |
...@@ -525,8 +549,7 @@ static inline int do_exception(struct pt_regs *regs, int access) ...@@ -525,8 +549,7 @@ static inline int do_exception(struct pt_regs *regs, int access)
goto retry; goto retry;
} }
} }
#ifdef CONFIG_PGSTE if (IS_ENABLED(CONFIG_PGSTE) && gmap) {
if (gmap) {
address = __gmap_link(gmap, current->thread.gmap_addr, address = __gmap_link(gmap, current->thread.gmap_addr,
address); address);
if (address == -EFAULT) { if (address == -EFAULT) {
...@@ -538,7 +561,6 @@ static inline int do_exception(struct pt_regs *regs, int access) ...@@ -538,7 +561,6 @@ static inline int do_exception(struct pt_regs *regs, int access)
goto out_up; goto out_up;
} }
} }
#endif
fault = 0; fault = 0;
out_up: out_up:
up_read(&mm->mmap_sem); up_read(&mm->mmap_sem);
...@@ -706,7 +728,7 @@ static void pfault_interrupt(struct ext_code ext_code, ...@@ -706,7 +728,7 @@ static void pfault_interrupt(struct ext_code ext_code,
return; return;
inc_irq_stat(IRQEXT_PFL); inc_irq_stat(IRQEXT_PFL);
/* Get the token (= pid of the affected task). */ /* Get the token (= pid of the affected task). */
pid = param64 & LPP_PFAULT_PID_MASK; pid = param64 & LPP_PID_MASK;
rcu_read_lock(); rcu_read_lock();
tsk = find_task_by_pid_ns(pid, &init_pid_ns); tsk = find_task_by_pid_ns(pid, &init_pid_ns);
if (tsk) if (tsk)
......
...@@ -1187,12 +1187,11 @@ static void gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr) ...@@ -1187,12 +1187,11 @@ static void gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr)
static void __gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr, static void __gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr,
unsigned long *sgt) unsigned long *sgt)
{ {
unsigned long asce, *pgt; unsigned long *pgt;
struct page *page; struct page *page;
int i; int i;
BUG_ON(!gmap_is_shadow(sg)); BUG_ON(!gmap_is_shadow(sg));
asce = (unsigned long) sgt | _ASCE_TYPE_SEGMENT;
for (i = 0; i < _CRST_ENTRIES; i++, raddr += _SEGMENT_SIZE) { for (i = 0; i < _CRST_ENTRIES; i++, raddr += _SEGMENT_SIZE) {
if (!(sgt[i] & _SEGMENT_ENTRY_ORIGIN)) if (!(sgt[i] & _SEGMENT_ENTRY_ORIGIN))
continue; continue;
...@@ -1245,12 +1244,11 @@ static void gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr) ...@@ -1245,12 +1244,11 @@ static void gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr)
static void __gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr, static void __gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr,
unsigned long *r3t) unsigned long *r3t)
{ {
unsigned long asce, *sgt; unsigned long *sgt;
struct page *page; struct page *page;
int i; int i;
BUG_ON(!gmap_is_shadow(sg)); BUG_ON(!gmap_is_shadow(sg));
asce = (unsigned long) r3t | _ASCE_TYPE_REGION3;
for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION3_SIZE) { for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION3_SIZE) {
if (!(r3t[i] & _REGION_ENTRY_ORIGIN)) if (!(r3t[i] & _REGION_ENTRY_ORIGIN))
continue; continue;
...@@ -1303,12 +1301,11 @@ static void gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr) ...@@ -1303,12 +1301,11 @@ static void gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr)
static void __gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr, static void __gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr,
unsigned long *r2t) unsigned long *r2t)
{ {
unsigned long asce, *r3t; unsigned long *r3t;
struct page *page; struct page *page;
int i; int i;
BUG_ON(!gmap_is_shadow(sg)); BUG_ON(!gmap_is_shadow(sg));
asce = (unsigned long) r2t | _ASCE_TYPE_REGION2;
for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION2_SIZE) { for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION2_SIZE) {
if (!(r2t[i] & _REGION_ENTRY_ORIGIN)) if (!(r2t[i] & _REGION_ENTRY_ORIGIN))
continue; continue;
......
...@@ -95,6 +95,7 @@ void __init paging_init(void) ...@@ -95,6 +95,7 @@ void __init paging_init(void)
} }
init_mm.context.asce = (__pa(init_mm.pgd) & PAGE_MASK) | asce_bits; init_mm.context.asce = (__pa(init_mm.pgd) & PAGE_MASK) | asce_bits;
S390_lowcore.kernel_asce = init_mm.context.asce; S390_lowcore.kernel_asce = init_mm.context.asce;
S390_lowcore.user_asce = S390_lowcore.kernel_asce;
crst_table_init((unsigned long *) init_mm.pgd, pgd_type); crst_table_init((unsigned long *) init_mm.pgd, pgd_type);
vmem_map_init(); vmem_map_init();
......
...@@ -71,10 +71,8 @@ static void __crst_table_upgrade(void *arg) ...@@ -71,10 +71,8 @@ static void __crst_table_upgrade(void *arg)
{ {
struct mm_struct *mm = arg; struct mm_struct *mm = arg;
if (current->active_mm == mm) { if (current->active_mm == mm)
clear_user_asce();
set_user_asce(mm); set_user_asce(mm);
}
__tlb_flush_local(); __tlb_flush_local();
} }
......
...@@ -21,4 +21,4 @@ include/generated/facilities.h: $(obj)/gen_facilities FORCE ...@@ -21,4 +21,4 @@ include/generated/facilities.h: $(obj)/gen_facilities FORCE
$(call filechk,facilities.h) $(call filechk,facilities.h)
include/generated/dis.h: $(obj)/gen_opcode_table FORCE include/generated/dis.h: $(obj)/gen_opcode_table FORCE
$(call filechk,dis.h,__FUN) $(call filechk,dis.h)
...@@ -282,9 +282,9 @@ static void raw3215_start_io(struct raw3215_info *raw) ...@@ -282,9 +282,9 @@ static void raw3215_start_io(struct raw3215_info *raw)
/* /*
* Function to start a delayed output after RAW3215_TIMEOUT seconds * Function to start a delayed output after RAW3215_TIMEOUT seconds
*/ */
static void raw3215_timeout(unsigned long __data) static void raw3215_timeout(struct timer_list *t)
{ {
struct raw3215_info *raw = (struct raw3215_info *) __data; struct raw3215_info *raw = from_timer(raw, t, timer);
unsigned long flags; unsigned long flags;
spin_lock_irqsave(get_ccwdev_lock(raw->cdev), flags); spin_lock_irqsave(get_ccwdev_lock(raw->cdev), flags);
...@@ -670,7 +670,7 @@ static struct raw3215_info *raw3215_alloc_info(void) ...@@ -670,7 +670,7 @@ static struct raw3215_info *raw3215_alloc_info(void)
return NULL; return NULL;
} }
setup_timer(&info->timer, raw3215_timeout, (unsigned long)info); timer_setup(&info->timer, raw3215_timeout, 0);
init_waitqueue_head(&info->empty_wait); init_waitqueue_head(&info->empty_wait);
tasklet_init(&info->tlet, raw3215_wakeup, (unsigned long)info); tasklet_init(&info->tlet, raw3215_wakeup, (unsigned long)info);
tty_port_init(&info->port); tty_port_init(&info->port);
......
...@@ -69,7 +69,7 @@ static struct con3270 *condev; ...@@ -69,7 +69,7 @@ static struct con3270 *condev;
#define CON_UPDATE_STATUS 4 /* Update status line. */ #define CON_UPDATE_STATUS 4 /* Update status line. */
#define CON_UPDATE_ALL 8 /* Recreate screen. */ #define CON_UPDATE_ALL 8 /* Recreate screen. */
static void con3270_update(struct con3270 *); static void con3270_update(struct timer_list *);
/* /*
* Setup timeout for a device. On timeout trigger an update. * Setup timeout for a device. On timeout trigger an update.
...@@ -205,8 +205,9 @@ con3270_write_callback(struct raw3270_request *rq, void *data) ...@@ -205,8 +205,9 @@ con3270_write_callback(struct raw3270_request *rq, void *data)
* Update console display. * Update console display.
*/ */
static void static void
con3270_update(struct con3270 *cp) con3270_update(struct timer_list *t)
{ {
struct con3270 *cp = from_timer(cp, t, timer);
struct raw3270_request *wrq; struct raw3270_request *wrq;
char wcc, prolog[6]; char wcc, prolog[6];
unsigned long flags; unsigned long flags;
...@@ -552,7 +553,7 @@ con3270_flush(void) ...@@ -552,7 +553,7 @@ con3270_flush(void)
con3270_update_status(cp); con3270_update_status(cp);
while (cp->update_flags != 0) { while (cp->update_flags != 0) {
spin_unlock_irqrestore(&cp->view.lock, flags); spin_unlock_irqrestore(&cp->view.lock, flags);
con3270_update(cp); con3270_update(&cp->timer);
spin_lock_irqsave(&cp->view.lock, flags); spin_lock_irqsave(&cp->view.lock, flags);
con3270_wait_write(cp); con3270_wait_write(cp);
} }
...@@ -623,8 +624,7 @@ con3270_init(void) ...@@ -623,8 +624,7 @@ con3270_init(void)
INIT_LIST_HEAD(&condev->lines); INIT_LIST_HEAD(&condev->lines);
INIT_LIST_HEAD(&condev->update); INIT_LIST_HEAD(&condev->update);
setup_timer(&condev->timer, (void (*)(unsigned long)) con3270_update, timer_setup(&condev->timer, con3270_update, 0);
(unsigned long) condev);
tasklet_init(&condev->readlet, tasklet_init(&condev->readlet,
(void (*)(unsigned long)) con3270_read_tasklet, (void (*)(unsigned long)) con3270_read_tasklet,
(unsigned long) condev->read); (unsigned long) condev->read);
......
...@@ -136,6 +136,7 @@ static enum sclp_suspend_state_t { ...@@ -136,6 +136,7 @@ static enum sclp_suspend_state_t {
#define SCLP_BUSY_INTERVAL 10 #define SCLP_BUSY_INTERVAL 10
#define SCLP_RETRY_INTERVAL 30 #define SCLP_RETRY_INTERVAL 30
static void sclp_request_timeout(bool force_restart);
static void sclp_process_queue(void); static void sclp_process_queue(void);
static void __sclp_make_read_req(void); static void __sclp_make_read_req(void);
static int sclp_init_mask(int calculate); static int sclp_init_mask(int calculate);
...@@ -154,25 +155,32 @@ __sclp_queue_read_req(void) ...@@ -154,25 +155,32 @@ __sclp_queue_read_req(void)
/* Set up request retry timer. Called while sclp_lock is locked. */ /* Set up request retry timer. Called while sclp_lock is locked. */
static inline void static inline void
__sclp_set_request_timer(unsigned long time, void (*function)(unsigned long), __sclp_set_request_timer(unsigned long time, void (*cb)(struct timer_list *))
unsigned long data)
{ {
del_timer(&sclp_request_timer); del_timer(&sclp_request_timer);
sclp_request_timer.function = function; sclp_request_timer.function = (TIMER_FUNC_TYPE)cb;
sclp_request_timer.data = data;
sclp_request_timer.expires = jiffies + time; sclp_request_timer.expires = jiffies + time;
add_timer(&sclp_request_timer); add_timer(&sclp_request_timer);
} }
/* Request timeout handler. Restart the request queue. If DATA is non-zero, static void sclp_request_timeout_restart(struct timer_list *unused)
{
sclp_request_timeout(true);
}
static void sclp_request_timeout_normal(struct timer_list *unused)
{
sclp_request_timeout(false);
}
/* Request timeout handler. Restart the request queue. If force_restart,
* force restart of running request. */ * force restart of running request. */
static void static void sclp_request_timeout(bool force_restart)
sclp_request_timeout(unsigned long data)
{ {
unsigned long flags; unsigned long flags;
spin_lock_irqsave(&sclp_lock, flags); spin_lock_irqsave(&sclp_lock, flags);
if (data) { if (force_restart) {
if (sclp_running_state == sclp_running_state_running) { if (sclp_running_state == sclp_running_state_running) {
/* Break running state and queue NOP read event request /* Break running state and queue NOP read event request
* to get a defined interface state. */ * to get a defined interface state. */
...@@ -181,7 +189,7 @@ sclp_request_timeout(unsigned long data) ...@@ -181,7 +189,7 @@ sclp_request_timeout(unsigned long data)
} }
} else { } else {
__sclp_set_request_timer(SCLP_BUSY_INTERVAL * HZ, __sclp_set_request_timer(SCLP_BUSY_INTERVAL * HZ,
sclp_request_timeout, 0); sclp_request_timeout_normal);
} }
spin_unlock_irqrestore(&sclp_lock, flags); spin_unlock_irqrestore(&sclp_lock, flags);
sclp_process_queue(); sclp_process_queue();
...@@ -239,7 +247,7 @@ static struct sclp_req *__sclp_req_queue_remove_expired_req(void) ...@@ -239,7 +247,7 @@ static struct sclp_req *__sclp_req_queue_remove_expired_req(void)
* invokes callback. This timer can be set per request in situations where * invokes callback. This timer can be set per request in situations where
* waiting too long would be harmful to the system, e.g. during SE reboot. * waiting too long would be harmful to the system, e.g. during SE reboot.
*/ */
static void sclp_req_queue_timeout(unsigned long data) static void sclp_req_queue_timeout(struct timer_list *unused)
{ {
unsigned long flags, expires_next; unsigned long flags, expires_next;
struct sclp_req *req; struct sclp_req *req;
...@@ -276,12 +284,12 @@ __sclp_start_request(struct sclp_req *req) ...@@ -276,12 +284,12 @@ __sclp_start_request(struct sclp_req *req)
req->status = SCLP_REQ_RUNNING; req->status = SCLP_REQ_RUNNING;
sclp_running_state = sclp_running_state_running; sclp_running_state = sclp_running_state_running;
__sclp_set_request_timer(SCLP_RETRY_INTERVAL * HZ, __sclp_set_request_timer(SCLP_RETRY_INTERVAL * HZ,
sclp_request_timeout, 1); sclp_request_timeout_restart);
return 0; return 0;
} else if (rc == -EBUSY) { } else if (rc == -EBUSY) {
/* Try again later */ /* Try again later */
__sclp_set_request_timer(SCLP_BUSY_INTERVAL * HZ, __sclp_set_request_timer(SCLP_BUSY_INTERVAL * HZ,
sclp_request_timeout, 0); sclp_request_timeout_normal);
return 0; return 0;
} }
/* Request failed */ /* Request failed */
...@@ -315,7 +323,7 @@ sclp_process_queue(void) ...@@ -315,7 +323,7 @@ sclp_process_queue(void)
/* Cannot abort already submitted request - could still /* Cannot abort already submitted request - could still
* be active at the SCLP */ * be active at the SCLP */
__sclp_set_request_timer(SCLP_BUSY_INTERVAL * HZ, __sclp_set_request_timer(SCLP_BUSY_INTERVAL * HZ,
sclp_request_timeout, 0); sclp_request_timeout_normal);
break; break;
} }
do_post: do_post:
...@@ -558,7 +566,7 @@ sclp_sync_wait(void) ...@@ -558,7 +566,7 @@ sclp_sync_wait(void)
if (timer_pending(&sclp_request_timer) && if (timer_pending(&sclp_request_timer) &&
get_tod_clock_fast() > timeout && get_tod_clock_fast() > timeout &&
del_timer(&sclp_request_timer)) del_timer(&sclp_request_timer))
sclp_request_timer.function(sclp_request_timer.data); sclp_request_timer.function((TIMER_DATA_TYPE)&sclp_request_timer);
cpu_relax(); cpu_relax();
} }
local_irq_disable(); local_irq_disable();
...@@ -915,7 +923,7 @@ static void sclp_check_handler(struct ext_code ext_code, ...@@ -915,7 +923,7 @@ static void sclp_check_handler(struct ext_code ext_code,
/* Initial init mask request timed out. Modify request state to failed. */ /* Initial init mask request timed out. Modify request state to failed. */
static void static void
sclp_check_timeout(unsigned long data) sclp_check_timeout(struct timer_list *unused)
{ {
unsigned long flags; unsigned long flags;
...@@ -954,7 +962,7 @@ sclp_check_interface(void) ...@@ -954,7 +962,7 @@ sclp_check_interface(void)
sclp_init_req.status = SCLP_REQ_RUNNING; sclp_init_req.status = SCLP_REQ_RUNNING;
sclp_running_state = sclp_running_state_running; sclp_running_state = sclp_running_state_running;
__sclp_set_request_timer(SCLP_RETRY_INTERVAL * HZ, __sclp_set_request_timer(SCLP_RETRY_INTERVAL * HZ,
sclp_check_timeout, 0); sclp_check_timeout);
spin_unlock_irqrestore(&sclp_lock, flags); spin_unlock_irqrestore(&sclp_lock, flags);
/* Enable service-signal interruption - needs to happen /* Enable service-signal interruption - needs to happen
* with IRQs enabled. */ * with IRQs enabled. */
...@@ -1159,9 +1167,8 @@ sclp_init(void) ...@@ -1159,9 +1167,8 @@ sclp_init(void)
INIT_LIST_HEAD(&sclp_req_queue); INIT_LIST_HEAD(&sclp_req_queue);
INIT_LIST_HEAD(&sclp_reg_list); INIT_LIST_HEAD(&sclp_reg_list);
list_add(&sclp_state_change_event.list, &sclp_reg_list); list_add(&sclp_state_change_event.list, &sclp_reg_list);
init_timer(&sclp_request_timer); timer_setup(&sclp_request_timer, NULL, 0);
init_timer(&sclp_queue_timer); timer_setup(&sclp_queue_timer, sclp_req_queue_timeout, 0);
sclp_queue_timer.function = sclp_req_queue_timeout;
/* Check interface */ /* Check interface */
spin_unlock_irqrestore(&sclp_lock, flags); spin_unlock_irqrestore(&sclp_lock, flags);
rc = sclp_check_interface(); rc = sclp_check_interface();
......
...@@ -125,7 +125,7 @@ static void sclp_console_sync_queue(void) ...@@ -125,7 +125,7 @@ static void sclp_console_sync_queue(void)
* temporary write buffer without further waiting on a final new line. * temporary write buffer without further waiting on a final new line.
*/ */
static void static void
sclp_console_timeout(unsigned long data) sclp_console_timeout(struct timer_list *unused)
{ {
sclp_conbuf_emit(); sclp_conbuf_emit();
} }
...@@ -211,7 +211,6 @@ sclp_console_write(struct console *console, const char *message, ...@@ -211,7 +211,6 @@ sclp_console_write(struct console *console, const char *message,
/* Setup timer to output current console buffer after 1/10 second */ /* Setup timer to output current console buffer after 1/10 second */
if (sclp_conbuf != NULL && sclp_chars_in_buffer(sclp_conbuf) != 0 && if (sclp_conbuf != NULL && sclp_chars_in_buffer(sclp_conbuf) != 0 &&
!timer_pending(&sclp_con_timer)) { !timer_pending(&sclp_con_timer)) {
setup_timer(&sclp_con_timer, sclp_console_timeout, 0UL);
mod_timer(&sclp_con_timer, jiffies + HZ / 10); mod_timer(&sclp_con_timer, jiffies + HZ / 10);
} }
out: out:
...@@ -332,7 +331,7 @@ sclp_console_init(void) ...@@ -332,7 +331,7 @@ sclp_console_init(void)
INIT_LIST_HEAD(&sclp_con_outqueue); INIT_LIST_HEAD(&sclp_con_outqueue);
spin_lock_init(&sclp_con_lock); spin_lock_init(&sclp_con_lock);
sclp_conbuf = NULL; sclp_conbuf = NULL;
init_timer(&sclp_con_timer); timer_setup(&sclp_con_timer, sclp_console_timeout, 0);
/* Set output format */ /* Set output format */
if (MACHINE_IS_VM) if (MACHINE_IS_VM)
......
...@@ -151,7 +151,7 @@ __sclp_ttybuf_emit(struct sclp_buffer *buffer) ...@@ -151,7 +151,7 @@ __sclp_ttybuf_emit(struct sclp_buffer *buffer)
* temporary write buffer. * temporary write buffer.
*/ */
static void static void
sclp_tty_timeout(unsigned long data) sclp_tty_timeout(struct timer_list *unused)
{ {
unsigned long flags; unsigned long flags;
struct sclp_buffer *buf; struct sclp_buffer *buf;
...@@ -218,7 +218,6 @@ static int sclp_tty_write_string(const unsigned char *str, int count, int may_fa ...@@ -218,7 +218,6 @@ static int sclp_tty_write_string(const unsigned char *str, int count, int may_fa
/* Setup timer to output current console buffer after 1/10 second */ /* Setup timer to output current console buffer after 1/10 second */
if (sclp_ttybuf && sclp_chars_in_buffer(sclp_ttybuf) && if (sclp_ttybuf && sclp_chars_in_buffer(sclp_ttybuf) &&
!timer_pending(&sclp_tty_timer)) { !timer_pending(&sclp_tty_timer)) {
setup_timer(&sclp_tty_timer, sclp_tty_timeout, 0UL);
mod_timer(&sclp_tty_timer, jiffies + HZ / 10); mod_timer(&sclp_tty_timer, jiffies + HZ / 10);
} }
spin_unlock_irqrestore(&sclp_tty_lock, flags); spin_unlock_irqrestore(&sclp_tty_lock, flags);
...@@ -526,7 +525,7 @@ sclp_tty_init(void) ...@@ -526,7 +525,7 @@ sclp_tty_init(void)
} }
INIT_LIST_HEAD(&sclp_tty_outqueue); INIT_LIST_HEAD(&sclp_tty_outqueue);
spin_lock_init(&sclp_tty_lock); spin_lock_init(&sclp_tty_lock);
init_timer(&sclp_tty_timer); timer_setup(&sclp_tty_timer, sclp_tty_timeout, 0);
sclp_ttybuf = NULL; sclp_ttybuf = NULL;
sclp_tty_buffer_count = 0; sclp_tty_buffer_count = 0;
if (MACHINE_IS_VM) { if (MACHINE_IS_VM) {
......
...@@ -357,7 +357,7 @@ sclp_vt220_add_msg(struct sclp_vt220_request *request, ...@@ -357,7 +357,7 @@ sclp_vt220_add_msg(struct sclp_vt220_request *request,
* Emit buffer after having waited long enough for more data to arrive. * Emit buffer after having waited long enough for more data to arrive.
*/ */
static void static void
sclp_vt220_timeout(unsigned long data) sclp_vt220_timeout(struct timer_list *unused)
{ {
sclp_vt220_emit_current(); sclp_vt220_emit_current();
} }
...@@ -454,8 +454,6 @@ __sclp_vt220_write(const unsigned char *buf, int count, int do_schedule, ...@@ -454,8 +454,6 @@ __sclp_vt220_write(const unsigned char *buf, int count, int do_schedule,
/* Setup timer to output current console buffer after some time */ /* Setup timer to output current console buffer after some time */
if (sclp_vt220_current_request != NULL && if (sclp_vt220_current_request != NULL &&
!timer_pending(&sclp_vt220_timer) && do_schedule) { !timer_pending(&sclp_vt220_timer) && do_schedule) {
sclp_vt220_timer.function = sclp_vt220_timeout;
sclp_vt220_timer.data = 0UL;
sclp_vt220_timer.expires = jiffies + BUFFER_MAX_DELAY; sclp_vt220_timer.expires = jiffies + BUFFER_MAX_DELAY;
add_timer(&sclp_vt220_timer); add_timer(&sclp_vt220_timer);
} }
...@@ -699,7 +697,7 @@ static int __init __sclp_vt220_init(int num_pages) ...@@ -699,7 +697,7 @@ static int __init __sclp_vt220_init(int num_pages)
spin_lock_init(&sclp_vt220_lock); spin_lock_init(&sclp_vt220_lock);
INIT_LIST_HEAD(&sclp_vt220_empty); INIT_LIST_HEAD(&sclp_vt220_empty);
INIT_LIST_HEAD(&sclp_vt220_outqueue); INIT_LIST_HEAD(&sclp_vt220_outqueue);
init_timer(&sclp_vt220_timer); timer_setup(&sclp_vt220_timer, sclp_vt220_timeout, 0);
tty_port_init(&sclp_vt220_port); tty_port_init(&sclp_vt220_port);
sclp_vt220_current_request = NULL; sclp_vt220_current_request = NULL;
sclp_vt220_buffered_chars = 0; sclp_vt220_buffered_chars = 0;
......
...@@ -32,7 +32,7 @@ ...@@ -32,7 +32,7 @@
static void __tape_do_irq (struct ccw_device *, unsigned long, struct irb *); static void __tape_do_irq (struct ccw_device *, unsigned long, struct irb *);
static void tape_delayed_next_request(struct work_struct *); static void tape_delayed_next_request(struct work_struct *);
static void tape_long_busy_timeout(unsigned long data); static void tape_long_busy_timeout(struct timer_list *t);
/* /*
* One list to contain all tape devices of all disciplines, so * One list to contain all tape devices of all disciplines, so
...@@ -381,8 +381,7 @@ tape_generic_online(struct tape_device *device, ...@@ -381,8 +381,7 @@ tape_generic_online(struct tape_device *device,
return -EINVAL; return -EINVAL;
} }
init_timer(&device->lb_timeout); timer_setup(&device->lb_timeout, tape_long_busy_timeout, 0);
device->lb_timeout.function = tape_long_busy_timeout;
/* Let the discipline have a go at the device. */ /* Let the discipline have a go at the device. */
device->discipline = discipline; device->discipline = discipline;
...@@ -867,18 +866,16 @@ tape_delayed_next_request(struct work_struct *work) ...@@ -867,18 +866,16 @@ tape_delayed_next_request(struct work_struct *work)
spin_unlock_irq(get_ccwdev_lock(device->cdev)); spin_unlock_irq(get_ccwdev_lock(device->cdev));
} }
static void tape_long_busy_timeout(unsigned long data) static void tape_long_busy_timeout(struct timer_list *t)
{ {
struct tape_device *device = from_timer(device, t, lb_timeout);
struct tape_request *request; struct tape_request *request;
struct tape_device *device;
device = (struct tape_device *) data;
spin_lock_irq(get_ccwdev_lock(device->cdev)); spin_lock_irq(get_ccwdev_lock(device->cdev));
request = list_entry(device->req_queue.next, struct tape_request, list); request = list_entry(device->req_queue.next, struct tape_request, list);
BUG_ON(request->status != TAPE_REQUEST_LONG_BUSY); BUG_ON(request->status != TAPE_REQUEST_LONG_BUSY);
DBF_LH(6, "%08x: Long busy timeout.\n", device->cdev_id); DBF_LH(6, "%08x: Long busy timeout.\n", device->cdev_id);
__tape_start_next_request(device); __tape_start_next_request(device);
device->lb_timeout.data = 0UL;
tape_put_device(device); tape_put_device(device);
spin_unlock_irq(get_ccwdev_lock(device->cdev)); spin_unlock_irq(get_ccwdev_lock(device->cdev));
} }
...@@ -1157,7 +1154,6 @@ __tape_do_irq (struct ccw_device *cdev, unsigned long intparm, struct irb *irb) ...@@ -1157,7 +1154,6 @@ __tape_do_irq (struct ccw_device *cdev, unsigned long intparm, struct irb *irb)
if (req->status == TAPE_REQUEST_LONG_BUSY) { if (req->status == TAPE_REQUEST_LONG_BUSY) {
DBF_EVENT(3, "(%08x): del timer\n", device->cdev_id); DBF_EVENT(3, "(%08x): del timer\n", device->cdev_id);
if (del_timer(&device->lb_timeout)) { if (del_timer(&device->lb_timeout)) {
device->lb_timeout.data = 0UL;
tape_put_device(device); tape_put_device(device);
__tape_start_next_request(device); __tape_start_next_request(device);
} }
...@@ -1212,8 +1208,6 @@ __tape_do_irq (struct ccw_device *cdev, unsigned long intparm, struct irb *irb) ...@@ -1212,8 +1208,6 @@ __tape_do_irq (struct ccw_device *cdev, unsigned long intparm, struct irb *irb)
case TAPE_IO_PENDING: case TAPE_IO_PENDING:
break; break;
case TAPE_IO_LONG_BUSY: case TAPE_IO_LONG_BUSY:
device->lb_timeout.data =
(unsigned long) tape_get_device(device);
device->lb_timeout.expires = jiffies + device->lb_timeout.expires = jiffies +
LONG_BUSY_TIMEOUT * HZ; LONG_BUSY_TIMEOUT * HZ;
DBF_EVENT(3, "(%08x): add timer\n", device->cdev_id); DBF_EVENT(3, "(%08x): add timer\n", device->cdev_id);
......
...@@ -118,7 +118,7 @@ struct tty3270 { ...@@ -118,7 +118,7 @@ struct tty3270 {
#define TTY_UPDATE_STATUS 8 /* Update status line. */ #define TTY_UPDATE_STATUS 8 /* Update status line. */
#define TTY_UPDATE_ALL 16 /* Recreate screen. */ #define TTY_UPDATE_ALL 16 /* Recreate screen. */
static void tty3270_update(struct tty3270 *); static void tty3270_update(struct timer_list *);
static void tty3270_resize_work(struct work_struct *work); static void tty3270_resize_work(struct work_struct *work);
/* /*
...@@ -361,8 +361,9 @@ tty3270_write_callback(struct raw3270_request *rq, void *data) ...@@ -361,8 +361,9 @@ tty3270_write_callback(struct raw3270_request *rq, void *data)
* Update 3270 display. * Update 3270 display.
*/ */
static void static void
tty3270_update(struct tty3270 *tp) tty3270_update(struct timer_list *t)
{ {
struct tty3270 *tp = from_timer(tp, t, timer);
static char invalid_sba[2] = { 0xff, 0xff }; static char invalid_sba[2] = { 0xff, 0xff };
struct raw3270_request *wrq; struct raw3270_request *wrq;
unsigned long updated; unsigned long updated;
...@@ -748,8 +749,7 @@ tty3270_alloc_view(void) ...@@ -748,8 +749,7 @@ tty3270_alloc_view(void)
goto out_reset; goto out_reset;
tty_port_init(&tp->port); tty_port_init(&tp->port);
setup_timer(&tp->timer, (void (*)(unsigned long)) tty3270_update, timer_setup(&tp->timer, tty3270_update, 0);
(unsigned long) tp);
tasklet_init(&tp->readlet, tasklet_init(&tp->readlet,
(void (*)(unsigned long)) tty3270_read_tasklet, (void (*)(unsigned long)) tty3270_read_tasklet,
(unsigned long) tp->read); (unsigned long) tp->read);
......
...@@ -142,7 +142,7 @@ static void io_subchannel_shutdown(struct subchannel *); ...@@ -142,7 +142,7 @@ static void io_subchannel_shutdown(struct subchannel *);
static int io_subchannel_sch_event(struct subchannel *, int); static int io_subchannel_sch_event(struct subchannel *, int);
static int io_subchannel_chp_event(struct subchannel *, struct chp_link *, static int io_subchannel_chp_event(struct subchannel *, struct chp_link *,
int); int);
static void recovery_func(unsigned long data); static void recovery_func(struct timer_list *unused);
static struct css_device_id io_subchannel_ids[] = { static struct css_device_id io_subchannel_ids[] = {
{ .match_flags = 0x1, .type = SUBCHANNEL_TYPE_IO, }, { .match_flags = 0x1, .type = SUBCHANNEL_TYPE_IO, },
...@@ -194,7 +194,7 @@ int __init io_subchannel_init(void) ...@@ -194,7 +194,7 @@ int __init io_subchannel_init(void)
{ {
int ret; int ret;
setup_timer(&recovery_timer, recovery_func, 0); timer_setup(&recovery_timer, recovery_func, 0);
ret = bus_register(&ccw_bus_type); ret = bus_register(&ccw_bus_type);
if (ret) if (ret)
return ret; return ret;
...@@ -726,7 +726,7 @@ static int io_subchannel_initialize_dev(struct subchannel *sch, ...@@ -726,7 +726,7 @@ static int io_subchannel_initialize_dev(struct subchannel *sch,
INIT_WORK(&priv->todo_work, ccw_device_todo); INIT_WORK(&priv->todo_work, ccw_device_todo);
INIT_LIST_HEAD(&priv->cmb_list); INIT_LIST_HEAD(&priv->cmb_list);
init_waitqueue_head(&priv->wait_q); init_waitqueue_head(&priv->wait_q);
init_timer(&priv->timer); timer_setup(&priv->timer, ccw_device_timeout, 0);
atomic_set(&priv->onoff, 0); atomic_set(&priv->onoff, 0);
cdev->ccwlock = sch->lock; cdev->ccwlock = sch->lock;
...@@ -1271,7 +1271,7 @@ static void recovery_work_func(struct work_struct *unused) ...@@ -1271,7 +1271,7 @@ static void recovery_work_func(struct work_struct *unused)
static DECLARE_WORK(recovery_work, recovery_work_func); static DECLARE_WORK(recovery_work, recovery_work_func);
static void recovery_func(unsigned long data) static void recovery_func(struct timer_list *unused)
{ {
/* /*
* We can't do our recovery in softirq context and it's not * We can't do our recovery in softirq context and it's not
......
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
#include <asm/ccwdev.h> #include <asm/ccwdev.h>
#include <linux/atomic.h> #include <linux/atomic.h>
#include <linux/timer.h>
#include <linux/wait.h> #include <linux/wait.h>
#include <linux/notifier.h> #include <linux/notifier.h>
#include <linux/kernel_stat.h> #include <linux/kernel_stat.h>
...@@ -134,6 +135,7 @@ int ccw_device_notify(struct ccw_device *, int); ...@@ -134,6 +135,7 @@ int ccw_device_notify(struct ccw_device *, int);
void ccw_device_set_disconnected(struct ccw_device *cdev); void ccw_device_set_disconnected(struct ccw_device *cdev);
void ccw_device_set_notoper(struct ccw_device *cdev); void ccw_device_set_notoper(struct ccw_device *cdev);
void ccw_device_timeout(struct timer_list *t);
void ccw_device_set_timeout(struct ccw_device *, int); void ccw_device_set_timeout(struct ccw_device *, int);
void ccw_device_schedule_recovery(void); void ccw_device_schedule_recovery(void);
......
...@@ -91,12 +91,12 @@ static void ccw_timeout_log(struct ccw_device *cdev) ...@@ -91,12 +91,12 @@ static void ccw_timeout_log(struct ccw_device *cdev)
/* /*
* Timeout function. It just triggers a DEV_EVENT_TIMEOUT. * Timeout function. It just triggers a DEV_EVENT_TIMEOUT.
*/ */
static void void
ccw_device_timeout(unsigned long data) ccw_device_timeout(struct timer_list *t)
{ {
struct ccw_device *cdev; struct ccw_device_private *priv = from_timer(priv, t, timer);
struct ccw_device *cdev = priv->cdev;
cdev = (struct ccw_device *) data;
spin_lock_irq(cdev->ccwlock); spin_lock_irq(cdev->ccwlock);
if (timeout_log_enabled) if (timeout_log_enabled)
ccw_timeout_log(cdev); ccw_timeout_log(cdev);
...@@ -118,8 +118,6 @@ ccw_device_set_timeout(struct ccw_device *cdev, int expires) ...@@ -118,8 +118,6 @@ ccw_device_set_timeout(struct ccw_device *cdev, int expires)
if (mod_timer(&cdev->private->timer, jiffies + expires)) if (mod_timer(&cdev->private->timer, jiffies + expires))
return; return;
} }
cdev->private->timer.function = ccw_device_timeout;
cdev->private->timer.data = (unsigned long) cdev;
cdev->private->timer.expires = jiffies + expires; cdev->private->timer.expires = jiffies + expires;
add_timer(&cdev->private->timer); add_timer(&cdev->private->timer);
} }
......
...@@ -94,9 +94,10 @@ static int eadm_subchannel_clear(struct subchannel *sch) ...@@ -94,9 +94,10 @@ static int eadm_subchannel_clear(struct subchannel *sch)
return 0; return 0;
} }
static void eadm_subchannel_timeout(unsigned long data) static void eadm_subchannel_timeout(struct timer_list *t)
{ {
struct subchannel *sch = (struct subchannel *) data; struct eadm_private *private = from_timer(private, t, timer);
struct subchannel *sch = private->sch;
spin_lock_irq(sch->lock); spin_lock_irq(sch->lock);
EADM_LOG(1, "timeout"); EADM_LOG(1, "timeout");
...@@ -118,8 +119,6 @@ static void eadm_subchannel_set_timeout(struct subchannel *sch, int expires) ...@@ -118,8 +119,6 @@ static void eadm_subchannel_set_timeout(struct subchannel *sch, int expires)
if (mod_timer(&private->timer, jiffies + expires)) if (mod_timer(&private->timer, jiffies + expires))
return; return;
} }
private->timer.function = eadm_subchannel_timeout;
private->timer.data = (unsigned long) sch;
private->timer.expires = jiffies + expires; private->timer.expires = jiffies + expires;
add_timer(&private->timer); add_timer(&private->timer);
} }
...@@ -224,7 +223,7 @@ static int eadm_subchannel_probe(struct subchannel *sch) ...@@ -224,7 +223,7 @@ static int eadm_subchannel_probe(struct subchannel *sch)
return -ENOMEM; return -ENOMEM;
INIT_LIST_HEAD(&private->head); INIT_LIST_HEAD(&private->head);
init_timer(&private->timer); timer_setup(&private->timer, eadm_subchannel_timeout, 0);
spin_lock_irq(sch->lock); spin_lock_irq(sch->lock);
set_eadm_private(sch, private); set_eadm_private(sch, private);
......
...@@ -393,7 +393,7 @@ int test_nonshared_ind(struct qdio_irq *); ...@@ -393,7 +393,7 @@ int test_nonshared_ind(struct qdio_irq *);
/* prototypes for setup */ /* prototypes for setup */
void qdio_inbound_processing(unsigned long data); void qdio_inbound_processing(unsigned long data);
void qdio_outbound_processing(unsigned long data); void qdio_outbound_processing(unsigned long data);
void qdio_outbound_timer(unsigned long data); void qdio_outbound_timer(struct timer_list *t);
void qdio_int_handler(struct ccw_device *cdev, unsigned long intparm, void qdio_int_handler(struct ccw_device *cdev, unsigned long intparm,
struct irb *irb); struct irb *irb);
int qdio_allocate_qs(struct qdio_irq *irq_ptr, int nr_input_qs, int qdio_allocate_qs(struct qdio_irq *irq_ptr, int nr_input_qs,
......
...@@ -894,9 +894,9 @@ void qdio_outbound_processing(unsigned long data) ...@@ -894,9 +894,9 @@ void qdio_outbound_processing(unsigned long data)
__qdio_outbound_processing(q); __qdio_outbound_processing(q);
} }
void qdio_outbound_timer(unsigned long data) void qdio_outbound_timer(struct timer_list *t)
{ {
struct qdio_q *q = (struct qdio_q *)data; struct qdio_q *q = from_timer(q, t, u.out.timer);
qdio_tasklet_schedule(q); qdio_tasklet_schedule(q);
} }
......
...@@ -252,8 +252,7 @@ static void setup_queues(struct qdio_irq *irq_ptr, ...@@ -252,8 +252,7 @@ static void setup_queues(struct qdio_irq *irq_ptr,
tasklet_init(&q->tasklet, qdio_outbound_processing, tasklet_init(&q->tasklet, qdio_outbound_processing,
(unsigned long) q); (unsigned long) q);
setup_timer(&q->u.out.timer, (void(*)(unsigned long)) timer_setup(&q->u.out.timer, qdio_outbound_timer, 0);
&qdio_outbound_timer, (unsigned long)q);
} }
} }
......
...@@ -374,13 +374,13 @@ void ap_wait(enum ap_wait wait) ...@@ -374,13 +374,13 @@ void ap_wait(enum ap_wait wait)
/** /**
* ap_request_timeout(): Handling of request timeouts * ap_request_timeout(): Handling of request timeouts
* @data: Holds the AP device. * @t: timer making this callback
* *
* Handles request timeouts. * Handles request timeouts.
*/ */
void ap_request_timeout(unsigned long data) void ap_request_timeout(struct timer_list *t)
{ {
struct ap_queue *aq = (struct ap_queue *) data; struct ap_queue *aq = from_timer(aq, t, timeout);
if (ap_suspend_flag) if (ap_suspend_flag)
return; return;
...@@ -1203,7 +1203,7 @@ static void ap_scan_bus(struct work_struct *unused) ...@@ -1203,7 +1203,7 @@ static void ap_scan_bus(struct work_struct *unused)
mod_timer(&ap_config_timer, jiffies + ap_config_time * HZ); mod_timer(&ap_config_timer, jiffies + ap_config_time * HZ);
} }
static void ap_config_timeout(unsigned long ptr) static void ap_config_timeout(struct timer_list *unused)
{ {
if (ap_suspend_flag) if (ap_suspend_flag)
return; return;
...@@ -1306,7 +1306,7 @@ int __init ap_module_init(void) ...@@ -1306,7 +1306,7 @@ int __init ap_module_init(void)
goto out_bus; goto out_bus;
/* Setup the AP bus rescan timer. */ /* Setup the AP bus rescan timer. */
setup_timer(&ap_config_timer, ap_config_timeout, 0); timer_setup(&ap_config_timer, ap_config_timeout, 0);
/* /*
* Setup the high resolution poll timer. * Setup the high resolution poll timer.
......
...@@ -241,7 +241,7 @@ void ap_flush_queue(struct ap_queue *aq); ...@@ -241,7 +241,7 @@ void ap_flush_queue(struct ap_queue *aq);
void *ap_airq_ptr(void); void *ap_airq_ptr(void);
void ap_wait(enum ap_wait wait); void ap_wait(enum ap_wait wait);
void ap_request_timeout(unsigned long data); void ap_request_timeout(struct timer_list *t);
void ap_bus_force_rescan(void); void ap_bus_force_rescan(void);
void ap_queue_init_reply(struct ap_queue *aq, struct ap_message *ap_msg); void ap_queue_init_reply(struct ap_queue *aq, struct ap_message *ap_msg);
......
...@@ -634,7 +634,7 @@ struct ap_queue *ap_queue_create(ap_qid_t qid, int device_type) ...@@ -634,7 +634,7 @@ struct ap_queue *ap_queue_create(ap_qid_t qid, int device_type)
INIT_LIST_HEAD(&aq->list); INIT_LIST_HEAD(&aq->list);
INIT_LIST_HEAD(&aq->pendingq); INIT_LIST_HEAD(&aq->pendingq);
INIT_LIST_HEAD(&aq->requestq); INIT_LIST_HEAD(&aq->requestq);
setup_timer(&aq->timeout, ap_request_timeout, (unsigned long) aq); timer_setup(&aq->timeout, ap_request_timeout, 0);
return aq; return aq;
} }
......
...@@ -564,21 +564,24 @@ void zfcp_erp_notify(struct zfcp_erp_action *erp_action, unsigned long set_mask) ...@@ -564,21 +564,24 @@ void zfcp_erp_notify(struct zfcp_erp_action *erp_action, unsigned long set_mask)
* zfcp_erp_timeout_handler - Trigger ERP action from timed out ERP request * zfcp_erp_timeout_handler - Trigger ERP action from timed out ERP request
* @data: ERP action (from timer data) * @data: ERP action (from timer data)
*/ */
void zfcp_erp_timeout_handler(unsigned long data) void zfcp_erp_timeout_handler(struct timer_list *t)
{ {
struct zfcp_erp_action *act = (struct zfcp_erp_action *) data; struct zfcp_fsf_req *fsf_req = from_timer(fsf_req, t, timer);
struct zfcp_erp_action *act = fsf_req->erp_action;
zfcp_erp_notify(act, ZFCP_STATUS_ERP_TIMEDOUT); zfcp_erp_notify(act, ZFCP_STATUS_ERP_TIMEDOUT);
} }
static void zfcp_erp_memwait_handler(unsigned long data) static void zfcp_erp_memwait_handler(struct timer_list *t)
{ {
zfcp_erp_notify((struct zfcp_erp_action *)data, 0); struct zfcp_erp_action *act = from_timer(act, t, timer);
zfcp_erp_notify(act, 0);
} }
static void zfcp_erp_strategy_memwait(struct zfcp_erp_action *erp_action) static void zfcp_erp_strategy_memwait(struct zfcp_erp_action *erp_action)
{ {
setup_timer(&erp_action->timer, zfcp_erp_memwait_handler, timer_setup(&erp_action->timer, zfcp_erp_memwait_handler, 0);
(unsigned long) erp_action);
erp_action->timer.expires = jiffies + HZ; erp_action->timer.expires = jiffies + HZ;
add_timer(&erp_action->timer); add_timer(&erp_action->timer);
} }
......
...@@ -69,7 +69,7 @@ extern int zfcp_erp_thread_setup(struct zfcp_adapter *); ...@@ -69,7 +69,7 @@ extern int zfcp_erp_thread_setup(struct zfcp_adapter *);
extern void zfcp_erp_thread_kill(struct zfcp_adapter *); extern void zfcp_erp_thread_kill(struct zfcp_adapter *);
extern void zfcp_erp_wait(struct zfcp_adapter *); extern void zfcp_erp_wait(struct zfcp_adapter *);
extern void zfcp_erp_notify(struct zfcp_erp_action *, unsigned long); extern void zfcp_erp_notify(struct zfcp_erp_action *, unsigned long);
extern void zfcp_erp_timeout_handler(unsigned long); extern void zfcp_erp_timeout_handler(struct timer_list *t);
/* zfcp_fc.c */ /* zfcp_fc.c */
extern struct kmem_cache *zfcp_fc_req_cache; extern struct kmem_cache *zfcp_fc_req_cache;
......
...@@ -21,9 +21,11 @@ ...@@ -21,9 +21,11 @@
struct kmem_cache *zfcp_fsf_qtcb_cache; struct kmem_cache *zfcp_fsf_qtcb_cache;
static void zfcp_fsf_request_timeout_handler(unsigned long data) static void zfcp_fsf_request_timeout_handler(struct timer_list *t)
{ {
struct zfcp_adapter *adapter = (struct zfcp_adapter *) data; struct zfcp_fsf_req *fsf_req = from_timer(fsf_req, t, timer);
struct zfcp_adapter *adapter = fsf_req->adapter;
zfcp_qdio_siosl(adapter); zfcp_qdio_siosl(adapter);
zfcp_erp_adapter_reopen(adapter, ZFCP_STATUS_COMMON_ERP_FAILED, zfcp_erp_adapter_reopen(adapter, ZFCP_STATUS_COMMON_ERP_FAILED,
"fsrth_1"); "fsrth_1");
...@@ -32,8 +34,7 @@ static void zfcp_fsf_request_timeout_handler(unsigned long data) ...@@ -32,8 +34,7 @@ static void zfcp_fsf_request_timeout_handler(unsigned long data)
static void zfcp_fsf_start_timer(struct zfcp_fsf_req *fsf_req, static void zfcp_fsf_start_timer(struct zfcp_fsf_req *fsf_req,
unsigned long timeout) unsigned long timeout)
{ {
fsf_req->timer.function = zfcp_fsf_request_timeout_handler; fsf_req->timer.function = (TIMER_FUNC_TYPE)zfcp_fsf_request_timeout_handler;
fsf_req->timer.data = (unsigned long) fsf_req->adapter;
fsf_req->timer.expires = jiffies + timeout; fsf_req->timer.expires = jiffies + timeout;
add_timer(&fsf_req->timer); add_timer(&fsf_req->timer);
} }
...@@ -41,8 +42,7 @@ static void zfcp_fsf_start_timer(struct zfcp_fsf_req *fsf_req, ...@@ -41,8 +42,7 @@ static void zfcp_fsf_start_timer(struct zfcp_fsf_req *fsf_req,
static void zfcp_fsf_start_erp_timer(struct zfcp_fsf_req *fsf_req) static void zfcp_fsf_start_erp_timer(struct zfcp_fsf_req *fsf_req)
{ {
BUG_ON(!fsf_req->erp_action); BUG_ON(!fsf_req->erp_action);
fsf_req->timer.function = zfcp_erp_timeout_handler; fsf_req->timer.function = (TIMER_FUNC_TYPE)zfcp_erp_timeout_handler;
fsf_req->timer.data = (unsigned long) fsf_req->erp_action;
fsf_req->timer.expires = jiffies + 30 * HZ; fsf_req->timer.expires = jiffies + 30 * HZ;
add_timer(&fsf_req->timer); add_timer(&fsf_req->timer);
} }
...@@ -692,7 +692,7 @@ static struct zfcp_fsf_req *zfcp_fsf_req_create(struct zfcp_qdio *qdio, ...@@ -692,7 +692,7 @@ static struct zfcp_fsf_req *zfcp_fsf_req_create(struct zfcp_qdio *qdio,
adapter->req_no++; adapter->req_no++;
INIT_LIST_HEAD(&req->list); INIT_LIST_HEAD(&req->list);
init_timer(&req->timer); timer_setup(&req->timer, NULL, 0);
init_completion(&req->completion); init_completion(&req->completion);
req->adapter = adapter; req->adapter = adapter;
......
...@@ -53,6 +53,10 @@ ifeq ($(SRCARCH),arm64) ...@@ -53,6 +53,10 @@ ifeq ($(SRCARCH),arm64)
LIBUNWIND_LIBS = -lunwind -lunwind-aarch64 LIBUNWIND_LIBS = -lunwind -lunwind-aarch64
endif endif
ifeq ($(ARCH),s390)
NO_PERF_REGS := 0
endif
ifeq ($(NO_PERF_REGS),0) ifeq ($(NO_PERF_REGS),0)
$(call detected,CONFIG_PERF_REGS) $(call detected,CONFIG_PERF_REGS)
endif endif
...@@ -61,7 +65,7 @@ endif ...@@ -61,7 +65,7 @@ endif
# Disable it on all other architectures in case libdw unwind # Disable it on all other architectures in case libdw unwind
# support is detected in system. Add supported architectures # support is detected in system. Add supported architectures
# to the check. # to the check.
ifneq ($(SRCARCH),$(filter $(SRCARCH),x86 arm powerpc)) ifneq ($(SRCARCH),$(filter $(SRCARCH),x86 arm powerpc s390))
NO_LIBDW_DWARF_UNWIND := 1 NO_LIBDW_DWARF_UNWIND := 1
endif endif
......
/* SPDX-License-Identifier: GPL-2.0 */ /* SPDX-License-Identifier: GPL-2.0 */
#ifdef DEFINE_DWARF_REGSTR_TABLE #ifndef S390_DWARF_REGS_TABLE_H
/* This is included in perf/util/dwarf-regs.c */ #define S390_DWARF_REGS_TABLE_H
static const char * const s390_regstr_tbl[] = { #define REG_DWARFNUM_NAME(reg, idx) [idx] = "%" #reg
/*
* For reference, see DWARF register mapping:
* http://refspecs.linuxfoundation.org/ELF/zSeries/lzsabi0_s390/x1542.html
*/
static const char * const s390_dwarf_regs[] = {
"%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7", "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
"%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
REG_DWARFNUM_NAME(f0, 16),
REG_DWARFNUM_NAME(f1, 20),
REG_DWARFNUM_NAME(f2, 17),
REG_DWARFNUM_NAME(f3, 21),
REG_DWARFNUM_NAME(f4, 18),
REG_DWARFNUM_NAME(f5, 22),
REG_DWARFNUM_NAME(f6, 19),
REG_DWARFNUM_NAME(f7, 23),
REG_DWARFNUM_NAME(f8, 24),
REG_DWARFNUM_NAME(f9, 28),
REG_DWARFNUM_NAME(f10, 25),
REG_DWARFNUM_NAME(f11, 29),
REG_DWARFNUM_NAME(f12, 26),
REG_DWARFNUM_NAME(f13, 30),
REG_DWARFNUM_NAME(f14, 27),
REG_DWARFNUM_NAME(f15, 31),
REG_DWARFNUM_NAME(c0, 32),
REG_DWARFNUM_NAME(c1, 33),
REG_DWARFNUM_NAME(c2, 34),
REG_DWARFNUM_NAME(c3, 35),
REG_DWARFNUM_NAME(c4, 36),
REG_DWARFNUM_NAME(c5, 37),
REG_DWARFNUM_NAME(c6, 38),
REG_DWARFNUM_NAME(c7, 39),
REG_DWARFNUM_NAME(c8, 40),
REG_DWARFNUM_NAME(c9, 41),
REG_DWARFNUM_NAME(c10, 42),
REG_DWARFNUM_NAME(c11, 43),
REG_DWARFNUM_NAME(c12, 44),
REG_DWARFNUM_NAME(c13, 45),
REG_DWARFNUM_NAME(c14, 46),
REG_DWARFNUM_NAME(c15, 47),
REG_DWARFNUM_NAME(a0, 48),
REG_DWARFNUM_NAME(a1, 49),
REG_DWARFNUM_NAME(a2, 50),
REG_DWARFNUM_NAME(a3, 51),
REG_DWARFNUM_NAME(a4, 52),
REG_DWARFNUM_NAME(a5, 53),
REG_DWARFNUM_NAME(a6, 54),
REG_DWARFNUM_NAME(a7, 55),
REG_DWARFNUM_NAME(a8, 56),
REG_DWARFNUM_NAME(a9, 57),
REG_DWARFNUM_NAME(a10, 58),
REG_DWARFNUM_NAME(a11, 59),
REG_DWARFNUM_NAME(a12, 60),
REG_DWARFNUM_NAME(a13, 61),
REG_DWARFNUM_NAME(a14, 62),
REG_DWARFNUM_NAME(a15, 63),
REG_DWARFNUM_NAME(pswm, 64),
REG_DWARFNUM_NAME(pswa, 65),
}; };
#endif
#ifdef DEFINE_DWARF_REGSTR_TABLE
/* This is included in perf/util/dwarf-regs.c */
#define s390_regstr_tbl s390_dwarf_regs
#endif /* DEFINE_DWARF_REGSTR_TABLE */
#endif /* S390_DWARF_REGS_TABLE_H */
#ifndef ARCH_PERF_REGS_H
#define ARCH_PERF_REGS_H
#include <stdlib.h>
#include <linux/types.h>
#include <../../../../arch/s390/include/uapi/asm/perf_regs.h>
void perf_regs_load(u64 *regs);
#define PERF_REGS_MASK ((1ULL << PERF_REG_S390_MAX) - 1)
#define PERF_REGS_MAX PERF_REG_S390_MAX
#define PERF_SAMPLE_REGS_ABI PERF_SAMPLE_REGS_ABI_64
#define PERF_REG_IP PERF_REG_S390_PC
#define PERF_REG_SP PERF_REG_S390_R15
static inline const char *perf_reg_name(int id)
{
switch (id) {
case PERF_REG_S390_R0:
return "R0";
case PERF_REG_S390_R1:
return "R1";
case PERF_REG_S390_R2:
return "R2";
case PERF_REG_S390_R3:
return "R3";
case PERF_REG_S390_R4:
return "R4";
case PERF_REG_S390_R5:
return "R5";
case PERF_REG_S390_R6:
return "R6";
case PERF_REG_S390_R7:
return "R7";
case PERF_REG_S390_R8:
return "R8";
case PERF_REG_S390_R9:
return "R9";
case PERF_REG_S390_R10:
return "R10";
case PERF_REG_S390_R11:
return "R11";
case PERF_REG_S390_R12:
return "R12";
case PERF_REG_S390_R13:
return "R13";
case PERF_REG_S390_R14:
return "R14";
case PERF_REG_S390_R15:
return "R15";
case PERF_REG_S390_FP0:
return "FP0";
case PERF_REG_S390_FP1:
return "FP1";
case PERF_REG_S390_FP2:
return "FP2";
case PERF_REG_S390_FP3:
return "FP3";
case PERF_REG_S390_FP4:
return "FP4";
case PERF_REG_S390_FP5:
return "FP5";
case PERF_REG_S390_FP6:
return "FP6";
case PERF_REG_S390_FP7:
return "FP7";
case PERF_REG_S390_FP8:
return "FP8";
case PERF_REG_S390_FP9:
return "FP9";
case PERF_REG_S390_FP10:
return "FP10";
case PERF_REG_S390_FP11:
return "FP11";
case PERF_REG_S390_FP12:
return "FP12";
case PERF_REG_S390_FP13:
return "FP13";
case PERF_REG_S390_FP14:
return "FP14";
case PERF_REG_S390_FP15:
return "FP15";
case PERF_REG_S390_MASK:
return "MASK";
case PERF_REG_S390_PC:
return "PC";
default:
return NULL;
}
return NULL;
}
#endif /* ARCH_PERF_REGS_H */
...@@ -2,5 +2,8 @@ libperf-y += header.o ...@@ -2,5 +2,8 @@ libperf-y += header.o
libperf-y += kvm-stat.o libperf-y += kvm-stat.o
libperf-$(CONFIG_DWARF) += dwarf-regs.o libperf-$(CONFIG_DWARF) += dwarf-regs.o
libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
libperf-y += machine.o libperf-y += machine.o
libperf-$(CONFIG_AUXTRACE) += auxtrace.o
#include <stdbool.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/bitops.h>
#include <linux/log2.h>
#include "../../util/evlist.h"
#include "../../util/auxtrace.h"
#include "../../util/evsel.h"
#define PERF_EVENT_CPUM_SF 0xB0000 /* Event: Basic-sampling */
#define PERF_EVENT_CPUM_SF_DIAG 0xBD000 /* Event: Combined-sampling */
#define DEFAULT_AUX_PAGES 128
#define DEFAULT_FREQ 4000
/*
 * Installed as the ->free callback by auxtrace_record__init(); releases the
 * auxtrace_record that was allocated there.
 */
static void
cpumsf_free(struct auxtrace_record *itr)
{
	free(itr);
}
/*
 * ->info_priv_size callback: always reports a private info size of 0.
 * Both arguments are ignored.
 */
static size_t
cpumsf_info_priv_size(struct auxtrace_record *itr __maybe_unused,
		      struct perf_evlist *evlist __maybe_unused)
{
	return 0;
}
/*
 * ->info_fill callback: writes nothing into @auxtrace_info and always
 * returns 0. All arguments are ignored.
 */
static int cpumsf_info_fill(struct auxtrace_record *itr __maybe_unused,
			    struct perf_session *session __maybe_unused,
			    struct auxtrace_info_event *auxtrace_info __maybe_unused,
			    size_t priv_size __maybe_unused)
{
	return 0;
}
/*
 * ->reference callback: always returns 0; the record argument is ignored.
 */
static unsigned long cpumsf_reference(struct auxtrace_record *itr __maybe_unused)
{
	return 0;
}
/*
 * ->recording_options callback: turn on full AUX tracing and, unless the
 * user already chose an AUX buffer size, pick a default one.
 *
 * @ar:     unused
 * @evlist: unused
 * @opts:   record options; full_auxtrace is always set, auxtrace_mmap_pages
 *          is only written when it is still 0 on entry
 *
 * Always returns 0.
 */
static int cpumsf_recording_options(struct auxtrace_record *ar __maybe_unused,
				    struct perf_evlist *evlist __maybe_unused,
				    struct record_opts *opts)
{
	unsigned int scale = 1;
	unsigned int nr_pages;

	opts->full_auxtrace = true;

	/* An explicitly requested AUX buffer size is left untouched. */
	if (opts->auxtrace_mmap_pages)
		return 0;

	/*
	 * Size the AUX buffer so that samples do not overflow it.
	 * DEFAULT_AUX_PAGES is a proper size for a sampling frequency of
	 * DEFAULT_FREQ and is expected to hold about 1/2 second of sampling
	 * data. For a user-specified frequency the size is scaled by the
	 * ratio of that frequency to DEFAULT_FREQ, rounded up.
	 */
	if (opts->user_freq != UINT_MAX)
		scale = (opts->user_freq + DEFAULT_FREQ - 1) / DEFAULT_FREQ;

	nr_pages = DEFAULT_AUX_PAGES * scale;
	opts->auxtrace_mmap_pages = roundup_pow_of_two(nr_pages);

	return 0;
}
/*
 * ->parse_snapshot_options callback: accepts any option string without
 * looking at it and always returns 0. All arguments are ignored.
 */
static int cpumsf_parse_snapshot_options(struct auxtrace_record *itr __maybe_unused,
					 struct record_opts *opts __maybe_unused,
					 const char *str __maybe_unused)
{
	return 0;
}
/*
 * auxtrace_record__init - set up AUX area recording for perf record.
 * @evlist: events selected for this recording session
 * @err:    out parameter; 0 unless the allocation fails, in which case it
 *          is set to -ENOMEM
 *
 * Called when perf record starts, to check whether any selected event
 * really needs an AUX buffer. That is the case only when one of the events
 * is the combined-sampling event (PERF_EVENT_CPUM_SF_DIAG).
 *
 * Returns a newly allocated auxtrace_record with the cpumsf_* callbacks
 * installed, or NULL when no AUX buffer is needed (*err == 0) or when the
 * allocation failed (*err == -ENOMEM). The record is released through its
 * ->free callback.
 */
struct auxtrace_record *auxtrace_record__init(struct perf_evlist *evlist,
					      int *err)
{
	struct auxtrace_record *aux;
	struct perf_evsel *pos;
	int diagnose = 0;

	/*
	 * NULL is returned both for "not needed" and for "out of memory".
	 * Always give *err a defined value so the caller can tell the two
	 * apart instead of reading an unassigned variable.
	 */
	*err = 0;

	if (evlist->nr_entries == 0)
		return NULL;

	evlist__for_each_entry(evlist, pos) {
		if (pos->attr.config == PERF_EVENT_CPUM_SF_DIAG) {
			diagnose = 1;
			break;
		}
	}

	if (!diagnose)
		return NULL;

	/* Sampling in diagnose mode: allocate the AUX record. */
	aux = zalloc(sizeof(*aux));
	if (aux == NULL) {
		*err = -ENOMEM;
		return NULL;
	}

	aux->parse_snapshot_options = cpumsf_parse_snapshot_options;
	aux->recording_options = cpumsf_recording_options;
	aux->info_priv_size = cpumsf_info_priv_size;
	aux->info_fill = cpumsf_info_fill;
	aux->free = cpumsf_free;
	aux->reference = cpumsf_reference;

	return aux;
}
...@@ -9,15 +9,10 @@ ...@@ -9,15 +9,10 @@
#include <stddef.h> #include <stddef.h>
#include <dwarf-regs.h> #include <dwarf-regs.h>
#include <linux/kernel.h>
#define NUM_GPRS 16 #include "dwarf-regs-table.h"
static const char *gpr_names[NUM_GPRS] = {
"%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
"%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
};
const char *get_arch_regstr(unsigned int n) const char *get_arch_regstr(unsigned int n)
{ {
return (n >= NUM_GPRS) ? NULL : gpr_names[n]; return (n >= ARRAY_SIZE(s390_dwarf_regs)) ? NULL : s390_dwarf_regs[n];
} }
#include <linux/kernel.h>
#include <elfutils/libdwfl.h>
#include "../../util/unwind-libdw.h"
#include "../../util/perf_regs.h"
#include "../../util/event.h"
#include "dwarf-regs-table.h"
/*
 * libdw__arch_set_initial_registers - hand the sampled user registers of a
 * thread to libdwfl as the starting state for unwinding.
 * @thread: libdwfl thread whose register state is being set
 * @arg:    struct unwind_info * describing the sample
 *
 * Each sampled register is read with perf_reg_value() (a register that
 * cannot be read is taken as 0) and stored at its DWARF register number.
 * The sampled PC is registered as the thread's PC, and DWARF registers
 * 0..31 (general purpose and floating point) are passed on.
 *
 * Returns the result of dwfl_thread_state_registers().
 */
bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg)
{
	/*
	 * DWARF register number -> perf sample register id.
	 * For DWARF register mapping details,
	 * see also perf/arch/s390/include/dwarf-regs-table.h
	 */
	static const struct {
		unsigned int dwarf_nr;
		int perf_id;
	} reg_map[] = {
		{  0, PERF_REG_S390_R0 },
		{  1, PERF_REG_S390_R1 },
		{  2, PERF_REG_S390_R2 },
		{  3, PERF_REG_S390_R3 },
		{  4, PERF_REG_S390_R4 },
		{  5, PERF_REG_S390_R5 },
		{  6, PERF_REG_S390_R6 },
		{  7, PERF_REG_S390_R7 },
		{  8, PERF_REG_S390_R8 },
		{  9, PERF_REG_S390_R9 },
		{ 10, PERF_REG_S390_R10 },
		{ 11, PERF_REG_S390_R11 },
		{ 12, PERF_REG_S390_R12 },
		{ 13, PERF_REG_S390_R13 },
		{ 14, PERF_REG_S390_R14 },
		{ 15, PERF_REG_S390_R15 },
		{ 16, PERF_REG_S390_FP0 },
		{ 17, PERF_REG_S390_FP2 },
		{ 18, PERF_REG_S390_FP4 },
		{ 19, PERF_REG_S390_FP6 },
		{ 20, PERF_REG_S390_FP1 },
		{ 21, PERF_REG_S390_FP3 },
		{ 22, PERF_REG_S390_FP5 },
		{ 23, PERF_REG_S390_FP7 },
		{ 24, PERF_REG_S390_FP8 },
		{ 25, PERF_REG_S390_FP10 },
		{ 26, PERF_REG_S390_FP12 },
		{ 27, PERF_REG_S390_FP14 },
		{ 28, PERF_REG_S390_FP9 },
		{ 29, PERF_REG_S390_FP11 },
		{ 30, PERF_REG_S390_FP13 },
		{ 31, PERF_REG_S390_FP15 },
		{ 64, PERF_REG_S390_MASK },
		{ 65, PERF_REG_S390_PC },
	};
	struct unwind_info *ui = arg;
	struct regs_dump *user_regs = &ui->sample->user_regs;
	/* Slots 32..63 are never written here and are not passed to libdwfl. */
	Dwarf_Word dwarf_regs[ARRAY_SIZE(s390_dwarf_regs)];
	unsigned int i;

	for (i = 0; i < ARRAY_SIZE(reg_map); i++) {
		Dwarf_Word val = 0;

		/* On failure val keeps its initial 0; the result is ignored. */
		perf_reg_value(&val, user_regs, reg_map[i].perf_id);
		dwarf_regs[reg_map[i].dwarf_nr] = val;
	}

	dwfl_thread_state_register_pc(thread, dwarf_regs[65]);
	return dwfl_thread_state_registers(thread, 0, 32, dwarf_regs);
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment