Commit e0f3e8f1 authored by Linus Torvalds

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux

Pull s390 updates from Martin Schwidefsky:
 "The bulk of the s390 patches for 4.13. Some new things but mostly bug
  fixes and cleanups. Noteworthy changes:

   - The SCM block driver is converted to blk-mq

   - Switch s390 to 5-level page tables. The virtual address space of a
     user space process can now be as large as 16EB-4KB.

   - Introduce an ELF phdr flag for qemu to avoid the global
     vm.alloc_pgste setting, which forces all processes to use large
     page tables

   - A couple of PCI changes to improve error recovery

   - Included is the merge of the base support for proper machine checks
     for KVM"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux: (52 commits)
  s390/dasd: Fix faulty ENODEV for RO sysfs attribute
  s390/pci: recognize name clashes with uids
  s390/pci: provide more debug information
  s390/pci: fix handling of PEC 306
  s390/pci: improve pci hotplug
  s390/pci: introduce clp_get_state
  s390/pci: improve error handling during fmb (de)registration
  s390/pci: improve unreg_ioat error handling
  s390/pci: improve error handling during interrupt deregistration
  s390/pci: don't cleanup in arch_setup_msi_irqs
  KVM: s390: Backup the guest's machine check info
  s390/nmi: s390: New low level handling for machine check happening in guest
  s390/fpu: export save_fpu_regs for all configs
  s390/kvm: avoid global config of vm.alloc_pgste=1
  s390: rename struct psw_bits members
  s390: rename psw_bits enums
  s390/mm: use correct address space when enabling DAT
  s390/cio: introduce io_subchannel_type
  s390/ipl: revert Load Normal semantics for LPAR CCW-type re-IPL
  s390/dumpstack: remove raw stack dump
  ...
parents e5859eb8 9e293b5a
......@@ -64,6 +64,7 @@ config ARCH_SUPPORTS_UPROBES
config S390
def_bool y
select ARCH_BINFMT_ELF_STATE
select ARCH_HAS_DEVMEM_IS_ALLOWED
select ARCH_HAS_ELF_RANDOMIZE
select ARCH_HAS_GCOV_PROFILE_ALL
......@@ -184,7 +185,7 @@ config SCHED_OMIT_FRAME_POINTER
config PGTABLE_LEVELS
int
default 4
default 5
source "init/Kconfig"
......
......@@ -6,7 +6,8 @@ obj-$(CONFIG_CRYPTO_SHA1_S390) += sha1_s390.o sha_common.o
obj-$(CONFIG_CRYPTO_SHA256_S390) += sha256_s390.o sha_common.o
obj-$(CONFIG_CRYPTO_SHA512_S390) += sha512_s390.o sha_common.o
obj-$(CONFIG_CRYPTO_DES_S390) += des_s390.o
obj-$(CONFIG_CRYPTO_AES_S390) += aes_s390.o paes_s390.o
obj-$(CONFIG_CRYPTO_AES_S390) += aes_s390.o
obj-$(CONFIG_CRYPTO_PAES_S390) += paes_s390.o
obj-$(CONFIG_S390_PRNG) += prng.o
obj-$(CONFIG_CRYPTO_GHASH_S390) += ghash_s390.o
obj-$(CONFIG_CRYPTO_CRC32_S390) += crc32-vx_s390.o
......
......@@ -12,6 +12,7 @@
#include <linux/kernel.h>
#include <linux/atomic.h>
#include <linux/random.h>
#include <linux/static_key.h>
#include <asm/cpacf.h>
......
generic-y += asm-offsets.h
generic-y += cacheflush.h
generic-y += clkdev.h
generic-y += device.h
generic-y += dma-contiguous.h
generic-y += div64.h
generic-y += emergency-restart.h
generic-y += export.h
generic-y += fb.h
generic-y += irq_regs.h
generic-y += irq_work.h
generic-y += kmap_types.h
......
/*
* Arch specific extensions to struct device
*
* This file is released under the GPLv2
*/
struct dev_archdata {
};
struct pdev_archdata {
};
......@@ -117,6 +117,9 @@
#define ELF_DATA ELFDATA2MSB
#define ELF_ARCH EM_S390
/* s390 specific phdr types */
#define PT_S390_PGSTE 0x70000000
/*
* ELF register definitions..
*/
......@@ -151,6 +154,35 @@ extern unsigned int vdso_enabled;
&& (x)->e_ident[EI_CLASS] == ELF_CLASS)
#define compat_start_thread start_thread31
struct arch_elf_state {
int rc;
};
#define INIT_ARCH_ELF_STATE { .rc = 0 }
#define arch_check_elf(ehdr, interp, interp_ehdr, state) (0)
#ifdef CONFIG_PGSTE
#define arch_elf_pt_proc(ehdr, phdr, elf, interp, state) \
({ \
struct arch_elf_state *_state = state; \
if ((phdr)->p_type == PT_S390_PGSTE && \
!page_table_allocate_pgste && \
!test_thread_flag(TIF_PGSTE) && \
!current->mm->context.alloc_pgste) { \
set_thread_flag(TIF_PGSTE); \
set_pt_regs_flag(task_pt_regs(current), \
PIF_SYSCALL_RESTART); \
_state->rc = -EAGAIN; \
} \
_state->rc; \
})
#else
#define arch_elf_pt_proc(ehdr, phdr, elf, interp, state) \
({ \
(state)->rc; \
})
#endif
/* For SVR4/S390 the function pointer to be registered with `atexit` is
passed in R14. */
#define ELF_PLAT_INIT(_r, load_addr) \
......
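An aside on the PT_S390_PGSTE hunk above: the value sits in the processor-specific phdr range, so an ordinary ELF reader can test for it. Below is a minimal userspace sketch (a hypothetical checker, not part of this series; it assumes a 64-bit binary with standard-sized phdrs, read on a matching-endianness host):

#include <elf.h>
#include <stdio.h>
#include <stdlib.h>

#define PT_S390_PGSTE 0x70000000	/* same value as the definition above */

int main(int argc, char **argv)
{
	Elf64_Ehdr ehdr;
	Elf64_Phdr phdr;
	FILE *f;
	int i;

	if (argc < 2 || !(f = fopen(argv[1], "rb")))
		exit(1);
	if (fread(&ehdr, sizeof(ehdr), 1, f) != 1)
		exit(1);
	fseek(f, ehdr.e_phoff, SEEK_SET);
	for (i = 0; i < ehdr.e_phnum; i++) {
		if (fread(&phdr, sizeof(phdr), 1, f) != 1)
			exit(1);
		if (phdr.p_type == PT_S390_PGSTE)
			printf("%s requests PGSTE page tables\n", argv[1]);
	}
	fclose(f);
	return 0;
}

On the kernel side, the arch_elf_pt_proc() hunk above reacts to the flag by setting TIF_PGSTE, flagging PIF_SYSCALL_RESTART, and returning -EAGAIN, so the execve() is restarted and the new mm is built with full 4K page tables.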
#ifndef _ASM_FB_H_
#define _ASM_FB_H_
#include <linux/fb.h>
#define fb_pgprotect(...) do {} while (0)
static inline int fb_is_primary_device(struct fb_info *info)
{
return 0;
}
#endif /* _ASM_FB_H_ */
......@@ -25,8 +25,6 @@ void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr);
#define IO_SPACE_LIMIT 0
#ifdef CONFIG_PCI
#define ioremap_nocache(addr, size) ioremap(addr, size)
#define ioremap_wc ioremap_nocache
#define ioremap_wt ioremap_nocache
......@@ -49,6 +47,8 @@ static inline void ioport_unmap(void __iomem *p)
{
}
#ifdef CONFIG_PCI
/*
* s390 needs a private implementation of pci_iomap since ioremap with its
* offset parameter isn't sufficient. That's because BAR spaces are not
......
......@@ -107,6 +107,20 @@ struct esca_block {
struct esca_entry cpu[KVM_S390_ESCA_CPU_SLOTS];
} __packed;
/*
* This struct is used to store machine check info from the lowcore
* for machine checks that happen while the guest is running.
* The info in the host's lowcore might be overwritten by a second
* machine check if the host is still in the machine check's
* high-level handling. The size is 24 bytes.
*/
struct mcck_volatile_info {
__u64 mcic;
__u64 failing_storage_address;
__u32 ext_damage_code;
__u32 reserved;
};
#define CPUSTAT_STOPPED 0x80000000
#define CPUSTAT_WAIT 0x10000000
#define CPUSTAT_ECALL_PEND 0x08000000
......@@ -264,7 +278,8 @@ struct kvm_s390_itdb {
struct sie_page {
struct kvm_s390_sie_block sie_block;
__u8 reserved200[1024]; /* 0x0200 */
struct mcck_volatile_info mcck_info; /* 0x0200 */
__u8 reserved218[1000]; /* 0x0218 */
struct kvm_s390_itdb itdb; /* 0x0600 */
__u8 reserved700[2304]; /* 0x0700 */
} __packed;
......
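The "24 bytes" claim is easy to sanity-check: 8+8+4+4 packs to 24, which is also why the sie_page pad above now starts at 0x218 (0x200 + 24) and shrinks from 1024 to 1000 bytes. A standalone restatement (userspace stdint types standing in for the kernel's __u64/__u32) that compiles only if the claim holds:

#include <stdint.h>

struct mcck_volatile_info {
	uint64_t mcic;				/* machine check interruption code */
	uint64_t failing_storage_address;
	uint32_t ext_damage_code;
	uint32_t reserved;			/* keeps the size at exactly 24 */
};

_Static_assert(sizeof(struct mcck_volatile_info) == 24,
	       "8 + 8 + 4 + 4 must pack to 24 bytes");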
......@@ -25,7 +25,9 @@ static inline int init_new_context(struct task_struct *tsk,
mm->context.gmap_asce = 0;
mm->context.flush_mm = 0;
#ifdef CONFIG_PGSTE
mm->context.alloc_pgste = page_table_allocate_pgste;
mm->context.alloc_pgste = page_table_allocate_pgste ||
test_thread_flag(TIF_PGSTE) ||
current->mm->context.alloc_pgste;
mm->context.has_pgste = 0;
mm->context.use_skey = 0;
mm->context.use_cmma = 0;
......
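A toy model of the new inheritance rule in init_new_context() (plain booleans, no kernel context): a fresh mm gets PGSTEs if the global sysctl is set, if the ELF loader flagged the task via PT_S390_PGSTE, or if the parent mm already used them. This is exactly what lets qemu run without vm.alloc_pgste=1 while everything else keeps the default 2K page tables:

#include <stdbool.h>
#include <stdio.h>

static bool new_mm_alloc_pgste(bool sysctl_alloc_pgste, bool tif_pgste,
			       bool parent_alloc_pgste)
{
	return sysctl_alloc_pgste || tif_pgste || parent_alloc_pgste;
}

int main(void)
{
	/* qemu binary marked with PT_S390_PGSTE, global sysctl off: */
	printf("%d\n", new_mm_alloc_pgste(false, true, false));	/* 1 */
	/* ordinary process on the same system: */
	printf("%d\n", new_mm_alloc_pgste(false, false, false));	/* 0 */
	return 0;
}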
......@@ -14,7 +14,14 @@
#include <linux/const.h>
#include <linux/types.h>
#define MCIC_SUBCLASS_MASK (1ULL<<63 | 1ULL<<62 | 1ULL<<61 | \
1ULL<<59 | 1ULL<<58 | 1ULL<<56 | \
1ULL<<55 | 1ULL<<54 | 1ULL<<53 | \
1ULL<<52 | 1ULL<<47 | 1ULL<<46 | \
1ULL<<45 | 1ULL<<44)
#define MCCK_CODE_SYSTEM_DAMAGE _BITUL(63)
#define MCCK_CODE_EXT_DAMAGE _BITUL(63 - 5)
#define MCCK_CODE_CP _BITUL(63 - 9)
#define MCCK_CODE_CPU_TIMER_VALID _BITUL(63 - 46)
#define MCCK_CODE_PSW_MWP_VALID _BITUL(63 - 20)
#define MCCK_CODE_PSW_IA_VALID _BITUL(63 - 23)
......
......@@ -74,6 +74,7 @@ typedef struct { unsigned long pgste; } pgste_t;
typedef struct { unsigned long pte; } pte_t;
typedef struct { unsigned long pmd; } pmd_t;
typedef struct { unsigned long pud; } pud_t;
typedef struct { unsigned long p4d; } p4d_t;
typedef struct { unsigned long pgd; } pgd_t;
typedef pte_t *pgtable_t;
......@@ -82,12 +83,14 @@ typedef pte_t *pgtable_t;
#define pte_val(x) ((x).pte)
#define pmd_val(x) ((x).pmd)
#define pud_val(x) ((x).pud)
#define p4d_val(x) ((x).p4d)
#define pgd_val(x) ((x).pgd)
#define __pgste(x) ((pgste_t) { (x) } )
#define __pte(x) ((pte_t) { (x) } )
#define __pmd(x) ((pmd_t) { (x) } )
#define __pud(x) ((pud_t) { (x) } )
#define __p4d(x) ((p4d_t) { (x) } )
#define __pgd(x) ((pgd_t) { (x) } )
#define __pgprot(x) ((pgprot_t) { (x) } )
......
......@@ -70,11 +70,10 @@ struct zpci_fmb {
} __packed __aligned(128);
enum zpci_state {
ZPCI_FN_STATE_RESERVED,
ZPCI_FN_STATE_STANDBY,
ZPCI_FN_STATE_CONFIGURED,
ZPCI_FN_STATE_ONLINE,
NR_ZPCI_FN_STATES,
ZPCI_FN_STATE_STANDBY = 0,
ZPCI_FN_STATE_CONFIGURED = 1,
ZPCI_FN_STATE_RESERVED = 2,
ZPCI_FN_STATE_ONLINE = 3,
};
struct zpci_bar_struct {
......@@ -109,7 +108,7 @@ struct zpci_dev {
u64 msi_addr; /* MSI address */
unsigned int max_msi; /* maximum number of MSI's */
struct airq_iv *aibv; /* adapter interrupt bit vector */
unsigned int aisb; /* number of the summary bit */
unsigned long aisb; /* number of the summary bit */
/* DMA stuff */
unsigned long *dma_table;
......@@ -159,11 +158,12 @@ extern const struct attribute_group *zpci_attr_groups[];
----------------------------------------------------------------------------- */
/* Base stuff */
int zpci_create_device(struct zpci_dev *);
void zpci_remove_device(struct zpci_dev *zdev);
int zpci_enable_device(struct zpci_dev *);
int zpci_disable_device(struct zpci_dev *);
void zpci_stop_device(struct zpci_dev *);
int zpci_register_ioat(struct zpci_dev *, u8, u64, u64, u64);
int zpci_unregister_ioat(struct zpci_dev *, u8);
void zpci_remove_reserved_devices(void);
/* CLP */
int clp_scan_pci_devices(void);
......@@ -172,6 +172,7 @@ int clp_rescan_pci_devices_simple(void);
int clp_add_pci_device(u32, u32, int);
int clp_enable_fh(struct zpci_dev *, u8);
int clp_disable_fh(struct zpci_dev *);
int clp_get_state(u32 fid, enum zpci_state *state);
#ifdef CONFIG_PCI
/* Error handling and recovery */
......
......@@ -76,7 +76,7 @@ struct zpci_fib {
u32 gd;
} __packed __aligned(8);
int zpci_mod_fc(u64 req, struct zpci_fib *fib);
u8 zpci_mod_fc(u64 req, struct zpci_fib *fib, u8 *status);
int zpci_refresh_trans(u64 fn, u64 addr, u64 range);
int zpci_load(u64 *data, u64 req, u64 offset);
int zpci_store(u64 data, u64 req, u64 offset);
......
......@@ -51,12 +51,24 @@ static inline unsigned long pgd_entry_type(struct mm_struct *mm)
return _SEGMENT_ENTRY_EMPTY;
if (mm->context.asce_limit <= (1UL << 42))
return _REGION3_ENTRY_EMPTY;
return _REGION2_ENTRY_EMPTY;
if (mm->context.asce_limit <= (1UL << 53))
return _REGION2_ENTRY_EMPTY;
return _REGION1_ENTRY_EMPTY;
}
int crst_table_upgrade(struct mm_struct *);
int crst_table_upgrade(struct mm_struct *mm, unsigned long limit);
void crst_table_downgrade(struct mm_struct *);
static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long address)
{
unsigned long *table = crst_table_alloc(mm);
if (table)
crst_table_init(table, _REGION2_ENTRY_EMPTY);
return (p4d_t *) table;
}
#define p4d_free(mm, p4d) crst_table_free(mm, (unsigned long *) p4d)
static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address)
{
unsigned long *table = crst_table_alloc(mm);
......@@ -86,9 +98,14 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
crst_table_free(mm, (unsigned long *) pmd);
}
static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, p4d_t *p4d)
{
pgd_val(*pgd) = _REGION1_ENTRY | __pa(p4d);
}
static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4d, pud_t *pud)
{
pgd_val(*pgd) = _REGION2_ENTRY | __pa(pud);
p4d_val(*p4d) = _REGION2_ENTRY | __pa(pud);
}
static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
......
......@@ -24,7 +24,6 @@
* the S390 page table tree.
*/
#ifndef __ASSEMBLY__
#include <asm-generic/5level-fixup.h>
#include <linux/sched.h>
#include <linux/mm_types.h>
#include <linux/page-flags.h>
......@@ -87,12 +86,15 @@ extern unsigned long zero_page_mask;
*/
#define PMD_SHIFT 20
#define PUD_SHIFT 31
#define PGDIR_SHIFT 42
#define P4D_SHIFT 42
#define PGDIR_SHIFT 53
#define PMD_SIZE (1UL << PMD_SHIFT)
#define PMD_MASK (~(PMD_SIZE-1))
#define PUD_SIZE (1UL << PUD_SHIFT)
#define PUD_MASK (~(PUD_SIZE-1))
#define P4D_SIZE (1UL << P4D_SHIFT)
#define P4D_MASK (~(P4D_SIZE-1))
#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
#define PGDIR_MASK (~(PGDIR_SIZE-1))
......@@ -105,6 +107,7 @@ extern unsigned long zero_page_mask;
#define PTRS_PER_PTE 256
#define PTRS_PER_PMD 2048
#define PTRS_PER_PUD 2048
#define PTRS_PER_P4D 2048
#define PTRS_PER_PGD 2048
#define FIRST_USER_ADDRESS 0UL
......@@ -115,6 +118,8 @@ extern unsigned long zero_page_mask;
printk("%s:%d: bad pmd %p.\n", __FILE__, __LINE__, (void *) pmd_val(e))
#define pud_ERROR(e) \
printk("%s:%d: bad pud %p.\n", __FILE__, __LINE__, (void *) pud_val(e))
#define p4d_ERROR(e) \
printk("%s:%d: bad p4d %p.\n", __FILE__, __LINE__, (void *) p4d_val(e))
#define pgd_ERROR(e) \
printk("%s:%d: bad pgd %p.\n", __FILE__, __LINE__, (void *) pgd_val(e))
......@@ -296,8 +301,6 @@ static inline int is_module_addr(void *addr)
#define _REGION3_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INVALID)
#define _REGION3_ENTRY_ORIGIN_LARGE ~0x7fffffffUL /* large page address */
#define _REGION3_ENTRY_ORIGIN ~0x7ffUL/* region third table origin */
#define _REGION3_ENTRY_DIRTY 0x2000 /* SW region dirty bit */
#define _REGION3_ENTRY_YOUNG 0x1000 /* SW region young bit */
#define _REGION3_ENTRY_LARGE 0x0400 /* RTTE-format control, large page */
......@@ -310,8 +313,8 @@ static inline int is_module_addr(void *addr)
#define _REGION3_ENTRY_SOFT_DIRTY 0x0000 /* SW region soft dirty bit */
#endif
#define _REGION_ENTRY_BITS 0xfffffffffffff227UL
#define _REGION_ENTRY_BITS_LARGE 0xffffffff8000fe27UL
#define _REGION_ENTRY_BITS 0xfffffffffffff22fUL
#define _REGION_ENTRY_BITS_LARGE 0xffffffff8000fe2fUL
/* Bits in the segment table entry */
#define _SEGMENT_ENTRY_BITS 0xfffffffffffffe33UL
......@@ -560,18 +563,23 @@ static inline void crdte(unsigned long old, unsigned long new,
}
/*
* pgd/pmd/pte query functions
* pgd/p4d/pud/pmd/pte query functions
*/
static inline int pgd_folded(pgd_t pgd)
{
return (pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R1;
}
static inline int pgd_present(pgd_t pgd)
{
if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R2)
if (pgd_folded(pgd))
return 1;
return (pgd_val(pgd) & _REGION_ENTRY_ORIGIN) != 0UL;
}
static inline int pgd_none(pgd_t pgd)
{
if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R2)
if (pgd_folded(pgd))
return 0;
return (pgd_val(pgd) & _REGION_ENTRY_INVALID) != 0UL;
}
......@@ -589,16 +597,48 @@ static inline int pgd_bad(pgd_t pgd)
return (pgd_val(pgd) & mask) != 0;
}
static inline int p4d_folded(p4d_t p4d)
{
return (p4d_val(p4d) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R2;
}
static inline int p4d_present(p4d_t p4d)
{
if (p4d_folded(p4d))
return 1;
return (p4d_val(p4d) & _REGION_ENTRY_ORIGIN) != 0UL;
}
static inline int p4d_none(p4d_t p4d)
{
if (p4d_folded(p4d))
return 0;
return p4d_val(p4d) == _REGION2_ENTRY_EMPTY;
}
static inline unsigned long p4d_pfn(p4d_t p4d)
{
unsigned long origin_mask;
origin_mask = _REGION_ENTRY_ORIGIN;
return (p4d_val(p4d) & origin_mask) >> PAGE_SHIFT;
}
static inline int pud_folded(pud_t pud)
{
return (pud_val(pud) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R3;
}
static inline int pud_present(pud_t pud)
{
if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R3)
if (pud_folded(pud))
return 1;
return (pud_val(pud) & _REGION_ENTRY_ORIGIN) != 0UL;
}
static inline int pud_none(pud_t pud)
{
if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R3)
if (pud_folded(pud))
return 0;
return pud_val(pud) == _REGION3_ENTRY_EMPTY;
}
......@@ -614,7 +654,7 @@ static inline unsigned long pud_pfn(pud_t pud)
{
unsigned long origin_mask;
origin_mask = _REGION3_ENTRY_ORIGIN;
origin_mask = _REGION_ENTRY_ORIGIN;
if (pud_large(pud))
origin_mask = _REGION3_ENTRY_ORIGIN_LARGE;
return (pud_val(pud) & origin_mask) >> PAGE_SHIFT;
......@@ -641,6 +681,13 @@ static inline int pud_bad(pud_t pud)
return (pud_val(pud) & ~_REGION_ENTRY_BITS) != 0;
}
static inline int p4d_bad(p4d_t p4d)
{
if ((p4d_val(p4d) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R2)
return pud_bad(__pud(p4d_val(p4d)));
return (p4d_val(p4d) & ~_REGION_ENTRY_BITS) != 0;
}
static inline int pmd_present(pmd_t pmd)
{
return pmd_val(pmd) != _SEGMENT_ENTRY_EMPTY;
......@@ -794,8 +841,14 @@ static inline int pte_unused(pte_t pte)
static inline void pgd_clear(pgd_t *pgd)
{
if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2)
pgd_val(*pgd) = _REGION2_ENTRY_EMPTY;
if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R1)
pgd_val(*pgd) = _REGION1_ENTRY_EMPTY;
}
static inline void p4d_clear(p4d_t *p4d)
{
if ((p4d_val(*p4d) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2)
p4d_val(*p4d) = _REGION2_ENTRY_EMPTY;
}
static inline void pud_clear(pud_t *pud)
......@@ -1089,6 +1142,7 @@ static inline pte_t mk_pte(struct page *page, pgprot_t pgprot)
}
#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
#define p4d_index(address) (((address) >> P4D_SHIFT) & (PTRS_PER_P4D-1))
#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
#define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))
#define pte_index(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE-1))
......@@ -1098,19 +1152,31 @@ static inline pte_t mk_pte(struct page *page, pgprot_t pgprot)
#define pmd_deref(pmd) (pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN)
#define pud_deref(pud) (pud_val(pud) & _REGION_ENTRY_ORIGIN)
#define p4d_deref(p4d) (p4d_val(p4d) & _REGION_ENTRY_ORIGIN)
#define pgd_deref(pgd) (pgd_val(pgd) & _REGION_ENTRY_ORIGIN)
static inline pud_t *pud_offset(pgd_t *pgd, unsigned long address)
static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
{
pud_t *pud = (pud_t *) pgd;
if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2)
pud = (pud_t *) pgd_deref(*pgd);
return pud + pud_index(address);
p4d_t *p4d = (p4d_t *) pgd;
if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R1)
p4d = (p4d_t *) pgd_deref(*pgd);
return p4d + p4d_index(address);
}
static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
{
pud_t *pud = (pud_t *) p4d;
if ((p4d_val(*p4d) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2)
pud = (pud_t *) p4d_deref(*p4d);
return pud + pud_index(address);
}
static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address)
{
pmd_t *pmd = (pmd_t *) pud;
if ((pud_val(*pud) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
pmd = (pmd_t *) pud_deref(*pud);
return pmd + pmd_index(address);
......@@ -1122,6 +1188,7 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address)
#define pmd_page(pmd) pfn_to_page(pmd_pfn(pmd))
#define pud_page(pud) pfn_to_page(pud_pfn(pud))
#define p4d_page(p4d) pfn_to_page(p4d_pfn(p4d))
/* Find an entry in the lowest level page table.. */
#define pte_offset(pmd, addr) ((pte_t *) pmd_deref(*(pmd)) + pte_index(addr))
......
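Taken together, the hunks above slot a p4d step between pgd and pud, with the folding checks keeping three- and four-level address spaces working unchanged. A kernel-context sketch of the resulting walk order (five_level_lookup() is a hypothetical helper, not compilable on its own; none/NULL checks omitted):

static pte_t *five_level_lookup(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pgd = pgd_offset(mm, addr);	/* region-1 table */
	p4d_t *p4d = p4d_offset(pgd, addr);	/* region-2 table, new level */
	pud_t *pud = pud_offset(p4d, addr);	/* region-3 table */
	pmd_t *pmd = pmd_offset(pud, addr);	/* segment table */

	return pte_offset(pmd, addr);		/* page table */
}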
......@@ -20,6 +20,7 @@
#define CIF_FPU 4 /* restore FPU registers */
#define CIF_IGNORE_IRQ 5 /* ignore interrupt (for udelay) */
#define CIF_ENABLED_WAIT 6 /* in enabled wait state */
#define CIF_MCCK_GUEST 7 /* machine check happening in guest */
#define _CIF_MCCK_PENDING _BITUL(CIF_MCCK_PENDING)
#define _CIF_ASCE_PRIMARY _BITUL(CIF_ASCE_PRIMARY)
......@@ -28,6 +29,7 @@
#define _CIF_FPU _BITUL(CIF_FPU)
#define _CIF_IGNORE_IRQ _BITUL(CIF_IGNORE_IRQ)
#define _CIF_ENABLED_WAIT _BITUL(CIF_ENABLED_WAIT)
#define _CIF_MCCK_GUEST _BITUL(CIF_MCCK_GUEST)
#ifndef __ASSEMBLY__
......@@ -92,11 +94,11 @@ extern void execve_tail(void);
*/
#define TASK_SIZE_OF(tsk) (test_tsk_thread_flag(tsk, TIF_31BIT) ? \
(1UL << 31) : (1UL << 53))
(1UL << 31) : -PAGE_SIZE)
#define TASK_UNMAPPED_BASE (test_thread_flag(TIF_31BIT) ? \
(1UL << 30) : (1UL << 41))
#define TASK_SIZE TASK_SIZE_OF(current)
#define TASK_SIZE_MAX (1UL << 53)
#define TASK_SIZE_MAX (-PAGE_SIZE)
#define STACK_TOP (test_thread_flag(TIF_31BIT) ? \
(1UL << 31) : (1UL << 42))
......
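The new TASK_SIZE_MAX looks odd at first glance: -PAGE_SIZE is an unsigned value and wraps to 2^64 - 4096, i.e. the 16EB-4KB quoted in the merge description. A short demonstration (plain userspace C, assuming an LP64 target):

#include <stdio.h>

int main(void)
{
	unsigned long page_size = 4096UL;	/* s390 PAGE_SIZE */
	unsigned long task_size_max = -page_size;

	/* prints 0xfffffffffffff000, i.e. 16EB - 4KB */
	printf("TASK_SIZE_MAX = 0x%lx\n", task_size_max);
	return 0;
}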
......@@ -11,9 +11,11 @@
#define PIF_SYSCALL 0 /* inside a system call */
#define PIF_PER_TRAP 1 /* deliver sigtrap on return to user */
#define PIF_SYSCALL_RESTART 2 /* restart the current system call */
#define _PIF_SYSCALL _BITUL(PIF_SYSCALL)
#define _PIF_PER_TRAP _BITUL(PIF_PER_TRAP)
#define _PIF_SYSCALL_RESTART _BITUL(PIF_SYSCALL_RESTART)
#ifndef __ASSEMBLY__
......@@ -24,38 +26,38 @@
PSW_MASK_PSTATE | PSW_ASC_PRIMARY)
struct psw_bits {
unsigned long : 1;
unsigned long r : 1; /* PER-Mask */
unsigned long : 3;
unsigned long t : 1; /* DAT Mode */
unsigned long i : 1; /* Input/Output Mask */
unsigned long e : 1; /* External Mask */
unsigned long key : 4; /* PSW Key */
unsigned long : 1;
unsigned long m : 1; /* Machine-Check Mask */
unsigned long w : 1; /* Wait State */
unsigned long p : 1; /* Problem State */
unsigned long as : 2; /* Address Space Control */
unsigned long cc : 2; /* Condition Code */
unsigned long pm : 4; /* Program Mask */
unsigned long ri : 1; /* Runtime Instrumentation */
unsigned long : 6;
unsigned long eaba : 2; /* Addressing Mode */
unsigned long : 31;
unsigned long ia : 64; /* Instruction Address */
unsigned long : 1;
unsigned long per : 1; /* PER-Mask */
unsigned long : 3;
unsigned long dat : 1; /* DAT Mode */
unsigned long io : 1; /* Input/Output Mask */
unsigned long ext : 1; /* External Mask */
unsigned long key : 4; /* PSW Key */
unsigned long : 1;
unsigned long mcheck : 1; /* Machine-Check Mask */
unsigned long wait : 1; /* Wait State */
unsigned long pstate : 1; /* Problem State */
unsigned long as : 2; /* Address Space Control */
unsigned long cc : 2; /* Condition Code */
unsigned long pm : 4; /* Program Mask */
unsigned long ri : 1; /* Runtime Instrumentation */
unsigned long : 6;
unsigned long eaba : 2; /* Addressing Mode */
unsigned long : 31;
unsigned long ia : 64; /* Instruction Address */
};
enum {
PSW_AMODE_24BIT = 0,
PSW_AMODE_31BIT = 1,
PSW_AMODE_64BIT = 3
PSW_BITS_AMODE_24BIT = 0,
PSW_BITS_AMODE_31BIT = 1,
PSW_BITS_AMODE_64BIT = 3
};
enum {
PSW_AS_PRIMARY = 0,
PSW_AS_ACCREG = 1,
PSW_AS_SECONDARY = 2,
PSW_AS_HOME = 3
PSW_BITS_AS_PRIMARY = 0,
PSW_BITS_AS_ACCREG = 1,
PSW_BITS_AS_SECONDARY = 2,
PSW_BITS_AS_HOME = 3
};
#define psw_bits(__psw) (*({ \
......
......@@ -59,7 +59,7 @@ static inline int __pcpu_sigp(u16 addr, u8 order, unsigned long parm,
int cc;
cc = ____pcpu_sigp(addr, order, parm, &_status);
if (status && cc == 1)
if (status && cc == SIGP_CC_STATUS_STORED)
*status = _status;
return cc;
}
......
......@@ -58,6 +58,7 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
#define TIF_UPROBE 3 /* breakpointed or single-stepping */
#define TIF_GUARDED_STORAGE 4 /* load guarded storage control block */
#define TIF_PATCH_PENDING 5 /* pending live patching update */
#define TIF_PGSTE 6 /* New mm's will use 4K page tables */
#define TIF_31BIT 16 /* 32bit process */
#define TIF_MEMDIE 17 /* is terminating due to OOM killer */
......
......@@ -136,6 +136,21 @@ static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd,
tlb_remove_table(tlb, pmd);
}
/*
* p4d_free_tlb frees a p4d table and clears the CRSTE for the
* region second table entry from the tlb.
* If the mm uses a four-level page table the single p4d is freed
* as the pgd. p4d_free_tlb checks the asce_limit against 8PB
* to avoid a double free of the p4d in this case.
*/
static inline void p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d,
unsigned long address)
{
if (tlb->mm->context.asce_limit <= (1UL << 53))
return;
tlb_remove_table(tlb, p4d);
}
/*
* pud_free_tlb frees a pud table and clears the CRSTE for the
* region third table entry from the tlb.
......
......@@ -58,6 +58,9 @@ int main(void)
OFFSET(__SF_BACKCHAIN, stack_frame, back_chain);
OFFSET(__SF_GPRS, stack_frame, gprs);
OFFSET(__SF_EMPTY, stack_frame, empty1);
OFFSET(__SF_SIE_CONTROL, stack_frame, empty1[0]);
OFFSET(__SF_SIE_SAVEAREA, stack_frame, empty1[1]);
OFFSET(__SF_SIE_REASON, stack_frame, empty1[2]);
BLANK();
/* timeval/timezone offsets for use by vdso */
OFFSET(__VDSO_UPD_COUNT, vdso_data, tb_update_count);
......
......@@ -98,8 +98,10 @@ static int show_address(void *data, unsigned long address, int reliable)
return 0;
}
static void show_trace(struct task_struct *task, unsigned long sp)
void show_stack(struct task_struct *task, unsigned long *stack)
{
unsigned long sp = (unsigned long) stack;
if (!sp)
sp = task ? task->thread.ksp : current_stack_pointer();
printk("Call Trace:\n");
......@@ -109,29 +111,6 @@ static void show_trace(struct task_struct *task, unsigned long sp)
debug_show_held_locks(task);
}
void show_stack(struct task_struct *task, unsigned long *sp)
{
unsigned long *stack;
int i;
stack = sp;
if (!stack) {
if (!task)
stack = (unsigned long *)current_stack_pointer();
else
stack = (unsigned long *)task->thread.ksp;
}
printk(KERN_DEFAULT "Stack:\n");
for (i = 0; i < 20; i++) {
if (((addr_t) stack & (THREAD_SIZE-1)) == 0)
break;
if (i % 4 == 0)
printk(KERN_DEFAULT " ");
pr_cont("%016lx%c", *stack++, i % 4 == 3 ? '\n' : ' ');
}
show_trace(task, (unsigned long)sp);
}
static void show_last_breaking_event(struct pt_regs *regs)
{
printk("Last Breaking-Event-Address:\n");
......@@ -149,8 +128,8 @@ void show_registers(struct pt_regs *regs)
pr_cont(" (%pSR)", (void *)regs->psw.addr);
pr_cont("\n");
printk(" R:%x T:%x IO:%x EX:%x Key:%x M:%x W:%x "
"P:%x AS:%x CC:%x PM:%x", psw->r, psw->t, psw->i, psw->e,
psw->key, psw->m, psw->w, psw->p, psw->as, psw->cc, psw->pm);
"P:%x AS:%x CC:%x PM:%x", psw->per, psw->dat, psw->io, psw->ext,
psw->key, psw->mcheck, psw->wait, psw->pstate, psw->as, psw->cc, psw->pm);
pr_cont(" RI:%x EA:%x\n", psw->ri, psw->eaba);
printk("%s GPRS: %016lx %016lx %016lx %016lx\n", mode,
regs->gprs[0], regs->gprs[1], regs->gprs[2], regs->gprs[3]);
......@@ -169,7 +148,7 @@ void show_regs(struct pt_regs *regs)
show_registers(regs);
/* Show stack backtrace if pt_regs is from kernel mode */
if (!user_mode(regs))
show_trace(NULL, regs->gprs[15]);
show_stack(NULL, (unsigned long *) regs->gprs[15]);
show_last_breaking_event(regs);
}
......
......@@ -52,7 +52,7 @@ _TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \
_TIF_SYSCALL_TRACEPOINT)
_CIF_WORK = (_CIF_MCCK_PENDING | _CIF_ASCE_PRIMARY | \
_CIF_ASCE_SECONDARY | _CIF_FPU)
_PIF_WORK = (_PIF_PER_TRAP)
_PIF_WORK = (_PIF_PER_TRAP | _PIF_SYSCALL_RESTART)
#define BASED(name) name-cleanup_critical(%r13)
......@@ -225,6 +225,7 @@ ENTRY(sie64a)
jnz .Lsie_skip
TSTMSK __LC_CPU_FLAGS,_CIF_FPU
jo .Lsie_skip # exit if fp/vx regs changed
.Lsie_entry:
sie 0(%r14)
.Lsie_skip:
ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE
......@@ -334,6 +335,8 @@ ENTRY(system_call)
jo .Lsysc_mcck_pending
TSTMSK __TI_flags(%r12),_TIF_NEED_RESCHED
jo .Lsysc_reschedule
TSTMSK __PT_FLAGS(%r11),_PIF_SYSCALL_RESTART
jo .Lsysc_syscall_restart
#ifdef CONFIG_UPROBES
TSTMSK __TI_flags(%r12),_TIF_UPROBE
jo .Lsysc_uprobe_notify
......@@ -347,6 +350,8 @@ ENTRY(system_call)
jo .Lsysc_patch_pending # handle live patching just before
# signals and possible syscall restart
#endif
TSTMSK __PT_FLAGS(%r11),_PIF_SYSCALL_RESTART
jo .Lsysc_syscall_restart
TSTMSK __TI_flags(%r12),_TIF_SIGPENDING
jo .Lsysc_sigpending
TSTMSK __TI_flags(%r12),_TIF_NOTIFY_RESUME
......@@ -447,6 +452,15 @@ ENTRY(system_call)
larl %r14,.Lsysc_return
jg do_per_trap
#
# _PIF_SYSCALL_RESTART is set, repeat the current system call
#
.Lsysc_syscall_restart:
ni __PT_FLAGS+7(%r11),255-_PIF_SYSCALL_RESTART
lmg %r1,%r7,__PT_R1(%r11) # load svc arguments
lg %r2,__PT_ORIG_GPR2(%r11)
j .Lsysc_do_svc
#
# call tracehook_report_syscall_entry/tracehook_report_syscall_exit before
# and after the system call
......@@ -881,9 +895,7 @@ ENTRY(save_fpu_regs)
oi __LC_CPU_FLAGS+7,_CIF_FPU
br %r14
.Lsave_fpu_regs_end:
#if IS_ENABLED(CONFIG_KVM)
EXPORT_SYMBOL(save_fpu_regs)
#endif
/*
* Load floating-point controls and floating-point or vector registers.
......@@ -1111,7 +1123,13 @@ cleanup_critical:
.quad .Lsie_done
.Lcleanup_sie:
lg %r9,__SF_EMPTY(%r15) # get control block pointer
cghi %r11,__LC_SAVE_AREA_ASYNC #Is this in normal interrupt?
je 1f
slg %r9,BASED(.Lsie_crit_mcck_start)
clg %r9,BASED(.Lsie_crit_mcck_length)
jh 1f
oi __LC_CPU_FLAGS+7, _CIF_MCCK_GUEST
1: lg %r9,__SF_EMPTY(%r15) # get control block pointer
ni __SIE_PROG0C+3(%r9),0xfe # no longer in SIE
lctlg %c1,%c1,__LC_USER_ASCE # load primary asce
larl %r9,sie_exit # skip forward to sie_exit
......@@ -1296,6 +1314,10 @@ cleanup_critical:
.quad .Lsie_gmap
.Lsie_critical_length:
.quad .Lsie_done - .Lsie_gmap
.Lsie_crit_mcck_start:
.quad .Lsie_entry
.Lsie_crit_mcck_length:
.quad .Lsie_skip - .Lsie_entry
#endif
.section .rodata, "a"
......
......@@ -25,6 +25,8 @@
#include <asm/crw.h>
#include <asm/switch_to.h>
#include <asm/ctl_reg.h>
#include <asm/asm-offsets.h>
#include <linux/kvm_host.h>
struct mcck_struct {
unsigned int kill_task : 1;
......@@ -274,12 +276,39 @@ static int notrace s390_validate_registers(union mci mci, int umode)
return kill_task;
}
/*
* Back up the guest's machine check info to its description block
*/
static void notrace s390_backup_mcck_info(struct pt_regs *regs)
{
struct mcck_volatile_info *mcck_backup;
struct sie_page *sie_page;
/* r14 contains the sie block, which was set in sie64a */
struct kvm_s390_sie_block *sie_block =
(struct kvm_s390_sie_block *) regs->gprs[14];
if (sie_block == NULL)
/* Something's seriously wrong, stop system. */
s390_handle_damage();
sie_page = container_of(sie_block, struct sie_page, sie_block);
mcck_backup = &sie_page->mcck_info;
mcck_backup->mcic = S390_lowcore.mcck_interruption_code &
~(MCCK_CODE_CP | MCCK_CODE_EXT_DAMAGE);
mcck_backup->ext_damage_code = S390_lowcore.external_damage_code;
mcck_backup->failing_storage_address
= S390_lowcore.failing_storage_address;
}
#define MAX_IPD_COUNT 29
#define MAX_IPD_TIME (5 * 60 * USEC_PER_SEC) /* 5 minutes */
#define ED_STP_ISLAND 6 /* External damage STP island check */
#define ED_STP_SYNC 7 /* External damage STP sync check */
#define MCCK_CODE_NO_GUEST (MCCK_CODE_CP | MCCK_CODE_EXT_DAMAGE)
/*
* machine check handler.
*/
......@@ -291,6 +320,7 @@ void notrace s390_do_machine_check(struct pt_regs *regs)
struct mcck_struct *mcck;
unsigned long long tmp;
union mci mci;
unsigned long mcck_dam_code;
nmi_enter();
inc_irq_stat(NMI_NMI);
......@@ -301,7 +331,13 @@ void notrace s390_do_machine_check(struct pt_regs *regs)
/* System damage -> stopping machine */
s390_handle_damage();
}
if (mci.pd) {
/*
* Reinject instruction-processing-damage machine checks,
* including Delayed Access Exceptions, into the guest instead
* of damaging the host if they happen in the guest.
*/
if (mci.pd && !test_cpu_flag(CIF_MCCK_GUEST)) {
if (mci.b) {
/* Processing backup -> verify if we can survive this */
u64 z_mcic, o_mcic, t_mcic;
......@@ -345,6 +381,14 @@ void notrace s390_do_machine_check(struct pt_regs *regs)
mcck->mcck_code = mci.val;
set_cpu_flag(CIF_MCCK_PENDING);
}
/*
* Back up the machine check's info if it happened while the
* guest was running.
*/
if (test_cpu_flag(CIF_MCCK_GUEST))
s390_backup_mcck_info(regs);
if (mci.cd) {
/* Timing facility damage */
s390_handle_damage();
......@@ -358,15 +402,22 @@ void notrace s390_do_machine_check(struct pt_regs *regs)
if (mcck->stp_queue)
set_cpu_flag(CIF_MCCK_PENDING);
}
if (mci.se)
/* Storage error uncorrected */
s390_handle_damage();
if (mci.ke)
/* Storage key-error uncorrected */
s390_handle_damage();
if (mci.ds && mci.fa)
/* Storage degradation */
s390_handle_damage();
/*
* Reinject storage-related machine checks into the guest if they
* happened while the guest was running.
*/
if (!test_cpu_flag(CIF_MCCK_GUEST)) {
if (mci.se)
/* Storage error uncorrected */
s390_handle_damage();
if (mci.ke)
/* Storage key-error uncorrected */
s390_handle_damage();
if (mci.ds && mci.fa)
/* Storage degradation */
s390_handle_damage();
}
if (mci.cp) {
/* Channel report word pending */
mcck->channel_report = 1;
......@@ -377,6 +428,19 @@ void notrace s390_do_machine_check(struct pt_regs *regs)
mcck->warning = 1;
set_cpu_flag(CIF_MCCK_PENDING);
}
/*
* If there are only Channel Report Pending and External Damage
* machine checks, they will not be reinjected into the guest
* because they refer to host conditions only.
*/
mcck_dam_code = (mci.val & MCIC_SUBCLASS_MASK);
if (test_cpu_flag(CIF_MCCK_GUEST) &&
(mcck_dam_code & MCCK_CODE_NO_GUEST) != mcck_dam_code) {
/* Set exit reason code for host's later handling */
*((long *)(regs->gprs[15] + __SF_SIE_REASON)) = -EINTR;
}
clear_cpu_flag(CIF_MCCK_GUEST);
nmi_exit();
}
......
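The subclass test at the end of s390_do_machine_check() is compact enough to be worth restating standalone: a machine check is left to the host only if its subclass bits are a subset of channel-report-pending and external-damage. A plain C model (masks copied from the nmi.h hunk above; the helper name is ours):

#include <stdio.h>

#define BIT(x) (1ULL << (x))
#define MCIC_SUBCLASS_MASK	(BIT(63) | BIT(62) | BIT(61) | BIT(59) | \
				 BIT(58) | BIT(56) | BIT(55) | BIT(54) | \
				 BIT(53) | BIT(52) | BIT(47) | BIT(46) | \
				 BIT(45) | BIT(44))
#define MCCK_CODE_CP		BIT(63 - 9)	/* channel report pending */
#define MCCK_CODE_EXT_DAMAGE	BIT(63 - 5)	/* external damage */
#define MCCK_CODE_NO_GUEST	(MCCK_CODE_CP | MCCK_CODE_EXT_DAMAGE)

static int host_only(unsigned long long mcic)
{
	unsigned long long dam = mcic & MCIC_SUBCLASS_MASK;

	/* true if only the CP and/or ED subclass bits are set */
	return (dam & MCCK_CODE_NO_GUEST) == dam;
}

int main(void)
{
	printf("%d\n", host_only(MCCK_CODE_CP));		/* 1: host handles it */
	printf("%d\n", host_only(MCCK_CODE_CP | BIT(63)));	/* 0: SIE exits with -EINTR */
	return 0;
}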
......@@ -995,11 +995,11 @@ static int perf_push_sample(struct perf_event *event, struct sf_raw_sample *sfr)
regs.int_parm = CPU_MF_INT_SF_PRA;
sde_regs = (struct perf_sf_sde_regs *) &regs.int_parm_long;
psw_bits(regs.psw).ia = sfr->basic.ia;
psw_bits(regs.psw).t = sfr->basic.T;
psw_bits(regs.psw).w = sfr->basic.W;
psw_bits(regs.psw).p = sfr->basic.P;
psw_bits(regs.psw).as = sfr->basic.AS;
psw_bits(regs.psw).ia = sfr->basic.ia;
psw_bits(regs.psw).dat = sfr->basic.T;
psw_bits(regs.psw).wait = sfr->basic.W;
psw_bits(regs.psw).per = sfr->basic.P;
psw_bits(regs.psw).as = sfr->basic.AS;
/*
* Use the hardware provided configuration level to decide if the
......
......@@ -245,6 +245,5 @@ ssize_t cpumf_events_sysfs_show(struct device *dev,
struct perf_pmu_events_attr *pmu_attr;
pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
return sprintf(page, "event=0x%04llx,name=%s\n",
pmu_attr->id, attr->attr.name);
return sprintf(page, "event=0x%04llx\n", pmu_attr->id);
}
......@@ -1160,6 +1160,8 @@ static int s390_gs_cb_get(struct task_struct *target,
return -ENODEV;
if (!data)
return -ENODATA;
if (target == current)
save_gs_cb(data);
return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
data, 0, sizeof(struct gs_cb));
}
......@@ -1170,6 +1172,7 @@ static int s390_gs_cb_set(struct task_struct *target,
const void *kbuf, const void __user *ubuf)
{
struct gs_cb *data = target->thread.gs_cb;
int rc;
if (!MACHINE_HAS_GS)
return -ENODEV;
......@@ -1177,10 +1180,18 @@ static int s390_gs_cb_set(struct task_struct *target,
data = kzalloc(sizeof(*data), GFP_KERNEL);
if (!data)
return -ENOMEM;
data->gsd = 25;
target->thread.gs_cb = data;
if (target == current)
__ctl_set_bit(2, 4);
} else if (target == current) {
save_gs_cb(data);
}
return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
data, 0, sizeof(struct gs_cb));
rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
data, 0, sizeof(struct gs_cb));
if (target == current)
restore_gs_cb(data);
return rc;
}
static int s390_gs_bc_get(struct task_struct *target,
......
......@@ -26,6 +26,7 @@
#include <linux/err.h>
#include <linux/spinlock.h>
#include <linux/kernel_stat.h>
#include <linux/kmemleak.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/irqflags.h>
......@@ -207,6 +208,8 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
kmem_cache_alloc(pcpu_mcesa_cache, GFP_KERNEL);
if (!mcesa_origin)
goto out;
/* The pointer is stored with mcesa_bits ORed in */
kmemleak_not_leak((void *) mcesa_origin);
mcesa_bits = MACHINE_HAS_GS ? 11 : 0;
}
} else {
......
......@@ -21,6 +21,7 @@
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/cpu.h>
#include <asm/fpu/api.h>
#include "entry.h"
......
......@@ -27,12 +27,12 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm,
int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
{
if (psw_bits(regs->psw).eaba == PSW_AMODE_24BIT)
if (psw_bits(regs->psw).eaba == PSW_BITS_AMODE_24BIT)
return -EINVAL;
if (!is_compat_task() && psw_bits(regs->psw).eaba == PSW_AMODE_31BIT)
if (!is_compat_task() && psw_bits(regs->psw).eaba == PSW_BITS_AMODE_31BIT)
return -EINVAL;
clear_pt_regs_flag(regs, PIF_PER_TRAP);
auprobe->saved_per = psw_bits(regs->psw).r;
auprobe->saved_per = psw_bits(regs->psw).per;
auprobe->saved_int_code = regs->int_code;
regs->int_code = UPROBE_TRAP_NR;
regs->psw.addr = current->utask->xol_vaddr;
......@@ -81,7 +81,7 @@ int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
clear_tsk_thread_flag(current, TIF_UPROBE_SINGLESTEP);
update_cr_regs(current);
psw_bits(regs->psw).r = auprobe->saved_per;
psw_bits(regs->psw).per = auprobe->saved_per;
regs->int_code = auprobe->saved_int_code;
if (fixup & FIXUP_PSW_NORMAL)
......@@ -372,8 +372,8 @@ static void handle_insn_ril(struct arch_uprobe *auprobe, struct pt_regs *regs)
bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
{
if ((psw_bits(regs->psw).eaba == PSW_AMODE_24BIT) ||
((psw_bits(regs->psw).eaba == PSW_AMODE_31BIT) &&
if ((psw_bits(regs->psw).eaba == PSW_BITS_AMODE_24BIT) ||
((psw_bits(regs->psw).eaba == PSW_BITS_AMODE_31BIT) &&
!is_compat_task())) {
regs->psw.addr = __rewind_psw(regs->psw, UPROBE_SWBP_INSN_SIZE);
do_report_trap(regs, SIGILL, ILL_ILLADR, NULL);
......
......@@ -50,6 +50,56 @@ static struct page **vdso64_pagelist;
*/
unsigned int __read_mostly vdso_enabled = 1;
static int vdso_fault(const struct vm_special_mapping *sm,
struct vm_area_struct *vma, struct vm_fault *vmf)
{
struct page **vdso_pagelist;
unsigned long vdso_pages;
vdso_pagelist = vdso64_pagelist;
vdso_pages = vdso64_pages;
#ifdef CONFIG_COMPAT
if (is_compat_task()) {
vdso_pagelist = vdso32_pagelist;
vdso_pages = vdso32_pages;
}
#endif
if (vmf->pgoff >= vdso_pages)
return VM_FAULT_SIGBUS;
vmf->page = vdso_pagelist[vmf->pgoff];
get_page(vmf->page);
return 0;
}
static int vdso_mremap(const struct vm_special_mapping *sm,
struct vm_area_struct *vma)
{
unsigned long vdso_pages;
vdso_pages = vdso64_pages;
#ifdef CONFIG_COMPAT
if (is_compat_task())
vdso_pages = vdso32_pages;
#endif
if ((vdso_pages << PAGE_SHIFT) != vma->vm_end - vma->vm_start)
return -EINVAL;
if (WARN_ON_ONCE(current->mm != vma->vm_mm))
return -EFAULT;
current->mm->context.vdso_base = vma->vm_start;
return 0;
}
static const struct vm_special_mapping vdso_mapping = {
.name = "[vdso]",
.fault = vdso_fault,
.mremap = vdso_mremap,
};
static int __init vdso_setup(char *s)
{
unsigned long val;
......@@ -181,7 +231,7 @@ static void vdso_init_cr5(void)
int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
struct mm_struct *mm = current->mm;
struct page **vdso_pagelist;
struct vm_area_struct *vma;
unsigned long vdso_pages;
unsigned long vdso_base;
int rc;
......@@ -194,13 +244,10 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
if (!uses_interp)
return 0;
vdso_pagelist = vdso64_pagelist;
vdso_pages = vdso64_pages;
#ifdef CONFIG_COMPAT
if (is_compat_task()) {
vdso_pagelist = vdso32_pagelist;
if (is_compat_task())
vdso_pages = vdso32_pages;
}
#endif
/*
* vDSO has a problem and was disabled, just don't "enable" it for
......@@ -209,8 +256,6 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
if (vdso_pages == 0)
return 0;
current->mm->context.vdso_base = 0;
/*
* pick a base address for the vDSO in process space. We try to put
* it at vdso_base which is the "natural" base for it, but we might
......@@ -224,13 +269,6 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
goto out_up;
}
/*
* Put vDSO base into mm struct. We need to do this before calling
* install_special_mapping or the perf counter mmap tracking code
* will fail to recognise it as a vDSO (since arch_vma_name fails).
*/
current->mm->context.vdso_base = vdso_base;
/*
* our vma flags don't have VM_WRITE so by default, the process
* isn't allowed to write those pages.
......@@ -241,24 +279,23 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
* It's fine to use that for setting breakpoints in the vDSO code
* pages though.
*/
rc = install_special_mapping(mm, vdso_base, vdso_pages << PAGE_SHIFT,
VM_READ|VM_EXEC|
VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
vdso_pagelist);
if (rc)
current->mm->context.vdso_base = 0;
vma = _install_special_mapping(mm, vdso_base, vdso_pages << PAGE_SHIFT,
VM_READ|VM_EXEC|
VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
&vdso_mapping);
if (IS_ERR(vma)) {
rc = PTR_ERR(vma);
goto out_up;
}
current->mm->context.vdso_base = vdso_base;
rc = 0;
out_up:
up_write(&mm->mmap_sem);
return rc;
}
const char *arch_vma_name(struct vm_area_struct *vma)
{
if (vma->vm_mm && vma->vm_start == vma->vm_mm->context.vdso_base)
return "[vdso]";
return NULL;
}
static int __init vdso_init(void)
{
int i;
......
......@@ -110,11 +110,10 @@ static inline u64 scale_vtime(u64 vtime)
return vtime;
}
static void account_system_index_scaled(struct task_struct *p,
u64 cputime, u64 scaled,
static void account_system_index_scaled(struct task_struct *p, u64 cputime,
enum cpu_usage_stat index)
{
p->stimescaled += cputime_to_nsecs(scaled);
p->stimescaled += cputime_to_nsecs(scale_vtime(cputime));
account_system_index_time(p, cputime_to_nsecs(cputime), index);
}
......@@ -176,14 +175,11 @@ static int do_account_vtime(struct task_struct *tsk)
}
if (system)
account_system_index_scaled(tsk, system, scale_vtime(system),
CPUTIME_SYSTEM);
account_system_index_scaled(tsk, system, CPUTIME_SYSTEM);
if (hardirq)
account_system_index_scaled(tsk, hardirq, scale_vtime(hardirq),
CPUTIME_IRQ);
account_system_index_scaled(tsk, hardirq, CPUTIME_IRQ);
if (softirq)
account_system_index_scaled(tsk, softirq, scale_vtime(softirq),
CPUTIME_SOFTIRQ);
account_system_index_scaled(tsk, softirq, CPUTIME_SOFTIRQ);
steal = S390_lowcore.steal_timer;
if ((s64) steal > 0) {
......
......@@ -551,26 +551,26 @@ static int get_vcpu_asce(struct kvm_vcpu *vcpu, union asce *asce,
int rc;
struct psw_bits psw = psw_bits(vcpu->arch.sie_block->gpsw);
if (!psw.t) {
if (!psw.dat) {
asce->val = 0;
asce->r = 1;
return 0;
}
if (mode == GACC_IFETCH)
psw.as = psw.as == PSW_AS_HOME ? PSW_AS_HOME : PSW_AS_PRIMARY;
if ((mode == GACC_IFETCH) && (psw.as != PSW_BITS_AS_HOME))
psw.as = PSW_BITS_AS_PRIMARY;
switch (psw.as) {
case PSW_AS_PRIMARY:
case PSW_BITS_AS_PRIMARY:
asce->val = vcpu->arch.sie_block->gcr[1];
return 0;
case PSW_AS_SECONDARY:
case PSW_BITS_AS_SECONDARY:
asce->val = vcpu->arch.sie_block->gcr[7];
return 0;
case PSW_AS_HOME:
case PSW_BITS_AS_HOME:
asce->val = vcpu->arch.sie_block->gcr[13];
return 0;
case PSW_AS_ACCREG:
case PSW_BITS_AS_ACCREG:
rc = ar_translation(vcpu, asce, ar, mode);
if (rc > 0)
return trans_exc(vcpu, rc, ga, ar, mode, PROT_TYPE_ALC);
......@@ -771,7 +771,7 @@ static int low_address_protection_enabled(struct kvm_vcpu *vcpu,
if (!ctlreg0.lap)
return 0;
if (psw_bits(*psw).t && asce.p)
if (psw_bits(*psw).dat && asce.p)
return 0;
return 1;
}
......@@ -790,7 +790,7 @@ static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
return trans_exc(vcpu, PGM_PROTECTION, ga, ar, mode,
PROT_TYPE_LA);
ga &= PAGE_MASK;
if (psw_bits(*psw).t) {
if (psw_bits(*psw).dat) {
rc = guest_translate(vcpu, ga, pages, asce, mode);
if (rc < 0)
return rc;
......@@ -831,7 +831,7 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data,
pages = vmalloc(nr_pages * sizeof(unsigned long));
if (!pages)
return -ENOMEM;
need_ipte_lock = psw_bits(*psw).t && !asce.r;
need_ipte_lock = psw_bits(*psw).dat && !asce.r;
if (need_ipte_lock)
ipte_lock(vcpu);
rc = guest_page_range(vcpu, ga, ar, pages, nr_pages, asce, mode);
......@@ -899,7 +899,7 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
mode, PROT_TYPE_LA);
}
if (psw_bits(*psw).t && !asce.r) { /* Use DAT? */
if (psw_bits(*psw).dat && !asce.r) { /* Use DAT? */
rc = guest_translate(vcpu, gva, gpa, asce, mode);
if (rc > 0)
return trans_exc(vcpu, rc, gva, 0, mode, PROT_TYPE_DAT);
......
......@@ -57,9 +57,9 @@ static inline unsigned long kvm_s390_logical_to_effective(struct kvm_vcpu *vcpu,
{
psw_t *psw = &vcpu->arch.sie_block->gpsw;
if (psw_bits(*psw).eaba == PSW_AMODE_64BIT)
if (psw_bits(*psw).eaba == PSW_BITS_AMODE_64BIT)
return ga;
if (psw_bits(*psw).eaba == PSW_AMODE_31BIT)
if (psw_bits(*psw).eaba == PSW_BITS_AMODE_31BIT)
return ga & ((1UL << 31) - 1);
return ga & ((1UL << 24) - 1);
}
......
......@@ -613,15 +613,15 @@ int kvm_s390_handle_per_event(struct kvm_vcpu *vcpu)
* instruction. Check primary and home space-switch-event
* controls. (theoretically home -> home produced no event)
*/
if (((new_as == PSW_AS_HOME) ^ old_as_is_home(vcpu)) &&
(pssec(vcpu) || hssec(vcpu)))
if (((new_as == PSW_BITS_AS_HOME) ^ old_as_is_home(vcpu)) &&
(pssec(vcpu) || hssec(vcpu)))
vcpu->arch.sie_block->iprcc = PGM_SPACE_SWITCH;
/*
* PT, PTI, PR, PC instruction operate on primary AS only. Check
* if the primary-space-switch-event control was or got set.
*/
if (new_as == PSW_AS_PRIMARY && !old_as_is_home(vcpu) &&
if (new_as == PSW_BITS_AS_PRIMARY && !old_as_is_home(vcpu) &&
(pssec(vcpu) || old_ssec(vcpu)))
vcpu->arch.sie_block->iprcc = PGM_SPACE_SWITCH;
}
......
......@@ -2067,6 +2067,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
if (!vcpu)
goto out;
BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
if (!sie_page)
goto out_free_cpu;
......
......@@ -361,7 +361,7 @@ static int handle_sske(struct kvm_vcpu *vcpu)
}
}
if (m3 & SSKE_MB) {
if (psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_AMODE_64BIT)
if (psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_BITS_AMODE_64BIT)
vcpu->run->s.regs.gprs[reg2] &= ~PAGE_MASK;
else
vcpu->run->s.regs.gprs[reg2] &= ~0xfffff000UL;
......@@ -374,7 +374,7 @@ static int handle_sske(struct kvm_vcpu *vcpu)
static int handle_ipte_interlock(struct kvm_vcpu *vcpu)
{
vcpu->stat.instruction_ipte_interlock++;
if (psw_bits(vcpu->arch.sie_block->gpsw).p)
if (psw_bits(vcpu->arch.sie_block->gpsw).pstate)
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
wait_event(vcpu->kvm->arch.ipte_wq, !ipte_lock_held(vcpu));
kvm_s390_retry_instr(vcpu);
......@@ -901,7 +901,7 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
/* only support 2G frame size if EDAT2 is available and we are
not in 24-bit addressing mode */
if (!test_kvm_facility(vcpu->kvm, 78) ||
psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_AMODE_24BIT)
psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_BITS_AMODE_24BIT)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
end = (start + (1UL << 31)) & ~((1UL << 31) - 1);
break;
......@@ -938,7 +938,7 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
start += PAGE_SIZE;
}
if (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) {
if (psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_AMODE_64BIT) {
if (psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_BITS_AMODE_64BIT) {
vcpu->run->s.regs.gprs[reg2] = end;
} else {
vcpu->run->s.regs.gprs[reg2] &= ~0xffffffffUL;
......
......@@ -149,7 +149,7 @@ static void walk_pmd_level(struct seq_file *m, struct pg_state *st,
}
static void walk_pud_level(struct seq_file *m, struct pg_state *st,
pgd_t *pgd, unsigned long addr)
p4d_t *p4d, unsigned long addr)
{
unsigned int prot;
pud_t *pud;
......@@ -157,7 +157,7 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st,
for (i = 0; i < PTRS_PER_PUD && addr < max_addr; i++) {
st->current_address = addr;
pud = pud_offset(pgd, addr);
pud = pud_offset(p4d, addr);
if (!pud_none(*pud))
if (pud_large(*pud)) {
prot = pud_val(*pud) &
......@@ -172,6 +172,23 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st,
}
}
static void walk_p4d_level(struct seq_file *m, struct pg_state *st,
pgd_t *pgd, unsigned long addr)
{
p4d_t *p4d;
int i;
for (i = 0; i < PTRS_PER_P4D && addr < max_addr; i++) {
st->current_address = addr;
p4d = p4d_offset(pgd, addr);
if (!p4d_none(*p4d))
walk_pud_level(m, st, p4d, addr);
else
note_page(m, st, _PAGE_INVALID, 2);
addr += P4D_SIZE;
}
}
static void walk_pgd_level(struct seq_file *m)
{
unsigned long addr = 0;
......@@ -184,7 +201,7 @@ static void walk_pgd_level(struct seq_file *m)
st.current_address = addr;
pgd = pgd_offset_k(addr);
if (!pgd_none(*pgd))
walk_pud_level(m, &st, pgd, addr);
walk_p4d_level(m, &st, pgd, addr);
else
note_page(m, &st, _PAGE_INVALID, 1);
addr += PGDIR_SIZE;
......
......@@ -130,7 +130,7 @@ static int bad_address(void *p)
static void dump_pagetable(unsigned long asce, unsigned long address)
{
unsigned long *table = __va(asce & PAGE_MASK);
unsigned long *table = __va(asce & _ASCE_ORIGIN);
pr_alert("AS:%016lx ", asce);
switch (asce & _ASCE_TYPE_MASK) {
......
......@@ -125,7 +125,7 @@ static void gmap_radix_tree_free(struct radix_tree_root *root)
struct radix_tree_iter iter;
unsigned long indices[16];
unsigned long index;
void **slot;
void __rcu **slot;
int i, nr;
/* A radix tree is freed by deleting all of its entries */
......@@ -150,7 +150,7 @@ static void gmap_rmap_radix_tree_free(struct radix_tree_root *root)
struct radix_tree_iter iter;
unsigned long indices[16];
unsigned long index;
void **slot;
void __rcu **slot;
int i, nr;
/* A radix tree is freed by deleting all of its entries */
......@@ -537,6 +537,7 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
unsigned long *table;
spinlock_t *ptl;
pgd_t *pgd;
p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
int rc;
......@@ -573,7 +574,9 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
mm = gmap->mm;
pgd = pgd_offset(mm, vmaddr);
VM_BUG_ON(pgd_none(*pgd));
pud = pud_offset(pgd, vmaddr);
p4d = p4d_offset(pgd, vmaddr);
VM_BUG_ON(p4d_none(*p4d));
pud = pud_offset(p4d, vmaddr);
VM_BUG_ON(pud_none(*pud));
/* large puds cannot yet be handled */
if (pud_large(*pud))
......@@ -1008,7 +1011,7 @@ EXPORT_SYMBOL_GPL(gmap_read_table);
static inline void gmap_insert_rmap(struct gmap *sg, unsigned long vmaddr,
struct gmap_rmap *rmap)
{
void **slot;
void __rcu **slot;
BUG_ON(!gmap_is_shadow(sg));
slot = radix_tree_lookup_slot(&sg->host_to_rmap, vmaddr >> PAGE_SHIFT);
......
......@@ -166,15 +166,15 @@ static int gup_huge_pud(pud_t *pudp, pud_t pud, unsigned long addr,
return 1;
}
static inline int gup_pud_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr,
static inline int gup_pud_range(p4d_t *p4dp, p4d_t p4d, unsigned long addr,
unsigned long end, int write, struct page **pages, int *nr)
{
unsigned long next;
pud_t *pudp, pud;
pudp = (pud_t *) pgdp;
if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2)
pudp = (pud_t *) pgd_deref(pgd);
pudp = (pud_t *) p4dp;
if ((p4d_val(p4d) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2)
pudp = (pud_t *) p4d_deref(p4d);
pudp += pud_index(addr);
do {
pud = *pudp;
......@@ -194,6 +194,29 @@ static inline int gup_pud_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr,
return 1;
}
static inline int gup_p4d_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr,
unsigned long end, int write, struct page **pages, int *nr)
{
unsigned long next;
p4d_t *p4dp, p4d;
p4dp = (p4d_t *) pgdp;
if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R1)
p4dp = (p4d_t *) pgd_deref(pgd);
p4dp += p4d_index(addr);
do {
p4d = *p4dp;
barrier();
next = p4d_addr_end(addr, end);
if (p4d_none(p4d))
return 0;
if (!gup_pud_range(p4dp, p4d, addr, next, write, pages, nr))
return 0;
} while (p4dp++, addr = next, addr != end);
return 1;
}
/*
* Like get_user_pages_fast() except its IRQ-safe in that it won't fall
* back to the regular GUP.
......@@ -228,7 +251,7 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
next = pgd_addr_end(addr, end);
if (pgd_none(pgd))
break;
if (!gup_pud_range(pgdp, pgd, addr, next, write, pages, &nr))
if (!gup_p4d_range(pgdp, pgd, addr, next, write, pages, &nr))
break;
} while (pgdp++, addr = next, addr != end);
local_irq_restore(flags);
......
......@@ -162,16 +162,20 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
unsigned long addr, unsigned long sz)
{
pgd_t *pgdp;
p4d_t *p4dp;
pud_t *pudp;
pmd_t *pmdp = NULL;
pgdp = pgd_offset(mm, addr);
pudp = pud_alloc(mm, pgdp, addr);
if (pudp) {
if (sz == PUD_SIZE)
return (pte_t *) pudp;
else if (sz == PMD_SIZE)
pmdp = pmd_alloc(mm, pudp, addr);
p4dp = p4d_alloc(mm, pgdp, addr);
if (p4dp) {
pudp = pud_alloc(mm, p4dp, addr);
if (pudp) {
if (sz == PUD_SIZE)
return (pte_t *) pudp;
else if (sz == PMD_SIZE)
pmdp = pmd_alloc(mm, pudp, addr);
}
}
return (pte_t *) pmdp;
}
......@@ -179,16 +183,20 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
{
pgd_t *pgdp;
p4d_t *p4dp;
pud_t *pudp;
pmd_t *pmdp = NULL;
pgdp = pgd_offset(mm, addr);
if (pgd_present(*pgdp)) {
pudp = pud_offset(pgdp, addr);
if (pud_present(*pudp)) {
if (pud_large(*pudp))
return (pte_t *) pudp;
pmdp = pmd_offset(pudp, addr);
p4dp = p4d_offset(pgdp, addr);
if (p4d_present(*p4dp)) {
pudp = pud_offset(p4dp, addr);
if (pud_present(*pudp)) {
if (pud_large(*pudp))
return (pte_t *) pudp;
pmdp = pmd_offset(pudp, addr);
}
}
}
return (pte_t *) pmdp;
......
......@@ -81,6 +81,7 @@ void __init paging_init(void)
{
unsigned long max_zone_pfns[MAX_NR_ZONES];
unsigned long pgd_type, asce_bits;
psw_t psw;
init_mm.pgd = swapper_pg_dir;
if (VMALLOC_END > (1UL << 42)) {
......@@ -100,7 +101,10 @@ void __init paging_init(void)
__ctl_load(S390_lowcore.kernel_asce, 1, 1);
__ctl_load(S390_lowcore.kernel_asce, 7, 7);
__ctl_load(S390_lowcore.kernel_asce, 13, 13);
__arch_local_irq_stosm(0x04);
psw.mask = __extract_psw();
psw_bits(psw).dat = 1;
psw_bits(psw).as = PSW_BITS_AS_HOME;
__load_psw_mask(psw.mask);
sparse_memory_present_with_active_regions(MAX_NUMNODES);
sparse_init();
......
......@@ -120,7 +120,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
check_asce_limit:
if (addr + len > current->mm->context.asce_limit) {
rc = crst_table_upgrade(mm);
rc = crst_table_upgrade(mm, addr + len);
if (rc)
return (unsigned long) rc;
}
......@@ -184,7 +184,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
check_asce_limit:
if (addr + len > current->mm->context.asce_limit) {
rc = crst_table_upgrade(mm);
rc = crst_table_upgrade(mm, addr + len);
if (rc)
return (unsigned long) rc;
}
......
......@@ -229,14 +229,14 @@ static void modify_pud_page(pud_t *pudp, unsigned long addr,
pgt_set((unsigned long *)pudp, pud_val(new), addr, CRDTE_DTT_REGION3);
}
static int walk_pud_level(pgd_t *pgd, unsigned long addr, unsigned long end,
static int walk_pud_level(p4d_t *p4d, unsigned long addr, unsigned long end,
unsigned long flags)
{
unsigned long next;
pud_t *pudp;
int rc = 0;
pudp = pud_offset(pgd, addr);
pudp = pud_offset(p4d, addr);
do {
if (pud_none(*pudp))
return -EINVAL;
......@@ -259,6 +259,26 @@ static int walk_pud_level(pgd_t *pgd, unsigned long addr, unsigned long end,
return rc;
}
static int walk_p4d_level(pgd_t *pgd, unsigned long addr, unsigned long end,
unsigned long flags)
{
unsigned long next;
p4d_t *p4dp;
int rc = 0;
p4dp = p4d_offset(pgd, addr);
do {
if (p4d_none(*p4dp))
return -EINVAL;
next = p4d_addr_end(addr, end);
rc = walk_pud_level(p4dp, addr, next, flags);
p4dp++;
addr = next;
cond_resched();
} while (addr < end && !rc);
return rc;
}
static DEFINE_MUTEX(cpa_mutex);
static int change_page_attr(unsigned long addr, unsigned long end,
......@@ -278,7 +298,7 @@ static int change_page_attr(unsigned long addr, unsigned long end,
if (pgd_none(*pgdp))
break;
next = pgd_addr_end(addr, end);
rc = walk_pud_level(pgdp, addr, next, flags);
rc = walk_p4d_level(pgdp, addr, next, flags);
if (rc)
break;
cond_resched();
......@@ -319,6 +339,7 @@ void __kernel_map_pages(struct page *page, int numpages, int enable)
unsigned long address;
int nr, i, j;
pgd_t *pgd;
p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
......@@ -326,7 +347,8 @@ void __kernel_map_pages(struct page *page, int numpages, int enable)
for (i = 0; i < numpages;) {
address = page_to_phys(page + i);
pgd = pgd_offset_k(address);
pud = pud_offset(pgd, address);
p4d = p4d_offset(pgd, address);
pud = pud_offset(p4d, address);
pmd = pmd_offset(pud, address);
pte = pte_offset_kernel(pmd, address);
nr = (unsigned long)pte >> ilog2(sizeof(long));
......
......@@ -76,29 +76,46 @@ static void __crst_table_upgrade(void *arg)
__tlb_flush_local();
}
int crst_table_upgrade(struct mm_struct *mm)
int crst_table_upgrade(struct mm_struct *mm, unsigned long end)
{
unsigned long *table, *pgd;
int rc, notify;
/* upgrade should only happen from 3 to 4 levels */
BUG_ON(mm->context.asce_limit != (1UL << 42));
table = crst_table_alloc(mm);
if (!table)
/* upgrade should only happen from 3 to 4, 3 to 5, or 4 to 5 levels */
BUG_ON(mm->context.asce_limit < (1UL << 42));
if (end >= TASK_SIZE_MAX)
return -ENOMEM;
spin_lock_bh(&mm->page_table_lock);
pgd = (unsigned long *) mm->pgd;
crst_table_init(table, _REGION2_ENTRY_EMPTY);
pgd_populate(mm, (pgd_t *) table, (pud_t *) pgd);
mm->pgd = (pgd_t *) table;
mm->context.asce_limit = 1UL << 53;
mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
_ASCE_USER_BITS | _ASCE_TYPE_REGION2;
spin_unlock_bh(&mm->page_table_lock);
on_each_cpu(__crst_table_upgrade, mm, 0);
return 0;
rc = 0;
notify = 0;
while (mm->context.asce_limit < end) {
table = crst_table_alloc(mm);
if (!table) {
rc = -ENOMEM;
break;
}
spin_lock_bh(&mm->page_table_lock);
pgd = (unsigned long *) mm->pgd;
if (mm->context.asce_limit == (1UL << 42)) {
crst_table_init(table, _REGION2_ENTRY_EMPTY);
p4d_populate(mm, (p4d_t *) table, (pud_t *) pgd);
mm->pgd = (pgd_t *) table;
mm->context.asce_limit = 1UL << 53;
mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
_ASCE_USER_BITS | _ASCE_TYPE_REGION2;
} else {
crst_table_init(table, _REGION1_ENTRY_EMPTY);
pgd_populate(mm, (pgd_t *) table, (p4d_t *) pgd);
mm->pgd = (pgd_t *) table;
mm->context.asce_limit = -PAGE_SIZE;
mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
_ASCE_USER_BITS | _ASCE_TYPE_REGION1;
}
notify = 1;
spin_unlock_bh(&mm->page_table_lock);
}
if (notify)
on_each_cpu(__crst_table_upgrade, mm, 0);
return rc;
}
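A hedged sketch of how a caller is expected to use the new end parameter, modeled on the s390 mmap code (function name assumed for illustration): the upgrade is requested on demand when a mapping would exceed the current ASCE limit, and the loop above then raises the limit one region-table level at a time until end fits.

static unsigned long check_asce_limit(struct mm_struct *mm, unsigned long addr,
				      unsigned long len)
{
	int rc;

	if (addr + len > mm->context.asce_limit) {
		rc = crst_table_upgrade(mm, addr + len);
		if (rc)
			return (unsigned long) rc;
	}
	return addr;
}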
void crst_table_downgrade(struct mm_struct *mm)
......@@ -274,7 +291,7 @@ static void __tlb_remove_table(void *_table)
struct page *page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
switch (mask) {
case 0: /* pmd or pud */
case 0: /* pmd, pud, or p4d */
free_pages((unsigned long) table, 2);
break;
case 1: /* lower 2K of a 4K page table */
......
......@@ -610,6 +610,7 @@ bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr)
{
spinlock_t *ptl;
pgd_t *pgd;
p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pgste_t pgste;
......@@ -618,7 +619,10 @@ bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr)
bool dirty;
pgd = pgd_offset(mm, addr);
pud = pud_alloc(mm, pgd, addr);
p4d = p4d_alloc(mm, pgd, addr);
if (!p4d)
return false;
pud = pud_alloc(mm, p4d, addr);
if (!pud)
return false;
pmd = pmd_alloc(mm, pud, addr);
......
......@@ -38,6 +38,17 @@ static void __ref *vmem_alloc_pages(unsigned int order)
return (void *) memblock_alloc(size, size);
}
static inline p4d_t *vmem_p4d_alloc(void)
{
p4d_t *p4d = NULL;
p4d = vmem_alloc_pages(2);
if (!p4d)
return NULL;
clear_table((unsigned long *) p4d, _REGION2_ENTRY_EMPTY, PAGE_SIZE * 4);
return p4d;
}
static inline pud_t *vmem_pud_alloc(void)
{
pud_t *pud = NULL;
......@@ -85,6 +96,7 @@ static int vmem_add_mem(unsigned long start, unsigned long size)
unsigned long end = start + size;
unsigned long address = start;
pgd_t *pg_dir;
p4d_t *p4_dir;
pud_t *pu_dir;
pmd_t *pm_dir;
pte_t *pt_dir;
......@@ -102,12 +114,19 @@ static int vmem_add_mem(unsigned long start, unsigned long size)
while (address < end) {
pg_dir = pgd_offset_k(address);
if (pgd_none(*pg_dir)) {
p4_dir = vmem_p4d_alloc();
if (!p4_dir)
goto out;
pgd_populate(&init_mm, pg_dir, p4_dir);
}
p4_dir = p4d_offset(pg_dir, address);
if (p4d_none(*p4_dir)) {
pu_dir = vmem_pud_alloc();
if (!pu_dir)
goto out;
pgd_populate(&init_mm, pg_dir, pu_dir);
p4d_populate(&init_mm, p4_dir, pu_dir);
}
pu_dir = pud_offset(pg_dir, address);
pu_dir = pud_offset(p4_dir, address);
if (MACHINE_HAS_EDAT2 && pud_none(*pu_dir) && address &&
!(address & ~PUD_MASK) && (address + PUD_SIZE <= end) &&
!debug_pagealloc_enabled()) {
......@@ -161,6 +180,7 @@ static void vmem_remove_range(unsigned long start, unsigned long size)
unsigned long end = start + size;
unsigned long address = start;
pgd_t *pg_dir;
p4d_t *p4_dir;
pud_t *pu_dir;
pmd_t *pm_dir;
pte_t *pt_dir;
......@@ -172,7 +192,12 @@ static void vmem_remove_range(unsigned long start, unsigned long size)
address += PGDIR_SIZE;
continue;
}
pu_dir = pud_offset(pg_dir, address);
p4_dir = p4d_offset(pg_dir, address);
if (p4d_none(*p4_dir)) {
address += P4D_SIZE;
continue;
}
pu_dir = pud_offset(p4_dir, address);
if (pud_none(*pu_dir)) {
address += PUD_SIZE;
continue;
......@@ -213,6 +238,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
unsigned long pgt_prot, sgt_prot;
unsigned long address = start;
pgd_t *pg_dir;
p4d_t *p4_dir;
pud_t *pu_dir;
pmd_t *pm_dir;
pte_t *pt_dir;
......@@ -227,13 +253,21 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
for (address = start; address < end;) {
pg_dir = pgd_offset_k(address);
if (pgd_none(*pg_dir)) {
p4_dir = vmem_p4d_alloc();
if (!p4_dir)
goto out;
pgd_populate(&init_mm, pg_dir, p4_dir);
}
p4_dir = p4d_offset(pg_dir, address);
if (p4d_none(*p4_dir)) {
pu_dir = vmem_pud_alloc();
if (!pu_dir)
goto out;
pgd_populate(&init_mm, pg_dir, pu_dir);
p4d_populate(&init_mm, p4_dir, pu_dir);
}
pu_dir = pud_offset(pg_dir, address);
pu_dir = pud_offset(p4_dir, address);
if (pud_none(*pu_dir)) {
pm_dir = vmem_pmd_alloc();
if (!pm_dir)
......
......@@ -86,6 +86,25 @@ struct zpci_dev *get_zdev_by_fid(u32 fid)
return zdev;
}
void zpci_remove_reserved_devices(void)
{
struct zpci_dev *tmp, *zdev;
enum zpci_state state;
LIST_HEAD(remove);
spin_lock(&zpci_list_lock);
list_for_each_entry_safe(zdev, tmp, &zpci_list, entry) {
if (zdev->state == ZPCI_FN_STATE_STANDBY &&
!clp_get_state(zdev->fid, &state) &&
state == ZPCI_FN_STATE_RESERVED)
list_move_tail(&zdev->entry, &remove);
}
spin_unlock(&zpci_list_lock);
list_for_each_entry_safe(zdev, tmp, &remove, entry)
zpci_remove_device(zdev);
}
static struct zpci_dev *get_zdev_by_bus(struct pci_bus *bus)
{
return (bus && bus->sysdata) ? (struct zpci_dev *) bus->sysdata : NULL;
......@@ -108,6 +127,7 @@ static int zpci_set_airq(struct zpci_dev *zdev)
{
u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_REG_INT);
struct zpci_fib fib = {0};
u8 status;
fib.isc = PCI_ISC;
fib.sum = 1; /* enable summary notifications */
......@@ -117,60 +137,58 @@ static int zpci_set_airq(struct zpci_dev *zdev)
fib.aisb = (unsigned long) zpci_aisb_iv->vector + (zdev->aisb/64)*8;
fib.aisbo = zdev->aisb & 63;
return zpci_mod_fc(req, &fib);
return zpci_mod_fc(req, &fib, &status) ? -EIO : 0;
}
struct mod_pci_args {
u64 base;
u64 limit;
u64 iota;
u64 fmb_addr;
};
static int mod_pci(struct zpci_dev *zdev, int fn, u8 dmaas, struct mod_pci_args *args)
/* Modify PCI: Unregister adapter interruptions */
static int zpci_clear_airq(struct zpci_dev *zdev)
{
u64 req = ZPCI_CREATE_REQ(zdev->fh, dmaas, fn);
u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_DEREG_INT);
struct zpci_fib fib = {0};
u8 cc, status;
fib.pba = args->base;
fib.pal = args->limit;
fib.iota = args->iota;
fib.fmb_addr = args->fmb_addr;
cc = zpci_mod_fc(req, &fib, &status);
if (cc == 3 || (cc == 1 && status == 24))
/* Function already gone or IRQs already deregistered. */
cc = 0;
return zpci_mod_fc(req, &fib);
return cc ? -EIO : 0;
}
/* Modify PCI: Register I/O address translation parameters */
int zpci_register_ioat(struct zpci_dev *zdev, u8 dmaas,
u64 base, u64 limit, u64 iota)
{
struct mod_pci_args args = { base, limit, iota, 0 };
u64 req = ZPCI_CREATE_REQ(zdev->fh, dmaas, ZPCI_MOD_FC_REG_IOAT);
struct zpci_fib fib = {0};
u8 status;
WARN_ON_ONCE(iota & 0x3fff);
args.iota |= ZPCI_IOTA_RTTO_FLAG;
return mod_pci(zdev, ZPCI_MOD_FC_REG_IOAT, dmaas, &args);
fib.pba = base;
fib.pal = limit;
fib.iota = iota | ZPCI_IOTA_RTTO_FLAG;
return zpci_mod_fc(req, &fib, &status) ? -EIO : 0;
}
/* Modify PCI: Unregister I/O address translation parameters */
int zpci_unregister_ioat(struct zpci_dev *zdev, u8 dmaas)
{
struct mod_pci_args args = { 0, 0, 0, 0 };
return mod_pci(zdev, ZPCI_MOD_FC_DEREG_IOAT, dmaas, &args);
}
/* Modify PCI: Unregister adapter interruptions */
static int zpci_clear_airq(struct zpci_dev *zdev)
{
struct mod_pci_args args = { 0, 0, 0, 0 };
u64 req = ZPCI_CREATE_REQ(zdev->fh, dmaas, ZPCI_MOD_FC_DEREG_IOAT);
struct zpci_fib fib = {0};
u8 cc, status;
return mod_pci(zdev, ZPCI_MOD_FC_DEREG_INT, 0, &args);
cc = zpci_mod_fc(req, &fib, &status);
if (cc == 3) /* Function already gone. */
cc = 0;
return cc ? -EIO : 0;
}
/* Modify PCI: Set PCI function measurement parameters */
int zpci_fmb_enable_device(struct zpci_dev *zdev)
{
struct mod_pci_args args = { 0, 0, 0, 0 };
u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_SET_MEASURE);
struct zpci_fib fib = {0};
u8 cc, status;
if (zdev->fmb || sizeof(*zdev->fmb) < zdev->fmb_length)
return -EINVAL;
......@@ -185,25 +203,35 @@ int zpci_fmb_enable_device(struct zpci_dev *zdev)
atomic64_set(&zdev->mapped_pages, 0);
atomic64_set(&zdev->unmapped_pages, 0);
args.fmb_addr = virt_to_phys(zdev->fmb);
return mod_pci(zdev, ZPCI_MOD_FC_SET_MEASURE, 0, &args);
fib.fmb_addr = virt_to_phys(zdev->fmb);
cc = zpci_mod_fc(req, &fib, &status);
if (cc) {
kmem_cache_free(zdev_fmb_cache, zdev->fmb);
zdev->fmb = NULL;
}
return cc ? -EIO : 0;
}
/* Modify PCI: Disable PCI function measurement */
int zpci_fmb_disable_device(struct zpci_dev *zdev)
{
struct mod_pci_args args = { 0, 0, 0, 0 };
int rc;
u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_SET_MEASURE);
struct zpci_fib fib = {0};
u8 cc, status;
if (!zdev->fmb)
return -EINVAL;
/* Function measurement is disabled if fmb address is zero */
rc = mod_pci(zdev, ZPCI_MOD_FC_SET_MEASURE, 0, &args);
cc = zpci_mod_fc(req, &fib, &status);
if (cc == 3) /* Function already gone. */
cc = 0;
kmem_cache_free(zdev_fmb_cache, zdev->fmb);
zdev->fmb = NULL;
return rc;
if (!cc) {
kmem_cache_free(zdev_fmb_cache, zdev->fmb);
zdev->fmb = NULL;
}
return cc ? -EIO : 0;
}
static int zpci_cfg_load(struct zpci_dev *zdev, int offset, u32 *val, u8 len)
......@@ -372,22 +400,21 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
struct msi_msg msg;
int rc, irq;
zdev->aisb = -1UL;
if (type == PCI_CAP_ID_MSI && nvec > 1)
return 1;
msi_vecs = min_t(unsigned int, nvec, zdev->max_msi);
/* Allocate adapter summary indicator bit */
rc = -EIO;
aisb = airq_iv_alloc_bit(zpci_aisb_iv);
if (aisb == -1UL)
goto out;
return -EIO;
zdev->aisb = aisb;
/* Create adapter interrupt vector */
rc = -ENOMEM;
zdev->aibv = airq_iv_create(msi_vecs, AIRQ_IV_DATA | AIRQ_IV_BITLOCK);
if (!zdev->aibv)
goto out_si;
return -ENOMEM;
/* Wire up shortcut pointer */
zpci_aibv[aisb] = zdev->aibv;
......@@ -398,10 +425,10 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
rc = -EIO;
irq = irq_alloc_desc(0); /* Alloc irq on node 0 */
if (irq < 0)
goto out_msi;
return -ENOMEM;
rc = irq_set_msi_desc(irq, msi);
if (rc)
goto out_msi;
return rc;
irq_set_chip_and_handler(irq, &zpci_irq_chip,
handle_simple_irq);
msg.data = hwirq;
......@@ -415,27 +442,9 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
/* Enable adapter interrupts */
rc = zpci_set_airq(zdev);
if (rc)
goto out_msi;
return rc;
return (msi_vecs == nvec) ? 0 : msi_vecs;
out_msi:
for_each_pci_msi_entry(msi, pdev) {
if (hwirq-- == 0)
break;
irq_set_msi_desc(msi->irq, NULL);
irq_free_desc(msi->irq);
msi->msg.address_lo = 0;
msi->msg.address_hi = 0;
msi->msg.data = 0;
msi->irq = 0;
}
zpci_aibv[aisb] = NULL;
airq_iv_release(zdev->aibv);
out_si:
airq_iv_free_bit(zpci_aisb_iv, aisb);
out:
return rc;
}
void arch_teardown_msi_irqs(struct pci_dev *pdev)
......@@ -451,6 +460,8 @@ void arch_teardown_msi_irqs(struct pci_dev *pdev)
/* Release MSI interrupts */
for_each_pci_msi_entry(msi, pdev) {
if (!msi->irq)
continue;
if (msi->msi_attrib.is_msix)
__pci_msix_desc_mask_irq(msi, 1);
else
......@@ -463,9 +474,15 @@ void arch_teardown_msi_irqs(struct pci_dev *pdev)
msi->irq = 0;
}
zpci_aibv[zdev->aisb] = NULL;
airq_iv_release(zdev->aibv);
airq_iv_free_bit(zpci_aisb_iv, zdev->aisb);
if (zdev->aisb != -1UL) {
zpci_aibv[zdev->aisb] = NULL;
airq_iv_free_bit(zpci_aisb_iv, zdev->aisb);
zdev->aisb = -1UL;
}
if (zdev->aibv) {
airq_iv_release(zdev->aibv);
zdev->aibv = NULL;
}
}
static void zpci_map_resources(struct pci_dev *pdev)
......@@ -719,6 +736,16 @@ static int zpci_alloc_domain(struct zpci_dev *zdev)
{
if (zpci_unique_uid) {
zdev->domain = (u16) zdev->uid;
if (zdev->domain >= ZPCI_NR_DEVICES)
return 0;
spin_lock(&zpci_domain_lock);
if (test_bit(zdev->domain, zpci_domain)) {
spin_unlock(&zpci_domain_lock);
return -EEXIST;
}
set_bit(zdev->domain, zpci_domain);
spin_unlock(&zpci_domain_lock);
return 0;
}
......@@ -735,7 +762,7 @@ static int zpci_alloc_domain(struct zpci_dev *zdev)
static void zpci_free_domain(struct zpci_dev *zdev)
{
if (zpci_unique_uid)
if (zdev->domain >= ZPCI_NR_DEVICES)
return;
spin_lock(&zpci_domain_lock);
......@@ -755,6 +782,7 @@ void pcibios_remove_bus(struct pci_bus *bus)
list_del(&zdev->entry);
spin_unlock(&zpci_list_lock);
zpci_dbg(3, "rem fid:%x\n", zdev->fid);
kfree(zdev);
}
......@@ -847,15 +875,14 @@ int zpci_create_device(struct zpci_dev *zdev)
return rc;
}
void zpci_stop_device(struct zpci_dev *zdev)
void zpci_remove_device(struct zpci_dev *zdev)
{
zpci_dma_exit_device(zdev);
/*
* Note: SCLP disables fh via set-pci-fn so don't
* do that here.
*/
if (!zdev->bus)
return;
pci_stop_root_bus(zdev->bus);
pci_remove_root_bus(zdev->bus);
}
EXPORT_SYMBOL_GPL(zpci_stop_device);
int zpci_report_error(struct pci_dev *pdev,
struct zpci_report_error_header *report)
......
......@@ -193,12 +193,12 @@ static int clp_query_pci_fn(struct zpci_dev *zdev, u32 fh)
int clp_add_pci_device(u32 fid, u32 fh, int configured)
{
struct zpci_dev *zdev;
int rc;
int rc = -ENOMEM;
zpci_dbg(3, "add fid:%x, fh:%x, c:%d\n", fid, fh, configured);
zdev = kzalloc(sizeof(*zdev), GFP_KERNEL);
if (!zdev)
return -ENOMEM;
goto error;
zdev->fh = fh;
zdev->fid = fid;
......@@ -219,6 +219,7 @@ int clp_add_pci_device(u32 fid, u32 fh, int configured)
return 0;
error:
zpci_dbg(0, "add fid:%x, rc:%d\n", fid, rc);
kfree(zdev);
return rc;
}
......@@ -295,8 +296,8 @@ int clp_disable_fh(struct zpci_dev *zdev)
return rc;
}
static int clp_list_pci(struct clp_req_rsp_list_pci *rrb,
void (*cb)(struct clp_fh_list_entry *entry))
static int clp_list_pci(struct clp_req_rsp_list_pci *rrb, void *data,
void (*cb)(struct clp_fh_list_entry *, void *))
{
u64 resume_token = 0;
int entries, i, rc;
......@@ -327,21 +328,13 @@ static int clp_list_pci(struct clp_req_rsp_list_pci *rrb,
resume_token = rrb->response.resume_token;
for (i = 0; i < entries; i++)
cb(&rrb->response.fh_list[i]);
cb(&rrb->response.fh_list[i], data);
} while (resume_token);
out:
return rc;
}
static void __clp_add(struct clp_fh_list_entry *entry)
{
if (!entry->vendor_id)
return;
clp_add_pci_device(entry->fid, entry->fh, entry->config_state);
}
static void __clp_rescan(struct clp_fh_list_entry *entry)
static void __clp_add(struct clp_fh_list_entry *entry, void *data)
{
struct zpci_dev *zdev;
......@@ -349,22 +342,11 @@ static void __clp_rescan(struct clp_fh_list_entry *entry)
return;
zdev = get_zdev_by_fid(entry->fid);
if (!zdev) {
if (!zdev)
clp_add_pci_device(entry->fid, entry->fh, entry->config_state);
return;
}
if (!entry->config_state) {
/*
* The handle is already disabled, that means no iota/irq freeing via
* the firmware interfaces anymore. Need to free resources manually
* (DMA memory, debug, sysfs)...
*/
zpci_stop_device(zdev);
}
}
static void __clp_update(struct clp_fh_list_entry *entry)
static void __clp_update(struct clp_fh_list_entry *entry, void *data)
{
struct zpci_dev *zdev;
......@@ -387,7 +369,7 @@ int clp_scan_pci_devices(void)
if (!rrb)
return -ENOMEM;
rc = clp_list_pci(rrb, __clp_add);
rc = clp_list_pci(rrb, NULL, __clp_add);
clp_free_block(rrb);
return rc;
......@@ -398,11 +380,13 @@ int clp_rescan_pci_devices(void)
struct clp_req_rsp_list_pci *rrb;
int rc;
zpci_remove_reserved_devices();
rrb = clp_alloc_block(GFP_KERNEL);
if (!rrb)
return -ENOMEM;
rc = clp_list_pci(rrb, __clp_rescan);
rc = clp_list_pci(rrb, NULL, __clp_add);
clp_free_block(rrb);
return rc;
......@@ -417,7 +401,40 @@ int clp_rescan_pci_devices_simple(void)
if (!rrb)
return -ENOMEM;
rc = clp_list_pci(rrb, __clp_update);
rc = clp_list_pci(rrb, NULL, __clp_update);
clp_free_block(rrb);
return rc;
}
struct clp_state_data {
u32 fid;
enum zpci_state state;
};
static void __clp_get_state(struct clp_fh_list_entry *entry, void *data)
{
struct clp_state_data *sd = data;
if (entry->fid != sd->fid)
return;
sd->state = entry->config_state;
}
int clp_get_state(u32 fid, enum zpci_state *state)
{
struct clp_req_rsp_list_pci *rrb;
struct clp_state_data sd = {fid, ZPCI_FN_STATE_RESERVED};
int rc;
rrb = clp_alloc_block(GFP_KERNEL);
if (!rrb)
return -ENOMEM;
rc = clp_list_pci(rrb, &sd, __clp_get_state);
if (!rc)
*state = sd.state;
clp_free_block(rrb);
return rc;
......
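clp_list_pci now threads an opaque cookie to its callback, which is what makes clp_get_state possible. A hypothetical further user of the same interface (names invented for illustration) could, say, count the listed functions:

static void __clp_count(struct clp_fh_list_entry *entry, void *data)
{
	int *count = data;

	(*count)++;
}

static int clp_count_pci_functions(void)
{
	struct clp_req_rsp_list_pci *rrb;
	int count = 0, rc;

	rrb = clp_alloc_block(GFP_KERNEL);
	if (!rrb)
		return -ENOMEM;
	rc = clp_list_pci(rrb, &count, __clp_count);
	clp_free_block(rrb);
	return rc ? rc : count;
}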
......@@ -601,7 +601,9 @@ void zpci_dma_exit_device(struct zpci_dev *zdev)
*/
WARN_ON(zdev->s390_domain);
zpci_unregister_ioat(zdev, 0);
if (zpci_unregister_ioat(zdev, 0))
return;
dma_cleanup_tables(zdev->dma_table);
zdev->dma_table = NULL;
vfree(zdev->iommu_bitmap);
......
......@@ -74,6 +74,7 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
{
struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
struct pci_dev *pdev = NULL;
enum zpci_state state;
int ret;
if (zdev)
......@@ -108,6 +109,8 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
clp_add_pci_device(ccdf->fid, ccdf->fh, 0);
break;
case 0x0303: /* Deconfiguration requested */
if (!zdev)
break;
if (pdev)
pci_stop_and_remove_bus_device_locked(pdev);
......@@ -121,7 +124,9 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
zdev->state = ZPCI_FN_STATE_STANDBY;
break;
case 0x0304: /* Configured -> Standby */
case 0x0304: /* Configured -> Standby|Reserved */
if (!zdev)
break;
if (pdev) {
/* Give the driver a hint that the function is
* already unusable. */
......@@ -132,6 +137,10 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
zdev->fh = ccdf->fh;
zpci_disable_device(zdev);
zdev->state = ZPCI_FN_STATE_STANDBY;
if (!clp_get_state(ccdf->fid, &state) &&
state == ZPCI_FN_STATE_RESERVED) {
zpci_remove_device(zdev);
}
break;
case 0x0306: /* 0x308 or 0x302 for multiple devices */
clp_rescan_pci_devices();
......@@ -139,8 +148,7 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
case 0x0308: /* Standby -> Reserved */
if (!zdev)
break;
pci_stop_root_bus(zdev->bus);
pci_remove_root_bus(zdev->bus);
zpci_remove_device(zdev);
break;
default:
break;
......
......@@ -40,20 +40,20 @@ static inline u8 __mpcifc(u64 req, struct zpci_fib *fib, u8 *status)
return cc;
}
int zpci_mod_fc(u64 req, struct zpci_fib *fib)
u8 zpci_mod_fc(u64 req, struct zpci_fib *fib, u8 *status)
{
u8 cc, status;
u8 cc;
do {
cc = __mpcifc(req, fib, &status);
cc = __mpcifc(req, fib, status);
if (cc == 2)
msleep(ZPCI_INSN_BUSY_DELAY);
} while (cc == 2);
if (cc)
zpci_err_insn(cc, status, req, 0);
zpci_err_insn(cc, *status, req, 0);
return (cc) ? -EIO : 0;
return cc;
}
/* Refresh PCI Translations */
......
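With zpci_mod_fc returning the raw condition code and passing the FIB status back through an out-parameter, callers can map cc/status combinations to errors themselves, as zpci_clear_airq and zpci_fmb_disable_device do above. A minimal, illustrative sketch of the new calling convention:

static int example_mod_fc(struct zpci_dev *zdev)
{
	u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_SET_MEASURE);
	struct zpci_fib fib = {0};
	u8 cc, status;

	cc = zpci_mod_fc(req, &fib, &status);
	if (cc == 3)	/* function already gone: not an error for teardown */
		cc = 0;
	return cc ? -EIO : 0;
}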
......@@ -34,8 +34,6 @@ static struct facility_def facility_defs[] = {
18, /* long displacement facility */
#endif
#ifdef CONFIG_HAVE_MARCH_Z9_109_FEATURES
7, /* stfle */
17, /* message security assist */
21, /* extended-immediate facility */
25, /* store clock fast */
#endif
......
......@@ -89,6 +89,20 @@ config PKEY
requires to have at least one CEX card in coprocessor mode
available at runtime.
config CRYPTO_PAES_S390
tristate "PAES cipher algorithms"
depends on S390
depends on ZCRYPT
depends on PKEY
select CRYPTO_ALGAPI
select CRYPTO_BLKCIPHER
help
This is the s390 hardware accelerated implementation of the
AES cipher algorithms for use with protected keys.
Select this option if you want to use the paes cipher,
for example to access protected-key encrypted devices.
config CRYPTO_SHA1_S390
tristate "SHA1 digest algorithm"
depends on S390
......@@ -137,7 +151,6 @@ config CRYPTO_AES_S390
depends on S390
select CRYPTO_ALGAPI
select CRYPTO_BLKCIPHER
select PKEY
help
This is the s390 hardware accelerated implementation of the
AES cipher algorithms (FIPS-197).
......
......@@ -82,10 +82,3 @@ config SCM_BLOCK
To compile this driver as a module, choose M here: the
module will be called scm_block.
config SCM_BLOCK_CLUSTER_WRITE
def_bool y
prompt "SCM force cluster writes"
depends on SCM_BLOCK
help
Force writes to Storage Class Memory (SCM) to be done in clusters.
......@@ -19,7 +19,4 @@ obj-$(CONFIG_BLK_DEV_XPRAM) += xpram.o
obj-$(CONFIG_DCSSBLK) += dcssblk.o
scm_block-objs := scm_drv.o scm_blk.o
ifdef CONFIG_SCM_BLOCK_CLUSTER_WRITE
scm_block-objs += scm_blk_cluster.o
endif
obj-$(CONFIG_SCM_BLOCK) += scm_block.o
......@@ -1965,8 +1965,12 @@ static int __dasd_device_is_unusable(struct dasd_device *device,
{
int mask = ~(DASD_STOPPED_DC_WAIT | DASD_UNRESUMED_PM);
if (test_bit(DASD_FLAG_OFFLINE, &device->flags)) {
/* dasd is being set offline. */
if (test_bit(DASD_FLAG_OFFLINE, &device->flags) &&
!test_bit(DASD_FLAG_SAFE_OFFLINE_RUNNING, &device->flags)) {
/*
 * dasd is being set offline,
 * but it is not a safe offline, during which we have to allow I/O
 */
return 1;
}
if (device->stopped) {
......@@ -3570,57 +3574,69 @@ int dasd_generic_set_offline(struct ccw_device *cdev)
else
pr_warn("%s: The DASD cannot be set offline while it is in use\n",
dev_name(&cdev->dev));
clear_bit(DASD_FLAG_OFFLINE, &device->flags);
goto out_busy;
rc = -EBUSY;
goto out_err;
}
}
if (test_bit(DASD_FLAG_SAFE_OFFLINE_RUNNING, &device->flags)) {
/*
* safe offline already running
* could only be called by normal offline so safe_offline flag
* needs to be removed to run normal offline and kill all I/O
*/
if (test_and_set_bit(DASD_FLAG_OFFLINE, &device->flags))
/* Already doing normal offline processing */
goto out_busy;
else
clear_bit(DASD_FLAG_SAFE_OFFLINE, &device->flags);
} else {
if (test_bit(DASD_FLAG_OFFLINE, &device->flags))
/* Already doing offline processing */
goto out_busy;
/*
 * Test if the offline processing is already running and exit if so.
 * If a safe offline is being processed, this can only be a normal
 * offline that is allowed to overtake the safe offline and cancel
 * any I/O we no longer want to wait for.
 */
if (test_bit(DASD_FLAG_OFFLINE, &device->flags)) {
if (test_bit(DASD_FLAG_SAFE_OFFLINE_RUNNING, &device->flags)) {
clear_bit(DASD_FLAG_SAFE_OFFLINE_RUNNING,
&device->flags);
} else {
rc = -EBUSY;
goto out_err;
}
}
set_bit(DASD_FLAG_OFFLINE, &device->flags);
spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags);
/*
* if safe_offline called set safe_offline_running flag and
* if safe_offline is called set safe_offline_running flag and
* clear safe_offline so that a call to normal offline
* can overrun safe_offline processing
*/
if (test_and_clear_bit(DASD_FLAG_SAFE_OFFLINE, &device->flags) &&
!test_and_set_bit(DASD_FLAG_SAFE_OFFLINE_RUNNING, &device->flags)) {
/* need to unlock here to wait for outstanding I/O */
spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags);
/*
 * If we want to set the device safe offline, all I/O operations
 * should be finished before continuing the offline process,
 * so sync the bdev first and then wait for our queues to become
 * empty.
 */
/* sync blockdev and partitions */
if (device->block) {
rc = fsync_bdev(device->block->bdev);
if (rc != 0)
goto interrupted;
}
/* schedule device tasklet and wait for completion */
dasd_schedule_device_bh(device);
rc = wait_event_interruptible(shutdown_waitq,
_wait_for_empty_queues(device));
if (rc != 0)
goto interrupted;
/*
 * Check if a normal offline process overtook the offline
 * processing; in that case simply do nothing besides returning
 * that we got interrupted.
 * Otherwise mark safe offline as not running any longer and
 * continue with the normal offline.
 */
spin_lock_irqsave(get_ccwdev_lock(cdev), flags);
if (!test_bit(DASD_FLAG_SAFE_OFFLINE_RUNNING, &device->flags)) {
rc = -ERESTARTSYS;
goto out_err;
}
clear_bit(DASD_FLAG_SAFE_OFFLINE_RUNNING, &device->flags);
}
spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags);
dasd_set_target_state(device, DASD_STATE_NEW);
/* dasd_delete_device destroys the device reference. */
......@@ -3632,22 +3648,18 @@ int dasd_generic_set_offline(struct ccw_device *cdev)
*/
if (block)
dasd_free_block(block);
return 0;
interrupted:
/* interrupted by signal */
clear_bit(DASD_FLAG_SAFE_OFFLINE, &device->flags);
spin_lock_irqsave(get_ccwdev_lock(cdev), flags);
clear_bit(DASD_FLAG_SAFE_OFFLINE_RUNNING, &device->flags);
clear_bit(DASD_FLAG_OFFLINE, &device->flags);
dasd_put_device(device);
return rc;
out_busy:
out_err:
dasd_put_device(device);
spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags);
return -EBUSY;
return rc;
}
EXPORT_SYMBOL_GPL(dasd_generic_set_offline);
......
......@@ -315,45 +315,58 @@ static int __init dasd_parse_range(const char *range)
char *features_str = NULL;
char *from_str = NULL;
char *to_str = NULL;
size_t len = strlen(range) + 1;
char tmp[len];
int rc = 0;
char *tmp;
strlcpy(tmp, range, len);
tmp = kstrdup(range, GFP_KERNEL);
if (!tmp)
return -ENOMEM;
if (dasd_evaluate_range_param(tmp, &from_str, &to_str, &features_str))
goto out_err;
if (dasd_evaluate_range_param(tmp, &from_str, &to_str, &features_str)) {
rc = -EINVAL;
goto out;
}
if (dasd_busid(from_str, &from_id0, &from_id1, &from))
goto out_err;
if (dasd_busid(from_str, &from_id0, &from_id1, &from)) {
rc = -EINVAL;
goto out;
}
to = from;
to_id0 = from_id0;
to_id1 = from_id1;
if (to_str) {
if (dasd_busid(to_str, &to_id0, &to_id1, &to))
goto out_err;
if (dasd_busid(to_str, &to_id0, &to_id1, &to)) {
rc = -EINVAL;
goto out;
}
if (from_id0 != to_id0 || from_id1 != to_id1 || from > to) {
pr_err("%s is not a valid device range\n", range);
goto out_err;
rc = -EINVAL;
goto out;
}
}
features = dasd_feature_list(features_str);
if (features < 0)
goto out_err;
if (features < 0) {
rc = -EINVAL;
goto out;
}
/* each device in the dasd= parameter should be set online initially */
features |= DASD_FEATURE_INITIAL_ONLINE;
while (from <= to) {
sprintf(bus_id, "%01x.%01x.%04x", from_id0, from_id1, from++);
devmap = dasd_add_busid(bus_id, features);
if (IS_ERR(devmap))
return PTR_ERR(devmap);
if (IS_ERR(devmap)) {
rc = PTR_ERR(devmap);
goto out;
}
}
return 0;
out:
kfree(tmp);
out_err:
return -EINVAL;
return rc;
}
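For reference, a range as parsed here takes the form busid[-busid][(features)]; an assumed example would be dasd=0.0.2000-0.0.2007(ro), which sets devices 0.0.2000 through 0.0.2007 initially online and read-only.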
/*
......@@ -735,13 +748,22 @@ static ssize_t
dasd_ro_show(struct device *dev, struct device_attribute *attr, char *buf)
{
struct dasd_devmap *devmap;
int ro_flag;
struct dasd_device *device;
int ro_flag = 0;
devmap = dasd_find_busid(dev_name(dev));
if (!IS_ERR(devmap))
ro_flag = (devmap->features & DASD_FEATURE_READONLY) != 0;
else
ro_flag = (DASD_FEATURE_DEFAULT & DASD_FEATURE_READONLY) != 0;
if (IS_ERR(devmap))
goto out;
ro_flag = !!(devmap->features & DASD_FEATURE_READONLY);
spin_lock(&dasd_devmap_lock);
device = devmap->device;
if (device)
ro_flag |= test_bit(DASD_FLAG_DEVICE_RO, &device->flags);
spin_unlock(&dasd_devmap_lock);
out:
return snprintf(buf, PAGE_SIZE, ro_flag ? "1\n" : "0\n");
}
......@@ -764,7 +786,7 @@ dasd_ro_store(struct device *dev, struct device_attribute *attr,
device = dasd_device_from_cdev(cdev);
if (IS_ERR(device))
return PTR_ERR(device);
return count;
spin_lock_irqsave(get_ccwdev_lock(cdev), flags);
val = val || test_bit(DASD_FLAG_DEVICE_RO, &device->flags);
......@@ -928,11 +950,14 @@ dasd_safe_offline_store(struct device *dev, struct device_attribute *attr,
{
struct ccw_device *cdev = to_ccwdev(dev);
struct dasd_device *device;
unsigned long flags;
int rc;
device = dasd_device_from_cdev(cdev);
spin_lock_irqsave(get_ccwdev_lock(cdev), flags);
device = dasd_device_from_cdev_locked(cdev);
if (IS_ERR(device)) {
rc = PTR_ERR(device);
spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags);
goto out;
}
......@@ -940,12 +965,14 @@ dasd_safe_offline_store(struct device *dev, struct device_attribute *attr,
test_bit(DASD_FLAG_SAFE_OFFLINE_RUNNING, &device->flags)) {
/* Already doing offline processing */
dasd_put_device(device);
spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags);
rc = -EBUSY;
goto out;
}
set_bit(DASD_FLAG_SAFE_OFFLINE, &device->flags);
dasd_put_device(device);
spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags);
rc = ccw_device_set_offline(cdev);
......
......@@ -4,6 +4,7 @@
#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/blkdev.h>
#include <linux/blk-mq.h>
#include <linux/genhd.h>
#include <linux/list.h>
......@@ -14,18 +15,14 @@
#define SCM_QUEUE_DELAY 5
struct scm_blk_dev {
struct tasklet_struct tasklet;
struct request_queue *rq;
struct gendisk *gendisk;
struct blk_mq_tag_set tag_set;
struct scm_device *scmdev;
spinlock_t rq_lock; /* guard the request queue */
spinlock_t lock; /* guard the rest of the blockdev */
spinlock_t lock;
atomic_t queued_reqs;
enum {SCM_OPER, SCM_WR_PROHIBIT} state;
struct list_head finished_requests;
#ifdef CONFIG_SCM_BLOCK_CLUSTER_WRITE
struct list_head cluster_list;
#endif
};
struct scm_request {
......@@ -36,13 +33,6 @@ struct scm_request {
struct list_head list;
u8 retries;
blk_status_t error;
#ifdef CONFIG_SCM_BLOCK_CLUSTER_WRITE
struct {
enum {CLUSTER_NONE, CLUSTER_READ, CLUSTER_WRITE} state;
struct list_head list;
void **buf;
} cluster;
#endif
};
#define to_aobrq(rq) container_of((void *) rq, struct aob_rq_header, data)
......@@ -52,55 +42,11 @@ void scm_blk_dev_cleanup(struct scm_blk_dev *);
void scm_blk_set_available(struct scm_blk_dev *);
void scm_blk_irq(struct scm_device *, void *, blk_status_t);
void scm_request_finish(struct scm_request *);
void scm_request_requeue(struct scm_request *);
struct aidaw *scm_aidaw_fetch(struct scm_request *scmrq, unsigned int bytes);
int scm_drv_init(void);
void scm_drv_cleanup(void);
#ifdef CONFIG_SCM_BLOCK_CLUSTER_WRITE
void __scm_free_rq_cluster(struct scm_request *);
int __scm_alloc_rq_cluster(struct scm_request *);
void scm_request_cluster_init(struct scm_request *);
bool scm_reserve_cluster(struct scm_request *);
void scm_release_cluster(struct scm_request *);
void scm_blk_dev_cluster_setup(struct scm_blk_dev *);
bool scm_need_cluster_request(struct scm_request *);
void scm_initiate_cluster_request(struct scm_request *);
void scm_cluster_request_irq(struct scm_request *);
bool scm_test_cluster_request(struct scm_request *);
bool scm_cluster_size_valid(void);
#else /* CONFIG_SCM_BLOCK_CLUSTER_WRITE */
static inline void __scm_free_rq_cluster(struct scm_request *scmrq) {}
static inline int __scm_alloc_rq_cluster(struct scm_request *scmrq)
{
return 0;
}
static inline void scm_request_cluster_init(struct scm_request *scmrq) {}
static inline bool scm_reserve_cluster(struct scm_request *scmrq)
{
return true;
}
static inline void scm_release_cluster(struct scm_request *scmrq) {}
static inline void scm_blk_dev_cluster_setup(struct scm_blk_dev *bdev) {}
static inline bool scm_need_cluster_request(struct scm_request *scmrq)
{
return false;
}
static inline void scm_initiate_cluster_request(struct scm_request *scmrq) {}
static inline void scm_cluster_request_irq(struct scm_request *scmrq) {}
static inline bool scm_test_cluster_request(struct scm_request *scmrq)
{
return false;
}
static inline bool scm_cluster_size_valid(void)
{
return true;
}
#endif /* CONFIG_SCM_BLOCK_CLUSTER_WRITE */
extern debug_info_t *scm_debug;
#define SCM_LOG(imp, txt) do { \
......
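The tag_set member added above is the hook into blk-mq. A rough sketch of the kind of initialization the corresponding scm_blk.c conversion (not shown here) has to perform — the depth, queue count, and ops are assumptions for illustration, not the driver's actual choices:

#include <linux/blk-mq.h>

static int scm_blk_dev_mq_init(struct scm_blk_dev *bdev,
			       const struct blk_mq_ops *ops)
{
	bdev->tag_set.ops = ops;
	bdev->tag_set.nr_hw_queues = 1;		/* assumed */
	bdev->tag_set.queue_depth = 64;		/* assumed */
	bdev->tag_set.numa_node = NUMA_NO_NODE;
	bdev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
	return blk_mq_alloc_tag_set(&bdev->tag_set);
}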
/*
* Block driver for s390 storage class memory.
*
* Copyright IBM Corp. 2012
* Author(s): Sebastian Ott <sebott@linux.vnet.ibm.com>
*/
#include <linux/spinlock.h>
#include <linux/module.h>
#include <linux/blkdev.h>
#include <linux/genhd.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <asm/eadm.h>
#include "scm_blk.h"
static unsigned int write_cluster_size = 64;
module_param(write_cluster_size, uint, S_IRUGO);
MODULE_PARM_DESC(write_cluster_size,
"Number of pages used for contiguous writes.");
#define CLUSTER_SIZE (write_cluster_size * PAGE_SIZE)
void __scm_free_rq_cluster(struct scm_request *scmrq)
{
int i;
if (!scmrq->cluster.buf)
return;
for (i = 0; i < 2 * write_cluster_size; i++)
free_page((unsigned long) scmrq->cluster.buf[i]);
kfree(scmrq->cluster.buf);
}
int __scm_alloc_rq_cluster(struct scm_request *scmrq)
{
int i;
scmrq->cluster.buf = kzalloc(sizeof(void *) * 2 * write_cluster_size,
GFP_KERNEL);
if (!scmrq->cluster.buf)
return -ENOMEM;
for (i = 0; i < 2 * write_cluster_size; i++) {
scmrq->cluster.buf[i] = (void *) get_zeroed_page(GFP_DMA);
if (!scmrq->cluster.buf[i])
return -ENOMEM;
}
INIT_LIST_HEAD(&scmrq->cluster.list);
return 0;
}
void scm_request_cluster_init(struct scm_request *scmrq)
{
scmrq->cluster.state = CLUSTER_NONE;
}
static bool clusters_intersect(struct request *A, struct request *B)
{
unsigned long firstA, lastA, firstB, lastB;
firstA = ((u64) blk_rq_pos(A) << 9) / CLUSTER_SIZE;
lastA = (((u64) blk_rq_pos(A) << 9) +
blk_rq_bytes(A) - 1) / CLUSTER_SIZE;
firstB = ((u64) blk_rq_pos(B) << 9) / CLUSTER_SIZE;
lastB = (((u64) blk_rq_pos(B) << 9) +
blk_rq_bytes(B) - 1) / CLUSTER_SIZE;
return (firstB <= lastA && firstA <= lastB);
}
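A quick worked example of this overlap test: with the default write_cluster_size of 64 and 4K pages, CLUSTER_SIZE is 256K. A request covering bytes [0, 256K) maps to clusters 0..0 and one covering [192K, 320K) maps to clusters 0..1; firstB <= lastA and firstA <= lastB both hold, so the requests intersect.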
bool scm_reserve_cluster(struct scm_request *scmrq)
{
struct request *req = scmrq->request[scmrq->aob->request.msb_count];
struct scm_blk_dev *bdev = scmrq->bdev;
struct scm_request *iter;
int pos, add = 1;
if (write_cluster_size == 0)
return true;
spin_lock(&bdev->lock);
list_for_each_entry(iter, &bdev->cluster_list, cluster.list) {
if (iter == scmrq) {
/*
* We don't have to use clusters_intersect here, since
* cluster requests are always started separately.
*/
add = 0;
continue;
}
for (pos = 0; pos < iter->aob->request.msb_count; pos++) {
if (clusters_intersect(req, iter->request[pos]) &&
(rq_data_dir(req) == WRITE ||
rq_data_dir(iter->request[pos]) == WRITE)) {
spin_unlock(&bdev->lock);
return false;
}
}
}
if (add)
list_add(&scmrq->cluster.list, &bdev->cluster_list);
spin_unlock(&bdev->lock);
return true;
}
void scm_release_cluster(struct scm_request *scmrq)
{
struct scm_blk_dev *bdev = scmrq->bdev;
unsigned long flags;
if (write_cluster_size == 0)
return;
spin_lock_irqsave(&bdev->lock, flags);
list_del(&scmrq->cluster.list);
spin_unlock_irqrestore(&bdev->lock, flags);
}
void scm_blk_dev_cluster_setup(struct scm_blk_dev *bdev)
{
INIT_LIST_HEAD(&bdev->cluster_list);
blk_queue_io_opt(bdev->rq, CLUSTER_SIZE);
}
static int scm_prepare_cluster_request(struct scm_request *scmrq)
{
struct scm_blk_dev *bdev = scmrq->bdev;
struct scm_device *scmdev = bdev->gendisk->private_data;
struct request *req = scmrq->request[0];
struct msb *msb = &scmrq->aob->msb[0];
struct req_iterator iter;
struct aidaw *aidaw;
struct bio_vec bv;
int i = 0;
u64 addr;
switch (scmrq->cluster.state) {
case CLUSTER_NONE:
scmrq->cluster.state = CLUSTER_READ;
/* fall through */
case CLUSTER_READ:
msb->bs = MSB_BS_4K;
msb->oc = MSB_OC_READ;
msb->flags = MSB_FLAG_IDA;
msb->blk_count = write_cluster_size;
addr = scmdev->address + ((u64) blk_rq_pos(req) << 9);
msb->scm_addr = round_down(addr, CLUSTER_SIZE);
if (msb->scm_addr !=
round_down(addr + (u64) blk_rq_bytes(req) - 1,
CLUSTER_SIZE))
msb->blk_count = 2 * write_cluster_size;
aidaw = scm_aidaw_fetch(scmrq, msb->blk_count * PAGE_SIZE);
if (!aidaw)
return -ENOMEM;
scmrq->aob->request.msb_count = 1;
msb->data_addr = (u64) aidaw;
for (i = 0; i < msb->blk_count; i++) {
aidaw->data_addr = (u64) scmrq->cluster.buf[i];
aidaw++;
}
break;
case CLUSTER_WRITE:
aidaw = (void *) msb->data_addr;
msb->oc = MSB_OC_WRITE;
for (addr = msb->scm_addr;
addr < scmdev->address + ((u64) blk_rq_pos(req) << 9);
addr += PAGE_SIZE) {
aidaw->data_addr = (u64) scmrq->cluster.buf[i];
aidaw++;
i++;
}
rq_for_each_segment(bv, req, iter) {
aidaw->data_addr = (u64) page_address(bv.bv_page);
aidaw++;
i++;
}
for (; i < msb->blk_count; i++) {
aidaw->data_addr = (u64) scmrq->cluster.buf[i];
aidaw++;
}
break;
}
return 0;
}
bool scm_need_cluster_request(struct scm_request *scmrq)
{
int pos = scmrq->aob->request.msb_count;
if (rq_data_dir(scmrq->request[pos]) == READ)
return false;
return blk_rq_bytes(scmrq->request[pos]) < CLUSTER_SIZE;
}
/* Called with queue lock held. */
void scm_initiate_cluster_request(struct scm_request *scmrq)
{
if (scm_prepare_cluster_request(scmrq))
goto requeue;
if (eadm_start_aob(scmrq->aob))
goto requeue;
return;
requeue:
scm_request_requeue(scmrq);
}
bool scm_test_cluster_request(struct scm_request *scmrq)
{
return scmrq->cluster.state != CLUSTER_NONE;
}
void scm_cluster_request_irq(struct scm_request *scmrq)
{
struct scm_blk_dev *bdev = scmrq->bdev;
unsigned long flags;
switch (scmrq->cluster.state) {
case CLUSTER_NONE:
BUG();
break;
case CLUSTER_READ:
if (scmrq->error) {
scm_request_finish(scmrq);
break;
}
scmrq->cluster.state = CLUSTER_WRITE;
spin_lock_irqsave(&bdev->rq_lock, flags);
scm_initiate_cluster_request(scmrq);
spin_unlock_irqrestore(&bdev->rq_lock, flags);
break;
case CLUSTER_WRITE:
scm_request_finish(scmrq);
break;
}
}
bool scm_cluster_size_valid(void)
{
if (write_cluster_size == 1 || write_cluster_size > 128)
return false;
return !(write_cluster_size & (write_cluster_size - 1));
}
......@@ -296,6 +296,51 @@ static const struct attribute_group *default_subch_attr_groups[] = {
NULL,
};
static ssize_t chpids_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
struct subchannel *sch = to_subchannel(dev);
struct chsc_ssd_info *ssd = &sch->ssd_info;
ssize_t ret = 0;
int mask;
int chp;
for (chp = 0; chp < 8; chp++) {
mask = 0x80 >> chp;
if (ssd->path_mask & mask)
ret += sprintf(buf + ret, "%02x ", ssd->chpid[chp].id);
else
ret += sprintf(buf + ret, "00 ");
}
ret += sprintf(buf + ret, "\n");
return ret;
}
static DEVICE_ATTR(chpids, 0444, chpids_show, NULL);
static ssize_t pimpampom_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
struct subchannel *sch = to_subchannel(dev);
struct pmcw *pmcw = &sch->schib.pmcw;
return sprintf(buf, "%02x %02x %02x\n",
pmcw->pim, pmcw->pam, pmcw->pom);
}
static DEVICE_ATTR(pimpampom, 0444, pimpampom_show, NULL);
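For illustration, a subchannel with two installed channel paths might then report (values assumed):

$ cat /sys/devices/css0/0.0.0000/chpids
3a 3b 00 00 00 00 00 00
$ cat /sys/devices/css0/0.0.0000/pimpampom
c0 c0 c0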
static struct attribute *io_subchannel_type_attrs[] = {
&dev_attr_chpids.attr,
&dev_attr_pimpampom.attr,
NULL,
};
ATTRIBUTE_GROUPS(io_subchannel_type);
static const struct device_type io_subchannel_type = {
.groups = io_subchannel_type_groups,
};
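ATTRIBUTE_GROUPS(io_subchannel_type) generates the io_subchannel_type_groups array referenced by the device_type above; it expands to roughly:

static const struct attribute_group io_subchannel_type_group = {
	.attrs = io_subchannel_type_attrs,
};
static const struct attribute_group *io_subchannel_type_groups[] = {
	&io_subchannel_type_group,
	NULL,
};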
int css_register_subchannel(struct subchannel *sch)
{
int ret;
......@@ -304,6 +349,10 @@ int css_register_subchannel(struct subchannel *sch)
sch->dev.parent = &channel_subsystems[0]->device;
sch->dev.bus = &css_bus_type;
sch->dev.groups = default_subch_attr_groups;
if (sch->st == SUBCHANNEL_TYPE_IO)
sch->dev.type = &io_subchannel_type;
/*
* We don't want to generate uevents for I/O subchannels that don't
* have a working ccw device behind them since they will be
......
......@@ -208,44 +208,6 @@ int __init io_subchannel_init(void)
/************************ device handling **************************/
/*
* A ccw_device has some interfaces in sysfs in addition to the
* standard ones.
* The following entries are designed to export the information which
* resided in 2.4 in /proc/subchannels. Subchannel and device number
* are obvious, so they don't have an entry :)
* TODO: Split chpids and pimpampom up? Where is "in use" in the tree?
*/
static ssize_t
chpids_show (struct device * dev, struct device_attribute *attr, char * buf)
{
struct subchannel *sch = to_subchannel(dev);
struct chsc_ssd_info *ssd = &sch->ssd_info;
ssize_t ret = 0;
int chp;
int mask;
for (chp = 0; chp < 8; chp++) {
mask = 0x80 >> chp;
if (ssd->path_mask & mask)
ret += sprintf(buf + ret, "%02x ", ssd->chpid[chp].id);
else
ret += sprintf(buf + ret, "00 ");
}
ret += sprintf (buf+ret, "\n");
return min((ssize_t)PAGE_SIZE, ret);
}
static ssize_t
pimpampom_show (struct device * dev, struct device_attribute *attr, char * buf)
{
struct subchannel *sch = to_subchannel(dev);
struct pmcw *pmcw = &sch->schib.pmcw;
return sprintf (buf, "%02x %02x %02x\n",
pmcw->pim, pmcw->pam, pmcw->pom);
}
static ssize_t
devtype_show (struct device *dev, struct device_attribute *attr, char *buf)
{
......@@ -636,8 +598,6 @@ static ssize_t vpm_show(struct device *dev, struct device_attribute *attr,
return sprintf(buf, "%02x\n", sch->vpm);
}
static DEVICE_ATTR(chpids, 0444, chpids_show, NULL);
static DEVICE_ATTR(pimpampom, 0444, pimpampom_show, NULL);
static DEVICE_ATTR(devtype, 0444, devtype_show, NULL);
static DEVICE_ATTR(cutype, 0444, cutype_show, NULL);
static DEVICE_ATTR(modalias, 0444, modalias_show, NULL);
......@@ -647,8 +607,6 @@ static DEVICE_ATTR(logging, 0200, NULL, initiate_logging);
static DEVICE_ATTR(vpm, 0444, vpm_show, NULL);
static struct attribute *io_subchannel_attrs[] = {
&dev_attr_chpids.attr,
&dev_attr_pimpampom.attr,
&dev_attr_logging.attr,
&dev_attr_vpm.attr,
NULL,
......
......@@ -89,54 +89,6 @@ static void vfio_ccw_sch_io_todo(struct work_struct *work)
private->state = VFIO_CCW_STATE_IDLE;
}
/*
* Sysfs interfaces
*/
static ssize_t chpids_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
struct subchannel *sch = to_subchannel(dev);
struct chsc_ssd_info *ssd = &sch->ssd_info;
ssize_t ret = 0;
int chp;
int mask;
for (chp = 0; chp < 8; chp++) {
mask = 0x80 >> chp;
if (ssd->path_mask & mask)
ret += sprintf(buf + ret, "%02x ", ssd->chpid[chp].id);
else
ret += sprintf(buf + ret, "00 ");
}
ret += sprintf(buf+ret, "\n");
return ret;
}
static ssize_t pimpampom_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
struct subchannel *sch = to_subchannel(dev);
struct pmcw *pmcw = &sch->schib.pmcw;
return sprintf(buf, "%02x %02x %02x\n",
pmcw->pim, pmcw->pam, pmcw->pom);
}
static DEVICE_ATTR(chpids, 0444, chpids_show, NULL);
static DEVICE_ATTR(pimpampom, 0444, pimpampom_show, NULL);
static struct attribute *vfio_subchannel_attrs[] = {
&dev_attr_chpids.attr,
&dev_attr_pimpampom.attr,
NULL,
};
static struct attribute_group vfio_subchannel_attr_group = {
.attrs = vfio_subchannel_attrs,
};
/*
* Css driver callbacks
*/
......@@ -174,13 +126,9 @@ static int vfio_ccw_sch_probe(struct subchannel *sch)
if (ret)
goto out_free;
ret = sysfs_create_group(&sch->dev.kobj, &vfio_subchannel_attr_group);
if (ret)
goto out_disable;
ret = vfio_ccw_mdev_reg(sch);
if (ret)
goto out_rm_group;
goto out_disable;
INIT_WORK(&private->io_work, vfio_ccw_sch_io_todo);
atomic_set(&private->avail, 1);
......@@ -188,8 +136,6 @@ static int vfio_ccw_sch_probe(struct subchannel *sch)
return 0;
out_rm_group:
sysfs_remove_group(&sch->dev.kobj, &vfio_subchannel_attr_group);
out_disable:
cio_disable_subchannel(sch);
out_free:
......@@ -206,8 +152,6 @@ static int vfio_ccw_sch_remove(struct subchannel *sch)
vfio_ccw_mdev_unreg(sch);
sysfs_remove_group(&sch->dev.kobj, &vfio_subchannel_attr_group);
dev_set_drvdata(&sch->dev, NULL);
kfree(private);
......
......@@ -766,7 +766,7 @@ static ssize_t ap_domain_store(struct bus_type *bus,
ap_domain_index = domain;
spin_unlock_bh(&ap_domain_lock);
AP_DBF(DBF_DEBUG, "store new default domain=%d\n", domain);
AP_DBF(DBF_DEBUG, "stored new default domain=%d\n", domain);
return count;
}
......@@ -952,6 +952,7 @@ static int ap_select_domain(void)
}
if (best_domain >= 0){
ap_domain_index = best_domain;
AP_DBF(DBF_DEBUG, "new ap_domain_index=%d\n", ap_domain_index);
spin_unlock_bh(&ap_domain_lock);
return 0;
}
......@@ -988,7 +989,7 @@ static void ap_scan_bus(struct work_struct *unused)
ap_qid_t qid;
int depth = 0, type = 0;
unsigned int functions = 0;
int rc, id, dom, borked, domains;
int rc, id, dom, borked, domains, defdomdevs = 0;
AP_DBF(DBF_DEBUG, "ap_scan_bus running\n");
......@@ -1052,6 +1053,8 @@ static void ap_scan_bus(struct work_struct *unused)
put_device(dev);
if (!borked) {
domains++;
if (dom == ap_domain_index)
defdomdevs++;
continue;
}
}
......@@ -1098,6 +1101,8 @@ static void ap_scan_bus(struct work_struct *unused)
continue;
}
domains++;
if (dom == ap_domain_index)
defdomdevs++;
} /* end domain loop */
if (ac) {
/* remove card dev if there are no queue devices */
......@@ -1106,6 +1111,11 @@ static void ap_scan_bus(struct work_struct *unused)
put_device(&ac->ap_dev.device);
}
} /* end device loop */
if (defdomdevs < 1)
AP_DBF(DBF_INFO, "no queue device with default domain %d available\n",
ap_domain_index);
out:
mod_timer(&ap_config_timer, jiffies + ap_config_time * HZ);
}
......@@ -1174,14 +1184,14 @@ int __init ap_module_init(void)
ap_init_configuration();
if (ap_configuration)
max_domain_id = ap_max_domain_id ? : (AP_DOMAINS - 1);
max_domain_id =
ap_max_domain_id ? ap_max_domain_id : AP_DOMAINS - 1;
else
max_domain_id = 15;
if (ap_domain_index < -1 || ap_domain_index > max_domain_id) {
pr_warn("%d is not a valid cryptographic domain\n",
ap_domain_index);
rc = -EINVAL;
goto out_free;
ap_domain_index = -1;
}
/* In the resume callback we need to know if the user had set the domain.
 * If so, we cannot just reset it.
......@@ -1254,7 +1264,6 @@ int __init ap_module_init(void)
unregister_reset_call(&ap_reset_call);
if (ap_using_interrupts())
unregister_adapter_interrupt(&ap_airq);
out_free:
kfree(ap_configuration);
return rc;
}
......
......@@ -178,9 +178,9 @@ static inline void prep_xcrb(struct ica_xcRB *pxcrb,
pxcrb->user_defined = (cardnr == 0xFFFF ? AUTOSELECT : cardnr);
pxcrb->request_control_blk_length =
preqcblk->cprb_len + preqcblk->req_parml;
pxcrb->request_control_blk_addr = (void *) preqcblk;
pxcrb->request_control_blk_addr = (void __user *) preqcblk;
pxcrb->reply_control_blk_length = preqcblk->rpl_msgbl;
pxcrb->reply_control_blk_addr = (void *) prepcblk;
pxcrb->reply_control_blk_addr = (void __user *) prepcblk;
}
/*
......@@ -1194,7 +1194,7 @@ static struct miscdevice pkey_dev = {
/*
* Module init
*/
int __init pkey_init(void)
static int __init pkey_init(void)
{
cpacf_mask_t pckmo_functions;
......
......@@ -821,8 +821,10 @@ static long zcrypt_unlocked_ioctl(struct file *filp, unsigned int cmd,
do {
rc = zcrypt_rsa_modexpo(&mex);
} while (rc == -EAGAIN);
if (rc)
if (rc) {
ZCRYPT_DBF(DBF_DEBUG, "ioctl ICARSAMODEXPO rc=%d", rc);
return rc;
}
return put_user(mex.outputdatalength, &umex->outputdatalength);
}
case ICARSACRT: {
......@@ -838,8 +840,10 @@ static long zcrypt_unlocked_ioctl(struct file *filp, unsigned int cmd,
do {
rc = zcrypt_rsa_crt(&crt);
} while (rc == -EAGAIN);
if (rc)
if (rc) {
ZCRYPT_DBF(DBF_DEBUG, "ioctl ICARSACRT rc=%d", rc);
return rc;
}
return put_user(crt.outputdatalength, &ucrt->outputdatalength);
}
case ZSECSENDCPRB: {
......@@ -855,6 +859,8 @@ static long zcrypt_unlocked_ioctl(struct file *filp, unsigned int cmd,
do {
rc = zcrypt_send_cprb(&xcRB);
} while (rc == -EAGAIN);
if (rc)
ZCRYPT_DBF(DBF_DEBUG, "ioctl ZSENDCPRB rc=%d", rc);
if (copy_to_user(uxcRB, &xcRB, sizeof(xcRB)))
return -EFAULT;
return rc;
......@@ -872,6 +878,8 @@ static long zcrypt_unlocked_ioctl(struct file *filp, unsigned int cmd,
do {
rc = zcrypt_send_ep11_cprb(&xcrb);
} while (rc == -EAGAIN);
if (rc)
ZCRYPT_DBF(DBF_DEBUG, "ioctl ZSENDEP11CPRB rc=%d", rc);
if (copy_to_user(uxcrb, &xcrb, sizeof(xcrb)))
return -EFAULT;
return rc;
......
......@@ -48,26 +48,6 @@ struct cca_token_hdr {
#define CCA_TKN_HDR_ID_EXT 0x1E
/**
* mapping for the cca private ME section
*/
struct cca_private_ext_ME_sec {
unsigned char section_identifier;
unsigned char version;
unsigned short section_length;
unsigned char private_key_hash[20];
unsigned char reserved1[4];
unsigned char key_format;
unsigned char reserved2;
unsigned char key_name_hash[20];
unsigned char key_use_flags[4];
unsigned char reserved3[6];
unsigned char reserved4[24];
unsigned char confounder[24];
unsigned char exponent[128];
unsigned char modulus[128];
} __attribute__((packed));
#define CCA_PVT_USAGE_ALL 0x80
/**
......@@ -123,77 +103,6 @@ struct cca_pvt_ext_CRT_sec {
#define CCA_PVT_EXT_CRT_SEC_ID_PVT 0x08
#define CCA_PVT_EXT_CRT_SEC_FMT_CL 0x40
/**
* Set up private key fields of a type6 MEX message.
* Note that all numerics in the key token are big-endian,
* while the entries in the key block header are little-endian.
*
* @mex: pointer to user input data
* @p: pointer to memory area for the key
*
* Returns the size of the key area or -EFAULT
*/
static inline int zcrypt_type6_mex_key_de(struct ica_rsa_modexpo *mex,
void *p, int big_endian)
{
static struct cca_token_hdr static_pvt_me_hdr = {
.token_identifier = 0x1E,
.token_length = 0x0183,
};
static struct cca_private_ext_ME_sec static_pvt_me_sec = {
.section_identifier = 0x02,
.section_length = 0x016C,
.key_use_flags = {0x80,0x00,0x00,0x00},
};
static struct cca_public_sec static_pub_me_sec = {
.section_identifier = 0x04,
.section_length = 0x000F,
.exponent_len = 0x0003,
};
static char pk_exponent[3] = { 0x01, 0x00, 0x01 };
struct {
struct T6_keyBlock_hdr t6_hdr;
struct cca_token_hdr pvtMeHdr;
struct cca_private_ext_ME_sec pvtMeSec;
struct cca_public_sec pubMeSec;
char exponent[3];
} __attribute__((packed)) *key = p;
unsigned char *temp;
memset(key, 0, sizeof(*key));
if (big_endian) {
key->t6_hdr.blen = cpu_to_be16(0x189);
key->t6_hdr.ulen = cpu_to_be16(0x189 - 2);
} else {
key->t6_hdr.blen = cpu_to_le16(0x189);
key->t6_hdr.ulen = cpu_to_le16(0x189 - 2);
}
key->pvtMeHdr = static_pvt_me_hdr;
key->pvtMeSec = static_pvt_me_sec;
key->pubMeSec = static_pub_me_sec;
/*
* In a private key, the modulus doesn't appear in the public
* section. So, an arbitrary public exponent of 0x010001 will be
* used.
*/
memcpy(key->exponent, pk_exponent, 3);
/* key parameter block */
temp = key->pvtMeSec.exponent +
sizeof(key->pvtMeSec.exponent) - mex->inputdatalength;
if (copy_from_user(temp, mex->b_key, mex->inputdatalength))
return -EFAULT;
/* modulus */
temp = key->pvtMeSec.modulus +
sizeof(key->pvtMeSec.modulus) - mex->inputdatalength;
if (copy_from_user(temp, mex->n_modulus, mex->inputdatalength))
return -EFAULT;
key->pubMeSec.modulus_bit_len = 8 * mex->inputdatalength;
return sizeof(*key);
}
/**
* Set up private key fields of a type6 MEX message. The _pad variant
* strips leading zeroes from the b_key.
......@@ -205,8 +114,7 @@ static inline int zcrypt_type6_mex_key_de(struct ica_rsa_modexpo *mex,
*
* Returns the size of the key area or -EFAULT
*/
static inline int zcrypt_type6_mex_key_en(struct ica_rsa_modexpo *mex,
void *p, int big_endian)
static inline int zcrypt_type6_mex_key_en(struct ica_rsa_modexpo *mex, void *p)
{
static struct cca_token_hdr static_pub_hdr = {
.token_identifier = 0x1E,
......@@ -251,13 +159,8 @@ static inline int zcrypt_type6_mex_key_en(struct ica_rsa_modexpo *mex,
2*mex->inputdatalength - i;
key->pubHdr.token_length =
key->pubSec.section_length + sizeof(key->pubHdr);
if (big_endian) {
key->t6_hdr.ulen = cpu_to_be16(key->pubHdr.token_length + 4);
key->t6_hdr.blen = cpu_to_be16(key->pubHdr.token_length + 6);
} else {
key->t6_hdr.ulen = cpu_to_le16(key->pubHdr.token_length + 4);
key->t6_hdr.blen = cpu_to_le16(key->pubHdr.token_length + 6);
}
key->t6_hdr.ulen = key->pubHdr.token_length + 4;
key->t6_hdr.blen = key->pubHdr.token_length + 6;
return sizeof(*key) + 2*mex->inputdatalength - i;
}
......@@ -271,8 +174,7 @@ static inline int zcrypt_type6_mex_key_en(struct ica_rsa_modexpo *mex,
*
* Returns the size of the key area or -EFAULT
*/
static inline int zcrypt_type6_crt_key(struct ica_rsa_modexpo_crt *crt,
void *p, int big_endian)
static inline int zcrypt_type6_crt_key(struct ica_rsa_modexpo_crt *crt, void *p)
{
static struct cca_public_sec static_cca_pub_sec = {
.section_identifier = 4,
......@@ -298,13 +200,8 @@ static inline int zcrypt_type6_crt_key(struct ica_rsa_modexpo_crt *crt,
size = sizeof(*key) + key_len + sizeof(*pub) + 3;
/* parameter block.key block */
if (big_endian) {
key->t6_hdr.blen = cpu_to_be16(size);
key->t6_hdr.ulen = cpu_to_be16(size - 2);
} else {
key->t6_hdr.blen = cpu_to_le16(size);
key->t6_hdr.ulen = cpu_to_le16(size - 2);
}
key->t6_hdr.blen = size;
key->t6_hdr.ulen = size - 2;
/* key token header */
key->token.token_identifier = CCA_TKN_HDR_ID_EXT;
......
......@@ -291,7 +291,7 @@ static int ICAMEX_msg_to_type6MEX_msgX(struct zcrypt_queue *zq,
return -EFAULT;
/* Set up key which is located after the variable length text. */
size = zcrypt_type6_mex_key_en(mex, msg->text+mex->inputdatalength, 1);
size = zcrypt_type6_mex_key_en(mex, msg->text+mex->inputdatalength);
if (size < 0)
return size;
size += sizeof(*msg) + mex->inputdatalength;
......@@ -353,7 +353,7 @@ static int ICACRT_msg_to_type6CRT_msgX(struct zcrypt_queue *zq,
return -EFAULT;
/* Set up key which is located after the variable length text. */
size = zcrypt_type6_crt_key(crt, msg->text + crt->inputdatalength, 1);
size = zcrypt_type6_crt_key(crt, msg->text + crt->inputdatalength);
if (size < 0)
return size;
size += sizeof(*msg) + crt->inputdatalength; /* total size of msg */
......
......@@ -44,7 +44,7 @@ config HVC_RTAS
config HVC_IUCV
bool "z/VM IUCV Hypervisor console support (VM only)"
depends on S390
depends on S390 && NET
select HVC_DRIVER
select IUCV
default y
......