Commit 0944fe3f authored by Martin Schwidefsky

s390/mm: implement software referenced bits

The last remaining use for the storage key of the s390 architecture
is reference counting. The alternative is to make page table entries
invalid while they are old. On access the fault handler marks the
pte/pmd as young which makes the pte/pmd valid if the access rights
allow read access. The pte/pmd invalidations required for software
managed reference bits cost a bit of performance; on the other hand,
the RRBE/RRBM instructions to read and reset the referenced bits are
quite expensive as well.
Reviewed-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
parent fbd70035
...@@ -140,15 +140,6 @@ static inline int page_reset_referenced(unsigned long addr) ...@@ -140,15 +140,6 @@ static inline int page_reset_referenced(unsigned long addr)
#define _PAGE_FP_BIT 0x08 /* HW fetch protection bit */ #define _PAGE_FP_BIT 0x08 /* HW fetch protection bit */
#define _PAGE_ACC_BITS 0xf0 /* HW access control bits */ #define _PAGE_ACC_BITS 0xf0 /* HW access control bits */
/*
* Test and clear referenced bit in storage key.
*/
#define __HAVE_ARCH_PAGE_TEST_AND_CLEAR_YOUNG
static inline int page_test_and_clear_young(unsigned long pfn)
{
return page_reset_referenced(pfn << PAGE_SHIFT);
}
struct page; struct page;
void arch_free_page(struct page *page, int order); void arch_free_page(struct page *page, int order);
void arch_alloc_page(struct page *page, int order); void arch_alloc_page(struct page *page, int order);
......
...@@ -226,8 +226,9 @@ extern unsigned long MODULES_END; ...@@ -226,8 +226,9 @@ extern unsigned long MODULES_END;
#define _PAGE_TYPE 0x002 /* SW pte type bit */ #define _PAGE_TYPE 0x002 /* SW pte type bit */
#define _PAGE_YOUNG 0x004 /* SW pte young bit */ #define _PAGE_YOUNG 0x004 /* SW pte young bit */
#define _PAGE_DIRTY 0x008 /* SW pte dirty bit */ #define _PAGE_DIRTY 0x008 /* SW pte dirty bit */
#define _PAGE_WRITE 0x010 /* SW pte write bit */ #define _PAGE_READ 0x010 /* SW pte read bit */
#define _PAGE_SPECIAL 0x020 /* SW associated with special page */ #define _PAGE_WRITE 0x020 /* SW pte write bit */
#define _PAGE_SPECIAL 0x040 /* SW associated with special page */
#define __HAVE_ARCH_PTE_SPECIAL #define __HAVE_ARCH_PTE_SPECIAL
/* Set of bits not changed in pte_modify */ /* Set of bits not changed in pte_modify */
...@@ -246,16 +247,22 @@ extern unsigned long MODULES_END; ...@@ -246,16 +247,22 @@ extern unsigned long MODULES_END;
* 842100000000 * 842100000000
* 000084210000 * 000084210000
* 000000008421 * 000000008421
* .IR....wdytp * .IR...wrdytp
* empty .10....00000 * empty .10...000000
* swap .10....xxx10 * swap .10...xxxx10
* file .11....xxxx0 * file .11...xxxxx0
* prot-none, clean .11....00x01 * prot-none, clean, old .11...000001
* prot-none, dirty .10....01x01 * prot-none, clean, young .11...000101
* read-only, clean .01....00x01 * prot-none, dirty, old .10...001001
* read-only, dirty .01....01x01 * prot-none, dirty, young .10...001101
* read-write, clean .01....10x01 * read-only, clean, old .11...010001
* read-write, dirty .00....11x01 * read-only, clean, young .01...010101
* read-only, dirty, old .11...011001
* read-only, dirty, young .01...011101
* read-write, clean, old .11...110001
* read-write, clean, young .01...110101
* read-write, dirty, old .10...111001
* read-write, dirty, young .00...111101
* *
* pte_present is true for the bit pattern .xx...xxxxx1, (pte & 0x001) == 0x001 * pte_present is true for the bit pattern .xx...xxxxx1, (pte & 0x001) == 0x001
* pte_none is true for the bit pattern .10...xxxx00, (pte & 0x603) == 0x400 * pte_none is true for the bit pattern .10...xxxx00, (pte & 0x603) == 0x400
...@@ -273,15 +280,26 @@ extern unsigned long MODULES_END; ...@@ -273,15 +280,26 @@ extern unsigned long MODULES_END;
#define _ASCE_TABLE_LENGTH 0x7f /* 128 x 64 entries = 8k */ #define _ASCE_TABLE_LENGTH 0x7f /* 128 x 64 entries = 8k */
/* Bits in the segment table entry */ /* Bits in the segment table entry */
#define _SEGMENT_ENTRY_BITS 0x7fffffffUL /* Valid segment table bits */
#define _SEGMENT_ENTRY_ORIGIN 0x7fffffc0UL /* page table origin */ #define _SEGMENT_ENTRY_ORIGIN 0x7fffffc0UL /* page table origin */
#define _SEGMENT_ENTRY_PROTECT 0x200 /* page protection bit */ #define _SEGMENT_ENTRY_PROTECT 0x200 /* page protection bit */
#define _SEGMENT_ENTRY_INVALID 0x20 /* invalid segment table entry */ #define _SEGMENT_ENTRY_INVALID 0x20 /* invalid segment table entry */
#define _SEGMENT_ENTRY_COMMON 0x10 /* common segment bit */ #define _SEGMENT_ENTRY_COMMON 0x10 /* common segment bit */
#define _SEGMENT_ENTRY_PTL 0x0f /* page table length */ #define _SEGMENT_ENTRY_PTL 0x0f /* page table length */
#define _SEGMENT_ENTRY_NONE _SEGMENT_ENTRY_PROTECT
#define _SEGMENT_ENTRY (_SEGMENT_ENTRY_PTL) #define _SEGMENT_ENTRY (_SEGMENT_ENTRY_PTL)
#define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INVALID) #define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INVALID)
/*
* Segment table entry encoding (I = invalid, R = read-only bit):
* ..R...I.....
* prot-none ..1...1.....
* read-only ..1...0.....
* read-write ..0...0.....
* empty ..0...1.....
*/
/* Page status table bits for virtualization */ /* Page status table bits for virtualization */
#define PGSTE_ACC_BITS 0xf0000000UL #define PGSTE_ACC_BITS 0xf0000000UL
#define PGSTE_FP_BIT 0x08000000UL #define PGSTE_FP_BIT 0x08000000UL
...@@ -290,9 +308,7 @@ extern unsigned long MODULES_END; ...@@ -290,9 +308,7 @@ extern unsigned long MODULES_END;
#define PGSTE_HC_BIT 0x00200000UL #define PGSTE_HC_BIT 0x00200000UL
#define PGSTE_GR_BIT 0x00040000UL #define PGSTE_GR_BIT 0x00040000UL
#define PGSTE_GC_BIT 0x00020000UL #define PGSTE_GC_BIT 0x00020000UL
#define PGSTE_UR_BIT 0x00008000UL #define PGSTE_IN_BIT 0x00008000UL /* IPTE notify bit */
#define PGSTE_UC_BIT 0x00004000UL /* user dirty (migration) */
#define PGSTE_IN_BIT 0x00002000UL /* IPTE notify bit */
#else /* CONFIG_64BIT */ #else /* CONFIG_64BIT */
...@@ -331,6 +347,8 @@ extern unsigned long MODULES_END; ...@@ -331,6 +347,8 @@ extern unsigned long MODULES_END;
#define _REGION3_ENTRY_CO 0x100 /* change-recording override */ #define _REGION3_ENTRY_CO 0x100 /* change-recording override */
/* Bits in the segment table entry */ /* Bits in the segment table entry */
#define _SEGMENT_ENTRY_BITS 0xfffffffffffffe33UL
#define _SEGMENT_ENTRY_BITS_LARGE 0xfffffffffff1ff33UL
#define _SEGMENT_ENTRY_ORIGIN_LARGE ~0xfffffUL /* large page address */ #define _SEGMENT_ENTRY_ORIGIN_LARGE ~0xfffffUL /* large page address */
#define _SEGMENT_ENTRY_ORIGIN ~0x7ffUL/* segment table origin */ #define _SEGMENT_ENTRY_ORIGIN ~0x7ffUL/* segment table origin */
#define _SEGMENT_ENTRY_PROTECT 0x200 /* page protection bit */ #define _SEGMENT_ENTRY_PROTECT 0x200 /* page protection bit */
...@@ -342,6 +360,21 @@ extern unsigned long MODULES_END; ...@@ -342,6 +360,21 @@ extern unsigned long MODULES_END;
#define _SEGMENT_ENTRY_LARGE 0x400 /* STE-format control, large page */ #define _SEGMENT_ENTRY_LARGE 0x400 /* STE-format control, large page */
#define _SEGMENT_ENTRY_CO 0x100 /* change-recording override */ #define _SEGMENT_ENTRY_CO 0x100 /* change-recording override */
#define _SEGMENT_ENTRY_SPLIT 0x001 /* THP splitting bit */ #define _SEGMENT_ENTRY_SPLIT 0x001 /* THP splitting bit */
#define _SEGMENT_ENTRY_YOUNG 0x002 /* SW segment young bit */
#define _SEGMENT_ENTRY_NONE _SEGMENT_ENTRY_YOUNG
/*
* Segment table entry encoding (R = read-only, I = invalid, y = young bit):
* ..R...I...y.
* prot-none, old ..0...1...1.
* prot-none, young ..1...1...1.
* read-only, old ..1...1...0.
* read-only, young ..1...0...1.
* read-write, old ..0...1...0.
* read-write, young ..0...0...1.
* The segment table origin is used to distinguish empty (origin==0) from
* read-write, old segment table entries (origin!=0)
*/
#define _SEGMENT_ENTRY_SPLIT_BIT 0 /* THP splitting bit number */ #define _SEGMENT_ENTRY_SPLIT_BIT 0 /* THP splitting bit number */
...@@ -357,9 +390,7 @@ extern unsigned long MODULES_END; ...@@ -357,9 +390,7 @@ extern unsigned long MODULES_END;
#define PGSTE_HC_BIT 0x0020000000000000UL #define PGSTE_HC_BIT 0x0020000000000000UL
#define PGSTE_GR_BIT 0x0004000000000000UL #define PGSTE_GR_BIT 0x0004000000000000UL
#define PGSTE_GC_BIT 0x0002000000000000UL #define PGSTE_GC_BIT 0x0002000000000000UL
#define PGSTE_UR_BIT 0x0000800000000000UL #define PGSTE_IN_BIT 0x0000800000000000UL /* IPTE notify bit */
#define PGSTE_UC_BIT 0x0000400000000000UL /* user dirty (migration) */
#define PGSTE_IN_BIT 0x0000200000000000UL /* IPTE notify bit */
#endif /* CONFIG_64BIT */ #endif /* CONFIG_64BIT */
...@@ -375,12 +406,17 @@ extern unsigned long MODULES_END; ...@@ -375,12 +406,17 @@ extern unsigned long MODULES_END;
* Page protection definitions. * Page protection definitions.
*/ */
#define PAGE_NONE __pgprot(_PAGE_PRESENT | _PAGE_INVALID) #define PAGE_NONE __pgprot(_PAGE_PRESENT | _PAGE_INVALID)
#define PAGE_READ __pgprot(_PAGE_PRESENT | _PAGE_PROTECT) #define PAGE_READ __pgprot(_PAGE_PRESENT | _PAGE_READ | \
#define PAGE_WRITE __pgprot(_PAGE_PRESENT | _PAGE_WRITE | _PAGE_PROTECT) _PAGE_INVALID | _PAGE_PROTECT)
#define PAGE_WRITE __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_WRITE | _PAGE_DIRTY) _PAGE_INVALID | _PAGE_PROTECT)
#define PAGE_KERNEL __pgprot(_PAGE_PRESENT | _PAGE_WRITE | _PAGE_DIRTY)
#define PAGE_KERNEL_RO __pgprot(_PAGE_PRESENT | _PAGE_PROTECT) #define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
_PAGE_YOUNG | _PAGE_DIRTY)
#define PAGE_KERNEL __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
_PAGE_YOUNG | _PAGE_DIRTY)
#define PAGE_KERNEL_RO __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_YOUNG | \
_PAGE_PROTECT)
/* /*
* On s390 the page table entry has an invalid bit and a read-only bit. * On s390 the page table entry has an invalid bit and a read-only bit.
...@@ -410,9 +446,10 @@ extern unsigned long MODULES_END; ...@@ -410,9 +446,10 @@ extern unsigned long MODULES_END;
* Segment entry (large page) protection definitions. * Segment entry (large page) protection definitions.
*/ */
#define SEGMENT_NONE __pgprot(_SEGMENT_ENTRY_INVALID | \ #define SEGMENT_NONE __pgprot(_SEGMENT_ENTRY_INVALID | \
_SEGMENT_ENTRY_NONE)
#define SEGMENT_READ __pgprot(_SEGMENT_ENTRY_INVALID | \
_SEGMENT_ENTRY_PROTECT) _SEGMENT_ENTRY_PROTECT)
#define SEGMENT_READ __pgprot(_SEGMENT_ENTRY_PROTECT) #define SEGMENT_WRITE __pgprot(_SEGMENT_ENTRY_INVALID)
#define SEGMENT_WRITE __pgprot(0)
static inline int mm_has_pgste(struct mm_struct *mm) static inline int mm_has_pgste(struct mm_struct *mm)
{ {
...@@ -520,10 +557,19 @@ static inline int pmd_large(pmd_t pmd) ...@@ -520,10 +557,19 @@ static inline int pmd_large(pmd_t pmd)
#endif #endif
} }
static inline int pmd_prot_none(pmd_t pmd)
{
return (pmd_val(pmd) & _SEGMENT_ENTRY_INVALID) &&
(pmd_val(pmd) & _SEGMENT_ENTRY_NONE);
}
static inline int pmd_bad(pmd_t pmd) static inline int pmd_bad(pmd_t pmd)
{ {
unsigned long mask = ~_SEGMENT_ENTRY_ORIGIN & ~_SEGMENT_ENTRY_INVALID; #ifdef CONFIG_64BIT
return (pmd_val(pmd) & mask) != _SEGMENT_ENTRY; if (pmd_large(pmd))
return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS_LARGE) != 0;
#endif
return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS) != 0;
} }
#define __HAVE_ARCH_PMDP_SPLITTING_FLUSH #define __HAVE_ARCH_PMDP_SPLITTING_FLUSH
...@@ -542,12 +588,21 @@ extern int pmdp_clear_flush_young(struct vm_area_struct *vma, ...@@ -542,12 +588,21 @@ extern int pmdp_clear_flush_young(struct vm_area_struct *vma,
#define __HAVE_ARCH_PMD_WRITE #define __HAVE_ARCH_PMD_WRITE
static inline int pmd_write(pmd_t pmd) static inline int pmd_write(pmd_t pmd)
{ {
if (pmd_prot_none(pmd))
return 0;
return (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT) == 0; return (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT) == 0;
} }
static inline int pmd_young(pmd_t pmd) static inline int pmd_young(pmd_t pmd)
{ {
return 0; int young = 0;
#ifdef CONFIG_64BIT
if (pmd_prot_none(pmd))
young = (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT) != 0;
else
young = (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG) != 0;
#endif
return young;
} }
static inline int pte_present(pte_t pte) static inline int pte_present(pte_t pte)
...@@ -632,33 +687,28 @@ static inline void pgste_set(pte_t *ptep, pgste_t pgste) ...@@ -632,33 +687,28 @@ static inline void pgste_set(pte_t *ptep, pgste_t pgste)
static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste) static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste)
{ {
#ifdef CONFIG_PGSTE #ifdef CONFIG_PGSTE
unsigned long address, bits; unsigned long address, bits, skey;
unsigned char skey;
if (pte_val(*ptep) & _PAGE_INVALID) if (pte_val(*ptep) & _PAGE_INVALID)
return pgste; return pgste;
address = pte_val(*ptep) & PAGE_MASK; address = pte_val(*ptep) & PAGE_MASK;
skey = page_get_storage_key(address); skey = (unsigned long) page_get_storage_key(address);
bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED); bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
/* Clear page changed & referenced bit in the storage key */ if (!(pgste_val(pgste) & PGSTE_HC_BIT) && (bits & _PAGE_CHANGED)) {
if (bits & _PAGE_CHANGED) /* Transfer dirty + referenced bit to host bits in pgste */
pgste_val(pgste) |= bits << 52;
page_set_storage_key(address, skey ^ bits, 0); page_set_storage_key(address, skey ^ bits, 0);
else if (bits) } else if (!(pgste_val(pgste) & PGSTE_HR_BIT) &&
(bits & _PAGE_REFERENCED)) {
/* Transfer referenced bit to host bit in pgste */
pgste_val(pgste) |= PGSTE_HR_BIT;
page_reset_referenced(address); page_reset_referenced(address);
}
/* Transfer page changed & referenced bit to guest bits in pgste */ /* Transfer page changed & referenced bit to guest bits in pgste */
pgste_val(pgste) |= bits << 48; /* GR bit & GC bit */ pgste_val(pgste) |= bits << 48; /* GR bit & GC bit */
/* Get host changed & referenced bits from pgste */
bits |= (pgste_val(pgste) & (PGSTE_HR_BIT | PGSTE_HC_BIT)) >> 52;
/* Transfer page changed & referenced bit to kvm user bits */
pgste_val(pgste) |= bits << 45; /* PGSTE_UR_BIT & PGSTE_UC_BIT */
/* Clear relevant host bits in pgste. */
pgste_val(pgste) &= ~(PGSTE_HR_BIT | PGSTE_HC_BIT);
pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT);
/* Copy page access key and fetch protection bit to pgste */ /* Copy page access key and fetch protection bit to pgste */
pgste_val(pgste) |= pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT);
(unsigned long) (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56; pgste_val(pgste) |= (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
/* Transfer referenced bit to pte */
pte_val(*ptep) |= (bits & _PAGE_REFERENCED) << 1;
#endif #endif
return pgste; return pgste;
...@@ -667,24 +717,11 @@ static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste) ...@@ -667,24 +717,11 @@ static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste)
static inline pgste_t pgste_update_young(pte_t *ptep, pgste_t pgste) static inline pgste_t pgste_update_young(pte_t *ptep, pgste_t pgste)
{ {
#ifdef CONFIG_PGSTE #ifdef CONFIG_PGSTE
int young;
if (pte_val(*ptep) & _PAGE_INVALID) if (pte_val(*ptep) & _PAGE_INVALID)
return pgste; return pgste;
/* Get referenced bit from storage key */ /* Get referenced bit from storage key */
young = page_reset_referenced(pte_val(*ptep) & PAGE_MASK); if (page_reset_referenced(pte_val(*ptep) & PAGE_MASK))
if (young) pgste_val(pgste) |= PGSTE_HR_BIT | PGSTE_GR_BIT;
pgste_val(pgste) |= PGSTE_GR_BIT;
/* Get host referenced bit from pgste */
if (pgste_val(pgste) & PGSTE_HR_BIT) {
pgste_val(pgste) &= ~PGSTE_HR_BIT;
young = 1;
}
/* Transfer referenced bit to kvm user bits and pte */
if (young) {
pgste_val(pgste) |= PGSTE_UR_BIT;
pte_val(*ptep) |= _PAGE_YOUNG;
}
#endif #endif
return pgste; return pgste;
} }
...@@ -839,11 +876,7 @@ static inline int pte_dirty(pte_t pte) ...@@ -839,11 +876,7 @@ static inline int pte_dirty(pte_t pte)
static inline int pte_young(pte_t pte) static inline int pte_young(pte_t pte)
{ {
#ifdef CONFIG_PGSTE return (pte_val(pte) & _PAGE_YOUNG) != 0;
if (pte_val(pte) & _PAGE_YOUNG)
return 1;
#endif
return 0;
} }
/* /*
...@@ -884,6 +917,16 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) ...@@ -884,6 +917,16 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
{ {
pte_val(pte) &= _PAGE_CHG_MASK; pte_val(pte) &= _PAGE_CHG_MASK;
pte_val(pte) |= pgprot_val(newprot); pte_val(pte) |= pgprot_val(newprot);
/*
* newprot for PAGE_NONE, PAGE_READ and PAGE_WRITE has the
* invalid bit set, clear it again for readable, young pages
*/
if ((pte_val(pte) & _PAGE_YOUNG) && (pte_val(pte) & _PAGE_READ))
pte_val(pte) &= ~_PAGE_INVALID;
/*
* newprot for PAGE_READ and PAGE_WRITE has the page protection
* bit set, clear it again for writable, dirty pages
*/
if ((pte_val(pte) & _PAGE_DIRTY) && (pte_val(pte) & _PAGE_WRITE)) if ((pte_val(pte) & _PAGE_DIRTY) && (pte_val(pte) & _PAGE_WRITE))
pte_val(pte) &= ~_PAGE_PROTECT; pte_val(pte) &= ~_PAGE_PROTECT;
return pte; return pte;
...@@ -921,14 +964,16 @@ static inline pte_t pte_mkdirty(pte_t pte) ...@@ -921,14 +964,16 @@ static inline pte_t pte_mkdirty(pte_t pte)
static inline pte_t pte_mkold(pte_t pte) static inline pte_t pte_mkold(pte_t pte)
{ {
#ifdef CONFIG_PGSTE
pte_val(pte) &= ~_PAGE_YOUNG; pte_val(pte) &= ~_PAGE_YOUNG;
#endif pte_val(pte) |= _PAGE_INVALID;
return pte; return pte;
} }
static inline pte_t pte_mkyoung(pte_t pte) static inline pte_t pte_mkyoung(pte_t pte)
{ {
pte_val(pte) |= _PAGE_YOUNG;
if (pte_val(pte) & _PAGE_READ)
pte_val(pte) &= ~_PAGE_INVALID;
return pte; return pte;
} }
...@@ -958,8 +1003,8 @@ static inline int ptep_test_and_clear_user_dirty(struct mm_struct *mm, ...@@ -958,8 +1003,8 @@ static inline int ptep_test_and_clear_user_dirty(struct mm_struct *mm,
if (mm_has_pgste(mm)) { if (mm_has_pgste(mm)) {
pgste = pgste_get_lock(ptep); pgste = pgste_get_lock(ptep);
pgste = pgste_update_all(ptep, pgste); pgste = pgste_update_all(ptep, pgste);
dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT); dirty = !!(pgste_val(pgste) & PGSTE_HC_BIT);
pgste_val(pgste) &= ~PGSTE_UC_BIT; pgste_val(pgste) &= ~PGSTE_HC_BIT;
pgste_set_unlock(ptep, pgste); pgste_set_unlock(ptep, pgste);
return dirty; return dirty;
} }
...@@ -978,42 +1023,13 @@ static inline int ptep_test_and_clear_user_young(struct mm_struct *mm, ...@@ -978,42 +1023,13 @@ static inline int ptep_test_and_clear_user_young(struct mm_struct *mm,
if (mm_has_pgste(mm)) { if (mm_has_pgste(mm)) {
pgste = pgste_get_lock(ptep); pgste = pgste_get_lock(ptep);
pgste = pgste_update_young(ptep, pgste); pgste = pgste_update_young(ptep, pgste);
young = !!(pgste_val(pgste) & PGSTE_UR_BIT); young = !!(pgste_val(pgste) & PGSTE_HR_BIT);
pgste_val(pgste) &= ~PGSTE_UR_BIT; pgste_val(pgste) &= ~PGSTE_HR_BIT;
pgste_set_unlock(ptep, pgste); pgste_set_unlock(ptep, pgste);
} }
return young; return young;
} }
#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep)
{
pgste_t pgste;
pte_t pte;
if (mm_has_pgste(vma->vm_mm)) {
pgste = pgste_get_lock(ptep);
pgste = pgste_update_young(ptep, pgste);
pte = *ptep;
*ptep = pte_mkold(pte);
pgste_set_unlock(ptep, pgste);
return pte_young(pte);
}
return 0;
}
#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
unsigned long address, pte_t *ptep)
{
/* No need to flush TLB
* On s390 reference bits are in storage key and never in TLB
* With virtualization we handle the reference bit, without we
* we can simply return */
return ptep_test_and_clear_young(vma, address, ptep);
}
static inline void __ptep_ipte(unsigned long address, pte_t *ptep) static inline void __ptep_ipte(unsigned long address, pte_t *ptep)
{ {
if (!(pte_val(*ptep) & _PAGE_INVALID)) { if (!(pte_val(*ptep) & _PAGE_INVALID)) {
...@@ -1042,6 +1058,40 @@ static inline void ptep_flush_lazy(struct mm_struct *mm, ...@@ -1042,6 +1058,40 @@ static inline void ptep_flush_lazy(struct mm_struct *mm,
mm->context.flush_mm = 1; mm->context.flush_mm = 1;
} }
#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep)
{
pgste_t pgste;
pte_t pte;
int young;
if (mm_has_pgste(vma->vm_mm)) {
pgste = pgste_get_lock(ptep);
pgste = pgste_ipte_notify(vma->vm_mm, addr, ptep, pgste);
}
pte = *ptep;
__ptep_ipte(addr, ptep);
young = pte_young(pte);
pte = pte_mkold(pte);
if (mm_has_pgste(vma->vm_mm)) {
pgste_set_pte(ptep, pte);
pgste_set_unlock(ptep, pgste);
} else
*ptep = pte;
return young;
}
#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
unsigned long address, pte_t *ptep)
{
return ptep_test_and_clear_young(vma, address, ptep);
}
/* /*
* This is hard to understand. ptep_get_and_clear and ptep_clear_flush * This is hard to understand. ptep_get_and_clear and ptep_clear_flush
* both clear the TLB for the unmapped pte. The reason is that * both clear the TLB for the unmapped pte. The reason is that
...@@ -1229,7 +1279,7 @@ static inline pte_t mk_pte_phys(unsigned long physpage, pgprot_t pgprot) ...@@ -1229,7 +1279,7 @@ static inline pte_t mk_pte_phys(unsigned long physpage, pgprot_t pgprot)
{ {
pte_t __pte; pte_t __pte;
pte_val(__pte) = physpage + pgprot_val(pgprot); pte_val(__pte) = physpage + pgprot_val(pgprot);
return __pte; return pte_mkyoung(__pte);
} }
static inline pte_t mk_pte(struct page *page, pgprot_t pgprot) static inline pte_t mk_pte(struct page *page, pgprot_t pgprot)
...@@ -1338,10 +1388,41 @@ static inline unsigned long massage_pgprot_pmd(pgprot_t pgprot) ...@@ -1338,10 +1388,41 @@ static inline unsigned long massage_pgprot_pmd(pgprot_t pgprot)
return pgprot_val(SEGMENT_WRITE); return pgprot_val(SEGMENT_WRITE);
} }
static inline pmd_t pmd_mkyoung(pmd_t pmd)
{
#ifdef CONFIG_64BIT
if (pmd_prot_none(pmd)) {
pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT;
} else {
pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG;
pmd_val(pmd) &= ~_SEGMENT_ENTRY_INVALID;
}
#endif
return pmd;
}
static inline pmd_t pmd_mkold(pmd_t pmd)
{
#ifdef CONFIG_64BIT
if (pmd_prot_none(pmd)) {
pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT;
} else {
pmd_val(pmd) &= ~_SEGMENT_ENTRY_YOUNG;
pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID;
}
#endif
return pmd;
}
static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
{ {
int young;
young = pmd_young(pmd);
pmd_val(pmd) &= _SEGMENT_CHG_MASK; pmd_val(pmd) &= _SEGMENT_CHG_MASK;
pmd_val(pmd) |= massage_pgprot_pmd(newprot); pmd_val(pmd) |= massage_pgprot_pmd(newprot);
if (young)
pmd = pmd_mkyoung(pmd);
return pmd; return pmd;
} }
...@@ -1349,13 +1430,13 @@ static inline pmd_t mk_pmd_phys(unsigned long physpage, pgprot_t pgprot) ...@@ -1349,13 +1430,13 @@ static inline pmd_t mk_pmd_phys(unsigned long physpage, pgprot_t pgprot)
{ {
pmd_t __pmd; pmd_t __pmd;
pmd_val(__pmd) = physpage + massage_pgprot_pmd(pgprot); pmd_val(__pmd) = physpage + massage_pgprot_pmd(pgprot);
return __pmd; return pmd_mkyoung(__pmd);
} }
static inline pmd_t pmd_mkwrite(pmd_t pmd) static inline pmd_t pmd_mkwrite(pmd_t pmd)
{ {
/* Do not clobber PROT_NONE pages! */ /* Do not clobber PROT_NONE segments! */
if (!(pmd_val(pmd) & _SEGMENT_ENTRY_INVALID)) if (!pmd_prot_none(pmd))
pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT; pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT;
return pmd; return pmd;
} }
...@@ -1391,6 +1472,8 @@ static inline pmd_t pmd_mkhuge(pmd_t pmd) ...@@ -1391,6 +1472,8 @@ static inline pmd_t pmd_mkhuge(pmd_t pmd)
static inline pmd_t pmd_wrprotect(pmd_t pmd) static inline pmd_t pmd_wrprotect(pmd_t pmd)
{ {
/* Do not clobber PROT_NONE segments! */
if (!pmd_prot_none(pmd))
pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT; pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT;
return pmd; return pmd;
} }
...@@ -1401,50 +1484,16 @@ static inline pmd_t pmd_mkdirty(pmd_t pmd) ...@@ -1401,50 +1484,16 @@ static inline pmd_t pmd_mkdirty(pmd_t pmd)
return pmd; return pmd;
} }
static inline pmd_t pmd_mkold(pmd_t pmd)
{
/* No referenced bit in the segment table entry. */
return pmd;
}
static inline pmd_t pmd_mkyoung(pmd_t pmd)
{
/* No referenced bit in the segment table entry. */
return pmd;
}
#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG #define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma, static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp) unsigned long address, pmd_t *pmdp)
{ {
unsigned long pmd_addr = pmd_val(*pmdp) & HPAGE_MASK; pmd_t pmd;
long tmp, rc;
int counter;
rc = 0; pmd = *pmdp;
if (MACHINE_HAS_RRBM) { __pmd_idte(address, pmdp);
counter = PTRS_PER_PTE >> 6; *pmdp = pmd_mkold(pmd);
asm volatile( return pmd_young(pmd);
"0: .insn rre,0xb9ae0000,%0,%3\n" /* rrbm */
" ogr %1,%0\n"
" la %3,0(%4,%3)\n"
" brct %2,0b\n"
: "=&d" (tmp), "+&d" (rc), "+d" (counter),
"+a" (pmd_addr)
: "a" (64 * 4096UL) : "cc");
rc = !!rc;
} else {
counter = PTRS_PER_PTE;
asm volatile(
"0: rrbe 0,%2\n"
" la %2,0(%3,%2)\n"
" brc 12,1f\n"
" lhi %0,1\n"
"1: brct %1,0b\n"
: "+d" (rc), "+d" (counter), "+a" (pmd_addr)
: "a" (4096UL) : "cc");
}
return rc;
} }
#define __HAVE_ARCH_PMDP_GET_AND_CLEAR #define __HAVE_ARCH_PMDP_GET_AND_CLEAR
......
...@@ -10,19 +10,25 @@ ...@@ -10,19 +10,25 @@
static inline pmd_t __pte_to_pmd(pte_t pte) static inline pmd_t __pte_to_pmd(pte_t pte)
{ {
int none, prot; int none, young, prot;
pmd_t pmd; pmd_t pmd;
/* /*
* Convert encoding pte bits pmd bits * Convert encoding pte bits pmd bits
* .IR.....wdtp ..R...I..... * .IR...wrdytp ..R...I...y.
* empty .10.....0000 -> ..0...1..... * empty .10...000000 -> ..0...1...0.
* prot-none, clean .11.....0001 -> ..1...1..... * prot-none, clean, old .11...000001 -> ..0...1...1.
* prot-none, dirty .10.....0101 -> ..1...1..... * prot-none, clean, young .11...000101 -> ..1...1...1.
* read-only, clean .01.....0001 -> ..1...0..... * prot-none, dirty, old .10...001001 -> ..0...1...1.
* read-only, dirty .01.....0101 -> ..1...0..... * prot-none, dirty, young .10...001101 -> ..1...1...1.
* read-write, clean .01.....1001 -> ..0...0..... * read-only, clean, old .11...010001 -> ..1...1...0.
* read-write, dirty .00.....1101 -> ..0...0..... * read-only, clean, young .01...010101 -> ..1...0...1.
* read-only, dirty, old .11...011001 -> ..1...1...0.
* read-only, dirty, young .01...011101 -> ..1...0...1.
* read-write, clean, old .11...110001 -> ..0...1...0.
* read-write, clean, young .01...110101 -> ..0...0...1.
* read-write, dirty, old .10...111001 -> ..0...1...0.
* read-write, dirty, young .00...111101 -> ..0...0...1.
* Huge ptes are dirty by definition, a clean pte is made dirty * Huge ptes are dirty by definition, a clean pte is made dirty
* by the conversion. * by the conversion.
*/ */
...@@ -31,9 +37,14 @@ static inline pmd_t __pte_to_pmd(pte_t pte) ...@@ -31,9 +37,14 @@ static inline pmd_t __pte_to_pmd(pte_t pte)
if (pte_val(pte) & _PAGE_INVALID) if (pte_val(pte) & _PAGE_INVALID)
pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID; pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID;
none = (pte_val(pte) & _PAGE_PRESENT) && none = (pte_val(pte) & _PAGE_PRESENT) &&
(pte_val(pte) & _PAGE_INVALID); !(pte_val(pte) & _PAGE_READ) &&
prot = (pte_val(pte) & _PAGE_PROTECT); !(pte_val(pte) & _PAGE_WRITE);
if (prot || none) prot = (pte_val(pte) & _PAGE_PROTECT) &&
!(pte_val(pte) & _PAGE_WRITE);
young = pte_val(pte) & _PAGE_YOUNG;
if (none || young)
pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG;
if (prot || (none && young))
pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT; pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT;
} else } else
pmd_val(pmd) = _SEGMENT_ENTRY_INVALID; pmd_val(pmd) = _SEGMENT_ENTRY_INVALID;
...@@ -46,11 +57,14 @@ static inline pte_t __pmd_to_pte(pmd_t pmd) ...@@ -46,11 +57,14 @@ static inline pte_t __pmd_to_pte(pmd_t pmd)
/* /*
* Convert encoding pmd bits pte bits * Convert encoding pmd bits pte bits
* ..R...I..... .IR.....wdtp * ..R...I...y. .IR...wrdytp
* empty ..0...1..... -> .10.....0000 * empty ..0...1...0. -> .10...000000
* prot-none, young ..1...1..... -> .10.....0101 * prot-none, old ..0...1...1. -> .10...001001
* read-only, young ..1...0..... -> .01.....0101 * prot-none, young ..1...1...1. -> .10...001101
* read-write, young ..0...0..... -> .00.....1101 * read-only, old ..1...1...0. -> .11...011001
* read-only, young ..1...0...1. -> .01...011101
* read-write, old ..0...1...0. -> .10...111001
* read-write, young ..0...0...1. -> .00...111101
* Huge ptes are dirty by definition * Huge ptes are dirty by definition
*/ */
if (pmd_present(pmd)) { if (pmd_present(pmd)) {
...@@ -58,11 +72,17 @@ static inline pte_t __pmd_to_pte(pmd_t pmd) ...@@ -58,11 +72,17 @@ static inline pte_t __pmd_to_pte(pmd_t pmd)
(pmd_val(pmd) & PAGE_MASK); (pmd_val(pmd) & PAGE_MASK);
if (pmd_val(pmd) & _SEGMENT_ENTRY_INVALID) if (pmd_val(pmd) & _SEGMENT_ENTRY_INVALID)
pte_val(pte) |= _PAGE_INVALID; pte_val(pte) |= _PAGE_INVALID;
else { if (pmd_prot_none(pmd)) {
if (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT)
pte_val(pte) |= _PAGE_YOUNG;
} else {
pte_val(pte) |= _PAGE_READ;
if (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT) if (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT)
pte_val(pte) |= _PAGE_PROTECT; pte_val(pte) |= _PAGE_PROTECT;
else else
pte_val(pte) |= _PAGE_WRITE; pte_val(pte) |= _PAGE_WRITE;
if (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG)
pte_val(pte) |= _PAGE_YOUNG;
} }
} else } else
pte_val(pte) = _PAGE_INVALID; pte_val(pte) = _PAGE_INVALID;
......
...@@ -754,7 +754,8 @@ static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm, ...@@ -754,7 +754,8 @@ static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
atomic_set(&page->_mapcount, 3); atomic_set(&page->_mapcount, 3);
table = (unsigned long *) page_to_phys(page); table = (unsigned long *) page_to_phys(page);
clear_table(table, _PAGE_INVALID, PAGE_SIZE/2); clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2); clear_table(table + PTRS_PER_PTE, PGSTE_HR_BIT | PGSTE_HC_BIT,
PAGE_SIZE/2);
return table; return table;
} }
...@@ -792,26 +793,21 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, ...@@ -792,26 +793,21 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
pgste_val(new) |= (key & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48; pgste_val(new) |= (key & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48;
pgste_val(new) |= (key & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56; pgste_val(new) |= (key & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
if (!(pte_val(*ptep) & _PAGE_INVALID)) { if (!(pte_val(*ptep) & _PAGE_INVALID)) {
unsigned long address, bits; unsigned long address, bits, skey;
unsigned char skey;
address = pte_val(*ptep) & PAGE_MASK; address = pte_val(*ptep) & PAGE_MASK;
skey = page_get_storage_key(address); skey = (unsigned long) page_get_storage_key(address);
bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED); bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
skey = key & (_PAGE_ACC_BITS | _PAGE_FP_BIT);
/* Set storage key ACC and FP */ /* Set storage key ACC and FP */
page_set_storage_key(address, page_set_storage_key(address, skey, !nq);
(key & (_PAGE_ACC_BITS | _PAGE_FP_BIT)),
!nq);
/* Merge host changed & referenced into pgste */ /* Merge host changed & referenced into pgste */
pgste_val(new) |= bits << 52; pgste_val(new) |= bits << 52;
/* Transfer skey changed & referenced bit to kvm user bits */
pgste_val(new) |= bits << 45; /* PGSTE_UR_BIT & PGSTE_UC_BIT */
} }
/* changing the guest storage key is considered a change of the page */ /* changing the guest storage key is considered a change of the page */
if ((pgste_val(new) ^ pgste_val(old)) & if ((pgste_val(new) ^ pgste_val(old)) &
(PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT)) (PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT))
pgste_val(new) |= PGSTE_UC_BIT; pgste_val(new) |= PGSTE_HC_BIT;
pgste_set_unlock(ptep, new); pgste_set_unlock(ptep, new);
pte_unmap_unlock(*ptep, ptl); pte_unmap_unlock(*ptep, ptl);
......
...@@ -118,6 +118,7 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro) ...@@ -118,6 +118,7 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro)
!(address & ~PMD_MASK) && (address + PMD_SIZE <= end)) { !(address & ~PMD_MASK) && (address + PMD_SIZE <= end)) {
pmd_val(*pm_dir) = __pa(address) | pmd_val(*pm_dir) = __pa(address) |
_SEGMENT_ENTRY | _SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY | _SEGMENT_ENTRY_LARGE |
_SEGMENT_ENTRY_YOUNG |
(ro ? _SEGMENT_ENTRY_PROTECT : 0); (ro ? _SEGMENT_ENTRY_PROTECT : 0);
address += PMD_SIZE; address += PMD_SIZE;
continue; continue;
......
...@@ -208,10 +208,6 @@ static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b) ...@@ -208,10 +208,6 @@ static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif #endif
#ifndef __HAVE_ARCH_PAGE_TEST_AND_CLEAR_YOUNG
#define page_test_and_clear_young(pfn) (0)
#endif
#ifndef __HAVE_ARCH_PGD_OFFSET_GATE #ifndef __HAVE_ARCH_PGD_OFFSET_GATE
#define pgd_offset_gate(mm, addr) pgd_offset(mm, addr) #define pgd_offset_gate(mm, addr) pgd_offset(mm, addr)
#endif #endif
......
...@@ -873,9 +873,6 @@ int page_referenced(struct page *page, ...@@ -873,9 +873,6 @@ int page_referenced(struct page *page,
vm_flags); vm_flags);
if (we_locked) if (we_locked)
unlock_page(page); unlock_page(page);
if (page_test_and_clear_young(page_to_pfn(page)))
referenced++;
} }
out: out:
return referenced; return referenced;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment