Commit ae73ad05 authored by Russell King


Merge tag 'arm-p2v-for-v5.11' of git://git.kernel.org/pub/scm/linux/kernel/git/ardb/linux into devel-stable

Implement the necessary changes in the ARM assembler boot code to permit
the relative alignment of the physical and the virtual addresses of the
kernel to be as little as 2 MiB, as opposed to the minimum of 16 MiB we
support today.

Series was posted here, and reviewed by Nicolas Pitre and Linus Walleij:
https://lore.kernel.org/linux-arm-kernel/20200921154117.757-1-ardb@kernel.org/
parents 3650b228 9443076e
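
The key enabler is that a 2 MiB aligned offset has only bits 31:21 significant, so the ARM-mode stubs in the diff below can carry it in two add/sub instructions whose 8-bit immediates are rotated to cover bits 31:24 and 23:16 (hence the new __PV_BITS_23_16 constant). A minimal C sketch of that split, using a made-up offset rather than anything from the series:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t offset = 0xbfe00000;	/* invented: 2 MiB aligned, not 16 MiB aligned */

	assert((offset & ((1u << 21) - 1)) == 0);	/* 2 MiB alignment assumed */

	uint8_t hi = offset >> 24;		/* imm8 for the "lsl #24" add/sub */
	uint8_t lo = (offset >> 16) & 0xff;	/* imm8 for the "lsl #16" add/sub */

	/* the two patched instructions together re-create the offset */
	assert((((uint32_t)hi << 24) | ((uint32_t)lo << 16)) == offset);

	printf("bits 31:24 = %#x, bits 23:16 = %#x\n", hi, lo);
	return 0;
}

Under the old scheme only the first immediate existed, so any offset with bits 23:16 set could not be encoded, which is where the 16 MiB minimum came from.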
@@ -243,7 +243,7 @@ config ARM_PATCH_PHYS_VIRT
	  kernel in system memory.

	  This can only be used with non-XIP MMU kernels where the base
-	  of physical memory is at a 16MB boundary.
+	  of physical memory is at a 2 MiB boundary.

	  Only disable this option if you know that you do not require
	  this feature (eg, building a kernel for a single machine) and
...
@@ -494,4 +494,88 @@ THUMB( orr \reg , \reg , #PSR_T_BIT )
#define _ASM_NOKPROBE(entry)
#endif
.macro __adldst_l, op, reg, sym, tmp, c
.if __LINUX_ARM_ARCH__ < 7
ldr\c \tmp, .La\@
.subsection 1
.align 2
.La\@: .long \sym - .Lpc\@
.previous
.else
.ifnb \c
THUMB( ittt \c )
.endif
movw\c \tmp, #:lower16:\sym - .Lpc\@
movt\c \tmp, #:upper16:\sym - .Lpc\@
.endif
#ifndef CONFIG_THUMB2_KERNEL
.set .Lpc\@, . + 8 // PC bias
.ifc \op, add
add\c \reg, \tmp, pc
.else
\op\c \reg, [pc, \tmp]
.endif
#else
.Lb\@: add\c \tmp, \tmp, pc
/*
* In Thumb-2 builds, the PC bias depends on whether we are currently
* emitting into a .arm or a .thumb section. The size of the add opcode
* above will be 2 bytes when emitting in Thumb mode and 4 bytes when
* emitting in ARM mode, so let's use this to account for the bias.
*/
.set .Lpc\@, . + (. - .Lb\@)
.ifnc \op, add
\op\c \reg, [\tmp]
.endif
#endif
.endm
/*
* mov_l - move a constant value or [relocated] address into a register
*/
.macro mov_l, dst:req, imm:req
.if __LINUX_ARM_ARCH__ < 7
ldr \dst, =\imm
.else
movw \dst, #:lower16:\imm
movt \dst, #:upper16:\imm
.endif
.endm
/*
* adr_l - adr pseudo-op with unlimited range
*
* @dst: destination register
* @sym: name of the symbol
* @cond: conditional opcode suffix
*/
.macro adr_l, dst:req, sym:req, cond
__adldst_l add, \dst, \sym, \dst, \cond
.endm
/*
* ldr_l - ldr <literal> pseudo-op with unlimited range
*
* @dst: destination register
* @sym: name of the symbol
* @cond: conditional opcode suffix
*/
.macro ldr_l, dst:req, sym:req, cond
__adldst_l ldr, \dst, \sym, \dst, \cond
.endm
/*
* str_l - str <literal> pseudo-op with unlimited range
*
* @src: source register
* @sym: name of the symbol
* @tmp: mandatory scratch register
* @cond: conditional opcode suffix
*/
.macro str_l, src:req, sym:req, tmp:req, cond
__adldst_l str, \src, \sym, \tmp, \cond
.endm
#endif /* __ASM_ASSEMBLER_H__ */
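
On v7 and later, these helpers avoid literal pool loads entirely: mov_l splits a 32-bit value across a movw/movt pair, and adr_l applies the same split to the place-relative quantity \sym - .Lpc\@, adding the PC back at run time. A rough C model of that arithmetic, with invented addresses purely for illustration:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t sym = 0xc0a81234;	/* hypothetical address of \sym */
	uint32_t pc  = 0xc0008000;	/* hypothetical value of .Lpc\@ */

	uint32_t rel = sym - pc;		/* \sym - .Lpc\@ */
	uint16_t lower16 = rel & 0xffff;	/* movw immediate (#:lower16:) */
	uint16_t upper16 = rel >> 16;		/* movt immediate (#:upper16:) */

	/* movw zeroes the top half, movt fills it in, then pc is added back */
	uint32_t tmp = lower16 | ((uint32_t)upper16 << 16);
	printf("adr_l result: %#x\n", tmp + pc);	/* prints sym */
	return 0;
}

Because only the distance between the instruction and the symbol is encoded, the sequence works wherever the image is loaded, which is what makes the pv table entries position-independent.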
@@ -51,6 +51,7 @@ typedef struct user_fp elf_fpregset_t;
#define R_ARM_NONE 0
#define R_ARM_PC24 1
#define R_ARM_ABS32 2
#define R_ARM_REL32 3
#define R_ARM_CALL 28
#define R_ARM_JUMP24 29
#define R_ARM_TARGET1 38
@@ -58,11 +59,15 @@ typedef struct user_fp elf_fpregset_t;
#define R_ARM_PREL31 42
#define R_ARM_MOVW_ABS_NC 43
#define R_ARM_MOVT_ABS 44
#define R_ARM_MOVW_PREL_NC 45
#define R_ARM_MOVT_PREL 46
#define R_ARM_THM_CALL 10
#define R_ARM_THM_JUMP24 30
#define R_ARM_THM_MOVW_ABS_NC 47
#define R_ARM_THM_MOVT_ABS 48
#define R_ARM_THM_MOVW_PREL_NC 49
#define R_ARM_THM_MOVT_PREL 50
/*
 * These are used to set parameters in the core dumps.
...
@@ -173,6 +173,7 @@ extern unsigned long vectors_base;
 * so that all we need to do is modify the 8-bit constant field.
 */
#define __PV_BITS_31_24 0x81000000
#define __PV_BITS_23_16 0x810000
#define __PV_BITS_7_0 0x81

extern unsigned long __pv_phys_pfn_offset;
@@ -183,43 +184,65 @@ extern const void *__pv_table_begin, *__pv_table_end;
#define PHYS_OFFSET ((phys_addr_t)__pv_phys_pfn_offset << PAGE_SHIFT)
#define PHYS_PFN_OFFSET (__pv_phys_pfn_offset)
-#define __pv_stub(from,to,instr,type) \
-	__asm__("@ __pv_stub\n" \
-	"1: " instr " %0, %1, %2\n" \
-	" .pushsection .pv_table,\"a\"\n" \
-	" .long 1b\n" \
-	" .popsection\n" \
-	: "=r" (to) \
-	: "r" (from), "I" (type))
-
-#define __pv_stub_mov_hi(t) \
-	__asm__ volatile("@ __pv_stub_mov\n" \
-	"1: mov %R0, %1\n" \
-	" .pushsection .pv_table,\"a\"\n" \
-	" .long 1b\n" \
-	" .popsection\n" \
-	: "=r" (t) \
-	: "I" (__PV_BITS_7_0))
-
-#define __pv_add_carry_stub(x, y) \
-	__asm__ volatile("@ __pv_add_carry_stub\n" \
-	"1: adds %Q0, %1, %2\n" \
-	" adc %R0, %R0, #0\n" \
-	" .pushsection .pv_table,\"a\"\n" \
-	" .long 1b\n" \
-	" .popsection\n" \
-	: "+r" (y) \
-	: "r" (x), "I" (__PV_BITS_31_24) \
-	: "cc")
+#ifndef CONFIG_THUMB2_KERNEL
+#define __pv_stub(from,to,instr) \
+	__asm__("@ __pv_stub\n" \
+	"1: " instr " %0, %1, %2\n" \
+	"2: " instr " %0, %0, %3\n" \
+	" .pushsection .pv_table,\"a\"\n" \
+	" .long 1b - ., 2b - .\n" \
+	" .popsection\n" \
+	: "=r" (to) \
+	: "r" (from), "I" (__PV_BITS_31_24), \
+	  "I"(__PV_BITS_23_16))
+
+#define __pv_add_carry_stub(x, y) \
+	__asm__("@ __pv_add_carry_stub\n" \
+	"0: movw %R0, #0\n" \
+	" adds %Q0, %1, %R0, lsl #20\n" \
+	"1: mov %R0, %2\n" \
+	" adc %R0, %R0, #0\n" \
+	" .pushsection .pv_table,\"a\"\n" \
+	" .long 0b - ., 1b - .\n" \
+	" .popsection\n" \
+	: "=&r" (y) \
+	: "r" (x), "I" (__PV_BITS_7_0) \
+	: "cc")
+#else
+#define __pv_stub(from,to,instr) \
+	__asm__("@ __pv_stub\n" \
+	"0: movw %0, #0\n" \
+	" lsl %0, #21\n" \
+	" " instr " %0, %1, %0\n" \
+	" .pushsection .pv_table,\"a\"\n" \
+	" .long 0b - .\n" \
+	" .popsection\n" \
+	: "=&r" (to) \
+	: "r" (from))
+
+#define __pv_add_carry_stub(x, y) \
+	__asm__("@ __pv_add_carry_stub\n" \
+	"0: movw %R0, #0\n" \
+	" lsls %R0, #21\n" \
+	" adds %Q0, %1, %R0\n" \
+	"1: mvn %R0, #0\n" \
+	" adc %R0, %R0, #0\n" \
+	" .pushsection .pv_table,\"a\"\n" \
+	" .long 0b - ., 1b - .\n" \
+	" .popsection\n" \
+	: "=&r" (y) \
+	: "r" (x) \
+	: "cc")
+#endif
static inline phys_addr_t __virt_to_phys_nodebug(unsigned long x)
{
	phys_addr_t t;

	if (sizeof(phys_addr_t) == 4) {
-		__pv_stub(x, t, "add", __PV_BITS_31_24);
+		__pv_stub(x, t, "add");
	} else {
-		__pv_stub_mov_hi(t);
		__pv_add_carry_stub(x, t);
	}
	return t;
@@ -235,7 +258,7 @@ static inline unsigned long __phys_to_virt(phys_addr_t x)
	 * assembler expression receives 32 bit argument
	 * in place where 'r' 32 bit operand is expected.
	 */
-	__pv_stub((unsigned long) x, t, "sub", __PV_BITS_31_24);
+	__pv_stub((unsigned long) x, t, "sub");
	return t;
}
...
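The net effect of the patched LPAE sequence is a 64-bit add of a boot-time constant: the fixup writes offset bits 31:21 into the movw, the adds produces the low word and a carry, and the mov/mvn plus adc supply bits 39:32. A C model of the Thumb-2 variant (layout values are invented, and the negative-offset mvn case is ignored):

#include <stdint.h>
#include <stdio.h>

static uint64_t v2p_lpae(uint32_t va, uint64_t pv_offset)
{
	uint32_t movw_imm = (uint32_t)pv_offset >> 21;	/* patched into the movw */
	uint32_t mov_imm  = (uint32_t)(pv_offset >> 32);	/* patched into the mov */

	uint64_t sum = (uint64_t)va + (movw_imm << 21);	/* lsls + adds */
	uint32_t lo  = (uint32_t)sum;
	uint32_t hi  = mov_imm + (uint32_t)(sum >> 32);	/* adc */

	return ((uint64_t)hi << 32) | lo;
}

int main(void)
{
	/* hypothetical: RAM at PA 0x8_80000000, kernel mapped at VA 0xc0000000 */
	uint64_t pv_offset = 0x880000000ull - 0xc0000000u;

	printf("PA = %#llx\n",
	       (unsigned long long)v2p_lpae(0xc0001000u, pv_offset));
	return 0;
}

With these numbers the low-word add overflows, and the carry correctly bumps the high word to 0x8, giving PA 0x880001000.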
@@ -92,6 +92,7 @@ obj-$(CONFIG_PARAVIRT) += paravirt.o
head-y := head$(MMUEXT).o
obj-$(CONFIG_DEBUG_LL) += debug.o
obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
obj-$(CONFIG_ARM_PATCH_PHYS_VIRT) += phys2virt.o
# This is executed very early using a temporary stack when no memory allocator
# nor global data is available. Everything has to be allocated on the stack.
...
@@ -586,146 +586,4 @@ ENTRY(fixup_smp)
ldmfd sp!, {r4 - r6, pc}
ENDPROC(fixup_smp)
#ifdef __ARMEB__
#define LOW_OFFSET 0x4
#define HIGH_OFFSET 0x0
#else
#define LOW_OFFSET 0x0
#define HIGH_OFFSET 0x4
#endif
#ifdef CONFIG_ARM_PATCH_PHYS_VIRT
/* __fixup_pv_table - patch the stub instructions with the delta between
* PHYS_OFFSET and PAGE_OFFSET, which is assumed to be 16MiB aligned and
* can be expressed by an immediate shifter operand. The stub instruction
* has a form of '(add|sub) rd, rn, #imm'.
*/
__HEAD
__fixup_pv_table:
adr r0, 1f
ldmia r0, {r3-r7}
mvn ip, #0
subs r3, r0, r3 @ PHYS_OFFSET - PAGE_OFFSET
add r4, r4, r3 @ adjust table start address
add r5, r5, r3 @ adjust table end address
add r6, r6, r3 @ adjust __pv_phys_pfn_offset address
add r7, r7, r3 @ adjust __pv_offset address
mov r0, r8, lsr #PAGE_SHIFT @ convert to PFN
str r0, [r6] @ save computed PHYS_OFFSET to __pv_phys_pfn_offset
strcc ip, [r7, #HIGH_OFFSET] @ save to __pv_offset high bits
mov r6, r3, lsr #24 @ constant for add/sub instructions
teq r3, r6, lsl #24 @ must be 16MiB aligned
THUMB( it ne @ cross section branch )
bne __error
str r3, [r7, #LOW_OFFSET] @ save to __pv_offset low bits
b __fixup_a_pv_table
ENDPROC(__fixup_pv_table)
.align
1: .long .
.long __pv_table_begin
.long __pv_table_end
2: .long __pv_phys_pfn_offset
.long __pv_offset
.text
__fixup_a_pv_table:
adr r0, 3f
ldr r6, [r0]
add r6, r6, r3
ldr r0, [r6, #HIGH_OFFSET] @ pv_offset high word
ldr r6, [r6, #LOW_OFFSET] @ pv_offset low word
mov r6, r6, lsr #24
cmn r0, #1
#ifdef CONFIG_THUMB2_KERNEL
moveq r0, #0x200000 @ set bit 21, mov to mvn instruction
lsls r6, #24
beq 2f
clz r7, r6
lsr r6, #24
lsl r6, r7
bic r6, #0x0080
lsrs r7, #1
orrcs r6, #0x0080
orr r6, r6, r7, lsl #12
orr r6, #0x4000
b 2f
1: add r7, r3
ldrh ip, [r7, #2]
ARM_BE8(rev16 ip, ip)
tst ip, #0x4000
and ip, #0x8f00
orrne ip, r6 @ mask in offset bits 31-24
orreq ip, r0 @ mask in offset bits 7-0
ARM_BE8(rev16 ip, ip)
strh ip, [r7, #2]
bne 2f
ldrh ip, [r7]
ARM_BE8(rev16 ip, ip)
bic ip, #0x20
orr ip, ip, r0, lsr #16
ARM_BE8(rev16 ip, ip)
strh ip, [r7]
2: cmp r4, r5
ldrcc r7, [r4], #4 @ use branch for delay slot
bcc 1b
bx lr
#else
#ifdef CONFIG_CPU_ENDIAN_BE8
moveq r0, #0x00004000 @ set bit 22, mov to mvn instruction
#else
moveq r0, #0x400000 @ set bit 22, mov to mvn instruction
#endif
b 2f
1: ldr ip, [r7, r3]
#ifdef CONFIG_CPU_ENDIAN_BE8
@ in BE8, we load data in BE, but instructions still in LE
bic ip, ip, #0xff000000
tst ip, #0x000f0000 @ check the rotation field
orrne ip, ip, r6, lsl #24 @ mask in offset bits 31-24
biceq ip, ip, #0x00004000 @ clear bit 22
orreq ip, ip, r0 @ mask in offset bits 7-0
#else
bic ip, ip, #0x000000ff
tst ip, #0xf00 @ check the rotation field
orrne ip, ip, r6 @ mask in offset bits 31-24
biceq ip, ip, #0x400000 @ clear bit 22
orreq ip, ip, r0 @ mask in offset bits 7-0
#endif
str ip, [r7, r3]
2: cmp r4, r5
ldrcc r7, [r4], #4 @ use branch for delay slot
bcc 1b
ret lr
#endif
ENDPROC(__fixup_a_pv_table)
.align
3: .long __pv_offset
ENTRY(fixup_pv_table)
stmfd sp!, {r4 - r7, lr}
mov r3, #0 @ no offset
mov r4, r0 @ r0 = table start
add r5, r0, r1 @ r1 = table size
bl __fixup_a_pv_table
ldmfd sp!, {r4 - r7, pc}
ENDPROC(fixup_pv_table)
.data
.align 2
.globl __pv_phys_pfn_offset
.type __pv_phys_pfn_offset, %object
__pv_phys_pfn_offset:
.word 0
.size __pv_phys_pfn_offset, . -__pv_phys_pfn_offset
.globl __pv_offset
.type __pv_offset, %object
__pv_offset:
.quad 0
.size __pv_offset, . -__pv_offset
#endif
#include "head-common.S" #include "head-common.S"
@@ -185,14 +185,24 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex,
*(u32 *)loc |= offset & 0x7fffffff;
break;
case R_ARM_REL32:
*(u32 *)loc += sym->st_value - loc;
break;
case R_ARM_MOVW_ABS_NC:
case R_ARM_MOVT_ABS:
case R_ARM_MOVW_PREL_NC:
case R_ARM_MOVT_PREL:
offset = tmp = __mem_to_opcode_arm(*(u32 *)loc);
offset = ((offset & 0xf0000) >> 4) | (offset & 0xfff);
offset = (offset ^ 0x8000) - 0x8000;
offset += sym->st_value;
-if (ELF32_R_TYPE(rel->r_info) == R_ARM_MOVT_ABS)
+if (ELF32_R_TYPE(rel->r_info) == R_ARM_MOVT_PREL ||
+    ELF32_R_TYPE(rel->r_info) == R_ARM_MOVW_PREL_NC)
+	offset -= loc;
+
+if (ELF32_R_TYPE(rel->r_info) == R_ARM_MOVT_ABS ||
+    ELF32_R_TYPE(rel->r_info) == R_ARM_MOVT_PREL)
	offset >>= 16;
tmp &= 0xfff0f000; tmp &= 0xfff0f000;
@@ -283,6 +293,8 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex,
case R_ARM_THM_MOVW_ABS_NC:
case R_ARM_THM_MOVT_ABS:
case R_ARM_THM_MOVW_PREL_NC:
case R_ARM_THM_MOVT_PREL:
upper = __mem_to_opcode_thumb16(*(u16 *)loc);
lower = __mem_to_opcode_thumb16(*(u16 *)(loc + 2));
@@ -302,7 +314,11 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex,
offset = (offset ^ 0x8000) - 0x8000;
offset += sym->st_value;
-if (ELF32_R_TYPE(rel->r_info) == R_ARM_THM_MOVT_ABS)
+if (ELF32_R_TYPE(rel->r_info) == R_ARM_THM_MOVT_PREL ||
+    ELF32_R_TYPE(rel->r_info) == R_ARM_THM_MOVW_PREL_NC)
+	offset -= loc;
+
+if (ELF32_R_TYPE(rel->r_info) == R_ARM_THM_MOVT_ABS ||
+    ELF32_R_TYPE(rel->r_info) == R_ARM_THM_MOVT_PREL)
	offset >>= 16;
upper = (u16)((upper & 0xfbf0) |
...
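The new *_PREL cases piggyback on the existing MOVW/MOVT decode and re-encode path; the only difference is subtracting the place (loc) before the immediate is written back. A standalone C sketch of the ARM-mode R_ARM_MOVW_PREL_NC arithmetic (the helper name is invented; the kernel does this inline in apply_relocate):

#include <stdint.h>
#include <stdio.h>

static uint32_t apply_movw_prel_nc(uint32_t insn, uint32_t sym, uint32_t loc)
{
	/* decode the signed 16-bit addend from imm4 (bits 19:16) and imm12 (bits 11:0) */
	uint32_t offset = ((insn & 0xf0000) >> 4) | (insn & 0xfff);
	offset = (offset ^ 0x8000) - 0x8000;	/* sign extend */

	offset += sym;				/* S + A */
	offset -= loc;				/* - P, the PREL part */

	/* re-encode the low 16 bits back into imm4:imm12 */
	insn &= 0xfff0f000;
	insn |= ((offset & 0xf000) << 4) | (offset & 0x0fff);
	return insn;
}

int main(void)
{
	/* movw r0, #0 at 0x7000, symbol at 0x8234: immediate becomes 0x1234 */
	printf("patched insn: %#x\n", apply_movw_prel_nc(0xe3000000, 0x8234, 0x7000));
	return 0;
}

The MOVT variants do the same but shift the result right by 16 first, so the high half of the displacement lands in the 16-bit immediate.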
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 1994-2002 Russell King
* Copyright (c) 2003, 2020 ARM Limited
* All Rights Reserved
*/
#include <linux/init.h>
#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/page.h>
#ifdef __ARMEB__
#define LOW_OFFSET 0x4
#define HIGH_OFFSET 0x0
#else
#define LOW_OFFSET 0x0
#define HIGH_OFFSET 0x4
#endif
/*
* __fixup_pv_table - patch the stub instructions with the delta between
* PHYS_OFFSET and PAGE_OFFSET, which is assumed to be
* 2 MiB aligned.
*
* Called from head.S, which expects the following registers to be preserved:
* r1 = machine no, r2 = atags or dtb,
* r8 = phys_offset, r9 = cpuid, r10 = procinfo
*/
__HEAD
ENTRY(__fixup_pv_table)
mov r0, r8, lsr #PAGE_SHIFT @ convert to PFN
str_l r0, __pv_phys_pfn_offset, r3
adr_l r0, __pv_offset
subs r3, r8, #PAGE_OFFSET @ PHYS_OFFSET - PAGE_OFFSET
mvn ip, #0
strcc ip, [r0, #HIGH_OFFSET] @ save to __pv_offset high bits
str r3, [r0, #LOW_OFFSET] @ save to __pv_offset low bits
mov r0, r3, lsr #21 @ constant for add/sub instructions
teq r3, r0, lsl #21 @ must be 2 MiB aligned
bne 0f
adr_l r4, __pv_table_begin
adr_l r5, __pv_table_end
b __fixup_a_pv_table
0: mov r0, r0 @ deadloop on error
b 0b
ENDPROC(__fixup_pv_table)
.text
__fixup_a_pv_table:
adr_l r6, __pv_offset
ldr r0, [r6, #HIGH_OFFSET] @ pv_offset high word
ldr r6, [r6, #LOW_OFFSET] @ pv_offset low word
cmn r0, #1
#ifdef CONFIG_THUMB2_KERNEL
@
@ The Thumb-2 versions of the patchable sequences are
@
@ phys-to-virt: movw <reg>, #offset<31:21>
@ lsl <reg>, #21
@ sub <VA>, <PA>, <reg>
@
@ virt-to-phys (non-LPAE): movw <reg>, #offset<31:21>
@ lsl <reg>, #21
@ add <PA>, <VA>, <reg>
@
@ virt-to-phys (LPAE): movw <reg>, #offset<31:21>
@ lsl <reg>, #21
@ adds <PAlo>, <VA>, <reg>
@ mov <PAhi>, #offset<39:32>
@ adc <PAhi>, <PAhi>, #0
@
@ In the non-LPAE case, all patchable instructions are MOVW
@ instructions, where we need to patch in the offset into the
@ second halfword of the opcode (the 16-bit immediate is encoded
@ as imm4:i:imm3:imm8)
@
@ 15 11 10 9 4 3 0 15 14 12 11 8 7 0
@ +-----------+---+-------------+------++---+------+----+------+
@ MOVW | 1 1 1 1 0 | i | 1 0 0 1 0 0 | imm4 || 0 | imm3 | Rd | imm8 |
@ +-----------+---+-------------+------++---+------+----+------+
@
@ In the LPAE case, we also need to patch in the high word of the
@ offset into the immediate field of the MOV instruction, or patch it
@ to a MVN instruction if the offset is negative. In this case, we
@ need to inspect the first halfword of the opcode, to check whether
@ it is MOVW or MOV/MVN, and to perform the MOV to MVN patching if
@ needed. The encoding of the immediate is rather complex for values
@ of i:imm3 != 0b0000, but fortunately, we never need more than 8 lower
@ order bits, which can be patched into imm8 directly (and i:imm3
@ cleared)
@
@ 15 11 10 9 5 0 15 14 12 11 8 7 0
@ +-----------+---+---------------------++---+------+----+------+
@ MOV | 1 1 1 1 0 | i | 0 0 0 1 0 0 1 1 1 1 || 0 | imm3 | Rd | imm8 |
@ MVN | 1 1 1 1 0 | i | 0 0 0 1 1 0 1 1 1 1 || 0 | imm3 | Rd | imm8 |
@ +-----------+---+---------------------++---+------+----+------+
@
moveq r0, #0x200000 @ set bit 21, mov to mvn instruction
lsrs r3, r6, #29 @ isolate top 3 bits of displacement
ubfx r6, r6, #21, #8 @ put bits 28:21 into the MOVW imm8 field
bfi r6, r3, #12, #3 @ put bits 31:29 into the MOVW imm3 field
b .Lnext
.Lloop: add r7, r4
adds r4, #4 @ clears Z flag
#ifdef CONFIG_ARM_LPAE
ldrh ip, [r7]
ARM_BE8(rev16 ip, ip)
tst ip, #0x200 @ MOVW has bit 9 set, MVN has it clear
bne 0f @ skip to MOVW handling (Z flag is clear)
bic ip, #0x20 @ clear bit 5 (MVN -> MOV)
orr ip, ip, r0, lsr #16 @ MOV -> MVN if offset < 0
ARM_BE8(rev16 ip, ip)
strh ip, [r7]
@ Z flag is set
0:
#endif
ldrh ip, [r7, #2]
ARM_BE8(rev16 ip, ip)
and ip, #0xf00 @ clear everything except Rd field
orreq ip, r0 @ Z flag set -> MOV/MVN -> patch in high bits
orrne ip, r6 @ Z flag clear -> MOVW -> patch in low bits
ARM_BE8(rev16 ip, ip)
strh ip, [r7, #2]
#else
#ifdef CONFIG_CPU_ENDIAN_BE8
@ in BE8, we load data in BE, but instructions still in LE
#define PV_BIT24 0x00000001
#define PV_IMM8_MASK 0xff000000
#define PV_IMMR_MSB 0x00080000
#else
#define PV_BIT24 0x01000000
#define PV_IMM8_MASK 0x000000ff
#define PV_IMMR_MSB 0x00000800
#endif
@
@ The ARM versions of the patchable sequences are
@
@ phys-to-virt: sub <VA>, <PA>, #offset<31:24>, lsl #24
@ sub <VA>, <PA>, #offset<23:16>, lsl #16
@
@ virt-to-phys (non-LPAE): add <PA>, <VA>, #offset<31:24>, lsl #24
@ add <PA>, <VA>, #offset<23:16>, lsl #16
@
@ virt-to-phys (LPAE): movw <reg>, #offset<31:20>
@ adds <PAlo>, <VA>, <reg>, lsl #20
@ mov <PAhi>, #offset<39:32>
@ adc <PAhi>, <PAhi>, #0
@
@ In the non-LPAE case, all patchable instructions are ADD or SUB
@ instructions, where we need to patch in the offset into the
@ immediate field of the opcode, which is emitted with the correct
@ rotation value. (The effective value of the immediate is imm12<7:0>
@ rotated right by [2 * imm12<11:8>] bits)
@
@ 31 28 27 23 22 20 19 16 15 12 11 0
@ +------+-----------------+------+------+-------+
@ ADD | cond | 0 0 1 0 1 0 0 0 | Rn | Rd | imm12 |
@ SUB | cond | 0 0 1 0 0 1 0 0 | Rn | Rd | imm12 |
@ MOV | cond | 0 0 1 1 1 0 1 0 | Rn | Rd | imm12 |
@ MVN | cond | 0 0 1 1 1 1 1 0 | Rn | Rd | imm12 |
@ +------+-----------------+------+------+-------+
@
@ In the LPAE case, we use a MOVW instruction to carry the low offset
@ word, and patch in the high word of the offset into the immediate
@ field of the subsequent MOV instruction, or patch it to a MVN
@ instruction if the offset is negative. We can distinguish MOVW
@ instructions based on bits 23:22 of the opcode, and ADD/SUB can be
@ distinguished from MOV/MVN (all using the encodings above) using
@ bit 24.
@
@ 31 28 27 23 22 20 19 16 15 12 11 0
@ +------+-----------------+------+------+-------+
@ MOVW | cond | 0 0 1 1 0 0 0 0 | imm4 | Rd | imm12 |
@ +------+-----------------+------+------+-------+
@
moveq r0, #0x400000 @ set bit 22, mov to mvn instruction
mov r3, r6, lsr #16 @ put offset bits 31-16 into r3
mov r6, r6, lsr #24 @ put offset bits 31-24 into r6
and r3, r3, #0xf0 @ only keep offset bits 23-20 in r3
b .Lnext
.Lloop: ldr ip, [r7, r4]
#ifdef CONFIG_ARM_LPAE
tst ip, #PV_BIT24 @ ADD/SUB have bit 24 clear
beq 1f
ARM_BE8(rev ip, ip)
tst ip, #0xc00000 @ MOVW has bits 23:22 clear
bic ip, ip, #0x400000 @ clear bit 22
bfc ip, #0, #12 @ clear imm12 field of MOV[W] instruction
orreq ip, ip, r6, lsl #4 @ MOVW -> mask in offset bits 31-24
orreq ip, ip, r3, lsr #4 @ MOVW -> mask in offset bits 23-20
orrne ip, ip, r0 @ MOV -> mask in offset bits 7-0 (or bit 22)
ARM_BE8(rev ip, ip)
b 2f
1:
#endif
tst ip, #PV_IMMR_MSB @ rotation value >= 16 ?
bic ip, ip, #PV_IMM8_MASK
orreq ip, ip, r6 ARM_BE8(, lsl #24) @ mask in offset bits 31-24
orrne ip, ip, r3 ARM_BE8(, lsl #24) @ mask in offset bits 23-20
2:
str ip, [r7, r4]
add r4, r4, #4
#endif
.Lnext:
cmp r4, r5
ldrcc r7, [r4] @ use branch for delay slot
bcc .Lloop
ret lr
ENDPROC(__fixup_a_pv_table)
ENTRY(fixup_pv_table)
stmfd sp!, {r4 - r7, lr}
mov r4, r0 @ r0 = table start
add r5, r0, r1 @ r1 = table size
bl __fixup_a_pv_table
ldmfd sp!, {r4 - r7, pc}
ENDPROC(fixup_pv_table)
.data
.align 2
.globl __pv_phys_pfn_offset
.type __pv_phys_pfn_offset, %object
__pv_phys_pfn_offset:
.word 0
.size __pv_phys_pfn_offset, . -__pv_phys_pfn_offset
.globl __pv_offset
.type __pv_offset, %object
__pv_offset:
.quad 0
.size __pv_offset, . -__pv_offset
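
For the ARM non-LPAE path, the loop above only needs the rotation field of each stub's imm12 to tell the two add/sub flavours apart before masking in the matching slice of the offset. A little-endian C model of that single patch step (the instruction words are hand-assembled examples, not taken from a real kernel image):

#include <stdint.h>
#include <stdio.h>

static uint32_t patch_arm_stub(uint32_t insn, uint32_t pv_offset)
{
	uint32_t imm_hi = pv_offset >> 24;		/* offset bits 31:24 */
	uint32_t imm_lo = (pv_offset >> 16) & 0xf0;	/* offset bits 23:20 */

	insn &= ~0xffu;			/* clear imm8 (PV_IMM8_MASK) */
	if (insn & 0x800)		/* rotation >= 16 (PV_IMMR_MSB)? */
		insn |= imm_lo;		/* this is the "lsl #16" instruction */
	else
		insn |= imm_hi;		/* this is the "lsl #24" instruction */
	return insn;
}

int main(void)
{
	uint32_t off = 0xbfe00000;	/* invented 2 MiB aligned offset */

	/* add r0, r1, #0x81000000 / add r0, r0, #0x810000, as emitted */
	printf("%#x\n", patch_arm_stub(0xe2810481, off));	/* imm8 -> 0xbf */
	printf("%#x\n", patch_arm_stub(0xe2800881, off));	/* imm8 -> 0xe0 */
	return 0;
}

Bit 20 of the offset is guaranteed zero by the 2 MiB alignment check in __fixup_pv_table, which is why keeping only bits 23:20 in the second immediate is safe.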