Commit fefaf32b authored by Marek Vavruša

lua/bpf: implemented emulated LD_ABS64

the DW LD_ABS|IND is prohibited, so it is
emulated as two W loads combined with a
shift and OR. this is slow, as it also has
to undo the ntohl semantics of the W loads,
but it makes dissection of 64-bit fields
with absolute addressing possible
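
a rough sketch of the expansion for a 64-bit load at offset off on a
little-endian host (illustrative only, not part of the patch; register
allocation is glossed over, since LD|ABS always writes R0 and the
temporary is spilled around the second load):

  tmp = ntohl(u32 at off+4)   -- LD|ABS|W
  tmp = bswap32(tmp)          -- ALU|END|TO_BE, undo the ntohl
  tmp = tmp << 32             -- LSH by 32
  dst = ntohl(u32 at off)     -- LD|ABS|W
  dst = bswap32(dst)          -- ALU|END|TO_BE, undo the ntohl
  dst = dst | tmp             -- OR; dst now equals a raw DW load at off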
parent 7addb8ac
@@ -296,53 +296,6 @@ local function bb_end(Vcomp)
end
end
local function LD_ABS(dst, off, w)
local dst_reg = vreg(dst, 0, true, builtins.width_type(w)) -- Reserve R0
-- assert(w < 8, 'NYI: LD_ABS64 is not supported') -- IMM64 has two IMM32 insns fused together
emit(BPF.LD + BPF.ABS + const_width[w], dst_reg, 0, 0, off)
end
local function LD_IND(dst, src, w, off)
local src_reg = vreg(src) -- Must materialize first in case dst == src
local dst_reg = vreg(dst, 0, true, builtins.width_type(w)) -- Reserve R0
emit(BPF.LD + BPF.IND + const_width[w], dst_reg, src_reg, 0, off or 0)
end
local function LD_FIELD(a, d, w, imm)
if imm then
LD_ABS(a, imm, w)
else
LD_IND(a, d, w)
end
end
-- @note: This is special-cased for now, as it expects the destination register to already be reserved
local function LD_IMM_X(dst_reg, src_type, imm, w)
if w == 8 then -- IMM64 must be done in two instructions with imm64 = (lo(imm32), hi(imm32))
emit(BPF.LD + const_width[w], dst_reg, src_type, 0, ffi.cast('uint32_t', imm))
-- Must shift in two steps as bit.lshift supports [0..31]
emit(0, 0, 0, 0, ffi.cast('uint32_t', bit.lshift(bit.lshift(imm, 16), 16)))
else
emit(BPF.LD + const_width[w], dst_reg, src_type, 0, imm)
end
end
local function LOAD(dst, src, off, vtype)
local base = V[src].const
assert(base.__dissector, 'NYI: load() on variable without a dissector')
-- Cast to different type if requested
vtype = vtype or base.__dissector
local w = ffi.sizeof(vtype)
assert(w <= 4, 'NYI: load() supports 1/2/4 bytes at a time only')
if base.off then -- Absolute address to payload
LD_ABS(dst, off + base.off, w)
else -- Indirect address to payload
LD_IND(dst, src, w, off)
end
V[dst].type = vtype
V[dst].const = nil -- Dissected value is not constant anymore
end
local function CMP_STR(a, b, op)
assert(op == 'JEQ' or op == 'JNE', 'NYI: stack/string comparison supports only == or ~=')
-- I have no better idea how to implement it than an unrolled XOR loop, as we can fix up only one JMP
@@ -463,7 +416,6 @@ local function ALU_REG(dst, a, b, op)
end
end
local function ALU_IMM_NV(dst, a, b, op)
-- Do DST = IMM(a) op VAR(b) where we can't invert because
-- the registers are u64 but immediates are u32, so complement
@@ -472,6 +424,71 @@ local function ALU_IMM_NV(dst, a, b, op)
ALU_REG(dst, stackslots+1, b, op)
end
local function LD_ABS(dst, off, w)
if w < 8 then
local dst_reg = vreg(dst, 0, true, builtins.width_type(w)) -- Reserve R0
emit(BPF.LD + BPF.ABS + const_width[w], dst_reg, 0, 0, off)
elseif w == 8 then
-- LD_ABS|IND prohibits DW, we need to do two W loads and combine them
local tmp_reg = vreg(stackslots, 0, true, builtins.width_type(w)) -- Reserve R0
emit(BPF.LD + BPF.ABS + const_width[4], tmp_reg, 0, 0, off + 4)
if ffi.abi('le') then -- LD_ABS has htonl() semantics, reverse
emit(BPF.ALU + BPF.END + BPF.TO_BE, tmp_reg, 0, 0, 32)
end
ALU_IMM(stackslots, stackslots, 32, 'LSH')
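-- Temporary now holds the 4 bytes at off+4 in its upper 32 bits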
local dst_reg = vreg(dst, 0, true, builtins.width_type(w)) -- Reserve R0, spill tmp variable
emit(BPF.LD + BPF.ABS + const_width[4], dst_reg, 0, 0, off)
if ffi.abi('le') then -- LD_ABS has htonl() semantics, reverse
emit(BPF.ALU + BPF.END + BPF.TO_BE, dst_reg, 0, 0, 32)
end
ALU_REG(dst, dst, stackslots, 'OR')
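-- dst now holds all 8 bytes; on LE hosts this matches a plain (non-byteswapped) DW load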
V[stackslots].reg = nil -- Free temporary registers
else
assert(w < 8, 'NYI: only LD_ABS of 1/2/4/8 is supported')
end
end
local function LD_IND(dst, src, w, off)
local src_reg = vreg(src) -- Must materialize first in case dst == src
local dst_reg = vreg(dst, 0, true, builtins.width_type(w)) -- Reserve R0
emit(BPF.LD + BPF.IND + const_width[w], dst_reg, src_reg, 0, off or 0)
end
local function LD_FIELD(a, d, w, imm)
if imm then
LD_ABS(a, imm, w)
else
LD_IND(a, d, w)
end
end
-- @note: This is special-cased for now, as it expects the destination register to already be reserved
local function LD_IMM_X(dst_reg, src_type, imm, w)
if w == 8 then -- IMM64 must be done in two instructions with imm64 = (lo(imm32), hi(imm32))
emit(BPF.LD + const_width[w], dst_reg, src_type, 0, ffi.cast('uint32_t', imm))
-- Must shift in two steps as bit.lshift supports [0..31]
emit(0, 0, 0, 0, ffi.cast('uint32_t', bit.lshift(bit.lshift(imm, 16), 16)))
else
emit(BPF.LD + const_width[w], dst_reg, src_type, 0, imm)
end
end
local function LOAD(dst, src, off, vtype)
local base = V[src].const
assert(base.__dissector, 'NYI: load() on variable without a dissector')
-- Cast to different type if requested
vtype = vtype or base.__dissector
local w = ffi.sizeof(vtype)
assert(w <= 4, 'NYI: load() supports 1/2/4 bytes at a time only')
if base.off then -- Absolute address to payload
LD_ABS(dst, off + base.off, w)
else -- Indirect address to payload
LD_IND(dst, src, w, off)
end
V[dst].type = vtype
V[dst].const = nil -- Dissected value is not constant anymore
end
local function BUILTIN(func, ...)
local builtin_export = {
-- Compiler primitives (work with variable slots, emit instructions)