Commit 634f4b0a authored by 4ast's avatar 4ast Committed by GitHub

Merge pull request #1180 from iovisor/scanf_string_support

Enable recursive scanf support for char[] as string
parents 2923e204 37ff0e32
......@@ -153,24 +153,119 @@ static void debug_printf(Module *mod, IRBuilder<> &B, const string &fmt, vector<
B.CreateCall(fprintf_fn, args);
}
static void finish_sscanf(IRBuilder<> &B, vector<Value *> *args, string *fmt,
const map<string, Value *> &locals, bool exact_args) {
// fmt += "%n";
// int nread = 0;
// int n = sscanf(s, fmt, args..., &nread);
// if (n < 0) return -1;
// s = &s[nread];
Value *sptr = locals.at("sptr");
Value *nread = locals.at("nread");
Function *cur_fn = B.GetInsertBlock()->getParent();
Function *sscanf_fn = B.GetInsertBlock()->getModule()->getFunction("sscanf");
*fmt += "%n";
B.CreateStore(B.getInt32(0), nread);
GlobalVariable *fmt_gvar = B.CreateGlobalString(*fmt, "fmt");
(*args)[1] = B.CreateInBoundsGEP(fmt_gvar, {B.getInt64(0), B.getInt64(0)});
(*args)[0] = B.CreateLoad(sptr);
args->push_back(nread);
CallInst *call = B.CreateCall(sscanf_fn, *args);
call->setTailCall(true);
BasicBlock *label_true = BasicBlock::Create(B.getContext(), "", cur_fn);
BasicBlock *label_false = BasicBlock::Create(B.getContext(), "", cur_fn);
// exact_args means fail if don't consume exact number of "%" inputs
// exact_args is disabled for string parsing (empty case)
Value *cond = exact_args ? B.CreateICmpNE(call, B.getInt32(args->size() - 3))
: B.CreateICmpSLT(call, B.getInt32(0));
B.CreateCondBr(cond, label_true, label_false);
B.SetInsertPoint(label_true);
B.CreateRet(B.getInt32(-1));
B.SetInsertPoint(label_false);
// s = &s[nread];
B.CreateStore(
B.CreateInBoundsGEP(B.CreateLoad(sptr), B.CreateLoad(nread, true)), sptr);
args->resize(2);
fmt->clear();
}
// recursive helper to capture the arguments
static void parse_type(IRBuilder<> &B, vector<Value *> *args, string *fmt,
Type *type, Value *out, bool is_writer) {
Type *type, Value *out,
const map<string, Value *> &locals, bool is_writer) {
if (StructType *st = dyn_cast<StructType>(type)) {
*fmt += "{ ";
unsigned idx = 0;
for (auto field : st->elements()) {
parse_type(B, args, fmt, field, B.CreateStructGEP(type, out, idx++), is_writer);
parse_type(B, args, fmt, field, B.CreateStructGEP(type, out, idx++),
locals, is_writer);
*fmt += " ";
}
*fmt += "}";
} else if (ArrayType *at = dyn_cast<ArrayType>(type)) {
if (at->getElementType() == B.getInt8Ty()) {
// treat i8[] as a char string instead of as an array of u8's
if (is_writer) {
*fmt += "\"%s\"";
args->push_back(out);
} else {
// When reading strings, scanf doesn't support empty "", so we need to
// break this up into multiple scanf calls. To understand it, let's take
// an example:
// struct Event {
// u32 a;
// struct {
// char x[64];
// int y;
// } b[2];
// u32 c;
// };
// The writer string would look like:
// "{ 0x%x [ { \"%s\" 0x%x } { \"%s\" 0x%x } ] 0x%x }"
// But the reader string needs to restart at each \"\".
// reader0(const char *s, struct Event *val) {
// int nread, rc;
// nread = 0;
// rc = sscanf(s, "{ %i [ { \"%n", &val->a, &nread);
// if (rc != 1) return -1;
// s += nread; nread = 0;
// rc = sscanf(s, "%[^\"]%n", &val->b[0].x, &nread);
// if (rc < 0) return -1;
// s += nread; nread = 0;
// rc = sscanf(s, "\" %i } { \"%n", &val->b[0].y, &nread);
// if (rc != 1) return -1;
// s += nread; nread = 0;
// rc = sscanf(s, "%[^\"]%n", &val->b[1].x, &nread);
// if (rc < 0) return -1;
// s += nread; nread = 0;
// rc = sscanf(s, "\" %i } ] %i }%n", &val->b[1].y, &val->c, &nread);
// if (rc != 2) return -1;
// s += nread; nread = 0;
// return 0;
// }
*fmt += "\"";
finish_sscanf(B, args, fmt, locals, true);
*fmt = "%[^\"]";
args->push_back(out);
finish_sscanf(B, args, fmt, locals, false);
*fmt = "\"";
}
} else {
*fmt += "[ ";
for (size_t i = 0; i < at->getNumElements(); ++i) {
parse_type(B, args, fmt, at->getElementType(), B.CreateStructGEP(type, out, i), is_writer);
parse_type(B, args, fmt, at->getElementType(),
B.CreateStructGEP(type, out, i), locals, is_writer);
*fmt += " ";
}
*fmt += "]";
}
} else if (isa<PointerType>(type)) {
*fmt += "0xl";
if (is_writer)
......@@ -196,6 +291,21 @@ static void parse_type(IRBuilder<> &B, vector<Value *> *args, string *fmt,
}
}
// make_reader generates a dynamic function in the instruction set of the host
// (not bpf) that is able to convert c-strings in the pretty-print format of
// make_writer back into binary representations. The encoding of the string
// takes the llvm ir structure format, which closely maps the c structure but
// not exactly (no support for unions for instance).
// The general algorithm is:
// pod types (u8..u64) <= %i
// array types
// u8[] no nested quotes :( <= "..."
// !u8[] <= [ %i %i ... ]
// struct types
// struct { u8 a; u64 b; } <= { %i %i }
// nesting is supported
// struct { struct { u8 a[]; }; } <= { "" }
// struct { struct { u64 a[]; }; } <= { [ %i %i .. ] }
string BPFModule::make_reader(Module *mod, Type *type) {
auto fn_it = readers_.find(type);
if (fn_it != readers_.end())
......@@ -209,6 +319,16 @@ string BPFModule::make_reader(Module *mod, Type *type) {
IRBuilder<> B(*ctx_);
FunctionType *sscanf_fn_type = FunctionType::get(
B.getInt32Ty(), {B.getInt8PtrTy(), B.getInt8PtrTy()}, /*isVarArg=*/true);
Function *sscanf_fn = mod->getFunction("sscanf");
if (!sscanf_fn) {
sscanf_fn = Function::Create(sscanf_fn_type, GlobalValue::ExternalLinkage,
"sscanf", mod);
sscanf_fn->setCallingConv(CallingConv::C);
sscanf_fn->addFnAttr(Attribute::NoUnwind);
}
string name = "reader" + std::to_string(readers_.size());
vector<Type *> fn_args({B.getInt8PtrTy(), PointerType::getUnqual(type)});
FunctionType *fn_type = FunctionType::get(B.getInt32Ty(), fn_args, /*isVarArg=*/false);
......@@ -223,46 +343,41 @@ string BPFModule::make_reader(Module *mod, Type *type) {
arg_out->setName("out");
BasicBlock *label_entry = BasicBlock::Create(*ctx_, "entry", fn);
BasicBlock *label_exit = BasicBlock::Create(*ctx_, "exit", fn);
B.SetInsertPoint(label_entry);
vector<Value *> args({arg_in, nullptr});
Value *nread = B.CreateAlloca(B.getInt32Ty());
Value *sptr = B.CreateAlloca(B.getInt8PtrTy());
map<string, Value *> locals{{"nread", nread}, {"sptr", sptr}};
B.CreateStore(arg_in, sptr);
vector<Value *> args({nullptr, nullptr});
string fmt;
parse_type(B, &args, &fmt, type, arg_out, false);
GlobalVariable *fmt_gvar = B.CreateGlobalString(fmt, "fmt");
args[1] = B.CreateInBoundsGEP(fmt_gvar, vector<Value *>({B.getInt64(0), B.getInt64(0)}));
parse_type(B, &args, &fmt, type, arg_out, locals, false);
if (0)
debug_printf(mod, B, "%p %p\n", vector<Value *>({arg_in, arg_out}));
vector<Type *> sscanf_fn_args({B.getInt8PtrTy(), B.getInt8PtrTy()});
FunctionType *sscanf_fn_type = FunctionType::get(B.getInt32Ty(), sscanf_fn_args, /*isVarArg=*/true);
Function *sscanf_fn = mod->getFunction("sscanf");
if (!sscanf_fn)
sscanf_fn = Function::Create(sscanf_fn_type, GlobalValue::ExternalLinkage, "sscanf", mod);
sscanf_fn->setCallingConv(CallingConv::C);
sscanf_fn->addFnAttr(Attribute::NoUnwind);
CallInst *call = B.CreateCall(sscanf_fn, args);
call->setTailCall(true);
BasicBlock *label_then = BasicBlock::Create(*ctx_, "then", fn);
Value *is_neq = B.CreateICmpNE(call, B.getInt32(args.size() - 2));
B.CreateCondBr(is_neq, label_then, label_exit);
B.SetInsertPoint(label_then);
B.CreateRet(B.getInt32(-1));
finish_sscanf(B, &args, &fmt, locals, true);
B.SetInsertPoint(label_exit);
B.CreateRet(B.getInt32(0));
readers_[type] = name;
return name;
}
// make_writer generates a dynamic function in the instruction set of the host
// (not bpf) that is able to pretty-print key/leaf entries as a c-string. The
// encoding of the string takes the llvm ir structure format, which closely maps
// the c structure but not exactly (no support for unions for instance).
// The general algorithm is:
// pod types (u8..u64) => 0x%x
// array types
// u8[] => "..."
// !u8[] => [ 0x%x 0x%x ... ]
// struct types
// struct { u8 a; u64 b; } => { 0x%x 0x%x }
// nesting is supported
// struct { struct { u8 a[]; }; } => { "" }
// struct { struct { u64 a[]; }; } => { [ 0x%x 0x%x .. ] }
string BPFModule::make_writer(Module *mod, Type *type) {
auto fn_it = writers_.find(type);
if (fn_it != writers_.end())
......@@ -293,9 +408,12 @@ string BPFModule::make_writer(Module *mod, Type *type) {
BasicBlock *label_entry = BasicBlock::Create(*ctx_, "entry", fn);
B.SetInsertPoint(label_entry);
map<string, Value *> locals{
{"nread", B.CreateAlloca(B.getInt64Ty())},
};
vector<Value *> args({arg_out, B.CreateZExt(arg_len, B.getInt64Ty()), nullptr});
string fmt;
parse_type(B, &args, &fmt, type, arg_in, true);
parse_type(B, &args, &fmt, type, arg_in, locals, true);
GlobalVariable *fmt_gvar = B.CreateGlobalString(fmt, "fmt");
......
......@@ -121,6 +121,38 @@ BPF_HASH(stats, int, struct { u32 a[3]; u32 b; }, 10);
self.assertEqual(l.a[2], 3)
self.assertEqual(l.b, 4)
def test_sscanf_string(self):
text = """
struct Symbol {
char name[128];
char path[128];
};
struct Event {
uint32_t pid;
uint32_t tid;
struct Symbol stack[64];
};
BPF_TABLE("array", int, struct Event, comms, 1);
"""
b = BPF(text=text)
t = b.get_table("comms")
s1 = t.leaf_sprintf(t[0])
fill = b' { "" "" }' * 63
self.assertEqual(s1, b'{ 0x0 0x0 [ { "" "" }%s ] }' % fill)
l = t.Leaf(1, 2)
name = b"libxyz"
path = b"/usr/lib/libxyz.so"
l.stack[0].name = name
l.stack[0].path = path
s2 = t.leaf_sprintf(l)
self.assertEqual(s2,
b'{ 0x1 0x2 [ { "%s" "%s" }%s ] }' % (name, path, fill))
l = t.leaf_scanf(s2)
self.assertEqual(l.pid, 1)
self.assertEqual(l.tid, 2)
self.assertEqual(l.stack[0].name, name)
self.assertEqual(l.stack[0].path, path)
def test_iosnoop(self):
text = """
#include <linux/blkdev.h>
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment