Commit 00198dab authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf fixes from Ingo Molnar:
 "On the kernel side there's two x86 PMU driver fixes and a uprobes fix,
  plus on the tooling side there's a number of fixes and some late
  updates"

* 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (36 commits)
  perf sched timehist: Fix invalid period calculation
  perf sched timehist: Remove hardcoded 'comm_width' check at print_summary
  perf sched timehist: Enlarge default 'comm_width'
  perf sched timehist: Honour 'comm_width' when aligning the headers
  perf/x86: Fix overlap counter scheduling bug
  perf/x86/pebs: Fix handling of PEBS buffer overflows
  samples/bpf: Move open_raw_sock to separate header
  samples/bpf: Remove perf_event_open() declaration
  samples/bpf: Be consistent with bpf_load_program bpf_insn parameter
  tools lib bpf: Add bpf_prog_{attach,detach}
  samples/bpf: Switch over to libbpf
  perf diff: Do not overwrite valid build id
  perf annotate: Don't throw error for zero length symbols
  perf bench futex: Fix lock-pi help string
  perf trace: Check if MAP_32BIT is defined (again)
  samples/bpf: Make perf_event_read() static
  uprobes: Fix uprobes on MIPS, allow for a cache flush after ixol breakpoint creation
  samples/bpf: Make samples more libbpf-centric
  tools lib bpf: Add flags to bpf_create_map()
  tools lib bpf: use __u32 from linux/types.h
  ...
parents 9004fda5 3705b975
...@@ -2110,6 +2110,27 @@ static int intel_pmu_handle_irq(struct pt_regs *regs) ...@@ -2110,6 +2110,27 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
GLOBAL_STATUS_LBRS_FROZEN); GLOBAL_STATUS_LBRS_FROZEN);
if (!status) if (!status)
goto done; goto done;
/*
* In case multiple PEBS events are sampled at the same time,
* it is possible to have GLOBAL_STATUS bit 62 set indicating
* PEBS buffer overflow and also seeing at most 3 PEBS counters
* having their bits set in the status register. This is a sign
* that there was at least one PEBS record pending at the time
* of the PMU interrupt. PEBS counters must only be processed
* via the drain_pebs() calls and not via the regular sample
* processing loop coming later in the function, otherwise
* phony regular samples may be generated in the sampling buffer
* not marked with the EXACT tag. Another possibility is to have
* one PEBS event and at least one non-PEBS event which overflows
* while PEBS is armed. In this case, bit 62 of GLOBAL_STATUS will
* not be set, yet the overflow status bit for the PEBS counter will
* be set on Skylake.
*
* To avoid this problem, we systematically ignore the PEBS-enabled
* counters from the GLOBAL_STATUS mask and we always process PEBS
* events via drain_pebs().
*/
status &= ~cpuc->pebs_enabled;
/* /*
* PEBS overflow sets bit 62 in the global status register * PEBS overflow sets bit 62 in the global status register
...@@ -2117,15 +2138,6 @@ static int intel_pmu_handle_irq(struct pt_regs *regs) ...@@ -2117,15 +2138,6 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
if (__test_and_clear_bit(62, (unsigned long *)&status)) { if (__test_and_clear_bit(62, (unsigned long *)&status)) {
handled++; handled++;
x86_pmu.drain_pebs(regs); x86_pmu.drain_pebs(regs);
/*
* There are cases where, even though, the PEBS ovfl bit is set
* in GLOBAL_OVF_STATUS, the PEBS events may also have their
* overflow bits set for their counters. We must clear them
* here because they have been processed as exact samples in
* the drain_pebs() routine. They must not be processed again
* in the for_each_bit_set() loop for regular samples below.
*/
status &= ~cpuc->pebs_enabled;
status &= x86_pmu.intel_ctrl | GLOBAL_STATUS_TRACE_TOPAPMI; status &= x86_pmu.intel_ctrl | GLOBAL_STATUS_TRACE_TOPAPMI;
} }
......
...@@ -669,7 +669,7 @@ static struct event_constraint snbep_uncore_cbox_constraints[] = { ...@@ -669,7 +669,7 @@ static struct event_constraint snbep_uncore_cbox_constraints[] = {
UNCORE_EVENT_CONSTRAINT(0x1c, 0xc), UNCORE_EVENT_CONSTRAINT(0x1c, 0xc),
UNCORE_EVENT_CONSTRAINT(0x1d, 0xc), UNCORE_EVENT_CONSTRAINT(0x1d, 0xc),
UNCORE_EVENT_CONSTRAINT(0x1e, 0xc), UNCORE_EVENT_CONSTRAINT(0x1e, 0xc),
EVENT_CONSTRAINT_OVERLAP(0x1f, 0xe, 0xff), UNCORE_EVENT_CONSTRAINT(0x1f, 0xe),
UNCORE_EVENT_CONSTRAINT(0x21, 0x3), UNCORE_EVENT_CONSTRAINT(0x21, 0x3),
UNCORE_EVENT_CONSTRAINT(0x23, 0x3), UNCORE_EVENT_CONSTRAINT(0x23, 0x3),
UNCORE_EVENT_CONSTRAINT(0x31, 0x3), UNCORE_EVENT_CONSTRAINT(0x31, 0x3),
......
...@@ -1194,7 +1194,7 @@ static struct xol_area *__create_xol_area(unsigned long vaddr) ...@@ -1194,7 +1194,7 @@ static struct xol_area *__create_xol_area(unsigned long vaddr)
/* Reserve the 1st slot for get_trampoline_vaddr() */ /* Reserve the 1st slot for get_trampoline_vaddr() */
set_bit(0, area->bitmap); set_bit(0, area->bitmap);
atomic_set(&area->slot_count, 1); atomic_set(&area->slot_count, 1);
copy_to_page(area->pages[0], 0, &insn, UPROBE_SWBP_INSN_SIZE); arch_uprobe_copy_ixol(area->pages[0], 0, &insn, UPROBE_SWBP_INSN_SIZE);
if (!xol_add_vma(mm, area)) if (!xol_add_vma(mm, area))
return area; return area;
......
...@@ -35,40 +35,43 @@ hostprogs-y += tc_l2_redirect ...@@ -35,40 +35,43 @@ hostprogs-y += tc_l2_redirect
hostprogs-y += lwt_len_hist hostprogs-y += lwt_len_hist
hostprogs-y += xdp_tx_iptunnel hostprogs-y += xdp_tx_iptunnel
test_lru_dist-objs := test_lru_dist.o libbpf.o # Libbpf dependencies
sock_example-objs := sock_example.o libbpf.o LIBBPF := ../../tools/lib/bpf/bpf.o
fds_example-objs := bpf_load.o libbpf.o fds_example.o
sockex1-objs := bpf_load.o libbpf.o sockex1_user.o test_lru_dist-objs := test_lru_dist.o $(LIBBPF)
sockex2-objs := bpf_load.o libbpf.o sockex2_user.o sock_example-objs := sock_example.o $(LIBBPF)
sockex3-objs := bpf_load.o libbpf.o sockex3_user.o fds_example-objs := bpf_load.o $(LIBBPF) fds_example.o
tracex1-objs := bpf_load.o libbpf.o tracex1_user.o sockex1-objs := bpf_load.o $(LIBBPF) sockex1_user.o
tracex2-objs := bpf_load.o libbpf.o tracex2_user.o sockex2-objs := bpf_load.o $(LIBBPF) sockex2_user.o
tracex3-objs := bpf_load.o libbpf.o tracex3_user.o sockex3-objs := bpf_load.o $(LIBBPF) sockex3_user.o
tracex4-objs := bpf_load.o libbpf.o tracex4_user.o tracex1-objs := bpf_load.o $(LIBBPF) tracex1_user.o
tracex5-objs := bpf_load.o libbpf.o tracex5_user.o tracex2-objs := bpf_load.o $(LIBBPF) tracex2_user.o
tracex6-objs := bpf_load.o libbpf.o tracex6_user.o tracex3-objs := bpf_load.o $(LIBBPF) tracex3_user.o
test_probe_write_user-objs := bpf_load.o libbpf.o test_probe_write_user_user.o tracex4-objs := bpf_load.o $(LIBBPF) tracex4_user.o
trace_output-objs := bpf_load.o libbpf.o trace_output_user.o tracex5-objs := bpf_load.o $(LIBBPF) tracex5_user.o
lathist-objs := bpf_load.o libbpf.o lathist_user.o tracex6-objs := bpf_load.o $(LIBBPF) tracex6_user.o
offwaketime-objs := bpf_load.o libbpf.o offwaketime_user.o test_probe_write_user-objs := bpf_load.o $(LIBBPF) test_probe_write_user_user.o
spintest-objs := bpf_load.o libbpf.o spintest_user.o trace_output-objs := bpf_load.o $(LIBBPF) trace_output_user.o
map_perf_test-objs := bpf_load.o libbpf.o map_perf_test_user.o lathist-objs := bpf_load.o $(LIBBPF) lathist_user.o
test_overhead-objs := bpf_load.o libbpf.o test_overhead_user.o offwaketime-objs := bpf_load.o $(LIBBPF) offwaketime_user.o
test_cgrp2_array_pin-objs := libbpf.o test_cgrp2_array_pin.o spintest-objs := bpf_load.o $(LIBBPF) spintest_user.o
test_cgrp2_attach-objs := libbpf.o test_cgrp2_attach.o map_perf_test-objs := bpf_load.o $(LIBBPF) map_perf_test_user.o
test_cgrp2_attach2-objs := libbpf.o test_cgrp2_attach2.o cgroup_helpers.o test_overhead-objs := bpf_load.o $(LIBBPF) test_overhead_user.o
test_cgrp2_sock-objs := libbpf.o test_cgrp2_sock.o test_cgrp2_array_pin-objs := $(LIBBPF) test_cgrp2_array_pin.o
test_cgrp2_sock2-objs := bpf_load.o libbpf.o test_cgrp2_sock2.o test_cgrp2_attach-objs := $(LIBBPF) test_cgrp2_attach.o
xdp1-objs := bpf_load.o libbpf.o xdp1_user.o test_cgrp2_attach2-objs := $(LIBBPF) test_cgrp2_attach2.o cgroup_helpers.o
test_cgrp2_sock-objs := $(LIBBPF) test_cgrp2_sock.o
test_cgrp2_sock2-objs := bpf_load.o $(LIBBPF) test_cgrp2_sock2.o
xdp1-objs := bpf_load.o $(LIBBPF) xdp1_user.o
# reuse xdp1 source intentionally # reuse xdp1 source intentionally
xdp2-objs := bpf_load.o libbpf.o xdp1_user.o xdp2-objs := bpf_load.o $(LIBBPF) xdp1_user.o
test_current_task_under_cgroup-objs := bpf_load.o libbpf.o cgroup_helpers.o \ test_current_task_under_cgroup-objs := bpf_load.o $(LIBBPF) cgroup_helpers.o \
test_current_task_under_cgroup_user.o test_current_task_under_cgroup_user.o
trace_event-objs := bpf_load.o libbpf.o trace_event_user.o trace_event-objs := bpf_load.o $(LIBBPF) trace_event_user.o
sampleip-objs := bpf_load.o libbpf.o sampleip_user.o sampleip-objs := bpf_load.o $(LIBBPF) sampleip_user.o
tc_l2_redirect-objs := bpf_load.o libbpf.o tc_l2_redirect_user.o tc_l2_redirect-objs := bpf_load.o $(LIBBPF) tc_l2_redirect_user.o
lwt_len_hist-objs := bpf_load.o libbpf.o lwt_len_hist_user.o lwt_len_hist-objs := bpf_load.o $(LIBBPF) lwt_len_hist_user.o
xdp_tx_iptunnel-objs := bpf_load.o libbpf.o xdp_tx_iptunnel_user.o xdp_tx_iptunnel-objs := bpf_load.o $(LIBBPF) xdp_tx_iptunnel_user.o
# Tell kbuild to always build the programs # Tell kbuild to always build the programs
always := $(hostprogs-y) always := $(hostprogs-y)
...@@ -104,7 +107,10 @@ always += lwt_len_hist_kern.o ...@@ -104,7 +107,10 @@ always += lwt_len_hist_kern.o
always += xdp_tx_iptunnel_kern.o always += xdp_tx_iptunnel_kern.o
HOSTCFLAGS += -I$(objtree)/usr/include HOSTCFLAGS += -I$(objtree)/usr/include
HOSTCFLAGS += -I$(srctree)/tools/lib/
HOSTCFLAGS += -I$(srctree)/tools/testing/selftests/bpf/ HOSTCFLAGS += -I$(srctree)/tools/testing/selftests/bpf/
HOSTCFLAGS += -I$(srctree)/tools/lib/ -I$(srctree)/tools/include
HOSTCFLAGS += -I$(srctree)/tools/perf
HOSTCFLAGS_bpf_load.o += -I$(objtree)/usr/include -Wno-unused-variable HOSTCFLAGS_bpf_load.o += -I$(objtree)/usr/include -Wno-unused-variable
HOSTLOADLIBES_fds_example += -lelf HOSTLOADLIBES_fds_example += -lelf
......
eBPF sample programs eBPF sample programs
==================== ====================
This directory contains a mini eBPF library, test stubs, verifier This directory contains test stubs, a verifier test-suite and examples
test-suite and examples for using eBPF. for using eBPF. The examples use libbpf from tools/lib/bpf.
Build dependencies Build dependencies
================== ==================
......
...@@ -22,25 +22,34 @@ ...@@ -22,25 +22,34 @@
#include <poll.h> #include <poll.h>
#include <ctype.h> #include <ctype.h>
#include "libbpf.h" #include "libbpf.h"
#include "bpf_helpers.h"
#include "bpf_load.h" #include "bpf_load.h"
#include "perf-sys.h"
#define DEBUGFS "/sys/kernel/debug/tracing/" #define DEBUGFS "/sys/kernel/debug/tracing/"
static char license[128]; static char license[128];
static int kern_version; static int kern_version;
static bool processed_sec[128]; static bool processed_sec[128];
char bpf_log_buf[BPF_LOG_BUF_SIZE];
int map_fd[MAX_MAPS]; int map_fd[MAX_MAPS];
int prog_fd[MAX_PROGS]; int prog_fd[MAX_PROGS];
int event_fd[MAX_PROGS]; int event_fd[MAX_PROGS];
int prog_cnt; int prog_cnt;
int prog_array_fd = -1; int prog_array_fd = -1;
/*
 * Map definition record: map type, key size, value size, maximum number
 * of entries and creation flags, all as unsigned int.
 * NOTE(review): presumably filled in from the map section of the loaded
 * object file - confirm against the code that consumes this struct.
 */
struct bpf_map_def {
unsigned int type;
unsigned int key_size;
unsigned int value_size;
unsigned int max_entries;
unsigned int map_flags;
};
static int populate_prog_array(const char *event, int prog_fd) static int populate_prog_array(const char *event, int prog_fd)
{ {
int ind = atoi(event), err; int ind = atoi(event), err;
err = bpf_update_elem(prog_array_fd, &ind, &prog_fd, BPF_ANY); err = bpf_map_update_elem(prog_array_fd, &ind, &prog_fd, BPF_ANY);
if (err < 0) { if (err < 0) {
printf("failed to store prog_fd in prog_array\n"); printf("failed to store prog_fd in prog_array\n");
return -1; return -1;
...@@ -58,6 +67,7 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size) ...@@ -58,6 +67,7 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
bool is_perf_event = strncmp(event, "perf_event", 10) == 0; bool is_perf_event = strncmp(event, "perf_event", 10) == 0;
bool is_cgroup_skb = strncmp(event, "cgroup/skb", 10) == 0; bool is_cgroup_skb = strncmp(event, "cgroup/skb", 10) == 0;
bool is_cgroup_sk = strncmp(event, "cgroup/sock", 11) == 0; bool is_cgroup_sk = strncmp(event, "cgroup/sock", 11) == 0;
size_t insns_cnt = size / sizeof(struct bpf_insn);
enum bpf_prog_type prog_type; enum bpf_prog_type prog_type;
char buf[256]; char buf[256];
int fd, efd, err, id; int fd, efd, err, id;
...@@ -87,9 +97,10 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size) ...@@ -87,9 +97,10 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
return -1; return -1;
} }
fd = bpf_prog_load(prog_type, prog, size, license, kern_version); fd = bpf_load_program(prog_type, prog, insns_cnt, license, kern_version,
bpf_log_buf, BPF_LOG_BUF_SIZE);
if (fd < 0) { if (fd < 0) {
printf("bpf_prog_load() err=%d\n%s", errno, bpf_log_buf); printf("bpf_load_program() err=%d\n%s", errno, bpf_log_buf);
return -1; return -1;
} }
...@@ -169,7 +180,7 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size) ...@@ -169,7 +180,7 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
id = atoi(buf); id = atoi(buf);
attr.config = id; attr.config = id;
efd = perf_event_open(&attr, -1/*pid*/, 0/*cpu*/, -1/*group_fd*/, 0); efd = sys_perf_event_open(&attr, -1/*pid*/, 0/*cpu*/, -1/*group_fd*/, 0);
if (efd < 0) { if (efd < 0) {
printf("event %d fd %d err %s\n", id, efd, strerror(errno)); printf("event %d fd %d err %s\n", id, efd, strerror(errno));
return -1; return -1;
......
#ifndef __BPF_LOAD_H #ifndef __BPF_LOAD_H
#define __BPF_LOAD_H #define __BPF_LOAD_H
#include "libbpf.h"
#define MAX_MAPS 32 #define MAX_MAPS 32
#define MAX_PROGS 32 #define MAX_PROGS 32
extern int map_fd[MAX_MAPS]; extern int map_fd[MAX_MAPS];
extern int prog_fd[MAX_PROGS]; extern int prog_fd[MAX_PROGS];
extern int event_fd[MAX_PROGS]; extern int event_fd[MAX_PROGS];
extern char bpf_log_buf[BPF_LOG_BUF_SIZE];
extern int prog_cnt; extern int prog_cnt;
/* parses elf file compiled by llvm .c->.o /* parses elf file compiled by llvm .c->.o
......
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
#include "bpf_load.h" #include "bpf_load.h"
#include "libbpf.h" #include "libbpf.h"
#include "sock_example.h"
#define BPF_F_PIN (1 << 0) #define BPF_F_PIN (1 << 0)
#define BPF_F_GET (1 << 1) #define BPF_F_GET (1 << 1)
...@@ -49,17 +50,19 @@ static int bpf_map_create(void) ...@@ -49,17 +50,19 @@ static int bpf_map_create(void)
static int bpf_prog_create(const char *object) static int bpf_prog_create(const char *object)
{ {
static const struct bpf_insn insns[] = { static struct bpf_insn insns[] = {
BPF_MOV64_IMM(BPF_REG_0, 1), BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(), BPF_EXIT_INSN(),
}; };
size_t insns_cnt = sizeof(insns) / sizeof(struct bpf_insn);
if (object) { if (object) {
assert(!load_bpf_file((char *)object)); assert(!load_bpf_file((char *)object));
return prog_fd[0]; return prog_fd[0];
} else { } else {
return bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, return bpf_load_program(BPF_PROG_TYPE_SOCKET_FILTER,
insns, sizeof(insns), "GPL", 0); insns, insns_cnt, "GPL", 0,
bpf_log_buf, BPF_LOG_BUF_SIZE);
} }
} }
...@@ -83,12 +86,12 @@ static int bpf_do_map(const char *file, uint32_t flags, uint32_t key, ...@@ -83,12 +86,12 @@ static int bpf_do_map(const char *file, uint32_t flags, uint32_t key,
} }
if ((flags & BPF_F_KEY_VAL) == BPF_F_KEY_VAL) { if ((flags & BPF_F_KEY_VAL) == BPF_F_KEY_VAL) {
ret = bpf_update_elem(fd, &key, &value, 0); ret = bpf_map_update_elem(fd, &key, &value, 0);
printf("bpf: fd:%d u->(%u:%u) ret:(%d,%s)\n", fd, key, value, printf("bpf: fd:%d u->(%u:%u) ret:(%d,%s)\n", fd, key, value,
ret, strerror(errno)); ret, strerror(errno));
assert(ret == 0); assert(ret == 0);
} else if (flags & BPF_F_KEY) { } else if (flags & BPF_F_KEY) {
ret = bpf_lookup_elem(fd, &key, &value); ret = bpf_map_lookup_elem(fd, &key, &value);
printf("bpf: fd:%d l->(%u):%u ret:(%d,%s)\n", fd, key, value, printf("bpf: fd:%d l->(%u):%u ret:(%d,%s)\n", fd, key, value,
ret, strerror(errno)); ret, strerror(errno));
assert(ret == 0); assert(ret == 0);
......
...@@ -73,7 +73,7 @@ static void get_data(int fd) ...@@ -73,7 +73,7 @@ static void get_data(int fd)
for (c = 0; c < MAX_CPU; c++) { for (c = 0; c < MAX_CPU; c++) {
for (i = 0; i < MAX_ENTRIES; i++) { for (i = 0; i < MAX_ENTRIES; i++) {
key = c * MAX_ENTRIES + i; key = c * MAX_ENTRIES + i;
bpf_lookup_elem(fd, &key, &value); bpf_map_lookup_elem(fd, &key, &value);
cpu_hist[c].data[i] = value; cpu_hist[c].data[i] = value;
if (value > cpu_hist[c].max) if (value > cpu_hist[c].max)
......
/* eBPF mini library */
#include <stdlib.h>
#include <stdio.h>
#include <linux/unistd.h>
#include <unistd.h>
#include <string.h>
#include <linux/netlink.h>
#include <linux/bpf.h>
#include <errno.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <linux/if_packet.h>
#include <arpa/inet.h>
#include "libbpf.h"
/*
 * Convert a pointer into the 64-bit integer representation stored in
 * the pointer-carrying fields of union bpf_attr.
 *
 * The value goes through unsigned long first so the pointer is widened
 * as an unsigned address rather than cast straight to a 64-bit type.
 */
static __u64 ptr_to_u64(void *ptr)
{
	unsigned long addr = (unsigned long) ptr;

	return (__u64) addr;
}
/*
 * Create a BPF map via the BPF_MAP_CREATE command.
 *
 * @map_type:    kind of map to create
 * @key_size:    size of a key in bytes
 * @value_size:  size of a value in bytes
 * @max_entries: capacity of the map
 * @map_flags:   creation flags passed through unchanged
 *
 * Returns the raw result of the bpf syscall.
 */
int bpf_create_map(enum bpf_map_type map_type, int key_size, int value_size,
		   int max_entries, int map_flags)
{
	union bpf_attr attr;

	/*
	 * Clear the whole union before filling it in: a designated
	 * initializer only guarantees the named member is initialized, and
	 * bpf(2) expects every unused byte of the attribute to be zero.
	 */
	memset(&attr, 0, sizeof(attr));
	attr.map_type = map_type;
	attr.key_size = key_size;
	attr.value_size = value_size;
	attr.max_entries = max_entries;
	attr.map_flags = map_flags;

	return syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
}
/*
 * Insert or update one element of a map via BPF_MAP_UPDATE_ELEM.
 *
 * @fd:    map file descriptor
 * @key:   pointer to the key
 * @value: pointer to the value to store
 * @flags: update flags passed through unchanged
 *
 * Returns the raw result of the bpf syscall.
 */
int bpf_update_elem(int fd, void *key, void *value, unsigned long long flags)
{
	union bpf_attr attr;

	/*
	 * Zero the full union first; initializing only one member does not
	 * guarantee the remaining bytes are cleared, and bpf(2) expects
	 * unused attribute bytes to be zero.
	 */
	memset(&attr, 0, sizeof(attr));
	attr.map_fd = fd;
	attr.key = ptr_to_u64(key);
	attr.value = ptr_to_u64(value);
	attr.flags = flags;

	return syscall(__NR_bpf, BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
}
/*
 * Look up one element of a map via BPF_MAP_LOOKUP_ELEM.
 *
 * @fd:    map file descriptor
 * @key:   pointer to the key to look up
 * @value: buffer whose address is handed to the kernel for the result
 *
 * Returns the raw result of the bpf syscall.
 */
int bpf_lookup_elem(int fd, void *key, void *value)
{
	union bpf_attr attr;

	/*
	 * Zero the full union first; initializing only one member does not
	 * guarantee the remaining bytes are cleared, and bpf(2) expects
	 * unused attribute bytes to be zero.
	 */
	memset(&attr, 0, sizeof(attr));
	attr.map_fd = fd;
	attr.key = ptr_to_u64(key);
	attr.value = ptr_to_u64(value);

	return syscall(__NR_bpf, BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
}
/*
 * Delete one element of a map via BPF_MAP_DELETE_ELEM.
 *
 * @fd:  map file descriptor
 * @key: pointer to the key to remove
 *
 * Returns the raw result of the bpf syscall.
 */
int bpf_delete_elem(int fd, void *key)
{
	union bpf_attr attr;

	/*
	 * Zero the full union first; initializing only one member does not
	 * guarantee the remaining bytes are cleared, and bpf(2) expects
	 * unused attribute bytes to be zero.
	 */
	memset(&attr, 0, sizeof(attr));
	attr.map_fd = fd;
	attr.key = ptr_to_u64(key);

	return syscall(__NR_bpf, BPF_MAP_DELETE_ELEM, &attr, sizeof(attr));
}
/*
 * Fetch the key following @key in a map via BPF_MAP_GET_NEXT_KEY.
 *
 * @fd:       map file descriptor
 * @key:      pointer to the current key
 * @next_key: buffer whose address is handed to the kernel for the result
 *
 * Returns the raw result of the bpf syscall.
 */
int bpf_get_next_key(int fd, void *key, void *next_key)
{
	union bpf_attr attr;

	/*
	 * Zero the full union first; initializing only one member does not
	 * guarantee the remaining bytes are cleared, and bpf(2) expects
	 * unused attribute bytes to be zero.
	 */
	memset(&attr, 0, sizeof(attr));
	attr.map_fd = fd;
	attr.key = ptr_to_u64(key);
	attr.next_key = ptr_to_u64(next_key);

	return syscall(__NR_bpf, BPF_MAP_GET_NEXT_KEY, &attr, sizeof(attr));
}
#define ROUND_UP(x, n) (((x) + (n) - 1u) & ~((n) - 1u))

/* Shared buffer handed to the kernel as the log buffer on program load. */
char bpf_log_buf[LOG_BUF_SIZE];

/*
 * Load a BPF program via BPF_PROG_LOAD.
 *
 * @prog_type:    program type
 * @insns:        instruction array
 * @prog_len:     size of @insns in BYTES (converted to an instruction
 *                count here)
 * @license:      license string
 * @kern_version: kernel version value stored in the attribute
 *
 * bpf_log_buf is passed as the log buffer with log level 1, and its first
 * byte is reset before the call so no stale text from an earlier load is
 * left at its start.
 *
 * Returns the raw result of the bpf syscall.
 */
int bpf_prog_load(enum bpf_prog_type prog_type,
		  const struct bpf_insn *insns, int prog_len,
		  const char *license, int kern_version)
{
	union bpf_attr attr;

	/*
	 * Explicitly zero the whole union, padding included, instead of
	 * relying on how the compiler expands a partial initializer;
	 * bpf(2) expects unused attribute bytes to be zero.
	 */
	memset(&attr, 0, sizeof(attr));
	attr.prog_type = prog_type;
	attr.insns = ptr_to_u64((void *) insns);
	attr.insn_cnt = prog_len / sizeof(struct bpf_insn);
	attr.license = ptr_to_u64((void *) license);
	attr.log_buf = ptr_to_u64(bpf_log_buf);
	attr.log_size = LOG_BUF_SIZE;
	attr.log_level = 1;
	attr.kern_version = kern_version;

	bpf_log_buf[0] = 0;

	return syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
}
/*
 * Attach a loaded program to a target via BPF_PROG_ATTACH.
 *
 * @prog_fd:   file descriptor of the program to attach
 * @target_fd: file descriptor of the attach target
 * @type:      attach type
 *
 * Returns the raw result of the bpf syscall.
 */
int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type)
{
	union bpf_attr attr;

	/*
	 * Zero the full union first; initializing only one member does not
	 * guarantee the remaining bytes are cleared, and bpf(2) expects
	 * unused attribute bytes to be zero.
	 */
	memset(&attr, 0, sizeof(attr));
	attr.target_fd = target_fd;
	attr.attach_bpf_fd = prog_fd;
	attr.attach_type = type;

	return syscall(__NR_bpf, BPF_PROG_ATTACH, &attr, sizeof(attr));
}
/*
 * Detach a program from a target via BPF_PROG_DETACH.
 *
 * @target_fd: file descriptor of the attach target
 * @type:      attach type to detach
 *
 * Returns the raw result of the bpf syscall.
 */
int bpf_prog_detach(int target_fd, enum bpf_attach_type type)
{
	union bpf_attr attr;

	/*
	 * Zero the full union first; initializing only one member does not
	 * guarantee the remaining bytes are cleared, and bpf(2) expects
	 * unused attribute bytes to be zero.
	 */
	memset(&attr, 0, sizeof(attr));
	attr.target_fd = target_fd;
	attr.attach_type = type;

	return syscall(__NR_bpf, BPF_PROG_DETACH, &attr, sizeof(attr));
}
/*
 * Pin a BPF object at a path via BPF_OBJ_PIN.
 *
 * @fd:       file descriptor of the object to pin
 * @pathname: path to pin the object at
 *
 * Returns the raw result of the bpf syscall.
 */
int bpf_obj_pin(int fd, const char *pathname)
{
	union bpf_attr attr;

	/*
	 * Zero the full union first; initializing only one member does not
	 * guarantee the remaining bytes are cleared, and bpf(2) expects
	 * unused attribute bytes to be zero.
	 */
	memset(&attr, 0, sizeof(attr));
	attr.pathname = ptr_to_u64((void *)pathname);
	attr.bpf_fd = fd;

	return syscall(__NR_bpf, BPF_OBJ_PIN, &attr, sizeof(attr));
}
/*
 * Open a pinned BPF object by path via BPF_OBJ_GET.
 *
 * @pathname: path of the pinned object
 *
 * Returns the raw result of the bpf syscall.
 */
int bpf_obj_get(const char *pathname)
{
	union bpf_attr attr;

	/*
	 * Zero the full union first; initializing only one member does not
	 * guarantee the remaining bytes are cleared, and bpf(2) expects
	 * unused attribute bytes to be zero.
	 */
	memset(&attr, 0, sizeof(attr));
	attr.pathname = ptr_to_u64((void *)pathname);

	return syscall(__NR_bpf, BPF_OBJ_GET, &attr, sizeof(attr));
}
/*
 * Create a non-blocking, close-on-exec raw packet socket for all
 * protocols (ETH_P_ALL) and bind it to the interface called @name.
 *
 * Returns the socket descriptor, or -1 after printing a message when
 * either socket() or bind() fails. The socket is closed on bind failure.
 */
int open_raw_sock(const char *name)
{
	const int type = SOCK_RAW | SOCK_NONBLOCK | SOCK_CLOEXEC;
	struct sockaddr_ll addr;
	int fd = socket(PF_PACKET, type, htons(ETH_P_ALL));

	if (fd < 0) {
		printf("cannot create raw socket\n");
		return -1;
	}

	/* Start from an all-zero address, then fill in the link-layer fields. */
	memset(&addr, 0, sizeof(addr));
	addr.sll_family = AF_PACKET;
	addr.sll_ifindex = if_nametoindex(name);
	addr.sll_protocol = htons(ETH_P_ALL);

	if (bind(fd, (struct sockaddr *)&addr, sizeof(addr)) >= 0)
		return fd;

	/* errno is read for the message before close() can overwrite it. */
	printf("bind to %s: %s\n", name, strerror(errno));
	close(fd);
	return -1;
}
/*
 * Thin wrapper around the perf_event_open syscall: every argument is
 * forwarded unchanged and the syscall result is returned as an int.
 */
int perf_event_open(struct perf_event_attr *attr, int pid, int cpu,
		    int group_fd, unsigned long flags)
{
	int fd;

	fd = syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
	return fd;
}
...@@ -2,27 +2,9 @@ ...@@ -2,27 +2,9 @@
#ifndef __LIBBPF_H #ifndef __LIBBPF_H
#define __LIBBPF_H #define __LIBBPF_H
struct bpf_insn; #include <bpf/bpf.h>
int bpf_create_map(enum bpf_map_type map_type, int key_size, int value_size,
int max_entries, int map_flags);
int bpf_update_elem(int fd, void *key, void *value, unsigned long long flags);
int bpf_lookup_elem(int fd, void *key, void *value);
int bpf_delete_elem(int fd, void *key);
int bpf_get_next_key(int fd, void *key, void *next_key);
int bpf_prog_load(enum bpf_prog_type prog_type,
const struct bpf_insn *insns, int insn_len,
const char *license, int kern_version);
int bpf_prog_attach(int prog_fd, int attachable_fd, enum bpf_attach_type type);
int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type);
int bpf_obj_pin(int fd, const char *pathname); struct bpf_insn;
int bpf_obj_get(const char *pathname);
#define LOG_BUF_SIZE (256 * 1024)
extern char bpf_log_buf[LOG_BUF_SIZE];
/* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */ /* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */
...@@ -203,10 +185,4 @@ extern char bpf_log_buf[LOG_BUF_SIZE]; ...@@ -203,10 +185,4 @@ extern char bpf_log_buf[LOG_BUF_SIZE];
.off = 0, \ .off = 0, \
.imm = 0 }) .imm = 0 })
/* create RAW socket and bind to interface 'name' */
int open_raw_sock(const char *name);
struct perf_event_attr;
int perf_event_open(struct perf_event_attr *attr, int pid, int cpu,
int group_fd, unsigned long flags);
#endif #endif
...@@ -14,6 +14,8 @@ ...@@ -14,6 +14,8 @@
#define MAX_INDEX 64 #define MAX_INDEX 64
#define MAX_STARS 38 #define MAX_STARS 38
char bpf_log_buf[BPF_LOG_BUF_SIZE];
static void stars(char *str, long val, long max, int width) static void stars(char *str, long val, long max, int width)
{ {
int i; int i;
...@@ -41,13 +43,13 @@ int main(int argc, char **argv) ...@@ -41,13 +43,13 @@ int main(int argc, char **argv)
return -1; return -1;
} }
while (bpf_get_next_key(map_fd, &key, &next_key) == 0) { while (bpf_map_get_next_key(map_fd, &key, &next_key) == 0) {
if (next_key >= MAX_INDEX) { if (next_key >= MAX_INDEX) {
fprintf(stderr, "Key %lu out of bounds\n", next_key); fprintf(stderr, "Key %lu out of bounds\n", next_key);
continue; continue;
} }
bpf_lookup_elem(map_fd, &next_key, values); bpf_map_lookup_elem(map_fd, &next_key, values);
sum = 0; sum = 0;
for (i = 0; i < nr_cpus; i++) for (i = 0; i < nr_cpus; i++)
......
...@@ -49,14 +49,14 @@ static void print_stack(struct key_t *key, __u64 count) ...@@ -49,14 +49,14 @@ static void print_stack(struct key_t *key, __u64 count)
int i; int i;
printf("%s;", key->target); printf("%s;", key->target);
if (bpf_lookup_elem(map_fd[3], &key->tret, ip) != 0) { if (bpf_map_lookup_elem(map_fd[3], &key->tret, ip) != 0) {
printf("---;"); printf("---;");
} else { } else {
for (i = PERF_MAX_STACK_DEPTH - 1; i >= 0; i--) for (i = PERF_MAX_STACK_DEPTH - 1; i >= 0; i--)
print_ksym(ip[i]); print_ksym(ip[i]);
} }
printf("-;"); printf("-;");
if (bpf_lookup_elem(map_fd[3], &key->wret, ip) != 0) { if (bpf_map_lookup_elem(map_fd[3], &key->wret, ip) != 0) {
printf("---;"); printf("---;");
} else { } else {
for (i = 0; i < PERF_MAX_STACK_DEPTH; i++) for (i = 0; i < PERF_MAX_STACK_DEPTH; i++)
...@@ -77,8 +77,8 @@ static void print_stacks(int fd) ...@@ -77,8 +77,8 @@ static void print_stacks(int fd)
struct key_t key = {}, next_key; struct key_t key = {}, next_key;
__u64 value; __u64 value;
while (bpf_get_next_key(fd, &key, &next_key) == 0) { while (bpf_map_get_next_key(fd, &key, &next_key) == 0) {
bpf_lookup_elem(fd, &next_key, &value); bpf_map_lookup_elem(fd, &next_key, &value);
print_stack(&next_key, value); print_stack(&next_key, value);
key = next_key; key = next_key;
} }
......
...@@ -21,6 +21,7 @@ ...@@ -21,6 +21,7 @@
#include <sys/ioctl.h> #include <sys/ioctl.h>
#include "libbpf.h" #include "libbpf.h"
#include "bpf_load.h" #include "bpf_load.h"
#include "perf-sys.h"
#define DEFAULT_FREQ 99 #define DEFAULT_FREQ 99
#define DEFAULT_SECS 5 #define DEFAULT_SECS 5
...@@ -49,7 +50,7 @@ static int sampling_start(int *pmu_fd, int freq) ...@@ -49,7 +50,7 @@ static int sampling_start(int *pmu_fd, int freq)
}; };
for (i = 0; i < nr_cpus; i++) { for (i = 0; i < nr_cpus; i++) {
pmu_fd[i] = perf_event_open(&pe_sample_attr, -1 /* pid */, i, pmu_fd[i] = sys_perf_event_open(&pe_sample_attr, -1 /* pid */, i,
-1 /* group_fd */, 0 /* flags */); -1 /* group_fd */, 0 /* flags */);
if (pmu_fd[i] < 0) { if (pmu_fd[i] < 0) {
fprintf(stderr, "ERROR: Initializing perf sampling\n"); fprintf(stderr, "ERROR: Initializing perf sampling\n");
...@@ -95,8 +96,8 @@ static void print_ip_map(int fd) ...@@ -95,8 +96,8 @@ static void print_ip_map(int fd)
/* fetch IPs and counts */ /* fetch IPs and counts */
key = 0, i = 0; key = 0, i = 0;
while (bpf_get_next_key(fd, &key, &next_key) == 0) { while (bpf_map_get_next_key(fd, &key, &next_key) == 0) {
bpf_lookup_elem(fd, &next_key, &value); bpf_map_lookup_elem(fd, &next_key, &value);
counts[i].ip = next_key; counts[i].ip = next_key;
counts[i++].count = value; counts[i++].count = value;
key = next_key; key = next_key;
......
...@@ -27,6 +27,9 @@ ...@@ -27,6 +27,9 @@
#include <linux/ip.h> #include <linux/ip.h>
#include <stddef.h> #include <stddef.h>
#include "libbpf.h" #include "libbpf.h"
#include "sock_example.h"
char bpf_log_buf[BPF_LOG_BUF_SIZE];
static int test_sock(void) static int test_sock(void)
{ {
...@@ -54,9 +57,10 @@ static int test_sock(void) ...@@ -54,9 +57,10 @@ static int test_sock(void)
BPF_MOV64_IMM(BPF_REG_0, 0), /* r0 = 0 */ BPF_MOV64_IMM(BPF_REG_0, 0), /* r0 = 0 */
BPF_EXIT_INSN(), BPF_EXIT_INSN(),
}; };
size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn);
prog_fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, prog, sizeof(prog), prog_fd = bpf_load_program(BPF_PROG_TYPE_SOCKET_FILTER, prog, insns_cnt,
"GPL", 0); "GPL", 0, bpf_log_buf, BPF_LOG_BUF_SIZE);
if (prog_fd < 0) { if (prog_fd < 0) {
printf("failed to load prog '%s'\n", strerror(errno)); printf("failed to load prog '%s'\n", strerror(errno));
goto cleanup; goto cleanup;
...@@ -72,13 +76,13 @@ static int test_sock(void) ...@@ -72,13 +76,13 @@ static int test_sock(void)
for (i = 0; i < 10; i++) { for (i = 0; i < 10; i++) {
key = IPPROTO_TCP; key = IPPROTO_TCP;
assert(bpf_lookup_elem(map_fd, &key, &tcp_cnt) == 0); assert(bpf_map_lookup_elem(map_fd, &key, &tcp_cnt) == 0);
key = IPPROTO_UDP; key = IPPROTO_UDP;
assert(bpf_lookup_elem(map_fd, &key, &udp_cnt) == 0); assert(bpf_map_lookup_elem(map_fd, &key, &udp_cnt) == 0);
key = IPPROTO_ICMP; key = IPPROTO_ICMP;
assert(bpf_lookup_elem(map_fd, &key, &icmp_cnt) == 0); assert(bpf_map_lookup_elem(map_fd, &key, &icmp_cnt) == 0);
printf("TCP %lld UDP %lld ICMP %lld packets\n", printf("TCP %lld UDP %lld ICMP %lld packets\n",
tcp_cnt, udp_cnt, icmp_cnt); tcp_cnt, udp_cnt, icmp_cnt);
......
#include <stdlib.h>
#include <stdio.h>
#include <linux/unistd.h>
#include <unistd.h>
#include <string.h>
#include <errno.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <linux/if_packet.h>
#include <arpa/inet.h>
#include "libbpf.h"
/*
 * Create a non-blocking, close-on-exec raw packet socket for all
 * protocols (ETH_P_ALL) and bind it to the interface called @name.
 *
 * Returns the socket descriptor, or -1 after printing a message when
 * either socket() or bind() fails. The socket is closed on bind failure.
 */
static inline int open_raw_sock(const char *name)
{
	const int type = SOCK_RAW | SOCK_NONBLOCK | SOCK_CLOEXEC;
	struct sockaddr_ll addr;
	int fd = socket(PF_PACKET, type, htons(ETH_P_ALL));

	if (fd < 0) {
		printf("cannot create raw socket\n");
		return -1;
	}

	/* Start from an all-zero address, then fill in the link-layer fields. */
	memset(&addr, 0, sizeof(addr));
	addr.sll_family = AF_PACKET;
	addr.sll_ifindex = if_nametoindex(name);
	addr.sll_protocol = htons(ETH_P_ALL);

	if (bind(fd, (struct sockaddr *)&addr, sizeof(addr)) >= 0)
		return fd;

	/* errno is read for the message before close() can overwrite it. */
	printf("bind to %s: %s\n", name, strerror(errno));
	close(fd);
	return -1;
}
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
#include <linux/bpf.h> #include <linux/bpf.h>
#include "libbpf.h" #include "libbpf.h"
#include "bpf_load.h" #include "bpf_load.h"
#include "sock_example.h"
#include <unistd.h> #include <unistd.h>
#include <arpa/inet.h> #include <arpa/inet.h>
...@@ -32,13 +33,13 @@ int main(int ac, char **argv) ...@@ -32,13 +33,13 @@ int main(int ac, char **argv)
int key; int key;
key = IPPROTO_TCP; key = IPPROTO_TCP;
assert(bpf_lookup_elem(map_fd[0], &key, &tcp_cnt) == 0); assert(bpf_map_lookup_elem(map_fd[0], &key, &tcp_cnt) == 0);
key = IPPROTO_UDP; key = IPPROTO_UDP;
assert(bpf_lookup_elem(map_fd[0], &key, &udp_cnt) == 0); assert(bpf_map_lookup_elem(map_fd[0], &key, &udp_cnt) == 0);
key = IPPROTO_ICMP; key = IPPROTO_ICMP;
assert(bpf_lookup_elem(map_fd[0], &key, &icmp_cnt) == 0); assert(bpf_map_lookup_elem(map_fd[0], &key, &icmp_cnt) == 0);
printf("TCP %lld UDP %lld ICMP %lld bytes\n", printf("TCP %lld UDP %lld ICMP %lld bytes\n",
tcp_cnt, udp_cnt, icmp_cnt); tcp_cnt, udp_cnt, icmp_cnt);
......
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
#include <linux/bpf.h> #include <linux/bpf.h>
#include "libbpf.h" #include "libbpf.h"
#include "bpf_load.h" #include "bpf_load.h"
#include "sock_example.h"
#include <unistd.h> #include <unistd.h>
#include <arpa/inet.h> #include <arpa/inet.h>
#include <sys/resource.h> #include <sys/resource.h>
...@@ -39,8 +40,8 @@ int main(int ac, char **argv) ...@@ -39,8 +40,8 @@ int main(int ac, char **argv)
int key = 0, next_key; int key = 0, next_key;
struct pair value; struct pair value;
while (bpf_get_next_key(map_fd[0], &key, &next_key) == 0) { while (bpf_map_get_next_key(map_fd[0], &key, &next_key) == 0) {
bpf_lookup_elem(map_fd[0], &next_key, &value); bpf_map_lookup_elem(map_fd[0], &next_key, &value);
printf("ip %s bytes %lld packets %lld\n", printf("ip %s bytes %lld packets %lld\n",
inet_ntoa((struct in_addr){htonl(next_key)}), inet_ntoa((struct in_addr){htonl(next_key)}),
value.bytes, value.packets); value.bytes, value.packets);
......
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
#include <linux/bpf.h> #include <linux/bpf.h>
#include "libbpf.h" #include "libbpf.h"
#include "bpf_load.h" #include "bpf_load.h"
#include "sock_example.h"
#include <unistd.h> #include <unistd.h>
#include <arpa/inet.h> #include <arpa/inet.h>
#include <sys/resource.h> #include <sys/resource.h>
...@@ -54,8 +55,8 @@ int main(int argc, char **argv) ...@@ -54,8 +55,8 @@ int main(int argc, char **argv)
sleep(1); sleep(1);
printf("IP src.port -> dst.port bytes packets\n"); printf("IP src.port -> dst.port bytes packets\n");
while (bpf_get_next_key(map_fd[2], &key, &next_key) == 0) { while (bpf_map_get_next_key(map_fd[2], &key, &next_key) == 0) {
bpf_lookup_elem(map_fd[2], &next_key, &value); bpf_map_lookup_elem(map_fd[2], &next_key, &value);
printf("%s.%05d -> %s.%05d %12lld %12lld\n", printf("%s.%05d -> %s.%05d %12lld %12lld\n",
inet_ntoa((struct in_addr){htonl(next_key.src)}), inet_ntoa((struct in_addr){htonl(next_key.src)}),
next_key.port16[0], next_key.port16[0],
......
...@@ -31,8 +31,8 @@ int main(int ac, char **argv) ...@@ -31,8 +31,8 @@ int main(int ac, char **argv)
for (i = 0; i < 5; i++) { for (i = 0; i < 5; i++) {
key = 0; key = 0;
printf("kprobing funcs:"); printf("kprobing funcs:");
while (bpf_get_next_key(map_fd[0], &key, &next_key) == 0) { while (bpf_map_get_next_key(map_fd[0], &key, &next_key) == 0) {
bpf_lookup_elem(map_fd[0], &next_key, &value); bpf_map_lookup_elem(map_fd[0], &next_key, &value);
assert(next_key == value); assert(next_key == value);
sym = ksym_search(value); sym = ksym_search(value);
printf(" %s", sym->name); printf(" %s", sym->name);
...@@ -41,8 +41,8 @@ int main(int ac, char **argv) ...@@ -41,8 +41,8 @@ int main(int ac, char **argv)
if (key) if (key)
printf("\n"); printf("\n");
key = 0; key = 0;
while (bpf_get_next_key(map_fd[0], &key, &next_key) == 0) while (bpf_map_get_next_key(map_fd[0], &key, &next_key) == 0)
bpf_delete_elem(map_fd[0], &next_key); bpf_map_delete_elem(map_fd[0], &next_key);
sleep(1); sleep(1);
} }
......
...@@ -60,9 +60,9 @@ int main(int argc, char **argv) ...@@ -60,9 +60,9 @@ int main(int argc, char **argv)
} }
/* bpf_tunnel_key.remote_ipv4 expects host byte orders */ /* bpf_tunnel_key.remote_ipv4 expects host byte orders */
ret = bpf_update_elem(array_fd, &array_key, &ifindex, 0); ret = bpf_map_update_elem(array_fd, &array_key, &ifindex, 0);
if (ret) { if (ret) {
perror("bpf_update_elem"); perror("bpf_map_update_elem");
goto out; goto out;
} }
......
...@@ -85,9 +85,9 @@ int main(int argc, char **argv) ...@@ -85,9 +85,9 @@ int main(int argc, char **argv)
} }
} }
ret = bpf_update_elem(array_fd, &array_key, &cg2_fd, 0); ret = bpf_map_update_elem(array_fd, &array_key, &cg2_fd, 0);
if (ret) { if (ret) {
perror("bpf_update_elem"); perror("bpf_map_update_elem");
goto out; goto out;
} }
......
...@@ -36,6 +36,8 @@ enum { ...@@ -36,6 +36,8 @@ enum {
MAP_KEY_BYTES, MAP_KEY_BYTES,
}; };
char bpf_log_buf[BPF_LOG_BUF_SIZE];
static int prog_load(int map_fd, int verdict) static int prog_load(int map_fd, int verdict)
{ {
struct bpf_insn prog[] = { struct bpf_insn prog[] = {
...@@ -66,9 +68,11 @@ static int prog_load(int map_fd, int verdict) ...@@ -66,9 +68,11 @@ static int prog_load(int map_fd, int verdict)
BPF_MOV64_IMM(BPF_REG_0, verdict), /* r0 = verdict */ BPF_MOV64_IMM(BPF_REG_0, verdict), /* r0 = verdict */
BPF_EXIT_INSN(), BPF_EXIT_INSN(),
}; };
size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn);
return bpf_prog_load(BPF_PROG_TYPE_CGROUP_SKB, return bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB,
prog, sizeof(prog), "GPL", 0); prog, insns_cnt, "GPL", 0,
bpf_log_buf, BPF_LOG_BUF_SIZE);
} }
static int usage(const char *argv0) static int usage(const char *argv0)
...@@ -108,10 +112,10 @@ static int attach_filter(int cg_fd, int type, int verdict) ...@@ -108,10 +112,10 @@ static int attach_filter(int cg_fd, int type, int verdict)
} }
while (1) { while (1) {
key = MAP_KEY_PACKETS; key = MAP_KEY_PACKETS;
assert(bpf_lookup_elem(map_fd, &key, &pkt_cnt) == 0); assert(bpf_map_lookup_elem(map_fd, &key, &pkt_cnt) == 0);
key = MAP_KEY_BYTES; key = MAP_KEY_BYTES;
assert(bpf_lookup_elem(map_fd, &key, &byte_cnt) == 0); assert(bpf_map_lookup_elem(map_fd, &key, &byte_cnt) == 0);
printf("cgroup received %lld packets, %lld bytes\n", printf("cgroup received %lld packets, %lld bytes\n",
pkt_cnt, byte_cnt); pkt_cnt, byte_cnt);
......
...@@ -32,6 +32,8 @@ ...@@ -32,6 +32,8 @@
#define BAR "/foo/bar/" #define BAR "/foo/bar/"
#define PING_CMD "ping -c1 -w1 127.0.0.1" #define PING_CMD "ping -c1 -w1 127.0.0.1"
char bpf_log_buf[BPF_LOG_BUF_SIZE];
static int prog_load(int verdict) static int prog_load(int verdict)
{ {
int ret; int ret;
...@@ -39,9 +41,11 @@ static int prog_load(int verdict) ...@@ -39,9 +41,11 @@ static int prog_load(int verdict)
BPF_MOV64_IMM(BPF_REG_0, verdict), /* r0 = verdict */ BPF_MOV64_IMM(BPF_REG_0, verdict), /* r0 = verdict */
BPF_EXIT_INSN(), BPF_EXIT_INSN(),
}; };
size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn);
ret = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SKB, ret = bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB,
prog, sizeof(prog), "GPL", 0); prog, insns_cnt, "GPL", 0,
bpf_log_buf, BPF_LOG_BUF_SIZE);
if (ret < 0) { if (ret < 0) {
log_err("Loading program"); log_err("Loading program");
......
...@@ -23,6 +23,8 @@ ...@@ -23,6 +23,8 @@
#include "libbpf.h" #include "libbpf.h"
char bpf_log_buf[BPF_LOG_BUF_SIZE];
static int prog_load(int idx) static int prog_load(int idx)
{ {
struct bpf_insn prog[] = { struct bpf_insn prog[] = {
...@@ -33,9 +35,10 @@ static int prog_load(int idx) ...@@ -33,9 +35,10 @@ static int prog_load(int idx)
BPF_MOV64_IMM(BPF_REG_0, 1), /* r0 = verdict */ BPF_MOV64_IMM(BPF_REG_0, 1), /* r0 = verdict */
BPF_EXIT_INSN(), BPF_EXIT_INSN(),
}; };
size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn);
return bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK, prog, sizeof(prog), return bpf_load_program(BPF_PROG_TYPE_CGROUP_SOCK, prog, insns_cnt,
"GPL", 0); "GPL", 0, bpf_log_buf, BPF_LOG_BUF_SIZE);
} }
static int usage(const char *argv0) static int usage(const char *argv0)
......
...@@ -36,7 +36,7 @@ int main(int argc, char **argv) ...@@ -36,7 +36,7 @@ int main(int argc, char **argv)
if (!cg2) if (!cg2)
goto err; goto err;
if (bpf_update_elem(map_fd[0], &idx, &cg2, BPF_ANY)) { if (bpf_map_update_elem(map_fd[0], &idx, &cg2, BPF_ANY)) {
log_err("Adding target cgroup to map"); log_err("Adding target cgroup to map");
goto err; goto err;
} }
...@@ -50,7 +50,7 @@ int main(int argc, char **argv) ...@@ -50,7 +50,7 @@ int main(int argc, char **argv)
*/ */
sync(); sync();
bpf_lookup_elem(map_fd[1], &idx, &remote_pid); bpf_map_lookup_elem(map_fd[1], &idx, &remote_pid);
if (local_pid != remote_pid) { if (local_pid != remote_pid) {
fprintf(stderr, fprintf(stderr,
...@@ -64,10 +64,10 @@ int main(int argc, char **argv) ...@@ -64,10 +64,10 @@ int main(int argc, char **argv)
goto err; goto err;
remote_pid = 0; remote_pid = 0;
bpf_update_elem(map_fd[1], &idx, &remote_pid, BPF_ANY); bpf_map_update_elem(map_fd[1], &idx, &remote_pid, BPF_ANY);
sync(); sync();
bpf_lookup_elem(map_fd[1], &idx, &remote_pid); bpf_map_lookup_elem(map_fd[1], &idx, &remote_pid);
if (local_pid == remote_pid) { if (local_pid == remote_pid) {
fprintf(stderr, "BPF cgroup negative test did not work\n"); fprintf(stderr, "BPF cgroup negative test did not work\n");
......
...@@ -134,7 +134,7 @@ static int pfect_lru_lookup_or_insert(struct pfect_lru *lru, ...@@ -134,7 +134,7 @@ static int pfect_lru_lookup_or_insert(struct pfect_lru *lru,
int seen = 0; int seen = 0;
lru->total++; lru->total++;
if (!bpf_lookup_elem(lru->map_fd, &key, &node)) { if (!bpf_map_lookup_elem(lru->map_fd, &key, &node)) {
if (node) { if (node) {
list_move(&node->list, &lru->list); list_move(&node->list, &lru->list);
return 1; return 1;
...@@ -151,7 +151,7 @@ static int pfect_lru_lookup_or_insert(struct pfect_lru *lru, ...@@ -151,7 +151,7 @@ static int pfect_lru_lookup_or_insert(struct pfect_lru *lru,
node = list_last_entry(&lru->list, node = list_last_entry(&lru->list,
struct pfect_lru_node, struct pfect_lru_node,
list); list);
bpf_update_elem(lru->map_fd, &node->key, &null_node, BPF_EXIST); bpf_map_update_elem(lru->map_fd, &node->key, &null_node, BPF_EXIST);
} }
node->key = key; node->key = key;
...@@ -159,10 +159,10 @@ static int pfect_lru_lookup_or_insert(struct pfect_lru *lru, ...@@ -159,10 +159,10 @@ static int pfect_lru_lookup_or_insert(struct pfect_lru *lru,
lru->nr_misses++; lru->nr_misses++;
if (seen) { if (seen) {
assert(!bpf_update_elem(lru->map_fd, &key, &node, BPF_EXIST)); assert(!bpf_map_update_elem(lru->map_fd, &key, &node, BPF_EXIST));
} else { } else {
lru->nr_unique++; lru->nr_unique++;
assert(!bpf_update_elem(lru->map_fd, &key, &node, BPF_NOEXIST)); assert(!bpf_map_update_elem(lru->map_fd, &key, &node, BPF_NOEXIST));
} }
return seen; return seen;
...@@ -285,11 +285,11 @@ static void do_test_lru_dist(int task, void *data) ...@@ -285,11 +285,11 @@ static void do_test_lru_dist(int task, void *data)
pfect_lru_lookup_or_insert(&pfect_lru, key); pfect_lru_lookup_or_insert(&pfect_lru, key);
if (!bpf_lookup_elem(lru_map_fd, &key, &value)) if (!bpf_map_lookup_elem(lru_map_fd, &key, &value))
continue; continue;
if (bpf_update_elem(lru_map_fd, &key, &value, BPF_NOEXIST)) { if (bpf_map_update_elem(lru_map_fd, &key, &value, BPF_NOEXIST)) {
printf("bpf_update_elem(lru_map_fd, %llu): errno:%d\n", printf("bpf_map_update_elem(lru_map_fd, %llu): errno:%d\n",
key, errno); key, errno);
assert(0); assert(0);
} }
...@@ -358,19 +358,19 @@ static void test_lru_loss0(int map_type, int map_flags) ...@@ -358,19 +358,19 @@ static void test_lru_loss0(int map_type, int map_flags)
for (key = 1; key <= 1000; key++) { for (key = 1; key <= 1000; key++) {
int start_key, end_key; int start_key, end_key;
assert(bpf_update_elem(map_fd, &key, value, BPF_NOEXIST) == 0); assert(bpf_map_update_elem(map_fd, &key, value, BPF_NOEXIST) == 0);
start_key = 101; start_key = 101;
end_key = min(key, 900); end_key = min(key, 900);
while (start_key <= end_key) { while (start_key <= end_key) {
bpf_lookup_elem(map_fd, &start_key, value); bpf_map_lookup_elem(map_fd, &start_key, value);
start_key++; start_key++;
} }
} }
for (key = 1; key <= 1000; key++) { for (key = 1; key <= 1000; key++) {
if (bpf_lookup_elem(map_fd, &key, value)) { if (bpf_map_lookup_elem(map_fd, &key, value)) {
if (key <= 100) if (key <= 100)
old_unused_losses++; old_unused_losses++;
else if (key <= 900) else if (key <= 900)
...@@ -408,10 +408,10 @@ static void test_lru_loss1(int map_type, int map_flags) ...@@ -408,10 +408,10 @@ static void test_lru_loss1(int map_type, int map_flags)
value[0] = 1234; value[0] = 1234;
for (key = 1; key <= 1000; key++) for (key = 1; key <= 1000; key++)
assert(!bpf_update_elem(map_fd, &key, value, BPF_NOEXIST)); assert(!bpf_map_update_elem(map_fd, &key, value, BPF_NOEXIST));
for (key = 1; key <= 1000; key++) { for (key = 1; key <= 1000; key++) {
if (bpf_lookup_elem(map_fd, &key, value)) if (bpf_map_lookup_elem(map_fd, &key, value))
nr_losses++; nr_losses++;
} }
...@@ -436,7 +436,7 @@ static void do_test_parallel_lru_loss(int task, void *data) ...@@ -436,7 +436,7 @@ static void do_test_parallel_lru_loss(int task, void *data)
next_ins_key = stable_base; next_ins_key = stable_base;
value[0] = 1234; value[0] = 1234;
for (i = 0; i < nr_stable_elems; i++) { for (i = 0; i < nr_stable_elems; i++) {
assert(bpf_update_elem(map_fd, &next_ins_key, value, assert(bpf_map_update_elem(map_fd, &next_ins_key, value,
BPF_NOEXIST) == 0); BPF_NOEXIST) == 0);
next_ins_key++; next_ins_key++;
} }
...@@ -448,9 +448,9 @@ static void do_test_parallel_lru_loss(int task, void *data) ...@@ -448,9 +448,9 @@ static void do_test_parallel_lru_loss(int task, void *data)
if (rn % 10) { if (rn % 10) {
key = rn % nr_stable_elems + stable_base; key = rn % nr_stable_elems + stable_base;
bpf_lookup_elem(map_fd, &key, value); bpf_map_lookup_elem(map_fd, &key, value);
} else { } else {
bpf_update_elem(map_fd, &next_ins_key, value, bpf_map_update_elem(map_fd, &next_ins_key, value,
BPF_NOEXIST); BPF_NOEXIST);
next_ins_key++; next_ins_key++;
} }
...@@ -458,7 +458,7 @@ static void do_test_parallel_lru_loss(int task, void *data) ...@@ -458,7 +458,7 @@ static void do_test_parallel_lru_loss(int task, void *data)
key = stable_base; key = stable_base;
for (i = 0; i < nr_stable_elems; i++) { for (i = 0; i < nr_stable_elems; i++) {
if (bpf_lookup_elem(map_fd, &key, value)) if (bpf_map_lookup_elem(map_fd, &key, value))
nr_losses++; nr_losses++;
key++; key++;
} }
......
...@@ -50,7 +50,7 @@ int main(int ac, char **argv) ...@@ -50,7 +50,7 @@ int main(int ac, char **argv)
mapped_addr_in->sin_port = htons(5555); mapped_addr_in->sin_port = htons(5555);
mapped_addr_in->sin_addr.s_addr = inet_addr("255.255.255.255"); mapped_addr_in->sin_addr.s_addr = inet_addr("255.255.255.255");
assert(!bpf_update_elem(map_fd[0], &mapped_addr, &serv_addr, BPF_ANY)); assert(!bpf_map_update_elem(map_fd[0], &mapped_addr, &serv_addr, BPF_ANY));
assert(listen(serverfd, 5) == 0); assert(listen(serverfd, 5) == 0);
......
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
#include <sys/resource.h> #include <sys/resource.h>
#include "libbpf.h" #include "libbpf.h"
#include "bpf_load.h" #include "bpf_load.h"
#include "perf-sys.h"
#define SAMPLE_FREQ 50 #define SAMPLE_FREQ 50
...@@ -61,14 +62,14 @@ static void print_stack(struct key_t *key, __u64 count) ...@@ -61,14 +62,14 @@ static void print_stack(struct key_t *key, __u64 count)
int i; int i;
printf("%3lld %s;", count, key->comm); printf("%3lld %s;", count, key->comm);
if (bpf_lookup_elem(map_fd[1], &key->kernstack, ip) != 0) { if (bpf_map_lookup_elem(map_fd[1], &key->kernstack, ip) != 0) {
printf("---;"); printf("---;");
} else { } else {
for (i = PERF_MAX_STACK_DEPTH - 1; i >= 0; i--) for (i = PERF_MAX_STACK_DEPTH - 1; i >= 0; i--)
print_ksym(ip[i]); print_ksym(ip[i]);
} }
printf("-;"); printf("-;");
if (bpf_lookup_elem(map_fd[1], &key->userstack, ip) != 0) { if (bpf_map_lookup_elem(map_fd[1], &key->userstack, ip) != 0) {
printf("---;"); printf("---;");
} else { } else {
for (i = PERF_MAX_STACK_DEPTH - 1; i >= 0; i--) for (i = PERF_MAX_STACK_DEPTH - 1; i >= 0; i--)
...@@ -98,10 +99,10 @@ static void print_stacks(void) ...@@ -98,10 +99,10 @@ static void print_stacks(void)
int fd = map_fd[0], stack_map = map_fd[1]; int fd = map_fd[0], stack_map = map_fd[1];
sys_read_seen = sys_write_seen = false; sys_read_seen = sys_write_seen = false;
while (bpf_get_next_key(fd, &key, &next_key) == 0) { while (bpf_map_get_next_key(fd, &key, &next_key) == 0) {
bpf_lookup_elem(fd, &next_key, &value); bpf_map_lookup_elem(fd, &next_key, &value);
print_stack(&next_key, value); print_stack(&next_key, value);
bpf_delete_elem(fd, &next_key); bpf_map_delete_elem(fd, &next_key);
key = next_key; key = next_key;
} }
...@@ -111,8 +112,8 @@ static void print_stacks(void) ...@@ -111,8 +112,8 @@ static void print_stacks(void)
} }
/* clear stack map */ /* clear stack map */
while (bpf_get_next_key(stack_map, &stackid, &next_id) == 0) { while (bpf_map_get_next_key(stack_map, &stackid, &next_id) == 0) {
bpf_delete_elem(stack_map, &next_id); bpf_map_delete_elem(stack_map, &next_id);
stackid = next_id; stackid = next_id;
} }
} }
...@@ -125,9 +126,9 @@ static void test_perf_event_all_cpu(struct perf_event_attr *attr) ...@@ -125,9 +126,9 @@ static void test_perf_event_all_cpu(struct perf_event_attr *attr)
/* open perf_event on all cpus */ /* open perf_event on all cpus */
for (i = 0; i < nr_cpus; i++) { for (i = 0; i < nr_cpus; i++) {
pmu_fd[i] = perf_event_open(attr, -1, i, -1, 0); pmu_fd[i] = sys_perf_event_open(attr, -1, i, -1, 0);
if (pmu_fd[i] < 0) { if (pmu_fd[i] < 0) {
printf("perf_event_open failed\n"); printf("sys_perf_event_open failed\n");
goto all_cpu_err; goto all_cpu_err;
} }
assert(ioctl(pmu_fd[i], PERF_EVENT_IOC_SET_BPF, prog_fd[0]) == 0); assert(ioctl(pmu_fd[i], PERF_EVENT_IOC_SET_BPF, prog_fd[0]) == 0);
...@@ -146,9 +147,9 @@ static void test_perf_event_task(struct perf_event_attr *attr) ...@@ -146,9 +147,9 @@ static void test_perf_event_task(struct perf_event_attr *attr)
int pmu_fd; int pmu_fd;
/* open task bound event */ /* open task bound event */
pmu_fd = perf_event_open(attr, 0, -1, -1, 0); pmu_fd = sys_perf_event_open(attr, 0, -1, -1, 0);
if (pmu_fd < 0) { if (pmu_fd < 0) {
printf("perf_event_open failed\n"); printf("sys_perf_event_open failed\n");
return; return;
} }
assert(ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd[0]) == 0); assert(ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd[0]) == 0);
......
...@@ -21,6 +21,7 @@ ...@@ -21,6 +21,7 @@
#include <signal.h> #include <signal.h>
#include "libbpf.h" #include "libbpf.h"
#include "bpf_load.h" #include "bpf_load.h"
#include "perf-sys.h"
static int pmu_fd; static int pmu_fd;
...@@ -61,7 +62,7 @@ struct perf_event_sample { ...@@ -61,7 +62,7 @@ struct perf_event_sample {
char data[]; char data[];
}; };
void perf_event_read(print_fn fn) static void perf_event_read(print_fn fn)
{ {
__u64 data_tail = header->data_tail; __u64 data_tail = header->data_tail;
__u64 data_head = header->data_head; __u64 data_head = header->data_head;
...@@ -159,10 +160,10 @@ static void test_bpf_perf_event(void) ...@@ -159,10 +160,10 @@ static void test_bpf_perf_event(void)
}; };
int key = 0; int key = 0;
pmu_fd = perf_event_open(&attr, -1/*pid*/, 0/*cpu*/, -1/*group_fd*/, 0); pmu_fd = sys_perf_event_open(&attr, -1/*pid*/, 0/*cpu*/, -1/*group_fd*/, 0);
assert(pmu_fd >= 0); assert(pmu_fd >= 0);
assert(bpf_update_elem(map_fd[0], &key, &pmu_fd, BPF_ANY) == 0); assert(bpf_map_update_elem(map_fd[0], &key, &pmu_fd, BPF_ANY) == 0);
ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0); ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
} }
......
...@@ -48,12 +48,12 @@ static void print_hist_for_pid(int fd, void *task) ...@@ -48,12 +48,12 @@ static void print_hist_for_pid(int fd, void *task)
long max_value = 0; long max_value = 0;
int i, ind; int i, ind;
while (bpf_get_next_key(fd, &key, &next_key) == 0) { while (bpf_map_get_next_key(fd, &key, &next_key) == 0) {
if (memcmp(&next_key, task, SIZE)) { if (memcmp(&next_key, task, SIZE)) {
key = next_key; key = next_key;
continue; continue;
} }
bpf_lookup_elem(fd, &next_key, values); bpf_map_lookup_elem(fd, &next_key, values);
value = 0; value = 0;
for (i = 0; i < nr_cpus; i++) for (i = 0; i < nr_cpus; i++)
value += values[i]; value += values[i];
...@@ -83,7 +83,7 @@ static void print_hist(int fd) ...@@ -83,7 +83,7 @@ static void print_hist(int fd)
int task_cnt = 0; int task_cnt = 0;
int i; int i;
while (bpf_get_next_key(fd, &key, &next_key) == 0) { while (bpf_map_get_next_key(fd, &key, &next_key) == 0) {
int found = 0; int found = 0;
for (i = 0; i < task_cnt; i++) for (i = 0; i < task_cnt; i++)
...@@ -136,8 +136,8 @@ int main(int ac, char **argv) ...@@ -136,8 +136,8 @@ int main(int ac, char **argv)
for (i = 0; i < 5; i++) { for (i = 0; i < 5; i++) {
key = 0; key = 0;
while (bpf_get_next_key(map_fd[0], &key, &next_key) == 0) { while (bpf_map_get_next_key(map_fd[0], &key, &next_key) == 0) {
bpf_lookup_elem(map_fd[0], &next_key, &value); bpf_map_lookup_elem(map_fd[0], &next_key, &value);
printf("location 0x%lx count %ld\n", next_key, value); printf("location 0x%lx count %ld\n", next_key, value);
key = next_key; key = next_key;
} }
......
...@@ -28,7 +28,7 @@ static void clear_stats(int fd) ...@@ -28,7 +28,7 @@ static void clear_stats(int fd)
memset(values, 0, sizeof(values)); memset(values, 0, sizeof(values));
for (key = 0; key < SLOTS; key++) for (key = 0; key < SLOTS; key++)
bpf_update_elem(fd, &key, values, BPF_ANY); bpf_map_update_elem(fd, &key, values, BPF_ANY);
} }
const char *color[] = { const char *color[] = {
...@@ -89,7 +89,7 @@ static void print_hist(int fd) ...@@ -89,7 +89,7 @@ static void print_hist(int fd)
int i; int i;
for (key = 0; key < SLOTS; key++) { for (key = 0; key < SLOTS; key++) {
bpf_lookup_elem(fd, &key, values); bpf_map_lookup_elem(fd, &key, values);
value = 0; value = 0;
for (i = 0; i < nr_cpus; i++) for (i = 0; i < nr_cpus; i++)
value += values[i]; value += values[i];
......
...@@ -37,8 +37,8 @@ static void print_old_objects(int fd) ...@@ -37,8 +37,8 @@ static void print_old_objects(int fd)
key = write(1, "\e[1;1H\e[2J", 12); /* clear screen */ key = write(1, "\e[1;1H\e[2J", 12); /* clear screen */
key = -1; key = -1;
while (bpf_get_next_key(map_fd[0], &key, &next_key) == 0) { while (bpf_map_get_next_key(map_fd[0], &key, &next_key) == 0) {
bpf_lookup_elem(map_fd[0], &next_key, &v); bpf_map_lookup_elem(map_fd[0], &next_key, &v);
key = next_key; key = next_key;
if (val - v.val < 1000000000ll) if (val - v.val < 1000000000ll)
/* object was allocated more than 1 sec ago */ /* object was allocated more than 1 sec ago */
......
...@@ -10,6 +10,7 @@ ...@@ -10,6 +10,7 @@
#include <linux/bpf.h> #include <linux/bpf.h>
#include "libbpf.h" #include "libbpf.h"
#include "bpf_load.h" #include "bpf_load.h"
#include "perf-sys.h"
#define SAMPLE_PERIOD 0x7fffffffffffffffULL #define SAMPLE_PERIOD 0x7fffffffffffffffULL
...@@ -30,13 +31,13 @@ static void test_bpf_perf_event(void) ...@@ -30,13 +31,13 @@ static void test_bpf_perf_event(void)
}; };
for (i = 0; i < nr_cpus; i++) { for (i = 0; i < nr_cpus; i++) {
pmu_fd[i] = perf_event_open(&attr_insn_pmu, -1/*pid*/, i/*cpu*/, -1/*group_fd*/, 0); pmu_fd[i] = sys_perf_event_open(&attr_insn_pmu, -1/*pid*/, i/*cpu*/, -1/*group_fd*/, 0);
if (pmu_fd[i] < 0) { if (pmu_fd[i] < 0) {
printf("event syscall failed\n"); printf("event syscall failed\n");
goto exit; goto exit;
} }
bpf_update_elem(map_fd[0], &i, &pmu_fd[i], BPF_ANY); bpf_map_update_elem(map_fd[0], &i, &pmu_fd[i], BPF_ANY);
ioctl(pmu_fd[i], PERF_EVENT_IOC_ENABLE, 0); ioctl(pmu_fd[i], PERF_EVENT_IOC_ENABLE, 0);
} }
......
...@@ -43,7 +43,7 @@ static void poll_stats(int interval) ...@@ -43,7 +43,7 @@ static void poll_stats(int interval)
for (key = 0; key < nr_keys; key++) { for (key = 0; key < nr_keys; key++) {
__u64 sum = 0; __u64 sum = 0;
assert(bpf_lookup_elem(map_fd[0], &key, values) == 0); assert(bpf_map_lookup_elem(map_fd[0], &key, values) == 0);
for (i = 0; i < nr_cpus; i++) for (i = 0; i < nr_cpus; i++)
sum += (values[i] - prev[key][i]); sum += (values[i] - prev[key][i]);
if (sum) if (sum)
......
...@@ -51,7 +51,7 @@ static void poll_stats(unsigned int kill_after_s) ...@@ -51,7 +51,7 @@ static void poll_stats(unsigned int kill_after_s)
for (proto = 0; proto < nr_protos; proto++) { for (proto = 0; proto < nr_protos; proto++) {
__u64 sum = 0; __u64 sum = 0;
assert(bpf_lookup_elem(map_fd[0], &proto, values) == 0); assert(bpf_map_lookup_elem(map_fd[0], &proto, values) == 0);
for (i = 0; i < nr_cpus; i++) for (i = 0; i < nr_cpus; i++)
sum += (values[i] - prev[proto][i]); sum += (values[i] - prev[proto][i]);
...@@ -237,8 +237,8 @@ int main(int argc, char **argv) ...@@ -237,8 +237,8 @@ int main(int argc, char **argv)
while (min_port <= max_port) { while (min_port <= max_port) {
vip.dport = htons(min_port++); vip.dport = htons(min_port++);
if (bpf_update_elem(map_fd[1], &vip, &tnl, BPF_NOEXIST)) { if (bpf_map_update_elem(map_fd[1], &vip, &tnl, BPF_NOEXIST)) {
perror("bpf_update_elem(&vip2tnl)"); perror("bpf_map_update_elem(&vip2tnl)");
return 1; return 1;
} }
} }
......
...@@ -73,6 +73,8 @@ enum bpf_cmd { ...@@ -73,6 +73,8 @@ enum bpf_cmd {
BPF_PROG_LOAD, BPF_PROG_LOAD,
BPF_OBJ_PIN, BPF_OBJ_PIN,
BPF_OBJ_GET, BPF_OBJ_GET,
BPF_PROG_ATTACH,
BPF_PROG_DETACH,
}; };
enum bpf_map_type { enum bpf_map_type {
...@@ -85,6 +87,8 @@ enum bpf_map_type { ...@@ -85,6 +87,8 @@ enum bpf_map_type {
BPF_MAP_TYPE_PERCPU_ARRAY, BPF_MAP_TYPE_PERCPU_ARRAY,
BPF_MAP_TYPE_STACK_TRACE, BPF_MAP_TYPE_STACK_TRACE,
BPF_MAP_TYPE_CGROUP_ARRAY, BPF_MAP_TYPE_CGROUP_ARRAY,
BPF_MAP_TYPE_LRU_HASH,
BPF_MAP_TYPE_LRU_PERCPU_HASH,
}; };
enum bpf_prog_type { enum bpf_prog_type {
...@@ -95,8 +99,23 @@ enum bpf_prog_type { ...@@ -95,8 +99,23 @@ enum bpf_prog_type {
BPF_PROG_TYPE_SCHED_ACT, BPF_PROG_TYPE_SCHED_ACT,
BPF_PROG_TYPE_TRACEPOINT, BPF_PROG_TYPE_TRACEPOINT,
BPF_PROG_TYPE_XDP, BPF_PROG_TYPE_XDP,
BPF_PROG_TYPE_PERF_EVENT,
BPF_PROG_TYPE_CGROUP_SKB,
BPF_PROG_TYPE_CGROUP_SOCK,
BPF_PROG_TYPE_LWT_IN,
BPF_PROG_TYPE_LWT_OUT,
BPF_PROG_TYPE_LWT_XMIT,
}; };
/* Attach types.
 * No enumerator carries an explicit value, so they are numbered
 * sequentially starting at 0. __MAX_BPF_ATTACH_TYPE is the final
 * enumerator and therefore equals the number of attach types listed
 * before it; it has to remain last for that to hold.
 * NOTE(review): presumably these are the values passed in the
 * attach_type field of union bpf_attr for BPF_PROG_ATTACH/DETACH - confirm.
 */
enum bpf_attach_type {
BPF_CGROUP_INET_INGRESS,
BPF_CGROUP_INET_EGRESS,
BPF_CGROUP_INET_SOCK_CREATE,
__MAX_BPF_ATTACH_TYPE
};
#define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE
#define BPF_PSEUDO_MAP_FD 1 #define BPF_PSEUDO_MAP_FD 1
/* flags for BPF_MAP_UPDATE_ELEM command */ /* flags for BPF_MAP_UPDATE_ELEM command */
...@@ -105,6 +124,13 @@ enum bpf_prog_type { ...@@ -105,6 +124,13 @@ enum bpf_prog_type {
#define BPF_EXIST 2 /* update existing element */ #define BPF_EXIST 2 /* update existing element */
#define BPF_F_NO_PREALLOC (1U << 0) #define BPF_F_NO_PREALLOC (1U << 0)
/* Instead of having one common LRU list in the
* BPF_MAP_TYPE_LRU_[PERCPU_]HASH map, use a percpu LRU list
* which can scale and perform better.
* Note, the LRU nodes (including free nodes) cannot be moved
* across different LRU lists.
*/
#define BPF_F_NO_COMMON_LRU (1U << 1)
union bpf_attr { union bpf_attr {
struct { /* anonymous struct used by BPF_MAP_CREATE command */ struct { /* anonymous struct used by BPF_MAP_CREATE command */
...@@ -140,243 +166,327 @@ union bpf_attr { ...@@ -140,243 +166,327 @@ union bpf_attr {
__aligned_u64 pathname; __aligned_u64 pathname;
__u32 bpf_fd; __u32 bpf_fd;
}; };
struct { /* anonymous struct used by BPF_PROG_ATTACH/DETACH commands */
__u32 target_fd; /* container object to attach to */
__u32 attach_bpf_fd; /* eBPF program to attach */
__u32 attach_type;
};
} __attribute__((aligned(8))); } __attribute__((aligned(8)));
/* BPF helper function descriptions:
*
* void *bpf_map_lookup_elem(&map, &key)
* Return: Map value or NULL
*
* int bpf_map_update_elem(&map, &key, &value, flags)
* Return: 0 on success or negative error
*
* int bpf_map_delete_elem(&map, &key)
* Return: 0 on success or negative error
*
* int bpf_probe_read(void *dst, int size, void *src)
* Return: 0 on success or negative error
*
* u64 bpf_ktime_get_ns(void)
* Return: current ktime
*
* int bpf_trace_printk(const char *fmt, int fmt_size, ...)
* Return: length of buffer written or negative error
*
* u32 bpf_get_prandom_u32(void)
* Return: random value
*
* u32 bpf_get_smp_processor_id(void)
* Return: SMP processor ID
*
* int bpf_skb_store_bytes(skb, offset, from, len, flags)
* store bytes into packet
* @skb: pointer to skb
* @offset: offset within packet from skb->mac_header
* @from: pointer where to copy bytes from
* @len: number of bytes to store into packet
* @flags: bit 0 - if true, recompute skb->csum
* other bits - reserved
* Return: 0 on success or negative error
*
* int bpf_l3_csum_replace(skb, offset, from, to, flags)
* recompute IP checksum
* @skb: pointer to skb
* @offset: offset within packet where IP checksum is located
* @from: old value of header field
* @to: new value of header field
* @flags: bits 0-3 - size of header field
* other bits - reserved
* Return: 0 on success or negative error
*
* int bpf_l4_csum_replace(skb, offset, from, to, flags)
* recompute TCP/UDP checksum
* @skb: pointer to skb
* @offset: offset within packet where TCP/UDP checksum is located
* @from: old value of header field
* @to: new value of header field
* @flags: bits 0-3 - size of header field
* bit 4 - is pseudo header
* other bits - reserved
* Return: 0 on success or negative error
*
* int bpf_tail_call(ctx, prog_array_map, index)
* jump into another BPF program
* @ctx: context pointer passed to next program
* @prog_array_map: pointer to map whose type is BPF_MAP_TYPE_PROG_ARRAY
* @index: index inside array that selects specific program to run
* Return: 0 on success or negative error
*
* int bpf_clone_redirect(skb, ifindex, flags)
* redirect to another netdev
* @skb: pointer to skb
* @ifindex: ifindex of the net device
* @flags: bit 0 - if set, redirect to ingress instead of egress
* other bits - reserved
* Return: 0 on success or negative error
*
* u64 bpf_get_current_pid_tgid(void)
* Return: current->tgid << 32 | current->pid
*
* u64 bpf_get_current_uid_gid(void)
* Return: current_gid << 32 | current_uid
*
* int bpf_get_current_comm(char *buf, int size_of_buf)
* stores current->comm into buf
* Return: 0 on success or negative error
*
* u32 bpf_get_cgroup_classid(skb)
* retrieve a proc's classid
* @skb: pointer to skb
* Return: classid if != 0
*
* int bpf_skb_vlan_push(skb, vlan_proto, vlan_tci)
* Return: 0 on success or negative error
*
* int bpf_skb_vlan_pop(skb)
* Return: 0 on success or negative error
*
* int bpf_skb_get_tunnel_key(skb, key, size, flags)
* int bpf_skb_set_tunnel_key(skb, key, size, flags)
* retrieve or populate tunnel metadata
* @skb: pointer to skb
* @key: pointer to 'struct bpf_tunnel_key'
* @size: size of 'struct bpf_tunnel_key'
* @flags: room for future extensions
* Return: 0 on success or negative error
*
* u64 bpf_perf_event_read(&map, index)
* Return: Number of events read or error code
*
* int bpf_redirect(ifindex, flags)
* redirect to another netdev
* @ifindex: ifindex of the net device
* @flags: bit 0 - if set, redirect to ingress instead of egress
* other bits - reserved
* Return: TC_ACT_REDIRECT
*
* u32 bpf_get_route_realm(skb)
* retrieve a dst's tclassid
* @skb: pointer to skb
* Return: realm if != 0
*
* int bpf_perf_event_output(ctx, map, index, data, size)
* output perf raw sample
* @ctx: struct pt_regs*
* @map: pointer to perf_event_array map
* @index: index of event in the map
* @data: data on stack to be output as raw data
* @size: size of data
* Return: 0 on success or negative error
*
* int bpf_get_stackid(ctx, map, flags)
* walk user or kernel stack and return id
* @ctx: struct pt_regs*
* @map: pointer to stack_trace map
* @flags: bits 0-7 - number of stack frames to skip
* bit 8 - collect user stack instead of kernel
* bit 9 - compare stacks by hash only
* bit 10 - if two different stacks hash into the same stackid
* discard old
* other bits - reserved
* Return: >= 0 stackid on success or negative error
*
* s64 bpf_csum_diff(from, from_size, to, to_size, seed)
* calculate csum diff
* @from: raw from buffer
* @from_size: length of from buffer
* @to: raw to buffer
* @to_size: length of to buffer
* @seed: optional seed
* Return: csum result or negative error code
*
* int bpf_skb_get_tunnel_opt(skb, opt, size)
* retrieve tunnel options metadata
* @skb: pointer to skb
* @opt: pointer to raw tunnel option data
* @size: size of @opt
* Return: option size
*
* int bpf_skb_set_tunnel_opt(skb, opt, size)
* populate tunnel options metadata
* @skb: pointer to skb
* @opt: pointer to raw tunnel option data
* @size: size of @opt
* Return: 0 on success or negative error
*
* int bpf_skb_change_proto(skb, proto, flags)
* Change protocol of the skb. Currently supported is v4 -> v6,
* v6 -> v4 transitions. The helper will also resize the skb. eBPF
* program is expected to fill the new headers via skb_store_bytes
* and lX_csum_replace.
* @skb: pointer to skb
* @proto: new skb->protocol type
* @flags: reserved
* Return: 0 on success or negative error
*
* int bpf_skb_change_type(skb, type)
* Change packet type of skb.
* @skb: pointer to skb
* @type: new skb->pkt_type type
* Return: 0 on success or negative error
*
* int bpf_skb_under_cgroup(skb, map, index)
* Check cgroup2 membership of skb
* @skb: pointer to skb
* @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
* @index: index of the cgroup in the bpf_map
* Return:
* == 0 skb failed the cgroup2 descendant test
* == 1 skb succeeded the cgroup2 descendant test
* < 0 error
*
* u32 bpf_get_hash_recalc(skb)
* Retrieve and possibly recalculate skb->hash.
* @skb: pointer to skb
* Return: hash
*
* u64 bpf_get_current_task(void)
* Returns current task_struct
* Return: current
*
* int bpf_probe_write_user(void *dst, void *src, int len)
* safely attempt to write to a location
* @dst: destination address in userspace
* @src: source address on stack
* @len: number of bytes to copy
* Return: 0 on success or negative error
*
* int bpf_current_task_under_cgroup(map, index)
* Check cgroup2 membership of current task
* @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
* @index: index of the cgroup in the bpf_map
* Return:
* == 0 current failed the cgroup2 descendant test
* == 1 current succeeded the cgroup2 descendant test
* < 0 error
*
* int bpf_skb_change_tail(skb, len, flags)
* The helper will resize the skb to the given new size, to be used f.e.
* with control messages.
* @skb: pointer to skb
* @len: new skb length
* @flags: reserved
* Return: 0 on success or negative error
*
* int bpf_skb_pull_data(skb, len)
* The helper will pull in non-linear data in case the skb is non-linear
* and not all of len are part of the linear section. Only needed for
* read/write with direct packet access.
* @skb: pointer to skb
* @len: len to make read/writeable
* Return: 0 on success or negative error
*
* s64 bpf_csum_update(skb, csum)
* Adds csum into skb->csum in case of CHECKSUM_COMPLETE.
* @skb: pointer to skb
* @csum: csum to add
* Return: csum on success or negative error
*
* void bpf_set_hash_invalid(skb)
* Invalidate current skb->hash.
* @skb: pointer to skb
*
* int bpf_get_numa_node_id()
* Return: Id of current NUMA node.
*
* int bpf_skb_change_head(skb, len, flags)
* Grows headroom of skb and adjusts MAC header offset accordingly.
* Will extend/reallocate as required automatically.
* May change skb data pointer and will thus invalidate any check
* performed for direct packet access.
* @skb: pointer to skb
* @len: length of header to be pushed in front
* @flags: Flags (unused for now)
* Return: 0 on success or negative error
*
* int bpf_xdp_adjust_head(xdp_md, delta)
* Adjust the xdp_md.data by delta
* @xdp_md: pointer to xdp_md
* @delta: A positive/negative integer to be added to xdp_md.data
* Return: 0 on success or negative on error
*/
/* X-macro listing every helper name once: FN is applied to each name and the
 * results are comma-separated. enum bpf_func_id expands this list with
 * __BPF_ENUM_FN, so a name's position here determines its numeric ID.
 * The last entry keeps its trailing comma so the expanding site can follow
 * the list directly with another enumerator (__BPF_FUNC_MAX_ID).
 * NOTE(review): presumably new helpers must only be appended at the end so
 * existing IDs stay stable - confirm against the UAPI policy.
 */
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
FN(map_lookup_elem), \
FN(map_update_elem), \
FN(map_delete_elem), \
FN(probe_read), \
FN(ktime_get_ns), \
FN(trace_printk), \
FN(get_prandom_u32), \
FN(get_smp_processor_id), \
FN(skb_store_bytes), \
FN(l3_csum_replace), \
FN(l4_csum_replace), \
FN(tail_call), \
FN(clone_redirect), \
FN(get_current_pid_tgid), \
FN(get_current_uid_gid), \
FN(get_current_comm), \
FN(get_cgroup_classid), \
FN(skb_vlan_push), \
FN(skb_vlan_pop), \
FN(skb_get_tunnel_key), \
FN(skb_set_tunnel_key), \
FN(perf_event_read), \
FN(redirect), \
FN(get_route_realm), \
FN(perf_event_output), \
FN(skb_load_bytes), \
FN(get_stackid), \
FN(csum_diff), \
FN(skb_get_tunnel_opt), \
FN(skb_set_tunnel_opt), \
FN(skb_change_proto), \
FN(skb_change_type), \
FN(skb_under_cgroup), \
FN(get_hash_recalc), \
FN(get_current_task), \
FN(probe_write_user), \
FN(current_task_under_cgroup), \
FN(skb_change_tail), \
FN(skb_pull_data), \
FN(csum_update), \
FN(set_hash_invalid), \
FN(get_numa_node_id), \
FN(skb_change_head), \
FN(xdp_adjust_head),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper /* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call * function eBPF program intends to call
*/ */
#define __BPF_ENUM_FN(x) BPF_FUNC_ ## x
enum bpf_func_id { enum bpf_func_id {
BPF_FUNC_unspec, __BPF_FUNC_MAPPER(__BPF_ENUM_FN)
BPF_FUNC_map_lookup_elem, /* void *map_lookup_elem(&map, &key) */
BPF_FUNC_map_update_elem, /* int map_update_elem(&map, &key, &value, flags) */
BPF_FUNC_map_delete_elem, /* int map_delete_elem(&map, &key) */
BPF_FUNC_probe_read, /* int bpf_probe_read(void *dst, int size, void *src) */
BPF_FUNC_ktime_get_ns, /* u64 bpf_ktime_get_ns(void) */
BPF_FUNC_trace_printk, /* int bpf_trace_printk(const char *fmt, int fmt_size, ...) */
BPF_FUNC_get_prandom_u32, /* u32 prandom_u32(void) */
BPF_FUNC_get_smp_processor_id, /* u32 raw_smp_processor_id(void) */
/**
* skb_store_bytes(skb, offset, from, len, flags) - store bytes into packet
* @skb: pointer to skb
* @offset: offset within packet from skb->mac_header
* @from: pointer where to copy bytes from
* @len: number of bytes to store into packet
* @flags: bit 0 - if true, recompute skb->csum
* other bits - reserved
* Return: 0 on success
*/
BPF_FUNC_skb_store_bytes,
/**
* l3_csum_replace(skb, offset, from, to, flags) - recompute IP checksum
* @skb: pointer to skb
* @offset: offset within packet where IP checksum is located
* @from: old value of header field
* @to: new value of header field
* @flags: bits 0-3 - size of header field
* other bits - reserved
* Return: 0 on success
*/
BPF_FUNC_l3_csum_replace,
/**
* l4_csum_replace(skb, offset, from, to, flags) - recompute TCP/UDP checksum
* @skb: pointer to skb
* @offset: offset within packet where TCP/UDP checksum is located
* @from: old value of header field
* @to: new value of header field
* @flags: bits 0-3 - size of header field
* bit 4 - is pseudo header
* other bits - reserved
* Return: 0 on success
*/
BPF_FUNC_l4_csum_replace,
/**
* bpf_tail_call(ctx, prog_array_map, index) - jump into another BPF program
* @ctx: context pointer passed to next program
* @prog_array_map: pointer to map which type is BPF_MAP_TYPE_PROG_ARRAY
* @index: index inside array that selects specific program to run
* Return: 0 on success
*/
BPF_FUNC_tail_call,
/**
* bpf_clone_redirect(skb, ifindex, flags) - redirect to another netdev
* @skb: pointer to skb
* @ifindex: ifindex of the net device
* @flags: bit 0 - if set, redirect to ingress instead of egress
* other bits - reserved
* Return: 0 on success
*/
BPF_FUNC_clone_redirect,
/**
* u64 bpf_get_current_pid_tgid(void)
* Return: current->tgid << 32 | current->pid
*/
BPF_FUNC_get_current_pid_tgid,
/**
* u64 bpf_get_current_uid_gid(void)
* Return: current_gid << 32 | current_uid
*/
BPF_FUNC_get_current_uid_gid,
/**
* bpf_get_current_comm(char *buf, int size_of_buf)
* stores current->comm into buf
* Return: 0 on success
*/
BPF_FUNC_get_current_comm,
/**
* bpf_get_cgroup_classid(skb) - retrieve a proc's classid
* @skb: pointer to skb
* Return: classid if != 0
*/
BPF_FUNC_get_cgroup_classid,
BPF_FUNC_skb_vlan_push, /* bpf_skb_vlan_push(skb, vlan_proto, vlan_tci) */
BPF_FUNC_skb_vlan_pop, /* bpf_skb_vlan_pop(skb) */
/**
* bpf_skb_[gs]et_tunnel_key(skb, key, size, flags)
* retrieve or populate tunnel metadata
* @skb: pointer to skb
* @key: pointer to 'struct bpf_tunnel_key'
* @size: size of 'struct bpf_tunnel_key'
* @flags: room for future extensions
* Return: 0 on success
*/
BPF_FUNC_skb_get_tunnel_key,
BPF_FUNC_skb_set_tunnel_key,
BPF_FUNC_perf_event_read, /* u64 bpf_perf_event_read(&map, index) */
/**
* bpf_redirect(ifindex, flags) - redirect to another netdev
* @ifindex: ifindex of the net device
* @flags: bit 0 - if set, redirect to ingress instead of egress
* other bits - reserved
* Return: TC_ACT_REDIRECT
*/
BPF_FUNC_redirect,
/**
* bpf_get_route_realm(skb) - retrieve a dst's tclassid
* @skb: pointer to skb
* Return: realm if != 0
*/
BPF_FUNC_get_route_realm,
/**
* bpf_perf_event_output(ctx, map, index, data, size) - output perf raw sample
* @ctx: struct pt_regs*
* @map: pointer to perf_event_array map
* @index: index of event in the map
* @data: data on stack to be output as raw data
* @size: size of data
* Return: 0 on success
*/
BPF_FUNC_perf_event_output,
BPF_FUNC_skb_load_bytes,
/**
* bpf_get_stackid(ctx, map, flags) - walk user or kernel stack and return id
* @ctx: struct pt_regs*
* @map: pointer to stack_trace map
* @flags: bits 0-7 - number of stack frames to skip
* bit 8 - collect user stack instead of kernel
* bit 9 - compare stacks by hash only
* bit 10 - if two different stacks hash into the same stackid
* discard old
* other bits - reserved
* Return: >= 0 stackid on success or negative error
*/
BPF_FUNC_get_stackid,
/**
* bpf_csum_diff(from, from_size, to, to_size, seed) - calculate csum diff
* @from: raw from buffer
* @from_size: length of from buffer
* @to: raw to buffer
* @to_size: length of to buffer
* @seed: optional seed
* Return: csum result
*/
BPF_FUNC_csum_diff,
/**
* bpf_skb_[gs]et_tunnel_opt(skb, opt, size)
* retrieve or populate tunnel options metadata
* @skb: pointer to skb
* @opt: pointer to raw tunnel option data
* @size: size of @opt
* Return: 0 on success for set, option size for get
*/
BPF_FUNC_skb_get_tunnel_opt,
BPF_FUNC_skb_set_tunnel_opt,
/**
* bpf_skb_change_proto(skb, proto, flags)
* Change protocol of the skb. Currently supported is
* v4 -> v6, v6 -> v4 transitions. The helper will also
* resize the skb. eBPF program is expected to fill the
* new headers via skb_store_bytes and lX_csum_replace.
* @skb: pointer to skb
* @proto: new skb->protocol type
* @flags: reserved
* Return: 0 on success or negative error
*/
BPF_FUNC_skb_change_proto,
/**
* bpf_skb_change_type(skb, type)
* Change packet type of skb.
* @skb: pointer to skb
* @type: new skb->pkt_type type
* Return: 0 on success or negative error
*/
BPF_FUNC_skb_change_type,
/**
* bpf_skb_under_cgroup(skb, map, index) - Check cgroup2 membership of skb
* @skb: pointer to skb
* @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
* @index: index of the cgroup in the bpf_map
* Return:
* == 0 skb failed the cgroup2 descendant test
* == 1 skb succeeded the cgroup2 descendant test
* < 0 error
*/
BPF_FUNC_skb_under_cgroup,
/**
* bpf_get_hash_recalc(skb)
* Retrieve and possibly recalculate skb->hash.
* @skb: pointer to skb
* Return: hash
*/
BPF_FUNC_get_hash_recalc,
/**
* u64 bpf_get_current_task(void)
* Returns current task_struct
* Return: current
*/
BPF_FUNC_get_current_task,
/**
* bpf_probe_write_user(void *dst, void *src, int len)
* safely attempt to write to a location
* @dst: destination address in userspace
* @src: source address on stack
* @len: number of bytes to copy
* Return: 0 on success or negative error
*/
BPF_FUNC_probe_write_user,
__BPF_FUNC_MAX_ID, __BPF_FUNC_MAX_ID,
}; };
#undef __BPF_ENUM_FN
/* All flags used by eBPF helper functions, placed here. */ /* All flags used by eBPF helper functions, placed here. */
...@@ -450,6 +560,31 @@ struct bpf_tunnel_key { ...@@ -450,6 +560,31 @@ struct bpf_tunnel_key {
__u32 tunnel_label; __u32 tunnel_label;
}; };
/* Generic BPF return codes which all BPF program types may support.
* The values are binary compatible with their TC_ACT_* counter-part to
* provide backwards compatibility with existing SCHED_CLS and SCHED_ACT
* programs.
*
* XDP is handled separately, see XDP_*.
*/
enum bpf_ret_code {
BPF_OK = 0, /* presumably the TC_ACT_OK counterpart - confirm against pkt_cls.h */
/* 1 reserved */
BPF_DROP = 2, /* presumably the TC_ACT_SHOT counterpart - confirm against pkt_cls.h */
/* 3-6 reserved */
BPF_REDIRECT = 7, /* presumably the TC_ACT_REDIRECT counterpart - confirm against pkt_cls.h */
/* >127 are reserved for prog type specific return codes */
};
/* Four 32-bit socket attributes.
 * NOTE(review): presumably the context handed to socket-attached BPF
 * programs - which program types use it is not visible here; confirm.
 */
struct bpf_sock {
__u32 bound_dev_if; /* presumably the ifindex the socket is bound to - confirm */
__u32 family; /* presumably the address family (AF_*) - confirm */
__u32 type; /* presumably the socket type (SOCK_*) - confirm */
__u32 protocol; /* presumably the protocol number (IPPROTO_*) - confirm */
};
#define XDP_PACKET_HEADROOM 256
/* User return codes for XDP prog type. /* User return codes for XDP prog type.
* A valid XDP program must return one of these defined values. All other * A valid XDP program must return one of these defined values. All other
* return codes are reserved for future use. Unknown return codes will result * return codes are reserved for future use. Unknown return codes will result
......
...@@ -54,7 +54,7 @@ static int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr, ...@@ -54,7 +54,7 @@ static int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr,
} }
int bpf_create_map(enum bpf_map_type map_type, int key_size, int bpf_create_map(enum bpf_map_type map_type, int key_size,
int value_size, int max_entries) int value_size, int max_entries, __u32 map_flags)
{ {
union bpf_attr attr; union bpf_attr attr;
...@@ -64,13 +64,14 @@ int bpf_create_map(enum bpf_map_type map_type, int key_size, ...@@ -64,13 +64,14 @@ int bpf_create_map(enum bpf_map_type map_type, int key_size,
attr.key_size = key_size; attr.key_size = key_size;
attr.value_size = value_size; attr.value_size = value_size;
attr.max_entries = max_entries; attr.max_entries = max_entries;
attr.map_flags = map_flags;
return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr)); return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
} }
int bpf_load_program(enum bpf_prog_type type, struct bpf_insn *insns, int bpf_load_program(enum bpf_prog_type type, struct bpf_insn *insns,
size_t insns_cnt, char *license, size_t insns_cnt, char *license,
u32 kern_version, char *log_buf, size_t log_buf_sz) __u32 kern_version, char *log_buf, size_t log_buf_sz)
{ {
int fd; int fd;
union bpf_attr attr; union bpf_attr attr;
...@@ -98,7 +99,7 @@ int bpf_load_program(enum bpf_prog_type type, struct bpf_insn *insns, ...@@ -98,7 +99,7 @@ int bpf_load_program(enum bpf_prog_type type, struct bpf_insn *insns,
} }
int bpf_map_update_elem(int fd, void *key, void *value, int bpf_map_update_elem(int fd, void *key, void *value,
u64 flags) __u64 flags)
{ {
union bpf_attr attr; union bpf_attr attr;
...@@ -166,3 +167,26 @@ int bpf_obj_get(const char *pathname) ...@@ -166,3 +167,26 @@ int bpf_obj_get(const char *pathname)
return sys_bpf(BPF_OBJ_GET, &attr, sizeof(attr)); return sys_bpf(BPF_OBJ_GET, &attr, sizeof(attr));
} }
int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type)
{
union bpf_attr attr;
bzero(&attr, sizeof(attr));
attr.target_fd = target_fd;
attr.attach_bpf_fd = prog_fd;
attr.attach_type = type;
return sys_bpf(BPF_PROG_ATTACH, &attr, sizeof(attr));
}
int bpf_prog_detach(int target_fd, enum bpf_attach_type type)
{
union bpf_attr attr;
bzero(&attr, sizeof(attr));
attr.target_fd = target_fd;
attr.attach_type = type;
return sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr));
}
...@@ -24,22 +24,25 @@ ...@@ -24,22 +24,25 @@
#include <linux/bpf.h> #include <linux/bpf.h>
int bpf_create_map(enum bpf_map_type map_type, int key_size, int value_size, int bpf_create_map(enum bpf_map_type map_type, int key_size, int value_size,
int max_entries); int max_entries, __u32 map_flags);
/* Recommend log buffer size */ /* Recommend log buffer size */
#define BPF_LOG_BUF_SIZE 65536 #define BPF_LOG_BUF_SIZE 65536
int bpf_load_program(enum bpf_prog_type type, struct bpf_insn *insns, int bpf_load_program(enum bpf_prog_type type, struct bpf_insn *insns,
size_t insns_cnt, char *license, size_t insns_cnt, char *license,
u32 kern_version, char *log_buf, __u32 kern_version, char *log_buf,
size_t log_buf_sz); size_t log_buf_sz);
int bpf_map_update_elem(int fd, void *key, void *value, int bpf_map_update_elem(int fd, void *key, void *value,
u64 flags); __u64 flags);
int bpf_map_lookup_elem(int fd, void *key, void *value); int bpf_map_lookup_elem(int fd, void *key, void *value);
int bpf_map_delete_elem(int fd, void *key); int bpf_map_delete_elem(int fd, void *key);
int bpf_map_get_next_key(int fd, void *key, void *next_key); int bpf_map_get_next_key(int fd, void *key, void *next_key);
int bpf_obj_pin(int fd, const char *pathname); int bpf_obj_pin(int fd, const char *pathname);
int bpf_obj_get(const char *pathname); int bpf_obj_get(const char *pathname);
int bpf_prog_attach(int prog_fd, int attachable_fd, enum bpf_attach_type type);
int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type);
#endif #endif
...@@ -854,7 +854,8 @@ bpf_object__create_maps(struct bpf_object *obj) ...@@ -854,7 +854,8 @@ bpf_object__create_maps(struct bpf_object *obj)
*pfd = bpf_create_map(def->type, *pfd = bpf_create_map(def->type,
def->key_size, def->key_size,
def->value_size, def->value_size,
def->max_entries); def->max_entries,
0);
if (*pfd < 0) { if (*pfd < 0) {
size_t j; size_t j;
int err = *pfd; int err = *pfd;
......
...@@ -132,6 +132,10 @@ OPTIONS for 'perf sched timehist' ...@@ -132,6 +132,10 @@ OPTIONS for 'perf sched timehist'
--migrations:: --migrations::
Show migration events. Show migration events.
-I::
--idle-hist::
Show idle-related events only.
--time:: --time::
Only analyze samples within given time window: <start>,<stop>. Times Only analyze samples within given time window: <start>,<stop>. Times
have the format seconds.microseconds. If start is not given (i.e., time have the format seconds.microseconds. If start is not given (i.e., time
......
...@@ -201,6 +201,7 @@ goals := $(filter-out all sub-make, $(MAKECMDGOALS)) ...@@ -201,6 +201,7 @@ goals := $(filter-out all sub-make, $(MAKECMDGOALS))
$(goals) all: sub-make $(goals) all: sub-make
sub-make: fixdep sub-make: fixdep
@./check-headers.sh
$(Q)$(MAKE) FIXDEP=1 -f Makefile.perf $(goals) $(Q)$(MAKE) FIXDEP=1 -f Makefile.perf $(goals)
else # force_fixdep else # force_fixdep
...@@ -404,99 +405,6 @@ export JEVENTS ...@@ -404,99 +405,6 @@ export JEVENTS
build := -f $(srctree)/tools/build/Makefile.build dir=. obj build := -f $(srctree)/tools/build/Makefile.build dir=. obj
$(PERF_IN): prepare FORCE $(PERF_IN): prepare FORCE
@(test -f ../../include/uapi/linux/perf_event.h && ( \
(diff -B ../include/uapi/linux/perf_event.h ../../include/uapi/linux/perf_event.h >/dev/null) \
|| echo "Warning: tools/include/uapi/linux/perf_event.h differs from kernel" >&2 )) || true
@(test -f ../../include/linux/hash.h && ( \
(diff -B ../include/linux/hash.h ../../include/linux/hash.h >/dev/null) \
|| echo "Warning: tools/include/linux/hash.h differs from kernel" >&2 )) || true
@(test -f ../../include/uapi/linux/hw_breakpoint.h && ( \
(diff -B ../include/uapi/linux/hw_breakpoint.h ../../include/uapi/linux/hw_breakpoint.h >/dev/null) \
|| echo "Warning: tools/include/uapi/linux/hw_breakpoint.h differs from kernel" >&2 )) || true
@(test -f ../../arch/x86/include/asm/disabled-features.h && ( \
(diff -B ../arch/x86/include/asm/disabled-features.h ../../arch/x86/include/asm/disabled-features.h >/dev/null) \
|| echo "Warning: tools/arch/x86/include/asm/disabled-features.h differs from kernel" >&2 )) || true
@(test -f ../../arch/x86/include/asm/required-features.h && ( \
(diff -B ../arch/x86/include/asm/required-features.h ../../arch/x86/include/asm/required-features.h >/dev/null) \
|| echo "Warning: tools/arch/x86/include/asm/required-features.h differs from kernel" >&2 )) || true
@(test -f ../../arch/x86/include/asm/cpufeatures.h && ( \
(diff -B ../arch/x86/include/asm/cpufeatures.h ../../arch/x86/include/asm/cpufeatures.h >/dev/null) \
|| echo "Warning: tools/arch/x86/include/asm/cpufeatures.h differs from kernel" >&2 )) || true
@(test -f ../../arch/x86/lib/memcpy_64.S && ( \
(diff -B -I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>" ../arch/x86/lib/memcpy_64.S ../../arch/x86/lib/memcpy_64.S >/dev/null) \
|| echo "Warning: tools/arch/x86/lib/memcpy_64.S differs from kernel" >&2 )) || true
@(test -f ../../arch/x86/lib/memset_64.S && ( \
(diff -B -I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>" ../arch/x86/lib/memset_64.S ../../arch/x86/lib/memset_64.S >/dev/null) \
|| echo "Warning: tools/arch/x86/lib/memset_64.S differs from kernel" >&2 )) || true
@(test -f ../../arch/arm/include/uapi/asm/perf_regs.h && ( \
(diff -B ../arch/arm/include/uapi/asm/perf_regs.h ../../arch/arm/include/uapi/asm/perf_regs.h >/dev/null) \
|| echo "Warning: tools/arch/arm/include/uapi/asm/perf_regs.h differs from kernel" >&2 )) || true
@(test -f ../../arch/arm64/include/uapi/asm/perf_regs.h && ( \
(diff -B ../arch/arm64/include/uapi/asm/perf_regs.h ../../arch/arm64/include/uapi/asm/perf_regs.h >/dev/null) \
|| echo "Warning: tools/arch/arm64/include/uapi/asm/perf_regs.h differs from kernel" >&2 )) || true
@(test -f ../../arch/powerpc/include/uapi/asm/perf_regs.h && ( \
(diff -B ../arch/powerpc/include/uapi/asm/perf_regs.h ../../arch/powerpc/include/uapi/asm/perf_regs.h >/dev/null) \
|| echo "Warning: tools/arch/powerpc/include/uapi/asm/perf_regs.h differs from kernel" >&2 )) || true
@(test -f ../../arch/x86/include/uapi/asm/perf_regs.h && ( \
(diff -B ../arch/x86/include/uapi/asm/perf_regs.h ../../arch/x86/include/uapi/asm/perf_regs.h >/dev/null) \
|| echo "Warning: tools/arch/x86/include/uapi/asm/perf_regs.h differs from kernel" >&2 )) || true
@(test -f ../../arch/x86/include/uapi/asm/kvm.h && ( \
(diff -B ../arch/x86/include/uapi/asm/kvm.h ../../arch/x86/include/uapi/asm/kvm.h >/dev/null) \
|| echo "Warning: tools/arch/x86/include/uapi/asm/kvm.h differs from kernel" >&2 )) || true
@(test -f ../../arch/x86/include/uapi/asm/kvm_perf.h && ( \
(diff -B ../arch/x86/include/uapi/asm/kvm_perf.h ../../arch/x86/include/uapi/asm/kvm_perf.h >/dev/null) \
|| echo "Warning: tools/arch/x86/include/uapi/asm/kvm_perf.h differs from kernel" >&2 )) || true
@(test -f ../../arch/x86/include/uapi/asm/svm.h && ( \
(diff -B ../arch/x86/include/uapi/asm/svm.h ../../arch/x86/include/uapi/asm/svm.h >/dev/null) \
|| echo "Warning: tools/arch/x86/include/uapi/asm/svm.h differs from kernel" >&2 )) || true
@(test -f ../../arch/x86/include/uapi/asm/vmx.h && ( \
(diff -B ../arch/x86/include/uapi/asm/vmx.h ../../arch/x86/include/uapi/asm/vmx.h >/dev/null) \
|| echo "Warning: tools/arch/x86/include/uapi/asm/vmx.h differs from kernel" >&2 )) || true
@(test -f ../../arch/powerpc/include/uapi/asm/kvm.h && ( \
(diff -B ../arch/powerpc/include/uapi/asm/kvm.h ../../arch/powerpc/include/uapi/asm/kvm.h >/dev/null) \
|| echo "Warning: tools/arch/powerpc/include/uapi/asm/kvm.h differs from kernel" >&2 )) || true
@(test -f ../../arch/s390/include/uapi/asm/kvm.h && ( \
(diff -B ../arch/s390/include/uapi/asm/kvm.h ../../arch/s390/include/uapi/asm/kvm.h >/dev/null) \
|| echo "Warning: tools/arch/s390/include/uapi/asm/kvm.h differs from kernel" >&2 )) || true
@(test -f ../../arch/s390/include/uapi/asm/kvm_perf.h && ( \
(diff -B ../arch/s390/include/uapi/asm/kvm_perf.h ../../arch/s390/include/uapi/asm/kvm_perf.h >/dev/null) \
|| echo "Warning: tools/arch/s390/include/uapi/asm/kvm_perf.h differs from kernel" >&2 )) || true
@(test -f ../../arch/s390/include/uapi/asm/sie.h && ( \
(diff -B ../arch/s390/include/uapi/asm/sie.h ../../arch/s390/include/uapi/asm/sie.h >/dev/null) \
|| echo "Warning: tools/arch/s390/include/uapi/asm/sie.h differs from kernel" >&2 )) || true
@(test -f ../../arch/arm/include/uapi/asm/kvm.h && ( \
(diff -B ../arch/arm/include/uapi/asm/kvm.h ../../arch/arm/include/uapi/asm/kvm.h >/dev/null) \
|| echo "Warning: tools/arch/arm/include/uapi/asm/kvm.h differs from kernel" >&2 )) || true
@(test -f ../../arch/arm64/include/uapi/asm/kvm.h && ( \
(diff -B ../arch/arm64/include/uapi/asm/kvm.h ../../arch/arm64/include/uapi/asm/kvm.h >/dev/null) \
|| echo "Warning: tools/arch/arm64/include/uapi/asm/kvm.h differs from kernel" >&2 )) || true
@(test -f ../../include/asm-generic/bitops/arch_hweight.h && ( \
(diff -B ../include/asm-generic/bitops/arch_hweight.h ../../include/asm-generic/bitops/arch_hweight.h >/dev/null) \
|| echo "Warning: tools/include/asm-generic/bitops/arch_hweight.h differs from kernel" >&2 )) || true
@(test -f ../../include/asm-generic/bitops/const_hweight.h && ( \
(diff -B ../include/asm-generic/bitops/const_hweight.h ../../include/asm-generic/bitops/const_hweight.h >/dev/null) \
|| echo "Warning: tools/include/asm-generic/bitops/const_hweight.h differs from kernel" >&2 )) || true
@(test -f ../../include/asm-generic/bitops/__fls.h && ( \
(diff -B ../include/asm-generic/bitops/__fls.h ../../include/asm-generic/bitops/__fls.h >/dev/null) \
|| echo "Warning: tools/include/asm-generic/bitops/__fls.h differs from kernel" >&2 )) || true
@(test -f ../../include/asm-generic/bitops/fls.h && ( \
(diff -B ../include/asm-generic/bitops/fls.h ../../include/asm-generic/bitops/fls.h >/dev/null) \
|| echo "Warning: tools/include/asm-generic/bitops/fls.h differs from kernel" >&2 )) || true
@(test -f ../../include/asm-generic/bitops/fls64.h && ( \
(diff -B ../include/asm-generic/bitops/fls64.h ../../include/asm-generic/bitops/fls64.h >/dev/null) \
|| echo "Warning: tools/include/asm-generic/bitops/fls64.h differs from kernel" >&2 )) || true
@(test -f ../../include/linux/coresight-pmu.h && ( \
(diff -B ../include/linux/coresight-pmu.h ../../include/linux/coresight-pmu.h >/dev/null) \
|| echo "Warning: tools/include/linux/coresight-pmu.h differs from kernel" >&2 )) || true
@(test -f ../../include/uapi/asm-generic/mman-common.h && ( \
(diff -B ../include/uapi/asm-generic/mman-common.h ../../include/uapi/asm-generic/mman-common.h >/dev/null) \
|| echo "Warning: tools/include/uapi/asm-generic/mman-common.h differs from kernel" >&2 )) || true
@(test -f ../../include/uapi/asm-generic/mman.h && ( \
(diff -B -I "^#include <\(uapi/\)*asm-generic/mman-common.h>$$" ../include/uapi/asm-generic/mman.h ../../include/uapi/asm-generic/mman.h >/dev/null) \
|| echo "Warning: tools/include/uapi/asm-generic/mman.h differs from kernel" >&2 )) || true
@(test -f ../../include/uapi/linux/mman.h && ( \
(diff -B -I "^#include <\(uapi/\)*asm/mman.h>$$" ../include/uapi/linux/mman.h ../../include/uapi/linux/mman.h >/dev/null) \
|| echo "Warning: tools/include/uapi/linux/mman.h differs from kernel" >&2 )) || true
$(Q)$(MAKE) $(build)=perf $(Q)$(MAKE) $(build)=perf
$(JEVENTS_IN): FORCE $(JEVENTS_IN): FORCE
......
...@@ -48,7 +48,7 @@ static const struct option options[] = { ...@@ -48,7 +48,7 @@ static const struct option options[] = {
}; };
static const char * const bench_futex_lock_pi_usage[] = { static const char * const bench_futex_lock_pi_usage[] = {
"perf bench futex requeue <options>", "perf bench futex lock-pi <options>",
NULL NULL
}; };
......
...@@ -208,7 +208,7 @@ static void compute_stats(struct c2c_hist_entry *c2c_he, ...@@ -208,7 +208,7 @@ static void compute_stats(struct c2c_hist_entry *c2c_he,
static int process_sample_event(struct perf_tool *tool __maybe_unused, static int process_sample_event(struct perf_tool *tool __maybe_unused,
union perf_event *event, union perf_event *event,
struct perf_sample *sample, struct perf_sample *sample,
struct perf_evsel *evsel __maybe_unused, struct perf_evsel *evsel,
struct machine *machine) struct machine *machine)
{ {
struct c2c_hists *c2c_hists = &c2c.hists; struct c2c_hists *c2c_hists = &c2c.hists;
...@@ -379,7 +379,7 @@ static int symbol_width(struct hists *hists, struct sort_entry *se) ...@@ -379,7 +379,7 @@ static int symbol_width(struct hists *hists, struct sort_entry *se)
static int c2c_width(struct perf_hpp_fmt *fmt, static int c2c_width(struct perf_hpp_fmt *fmt,
struct perf_hpp *hpp __maybe_unused, struct perf_hpp *hpp __maybe_unused,
struct hists *hists __maybe_unused) struct hists *hists)
{ {
struct c2c_fmt *c2c_fmt; struct c2c_fmt *c2c_fmt;
struct c2c_dimension *dim; struct c2c_dimension *dim;
...@@ -1127,7 +1127,7 @@ MEAN_ENTRY(mean_lcl_entry, lcl_hitm); ...@@ -1127,7 +1127,7 @@ MEAN_ENTRY(mean_lcl_entry, lcl_hitm);
MEAN_ENTRY(mean_load_entry, load); MEAN_ENTRY(mean_load_entry, load);
static int static int
cpucnt_entry(struct perf_hpp_fmt *fmt __maybe_unused, struct perf_hpp *hpp, cpucnt_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
struct hist_entry *he) struct hist_entry *he)
{ {
struct c2c_hist_entry *c2c_he; struct c2c_hist_entry *c2c_he;
...@@ -1141,7 +1141,7 @@ cpucnt_entry(struct perf_hpp_fmt *fmt __maybe_unused, struct perf_hpp *hpp, ...@@ -1141,7 +1141,7 @@ cpucnt_entry(struct perf_hpp_fmt *fmt __maybe_unused, struct perf_hpp *hpp,
} }
static int static int
cl_idx_entry(struct perf_hpp_fmt *fmt __maybe_unused, struct perf_hpp *hpp, cl_idx_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
struct hist_entry *he) struct hist_entry *he)
{ {
struct c2c_hist_entry *c2c_he; struct c2c_hist_entry *c2c_he;
...@@ -1155,7 +1155,7 @@ cl_idx_entry(struct perf_hpp_fmt *fmt __maybe_unused, struct perf_hpp *hpp, ...@@ -1155,7 +1155,7 @@ cl_idx_entry(struct perf_hpp_fmt *fmt __maybe_unused, struct perf_hpp *hpp,
} }
static int static int
cl_idx_empty_entry(struct perf_hpp_fmt *fmt __maybe_unused, struct perf_hpp *hpp, cl_idx_empty_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
struct hist_entry *he) struct hist_entry *he)
{ {
int width = c2c_width(fmt, hpp, he->hists); int width = c2c_width(fmt, hpp, he->hists);
...@@ -1779,7 +1779,6 @@ static int c2c_hists__init(struct c2c_hists *hists, ...@@ -1779,7 +1779,6 @@ static int c2c_hists__init(struct c2c_hists *hists,
return hpp_list__parse(&hists->list, NULL, sort); return hpp_list__parse(&hists->list, NULL, sort);
} }
__maybe_unused
static int c2c_hists__reinit(struct c2c_hists *c2c_hists, static int c2c_hists__reinit(struct c2c_hists *c2c_hists,
const char *output, const char *output,
const char *sort) const char *sort)
...@@ -2658,7 +2657,7 @@ static int perf_c2c__report(int argc, const char **argv) ...@@ -2658,7 +2657,7 @@ static int perf_c2c__report(int argc, const char **argv)
return err; return err;
} }
static int parse_record_events(const struct option *opt __maybe_unused, static int parse_record_events(const struct option *opt,
const char *str, int unset __maybe_unused) const char *str, int unset __maybe_unused)
{ {
bool *event_set = (bool *) opt->value; bool *event_set = (bool *) opt->value;
......
...@@ -70,8 +70,8 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem) ...@@ -70,8 +70,8 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem)
OPT_UINTEGER(0, "ldlat", &perf_mem_events__loads_ldlat, "mem-loads latency"), OPT_UINTEGER(0, "ldlat", &perf_mem_events__loads_ldlat, "mem-loads latency"),
OPT_INCR('v', "verbose", &verbose, OPT_INCR('v', "verbose", &verbose,
"be more verbose (show counter open errors, etc)"), "be more verbose (show counter open errors, etc)"),
OPT_BOOLEAN('U', "--all-user", &all_user, "collect only user level data"), OPT_BOOLEAN('U', "all-user", &all_user, "collect only user level data"),
OPT_BOOLEAN('K', "--all-kernel", &all_kernel, "collect only kernel level data"), OPT_BOOLEAN('K', "all-kernel", &all_kernel, "collect only kernel level data"),
OPT_END() OPT_END()
}; };
......
...@@ -1687,6 +1687,9 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused) ...@@ -1687,6 +1687,9 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
goto out; goto out;
} }
/* Enable ignoring missing threads when -u option is defined. */
rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX;
err = -ENOMEM; err = -ENOMEM;
if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0) if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
usage_with_options(record_usage, record_options); usage_with_options(record_usage, record_options);
......
...@@ -648,7 +648,7 @@ report_parse_ignore_callees_opt(const struct option *opt __maybe_unused, ...@@ -648,7 +648,7 @@ report_parse_ignore_callees_opt(const struct option *opt __maybe_unused,
} }
static int static int
parse_branch_mode(const struct option *opt __maybe_unused, parse_branch_mode(const struct option *opt,
const char *str __maybe_unused, int unset) const char *str __maybe_unused, int unset)
{ {
int *branch_mode = opt->value; int *branch_mode = opt->value;
......
...@@ -200,6 +200,7 @@ struct perf_sched { ...@@ -200,6 +200,7 @@ struct perf_sched {
/* options for timehist command */ /* options for timehist command */
bool summary; bool summary;
bool summary_only; bool summary_only;
bool idle_hist;
bool show_callchain; bool show_callchain;
unsigned int max_stack; unsigned int max_stack;
bool show_cpu_visual; bool show_cpu_visual;
...@@ -230,6 +231,15 @@ struct evsel_runtime { ...@@ -230,6 +231,15 @@ struct evsel_runtime {
u32 ncpu; /* highest cpu slot allocated */ u32 ncpu; /* highest cpu slot allocated */
}; };
/* per cpu idle time data */
struct idle_thread_runtime {
/* First member; presumably lets code holding a thread_runtime pointer
 * reach this larger struct - confirm against the users of 'tr'.
 */
struct thread_runtime tr;
struct thread *last_thread; /* presumably the task seen just before this CPU went idle - confirm */
struct rb_root sorted_root; /* rb-tree root; presumably callchains sorted for output - confirm */
struct callchain_root callchain; /* callchain tree root; how it is filled is not visible here */
struct callchain_cursor cursor; /* callchain cursor; usage is not visible in this chunk */
};
/* track idle times per cpu */ /* track idle times per cpu */
static struct thread **idle_threads; static struct thread **idle_threads;
static int idle_max_cpu; static int idle_max_cpu;
...@@ -1765,7 +1775,7 @@ static u64 perf_evsel__get_time(struct perf_evsel *evsel, u32 cpu) ...@@ -1765,7 +1775,7 @@ static u64 perf_evsel__get_time(struct perf_evsel *evsel, u32 cpu)
return r->last_time[cpu]; return r->last_time[cpu];
} }
static int comm_width = 20; static int comm_width = 30;
static char *timehist_get_commstr(struct thread *thread) static char *timehist_get_commstr(struct thread *thread)
{ {
...@@ -1807,7 +1817,7 @@ static void timehist_header(struct perf_sched *sched) ...@@ -1807,7 +1817,7 @@ static void timehist_header(struct perf_sched *sched)
printf(" "); printf(" ");
} }
printf(" %-20s %9s %9s %9s", printf(" %-*s %9s %9s %9s", comm_width,
"task name", "wait time", "sch delay", "run time"); "task name", "wait time", "sch delay", "run time");
printf("\n"); printf("\n");
...@@ -1820,7 +1830,8 @@ static void timehist_header(struct perf_sched *sched) ...@@ -1820,7 +1830,8 @@ static void timehist_header(struct perf_sched *sched)
if (sched->show_cpu_visual) if (sched->show_cpu_visual)
printf(" %*s ", ncpus, ""); printf(" %*s ", ncpus, "");
printf(" %-20s %9s %9s %9s\n", "[tid/pid]", "(msec)", "(msec)", "(msec)"); printf(" %-*s %9s %9s %9s\n", comm_width,
"[tid/pid]", "(msec)", "(msec)", "(msec)");
/* /*
* separator * separator
...@@ -1830,7 +1841,7 @@ static void timehist_header(struct perf_sched *sched) ...@@ -1830,7 +1841,7 @@ static void timehist_header(struct perf_sched *sched)
if (sched->show_cpu_visual) if (sched->show_cpu_visual)
printf(" %.*s ", ncpus, graph_dotted_line); printf(" %.*s ", ncpus, graph_dotted_line);
printf(" %.20s %.9s %.9s %.9s", printf(" %.*s %.9s %.9s %.9s", comm_width,
graph_dotted_line, graph_dotted_line, graph_dotted_line, graph_dotted_line, graph_dotted_line, graph_dotted_line,
graph_dotted_line); graph_dotted_line);
...@@ -1939,39 +1950,40 @@ static void timehist_update_runtime_stats(struct thread_runtime *r, ...@@ -1939,39 +1950,40 @@ static void timehist_update_runtime_stats(struct thread_runtime *r,
r->total_run_time += r->dt_run; r->total_run_time += r->dt_run;
} }
static bool is_idle_sample(struct perf_sched *sched, static bool is_idle_sample(struct perf_sample *sample,
struct perf_sample *sample, struct perf_evsel *evsel)
struct perf_evsel *evsel,
struct machine *machine)
{ {
struct thread *thread;
struct callchain_cursor *cursor = &callchain_cursor;
/* pid 0 == swapper == idle task */ /* pid 0 == swapper == idle task */
if (sample->pid == 0) if (strcmp(perf_evsel__name(evsel), "sched:sched_switch") == 0)
return true; return perf_evsel__intval(evsel, sample, "prev_pid") == 0;
if (strcmp(perf_evsel__name(evsel), "sched:sched_switch") == 0) { return sample->pid == 0;
if (perf_evsel__intval(evsel, sample, "prev_pid") == 0) }
return true;
} static void save_task_callchain(struct perf_sched *sched,
struct perf_sample *sample,
struct perf_evsel *evsel,
struct machine *machine)
{
struct callchain_cursor *cursor = &callchain_cursor;
struct thread *thread;
/* want main thread for process - has maps */ /* want main thread for process - has maps */
thread = machine__findnew_thread(machine, sample->pid, sample->pid); thread = machine__findnew_thread(machine, sample->pid, sample->pid);
if (thread == NULL) { if (thread == NULL) {
pr_debug("Failed to get thread for pid %d.\n", sample->pid); pr_debug("Failed to get thread for pid %d.\n", sample->pid);
return false; return;
} }
if (!symbol_conf.use_callchain || sample->callchain == NULL) if (!symbol_conf.use_callchain || sample->callchain == NULL)
return false; return;
if (thread__resolve_callchain(thread, cursor, evsel, sample, if (thread__resolve_callchain(thread, cursor, evsel, sample,
NULL, NULL, sched->max_stack + 2) != 0) { NULL, NULL, sched->max_stack + 2) != 0) {
if (verbose) if (verbose)
error("Failed to resolve callchain. Skipping\n"); error("Failed to resolve callchain. Skipping\n");
return false; return;
} }
callchain_cursor_commit(cursor); callchain_cursor_commit(cursor);
...@@ -1994,8 +2006,24 @@ static bool is_idle_sample(struct perf_sched *sched, ...@@ -1994,8 +2006,24 @@ static bool is_idle_sample(struct perf_sched *sched,
callchain_cursor_advance(cursor); callchain_cursor_advance(cursor);
} }
}
static int init_idle_thread(struct thread *thread)
{
struct idle_thread_runtime *itr;
thread__set_comm(thread, idle_comm, 0);
itr = zalloc(sizeof(*itr));
if (itr == NULL)
return -ENOMEM;
return false; init_stats(&itr->tr.run_stats);
callchain_init(&itr->callchain);
callchain_cursor_reset(&itr->cursor);
thread__set_priv(thread, itr);
return 0;
} }
/* /*
...@@ -2004,7 +2032,7 @@ static bool is_idle_sample(struct perf_sched *sched, ...@@ -2004,7 +2032,7 @@ static bool is_idle_sample(struct perf_sched *sched,
*/ */
static int init_idle_threads(int ncpu) static int init_idle_threads(int ncpu)
{ {
int i; int i, ret;
idle_threads = zalloc(ncpu * sizeof(struct thread *)); idle_threads = zalloc(ncpu * sizeof(struct thread *));
if (!idle_threads) if (!idle_threads)
...@@ -2018,7 +2046,9 @@ static int init_idle_threads(int ncpu) ...@@ -2018,7 +2046,9 @@ static int init_idle_threads(int ncpu)
if (idle_threads[i] == NULL) if (idle_threads[i] == NULL)
return -ENOMEM; return -ENOMEM;
thread__set_comm(idle_threads[i], idle_comm, 0); ret = init_idle_thread(idle_threads[i]);
if (ret < 0)
return ret;
} }
return 0; return 0;
...@@ -2065,14 +2095,23 @@ static struct thread *get_idle_thread(int cpu) ...@@ -2065,14 +2095,23 @@ static struct thread *get_idle_thread(int cpu)
idle_threads[cpu] = thread__new(0, 0); idle_threads[cpu] = thread__new(0, 0);
if (idle_threads[cpu]) { if (idle_threads[cpu]) {
idle_threads[cpu]->tid = 0; if (init_idle_thread(idle_threads[cpu]) < 0)
thread__set_comm(idle_threads[cpu], idle_comm, 0); return NULL;
} }
} }
return idle_threads[cpu]; return idle_threads[cpu];
} }
/*
 * Copy the global callchain_cursor into the idle thread's private cursor.
 *
 * Nothing is copied unless callchains are enabled (symbol_conf.use_callchain)
 * and the sample actually carries a callchain.
 */
static void save_idle_callchain(struct idle_thread_runtime *itr,
				struct perf_sample *sample)
{
	if (symbol_conf.use_callchain && sample->callchain != NULL)
		callchain_cursor__copy(&itr->cursor, &callchain_cursor);
}
/* /*
* handle runtime stats saved per thread * handle runtime stats saved per thread
*/ */
...@@ -2111,7 +2150,7 @@ static struct thread *timehist_get_thread(struct perf_sched *sched, ...@@ -2111,7 +2150,7 @@ static struct thread *timehist_get_thread(struct perf_sched *sched,
{ {
struct thread *thread; struct thread *thread;
if (is_idle_sample(sched, sample, evsel, machine)) { if (is_idle_sample(sample, evsel)) {
thread = get_idle_thread(sample->cpu); thread = get_idle_thread(sample->cpu);
if (thread == NULL) if (thread == NULL)
pr_err("Failed to get idle thread for cpu %d.\n", sample->cpu); pr_err("Failed to get idle thread for cpu %d.\n", sample->cpu);
...@@ -2124,13 +2163,37 @@ static struct thread *timehist_get_thread(struct perf_sched *sched, ...@@ -2124,13 +2163,37 @@ static struct thread *timehist_get_thread(struct perf_sched *sched,
pr_debug("Failed to get thread for tid %d. skipping sample.\n", pr_debug("Failed to get thread for tid %d. skipping sample.\n",
sample->tid); sample->tid);
} }
save_task_callchain(sched, sample, evsel, machine);
if (sched->idle_hist) {
struct thread *idle;
struct idle_thread_runtime *itr;
idle = get_idle_thread(sample->cpu);
if (idle == NULL) {
pr_err("Failed to get idle thread for cpu %d.\n", sample->cpu);
return NULL;
}
itr = thread__priv(idle);
if (itr == NULL)
return NULL;
itr->last_thread = thread;
/* copy task callchain when entering to idle */
if (perf_evsel__intval(evsel, sample, "next_pid") == 0)
save_idle_callchain(itr, sample);
}
} }
return thread; return thread;
} }
static bool timehist_skip_sample(struct perf_sched *sched, static bool timehist_skip_sample(struct perf_sched *sched,
struct thread *thread) struct thread *thread,
struct perf_evsel *evsel,
struct perf_sample *sample)
{ {
bool rc = false; bool rc = false;
...@@ -2139,10 +2202,19 @@ static bool timehist_skip_sample(struct perf_sched *sched, ...@@ -2139,10 +2202,19 @@ static bool timehist_skip_sample(struct perf_sched *sched,
sched->skipped_samples++; sched->skipped_samples++;
} }
if (sched->idle_hist) {
if (strcmp(perf_evsel__name(evsel), "sched:sched_switch"))
rc = true;
else if (perf_evsel__intval(evsel, sample, "prev_pid") != 0 &&
perf_evsel__intval(evsel, sample, "next_pid") != 0)
rc = true;
}
return rc; return rc;
} }
static void timehist_print_wakeup_event(struct perf_sched *sched, static void timehist_print_wakeup_event(struct perf_sched *sched,
struct perf_evsel *evsel,
struct perf_sample *sample, struct perf_sample *sample,
struct machine *machine, struct machine *machine,
struct thread *awakened) struct thread *awakened)
...@@ -2155,8 +2227,8 @@ static void timehist_print_wakeup_event(struct perf_sched *sched, ...@@ -2155,8 +2227,8 @@ static void timehist_print_wakeup_event(struct perf_sched *sched,
return; return;
/* show wakeup unless both awakee and awaker are filtered */ /* show wakeup unless both awakee and awaker are filtered */
if (timehist_skip_sample(sched, thread) && if (timehist_skip_sample(sched, thread, evsel, sample) &&
timehist_skip_sample(sched, awakened)) { timehist_skip_sample(sched, awakened, evsel, sample)) {
return; return;
} }
...@@ -2201,7 +2273,7 @@ static int timehist_sched_wakeup_event(struct perf_tool *tool, ...@@ -2201,7 +2273,7 @@ static int timehist_sched_wakeup_event(struct perf_tool *tool,
/* show wakeups if requested */ /* show wakeups if requested */
if (sched->show_wakeups && if (sched->show_wakeups &&
!perf_time__skip_sample(&sched->ptime, sample->time)) !perf_time__skip_sample(&sched->ptime, sample->time))
timehist_print_wakeup_event(sched, sample, machine, thread); timehist_print_wakeup_event(sched, evsel, sample, machine, thread);
return 0; return 0;
} }
...@@ -2228,8 +2300,8 @@ static void timehist_print_migration_event(struct perf_sched *sched, ...@@ -2228,8 +2300,8 @@ static void timehist_print_migration_event(struct perf_sched *sched,
if (thread == NULL) if (thread == NULL)
return; return;
if (timehist_skip_sample(sched, thread) && if (timehist_skip_sample(sched, thread, evsel, sample) &&
timehist_skip_sample(sched, migrated)) { timehist_skip_sample(sched, migrated, evsel, sample)) {
return; return;
} }
...@@ -2314,7 +2386,7 @@ static int timehist_sched_change_event(struct perf_tool *tool, ...@@ -2314,7 +2386,7 @@ static int timehist_sched_change_event(struct perf_tool *tool,
goto out; goto out;
} }
if (timehist_skip_sample(sched, thread)) if (timehist_skip_sample(sched, thread, evsel, sample))
goto out; goto out;
tr = thread__get_runtime(thread); tr = thread__get_runtime(thread);
...@@ -2333,7 +2405,7 @@ static int timehist_sched_change_event(struct perf_tool *tool, ...@@ -2333,7 +2405,7 @@ static int timehist_sched_change_event(struct perf_tool *tool,
if (ptime->start && ptime->start > t) if (ptime->start && ptime->start > t)
goto out; goto out;
if (ptime->start > tprev) if (tprev && ptime->start > tprev)
tprev = ptime->start; tprev = ptime->start;
/* /*
...@@ -2350,7 +2422,39 @@ static int timehist_sched_change_event(struct perf_tool *tool, ...@@ -2350,7 +2422,39 @@ static int timehist_sched_change_event(struct perf_tool *tool,
t = ptime->end; t = ptime->end;
} }
timehist_update_runtime_stats(tr, t, tprev); if (!sched->idle_hist || thread->tid == 0) {
timehist_update_runtime_stats(tr, t, tprev);
if (sched->idle_hist) {
struct idle_thread_runtime *itr = (void *)tr;
struct thread_runtime *last_tr;
BUG_ON(thread->tid != 0);
if (itr->last_thread == NULL)
goto out;
/* add current idle time as last thread's runtime */
last_tr = thread__get_runtime(itr->last_thread);
if (last_tr == NULL)
goto out;
timehist_update_runtime_stats(last_tr, t, tprev);
/*
* remove delta time of last thread as it's not updated
* and otherwise it will show an invalid value next
* time. we only care total run time and run stat.
*/
last_tr->dt_run = 0;
last_tr->dt_wait = 0;
last_tr->dt_delay = 0;
if (itr->cursor.nr)
callchain_append(&itr->callchain, &itr->cursor, t - tprev);
itr->last_thread = NULL;
}
}
if (!sched->summary_only) if (!sched->summary_only)
timehist_print_sample(sched, sample, &al, thread, t); timehist_print_sample(sched, sample, &al, thread, t);
...@@ -2457,6 +2561,60 @@ static int show_deadthread_runtime(struct thread *t, void *priv) ...@@ -2457,6 +2561,60 @@ static int show_deadthread_runtime(struct thread *t, void *priv)
return __show_thread_runtime(t, priv); return __show_thread_runtime(t, priv);
} }
/*
 * Print the chain that ends at @node on a single line of @fp.
 *
 * The function recurses into node->parent first, so the entries of every
 * ancestor are written before the entries of @node itself.  Entries are
 * joined with " <- "; context markers (ip >= PERF_CONTEXT_MAX) and symbols
 * flagged as ignored are left out.
 *
 * Returns the accumulated fprintf() result for everything written
 * (0 for a NULL @node).
 */
static size_t callchain__fprintf_folded(FILE *fp, struct callchain_node *node)
{
	const char *sep = " <- ";
	struct callchain_list *entry;
	char symbuf[1024];
	size_t printed;
	bool need_sep;

	if (!node)
		return 0;

	/* ancestors go out first; a separator is needed only if they printed */
	printed = callchain__fprintf_folded(fp, node->parent);
	need_sep = (printed != 0);

	list_for_each_entry(entry, &node->val, list) {
		if (entry->ip >= PERF_CONTEXT_MAX ||
		    (entry->ms.sym && entry->ms.sym->ignore))
			continue;

		printed += fprintf(fp, "%s%s", need_sep ? sep : "",
				   callchain_list__sym_name(entry, symbuf,
							    sizeof(symbuf),
							    false));
		need_sep = true;
	}

	return printed;
}
/*
 * Print one table row per callchain node found in the rb-tree @root:
 * the node's hit value (via print_sched_time), its count, and the folded
 * callchain.  A two-line header is printed first.
 *
 * Returns the number of characters accounted for the rows; the header
 * lines are not included in that total.
 */
static size_t timehist_print_idlehist_callchain(struct rb_root *root)
{
	FILE *fp = stdout;
	size_t printed = 0;
	struct rb_node *nd;

	printf("  %16s  %8s  %s\n", "Idle time (msec)", "Count", "Callchains");
	printf("  %.16s  %.8s  %.50s\n", graph_dotted_line, graph_dotted_line,
	       graph_dotted_line);

	for (nd = rb_first(root); nd != NULL; nd = rb_next(nd)) {
		struct callchain_node *chain;

		chain = rb_entry(nd, struct callchain_node, rb_node);

		printed += fprintf(fp, "  ");
		print_sched_time(chain->hit, 12);
		printed += 16;  /* print_sched_time returns 2nd arg + 4 */
		printed += fprintf(fp, " %8d  ", chain->count);
		printed += callchain__fprintf_folded(fp, chain);
		printed += fprintf(fp, "\n");
	}

	return printed;
}
static void timehist_print_summary(struct perf_sched *sched, static void timehist_print_summary(struct perf_sched *sched,
struct perf_session *session) struct perf_session *session)
{ {
...@@ -2469,12 +2627,15 @@ static void timehist_print_summary(struct perf_sched *sched, ...@@ -2469,12 +2627,15 @@ static void timehist_print_summary(struct perf_sched *sched,
memset(&totals, 0, sizeof(totals)); memset(&totals, 0, sizeof(totals));
if (comm_width < 30) if (sched->idle_hist) {
comm_width = 30; printf("\nIdle-time summary\n");
printf("%*s parent sched-out ", comm_width, "comm");
printf("\nRuntime summary\n"); printf(" idle-time min-idle avg-idle max-idle stddev migrations\n");
printf("%*s parent sched-in ", comm_width, "comm"); } else {
printf(" run-time min-run avg-run max-run stddev migrations\n"); printf("\nRuntime summary\n");
printf("%*s parent sched-in ", comm_width, "comm");
printf(" run-time min-run avg-run max-run stddev migrations\n");
}
printf("%*s (count) ", comm_width, ""); printf("%*s (count) ", comm_width, "");
printf(" (msec) (msec) (msec) (msec) %%\n"); printf(" (msec) (msec) (msec) (msec) %%\n");
printf("%.117s\n", graph_dotted_line); printf("%.117s\n", graph_dotted_line);
...@@ -2490,7 +2651,7 @@ static void timehist_print_summary(struct perf_sched *sched, ...@@ -2490,7 +2651,7 @@ static void timehist_print_summary(struct perf_sched *sched,
printf("<no terminated tasks>\n"); printf("<no terminated tasks>\n");
/* CPU idle stats not tracked when samples were skipped */ /* CPU idle stats not tracked when samples were skipped */
if (sched->skipped_samples) if (sched->skipped_samples && !sched->idle_hist)
return; return;
printf("\nIdle stats:\n"); printf("\nIdle stats:\n");
...@@ -2509,6 +2670,35 @@ static void timehist_print_summary(struct perf_sched *sched, ...@@ -2509,6 +2670,35 @@ static void timehist_print_summary(struct perf_sched *sched,
printf(" CPU %2d idle entire time window\n", i); printf(" CPU %2d idle entire time window\n", i);
} }
if (sched->idle_hist && symbol_conf.use_callchain) {
callchain_param.mode = CHAIN_FOLDED;
callchain_param.value = CCVAL_PERIOD;
callchain_register_param(&callchain_param);
printf("\nIdle stats by callchain:\n");
for (i = 0; i < idle_max_cpu; ++i) {
struct idle_thread_runtime *itr;
t = idle_threads[i];
if (!t)
continue;
itr = thread__priv(t);
if (itr == NULL)
continue;
callchain_param.sort(&itr->sorted_root, &itr->callchain,
0, &callchain_param);
printf(" CPU %2d:", i);
print_sched_time(itr->tr.total_run_time, 6);
printf(" msec\n");
timehist_print_idlehist_callchain(&itr->sorted_root);
printf("\n");
}
}
printf("\n" printf("\n"
" Total number of unique tasks: %" PRIu64 "\n" " Total number of unique tasks: %" PRIu64 "\n"
"Total number of context switches: %" PRIu64 "\n" "Total number of context switches: %" PRIu64 "\n"
...@@ -3036,6 +3226,7 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused) ...@@ -3036,6 +3226,7 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
OPT_BOOLEAN('w', "wakeups", &sched.show_wakeups, "Show wakeup events"), OPT_BOOLEAN('w', "wakeups", &sched.show_wakeups, "Show wakeup events"),
OPT_BOOLEAN('M', "migrations", &sched.show_migrations, "Show migration events"), OPT_BOOLEAN('M', "migrations", &sched.show_migrations, "Show migration events"),
OPT_BOOLEAN('V', "cpu-visual", &sched.show_cpu_visual, "Add CPU visual"), OPT_BOOLEAN('V', "cpu-visual", &sched.show_cpu_visual, "Add CPU visual"),
OPT_BOOLEAN('I', "idle-hist", &sched.idle_hist, "Show idle events only"),
OPT_STRING(0, "time", &sched.time_str, "str", OPT_STRING(0, "time", &sched.time_str, "str",
"Time span for analysis (start,stop)"), "Time span for analysis (start,stop)"),
OPT_PARENT(sched_options) OPT_PARENT(sched_options)
......
...@@ -2195,7 +2195,7 @@ static int process_stat_round_event(struct perf_tool *tool __maybe_unused, ...@@ -2195,7 +2195,7 @@ static int process_stat_round_event(struct perf_tool *tool __maybe_unused,
} }
static static
int process_stat_config_event(struct perf_tool *tool __maybe_unused, int process_stat_config_event(struct perf_tool *tool,
union perf_event *event, union perf_event *event,
struct perf_session *session __maybe_unused) struct perf_session *session __maybe_unused)
{ {
...@@ -2238,7 +2238,7 @@ static int set_maps(struct perf_stat *st) ...@@ -2238,7 +2238,7 @@ static int set_maps(struct perf_stat *st)
} }
static static
int process_thread_map_event(struct perf_tool *tool __maybe_unused, int process_thread_map_event(struct perf_tool *tool,
union perf_event *event, union perf_event *event,
struct perf_session *session __maybe_unused) struct perf_session *session __maybe_unused)
{ {
...@@ -2257,7 +2257,7 @@ int process_thread_map_event(struct perf_tool *tool __maybe_unused, ...@@ -2257,7 +2257,7 @@ int process_thread_map_event(struct perf_tool *tool __maybe_unused,
} }
static static
int process_cpu_map_event(struct perf_tool *tool __maybe_unused, int process_cpu_map_event(struct perf_tool *tool,
union perf_event *event, union perf_event *event,
struct perf_session *session __maybe_unused) struct perf_session *session __maybe_unused)
{ {
......
#!/bin/sh
# Whitespace-separated list of header paths.  Each entry is handed to check()
# below, which compares ../<path> against ../../<path>.
HEADERS='
include/uapi/linux/perf_event.h
include/linux/hash.h
include/uapi/linux/hw_breakpoint.h
arch/x86/include/asm/disabled-features.h
arch/x86/include/asm/required-features.h
arch/x86/include/asm/cpufeatures.h
arch/arm/include/uapi/asm/perf_regs.h
arch/arm64/include/uapi/asm/perf_regs.h
arch/powerpc/include/uapi/asm/perf_regs.h
arch/x86/include/uapi/asm/perf_regs.h
arch/x86/include/uapi/asm/kvm.h
arch/x86/include/uapi/asm/kvm_perf.h
arch/x86/include/uapi/asm/svm.h
arch/x86/include/uapi/asm/vmx.h
arch/powerpc/include/uapi/asm/kvm.h
arch/s390/include/uapi/asm/kvm.h
arch/s390/include/uapi/asm/kvm_perf.h
arch/s390/include/uapi/asm/sie.h
arch/arm/include/uapi/asm/kvm.h
arch/arm64/include/uapi/asm/kvm.h
include/asm-generic/bitops/arch_hweight.h
include/asm-generic/bitops/const_hweight.h
include/asm-generic/bitops/__fls.h
include/asm-generic/bitops/fls.h
include/asm-generic/bitops/fls64.h
include/linux/coresight-pmu.h
include/uapi/asm-generic/mman-common.h
'
# check FILE [DIFF_OPTION...]
#
# Compare ../FILE with ../../FILE using diff and the given options, and print
# a warning on stderr when they differ.  Each option is re-quoted before the
# command line is eval'ed so that arguments containing spaces or shell
# metacharacters (e.g. -I regexes) reach diff intact.
#
# When ../../FILE does not exist there is nothing to compare against, so the
# function stays silent.  The comparison is nested under the existence test
# on purpose: with "test -f X && eval CMD || echo ..." the warning would also
# fire whenever the test itself fails.
check () {
  file=$1
  opts=

  shift
  while [ -n "$*" ]; do
    opts="$opts \"$1\""
    shift
  done

  cmd="diff $opts ../$file ../../$file > /dev/null"

  if test -f "../../$file"; then
    eval "$cmd" || echo "Warning: $file differs from kernel" >&2
  fi
}
# simple diff check: every entry of $HEADERS goes through check() with
# diff's -B option (changes that only insert or delete blank lines are ignored)
for i in $HEADERS; do
check $i -B
done
# diff with extra ignore lines: these files additionally pass -I regexes so
# that lines matching them (EXPORT_SYMBOL lines and the listed #include lines)
# are not reported as differences
check arch/x86/lib/memcpy_64.S -B -I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>"
check arch/x86/lib/memset_64.S -B -I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>"
check include/uapi/asm-generic/mman.h -B -I "^#include <\(uapi/\)*asm-generic/mman-common.h>"
check include/uapi/linux/mman.h -B -I "^#include <\(uapi/\)*asm/mman.h>"
...@@ -55,6 +55,7 @@ struct record_opts { ...@@ -55,6 +55,7 @@ struct record_opts {
bool all_user; bool all_user;
bool tail_synthesize; bool tail_synthesize;
bool overwrite; bool overwrite;
bool ignore_missing_thread;
unsigned int freq; unsigned int freq;
unsigned int mmap_pages; unsigned int mmap_pages;
unsigned int auxtrace_mmap_pages; unsigned int auxtrace_mmap_pages;
......
...@@ -185,6 +185,10 @@ static struct test generic_tests[] = { ...@@ -185,6 +185,10 @@ static struct test generic_tests[] = {
.desc = "Synthesize thread map", .desc = "Synthesize thread map",
.func = test__thread_map_synthesize, .func = test__thread_map_synthesize,
}, },
{
.desc = "Remove thread map",
.func = test__thread_map_remove,
},
{ {
.desc = "Synthesize cpu map", .desc = "Synthesize cpu map",
.func = test__cpu_map_synthesize, .func = test__cpu_map_synthesize,
......
...@@ -80,6 +80,7 @@ const char *test__bpf_subtest_get_desc(int subtest); ...@@ -80,6 +80,7 @@ const char *test__bpf_subtest_get_desc(int subtest);
int test__bpf_subtest_get_nr(void); int test__bpf_subtest_get_nr(void);
int test_session_topology(int subtest); int test_session_topology(int subtest);
int test__thread_map_synthesize(int subtest); int test__thread_map_synthesize(int subtest);
int test__thread_map_remove(int subtest);
int test__cpu_map_synthesize(int subtest); int test__cpu_map_synthesize(int subtest);
int test__synthesize_stat_config(int subtest); int test__synthesize_stat_config(int subtest);
int test__synthesize_stat(int subtest); int test__synthesize_stat(int subtest);
......
#include <stdlib.h>
#include <sys/types.h> #include <sys/types.h>
#include <unistd.h> #include <unistd.h>
#include <sys/prctl.h> #include <sys/prctl.h>
...@@ -93,3 +94,46 @@ int test__thread_map_synthesize(int subtest __maybe_unused) ...@@ -93,3 +94,46 @@ int test__thread_map_synthesize(int subtest __maybe_unused)
return 0; return 0;
} }
/*
 * Test thread_map__remove(): build a two-entry map from "<pid>,<ppid>",
 * remove index 0 twice (checking the count drops to 1 and then 0), and
 * finally verify that removing from the now-empty map fails.
 *
 * Returns 0 on success; TEST_ASSERT_VAL bails out of the function on the
 * first failed expectation.
 */
int test__thread_map_remove(int subtest __maybe_unused)
{
	struct thread_map *threads;
	char *str;
	int i;

	TEST_ASSERT_VAL("failed to allocate map string",
			asprintf(&str, "%d,%d", getpid(), getppid()) >= 0);

	threads = thread_map__new_str(str, NULL, 0);
	/*
	 * The pid list string is only needed to build the map; release it
	 * right away so it is not leaked, including on the assert paths below.
	 */
	free(str);

	TEST_ASSERT_VAL("failed to allocate thread_map",
			threads);

	if (verbose)
		thread_map__fprintf(threads, stderr);

	TEST_ASSERT_VAL("failed to remove thread",
			!thread_map__remove(threads, 0));

	TEST_ASSERT_VAL("thread_map count != 1", threads->nr == 1);

	if (verbose)
		thread_map__fprintf(threads, stderr);

	TEST_ASSERT_VAL("failed to remove thread",
			!thread_map__remove(threads, 0));

	TEST_ASSERT_VAL("thread_map count != 0", threads->nr == 0);

	if (verbose)
		thread_map__fprintf(threads, stderr);

	/* removing from an empty map must report an error */
	TEST_ASSERT_VAL("failed to not remove thread",
			thread_map__remove(threads, 0));

	for (i = 0; i < threads->nr; i++)
		free(threads->map[i].comm);

	free(threads);
	return 0;
}
...@@ -42,7 +42,9 @@ static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size, ...@@ -42,7 +42,9 @@ static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
P_MMAP_FLAG(SHARED); P_MMAP_FLAG(SHARED);
P_MMAP_FLAG(PRIVATE); P_MMAP_FLAG(PRIVATE);
#ifdef MAP_32BIT
P_MMAP_FLAG(32BIT); P_MMAP_FLAG(32BIT);
#endif
P_MMAP_FLAG(ANONYMOUS); P_MMAP_FLAG(ANONYMOUS);
P_MMAP_FLAG(DENYWRITE); P_MMAP_FLAG(DENYWRITE);
P_MMAP_FLAG(EXECUTABLE); P_MMAP_FLAG(EXECUTABLE);
......
...@@ -215,7 +215,7 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int ...@@ -215,7 +215,7 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int
ui_browser__set_color(browser, color); ui_browser__set_color(browser, color);
if (dl->ins.ops && dl->ins.ops->scnprintf) { if (dl->ins.ops && dl->ins.ops->scnprintf) {
if (ins__is_jump(&dl->ins)) { if (ins__is_jump(&dl->ins)) {
bool fwd = dl->ops.target.offset > (u64)dl->offset; bool fwd = dl->ops.target.offset > dl->offset;
ui_browser__write_graph(browser, fwd ? SLSMG_DARROW_CHAR : ui_browser__write_graph(browser, fwd ? SLSMG_DARROW_CHAR :
SLSMG_UARROW_CHAR); SLSMG_UARROW_CHAR);
...@@ -245,7 +245,8 @@ static bool disasm_line__is_valid_jump(struct disasm_line *dl, struct symbol *sy ...@@ -245,7 +245,8 @@ static bool disasm_line__is_valid_jump(struct disasm_line *dl, struct symbol *sy
{ {
if (!dl || !dl->ins.ops || !ins__is_jump(&dl->ins) if (!dl || !dl->ins.ops || !ins__is_jump(&dl->ins)
|| !disasm_line__has_offset(dl) || !disasm_line__has_offset(dl)
|| dl->ops.target.offset >= symbol__size(sym)) || dl->ops.target.offset < 0
|| dl->ops.target.offset >= (s64)symbol__size(sym))
return false; return false;
return true; return true;
......
...@@ -223,13 +223,19 @@ bool ins__is_call(const struct ins *ins) ...@@ -223,13 +223,19 @@ bool ins__is_call(const struct ins *ins)
static int jump__parse(struct arch *arch __maybe_unused, struct ins_operands *ops, struct map *map __maybe_unused) static int jump__parse(struct arch *arch __maybe_unused, struct ins_operands *ops, struct map *map __maybe_unused)
{ {
const char *s = strchr(ops->raw, '+'); const char *s = strchr(ops->raw, '+');
const char *c = strchr(ops->raw, ',');
ops->target.addr = strtoull(ops->raw, NULL, 16); if (c++ != NULL)
ops->target.addr = strtoull(c, NULL, 16);
else
ops->target.addr = strtoull(ops->raw, NULL, 16);
if (s++ != NULL) if (s++ != NULL) {
ops->target.offset = strtoull(s, NULL, 16); ops->target.offset = strtoull(s, NULL, 16);
else ops->target.offset_avail = true;
ops->target.offset = UINT64_MAX; } else {
ops->target.offset_avail = false;
}
return 0; return 0;
} }
...@@ -237,7 +243,7 @@ static int jump__parse(struct arch *arch __maybe_unused, struct ins_operands *op ...@@ -237,7 +243,7 @@ static int jump__parse(struct arch *arch __maybe_unused, struct ins_operands *op
static int jump__scnprintf(struct ins *ins, char *bf, size_t size, static int jump__scnprintf(struct ins *ins, char *bf, size_t size,
struct ins_operands *ops) struct ins_operands *ops)
{ {
if (!ops->target.addr) if (!ops->target.addr || ops->target.offset < 0)
return ins__raw_scnprintf(ins, bf, size, ops); return ins__raw_scnprintf(ins, bf, size, ops);
return scnprintf(bf, size, "%-6.6s %" PRIx64, ins->name, ops->target.offset); return scnprintf(bf, size, "%-6.6s %" PRIx64, ins->name, ops->target.offset);
...@@ -641,7 +647,8 @@ static int __symbol__inc_addr_samples(struct symbol *sym, struct map *map, ...@@ -641,7 +647,8 @@ static int __symbol__inc_addr_samples(struct symbol *sym, struct map *map,
pr_debug3("%s: addr=%#" PRIx64 "\n", __func__, map->unmap_ip(map, addr)); pr_debug3("%s: addr=%#" PRIx64 "\n", __func__, map->unmap_ip(map, addr));
if (addr < sym->start || addr >= sym->end) { if ((addr < sym->start || addr >= sym->end) &&
(addr != sym->end || sym->start != sym->end)) {
pr_debug("%s(%d): ERANGE! sym->name=%s, start=%#" PRIx64 ", addr=%#" PRIx64 ", end=%#" PRIx64 "\n", pr_debug("%s(%d): ERANGE! sym->name=%s, start=%#" PRIx64 ", addr=%#" PRIx64 ", end=%#" PRIx64 "\n",
__func__, __LINE__, sym->name, sym->start, addr, sym->end); __func__, __LINE__, sym->name, sym->start, addr, sym->end);
return -ERANGE; return -ERANGE;
...@@ -1205,9 +1212,11 @@ static int symbol__parse_objdump_line(struct symbol *sym, struct map *map, ...@@ -1205,9 +1212,11 @@ static int symbol__parse_objdump_line(struct symbol *sym, struct map *map,
if (dl == NULL) if (dl == NULL)
return -1; return -1;
if (dl->ops.target.offset == UINT64_MAX) if (!disasm_line__has_offset(dl)) {
dl->ops.target.offset = dl->ops.target.addr - dl->ops.target.offset = dl->ops.target.addr -
map__rip_2objdump(map, sym->start); map__rip_2objdump(map, sym->start);
dl->ops.target.offset_avail = true;
}
/* kcore has no symbols, so add the call target name */ /* kcore has no symbols, so add the call target name */
if (dl->ins.ops && ins__is_call(&dl->ins) && !dl->ops.target.name) { if (dl->ins.ops && ins__is_call(&dl->ins) && !dl->ops.target.name) {
......
...@@ -24,7 +24,8 @@ struct ins_operands { ...@@ -24,7 +24,8 @@ struct ins_operands {
char *raw; char *raw;
char *name; char *name;
u64 addr; u64 addr;
u64 offset; s64 offset;
bool offset_avail;
} target; } target;
union { union {
struct { struct {
...@@ -68,7 +69,7 @@ struct disasm_line { ...@@ -68,7 +69,7 @@ struct disasm_line {
static inline bool disasm_line__has_offset(const struct disasm_line *dl) static inline bool disasm_line__has_offset(const struct disasm_line *dl)
{ {
return dl->ops.target.offset != UINT64_MAX; return dl->ops.target.offset_avail;
} }
void disasm_line__free(struct disasm_line *dl); void disasm_line__free(struct disasm_line *dl);
......
...@@ -990,6 +990,8 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts, ...@@ -990,6 +990,8 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts,
* it overloads any global configuration. * it overloads any global configuration.
*/ */
apply_config_terms(evsel, opts); apply_config_terms(evsel, opts);
evsel->ignore_missing_thread = opts->ignore_missing_thread;
} }
static int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads) static int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
...@@ -1419,6 +1421,33 @@ static int __open_attr__fprintf(FILE *fp, const char *name, const char *val, ...@@ -1419,6 +1421,33 @@ static int __open_attr__fprintf(FILE *fp, const char *name, const char *val,
return fprintf(fp, " %-32s %s\n", name, val); return fprintf(fp, " %-32s %s\n", name, val);
} }
/*
 * Decide whether a failed event open for the thread at index @thread of
 * @threads may be ignored, and if so drop that thread from the map.
 *
 * @evsel:   event being opened; must have ignore_missing_thread set and must
 *           not be system wide
 * @threads: thread map the failing pid came from
 * @thread:  index into @threads of the failing entry
 * @err:     negative errno from the open attempt; only -ESRCH qualifies
 *
 * Returns true when the entry was removed from @threads (a warning naming
 * the removed pid is printed), false when the failure must be handled by
 * the caller.
 */
static bool ignore_missing_thread(struct perf_evsel *evsel,
				  struct thread_map *threads,
				  int thread, int err)
{
	pid_t ignore_pid;

	if (!evsel->ignore_missing_thread)
		return false;

	/* The system wide setup does not work with threads. */
	if (evsel->system_wide)
		return false;

	/* The -ESRCH is perf event syscall errno for pid's not found. */
	if (err != -ESRCH)
		return false;

	/* If there's only one thread, let it fail. */
	if (threads->nr == 1)
		return false;

	/*
	 * Remember the pid before removing it: thread_map__remove() shifts
	 * the following entries down, so looking the index up afterwards
	 * would name a different thread (or a stale slot for the last entry).
	 */
	ignore_pid = thread_map__pid(threads, thread);

	if (thread_map__remove(threads, thread))
		return false;

	pr_warning("WARNING: Ignored open failure for pid %d\n",
		   ignore_pid);
	return true;
}
static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
struct thread_map *threads) struct thread_map *threads)
{ {
...@@ -1474,7 +1503,7 @@ static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, ...@@ -1474,7 +1503,7 @@ static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
for (cpu = 0; cpu < cpus->nr; cpu++) { for (cpu = 0; cpu < cpus->nr; cpu++) {
for (thread = 0; thread < nthreads; thread++) { for (thread = 0; thread < nthreads; thread++) {
int group_fd; int fd, group_fd;
if (!evsel->cgrp && !evsel->system_wide) if (!evsel->cgrp && !evsel->system_wide)
pid = thread_map__pid(threads, thread); pid = thread_map__pid(threads, thread);
...@@ -1484,21 +1513,37 @@ static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, ...@@ -1484,21 +1513,37 @@ static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
pr_debug2("sys_perf_event_open: pid %d cpu %d group_fd %d flags %#lx", pr_debug2("sys_perf_event_open: pid %d cpu %d group_fd %d flags %#lx",
pid, cpus->map[cpu], group_fd, flags); pid, cpus->map[cpu], group_fd, flags);
FD(evsel, cpu, thread) = sys_perf_event_open(&evsel->attr, fd = sys_perf_event_open(&evsel->attr, pid, cpus->map[cpu],
pid, group_fd, flags);
cpus->map[cpu],
group_fd, flags); FD(evsel, cpu, thread) = fd;
if (FD(evsel, cpu, thread) < 0) {
if (fd < 0) {
err = -errno; err = -errno;
if (ignore_missing_thread(evsel, threads, thread, err)) {
/*
* We just removed 1 thread, so take a step
* back on thread index and lower the upper
* nthreads limit.
*/
nthreads--;
thread--;
/* ... and pretend like nothing have happened. */
err = 0;
continue;
}
pr_debug2("\nsys_perf_event_open failed, error %d\n", pr_debug2("\nsys_perf_event_open failed, error %d\n",
err); err);
goto try_fallback; goto try_fallback;
} }
pr_debug2(" = %d\n", FD(evsel, cpu, thread)); pr_debug2(" = %d\n", fd);
if (evsel->bpf_fd >= 0) { if (evsel->bpf_fd >= 0) {
int evt_fd = FD(evsel, cpu, thread); int evt_fd = fd;
int bpf_fd = evsel->bpf_fd; int bpf_fd = evsel->bpf_fd;
err = ioctl(evt_fd, err = ioctl(evt_fd,
......
...@@ -120,6 +120,7 @@ struct perf_evsel { ...@@ -120,6 +120,7 @@ struct perf_evsel {
bool tracking; bool tracking;
bool per_pkg; bool per_pkg;
bool precise_max; bool precise_max;
bool ignore_missing_thread;
/* parse modifier helper */ /* parse modifier helper */
int exclude_GH; int exclude_GH;
int nr_members; int nr_members;
......
...@@ -1459,7 +1459,8 @@ int dso__load(struct dso *dso, struct map *map) ...@@ -1459,7 +1459,8 @@ int dso__load(struct dso *dso, struct map *map)
* Read the build id if possible. This is required for * Read the build id if possible. This is required for
* DSO_BINARY_TYPE__BUILDID_DEBUGINFO to work * DSO_BINARY_TYPE__BUILDID_DEBUGINFO to work
*/ */
if (is_regular_file(dso->long_name) && if (!dso->has_build_id &&
is_regular_file(dso->long_name) &&
filename__read_build_id(dso->long_name, build_id, BUILD_ID_SIZE) > 0) filename__read_build_id(dso->long_name, build_id, BUILD_ID_SIZE) > 0)
dso__set_build_id(dso, build_id); dso__set_build_id(dso, build_id);
......
...@@ -448,3 +448,25 @@ bool thread_map__has(struct thread_map *threads, pid_t pid) ...@@ -448,3 +448,25 @@ bool thread_map__has(struct thread_map *threads, pid_t pid)
return false; return false;
} }
/*
 * Remove the entry at index @idx from @threads.
 *
 * The entry's comm string is freed, the entries after it are shifted down
 * by one, and threads->nr is decremented.
 *
 * Returns 0 on success, or -EINVAL when the map is empty or @idx is outside
 * [0, threads->nr).
 */
int thread_map__remove(struct thread_map *threads, int idx)
{
	int i;

	if (threads->nr < 1)
		return -EINVAL;

	/* a negative index would address memory in front of the array */
	if (idx < 0 || idx >= threads->nr)
		return -EINVAL;

	/*
	 * Free the 'idx' item and shift the rest up.
	 */
	free(threads->map[idx].comm);

	for (i = idx; i < threads->nr - 1; i++)
		threads->map[i] = threads->map[i + 1];

	threads->nr--;
	return 0;
}
...@@ -58,4 +58,5 @@ static inline char *thread_map__comm(struct thread_map *map, int thread) ...@@ -58,4 +58,5 @@ static inline char *thread_map__comm(struct thread_map *map, int thread)
void thread_map__read_comms(struct thread_map *threads); void thread_map__read_comms(struct thread_map *threads);
bool thread_map__has(struct thread_map *threads, pid_t pid); bool thread_map__has(struct thread_map *threads, pid_t pid);
int thread_map__remove(struct thread_map *threads, int idx);
#endif /* __PERF_THREAD_MAP_H */ #endif /* __PERF_THREAD_MAP_H */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment