301 lines
9.4 KiB
Python
301 lines
9.4 KiB
Python
#!/usr/bin/python
|
|
#
|
|
# lockstat Trace and display lock contention stats
|
|
#
|
|
# USAGE: lockstat
|
|
|
|
# Licensed under the Apache License, Version 2.0 (the "License")
|
|
# 28-Jul-2017 Gisle Dankel Created this.
|
|
|
|
from bcc import BPF
|
|
from ctypes import c_int
|
|
from time import sleep
|
|
from datetime import datetime
|
|
import argparse
|
|
import subprocess
|
|
import os
|
|
|
|
# One Lock object per TGID and uaddr.
|
|
class Lock(object):
|
|
def __init__(self):
|
|
self.contention_count = 0
|
|
self.elapsed_blocked = 0
|
|
self.thread_count = 0
|
|
self.last_stack_syms = []
|
|
|
|
def update(self, count, block_time, last_stack_syms):
|
|
self.contention_count += count
|
|
self.elapsed_blocked += block_time
|
|
self.thread_count += 1
|
|
self.last_stack_syms = last_stack_syms
|
|
|
|
def run_command_get_pid(command):
|
|
p = subprocess.Popen(command.split())
|
|
return p.pid
|
|
|
|
examples = """
|
|
EXAMPLES:
|
|
|
|
./lockstat
|
|
Trace calls to sys_futex and display contented locks every 5 seconds
|
|
for all processes running on the system
|
|
./lockstat -p <pid>
|
|
Trace only for the specified pid and display contended locks
|
|
every 5 seconds
|
|
./lockstat -p <pid> -t
|
|
Trace for a specified pid and print a message on each entry and exit to
|
|
sys_futex until interrupted or killed
|
|
./lockstat -p <pid> 10
|
|
Trace the specified pid and show a message every 10 seconds
|
|
./lockstat -c <command> 1 30
|
|
Run the specified command and display contended locks every 1 second
|
|
for a total of 30 times
|
|
"""
|
|
|
|
description = """
|
|
Trace kernel futex events.
|
|
These often occur because of lock contention, e.g. involving a pthread_mutex.
|
|
This script resemblers the following SystemTap example:
|
|
https://sourceware.org/systemtap/SystemTap_Beginners_Guide/futexcontentionsect.html
|
|
"""
|
|
|
|
parser = argparse.ArgumentParser(description=description,
|
|
formatter_class=argparse.RawDescriptionHelpFormatter,
|
|
epilog=examples)
|
|
parser.add_argument("-p", "--pid", type=int, default=-1,
|
|
help="the PID to trace; if not specified, trace all")
|
|
parser.add_argument("-t", "--trace", action="store_true",
|
|
help="print trace messages for each futex enter/exit")
|
|
parser.add_argument("interval", nargs="?", default=5, type=int,
|
|
help="interval in seconds to print summary")
|
|
parser.add_argument("count", nargs="?", type=int,
|
|
help="number of times to print the report before exiting")
|
|
parser.add_argument("-c", "--command",
|
|
help="execute and trace the specified command")
|
|
|
|
args = parser.parse_args()
|
|
|
|
pid = args.pid
|
|
command = args.command
|
|
interval = args.interval
|
|
num_prints = args.count
|
|
trace_all = args.trace
|
|
|
|
if command is not None:
|
|
print("Executing '%s' and tracing the resulting process." % command)
|
|
pid = run_command_get_pid(command)
|
|
|
|
bpf_source = """
|
|
#include <uapi/linux/futex.h>
|
|
#include <uapi/linux/ptrace.h>
|
|
#include <linux/sched.h>
|
|
#include <linux/time.h>
|
|
|
|
struct comm_t {
|
|
char name[TASK_COMM_LEN];
|
|
};
|
|
|
|
struct lock_key_t {
|
|
u64 uaddr;
|
|
u32 pid;
|
|
u32 tgid;
|
|
};
|
|
|
|
struct lock_info_t {
|
|
u64 elapsed_blocked;
|
|
u64 contention_count;
|
|
u64 sid;
|
|
};
|
|
|
|
BPF_HASH(pid_lock, u32, u64);
|
|
BPF_HASH(pid_blocktime, u32, u64);
|
|
BPF_HASH(tgid_comm, u32, struct comm_t);
|
|
BPF_HASH(lock_stats, struct lock_key_t, struct lock_info_t, 1000000);
|
|
BPF_STACK_TRACE(stack_traces, 16384);
|
|
|
|
static inline int update_stats(u32 pid, u32 tgid, u64 uaddr, u64 block_time, u64 sid) {
|
|
struct lock_key_t key = {};
|
|
struct lock_info_t zero = {};
|
|
struct lock_info_t *info;
|
|
|
|
key.pid = pid;
|
|
key.tgid = tgid;
|
|
key.uaddr = uaddr;
|
|
info = lock_stats.lookup_or_init(&key, &zero);
|
|
info->elapsed_blocked += block_time;
|
|
info->contention_count++;
|
|
info->sid = sid;
|
|
|
|
if (0 == tgid_comm.lookup(&tgid)) {
|
|
struct comm_t comm;
|
|
bpf_get_current_comm(&comm.name, sizeof(comm.name));
|
|
tgid_comm.update(&tgid, &comm);
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
// FIXME: Should attach to sys_enter_futex and sys_exit_futex tracepoints here,
|
|
// but that does not currently work
|
|
int sys_futex_enter(struct pt_regs *ctx, u32 *uaddr, int op, u32 val,
|
|
struct timespec *utime, u32 *uaddr2, u32 val3) {
|
|
int cmd = op & FUTEX_CMD_MASK;
|
|
if (cmd != FUTEX_WAIT)
|
|
return 0;
|
|
|
|
u64 pid_tgid = bpf_get_current_pid_tgid();
|
|
u32 pid = pid_tgid;
|
|
u32 tgid = pid_tgid >> 32;
|
|
|
|
if (!(THREAD_FILTER))
|
|
return 0;
|
|
|
|
u64 timestamp = bpf_ktime_get_ns();
|
|
u64 uaddr64 = (u64) uaddr;
|
|
pid_lock.update(&pid, &uaddr64);
|
|
pid_blocktime.update(&pid, ×tamp);
|
|
|
|
if (SHOULD_PRINT)
|
|
bpf_trace_printk("enter sys_futex, pid = %u, uaddr = %x, "
|
|
"cmd = %u\\n", pid, uaddr64, cmd);
|
|
return 0;
|
|
}
|
|
|
|
int sys_futex_exit(struct pt_regs *ctx) {
|
|
u64 pid_tgid = bpf_get_current_pid_tgid();
|
|
u32 pid = pid_tgid;
|
|
u32 tgid = pid_tgid >> 32;
|
|
if (!(THREAD_FILTER))
|
|
return 0;
|
|
|
|
u64 *blocktime = pid_blocktime.lookup(&pid);
|
|
u64 *uaddr = pid_lock.lookup(&pid);
|
|
u64 timestamp = bpf_ktime_get_ns();
|
|
u64 elapsed;
|
|
u64 sid;
|
|
|
|
if (blocktime == 0 || uaddr == 0)
|
|
return 0; // not FUTEX_WAIT, or (less likely) missed futex_enter
|
|
|
|
elapsed = timestamp - *blocktime;
|
|
|
|
sid = stack_traces.get_stackid(ctx, BPF_F_USER_STACK);
|
|
update_stats(pid, tgid, *uaddr, elapsed, sid);
|
|
pid_lock.delete(&pid);
|
|
pid_blocktime.delete(&pid);
|
|
|
|
if (SHOULD_PRINT) {
|
|
bpf_trace_printk("exit sys_futex, uaddr = %x, elapsed = %uns\\n",
|
|
uaddr == 0 ? 0 : *uaddr, elapsed);
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
"""
|
|
|
|
bpf_source = bpf_source.replace("SHOULD_PRINT", "1" if trace_all else "0")
|
|
|
|
thread_filter = '1'
|
|
if pid != -1:
|
|
print("Tracing pid %d, Ctrl+C to quit." % pid)
|
|
# 'tgid' in kernel space is what people thin of as 'pid' in userspace
|
|
thread_filter = "tgid == %d" % pid
|
|
else:
|
|
print("Tracing all processes, Ctrl+C to quit.")
|
|
|
|
bpf_source = bpf_source.replace("THREAD_FILTER", thread_filter)
|
|
|
|
bpf_program = BPF(text=bpf_source)
|
|
bpf_program.attach_kprobe(event="SyS_futex", fn_name="sys_futex_enter")
|
|
bpf_program.attach_kretprobe(event="SyS_futex", fn_name="sys_futex_exit")
|
|
|
|
def get_syms(stack, pid):
|
|
global bpf_program
|
|
syms = []
|
|
for addr in stack:
|
|
s = bpf_program.sym(addr, pid, show_offset=True)
|
|
syms.append(s)
|
|
return syms
|
|
|
|
def print_syms(syms):
|
|
print("=========")
|
|
for f in syms:
|
|
print(f)
|
|
print("=========")
|
|
|
|
def is_android_monitor_lock(syms):
|
|
for s in syms:
|
|
if 'art::Monitor::Lock' in s:
|
|
return True
|
|
return False
|
|
|
|
def disp_stack(stack, pid):
|
|
for addr in stack:
|
|
s = bpf_program.sym(addr, pid, show_offset=True)
|
|
|
|
def create_tgid_stats():
|
|
global bpf_program
|
|
stats = bpf_program["lock_stats"]
|
|
res = {}
|
|
stack_traces = bpf_program['stack_traces']
|
|
for key, val in stats.items():
|
|
# Only display Android monitor locks
|
|
if val.sid >= 0:
|
|
ust = stack_traces.walk(val.sid)
|
|
syms = get_syms(ust, key.pid)
|
|
if not is_android_monitor_lock(syms):
|
|
continue
|
|
else:
|
|
continue
|
|
|
|
if not key.tgid in res:
|
|
res[key.tgid] = {}
|
|
if not key.uaddr in res[key.tgid]:
|
|
res[key.tgid][key.uaddr] = Lock()
|
|
|
|
lock = res[key.tgid][key.uaddr]
|
|
lock.update(val.contention_count, val.elapsed_blocked, syms)
|
|
return res
|
|
|
|
def print_comm_stats(stats):
|
|
if stats == {}:
|
|
return
|
|
|
|
comms = bpf_program["tgid_comm"]
|
|
print("\n%s:" % (datetime.now().strftime("%H:%M:%S")))
|
|
for tgid, locks in stats.items():
|
|
comm = comms[c_int(tgid)].name
|
|
print("\n %s (%d):" % (comm, tgid))
|
|
sorted_locks = sorted(locks.items(),
|
|
key=lambda x: x[1].elapsed_blocked,
|
|
reverse=True)
|
|
for addr, stats in sorted_locks:
|
|
print(" %x: %dms (%d contentions involving %d threads, avg %dus)" %
|
|
(addr, stats.elapsed_blocked / 1000000,
|
|
stats.contention_count, stats.thread_count,
|
|
stats.elapsed_blocked / stats.contention_count / 1000))
|
|
|
|
# No use of displaying lock stacks since we're only
|
|
# filtering for Android monitor locks.
|
|
#
|
|
# print("Last stack for this lock:")
|
|
# print_syms(stats.last_stack_syms)
|
|
|
|
count_so_far = 0
|
|
while True:
|
|
if trace_all:
|
|
print(bpf_program.trace_fields())
|
|
else:
|
|
try:
|
|
sleep(interval)
|
|
except KeyboardInterrupt:
|
|
exit()
|
|
print_comm_stats(create_tgid_stats())
|
|
count_so_far += 1
|
|
bpf_program['tgid_comm'].clear()
|
|
bpf_program['lock_stats'].clear()
|
|
bpf_program['pid_lock'].clear()
|
|
bpf_program['pid_blocktime'].clear()
|
|
|
|
if num_prints is not None and count_so_far >= num_prints:
|
|
exit()
|