mirror of
https://github.com/openvswitch/ovs
synced 2025-08-22 09:58:01 +00:00
utilities: Add long poll statistics to the kernel_delay.py script.
This addition uses the existing syscall probes to record statistics related to the OVS 'Unreasonably long ... ms poll interval' message. Basically, it records the min/max/average time between system poll calls. This can be used to determine if a long poll event has occurred during the capture. Signed-off-by: Eelco Chaudron <echaudro@redhat.com> Signed-off-by: Aaron Conole <aconole@redhat.com>
This commit is contained in:
parent
2100a04546
commit
f3036be659
@ -182,11 +182,27 @@ struct syscall_data_key_t {
|
|||||||
u32 syscall;
|
u32 syscall;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct long_poll_data_key_t {
|
||||||
|
u32 pid;
|
||||||
|
u32 tid;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct long_poll_data_t {
|
||||||
|
u64 count;
|
||||||
|
u64 total_ns;
|
||||||
|
u64 min_ns;
|
||||||
|
u64 max_ns;
|
||||||
|
};
|
||||||
|
|
||||||
BPF_HASH(syscall_start, u64, u64);
|
BPF_HASH(syscall_start, u64, u64);
|
||||||
BPF_HASH(syscall_data, struct syscall_data_key_t, struct syscall_data_t);
|
BPF_HASH(syscall_data, struct syscall_data_key_t, struct syscall_data_t);
|
||||||
|
BPF_HASH(long_poll_start, u64, u64);
|
||||||
|
BPF_HASH(long_poll_data, struct long_poll_data_key_t, struct long_poll_data_t);
|
||||||
|
|
||||||
TRACEPOINT_PROBE(raw_syscalls, sys_enter) {
|
TRACEPOINT_PROBE(raw_syscalls, sys_enter) {
|
||||||
u64 pid_tgid = bpf_get_current_pid_tgid();
|
u64 pid_tgid = bpf_get_current_pid_tgid();
|
||||||
|
struct long_poll_data_t *val, init_val = {.min_ns = U64_MAX};
|
||||||
|
struct long_poll_data_key_t key;
|
||||||
|
|
||||||
if (!capture_enabled(pid_tgid))
|
if (!capture_enabled(pid_tgid))
|
||||||
return 0;
|
return 0;
|
||||||
@ -194,6 +210,29 @@ TRACEPOINT_PROBE(raw_syscalls, sys_enter) {
|
|||||||
u64 t = bpf_ktime_get_ns();
|
u64 t = bpf_ktime_get_ns();
|
||||||
syscall_start.update(&pid_tgid, &t);
|
syscall_start.update(&pid_tgid, &t);
|
||||||
|
|
||||||
|
/* Do long poll handling from here on. */
|
||||||
|
if (args->id != <SYSCALL_POLL_ID>)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
u64 *start_ns = long_poll_start.lookup(&pid_tgid);
|
||||||
|
|
||||||
|
if (!start_ns || *start_ns == 0)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
key.pid = pid_tgid >> 32;
|
||||||
|
key.tid = (u32)pid_tgid;
|
||||||
|
|
||||||
|
val = long_poll_data.lookup_or_try_init(&key, &init_val);
|
||||||
|
if (val) {
|
||||||
|
u64 delta = t - *start_ns;
|
||||||
|
val->count++;
|
||||||
|
val->total_ns += delta;
|
||||||
|
if (delta > val->max_ns)
|
||||||
|
val->max_ns = delta;
|
||||||
|
if (delta < val->min_ns)
|
||||||
|
val->min_ns = delta;
|
||||||
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -206,6 +245,12 @@ TRACEPOINT_PROBE(raw_syscalls, sys_exit) {
|
|||||||
if (!capture_enabled(pid_tgid))
|
if (!capture_enabled(pid_tgid))
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
|
u64 t = bpf_ktime_get_ns();
|
||||||
|
|
||||||
|
if (args->id == <SYSCALL_POLL_ID>) {
|
||||||
|
long_poll_start.update(&pid_tgid, &t);
|
||||||
|
}
|
||||||
|
|
||||||
key.pid = pid_tgid >> 32;
|
key.pid = pid_tgid >> 32;
|
||||||
key.tid = (u32)pid_tgid;
|
key.tid = (u32)pid_tgid;
|
||||||
key.syscall = args->id;
|
key.syscall = args->id;
|
||||||
@ -217,7 +262,7 @@ TRACEPOINT_PROBE(raw_syscalls, sys_exit) {
|
|||||||
|
|
||||||
val = syscall_data.lookup_or_try_init(&key, &zero);
|
val = syscall_data.lookup_or_try_init(&key, &zero);
|
||||||
if (val) {
|
if (val) {
|
||||||
u64 delta = bpf_ktime_get_ns() - *start_ns;
|
u64 delta = t - *start_ns;
|
||||||
val->count++;
|
val->count++;
|
||||||
val->total_ns += delta;
|
val->total_ns += delta;
|
||||||
if (delta > val->worst_ns)
|
if (delta > val->worst_ns)
|
||||||
@ -1039,6 +1084,9 @@ def process_results(syscall_events=None, trigger_delta=None):
|
|||||||
threads_syscall = {k.tid for k, _ in bpf["syscall_data"].items()
|
threads_syscall = {k.tid for k, _ in bpf["syscall_data"].items()
|
||||||
if k.syscall != 0xffffffff}
|
if k.syscall != 0xffffffff}
|
||||||
|
|
||||||
|
threads_long_poll = {k.tid for k, _ in bpf["long_poll_data"].items()
|
||||||
|
if k.pid != 0xffffffff}
|
||||||
|
|
||||||
threads_run = {k.tid for k, _ in bpf["run_data"].items()
|
threads_run = {k.tid for k, _ in bpf["run_data"].items()
|
||||||
if k.pid != 0xffffffff}
|
if k.pid != 0xffffffff}
|
||||||
|
|
||||||
@ -1055,7 +1103,8 @@ def process_results(syscall_events=None, trigger_delta=None):
|
|||||||
if k.pid != 0xffffffff}
|
if k.pid != 0xffffffff}
|
||||||
|
|
||||||
threads = sorted(threads_syscall | threads_run | threads_ready |
|
threads = sorted(threads_syscall | threads_run | threads_ready |
|
||||||
threads_stopped | threads_hardirq | threads_softirq,
|
threads_stopped | threads_hardirq | threads_softirq |
|
||||||
|
threads_long_poll,
|
||||||
key=lambda x: get_thread_name(options.pid, x))
|
key=lambda x: get_thread_name(options.pid, x))
|
||||||
|
|
||||||
#
|
#
|
||||||
@ -1099,6 +1148,21 @@ def process_results(syscall_events=None, trigger_delta=None):
|
|||||||
print("{}{:20.20} {:6} {:10} {:16,}".format(
|
print("{}{:20.20} {:6} {:10} {:16,}".format(
|
||||||
indent, "TOTAL( - poll):", "", total_count, total_ns))
|
indent, "TOTAL( - poll):", "", total_count, total_ns))
|
||||||
|
|
||||||
|
#
|
||||||
|
# LONG POLL STATISTICS
|
||||||
|
#
|
||||||
|
for k, v in filter(lambda t: t[0].tid == thread,
|
||||||
|
bpf["long_poll_data"].items()):
|
||||||
|
|
||||||
|
print("\n{:10} {:16} {}\n{}{:10} {:>16} {:>16} {:>16}".format(
|
||||||
|
"", "", "[LONG POLL STATISTICS]", indent,
|
||||||
|
"COUNT", "AVERAGE ns", "MIN ns", "MAX ns"))
|
||||||
|
|
||||||
|
print("{}{:10} {:16,} {:16,} {:16,}".format(
|
||||||
|
indent, v.count, int(v.total_ns / max(v.count, 1)),
|
||||||
|
v.min_ns, v.max_ns))
|
||||||
|
break
|
||||||
|
|
||||||
#
|
#
|
||||||
# THREAD RUN STATISTICS
|
# THREAD RUN STATISTICS
|
||||||
#
|
#
|
||||||
@ -1429,6 +1493,8 @@ def main():
|
|||||||
source = source.replace("<STACK_TRACE_ENABLED>", "true"
|
source = source.replace("<STACK_TRACE_ENABLED>", "true"
|
||||||
if options.stack_trace_size > 0 else "false")
|
if options.stack_trace_size > 0 else "false")
|
||||||
|
|
||||||
|
source = source.replace("<SYSCALL_POLL_ID>", str(poll_id))
|
||||||
|
|
||||||
source = source.replace("<INSTALL_OVS_DP_UPCALL_PROBE>", "0"
|
source = source.replace("<INSTALL_OVS_DP_UPCALL_PROBE>", "0"
|
||||||
if options.skip_upcall_stats else "1")
|
if options.skip_upcall_stats else "1")
|
||||||
|
|
||||||
|
@ -67,6 +67,10 @@ with the ``--pid`` option.
|
|||||||
read 0 1 1,292 1,292
|
read 0 1 1,292 1,292
|
||||||
TOTAL( - poll): 519 144,405,334
|
TOTAL( - poll): 519 144,405,334
|
||||||
|
|
||||||
|
[LONG POLL STATISTICS]
|
||||||
|
COUNT AVERAGE ns MIN ns MAX ns
|
||||||
|
58 76,773 7,388 234,129
|
||||||
|
|
||||||
[THREAD RUN STATISTICS]
|
[THREAD RUN STATISTICS]
|
||||||
SCHED_CNT TOTAL ns MIN ns MAX ns
|
SCHED_CNT TOTAL ns MIN ns MAX ns
|
||||||
6 136,764,071 1,480 115,146,424
|
6 136,764,071 1,480 115,146,424
|
||||||
@ -104,6 +108,7 @@ For this, it displays the thread's id (``TID``) and name (``THREAD``),
|
|||||||
followed by resource-specific data. Which are:
|
followed by resource-specific data. Which are:
|
||||||
|
|
||||||
- ``SYSCALL STATISTICS``
|
- ``SYSCALL STATISTICS``
|
||||||
|
- ``LONG POLL STATISTICS``
|
||||||
- ``THREAD RUN STATISTICS``
|
- ``THREAD RUN STATISTICS``
|
||||||
- ``THREAD READY STATISTICS``
|
- ``THREAD READY STATISTICS``
|
||||||
- ``THREAD STOPPED STATISTICS``
|
- ``THREAD STOPPED STATISTICS``
|
||||||
@ -129,6 +134,14 @@ Note that it only counts calls that started and stopped during the
|
|||||||
measurement interval!
|
measurement interval!
|
||||||
|
|
||||||
|
|
||||||
|
``LONG POLL STATISTICS``
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
``LONG POLL STATISTICS`` tell you how long the thread was running between two
|
||||||
|
poll system calls. This relates to the 'Unreasonably long ... ms poll interval'
|
||||||
|
message reported by ovs-vswitchd. More details about this message can be found
|
||||||
|
in the example section.
|
||||||
|
|
||||||
|
|
||||||
``THREAD RUN STATISTICS``
|
``THREAD RUN STATISTICS``
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
``THREAD RUN STATISTICS`` tell you how long the thread was running on a CPU
|
``THREAD RUN STATISTICS`` tell you how long the thread was running on a CPU
|
||||||
|
Loading…
x
Reference in New Issue
Block a user