From f3036be6594b239f0035c464b3d5de28829d72a9 Mon Sep 17 00:00:00 2001 From: Eelco Chaudron Date: Mon, 24 Mar 2025 09:06:54 +0100 Subject: [PATCH] utilities: Add long poll statistics to the kernel_delay.py script. This addition uses the existing syscall probes to record statistics related to the OVS 'Unreasonably long ... ms poll interval' message. Basically, it records the min/max/average time between system poll calls. This can be used to determine if a long poll event has occurred during the capture. Signed-off-by: Eelco Chaudron Signed-off-by: Aaron Conole --- utilities/usdt-scripts/kernel_delay.py | 70 ++++++++++++++++++++++++- utilities/usdt-scripts/kernel_delay.rst | 17 +++++- 2 files changed, 83 insertions(+), 4 deletions(-) diff --git a/utilities/usdt-scripts/kernel_delay.py b/utilities/usdt-scripts/kernel_delay.py index 367d27e34..2372ca917 100755 --- a/utilities/usdt-scripts/kernel_delay.py +++ b/utilities/usdt-scripts/kernel_delay.py @@ -182,11 +182,27 @@ struct syscall_data_key_t { u32 syscall; }; +struct long_poll_data_key_t { + u32 pid; + u32 tid; +}; + +struct long_poll_data_t { + u64 count; + u64 total_ns; + u64 min_ns; + u64 max_ns; +}; + BPF_HASH(syscall_start, u64, u64); BPF_HASH(syscall_data, struct syscall_data_key_t, struct syscall_data_t); +BPF_HASH(long_poll_start, u64, u64); +BPF_HASH(long_poll_data, struct long_poll_data_key_t, struct long_poll_data_t); TRACEPOINT_PROBE(raw_syscalls, sys_enter) { u64 pid_tgid = bpf_get_current_pid_tgid(); + struct long_poll_data_t *val, init_val = {.min_ns = U64_MAX}; + struct long_poll_data_key_t key; if (!capture_enabled(pid_tgid)) return 0; @@ -194,6 +210,29 @@ TRACEPOINT_PROBE(raw_syscalls, sys_enter) { u64 t = bpf_ktime_get_ns(); syscall_start.update(&pid_tgid, &t); + /* Do long poll handling from here on. */ + if (args->id != ) + return 0; + + u64 *start_ns = long_poll_start.lookup(&pid_tgid); + + if (!start_ns || *start_ns == 0) + return 0; + + key.pid = pid_tgid >> 32; + key.tid = (u32)pid_tgid; + + val = long_poll_data.lookup_or_try_init(&key, &init_val); + if (val) { + u64 delta = t - *start_ns; + val->count++; + val->total_ns += delta; + if (delta > val->max_ns) + val->max_ns = delta; + if (delta < val->min_ns) + val->min_ns = delta; + } + return 0; } @@ -206,6 +245,12 @@ TRACEPOINT_PROBE(raw_syscalls, sys_exit) { if (!capture_enabled(pid_tgid)) return 0; + u64 t = bpf_ktime_get_ns(); + + if (args->id == ) { + long_poll_start.update(&pid_tgid, &t); + } + key.pid = pid_tgid >> 32; key.tid = (u32)pid_tgid; key.syscall = args->id; @@ -217,7 +262,7 @@ TRACEPOINT_PROBE(raw_syscalls, sys_exit) { val = syscall_data.lookup_or_try_init(&key, &zero); if (val) { - u64 delta = bpf_ktime_get_ns() - *start_ns; + u64 delta = t - *start_ns; val->count++; val->total_ns += delta; if (delta > val->worst_ns) @@ -1039,6 +1084,9 @@ def process_results(syscall_events=None, trigger_delta=None): threads_syscall = {k.tid for k, _ in bpf["syscall_data"].items() if k.syscall != 0xffffffff} + threads_long_poll = {k.tid for k, _ in bpf["long_poll_data"].items() + if k.pid != 0xffffffff} + threads_run = {k.tid for k, _ in bpf["run_data"].items() if k.pid != 0xffffffff} @@ -1055,7 +1103,8 @@ def process_results(syscall_events=None, trigger_delta=None): if k.pid != 0xffffffff} threads = sorted(threads_syscall | threads_run | threads_ready | - threads_stopped | threads_hardirq | threads_softirq, + threads_stopped | threads_hardirq | threads_softirq | + threads_long_poll, key=lambda x: get_thread_name(options.pid, x)) # @@ -1099,6 +1148,21 @@ def process_results(syscall_events=None, trigger_delta=None): print("{}{:20.20} {:6} {:10} {:16,}".format( indent, "TOTAL( - poll):", "", total_count, total_ns)) + # + # LONG POLL STATISTICS + # + for k, v in filter(lambda t: t[0].tid == thread, + bpf["long_poll_data"].items()): + + print("\n{:10} {:16} {}\n{}{:10} {:>16} {:>16} {:>16}".format( + "", "", "[LONG POLL STATISTICS]", indent, + "COUNT", "AVERAGE ns", "MIN ns", "MAX ns")) + + print("{}{:10} {:16,} {:16,} {:16,}".format( + indent, v.count, int(v.total_ns / max(v.count, 1)), + v.min_ns, v.max_ns)) + break + # # THREAD RUN STATISTICS # @@ -1429,6 +1493,8 @@ def main(): source = source.replace("", "true" if options.stack_trace_size > 0 else "false") + source = source.replace("", str(poll_id)) + source = source.replace("", "0" if options.skip_upcall_stats else "1") diff --git a/utilities/usdt-scripts/kernel_delay.rst b/utilities/usdt-scripts/kernel_delay.rst index 41620d760..0a2a430a5 100644 --- a/utilities/usdt-scripts/kernel_delay.rst +++ b/utilities/usdt-scripts/kernel_delay.rst @@ -67,13 +67,17 @@ with the ``--pid`` option. read 0 1 1,292 1,292 TOTAL( - poll): 519 144,405,334 + [LONG POLL STATISTICS] + COUNT AVERAGE ns MIN ns MAX ns + 58 76,773 7,388 234,129 + [THREAD RUN STATISTICS] SCHED_CNT TOTAL ns MIN ns MAX ns - 6 136,764,071 1,480 115,146,424 + 6 136,764,071 1,480 115,146,424 [THREAD READY STATISTICS] SCHED_CNT TOTAL ns MAX ns - 7 11,334 6,636 + 7 11,334 6,636 [THREAD STOPPED STATISTICS] STOP_CNT TOTAL ns MAX ns @@ -104,6 +108,7 @@ For this, it displays the thread's id (``TID``) and name (``THREAD``), followed by resource-specific data. Which are: - ``SYSCALL STATISTICS`` +- ``LONG POLL STATISTICS`` - ``THREAD RUN STATISTICS`` - ``THREAD READY STATISTICS`` - ``THREAD STOPPED STATISTICS`` @@ -129,6 +134,14 @@ Note that it only counts calls that started and stopped during the measurement interval! +``LONG POLL STATISTICS`` +~~~~~~~~~~~~~~~~~~~~~~ +``LONG POLL STATISTICS`` tell you how long the thread was running between two +poll system calls. This relates to the 'Unreasonably long ... ms poll interval' +message reported by ovs-vswitchd. More details about this message can be found +in the example section. + + ``THREAD RUN STATISTICS`` ~~~~~~~~~~~~~~~~~~~~~~~~~ ``THREAD RUN STATISTICS`` tell you how long the thread was running on a CPU