log_raw_p2p_msgs.py
#!/usr/bin/env python3
# Copyright (c) 2021-present The Bitcoin Core developers
# Distributed under the MIT software license, see the accompanying
# file COPYING or http://www.opensource.org/licenses/mit-license.php.

""" Demonstration of eBPF limitations and the effect on USDT with the
    net:inbound_message and net:outbound_message tracepoints. """

# This script shows a limitation of eBPF when data larger than 32kb is passed to
# user-space. It uses BCC (https://github.com/iovisor/bcc) to load a sandboxed
# eBPF program into the Linux kernel (root privileges are required). The eBPF
# program attaches to two statically defined tracepoints. The tracepoint
# 'net:inbound_message' is called when a new P2P message is received, and
# 'net:outbound_message' is called on outbound P2P messages. The eBPF program
# submits the P2P messages to this script via two BPF perf buffers (one for
# inbound and one for outbound messages). The submitted messages are printed.

# eBPF Limitations:
#
# Bitcoin P2P messages can be larger than 32kb (e.g. tx, block, ...). The eBPF
# VM's stack is limited to 512 bytes, and we can't allocate more than about 32kb
# for a P2P message in the eBPF VM. The message data is cut off when the message
# is larger than MAX_MSG_DATA_LENGTH (see definition below). This can be detected
# in user-space by comparing the data length to the message length variable. The
# message is cut off when the data length is smaller than the message length.
# A warning is included with the printed message data.
#
# Data is submitted to user-space (i.e. to this script) via perf buffers. The
# throughput of a perf buffer is limited. Each p2p_message is about 32kb in
# size. In- or outbound messages submitted to a perf buffer in rapid
# succession fill the buffer faster than it can be read. Some messages are
# lost.
#
# BCC prints: "Possibly lost 2 samples" on lost messages.
import sys
from bcc import BPF, USDT

# BCC: The C program to be compiled to an eBPF program (by BCC) and loaded into
# a sandboxed Linux kernel VM.
program = """
#include <uapi/linux/ptrace.h>

// A min() macro. Prefixed with _TRACEPOINT_TEST to avoid collision with other MIN macros.
#define _TRACEPOINT_TEST_MIN(a,b) ({ __typeof__ (a) _a = (a); __typeof__ (b) _b = (b); _a < _b ? _a : _b; })

// Maximum possible allocation size
// from include/linux/percpu.h in the Linux kernel
#define PCPU_MIN_UNIT_SIZE (32 << 10)

// Tor v3 addresses are 62 chars + 6 chars for the port (':12345').
// The arithmetic macros are parenthesized so that they expand safely inside
// any surrounding expression.
#define MAX_PEER_ADDR_LENGTH (62 + 6)
#define MAX_PEER_CONN_TYPE_LENGTH 20
#define MAX_MSG_TYPE_LENGTH 20
#define MAX_MSG_DATA_LENGTH (PCPU_MIN_UNIT_SIZE - 200)

struct p2p_message
{
    u64     peer_id;
    char    peer_addr[MAX_PEER_ADDR_LENGTH];
    char    peer_conn_type[MAX_PEER_CONN_TYPE_LENGTH];
    char    msg_type[MAX_MSG_TYPE_LENGTH];
    u64     msg_size;
    u8      msg[MAX_MSG_DATA_LENGTH];
};

// We can't store the p2p_message struct on the eBPF stack as it is limited to
// 512 bytes and P2P messages can be bigger than 512 bytes. However, we can use
// a BPF-array with a length of 1 to allocate up to 32768 bytes (this is
// defined by PCPU_MIN_UNIT_SIZE in include/linux/percpu.h in the Linux kernel).
// Also see https://github.com/iovisor/bcc/issues/2306
BPF_ARRAY(msg_arr, struct p2p_message, 1);

// Two BPF perf buffers for pushing data (here P2P messages) to user-space.
BPF_PERF_OUTPUT(inbound_messages);
BPF_PERF_OUTPUT(outbound_messages);

int trace_inbound_message(struct pt_regs *ctx) {
    int idx = 0;
    struct p2p_message *msg = msg_arr.lookup(&idx);
    void *paddr = NULL, *pconn_type = NULL, *pmsg_type = NULL, *pmsg = NULL;

    // lookup() does not return a NULL pointer. However, the BPF verifier
    // requires an explicit check that the `msg` pointer isn't a NULL
    // pointer. See https://github.com/iovisor/bcc/issues/2595
    if (msg == NULL) return 1;

    bpf_usdt_readarg(1, ctx, &msg->peer_id);
    bpf_usdt_readarg(2, ctx, &paddr);
    bpf_probe_read_user_str(&msg->peer_addr, sizeof(msg->peer_addr), paddr);
    bpf_usdt_readarg(3, ctx, &pconn_type);
    bpf_probe_read_user_str(&msg->peer_conn_type, sizeof(msg->peer_conn_type), pconn_type);
    bpf_usdt_readarg(4, ctx, &pmsg_type);
    bpf_probe_read_user_str(&msg->msg_type, sizeof(msg->msg_type), pmsg_type);
    bpf_usdt_readarg(5, ctx, &msg->msg_size);
    bpf_usdt_readarg(6, ctx, &pmsg);
    // Copy at most MAX_MSG_DATA_LENGTH bytes; larger messages are cut off.
    bpf_probe_read_user(&msg->msg, _TRACEPOINT_TEST_MIN(msg->msg_size, MAX_MSG_DATA_LENGTH), pmsg);

    inbound_messages.perf_submit(ctx, msg, sizeof(*msg));
    return 0;
}

int trace_outbound_message(struct pt_regs *ctx) {
    int idx = 0;
    struct p2p_message *msg = msg_arr.lookup(&idx);
    void *paddr = NULL, *pconn_type = NULL, *pmsg_type = NULL, *pmsg = NULL;

    // lookup() does not return a NULL pointer. However, the BPF verifier
    // requires an explicit check that the `msg` pointer isn't a NULL
    // pointer. See https://github.com/iovisor/bcc/issues/2595
    if (msg == NULL) return 1;

    bpf_usdt_readarg(1, ctx, &msg->peer_id);
    bpf_usdt_readarg(2, ctx, &paddr);
    bpf_probe_read_user_str(&msg->peer_addr, sizeof(msg->peer_addr), paddr);
    bpf_usdt_readarg(3, ctx, &pconn_type);
    bpf_probe_read_user_str(&msg->peer_conn_type, sizeof(msg->peer_conn_type), pconn_type);
    bpf_usdt_readarg(4, ctx, &pmsg_type);
    bpf_probe_read_user_str(&msg->msg_type, sizeof(msg->msg_type), pmsg_type);
    bpf_usdt_readarg(5, ctx, &msg->msg_size);
    bpf_usdt_readarg(6, ctx, &pmsg);
    // Copy at most MAX_MSG_DATA_LENGTH bytes; larger messages are cut off.
    bpf_probe_read_user(&msg->msg, _TRACEPOINT_TEST_MIN(msg->msg_size, MAX_MSG_DATA_LENGTH), pmsg);

    outbound_messages.perf_submit(ctx, msg, sizeof(*msg));
    return 0;
}
"""


def _decode_field(raw):
    """Decode a string field of a p2p_message event.

    Undecodable bytes are replaced instead of raising, so that a single odd
    message cannot raise inside the perf buffer callback.
    """
    return raw.decode("utf-8", errors="replace")


def _format_message(event, inbound):
    """Return the log line for one p2p_message event.

    event:   object exposing the fields of `struct p2p_message` (peer_id,
             peer_addr, peer_conn_type, msg_type, msg_size, msg).
    inbound: True for a message received from the peer, False for a message
             sent to the peer.
    """
    captured = len(event.msg)
    size = event.msg_size

    # The eBPF program copies at most len(event.msg) bytes. A larger msg_size
    # therefore means the payload was cut off.
    warning = ""
    if captured < size:
        warning = f"Warning: incomplete message (only {captured} out of {size} bytes)! "

    if inbound:
        direction, preposition = "inbound", "from"
    else:
        direction, preposition = "outbound", "to"

    payload = bytes(event.msg[:size]).hex()
    return (
        f"{warning}{direction} msg '{_decode_field(event.msg_type)}' "
        f"{preposition} peer {event.peer_id} "
        f"({_decode_field(event.peer_conn_type)}, {_decode_field(event.peer_addr)}) "
        f"with {size} bytes: {payload}"
    )


def print_message(event, inbound):
    """Print one P2P message event, hex-encoding its (possibly cut off) payload.

    A warning is prepended when the payload was cut off by the eBPF program.
    """
    print(_format_message(event, inbound))


def main(pid):
    """Attach to the bitcoind process `pid` and log its raw P2P messages.

    pid: process id of bitcoind, as an int or a decimal string.

    Polls the perf buffers until interrupted with Ctrl-C, then exits the
    process. Raises ValueError if `pid` cannot be converted to an int.
    """
    print(f"Hooking into bitcoind with pid {pid}")
    bitcoind_with_usdts = USDT(pid=int(pid))

    # attaching the trace functions defined in the BPF program to the tracepoints
    bitcoind_with_usdts.enable_probe(
        probe="inbound_message", fn_name="trace_inbound_message")
    bitcoind_with_usdts.enable_probe(
        probe="outbound_message", fn_name="trace_outbound_message")
    bpf = BPF(text=program, usdt_contexts=[bitcoind_with_usdts])

    # BCC: perf buffer handle function for inbound_messages
    def handle_inbound(_, data, size):
        """ Inbound message handler.

        Called each time a message is submitted to the inbound_messages BPF table."""
        event = bpf["inbound_messages"].event(data)
        print_message(event, True)

    # BCC: perf buffer handle function for outbound_messages
    def handle_outbound(_, data, size):
        """ Outbound message handler.

        Called each time a message is submitted to the outbound_messages BPF table."""
        event = bpf["outbound_messages"].event(data)
        print_message(event, False)

    # BCC: add handlers to the inbound and outbound perf buffers
    bpf["inbound_messages"].open_perf_buffer(handle_inbound)
    bpf["outbound_messages"].open_perf_buffer(handle_outbound)

    print("Logging raw P2P messages.")
    print("Messages larger than about 32kb will be cut off!")
    print("Some messages might be lost!")
    try:
        while True:
            bpf.perf_buffer_poll()
    except KeyboardInterrupt:
        # Ctrl-C is the regular way to stop logging: exit with status 0.
        # sys.exit() is used because the exit() builtin is only provided by
        # the `site` module.
        sys.exit()


if __name__ == "__main__":
    # Exactly one argument is expected and it has to be a decimal pid.
    # Anything else is a usage error (message on stderr, exit status 1).
    if len(sys.argv) != 2 or not sys.argv[1].isdecimal():
        sys.exit(f"USAGE: {sys.argv[0]} <pid of bitcoind>")
    pid = sys.argv[1]
    main(pid)