// KittyPerfEvent.cpp
1 #include "KittyPerfEvent.hpp" 2 3 bool KittyPerfWatch::add(pid_t tid, uintptr_t addr, KT_WATCH_TYPE bp_type, KT_WATCH_LEN bp_len) 4 { 5 #if defined(__arm__) || defined(__aarch64__) 6 addr &= ~3UL; 7 #endif 8 9 perf_event_attr pe{}; 10 pe.type = PERF_TYPE_BREAKPOINT; 11 pe.config = 0; 12 pe.size = sizeof(pe); 13 14 pe.bp_addr = addr; 15 pe.bp_len = bp_len; 16 pe.bp_type = bp_type; 17 18 pe.sample_period = 1; 19 pe.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_TIME | PERF_SAMPLE_ADDR; 20 21 pe.exclude_kernel = 1; 22 pe.exclude_hv = 1; 23 pe.disabled = 1; 24 pe.wakeup_events = 1; 25 pe.precise_ip = 2; 26 27 int fd = perf_event_open(&pe, tid, -1, -1, 0); 28 if (fd < 0) 29 return false; 30 31 size_t page_sz = sysconf(_SC_PAGESIZE); 32 size_t mmap_sz = (1 + KT_WATCH_PAGES) * page_sz; 33 34 void *base = mmap(nullptr, mmap_sz, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); 35 if (!base || base == MAP_FAILED) 36 { 37 ioctl(fd, PERF_EVENT_IOC_RESET, 0); 38 ioctl(fd, PERF_EVENT_IOC_DISABLE, 0); 39 close(fd); 40 return false; 41 } 42 43 auto *meta = reinterpret_cast<perf_event_mmap_page *>(base); 44 if (meta->data_size == 0) 45 { 46 ioctl(fd, PERF_EVENT_IOC_RESET, 0); 47 ioctl(fd, PERF_EVENT_IOC_DISABLE, 0); 48 munmap(base, mmap_sz); 49 close(fd); 50 return false; 51 } 52 53 WatchInfo w{}; 54 w.fd = fd; 55 w.mmap = base; 56 w.mmap_sz = mmap_sz; 57 58 _watches.push_back(w); 59 60 return true; 61 } 62 63 bool KittyPerfTrap::add(pid_t tid, uintptr_t addr, KT_WATCH_TYPE bp_type, KT_WATCH_LEN bp_len) 64 { 65 size_t n = _watches.size(); 66 if (KittyPerfWatch::add(tid, addr, bp_type, bp_len) && ++n == _watches.size()) 67 { 68 int fd = _watches.front().fd; 69 fcntl(fd, F_SETOWN, tid); // set owner thread to receive signals 70 fcntl(fd, F_SETSIG, SIGTRAP); // signal type 71 fcntl(fd, F_SETFL, O_ASYNC); // enable async notification 72 return true; 73 } 74 return false; 75 } 76 77 bool KittyPerfWatch::enable() 78 { 79 for (auto &w : _watches) 80 { 81 if (w.fd >= 0) 82 { 83 
ioctl(w.fd, PERF_EVENT_IOC_RESET, 0); 84 if (ioctl(w.fd, PERF_EVENT_IOC_ENABLE, 0) < 0) 85 return false; 86 } 87 } 88 return true; 89 } 90 91 bool KittyPerfWatch::disable() 92 { 93 for (auto &w : _watches) 94 { 95 if (w.fd >= 0) 96 { 97 ioctl(w.fd, PERF_EVENT_IOC_RESET, 0); 98 if (ioctl(w.fd, PERF_EVENT_IOC_DISABLE, 0) < 0) 99 return false; 100 } 101 } 102 return true; 103 } 104 105 void KittyPerfWatch::clear() 106 { 107 for (auto &w : _watches) 108 { 109 if (w.fd >= 0) 110 { 111 ioctl(w.fd, PERF_EVENT_IOC_RESET, 0); 112 ioctl(w.fd, PERF_EVENT_IOC_DISABLE, 0); 113 } 114 115 if (w.mmap && w.mmap != MAP_FAILED && w.mmap_sz) 116 munmap(w.mmap, w.mmap_sz); 117 118 if (w.fd >= 0) 119 close(w.fd); 120 121 w = {}; 122 } 123 124 _watches.clear(); 125 } 126 127 void KittyPerfWatch::consumeRecord(const WatchInfo &w, KittyPerfSample *out) 128 { 129 if (w.mmap == nullptr || w.mmap == MAP_FAILED || w.mmap_sz == 0) 130 return; 131 132 auto *meta = reinterpret_cast<perf_event_mmap_page *>(w.mmap); 133 134 char *data = reinterpret_cast<char *>(w.mmap) + meta->data_offset; 135 const uint64_t data_size = meta->data_size; 136 const uint64_t mask = data_size - 1; 137 138 uint64_t head = meta->data_head; 139 __atomic_thread_fence(__ATOMIC_ACQUIRE); 140 141 uint64_t tail = meta->data_tail; 142 143 while (tail < head) 144 { 145 uint64_t offset = tail & mask; 146 147 perf_event_header hdr = {}; 148 if (offset + sizeof(hdr) <= data_size) 149 { 150 memcpy(&hdr, data + offset, sizeof(hdr)); 151 } 152 else 153 { 154 size_t first = data_size - offset; 155 memcpy(&hdr, data + offset, first); 156 memcpy(reinterpret_cast<char *>(&hdr) + first, data, sizeof(hdr) - first); 157 } 158 159 if (hdr.size < sizeof(perf_event_header)) 160 break; 161 162 if (tail + hdr.size > head) 163 break; 164 165 if (out && hdr.type == PERF_RECORD_SAMPLE) 166 { 167 const size_t payload_size = hdr.size - sizeof(hdr); 168 const uint64_t payload_offset = (offset + sizeof(hdr)) & mask; 169 170 if (payload_size >= 
sizeof(KittyPerfSample)) 171 { 172 if (payload_offset + sizeof(KittyPerfSample) <= data_size) 173 { 174 memcpy(out, data + payload_offset, sizeof(KittyPerfSample)); 175 } 176 else 177 { 178 size_t first = data_size - payload_offset; 179 memcpy(out, data + payload_offset, first); 180 memcpy(reinterpret_cast<char *>(out) + first, data, sizeof(KittyPerfSample) - first); 181 } 182 } 183 } 184 185 tail += hdr.size; 186 } 187 188 __atomic_thread_fence(__ATOMIC_RELEASE); 189 meta->data_tail = tail; 190 } 191 192 void KittyPerfWatch::pollOnce(int timeout_ms, KittyPerfSample *out) 193 { 194 if (_watches.empty()) 195 return; 196 197 std::vector<pollfd> pfds; 198 pfds.reserve(_watches.size()); 199 200 for (auto &w : _watches) 201 pfds.push_back({w.fd, POLLIN, 0}); 202 203 int ret = ::poll(pfds.data(), pfds.size(), timeout_ms); 204 if (ret <= 0) 205 return; 206 207 for (size_t i = 0; i < pfds.size(); ++i) 208 { 209 if (pfds[i].revents & POLLIN) 210 { 211 consumeRecord(_watches[i], out); 212 } 213 } 214 } 215 216 void KittyPerfWatch::poll(int timeout_ms, const std::function<bool(const KittyPerfSample &)> &cb) 217 { 218 if (!cb || !enable()) 219 return; 220 221 while (true) 222 { 223 KittyPerfSample out{}; 224 pollOnce(timeout_ms, &out); 225 if (cb(out)) 226 break; 227 } 228 229 disable(); 230 }