/ deps / KittyMemoryEx / KittyPerfEvent.cpp
KittyPerfEvent.cpp
  1  #include "KittyPerfEvent.hpp"
  2  
  3  bool KittyPerfWatch::add(pid_t tid, uintptr_t addr, KT_WATCH_TYPE bp_type, KT_WATCH_LEN bp_len)
  4  {
  5  #if defined(__arm__) || defined(__aarch64__)
  6      addr &= ~3UL;
  7  #endif
  8  
  9      perf_event_attr pe{};
 10      pe.type = PERF_TYPE_BREAKPOINT;
 11      pe.config = 0;
 12      pe.size = sizeof(pe);
 13  
 14      pe.bp_addr = addr;
 15      pe.bp_len = bp_len;
 16      pe.bp_type = bp_type;
 17  
 18      pe.sample_period = 1;
 19      pe.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_TIME | PERF_SAMPLE_ADDR;
 20  
 21      pe.exclude_kernel = 1;
 22      pe.exclude_hv = 1;
 23      pe.disabled = 1;
 24      pe.wakeup_events = 1;
 25      pe.precise_ip = 2;
 26  
 27      int fd = perf_event_open(&pe, tid, -1, -1, 0);
 28      if (fd < 0)
 29          return false;
 30  
 31      size_t page_sz = sysconf(_SC_PAGESIZE);
 32      size_t mmap_sz = (1 + KT_WATCH_PAGES) * page_sz;
 33  
 34      void *base = mmap(nullptr, mmap_sz, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
 35      if (!base || base == MAP_FAILED)
 36      {
 37          ioctl(fd, PERF_EVENT_IOC_RESET, 0);
 38          ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
 39          close(fd);
 40          return false;
 41      }
 42  
 43      auto *meta = reinterpret_cast<perf_event_mmap_page *>(base);
 44      if (meta->data_size == 0)
 45      {
 46          ioctl(fd, PERF_EVENT_IOC_RESET, 0);
 47          ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
 48          munmap(base, mmap_sz);
 49          close(fd);
 50          return false;
 51      }
 52  
 53      WatchInfo w{};
 54      w.fd = fd;
 55      w.mmap = base;
 56      w.mmap_sz = mmap_sz;
 57  
 58      _watches.push_back(w);
 59  
 60      return true;
 61  }
 62  
 63  bool KittyPerfTrap::add(pid_t tid, uintptr_t addr, KT_WATCH_TYPE bp_type, KT_WATCH_LEN bp_len)
 64  {
 65      size_t n = _watches.size();
 66      if (KittyPerfWatch::add(tid, addr, bp_type, bp_len) && ++n == _watches.size())
 67      {
 68          int fd = _watches.front().fd;
 69          fcntl(fd, F_SETOWN, tid);     // set owner thread to receive signals
 70          fcntl(fd, F_SETSIG, SIGTRAP); // signal type
 71          fcntl(fd, F_SETFL, O_ASYNC);  // enable async notification
 72          return true;
 73      }
 74      return false;
 75  }
 76  
 77  bool KittyPerfWatch::enable()
 78  {
 79      for (auto &w : _watches)
 80      {
 81          if (w.fd >= 0)
 82          {
 83              ioctl(w.fd, PERF_EVENT_IOC_RESET, 0);
 84              if (ioctl(w.fd, PERF_EVENT_IOC_ENABLE, 0) < 0)
 85                  return false;
 86          }
 87      }
 88      return true;
 89  }
 90  
 91  bool KittyPerfWatch::disable()
 92  {
 93      for (auto &w : _watches)
 94      {
 95          if (w.fd >= 0)
 96          {
 97              ioctl(w.fd, PERF_EVENT_IOC_RESET, 0);
 98              if (ioctl(w.fd, PERF_EVENT_IOC_DISABLE, 0) < 0)
 99                  return false;
100          }
101      }
102      return true;
103  }
104  
105  void KittyPerfWatch::clear()
106  {
107      for (auto &w : _watches)
108      {
109          if (w.fd >= 0)
110          {
111              ioctl(w.fd, PERF_EVENT_IOC_RESET, 0);
112              ioctl(w.fd, PERF_EVENT_IOC_DISABLE, 0);
113          }
114  
115          if (w.mmap && w.mmap != MAP_FAILED && w.mmap_sz)
116              munmap(w.mmap, w.mmap_sz);
117  
118          if (w.fd >= 0)
119              close(w.fd);
120  
121          w = {};
122      }
123  
124      _watches.clear();
125  }
126  
127  void KittyPerfWatch::consumeRecord(const WatchInfo &w, KittyPerfSample *out)
128  {
129      if (w.mmap == nullptr || w.mmap == MAP_FAILED || w.mmap_sz == 0)
130          return;
131  
132      auto *meta = reinterpret_cast<perf_event_mmap_page *>(w.mmap);
133  
134      char *data = reinterpret_cast<char *>(w.mmap) + meta->data_offset;
135      const uint64_t data_size = meta->data_size;
136      const uint64_t mask = data_size - 1;
137  
138      uint64_t head = meta->data_head;
139      __atomic_thread_fence(__ATOMIC_ACQUIRE);
140  
141      uint64_t tail = meta->data_tail;
142  
143      while (tail < head)
144      {
145          uint64_t offset = tail & mask;
146  
147          perf_event_header hdr = {};
148          if (offset + sizeof(hdr) <= data_size)
149          {
150              memcpy(&hdr, data + offset, sizeof(hdr));
151          }
152          else
153          {
154              size_t first = data_size - offset;
155              memcpy(&hdr, data + offset, first);
156              memcpy(reinterpret_cast<char *>(&hdr) + first, data, sizeof(hdr) - first);
157          }
158  
159          if (hdr.size < sizeof(perf_event_header))
160              break;
161  
162          if (tail + hdr.size > head)
163              break;
164  
165          if (out && hdr.type == PERF_RECORD_SAMPLE)
166          {
167              const size_t payload_size = hdr.size - sizeof(hdr);
168              const uint64_t payload_offset = (offset + sizeof(hdr)) & mask;
169  
170              if (payload_size >= sizeof(KittyPerfSample))
171              {
172                  if (payload_offset + sizeof(KittyPerfSample) <= data_size)
173                  {
174                      memcpy(out, data + payload_offset, sizeof(KittyPerfSample));
175                  }
176                  else
177                  {
178                      size_t first = data_size - payload_offset;
179                      memcpy(out, data + payload_offset, first);
180                      memcpy(reinterpret_cast<char *>(out) + first, data, sizeof(KittyPerfSample) - first);
181                  }
182              }
183          }
184  
185          tail += hdr.size;
186      }
187  
188      __atomic_thread_fence(__ATOMIC_RELEASE);
189      meta->data_tail = tail;
190  }
191  
192  void KittyPerfWatch::pollOnce(int timeout_ms, KittyPerfSample *out)
193  {
194      if (_watches.empty())
195          return;
196  
197      std::vector<pollfd> pfds;
198      pfds.reserve(_watches.size());
199  
200      for (auto &w : _watches)
201          pfds.push_back({w.fd, POLLIN, 0});
202  
203      int ret = ::poll(pfds.data(), pfds.size(), timeout_ms);
204      if (ret <= 0)
205          return;
206  
207      for (size_t i = 0; i < pfds.size(); ++i)
208      {
209          if (pfds[i].revents & POLLIN)
210          {
211              consumeRecord(_watches[i], out);
212          }
213      }
214  }
215  
216  void KittyPerfWatch::poll(int timeout_ms, const std::function<bool(const KittyPerfSample &)> &cb)
217  {
218      if (!cb || !enable())
219          return;
220  
221      while (true)
222      {
223          KittyPerfSample out{};
224          pollOnce(timeout_ms, &out);
225          if (cb(out))
226              break;
227      }
228  
229      disable();
230  }