/ src / processor / disassembler_objdump.cc
disassembler_objdump.cc
  1  // Copyright (c) 2022, Google LLC
  2  //
  3  // Redistribution and use in source and binary forms, with or without
  4  // modification, are permitted provided that the following conditions are
  5  // met:
  6  //
  7  //     * Redistributions of source code must retain the above copyright
  8  // notice, this list of conditions and the following disclaimer.
  9  //     * Redistributions in binary form must reproduce the above
 10  // copyright notice, this list of conditions and the following disclaimer
 11  // in the documentation and/or other materials provided with the
 12  // distribution.
 13  //     * Neither the name of Google LLC nor the names of its
 14  // contributors may be used to endorse or promote products derived from
 15  // this software without specific prior written permission.
 16  //
 17  // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 18  // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 19  // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 20  // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 21  // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 22  // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 23  // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 24  // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 25  // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 26  // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 27  // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 28  
// disassembler_objdump.cc: Disassembler that invokes objdump for disassembly.
 30  //
 31  // Author: Mark Brand
 32  
 33  #ifdef HAVE_CONFIG_H
 34  #include <config.h>  // Must come first
 35  #endif
 36  
 37  #include "processor/disassembler_objdump.h"
 38  
 39  #include <unistd.h>
 40  #include <sys/wait.h>
 41  
 42  #include <array>
 43  #include <fstream>
 44  #include <iostream>
 45  #include <iterator>
 46  #include <regex>
 47  #include <sstream>
 48  #include <vector>
 49  
 50  #include "common/linux/eintr_wrapper.h"
 51  #include "common/linux/scoped_pipe.h"
 52  #include "common/linux/scoped_tmpfile.h"
 53  #include "processor/logging.h"
 54  
 55  namespace google_breakpad {
 56  namespace {
 57  
 58  const size_t kMaxX86InstructionLength = 15;
 59  
 60  bool IsInstructionPrefix(const string& token) {
 61    if (token == "lock" || token == "rep" || token == "repz" ||
 62        token == "repnz") {
 63      return true;
 64    }
 65    return false;
 66  }
 67  
 68  bool IsOperandSize(const string& token) {
 69    if (token == "BYTE" || token == "WORD" || token == "DWORD" ||
 70        token == "QWORD" || token == "PTR") {
 71      return true;
 72    }
 73    return false;
 74  }
 75  
 76  bool GetSegmentAddressX86(const DumpContext& context, string segment_name,
 77                            uint64_t& address) {
 78    if (segment_name == "ds") {
 79      address = context.GetContextX86()->ds;
 80    } else if (segment_name == "es") {
 81      address = context.GetContextX86()->es;
 82    } else if (segment_name == "fs") {
 83      address = context.GetContextX86()->fs;
 84    } else if (segment_name == "gs") {
 85      address = context.GetContextX86()->gs;
 86    } else {
 87      BPLOG(ERROR) << "Unsupported segment register: " << segment_name;
 88      return false;
 89    }
 90  
 91    return true;
 92  }
 93  
 94  bool GetSegmentAddressAMD64(const DumpContext& context, string segment_name,
 95                              uint64_t& address) {
 96    if (segment_name == "ds") {
 97      address = 0;
 98    } else if (segment_name == "es") {
 99      address = 0;
100    } else {
101      BPLOG(ERROR) << "Unsupported segment register: " << segment_name;
102      return false;
103    }
104  
105    return true;
106  }
107  
108  bool GetSegmentAddress(const DumpContext& context, string segment_name,
109                         uint64_t& address) {
110    if (context.GetContextCPU() == MD_CONTEXT_X86) {
111      return GetSegmentAddressX86(context, segment_name, address);
112    } else if (context.GetContextCPU() == MD_CONTEXT_AMD64) {
113      return GetSegmentAddressAMD64(context, segment_name, address);
114    } else {
115      BPLOG(ERROR) << "Unsupported architecture for GetSegmentAddress\n";
116      return false;
117    }
118  }
119  
120  bool GetRegisterValueX86(const DumpContext& context, string register_name,
121                           uint64_t& value) {
122    if (register_name == "eax") {
123      value = context.GetContextX86()->eax;
124    } else if (register_name == "ebx") {
125      value = context.GetContextX86()->ebx;
126    } else if (register_name == "ecx") {
127      value = context.GetContextX86()->ecx;
128    } else if (register_name == "edx") {
129      value = context.GetContextX86()->edx;
130    } else if (register_name == "edi") {
131      value = context.GetContextX86()->edi;
132    } else if (register_name == "esi") {
133      value = context.GetContextX86()->esi;
134    } else if (register_name == "ebp") {
135      value = context.GetContextX86()->ebp;
136    } else if (register_name == "esp") {
137      value = context.GetContextX86()->esp;
138    } else if (register_name == "eip") {
139      value = context.GetContextX86()->eip;
140    } else {
141      BPLOG(ERROR) << "Unsupported register: " << register_name;
142      return false;
143    }
144  
145    return true;
146  }
147  
148  bool GetRegisterValueAMD64(const DumpContext& context, string register_name,
149                             uint64_t& value) {
150    if (register_name == "rax") {
151      value = context.GetContextAMD64()->rax;
152    } else if (register_name == "rbx") {
153      value = context.GetContextAMD64()->rbx;
154    } else if (register_name == "rcx") {
155      value = context.GetContextAMD64()->rcx;
156    } else if (register_name == "rdx") {
157      value = context.GetContextAMD64()->rdx;
158    } else if (register_name == "rdi") {
159      value = context.GetContextAMD64()->rdi;
160    } else if (register_name == "rsi") {
161      value = context.GetContextAMD64()->rsi;
162    } else if (register_name == "rbp") {
163      value = context.GetContextAMD64()->rbp;
164    } else if (register_name == "rsp") {
165      value = context.GetContextAMD64()->rsp;
166    } else if (register_name == "r8") {
167      value = context.GetContextAMD64()->r8;
168    } else if (register_name == "r9") {
169      value = context.GetContextAMD64()->r9;
170    } else if (register_name == "r10") {
171      value = context.GetContextAMD64()->r10;
172    } else if (register_name == "r11") {
173      value = context.GetContextAMD64()->r11;
174    } else if (register_name == "r12") {
175      value = context.GetContextAMD64()->r12;
176    } else if (register_name == "r13") {
177      value = context.GetContextAMD64()->r13;
178    } else if (register_name == "r14") {
179      value = context.GetContextAMD64()->r14;
180    } else if (register_name == "r15") {
181      value = context.GetContextAMD64()->r15;
182    } else if (register_name == "rip") {
183      value = context.GetContextAMD64()->rip;
184    } else {
185      BPLOG(ERROR) << "Unsupported register: " << register_name;
186      return false;
187    }
188  
189    return true;
190  }
191  
192  // Lookup the value of `register_name` in `context`, store it into `value` on
193  // success.
194  // Support for non-full-size registers not implemented, since we're only using
195  // this to evaluate address expressions.
196  bool GetRegisterValue(const DumpContext& context, string register_name,
197                        uint64_t& value) {
198    if (context.GetContextCPU() == MD_CONTEXT_X86) {
199      return GetRegisterValueX86(context, register_name, value);
200    } else if (context.GetContextCPU() == MD_CONTEXT_AMD64) {
201      return GetRegisterValueAMD64(context, register_name, value);
202    } else {
203      BPLOG(ERROR) << "Unsupported architecture for GetRegisterValue\n";
204      return false;
205    }
206  }
207  }  // namespace
208  
209  // static
210  bool DisassemblerObjdump::DisassembleInstruction(uint32_t cpu,
211                                                   const uint8_t* raw_bytes,
212                                                   unsigned int raw_bytes_len,
213                                                   string& instruction) {
214    // Always initialize outputs
215    instruction = "";
216  
217    if (!raw_bytes || raw_bytes_len == 0) {
218      // There's no need to perform any operation in this case, as there's
219      // clearly no instruction there.
220      return false;
221    }
222  
223    string architecture;
224    if (cpu == MD_CONTEXT_X86) {
225      architecture = "i386";
226    } else if (cpu == MD_CONTEXT_AMD64) {
227      architecture = "i386:x86-64";
228    } else {
229      BPLOG(ERROR) << "Unsupported architecture.";
230      return false;
231    }
232  
233    // Create a temporary file for the raw instruction bytes to pass to
234    // objdump, and write the bytes to the input file.
235    ScopedTmpFile raw_bytes_file;
236    if (!raw_bytes_file.InitData(raw_bytes, raw_bytes_len)) {
237      BPLOG(ERROR) << "Failed creating temporary file.";
238      return false;
239    }
240  
241    // Create a pipe to use to read the disassembly back from objdump.
242    ScopedPipe disassembly_pipe;
243    if (!disassembly_pipe.Init()) {
244      BPLOG(ERROR) << "Failed creating pipe for output.";
245      return false;
246    }
247  
248    pid_t child_pid = fork();
249    if (child_pid < 0) {
250      BPLOG(ERROR) << "Fork failed.";
251      return false;
252    }
253  
254    if (child_pid == 0) {
255      // In the child process, set up the input and output file descriptors.
256      if (dup2(raw_bytes_file.GetFd(), STDIN_FILENO) < 0 ||
257          disassembly_pipe.Dup2WriteFd(STDOUT_FILENO) < 0 ||
258          disassembly_pipe.Dup2WriteFd(STDERR_FILENO) < 0) {
259        BPLOG(ERROR) << "Failed dup'ing file descriptors.";
260        exit(-1);
261      }
262  
263      // We need to close the read end of the pipe in the child process so that
264      // when the parent closes it, the pipe is disconnected.
265      disassembly_pipe.CloseReadFd();
266  
267      // We use "/proc/self/fd/0" here to allow objdump to parse an unnamed file,
268      // since objdump does not have a mode to read from stdin. This cannot be
269      // used with a pipe, since objdump requires that the input is a standard
270      // file.
271      execlp("objdump", "objdump", "-D", "--no-show-raw-insn", "-b", "binary",
272             "-M", "intel", "-m", architecture.c_str(), "/proc/self/fd/0",
273             nullptr);
274  
275      BPLOG(ERROR) << "Failed to exec objdump.";
276      exit(-1);
277    } else {
278      // In the parent process, parse the objdump output.
279  
280      // Match the instruction line, from:
281      //    0:        lock cmpxchg DWORD PTR [esi+0x10],eax
282      // extract the string "lock cmpxchg DWORD PTR [esi+0x10],eax"
283      std::regex instruction_regex(
284          "^\\s+[0-9a-f]+:\\s+"  // "   0:"
285          "((?:\\s*\\S*)+)$");   // "lock cmpxchg..."
286  
287      std::string line;
288      std::smatch match;
289      while (disassembly_pipe.ReadLine(line)) {
290        if (std::regex_match(line, match, instruction_regex)) {
291          instruction = match[1].str();
292          break;
293        }
294      }
295  
296      // Close the read pipe so that objdump will exit (in case we broke out of
297      // the loop above before reading all of the output).
298      disassembly_pipe.CloseReadFd();
299  
300      // Now wait for objdump to exit.
301      int status = 0;
302      HANDLE_EINTR(waitpid(child_pid, &status, 0));
303  
304      if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
305        BPLOG(ERROR) << "objdump didn't run successfully.";
306        return false;
307      }
308  
309      if (instruction == "") {
310        BPLOG(ERROR) << "Failed to find instruction in objdump output.";
311        return false;
312      }
313    }
314  
315    return true;
316  }
317  
318  // static
319  bool DisassemblerObjdump::TokenizeInstruction(const string& instruction,
320                                                string& operation, string& dest,
321                                                string& src) {
322    // Always initialize outputs.
323    operation = "";
324    dest = "";
325    src = "";
326  
327    // Split the instruction into tokens by either whitespace or comma.
328    std::regex token_regex("((?:[^\\s,]+)|,)(?:\\s)*");
329    std::sregex_iterator tokens_begin(instruction.begin(), instruction.end(),
330                                      token_regex);
331  
332    bool found_comma = false;
333    for (auto tokens_iter = tokens_begin; tokens_iter != std::sregex_iterator();
334         ++tokens_iter) {
335      auto token = (*tokens_iter)[1].str();
336      if (operation.size() == 0) {
337        if (IsInstructionPrefix(token))
338          continue;
339        operation = token;
340      } else if (dest.size() == 0) {
341        if (IsOperandSize(token))
342          continue;
343        dest = token;
344      } else if (!found_comma) {
345        if (token == ",") {
346          found_comma = true;
347        } else {
348          BPLOG(ERROR) << "Failed to parse operands from objdump output, expected"
349                          " comma but found \""
350                       << token << "\"";
351          return false;
352        }
353      } else if (src.size() == 0) {
354        if (IsOperandSize(token))
355          continue;
356        src = token;
357      } else {
358        if (token == ",") {
359          BPLOG(ERROR) << "Failed to parse operands from objdump output, found "
360                          "unexpected comma after last operand.";
361          return false;
362        } else {
363          // We just ignore other junk after the last operand unless it's a
364          // comma, which would indicate we're probably still in the middle
365          // of the operands and something has gone wrong
366        }
367      }
368    }
369  
370    if (found_comma && src.size() == 0) {
371      BPLOG(ERROR) << "Failed to parse operands from objdump output, found comma "
372                      "but no src operand.";
373      return false;
374    }
375  
376    return true;
377  }
378  
379  // static
380  bool DisassemblerObjdump::CalculateAddress(const DumpContext& context,
381                                             const string& expression,
382                                             uint64_t& address) {
383    address = 0;
384  
385    // Extract the components of the expression.
386    // fs:[esi+edi*4+0x80] -> ["fs", "esi", "edi", "4", "-", "0x80"]
387    std::regex expression_regex(
388        "^(?:(\\ws):)?"                // "fs:"
389        "\\[(\\w+)"                    // "[esi"
390        "(?:\\+(\\w+)(?:\\*(\\d+)))?"  // "+edi*4"
391        "(?:([\\+-])(0x[0-9a-f]+))?"   // "-0x80"
392        "\\]$");                       // "]"
393  
394    std::smatch match;
395    if (!std::regex_match(expression, match, expression_regex) ||
396        match.size() != 7) {
397      return false;
398    }
399  
400    string segment_name = match[1].str();
401    string register_name = match[2].str();
402    string index_name = match[3].str();
403    string index_stride = match[4].str();
404    string offset_sign = match[5].str();
405    string offset = match[6].str();
406  
407    uint64_t segment_address = 0;
408    uint64_t register_value = 0;
409    uint64_t index_value = 0;
410    uint64_t index_stride_value = 1;
411    uint64_t offset_value = 0;
412  
413    if (segment_name.size() &&
414        !GetSegmentAddress(context, segment_name, segment_address)) {
415      return false;
416    }
417  
418    if (!GetRegisterValue(context, register_name, register_value)) {
419      return false;
420    }
421  
422    if (index_name.size() &&
423        !GetRegisterValue(context, index_name, index_value)) {
424      return false;
425    }
426  
427    if (index_stride.size()) {
428      index_stride_value = strtoull(index_stride.c_str(), nullptr, 0);
429    }
430  
431    if (offset.size()) {
432      offset_value = strtoull(offset.c_str(), nullptr, 0);
433    }
434  
435    address =
436        segment_address + register_value + (index_value * index_stride_value);
437    if (offset_sign == "+") {
438      address += offset_value;
439    } else if (offset_sign == "-") {
440      address -= offset_value;
441    }
442  
443    return true;
444  }
445  
446  DisassemblerObjdump::DisassemblerObjdump(const uint32_t cpu,
447                                           const MemoryRegion* memory_region,
448                                           uint64_t address) {
449    if (address < memory_region->GetBase() ||
450        memory_region->GetBase() + memory_region->GetSize() <= address) {
451      return;
452    }
453  
454    uint8_t ip_bytes[kMaxX86InstructionLength] = {0};
455    size_t ip_bytes_length;
456    for (ip_bytes_length = 0; ip_bytes_length < kMaxX86InstructionLength;
457         ++ip_bytes_length) {
458      // We have to read byte-by-byte here, since we still want to try and
459      // disassemble an instruction even if we don't have enough bytes.
460      if (!memory_region->GetMemoryAtAddress(address + ip_bytes_length,
461                                             &ip_bytes[ip_bytes_length])) {
462        break;
463      }
464    }
465  
466    string instruction;
467    if (!DisassembleInstruction(cpu, ip_bytes, kMaxX86InstructionLength,
468                                instruction)) {
469      return;
470    }
471  
472    if (!TokenizeInstruction(instruction, operation_, dest_, src_)) {
473      return;
474    }
475  }
476  
// Evaluates the source operand of the disassembled instruction (src_) as a
// memory address expression using the register values in `context`. Returns
// the result of CalculateAddress; on success `address` holds the computed
// address.
bool DisassemblerObjdump::CalculateSrcAddress(const DumpContext& context,
                                              uint64_t& address) {
  return CalculateAddress(context, src_, address);
}
481  
// Evaluates the destination operand of the disassembled instruction (dest_)
// as a memory address expression using the register values in `context`.
// Returns the result of CalculateAddress; on success `address` holds the
// computed address.
bool DisassemblerObjdump::CalculateDestAddress(const DumpContext& context,
                                               uint64_t& address) {
  return CalculateAddress(context, dest_, address);
}
486  
487  }  // namespace google_breakpad