/ src / processor / disassembler_objdump.h
disassembler_objdump.h
  1  // Copyright (c) 2022, Google LLC
  2  //
  3  // Redistribution and use in source and binary forms, with or without
  4  // modification, are permitted provided that the following conditions are
  5  // met:
  6  //
  7  //     * Redistributions of source code must retain the above copyright
  8  // notice, this list of conditions and the following disclaimer.
  9  //     * Redistributions in binary form must reproduce the above
 10  // copyright notice, this list of conditions and the following disclaimer
 11  // in the documentation and/or other materials provided with the
 12  // distribution.
 13  //     * Neither the name of Google LLC nor the names of its
 14  // contributors may be used to endorse or promote products derived from
 15  // this software without specific prior written permission.
 16  //
 17  // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 18  // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 19  // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 20  // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 21  // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 22  // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 23  // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 24  // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 25  // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 26  // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 27  // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 28  
 29  // disassembler_objdump.h: Disassembler that invokes objdump for disassembly.
 30  //
 31  // Author: Mark Brand
 32  
 33  #ifndef GOOGLE_BREAKPAD_PROCESSOR_DISASSEMBLER_OBJDUMP_H_
 34  #define GOOGLE_BREAKPAD_PROCESSOR_DISASSEMBLER_OBJDUMP_H_
 35  
 36  #include <string>
 37  
 38  #include "common/using_std_string.h"
 39  #include "google_breakpad/common/breakpad_types.h"
 40  #include "google_breakpad/processor/dump_context.h"
 41  #include "google_breakpad/processor/memory_region.h"
 42  
 43  namespace google_breakpad {
 44  
 45  // Uses objdump to disassemble a single instruction.
 46  //
 47  // Currently supports disassembly for x86 and x86_64 on linux hosts only; on
 48  // unsupported platform or for unsupported architectures disassembly will fail.
 49  //
 50  // If disassembly is successful, then this allows extracting the instruction
 51  // opcode, source and destination operands, and computing the source and
 52  // destination addresses for instructions that operate on memory.
 53  //
 54  // Example:
 55  //   DisassemblerObjdump disassembler(context->GetContextCPU(), memory_region,
 56  //                                    instruction_ptr);
 57  //   if (disassembler.IsValid()) {
 58  //     uint64_t src_address = 0;
 59  //     std::cerr << disassembler.operation() << " " << disassembler.src()
 60  //               << ", " << disassembler.dest() << std::endl;
 61  //     if (disassembler.CalculateSrcAddress(*context, src_address)) {
 62  //       std::cerr << "[src_address = " << std::hex << src_address << "]\n";
 63  //     }
 64  //   }
 65  class DisassemblerObjdump {
 66   public:
 67    // Construct an ObjdumpDisassembler for the provided `cpu` type, where this is
 68    // one of MD_CONTEXT_X86 or MD_CONTEXT_AMD64. Provided that `address` is
 69    // within `memory_region`, and the memory referenced is a valid instruction,
 70    // this will then be initialized with the disassembly for that instruction.
 71    DisassemblerObjdump(uint32_t cpu,
 72                        const MemoryRegion* memory_region,
 73                        uint64_t address);
 74    ~DisassemblerObjdump() = default;
 75  
 76    // If the source operand of the instruction is a memory operand, compute the
 77    // address referred to by the operand, and store this in `address`. On success
 78    // returns true, otherwise (if computation fails, or if the source operand is
 79    // not a memory operand) returns false and sets `address` to 0.
 80    bool CalculateSrcAddress(const DumpContext& context, uint64_t& address);
 81  
 82    // If the destination operand of the instruction is a memory operand, compute
 83    // the address referred to by the operand, and store this in `address`. On
 84    // success returns true, otherwise (if computation fails, or if the source
 85    // operand is not a memory operand) returns false and sets `address` to 0.
 86    bool CalculateDestAddress(const DumpContext& context, uint64_t& address);
 87  
 88    // If the instruction was disassembled successfully, this will be true.
 89    bool IsValid() const { return operation_.size() != 0; }
 90  
 91    // Returns the operation part of the disassembly, without any prefixes:
 92    //   "pop" eax
 93    //   lock "xchg" eax, edx
 94    const string& operation() const { return operation_; }
 95  
 96    // Returns the destination operand of the disassembly, without memory operand
 97    // size prefixes:
 98    //   mov DWORD PTR "[rax + 16]", edx
 99    const string& dest() const { return dest_; }
100  
101    // Returns the source operand of the disassembly, without memory operand
102    // size prefixes:
103    //   mov rax, QWORD PTR "[rdx]"
104    const string& src() const { return src_; }
105  
106   private:
107    friend class DisassemblerObjdumpForTest;
108  
109    // Writes out the provided `raw_bytes` to a temporary file, and executes objdump
110    // to disassemble according to `cpu`, which must be either MD_CONTEXT_X86 or
111    // MD_CONTEXT_AMD64. Once objdump has completed, parses out the instruction
112    // string from the first instruction in the output and stores it in
113    // `instruction`.
114    static bool DisassembleInstruction(uint32_t cpu, const uint8_t* raw_bytes,
115                                       unsigned int raw_bytes_len,
116                                       string& instruction);
117  
118    // Splits an `instruction` into three parts, the "main" `operation` and
119    // the `dest` and `src` operands.
120    // Example:
121    //   instruction = "lock cmpxchg QWORD PTR [rdi], rsi"
122    //   operation = "cmpxchg", dest = "[rdi]", src = "rsi"
123    static bool TokenizeInstruction(const string& instruction, string& operation,
124                                    string& dest, string& src);
125  
126    // Compute the address referenced by `expression` in `context`.
127    // Supports memory operands in the form
128    //   (segment:)[base_reg(+index_reg*index_stride)(+-offset)]
129    // Returns false if evaluation fails, or if the operand is not a supported
130    // memory operand.
131    static bool CalculateAddress(const DumpContext& context,
132                                 const string& expression,
133                                 uint64_t& address);
134  
135    // The parsed components of the disassembly for the instruction.
136    string operation_ = "";
137    string dest_ = "";
138    string src_ = "";
139  };
140  }  // namespace google_breakpad
141  
142  #endif  // GOOGLE_BREAKPAD_PROCESSOR_DISASSEMBLER_OBJDUMP_H_