/ src / processor / disassembler_x86.h
disassembler_x86.h
  1  // Copyright 2010 Google LLC
  2  //
  3  // Redistribution and use in source and binary forms, with or without
  4  // modification, are permitted provided that the following conditions are
  5  // met:
  6  //
  7  //     * Redistributions of source code must retain the above copyright
  8  // notice, this list of conditions and the following disclaimer.
  9  //     * Redistributions in binary form must reproduce the above
 10  // copyright notice, this list of conditions and the following disclaimer
 11  // in the documentation and/or other materials provided with the
 12  // distribution.
 13  //     * Neither the name of Google LLC nor the names of its
 14  // contributors may be used to endorse or promote products derived from
 15  // this software without specific prior written permission.
 16  //
 17  // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 18  // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 19  // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 20  // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 21  // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 22  // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 23  // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 24  // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 25  // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 26  // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 27  // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 28  
 29  // disassembler_x86.h: Basic x86 bytecode disassembler
 30  //
 31  // Provides a simple disassembler which wraps libdisasm. This allows simple
 32  // tests to be run against bytecode to test for various properties.
 33  //
 34  // Author: Cris Neckar
 35  
 36  #ifndef GOOGLE_BREAKPAD_PROCESSOR_DISASSEMBLER_X86_H_
 37  #define GOOGLE_BREAKPAD_PROCESSOR_DISASSEMBLER_X86_H_
 38  
 39  #include <stddef.h>
 40  #include <sys/types.h>
 41  
 42  #include "google_breakpad/common/breakpad_types.h"
 43  
 44  namespace libdis {
 45  #include "third_party/libdisasm/libdis.h"
 46  }
 47  
 48  namespace google_breakpad {
 49  
 50  enum {
 51    DISX86_NONE =                 0x0,
 52    DISX86_BAD_BRANCH_TARGET =    0x1,
 53    DISX86_BAD_ARGUMENT_PASSED =  0x2,
 54    DISX86_BAD_WRITE =            0x4,
 55    DISX86_BAD_BLOCK_WRITE =      0x8,
 56    DISX86_BAD_READ =             0x10,
 57    DISX86_BAD_BLOCK_READ =       0x20,
 58    DISX86_BAD_COMPARISON =       0x40
 59  };
 60  
 61  class DisassemblerX86 {
 62    public:
 63      // TODO(cdn): Modify this class to take a MemoryRegion instead of just
 64      // a raw buffer. This will make it easier to use this on arbitrary
 65      // minidumps without first copying out the code segment.
 66      DisassemblerX86(const uint8_t* bytecode, uint32_t, uint32_t);
 67      ~DisassemblerX86();
 68  
 69      // This walks to the next instruction in the memory region and
 70      // sets flags based on the type of instruction and previous state
 71      // including any registers marked as bad through setBadRead()
 72      // or setBadWrite(). This method can be called in a loop to
 73      // disassemble until the end of a region.
 74      uint32_t NextInstruction();
 75  
 76      // Indicates whether the current disassembled instruction was valid.
 77      bool currentInstructionValid() { return instr_valid_; }
 78  
 79      // Returns the current instruction as defined in libdis.h,
 80      // or NULL if the current instruction is not valid.
 81      const libdis::x86_insn_t* currentInstruction() {
 82        return instr_valid_ ? &current_instr_ : NULL;
 83      }
 84  
 85      // Returns the type of the current instruction as defined in libdis.h.
 86      libdis::x86_insn_group currentInstructionGroup() {
 87        return current_instr_.group;
 88      }
 89  
 90      // Indicates whether a return instruction has been encountered.
 91      bool endOfBlock() { return end_of_block_; }
 92  
 93      // The flags set so far for the disassembly.
 94      uint16_t flags() { return flags_; }
 95  
 96      // This sets an indicator that the register used to determine
 97      // src or dest for the current instruction is tainted. These can
 98      // be used after examining the current instruction to indicate,
 99      // for example that a bad read or write occurred and the pointer
100      // stored in the register is currently invalid.
101      bool setBadRead();
102      bool setBadWrite();
103  
104    protected:
105      const uint8_t* bytecode_;
106      uint32_t size_;
107      uint32_t virtual_address_;
108      uint32_t current_byte_offset_;
109      uint32_t current_inst_offset_;
110  
111      bool instr_valid_;
112      libdis::x86_insn_t current_instr_;
113  
114      // TODO(cdn): Maybe also track an expression's index register.
115      // ex: mov eax, [ebx + ecx]; ebx is base, ecx is index.
116      bool register_valid_;
117      libdis::x86_reg_t bad_register_;
118  
119      bool pushed_bad_value_;
120      bool end_of_block_;
121  
122      uint16_t flags_;
123  };
124  
125  }  // namespace google_breakpad
126  
127  #endif  // GOOGLE_BREAKPAD_PROCESSOR_DISASSEMBLER_X86_H_