/ ghidra / SuperImportSymbolsScript.py
SuperImportSymbolsScript.py
  1  #Extended Binana symbols importer script
  2  # @runtime Jython
  3  # @category Binana
  4  # @author Thunderbrew 
  5  # @menupath 
  6  # @toolbar logo.png
  7  
  8  import re
  9  import string
 10  
 11  from ghidra.app.cmd.function import ApplyFunctionSignatureCmd
 12  from ghidra.app.util.parser import FunctionSignatureParser
 13  from ghidra.util.data import DataTypeParser
 14  from ghidra.program.model.symbol import SourceType
 15  from ghidra.program.model.listing import Function, ParameterImpl, VariableStorage
 16  
 17  def find_storage_parameter(str):
 18      match = re.search(r'@<(\w+)?>', str)
 19      
 20      if match:
 21          return match.group(1)
 22      return None
 23  
 24  def strip_storage_parameter(str):
 25      pattern = r'@<[^>]+>'
 26  
 27      # Use re.sub to replace the match with an empty string
 28      return re.sub(pattern, '', str)
 29  
 30  def split_function_parameters(str):
 31      SCAN_FUNC = 1
 32      SCAN_PARAMETER = 2
 33      SCAN_ESCAPE_PAREN = 3
 34  
 35      current_word = ''
 36      parameters = []
 37      state = SCAN_FUNC
 38      paren_level = 0
 39      i = 0
 40      while i < len(str):
 41          c = str[i]
 42  
 43          if state == SCAN_FUNC:
 44              if c == '(':
 45                  state = SCAN_PARAMETER
 46          elif state == SCAN_PARAMETER:
 47              if c == '(':
 48                  state = SCAN_ESCAPE_PAREN
 49                  paren_level = 1
 50                  current_word = current_word + c
 51              elif c == ',':
 52                  parameters.append(current_word.strip())
 53                  current_word = ''
 54              elif c == ')':
 55                  parameters.append(current_word.strip())
 56                  current_word = ''
 57                  break
 58              else:
 59                  current_word = current_word + c
 60          elif state == SCAN_ESCAPE_PAREN:
 61              current_word = current_word + c
 62              if c == '(':
 63                  paren_level = paren_level + 1
 64              elif c == ')':
 65                  paren_level = paren_level - 1
 66  
 67              if paren_level == 0:
 68                  state = SCAN_PARAMETER
 69  
 70          i = i + 1
 71  
 72      if current_word != '':
 73          parameters.append(current_word.strip())
 74      return parameters
 75  
 76  # return: <calling convention> <optional, this ptr type> <stripped type string>, <map of parameter index numbers to register names> 
 77  def strip_function_type(str):
 78      # str = int32_t __stdcall func@<eax>(int32_t x@<edi>)
 79  
 80      parameter_storage = {}
 81  
 82      this_ptr_type = ''
 83  
 84      # [ 'int32_t', '__stdcall', 'func@<eax>(int32_t x@<edi>)' ]
 85      str_parts = str.split(' ') 
 86      # int32_t
 87      return_type = str_parts[0]
 88  
 89      # default
 90      call_conv = '__stdcall'
 91  
 92      i = 0
 93      last_spec = -1
 94      func_start = -1
 95      while i < len(str_parts):
 96          if str_parts[i].startswith('__'):
 97              if str_parts[i].endswith('call'):
 98                  call_conv = str_parts[i]
 99              last_spec = i
100          elif str_parts[i].startswith('func'):
101              func_start = i
102              break
103          i = i + 1
104  
105      function_call = ' '.join(str_parts[func_start:])
106  
107      # [ 'func@<eax>', 'int32_t x@<edi>)' ]
108      func_before_after_paren = function_call.split('(', 1)
109      # func@<eax>
110      func_id = func_before_after_paren[0]
111      # [ 'int32_t x@<edi>' ]
112      func_parameters = split_function_parameters(function_call) 
113  
114      return_parameter_storage = find_storage_parameter(func_id)
115      if return_parameter_storage is not None:
116          parameter_storage[0] = return_parameter_storage
117          func_id = strip_storage_parameter(func_id)
118  
119      # start building stripped function type
120      stripped_type = return_type
121      stripped_type += " "
122      stripped_type += func_id
123  
124      stripped_type += '('
125          
126      n = 1
127      first = True 
128      for argument_parameter in func_parameters:
129          if '__return_ptr' in argument_parameter:
130              argument_parameter = argument_parameter.replace('__return_ptr ', '')
131  
132          if n == 1 and call_conv == '__thiscall':
133              # in Ghidra, a this pointer is always added to the signature 
134              # we need only record the the type for later
135              this_ptr_type = argument_parameter.split(' ', 1)[0]
136              # n = n + 1
137              # continue
138              # 
139              # commented out: let's see if this fixed it
140  
141          if not first:
142              stripped_type += ', '
143          else:
144              first = False
145          argument_parameter_storage = find_storage_parameter(argument_parameter)
146          if argument_parameter_storage is not None:
147             parameter_storage[n] = argument_parameter_storage
148             argument_parameter = strip_storage_parameter(argument_parameter)
149          stripped_type += argument_parameter
150          n = n + 1
151      stripped_type += ')'
152      # todo strip parameters
153      return call_conv, this_ptr_type, stripped_type, parameter_storage
154  
def parse_attributes(str):
    """Parse a whitespace-separated attribute list into a dict.

    Supported forms:
        key            -> attributes['key'] = True (boolean attribute)
        key=value      -> attributes['key'] = 'value'
        key="a value"  -> attributes['key'] = 'a value' (quotes allow spaces)

    Spaces, tabs and line terminators all separate attributes, so a line read
    straight from a file (with its trailing newline) parses cleanly. A value
    that runs up to the end of the input is stored as well.

    str: the attribute text, e.g. 'end=00682CCA type="int32_t func()" flag'
    return: dict mapping attribute names to True or to their string value
    """
    SCAN_ATTRIBUTES = 0
    SCAN_KEY = 1
    SCAN_VALUE = 2
    WHITESPACE = ' \t\r\n'

    attributes = {}
    current_key = ''
    value_chars = []
    state = SCAN_ATTRIBUTES
    quote = False

    for c in str:
        if state == SCAN_ATTRIBUTES:
            if c not in WHITESPACE:
                current_key = c
                state = SCAN_KEY
        elif state == SCAN_KEY:
            if c in WHITESPACE:
                # the key terminated early with whitespace
                # this is valid and means it is a boolean attribute
                attributes[current_key] = True
                current_key = ''
                state = SCAN_ATTRIBUTES
            elif c == '=':
                state = SCAN_VALUE
            else:
                current_key = current_key + c
        else:
            # SCAN_VALUE
            if quote:
                value_done = c == '"'
            elif c == '"':
                quote = True
                continue
            else:
                value_done = c in WHITESPACE

            if value_done:
                attributes[current_key] = ''.join(value_chars)
                current_key = ''
                value_chars = []
                # reset so the next value is not treated as quoted
                quote = False
                state = SCAN_ATTRIBUTES
            else:
                value_chars.append(c)

    if state == SCAN_KEY:
        # the input ended in the middle of a key: boolean attribute
        attributes[current_key] = True
    elif state == SCAN_VALUE:
        # the input ended in the middle of a value: keep what was read
        attributes[current_key] = ''.join(value_chars)

    return attributes
210  
def parse_symbol_entry(line):
    """Parse one line of a symbol file into an entry dict.

    Line layout: '<label> <address> <kind> [attributes] [; comment]'

    The line terminator is removed first, so a line without attributes still
    yields a clean kind (e.g. 'f' rather than 'f' followed by a newline).

    line: one line of the symbol file, with or without its line terminator
    return: dict with the keys 'label', 'address', 'kind', 'comment' and
            'attributes', or None when the line has fewer than three
            space-separated fields
    """
    pieces = line.rstrip('\r\n').split(' ', 3)
    if len(pieces) < 3:
        return None

    entry = {
        'label': pieces[0],
        'address': pieces[1],
        'kind': pieces[2],
        'comment': '',
        'attributes': {},
    }

    if len(pieces) > 3:
        # everything after the first ';' is a free-form comment
        attributes, separator, comment = pieces[3].partition(';')
        if separator:
            entry['comment'] = comment.lstrip(' ')
            attributes = attributes.rstrip(' ')
        entry['attributes'] = parse_attributes(attributes)

    return entry
235  
def parse_datatype_string(dt_string):
    """Parse *dt_string* into a data type using the current program's types.

    The current program's data type manager is handed to the parser twice
    (first and second argument) and all data types are allowed. The outcome
    is printed either way.

    return: the parsed data type, or None when parsing raised an exception
    """
    data_type_manager = currentProgram.getDataTypeManager()
    type_parser = DataTypeParser(
        data_type_manager,
        data_type_manager,
        None,
        DataTypeParser.AllowedDataTypes.ALL
    )

    try:
        result = type_parser.parse(dt_string)
        # kept inside the try block: a failure while reporting is handled
        # exactly like a parse failure
        print("Successfully parsed: {} as {}".format(dt_string, result.getName()))
    except Exception as e:
        print("Error parsing '{}': {}".format(dt_string, e))
        return None
    return result
252  
# Function manager of the current program; apply_function_symbol uses it to
# look up the function that already exists at a symbol's address.
functionManager = currentProgram.getFunctionManager()

# Ask the user for the symbol file; it is read line by line at the end of the script.
f = askFile("Navigate to the Binana all.sym file", "Go")
256  
def apply_function_symbol(entry):
    """Create or rename the function described by *entry* and apply its type.

    entry: dict as produced by parse_symbol_entry. 'label' is the function
           name, 'address' the address string, and the optional 'type'
           attribute an annotated signature understood by strip_function_type.

    Steps:
      1. Rename the function at the address, or create it when none exists.
      2. When a 'type' attribute is present, parse the stripped signature and
         apply it together with the calling convention ('__usercall' is
         applied as '__stdcall').
      3. When the signature carries register annotations or is '__thiscall',
         switch the function to custom storage and assign every parameter
         either its annotated register or the next stack slot.

    Failures are reported with print(); nothing is returned.
    """
    name = entry['label']
    address = toAddr(entry['address'])

    func = functionManager.getFunctionAt(address)

    if func is not None:
        old_name = func.getName()
        func.setName(name, SourceType.USER_DEFINED)
        print("Renamed function {} to {} at address {}".format(old_name, name, address))
    else:
        func = createFunction(address, name)
        if func is None:
            # without a function there is nothing to apply a signature to
            print("Failed to create function {} at address {}".format(name, address))
            return
        print("Created function {} at address {}".format(name, address))

    func_type = entry['attributes'].get('type')
    if func_type is None:
        return

    calling_convention, this_ptr_type, stripped_func_type, parameter_storage = strip_function_type(func_type)

    # 'None' is passed for the DataTypeManagerService
    parser = FunctionSignatureParser(currentProgram.getDataTypeManager(), None)

    print('applying signature: {}'.format(stripped_func_type))

    func_signature = parser.parse(None, stripped_func_type)
    if calling_convention == '__usercall':
        # register usage of __usercall is expressed through custom storage below
        calling_convention = '__stdcall'

    # apply this information to the function
    func_signature.setCallingConvention(calling_convention)
    cmd = ApplyFunctionSignatureCmd(address, func_signature, SourceType.USER_DEFINED)

    if cmd.applyTo(currentProgram):
        print("Success! Applied signature '{}' to {}".format(entry['label'], entry['address']))
    else:
        print("Failed to apply signature. Reason: {}".format(cmd.getStatusMsg()))

    # Storage only has to be modified if the function is a class method, or if
    # it passes certain values by register in violation of the standard
    # calling convention.
    if len(parameter_storage) == 0 and calling_convention != '__thiscall':
        return

    # because we have to do this, everything is now manual
    func.setCustomVariableStorage(True)

    parameters = func.getParameters()

    # check if changing return storage is needed
    return_register_name = parameter_storage.get(0)
    if return_register_name is not None:
        return_register = currentProgram.getRegister(return_register_name.upper())
        if return_register is None:
            print("Unknown return register '{}' for {}".format(return_register_name, name))
        else:
            return_storage = VariableStorage(currentProgram, return_register)
            func.setReturn(func_signature.getReturnType(), return_storage, SourceType.USER_DEFINED)

    # TODO: adjust this for different architectures
    stack_offset = 4
    stack_alignment = 4

    parameter_index = 0
    # fix this ptr type; its existing storage is kept
    if calling_convention == '__thiscall':
        this_ptr_datatype = parse_datatype_string(this_ptr_type)
        # only replace the parameter when there is one and its type parsed
        if len(parameters) > 0 and this_ptr_datatype is not None:
            parameters[0] = ParameterImpl(parameters[0].getName(), this_ptr_datatype, parameters[0].getVariableStorage(), currentProgram)
        parameter_index = 1

    # fix storage of main parameters
    while parameter_index < len(parameters):
        parameter = parameters[parameter_index]
        register_name = parameter_storage.get(1 + parameter_index)
        if register_name is not None:
            # this parameter wants to be stored in a register
            parameter_register = currentProgram.getRegister(register_name.upper())
            if parameter_register is None:
                # leave the parameter as it is rather than building storage
                # from a register that does not exist
                print("Unknown register '{}' for parameter {} of {}".format(register_name, parameter.getName(), name))
            else:
                parameter_variable_storage = VariableStorage(currentProgram, parameter_register)
                parameters[parameter_index] = ParameterImpl(parameter.getName(), parameter.getDataType(), parameter_variable_storage, currentProgram)
        else:
            # this parameter wants to be passed in the stack
            parameter_data_type = parameter.getDataType()
            parameter_size = parameter_data_type.getLength()
            parameter_variable_storage = VariableStorage(currentProgram, stack_offset, parameter_size)
            parameters[parameter_index] = ParameterImpl(parameter.getName(), parameter_data_type, parameter_variable_storage, currentProgram)
            # every stack parameter occupies a whole number of aligned slots:
            # round the size up to the next multiple of stack_alignment
            slot_count = (parameter_size + stack_alignment - 1) // stack_alignment
            stack_offset = stack_offset + slot_count * stack_alignment
        parameter_index = parameter_index + 1

    func.replaceParameters(Function.FunctionUpdateType.CUSTOM_STORAGE, True, SourceType.USER_DEFINED, parameters)
346  
def apply_data_symbol(entry):
    """Create a primary label for *entry* and apply its data type, if any.

    entry: dict as produced by parse_symbol_entry. 'label' is the label name,
           'address' the address string, and the optional 'type' attribute a
           data type string understood by parse_datatype_string.

    When the type cannot be parsed only the label is created.
    """
    address = toAddr(entry['address'])

    createLabel(address, entry['label'], True)
    print("Created label {} at address {}".format(entry['label'], entry['address']))

    type_string = entry['attributes'].get('type')
    if type_string is None:
        return

    data_type = parse_datatype_string(type_string)
    if data_type is None:
        # parse_datatype_string already reported the error
        return

    data_type_size = data_type.getLength()
    if data_type_size > 0:
        # remove existing defined data in the range the new data will occupy;
        # skipped for a non-positive length, which would give a reversed range
        clearListing(address, address.add(data_type_size - 1))
    createData(address, data_type)
    print("applied data type {} to label {}".format(type_string, entry['label']))
367          
368  
# Read the chosen symbol file line by line and apply every entry.
# note, cannot use open(), since that is in GhidraScript
# The 'with' block closes the file as soon as the import finishes (or fails).
with file(f.absolutePath) as symbol_file:
    for line in symbol_file:
        entry = parse_symbol_entry(line)
        if entry is None:
            continue
        # 'f' entries are functions, 'l' entries are data labels;
        # any other kind is ignored
        if entry['kind'] == 'f':
            apply_function_symbol(entry)
        elif entry['kind'] == 'l':
            apply_data_symbol(entry)