SuperImportSymbolsScript.py
#Extended Binana symbols importer script
# @runtime Jython
# @category Binana
# @author Thunderbrew
# @menupath
# @toolbar logo.png

import re
import string

from ghidra.app.cmd.function import ApplyFunctionSignatureCmd
from ghidra.app.util.parser import FunctionSignatureParser
from ghidra.util.data import DataTypeParser
from ghidra.program.model.symbol import SourceType
from ghidra.program.model.listing import Function, ParameterImpl, VariableStorage


def find_storage_parameter(str):
    """Return the name inside the first ``@<name>`` annotation found in str.

    Returns None when str has no annotation, or when the annotation is empty
    (``@<>``).
    """
    match = re.search(r'@<(\w+)?>', str)
    if match:
        return match.group(1)
    return None


def strip_storage_parameter(str):
    """Return str with every non-empty ``@<...>`` annotation removed."""
    return re.sub(r'@<[^>]+>', '', str)


def split_function_parameters(str):
    """Split the parameter list of a declaration into parameter strings.

    Scanning starts at the first '(' and stops at the matching ')'.
    Parameters are separated on ','; text inside nested parentheses (e.g. a
    function pointer parameter) is copied verbatim and never split. Each
    returned parameter is stripped of surrounding whitespace. An empty
    parameter list ``func()`` yields ``['']``.
    """
    SCAN_FUNC = 1
    SCAN_PARAMETER = 2
    SCAN_ESCAPE_PAREN = 3

    current_word = ''
    parameters = []
    state = SCAN_FUNC
    paren_level = 0

    for c in str:
        if state == SCAN_FUNC:
            # skip everything up to the opening paren of the parameter list
            if c == '(':
                state = SCAN_PARAMETER
        elif state == SCAN_PARAMETER:
            if c == '(':
                state = SCAN_ESCAPE_PAREN
                paren_level = 1
                current_word += c
            elif c == ',':
                parameters.append(current_word.strip())
                current_word = ''
            elif c == ')':
                parameters.append(current_word.strip())
                current_word = ''
                break
            else:
                current_word += c
        elif state == SCAN_ESCAPE_PAREN:
            current_word += c
            if c == '(':
                paren_level += 1
            elif c == ')':
                paren_level -= 1
            if paren_level == 0:
                state = SCAN_PARAMETER

    # the input ended before the closing paren: keep what was collected
    if current_word != '':
        parameters.append(current_word.strip())
    return parameters


def strip_function_type(str):
    """Split an annotated function type into the pieces the importer needs.

    Example input: ``int32_t __stdcall func@<eax>(int32_t x@<edi>)``

    Returns a 4-tuple:
      * calling convention (``'__stdcall'`` when none is given),
      * type of the first parameter when the convention is ``__thiscall``
        (empty string otherwise),
      * the declaration with calling convention and ``@<reg>`` annotations
        removed, e.g. ``int32_t func(int32_t x)``,
      * dict mapping parameter numbers to register names; key 0 is the
        return value, key 1 the first parameter.
    """
    parameter_storage = {}
    this_ptr_type = ''

    # [ 'int32_t', '__stdcall', 'func@<eax>(int32_t', 'x@<edi>)' ]
    str_parts = str.split(' ')

    # default
    call_conv = '__stdcall'

    first_spec = -1
    func_start = -1
    for i, part in enumerate(str_parts):
        if part.startswith('__'):
            if first_spec == -1:
                first_spec = i
            # '__cdecl' is the one common convention not ending in 'call'
            if part.endswith('call') or part == '__cdecl':
                call_conv = part
        elif part.startswith('func'):
            func_start = i
            break

    # The return type is everything before the first '__' specifier (or
    # before the function name), so multi-token types such as
    # 'unsigned int' are kept whole.
    return_end = first_spec if first_spec != -1 else func_start
    return_type = ' '.join([p for p in str_parts[:max(return_end, 0)] if p])
    if return_type == '':
        return_type = str_parts[0]

    # When no token starts with 'func', func_start is -1 and this falls back
    # to the last token only.
    function_call = ' '.join(str_parts[func_start:])

    # func@<eax>
    func_id = function_call.split('(', 1)[0]
    # [ 'int32_t x@<edi>' ]
    func_parameters = split_function_parameters(function_call)

    return_parameter_storage = find_storage_parameter(func_id)
    if return_parameter_storage is not None:
        parameter_storage[0] = return_parameter_storage
        func_id = strip_storage_parameter(func_id)

    stripped_parameters = []
    n = 1
    for argument_parameter in func_parameters:
        if '__return_ptr' in argument_parameter:
            argument_parameter = argument_parameter.replace('__return_ptr ', '')

        if n == 1 and call_conv == '__thiscall':
            # The this pointer stays in the signature; its type is recorded
            # so the caller can re-apply it to the first parameter later.
            this_ptr_type = argument_parameter.split(' ', 1)[0]

        argument_parameter_storage = find_storage_parameter(argument_parameter)
        if argument_parameter_storage is not None:
            parameter_storage[n] = argument_parameter_storage
            argument_parameter = strip_storage_parameter(argument_parameter)

        stripped_parameters.append(argument_parameter)
        n += 1

    stripped_type = '{} {}({})'.format(
        return_type, func_id, ', '.join(stripped_parameters))
    return call_conv, this_ptr_type, stripped_type, parameter_storage


def parse_attributes(str):
    """Parse a space separated ``key=value`` attribute string into a dict.

    * ``key`` alone (followed by a space or the end of input) is a boolean
      attribute and maps to True.
    * ``key=value`` ends at the next space.
    * ``key="some value"`` ends at the closing quote and may contain spaces.
    """
    attributes = {}
    current_key = ''
    current_value = ''

    SCAN_ATTRIBUTES = 0
    SCAN_KEY = 1
    SCAN_VALUE = 2

    state = SCAN_ATTRIBUTES
    quote = False
    for c in str:
        if state == SCAN_ATTRIBUTES:
            if c != ' ':
                current_key = c
                state = SCAN_KEY
        elif state == SCAN_KEY:
            if c == ' ':
                # the key terminated early with a space
                # this is valid and means it is a boolean attribute
                attributes[current_key] = True
                current_key = ''
                state = SCAN_ATTRIBUTES
            elif c == '=':
                state = SCAN_VALUE
            else:
                current_key += c
        elif state == SCAN_VALUE:
            if c == '"' and not quote:
                quote = True
            elif (c == '"' and quote) or (c == ' ' and not quote):
                attributes[current_key] = current_value
                current_key = ''
                current_value = ''
                # leave quoted mode, otherwise every following value would
                # be scanned as if it were still inside the quotes
                quote = False
                state = SCAN_ATTRIBUTES
            else:
                current_value += c

    if state == SCAN_KEY:
        # the line terminated in the middle of scanning a key
        # that means it's a boolean attribute
        attributes[current_key] = True
    elif state == SCAN_VALUE:
        # the line terminated in the middle of a value: keep it
        attributes[current_key] = current_value

    return attributes


def parse_symbol_entry(line):
    """Parse one line of a symbol file into an entry dict.

    Line format: ``<label> <address> <kind> [attributes...] [; comment]``

    Returns None when the line has fewer than three fields, otherwise a dict
    with the keys 'label', 'address', 'kind', 'comment' (str) and
    'attributes' (dict, see parse_attributes).
    """
    # lines read from a file keep their line ending; without this the last
    # field (kind, attribute value or comment) would carry a newline
    line = line.rstrip('\r\n')

    pieces = line.split(' ', 3)
    if len(pieces) < 3:
        return None

    entry = {
        'label': pieces[0],
        'address': pieces[1],
        'kind': pieces[2],
        'comment': '',
        'attributes': {},
    }

    if len(pieces) > 3:
        et_cetera = pieces[3]
        attributes = et_cetera
        index_of_comment_separator = et_cetera.find(';')
        if index_of_comment_separator != -1:
            entry['comment'] = et_cetera[index_of_comment_separator + 1:].lstrip(' ')
            attributes = et_cetera[:index_of_comment_separator]
        entry['attributes'] = parse_attributes(attributes.rstrip(' '))

    return entry


def parse_datatype_string(dt_string):
    """Parse a data type string with the current program's data type manager.

    Returns the parsed data type, or None (after printing the error) when
    parsing raises.
    """
    dtm = currentProgram.getDataTypeManager()
    parser = DataTypeParser(dtm, dtm, None, DataTypeParser.AllowedDataTypes.ALL)

    try:
        parsed_dt = parser.parse(dt_string)
        print("Successfully parsed: {} as {}".format(dt_string, parsed_dt.getName()))
        return parsed_dt
    except Exception as e:
        print("Error parsing '{}': {}".format(dt_string, e))
        return None


functionManager = currentProgram.getFunctionManager()

f = askFile("Navigate to the Binana all.sym file", "Go")


def _aligned_size(size, alignment):
    """Return size rounded up to the next multiple of alignment."""
    return ((size + alignment - 1) // alignment) * alignment


def apply_function_symbol(entry):
    """Name (or create) the function described by entry and apply its type.

    When the entry has a 'type' attribute, the signature is applied and, for
    __thiscall functions or parameters annotated with ``@<reg>``, the
    function is switched to custom storage with each parameter placed in its
    register or at the next stack offset.
    """
    name = entry['label']
    address = toAddr(entry['address'])

    func = functionManager.getFunctionAt(address)
    if func is not None:
        old_name = func.getName()
        func.setName(name, SourceType.USER_DEFINED)
        print("Renamed function {} to {} at address {}".format(old_name, name, address))
    else:
        func = createFunction(address, name)
        if func is None:
            # nothing to apply a signature or storage to
            print("Failed to create function {} at address {}".format(name, address))
            return
        print("Created function {} at address {}".format(name, address))

    func_type = entry['attributes'].get('type')
    if func_type is None:
        return

    calling_convention, this_ptr_type, stripped_func_type, parameter_storage = strip_function_type(func_type)

    # We pass 'None' for the DataTypeManagerService as it's not strictly required here
    parser = FunctionSignatureParser(currentProgram.getDataTypeManager(), None)

    print('applying signature: {}'.format(stripped_func_type))

    func_signature = parser.parse(None, stripped_func_type)
    # __usercall only differs by its register annotations, which are applied
    # as custom storage below
    if calling_convention == '__usercall':
        calling_convention = '__stdcall'

    # apply this information to the function
    func_signature.setCallingConvention(calling_convention)
    cmd = ApplyFunctionSignatureCmd(address, func_signature, SourceType.USER_DEFINED)

    if cmd.applyTo(currentProgram):
        print("Success! Applied signature '{}' to {}".format(entry['label'], entry['address']))
    else:
        print("Failed to apply signature. Reason: {}".format(cmd.getStatusMsg()))

    # if the function is a class method, the storage has to be modified
    # or if the function passes certain arguments by register, in violation of standard calling convention
    if len(parameter_storage) == 0 and calling_convention != '__thiscall':
        return

    # because we have to do this, everything is now manual
    func.setCustomVariableStorage(True)

    parameters = func.getParameters()

    # check if changing return storage is needed
    if parameter_storage.get(0) is not None:
        return_register_storage = currentProgram.getRegister(parameter_storage[0].upper())
        return_storage = VariableStorage(currentProgram, return_register_storage)
        func.setReturn(func_signature.getReturnType(), return_storage, SourceType.USER_DEFINED)

    # TODO: adjust this for different architectures
    # only __usercall and __stdcall can pass arguments through stack by default
    stack_offset = 4
    stack_alignment = 4

    parameter_index = 0
    # fix this ptr storage
    if calling_convention == '__thiscall':
        this_ptr_datatype = parse_datatype_string(this_ptr_type)
        # TODO: fix for other architectures
        # keep the storage already assigned to the first parameter and only
        # replace its data type
        if this_ptr_datatype is not None and len(parameters) > 0:
            parameters[0] = ParameterImpl(parameters[0].getName(), this_ptr_datatype, parameters[0].getVariableStorage(), currentProgram)
        parameter_index = 1

    # fix storage of main parameters
    while parameter_index < len(parameters):
        parameter = parameters[parameter_index]
        # parameter_storage is keyed from 1 for the first parameter
        register_name = parameter_storage.get(1 + parameter_index)
        if register_name is not None:
            # this parameter wants to be stored in a register
            parameter_register = currentProgram.getRegister(register_name.upper())
            parameter_variable_storage = VariableStorage(currentProgram, parameter_register)
        else:
            # this parameter wants to be passed in the stack
            parameter_size = parameter.getDataType().getLength()
            parameter_variable_storage = VariableStorage(currentProgram, stack_offset, parameter_size)
            # every stack parameter occupies a whole number of stack slots
            stack_offset += _aligned_size(parameter_size, stack_alignment)
        parameters[parameter_index] = ParameterImpl(parameter.getName(), parameter.getDataType(), parameter_variable_storage, currentProgram)
        parameter_index += 1

    func.replaceParameters(Function.FunctionUpdateType.CUSTOM_STORAGE, True, SourceType.USER_DEFINED, parameters)


def apply_data_symbol(entry):
    """Create a label for entry and, if it has a 'type' attribute, typed data.

    Existing defined data covering the new data type's extent is cleared
    before the data is created. Nothing beyond the label is applied when the
    type string cannot be parsed.
    """
    address = toAddr(entry['address'])

    print("Created label {} at address {}".format(entry['label'], entry['address']))
    createLabel(address, entry['label'], True)

    type_string = entry['attributes'].get('type')
    if type_string is None:
        return

    data_type = parse_datatype_string(type_string)
    if data_type is None:
        # parse_datatype_string already reported the error
        return

    # remove existing defined data at address
    data_type_size = data_type.getLength()
    clearListing(address, address.add(data_type_size - 1))
    createData(address, data_type)
    print("applied data type {} to label {}".format(type_string, entry['label']))


# note, cannot use open(), since that is in GhidraScript
with file(f.absolutePath) as symbol_file:
    for line in symbol_file:
        entry = parse_symbol_entry(line)
        if entry is not None:
            if entry['kind'] == 'f':
                apply_function_symbol(entry)
            elif entry['kind'] == 'l':
                apply_data_symbol(entry)