make-tokens.go
1 package util 2 3 import ( 4 "compress/gzip" 5 "debug/macho" 6 "encoding/json" 7 "fmt" 8 "os" 9 "path/filepath" 10 "slices" 11 "strings" 12 "time" 13 14 "github.com/thunderbrewhq/binana/go/app" 15 "github.com/thunderbrewhq/binana/go/app/util/dbutil" 16 "github.com/thunderbrewhq/binana/go/db" 17 "github.com/thunderbrewhq/binana/go/pdbconv" 18 "github.com/thunderbrewhq/binana/go/stringrecovery" 19 ) 20 21 type MakeTokenDatabaseParams struct { 22 Source string 23 Output string 24 Format dbutil.DatabaseFormat 25 } 26 27 func MakeTokenDatabase(params *MakeTokenDatabaseParams) { 28 var ( 29 tokens_database tokens_database 30 err error 31 ) 32 if err = tokens_database.Open(params.Output, params.Format); err != nil { 33 app.Fatal(err) 34 } 35 if err = tokens_database.make(params.Source); err != nil { 36 app.Fatal(err) 37 } 38 if err = tokens_database.Close(); err != nil { 39 app.Fatal(err) 40 } 41 } 42 43 type tokens_database struct { 44 sequence uint64 45 writer *dbutil.Writer[db.Token] 46 } 47 48 func (tokens_database *tokens_database) next_token_id() (id uint64) { 49 id = tokens_database.sequence 50 tokens_database.sequence++ 51 return 52 } 53 54 func (tokens_database *tokens_database) Open(name string, format dbutil.DatabaseFormat) (err error) { 55 tokens_database.sequence = 1 56 tokens_database.writer, err = dbutil.Open[db.Token](name, format) 57 return 58 } 59 60 func (tokens_database *tokens_database) Close() (err error) { 61 err = tokens_database.writer.Close() 62 return 63 } 64 65 func (tokens_database *tokens_database) Write(token *db.Token) (err error) { 66 tokens := []db.Token{*token} 67 if err = tokens_database.writer.WriteEntries(tokens); err != nil { 68 return 69 } 70 return 71 } 72 73 func (tokens_database *tokens_database) make_file_pdb(name string) (err error) { 74 exe_name := strings.TrimSuffix(name, ".pdb") + ".exe" 75 var base_address uint64 76 base_address, err = get_exe_base_address(exe_name) 77 if err != nil { 78 return 79 } 80 81 fmt.Fprintln(os.Stderr, "[pdb]", name) 82 var source_id string 83 source_id, err = hash_file(name) 84 if err != nil { 85 return 86 } 87 fmt.Fprintln(os.Stderr, "[pdb]", source_id) 88 // check for the existence of an alternate, .pdb.json.gz file 89 _, err = os.Stat(name + ".json.gz") 90 if err != nil { 91 return 92 } 93 94 var ( 95 gzip_file *os.File 96 gzip_reader *gzip.Reader 97 ) 98 gzip_file, err = os.Open(name + ".json.gz") 99 if err != nil { 100 return 101 } 102 103 var pdb pdbconv.ProgramDatabase 104 gzip_reader, err = gzip.NewReader(gzip_file) 105 json_decoder := json.NewDecoder(gzip_reader) 106 107 if err = json_decoder.Decode(&pdb); err != nil { 108 return 109 } 110 111 gzip_file.Close() 112 113 var v pdb_token_visitor 114 v.init(tokens_database, source_id, base_address) 115 if err = v.visit_all(&pdb); err != nil { 116 return 117 } 118 if err = v.write_tokens(); err != nil { 119 return 120 } 121 122 return 123 } 124 125 func (tokens_database *tokens_database) write_string_token(source_id string, section_name string, address uint64, str string) (err error) { 126 var db_token db.Token 127 db_token.ID = tokens_database.next_token_id() 128 db_token.Source = source_id 129 db_token.Section = section_name 130 db_token.Kind = db.OriginalStringToken 131 db_token.Offset = fmt.Sprintf("%X", address) 132 133 db_token.Names = append(db_token.Names, db.TokenName{db.OriginalName, str}) 134 135 // detect if this is a mangled type identifier 136 if looks_mangled(str) { 137 demangled, err := demangle(str) 138 if err == nil { 139 db_token.Names = append(db_token.Names, db.TokenName{db.DemangledName, demangled}) 140 } 141 } 142 143 err = tokens_database.Write(&db_token) 144 return 145 } 146 147 func (tokens_database *tokens_database) make_file_pe(name string) (err error) { 148 var id string 149 id, err = hash_file(name) 150 if err != nil { 151 return 152 } 153 err = stringrecovery.RecoverFile(name, func(section_name string, address uint64, str string) { 154 fmt.Fprintf(os.Stderr, "[pe] string found: %s %08X %s\n", section_name, address, str) 155 tokens_database.write_string_token(id, section_name, address, str) 156 }) 157 return 158 } 159 160 func (tokens_database *tokens_database) make_file_macho(name string) (err error) { 161 fmt.Fprintln(os.Stderr, "[mach-o]", name) 162 var source_id string 163 source_id, err = hash_file(name) 164 if err != nil { 165 return 166 } 167 fmt.Fprintln(os.Stderr, "[mach-o]", source_id) 168 var ( 169 file *os.File 170 macho_file *macho.File 171 ) 172 file, err = os.Open(name) 173 if err != nil { 174 return 175 } 176 177 macho_file, err = macho.NewFile(file) 178 if err != nil { 179 return 180 } 181 _, dwarf_err := macho_file.DWARF() 182 if dwarf_err == nil { 183 fmt.Fprintln(os.Stderr, "DWARF!") 184 time.Sleep(5 * time.Second) 185 } 186 fmt.Fprintln(os.Stderr, "[mach-o]", "cpu", macho_file.FileHeader.Cpu) 187 fmt.Fprintln(os.Stderr, "[mach-o]", "loads:") 188 // for _, load := range macho_file.Loads { 189 // fmt.Fprintln(os.Stderr, "[mach-o]", load.String()) 190 // } 191 fmt.Fprintln(os.Stderr, "[mach-o]", "sections:") 192 for _, section := range macho_file.Sections { 193 fmt.Fprintln(os.Stderr, "section", section.SectionHeader.Name) 194 } 195 196 if macho_file.Dysymtab != nil { 197 fmt.Fprintln(os.Stderr, "[mach-o]", "does not contain a dysymtab") 198 } else { 199 fmt.Fprintln(os.Stderr, "[mach-o]", "dysymtab:") 200 } 201 202 var imported_symbols []string 203 imported_symbols, err = macho_file.ImportedSymbols() 204 if err != nil { 205 return 206 } 207 if macho_file.Symtab == nil { 208 fmt.Fprintln(os.Stderr, "[mach-o]", "does not contain a symtab") 209 } else { 210 fmt.Fprintln(os.Stderr, "[mach-o]", "symtab:") 211 for _, sym := range macho_file.Symtab.Syms { 212 imported := slices.Contains(imported_symbols, sym.Name) 213 var section_name string 214 if sym.Sect != 0 { 215 section_name = macho_file.Sections[sym.Sect-1].SectionHeader.Name 216 } 217 if imported { 218 fmt.Fprintf(os.Stderr, "[mach-o] imported %s %02x %s\n", section_name, sym.Type, sym.Name) 219 } else { 220 if sym.Name == "" { 221 // fmt.Fprintln(os.Stderr, "[mach-o]", "symbol has no name", "sect="+section_name, sym.Type, sym.Value, sym.Desc) 222 } else { 223 fmt.Fprintf(os.Stderr, "[mach-o] internal %s %02x %s\n", section_name, sym.Type, sym.Name) 224 var token db.Token 225 token.ID = tokens_database.next_token_id() 226 token.Source = source_id 227 token.Kind = db.OriginalSymbolToken 228 token.Section = section_name 229 token.Offset = fmt.Sprintf("%X", sym.Value) 230 token.Names = append(token.Names, db.TokenName{db.OriginalName, sym.Name}) 231 232 if looks_mangled(sym.Name) { 233 demangled, err := demangle(sym.Name) 234 if err == nil { 235 token.Names = append(token.Names, db.TokenName{db.DemangledName, demangled}) 236 } 237 } 238 239 tokens_database.Write(&token) 240 } 241 } 242 } 243 } 244 245 file.Close() 246 247 if err = stringrecovery.RecoverFile(name, func(section_name string, address uint64, str string) { 248 fmt.Fprintf(os.Stderr, "[mach-o] string found: %s %08X %s\n", section_name, address, str) 249 tokens_database.write_string_token(source_id, section_name, address, str) 250 }); err != nil { 251 return 252 } 253 254 return 255 } 256 257 func (tokens_database *tokens_database) make_file(name string) (err error) { 258 switch filepath.Ext(name) { 259 case ".macho": 260 err = tokens_database.make_file_macho(name) 261 case ".pdb": 262 err = tokens_database.make_file_pdb(name) 263 case ".exe": 264 err = tokens_database.make_file_pe(name) 265 } 266 return 267 } 268 269 func (tokens_database *tokens_database) make_directory(name string) (err error) { 270 var entries []os.DirEntry 271 entries, err = os.ReadDir(name) 272 if err != nil { 273 return 274 } 275 276 for _, entry := range entries { 277 if entry.IsDir() { 278 if err = tokens_database.make_directory(filepath.Join(name, entry.Name())); err != nil { 279 return 280 } 281 } else { 282 if err = tokens_database.make_file(filepath.Join(name, entry.Name())); err != nil { 283 return 284 } 285 } 286 } 287 288 return 289 } 290 291 func (tokens_database *tokens_database) make(name string) (err error) { 292 var fi os.FileInfo 293 fi, err = os.Stat(name) 294 if err != nil { 295 return 296 } 297 if fi.IsDir() { 298 err = tokens_database.make_directory(name) 299 } else { 300 err = tokens_database.make_file(name) 301 } 302 return 303 }