/ go / app / util / make-tokens.go
make-tokens.go
  1  package util
  2  
  3  import (
  4  	"compress/gzip"
  5  	"debug/macho"
  6  	"encoding/json"
  7  	"fmt"
  8  	"os"
  9  	"path/filepath"
 10  	"slices"
 11  	"strings"
 12  	"time"
 13  
 14  	"github.com/thunderbrewhq/binana/go/app"
 15  	"github.com/thunderbrewhq/binana/go/app/util/dbutil"
 16  	"github.com/thunderbrewhq/binana/go/db"
 17  	"github.com/thunderbrewhq/binana/go/pdbconv"
 18  	"github.com/thunderbrewhq/binana/go/stringrecovery"
 19  )
 20  
 21  type MakeTokenDatabaseParams struct {
 22  	Source string
 23  	Output string
 24  	Format dbutil.DatabaseFormat
 25  }
 26  
 27  func MakeTokenDatabase(params *MakeTokenDatabaseParams) {
 28  	var (
 29  		tokens_database tokens_database
 30  		err             error
 31  	)
 32  	if err = tokens_database.Open(params.Output, params.Format); err != nil {
 33  		app.Fatal(err)
 34  	}
 35  	if err = tokens_database.make(params.Source); err != nil {
 36  		app.Fatal(err)
 37  	}
 38  	if err = tokens_database.Close(); err != nil {
 39  		app.Fatal(err)
 40  	}
 41  }
 42  
 43  type tokens_database struct {
 44  	sequence uint64
 45  	writer   *dbutil.Writer[db.Token]
 46  }
 47  
 48  func (tokens_database *tokens_database) next_token_id() (id uint64) {
 49  	id = tokens_database.sequence
 50  	tokens_database.sequence++
 51  	return
 52  }
 53  
 54  func (tokens_database *tokens_database) Open(name string, format dbutil.DatabaseFormat) (err error) {
 55  	tokens_database.sequence = 1
 56  	tokens_database.writer, err = dbutil.Open[db.Token](name, format)
 57  	return
 58  }
 59  
 60  func (tokens_database *tokens_database) Close() (err error) {
 61  	err = tokens_database.writer.Close()
 62  	return
 63  }
 64  
 65  func (tokens_database *tokens_database) Write(token *db.Token) (err error) {
 66  	tokens := []db.Token{*token}
 67  	if err = tokens_database.writer.WriteEntries(tokens); err != nil {
 68  		return
 69  	}
 70  	return
 71  }
 72  
 73  func (tokens_database *tokens_database) make_file_pdb(name string) (err error) {
 74  	exe_name := strings.TrimSuffix(name, ".pdb") + ".exe"
 75  	var base_address uint64
 76  	base_address, err = get_exe_base_address(exe_name)
 77  	if err != nil {
 78  		return
 79  	}
 80  
 81  	fmt.Fprintln(os.Stderr, "[pdb]", name)
 82  	var source_id string
 83  	source_id, err = hash_file(name)
 84  	if err != nil {
 85  		return
 86  	}
 87  	fmt.Fprintln(os.Stderr, "[pdb]", source_id)
 88  	// check for the existence of an alternate, .pdb.json.gz file
 89  	_, err = os.Stat(name + ".json.gz")
 90  	if err != nil {
 91  		return
 92  	}
 93  
 94  	var (
 95  		gzip_file   *os.File
 96  		gzip_reader *gzip.Reader
 97  	)
 98  	gzip_file, err = os.Open(name + ".json.gz")
 99  	if err != nil {
100  		return
101  	}
102  
103  	var pdb pdbconv.ProgramDatabase
104  	gzip_reader, err = gzip.NewReader(gzip_file)
105  	json_decoder := json.NewDecoder(gzip_reader)
106  
107  	if err = json_decoder.Decode(&pdb); err != nil {
108  		return
109  	}
110  
111  	gzip_file.Close()
112  
113  	var v pdb_token_visitor
114  	v.init(tokens_database, source_id, base_address)
115  	if err = v.visit_all(&pdb); err != nil {
116  		return
117  	}
118  	if err = v.write_tokens(); err != nil {
119  		return
120  	}
121  
122  	return
123  }
124  
125  func (tokens_database *tokens_database) write_string_token(source_id string, section_name string, address uint64, str string) (err error) {
126  	var db_token db.Token
127  	db_token.ID = tokens_database.next_token_id()
128  	db_token.Source = source_id
129  	db_token.Section = section_name
130  	db_token.Kind = db.OriginalStringToken
131  	db_token.Offset = fmt.Sprintf("%X", address)
132  
133  	db_token.Names = append(db_token.Names, db.TokenName{db.OriginalName, str})
134  
135  	// detect if this is a mangled type identifier
136  	if looks_mangled(str) {
137  		demangled, err := demangle(str)
138  		if err == nil {
139  			db_token.Names = append(db_token.Names, db.TokenName{db.DemangledName, demangled})
140  		}
141  	}
142  
143  	err = tokens_database.Write(&db_token)
144  	return
145  }
146  
147  func (tokens_database *tokens_database) make_file_pe(name string) (err error) {
148  	var id string
149  	id, err = hash_file(name)
150  	if err != nil {
151  		return
152  	}
153  	err = stringrecovery.RecoverFile(name, func(section_name string, address uint64, str string) {
154  		fmt.Fprintf(os.Stderr, "[pe] string found: %s %08X %s\n", section_name, address, str)
155  		tokens_database.write_string_token(id, section_name, address, str)
156  	})
157  	return
158  }
159  
160  func (tokens_database *tokens_database) make_file_macho(name string) (err error) {
161  	fmt.Fprintln(os.Stderr, "[mach-o]", name)
162  	var source_id string
163  	source_id, err = hash_file(name)
164  	if err != nil {
165  		return
166  	}
167  	fmt.Fprintln(os.Stderr, "[mach-o]", source_id)
168  	var (
169  		file       *os.File
170  		macho_file *macho.File
171  	)
172  	file, err = os.Open(name)
173  	if err != nil {
174  		return
175  	}
176  
177  	macho_file, err = macho.NewFile(file)
178  	if err != nil {
179  		return
180  	}
181  	_, dwarf_err := macho_file.DWARF()
182  	if dwarf_err == nil {
183  		fmt.Fprintln(os.Stderr, "DWARF!")
184  		time.Sleep(5 * time.Second)
185  	}
186  	fmt.Fprintln(os.Stderr, "[mach-o]", "cpu", macho_file.FileHeader.Cpu)
187  	fmt.Fprintln(os.Stderr, "[mach-o]", "loads:")
188  	// for _, load := range macho_file.Loads {
189  	//	fmt.Fprintln(os.Stderr, "[mach-o]", load.String())
190  	// }
191  	fmt.Fprintln(os.Stderr, "[mach-o]", "sections:")
192  	for _, section := range macho_file.Sections {
193  		fmt.Fprintln(os.Stderr, "section", section.SectionHeader.Name)
194  	}
195  
196  	if macho_file.Dysymtab != nil {
197  		fmt.Fprintln(os.Stderr, "[mach-o]", "does not contain a dysymtab")
198  	} else {
199  		fmt.Fprintln(os.Stderr, "[mach-o]", "dysymtab:")
200  	}
201  
202  	var imported_symbols []string
203  	imported_symbols, err = macho_file.ImportedSymbols()
204  	if err != nil {
205  		return
206  	}
207  	if macho_file.Symtab == nil {
208  		fmt.Fprintln(os.Stderr, "[mach-o]", "does not contain a symtab")
209  	} else {
210  		fmt.Fprintln(os.Stderr, "[mach-o]", "symtab:")
211  		for _, sym := range macho_file.Symtab.Syms {
212  			imported := slices.Contains(imported_symbols, sym.Name)
213  			var section_name string
214  			if sym.Sect != 0 {
215  				section_name = macho_file.Sections[sym.Sect-1].SectionHeader.Name
216  			}
217  			if imported {
218  				fmt.Fprintf(os.Stderr, "[mach-o] imported %s %02x %s\n", section_name, sym.Type, sym.Name)
219  			} else {
220  				if sym.Name == "" {
221  					//	fmt.Fprintln(os.Stderr, "[mach-o]", "symbol has no name", "sect="+section_name, sym.Type, sym.Value, sym.Desc)
222  				} else {
223  					fmt.Fprintf(os.Stderr, "[mach-o] internal %s %02x %s\n", section_name, sym.Type, sym.Name)
224  					var token db.Token
225  					token.ID = tokens_database.next_token_id()
226  					token.Source = source_id
227  					token.Kind = db.OriginalSymbolToken
228  					token.Section = section_name
229  					token.Offset = fmt.Sprintf("%X", sym.Value)
230  					token.Names = append(token.Names, db.TokenName{db.OriginalName, sym.Name})
231  
232  					if looks_mangled(sym.Name) {
233  						demangled, err := demangle(sym.Name)
234  						if err == nil {
235  							token.Names = append(token.Names, db.TokenName{db.DemangledName, demangled})
236  						}
237  					}
238  
239  					tokens_database.Write(&token)
240  				}
241  			}
242  		}
243  	}
244  
245  	file.Close()
246  
247  	if err = stringrecovery.RecoverFile(name, func(section_name string, address uint64, str string) {
248  		fmt.Fprintf(os.Stderr, "[mach-o] string found: %s %08X %s\n", section_name, address, str)
249  		tokens_database.write_string_token(source_id, section_name, address, str)
250  	}); err != nil {
251  		return
252  	}
253  
254  	return
255  }
256  
257  func (tokens_database *tokens_database) make_file(name string) (err error) {
258  	switch filepath.Ext(name) {
259  	case ".macho":
260  		err = tokens_database.make_file_macho(name)
261  	case ".pdb":
262  		err = tokens_database.make_file_pdb(name)
263  	case ".exe":
264  		err = tokens_database.make_file_pe(name)
265  	}
266  	return
267  }
268  
269  func (tokens_database *tokens_database) make_directory(name string) (err error) {
270  	var entries []os.DirEntry
271  	entries, err = os.ReadDir(name)
272  	if err != nil {
273  		return
274  	}
275  
276  	for _, entry := range entries {
277  		if entry.IsDir() {
278  			if err = tokens_database.make_directory(filepath.Join(name, entry.Name())); err != nil {
279  				return
280  			}
281  		} else {
282  			if err = tokens_database.make_file(filepath.Join(name, entry.Name())); err != nil {
283  				return
284  			}
285  		}
286  	}
287  
288  	return
289  }
290  
291  func (tokens_database *tokens_database) make(name string) (err error) {
292  	var fi os.FileInfo
293  	fi, err = os.Stat(name)
294  	if err != nil {
295  		return
296  	}
297  	if fi.IsDir() {
298  		err = tokens_database.make_directory(name)
299  	} else {
300  		err = tokens_database.make_file(name)
301  	}
302  	return
303  }