/ src / tools / mac / upload_system_symbols / upload_system_symbols.go
upload_system_symbols.go
  1  /* Copyright 2014 Google LLC
  2  
  3  Redistribution and use in source and binary forms, with or without
  4  modification, are permitted provided that the following conditions are
  5  met:
  6  
  7   * Redistributions of source code must retain the above copyright
  8  notice, this list of conditions and the following disclaimer.
  9   * Redistributions in binary form must reproduce the above
 10  copyright notice, this list of conditions and the following disclaimer
 11  in the documentation and/or other materials provided with the
 12  distribution.
 13   * Neither the name of Google LLC nor the names of its
 14  contributors may be used to endorse or promote products derived from
 15  this software without specific prior written permission.
 16  
 17  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 18  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 19  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 20  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 21  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 22  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 23  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 24  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 25  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 26  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 27  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 28  */
 29  
 30  /*
 31  Tool upload_system_symbols generates and uploads Breakpad symbol files for OS X system libraries.
 32  
 33  This tool shells out to the dump_syms and symupload Breakpad tools. In its default mode, this
 34  will find all dynamic libraries on the system, run dump_syms to create the Breakpad symbol files,
 35  and then upload them to Google's crash infrastructure.
 36  
 37  The tool can also be used to only dump libraries or upload from a directory. See -help for more
 38  information.
 39  
 40  Both i386 and x86_64 architectures will be dumped and uploaded.
 41  */
 42  package main
 43  
 44  import (
 45  	"debug/macho"
 46  	"flag"
 47  	"fmt"
 48  	"io"
 49  	"io/ioutil"
 50  	"log"
 51  	"os"
 52  	"os/exec"
 53  	"path"
 54  	"regexp"
 55  	"strings"
 56  	"sync"
 57  	"time"
 58  )
 59  
// Command-line flags. They are registered when the package is initialized and
// receive their values when main calls flag.Parse.
var (
	breakpadTools    = flag.String("breakpad-tools", "out/Release/", "Path to the Breakpad tools directory, containing dump_syms and symupload.")
	uploadOnlyPath   = flag.String("upload-from", "", "Upload a directory of symbol files that has been dumped independently.")
	dumpOnlyPath     = flag.String("dump-to", "", "Dump the symbols to the specified directory, but do not upload them.")
	systemRoot       = flag.String("system-root", "", "Path to the root of the Mac OS X system whose symbols will be dumped.")
	dumpArchitecture = flag.String("arch", "", "The CPU architecture for which symbols should be dumped. If not specified, dumps all architectures.")
	apiKey           = flag.String("api-key", "", "API key to use. If this is present, the `sym-upload-v2` protocol is used.\nSee https://chromium.googlesource.com/breakpad/breakpad/+/HEAD/docs/sym_upload_v2_protocol.md or\n`symupload`'s help for more information.")
)
 68  
var (
	// pathsToScan are the subpaths of the system root that are scanned for
	// shared libraries. Each of them must exist: failing to open one aborts
	// the program.
	pathsToScan = []string{
		"/System/Library/Frameworks",
		"/System/Library/PrivateFrameworks",
		"/usr/lib",
	}

	// optionalPathsToScan is just like pathsToScan, but the paths are permitted
	// to be absent; a missing optional path is skipped silently.
	optionalPathsToScan = []string{
		// Gone in 10.15.
		"/Library/QuickTime",
		// Not present in dumped dyld_shared_caches.
		"/System/Library/Components",
	}

	// uploadServersV1 is the list of servers to which symbols are uploaded
	// when using the V1 protocol.
	uploadServersV1 = []string{
		"https://clients2.google.com/cr/symbol",
		"https://clients2.google.com/cr/staging_symbol",
	}
	// uploadServersV2 is the list of servers to which symbols are uploaded
	// when using the V2 protocol.
	uploadServersV2 = []string{
		"https://staging-crashsymbolcollector-pa.googleapis.com",
		"https://prod-crashsymbolcollector-pa.googleapis.com",
	}

	// uploadServers is the list of servers actually used. It defaults to the
	// V1 servers; main replaces it with uploadServersV2 when -api-key is set.
	uploadServers = uploadServersV1

	// blacklistRegexps match paths that should be excluded from dumping. A
	// path is excluded as soon as any one of the expressions matches it.
	blacklistRegexps = []*regexp.Regexp{
		regexp.MustCompile(`/System/Library/Frameworks/Python\.framework/`),
		regexp.MustCompile(`/System/Library/Frameworks/Ruby\.framework/`),
		regexp.MustCompile(`_profile\.dylib$`),
		regexp.MustCompile(`_debug\.dylib$`),
		regexp.MustCompile(`\.a$`),
		regexp.MustCompile(`\.dat$`),
	}
)
112  
113  func main() {
114  	flag.Parse()
115  	log.SetFlags(0)
116  
117  	// If `apiKey` is set, we're using the v2 protocol.
118  	if len(*apiKey) > 0 {
119  		uploadServers = uploadServersV2
120  	}
121  
122  	var uq *UploadQueue
123  
124  	if *uploadOnlyPath != "" {
125  		// -upload-from specified, so handle that case early.
126  		uq = StartUploadQueue()
127  		uploadFromDirectory(*uploadOnlyPath, uq)
128  		uq.Wait()
129  		return
130  	}
131  
132  	if *systemRoot == "" {
133  		log.Fatal("Need a -system-root to dump symbols for")
134  	}
135  
136  	if *dumpOnlyPath != "" {
137  		// -dump-to specified, so make sure that the path is a directory.
138  		if fi, err := os.Stat(*dumpOnlyPath); err != nil {
139  			log.Fatalf("-dump-to location: %v", err)
140  		} else if !fi.IsDir() {
141  			log.Fatal("-dump-to location is not a directory")
142  		}
143  	}
144  
145  	dumpPath := *dumpOnlyPath
146  	if *dumpOnlyPath == "" {
147  		// If -dump-to was not specified, then run the upload pipeline and create
148  		// a temporary dump output directory.
149  		uq = StartUploadQueue()
150  
151  		if p, err := ioutil.TempDir("", "upload_system_symbols"); err != nil {
152  			log.Fatalf("Failed to create temporary directory: %v", err)
153  		} else {
154  			dumpPath = p
155  			defer os.RemoveAll(p)
156  		}
157  	}
158  
159  	dq := StartDumpQueue(*systemRoot, dumpPath, uq)
160  	dq.Wait()
161  	if uq != nil {
162  		uq.Wait()
163  	}
164  }
165  
// manglePath reduces an absolute filesystem path to a string suitable as the
// base for a file name which encodes some of the original path. The result
// concatenates the leading character of each path component except the last
// to the last path component; for example /System/Library/Frameworks/AppKit
// becomes SLFAppKit.
//
// Empty components (produced by leading, trailing or doubled slashes) are
// ignored. A component that starts with a multi-byte UTF-8 character
// contributes that whole character, so the result never ends a component in
// the middle of an encoded character.
func manglePath(path string) string {
	components := strings.Split(path, "/")
	last := len(components) - 1
	var builder strings.Builder
	for i, component := range components {
		if component == "" {
			continue
		}
		if i == last {
			builder.WriteString(component)
			continue
		}
		// Extend the prefix over any UTF-8 continuation bytes (10xxxxxx) so
		// that a multi-byte leading character is kept intact. For ASCII this
		// is exactly the first byte.
		end := 1
		for end < len(component) && component[end]&0xC0 == 0x80 {
			end++
		}
		builder.WriteString(component[:end])
	}
	return builder.String()
}
188  
// WorkerPool tracks a set of goroutines so that a caller can block until all
// of them have finished.
type WorkerPool struct {
	wg sync.WaitGroup
}

// StartWorkerPool will launch numWorkers goroutines all running workerFunc.
// When workerFunc exits, the goroutine will terminate. A numWorkers of zero
// or less starts nothing, and Wait on the returned pool returns immediately.
func StartWorkerPool(numWorkers int, workerFunc func()) *WorkerPool {
	p := new(WorkerPool)
	for i := 0; i < numWorkers; i++ {
		p.wg.Add(1)
		go func() {
			// Deferred so the pool is released however the worker ends,
			// including runtime.Goexit; otherwise Wait would block forever.
			defer p.wg.Done()
			workerFunc()
		}()
	}
	return p
}

// Wait for all the workers in the pool to complete the workerFunc.
func (p *WorkerPool) Wait() {
	p.wg.Wait()
}
211  
// UploadQueue couples a worker pool with the channel that feeds it. Each
// string sent on queue is the path of a symbol file to upload; the embedded
// WorkerPool provides Wait.
type UploadQueue struct {
	*WorkerPool
	queue chan string
}
216  
217  // StartUploadQueue creates a new worker pool and queue, to which paths to
218  // Breakpad symbol files may be sent for uploading.
219  func StartUploadQueue() *UploadQueue {
220  	uq := &UploadQueue{
221  		queue: make(chan string, 10),
222  	}
223  	uq.WorkerPool = StartWorkerPool(5, uq.worker)
224  	return uq
225  }
226  
// Upload enqueues the symbol file at filepath to be uploaded. The call blocks
// while the queue's buffer is full. It must not be called after Done, because
// sending on the closed queue panics.
func (uq *UploadQueue) Upload(filepath string) {
	uq.queue <- filepath
}
231  
// Done tells the queue that no more files need to be uploaded by closing the
// underlying channel, which lets the workers exit once it is drained. This
// must be called before WorkerPool.Wait, and only once: closing an already
// closed channel panics.
func (uq *UploadQueue) Done() {
	close(uq.queue)
}
237  
238  func (uq *UploadQueue) runSymUpload(symfile, server string) *exec.Cmd {
239  	symUpload := path.Join(*breakpadTools, "symupload")
240  	args := []string{symfile, server}
241  	if len(*apiKey) > 0 {
242  		args = append([]string{"-p", "sym-upload-v2", "-k", *apiKey}, args...)
243  	}
244  	return exec.Command(symUpload, args...)
245  }
246  
247  func (uq *UploadQueue) worker() {
248  	for symfile := range uq.queue {
249  		for _, server := range uploadServers {
250  			for i := 0; i < 3; i++ { // Give each upload 3 attempts to succeed.
251  				cmd := uq.runSymUpload(symfile, server)
252  				if output, err := cmd.Output(); err == nil {
253  					// Success. No retry needed.
254  					fmt.Printf("Uploaded %s to %s\n", symfile, server)
255  					break
256  				} else if exitError, ok := err.(*exec.ExitError); ok && exitError.ExitCode() == 2 && *apiKey != "" {
257  					// Exit code 2 in protocol v2 means the file already exists on the server.
258  					// No point retrying.
259  					fmt.Printf("File %s already exists on %s\n", symfile, server)
260  					break
261  				} else {
262  					log.Printf("Error running symupload(%s, %s), attempt %d: %v: %s\n", symfile, server, i, err, output)
263  					time.Sleep(1 * time.Second)
264  				}
265  			}
266  		}
267  	}
268  }
269  
// DumpQueue couples a worker pool with the channel of dump requests that
// feeds it.
type DumpQueue struct {
	*WorkerPool
	// dumpPath is the directory in which symbol files are created.
	dumpPath string
	// queue carries the library/architecture pairs waiting to be dumped.
	queue chan dumpRequest
	// uq, when non-nil, receives the path of every successfully written
	// symbol file.
	uq *UploadQueue
}
276  
// dumpRequest identifies one unit of dump work: the file at path, dumped for
// the architecture named by arch.
type dumpRequest struct {
	path string
	arch string
}
281  
282  // StartDumpQueue creates a new worker pool to find all the Mach-O libraries in
283  // root and dump their symbols to dumpPath. If an UploadQueue is passed, the
284  // path to the symbol file will be enqueued there, too.
285  func StartDumpQueue(root, dumpPath string, uq *UploadQueue) *DumpQueue {
286  	dq := &DumpQueue{
287  		dumpPath: dumpPath,
288  		queue:    make(chan dumpRequest),
289  		uq:       uq,
290  	}
291  	dq.WorkerPool = StartWorkerPool(12, dq.worker)
292  
293  	findLibsInRoot(root, dq)
294  
295  	return dq
296  }
297  
298  // DumpSymbols enqueues the filepath to have its symbols dumped in the specified
299  // architecture.
300  func (dq *DumpQueue) DumpSymbols(filepath string, arch string) {
301  	dq.queue <- dumpRequest{
302  		path: filepath,
303  		arch: arch,
304  	}
305  }
306  
// Wait blocks until every dump worker has returned. If an upload queue is
// attached, it is then marked Done, since no further symbol files can be
// produced. Because Done closes the upload queue, Wait must not be called
// more than once when an upload queue is attached.
func (dq *DumpQueue) Wait() {
	dq.WorkerPool.Wait()
	if dq.uq != nil {
		dq.uq.Done()
	}
}
313  
// done closes the dump request queue, which lets the dump workers exit once
// they have processed the requests already handed to them. No request may be
// enqueued afterwards.
func (dq *DumpQueue) done() {
	close(dq.queue)
}
317  
318  func (dq *DumpQueue) worker() {
319  	dumpSyms := path.Join(*breakpadTools, "dump_syms")
320  
321  	for req := range dq.queue {
322  		filebase := path.Join(dq.dumpPath, manglePath(req.path))
323  		symfile := fmt.Sprintf("%s_%s.sym", filebase, req.arch)
324  		f, err := os.Create(symfile)
325  		if err != nil {
326  			log.Fatalf("Error creating symbol file: %v", err)
327  		}
328  
329  		cmd := exec.Command(dumpSyms, "-a", req.arch, req.path)
330  		cmd.Stdout = f
331  		err = cmd.Run()
332  		f.Close()
333  
334  		if err != nil {
335  			os.Remove(symfile)
336  			log.Printf("Error running dump_syms(%s, %s): %v\n", req.arch, req.path, err)
337  		} else if dq.uq != nil {
338  			dq.uq.Upload(symfile)
339  		}
340  	}
341  }
342  
343  // uploadFromDirectory handles the upload-only case and merely uploads all files in
344  // a directory.
345  func uploadFromDirectory(directory string, uq *UploadQueue) {
346  	d, err := os.Open(directory)
347  	if err != nil {
348  		log.Fatalf("Could not open directory to upload: %v", err)
349  	}
350  	defer d.Close()
351  
352  	entries, err := d.Readdirnames(0)
353  	if err != nil {
354  		log.Fatalf("Could not read directory: %v", err)
355  	}
356  
357  	for _, entry := range entries {
358  		uq.Upload(path.Join(directory, entry))
359  	}
360  
361  	uq.Done()
362  }
363  
// findQueue is an implementation detail of the DumpQueue that finds all the
// Mach-O files and their architectures. Candidate file paths are sent on
// queue; its workers inspect each one and enqueue dump requests on dq.
type findQueue struct {
	*WorkerPool
	queue chan string
	dq    *DumpQueue
}
371  
372  // findLibsInRoot looks in all the pathsToScan in the root and manages the
373  // interaction between findQueue and DumpQueue.
374  func findLibsInRoot(root string, dq *DumpQueue) {
375  	fq := &findQueue{
376  		queue: make(chan string, 10),
377  		dq:    dq,
378  	}
379  	fq.WorkerPool = StartWorkerPool(12, fq.worker)
380  
381  	for _, p := range pathsToScan {
382  		fq.findLibsInPath(path.Join(root, p), true)
383  	}
384  
385  	for _, p := range optionalPathsToScan {
386  		fq.findLibsInPath(path.Join(root, p), false)
387  	}
388  
389  	close(fq.queue)
390  	fq.Wait()
391  	dq.done()
392  }
393  
394  // findLibsInPath recursively walks the directory tree, sending file paths to
395  // test for being Mach-O to the findQueue.
396  func (fq *findQueue) findLibsInPath(loc string, mustExist bool) {
397  	d, err := os.Open(loc)
398  	if err != nil {
399  		if !mustExist && os.IsNotExist(err) {
400  			return
401  		}
402  		log.Fatalf("Could not open %s: %v", loc, err)
403  	}
404  	defer d.Close()
405  
406  	for {
407  		fis, err := d.Readdir(100)
408  		if err != nil && err != io.EOF {
409  			log.Fatalf("Error reading directory %s: %v", loc, err)
410  		}
411  
412  		for _, fi := range fis {
413  			fp := path.Join(loc, fi.Name())
414  			if fi.IsDir() {
415  				fq.findLibsInPath(fp, true)
416  				continue
417  			} else if fi.Mode()&os.ModeSymlink != 0 {
418  				continue
419  			}
420  
421  			// Test the blacklist in the worker to not slow down this main loop.
422  
423  			fq.queue <- fp
424  		}
425  
426  		if err == io.EOF {
427  			break
428  		}
429  	}
430  }
431  
432  func (fq *findQueue) worker() {
433  	for fp := range fq.queue {
434  		blacklisted := false
435  		for _, re := range blacklistRegexps {
436  			blacklisted = blacklisted || re.MatchString(fp)
437  		}
438  		if blacklisted {
439  			continue
440  		}
441  
442  		f, err := os.Open(fp)
443  		if err != nil {
444  			log.Printf("%s: %v", fp, err)
445  			continue
446  		}
447  
448  		fatFile, err := macho.NewFatFile(f)
449  		if err == nil {
450  			// The file is fat, so dump its architectures.
451  			for _, fatArch := range fatFile.Arches {
452  				fq.dumpMachOFile(fp, fatArch.File)
453  			}
454  			fatFile.Close()
455  		} else if err == macho.ErrNotFat {
456  			// The file isn't fat but may still be MachO.
457  			thinFile, err := macho.NewFile(f)
458  			if err != nil {
459  				log.Printf("%s: %v", fp, err)
460  				continue
461  			}
462  			fq.dumpMachOFile(fp, thinFile)
463  			thinFile.Close()
464  		} else {
465  			f.Close()
466  		}
467  	}
468  }
469  
470  func (fq *findQueue) dumpMachOFile(fp string, image *macho.File) {
471  	if image.Type != MachODylib && image.Type != MachOBundle && image.Type != MachODylinker {
472  		return
473  	}
474  
475  	arch := getArchStringFromHeader(image.FileHeader)
476  	if arch == "" {
477  		// Don't know about this architecture type.
478  		return
479  	}
480  
481  	if (*dumpArchitecture != "" && *dumpArchitecture == arch) || *dumpArchitecture == "" {
482  		fq.dq.DumpSymbols(fp, arch)
483  	}
484  }