/ src / processor / basic_source_line_resolver.cc
basic_source_line_resolver.cc
  1  // Copyright 2010 Google LLC
  2  //
  3  // Redistribution and use in source and binary forms, with or without
  4  // modification, are permitted provided that the following conditions are
  5  // met:
  6  //
  7  //     * Redistributions of source code must retain the above copyright
  8  // notice, this list of conditions and the following disclaimer.
  9  //     * Redistributions in binary form must reproduce the above
 10  // copyright notice, this list of conditions and the following disclaimer
 11  // in the documentation and/or other materials provided with the
 12  // distribution.
 13  //     * Neither the name of Google LLC nor the names of its
 14  // contributors may be used to endorse or promote products derived from
 15  // this software without specific prior written permission.
 16  //
 17  // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 18  // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 19  // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 20  // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 21  // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 22  // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 23  // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 24  // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 25  // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 26  // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 27  // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 28  //
 29  // basic_source_line_resolver.cc: BasicSourceLineResolver implementation.
 30  //
 31  // See basic_source_line_resolver.h and basic_source_line_resolver_types.h
 32  // for documentation.
 33  
 34  #ifdef HAVE_CONFIG_H
 35  #include <config.h>  // Must come first
 36  #endif
 37  
 38  #include <assert.h>
 39  #include <stdio.h>
 40  #include <string.h>
 41  #include <sys/types.h>
 42  #include <sys/stat.h>
 43  
 44  #include <limits>
 45  #include <map>
 46  #include <memory>
 47  #include <utility>
 48  #include <vector>
 49  
 50  #include "google_breakpad/processor/basic_source_line_resolver.h"
 51  #include "processor/basic_source_line_resolver_types.h"
 52  #include "processor/module_factory.h"
 53  
 54  #include "processor/tokenize.h"
 55  
 56  using std::deque;
 57  using std::make_pair;
 58  using std::map;
 59  using std::unique_ptr;
 60  using std::vector;
 61  
 62  namespace google_breakpad {
 63  
 64  #ifdef _WIN32
 65  #ifdef _MSC_VER
 66  #define strtok_r strtok_s
 67  #endif
 68  #define strtoull _strtoui64
 69  #endif
 70  
 71  namespace {
 72  
 73  // Utility function to tokenize given the presence of an optional initial
 74  // field. In this case, optional_field is the expected string for the optional
 75  // field, and max_tokens is the maximum number of tokens including the optional
 76  // field. Refer to the documentation for Tokenize for descriptions of the other
 77  // arguments.
 78  bool TokenizeWithOptionalField(char* line,
 79                                 const char* optional_field,
 80                                 const char* separators,
 81                                 int max_tokens,
 82                                 vector<char*>* tokens) {
 83    // First tokenize assuming the optional field is not present.  If we then see
 84    // the optional field, additionally tokenize the last token into two tokens.
 85    if (!Tokenize(line, separators, max_tokens - 1, tokens)) {
 86      return false;
 87    }
 88  
 89    if (strcmp(tokens->front(), optional_field) == 0) {
 90      // The optional field is present. Split the last token in two to recover the
 91      // field prior to the last.
 92      vector<char*> last_tokens;
 93      if (!Tokenize(tokens->back(), separators, 2, &last_tokens)) {
 94        return false;
 95      }
 96      // Replace the previous last token with the two new tokens.
 97      tokens->pop_back();
 98      tokens->push_back(last_tokens[0]);
 99      tokens->push_back(last_tokens[1]);
100    }
101  
102    return true;
103  }
104  
105  }  // namespace
106  
// Separator characters used when splitting symbol-file records into fields.
// Both the characters and the pointer itself are const so the constant cannot
// be reseated by accident.
static const char* const kWhitespace = " \r\n";
// LogParseError stops printing once more than this many errors were counted.
static const int kMaxErrorsPrinted = 5;
// LoadMapFromMemory stops parsing once the error count exceeds this limit.
static const int kMaxErrorsBeforeBailing = 100;
110  
111  BasicSourceLineResolver::BasicSourceLineResolver() :
112      SourceLineResolverBase(new BasicModuleFactory) { }
113  
114  // static
115  void BasicSourceLineResolver::Module::LogParseError(
116     const string& message,
117     int line_number,
118     int* num_errors) {
119    if (++(*num_errors) <= kMaxErrorsPrinted) {
120      if (line_number > 0) {
121        BPLOG(ERROR) << "Line " << line_number << ": " << message;
122      } else {
123        BPLOG(ERROR) << message;
124      }
125    }
126  }
127  
128  bool BasicSourceLineResolver::Module::LoadMapFromMemory(
129      char* memory_buffer,
130      size_t memory_buffer_size) {
131    linked_ptr<Function> cur_func;
132    int line_number = 0;
133    int num_errors = 0;
134    int inline_num_errors = 0;
135    char* save_ptr;
136  
137    // If the length is 0, we can still pretend we have a symbol file. This is
138    // for scenarios that want to test symbol lookup, but don't necessarily care
139    // if certain modules do not have any information, like system libraries.
140    if (memory_buffer_size == 0) {
141      return true;
142    }
143  
144    // Make sure the last character is null terminator.
145    size_t last_null_terminator = memory_buffer_size - 1;
146    if (memory_buffer[last_null_terminator] != '\0') {
147      memory_buffer[last_null_terminator] = '\0';
148    }
149  
150    // Skip any null terminators at the end of the memory buffer, and make sure
151    // there are no other null terminators in the middle of the memory buffer.
152    bool has_null_terminator_in_the_middle = false;
153    while (last_null_terminator > 0 &&
154           memory_buffer[last_null_terminator - 1] == '\0') {
155      last_null_terminator--;
156    }
157    for (size_t i = 0; i < last_null_terminator; i++) {
158      if (memory_buffer[i] == '\0') {
159        memory_buffer[i] = '_';
160        has_null_terminator_in_the_middle = true;
161      }
162    }
163    if (has_null_terminator_in_the_middle) {
164      LogParseError(
165         "Null terminator is not expected in the middle of the symbol data",
166         line_number,
167         &num_errors);
168    }
169  
170    char* buffer;
171    buffer = strtok_r(memory_buffer, "\r\n", &save_ptr);
172  
173    while (buffer != NULL) {
174      ++line_number;
175  
176      if (strncmp(buffer, "FILE ", 5) == 0) {
177        if (!ParseFile(buffer)) {
178          LogParseError("ParseFile on buffer failed", line_number, &num_errors);
179        }
180      } else if (strncmp(buffer, "STACK ", 6) == 0) {
181        if (!ParseStackInfo(buffer)) {
182          LogParseError("ParseStackInfo failed", line_number, &num_errors);
183        }
184      } else if (strncmp(buffer, "FUNC ", 5) == 0) {
185        cur_func.reset(ParseFunction(buffer));
186        if (!cur_func.get()) {
187          LogParseError("ParseFunction failed", line_number, &num_errors);
188        } else {
189          // StoreRange will fail if the function has an invalid address or size.
190          // We'll silently ignore this, the function and any corresponding lines
191          // will be destroyed when cur_func is released.
192          functions_.StoreRange(cur_func->address, cur_func->size, cur_func);
193        }
194      } else if (strncmp(buffer, "PUBLIC ", 7) == 0) {
195        // Clear cur_func: public symbols don't contain line number information.
196        cur_func.reset();
197  
198        if (!ParsePublicSymbol(buffer)) {
199          LogParseError("ParsePublicSymbol failed", line_number, &num_errors);
200        }
201      } else if (strncmp(buffer, "MODULE ", 7) == 0) {
202        // Ignore these.  They're not of any use to BasicSourceLineResolver,
203        // which is fed modules by a SymbolSupplier.  These lines are present to
204        // aid other tools in properly placing symbol files so that they can
205        // be accessed by a SymbolSupplier.
206        //
207        // MODULE <guid> <age> <filename>
208      } else if (strncmp(buffer, "INFO ", 5) == 0) {
209        // Ignore these as well, they're similarly just for housekeeping.
210        //
211        // INFO CODE_ID <code id> <filename>
212      } else if (strncmp(buffer, "INLINE ", 7) == 0) {
213        linked_ptr<Inline> in = ParseInline(buffer);
214        if (!in.get())
215          LogParseError("ParseInline failed", line_number, &inline_num_errors);
216        else
217          cur_func->AppendInline(in);
218      } else if (strncmp(buffer, "INLINE_ORIGIN ", 14) == 0) {
219        if (!ParseInlineOrigin(buffer)) {
220          LogParseError("ParseInlineOrigin failed", line_number,
221                        &inline_num_errors);
222        }
223      } else {
224        if (!cur_func.get()) {
225          LogParseError("Found source line data without a function",
226                         line_number, &num_errors);
227        } else {
228          Line* line = ParseLine(buffer);
229          if (!line) {
230            LogParseError("ParseLine failed", line_number, &num_errors);
231          } else {
232            cur_func->lines.StoreRange(line->address, line->size,
233                                       linked_ptr<Line>(line));
234          }
235        }
236      }
237      if (num_errors > kMaxErrorsBeforeBailing) {
238        break;
239      }
240      buffer = strtok_r(NULL, "\r\n", &save_ptr);
241    }
242    is_corrupt_ = num_errors > 0;
243    return true;
244  }
245  
246  void BasicSourceLineResolver::Module::ConstructInlineFrames(
247      StackFrame* frame,
248      MemAddr address,
249      const ContainedRangeMap<uint64_t, linked_ptr<Inline>>& inline_map,
250      deque<unique_ptr<StackFrame>>* inlined_frames) const {
251    vector<const linked_ptr<Inline>*> inlines;
252    if (!inline_map.RetrieveRanges(address, inlines)) {
253      return;
254    }
255  
256    for (const linked_ptr<Inline>* const in : inlines) {
257      unique_ptr<StackFrame> new_frame =
258          unique_ptr<StackFrame>(new StackFrame(*frame));
259      auto origin = inline_origins_.find(in->get()->origin_id);
260      if (origin != inline_origins_.end()) {
261        new_frame->function_name = origin->second->name;
262      } else {
263        new_frame->function_name = "<name omitted>";
264      }
265      
266      // Store call site file and line in current frame, which will be updated
267      // later.
268      new_frame->source_line = in->get()->call_site_line;
269      if (in->get()->has_call_site_file_id) {
270        auto file = files_.find(in->get()->call_site_file_id);
271        if (file != files_.end()) {
272          new_frame->source_file_name = file->second;
273        }
274      }
275  
276      // Use the starting address of the inlined range as inlined function base.
277      new_frame->function_base = new_frame->module->base_address();
278      for (const auto& range : in->get()->inline_ranges) {
279        if (address >= range.first && address < range.first + range.second) {
280          new_frame->function_base += range.first;
281          break;
282        }
283      }
284      new_frame->trust = StackFrame::FRAME_TRUST_INLINE;
285  
286      // The inlines vector has an order from innermost entry to outermost entry.
287      // By push_back, we will have inlined_frames with the same order.
288      inlined_frames->push_back(std::move(new_frame));
289    }
290  
291    // Update the source file and source line for each inlined frame.
292    if (!inlined_frames->empty()) {
293      string parent_frame_source_file_name = frame->source_file_name;
294      int parent_frame_source_line = frame->source_line;
295      frame->source_file_name = inlined_frames->back()->source_file_name;
296      frame->source_line = inlined_frames->back()->source_line;
297      for (unique_ptr<StackFrame>& inlined_frame : *inlined_frames) {
298        std::swap(inlined_frame->source_file_name, parent_frame_source_file_name);
299        std::swap(inlined_frame->source_line, parent_frame_source_line);
300      }
301    }
302  }
303  
304  void BasicSourceLineResolver::Module::LookupAddress(
305      StackFrame* frame,
306      deque<unique_ptr<StackFrame>>* inlined_frames) const {
307    MemAddr address = frame->instruction - frame->module->base_address();
308  
309    // First, look for a FUNC record that covers address. Use
310    // RetrieveNearestRange instead of RetrieveRange so that, if there
311    // is no such function, we can use the next function to bound the
312    // extent of the PUBLIC symbol we find, below. This does mean we
313    // need to check that address indeed falls within the function we
314    // find; do the range comparison in an overflow-friendly way.
315    linked_ptr<Function> func;
316    linked_ptr<PublicSymbol> public_symbol;
317    MemAddr function_base;
318    MemAddr function_size;
319    MemAddr public_address;
320    if (functions_.RetrieveNearestRange(address, &func, &function_base,
321                                        NULL /* delta */, &function_size) &&
322        address >= function_base && address - function_base < function_size) {
323      frame->function_name = func->name;
324      frame->function_base = frame->module->base_address() + function_base;
325      frame->is_multiple = func->is_multiple;
326  
327      linked_ptr<Line> line;
328      MemAddr line_base;
329      if (func->lines.RetrieveRange(address, &line, &line_base, NULL /* delta */,
330                                    NULL /* size */)) {
331        FileMap::const_iterator it = files_.find(line->source_file_id);
332        if (it != files_.end()) {
333          frame->source_file_name = files_.find(line->source_file_id)->second;
334        }
335        frame->source_line = line->line;
336        frame->source_line_base = frame->module->base_address() + line_base;
337      }
338  
339      // Check if this is inlined function call.
340      if (inlined_frames) {
341        ConstructInlineFrames(frame, address, func->inlines, inlined_frames);
342      }
343    } else if (public_symbols_.Retrieve(address,
344                                        &public_symbol, &public_address) &&
345               (!func.get() || public_address > function_base)) {
346      frame->function_name = public_symbol->name;
347      frame->function_base = frame->module->base_address() + public_address;
348      frame->is_multiple = public_symbol->is_multiple;
349    }
350  }
351  
352  WindowsFrameInfo* BasicSourceLineResolver::Module::FindWindowsFrameInfo(
353      const StackFrame* frame) const {
354    MemAddr address = frame->instruction - frame->module->base_address();
355    scoped_ptr<WindowsFrameInfo> result(new WindowsFrameInfo());
356  
357    // We only know about WindowsFrameInfo::STACK_INFO_FRAME_DATA and
358    // WindowsFrameInfo::STACK_INFO_FPO. Prefer them in this order.
359    // WindowsFrameInfo::STACK_INFO_FRAME_DATA is the newer type that
360    // includes its own program string.
361    // WindowsFrameInfo::STACK_INFO_FPO is the older type
362    // corresponding to the FPO_DATA struct. See stackwalker_x86.cc.
363    linked_ptr<WindowsFrameInfo> frame_info;
364    if ((windows_frame_info_[WindowsFrameInfo::STACK_INFO_FRAME_DATA]
365         .RetrieveRange(address, &frame_info))
366        || (windows_frame_info_[WindowsFrameInfo::STACK_INFO_FPO]
367            .RetrieveRange(address, &frame_info))) {
368      result->CopyFrom(*frame_info.get());
369      return result.release();
370    }
371  
372    // Even without a relevant STACK line, many functions contain
373    // information about how much space their parameters consume on the
374    // stack. Use RetrieveNearestRange instead of RetrieveRange, so that
375    // we can use the function to bound the extent of the PUBLIC symbol,
376    // below. However, this does mean we need to check that ADDRESS
377    // falls within the retrieved function's range; do the range
378    // comparison in an overflow-friendly way.
379    linked_ptr<Function> function;
380    MemAddr function_base, function_size;
381    if (functions_.RetrieveNearestRange(address, &function, &function_base,
382                                        NULL /* delta */, &function_size) &&
383        address >= function_base && address - function_base < function_size) {
384      result->parameter_size = function->parameter_size;
385      result->valid |= WindowsFrameInfo::VALID_PARAMETER_SIZE;
386      return result.release();
387    }
388  
389    // PUBLIC symbols might have a parameter size. Use the function we
390    // found above to limit the range the public symbol covers.
391    linked_ptr<PublicSymbol> public_symbol;
392    MemAddr public_address;
393    if (public_symbols_.Retrieve(address, &public_symbol, &public_address) &&
394        (!function.get() || public_address > function_base)) {
395      result->parameter_size = public_symbol->parameter_size;
396    }
397  
398    return NULL;
399  }
400  
401  CFIFrameInfo* BasicSourceLineResolver::Module::FindCFIFrameInfo(
402      const StackFrame* frame) const {
403    MemAddr address = frame->instruction - frame->module->base_address();
404    MemAddr initial_base, initial_size;
405    string initial_rules;
406  
407    // Find the initial rule whose range covers this address. That
408    // provides an initial set of register recovery rules. Then, walk
409    // forward from the initial rule's starting address to frame's
410    // instruction address, applying delta rules.
411    if (!cfi_initial_rules_.RetrieveRange(address, &initial_rules, &initial_base,
412                                          NULL /* delta */, &initial_size)) {
413      return NULL;
414    }
415  
416    // Create a frame info structure, and populate it with the rules from
417    // the STACK CFI INIT record.
418    scoped_ptr<CFIFrameInfo> rules(new CFIFrameInfo());
419    if (!ParseCFIRuleSet(initial_rules, rules.get()))
420      return NULL;
421  
422    // Find the first delta rule that falls within the initial rule's range.
423    map<MemAddr, string>::const_iterator delta =
424      cfi_delta_rules_.lower_bound(initial_base);
425  
426    // Apply delta rules up to and including the frame's address.
427    while (delta != cfi_delta_rules_.end() && delta->first <= address) {
428      ParseCFIRuleSet(delta->second, rules.get());
429      delta++;
430    }
431  
432    return rules.release();
433  }
434  
435  bool BasicSourceLineResolver::Module::ParseFile(char* file_line) {
436    long index;
437    char* filename;
438    if (SymbolParseHelper::ParseFile(file_line, &index, &filename)) {
439      files_.insert(make_pair(index, string(filename)));
440      return true;
441    }
442    return false;
443  }
444  
445  bool BasicSourceLineResolver::Module::ParseInlineOrigin(
446    char* inline_origin_line) {
447    bool has_file_id;
448    long origin_id;
449    long source_file_id;
450    char* origin_name;
451    if (SymbolParseHelper::ParseInlineOrigin(inline_origin_line, &has_file_id,
452                                             &origin_id, &source_file_id,
453                                             &origin_name)) {
454      inline_origins_.insert(make_pair(
455          origin_id,
456          new InlineOrigin(has_file_id, source_file_id, origin_name)));
457      return true;
458    }
459    return false;
460  }
461  
462  linked_ptr<BasicSourceLineResolver::Inline>
463  BasicSourceLineResolver::Module::ParseInline(char* inline_line) {
464    bool has_call_site_file_id;
465    long inline_nest_level;
466    long call_site_line;
467    long call_site_file_id;
468    long origin_id;
469    vector<std::pair<MemAddr, MemAddr>> ranges;
470    if (SymbolParseHelper::ParseInline(inline_line, &has_call_site_file_id,
471                                       &inline_nest_level, &call_site_line,
472                                       &call_site_file_id, &origin_id, &ranges)) {
473      return linked_ptr<Inline>(new Inline(has_call_site_file_id,
474                                           inline_nest_level, call_site_line,
475                                           call_site_file_id, origin_id, ranges));
476    }
477    return linked_ptr<Inline>();
478  }
479  
480  BasicSourceLineResolver::Function*
481  BasicSourceLineResolver::Module::ParseFunction(char* function_line) {
482    bool is_multiple;
483    uint64_t address;
484    uint64_t size;
485    long stack_param_size;
486    char* name;
487    if (SymbolParseHelper::ParseFunction(function_line, &is_multiple, &address,
488                                         &size, &stack_param_size, &name)) {
489      return new Function(name, address, size, stack_param_size, is_multiple);
490    }
491    return NULL;
492  }
493  
494  BasicSourceLineResolver::Line* BasicSourceLineResolver::Module::ParseLine(
495      char* line_line) {
496    uint64_t address;
497    uint64_t size;
498    long line_number;
499    long source_file;
500  
501    if (SymbolParseHelper::ParseLine(line_line, &address, &size, &line_number,
502                                     &source_file)) {
503      return new Line(address, size, source_file, line_number);
504    }
505    return NULL;
506  }
507  
508  bool BasicSourceLineResolver::Module::ParsePublicSymbol(char* public_line) {
509    bool is_multiple;
510    uint64_t address;
511    long stack_param_size;
512    char* name;
513  
514    if (SymbolParseHelper::ParsePublicSymbol(public_line, &is_multiple, &address,
515                                             &stack_param_size, &name)) {
516      // A few public symbols show up with an address of 0.  This has been seen
517      // in the dumped output of ntdll.pdb for symbols such as _CIlog, _CIpow,
518      // RtlDescribeChunkLZNT1, and RtlReserveChunkLZNT1.  They would conflict
519      // with one another if they were allowed into the public_symbols_ map,
520      // but since the address is obviously invalid, gracefully accept them
521      // as input without putting them into the map.
522      if (address == 0) {
523        return true;
524      }
525  
526      linked_ptr<PublicSymbol> symbol(new PublicSymbol(name, address,
527                                                       stack_param_size,
528                                                       is_multiple));
529      return public_symbols_.Store(address, symbol);
530    }
531    return false;
532  }
533  
534  bool BasicSourceLineResolver::Module::ParseStackInfo(char* stack_info_line) {
535    // Skip "STACK " prefix.
536    stack_info_line += 6;
537  
538    // Find the token indicating what sort of stack frame walking
539    // information this is.
540    while (*stack_info_line == ' ')
541      stack_info_line++;
542    const char* platform = stack_info_line;
543    while (!strchr(kWhitespace, *stack_info_line))
544      stack_info_line++;
545    *stack_info_line++ = '\0';
546  
547    // MSVC stack frame info.
548    if (strcmp(platform, "WIN") == 0) {
549      int type = 0;
550      uint64_t rva, code_size;
551      linked_ptr<WindowsFrameInfo>
552        stack_frame_info(WindowsFrameInfo::ParseFromString(stack_info_line,
553                                                           type,
554                                                           rva,
555                                                           code_size));
556      if (stack_frame_info == NULL)
557        return false;
558  
559      // TODO(mmentovai): I wanted to use StoreRange's return value as this
560      // method's return value, but MSVC infrequently outputs stack info that
561      // violates the containment rules.  This happens with a section of code
562      // in strncpy_s in test_app.cc (testdata/minidump2).  There, problem looks
563      // like this:
564      //   STACK WIN 4 4242 1a a 0 ...  (STACK WIN 4 base size prolog 0 ...)
565      //   STACK WIN 4 4243 2e 9 0 ...
566      // ContainedRangeMap treats these two blocks as conflicting.  In reality,
567      // when the prolog lengths are taken into account, the actual code of
568      // these blocks doesn't conflict.  However, we can't take the prolog lengths
569      // into account directly here because we'd wind up with a different set
570      // of range conflicts when MSVC outputs stack info like this:
571      //   STACK WIN 4 1040 73 33 0 ...
572      //   STACK WIN 4 105a 59 19 0 ...
573      // because in both of these entries, the beginning of the code after the
574      // prolog is at 0x1073, and the last byte of contained code is at 0x10b2.
575      // Perhaps we could get away with storing ranges by rva + prolog_size
576      // if ContainedRangeMap were modified to allow replacement of
577      // already-stored values.
578  
579      windows_frame_info_[type].StoreRange(rva, code_size, stack_frame_info);
580      return true;
581    } else if (strcmp(platform, "CFI") == 0) {
582      // DWARF CFI stack frame info
583      return ParseCFIFrameInfo(stack_info_line);
584    } else {
585      // Something unrecognized.
586      return false;
587    }
588  }
589  
590  bool BasicSourceLineResolver::Module::ParseCFIFrameInfo(
591      char* stack_info_line) {
592    char* cursor;
593  
594    // Is this an INIT record or a delta record?
595    char* init_or_address = strtok_r(stack_info_line, " \r\n", &cursor);
596    if (!init_or_address)
597      return false;
598  
599    if (strcmp(init_or_address, "INIT") == 0) {
600      // This record has the form "STACK INIT <address> <size> <rules...>".
601      char* address_field = strtok_r(NULL, " \r\n", &cursor);
602      if (!address_field) return false;
603  
604      char* size_field = strtok_r(NULL, " \r\n", &cursor);
605      if (!size_field) return false;
606  
607      char* initial_rules = strtok_r(NULL, "\r\n", &cursor);
608      if (!initial_rules) return false;
609  
610      MemAddr address = strtoul(address_field, NULL, 16);
611      MemAddr size    = strtoul(size_field,    NULL, 16);
612      cfi_initial_rules_.StoreRange(address, size, initial_rules);
613      return true;
614    }
615  
616    // This record has the form "STACK <address> <rules...>".
617    char* address_field = init_or_address;
618    char* delta_rules = strtok_r(NULL, "\r\n", &cursor);
619    if (!delta_rules) return false;
620    MemAddr address = strtoul(address_field, NULL, 16);
621    cfi_delta_rules_[address] = delta_rules;
622    return true;
623  }
624  
625  bool BasicSourceLineResolver::Function::AppendInline(linked_ptr<Inline> in) {
626    // This happends if in's parent wasn't added due to a malformed INLINE record.
627    if (in->inline_nest_level > last_added_inline_nest_level + 1)
628      return false;
629  
630    last_added_inline_nest_level = in->inline_nest_level;
631  
632    // Store all ranges into current level of inlines.
633    for (auto range : in->inline_ranges)
634      inlines.StoreRange(range.first, range.second, in);
635    return true;
636  }
637  
638  // static
639  bool SymbolParseHelper::ParseFile(char* file_line, long* index,
640                                    char** filename) {
641    // FILE <id> <filename>
642    assert(strncmp(file_line, "FILE ", 5) == 0);
643    file_line += 5;  // skip prefix
644  
645    vector<char*> tokens;
646    if (!Tokenize(file_line, kWhitespace, 2, &tokens)) {
647      return false;
648    }
649  
650    char* after_number;
651    *index = strtol(tokens[0], &after_number, 10);
652    if (!IsValidAfterNumber(after_number) || *index < 0 ||
653        *index == std::numeric_limits<long>::max()) {
654      return false;
655    }
656  
657    *filename = tokens[1];
658    if (!*filename) {
659      return false;
660    }
661  
662    return true;
663  }
664  
665  // static
666  bool SymbolParseHelper::ParseInlineOrigin(char* inline_origin_line,
667                                            bool* has_file_id,
668                                            long* origin_id,
669                                            long* file_id,
670                                            char** name) {
671    // Old INLINE_ORIGIN format:
672    // INLINE_ORIGIN <origin_id> <file_id> <name>
673    // New INLINE_ORIGIN format:
674    // INLINE_ORIGIN <origin_id> <name>
675    assert(strncmp(inline_origin_line, "INLINE_ORIGIN ", 14) == 0);
676    inline_origin_line += 14;  // skip prefix
677    vector<char*> tokens;
678    // Split the line into two parts so that the first token is "<origin_id>", and
679    // second token is either "<file_id> <name>"" or "<name>"" depending on the
680    // format version.
681    if (!Tokenize(inline_origin_line, kWhitespace, 2, &tokens)) {
682      return false;
683    }
684  
685    char* after_number;
686    *origin_id = strtol(tokens[0], &after_number, 10);
687    if (!IsValidAfterNumber(after_number) || *origin_id < 0 ||
688        *origin_id == std::numeric_limits<long>::max()) {
689      return false;
690    }
691  
692    // If the field after origin_id is a number, then it's old format.
693    char* remaining_line = tokens[1];
694    *has_file_id = true;
695    for (size_t i = 0;
696         i < strlen(remaining_line) && remaining_line[i] != ' ' && *has_file_id;
697         ++i) {
698      // If the file id is -1, it might be an artificial function that doesn't
699      // have file id. So, we consider -1 as a valid special case.
700      if (remaining_line[i] == '-' && i == 0) {
701        continue;
702      }
703      *has_file_id = isdigit(remaining_line[i]);
704    }
705  
706    if (*has_file_id) {
707      // If it's old format, split "<file_id> <name>" to {"<field_id>", "<name>"}.
708      if (!Tokenize(remaining_line, kWhitespace, 2, &tokens)) {
709        return false;
710      }
711      *file_id = strtol(tokens[0], &after_number, 10);
712      // If the file id is -1, it might be an artificial function that doesn't
713      // have file id. So, we consider -1 as a valid special case.
714      if (!IsValidAfterNumber(after_number) || *file_id < -1 ||
715          *file_id == std::numeric_limits<long>::max()) {
716        return false;
717      }
718    }
719  
720    *name = tokens[1];
721    if (!*name) {
722      return false;
723    }
724  
725    return true;
726  }
727  
728  // static
729  bool SymbolParseHelper::ParseInline(
730      char* inline_line,
731      bool* has_call_site_file_id,
732      long* inline_nest_level,
733      long* call_site_line,
734      long* call_site_file_id,
735      long* origin_id,
736      vector<std::pair<MemAddr, MemAddr>>* ranges) {
737    // Old INLINE format:
738    // INLINE <inline_nest_level> <call_site_line> <origin_id> [<address> <size>]+
739    // New INLINE format:
740    // INLINE <inline_nest_level> <call_site_line> <call_site_file_id> <origin_id>
741    // [<address> <size>]+
742    assert(strncmp(inline_line, "INLINE ", 7) == 0);
743    inline_line += 7; // skip prefix
744  
745    vector<char*> tokens;
746    // Increase max_tokens if necessary.
747    Tokenize(inline_line, kWhitespace, 512, &tokens);
748  
749    // Determine the version of INLINE record by parity of the vector length.
750    *has_call_site_file_id = tokens.size() % 2 == 0;
751  
752    // The length of the vector should be at least 5.
753    if (tokens.size() < 5) {
754      return false;
755    }
756  
757    char* after_number;
758    size_t next_idx = 0;
759  
760    *inline_nest_level = strtol(tokens[next_idx++], &after_number, 10);
761    if (!IsValidAfterNumber(after_number) || *inline_nest_level < 0 ||
762        *inline_nest_level == std::numeric_limits<long>::max()) {
763      return false;
764    }
765  
766    *call_site_line = strtol(tokens[next_idx++], &after_number, 10);
767    if (!IsValidAfterNumber(after_number) || *call_site_line < 0 ||
768        *call_site_line == std::numeric_limits<long>::max()) {
769      return false;
770    }
771  
772    if (*has_call_site_file_id) {
773      *call_site_file_id = strtol(tokens[next_idx++], &after_number, 10);
774      // If the file id is -1, it might be an artificial function that doesn't
775      // have file id. So, we consider -1 as a valid special case.
776      if (!IsValidAfterNumber(after_number) || *call_site_file_id < -1 ||
777          *call_site_file_id == std::numeric_limits<long>::max()) {
778        return false;
779      }
780    }
781  
782    *origin_id = strtol(tokens[next_idx++], &after_number, 10);
783    if (!IsValidAfterNumber(after_number) || *origin_id < 0 ||
784        *origin_id == std::numeric_limits<long>::max()) {
785      return false;
786    }
787  
788    while (next_idx < tokens.size()) {
789      MemAddr address = strtoull(tokens[next_idx++], &after_number, 16);
790      if (!IsValidAfterNumber(after_number) ||
791          address == std::numeric_limits<unsigned long long>::max()) {
792        return false;
793      }
794      MemAddr size = strtoull(tokens[next_idx++], &after_number, 16);
795      if (!IsValidAfterNumber(after_number) ||
796          size == std::numeric_limits<unsigned long long>::max()) {
797        return false;
798      }
799      ranges->push_back({address, size});
800    }
801  
802    return true;
803  }
804  
805  // static
806  bool SymbolParseHelper::ParseFunction(char* function_line, bool* is_multiple,
807                                        uint64_t* address, uint64_t* size,
808                                        long* stack_param_size, char** name) {
809    // FUNC [<multiple>] <address> <size> <stack_param_size> <name>
810    assert(strncmp(function_line, "FUNC ", 5) == 0);
811    function_line += 5;  // skip prefix
812  
813    vector<char*> tokens;
814    if (!TokenizeWithOptionalField(function_line, "m", kWhitespace, 5, &tokens)) {
815      return false;
816    }
817  
818    *is_multiple = strcmp(tokens[0], "m") == 0;
819    int next_token = *is_multiple ? 1 : 0;
820  
821    char* after_number;
822    *address = strtoull(tokens[next_token++], &after_number, 16);
823    if (!IsValidAfterNumber(after_number) ||
824        *address == std::numeric_limits<unsigned long long>::max()) {
825      return false;
826    }
827    *size = strtoull(tokens[next_token++], &after_number, 16);
828    if (!IsValidAfterNumber(after_number) ||
829        *size == std::numeric_limits<unsigned long long>::max()) {
830      return false;
831    }
832    *stack_param_size = strtol(tokens[next_token++], &after_number, 16);
833    if (!IsValidAfterNumber(after_number) ||
834        *stack_param_size == std::numeric_limits<long>::max() ||
835        *stack_param_size < 0) {
836      return false;
837    }
838    *name = tokens[next_token++];
839  
840    return true;
841  }
842  
843  // static
844  bool SymbolParseHelper::ParseLine(char* line_line, uint64_t* address,
845                                    uint64_t* size, long* line_number,
846                                    long* source_file) {
847    // <address> <size> <line number> <source file id>
848    vector<char*> tokens;
849    if (!Tokenize(line_line, kWhitespace, 4, &tokens)) {
850      return false;
851    }
852  
853    char* after_number;
854    *address  = strtoull(tokens[0], &after_number, 16);
855    if (!IsValidAfterNumber(after_number) ||
856        *address == std::numeric_limits<unsigned long long>::max()) {
857      return false;
858    }
859    *size = strtoull(tokens[1], &after_number, 16);
860    if (!IsValidAfterNumber(after_number) ||
861        *size == std::numeric_limits<unsigned long long>::max()) {
862      return false;
863    }
864    *line_number = strtol(tokens[2], &after_number, 10);
865    if (!IsValidAfterNumber(after_number) ||
866        *line_number == std::numeric_limits<long>::max()) {
867      return false;
868    }
869    *source_file = strtol(tokens[3], &after_number, 10);
870    if (!IsValidAfterNumber(after_number) || *source_file < 0 ||
871        *source_file == std::numeric_limits<long>::max()) {
872      return false;
873    }
874  
875    // Valid line numbers normally start from 1, however there are functions that
876    // are associated with a source file but not associated with any line number
877    // (block helper function) and for such functions the symbol file contains 0
878    // for the line numbers.  Hence, 0 should be treated as a valid line number.
879    // For more information on block helper functions, please, take a look at:
880    // http://clang.llvm.org/docs/Block-ABI-Apple.html
881    if (*line_number < 0) {
882      return false;
883    }
884  
885    return true;
886  }
887  
888  // static
889  bool SymbolParseHelper::ParsePublicSymbol(char* public_line, bool* is_multiple,
890                                            uint64_t* address,
891                                            long* stack_param_size,
892                                            char** name) {
893    // PUBLIC [<multiple>] <address> <stack_param_size> <name>
894    assert(strncmp(public_line, "PUBLIC ", 7) == 0);
895    public_line += 7;  // skip prefix
896  
897    vector<char*> tokens;
898    if (!TokenizeWithOptionalField(public_line, "m", kWhitespace, 4, &tokens)) {
899      return false;
900    }
901  
902    *is_multiple = strcmp(tokens[0], "m") == 0;
903    int next_token = *is_multiple ? 1 : 0;
904  
905    char* after_number;
906    *address = strtoull(tokens[next_token++], &after_number, 16);
907    if (!IsValidAfterNumber(after_number) ||
908        *address == std::numeric_limits<unsigned long long>::max()) {
909      return false;
910    }
911    *stack_param_size = strtol(tokens[next_token++], &after_number, 16);
912    if (!IsValidAfterNumber(after_number) ||
913        *stack_param_size == std::numeric_limits<long>::max() ||
914        *stack_param_size < 0) {
915      return false;
916    }
917    *name = tokens[next_token++];
918  
919    return true;
920  }
921  
922  // static
923  bool SymbolParseHelper::IsValidAfterNumber(char* after_number) {
924    if (after_number != NULL && strchr(kWhitespace, *after_number) != NULL) {
925      return true;
926    }
927    return false;
928  }
929  
930  }  // namespace google_breakpad