/ lib / sanitizer_common / sanitizer_common_interceptors_format.inc
sanitizer_common_interceptors_format.inc
  1  //===-- sanitizer_common_interceptors_format.inc ----------------*- C++ -*-===//
  2  //
  3  //                     The LLVM Compiler Infrastructure
  4  //
  5  // This file is distributed under the University of Illinois Open Source
  6  // License. See LICENSE.TXT for details.
  7  //
  8  //===----------------------------------------------------------------------===//
  9  //
 10  // Scanf/printf implementation for use in *Sanitizer interceptors.
 11  // Follows http://pubs.opengroup.org/onlinepubs/9699919799/functions/fscanf.html
 12  // and http://pubs.opengroup.org/onlinepubs/9699919799/functions/fprintf.html
 13  // with a few common GNU extensions.
 14  //
 15  //===----------------------------------------------------------------------===//
 16  
 17  #include <stdarg.h>
 18  
 19  static const char *parse_number(const char *p, int *out) {
 20    *out = internal_atoll(p);
 21    while (*p >= '0' && *p <= '9')
 22      ++p;
 23    return p;
 24  }
 25  
 26  static const char *maybe_parse_param_index(const char *p, int *out) {
 27    // n$
 28    if (*p >= '0' && *p <= '9') {
 29      int number;
 30      const char *q = parse_number(p, &number);
 31      CHECK(q);
 32      if (*q == '$') {
 33        *out = number;
 34        p = q + 1;
 35      }
 36    }
 37  
 38    // Otherwise, do not change p. This will be re-parsed later as the field
 39    // width.
 40    return p;
 41  }
 42  
 43  static bool char_is_one_of(char c, const char *s) {
 44    return !!internal_strchr(s, c);
 45  }
 46  
 47  static const char *maybe_parse_length_modifier(const char *p, char ll[2]) {
 48    if (char_is_one_of(*p, "jztLq")) {
 49      ll[0] = *p;
 50      ++p;
 51    } else if (*p == 'h') {
 52      ll[0] = 'h';
 53      ++p;
 54      if (*p == 'h') {
 55        ll[1] = 'h';
 56        ++p;
 57      }
 58    } else if (*p == 'l') {
 59      ll[0] = 'l';
 60      ++p;
 61      if (*p == 'l') {
 62        ll[1] = 'l';
 63        ++p;
 64      }
 65    }
 66    return p;
 67  }
 68  
 69  // Returns true if the character is an integer conversion specifier.
 70  static bool format_is_integer_conv(char c) {
 71    return char_is_one_of(c, "diouxXn");
 72  }
 73  
 74  // Returns true if the character is an floating point conversion specifier.
 75  static bool format_is_float_conv(char c) {
 76    return char_is_one_of(c, "aAeEfFgG");
 77  }
 78  
 79  // Returns string output character size for string-like conversions,
 80  // or 0 if the conversion is invalid.
 81  static int format_get_char_size(char convSpecifier,
 82                                  const char lengthModifier[2]) {
 83    if (char_is_one_of(convSpecifier, "CS")) {
 84      return sizeof(wchar_t);
 85    }
 86  
 87    if (char_is_one_of(convSpecifier, "cs[")) {
 88      if (lengthModifier[0] == 'l' && lengthModifier[1] == '\0')
 89        return sizeof(wchar_t);
 90      else if (lengthModifier[0] == '\0')
 91        return sizeof(char);
 92    }
 93  
 94    return 0;
 95  }
 96  
// Special, non-positive results of the *_get_value_size() helpers below.
// A positive result from those helpers is a size in bytes instead.
enum FormatStoreSize {
  // Store size not known in advance; can be calculated as wcslen() of the
  // destination buffer.
  FSS_WCSLEN = -2,
  // Store size not known in advance; can be calculated as strlen() of the
  // destination buffer.
  FSS_STRLEN = -1,
  // Invalid conversion specifier.
  FSS_INVALID = 0
};
107  
108  // Returns the memory size of a format directive (if >0), or a value of
109  // FormatStoreSize.
110  static int format_get_value_size(char convSpecifier,
111                                   const char lengthModifier[2],
112                                   bool promote_float) {
113    if (format_is_integer_conv(convSpecifier)) {
114      switch (lengthModifier[0]) {
115      case 'h':
116        return lengthModifier[1] == 'h' ? sizeof(char) : sizeof(short);
117      case 'l':
118        return lengthModifier[1] == 'l' ? sizeof(long long) : sizeof(long);
119      case 'q':
120        return sizeof(long long);
121      case 'L':
122        return sizeof(long long);
123      case 'j':
124        return sizeof(INTMAX_T);
125      case 'z':
126        return sizeof(SIZE_T);
127      case 't':
128        return sizeof(PTRDIFF_T);
129      case 0:
130        return sizeof(int);
131      default:
132        return FSS_INVALID;
133      }
134    }
135  
136    if (format_is_float_conv(convSpecifier)) {
137      switch (lengthModifier[0]) {
138      case 'L':
139      case 'q':
140        return sizeof(long double);
141      case 'l':
142        return lengthModifier[1] == 'l' ? sizeof(long double)
143                                             : sizeof(double);
144      case 0:
145        // Printf promotes floats to doubles but scanf does not
146        return promote_float ? sizeof(double) : sizeof(float);
147      default:
148        return FSS_INVALID;
149      }
150    }
151  
152    if (convSpecifier == 'p') {
153      if (lengthModifier[0] != 0)
154        return FSS_INVALID;
155      return sizeof(void *);
156    }
157  
158    return FSS_INVALID;
159  }
160  
// One parsed scanf conversion directive, as filled in by scanf_parse_next().
// scanf_parse_next() zero-fills the structure before parsing, so fields that
// do not appear in the directive stay 0/false (argIdx is reset to -1).
struct ScanfDirective {
  int argIdx; // argument index, or -1 if not specified ("%n$")
  int fieldWidth; // maximum field width, or 0 if not specified
  const char *begin; // points at the '%' that starts the directive
  const char *end; // points just past the last character of the directive
  bool suppressed; // suppress assignment ("*")
  bool allocate;   // allocate space ("m")
  char lengthModifier[2]; // length modifier characters, zero-padded
  char convSpecifier; // conversion specifier character
  bool maybeGnuMalloc; // "%a" followed by s, S or [...]; may be GNU "%as" etc.
};
172  
173  // Parse scanf format string. If a valid directive in encountered, it is
174  // returned in dir. This function returns the pointer to the first
175  // unprocessed character, or 0 in case of error.
176  // In case of the end-of-string, a pointer to the closing \0 is returned.
177  static const char *scanf_parse_next(const char *p, bool allowGnuMalloc,
178                                      ScanfDirective *dir) {
179    internal_memset(dir, 0, sizeof(*dir));
180    dir->argIdx = -1;
181  
182    while (*p) {
183      if (*p != '%') {
184        ++p;
185        continue;
186      }
187      dir->begin = p;
188      ++p;
189      // %%
190      if (*p == '%') {
191        ++p;
192        continue;
193      }
194      if (*p == '\0') {
195        return nullptr;
196      }
197      // %n$
198      p = maybe_parse_param_index(p, &dir->argIdx);
199      CHECK(p);
200      // *
201      if (*p == '*') {
202        dir->suppressed = true;
203        ++p;
204      }
205      // Field width
206      if (*p >= '0' && *p <= '9') {
207        p = parse_number(p, &dir->fieldWidth);
208        CHECK(p);
209        if (dir->fieldWidth <= 0)  // Width if at all must be non-zero
210          return nullptr;
211      }
212      // m
213      if (*p == 'm') {
214        dir->allocate = true;
215        ++p;
216      }
217      // Length modifier.
218      p = maybe_parse_length_modifier(p, dir->lengthModifier);
219      // Conversion specifier.
220      dir->convSpecifier = *p++;
221      // Consume %[...] expression.
222      if (dir->convSpecifier == '[') {
223        if (*p == '^')
224          ++p;
225        if (*p == ']')
226          ++p;
227        while (*p && *p != ']')
228          ++p;
229        if (*p == 0)
230          return nullptr; // unexpected end of string
231                          // Consume the closing ']'.
232        ++p;
233      }
234      // This is unfortunately ambiguous between old GNU extension
235      // of %as, %aS and %a[...] and newer POSIX %a followed by
236      // letters s, S or [.
237      if (allowGnuMalloc && dir->convSpecifier == 'a' &&
238          !dir->lengthModifier[0]) {
239        if (*p == 's' || *p == 'S') {
240          dir->maybeGnuMalloc = true;
241          ++p;
242        } else if (*p == '[') {
243          // Watch for %a[h-j%d], if % appears in the
244          // [...] range, then we need to give up, we don't know
245          // if scanf will parse it as POSIX %a [h-j %d ] or
246          // GNU allocation of string with range dh-j plus %.
247          const char *q = p + 1;
248          if (*q == '^')
249            ++q;
250          if (*q == ']')
251            ++q;
252          while (*q && *q != ']' && *q != '%')
253            ++q;
254          if (*q == 0 || *q == '%')
255            return nullptr;
256          p = q + 1; // Consume the closing ']'.
257          dir->maybeGnuMalloc = true;
258        }
259      }
260      dir->end = p;
261      break;
262    }
263    return p;
264  }
265  
266  static int scanf_get_value_size(ScanfDirective *dir) {
267    if (dir->allocate) {
268      if (!char_is_one_of(dir->convSpecifier, "cCsS["))
269        return FSS_INVALID;
270      return sizeof(char *);
271    }
272  
273    if (dir->maybeGnuMalloc) {
274      if (dir->convSpecifier != 'a' || dir->lengthModifier[0])
275        return FSS_INVALID;
276      // This is ambiguous, so check the smaller size of char * (if it is
277      // a GNU extension of %as, %aS or %a[...]) and float (if it is
278      // POSIX %a followed by s, S or [ letters).
279      return sizeof(char *) < sizeof(float) ? sizeof(char *) : sizeof(float);
280    }
281  
282    if (char_is_one_of(dir->convSpecifier, "cCsS[")) {
283      bool needsTerminator = char_is_one_of(dir->convSpecifier, "sS[");
284      unsigned charSize =
285          format_get_char_size(dir->convSpecifier, dir->lengthModifier);
286      if (charSize == 0)
287        return FSS_INVALID;
288      if (dir->fieldWidth == 0) {
289        if (!needsTerminator)
290          return charSize;
291        return (charSize == sizeof(char)) ? FSS_STRLEN : FSS_WCSLEN;
292      }
293      return (dir->fieldWidth + needsTerminator) * charSize;
294    }
295  
296    return format_get_value_size(dir->convSpecifier, dir->lengthModifier, false);
297  }
298  
299  // Common part of *scanf interceptors.
300  // Process format string and va_list, and report all store ranges.
301  // Stops when "consuming" n_inputs input items.
302  static void scanf_common(void *ctx, int n_inputs, bool allowGnuMalloc,
303                           const char *format, va_list aq) {
304    CHECK_GT(n_inputs, 0);
305    const char *p = format;
306  
307    COMMON_INTERCEPTOR_READ_RANGE(ctx, format, internal_strlen(format) + 1);
308  
309    while (*p) {
310      ScanfDirective dir;
311      p = scanf_parse_next(p, allowGnuMalloc, &dir);
312      if (!p)
313        break;
314      if (dir.convSpecifier == 0) {
315        // This can only happen at the end of the format string.
316        CHECK_EQ(*p, 0);
317        break;
318      }
319      // Here the directive is valid. Do what it says.
320      if (dir.argIdx != -1) {
321        // Unsupported.
322        break;
323      }
324      if (dir.suppressed)
325        continue;
326      int size = scanf_get_value_size(&dir);
327      if (size == FSS_INVALID) {
328        Report("WARNING: unexpected format specifier in scanf interceptor: "
329          "%.*s\n", dir.end - dir.begin, dir.begin);
330        break;
331      }
332      void *argp = va_arg(aq, void *);
333      if (dir.convSpecifier != 'n')
334        --n_inputs;
335      if (n_inputs < 0)
336        break;
337      if (size == FSS_STRLEN) {
338        size = internal_strlen((const char *)argp) + 1;
339      } else if (size == FSS_WCSLEN) {
340        // FIXME: actually use wcslen() to calculate it.
341        size = 0;
342      }
343      COMMON_INTERCEPTOR_WRITE_RANGE(ctx, argp, size);
344    }
345  }
346  
347  #if SANITIZER_INTERCEPT_PRINTF
348  
// One parsed printf conversion directive, as filled in by printf_parse_next().
// printf_parse_next() zero-fills the structure before parsing and resets both
// index fields to -1. begin points at the directive's '%' and end just past
// its conversion specifier.
struct PrintfDirective {
  int fieldWidth;
  int fieldPrecision;
  int argIdx; // width argument index, or -1 if not specified ("%*n$")
  int precisionIdx; // precision argument index, or -1 if not specified (".*n$")
  const char *begin;
  const char *end;
  bool starredWidth;
  bool starredPrecision;
  char lengthModifier[2];
  char convSpecifier;
};
361  
362  static const char *maybe_parse_number(const char *p, int *out) {
363    if (*p >= '0' && *p <= '9')
364      p = parse_number(p, out);
365    return p;
366  }
367  
368  static const char *maybe_parse_number_or_star(const char *p, int *out,
369                                                bool *star) {
370    if (*p == '*') {
371      *star = true;
372      ++p;
373    } else {
374      *star = false;
375      p = maybe_parse_number(p, out);
376    }
377    return p;
378  }
379  
380  // Parse printf format string. Same as scanf_parse_next.
381  static const char *printf_parse_next(const char *p, PrintfDirective *dir) {
382    internal_memset(dir, 0, sizeof(*dir));
383    dir->argIdx = -1;
384    dir->precisionIdx = -1;
385  
386    while (*p) {
387      if (*p != '%') {
388        ++p;
389        continue;
390      }
391      dir->begin = p;
392      ++p;
393      // %%
394      if (*p == '%') {
395        ++p;
396        continue;
397      }
398      if (*p == '\0') {
399        return nullptr;
400      }
401      // %n$
402      p = maybe_parse_param_index(p, &dir->precisionIdx);
403      CHECK(p);
404      // Flags
405      while (char_is_one_of(*p, "'-+ #0")) {
406        ++p;
407      }
408      // Field width
409      p = maybe_parse_number_or_star(p, &dir->fieldWidth,
410                                     &dir->starredWidth);
411      if (!p)
412        return nullptr;
413      // Precision
414      if (*p == '.') {
415        ++p;
416        // Actual precision is optional (surprise!)
417        p = maybe_parse_number_or_star(p, &dir->fieldPrecision,
418                                       &dir->starredPrecision);
419        if (!p)
420          return nullptr;
421        // m$
422        if (dir->starredPrecision) {
423          p = maybe_parse_param_index(p, &dir->precisionIdx);
424          CHECK(p);
425        }
426      }
427      // Length modifier.
428      p = maybe_parse_length_modifier(p, dir->lengthModifier);
429      // Conversion specifier.
430      dir->convSpecifier = *p++;
431      dir->end = p;
432      break;
433    }
434    return p;
435  }
436  
437  static int printf_get_value_size(PrintfDirective *dir) {
438    if (dir->convSpecifier == 'm') {
439      return sizeof(char *);
440    }
441  
442    if (char_is_one_of(dir->convSpecifier, "cCsS")) {
443      unsigned charSize =
444          format_get_char_size(dir->convSpecifier, dir->lengthModifier);
445      if (charSize == 0)
446        return FSS_INVALID;
447      if (char_is_one_of(dir->convSpecifier, "sS")) {
448        return (charSize == sizeof(char)) ? FSS_STRLEN : FSS_WCSLEN;
449      }
450      return charSize;
451    }
452  
453    return format_get_value_size(dir->convSpecifier, dir->lengthModifier, true);
454  }
455  
// Consumes one scalar (non-pointer) argument from the va_list pointed to by
// aq. The type fetched is chosen from the conversion specifier and the value
// size in bytes: double / long double for floating-point conversions, u32 /
// u64 otherwise. For an unexpected size a warning is reported and the macro
// returns from the enclosing function, which therefore must return void.
#define SKIP_SCALAR_ARG(aq, convSpecifier, size)                   \
  do {                                                             \
    if (!format_is_float_conv(convSpecifier)) {                    \
      switch (size) {                                              \
      case 1:                                                      \
      case 2:                                                      \
      case 4:                                                      \
        va_arg(*aq, u32);                                          \
        break;                                                     \
      case 8:                                                      \
        va_arg(*aq, u64);                                          \
        break;                                                     \
      default:                                                     \
        Report("WARNING: unexpected arg size"                      \
               " in printf interceptor: %d\n", size);              \
        return;                                                    \
      }                                                            \
    } else {                                                       \
      switch (size) {                                              \
      case 8:                                                      \
        va_arg(*aq, double);                                       \
        break;                                                     \
      case 12:                                                     \
      case 16:                                                     \
        va_arg(*aq, long double);                                  \
        break;                                                     \
      default:                                                     \
        Report("WARNING: unexpected floating-point arg size"       \
               " in printf interceptor: %d\n", size);              \
        return;                                                    \
      }                                                            \
    }                                                              \
  } while (0)
491  
492  // Common part of *printf interceptors.
493  // Process format string and va_list, and report all load ranges.
494  static void printf_common(void *ctx, const char *format, va_list aq) {
495    COMMON_INTERCEPTOR_READ_RANGE(ctx, format, internal_strlen(format) + 1);
496  
497    const char *p = format;
498  
499    while (*p) {
500      PrintfDirective dir;
501      p = printf_parse_next(p, &dir);
502      if (!p)
503        break;
504      if (dir.convSpecifier == 0) {
505        // This can only happen at the end of the format string.
506        CHECK_EQ(*p, 0);
507        break;
508      }
509      // Here the directive is valid. Do what it says.
510      if (dir.argIdx != -1 || dir.precisionIdx != -1) {
511        // Unsupported.
512        break;
513      }
514      if (dir.starredWidth) {
515        // Dynamic width
516        SKIP_SCALAR_ARG(&aq, 'd', sizeof(int));
517      }
518      if (dir.starredPrecision) {
519        // Dynamic precision
520        SKIP_SCALAR_ARG(&aq, 'd', sizeof(int));
521      }
522      int size = printf_get_value_size(&dir);
523      if (size == FSS_INVALID) {
524        Report("WARNING: unexpected format specifier in printf "
525               "interceptor: %.*s\n", dir.end - dir.begin, dir.begin);
526        break;
527      }
528      if (dir.convSpecifier == 'n') {
529        void *argp = va_arg(aq, void *);
530        COMMON_INTERCEPTOR_WRITE_RANGE(ctx, argp, size);
531        continue;
532      } else if (size == FSS_STRLEN) {
533        if (void *argp = va_arg(aq, void *)) {
534          if (dir.starredPrecision) {
535            // FIXME: properly support starred precision for strings.
536            size = 0;
537          } else if (dir.fieldPrecision > 0) {
538            // Won't read more than "precision" symbols.
539            size = internal_strnlen((const char *)argp, dir.fieldPrecision);
540            if (size < dir.fieldPrecision) size++;
541          } else {
542            // Whole string will be accessed.
543            size = internal_strlen((const char *)argp) + 1;
544          }
545          COMMON_INTERCEPTOR_READ_RANGE(ctx, argp, size);
546        }
547      } else if (size == FSS_WCSLEN) {
548        if (void *argp = va_arg(aq, void *)) {
549          // FIXME: Properly support wide-character strings (via wcsrtombs).
550          size = 0;
551          COMMON_INTERCEPTOR_READ_RANGE(ctx, argp, size);
552        }
553      } else {
554        // Skip non-pointer args
555        SKIP_SCALAR_ARG(&aq, dir.convSpecifier, size);
556      }
557    }
558  }
559  
560  #endif // SANITIZER_INTERCEPT_PRINTF