macros.rs
1 //! Macros to make it easier to work with rate-limited logs 2 //! 3 4 /// Log a rate-limited failure message. 5 /// 6 /// This macro looks at a single `Result<T,E>`, 7 /// and keeps track of how many times the `Ok` and `Err` branches are seen. 8 /// After a delay, it reports via [`tracing::event!`] 9 /// how many errors it has seen since its last report. 10 /// (It only reports an absence of errors 11 /// when such an absence has followed 12 /// an error report.) 13 /// 14 /// ## A simple example 15 /// 16 /// ``` 17 /// # use std::num::ParseIntError; 18 /// # let s = "1234"; 19 /// # let source = "the cache"; 20 /// use tor_log_ratelim::log_ratelim; 21 /// let r: Result<u8, ParseIntError> = s.parse(); 22 /// 23 /// log_ratelim!( 24 /// // The activity we were doing. 25 /// "Parsing a value from {}", source; 26 /// // The result we got. 27 /// r; 28 /// ); 29 /// ``` 30 /// 31 /// This invocation could report errors like 32 /// ```text 33 /// WARN: Parsing a value from the cache: error (occurred 9/12 times in the last 5 minutes): number too large to fit in target type 34 /// ``` 35 /// 36 /// After a while without errors, it might log: 37 /// ```text 38 /// WARN: Parsing a value from the cache: now working (occurred 0/100 times in th last hour) 39 /// ``` 40 /// 41 /// ## Important concept: Activities 42 /// 43 /// Every invocation of `log_ratelim!` defines a _set_ of rate limits 44 /// with respect to a collection of **activities**. 45 /// Each separate **activity** value gets its own rate limit. 46 /// This lets you have separate rate limits for different operations, 47 /// such as connecting to different parties, 48 /// or invoking different programs. 49 /// 50 /// Typical activities might be 51 /// `"Connecting to port {}", p` 52 /// or 53 /// `"Trying to start program {}", p` 54 /// 55 /// (These activities should be described using a verb ending with "-ing", 56 /// to make the output nice.) 57 /// 58 /// ## Requirements on the error type. 59 /// 60 /// The error type for each `Result` passed to this macro must implement: 61 /// * [`Clone`] 62 /// * [`Send`] 63 /// * [`Error`](std::error::Error) 64 /// 65 /// ## Reports are representative 66 /// 67 /// No matter how many failures are seen between log messages, 68 /// `log_ratelim!` only records and reports 69 /// one error for each time it logs. 70 /// 71 /// Its current behavior is to record and report 72 /// the _first_ error for each logged interval, 73 /// and discard the others. 74 /// This can lead to confusing results if the error is not representative. 75 /// 76 /// ## Advanced syntax 77 /// 78 /// The `log_ratelim` macro can record additional information for its 79 /// representative error report, 80 /// and can log information on successes as well. 81 /// 82 /// A full invocation of `log_ratelim!` looks like this: 83 /// 84 /// ``` 85 /// # use std::num::ParseIntError; 86 /// # let s = "1234"; 87 /// # let source = "the cache"; 88 /// # let more_information = |_| ""; 89 /// use tor_log_ratelim::log_ratelim; 90 /// let r: Result<u8, ParseIntError> = s.parse(); 91 /// log_ratelim!( 92 /// "Parsing a value from {}", source; 93 /// r; 94 /// Err(x) => WARN, "The problem was {}", more_information(x); 95 /// Ok(v) => TRACE, "Parsed {} successfully", v; 96 /// ); 97 /// ``` 98 /// 99 /// Here the clause starting with `Err(x)` 100 /// tells the logger to include a message along with the error report, 101 /// and we explicitly specifies the level at which 102 /// to log our failure reports. 103 /// 104 /// Note that the error itself is **always** reported; 105 /// there is no need to say 106 /// `Err(e) => WARN, "{}", e`. 107 /// In fact, doing so will create a redundant report of 108 /// the error. 109 // 110 // TODO: I don't think it makes sense to have an Ok() logger. 111 // Instead, we could just say 112 // log_ratelim!("Parsing a value from {}", source; r;); 113 // trace!("parsed value from {}: {:?}", source, r); 114 // This is probably better, since it logs a trace for every occurrence. 115 // 116 /// The clause starting with `Ok(v)` tells the logger what to do on success: 117 /// each individual success causes a _non-rate-limited_ 118 /// message at TRACE level. 119 /// 120 /// The `Ok() ...` clause 121 /// and the `Err() ...` clause 122 /// are both optional. 123 /// 124 /// Within the Err() clause, 125 /// the format string and its arguments 126 /// are optional. 127 // 128 // TODO performance notes: 129 // 130 // There are many opportunities for possibly making this code go faster: 131 // - Optimize the case where there is only one activity. 132 // - Use a non-string key to distinguish activities, to avoid formatting 133 // the activity string needlessly. 134 // - Limit copies (of activity and of error). 135 // - Use Event and Metadata from the tracing crate to defer formatting 136 // - Check Metadata early for the case where we don't want to report the 137 // event at all. 138 // 139 // Let's not pursue any of these until we know that this code actually 140 // shows up in a critical path. 141 #[macro_export] 142 macro_rules! log_ratelim { 143 // ==== 144 // Actual implementation for rate-limited logging. 145 // ==== 146 147 // Nobody invokes this syntax directly; it's used as a common body for the 148 // various other syntaxes. 149 { 150 @impl activity_format: ( $act_fmt:literal $(, $act_arg:expr)* ) ; 151 result: ($result:expr ) ; 152 on_error: (Err($err_pat:pat), $err_level:ident $(, $err_fmt:literal $(, $err_arg:expr)* )? ); 153 $( on_ok: (Ok($ok_pat:pat), $ok_level:ident, $ok_fmt:literal $(, $ok_arg:expr)* ); )? 154 } => { 155 #[allow(clippy::redundant_closure_call)] 156 (||{ 157 use $crate::macro_prelude::*; 158 let Some(runtime) = rt_support() else { 159 // Nobody has called `install_runtime()`: we should just log whatever 160 // happened and not worry about the rate-limiting. 161 match &$result { 162 #[allow(clippy::redundant_pattern)] 163 Err(ref the_error @ $err_pat) => { 164 tracing::event!( 165 tracing::Level::$err_level, 166 concat!($act_fmt, $(": ", $err_fmt, )? ": {}"), 167 $($act_arg,)* 168 $( $($err_arg, )* )? 169 the_error.report() 170 ); 171 } 172 $(Ok($ok_pat) => { 173 tracing::event!( 174 tracing::Level::$ok_level, 175 $ok_fmt 176 $(, $ok_arg)* 177 ); 178 })? 179 #[allow(unreachable_patterns)] 180 Ok(_) => {} 181 } 182 return; 183 }; 184 185 /// An implementation of Loggable for this log message. 186 // 187 // We use a separate implementation here so that the tracing metadata will get 188 // constructed correctly. If we called tracing::event! from a location in 189 // `tor-log-ratelim`, all the messages would appear to originate from there. 190 // 191 // (TODO: We could use tracing::Metadata explicitly, perhaps? That might be hard.) 192 struct Lg(LogState); 193 impl Loggable for Lg { 194 fn flush(&mut self, summarizing: std::time::Duration) -> Activity { 195 let activity = self.0.activity(); 196 match activity { 197 Activity::Active => { 198 tracing::event!( 199 tracing::Level::$err_level, 200 "{}", 201 self.0.display_problem(summarizing) 202 ); 203 } 204 Activity::Dormant => { 205 tracing::event!( 206 // Using err_level here is in some respects confusing: 207 // if the _presence_ of the problem is (say) a WARN, 208 // why should its newfound absence also be a WARN? 209 // 210 // We have had to decide which is worse: 211 // that a user only watching WARNs 212 // might not see a problem has gone away, 213 // or that a non-problem would be reported 214 // at an excessive severity. 215 // We went with the latter. 216 tracing::Level::$err_level, 217 "{}", 218 self.0.display_recovery(summarizing) 219 ); 220 } 221 } 222 self.0.reset(); 223 activity 224 } 225 } 226 227 /// A lazy map from activity keys to weak RateLim handles. 228 // 229 // The strong reference for each RateLim is held by a task that flushes 230 // the logger as appropriate, and drops the strong reference once it's 231 // quiescent. 232 static LOGGERS: Lazy<Mutex<WeakValueHashMap<String, Weak<RateLim<Lg>>>>> = 233 Lazy::new(|| Mutex::new(WeakValueHashMap::new())); 234 235 // We assign a separate rate limit for each activity. 236 // For now, this is string-ly typed. 237 let activity = format!($act_fmt $(, $act_arg)*); 238 let key = activity.clone(); 239 240 match &$result { 241 #[allow(clippy::redundant_pattern)] 242 Err(ref the_error @ $err_pat) => { 243 // The operation failed. 244 // 245 // 1) Create a rate-limited logger for this activity if one did not 246 // already exist. 247 let logger = LOGGERS 248 .lock() 249 .expect("poisoned lock") 250 .entry(key) 251 .or_insert_with(|| RateLim::new(Lg(LogState::new(activity)))); 252 // 2) Note failure in the activity with note_fail(). 253 logger.event(runtime, |lg| lg.0.note_fail(|| 254 // 2b) If this is the first time that this activity failed since the 255 // last flush, record the formatted err_msg, and a Clone of the error. 256 ( 257 $crate::log_ratelim!{@first_nonempty 258 { $( Some(format!($err_fmt $(, $err_arg)* )) )? } 259 { None } 260 }, 261 Some(Box::new(the_error.clone())) 262 ) 263 )); 264 } 265 Ok($crate::log_ratelim!{@first_nonempty { $($ok_pat)? } {_} }) => { 266 // The operation succeeded. 267 // 268 // 1) If this activity is tracked, call note_ok() on it. 269 if let Some(logger) = LOGGERS 270 .lock() 271 .expect("poisoned lock") 272 .get(&key) { 273 logger.nonevent(|lg| lg.0.note_ok()); 274 } 275 // 2) If we have a per-success item to log, log it. 276 $( 277 tracing::event!(tracing::Level::$ok_level, $ok_fmt $(, $ok_arg )* ); 278 )? 279 } 280 } 281 })() 282 }; 283 284 // ====== 285 // Exposed, documented syntax. 286 // ====== 287 288 // Regular invocation with an Err(_) case. 289 { 290 $act_fmt:literal $(, $act_arg:expr )* $(,)? ; 291 $result:expr ; 292 Err($err_pat:pat) => $err_level:ident $(, $err_fmt:literal $(, $err_arg:expr)* )? $(,)? 293 $(; Ok($ok_pat:pat) => $ok_level:ident, $ok_fmt:literal $(, $ok_arg:expr )* $(,)? )? 294 $(;)? 295 } => { 296 $crate::log_ratelim!{ 297 @impl 298 activity_format: ( $act_fmt $(, $act_arg)* ); 299 result: ($result); 300 on_error: (Err($err_pat), $err_level $(, $err_fmt $(, $err_arg)* )? ); 301 $( on_ok: ( Ok($ok_pat), $ok_level, $ok_fmt $(, $ok_arg)* ); )? 302 } 303 }; 304 // Regular invocation with no Err(_) case. 305 { 306 $act_fmt:literal $(, $act_arg:expr )* $(,)? ; 307 $result:expr 308 $(; Ok($ok_pat:pat) => $ok_level:ident, $ok_fmt:literal $(, $ok_arg:expr )* $(,)? )? 309 $(;)? 310 } => { 311 $crate::log_ratelim!{ 312 @impl 313 activity_format: ( $act_fmt $(, $act_arg)* ); 314 result: ($result); 315 on_error: (Err(_), WARN); 316 $( on_ok: ( Ok($ok_pat), $ok_level, $ok_fmt $(, $ok_arg)* ); )? 317 } 318 }; 319 320 // Expand to the first of two bodies that has at least one token in it. 321 { @first_nonempty { $($a:tt)+ } { $($b:tt)* }} => { $($a)+ }; 322 { @first_nonempty { } { $($b:tt)* } } => { $($b)+ }; 323 } 324 325 #[cfg(test)] 326 mod test_syntax { 327 #![allow(dead_code)] 328 329 #[derive(Clone, Debug, thiserror::Error)] 330 enum MyErr { 331 #[error("it didn't work")] 332 DidntWork, 333 } 334 impl MyErr { 335 fn badness(&self) -> u8 { 336 3 337 } 338 } 339 340 /// This doesn't actually run or test anything; it just makes sure that all 341 /// the different syntaxes work. 342 fn various_syntaxes(friend: &str, r: &Result<u32, MyErr>) { 343 log_ratelim!( 344 "saying hi to {}", friend; 345 r; 346 ); 347 348 log_ratelim!( 349 "saying hi to {}", friend; 350 r; 351 Err(_) => WARN; 352 ); 353 354 log_ratelim!( 355 "saying hi to {}", friend; 356 r; 357 Err(e) => WARN, "badness={}", e.badness(); 358 ); 359 360 log_ratelim!( 361 "saying hi to {}", friend; 362 r; 363 Ok(v) => TRACE, "nothing bad happened; v={}", v; 364 ); 365 366 log_ratelim!( 367 "saying hi to {}", friend; 368 r; 369 Ok(v) => TRACE, "nothing bad happened; v={}", v; 370 ); 371 372 log_ratelim!( 373 "saying hi to {}", friend; 374 r; 375 Err(e) => WARN, "badness={}", e.badness(); 376 Ok(v) => TRACE, "nothing bad happened; v={}", v; 377 ); 378 } 379 }