trace.lua
--[[
    ToasterGen Spin

    Copyright (C) 2025 Clifton Toaster Reid <cliftontreid@duck.com>

    This library is free software: you can redistribute it and/or modify
    it under the terms of the GNU Lesser General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public License
    along with this library. If not, see <https://www.gnu.org/licenses/>.
]]

---@class ZipkinSpan
---@field id string Unique 64bit identifier encoded as 16 lowercase hex characters.
---@field traceId string Unique 64 or 128bit identifier encoded as 16 or 32 lowercase hex characters.
---@field parentId string | nil Unique 64bit identifier encoded as 16 lowercase hex characters.
---@field name string | nil The logical operation this span represents in lowercase (e.g. rpc method).
---@field timestamp number | nil Epoch microseconds of the start of this span, possibly absent if incomplete.
---@field duration number | nil Duration in microseconds of the critical path, if known. Durations of less than one are rounded up. Duration of children can be longer than their parents due to asynchronous operations.
---@field kind "CLIENT" | "SERVER" | "PRODUCER" | "CONSUMER" | nil When present, kind clarifies timestamp, duration and remoteEndpoint. When absent, the span is local or incomplete. Unlike client and server, there is no direct critical path latency relationship between producer and consumer spans.
---@field localEndpoint ZipkinEndpoint | nil The network context of a node in the service graph
---@field remoteEndpoint ZipkinEndpoint | nil The network context of a node in the service graph
---@field annotations ZipkinAnnotation[] | nil Associates events that explain latency with the time they happened.
---@field tags table<string, string> | nil Adds context to a span, for search, viewing and analysis. For example, a key "your_app.version" would let you lookup traces by version. A tag "sql.query" isn't searchable, but it can help in debugging when viewing a trace.
---@field setParentId fun(self: ZipkinSpan, parentId: string): ZipkinSpan Sets the parentId of the span.
---@field setName fun(self: ZipkinSpan, name: string): ZipkinSpan Sets the name of the span operation.
---@field setKind fun(self: ZipkinSpan, kind: "CLIENT" | "SERVER" | "PRODUCER" | "CONSUMER"): ZipkinSpan Sets the kind of the span.
---@field setLocalEndpoint fun(self: ZipkinSpan, serviceName: string|nil, ipv4: string|nil, ipv6: string|nil, port: number|nil): ZipkinSpan Sets the local endpoint of the span.
---@field setRemoteEndpoint fun(self: ZipkinSpan, serviceName: string|nil, ipv4: string|nil, ipv6: string|nil, port: number|nil): ZipkinSpan Sets the remote endpoint of the span.
---@field addAnnotation fun(self: ZipkinSpan, value: string): ZipkinSpan Adds an annotation event to the span.
---@field addTag fun(self: ZipkinSpan, key: string, value: string): ZipkinSpan Adds a key-value tag to the span.
---@field endSpan fun(self: ZipkinSpan): ZipkinSpan Ends the span by calculating and setting its duration.

---@class ZipkinEndpoint
---@field serviceName string | nil Lower-case label of this node in the service graph, such as "favstar". Leave absent if unknown.
---@field ipv4 string | nil The text representation of the primary IPv4 address associated with this connection. Ex. 192.168.99.100 Absent if unknown.
---@field ipv6 string | nil The text representation of the primary IPv6 address associated with a connection. Ex. 2001:db8::c001 Absent if unknown.
---@field port number | nil Depending on context, this could be a listen port or the client-side of a socket. Absent if unknown. Please don't set to zero.

---@class ZipkinAnnotation
---@field timestamp number Epoch microseconds of this event. For example, 1502787600000000 corresponds to 2017-08-15 09:00 UTC. This value should be set directly by instrumentation, using the most precise value possible. For example, gettimeofday or multiplying epoch millis by 1000.
---@field value string Usually a short tag indicating an event, like "error". While possible to add larger data, such as garbage collection details, low cardinality event names both keep the size of spans down and also are easy to search against.

local expect = require("cc.expect").expect
local Logger = require("src.log")

-- Destination URL for span uploads; nil until Trace.setTempoURL is called.
---@type string | nil
local TEMPO_URL = nil
-- Spans queued by Trace.addSpan and flushed by Trace.sendTempo.
---@type ZipkinSpan[]
local spans = {}

-- Public module table and the method table shared by every span instance.
local Trace = {}
local SpanMethods = {}

--- Returns the current wall-clock time in microseconds since the Unix epoch.
--- The underlying clock has millisecond resolution, so the result is always a
--- multiple of 1000.
---@return number current timestamp in microseconds
local function getTimestamp()
    -- os.epoch() without an argument uses the "ingame" locale, i.e. milliseconds
    -- of in-game time since the world was created. Zipkin timestamps must be
    -- real epoch time, so the "utc" locale is requested explicitly.
    local milliseconds = os.epoch("utc")
    return milliseconds * 1000
end

--- Sets the parentId of the span.
--- The value is stored lower-cased.
---@param self ZipkinSpan the span instance
---@param parentId string Unique 64-bit identifier encoded as 16 hex characters.
---@return ZipkinSpan the span instance
function SpanMethods:setParentId(parentId)
    expect(1, parentId, "string")

    self.parentId = string.lower(parentId)
    return self
end

--- Sets the name of the span operation.
---@param self ZipkinSpan the span instance
---@param name string Logical operation name in lowercase.
---@return ZipkinSpan the span instance
function SpanMethods:setName(name)
    expect(1, name, "string")

    -- Names are stored lower-cased.
    self.name = string.lower(name)
    return self
end

--- Sets the kind of the span (CLIENT, SERVER, PRODUCER, or CONSUMER).
--- The value is matched case-insensitively and stored in upper case.
---@param self ZipkinSpan the span instance
---@param kind string The span kind (CLIENT, SERVER, PRODUCER, or CONSUMER).
---@return ZipkinSpan the span instance
---@throws error if kind is not one of the four accepted values
function SpanMethods:setKind(kind)
    expect(1, kind, "string")

    -- Normalize before validating; upper-casing after an exact-match check
    -- would be a no-op and would reject otherwise valid input like "client".
    local normalized = string.upper(kind)
    if normalized ~= "CLIENT" and normalized ~= "SERVER"
        and normalized ~= "PRODUCER" and normalized ~= "CONSUMER" then
        error("Invalid kind. Must be one of CLIENT, SERVER, PRODUCER, or CONSUMER.")
    end

    self.kind = normalized
    return self
end

--- Internal helper to set or clear an endpoint on the span.
--- When every value is absent the endpoint field itself is removed. Otherwise
--- all four endpoint fields are overwritten, so a nil argument clears the
--- corresponding field of an already existing endpoint.
---@param self ZipkinSpan the span instance
---@param endpointKey "localEndpoint" | "remoteEndpoint" field to set
---@param serviceName string|nil Service name in lowercase or nil to clear
---@param ipv4 string|nil IPv4 address or nil to clear
---@param ipv6 string|nil IPv6 address or nil to clear
---@param port number|nil Port number or nil to clear
---@return ZipkinSpan the span instance
local function _setEndpoint(self, endpointKey, serviceName, ipv4, ipv6, port)
    -- If all are nil, set the specified endpoint field to nil
    if not serviceName and not ipv4 and not ipv6 and not port then
        self[endpointKey] = nil
        return self
    end

    -- Create or reuse the endpoint table
    local endpoint = self[endpointKey] or {}
    self[endpointKey] = endpoint

    -- Assign values, converting strings to lowercase
    endpoint.serviceName = serviceName and string.lower(serviceName) or nil
    endpoint.ipv4 = ipv4 and string.lower(ipv4) or nil
    endpoint.ipv6 = ipv6 and string.lower(ipv6) or nil
    endpoint.port = port or nil

    return self
end

--- Sets the local endpoint of the span.
---@param self ZipkinSpan span being modified
---@param serviceName string|nil Service name
---@param ipv4 string|nil IPv4 address
---@param ipv6 string|nil IPv6 address
---@param port number|nil Port number
---@return ZipkinSpan the span instance
function SpanMethods:setLocalEndpoint(serviceName, ipv4, ipv6, port)
    -- Every argument is optional, but must have the right type when given.
    expect(1, serviceName, "string", "nil")
    expect(2, ipv4, "string", "nil")
    expect(3, ipv6, "string", "nil")
    expect(4, port, "number", "nil")

    local span = _setEndpoint(self, "localEndpoint", serviceName, ipv4, ipv6, port)
    return span
end

--- Sets the remote endpoint of the span.
---@param self ZipkinSpan span being modified
---@param serviceName string|nil Service name
---@param ipv4 string|nil IPv4 address
---@param ipv6 string|nil IPv6 address
---@param port number|nil Port number
---@return ZipkinSpan the span instance
function SpanMethods:setRemoteEndpoint(serviceName, ipv4, ipv6, port)
    -- Every argument is optional, but must have the right type when given.
    expect(1, serviceName, "string", "nil")
    expect(2, ipv4, "string", "nil")
    expect(3, ipv6, "string", "nil")
    expect(4, port, "number", "nil")

    local span = _setEndpoint(self, "remoteEndpoint", serviceName, ipv4, ipv6, port)
    return span
end

--- Records an annotation on the span, stamped with the current time.
---@param self ZipkinSpan span being modified
---@param value string Short tag value for the annotation
---@return ZipkinSpan the span instance
function SpanMethods:addAnnotation(value)
    expect(1, value, "string")

    local event = { timestamp = getTimestamp(), value = value }

    -- The annotation list is created lazily on first use.
    local events = self.annotations
    if not events then
        events = {}
        self.annotations = events
    end

    events[#events + 1] = event
    return self
end

--- Adds a key-value tag to the span for search and analysis.
---@param self ZipkinSpan the span instance
---@param key string Tag key
---@param value string Tag value
---@return ZipkinSpan the span instance
function SpanMethods:addTag(key, value)
    expect(1, key, "string")
    expect(2, value, "string")

    -- The tag table is created lazily on first use.
    if not self.tags then
        self.tags = {}
    end

    -- Adding an existing key overwrites its previous value.
    self.tags[key] = value
    return self
end

--- Ends the span by calculating and setting its duration.
--- Calling it again on a span that already has a duration leaves it unchanged.
---@param self ZipkinSpan the span instance
---@return ZipkinSpan the span instance
---@throws error if the span was not started
function SpanMethods:endSpan()
    if not self.timestamp then
        error("Span has not started yet.")
    end

    if not self.duration then
        local elapsed = getTimestamp() - self.timestamp
        -- The clock only has millisecond resolution, so a short span can
        -- measure as 0. Durations of less than one are rounded up to 1.
        self.duration = math.max(1, elapsed)
    end

    return self
end

--- Generates a random 64-bit ID as 16 lowercase hexadecimal characters.
---@return string generated hex ID
function Trace.rndID()
    -- A full 64-bit range does not fit what math.random / "%x" can handle
    -- portably, so the ID is assembled from four independent 16-bit chunks.
    local chunks = {}
    for i = 1, 4 do
        chunks[i] = string.format("%04x", math.random(0, 0xFFFF))
    end
    return table.concat(chunks)
end

-- Shared metatable that exposes the span methods on every span instance.
local SpanMeta = { __index = SpanMethods }

--- Creates a new ZipkinSpan with unique IDs and start timestamp.
--- Optional fields (parentId, name, duration, kind, localEndpoint,
--- remoteEndpoint, annotations, tags) start out absent and are filled in
--- through the chainable span methods.
---@return ZipkinSpan a new span instance
function Trace.new()
    local span = {
        id = Trace.rndID(),
        traceId = Trace.rndID(),
        timestamp = getTimestamp(),
    }

    -- Set the metatable to allow method chaining
    return setmetatable(span, SpanMeta)
end

--- Sets the Tempo URL for sending spans.
---@param url string The Tempo URL
---@return string The configured Tempo URL
---@throws error if http.checkURL rejects the URL
function Trace.setTempoURL(url)
    expect(1, url, "string")

    local valid, reason = http.checkURL(url)
    if not valid then
        local message = "Invalid Tempo URL: " .. tostring(reason)
        Logger.error(message)
        error(message)
    end

    TEMPO_URL = url
    return TEMPO_URL
end

--- Adds a span to the list of spans to be sent to Tempo.
---@param span ZipkinSpan The span to add
---@return ZipkinSpan the span instance
---@throws error if the span lacks an id or traceId, or if no Tempo URL is configured
function Trace.addSpan(span)
    expect(1, span, "table")

    if not span.id or not span.traceId then
        error("Span must have an id and traceId.")
    end

    if not TEMPO_URL then
        error("Tempo URL is not set. Use Trace.setTempoURL(url) to set it.")
    end

    -- Add the span to the list of spans
    table.insert(spans, span)

    return span
end

--- Sends the collected spans to the configured Tempo URL.
--- The queue is only cleared when the server answers with 202; on any failure
--- the spans are kept so a later call can retry.
---@return boolean success True if the spans were sent successfully or if there were no spans to send, false otherwise.
function Trace.sendTempo()
    if not TEMPO_URL then
        Logger.error("Tempo URL is not set. Cannot send spans.")
        return false
    end

    if #spans == 0 then
        return true
    end

    local jsonPayload = textutils.serializeJSON(spans)
    local headers = { ["Content-Type"] = "application/json" }

    -- On failure http.post returns nil, an error message and (optionally) the
    -- failing response, so the first result must be checked before use.
    local response, err, failedResponse = http.post(TEMPO_URL, jsonPayload, headers)
    if not response then
        Logger.error("Failed to send spans to Tempo: " .. tostring(err))
        if failedResponse then
            failedResponse.close()
        end
        return false
    end

    local code = response.getResponseCode()
    -- Always release the response handle once the status code has been read.
    response.close()

    if code ~= 202 then
        Logger.error("Unexpected response code from Tempo: " .. tostring(code))
        return false
    end

    -- Clear the spans list after successful sending
    spans = {}
    return true
end

return Trace