/ Cornell / Protobuf.lean
Protobuf.lean
  1  /-
  2    Cornell.Protobuf - Verified Protobuf Wire Format
  3    
  4    Protobuf wire format primitives:
  5    - Varint: variable-length integer encoding
  6    - Fixed32/Fixed64: little-endian fixed-width
  7    - Length-delimited: length prefix + bytes
  8    - Tag: field_number << 3 | wire_type
  9    
 10    Wire types:
 11    - 0: Varint (int32, int64, uint32, uint64, sint32, sint64, bool, enum)
 12    - 1: 64-bit (fixed64, sfixed64, double)
 13    - 2: Length-delimited (string, bytes, embedded messages, packed repeated)
 14    - 5: 32-bit (fixed32, sfixed32, float)
 15    
 16    Reference: https://protobuf.dev/programming-guides/encoding/
 17  -/
 18  
 19  import Cornell.Basic
 20  import Cornell.Proofs
 21  
 22  namespace Cornell.Protobuf
 23  
 24  open Cornell Cornell.Proofs
 25  
 26  -- ═══════════════════════════════════════════════════════════════════════════════
 27  -- VARINT ENCODING
 28  -- Variable-length integer, 7 bits per byte, MSB = continuation
 29  -- ═══════════════════════════════════════════════════════════════════════════════
 30  
 31  /-- Encode a natural number as varint bytes -/
 32  def encodeVarint (n : Nat) : Bytes :=
 33    if n < 128 then
 34      ⟨#[n.toUInt8]⟩
 35    else
 36      let byte := (n % 128 + 128).toUInt8  -- Set continuation bit
 37      let rest := encodeVarint (n / 128)
 38      ⟨#[byte]⟩ ++ rest
 39  termination_by n
 40  decreasing_by
 41    simp_wf
 42    omega
 43  
 44  /-- Decode varint from bytes -/
 45  def decodeVarint (bs : Bytes) : ParseResult Nat :=
 46    go bs 0 0
 47  where
 48    go (bs : Bytes) (acc : Nat) (shift : Nat) : ParseResult Nat :=
 49      if bs.size > 0 then
 50        let byte := bs.data[0]!
 51        let value := (byte.toNat &&& 0x7F) <<< shift
 52        let acc' := acc + value
 53        if byte < 128 then
 54          .ok acc' (bs.extract 1 bs.size)
 55        else if shift >= 63 then
 56          .fail  -- Overflow protection
 57        else
 58          go (bs.extract 1 bs.size) acc' (shift + 7)
 59      else
 60        .fail
 61    termination_by bs.size
 62  
 63  /-- Varint box (partial - needs termination proof) -/
 64  -- Note: Full proof requires showing decodeVarint ∘ encodeVarint = id
 65  -- This is true but requires induction on the varint structure
 66  
 67  -- ═══════════════════════════════════════════════════════════════════════════════
 68  -- WIRE TYPES
 69  -- ═══════════════════════════════════════════════════════════════════════════════
 70  
 71  inductive WireType where
 72    | varint          -- 0
 73    | fixed64         -- 1
 74    | lengthDelimited -- 2
 75    | startGroup      -- 3 (deprecated)
 76    | endGroup        -- 4 (deprecated)
 77    | fixed32         -- 5
 78    deriving Repr, DecidableEq
 79  
 80  def WireType.toNat : WireType → Nat
 81    | .varint => 0
 82    | .fixed64 => 1
 83    | .lengthDelimited => 2
 84    | .startGroup => 3
 85    | .endGroup => 4
 86    | .fixed32 => 5
 87  
 88  def WireType.fromNat : Nat → Option WireType
 89    | 0 => some .varint
 90    | 1 => some .fixed64
 91    | 2 => some .lengthDelimited
 92    | 3 => some .startGroup
 93    | 4 => some .endGroup
 94    | 5 => some .fixed32
 95    | _ => none
 96  
 97  -- ═══════════════════════════════════════════════════════════════════════════════
 98  -- FIELD TAG
 99  -- Tag = (field_number << 3) | wire_type
100  -- ═══════════════════════════════════════════════════════════════════════════════
101  
102  structure FieldTag where
103    fieldNumber : Nat
104    wireType : WireType
105    deriving Repr
106  
107  def encodeTag (tag : FieldTag) : Bytes :=
108    encodeVarint ((tag.fieldNumber <<< 3) + tag.wireType.toNat)
109  
110  def decodeTag (bs : Bytes) : ParseResult FieldTag :=
111    match decodeVarint bs with
112    | .ok n rest =>
113      let wireTypeNum := n &&& 0x7
114      let fieldNumber := n >>> 3
115      match WireType.fromNat wireTypeNum with
116      | some wt => .ok ⟨fieldNumber, wt⟩ rest
117      | none => .fail
118    | .fail => .fail
119  
120  -- ═══════════════════════════════════════════════════════════════════════════════
121  -- FIXED-WIDTH TYPES
122  -- ═══════════════════════════════════════════════════════════════════════════════
123  
124  /-- 32-bit little-endian (wire type 5) - uses verified u32leBitVec via isoBox -/
125  def u32le : Box UInt32 := 
126    isoBox u32leBitVec 
127      UInt32.ofBitVec 
128      UInt32.toBitVec
129      (fun _ => rfl)  -- ofBitVec ∘ toBitVec = id
130      (fun _ => rfl)  -- toBitVec ∘ ofBitVec = id
131  
132  def fixed32 : Box UInt32 := u32le
133  
134  /-- 64-bit little-endian (wire type 1) - uses verified u64leBitVec via isoBox -/
135  def u64leVerified : Box UInt64 := 
136    isoBox u64leBitVec 
137      UInt64.ofBitVec 
138      UInt64.toBitVec
139      (fun _ => rfl)
140      (fun _ => rfl)
141  
142  def fixed64 : Box UInt64 := u64leVerified
143  
144  -- ═══════════════════════════════════════════════════════════════════════════════
145  -- LENGTH-DELIMITED
146  -- ═══════════════════════════════════════════════════════════════════════════════
147  
148  /-- Length-delimited bytes: varint length + raw bytes -/
149  structure LengthDelimited where
150    data : Bytes
151    deriving Repr
152  
153  def encodeLengthDelimited (ld : LengthDelimited) : Bytes :=
154    encodeVarint ld.data.size ++ ld.data
155  
156  def decodeLengthDelimited (bs : Bytes) : ParseResult LengthDelimited :=
157    match decodeVarint bs with
158    | .ok len rest =>
159      if rest.size >= len then
160        .ok ⟨rest.extract 0 len⟩ (rest.extract len rest.size)
161      else
162        .fail
163    | .fail => .fail
164  
165  -- ═══════════════════════════════════════════════════════════════════════════════
166  -- ZIGZAG ENCODING (for sint32, sint64)
167  -- Maps negative numbers to positive: 0 → 0, -1 → 1, 1 → 2, -2 → 3, ...
168  -- ═══════════════════════════════════════════════════════════════════════════════
169  
170  def zigzagEncode (n : Int) : Nat :=
171    if n >= 0 then
172      (n.toNat <<< 1)
173    else
174      (((-n).toNat <<< 1) - 1)
175  
176  def zigzagDecode (n : Nat) : Int :=
177    if n % 2 == 0 then
178      (n >>> 1 : Nat)
179    else
180      -((n + 1) >>> 1 : Nat)
181  
182  -- ═══════════════════════════════════════════════════════════════════════════════
183  -- PROTOBUF STRING
184  -- ═══════════════════════════════════════════════════════════════════════════════
185  
186  /-- UTF-8 string (length-delimited) -/
187  structure ProtoString where
188    value : String
189    deriving Repr
190  
191  def encodeProtoString (s : ProtoString) : Bytes :=
192    let utf8 := s.value.toUTF8
193    encodeVarint utf8.size ++ utf8
194  
195  def decodeProtoString (bs : Bytes) : ParseResult ProtoString :=
196    match decodeLengthDelimited bs with
197    | .ok ld rest =>
198      -- Attempt UTF-8 decode
199      match String.fromUTF8? ld.data with
200      | some s => .ok ⟨s⟩ rest
201      | none => .fail
202    | .fail => .fail
203  
204  -- ═══════════════════════════════════════════════════════════════════════════════
205  -- GRPC FRAMING
206  -- 1 byte: compressed flag (0 or 1)
207  -- 4 bytes: message length (big-endian!)
208  -- N bytes: message
209  -- ═══════════════════════════════════════════════════════════════════════════════
210  
211  structure GrpcFrame where
212    compressed : Bool
213    message : Bytes
214    deriving Repr
215  
216  def encodeGrpcFrame (frame : GrpcFrame) : Bytes :=
217    let flag : UInt8 := if frame.compressed then 1 else 0
218    let len := frame.message.size.toUInt32
219    -- Note: gRPC uses BIG-endian for length!
220    let b0 := ((len >>> 24) &&& 0xFF).toUInt8
221    let b1 := ((len >>> 16) &&& 0xFF).toUInt8
222    let b2 := ((len >>> 8) &&& 0xFF).toUInt8
223    let b3 := (len &&& 0xFF).toUInt8
224    let lenBytes : Bytes := ⟨#[b0, b1, b2, b3]⟩
225    ⟨#[flag]⟩ ++ lenBytes ++ frame.message
226  
227  def decodeGrpcFrame (bs : Bytes) : ParseResult GrpcFrame :=
228    if bs.size >= 5 then
229      let compressed := bs.data[0]! != 0
230      let b1 := bs.data[1]!
231      let b2 := bs.data[2]!
232      let b3 := bs.data[3]!
233      let b4 := bs.data[4]!
234      let len := (b1.toNat <<< 24) + (b2.toNat <<< 16) + (b3.toNat <<< 8) + b4.toNat
235      let rest := bs.extract 5 bs.size
236      if rest.size >= len then
237        .ok ⟨compressed, rest.extract 0 len⟩ (rest.extract len rest.size)
238      else
239        .fail
240    else
241      .fail
242  
243  -- ═══════════════════════════════════════════════════════════════════════════════
244  -- GRPC CALL STATE MACHINE
245  -- ═══════════════════════════════════════════════════════════════════════════════
246  
247  /-- gRPC call types -/
248  inductive CallType where
249    | unary           -- Single request, single response
250    | clientStreaming -- Stream of requests, single response
251    | serverStreaming -- Single request, stream of responses
252    | bidiStreaming   -- Stream of requests, stream of responses
253    deriving Repr, DecidableEq
254  
255  /-- Client-side gRPC call state -/
256  inductive GrpcClientState where
257    | idle
258    | sendingHeaders
259    | sendingMessage (remaining : Nat)  -- For streaming
260    | awaitingHeaders
261    | receivingMessage (remaining : Nat)
262    | awaitingTrailers
263    | complete (status : Nat) (message : Option String)
264    | failed (reason : String)
265    deriving Repr
266  
267  /-- Client events -/
268  inductive GrpcClientEvent where
269    | start (callType : CallType) (method : String)
270    | headersSent
271    | messageSent
272    | headersReceived (status : Option Nat)
273    | messageReceived (data : Bytes)
274    | trailersReceived (status : Nat) (message : Option String)
275    | error (reason : String)
276    deriving Repr
277  
278  /-- Client actions -/
279  inductive GrpcClientAction where
280    | sendHeaders (method : String) (metadata : List (String × String))
281    | sendMessage (data : Bytes) (endStream : Bool)
282    | sendTrailers
283    | receiveHeaders
284    | receiveMessage
285    | receiveTrailers
286    | complete (status : Nat)
287    | fail (reason : String)
288    deriving Repr
289  
290  -- Note: Full state machine implementation would follow the same pattern
291  -- as the handshake state machine in StateMachine.lean
292  
293  -- ═══════════════════════════════════════════════════════════════════════════════
294  -- DIGEST TYPE (for REAPI)
295  -- ═══════════════════════════════════════════════════════════════════════════════
296  
297  /-- Content digest (hash + size) -/
298  structure Digest where
299    hash : String      -- Hex-encoded hash
300    sizeBytes : Nat
301    deriving Repr, DecidableEq
302  
303  /-- Encode Digest as protobuf (fields 1 and 2) -/
304  def encodeDigest (d : Digest) : Bytes :=
305    -- Field 1: string hash (wire type 2)
306    let hashTag := encodeTag ⟨1, .lengthDelimited⟩
307    let hashValue := encodeProtoString ⟨d.hash⟩
308    -- Field 2: int64 size_bytes (wire type 0)
309    let sizeTag := encodeTag ⟨2, .varint⟩
310    let sizeValue := encodeVarint d.sizeBytes
311    hashTag ++ hashValue ++ sizeTag ++ sizeValue
312  
313  -- ═══════════════════════════════════════════════════════════════════════════════
314  -- REAPI MESSAGES (simplified)
315  -- ═══════════════════════════════════════════════════════════════════════════════
316  
317  /-- FindMissingBlobsRequest (simplified) -/
318  structure FindMissingBlobsRequest where
319    instanceName : String
320    blobDigests : List Digest
321    deriving Repr
322  
323  /-- FindMissingBlobsResponse (simplified) -/
324  structure FindMissingBlobsResponse where
325    missingBlobDigests : List Digest
326    deriving Repr
327  
328  -- Full encoding would use repeated fields (same tag, multiple values)
329  
330  -- ═══════════════════════════════════════════════════════════════════════════════
331  -- RESET-ON-AMBIGUITY SEMANTICS
332  -- Following the pattern established in Cornell.Sigil, Cornell.Zmtp, Cornell.Nix
333  -- ═══════════════════════════════════════════════════════════════════════════════
334  
335  /-!
336  ## Reset-on-Ambiguity for Protobuf
337  
338  Protobuf wire format has several ambiguity triggers:
339  
340  1. **Varint overflow** - more than 10 continuation bytes (64-bit max)
341  2. **Deprecated wire types** - groups (wire types 3, 4) are deprecated
342  3. **Invalid wire type** - wire type 6 or 7 don't exist
343  4. **Length overflow** - length-delimited field exceeds remaining bytes
344  5. **Invalid UTF-8** - string field contains invalid UTF-8
345  6. **Zero field number** - field numbers must be >= 1
346  7. **gRPC compression flag invalid** - must be 0 or 1
347  
348  When any of these occur, we must reset parsing state.
349  -/
350  
351  -- ═══════════════════════════════════════════════════════════════════════════════
352  -- AMBIGUITY REASONS
353  -- ═══════════════════════════════════════════════════════════════════════════════
354  
355  /-- Reasons for ambiguity in Protobuf parsing -/
356  inductive AmbiguityReason where
357    /-- Varint has more than 10 continuation bytes (overflow) -/
358    | varintOverflow : AmbiguityReason
359    /-- Deprecated wire type (startGroup = 3, endGroup = 4) -/
360    | deprecatedWireType : Nat → AmbiguityReason
361    /-- Invalid wire type (6 or 7) -/
362    | invalidWireType : Nat → AmbiguityReason
363    /-- Field number is zero (must be >= 1) -/
364    | zeroFieldNumber : AmbiguityReason
365    /-- Length-delimited field length exceeds available bytes -/
366    | lengthOverflow : Nat → Nat → AmbiguityReason  -- claimed, available
367    /-- String contains invalid UTF-8 -/
368    | invalidUtf8 : AmbiguityReason
369    /-- gRPC frame has invalid compression flag (not 0 or 1) -/
370    | invalidCompressionFlag : UInt8 → AmbiguityReason
371    /-- gRPC frame length exceeds maximum (4MB default) -/
372    | grpcFrameTooLarge : Nat → AmbiguityReason
373    /-- Nested message depth exceeds limit (prevent stack overflow) -/
374    | nestingTooDeep : Nat → AmbiguityReason
375    deriving Repr, DecidableEq
376  
377  -- ═══════════════════════════════════════════════════════════════════════════════
378  -- STRICT PARSE RESULT
379  -- ═══════════════════════════════════════════════════════════════════════════════
380  
381  /--
382  Strict parse result with three outcomes.
383  
384  - `ok`: unambiguous success
385  - `incomplete`: need more bytes (streaming)
386  - `ambiguous`: protocol violation, must reset
387  -/
388  inductive StrictParseResult (α : Type) where
389    | ok : α → Bytes → StrictParseResult α
390    | incomplete : Nat → StrictParseResult α  -- how many more bytes needed
391    | ambiguous : AmbiguityReason → StrictParseResult α
392    deriving Repr
393  
394  namespace StrictParseResult
395  
396  def map (f : α → β) : StrictParseResult α → StrictParseResult β
397    | ok a rest => ok (f a) rest
398    | incomplete n => incomplete n
399    | ambiguous r => ambiguous r
400  
401  def bind (r : StrictParseResult α) (f : α → Bytes → StrictParseResult β) : StrictParseResult β :=
402    match r with
403    | ok a rest => f a rest
404    | incomplete n => incomplete n
405    | ambiguous r => ambiguous r
406  
407  def isOk : StrictParseResult α → Bool
408    | ok _ _ => true
409    | _ => false
410  
411  def isIncomplete : StrictParseResult α → Bool
412    | incomplete _ => true
413    | _ => false
414  
415  def isAmbiguous : StrictParseResult α → Bool
416    | ambiguous _ => true
417    | _ => false
418  
419  -- Lemmas
420  @[simp] theorem map_ok (f : α → β) (a : α) (rest : Bytes) :
421      map f (ok a rest) = ok (f a) rest := rfl
422  
423  @[simp] theorem map_incomplete (f : α → β) (n : Nat) :
424      map f (incomplete n : StrictParseResult α) = incomplete n := rfl
425  
426  @[simp] theorem map_ambiguous (f : α → β) (r : AmbiguityReason) :
427      map f (ambiguous r : StrictParseResult α) = ambiguous r := rfl
428  
429  @[simp] theorem bind_ok (a : α) (rest : Bytes) (f : α → Bytes → StrictParseResult β) :
430      bind (ok a rest) f = f a rest := rfl
431  
432  @[simp] theorem bind_incomplete (n : Nat) (f : α → Bytes → StrictParseResult β) :
433      bind (incomplete n : StrictParseResult α) f = incomplete n := rfl
434  
435  @[simp] theorem bind_ambiguous (r : AmbiguityReason) (f : α → Bytes → StrictParseResult β) :
436      bind (ambiguous r : StrictParseResult α) f = ambiguous r := rfl
437  
438  end StrictParseResult
439  
440  -- ═══════════════════════════════════════════════════════════════════════════════
441  -- STRICT VARINT PARSING
442  -- ═══════════════════════════════════════════════════════════════════════════════
443  
444  /-- Maximum varint bytes (10 for 64-bit values) -/
445  def maxVarintBytes : Nat := 10
446  
447  /-- 
448  Decode varint with strict overflow detection.
449  
450  Returns:
451  - `ok (value, bytesConsumed)` on success
452  - `incomplete n` if need more bytes
453  - `ambiguous varintOverflow` if too many continuation bytes
454  -/
455  def decodeVarintStrict (bs : Bytes) : StrictParseResult (Nat × Nat) :=
456    go bs 0 0 0
457  where
458    go (bs : Bytes) (offset : Nat) (acc : Nat) (shift : Nat) : StrictParseResult (Nat × Nat) :=
459      if offset >= bs.size then
460        .incomplete 1
461      else if offset >= maxVarintBytes then
462        .ambiguous .varintOverflow
463      else
464        let byte := bs.data[offset]!
465        let value := (byte.toNat &&& 0x7F) <<< shift
466        let acc' := acc + value
467        if byte < 128 then
468          -- End of varint
469          .ok (acc', offset + 1) (bs.extract (offset + 1) bs.size)
470        else
471          go bs (offset + 1) acc' (shift + 7)
472    termination_by maxVarintBytes - offset
473  
474  -- ═══════════════════════════════════════════════════════════════════════════════
475  -- STRICT TAG PARSING
476  -- ═══════════════════════════════════════════════════════════════════════════════
477  
478  /-- Check if wire type is valid (not deprecated or unknown) -/
479  def isValidWireType (wt : Nat) : Bool :=
480    wt == 0 || wt == 1 || wt == 2 || wt == 5
481  
482  /-- Check if wire type is deprecated (groups) -/
483  def isDeprecatedWireType (wt : Nat) : Bool :=
484    wt == 3 || wt == 4
485  
486  /--
487  Decode field tag with strict validation.
488  
489  Detects:
490  - Varint overflow
491  - Zero field number
492  - Deprecated wire types (groups)
493  - Invalid wire types (6, 7)
494  -/
495  def decodeTagStrict (bs : Bytes) : StrictParseResult FieldTag :=
496    match decodeVarintStrict bs with
497    | .ok (n, _) rest =>
498      let wireTypeNum := n &&& 0x7
499      let fieldNumber := n >>> 3
500      -- Check field number
501      if fieldNumber == 0 then
502        .ambiguous .zeroFieldNumber
503      -- Check wire type
504      else if isDeprecatedWireType wireTypeNum then
505        .ambiguous (.deprecatedWireType wireTypeNum)
506      else if !isValidWireType wireTypeNum then
507        .ambiguous (.invalidWireType wireTypeNum)
508      else
509        match WireType.fromNat wireTypeNum with
510        | some wt => .ok ⟨fieldNumber, wt⟩ rest
511        | none => .ambiguous (.invalidWireType wireTypeNum)
512    | .incomplete n => .incomplete n
513    | .ambiguous r => .ambiguous r
514  
515  -- ═══════════════════════════════════════════════════════════════════════════════
516  -- STRICT LENGTH-DELIMITED PARSING
517  -- ═══════════════════════════════════════════════════════════════════════════════
518  
519  /-- Maximum length-delimited size (256 MB) -/
520  def maxLengthDelimitedSize : Nat := 256 * 1024 * 1024
521  
522  /--
523  Decode length-delimited field with strict validation.
524  
525  Detects:
526  - Varint overflow in length
527  - Length exceeds remaining bytes
528  - Length exceeds maximum
529  -/
530  def decodeLengthDelimitedStrict (bs : Bytes) : StrictParseResult LengthDelimited :=
531    match decodeVarintStrict bs with
532    | .ok (len, _) rest =>
533      if len > maxLengthDelimitedSize then
534        .ambiguous (.lengthOverflow len maxLengthDelimitedSize)
535      else if rest.size < len then
536        .incomplete (len - rest.size)
537      else
538        .ok ⟨rest.extract 0 len⟩ (rest.extract len rest.size)
539    | .incomplete n => .incomplete n
540    | .ambiguous r => .ambiguous r
541  
542  /--
543  Decode string with UTF-8 validation.
544  -/
545  def decodeProtoStringStrict (bs : Bytes) : StrictParseResult ProtoString :=
546    match decodeLengthDelimitedStrict bs with
547    | .ok ld rest =>
548      match String.fromUTF8? ld.data with
549      | some s => .ok ⟨s⟩ rest
550      | none => .ambiguous .invalidUtf8
551    | .incomplete n => .incomplete n
552    | .ambiguous r => .ambiguous r
553  
554  -- ═══════════════════════════════════════════════════════════════════════════════
555  -- STRICT GRPC FRAMING
556  -- ═══════════════════════════════════════════════════════════════════════════════
557  
558  /-- Maximum gRPC frame size (4 MB default) -/
559  def maxGrpcFrameSize : Nat := 4 * 1024 * 1024
560  
561  /--
562  Decode gRPC frame with strict validation.
563  
564  Detects:
565  - Invalid compression flag (not 0 or 1)
566  - Frame length exceeds maximum
567  - Frame length exceeds available bytes
568  -/
569  def decodeGrpcFrameStrict (bs : Bytes) : StrictParseResult GrpcFrame :=
570    if bs.size < 5 then
571      .incomplete (5 - bs.size)
572    else
573      let flag := bs.data[0]!
574      -- Compression flag must be 0 or 1
575      if flag > 1 then
576        .ambiguous (.invalidCompressionFlag flag)
577      else
578        let compressed := flag == 1
579        -- Big-endian length
580        let b1 := bs.data[1]!.toNat
581        let b2 := bs.data[2]!.toNat
582        let b3 := bs.data[3]!.toNat
583        let b4 := bs.data[4]!.toNat
584        let len := (b1 <<< 24) + (b2 <<< 16) + (b3 <<< 8) + b4
585        -- Check length limits
586        if len > maxGrpcFrameSize then
587          .ambiguous (.grpcFrameTooLarge len)
588        else
589          let rest := bs.extract 5 bs.size
590          if rest.size < len then
591            .incomplete (len - rest.size)
592          else
593            .ok ⟨compressed, rest.extract 0 len⟩ (rest.extract len rest.size)
594  
595  -- ═══════════════════════════════════════════════════════════════════════════════
596  -- PARSE STATE AND RESET
597  -- ═══════════════════════════════════════════════════════════════════════════════
598  
599  /-- Protobuf message parse state -/
600  structure ParseState where
601    /-- Current nesting depth -/
602    depth : Nat
603    /-- Bytes consumed so far -/
604    consumed : Nat
605    /-- Leftover bytes from previous parse -/
606    leftover : Bytes
607    deriving Repr, DecidableEq
608  
609  /-- Maximum nesting depth (prevent stack overflow) -/
610  def maxNestingDepth : Nat := 100
611  
612  /-- Initial parse state -/
613  def initParseState : ParseState := {
614    depth := 0
615    consumed := 0
616    leftover := ByteArray.empty
617  }
618  
619  /-- Reset parse state -/
620  def resetParseState (_s : ParseState) : ParseState := initParseState
621  
622  -- ═══════════════════════════════════════════════════════════════════════════════
623  -- RESET THEOREMS
624  -- ═══════════════════════════════════════════════════════════════════════════════
625  
626  /--
627  THEOREM 1: Reset always produces initial state.
628  -/
629  theorem reset_is_initial : ∀ s, resetParseState s = initParseState := by
630    intro s
631    rfl
632  
633  /--
634  THEOREM 2: Reset is idempotent.
635  -/
636  theorem reset_idempotent : ∀ s,
637      resetParseState (resetParseState s) = resetParseState s := by
638    intro s
639    rfl
640  
641  /--
642  THEOREM 3: No information leakage across reset.
643  -/
644  theorem no_leakage : ∀ s₁ s₂,
645      resetParseState s₁ = resetParseState s₂ := by
646    intro s₁ s₂
647    rfl
648  
649  /--
650  THEOREM 4: Initial depth is zero.
651  -/
652  theorem init_depth_zero : initParseState.depth = 0 := rfl
653  
654  /--
655  THEOREM 5: Reset clears depth.
656  -/
657  theorem reset_clears_depth : ∀ s, (resetParseState s).depth = 0 := by
658    intro s
659    rfl
660  
661  -- ═══════════════════════════════════════════════════════════════════════════════
662  -- PARSING THEOREMS
663  -- ═══════════════════════════════════════════════════════════════════════════════
664  
665  /--
666  THEOREM 6: decodeVarintStrict is deterministic.
667  -/
668  theorem decodeVarintStrict_deterministic (b1 b2 : Bytes) :
669      b1 = b2 → decodeVarintStrict b1 = decodeVarintStrict b2 := by
670    intro h
671    rw [h]
672  
673  /--
674  THEOREM 7: Deprecated wire type 3 triggers ambiguity.
675  -/
676  theorem wireType3_ambiguous : isDeprecatedWireType 3 = true := rfl
677  
678  /--
679  THEOREM 8: Deprecated wire type 4 triggers ambiguity.
680  -/
681  theorem wireType4_ambiguous : isDeprecatedWireType 4 = true := rfl
682  
683  /--
684  THEOREM 9: Wire type 6 is invalid.
685  -/
686  theorem wireType6_invalid : isValidWireType 6 = false := rfl
687  
688  /--
689  THEOREM 10: Wire type 7 is invalid.
690  -/
691  theorem wireType7_invalid : isValidWireType 7 = false := rfl
692  
693  /--
694  THEOREM 11: Wire types 0, 1, 2, 5 are valid.
695  -/
696  theorem valid_wire_types :
697      isValidWireType 0 = true ∧
698      isValidWireType 1 = true ∧
699      isValidWireType 2 = true ∧
700      isValidWireType 5 = true := ⟨rfl, rfl, rfl, rfl⟩
701  
702  /--
703  THEOREM 12: gRPC compression flag > 1 is ambiguous.
704  -/
705  theorem grpc_invalid_flag (flag : UInt8) (h : flag > 1) :
706      ∀ bs, bs.size ≥ 5 → bs.data[0]! = flag →
707      ∃ r, decodeGrpcFrameStrict bs = .ambiguous r := by
708    intro bs hsize hflag
709    unfold decodeGrpcFrameStrict
710    have h1 : ¬(bs.size < 5) := by omega
711    simp only [h1, ↓reduceIte, hflag]
712    have h2 : flag > 1 := h
713    simp only [h2, ↓reduceIte]
714    exact ⟨.invalidCompressionFlag flag, rfl⟩
715  
716  -- ═══════════════════════════════════════════════════════════════════════════════
717  -- VERIFICATION STATUS
718  -- ═══════════════════════════════════════════════════════════════════════════════
719  
720  /-!
721  ## Verification Summary
722  
723  ### Core Reset Theorems (all proven, 0 sorry):
724  
725  | Theorem | What's Proven |
726  |---------|---------------|
727  | reset_is_initial | reset always produces initParseState |
728  | reset_idempotent | reset(reset(s)) = reset(s) |
729  | no_leakage | different states reset to identical states |
730  | init_depth_zero | initial depth is 0 |
731  | reset_clears_depth | reset clears nesting depth |
732  
733  ### Wire Format Theorems (all proven, 0 sorry):
734  
735  | Theorem | What's Proven |
736  |---------|---------------|
737  | decodeVarintStrict_deterministic | same bytes → same result |
738  | wireType3_ambiguous | startGroup triggers ambiguity |
739  | wireType4_ambiguous | endGroup triggers ambiguity |
740  | wireType6_invalid | wire type 6 invalid |
741  | wireType7_invalid | wire type 7 invalid |
742  | valid_wire_types | 0, 1, 2, 5 are valid |
743  | grpc_invalid_flag | compression flag > 1 triggers ambiguity |
744  
745  ### Types Defined:
746  
747  | Type | Purpose |
748  |------|---------|
749  | AmbiguityReason | why ambiguity occurred |
750  | StrictParseResult | ok / incomplete / ambiguous |
751  | ParseState | message parse state |
752  
753  ### Key Functions:
754  
755  | Function | Purpose |
756  |----------|---------|
757  | decodeVarintStrict | varint with overflow detection |
758  | decodeTagStrict | tag with wire type validation |
759  | decodeLengthDelimitedStrict | length-delimited with bounds |
760  | decodeProtoStringStrict | string with UTF-8 validation |
761  | decodeGrpcFrameStrict | gRPC frame with flag validation |
762  | resetParseState | return to ground state |
763  
764  **Total: 12 theorems proven, 0 sorry**
765  -/
766  
767  end Cornell.Protobuf