clap-parser-test.hs
1 {-# LANGUAGE DeriveGeneric #-} 2 {-# LANGUAGE OverloadedStrings #-} 3 {-# LANGUAGE RecordWildCards #-} 4 5 {- | Property tests for clap parser using ground truth from Rust fuzzer 6 7 Strategy: 8 1. Run clap-fuzzer.rs with random seeds to generate help text + ground truth 9 2. Parse the help text with our Haskell parser 10 3. Compare parsed result against ground truth JSON 11 -} 12 module Main where 13 14 import Hedgehog 15 import qualified Hedgehog.Gen as Gen 16 import qualified Hedgehog.Range as Range 17 18 import Control.Exception (SomeException) 19 import qualified Control.Exception as E 20 import Control.Monad (forM, forM_, unless, void, when) 21 import Data.Aeson (FromJSON (..), eitherDecodeStrict) 22 import qualified Data.Aeson as Aeson 23 import qualified Data.ByteString.Char8 as BS 24 import Data.Either (isRight) 25 import Data.Maybe (catMaybes, isJust, mapMaybe) 26 import Data.Text (Text) 27 import qualified Data.Text as T 28 import qualified Data.Text.Encoding as TE 29 import qualified Data.Text.IO as TIO 30 import Data.Void 31 import GHC.Generics (Generic) 32 import System.Exit (ExitCode (..)) 33 import System.Process (readProcess, readProcessWithExitCode) 34 import Text.Megaparsec 35 import Text.Megaparsec.Char (char, digitChar, letterChar, string) 36 37 -- ============================================================================ 38 -- Ground truth types (from JSON) 39 -- ============================================================================ 40 41 data GroundTruth = GroundTruth 42 { gtName :: Text 43 , gtPositionalArgs :: [PositionalTruth] 44 , gtOptions :: [OptionTruth] 45 , gtSubcommands :: [SubcommandTruth] 46 } 47 deriving (Show, Eq, Generic) 48 49 data OptionTruth = OptionTruth 50 { otShort :: Maybe Char 51 , otLong :: Maybe Text 52 , otValueName :: Maybe Text 53 , otHelp :: Text 54 , otRequired :: Bool 55 , otTakesValue :: Bool 56 , otMultiple :: Bool 57 , otEnv :: Maybe Text 58 , otDefault :: Maybe Text 59 } 60 deriving (Show, Eq, Generic) 61 62 data SubcommandTruth = SubcommandTruth 63 { stName :: Text 64 , stAbout :: Text 65 } 66 deriving (Show, Eq, Generic) 67 68 data PositionalTruth = PositionalTruth 69 { ptName :: Text 70 , ptHelp :: Text 71 , ptRequired :: Bool 72 , ptMultiple :: Bool 73 } 74 deriving (Show, Eq, Generic) 75 76 instance FromJSON GroundTruth where 77 parseJSON = Aeson.withObject "GroundTruth" $ \o -> 78 GroundTruth 79 <$> o Aeson..: "name" 80 <*> o Aeson..: "positional_args" 81 <*> o Aeson..: "options" 82 <*> o Aeson..: "subcommands" 83 84 instance FromJSON PositionalTruth where 85 parseJSON = Aeson.withObject "PositionalTruth" $ \o -> 86 PositionalTruth 87 <$> o Aeson..: "name" 88 <*> o Aeson..: "help" 89 <*> o Aeson..: "required" 90 <*> o Aeson..: "multiple" 91 92 instance FromJSON OptionTruth where 93 parseJSON = Aeson.withObject "OptionTruth" $ \o -> 94 OptionTruth 95 <$> o Aeson..: "short" 96 <*> o Aeson..: "long" 97 <*> o Aeson..: "value_name" 98 <*> o Aeson..: "help" 99 <*> o Aeson..: "required" 100 <*> o Aeson..: "takes_value" 101 <*> o Aeson..: "multiple" 102 <*> o Aeson..: "env" 103 <*> o Aeson..: "default" 104 105 instance FromJSON SubcommandTruth where 106 parseJSON = Aeson.withObject "SubcommandTruth" $ \o -> 107 SubcommandTruth 108 <$> o Aeson..: "name" 109 <*> o Aeson..: "about" 110 111 -- ============================================================================ 112 -- Parsed types (what our parser produces) 113 -- ============================================================================ 114 115 data ClapOption = ClapOption 116 { optShort :: Maybe Char 117 , optLong :: Maybe Text 118 , optArg :: Maybe Text 119 , optDesc :: Text 120 } 121 deriving (Show, Eq) 122 123 data ClapPositional = ClapPositional 124 { posName :: Text 125 , posRequired :: Bool -- <NAME> vs [NAME] 126 } 127 deriving (Show, Eq) 128 129 data ClapSection = ClapSection 130 { secName :: Text 131 , secOptions :: [ClapOption] 132 } 133 deriving (Show, Eq) 134 135 data ClapHelp = ClapHelp 136 { helpSections :: [ClapSection] 137 , helpPositionals :: [ClapPositional] 138 } 139 deriving (Show, Eq) 140 141 -- ============================================================================ 142 -- Parser (from clap-parser.hs) 143 -- ============================================================================ 144 145 type Parser = Parsec Void Text 146 147 hspace :: Parser () 148 hspace = void $ takeWhileP Nothing (\c -> c == ' ' || c == '\t') 149 150 restOfLine :: Parser Text 151 restOfLine = do 152 content <- takeWhileP Nothing (/= '\n') 153 void (char '\n') <|> eof 154 return content 155 156 shortOpt :: Parser Char 157 shortOpt = char '-' *> (letterChar <|> digitChar <|> char '.') 158 159 longOpt :: Parser Text 160 longOpt = string "--" *> takeWhile1P Nothing isOptChar 161 where 162 isOptChar c = c == '-' || c == '_' || c `elem` ['a' .. 'z'] || c `elem` ['A' .. 'Z'] || c `elem` ['0' .. '9'] 163 164 argPlaceholder :: Parser Text 165 argPlaceholder = 166 choice 167 [ char '=' *> takeWhile1P Nothing (\c -> c /= ' ' && c /= '\t' && c /= '\n') 168 , try $ hspace *> char '<' *> takeWhile1P Nothing (/= '>') <* char '>' 169 , char '[' *> (char '=' *> takeWhile1P Nothing (\c -> c /= ']' && c /= '\n')) <* char ']' 170 ] 171 172 optionLine :: Parser ClapOption 173 optionLine = do 174 hspace 175 mShort <- optional $ try (shortOpt <* optional (string ", ")) 176 mLong <- optional $ try longOpt 177 mArg <- optional $ try argPlaceholder 178 hspace 179 desc <- restOfLine 180 case (mShort, mLong) of 181 (Nothing, Nothing) -> empty 182 _ -> 183 return 184 ClapOption 185 { optShort = mShort 186 , optLong = mLong 187 , optArg = mArg 188 , optDesc = T.strip desc 189 } 190 191 isSectionLine :: Text -> Bool 192 isSectionLine t = 193 let stripped = T.strip t 194 in not (T.null stripped) 195 && T.last stripped == ':' 196 && not (T.isPrefixOf "-" stripped) 197 && not (T.isPrefixOf " " t) 198 && not (T.isInfixOf "://" stripped) 199 200 -- Parse a positional argument line like " [NAME]" or " <NAME>" 201 positionalLine :: Parser ClapPositional 202 positionalLine = do 203 hspace 204 (name, req) <- 205 choice 206 [ do 207 -- Required: <NAME> 208 _ <- char '<' 209 n <- takeWhile1P Nothing (\c -> c /= '>' && c /= '\n') 210 _ <- char '>' 211 return (n, True) 212 , do 213 -- Optional: [NAME] 214 _ <- char '[' 215 n <- takeWhile1P Nothing (\c -> c /= ']' && c /= '\n') 216 _ <- char ']' 217 return (n, False) 218 ] 219 _ <- restOfLine 220 return ClapPositional{posName = name, posRequired = req} 221 222 parseHelp :: Text -> ClapHelp 223 parseHelp input = 224 let lns = T.lines input 225 (sections, positionals) = go [] [] Nothing lns 226 in ClapHelp{helpSections = sections, helpPositionals = positionals} 227 where 228 go secAcc posAcc _curSec [] = 229 case _curSec of 230 Just (name, opts) -> (reverse (ClapSection name (reverse opts) : secAcc), reverse posAcc) 231 Nothing -> (reverse secAcc, reverse posAcc) 232 go secAcc posAcc curSec (line : rest) 233 | isSectionLine line = 234 let secName = T.dropEnd 1 (T.strip line) 235 secAcc' = case curSec of 236 Just (name, opts) -> ClapSection name (reverse opts) : secAcc 237 Nothing -> secAcc 238 in go secAcc' posAcc (Just (secName, [])) rest 239 | Just (name, opts) <- curSec = 240 -- In Arguments section, try to parse positional first 241 if name == "Arguments" || name == "POSITIONAL ARGUMENTS" 242 then case parse positionalLine "" (line <> "\n") of 243 Right pos -> go secAcc (pos : posAcc) curSec rest 244 Left _ -> 245 if T.null (T.strip line) || T.isPrefixOf " " line 246 then go secAcc posAcc curSec rest 247 else go (ClapSection name (reverse opts) : secAcc) posAcc Nothing rest 248 else case parse optionLine "" (line <> "\n") of 249 Right opt -> go secAcc posAcc (Just (name, opt : opts)) rest 250 Left _ -> 251 -- Skip blank lines and continuation lines (indented) 252 if T.null (T.strip line) || T.isPrefixOf " " line 253 then go secAcc posAcc curSec rest 254 else go (ClapSection name (reverse opts) : secAcc) posAcc Nothing rest 255 | otherwise = go secAcc posAcc curSec rest 256 257 -- ============================================================================ 258 -- Comparison logic 259 -- ============================================================================ 260 261 -- | Find the Options section from parsed help 262 getOptionsSection :: ClapHelp -> Maybe ClapSection 263 getOptionsSection help = 264 case filter (\s -> secName s == "Options") (helpSections help) of 265 [sec] -> Just sec 266 _ -> Nothing 267 268 -- | Check if a parsed option matches ground truth option 269 optionMatches :: ClapOption -> OptionTruth -> Bool 270 optionMatches parsed truth = 271 optShort parsed == otShort truth 272 && optLong parsed == otLong truth 273 && 274 -- Value name comparison: parsed has it iff truth takes_value 275 (isJust (optArg parsed) == otTakesValue truth) 276 && 277 -- If both have value names, they should match 278 ( case (optArg parsed, otValueName truth) of 279 (Just p, Just t) -> p == t 280 _ -> True 281 ) 282 283 -- | Find matching ground truth option for a parsed option 284 findMatch :: ClapOption -> [OptionTruth] -> Maybe OptionTruth 285 findMatch parsed truths = 286 case filter (optionMatches parsed) truths of 287 [t] -> Just t 288 _ -> Nothing 289 290 -- | Comparison result 291 data CompareResult = CompareResult 292 { crOptMatched :: Int 293 , crOptMissedFromTruth :: [OptionTruth] -- In truth but not parsed 294 , crOptExtraInParsed :: [ClapOption] -- In parsed but not truth (excluding -h, -V) 295 , crPosMatched :: Int 296 , crPosMissedFromTruth :: [PositionalTruth] 297 , crPosExtraInParsed :: [ClapPositional] 298 } 299 deriving (Show) 300 301 -- | Check if positional matches 302 positionalMatches :: ClapPositional -> PositionalTruth -> Bool 303 positionalMatches parsed truth = 304 posName parsed == ptName truth 305 && posRequired parsed == ptRequired truth 306 307 -- | Compare parsed options against ground truth 308 compareOptions :: [ClapOption] -> [OptionTruth] -> ([OptionTruth], [ClapOption], Int) 309 compareOptions parsed truths = 310 let 311 -- Filter out help and version from parsed (clap adds these automatically) 312 userParsed = filter (\o -> optLong o /= Just "help" && optLong o /= Just "version") parsed 313 314 -- Try to match each parsed option 315 matches = [(p, findMatch p truths) | p <- userParsed] 316 matched = [(p, t) | (p, Just t) <- matches] 317 unmatched = [p | (p, Nothing) <- matches] 318 319 -- Find truth options that weren't matched 320 matchedTruths = map snd matched 321 missedTruths = filter (`notElem` matchedTruths) truths 322 in 323 (missedTruths, unmatched, length matched) 324 325 -- | Compare parsed positionals against ground truth 326 comparePositionals :: [ClapPositional] -> [PositionalTruth] -> ([PositionalTruth], [ClapPositional], Int) 327 comparePositionals parsed truths = 328 let matches = [(p, filter (positionalMatches p) truths) | p <- parsed] 329 matched = [(p, t) | (p, [t]) <- matches] 330 unmatched = [p | (p, []) <- matches] 331 matchedTruths = map snd matched 332 missedTruths = filter (`notElem` matchedTruths) truths 333 in (missedTruths, unmatched, length matched) 334 335 -- | Full comparison 336 compareAll :: ClapHelp -> GroundTruth -> CompareResult 337 compareAll parsed truth = 338 let optSection = filter (\s -> secName s == "Options") (helpSections parsed) 339 parsedOpts = concatMap secOptions optSection 340 parsedPos = helpPositionals parsed 341 342 (optMissed, optExtra, optMatched) = compareOptions parsedOpts (gtOptions truth) 343 (posMissed, posExtra, posMatched) = comparePositionals parsedPos (gtPositionalArgs truth) 344 in CompareResult 345 { crOptMatched = optMatched 346 , crOptMissedFromTruth = optMissed 347 , crOptExtraInParsed = optExtra 348 , crPosMatched = posMatched 349 , crPosMissedFromTruth = posMissed 350 , crPosExtraInParsed = posExtra 351 } 352 353 isGoodResult :: CompareResult -> Bool 354 isGoodResult cr = 355 null (crOptMissedFromTruth cr) 356 && null (crOptExtraInParsed cr) 357 && null (crPosMissedFromTruth cr) 358 && null (crPosExtraInParsed cr) 359 360 -- ============================================================================ 361 -- Fuzzer integration 362 -- ============================================================================ 363 364 -- | Run the Rust fuzzer and get output 365 runFuzzer :: Int -> Bool -> IO (Either String (GroundTruth, Text)) 366 runFuzzer seed useShort = do 367 let args = ["clap-fuzzer.rs", "--seed", show seed] ++ ["--short" | useShort] 368 result <- E.try $ readProcessWithExitCode "rust-script" args "" 369 case result of 370 Left (e :: SomeException) -> return $ Left $ "Failed to run fuzzer: " ++ show e 371 Right (ExitSuccess, stdout, _stderr) -> do 372 let lns = lines stdout 373 case lns of 374 (jsonLine : "---" : helpLines) -> 375 case eitherDecodeStrict (BS.pack jsonLine) of 376 Left err -> return $ Left $ "JSON parse error: " ++ err 377 Right truth -> return $ Right (truth, T.pack $ unlines helpLines) 378 _ -> return $ Left $ "Unexpected fuzzer output format" 379 Right (ExitFailure code, _, stderr) -> 380 return $ Left $ "Fuzzer failed with code " ++ show code ++ ": " ++ stderr 381 382 -- | Test a single seed 383 testSeed :: Int -> Bool -> IO (Either String CompareResult) 384 testSeed seed useShort = do 385 fuzzerResult <- runFuzzer seed useShort 386 case fuzzerResult of 387 Left err -> return $ Left err 388 Right (truth, helpText) -> do 389 let parsed = parseHelp helpText 390 return $ Right $ compareAll parsed truth 391 392 -- ============================================================================ 393 -- Generators for pure Haskell testing (fallback) 394 -- ============================================================================ 395 396 genShortChar :: Gen Char 397 genShortChar = Gen.element $ ['a' .. 'z'] ++ ['A' .. 'Z'] ++ ['0' .. '9'] 398 399 genLongName :: Gen Text 400 genLongName = do 401 first <- Gen.element ['a' .. 'z'] 402 rest <- Gen.text (Range.linear 1 15) (Gen.element $ ['a' .. 'z'] ++ ['-']) 403 let name = T.cons first rest 404 cleaned = T.replace "--" "-" name 405 return $ T.dropWhileEnd (== '-') cleaned 406 407 genArgName :: Gen Text 408 genArgName = do 409 first <- Gen.element ['A' .. 'Z'] 410 rest <- Gen.text (Range.linear 0 8) (Gen.element $ ['A' .. 'Z'] ++ ['_']) 411 return $ T.cons first rest 412 413 genDesc :: Gen Text 414 genDesc = do 415 words' <- Gen.list (Range.linear 1 10) genWord 416 return $ T.intercalate " " words' 417 where 418 genWord = Gen.text (Range.linear 1 10) (Gen.element $ ['a' .. 'z'] ++ ['A' .. 'Z']) 419 420 genOption :: Gen ClapOption 421 genOption = do 422 hasShort <- Gen.bool 423 hasLong <- Gen.bool 424 let hasShort' = hasShort || not hasLong 425 426 mShort <- if hasShort' then Just <$> genShortChar else pure Nothing 427 mLong <- if hasLong || not hasShort' then Just <$> genLongName else pure Nothing 428 mArg <- Gen.maybe genArgName 429 desc <- genDesc 430 431 return 432 ClapOption 433 { optShort = mShort 434 , optLong = mLong 435 , optArg = mArg 436 , optDesc = desc 437 } 438 439 genSectionName :: Gen Text 440 genSectionName = do 441 words' <- Gen.list (Range.linear 1 3) genUpperWord 442 return $ T.intercalate " " words' 443 where 444 genUpperWord = Gen.text (Range.linear 2 10) (Gen.element ['A' .. 'Z']) 445 446 genSection :: Gen ClapSection 447 genSection = do 448 name <- genSectionName 449 opts <- Gen.list (Range.linear 1 10) genOption 450 return ClapSection{secName = name, secOptions = opts} 451 452 genHelp :: Gen ClapHelp 453 genHelp = do 454 sections <- Gen.list (Range.linear 1 5) genSection 455 return ClapHelp{helpSections = sections, helpPositionals = []} 456 457 -- ============================================================================ 458 -- Renderer (for roundtrip testing) 459 -- ============================================================================ 460 461 renderHelp :: ClapHelp -> Text 462 renderHelp ClapHelp{..} = T.unlines $ concatMap renderSection helpSections 463 464 -- Note: We don't render positionals in roundtrip tests since they use the Options format 465 466 renderSection :: ClapSection -> [Text] 467 renderSection ClapSection{..} = 468 [secName <> ":"] ++ map renderOption secOptions 469 470 renderOption :: ClapOption -> Text 471 renderOption ClapOption{..} = 472 let short = maybe "" (\c -> "-" <> T.singleton c) optShort 473 long = maybe "" ("--" <>) optLong 474 arg = maybe "" ("=" <>) optArg 475 flags = case (optShort, optLong) of 476 (Just _, Just l) -> short <> ", --" <> l <> arg 477 (Just _, Nothing) -> short <> arg 478 (Nothing, Just l) -> "--" <> l <> arg 479 (Nothing, Nothing) -> "" 480 padding = T.replicate (max 1 (30 - T.length flags)) " " 481 in " " <> flags <> padding <> optDesc 482 483 -- ============================================================================ 484 -- Properties 485 -- ============================================================================ 486 487 normalize :: ClapHelp -> ClapHelp 488 normalize h = h{helpSections = map normSec (helpSections h)} 489 where 490 normSec s = s{secOptions = map normOpt (secOptions s)} 491 normOpt o = o{optDesc = T.strip (optDesc o)} 492 493 -- | Roundtrip: generate -> render -> parse -> compare 494 prop_roundtrip :: Property 495 prop_roundtrip = withTests 500 $ property $ do 496 help <- forAll genHelp 497 let rendered = renderHelp help 498 parsed = parseHelp rendered 499 normalize help === normalize parsed 500 501 -- | Parse random unicode without crashing 502 prop_parse_nocrash :: Property 503 prop_parse_nocrash = withTests 500 $ property $ do 504 text <- forAll $ Gen.text (Range.linear 0 2000) Gen.unicode 505 let result = parseHelp text 506 assert $ length (helpSections result) >= 0 507 508 -- | All parsed options have at least short or long 509 prop_options_valid :: Property 510 prop_options_valid = withTests 500 $ property $ do 511 help <- forAll genHelp 512 let rendered = renderHelp help 513 parsed = parseHelp rendered 514 forM_ (helpSections parsed) $ \sec -> 515 forM_ (secOptions sec) $ \opt -> 516 assert $ isJust (optShort opt) || isJust (optLong opt) 517 518 -- ============================================================================ 519 -- Main 520 -- ============================================================================ 521 522 main :: IO () 523 main = do 524 putStrLn "=== Clap Parser Tests ===" 525 putStrLn "" 526 527 -- Run Hedgehog roundtrip tests first 528 putStrLn "--- Hedgehog Roundtrip Tests ---" 529 roundtripOk <- 530 checkParallel $ 531 Group 532 "Roundtrip" 533 [ ("prop_roundtrip", prop_roundtrip) 534 , ("prop_parse_nocrash", prop_parse_nocrash) 535 , ("prop_options_valid", prop_options_valid) 536 ] 537 putStrLn "" 538 539 -- Now test against real clap output 540 putStrLn "--- Fuzzer Integration Tests ---" 541 putStrLn "Testing parser against real clap output..." 542 putStrLn "" 543 544 -- Test a range of seeds 545 let seeds = [1 .. 500] 546 results <- forM seeds $ \seed -> do 547 shortResult <- testSeed seed True 548 longResult <- testSeed seed False 549 return (seed, shortResult, longResult) 550 551 -- Summarize results 552 let failures = 553 [ (s, sr, lr) 554 | (s, sr, lr) <- results 555 , not (either (const True) isGoodResult sr) 556 || not (either (const True) isGoodResult lr) 557 ] 558 559 errors = 560 [(s, e) | (s, Left e, _) <- results] 561 ++ [(s, e) | (s, _, Left e) <- results] 562 563 putStrLn $ "Tested " ++ show (length seeds) ++ " seeds (short + long help)" 564 putStrLn $ "Errors (fuzzer/setup): " ++ show (length errors) 565 putStrLn $ "Parse mismatches: " ++ show (length failures - length errors) 566 putStrLn "" 567 568 -- Show some failures 569 when (not (null failures)) $ do 570 putStrLn "=== Sample Failures ===" 571 forM_ (take 3 failures) $ \(seed, shortRes, longRes) -> do 572 putStrLn $ "Seed " ++ show seed ++ ":" 573 let showResult label cr = do 574 putStrLn $ 575 " " 576 ++ label 577 ++ ": opts=" 578 ++ show (crOptMatched cr) 579 ++ " pos=" 580 ++ show (crPosMatched cr) 581 when (not (null (crOptMissedFromTruth cr))) $ do 582 putStrLn $ " Opts missed from truth:" 583 forM_ (crOptMissedFromTruth cr) $ \t -> 584 putStrLn $ 585 " " 586 ++ show (otShort t) 587 ++ " / " 588 ++ show (otLong t) 589 ++ " takes_value=" 590 ++ show (otTakesValue t) 591 when (not (null (crOptExtraInParsed cr))) $ do 592 putStrLn $ " Opts extra in parsed:" 593 forM_ (crOptExtraInParsed cr) $ \p -> 594 putStrLn $ " " ++ show (optShort p) ++ " / " ++ show (optLong p) 595 when (not (null (crPosMissedFromTruth cr))) $ do 596 putStrLn $ " Positionals missed from truth:" 597 forM_ (crPosMissedFromTruth cr) $ \t -> 598 putStrLn $ " " ++ T.unpack (ptName t) ++ " required=" ++ show (ptRequired t) 599 when (not (null (crPosExtraInParsed cr))) $ do 600 putStrLn $ " Positionals extra in parsed:" 601 forM_ (crPosExtraInParsed cr) $ \p -> 602 putStrLn $ " " ++ T.unpack (posName p) ++ " required=" ++ show (posRequired p) 603 case shortRes of 604 Left err -> putStrLn $ " Short: ERROR - " ++ err 605 Right cr -> unless (isGoodResult cr) $ showResult "Short" cr 606 case longRes of 607 Left err -> putStrLn $ " Long: ERROR - " ++ err 608 Right cr -> unless (isGoodResult cr) $ showResult "Long" cr 609 putStrLn "" 610 611 -- Final verdict 612 putStrLn "=== Summary ===" 613 if roundtripOk && null failures 614 then putStrLn "All tests passed!" 615 else do 616 unless roundtripOk $ putStrLn "Hedgehog tests FAILED" 617 unless (null failures) $ putStrLn $ "Fuzzer tests: " ++ show (length failures) ++ " failures" 618 putStrLn "SOME TESTS FAILED"