test_captator_parser_tokenizer.py
# Length checks for ``parser.Tokenizer`` over small fixture documents.
from captator import parser


def test_tokenize_text_1():
    """A single ``[Rank]`` section makes the tokenizer report a length of 20."""
    fixture = """
[Rank]
Secunda die infra Octavam Epiphaniæ;;Semiduplex;;5.6;;ex Sancti/01-06
"""
    expected_length = 20

    tokens = parser.Tokenizer(fixture)

    assert len(tokens) == expected_length


def test_tokenize_text_2():
    """A ``[Rule]`` section with ``key=value`` lines yields a length of 25."""
    fixture = """
[Rule]
Gloria
CredoDA
Prefatio=Epi
Suffragium=Maria2;Papa;Ecclesia;;
Infra octavam Epiphaniæ Domini
"""
    expected_length = 25

    tokens = parser.Tokenizer(fixture)

    assert len(tokens) == expected_length


def test_tokenize_text_3():
    """An ``[Introitus]`` section with ``!``, ``v.`` and ``&`` lines yields 68."""
    fixture = """
[Introitus]
!Malach 3:1; 1 Par 29:12
v. Ecce, advénit dominátor Dóminus: et regnum in manu ejus et potéstas et impérium.
!Ps 71:1
Deus, judícium tuum Regi da: et justítiam tuam Fílio Regis.
&Gloria
v. Ecce, advénit dominátor Dóminus: et regnum in manu ejus et potéstas et impérium.
"""
    expected_length = 68

    tokens = parser.Tokenizer(fixture)

    assert len(tokens) == expected_length


def test_tokenize_text_4():
    """Several sections made of ``@`` reference lines yield a length of 74."""
    fixture = """
[Evangelium]
@Tempora/Nat2-0

[Offertorium]
@Tempora/Nat30

[Secreta]
@Commune/C2

[Commemoratio Secreta] (rubrica tridentina)
!Pro S. Stephano Protomartyre
@Sancti/08-03:Secreta
!Pro S. Joanne Evangelista
@Sancti/12-27:Secreta
!Pro Ss. Innocentibus
@Sancti/12-28:Secreta

[Communio]
@Tempora/Nat30
"""
    expected_length = 74

    tokens = parser.Tokenizer(fixture)

    assert len(tokens) == expected_length