testtranslation.py
"""
Translation module tests
"""

import unittest
import time

import requests

from txtai.pipeline import Translation


class TestTranslation(unittest.TestCase):
    """
    Test cases for the Translation pipeline.
    """

    @classmethod
    def setUpClass(cls):
        """
        Build one Translation instance that is shared by every test in this class.
        """

        cls.translate = Translation()

        # Warm up the model list. HF Hub HTTP errors are retried, with no cap on the number of attempts.
        delay = 1
        while True:
            try:
                cls.translate.lookup("en", "es")
            except requests.exceptions.HTTPError:
                # Back off before the next attempt
                time.sleep(delay)

                # Double the delay each round, never sleeping longer than 16 seconds
                delay = min(delay * 2, 16)
            else:
                # Lookup succeeded, model list is loaded
                break

    def testDetect(self):
        """
        Test language detection
        """

        detected = self.translate.detect(["This is a test language detection."])

        self.assertListEqual(detected, ["en"])

    def testDetectWithCustomFunc(self):
        """
        Test language detection with a caller-supplied detection function
        """

        # Stand-in detector: one "en" entry per input element
        translate = Translation(langdetect=lambda text: ["en" for _ in text])

        detected = translate.detect(["This is a test language detection."])

        self.assertListEqual(detected, ["en"])

    def testLongTranslation(self):
        """
        Test a translation longer than max tokenization length
        """

        # Repeat a sentence 100 times to build a long input
        sentence = "This is a test translation to Spanish. \n"
        translation = self.translate(sentence * 100, "es")

        # Only checks that a result came back
        self.assertIsNotNone(translation)

    def testM2M100Translation(self):
        """
        Test a translation using M2M100 models
        """

        translation = self.translate("This is a test translation to Croatian", "hr")

        # Validate translation text
        self.assertEqual(translation, "Ovo je testni prijevod na hrvatski")

    def testMarianTranslation(self):
        """
        Test a translation using Marian models
        """

        english = "This is a test translation into Spanish"

        # English -> Spanish
        spanish = self.translate(english, "es")
        self.assertEqual(spanish, "Esta es una traducción de prueba al español")

        # Spanish -> English should give back the starting text
        roundtrip = self.translate(spanish, "en")
        self.assertEqual(roundtrip, english)

    def testNoLang(self):
        """
        Test no matching language id
        """

        langid = self.translate.langid([], "zz")

        self.assertIsNone(langid)

    def testNoModel(self):
        """
        Test the model path returned when no known available model is found
        """

        path = self.translate.modelpath("zz", "en")

        self.assertEqual(path, "Helsinki-NLP/opus-mt-mul-en")

    def testNoTranslation(self):
        """
        Test translation skipped when text already in destination language
        """

        text = "This is a test translation to English"

        # Output must match the input unchanged
        self.assertEqual(text, self.translate(text, "en"))

    def testShowmodelsChunked(self):
        """
        Test a long translation with the showmodels flag. The result is expected
        to be a 3-tuple (translation, language, model) whose first element is a
        single string rather than a nested tuple.
        """

        # Repeat a sentence 100 times to build a long input
        sentence = "This is a test translation to Spanish. \n"
        result = self.translate(sentence * 100, "es", showmodels=True)

        # Exactly three elements in a tuple
        self.assertIsInstance(result, tuple)
        self.assertEqual(len(result), 3)

        translation, language, modelpath = result

        # The translated text is one non-empty string
        self.assertIsInstance(translation, str)
        self.assertIsNotNone(translation)
        self.assertGreater(len(translation), 0)

        # Detected language and model path
        self.assertEqual(language, "en")
        self.assertIsInstance(modelpath, str)

    def testTranslationWithShowmodels(self):
        """
        Test a translation using Marian models with the showmodels flag, which
        also returns the detected language and the model path.
        """

        english = "This is a test translation into Spanish"

        # English -> Spanish
        spanish, language, modelpath = self.translate(english, "es", showmodels=True)

        # Translated text, detected language and model
        self.assertEqual(spanish, "Esta es una traducción de prueba al español")
        self.assertEqual(language, "en")
        self.assertEqual(modelpath, "Helsinki-NLP/opus-mt-en-es")

        # Spanish -> English
        roundtrip, language, modelpath = self.translate(spanish, "en", showmodels=True)

        # Translated text, detected language and model
        self.assertEqual(roundtrip, english)
        self.assertEqual(language, "es")
        self.assertEqual(modelpath, "Helsinki-NLP/opus-mt-es-en")