Core.hs
1 {-| 2 Module : Gargantext.Core 3 Description : Supported Natural language 4 Copyright : (c) CNRS, 2017-Present 5 License : AGPL + CECILL v3 6 Maintainer : team@gargantext.org 7 Stability : experimental 8 Portability : POSIX 9 10 -} 11 12 {-# LANGUAGE DeriveAnyClass #-} 13 {-# LANGUAGE ScopedTypeVariables #-} 14 15 module Gargantext.Core 16 where 17 18 import Data.Aeson 19 import Data.LanguageCodes qualified as ISO639 20 import Data.Bimap qualified as Bimap 21 import Data.Bimap (Bimap) 22 import Data.Morpheus.Types (GQLType) 23 import Data.Swagger 24 import Data.Text (pack) 25 import Gargantext.Prelude hiding (All) 26 import Servant.API 27 import Test.QuickCheck 28 import Control.Exception (throw) 29 import Prelude (userError) 30 31 ------------------------------------------------------------------------ 32 -- | Language of a Text 33 -- For simplicity, we suppose text has an homogenous language 34 -- 35 -- - EN == english 36 -- - FR == french 37 -- - DE == deutch 38 -- - IT == italian 39 -- - ES == spanish 40 -- - PL == polish 41 -- - ZH == chinese 42 -- 43 -- ... add your language and help us to implement it (: 44 45 -- | All languages supported 46 -- NOTE: Use international country codes 47 -- https://en.wikipedia.org/wiki/List_of_ISO_3166_country_codes 48 -- TODO This should be deprecated in favor of iso-639 library 49 data Lang = DE 50 | EL 51 | EN 52 | ES 53 | FR 54 | IT 55 | PL 56 | PT 57 | RU 58 | UK 59 | ZH 60 deriving (Read, Show, Eq, Ord, Enum, Bounded, Generic, GQLType) 61 62 -- | Defaults to 'EN' in all those places where a language is mandatory, 63 -- but an optional one has been passed. 64 withDefaultLanguage :: Maybe Lang -> Lang 65 withDefaultLanguage = fromMaybe defaultLanguage 66 67 -- | The default 'Lang'. 68 defaultLanguage :: Lang 69 defaultLanguage = EN 70 71 instance ToJSON Lang 72 instance FromJSON Lang 73 instance ToSchema Lang where 74 declareNamedSchema = genericDeclareNamedSchemaUnrestricted defaultSchemaOptions 75 instance FromHttpApiData Lang 76 where 77 -- parseUrlPiece is exactly the 'read' instance, 78 -- if we are disciplined. Either way, this needs to 79 -- be tested. 80 parseUrlPiece fragment = case readMaybe fragment of 81 Nothing -> Left $ "Unexpected value of Lang: " <> fragment 82 Just lang -> Right lang 83 instance ToHttpApiData Lang where 84 toUrlPiece = pack . show 85 instance Hashable Lang 86 instance Arbitrary Lang where 87 arbitrary = arbitraryBoundedEnum 88 89 toISO639 :: Lang -> ISO639.ISO639_1 90 toISO639 DE = ISO639.DE 91 toISO639 EL = ISO639.EL 92 toISO639 EN = ISO639.EN 93 toISO639 ES = ISO639.ES 94 toISO639 FR = ISO639.FR 95 toISO639 IT = ISO639.IT 96 toISO639 PL = ISO639.PL 97 toISO639 PT = ISO639.PT 98 toISO639 RU = ISO639.RU 99 toISO639 UK = ISO639.UK 100 toISO639 ZH = ISO639.ZH 101 102 iso639ToText :: ISO639.ISO639_1 -> Text 103 iso639ToText la = pack [a, b] 104 where 105 (a, b) = ISO639.toChars la 106 107 -- | https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes 108 toISO639Lang :: Lang -> Text 109 toISO639Lang DE = "de" 110 toISO639Lang EL = "el" 111 toISO639Lang EN = "en" 112 toISO639Lang ES = "es" 113 toISO639Lang FR = "fr" 114 toISO639Lang IT = "it" 115 toISO639Lang PL = "pl" 116 toISO639Lang PT = "pt" 117 toISO639Lang RU = "ru" 118 toISO639Lang UK = "uk" 119 toISO639Lang ZH = "zh" 120 121 allLangs :: [Lang] 122 allLangs = [minBound .. maxBound] 123 124 class HasDBid a where 125 toDBid :: a -> Int 126 lookupDBid :: Int -> Maybe a 127 128 -- NOTE: We try to use numeric codes for countries 129 -- https://en.wikipedia.org/wiki/List_of_ISO_3166_country_codes 130 -- https://en.wikipedia.org/wiki/ISO_3166-1_numeric#004 131 -- The pattern matching ensures this mapping will always be total 132 -- once we add a new 'Lang'. 133 langIds :: Bimap Lang Int 134 langIds = Bimap.fromList $ allLangs <&> \lid -> case lid of 135 DE -> (lid, 276) 136 EL -> (lid, 300) 137 EN -> (lid, 2) 138 ES -> (lid, 724) 139 FR -> (lid, 1) 140 IT -> (lid, 380) 141 PL -> (lid, 616) 142 PT -> (lid, 620) 143 RU -> (lid, 643) 144 UK -> (lid, 804) 145 ZH -> (lid, 156) 146 147 instance HasDBid Lang where 148 -- /NOTE/ this lookup cannot fail because 'dbIds' is defined as a total function 149 -- over its domain. 150 toDBid lang = langIds Bimap.! lang 151 lookupDBid dbId = Bimap.lookupR dbId langIds 152 153 ------------------------------------------------------------------------ 154 data NLPServerConfig = NLPServerConfig 155 { server :: !PosTagAlgo 156 , url :: !URI } 157 deriving (Show, Eq, Generic) 158 ------------------------------------------------------------------------ 159 type Form = Text 160 type Lem = Text 161 ------------------------------------------------------------------------ 162 data PosTagAlgo = CoreNLP | JohnSnowServer | Spacy 163 deriving (Show, Read, Eq, Ord, Generic, GQLType) 164 165 instance Hashable PosTagAlgo 166 167 instance HasDBid PosTagAlgo where 168 toDBid CoreNLP = 1 169 toDBid JohnSnowServer = 2 170 toDBid Spacy = 3 171 lookupDBid 1 = Just CoreNLP 172 lookupDBid 2 = Just JohnSnowServer 173 lookupDBid 3 = Just Spacy 174 lookupDBid _ = Nothing 175 176 177 -- | Tries to convert the given integer into the relevant DB identifier, failing 178 -- with an error if the conversion cannot be performed. 179 fromDBid :: forall a. (HasCallStack, HasDBid a, Typeable a) => Int -> a 180 fromDBid i = case lookupDBid i of 181 Nothing -> 182 let err = userError $ "HasDBid " <> show (typeRep (Proxy :: Proxy a)) <> " not found or not implemented." 183 in throw $ WithStacktrace callStack err 184 Just v -> v