/ src / Gargantext / Core.hs
Core.hs
  1  {-|
  2  Module      : Gargantext.Core
  3  Description : Supported Natural language
  4  Copyright   : (c) CNRS, 2017-Present
  5  License     : AGPL + CECILL v3
  6  Maintainer  : team@gargantext.org
  7  Stability   : experimental
  8  Portability : POSIX
  9  
 10  -}
 11  
 12  {-# LANGUAGE DeriveAnyClass      #-}
 13  {-# LANGUAGE ScopedTypeVariables #-}
 14  
 15  module Gargantext.Core
 16    where
 17  
 18  import Data.Aeson
 19  import Data.LanguageCodes qualified as ISO639
 20  import Data.Bimap qualified as Bimap
 21  import Data.Bimap (Bimap)
 22  import Data.Morpheus.Types (GQLType)
 23  import Data.Swagger
 24  import Data.Text (pack)
 25  import Gargantext.Prelude hiding (All)
 26  import Servant.API
 27  import Test.QuickCheck
 28  import Control.Exception (throw)
 29  import Prelude (userError)
 30  
 31  ------------------------------------------------------------------------
 32  -- | Language of a Text
 33  -- For simplicity, we suppose text has an homogenous language
 34  --
 35  --  - EN == english
 36  --  - FR == french
 37  --  - DE == deutch
 38  --  - IT == italian
 39  --  - ES == spanish
 40  --  - PL == polish
 41  --  - ZH == chinese
 42  --
 43  --  ... add your language and help us to implement it (:
 44  
 45  -- | All languages supported
 46  -- NOTE: Use international country codes
 47  -- https://en.wikipedia.org/wiki/List_of_ISO_3166_country_codes
 48  -- TODO This should be deprecated in favor of iso-639 library
 49  data Lang = DE
 50            | EL
 51            | EN
 52            | ES
 53            | FR
 54            | IT
 55            | PL
 56            | PT
 57            | RU
 58            | UK
 59            | ZH
 60    deriving (Read, Show, Eq, Ord, Enum, Bounded, Generic, GQLType)
 61  
 62  -- | Defaults to 'EN' in all those places where a language is mandatory,
 63  -- but an optional one has been passed.
 64  withDefaultLanguage :: Maybe Lang -> Lang
 65  withDefaultLanguage = fromMaybe defaultLanguage
 66  
 67  -- | The default 'Lang'.
 68  defaultLanguage :: Lang
 69  defaultLanguage = EN
 70  
 71  instance ToJSON Lang
 72  instance FromJSON Lang
 73  instance ToSchema Lang where
 74    declareNamedSchema = genericDeclareNamedSchemaUnrestricted defaultSchemaOptions
 75  instance FromHttpApiData Lang
 76    where
 77      -- parseUrlPiece is exactly the 'read' instance,
 78      -- if we are disciplined. Either way, this needs to
 79      -- be tested.
 80      parseUrlPiece fragment = case readMaybe fragment of
 81        Nothing   -> Left $ "Unexpected value of Lang: " <> fragment
 82        Just lang -> Right lang
 83  instance ToHttpApiData Lang where
 84    toUrlPiece = pack . show
 85  instance Hashable Lang
 86  instance Arbitrary Lang where
 87    arbitrary = arbitraryBoundedEnum
 88  
 89  toISO639 :: Lang -> ISO639.ISO639_1
 90  toISO639 DE = ISO639.DE
 91  toISO639 EL = ISO639.EL
 92  toISO639 EN = ISO639.EN
 93  toISO639 ES = ISO639.ES
 94  toISO639 FR = ISO639.FR
 95  toISO639 IT = ISO639.IT
 96  toISO639 PL = ISO639.PL
 97  toISO639 PT = ISO639.PT
 98  toISO639 RU = ISO639.RU
 99  toISO639 UK = ISO639.UK
100  toISO639 ZH = ISO639.ZH
101  
102  iso639ToText :: ISO639.ISO639_1 -> Text
103  iso639ToText la = pack [a, b]
104    where
105      (a, b) = ISO639.toChars la
106  
107  -- | https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes
108  toISO639Lang :: Lang -> Text
109  toISO639Lang DE = "de"
110  toISO639Lang EL = "el"
111  toISO639Lang EN = "en"
112  toISO639Lang ES = "es"
113  toISO639Lang FR = "fr"
114  toISO639Lang IT = "it"
115  toISO639Lang PL = "pl"
116  toISO639Lang PT = "pt"
117  toISO639Lang RU = "ru"
118  toISO639Lang UK = "uk"
119  toISO639Lang ZH = "zh"
120  
121  allLangs :: [Lang]
122  allLangs = [minBound .. maxBound]
123  
124  class HasDBid a where
125    toDBid     :: a   -> Int
126    lookupDBid :: Int -> Maybe a
127  
128  -- NOTE: We try to use numeric codes for countries
129  -- https://en.wikipedia.org/wiki/List_of_ISO_3166_country_codes
130  -- https://en.wikipedia.org/wiki/ISO_3166-1_numeric#004
131  -- The pattern matching ensures this mapping will always be total
132  -- once we add a new 'Lang'.
133  langIds :: Bimap Lang Int
134  langIds = Bimap.fromList $ allLangs <&> \lid -> case lid of
135    DE  -> (lid, 276)
136    EL  -> (lid, 300)
137    EN  -> (lid, 2)
138    ES  -> (lid, 724)
139    FR  -> (lid, 1)
140    IT  -> (lid, 380)
141    PL  -> (lid, 616)
142    PT  -> (lid, 620)
143    RU  -> (lid, 643)
144    UK  -> (lid, 804)
145    ZH  -> (lid, 156)
146  
147  instance HasDBid Lang where
148    -- /NOTE/ this lookup cannot fail because 'dbIds' is defined as a total function
149    -- over its domain.
150    toDBid lang     = langIds Bimap.! lang
151    lookupDBid dbId = Bimap.lookupR dbId langIds
152  
153  ------------------------------------------------------------------------
154  data NLPServerConfig = NLPServerConfig
155    { server :: !PosTagAlgo
156    , url    :: !URI }
157    deriving (Show, Eq, Generic)
158  ------------------------------------------------------------------------
159  type Form = Text
160  type Lem  = Text
161  ------------------------------------------------------------------------
162  data PosTagAlgo = CoreNLP | JohnSnowServer | Spacy
163    deriving (Show, Read, Eq, Ord, Generic, GQLType)
164  
165  instance Hashable PosTagAlgo
166  
167  instance HasDBid PosTagAlgo where
168    toDBid CoreNLP        = 1
169    toDBid JohnSnowServer = 2
170    toDBid Spacy          = 3
171    lookupDBid 1          = Just CoreNLP
172    lookupDBid 2          = Just JohnSnowServer
173    lookupDBid 3          = Just Spacy
174    lookupDBid _          = Nothing
175  
176  
177  -- | Tries to convert the given integer into the relevant DB identifier, failing
178  -- with an error if the conversion cannot be performed.
179  fromDBid :: forall a. (HasCallStack, HasDBid a, Typeable a) => Int -> a
180  fromDBid i = case lookupDBid i of
181    Nothing ->
182      let err = userError $ "HasDBid " <> show (typeRep (Proxy :: Proxy a)) <> " not found or not implemented."
183      in throw $ WithStacktrace callStack err
184    Just v  -> v