/ src / python / txtai / scoring / sif.py
sif.py
 1  """
 2  SIF module
 3  """
 4  
 5  import numpy as np
 6  
 7  from .tfidf import TFIDF
 8  
 9  
class SIF(TFIDF):
    """
    Smooth Inverse Frequency (SIF) scoring.

    Scores are computed as a / (a + freq / self.tokens), where "a" is a configurable
    smoothing parameter.
    """

    def __init__(self, config=None):
        """
        Creates a new SIF scoring instance.

        Args:
            config: scoring configuration, passed through to the TFIDF constructor. The optional
                    "a" key sets the SIF smoothing parameter (defaults to 1e-3).
        """

        super().__init__(config)

        # SIF configurable parameters
        # Read from self.config, which is expected to be set by the parent constructor
        self.a = self.config.get("a", 1e-3)

    def computefreq(self, tokens):
        """
        Looks up the frequency of each token in self.wordfreq.

        Args:
            tokens: iterable of tokens

        Returns:
            dict of {token: frequency}
        """

        # Default method computes frequency for a single entry
        # SIF uses word frequencies across entire index
        return {token: self.wordfreq[token] for token in tokens}

    def score(self, freq, idf, length):
        """
        Calculates SIF scores.

        Args:
            freq: token frequency, either a scalar or a numpy array. The input is never modified.
            idf: idf value(s), only used here to compare its shape against freq
            length: not used by SIF (presumably present to match the parent signature - confirm)

        Returns:
            a / (a + freq / self.tokens), with the same shape as freq
        """

        # Set freq to word frequencies across entire index when freq and idf shape don't match
        if isinstance(freq, np.ndarray) and freq.shape != np.array(idf).shape:
            # Build a new array (same shape and dtype) instead of filling in place. This leaves the
            # caller's array untouched, so repeated scoring of the same array gives the same result,
            # and it also works when freq is a read-only array.
            freq = np.full_like(freq, freq.sum())

        # Calculate SIF score
        # NOTE(review): self.tokens is defined outside this class (presumably the total token count
        # of the index, set by TFIDF) - confirm it cannot be 0 when score is called
        return self.a / (self.a + freq / self.tokens)