protocol.py
"""Protocol for preprocessor implementations."""

# The module docstring must be the first statement in the file to be bound to
# ``__doc__``; a ``from __future__`` import is allowed to follow it.
from __future__ import annotations

from typing import Protocol


class Preprocessor(Protocol):
    """Structural interface for text preprocessors.

    Any class that defines a ``preprocess(text: str) -> str`` method
    satisfies this protocol for static type checking; it does not need to
    inherit from ``Preprocessor``. Implementations may remove repeated
    content, clean text, etc. Depending on this protocol rather than on a
    concrete class allows preprocessing strategies to be swapped without
    changing the rest of the code.

    Notes
    -----
    The protocol is not decorated with ``typing.runtime_checkable``, so it
    is intended for static typing and cannot be used with ``isinstance``.
    """

    def preprocess(self, text: str) -> str:
        """Return a preprocessed version of ``text``.

        Parameters
        ----------
        text
            The text to preprocess.

        Returns
        -------
        str
            The preprocessed text, e.g. with repeated content removed.
        """
        # Protocol stub: the body is intentionally empty; implementations
        # supply the actual behavior.
        ...