read.py
"""
Read imports
"""

import re

from smolagents import Tool

from ...pipeline import Textractor


class ReadTool(Tool):
    """
    Agent tool that loads the content found at a file path or url. Content is extracted with
    a Textractor pipeline, whitespace-normalized and capped at a configurable length.
    """

    # pylint: disable=W0231
    def __init__(self, maxlength=40000):
        """
        Builds a new ReadTool.

        Args:
            maxlength: upper bound on the length of returned content, longer content is cut off
        """

        # Tool metadata - must be set before the base class initializer runs
        self.name = "read"
        self.description = (
            "Implementation of a file read tool. Returns file content. Also supports reading web content. "
            "Use this tool to browse webpages in addition to reading files."
        )
        self.inputs = {
            "path": {
                "type": "string",
                "description": "File path or url",
            }
        }
        self.output_type = "any"

        # Text extraction pipeline and output size limit
        self.textractor = Textractor()
        self.maxlength = maxlength

        # Base class initializer runs last, after the attributes above are assigned
        super().__init__()

    # pylint: disable=W0221
    def forward(self, path):
        """
        Loads the content at path and returns it as text.

        Args:
            path: file path or url

        Returns:
            extracted content, limited to the first maxlength characters
        """

        # Run the extraction pipeline on the input path
        text = self.textractor(path)

        # Squash runs of three or more newlines down to a single blank line
        text = re.sub(r"\n{3,}", "\n\n", text)

        # Keep only the leading maxlength characters
        limit = self.maxlength
        return text[:limit]