/ src / python / txtai / agent / tool / read.py
read.py
 1  """
 2  Read imports
 3  """
 4  
 5  import re
 6  
 7  from smolagents import Tool
 8  
 9  from ...pipeline import Textractor
10  
11  
class ReadTool(Tool):
    """
    Agent tool that reads content from a file path or url. Text is pulled from the target
    with the Textractor pipeline, which also handles text extraction for binary files.
    """

    # pylint: disable=W0231
    def __init__(self, maxlength=40000):
        """
        Builds a new ReadTool.

        Args:
            maxlength: upper bound on the length of returned content, anything longer is cut off
        """

        # Extraction pipeline and output length limit
        self.maxlength = maxlength
        self.textractor = Textractor()

        # Tool metadata read by the parent class
        self.name = "read"
        self.description = (
            "Implementation of a file read tool. Returns file content. Also supports reading web content. "
            "Use this tool to browse webpages in addition to reading files."
        )
        self.inputs = {"path": {"type": "string", "description": "File path or url"}}
        self.output_type = "any"

        # Parent constructor validates the parameters above and initializes the tool
        super().__init__()

    # pylint: disable=W0221
    def forward(self, path):
        """
        Extracts the content found at path and returns it, capped at maxlength.

        Args:
            path: file path or url

        Returns:
            content
        """

        # Extract text, then squeeze runs of three or more newlines down to a single blank line
        text = re.sub(r"\n{3,}", "\n\n", self.textractor(path))

        # Apply the length limit
        limit = self.maxlength
        return text[:limit]