/ restai / agent2 / react_prompt.py
react_prompt.py
  1  """Text-based ReAct mode for agent2.
  2  
  3  Used as a fallback when an LLM doesn't support native function calling. The
  4  runtime augments the system prompt with tool descriptions + a strict response
  5  format, sends NO `tools=` array to the provider, and parses the LLM's plain
  6  text reply for `Action:` / `Action Input:` / `Final Answer:` patterns.
  7  
  8  This module is pure functions + a small parse-result dataclass — no I/O.
  9  """
 10  from __future__ import annotations
 11  
 12  import json
 13  import re
 14  from dataclasses import dataclass, field
 15  from typing import Any, Literal, Optional, Sequence
 16  
 17  from .tool_adapter import AdaptedTool
 18  
 19  
# System-prompt template for ReAct mode, filled in with `str.format` by
# `build_react_system_prompt`. Placeholders:
#   {base_system}       - the caller's base system prompt
#   {tool_descriptions} - the rendered tool description blocks
#   {tool_names}        - the comma-separated tool names
# Because the text goes through `str.format`, any literal brace added to it
# must be doubled. The Thought / Action / Action Input / Final Answer labels
# spelled out here are the ones the response-parser regexes in this module
# look for.
REACT_SYSTEM_TEMPLATE = """\
{base_system}

You have access to the following tools. Use them when needed to answer the user.

{tool_descriptions}

To use a tool, respond using EXACTLY this format (one action at a time):

Thought: <your reasoning about what to do next>
Action: <tool name — must be one of: {tool_names}>
Action Input: <a single JSON object matching the tool's input schema>

After you call a tool, you will receive an Observation in the next turn. Then continue with another Thought/Action, or if you have enough information, respond with:

Thought: <final reasoning>
Final Answer: <your answer to the user>

Rules:
- Only ONE Action per response.
- Action Input MUST be a single valid JSON object on a single line OR inside a ```json``` fenced block.
- If you don't need any tool, skip straight to "Final Answer:".
- Never include both an Action and a Final Answer in the same response.
"""
 44  
 45  
 46  # ---------- system prompt rendering ----------
 47  
 48  
 49  def _condense_property(name: str, prop: dict) -> str:
 50      """Render one JSON-schema property as `name: type` (with optional default)."""
 51      ptype = prop.get("type", "any")
 52      if ptype == "array":
 53          items = prop.get("items") or {}
 54          item_type = items.get("type", "any") if isinstance(items, dict) else "any"
 55          ptype = f"array<{item_type}>"
 56      desc = prop.get("description")
 57      if desc:
 58          return f"{name}: {ptype} ({desc})"
 59      return f"{name}: {ptype}"
 60  
 61  
 62  def format_tool_for_react(tool: AdaptedTool) -> str:
 63      """Render one tool as a compact prompt-friendly description block."""
 64      schema = tool.input_schema or {}
 65      properties = schema.get("properties") or {}
 66      required = set(schema.get("required") or [])
 67  
 68      if properties:
 69          param_lines = []
 70          for name, prop in properties.items():
 71              marker = "" if name in required else "?"
 72              param_lines.append("    - " + _condense_property(name + marker, prop or {}))
 73          params_block = "\n" + "\n".join(param_lines)
 74      else:
 75          params_block = " (no arguments)"
 76  
 77      description = (tool.description or "").strip() or tool.name
 78      return f"- {tool.name}: {description}\n  Arguments:{params_block}"
 79  
 80  
 81  def build_react_system_prompt(base_system: str, tools: Sequence[AdaptedTool]) -> str:
 82      """Compose the augmented system prompt the LLM sees in ReAct mode."""
 83      base = (base_system or "You are a helpful assistant.").strip()
 84  
 85      if not tools:
 86          # No tools — degenerate case; just ask the model to answer directly.
 87          return (
 88              base
 89              + "\n\nYou have no tools available. Respond directly with:\n"
 90              "Thought: <your reasoning>\nFinal Answer: <your answer>"
 91          )
 92  
 93      descriptions = "\n\n".join(format_tool_for_react(t) for t in tools)
 94      names = ", ".join(t.name for t in tools)
 95      return REACT_SYSTEM_TEMPLATE.format(
 96          base_system=base,
 97          tool_descriptions=descriptions,
 98          tool_names=names,
 99      )
100  
101  
102  # ---------- response parser ----------
103  
104  
@dataclass
class ReactParseResult:
    """Outcome of parsing one raw LLM reply in ReAct format.

    ``kind`` tells which of the other fields are meaningful:
        "action" - a tool call was found (``action_name`` / ``action_input``).
        "final"  - a ``Final Answer:`` section was found (``final_text``).
        "text"   - no structured output was found; ``final_text`` holds the
                   whole stripped reply.
    """

    kind: Literal["action", "final", "text"]
    # Text captured after the "Thought:" label; empty when none was found.
    thought: str = ""
    # Tool name from the "Action:" line; set only for kind="action".
    action_name: Optional[str] = None
    # Parsed JSON arguments for the tool; stays {} when none could be parsed.
    action_input: dict = field(default_factory=dict)
    # The final answer (kind="final") or the raw reply (kind="text").
    final_text: str = ""
112  
113  
# Label patterns for the ReAct reply format. All are case-insensitive and
# multi-line (`(?im)`), so a label is recognised at the start of any line,
# optionally preceded by whitespace; whitespace inside two-word labels and
# before the colon is optional.

# Text after "Thought:", lazily extended (across lines, via DOTALL) until the
# next line that starts with an Action / Final Answer / Action Input label,
# or the end of the reply.
_THOUGHT_RE = re.compile(r"(?im)^\s*thought\s*:\s*(.+?)(?=^\s*(?:action|final\s*answer|action\s*input)\s*:|\Z)", re.DOTALL)
# First run of non-newline characters after "Action:" — the tool name.
# "Action Input:" does not match here because only whitespace is allowed
# between "action" and the colon.
_ACTION_RE = re.compile(r"(?im)^\s*action\s*:\s*([^\n\r]+)")
# Text after "Action Input:", lazily extended until the next line starting
# with an Observation / Thought / Action / Final Answer label, or the end.
_ACTION_INPUT_RE = re.compile(
    r"(?im)^\s*action\s*input\s*:\s*(.+?)(?=^\s*(?:observation|thought|action|final\s*answer)\s*:|\Z)",
    re.DOTALL,
)
# Everything from "Final Answer:" to the end of the reply.
_FINAL_RE = re.compile(
    r"(?im)^\s*final\s*answer\s*:\s*(.+?)\Z",
    re.DOTALL,
)
# Matches a ``` or ```json fenced block whose content is a `{...}` object and
# captures the object text.
_FENCE_RE = re.compile(r"```(?:json)?\s*(\{.*?\})\s*```", re.DOTALL)


def _strip_fences(text: str) -> str:
    """Return the object text of the first fenced JSON block, else `text` unchanged."""
    fenced = _FENCE_RE.search(text)
    return fenced.group(1) if fenced else text


# Shared decoder instance; only its `raw_decode` method is used.
_JSON_DECODER = json.JSONDecoder()


def _try_load_json(text: str) -> Optional[dict]:
    """Try to parse a JSON object out of `text`.

    The text (after unwrapping a fenced block, if any) is first parsed as a
    whole. If that fails or does not give an object, the function falls back
    to `json.JSONDecoder.raw_decode`, which consumes exactly one JSON value
    and so tolerates trailing junk after the object (a common LLM output
    pattern). Decoding is attempted at the first `{`; when that brace does
    not open valid JSON (e.g. prose such as ``use {city}``), the search
    resumes at the next `{` located after the point where decoding failed.
    Resuming after the failure point, rather than at the very next brace,
    keeps a fragment nested inside a truncated object from being returned as
    if it were the whole payload.

    Args:
        text: Raw text that may contain a JSON object; may be empty or None.

    Returns:
        The decoded dict, or None when no JSON object could be extracted.
    """
    text = (text or "").strip()
    if not text:
        return None

    candidate = _strip_fences(text)
    try:
        loaded = json.loads(candidate)
    except json.JSONDecodeError:
        pass
    else:
        if isinstance(loaded, dict):
            return loaded

    start = candidate.find("{")
    while start >= 0:
        try:
            obj, _ = _JSON_DECODER.raw_decode(candidate[start:])
        except json.JSONDecodeError as err:
            # `err.pos` is relative to the slice; always advance by at least
            # one character so the loop is guaranteed to terminate.
            start = candidate.find("{", start + max(err.pos, 1))
            continue
        return obj if isinstance(obj, dict) else None
    return None
165  
166  
def parse_react_response(text: str) -> ReactParseResult:
    """Interpret a raw LLM reply written in the ReAct text format.

    The result's ``kind`` is one of:
        "action" - an ``Action:`` line with a non-empty tool name was found;
                   ``action_name`` and ``action_input`` are populated.
        "final"  - no usable action, but a ``Final Answer:`` section exists;
                   ``final_text`` holds it.
        "text"   - neither was found (or the reply was empty); ``final_text``
                   holds the stripped reply.
    ``thought`` carries the ``Thought:`` section for "action" and "final".
    """
    if not text:
        return ReactParseResult(kind="text", final_text="")

    found_thought = _THOUGHT_RE.search(text)
    thought = "" if found_thought is None else found_thought.group(1).strip()

    found_action = _ACTION_RE.search(text)
    found_input = _ACTION_INPUT_RE.search(text)

    # An Action takes precedence over a Final Answer in the same reply: the
    # model is treated as still working.
    if found_action is not None:
        # Drop the quotes / backticks models sometimes wrap the name in. The
        # regex capture never spans lines, so the newline cut is a safeguard.
        tool_name = found_action.group(1).strip().strip("`'\"")
        tool_name = tool_name.partition("\n")[0].strip()

        # With an explicit "Action Input:" label, parse its payload;
        # otherwise look for a JSON object in whatever follows the Action line.
        if found_input is not None:
            payload_source = found_input.group(1).strip()
        else:
            payload_source = text[found_action.end():]
        arguments = _try_load_json(payload_source)
        if arguments is None:
            arguments = {}

        if tool_name:
            return ReactParseResult(
                kind="action",
                thought=thought,
                action_name=tool_name,
                action_input=arguments,
            )

    found_final = _FINAL_RE.search(text)
    if found_final is not None:
        return ReactParseResult(
            kind="final",
            thought=thought,
            final_text=found_final.group(1).strip(),
        )

    # Unstructured reply: hand the whole thing back as the answer so the
    # caller's loop terminates instead of hanging.
    return ReactParseResult(kind="text", final_text=text.strip())